From ec27633cfbdd1fca6b6f9b23fd1bb30ce7a3e3f6 Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=A9my=20Lal?= Date: Fri, 8 Nov 2024 14:59:03 +0100 Subject: [PATCH] Import nodejs_20.18.0+dfsg.orig-ada.tar.xz [dgit import orig nodejs_20.18.0+dfsg.orig-ada.tar.xz] --- .clang-format | 2 + .editorconfig | 5 + .github/FUNDING.yml | 1 + .github/ISSUE_TEMPLATE/1-bug-report.yml | 40 + .github/ISSUE_TEMPLATE/2-feature-request.yml | 23 + .github/ISSUE_TEMPLATE/config.yml | 5 + .github/dependabot.yml | 23 + .github/workflows/aarch64.yml | 46 + .github/workflows/alpine.yml | 45 + .github/workflows/cifuzz.yml | 41 + .github/workflows/codeql.yml | 47 + .github/workflows/dependency-review.yml | 14 + .github/workflows/documentation.yml | 35 + .github/workflows/emscripten.yml | 39 + .github/workflows/lint_and_format_check.yml | 38 + .github/workflows/macos_install.yml | 44 + .github/workflows/release-script-tests.yml | 40 + .github/workflows/release_create.yml | 61 + .github/workflows/release_prepare.yml | 57 + .github/workflows/scorecard.yml | 70 + .github/workflows/ubuntu-release.yml | 40 + .github/workflows/ubuntu-s390x.yml | 43 + .github/workflows/ubuntu-sanitized.yml | 40 + .github/workflows/ubuntu-undef.yml | 40 + .github/workflows/ubuntu.yml | 43 + .github/workflows/ubuntu_install.yml | 46 + .github/workflows/ubuntu_old.yml | 41 + .github/workflows/ubuntu_pedantic.yml | 41 + .github/workflows/visual_studio.yml | 45 + .github/workflows/visual_studio_clang.yml | 46 + .github/workflows/wpt-updater.yml | 40 + .gitignore | 27 + .python-version | 1 + CMakeLists.txt | 182 + Dockerfile | 12 + LICENSE-APACHE | 201 + LICENSE-MIT | 18 + README.md | 329 + SECURITY.md | 8 + benchmarks/CMakeLists.txt | 304 + benchmarks/bbc_bench.cpp | 36 + benchmarks/bench.cpp | 72 + benchmarks/bench_search_params.cpp | 343 + benchmarks/benchmark_header.h | 65 + benchmarks/benchmark_template.cpp | 878 ++ benchmarks/competitors/servo-url/Cargo.lock | 83 + benchmarks/competitors/servo-url/Cargo.toml | 16 + 
benchmarks/competitors/servo-url/README.md | 17 + .../competitors/servo-url/cbindgen.toml | 12 + benchmarks/competitors/servo-url/lib.rs | 44 + benchmarks/competitors/servo-url/servo_url.h | 30 + benchmarks/model_bench.cpp | 280 + benchmarks/percent_encode.cpp | 266 + .../performancecounters/apple_arm_events.h | 1110 ++ .../performancecounters/event_counter.h | 155 + .../performancecounters/linux-perf-events.h | 105 + benchmarks/wpt_bench.cpp | 227 + clang-format-ignore.txt | 0 cmake/CPM.cmake | 24 + cmake/ada-config.cmake.in | 1 + cmake/ada-flags.cmake | 62 + cmake/add-cpp-test.cmake | 70 + cmake/codecoverage.cmake | 742 ++ docs/RELEASE.md | 19 + docs/cli.md | 204 + docs/doxygen/footer.html | 7 + docs/doxygen/header.html | 84 + doxygen | 2741 +++++ fuzz/ada_c.c | 65 + fuzz/ada_c.options | 3 + fuzz/build.sh | 52 + fuzz/can_parse.cc | 23 + fuzz/idna.cc | 22 + fuzz/parse.cc | 225 + fuzz/parse.options | 3 + fuzz/url.dict | 26 + fuzz/url_search_params.cc | 56 + include/ada.h | 33 + include/ada/ada_idna.h | 149 + include/ada/ada_version.h | 20 + include/ada/character_sets-inl.h | 521 + include/ada/character_sets.h | 24 + include/ada/checkers-inl.h | 67 + include/ada/checkers.h | 123 + include/ada/common_defs.h | 313 + include/ada/encoding_type.h | 32 + include/ada/expected.h | 2519 +++++ include/ada/helpers.h | 232 + include/ada/implementation.h | 60 + include/ada/log.h | 79 + include/ada/parser.h | 53 + include/ada/scheme-inl.h | 87 + include/ada/scheme.h | 77 + include/ada/serializers.h | 45 + include/ada/state.h | 126 + include/ada/unicode-inl.h | 29 + include/ada/unicode.h | 226 + include/ada/url-inl.h | 253 + include/ada/url.h | 417 + include/ada/url_aggregator-inl.h | 928 ++ include/ada/url_aggregator.h | 320 + include/ada/url_base-inl.h | 39 + include/ada/url_base.h | 142 + include/ada/url_components.h | 80 + include/ada/url_search_params-inl.h | 227 + include/ada/url_search_params.h | 188 + include/ada_c.h | 187 + pyproject.toml | 38 + 
singleheader/CMakeLists.txt | 66 + singleheader/README.md | 36 + singleheader/amalgamate.py | 150 + singleheader/demo.c | 41 + singleheader/demo.cpp | 15 + src/CMakeLists.txt | 60 + src/ada.cpp | 13 + src/ada_c.cpp | 743 ++ src/ada_idna.cpp | 9650 ++++++++++++++++ src/checkers.cpp | 127 + src/helpers.cpp | 795 ++ src/implementation.cpp | 82 + src/parser.cpp | 937 ++ src/serializers.cpp | 80 + src/unicode.cpp | 482 + src/url-getters.cpp | 95 + src/url-setters.cpp | 236 + src/url.cpp | 592 + src/url_aggregator.cpp | 1739 +++ src/url_components.cpp | 126 + tests/CMakeLists.txt | 66 + tests/ada_c.cpp | 358 + tests/basic_fuzzer.cpp | 140 + tests/basic_tests.cpp | 465 + tests/from_file_tests.cpp | 18 + tests/installation/CMakeLists.txt | 24 + tests/url_components.cpp | 145 + tests/url_search_params.cpp | 259 + tests/wasm/CMakeLists.txt | 13 + tests/wasm/test.js.in | 33 + tests/wasm/wasm.cpp | 45 + tests/wpt/CMakeLists.txt | 5 + tests/wpt/IdnaTestV2.json | 9754 ++++++++++++++++ tests/wpt/ada_extra_setters_tests.json | 134 + tests/wpt/ada_extra_urltestdata.json | 289 + tests/wpt/ada_long_urltestdata.json | 19 + tests/wpt/percent-encoding.json | 48 + tests/wpt/setters_tests.json | 2424 ++++ tests/wpt/toascii.json | 202 + tests/wpt/urltestdata-javascript-only.json | 18 + tests/wpt/urltestdata.json | 9996 +++++++++++++++++ tests/wpt/verifydnslength_tests.json | 88 + tests/wpt_tests.cpp | 477 + tools/CMakeLists.txt | 1 + tools/cli/CMakeLists.txt | 29 + tools/cli/adaparse.cpp | 292 + tools/cli/benchmark_adaparse.sh | 58 + tools/cli/benchmark_write_to_file.sh | 58 + tools/cli/line_iterator.h | 30 + tools/prepare-doxygen.sh | 22 + tools/release/__init__.py | 0 tools/release/create_release.py | 26 + tools/release/lib/__init__.py | 0 tools/release/lib/release.py | 201 + tools/release/lib/tests/__init__.py | 0 .../lib/tests/samples/ada_version_h.txt | 20 + .../tests/samples/ada_version_h_expected.txt | 20 + .../release/lib/tests/samples/cmakelists.txt | 10 + 
.../lib/tests/samples/cmakelists_expected.txt | 10 + tools/release/lib/tests/samples/doxygen.txt | 82 + .../lib/tests/samples/doxygen_expected.txt | 82 + tools/release/lib/tests/test_release.py | 524 + .../release/lib/tests/test_update_versions.py | 53 + tools/release/lib/versions.py | 52 + tools/release/requirements.txt | 2 + tools/release/update_versions.py | 21 + tools/run-clangcldocker.sh | 22 + tools/update-wpt.sh | 27 + 176 files changed, 60752 insertions(+) create mode 100644 .clang-format create mode 100644 .editorconfig create mode 100644 .github/FUNDING.yml create mode 100644 .github/ISSUE_TEMPLATE/1-bug-report.yml create mode 100644 .github/ISSUE_TEMPLATE/2-feature-request.yml create mode 100644 .github/ISSUE_TEMPLATE/config.yml create mode 100644 .github/dependabot.yml create mode 100644 .github/workflows/aarch64.yml create mode 100644 .github/workflows/alpine.yml create mode 100644 .github/workflows/cifuzz.yml create mode 100644 .github/workflows/codeql.yml create mode 100644 .github/workflows/dependency-review.yml create mode 100644 .github/workflows/documentation.yml create mode 100644 .github/workflows/emscripten.yml create mode 100644 .github/workflows/lint_and_format_check.yml create mode 100644 .github/workflows/macos_install.yml create mode 100644 .github/workflows/release-script-tests.yml create mode 100644 .github/workflows/release_create.yml create mode 100644 .github/workflows/release_prepare.yml create mode 100644 .github/workflows/scorecard.yml create mode 100644 .github/workflows/ubuntu-release.yml create mode 100644 .github/workflows/ubuntu-s390x.yml create mode 100644 .github/workflows/ubuntu-sanitized.yml create mode 100644 .github/workflows/ubuntu-undef.yml create mode 100644 .github/workflows/ubuntu.yml create mode 100644 .github/workflows/ubuntu_install.yml create mode 100644 .github/workflows/ubuntu_old.yml create mode 100644 .github/workflows/ubuntu_pedantic.yml create mode 100644 .github/workflows/visual_studio.yml create mode 
100644 .github/workflows/visual_studio_clang.yml create mode 100644 .github/workflows/wpt-updater.yml create mode 100644 .gitignore create mode 100644 .python-version create mode 100644 CMakeLists.txt create mode 100644 Dockerfile create mode 100644 LICENSE-APACHE create mode 100644 LICENSE-MIT create mode 100644 README.md create mode 100644 SECURITY.md create mode 100644 benchmarks/CMakeLists.txt create mode 100644 benchmarks/bbc_bench.cpp create mode 100644 benchmarks/bench.cpp create mode 100644 benchmarks/bench_search_params.cpp create mode 100644 benchmarks/benchmark_header.h create mode 100644 benchmarks/benchmark_template.cpp create mode 100644 benchmarks/competitors/servo-url/Cargo.lock create mode 100644 benchmarks/competitors/servo-url/Cargo.toml create mode 100644 benchmarks/competitors/servo-url/README.md create mode 100644 benchmarks/competitors/servo-url/cbindgen.toml create mode 100644 benchmarks/competitors/servo-url/lib.rs create mode 100644 benchmarks/competitors/servo-url/servo_url.h create mode 100644 benchmarks/model_bench.cpp create mode 100644 benchmarks/percent_encode.cpp create mode 100644 benchmarks/performancecounters/apple_arm_events.h create mode 100644 benchmarks/performancecounters/event_counter.h create mode 100644 benchmarks/performancecounters/linux-perf-events.h create mode 100644 benchmarks/wpt_bench.cpp create mode 100644 clang-format-ignore.txt create mode 100644 cmake/CPM.cmake create mode 100644 cmake/ada-config.cmake.in create mode 100644 cmake/ada-flags.cmake create mode 100644 cmake/add-cpp-test.cmake create mode 100644 cmake/codecoverage.cmake create mode 100644 docs/RELEASE.md create mode 100644 docs/cli.md create mode 100644 docs/doxygen/footer.html create mode 100644 docs/doxygen/header.html create mode 100644 doxygen create mode 100644 fuzz/ada_c.c create mode 100644 fuzz/ada_c.options create mode 100755 fuzz/build.sh create mode 100644 fuzz/can_parse.cc create mode 100644 fuzz/idna.cc create mode 100644 fuzz/parse.cc 
create mode 100644 fuzz/parse.options create mode 100644 fuzz/url.dict create mode 100644 fuzz/url_search_params.cc create mode 100644 include/ada.h create mode 100644 include/ada/ada_idna.h create mode 100644 include/ada/ada_version.h create mode 100644 include/ada/character_sets-inl.h create mode 100644 include/ada/character_sets.h create mode 100644 include/ada/checkers-inl.h create mode 100644 include/ada/checkers.h create mode 100644 include/ada/common_defs.h create mode 100644 include/ada/encoding_type.h create mode 100644 include/ada/expected.h create mode 100644 include/ada/helpers.h create mode 100644 include/ada/implementation.h create mode 100644 include/ada/log.h create mode 100644 include/ada/parser.h create mode 100644 include/ada/scheme-inl.h create mode 100644 include/ada/scheme.h create mode 100644 include/ada/serializers.h create mode 100644 include/ada/state.h create mode 100644 include/ada/unicode-inl.h create mode 100644 include/ada/unicode.h create mode 100644 include/ada/url-inl.h create mode 100644 include/ada/url.h create mode 100644 include/ada/url_aggregator-inl.h create mode 100644 include/ada/url_aggregator.h create mode 100644 include/ada/url_base-inl.h create mode 100644 include/ada/url_base.h create mode 100644 include/ada/url_components.h create mode 100644 include/ada/url_search_params-inl.h create mode 100644 include/ada/url_search_params.h create mode 100644 include/ada_c.h create mode 100644 pyproject.toml create mode 100644 singleheader/CMakeLists.txt create mode 100644 singleheader/README.md create mode 100755 singleheader/amalgamate.py create mode 100644 singleheader/demo.c create mode 100644 singleheader/demo.cpp create mode 100644 src/CMakeLists.txt create mode 100644 src/ada.cpp create mode 100644 src/ada_c.cpp create mode 100644 src/ada_idna.cpp create mode 100644 src/checkers.cpp create mode 100644 src/helpers.cpp create mode 100644 src/implementation.cpp create mode 100644 src/parser.cpp create mode 100644 
src/serializers.cpp create mode 100644 src/unicode.cpp create mode 100644 src/url-getters.cpp create mode 100644 src/url-setters.cpp create mode 100644 src/url.cpp create mode 100644 src/url_aggregator.cpp create mode 100644 src/url_components.cpp create mode 100644 tests/CMakeLists.txt create mode 100644 tests/ada_c.cpp create mode 100644 tests/basic_fuzzer.cpp create mode 100644 tests/basic_tests.cpp create mode 100644 tests/from_file_tests.cpp create mode 100644 tests/installation/CMakeLists.txt create mode 100644 tests/url_components.cpp create mode 100644 tests/url_search_params.cpp create mode 100644 tests/wasm/CMakeLists.txt create mode 100644 tests/wasm/test.js.in create mode 100644 tests/wasm/wasm.cpp create mode 100644 tests/wpt/CMakeLists.txt create mode 100644 tests/wpt/IdnaTestV2.json create mode 100644 tests/wpt/ada_extra_setters_tests.json create mode 100644 tests/wpt/ada_extra_urltestdata.json create mode 100644 tests/wpt/ada_long_urltestdata.json create mode 100644 tests/wpt/percent-encoding.json create mode 100644 tests/wpt/setters_tests.json create mode 100644 tests/wpt/toascii.json create mode 100644 tests/wpt/urltestdata-javascript-only.json create mode 100644 tests/wpt/urltestdata.json create mode 100644 tests/wpt/verifydnslength_tests.json create mode 100644 tests/wpt_tests.cpp create mode 100644 tools/CMakeLists.txt create mode 100644 tools/cli/CMakeLists.txt create mode 100644 tools/cli/adaparse.cpp create mode 100644 tools/cli/benchmark_adaparse.sh create mode 100644 tools/cli/benchmark_write_to_file.sh create mode 100644 tools/cli/line_iterator.h create mode 100755 tools/prepare-doxygen.sh create mode 100644 tools/release/__init__.py create mode 100755 tools/release/create_release.py create mode 100644 tools/release/lib/__init__.py create mode 100644 tools/release/lib/release.py create mode 100644 tools/release/lib/tests/__init__.py create mode 100644 tools/release/lib/tests/samples/ada_version_h.txt create mode 100644 
tools/release/lib/tests/samples/ada_version_h_expected.txt create mode 100644 tools/release/lib/tests/samples/cmakelists.txt create mode 100644 tools/release/lib/tests/samples/cmakelists_expected.txt create mode 100644 tools/release/lib/tests/samples/doxygen.txt create mode 100644 tools/release/lib/tests/samples/doxygen_expected.txt create mode 100644 tools/release/lib/tests/test_release.py create mode 100644 tools/release/lib/tests/test_update_versions.py create mode 100644 tools/release/lib/versions.py create mode 100644 tools/release/requirements.txt create mode 100755 tools/release/update_versions.py create mode 100755 tools/run-clangcldocker.sh create mode 100755 tools/update-wpt.sh diff --git a/.clang-format b/.clang-format new file mode 100644 index 000000000..1acba5a7b --- /dev/null +++ b/.clang-format @@ -0,0 +1,2 @@ +BasedOnStyle: Google +SortIncludes: false diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 000000000..0b3779e53 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,5 @@ +root = true + +[*] +end_of_line = lf +insert_final_newline = true diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 000000000..de203636b --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1 @@ +github: [anonrig, lemire] diff --git a/.github/ISSUE_TEMPLATE/1-bug-report.yml b/.github/ISSUE_TEMPLATE/1-bug-report.yml new file mode 100644 index 000000000..c11dae751 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/1-bug-report.yml @@ -0,0 +1,40 @@ +name: 🐛 Bug report +description: Create a report to help us improve +body: + - type: markdown + attributes: + value: | + Thank you for reporting an issue. + + Please fill in as much of the following form as you're able. + - type: input + attributes: + label: Version + description: Which Ada version are you referring to? 
+ - type: input + attributes: + label: Platform + description: | + UNIX: output of `uname -a` + Windows: output of `"$([Environment]::OSVersion.VersionString) $(('x86', 'x64')[[Environment]::Is64BitOperatingSystem])"` in PowerShell console + - type: textarea + attributes: + label: What steps will reproduce the bug? + description: Enter details about your bug, preferably a simple code snippet that can be run directly without installing third-party dependencies. + - type: textarea + attributes: + label: How often does it reproduce? Is there a required condition? + - type: textarea + attributes: + label: What is the expected behavior? + description: If possible please provide textual output instead of screenshots. + - type: textarea + attributes: + label: What do you see instead? + description: If possible please provide textual output instead of screenshots. + validations: + required: true + - type: textarea + attributes: + label: Additional information + description: Tell us anything else you think we should know. diff --git a/.github/ISSUE_TEMPLATE/2-feature-request.yml b/.github/ISSUE_TEMPLATE/2-feature-request.yml new file mode 100644 index 000000000..f7cae8c0e --- /dev/null +++ b/.github/ISSUE_TEMPLATE/2-feature-request.yml @@ -0,0 +1,23 @@ +name: 🚀 Feature request +description: Suggest an idea for this project +labels: [feature request] +body: + - type: markdown + attributes: + value: | + Thank you for suggesting an idea to make Node.js better. + + Please fill in as much of the following form as you're able. + - type: textarea + attributes: + label: What is the problem this feature will solve? + validations: + required: true + - type: textarea + attributes: + label: What is the feature you are proposing to solve the problem? + validations: + required: true + - type: textarea + attributes: + label: What alternatives have you considered? 
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 000000000..3159b474e --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,5 @@ +blank_issues_enabled: true +contact_links: + - name: Looking for documentation? + url: https://ada-url.github.io/ada + about: Please navigate to our documentation website. diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 000000000..1cfc22ead --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,23 @@ +# Set update schedule for GitHub Actions + +version: 2 +updates: + - package-ecosystem: github-actions + directory: / + schedule: + interval: monthly + + - package-ecosystem: docker + directory: / + schedule: + interval: monthly + + - package-ecosystem: cargo + directory: /benchmarks/competitors/servo-url + schedule: + interval: monthly + + - package-ecosystem: pip + directory: /tools/release + schedule: + interval: monthly \ No newline at end of file diff --git a/.github/workflows/aarch64.yml b/.github/workflows/aarch64.yml new file mode 100644 index 000000000..1d544bf76 --- /dev/null +++ b/.github/workflows/aarch64.yml @@ -0,0 +1,46 @@ +name: Ubuntu aarch64 (GCC 12) + +on: + pull_request: + types: [opened, synchronize, reopened, ready_for_review] + paths-ignore: + - '**.md' + - 'docs/**' + push: + branches: + - main + paths-ignore: + - '**.md' + - 'docs/**' + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + shared: [OFF] + steps: + - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + - uses: uraimo/run-on-arch-action@b0ffb25eb00af00468375982384441f063da1741 # v2.7.2 + name: Build and Test + id: runcmd + env: + CXX: g++-12 + with: + arch: aarch64 + githubToken: ${{ github.token }} + distro: ubuntu22.04 + install: | + apt-get update -q -y + apt-get install -y cmake 
make g++ ninja-build git + run: | + cmake -DADA_SANITIZE_BOUNDS_STRICT=ON -DBUILD_SHARED_LIBS=${{matrix.shared}} -B build + cmake --build build + ctest --test-dir build diff --git a/.github/workflows/alpine.yml b/.github/workflows/alpine.yml new file mode 100644 index 000000000..058b74e3e --- /dev/null +++ b/.github/workflows/alpine.yml @@ -0,0 +1,45 @@ +name: Alpine Linux + +on: + pull_request: + types: [opened, synchronize, reopened, ready_for_review] + paths-ignore: + - '**.md' + - 'docs/**' + push: + branches: + - main + paths-ignore: + - '**.md' + - 'docs/**' + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + ubuntu-build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + - name: start docker + run: | + docker run -w /src -dit --name alpine -v $PWD:/src alpine:latest + echo 'docker exec alpine "$@";' > ./alpine.sh + chmod +x ./alpine.sh + - name: install packages + run: | + ./alpine.sh apk update + ./alpine.sh apk add build-base cmake g++ linux-headers git bash icu-dev + - name: cmake + run: | + ./alpine.sh cmake -DADA_BENCHMARKS=ON -B build_for_alpine + - name: build + run: | + ./alpine.sh cmake --build build_for_alpine + - name: test + run: | + ./alpine.sh bash -c "cd build_for_alpine && ctest ." 
diff --git a/.github/workflows/cifuzz.yml b/.github/workflows/cifuzz.yml new file mode 100644 index 000000000..7f1f1f02d --- /dev/null +++ b/.github/workflows/cifuzz.yml @@ -0,0 +1,41 @@ +name: CIFuzz + +on: + pull_request: + branches: + - main + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +permissions: read-all + +jobs: + Fuzzing: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + sanitizer: [address, undefined, memory] + steps: + - name: Build Fuzzers (${{ matrix.sanitizer }}) + id: build + uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master + with: + oss-fuzz-project-name: 'ada-url' + language: c++ + sanitizer: ${{ matrix.sanitizer }} + - name: Run Fuzzers (${{ matrix.sanitizer }}) + uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master + with: + oss-fuzz-project-name: 'ada-url' + language: c++ + fuzz-seconds: 600 + sanitizer: ${{ matrix.sanitizer }} + - name: Upload Crash + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + if: steps.build.outcome == 'success' + with: + name: ${{ matrix.sanitizer }}-artifacts + path: ./out/artifacts diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 000000000..a993f2940 --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,47 @@ +name: "CodeQL" + +on: + schedule: + - cron: '0 0 * * 1' + +permissions: + contents: read + security-events: write + pull-requests: read + actions: read + +jobs: + analyze: + name: Analyze + + runs-on: ubuntu-latest + + permissions: + actions: read + contents: read + security-events: write + + strategy: + fail-fast: false + matrix: + language: [ 'cpp', 'python' ] + + steps: + - name: Checkout repository + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + + # Initializes the CodeQL tools for scanning. 
+ - name: Initialize CodeQL + uses: github/codeql-action/init@4dd16135b69a43b6c8efb853346f8437d92d3c93 # v2.2.5 + with: + languages: ${{ matrix.language }} + + # Autobuild attempts to build any compiled languages (C/C++, C#, Go, or Java). + # If this step fails, then you should remove it and run the build manually (see below) + - name: Autobuild + uses: github/codeql-action/autobuild@4dd16135b69a43b6c8efb853346f8437d92d3c93 # v2.2.5 + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@4dd16135b69a43b6c8efb853346f8437d92d3c93 # v2.2.5 + with: + category: "/language:${{matrix.language}}" diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml new file mode 100644 index 000000000..913b995bd --- /dev/null +++ b/.github/workflows/dependency-review.yml @@ -0,0 +1,14 @@ +name: 'Dependency Review' +on: [pull_request] + +permissions: + contents: read + +jobs: + dependency-review: + runs-on: ubuntu-latest + steps: + - name: 'Checkout Repository' + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + - name: 'Dependency Review' + uses: actions/dependency-review-action@5a2ce3f5b92ee19cbb1541a4984c76d921601d7c # v4.3.4 diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml new file mode 100644 index 000000000..87458661e --- /dev/null +++ b/.github/workflows/documentation.yml @@ -0,0 +1,35 @@ +name: Doxygen GitHub Pages + +on: + push: + branches: + - main + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + deploy: + permissions: + contents: write + pages: write + id-token: write + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + - name: Install theme + run: ./tools/prepare-doxygen.sh + - uses: 
mattnotmitt/doxygen-action@e0c8cd4cd05e28b88e723b25b30188ecf2505b40 # edge + with: + doxyfile-path: './doxygen' + - name: Deploy to GitHub Pages + uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: docs/html diff --git a/.github/workflows/emscripten.yml b/.github/workflows/emscripten.yml new file mode 100644 index 000000000..92471dfee --- /dev/null +++ b/.github/workflows/emscripten.yml @@ -0,0 +1,39 @@ +name: emscripten + +on: + pull_request: + types: [opened, synchronize, reopened, ready_for_review] + paths-ignore: + - '**.md' + - 'docs/**' + push: + branches: + - main + paths-ignore: + - '**.md' + - 'docs/**' + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + - uses: actions/setup-node@1e60f620b9541d16bece96c5465dc8ee9832be0b # v4.0.3 + - uses: mymindstorm/setup-emsdk@6ab9eb1bda2574c4ddb79809fc9247783eaf9021 # v14 + - name: Verify + run: emcc -v + - name: Checkout + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v3.6.0 + - name: Configure + run: emcmake cmake -B buildwasm -D ADA_TOOLS=OFF + - name: Build + run: cmake --build buildwasm + - name: Test + run: ctest --test-dir buildwasm diff --git a/.github/workflows/lint_and_format_check.yml b/.github/workflows/lint_and_format_check.yml new file mode 100644 index 000000000..416008d28 --- /dev/null +++ b/.github/workflows/lint_and_format_check.yml @@ -0,0 +1,38 @@ +name: Lint and format + +on: + pull_request: + types: [opened, synchronize, reopened, ready_for_review] + paths-ignore: + - '**.md' + - 'docs/**' + push: + branches: + - main + paths-ignore: + - '**.md' + - 'docs/**' + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: 
true + +jobs: + lint-and-format: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + + - name: Run clang-format + uses: jidicula/clang-format-action@c74383674bf5f7c69f60ce562019c1c94bc1421a # v4.13.0 + with: + clang-format-version: '17' + fallback-style: 'Google' + + - uses: chartboost/ruff-action@e18ae971ccee1b2d7bbef113930f00c670b78da4 # v1.0.0 + name: Lint with Ruff + with: + version: 0.4.4 diff --git a/.github/workflows/macos_install.yml b/.github/workflows/macos_install.yml new file mode 100644 index 000000000..55e1e7639 --- /dev/null +++ b/.github/workflows/macos_install.yml @@ -0,0 +1,44 @@ +name: macos (Installation) + +on: + pull_request: + types: [opened, synchronize, reopened, ready_for_review] + paths-ignore: + - '**.md' + - 'docs/**' + push: + branches: + - main + paths-ignore: + - '**.md' + - 'docs/**' + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + ubuntu-build: + runs-on: macos-latest + strategy: + matrix: + include: + - {shared: ON} + - {shared: OFF} + steps: + - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + - name: Prepare + run: cmake -DBUILD_SHARED_LIBS=${{matrix.shared}} -DCMAKE_INSTALL_PREFIX:PATH=destination -B build + - name: Build + run: cmake --build build -j=2 + - name: Install + run: cmake --install build + - name: Prepare test package + run: cmake -DCMAKE_INSTALL_PREFIX:PATH=../../destination -S tests/installation -B buildbabyada + - name: Build test package + run: cmake --build buildbabyada + - name: Run example + run: ./buildbabyada/main diff --git a/.github/workflows/release-script-tests.yml b/.github/workflows/release-script-tests.yml new file mode 100644 index 000000000..11279fa0c --- /dev/null +++ b/.github/workflows/release-script-tests.yml @@ -0,0 +1,40 @@ +name: Release Script Tests + +on: + # workflow_call is used to indicate that a workflow can be 
called by another workflow. + workflow_call: + pull_request: + types: [opened, synchronize, reopened, ready_for_review] + paths-ignore: + - '**.md' + - 'docs/**' + push: + branches: + - main + paths-ignore: + - '**.md' + - 'docs/**' + +permissions: + contents: read + +jobs: + release-script-test: + runs-on: ubuntu-latest + defaults: + run: + working-directory: ./tools/release + + steps: + - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + + - name: Prepare Python + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 + with: + cache: 'pip' # caching pip dependencies + + - name: Install dependencies + run: pip install -r requirements.txt + + - name: Run tests + run: pytest -v diff --git a/.github/workflows/release_create.yml b/.github/workflows/release_create.yml new file mode 100644 index 000000000..f5b873209 --- /dev/null +++ b/.github/workflows/release_create.yml @@ -0,0 +1,61 @@ +name: Release Create + +on: + pull_request: + types: [closed] + +env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + +jobs: + check-release-conditions: + runs-on: ubuntu-latest + if: | + github.event.pull_request.merged == true && + github.event.pull_request.base.ref == 'main' && + startsWith(github.event.pull_request.head.ref, 'release/v') && + startsWith(github.event.pull_request.user.login, 'github-actions') + + steps: + - name: Check release conditions + run: | + echo "All conditions have been met!" 
+ + release-script-test: + needs: check-release-conditions + uses: ./.github/workflows/release-script-tests.yml + + create-release: + permissions: + contents: write + needs: release-script-test + runs-on: ubuntu-latest + if: ${{ needs.release-script-test.result == 'success' }} + + env: + NEXT_RELEASE_TAG: ${{ github.event.pull_request.head.ref }} + steps: + - name: Checkout + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + + - name: Prepare Python + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 + with: + cache: 'pip' # caching pip dependencies + + - name: Install dependencies + run: pip install -r ./tools/release/requirements.txt + + - name: Extract Tag from branch name + run: | + NEXT_RELEASE_TAG=$(echo $NEXT_RELEASE_TAG | sed 's/^release\///') + echo "NEXT_RELEASE_TAG=${NEXT_RELEASE_TAG}" >> $GITHUB_ENV + + - name: Target release Tag + run: echo "New tag $NEXT_RELEASE_TAG" + + - name: Amalgamation + run: ./singleheader/amalgamate.py + + - name: "Create release" + run: ./tools/release/create_release.py diff --git a/.github/workflows/release_prepare.yml b/.github/workflows/release_prepare.yml new file mode 100644 index 000000000..3e9fc6a9f --- /dev/null +++ b/.github/workflows/release_prepare.yml @@ -0,0 +1,57 @@ +name: Release Prepare + +on: + workflow_dispatch: + inputs: + tag: + type: string + required: true + description: "Tag for the next release. 
Ex.: v5.0.0" + +env: + NEXT_RELEASE_TAG: ${{ github.event.inputs.tag }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + +jobs: + release-script-test: + uses: ./.github/workflows/release-script-tests.yml + + prepare-release-and-pull-request: + permissions: + contents: write + pull-requests: write + needs: release-script-test + runs-on: ubuntu-latest + if: ${{ needs.release-script-test.result == 'success' }} + env: + CXX: clang++-14 + steps: + - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + + - name: Prepare Python + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 + with: + cache: 'pip' # caching pip dependencies + + - name: Install dependencies + run: pip install -r ./tools/release/requirements.txt + + - name: Update source code versions + run: ./tools/release/update_versions.py + + - name: Ada Build + run: cmake -B build && cmake --build build + - name: Ada Test + run: ctest --output-on-failure --test-dir build + + - name: Create PR with code updates for new release + uses: peter-evans/create-pull-request@f3a21bf3404eae73a97f65817ab35f351a1a63fe #v5.0.0 + with: + commit-message: "chore: release ${{ env.NEXT_RELEASE_TAG }}" + branch: "release/${{ env.NEXT_RELEASE_TAG }}" + title: "chore: release ${{ env.NEXT_RELEASE_TAG }}" + token: ${{ env.GITHUB_TOKEN }} + body: | + This pull PR updates the source code version to ${{ env.NEXT_RELEASE_TAG }} + delete-branch: true + reviewers: "lemire,anonrig" diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml new file mode 100644 index 000000000..39433ad6e --- /dev/null +++ b/.github/workflows/scorecard.yml @@ -0,0 +1,70 @@ +# This workflow uses actions that are not certified by GitHub. They are provided +# by a third-party and are governed by separate terms of service, privacy +# policy, and support documentation. + +name: Scorecard supply-chain security +on: + # For Branch-Protection check. Only the default branch is supported. 
See + # https://github.com/ossf/scorecard/blob/main/docs/checks.md#branch-protection + branch_protection_rule: + # To guarantee Maintained check is occasionally updated. See + # https://github.com/ossf/scorecard/blob/main/docs/checks.md#maintained + schedule: + - cron: '0 0 * * 1' + +# Declare default permissions as read only. +permissions: read-all + +jobs: + analysis: + name: Scorecard analysis + runs-on: ubuntu-latest + permissions: + # Needed to upload the results to code-scanning dashboard. + security-events: write + # Needed to publish results and get a badge (see publish_results below). + id-token: write + # Uncomment the permissions below if installing in a private repository. + # contents: read + # actions: read + + steps: + - name: "Checkout code" + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + with: + persist-credentials: false + + - name: "Run analysis" + uses: ossf/scorecard-action@62b2cac7ed8198b15735ed49ab1e5cf35480ba46 # v2.4.0 + with: + results_file: results.sarif + results_format: sarif + # (Optional) "write" PAT token. Uncomment the `repo_token` line below if: + # - you want to enable the Branch-Protection check on a *public* repository, or + # - you are installing Scorecard on a *private* repository + # To create the PAT, follow the steps in https://github.com/ossf/scorecard-action#authentication-with-pat. + # repo_token: ${{ secrets.SCORECARD_TOKEN }} + + # Public repositories: + # - Publish results to OpenSSF REST API for easy access by consumers + # - Allows the repository to include the Scorecard badge. + # - See https://github.com/ossf/scorecard-action#publishing-results. + # For private repositories: + # - `publish_results` will always be set to `false`, regardless + # of the value entered here. + publish_results: true + + # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF + # format to the repository Actions tab. 
+ - name: "Upload artifact" + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + with: + name: SARIF file + path: results.sarif + retention-days: 5 + + # Upload the results to GitHub's code scanning dashboard. + - name: "Upload to code-scanning" + uses: github/codeql-action/upload-sarif@4dd16135b69a43b6c8efb853346f8437d92d3c93 # v3.26.6 + with: + sarif_file: results.sarif diff --git a/.github/workflows/ubuntu-release.yml b/.github/workflows/ubuntu-release.yml new file mode 100644 index 000000000..93ef62702 --- /dev/null +++ b/.github/workflows/ubuntu-release.yml @@ -0,0 +1,40 @@ +name: Ubuntu 22.04 (Release build) + +on: + pull_request: + types: [opened, synchronize, reopened, ready_for_review] + paths-ignore: + - '**.md' + - 'docs/**' + push: + branches: + - main + paths-ignore: + - '**.md' + - 'docs/**' + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + ubuntu-release-build: + runs-on: ubuntu-22.04 + strategy: + matrix: + cxx: [g++-12, clang++-14] + steps: + - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + - name: Setup Ninja + run: sudo apt-get install ninja-build + - name: Prepare + run: cmake -DBUILD_TESTING=OFF -DCMAKE_BUILD_TYPE=Release -G Ninja -B build + env: + CXX: ${{matrix.cxx}} + - name: Build + run: cmake --build build -j=2 + - name: Test + run: ctest --output-on-failure --test-dir build diff --git a/.github/workflows/ubuntu-s390x.yml b/.github/workflows/ubuntu-s390x.yml new file mode 100644 index 000000000..4404d36a9 --- /dev/null +++ b/.github/workflows/ubuntu-s390x.yml @@ -0,0 +1,43 @@ +name: Ubuntu s390x (GCC 11) + +on: + pull_request: + types: [opened, synchronize, reopened, ready_for_review] + paths-ignore: + - '**.md' + - 'docs/**' + push: + branches: + - main + paths-ignore: + - '**.md' + - 'docs/**' + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + 
cancel-in-progress: true + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + - uses: uraimo/run-on-arch-action@b0ffb25eb00af00468375982384441f063da1741 # v2.7.2 + name: Test + id: runcmd + with: + arch: s390x + distro: ubuntu_latest + githubToken: ${{ github.token }} + install: | + apt-get update -q -y + apt-get install -y cmake make g++ git + apt-get install -y ninja-build + run: | + cmake -DCMAKE_BUILD_TYPE=Release -G Ninja -B build + rm -r -f dependencies + cmake --build build -j=2 + ctest --output-on-failure --test-dir build diff --git a/.github/workflows/ubuntu-sanitized.yml b/.github/workflows/ubuntu-sanitized.yml new file mode 100644 index 000000000..bd3be3f56 --- /dev/null +++ b/.github/workflows/ubuntu-sanitized.yml @@ -0,0 +1,40 @@ +name: Ubuntu 22.04 (GCC 12 SANITIZED) + +on: + pull_request: + types: [opened, synchronize, reopened, ready_for_review] + paths-ignore: + - '**.md' + - 'docs/**' + push: + branches: + - main + paths-ignore: + - '**.md' + - 'docs/**' + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + ubuntu-build: + runs-on: ubuntu-22.04 + strategy: + matrix: + shared: [ON, OFF] + steps: + - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + - name: Setup Ninja + run: sudo apt-get install ninja-build + - name: Prepare + run: cmake -DADA_SANITIZE=ON -DADA_DEVELOPMENT_CHECKS=ON -DBUILD_SHARED_LIBS=${{matrix.shared}} -G Ninja -B build + env: + CXX: g++-12 + - name: Build + run: cmake --build build -j=2 + - name: Test + run: ctest --output-on-failure --test-dir build diff --git a/.github/workflows/ubuntu-undef.yml b/.github/workflows/ubuntu-undef.yml new file mode 100644 index 000000000..4214d186a --- /dev/null +++ b/.github/workflows/ubuntu-undef.yml @@ -0,0 +1,40 @@ +name: Ubuntu 22.04 (GCC 12 SANITIZE UNDEFINED) + +on: + pull_request: + types: [opened, 
synchronize, reopened, ready_for_review] + paths-ignore: + - '**.md' + - 'docs/**' + push: + branches: + - main + paths-ignore: + - '**.md' + - 'docs/**' + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + ubuntu-build: + runs-on: ubuntu-22.04 + strategy: + matrix: + shared: [ON, OFF] + steps: + - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + - name: Setup Ninja + run: sudo apt-get install ninja-build + - name: Prepare + run: cmake -D ADA_SANITIZE_UNDEFINED=ON -DADA_DEVELOPMENT_CHECKS=ON -DBUILD_SHARED_LIBS=${{matrix.shared}} -G Ninja -B build + env: + CXX: g++-12 + - name: Build + run: cmake --build build -j=2 + - name: Test + run: ctest --output-on-failure --test-dir build diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml new file mode 100644 index 000000000..b340168a7 --- /dev/null +++ b/.github/workflows/ubuntu.yml @@ -0,0 +1,43 @@ +name: Ubuntu 22.04 + +on: + pull_request: + types: [opened, synchronize, reopened, ready_for_review] + paths-ignore: + - '**.md' + - 'docs/**' + push: + branches: + - main + paths-ignore: + - '**.md' + - 'docs/**' + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + ubuntu-build: + runs-on: ubuntu-22.04 + strategy: + matrix: + shared: [ON, OFF] + cxx: [g++-12, clang++-14] + steps: + - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + - name: Setup Ninja + run: sudo apt-get install ninja-build + - name: Prepare + run: cmake -D ADA_BENCHMARKS=ON -DBUILD_SHARED_LIBS=${{matrix.shared}} -G Ninja -B build + env: + CXX: ${{matrix.cxx}} + - name: Build + run: cmake --build build -j=2 + - name: Test + run: ctest --output-on-failure --test-dir build + - name: Run default benchmark + run: cd build && benchmarks/bench diff --git a/.github/workflows/ubuntu_install.yml 
b/.github/workflows/ubuntu_install.yml new file mode 100644 index 000000000..7bce37fb9 --- /dev/null +++ b/.github/workflows/ubuntu_install.yml @@ -0,0 +1,46 @@ +name: Ubuntu 22.04 (Installation) + +on: + pull_request: + types: [opened, synchronize, reopened, ready_for_review] + paths-ignore: + - '**.md' + - 'docs/**' + push: + branches: + - main + paths-ignore: + - '**.md' + - 'docs/**' + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + ubuntu-build: + runs-on: ubuntu-22.04 + strategy: + matrix: + include: + - {shared: ON} + - {shared: OFF} + steps: + - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + - name: Setup Ninja + run: sudo apt-get install ninja-build + - name: Prepare + run: cmake -G Ninja -DBUILD_SHARED_LIBS=${{matrix.shared}} -DCMAKE_INSTALL_PREFIX:PATH=destination -B build + - name: Build + run: cmake --build build -j=2 + - name: Install + run: cmake --install build + - name: Prepare test package + run: cmake -DCMAKE_INSTALL_PREFIX:PATH=../../destination -S tests/installation -B buildbabyada + - name: Build test package + run: cmake --build buildbabyada + - name: Run example + run: ./buildbabyada/main diff --git a/.github/workflows/ubuntu_old.yml b/.github/workflows/ubuntu_old.yml new file mode 100644 index 000000000..8447b29fd --- /dev/null +++ b/.github/workflows/ubuntu_old.yml @@ -0,0 +1,41 @@ +name: Ubuntu 20.04 + +on: + pull_request: + types: [opened, synchronize, reopened, ready_for_review] + paths-ignore: + - '**.md' + - 'docs/**' + push: + branches: + - main + paths-ignore: + - '**.md' + - 'docs/**' + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + ubuntu-build: + runs-on: ubuntu-20.04 + strategy: + matrix: + shared: [ON, OFF] + cxx: [g++-9, clang++-10] + steps: + - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + - name: 
Setup Ninja + run: sudo apt-get install ninja-build + - name: Prepare + run: cmake -DBUILD_SHARED_LIBS=${{matrix.shared}} -G Ninja -B build + env: + CXX: ${{matrix.cxx}} + - name: Build + run: cmake --build build -j=2 + - name: Test + run: ctest --output-on-failure --test-dir build diff --git a/.github/workflows/ubuntu_pedantic.yml b/.github/workflows/ubuntu_pedantic.yml new file mode 100644 index 000000000..56d72c253 --- /dev/null +++ b/.github/workflows/ubuntu_pedantic.yml @@ -0,0 +1,41 @@ +name: Ubuntu 22.04 (GCC 12) Fails On Compiler Warnings + +on: + pull_request: + types: [opened, synchronize, reopened, ready_for_review] + paths-ignore: + - '**.md' + - 'docs/**' + push: + branches: + - main + paths-ignore: + - '**.md' + - 'docs/**' + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + ubuntu-build: + runs-on: ubuntu-22.04 + strategy: + matrix: + shared: [ON, OFF] + steps: + - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + - name: Setup Ninja + run: sudo apt-get install ninja-build + - name: Prepare + run: cmake -DBUILD_SHARED_LIBS=${{matrix.shared}} -G Ninja -B build + env: + CXX: g++-12 + CXXFLAGS: -Werror + - name: Build + run: cmake --build build -j=2 + - name: Test + run: ctest --output-on-failure --test-dir build diff --git a/.github/workflows/visual_studio.yml b/.github/workflows/visual_studio.yml new file mode 100644 index 000000000..71171ec6c --- /dev/null +++ b/.github/workflows/visual_studio.yml @@ -0,0 +1,45 @@ +name: VS17-CI + +on: + pull_request: + types: [opened, synchronize, reopened, ready_for_review] + paths-ignore: + - '**.md' + - 'docs/**' + push: + branches: + - main + paths-ignore: + - '**.md' + - 'docs/**' + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + ci: + name: windows-vs17 + runs-on: windows-latest + strategy: + fail-fast: false + 
matrix: + include: + - {gen: Visual Studio 17 2022, arch: x64, devchecks: OFF, shared: OFF, config: Release} + - {gen: Visual Studio 17 2022, arch: x64, devchecks: ON, shared: OFF, config: Debug} + - {gen: Visual Studio 17 2022, arch: x64, devchecks: ON, shared: ON, config: Debug} + - {gen: Visual Studio 17 2022, arch: Win32, devchecks: ON, shared: OFF, config: Debug} + - {gen: Visual Studio 17 2022, arch: Win32, devchecks: ON, shared: ON, config: Debug} + steps: + - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + - name: Configure + run: | + cmake -DADA_DEVELOPMENT_CHECKS="${{matrix.devchecks}}" -G "${{matrix.gen}}" -A ${{matrix.arch}} -DBUILD_SHARED_LIBS=${{matrix.shared}} -B build + - name: Build + run: cmake --build build --config "${{matrix.config}}" --verbose + - name: Run tests + working-directory: build + run: ctest -C "${{matrix.config}}" --output-on-failure diff --git a/.github/workflows/visual_studio_clang.yml b/.github/workflows/visual_studio_clang.yml new file mode 100644 index 000000000..349bc8c33 --- /dev/null +++ b/.github/workflows/visual_studio_clang.yml @@ -0,0 +1,46 @@ +name: VS17-clang-CI + +on: + pull_request: + types: [opened, synchronize, reopened, ready_for_review] + paths-ignore: + - '**.md' + - 'docs/**' + push: + branches: + - main + paths-ignore: + - '**.md' + - 'docs/**' + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + ci: + name: windows-vs17 + runs-on: windows-latest + strategy: + fail-fast: false + matrix: + include: + - {gen: Visual Studio 17 2022, arch: x64, devchecks: ON} + steps: + - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + - name: Configure + run: | + cmake -DADA_DEVELOPMENT_CHECKS="${{matrix.devchecks}}" -G "${{matrix.gen}}" -A ${{matrix.arch}} -T ClangCL -B build + - name: Build Debug + run: cmake --build build --config Debug --verbose + - name: Run Debug tests + 
working-directory: build + run: ctest -C Debug --output-on-failure + - name: Build Release + run: cmake --build build --config Release --verbose + - name: Run Release tests + working-directory: build + run: ctest -C Release --output-on-failure diff --git a/.github/workflows/wpt-updater.yml b/.github/workflows/wpt-updater.yml new file mode 100644 index 000000000..fdca9ab0a --- /dev/null +++ b/.github/workflows/wpt-updater.yml @@ -0,0 +1,40 @@ +name: Update WPT + +on: + schedule: + - cron: '0 0 * * *' + +env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + +concurrency: + group: wpt-updater + cancel-in-progress: true + +permissions: + contents: read + +jobs: + issue: + runs-on: ubuntu-latest + permissions: + contents: write + pull-requests: write + steps: + - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + - name: Fetch tests + run: tools/update-wpt.sh + - name: Open pull request + uses: peter-evans/create-pull-request@6d6857d36972b65feb161a90e484f2984215f83e #v6.0.5 + with: + token: ${{ secrets.GH_PAT }} + commit-message: "test: update web platform tests" + branch: "automatic-update-wpt" + title: "Update web platform tests" + body: | + This is an automated pull request for updating the WPT. 
+ - [Web Platform Tests](https://github.com/web-platform-tests/wpt/tree/master/url) + - [Commit History](https://github.com/web-platform-tests/wpt/commits/master/url/resources) + cc @anonrig @lemire + team-reviewers: core + delete-branch: true diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..bb1f23e33 --- /dev/null +++ b/.gitignore @@ -0,0 +1,27 @@ +# common build directory +build +*-build-* + +# Python cache +__pycache__ +venv + +cmake-build-debug + +.cache +docs/html +docs/theme + +# Generated using only the Github workflow +benchmark_result.json + +singleheader/ada.h +singleheader/ada_c.h +singleheader/ada.cpp +singleheader/singleheader.zip + +benchmarks/competitors/servo-url/debug +benchmarks/competitors/servo-url/target + +#ignore VScode +.vscode/ \ No newline at end of file diff --git a/.python-version b/.python-version new file mode 100644 index 000000000..e4fba2183 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.12
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 000000000..a7ce37965
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,182 @@
+cmake_minimum_required(VERSION 3.16)
+
+project(ada
+  DESCRIPTION "Fast spec-compliant URL parser"
+  LANGUAGES C CXX
+  VERSION 2.9.2
+)
+set(CMAKE_CXX_STANDARD 17)
+# Library version and soversion, applied to the ada target further down.
+set(ADA_LIB_VERSION "2.9.2" CACHE STRING "ada library version")
+set(ADA_LIB_SOVERSION "2" CACHE STRING "ada library soversion")
+
+include(GNUInstallDirs)
+
+include(CTest)
+# NOTE(review): ADA_TOOLS, ADA_COVERAGE and is_top_project are read below but
+# never set in this file; presumably cmake/ada-flags.cmake provides them.
+# Confirm is_top_project in particular: if it is unset, the CPack block at the
+# end of this file is skipped.
+include(cmake/ada-flags.cmake)
+
+set(ADA_SOURCE_DIR src)
+
+add_subdirectory(src)
+
+# Append instead of overwriting, so that a module path inherited from the user
+# or from an enclosing project is kept.
+list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/scripts/cmake")
+
+option(ADA_BENCHMARKS "Build benchmarks" OFF)
+option(ADA_TESTING "Build tests" ${BUILD_TESTING})
+
+# There are cases where when embedding ada as a dependency for other CMake
+# projects as submodules or subdirectories (via FetchContent) can lead to
+# errors due to CPM, so this is here to support disabling all the testing
+# and tooling for ada if one only wishes to use the ada library.
+if(ADA_TESTING OR ADA_BENCHMARKS OR ADA_TOOLS)
+  include(cmake/CPM.cmake)
+  # CPM requires git as an implicit dependency
+  find_package(Git QUIET)
+  # We use googletest in the tests
+  if(Git_FOUND AND ADA_TESTING)
+    CPMAddPackage(
+      NAME GTest
+      GITHUB_REPOSITORY google/googletest
+      VERSION 1.14.0
+      OPTIONS "BUILD_GMOCK OFF" "INSTALL_GTEST OFF"
+    )
+  endif()
+  # We use simdjson in both the benchmarks and tests
+  if(Git_FOUND AND (ADA_TESTING OR ADA_BENCHMARKS))
+    CPMAddPackage("gh:simdjson/simdjson@3.9.1")
+  endif()
+  # We use Google Benchmark, but it does not build under several 32-bit systems.
+  if(Git_FOUND AND ADA_BENCHMARKS AND (CMAKE_SIZEOF_VOID_P EQUAL 8))
+    CPMAddPackage(
+      NAME benchmark
+      GITHUB_REPOSITORY google/benchmark
+      GIT_TAG f91b6b4
+      OPTIONS "BENCHMARK_ENABLE_TESTING OFF"
+              "BENCHMARK_ENABLE_INSTALL OFF"
+              "BENCHMARK_ENABLE_WERROR OFF"
+    )
+  endif()
+
+  # Native test suite (skipped under Emscripten, which uses tests/wasm below).
+  if(ADA_TESTING AND NOT EMSCRIPTEN)
+    if(Git_FOUND)
+      set(CTEST_TEST_TIMEOUT 5)
+      message(STATUS "The tests are enabled.")
+      add_subdirectory(tests)
+    else()
+      message(STATUS "The tests are disabled because git was not found.")
+    endif()
+  else()
+    if(is_top_project)
+      message(STATUS "The tests are disabled.")
+    endif()
+  endif()
+
+  if(ADA_BENCHMARKS AND NOT EMSCRIPTEN)
+    if(Git_FOUND)
+      message(STATUS "Ada benchmarks enabled.")
+      add_subdirectory(benchmarks)
+    else()
+      message(STATUS "The benchmarks are disabled because git was not found.")
+    endif()
+  else()
+    if(is_top_project)
+      message(STATUS "Ada benchmarks disabled. Set ADA_BENCHMARKS=ON to enable them.")
+    endif()
+  endif()
+
+  if(ADA_TESTING AND EMSCRIPTEN)
+    add_subdirectory(tests/wasm)
+  endif()
+endif()
+
+add_library(ada::ada ALIAS ada)
+
+set_target_properties(
+    ada PROPERTIES
+    VERSION "${ADA_LIB_VERSION}"
+    SOVERSION "${ADA_LIB_SOVERSION}"
+    WINDOWS_EXPORT_ALL_SYMBOLS YES
+)
+
+include(CMakePackageConfigHelpers)
+
+if(NOT ADA_COVERAGE AND NOT EMSCRIPTEN)
+  add_subdirectory(singleheader)
+endif()
+
+if(ADA_TOOLS)
+  if(Git_FOUND)
+    add_subdirectory(tools)
+  else()
+    message(STATUS "The tools are disabled because git was not found.")
+  endif()
+endif()
+
+install(
+    FILES include/ada.h include/ada_c.h
+    DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}"
+    COMPONENT ada_development
+)
+
+install(
+    DIRECTORY include/ada
+    DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}"
+    COMPONENT ada_development
+)
+
+install(
+    TARGETS ada
+    EXPORT ada_targets
+    RUNTIME COMPONENT ada_runtime
+    LIBRARY COMPONENT ada_runtime
+    NAMELINK_COMPONENT ada_development
+    ARCHIVE COMPONENT ada_development
+    INCLUDES DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}"
+)
+
+configure_file(cmake/ada-config.cmake.in ada-config.cmake @ONLY)
+
+write_basic_package_version_file(
+    ada-config-version.cmake
+    COMPATIBILITY SameMinorVersion
+)
+
+set(
+    ADA_INSTALL_CMAKEDIR "${CMAKE_INSTALL_LIBDIR}/cmake/ada"
+    CACHE STRING "CMake package config location relative to the install prefix"
+)
+mark_as_advanced(ADA_INSTALL_CMAKEDIR)
+
+install(
+    FILES
+    "${PROJECT_BINARY_DIR}/ada-config.cmake"
+    "${PROJECT_BINARY_DIR}/ada-config-version.cmake"
+    DESTINATION "${ADA_INSTALL_CMAKEDIR}"
+    COMPONENT ada_development
+)
+
+# The export set is installed once, together with the other development files.
+install(
+    EXPORT ada_targets
+    NAMESPACE ada::
+    DESTINATION "${ADA_INSTALL_CMAKEDIR}"
+    COMPONENT ada_development
+)
+
+if(is_top_project)
+  set(CPACK_PACKAGE_VENDOR "Ada Authors")
+  set(CPACK_PACKAGE_CONTACT "yagiz@nizipli.com")
+  set(CPACK_RESOURCE_FILE_LICENSE "${PROJECT_SOURCE_DIR}/LICENSE-MIT")
+  # RPM License tag: a license name (see LICENSE-APACHE, LICENSE-MIT), not a path.
+  set(CPACK_RPM_PACKAGE_LICENSE "Apache-2.0 OR MIT")
+  set(CPACK_RESOURCE_FILE_README "${PROJECT_SOURCE_DIR}/README.md")
+  set(CPACK_SOURCE_GENERATOR "TGZ;ZIP")
+  include(CPack)
+endif()
diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 000000000..9882798ed --- /dev/null +++ b/Dockerfile @@ -0,0 +1,12 @@ +FROM debian:12-slim@sha256:2ccc7e39b0a6f504d252f807da1fc4b5bcd838e83e4dec3e2f57b2a4a64e7214 + +RUN apt-get update && apt-get install -y \ + apt-transport-https \ + gcc \ + clang \ + clang-tools \ + cmake + +WORKDIR /repo + +CMD ["bash", "-c", "cmake -B build && cmake --build build && cd build && ctest --output-on-failure"] diff --git a/LICENSE-APACHE b/LICENSE-APACHE new file mode 100644 index 000000000..1204b0aab --- /dev/null +++ b/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License.
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright 2023 Yagiz Nizipli and Daniel Lemire + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/LICENSE-MIT b/LICENSE-MIT new file mode 100644 index 000000000..bd2abacfc --- /dev/null +++ b/LICENSE-MIT @@ -0,0 +1,18 @@ +Copyright 2023 Yagiz Nizipli and Daniel Lemire + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/README.md b/README.md new file mode 100644 index 000000000..3cd30dd84 --- /dev/null +++ b/README.md @@ -0,0 +1,329 @@ +# Ada +[![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/ada-url/ada/badge)](https://securityscorecards.dev/viewer/?uri=github.com/ada-url/ada) +[![OpenSSF Best Practices](https://bestpractices.coreinfrastructure.org/projects/7085/badge)](https://bestpractices.coreinfrastructure.org/projects/7085) +[![Ubuntu 22.04](https://github.com/ada-url/ada/actions/workflows/ubuntu.yml/badge.svg)](https://github.com/ada-url/ada/actions/workflows/ubuntu.yml) +[![VS17-CI](https://github.com/ada-url/ada/actions/workflows/visual_studio.yml/badge.svg)](https://github.com/ada-url/ada/actions/workflows/visual_studio.yml) +[![VS17-clang-CI](https://github.com/ada-url/ada/actions/workflows/visual_studio_clang.yml/badge.svg)](https://github.com/ada-url/ada/actions/workflows/visual_studio_clang.yml) +[![Ubuntu s390x (GCC 11)](https://github.com/ada-url/ada/actions/workflows/ubuntu-s390x.yml/badge.svg)](https://github.com/ada-url/ada/actions/workflows/ubuntu-s390x.yml) + +Ada is a fast and spec-compliant URL parser written in C++. +The specification for the URL parser can be found on the +[WHATWG](https://url.spec.whatwg.org/#url-parsing) website. + +The Ada library passes the full range of tests from the specification, +across a wide range of platforms (e.g., Windows, Linux, macOS). It fully +supports the relevant [Unicode Technical Standard](https://www.unicode.org/reports/tr46/#ToUnicode). + +A common use of a URL parser is to take a URL string and normalize it. +The WHATWG URL specification has been adopted by most browsers. Other tools, such as curl and many +standard libraries, follow RFC 3986.
The following table illustrates possible differences in practice +(encoding of the host, encoding of the path): + +| string source | string value | +|:--------------|:--------------| +| input string | https://www.7‑Eleven.com/Home/Privacy/Montréal | +| ada's normalized string | https://www.xn--7eleven-506c.com/Home/Privacy/Montr%C3%A9al | +| curl 7.87 | (returns the original unchanged) | + +### Requirements + +Ada requires a recent C++ compiler supporting C++17; the project is otherwise self-contained and has no dependencies. +We test GCC 9 or better, LLVM 10 or better and Microsoft Visual Studio 2022. + +## Ada is fast. + +On a benchmark where we need to validate and normalize [thousands of URLs found +on popular websites](https://github.com/ada-url/url-various-datasets/tree/main/top100), +we find that ada can be several times faster than popular competitors (system: Apple MacBook 2022 +with LLVM 14). + + +``` + ada ▏ 188 ns/URL ███▏ +servo url ▏ 664 ns/URL ███████████▎ + CURL ▏ 1471 ns/URL █████████████████████████ +``` + +Ada has improved the performance of the popular JavaScript environment Node.js: + +> Since Node.js 18, a new URL parser dependency was added to Node.js — Ada. This addition bumped the Node.js performance when parsing URLs to a new level. Some results could reach up to an improvement of **400%**. ([State of Node.js Performance 2023](https://blog.rafaelgss.dev/state-of-nodejs-performance-2023)) + +The Ada library is used by important systems besides Node.js such as Redpanda and Cloudflare Workers. + + + +[![the ada library](http://img.youtube.com/vi/tQ-6OWRDsZg/0.jpg)](https://www.youtube.com/watch?v=tQ-6OWRDsZg)
+ +## Quick Start + + + +Linux or macOS users can follow these instructions if they have a recent C++ compiler installed and a standard utility (`wget`). + + +1. Pull the library in a directory + ``` + wget https://github.com/ada-url/ada/releases/download/v2.6.10/ada.cpp + wget https://github.com/ada-url/ada/releases/download/v2.6.10/ada.h + ``` +2. Create a new file named `demo.cpp` with this content: + ```C++ + #include "ada.cpp" + #include "ada.h" + #include <iostream> + + int main(int, char *[]) { + auto url = ada::parse("https://www.google.com"); + if (!url) { + std::cout << "failure" << std::endl; + return EXIT_FAILURE; + } + url->set_protocol("http"); + std::cout << url->get_protocol() << std::endl; + std::cout << url->get_host() << std::endl; + return EXIT_SUCCESS; + } + ``` +3. Compile + ``` + c++ -std=c++17 -o demo demo.cpp + ``` +4. `./demo` + + ``` + http: + www.google.com + ``` + +## Bindings of Ada + +We provide clients for different programming languages through our C API. + +- [Rust](https://github.com/ada-url/rust): Rust bindings for Ada +- [Go](https://github.com/ada-url/goada): Go bindings for Ada +- [Python](https://github.com/ada-url/python): Python bindings for Ada +- [R](https://github.com/schochastics/adaR): R wrapper for Ada + +## Usage + +Ada supports two types of URL instances, `ada::url` and `ada::url_aggregator`. The usage is +the same in either case: we have a parsing function template `ada::parse` which can return +either a result of type `ada::result<ada::url>` or of type `ada::result<ada::url_aggregator>` +depending on your needs. The `ada::url_aggregator` class is smaller and it is backed by a precomputed +serialized URL string. The `ada::url` class is made of several separate strings for the various +components (path, host, and so forth). + +### Parsing & Validation + +- Parse and validate a URL from an ASCII or a valid UTF-8 string.
+ +```cpp +ada::result<ada::url_aggregator> url = ada::parse<ada::url_aggregator>("https://www.google.com"); +if (url) { /* URL is valid */ } +``` + +After calling 'parse', you *must* check that the result is valid before +accessing it when you are not sure that it will succeed. The following +code is unsafe: + +```cpp +ada::result<ada::url_aggregator> url = ada::parse<ada::url_aggregator>("some bad url"); +url->get_href(); +``` + +Instead, check the result before using it: + +```cpp +ada::result<ada::url_aggregator> url = ada::parse<ada::url_aggregator>("some bad url"); +if(url) { + // next line is now safe: + url->get_href(); +} else { + // report a parsing failure +} +``` + +For simplicity, in the examples below, we skip the check because +we know that parsing succeeds. All strings are assumed to be valid +UTF-8 strings. + +### Examples + +- Get/Update credentials + +```cpp +ada::result<ada::url_aggregator> url = ada::parse<ada::url_aggregator>("https://www.google.com"); +url->set_username("username"); +url->set_password("password"); +// url->get_href() will return "https://username:password@www.google.com/" +``` + +- Get/Update Protocol + +```cpp +ada::result<ada::url_aggregator> url = ada::parse<ada::url_aggregator>("https://www.google.com"); +url->set_protocol("wss"); +// url->get_protocol() will return "wss:" +// url->get_href() will return "wss://www.google.com/" +``` + +- Get/Update host + +```cpp +ada::result<ada::url_aggregator> url = ada::parse<ada::url_aggregator>("https://www.google.com"); +url->set_host("github.com"); +// url->get_host() will return "github.com" +// you can also use `url->set_hostname` depending on your usage.
+``` + +- Get/Update port + +```cpp +ada::result<ada::url_aggregator> url = ada::parse<ada::url_aggregator>("https://www.google.com"); +url->set_port("8080"); +// url->get_port() will return "8080" +``` + +- Get/Update pathname + +```cpp +ada::result<ada::url_aggregator> url = ada::parse<ada::url_aggregator>("https://www.google.com"); +url->set_pathname("/my-super-long-path"); +// url->get_pathname() will return "/my-super-long-path" +``` + +- Get/Update search/query + +```cpp +ada::result<ada::url_aggregator> url = ada::parse<ada::url_aggregator>("https://www.google.com"); +url->set_search("target=self"); +// url->get_search() will return "?target=self" +``` + +- Get/Update hash/fragment + +```cpp +ada::result<ada::url_aggregator> url = ada::parse<ada::url_aggregator>("https://www.google.com"); +url->set_hash("is-this-the-real-life"); +// url->get_hash() will return "#is-this-the-real-life" +``` +For more information about command-line options, please refer to the [CLI documentation](docs/cli.md). + +- URL search params + +```cpp +ada::url_search_params search_params("a=b&c=d&e=f"); +search_params.append("g", "h"); + +search_params.get("g"); // will return "h" + +auto keys = search_params.get_keys(); +while (keys.has_next()) { + auto key = keys.next(); // "a", "c", "e", "g" +} +``` + +### C wrapper + +See the file `include/ada_c.h` for our C interface. We expect ASCII or UTF-8 strings.
+ +```C +#include "ada_c.h" +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <string.h> + +static void ada_print(ada_string string) { + printf("%.*s\n", (int)string.length, string.data); +} + +int main(int c, char *arg[] ) { + const char* input = + "https://username:password@www.google.com:8080/" + "pathname?query=true#hash-exists"; + ada_url url = ada_parse(input, strlen(input)); + if(!ada_is_valid(url)) { puts("failure"); return EXIT_FAILURE; } + ada_print(ada_get_href(url)); // prints https://username:password@www.google.com:8080/pathname?query=true#hash-exists + ada_print(ada_get_protocol(url)); // prints https: + ada_print(ada_get_username(url)); // prints username + ada_set_href(url, "https://www.yagiz.co", strlen("https://www.yagiz.co")); + if(!ada_is_valid(url)) { puts("failure"); return EXIT_FAILURE; } + ada_set_hash(url, "new-hash", strlen("new-hash")); + ada_set_hostname(url, "new-host", strlen("new-host")); + ada_set_host(url, "changed-host:9090", strlen("changed-host:9090")); + ada_set_pathname(url, "new-pathname", strlen("new-pathname")); + ada_set_search(url, "new-search", strlen("new-search")); + ada_set_protocol(url, "wss", 3); + ada_print(ada_get_href(url)); // will print wss://changed-host:9090/new-pathname?new-search#new-hash + + // Manipulating search params + ada_string search = ada_get_search(url); + ada_url_search_params search_params = + ada_parse_search_params(search.data, search.length); + ada_search_params_append(search_params, "a", 1, "b", 1); + ada_owned_string result = ada_search_params_to_string(search_params); + ada_set_search(url, result.data, result.length); + ada_free_owned_string(result); + ada_free_search_params(search_params); + + ada_free(url); + return EXIT_SUCCESS; +} +``` + +When linking against the ada library from C, be mindful that ada requires access to the standard +C++ library. E.g., you may link with the C++ compiler.
+ +For example, if you grab our single-header C++ files (`ada.cpp` and `ada.h`), as well as the C header (`ada_c.h`), +you can often compile a C program (`demo.c`) as follows under Linux/macOS systems: + +``` +c++ -c ada.cpp -std=c++17 +cc -c demo.c +c++ demo.o ada.o -o cdemo +./cdemo +``` + +### CMake dependency + +See the file `tests/installation/CMakeLists.txt` for an example of how you might use ada from your own +CMake project, after having installed ada on your system. + +## Installation + +### Homebrew + +Ada is available through [Homebrew](https://formulae.brew.sh/formula/ada-url#default). +You can install Ada using `brew install ada-url`. + +## Contributing + +### Building + +Ada uses cmake as a build system. We recommend running the following commands to build it locally. + +- **Build**: `cmake -B build && cmake --build build` +- **Test**: `ctest --output-on-failure --test-dir build` + +Windows users need additional flags to specify the build configuration, e.g. `--config Release`. + +The project can also be built via docker using the default Dockerfile of the repository with the following command. + +`docker build -t ada-builder . && docker run --rm -it -v ${PWD}:/repo ada-builder` + +### Amalgamation + +You may amalgamate all source files into only two files (`ada.h` and `ada.cpp`) by executing the Python +3 script `singleheader/amalgamate.py`. By default, the files are created in the `singleheader` directory. + +### License + +This code is made available under the Apache License 2.0 as well as the MIT license. + +Our tests include third-party code and data. The benchmarking code includes third-party code: it is provided for research purposes only and not part of the library. + +### Further reading + + +* Yagiz Nizipli, Daniel Lemire, [Parsing Millions of URLs per Second](https://doi.org/10.1002/spe.3296), Software: Practice and Experience 54(5) May 2024.
diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 000000000..255f50955 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,8 @@ +# Security Policy + +## Reporting a Vulnerability + +Please use the following contact information for reporting a vulnerability: + +- [Daniel Lemire](https://github.com/lemire) - daniel@lemire.me +- [Yagiz Nizipli](https://github.com/anonrig) - yagiz@nizipli.com diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt new file mode 100644 index 000000000..a20c2b82c --- /dev/null +++ b/benchmarks/CMakeLists.txt @@ -0,0 +1,304 @@ +# bench_search_params: URL search-params benchmark (links only against ada) +add_executable(bench_search_params bench_search_params.cpp) +target_link_libraries(bench_search_params PRIVATE ada) + +# WPT Bench: needs simdjson in addition to ada +add_executable(wpt_bench wpt_bench.cpp) +target_link_libraries(wpt_bench PRIVATE ada) +target_link_libraries(wpt_bench PRIVATE simdjson) +target_include_directories(wpt_bench PUBLIC "$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>") +target_include_directories(wpt_bench PUBLIC "$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/benchmarks>") + +# Bench: built from bench.cpp without ADA_URL_FILE +add_executable(bench bench.cpp) +target_link_libraries(bench PRIVATE ada) +target_include_directories(bench PUBLIC "$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>") +target_include_directories(bench PUBLIC "$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/benchmarks>") + +# Benchdata: same source as bench, but ADA_URL_FILE points at the fetched url-dataset +CPMAddPackage("gh:ada-url/url-dataset#9749b92c13e970e70409948fa862461191504ccc") +add_executable(benchdata bench.cpp) +target_link_libraries(benchdata PRIVATE ada) +target_include_directories(benchdata PUBLIC "$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>") +target_include_directories(benchdata PUBLIC "$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/benchmarks>") +target_compile_definitions(benchdata PRIVATE ADA_URL_FILE="${url-dataset_SOURCE_DIR}/out.txt") + + +# BBC Bench +add_executable(bbc_bench bbc_bench.cpp) +target_link_libraries(bbc_bench PRIVATE ada) +target_include_directories(bbc_bench PUBLIC "$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>") +target_include_directories(bbc_bench PUBLIC "$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/benchmarks>") + +# Percent Encode +add_executable(percent_encode percent_encode.cpp) +target_link_libraries(percent_encode PRIVATE ada) +target_include_directories(percent_encode PUBLIC "$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>") +target_include_directories(percent_encode PUBLIC "$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/benchmarks>") +if(MSVC AND BUILD_SHARED_LIBS) + # Copy
the ada dll into the directory + add_custom_command(TARGET percent_encode POST_BUILD # Adds a post-build event + COMMAND ${CMAKE_COMMAND} -E copy_if_different # which executes "cmake -E copy_if_different..." + "$<TARGET_FILE:ada>" # <--this is in-file + "$<TARGET_FILE_DIR:percent_encode>" VERBATIM) # <--this is out-file path +endif() + +if(CMAKE_SYSTEM_NAME MATCHES "Linux") + # The model_bench program requires accurate/low-overhead performance counters. + # We only have such support under Linux. + add_executable(model_bench model_bench.cpp) + target_link_libraries(model_bench PRIVATE ada) + target_compile_definitions(model_bench PRIVATE ADA_URL_FILE="${url-dataset_SOURCE_DIR}/out.txt") +endif() + +target_link_libraries(wpt_bench PRIVATE benchmark::benchmark) +target_link_libraries(bench PRIVATE benchmark::benchmark) +target_link_libraries(benchdata PRIVATE benchmark::benchmark) +target_link_libraries(bbc_bench PRIVATE benchmark::benchmark) +target_link_libraries(percent_encode PRIVATE benchmark::benchmark) +target_link_libraries(bench_search_params PRIVATE benchmark::benchmark) + +option(ADA_COMPETITION "Whether to install various competitors." OFF) + +# We only build url_whatwg if ICU is found, so we need to make +# finding ICU easy. + +if(CMAKE_SYSTEM_NAME MATCHES "Darwin") + message(STATUS "Apple system detected.") + # People who run macOS often use brew. + if(EXISTS /opt/homebrew/opt/icu4c) + message(STATUS "icu is provided by homebrew at /opt/homebrew/opt/icu4c.") + ## This is a bit awkward, but it is a lot better than asking the + ## user to figure that out.
+ list(APPEND CMAKE_PREFIX_PATH "/opt/homebrew/opt/icu4c/include") + list(APPEND CMAKE_LIBRARY_PATH "/opt/homebrew/opt/icu4c/lib") + elseif(EXISTS /usr/local/opt/icu4c) + message(STATUS "icu is provided by homebrew at /usr/local/opt/icu4c.") + list(APPEND CMAKE_PREFIX_PATH "/usr/local/opt/icu4c/include") + list(APPEND CMAKE_LIBRARY_PATH "/usr/local/opt/icu4c/lib") + endif() +endif() + +find_package(ICU COMPONENTS uc i18n) +### If the user does not have ICU, let us help them with instructions: +if(NOT ICU_FOUND) + if(CMAKE_SYSTEM_NAME MATCHES "Darwin") + if(EXISTS /opt/homebrew) + message(STATUS "Under macOS, you may install ICU with brew, using 'brew install icu4c'.") + else() + message(STATUS "Under macOS, you should install brew (see https://brew.sh) and then icu4c ('brew install icu4c').") + endif() + elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux") + message(STATUS "Under Linux, you may be able to install ICU with a command such as 'apt-get install libicu-dev'." ) + endif() +endif() + +if(ICU_FOUND) + CPMAddPackage( + NAME url_whatwg + GITHUB_REPOSITORY rmisev/url_whatwg + GIT_TAG 72bcabf + OPTIONS "URL_BUILD_TESTS OFF" "URL_USE_LIBS OFF" + ) + add_library(url_whatwg_lib STATIC "${url_whatwg_SOURCE_DIR}/src/url.cpp" + "${url_whatwg_SOURCE_DIR}/src/url_idna.cpp" + "${url_whatwg_SOURCE_DIR}/src/url_ip.cpp" + "${url_whatwg_SOURCE_DIR}/src/url_percent_encode.cpp" + "${url_whatwg_SOURCE_DIR}/src/url_search_params.cpp" + "${url_whatwg_SOURCE_DIR}/src/url_utf.cpp" + ) + target_include_directories(url_whatwg_lib PUBLIC "${url_whatwg_SOURCE_DIR}/include") + target_link_libraries(url_whatwg_lib PRIVATE ICU::uc ICU::i18n) + + + target_link_libraries(bench PRIVATE url_whatwg_lib) + target_link_libraries(benchdata PRIVATE url_whatwg_lib) + target_link_libraries(bbc_bench PRIVATE url_whatwg_lib) + target_link_libraries(wpt_bench PRIVATE url_whatwg_lib) + + target_include_directories(bench PUBLIC "${url_whatwg_SOURCE_DIR}") +
target_include_directories(benchdata PUBLIC "${url_whatwg_SOURCE_DIR}") + target_include_directories(bbc_bench PUBLIC "${url_whatwg_SOURCE_DIR}") + target_include_directories(wpt_bench PUBLIC "${url_whatwg_SOURCE_DIR}") + + target_compile_definitions(bench PRIVATE ADA_url_whatwg_ENABLED=1) + target_compile_definitions(benchdata PRIVATE ADA_url_whatwg_ENABLED=1) + target_compile_definitions(bbc_bench PRIVATE ADA_url_whatwg_ENABLED=1) + target_compile_definitions(wpt_bench PRIVATE ADA_url_whatwg_ENABLED=1) + +endif() + +if(ADA_COMPETITION) + # Competitor: uriparser + CPMAddPackage( + NAME uriparser + GITHUB_REPOSITORY uriparser/uriparser + GIT_TAG 634b678 + OPTIONS "URIPARSER_BUILD_TESTS OFF" "URIPARSER_BUILD_DOCS OFF" + ) + target_link_libraries(bench PRIVATE uriparser) + target_link_libraries(bbc_bench PRIVATE uriparser) + # Competitor: urlparser (built here from its single source file) + CPMAddPackage( + NAME urlparser + GITHUB_REPOSITORY netmindms/urlparser + GIT_TAG 69c09ed + ) + add_library(urlparser STATIC "${urlparser_SOURCE_DIR}/src/EdUrlParser.cpp") + target_include_directories(urlparser PUBLIC "${urlparser_SOURCE_DIR}/src") + target_link_libraries(bench PRIVATE urlparser) + target_link_libraries(bbc_bench PRIVATE urlparser) + + # Competitor: http-parser (compiled as C) + CPMAddPackage( + NAME httpparser + GITHUB_REPOSITORY nodejs/http-parser + VERSION 2.9.4 + ) + add_library(httpparser STATIC "${httpparser_SOURCE_DIR}/http_parser.c") + set_source_files_properties("${httpparser_SOURCE_DIR}/http_parser.c" PROPERTIES LANGUAGE C) + target_include_directories(httpparser PUBLIC "${httpparser_SOURCE_DIR}") + target_link_libraries(bench PRIVATE httpparser) + target_link_libraries(bbc_bench PRIVATE httpparser) + + + target_compile_definitions(bench PRIVATE ADA_VARIOUS_COMPETITION_ENABLED=1) + target_compile_definitions(bbc_bench PRIVATE ADA_VARIOUS_COMPETITION_ENABLED=1) +endif() + +# CURL (optional competitor; version is checked below) +find_package(CURL) +if(CURL_FOUND) + message(STATUS "curl version " ${CURL_VERSION_STRING}) + if(CURL_VERSION_STRING VERSION_LESS "7.62.0") +
message(STATUS "curl is too old, we need version 7.62.0 or better") + else() + include_directories(${CURL_INCLUDE_DIRS}) + if(NOT CURL_LIBRARIES) + target_link_libraries(bench PRIVATE CURL::libcurl) + target_link_libraries(benchdata PRIVATE CURL::libcurl) + target_link_libraries(bbc_bench PRIVATE CURL::libcurl) + else() + target_link_libraries(bench PRIVATE ${CURL_LIBRARIES}) + target_link_libraries(benchdata PRIVATE ${CURL_LIBRARIES}) + target_link_libraries(bbc_bench PRIVATE ${CURL_LIBRARIES}) + endif() + target_compile_definitions(bench PRIVATE ADA_CURL_ENABLED=1) + target_compile_definitions(benchdata PRIVATE ADA_CURL_ENABLED=1) + target_compile_definitions(bbc_bench PRIVATE ADA_CURL_ENABLED=1) + endif() +else() + message(STATUS "Curl not found! Please install the curl library.") +endif() + +option(ADA_BOOST_URL "Whether to install boost URL." OFF) + +message(STATUS "Compiler is " ${CMAKE_CXX_COMPILER_ID}) + +if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") +message(STATUS "Compiler version " ${CMAKE_CXX_COMPILER_VERSION}) + +if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9) +message(STATUS "Compiler is too old, disabling boost url.") +set(ADA_BOOST_URL OFF CACHE BOOL "Whether to install boost URL."
FORCE) +endif() +endif() + +# Boost: only searched for, and only used, when ADA_BOOST_URL is ON +if(ADA_BOOST_URL) +find_package( + Boost 1.80 + COMPONENTS system +) +endif() + +if(ADA_BOOST_URL AND Boost_FOUND) + CPMAddPackage( + NAME boost_url + GITHUB_REPOSITORY boostorg/url + GIT_TAG boost-1.81.0 + ) + add_library(boost_url INTERFACE) + target_include_directories(boost_url INTERFACE + "${boost_url_SOURCE_DIR}/include") + + target_link_libraries(bench PRIVATE Boost::system) + target_link_libraries(bench PRIVATE boost_url) + target_compile_definitions(bench PRIVATE ADA_BOOST_ENABLED=1) + + target_link_libraries(benchdata PRIVATE Boost::system) + target_link_libraries(benchdata PRIVATE boost_url) + target_compile_definitions(benchdata PRIVATE ADA_BOOST_ENABLED=1) + + target_link_libraries(bbc_bench PRIVATE Boost::system) + target_link_libraries(bbc_bench PRIVATE boost_url) + target_compile_definitions(bbc_bench PRIVATE ADA_BOOST_ENABLED=1) +else() +if(ADA_BOOST_URL) + message(STATUS "Boost 1.80 or better was not found, please install it for benchmarking purposes.") +endif() +endif() + +# Zuri +find_package(ZURI QUIET) +if(ZURI_FOUND) + message(STATUS "Zuri found") + target_link_libraries(bench PRIVATE zuri) + target_link_libraries(benchdata PRIVATE zuri) + target_link_libraries(bbc_bench PRIVATE zuri) + target_compile_definitions(bench PRIVATE ADA_ZURI_ENABLED=1) + target_compile_definitions(benchdata PRIVATE ADA_ZURI_ENABLED=1) + target_compile_definitions(bbc_bench PRIVATE ADA_ZURI_ENABLED=1) +else() + message(STATUS "Zuri not found! Please install to include in benchmark.") +endif() + +if(NOT WIN32) +# We want to check whether Rust is available before trying to build a crate.
+CPMAddPackage( + NAME corrosion + GITHUB_REPOSITORY corrosion-rs/corrosion + VERSION 0.4.4 + DOWNLOAD_ONLY ON # fetch only; corrosion is added as a subdirectory below once Rust is found + OPTIONS "Rust_FIND_QUIETLY OFF" +) +include("${corrosion_SOURCE_DIR}/cmake/FindRust.cmake") +endif() + +if(RUST_FOUND) + message(STATUS "Rust found: " ${Rust_VERSION} ) + add_subdirectory("${corrosion_SOURCE_DIR}" "${PROJECT_BINARY_DIR}/_deps/corrosion" EXCLUDE_FROM_ALL) + # Important: the servo-url crate is imported with PROFILE release so that it is built in release mode. + corrosion_import_crate(MANIFEST_PATH "competitors/servo-url/Cargo.toml" NO_LINKER_OVERRIDE PROFILE release) + + # Only wire servo-url into the benchmarks if the import actually created the target. + if(TARGET servo-url) + message(STATUS "servo-url target was created. Linking benchmarks and servo-url.") + target_link_libraries(bench PRIVATE servo-url) + target_compile_definitions(bench PRIVATE ADA_RUST_VERSION="${Rust_VERSION}") + + target_link_libraries(benchdata PRIVATE servo-url) + target_compile_definitions(benchdata PRIVATE ADA_RUST_VERSION="${Rust_VERSION}") + + target_link_libraries(bbc_bench PRIVATE servo-url) + target_compile_definitions(bbc_bench PRIVATE ADA_RUST_VERSION="${Rust_VERSION}") + + target_link_libraries(percent_encode PRIVATE servo-url) + target_compile_definitions(percent_encode PRIVATE ADA_RUST_VERSION="${Rust_VERSION}") + + target_link_libraries(wpt_bench PRIVATE servo-url) + target_compile_definitions(wpt_bench PRIVATE ADA_RUST_VERSION="${Rust_VERSION}") + endif() +else() + message(STATUS "Rust/Cargo is unavailable." ) + message(STATUS "We will not benchmark servo-url."
) + if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin") + message(STATUS "Under macOS, you may be able to install rust with") + message(STATUS "curl https://sh.rustup.rs -sSf | sh") + elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux") + message(STATUS "Under Linux, you may be able to install rust with a command such as") + message(STATUS "apt-get install cargo" ) + message(STATUS "or" ) + message(STATUS "curl https://sh.rustup.rs -sSf | sh") + endif() +endif() diff --git a/benchmarks/bbc_bench.cpp b/benchmarks/bbc_bench.cpp new file mode 100644 index 000000000..907d49189 --- /dev/null +++ b/benchmarks/bbc_bench.cpp @@ -0,0 +1,36 @@ +#include "benchmark_header.h" + +/** + * Realistic URL examples collected from the BBC homepage. Each entry is one URL, split across adjacent string literals. + */ +std::string url_examples[] = { + "https://static.files.bbci.co.uk/orbit/737a4ee2bed596eb65afc4d2ce9af568/js/" + "polyfills.js", + "https://static.files.bbci.co.uk/orbit/737a4ee2bed596eb65afc4d2ce9af568/" + "css/orbit-v5-ltr.min.css", + "https://static.files.bbci.co.uk/orbit/737a4ee2bed596eb65afc4d2ce9af568/js/" + "require.min.js", + "https://static.files.bbci.co.uk/fonts/reith/2.512/BBCReithSans_W_Rg.woff2", + "https://nav.files.bbci.co.uk/searchbox/c8bfe8595e453f2b9483fda4074e9d15/" + "css/box.css", + "https://static.files.bbci.co.uk/cookies/d3bb303e79f041fec95388e04f84e716/" + "cookie-banner/cookie-library.bundle.js", + "https://static.files.bbci.co.uk/account/id-cta/597/style/id-cta.css", + "https://gn-web-assets.api.bbc.com/wwhp/" + "20220908-1153-091014d07889c842a7bdc06e00fa711c9e04f049/responsive/css/" + "old-ie.min.css", + "https://gn-web-assets.api.bbc.com/wwhp/" + "20220908-1153-091014d07889c842a7bdc06e00fa711c9e04f049/modules/vendor/" + "bower/modernizr/modernizr.js"}; + +void init_data(const char* v = nullptr) {} // no-op: the URL set above is compiled in; presumably kept for the shared benchmark template - confirm + +double url_examples_bytes = []() -> double { // total size in bytes of all url_examples, computed once by this immediately-invoked lambda + size_t bytes{0}; + for (std::string& url_string : url_examples) { + bytes += url_string.size(); + } + return double(bytes); +}(); + +#include "benchmark_template.cpp" diff --git
a/benchmarks/bench.cpp b/benchmarks/bench.cpp new file mode 100644 index 000000000..8986c936f --- /dev/null +++ b/benchmarks/bench.cpp @@ -0,0 +1,72 @@ +#include "benchmark_header.h" + +/** + * Realistic URL examples collected on the actual web. + */ +std::string url_examples_default[] = { + "https://www.google.com/" + "webhp?hl=en&ictx=2&sa=X&ved=0ahUKEwil_" + "oSxzJj8AhVtEFkFHTHnCGQQPQgI", + "https://support.google.com/websearch/" + "?p=ws_results_help&hl=en-CA&fg=1", + "https://en.wikipedia.org/wiki/Dog#Roles_with_humans", + "https://www.tiktok.com/@aguyandagolden/video/7133277734310038830", + "https://business.twitter.com/en/help/troubleshooting/" + "how-twitter-ads-work.html?ref=web-twc-ao-gbl-adsinfo&utm_source=twc&utm_" + "medium=web&utm_campaign=ao&utm_content=adsinfo", + "https://images-na.ssl-images-amazon.com/images/I/" + "41Gc3C8UysL.css?AUIClients/AmazonGatewayAuiAssets", + "https://www.reddit.com/?after=t3_zvz1ze", + "https://www.reddit.com/login/?dest=https%3A%2F%2Fwww.reddit.com%2F", + "postgresql://other:9818274x1!!@localhost:5432/" + "otherdb?connect_timeout=10&application_name=myapp", + "http://192.168.1.1", // ipv4 + "http://[2606:4700:4700::1111]", // ipv6 +}; + +std::vector<std::string> url_examples; // URLs actually benchmarked; filled by init_data() + +double url_examples_bytes = []() -> double { // url_examples is still empty here; init_data() recomputes this total + size_t bytes{0}; + for (std::string& url_string : url_examples) { + bytes += url_string.size(); + } + return double(bytes); +}(); + +#ifdef ADA_URL_FILE +const char* default_file = ADA_URL_FILE; +#else +const char* default_file = nullptr; +#endif + +size_t init_data(const char* input = default_file) { + // compute the number of bytes.
+ auto compute = []() -> double { + size_t bytes{0}; + for (std::string& url_string : url_examples) { + bytes += url_string.size(); + } + return double(bytes); + }; + if (input == nullptr) { + for (const std::string& s : url_examples_default) { + url_examples.emplace_back(s); + } + url_examples_bytes = compute(); + return url_examples.size(); + } + + if (!file_exists(input)) { + std::cout << "File not found !" << input << std::endl; + for (const std::string& s : url_examples_default) { + url_examples.emplace_back(s); + } + } else { + std::cout << "Loading " << input << std::endl; + url_examples = split_string(read_file(input)); + } + url_examples_bytes = compute(); + return url_examples.size(); +} +#include "benchmark_template.cpp" diff --git a/benchmarks/bench_search_params.cpp b/benchmarks/bench_search_params.cpp new file mode 100644 index 000000000..26cf53e26 --- /dev/null +++ b/benchmarks/bench_search_params.cpp @@ -0,0 +1,343 @@ +#include "benchmark_header.h" + +/** + * Realistic URL examples collected from Indeed.com, see + * https://github.com/ada-url/ada/pull/459#issuecomment-1624187633 + */ +std::string url_examples_default[] = { + "https://secure.indeed.com/" + "auth?continue=https%3A%2F%2Fm5.apply.indeed.com%2Fbeta%2Findeedapply%" + "2Fresumeapply%3FdraftId%3Dd2f89678-c675-4dd6-8776-c7de2df808cc-Y21o%" + "26draftDc%3Dcmh%26postUrl%3Dhttp%253A%252F%252Fmuffit%252Fprocess-" + "indeedapply%26jk%3D4ce8c8f85737012d%26mob%3D0%26referer%3Dhttps%253A%252F%" + "252Fwww.indeed.com%252F%26formParent%3D%26hl%3Den_US%26jobTitle%" + "3DEmbedded%2BSoftware%2BEngineer%26questions%3Diq%253A%252F%" + "252F5a5f158dfd632ec505eb%253Fv%253D1%26twoPaneVjAllocId%3D%" + "26onappliedstatus%3D_updateIndeedApplyStatus%26preload%3D0%26autoString%" + "3Dnone%26iip%3D1%26recentsearchquery%3D%257B%2522what%2522%253A%" + "2522software%2Bengineer%2522%252C%2522where%2522%253A%2522austin%252C%" + "2Btx%2522%257D%26isCreateIAJobApiSuccess%3Dfalse%26onclose%" + 
"3DindeedApplyHandleModalClose%26onContinueClick%" + "3DindeedApplyHandleModalClose%26jobUrl%3Dhttps%253A%252F%252Fwww.indeed." + "com%252Fviewjob%253Fjk%253D4ce8c8f85737012d%26onready%3D_onButtonReady%" + "26onapplied%3DindeedApplyHandleApply%26href%3Dhttps%253A%252F%252Fwww." + "indeed.com%252Fviewjob%253Fjk%253D4ce8c8f85737012d%2526from%253Dmobhp_" + "jobfeed_auto%2526tk%253D1h4m9jbiui7lq801%2526viewtype%253Dembedded%" + "2526advn%253D2919294681304046%2526adid%253D409899006%2526xkcb%253DSoCq-_" + "M3NWbCoeUCiZ0LbzkdCdPP%2526topwindowlocation%253D%25252F%26coverletter%" + "3DOPTIONAL%26resume%3Drequired%26twoPaneAllocId%3D%26jobMeta%3D%257B%" + "2526quot%253Bvtk%2526quot%253B%253A%2526quot%253B1h4m9jddo28q3001%" + "2526quot%253B%252C%2526quot%253Btk%2526quot%253B%253A%2526quot%" + "253B1h4m9jbiui7lq801%2526quot%253B%257D%26src%3Didd%26ms%3D1688670424981%" + "26jobCompany%3DSigmaSense%252C%2BLLC%26onclick%" + "3DindeedApplyHandleButtonClick%26pingbackUrl%3Dhttps%253A%252F%252Fgdc." + "indeed.com%252Fconv%252ForgIndApp%253Fco%253DUS%2526vjtk%" + "253D1h4m9jddo28q3001%2526jk%253D4ce8c8f85737012d%2526mvj%253D0%2526tk%" + "253D1h4m9jbiui7lq801%2526trk.origin%253Djobsearch%2526sj%253D1%2526vjfrom%" + "253Dmobhp_jobfeed_auto%2526advn%253D2919294681304046%2526adid%" + "253D409899006%2526ad%253D-6NYlbfkN0BLmp7eN89U-" + "imdIS3k1HPy83nFSQVS0CyWSe3vCO57TwIlXkEWIh-" + "pJhJKr5e0ECbg2AnsbYecK2l6IQRkcmJAo04wMd0HwXw9frAU8JSwJ1mjwcEN4QeCXiILN_" + "wIA4Wr_ywZCGdozVPXXsoaJzqbyZBeGNAHJQuiHvWOxPzh1LKLSr_" + "pFbOxn1NmCOkmvvMW36P569CcM6K7a7vOkj32OJUAg8NT_" + "oipaaUGwXpvKlH6ebfTW6B3WWuJtZ9tsQNwH330zZOVkF1mhjr837W2e-OaEjikG0Nrqh-" + "9DFBdDUmSLosfcp0hGtARFGYWfp7xU-897-fsivVLte1sPZhzSqWn9P_" + "D9hHnfmG2LZnTVBp3Jx6QcGng4-U5K8v9KFx7XN9GjcqQum735VDirUpQ61ZT-" + "WOT5Ilm1xI3nNocOcUQJELhqt6WiAgSIyvTKw7SAfCj2fzp0DshQHzxqVdhe-" + "iJ9apJI0JWZa195l_ZNFYvu8-rusj79RaBev9_" + "LPbejUXOZON2MDA37bFHRZsyWNXOCCKl0tswubGZku70sD7HVHm5aYYINKdL_" + 
"uKogRuW4r7C99AU69eZMUJF78gl%2526xkcb%253DSoCq-_M3NWbCoeUCiZ0LbzkdCdPP%" + "2526astse%253Dad9474a7b6ec862d%2526assa%253D8360%26co%3DUS%26advNum%" + "3D2919294681304046%26noButtonUI%3Dtrue%26iaUid%3D1h4m9je9qjcbf800%26spn%" + "3D1%26jobId%3D5a5f158dfd632ec505eb%26isITA%3D0%26apiToken%" + "3Daa102235a5ccb18bd3668c0e14aa3ea7e2503cfac2a7a9bf3d6549899e125af4%" + "26jobLocation%3DAustin%252C%2BTX%2B78758%26twoPaneGroup%3D-1%" + "26indeedcsrftoken%3D7bG1QaY6YSlr3rfgMbu9YRVPyk1v2TF0%26phone%3DOPTIONAL%" + "26jobApplies%3D-1%26twoPaneVjGroup%3D-1%26returnToJobSearchUrl%3Dhttp%" + "253A%252F%252Fwww.indeed.com%252F%26indeedApplyableJobApiURI%3D&cfb=2&obo=" + "http%3A%2F%2Fwww.indeed.com%2F&hl=en_US&from=indapply-login-SmartApply&" + "branding=indeed-apply", + // + "https://secure.indeed.com/" + "auth?continue=https%3A%2F%2Fm5.apply.indeed.com%2Fbeta%2Findeedapply%" + "2Fresumeapply%3FdraftId%3Dcd45b794-ede7-48a2-a143-6023319e90a4-Y21o%" + "26draftDc%3Dcmh%26postUrl%3Dhttps%253A%252F%252Fapply.workable.com%" + "252Fapi%252Fv1%252Fjobboards%252Findeed%252Fjobs%252FEC33BF8806%252Fapply%" + "26jk%3D0ffb6f7ed64d3bae%26mob%3D0%26referer%3Dhttps%253A%252F%252Fwww." + "indeed.com%252F%26formParent%3D%26hl%3Den_US%26jobTitle%3DEmbedded%" + "2BSoftware%2BEngineer%26questions%3Dhttps%253A%252F%252Fapply.workable." 
+ "com%252Fapi%252Fv1%252Fjobboards%252Findeed%252Fjobs%252FEC33BF8806%" + "252Fquestions%26twoPaneVjAllocId%3D%26onappliedstatus%3D_" + "updateIndeedApplyStatus%26preload%3D0%26autoString%3Dnone%26iip%3D1%" + "26recentsearchquery%3D%257B%2522what%2522%253A%2522software%2Bengineer%" + "2522%252C%2522where%2522%253A%2522austin%252C%2Btx%2522%257D%" + "26isCreateIAJobApiSuccess%3Dfalse%26onclose%3DindeedApplyHandleModalClose%" + "26onContinueClick%3DindeedApplyHandleModalClose%26jobUrl%3Dhttps%253A%" + "252F%252Fwww.indeed.com%252Fviewjob%253Fjk%253D0ffb6f7ed64d3bae%26onready%" + "3D_onButtonReady%26onapplied%3DindeedApplyHandleApply%26href%3Dhttps%253A%" + "252F%252Fwww.indeed.com%252Fviewjob%253Fjk%253D0ffb6f7ed64d3bae%2526from%" + "253Dhp%2526tk%253D1h4m9jbiui7lq801%2526viewtype%253Dembedded%2526advn%" + "253D2169897021852324%2526adid%253D412530207%2526xkcb%253DSoDv-_" + "M3NWbCoe0CiZ0LbzkdCdPP%2526topwindowlocation%253D%25252F%26coverletter%3D%" + "26twoPaneAllocId%3D%26src%3Didd%26ms%3D1688670502027%26jobCompany%3DShift%" + "2BRobotics%26onclick%3DindeedApplyHandleButtonClick%26pingbackUrl%3Dhttps%" + "253A%252F%252Fgdc.indeed.com%252Fconv%252ForgIndApp%253Fco%253DUS%" + "2526vjtk%253D1h4m9ltcgii2t800%2526jk%253D0ffb6f7ed64d3bae%2526mvj%253D0%" + "2526tk%253D1h4m9jbiui7lq801%2526trk.origin%253Djobsearch%2526sj%253D1%" + "2526vjfrom%253Dhp%2526advn%253D2169897021852324%2526adid%253D412530207%" + "2526ad%253D-6NYlbfkN0ADTLHW1lVcttxG1n9WEfcRI1-" + "ixIWqaQXrnishWQ6BGJjne4HH5OGRzbL9TFjFzxuxk65rhcUupJlJ21QkpPLqd89n0B4cMJw-" + "xmaYdF9-dzypunDDP4jQEuuhT-tpejJCNc8jlBI6FGBAtkAXuipq96Z-" + "vOtd24jCWqboqknQBia2fKh5sYbqLv3E7C6vlBmxO2FH4-qm1_" + "vkeeUq1lsktOtkKCFK2RSR5V5xbkBHcu0hkuZAShjpg2ro3F4e9VbP5_" + "tC3BKSqdL9un4SibeC59V880-mAhOnU_" + "yhuURbniZCCFxjEH66D3euJEOSBZDVnpK0jsbAbxwAnx9dtEdC_" + "HG3BG2PgUf9uwPA8SgdtHuhTAkToYjDBF1l5ENrF3WSXIMTCANToEbE3FpgMwNgOkTDf_" + "4E0Zf-vZ5LjmNY_8q8gL9SwhL6dAsnb-iH5Nm9OGEI32LTlhl9KtszAFZ99UGlzmRjo_" + 
"iD7ienJa3zd_Ebh_NZWkb_4pEKal6--pSAPlVPbC6azvhPiBzQgMhzpUS9Z-7YYhU%25253D%" + "2526xkcb%253DSoDv-_M3NWbCoe0CiZ0LbzkdCdPP%2526astse%253Dc630be9cfe791df9%" + "2526assa%253D240%26co%3DUS%26advNum%3D2169897021852324%26noButtonUI%" + "3Dtrue%26iaUid%3D1h4m9lujpkblm800%26spn%3D1%26jobId%3D5F6DD26C1B%26isITA%" + "3D0%26apiToken%" + "3D3a51613a4d8b9799d352130065868b0c34bce36cee7f4dffa3ed16b0c7936634%" + "26jobLocation%3DAustin%252C%2BTexas%252C%2BUnited%2BStates%26twoPaneGroup%" + "3D-1%26indeedcsrftoken%3D7bG1QaY6YSlr3rfgMbu9YRVPyk1v2TF0%26phone%" + "3Doptional%26jobApplies%3D-1%26twoPaneVjGroup%3D-1%26returnToJobSearchUrl%" + "3Dhttp%253A%252F%252Fwww.indeed.com%252F%26indeedApplyableJobApiURI%3D&" + "cfb=2&obo=http%3A%2F%2Fwww.indeed.com%2F&hl=en_US&from=indapply-login-" + "SmartApply&branding=indeed-apply", + // + "https://secure.indeed.com/" + "auth?hl=en_US&co=US&continue=https%3A%2F%2Fwww.indeed.com%" + "2Fthirdpartysignin%3Fjk%3D67557c870d9debaf%26from%3Dhp%26from%3Djsfe-" + "3pintercept-viewjob%26tk%3D1h4m9jbiui7lq801%26viewtype%3Dembedded%26advn%" + "3D8187210054516026%26adid%3D378267801%26ad%3D-6NYlbfkN0CfpH2aSe_" + "yWN7pjV6WFrWU4hEZi9Btn9eCdDUBIhjK5M5mY81rEexvugfeSup1QuHOvw9d5hvgsJ79xiL2b" + "Cis9Y8r23bY8qvwxN3cXtMQH5eaPpn4zk1QcFRVOjQFg-" + "0YX6StKUcjnJroSlWw3vVqor9zKJ4mUJ-Ksql7DBTYyyZGXojbnMo-" + "neBlW1zDoHnAAl1ZZZa38U8p1jl35T8o9uwhvY3mVw2XDdmKpKawVuyFfiNGl3_" + "jyLBWarAGLeTBHVsVlBONBK8GK4zH1pVL31V4M43uQUjWUhjRqH4lnq92jt7uCHE97bhKm2hMo" + "6dpJ6I-" + "1REKDf9gE0gloVW3r2lBI2TpIWbePg2zuBg4CnvYaRAm7elrbL8hYuiPYtB3hjTkldS_IYH3-" + "NgunawHQ-" + "LwIxAO35DyDhaY1DrGuFWaTQj6f1JlddpnImKhUaKP3jgV0q9uKoQxvyyFhLOlLGDxfMsVecGZ" + "B4lwuUK0TE74Qix1iR26X1QtEguPk8yp8DQZ-AfOqT_" + "S7A0PtcI2eI0sLM1y3BHB3p0KdpYJUsDv02t7UYO_gNEmMOmcsr5gLsmE-cu52BF_" + "n2lEDE3kKpIKqMu91dFTmI25H393tb-" + "PfCUfVAVaUveXuO2hjWSctjtFCo9RPl6ix3ilDs1QgKt08BtT4IUb5I24JlxIJXNvkHhkH75vw" + "PH9SHKr5XfuN32rOCTUr9JWLmVEcQ4x5A0pHUXQRyz8OxdfsifIibHB8SpDYTtyY50lSL4sAe3" + 
"M4PDq0d54xfqWuSQqhGqo0lE944k8JjiQue8M1cIcqpssOOqE8SIi-" + "hDdv1KG0G1kQuLBIYMzzrGCJ6WDZm_KbLiyK0wTrPf2cWfHIyU1JI1pdWKbK6fop_" + "kuNd3OBEAl00YETNwOrg4HrZdK8NXEkG_QWXA-A0nYxFWz58uoHND5rkyVDO0o%26xkcb%" + "3DSoBZ-_M3NWbCoZUCiZ0LbzkdCdPP%26topwindowlocation%3D%252F%253Fadvn%" + "253D2169897021852324%2526vjk%253D0ffb6f7ed64d3bae%26vjtk%" + "3D1h4m9npiq21a4002&from=jsfe-3pintercept-viewjob&branding=third-party-" + "applies", + // + "https://secure.indeed.com/" + "auth?continue=https%3A%2F%2Fm5.apply.indeed.com%2Fbeta%2Findeedapply%" + "2Fresumeapply%3FdraftId%3Dde4f06da-7b31-465c-96d2-80f791a85bf7-Y21o%" + "26draftDc%3Dcmh%26postUrl%3Dhttp%253A%252F%252Fmuffit%252Fprocess-" + "indeedapply%26jk%3D7590bdb1fe928d49%26mob%3D0%26referer%3Dhttps%253A%252F%" + "252Fwww.indeed.com%252F%253Fvjk%253D4ce8c8f85737012d%2526advn%" + "253D2919294681304046%26formParent%3D%26hl%3Den_US%26jobTitle%3DSenior%" + "2BSoftware%2BDeveloper%2B%2528onsite%2529%26questions%3Diq%253A%252F%" + "252F0efc2325f6b4a2c5bc27%253Fv%253D1%26twoPaneVjAllocId%3D%" + "26onappliedstatus%3D_updateIndeedApplyStatus%26preload%3D0%26autoString%" + "3Dnone%26iip%3D1%26recentsearchquery%3D%257B%2522what%2522%253A%" + "2522software%2Bengineer%2522%252C%2522where%2522%253A%2522austin%252C%" + "2Btx%2522%257D%26isCreateIAJobApiSuccess%3Dfalse%26onclose%" + "3DindeedApplyHandleModalClose%26onContinueClick%" + "3DindeedApplyHandleModalClose%26jobUrl%3Dhttps%253A%252F%252Fwww.indeed." + "com%252Fviewjob%253Fjk%253D7590bdb1fe928d49%26onready%3D_onButtonReady%" + "26onapplied%3DindeedApplyHandleApply%26href%3Dhttps%253A%252F%252Fwww." 
+ "indeed.com%252Fviewjob%253Fjk%253D7590bdb1fe928d49%2526from%253Dhp%2526tk%" + "253D1h4m9jbiui7lq801%2526viewtype%253Dembedded%2526advn%" + "253D5522285726153717%2526adid%253D414206073%2526xkcb%253DSoDt-_" + "M3NWbCoZUCiZ0KbzkdCdPP%2526topwindowlocation%253D%25252F%25253Fvjk%" + "25253D4ce8c8f85737012d%252526advn%25253D2919294681304046%26coverletter%" + "3DOPTIONAL%26resume%3Drequired%26twoPaneAllocId%3D%26jobMeta%3D%257B%" + "2526quot%253Bvtk%2526quot%253B%253A%2526quot%253B1h4m9oh7mirks800%" + "2526quot%253B%252C%2526quot%253Btk%2526quot%253B%253A%2526quot%" + "253B1h4m9jbiui7lq801%2526quot%253B%257D%26src%3Didd%26ms%3D1688670587917%" + "26jobCompany%3DCitizens%2BInc%26onclick%3DindeedApplyHandleButtonClick%" + "26pingbackUrl%3Dhttps%253A%252F%252Fgdc.indeed.com%252Fconv%252ForgIndApp%" + "253Fco%253DUS%2526vjtk%253D1h4m9oh7mirks800%2526jk%253D7590bdb1fe928d49%" + "2526mvj%253D0%2526tk%253D1h4m9jbiui7lq801%2526trk.origin%253Djobsearch%" + "2526sj%253D1%2526vjfrom%253Dhp%2526advn%253D5522285726153717%2526adid%" + "253D414206073%2526ad%253D-" + "6NYlbfkN0CHSAkotDdvvZVbhOqFdbxXOHJMhXe1DXuaBPnaU5fYte-" + "aud5Z0lqoqFyp33jrJfy1DYFhCWCqBjAqfX3PBXom-d5E4gy3cqbwZuMtWn4flXO-" + "Fd9DkMZrQjqK002kTnGqvqfkH0ftIspK3hwJPRmAEy7EY87A9OOFRyFmxA9AdiimsdRWyksA-" + "nCQ0w1VI28XDuVMu7qO_D46dH-" + "dtW5jWIG4jTe8HCv21447lFobYgFb9oJdF8NrjyCNP4fdGeojlELmcjS5cvC5dKfXi8IZm4sWW" + "-7b5SBQKvBMmSVDjiTsgYZS6lb8B-" + "a3YF1Lny7hpNfClmOcLe49wiZAG9LWJ7uRUEfzOPrUCwxdHNQK-vEo3ZhDK4AeER-" + "LfOUabNSjrKz7_91l8sQjBNOR-FJ25ioX0sqoNByLfJC7cWzjDxqvW-l82GsWQR2O_" + "6Khe2oq91fjVXMAFQdSQWdr_DWCf_" + "e2FYtN69Qql9maXH550XNcfynxCicTL71xLstYfWqbSMpADJhrW_" + "0pf4x58zLVfYLBJ7MPQaW15uKzbFn68lAlyF5GXDqWxowOm58EyeS7OmQkBdGyxYanZ6452m6O" + "%2526xkcb%253DSoDt-_M3NWbCoZUCiZ0KbzkdCdPP%2526astse%253Db4f6f6ed591bacca%" + "2526assa%253D6102%26co%3DUS%26advNum%3D5522285726153717%26noButtonUI%" + "3Dtrue%26iaUid%3D1h4m9oi2qj4h4800%26spn%3D1%26jobId%" + "3D0efc2325f6b4a2c5bc27%26isITA%3D0%26apiToken%" + 
"3Daa102235a5ccb18bd3668c0e14aa3ea7e2503cfac2a7a9bf3d6549899e125af4%" + "26jobLocation%3DAustin%252C%2BTX%2B78758%26twoPaneGroup%3D-1%" + "26indeedcsrftoken%3D7bG1QaY6YSlr3rfgMbu9YRVPyk1v2TF0%26phone%3DOPTIONAL%" + "26jobApplies%3D-1%26twoPaneVjGroup%3D-1%26returnToJobSearchUrl%3Dhttp%" + "253A%252F%252Fwww.indeed.com%252F%253Fvjk%253D4ce8c8f85737012d%2526advn%" + "253D2919294681304046%26indeedApplyableJobApiURI%3D&cfb=2&obo=http%3A%2F%" + "2Fwww.indeed.com%2F&hl=en_US&from=indapply-login-SmartApply&branding=" + "indeed-apply"}; + +std::vector url_examples; + +double url_examples_bytes = []() -> double { + size_t bytes{0}; + for (std::string& url_string : url_examples) { + bytes += url_string.size(); + } + return double(bytes); +}(); + +#ifdef ADA_URL_FILE +const char* default_file = ADA_URL_FILE; +#else +const char* default_file = nullptr; +#endif + +size_t init_data(const char* input = default_file) { + // compute the number of bytes. + auto compute = []() -> double { + size_t bytes{0}; + for (std::string& url_string : url_examples) { + bytes += url_string.size(); + } + return double(bytes); + }; + if (input == nullptr) { + for (const std::string& s : url_examples_default) { + url_examples.emplace_back(s); + } + url_examples_bytes = compute(); + return url_examples.size(); + } + + if (!file_exists(input)) { + std::cout << "File not found !" << input << std::endl; + for (const std::string& s : url_examples_default) { + url_examples.emplace_back(s); + } + } else { + std::cout << "Loading " << input << std::endl; + url_examples = split_string(read_file(input)); + } + url_examples_bytes = compute(); + return url_examples.size(); +} + +size_t count_ada_invalid() { + size_t how_many = 0; + for (std::string& url_string : url_examples) { + auto url = ada::parse(url_string); + if (!url) { + how_many++; + } + } + return how_many; +} + +template +static void BasicBench_AdaURL(benchmark::State& state) { + // volatile to prevent optimizations. 
+ volatile size_t param_count = 0; + + for (auto _ : state) { + for (std::string& url_string : url_examples) { + ada::result url = ada::parse(url_string); + if (url) { + auto params = ada::url_search_params{url->get_search()}; + param_count += params.size(); + } + } + } + if (collector.has_events()) { + event_aggregate aggregate{}; + for (size_t i = 0; i < N; i++) { + std::atomic_thread_fence(std::memory_order_acquire); + collector.start(); + for (std::string& url_string : url_examples) { + ada::result url = ada::parse(url_string); + if (url) { + auto params = ada::url_search_params{url->get_search()}; + param_count += params.size(); + } + } + std::atomic_thread_fence(std::memory_order_release); + event_count allocate_count = collector.end(); + aggregate << allocate_count; + } + state.counters["cycles/url"] = + aggregate.best.cycles() / std::size(url_examples); + state.counters["instructions/url"] = + aggregate.best.instructions() / std::size(url_examples); + state.counters["instructions/cycle"] = + aggregate.best.instructions() / aggregate.best.cycles(); + state.counters["instructions/byte"] = + aggregate.best.instructions() / url_examples_bytes; + state.counters["instructions/ns"] = + aggregate.best.instructions() / aggregate.best.elapsed_ns(); + state.counters["GHz"] = + aggregate.best.cycles() / aggregate.best.elapsed_ns(); + state.counters["ns/url"] = + aggregate.best.elapsed_ns() / std::size(url_examples); + state.counters["cycle/byte"] = aggregate.best.cycles() / url_examples_bytes; + } + (void)param_count; + state.counters["time/byte"] = benchmark::Counter( + url_examples_bytes, benchmark::Counter::kIsIterationInvariantRate | + benchmark::Counter::kInvert); + state.counters["time/url"] = + benchmark::Counter(double(std::size(url_examples)), + benchmark::Counter::kIsIterationInvariantRate | + benchmark::Counter::kInvert); + state.counters["speed"] = benchmark::Counter( + url_examples_bytes, benchmark::Counter::kIsIterationInvariantRate); + 
state.counters["url/s"] =
+      benchmark::Counter(double(std::size(url_examples)),
+                         benchmark::Counter::kIsIterationInvariantRate);
+}
+
+auto url_search_params_AdaURL = BasicBench_AdaURL;
+BENCHMARK(url_search_params_AdaURL);
+
+int main(int argc, char** argv) {
+  if (argc > 1 && file_exists(argv[1])) {
+    init_data(argv[1]);
+  } else {
+    init_data();
+  }
+#if (__APPLE__ && __aarch64__) || defined(__linux__)
+  if (!collector.has_events()) {
+    benchmark::AddCustomContext("performance counters",
+                                "No privileged access (sudo may help).");
+  }
+#else
+  if (!collector.has_events()) {
+    benchmark::AddCustomContext("performance counters", "Unsupported system.");
+  }
+#endif
+  benchmark::AddCustomContext("input bytes",
+                              std::to_string(size_t(url_examples_bytes)));
+  benchmark::AddCustomContext("number of URLs",
+                              std::to_string(std::size(url_examples)));
+  benchmark::AddCustomContext(
+      "bytes/URL",
+      std::to_string(url_examples_bytes / std::size(url_examples)));
+  if (collector.has_events()) {
+    benchmark::AddCustomContext("performance counters", "Enabled");
+  }
+  benchmark::Initialize(&argc, argv);
+  benchmark::RunSpecifiedBenchmarks();
+  benchmark::Shutdown();
+}
diff --git a/benchmarks/benchmark_header.h b/benchmarks/benchmark_header.h
new file mode 100644
index 000000000..3351e2725
--- /dev/null
+++ b/benchmarks/benchmark_header.h
@@ -0,0 +1,75 @@
+#include
+#include
+#include
+#include
+#include
+#include
+
+#if ADA_VARIOUS_COMPETITION_ENABLED
+#include
+#include
+#include
+#endif
+#if ADA_url_whatwg_ENABLED
+#include
+#endif
+
+#include "ada.h"
+#include "performancecounters/event_counter.h"
+// Performance-counter collector shared by the benchmark functions.
+event_collector collector;
+// How many times a benchmark repeats its counter-instrumented pass.
+size_t N = 1000;
+
+#include
+
+// Returns true when `filename` names an existing filesystem entry.
+bool file_exists(const char* filename) {
+  return std::filesystem::exists(filename);
+}
+
+// Reads the whole file `filename` into a string, 4096 bytes at a time.
+// Only badbit raises an exception; a file that cannot be opened is not
+// reported and yields an empty string.
+std::string read_file(std::string filename) {
+  constexpr size_t read_size = 4096;
+  auto stream = std::ifstream(filename.c_str());
+  stream.exceptions(std::ios_base::badbit);
+  std::string out;
+  std::string buf(read_size, '\0');
+  // read() fails on the final short chunk, so the bytes it did read are
+  // appended after the loop using gcount().
+  while (stream.read(&buf[0], read_size)) {
+    out.append(buf, 0, size_t(stream.gcount()));
+  }
+  out.append(buf, 0, size_t(stream.gcount()));
+  return out;
+}
+
+// Splits `str` on '\n', trims whitespace from both ends of each line and
+// returns the non-empty lines in their original order.
+std::vector<std::string> split_string(const std::string& str) {
+  // std::isspace is undefined for negative values other than EOF, so each
+  // char goes through unsigned char first (bytes >= 0x80 are negative where
+  // char is signed).
+  auto is_space = [](char c) {
+    return std::isspace(static_cast<unsigned char>(c)) != 0;
+  };
+  std::vector<std::string> result;
+  std::stringstream ss{str};
+  for (std::string line; std::getline(ss, line, '\n');) {
+    std::string_view view = line;
+    // Some parsers like boost/url will refuse to parse a URL with trailing
+    // whitespace.
+    while (!view.empty() && is_space(view.back())) {
+      view.remove_suffix(1);
+    }
+    while (!view.empty() && is_space(view.front())) {
+      view.remove_prefix(1);
+    }
+    if (!view.empty()) {
+      result.emplace_back(view);
+    }
+  }
+  return result;
+}
diff --git a/benchmarks/benchmark_template.cpp b/benchmarks/benchmark_template.cpp
new file mode 100644
index 000000000..18205b7d6
--- /dev/null
+++ b/benchmarks/benchmark_template.cpp
@@ -0,0 +1,878 @@
+/**
+ * The main benchmark is to take an input string, and convert it into a
+ * normalized URL (or 'href').
+ */
+
+size_t count_ada_invalid() {
+  size_t how_many = 0;
+  for (std::string& url_string : url_examples) {
+    auto url = ada::parse(url_string);
+    if (!url) {
+      how_many++;
+    }
+  }
+  return how_many;
+}
+
+enum { JUST_PARSE = 1, PARSE_AND_HREF = 0 };
+
+template
+static void BasicBench_AdaURL(benchmark::State& state) {
+  // volatile to prevent optimizations.
+ volatile size_t success = 0; + volatile size_t href_size = 0; + + for (auto _ : state) { + for (std::string& url_string : url_examples) { + ada::result url = ada::parse(url_string); + if (url) { + success++; + if constexpr (!just_parse) { + href_size += url->get_href().size(); + } + } + } + } + if (collector.has_events()) { + event_aggregate aggregate{}; + for (size_t i = 0; i < N; i++) { + std::atomic_thread_fence(std::memory_order_acquire); + collector.start(); + for (std::string& url_string : url_examples) { + ada::result url = ada::parse(url_string); + if (url) { + success++; + if constexpr (!just_parse) { + href_size += url->get_href().size(); + } + } + } + std::atomic_thread_fence(std::memory_order_release); + event_count allocate_count = collector.end(); + aggregate << allocate_count; + } + state.counters["cycles/url"] = + aggregate.best.cycles() / std::size(url_examples); + state.counters["instructions/url"] = + aggregate.best.instructions() / std::size(url_examples); + state.counters["instructions/cycle"] = + aggregate.best.instructions() / aggregate.best.cycles(); + state.counters["instructions/byte"] = + aggregate.best.instructions() / url_examples_bytes; + state.counters["instructions/ns"] = + aggregate.best.instructions() / aggregate.best.elapsed_ns(); + state.counters["GHz"] = + aggregate.best.cycles() / aggregate.best.elapsed_ns(); + state.counters["ns/url"] = + aggregate.best.elapsed_ns() / std::size(url_examples); + state.counters["cycle/byte"] = aggregate.best.cycles() / url_examples_bytes; + } + (void)success; + state.counters["time/byte"] = benchmark::Counter( + url_examples_bytes, benchmark::Counter::kIsIterationInvariantRate | + benchmark::Counter::kInvert); + state.counters["time/url"] = + benchmark::Counter(double(std::size(url_examples)), + benchmark::Counter::kIsIterationInvariantRate | + benchmark::Counter::kInvert); + state.counters["speed"] = benchmark::Counter( + url_examples_bytes, benchmark::Counter::kIsIterationInvariantRate); + 
state.counters["url/s"] = + benchmark::Counter(double(std::size(url_examples)), + benchmark::Counter::kIsIterationInvariantRate); +} + +auto BasicBench_AdaURL_href = BasicBench_AdaURL; +BENCHMARK(BasicBench_AdaURL_href); +auto BasicBench_AdaURL_aggregator_href = + BasicBench_AdaURL; +BENCHMARK(BasicBench_AdaURL_aggregator_href); + +static void BasicBench_AdaURL_CanParse(benchmark::State& state) { + // volatile to prevent optimizations. + volatile size_t success = 0; + + for (auto _ : state) { + for (std::string& url_string : url_examples) { + bool can_parse = ada::can_parse(url_string); + if (can_parse) { + success++; + } + } + } + if (collector.has_events()) { + event_aggregate aggregate{}; + for (size_t i = 0; i < N; i++) { + std::atomic_thread_fence(std::memory_order_acquire); + collector.start(); + for (std::string& url_string : url_examples) { + bool can_parse = ada::can_parse(url_string); + if (can_parse) { + success++; + } + } + std::atomic_thread_fence(std::memory_order_release); + event_count allocate_count = collector.end(); + aggregate << allocate_count; + } + state.counters["cycles/url"] = + aggregate.best.cycles() / std::size(url_examples); + state.counters["instructions/url"] = + aggregate.best.instructions() / std::size(url_examples); + state.counters["instructions/cycle"] = + aggregate.best.instructions() / aggregate.best.cycles(); + state.counters["instructions/byte"] = + aggregate.best.instructions() / url_examples_bytes; + state.counters["instructions/ns"] = + aggregate.best.instructions() / aggregate.best.elapsed_ns(); + state.counters["GHz"] = + aggregate.best.cycles() / aggregate.best.elapsed_ns(); + state.counters["ns/url"] = + aggregate.best.elapsed_ns() / std::size(url_examples); + state.counters["cycle/byte"] = aggregate.best.cycles() / url_examples_bytes; + } + (void)success; + state.counters["time/byte"] = benchmark::Counter( + url_examples_bytes, benchmark::Counter::kIsIterationInvariantRate | + benchmark::Counter::kInvert); + 
state.counters["time/url"] = + benchmark::Counter(double(std::size(url_examples)), + benchmark::Counter::kIsIterationInvariantRate | + benchmark::Counter::kInvert); + state.counters["speed"] = benchmark::Counter( + url_examples_bytes, benchmark::Counter::kIsIterationInvariantRate); + state.counters["url/s"] = + benchmark::Counter(double(std::size(url_examples)), + benchmark::Counter::kIsIterationInvariantRate); +} + +BENCHMARK(BasicBench_AdaURL_CanParse); + +#if ADA_url_whatwg_ENABLED +size_t count_whatwg_invalid() { + size_t how_many = 0; + for (std::string& url_string : url_examples) { + upa::url url; + if (!upa::success(url.parse(url_string, nullptr))) { + how_many++; + } + } + return how_many; +} + +template +static void BasicBench_whatwg(benchmark::State& state) { + // volatile to prevent optimizations. + volatile size_t success = 0; + volatile size_t href_size = 0; + for (auto _ : state) { + for (std::string& url_string : url_examples) { + upa::url url; + if (upa::success(url.parse(url_string, nullptr))) { + success++; + if (!just_parse) { + href_size += url.href().size(); + } + } + } + } + if (collector.has_events()) { + event_aggregate aggregate{}; + for (size_t i = 0; i < N; i++) { + std::atomic_thread_fence(std::memory_order_acquire); + collector.start(); + for (std::string& url_string : url_examples) { + upa::url url; + if (upa::success(url.parse(url_string, nullptr))) { + success++; + if (!just_parse) { + href_size += url.href().size(); + } + } + } + std::atomic_thread_fence(std::memory_order_release); + event_count allocate_count = collector.end(); + aggregate << allocate_count; + } + state.counters["cycles/url"] = + aggregate.best.cycles() / std::size(url_examples); + state.counters["instructions/url"] = + aggregate.best.instructions() / std::size(url_examples); + state.counters["instructions/cycle"] = + aggregate.best.instructions() / aggregate.best.cycles(); + state.counters["instructions/byte"] = + aggregate.best.instructions() / 
url_examples_bytes; + state.counters["instructions/ns"] = + aggregate.best.instructions() / aggregate.best.elapsed_ns(); + state.counters["GHz"] = + aggregate.best.cycles() / aggregate.best.elapsed_ns(); + state.counters["ns/url"] = + aggregate.best.elapsed_ns() / std::size(url_examples); + state.counters["cycle/byte"] = aggregate.best.cycles() / url_examples_bytes; + } + (void)success; + (void)href_size; + state.counters["time/byte"] = benchmark::Counter( + url_examples_bytes, benchmark::Counter::kIsIterationInvariantRate | + benchmark::Counter::kInvert); + state.counters["time/url"] = benchmark::Counter( + std::size(url_examples), benchmark::Counter::kIsIterationInvariantRate | + benchmark::Counter::kInvert); + state.counters["speed"] = benchmark::Counter( + url_examples_bytes, benchmark::Counter::kIsIterationInvariantRate); + state.counters["url/s"] = benchmark::Counter( + std::size(url_examples), benchmark::Counter::kIsIterationInvariantRate); +} +BENCHMARK(BasicBench_whatwg); +// There is no need for BasicBench_whatwg_just_parse because whatwg appears to +// provide the href at a minimal cost, probably because it is already +// materialized. auto BasicBench_whatwg_just_parse = +// BasicBench_whatwg; BENCHMARK(BasicBench_whatwg_just_parse); + +#endif // ADA_url_whatwg_ENABLED + +#if ADA_CURL_ENABLED +#include + +size_t count_curl_invalid() { + size_t how_many = 0; + CURLU* url = curl_url(); + for (std::string& url_string : url_examples) { + CURLUcode rc = curl_url_set(url, CURLUPART_URL, url_string.c_str(), 0); + // Returns a CURLUcode error value, which is (0) if everything went fine. + if (rc != 0) { + how_many++; + } + } + curl_url_cleanup(url); + return how_many; +} + +// curl follows RFC3986+ +template +static void BasicBench_CURL(benchmark::State& state) { + // volatile to prevent optimizations. 
+  volatile size_t success = 0;
+  volatile size_t href_size = 0;
+
+  CURLU* url = curl_url();  // one handle, reused for every URL
+  for (auto _ : state) {
+    for (std::string& url_string : url_examples) {
+      CURLUcode rc = curl_url_set(url, CURLUPART_URL, url_string.c_str(), 0);
+      // Returns a CURLUcode error value, which is (0) if everything went fine.
+      if (rc == 0) {
+        success++;
+        if (!just_parse) {
+          char* buffer;
+          // Returns the normalized (possibly cleaned up) full URL.
+          rc = curl_url_get(url, CURLUPART_URL, &buffer, 0);
+          if (rc == 0) {
+            href_size += strlen(buffer);
+            curl_free(buffer);
+          }
+        }
+      }
+    }
+  }
+  if (collector.has_events()) {  // counter pass, repeated N times
+    event_aggregate aggregate{};
+    for (size_t i = 0; i < N; i++) {
+      std::atomic_thread_fence(std::memory_order_acquire);
+      collector.start();
+      for (std::string& url_string : url_examples) {
+        CURLUcode rc = curl_url_set(url, CURLUPART_URL, url_string.c_str(), 0);
+        // Same work as the timed loop: only successful parses are serialized.
+        if (rc == 0) {
+          success++;
+          if (!just_parse) {
+            char* buffer;
+            rc = curl_url_get(url, CURLUPART_URL, &buffer, 0);
+            if (rc == 0) {
+              href_size += strlen(buffer);
+              curl_free(buffer);
+            }
+          }
+        }
+      }
+      std::atomic_thread_fence(std::memory_order_release);
+      event_count allocate_count = collector.end();
+      aggregate << allocate_count;
+    }
+    state.counters["cycles/url"] =
+        aggregate.best.cycles() / std::size(url_examples);
+    state.counters["instructions/url"] =
+        aggregate.best.instructions() / std::size(url_examples);
+    state.counters["instructions/cycle"] =
+        aggregate.best.instructions() / aggregate.best.cycles();
+    state.counters["instructions/byte"] =
+        aggregate.best.instructions() / url_examples_bytes;
+    state.counters["instructions/ns"] =
+        aggregate.best.instructions() / aggregate.best.elapsed_ns();
+    state.counters["GHz"] =
+        aggregate.best.cycles() / aggregate.best.elapsed_ns();
+    state.counters["ns/url"] =
+        aggregate.best.elapsed_ns() / std::size(url_examples);
+    state.counters["cycle/byte"] = aggregate.best.cycles() / url_examples_bytes;
+  }
+  (void)success;
+  curl_url_cleanup(url);  // releases the handle obtained from curl_url()
+  state.counters["time/byte"] = benchmark::Counter(
+      url_examples_bytes, benchmark::Counter::kIsIterationInvariantRate |
+                              benchmark::Counter::kInvert);
+  state.counters["time/url"] = benchmark::Counter(
+      std::size(url_examples), benchmark::Counter::kIsIterationInvariantRate |
+                                   benchmark::Counter::kInvert);
+  state.counters["speed"] = benchmark::Counter(
+      url_examples_bytes, benchmark::Counter::kIsIterationInvariantRate);
+  state.counters["url/s"] = benchmark::Counter(
+      std::size(url_examples), benchmark::Counter::kIsIterationInvariantRate);
+}
+BENCHMARK(BasicBench_CURL);
+// 'just parsing' is faster with curl, but maybe not so important for us.
+// auto BasicBench_CURL_just_parse = BasicBench_CURL;
+// BENCHMARK(BasicBench_CURL_just_parse);
+#endif
+
+#if ADA_BOOST_ENABLED
+#include
+using namespace boost::urls;
+// Counts the URLs for which constructing or normalizing a boost url throws.
+size_t count_boosturl_invalid() {
+  size_t how_many = 0;
+  for (std::string& url_string : url_examples) {
+    try {
+      url u(url_string);
+      u.normalize();
+    } catch (...) {
+      how_many++;
+    }
+  }
+  return how_many;
+}
+
+// Boost URL follows RFC3986
+template
+static void BasicBench_BoostURL(benchmark::State& state) {
+  // volatile to prevent optimizations.
+  volatile size_t success = 0;
+  volatile size_t href_size = 0;
+  // Timed pass: a URL whose construction or normalization throws is skipped.
+  for (auto _ : state) {
+    for (std::string& url_string : url_examples) {
+      try {
+        url u(url_string);
+        u.normalize();
+        success++;
+        if constexpr (!just_parse) {
+          href_size += u.buffer().size();
+        }
+      } catch (...) {  // rejected URL: nothing to count
+      }
+    }
+  }
+  if (collector.has_events()) {  // same pass, repeated N times under counters
+    event_aggregate aggregate{};
+    for (size_t i = 0; i < N; i++) {
+      std::atomic_thread_fence(std::memory_order_acquire);
+      collector.start();
+      for (std::string& url_string : url_examples) {
+        try {
+          url u(url_string);
+          u.normalize();
+          success++;
+          if constexpr (!just_parse) {
+            href_size += u.buffer().size();
+          }
+        } catch (...) {  // rejected URL: nothing to count
+        }
+      }
+      std::atomic_thread_fence(std::memory_order_release);
+      event_count allocate_count = collector.end();
+      aggregate << allocate_count;
+    }
+    state.counters["cycles/url"] =
+        aggregate.best.cycles() / std::size(url_examples);
+    state.counters["instructions/url"] =
+        aggregate.best.instructions() / std::size(url_examples);
+    state.counters["instructions/cycle"] =
+        aggregate.best.instructions() / aggregate.best.cycles();
+    state.counters["instructions/byte"] =
+        aggregate.best.instructions() / url_examples_bytes;
+    state.counters["instructions/ns"] =
+        aggregate.best.instructions() / aggregate.best.elapsed_ns();
+    state.counters["GHz"] =
+        aggregate.best.cycles() / aggregate.best.elapsed_ns();
+    state.counters["ns/url"] =
+        aggregate.best.elapsed_ns() / std::size(url_examples);
+    state.counters["cycle/byte"] = aggregate.best.cycles() / url_examples_bytes;
+  }
+  (void)success;
+  (void)href_size;
+  const double url_count = double(std::size(url_examples));
+  state.counters["time/byte"] = benchmark::Counter(
+      url_examples_bytes, benchmark::Counter::kIsIterationInvariantRate |
+                              benchmark::Counter::kInvert);
+  state.counters["time/url"] = benchmark::Counter(
+      url_count, benchmark::Counter::kIsIterationInvariantRate |
+                     benchmark::Counter::kInvert);
+  state.counters["speed"] = benchmark::Counter(
+      url_examples_bytes, benchmark::Counter::kIsIterationInvariantRate);
+  state.counters["url/s"] = benchmark::Counter(
+      url_count, benchmark::Counter::kIsIterationInvariantRate);
+}
+BENCHMARK(BasicBench_BoostURL);
+// There is no need for 'just_parse' because BoostURL materializes the href.
+// auto BasicBench_BoostURL_just_parse = BasicBench_BoostURL; +// BENCHMARK(BasicBench_BoostURL_just_parse); +#endif // ADA_BOOST_ENABLED + +#if ADA_ZURI_ENABLED +#include + +size_t count_zuri_invalid() { + size_t how_many = 0; + for (std::string& url_string : url_examples) { + struct zuri2k uri; + zuri_error err = zuri_parse2k(&uri, url_string.c_str()); + if (err) how_many++; + } + return how_many; +} + +// ZURI follows RFC3986 +template +static void BasicBench_ZURI(benchmark::State& state) { + // volatile to prevent optimizations. + volatile size_t success = 0; + volatile size_t href_size = 0; + + for (auto _ : state) { + for (std::string& url_string : url_examples) { + struct zuri2k uri; + benchmark::DoNotOptimize(uri); + zuri_error err = zuri_parse2k(&uri, url_string.c_str()); + if (!err) { + success++; + if constexpr (!just_parse) { + char buf[2048]; + benchmark::DoNotOptimize(href_size += + zuri_read2k(&uri, &buf[0], sizeof(buf))); + } + } + } + } + if (collector.has_events()) { + event_aggregate aggregate{}; + for (size_t i = 0; i < N; i++) { + std::atomic_thread_fence(std::memory_order_acquire); + collector.start(); + for (std::string& url_string : url_examples) { + struct zuri2k uri; + benchmark::DoNotOptimize(uri); + zuri_error err = zuri_parse2k(&uri, url_string.c_str()); + if (!err) { + success++; + if constexpr (!just_parse) { + char buf[2048]; + benchmark::DoNotOptimize(href_size += + zuri_read2k(&uri, &buf[0], sizeof(buf))); + } + } + } + std::atomic_thread_fence(std::memory_order_release); + event_count allocate_count = collector.end(); + aggregate << allocate_count; + } + state.counters["cycles/url"] = + aggregate.best.cycles() / std::size(url_examples); + state.counters["instructions/url"] = + aggregate.best.instructions() / std::size(url_examples); + state.counters["instructions/cycle"] = + aggregate.best.instructions() / aggregate.best.cycles(); + state.counters["instructions/byte"] = + aggregate.best.instructions() / url_examples_bytes; + 
state.counters["instructions/ns"] = + aggregate.best.instructions() / aggregate.best.elapsed_ns(); + state.counters["GHz"] = + aggregate.best.cycles() / aggregate.best.elapsed_ns(); + state.counters["ns/url"] = + aggregate.best.elapsed_ns() / std::size(url_examples); + state.counters["cycle/byte"] = aggregate.best.cycles() / url_examples_bytes; + } + (void)success; + (void)href_size; + + state.counters["time/byte"] = benchmark::Counter( + url_examples_bytes, benchmark::Counter::kIsIterationInvariantRate | + benchmark::Counter::kInvert); + state.counters["time/url"] = benchmark::Counter( + std::size(url_examples), benchmark::Counter::kIsIterationInvariantRate | + benchmark::Counter::kInvert); + state.counters["speed"] = benchmark::Counter( + url_examples_bytes, benchmark::Counter::kIsIterationInvariantRate); + state.counters["url/s"] = benchmark::Counter( + std::size(url_examples), benchmark::Counter::kIsIterationInvariantRate); +} + +BENCHMARK(BasicBench_ZURI); +#endif // ADA_ZURI_ENABLED + +#if ADA_VARIOUS_COMPETITION_ENABLED +static void BasicBench_uriparser_just_parse(benchmark::State& state) { + // volatile to prevent optimizations. + volatile bool is_valid = true; + const char* errorPos; + UriUriA uri; + for (auto _ : state) { + for (std::string& url_string : url_examples) { + is_valid &= (uriParseSingleUriA(&uri, url_string.c_str(), &errorPos) == + URI_SUCCESS); + } + } + if (!is_valid) { + std::cout << "uri-parser: invalid? 
" << std::endl; + } + if (collector.has_events()) { + event_aggregate aggregate{}; + for (size_t i = 0; i < N; i++) { + std::atomic_thread_fence(std::memory_order_acquire); + collector.start(); + for (std::string& url_string : url_examples) { + is_valid &= (uriParseSingleUriA(&uri, url_string.c_str(), &errorPos) == + URI_SUCCESS); + } + std::atomic_thread_fence(std::memory_order_release); + event_count allocate_count = collector.end(); + aggregate << allocate_count; + } + state.counters["cycles/url"] = + aggregate.best.cycles() / std::size(url_examples); + state.counters["instructions/url"] = + aggregate.best.instructions() / std::size(url_examples); + state.counters["instructions/cycle"] = + aggregate.best.instructions() / aggregate.best.cycles(); + state.counters["instructions/byte"] = + aggregate.best.instructions() / url_examples_bytes; + state.counters["instructions/ns"] = + aggregate.best.instructions() / aggregate.best.elapsed_ns(); + state.counters["GHz"] = + aggregate.best.cycles() / aggregate.best.elapsed_ns(); + state.counters["ns/url"] = + aggregate.best.elapsed_ns() / std::size(url_examples); + state.counters["cycle/byte"] = aggregate.best.cycles() / url_examples_bytes; + } + uriFreeUriMembersA(&uri); + + state.counters["time/byte"] = benchmark::Counter( + url_examples_bytes, benchmark::Counter::kIsIterationInvariantRate | + benchmark::Counter::kInvert); + state.counters["time/url"] = + benchmark::Counter(double(std::size(url_examples)), + benchmark::Counter::kIsIterationInvariantRate | + benchmark::Counter::kInvert); + state.counters["speed"] = benchmark::Counter( + url_examples_bytes, benchmark::Counter::kIsIterationInvariantRate); + state.counters["url/s"] = + benchmark::Counter(double(std::size(url_examples)), + benchmark::Counter::kIsIterationInvariantRate); +} +BENCHMARK(BasicBench_uriparser_just_parse); +#endif // ADA_VARIOUS_COMPETITION_ENABLED + +#if ADA_VARIOUS_COMPETITION_ENABLED +static void BasicBench_urlparser_just_parse(benchmark::State& 
state) { + // volatile to prevent optimizations. + for (auto _ : state) { + for (std::string& url_string : url_examples) { + std::unique_ptr url(EdUrlParser::parseUrl(url_string)); + } + } + if (collector.has_events()) { + event_aggregate aggregate{}; + for (size_t i = 0; i < N; i++) { + std::atomic_thread_fence(std::memory_order_acquire); + collector.start(); + for (std::string& url_string : url_examples) { + std::unique_ptr url(EdUrlParser::parseUrl(url_string)); + } + std::atomic_thread_fence(std::memory_order_release); + event_count allocate_count = collector.end(); + aggregate << allocate_count; + } + state.counters["cycles/url"] = + aggregate.best.cycles() / std::size(url_examples); + state.counters["instructions/url"] = + aggregate.best.instructions() / std::size(url_examples); + state.counters["instructions/cycle"] = + aggregate.best.instructions() / aggregate.best.cycles(); + state.counters["instructions/byte"] = + aggregate.best.instructions() / url_examples_bytes; + state.counters["instructions/ns"] = + aggregate.best.instructions() / aggregate.best.elapsed_ns(); + state.counters["GHz"] = + aggregate.best.cycles() / aggregate.best.elapsed_ns(); + state.counters["ns/url"] = + aggregate.best.elapsed_ns() / std::size(url_examples); + state.counters["cycle/byte"] = aggregate.best.cycles() / url_examples_bytes; + } + + state.counters["time/byte"] = benchmark::Counter( + url_examples_bytes, benchmark::Counter::kIsIterationInvariantRate | + benchmark::Counter::kInvert); + state.counters["time/url"] = + benchmark::Counter(double(std::size(url_examples)), + benchmark::Counter::kIsIterationInvariantRate | + benchmark::Counter::kInvert); + state.counters["speed"] = benchmark::Counter( + url_examples_bytes, benchmark::Counter::kIsIterationInvariantRate); + state.counters["url/s"] = + benchmark::Counter(double(std::size(url_examples)), + benchmark::Counter::kIsIterationInvariantRate); +} +BENCHMARK(BasicBench_urlparser_just_parse); +#endif // 
ADA_VARIOUS_COMPETITION_ENABLED + +#if ADA_VARIOUS_COMPETITION_ENABLED +static void BasicBench_http_parser_just_parse(benchmark::State& state) { + volatile bool is_valid{true}; + struct http_parser_url u; + http_parser_url_init(&u); + for (auto _ : state) { + for (std::string& url_string : url_examples) { + is_valid &= + !http_parser_parse_url(url_string.data(), url_string.size(), 0, &u); + } + } + if (collector.has_events()) { + event_aggregate aggregate{}; + for (size_t i = 0; i < N; i++) { + std::atomic_thread_fence(std::memory_order_acquire); + collector.start(); + for (std::string& url_string : url_examples) { + is_valid &= + !http_parser_parse_url(url_string.data(), url_string.size(), 0, &u); + } + std::atomic_thread_fence(std::memory_order_release); + event_count allocate_count = collector.end(); + aggregate << allocate_count; + } + state.counters["cycles/url"] = + aggregate.best.cycles() / std::size(url_examples); + state.counters["instructions/url"] = + aggregate.best.instructions() / std::size(url_examples); + state.counters["instructions/cycle"] = + aggregate.best.instructions() / aggregate.best.cycles(); + state.counters["instructions/byte"] = + aggregate.best.instructions() / url_examples_bytes; + state.counters["instructions/ns"] = + aggregate.best.instructions() / aggregate.best.elapsed_ns(); + state.counters["GHz"] = + aggregate.best.cycles() / aggregate.best.elapsed_ns(); + state.counters["ns/url"] = + aggregate.best.elapsed_ns() / std::size(url_examples); + state.counters["cycle/byte"] = aggregate.best.cycles() / url_examples_bytes; + } + + if (!is_valid) { + std::cout << "http_parser: invalid? 
" << std::endl; + } + state.counters["time/byte"] = benchmark::Counter( + url_examples_bytes, benchmark::Counter::kIsIterationInvariantRate | + benchmark::Counter::kInvert); + state.counters["time/url"] = + benchmark::Counter(double(std::size(url_examples)), + benchmark::Counter::kIsIterationInvariantRate | + benchmark::Counter::kInvert); + state.counters["speed"] = benchmark::Counter( + url_examples_bytes, benchmark::Counter::kIsIterationInvariantRate); + state.counters["url/s"] = + benchmark::Counter(double(std::size(url_examples)), + benchmark::Counter::kIsIterationInvariantRate); +} +BENCHMARK(BasicBench_http_parser_just_parse); +#endif // ADA_VARIOUS_COMPETITION_ENABLED + +#if defined(ADA_RUST_VERSION) +#include "competitors/servo-url/servo_url.h" +size_t count_rust_invalid() { + size_t how_many = 0; + for (std::string& url_string : url_examples) { + servo_url::Url* url = + servo_url::parse_url(url_string.c_str(), url_string.length()); + servo_url::free_url(url); + if (!url) { + how_many++; + } + } + return how_many; +} + +// Emilio from Mozilla recommended that using an opaque-pointer will improve the +// performance of this benchmark. It has indeed improved but with the cost of +// validating the output. Reference: +// https://twitter.com/ecbos_/status/1627494441656238082?s=61&t=vCdcfSGWHH056CBdklWfCg +static void BasicBench_ServoUrl(benchmark::State& state) { + // Other benchmarks copy the 'standard url' to a structure. + // We try to mimic the effect. + volatile size_t success = 0; + + for (auto _ : state) { + for (std::string& url_string : url_examples) { + // benchmark::DoNotOptimize is unnecessary and potentially misleading. 
+ const char* url_href = + servo_url::parse_url_to_href(url_string.c_str(), url_string.length()); + if (url_href) { + // if you'd like you could print it: printf("%s\n", url_href); + success++; + servo_url::free_string(url_href); + } + } + } + if (collector.has_events()) { + event_aggregate aggregate{}; + for (size_t i = 0; i < N; i++) { + std::atomic_thread_fence(std::memory_order_acquire); + collector.start(); + for (std::string& url_string : url_examples) { + const char* url_href = servo_url::parse_url_to_href( + url_string.c_str(), url_string.length()); + if (url_href) { + success++; + servo_url::free_string(url_href); + } + } + std::atomic_thread_fence(std::memory_order_release); + event_count allocate_count = collector.end(); + aggregate << allocate_count; + } + (void)success; + state.counters["cycles/url"] = + aggregate.best.cycles() / std::size(url_examples); + state.counters["instructions/url"] = + aggregate.best.instructions() / std::size(url_examples); + state.counters["instructions/cycle"] = + aggregate.best.instructions() / aggregate.best.cycles(); + state.counters["instructions/byte"] = + aggregate.best.instructions() / url_examples_bytes; + state.counters["instructions/ns"] = + aggregate.best.instructions() / aggregate.best.elapsed_ns(); + state.counters["GHz"] = + aggregate.best.cycles() / aggregate.best.elapsed_ns(); + state.counters["ns/url"] = + aggregate.best.elapsed_ns() / std::size(url_examples); + state.counters["cycle/byte"] = aggregate.best.cycles() / url_examples_bytes; + } + + state.counters["time/byte"] = benchmark::Counter( + url_examples_bytes, benchmark::Counter::kIsIterationInvariantRate | + benchmark::Counter::kInvert); + state.counters["time/url"] = + benchmark::Counter(double(std::size(url_examples)), + benchmark::Counter::kIsIterationInvariantRate | + benchmark::Counter::kInvert); + state.counters["speed"] = benchmark::Counter( + url_examples_bytes, benchmark::Counter::kIsIterationInvariantRate); + state.counters["url/s"] = + 
benchmark::Counter(double(std::size(url_examples)), + benchmark::Counter::kIsIterationInvariantRate); +} +BENCHMARK(BasicBench_ServoUrl); +#endif // ADA_RUST + +int main(int argc, char** argv) { + if (argc > 1 && file_exists(argv[1])) { + init_data(argv[1]); + } else { + init_data(); + } + benchmark::AddCustomContext("ada spec", "Ada follows whatwg/url"); + size_t ada_bad_url = count_ada_invalid(); +#if ADA_url_whatwg_ENABLED + size_t whatwg_bad_url = count_whatwg_invalid(); +#endif +#if defined(ADA_RUST_VERSION) + benchmark::AddCustomContext("rust version ", ADA_RUST_VERSION); + size_t servo_bad_url = count_rust_invalid(); +#endif +#if ADA_CURL_ENABLED + // the curl dependency will depend on the system. + benchmark::AddCustomContext("curl version ", LIBCURL_VERSION); + benchmark::AddCustomContext("curl spec", + "Curl follows RFC3986, not whatwg/url"); + size_t curl_bad_url = count_curl_invalid(); +#else + benchmark::AddCustomContext("curl ", "OMITTED"); +#endif +#if ADA_BOOST_ENABLED + benchmark::AddCustomContext("boost-url spec", + "Boost URL follows RFC3986, not whatwg/url"); + size_t boost_bad_url = count_boosturl_invalid(); +#endif +#if ADA_ZURI_ENABLED + benchmark::AddCustomContext("zuri spec", + "Zuri follows RFC3986, not whatwg/url"); + size_t zuri_bad_url = count_zuri_invalid(); +#else + benchmark::AddCustomContext("zuri ", "OMITTED"); +#endif +#if (__APPLE__ && __aarch64__) || defined(__linux__) + if (!collector.has_events()) { + benchmark::AddCustomContext("performance counters", + "No privileged access (sudo may help)."); + } +#else + if (!collector.has_events()) { + benchmark::AddCustomContext("performance counters", "Unsupported system."); + } +#endif + benchmark::AddCustomContext("input bytes", + std::to_string(size_t(url_examples_bytes))); + benchmark::AddCustomContext("number of URLs", + std::to_string(std::size(url_examples))); + benchmark::AddCustomContext( + "bytes/URL", + std::to_string(url_examples_bytes / std::size(url_examples))); +#if 
ADA_VARIOUS_COMPETITION_ENABLED + benchmark::AddCustomContext("WARNING", + "BasicBench_urlparser and BasicBench_uriparser " + "do not use a normalized task."); +#endif + if (collector.has_events()) { + benchmark::AddCustomContext("performance counters", "Enabled"); + } + std::stringstream badcounts; + badcounts << "---------------------\n"; + badcounts << "ada---count of bad URLs " << std::to_string(ada_bad_url) + << "\n"; +#if defined(ADA_RUST_VERSION) + badcounts << "servo/url---count of bad URLs " << std::to_string(servo_bad_url) + << "\n"; +#endif +#if ADA_url_whatwg_ENABLED + badcounts << "whatwg---count of bad URLs " + << std::to_string(whatwg_bad_url) << "\n"; +#endif +#if ADA_CURL_ENABLED + badcounts << "curl---count of bad URLs " << std::to_string(curl_bad_url) + << "\n"; +#endif +#if ADA_BOOST_ENABLED + badcounts << "boost-url---count of bad URLs " << std::to_string(boost_bad_url) + << "\n"; +#endif +#if ADA_ZURI_ENABLED + badcounts << "zuri---count of bad URLs " << std::to_string(zuri_bad_url) + << "\n"; +#endif + badcounts << "-------------------------------\n"; + benchmark::AddCustomContext("bad urls", badcounts.str()); + + if (size_t(url_examples_bytes) > 1000000) { + N = 10; + } + + benchmark::Initialize(&argc, argv); + benchmark::RunSpecifiedBenchmarks(); + benchmark::Shutdown(); +} diff --git a/benchmarks/competitors/servo-url/Cargo.lock b/benchmarks/competitors/servo-url/Cargo.lock new file mode 100644 index 000000000..141f37877 --- /dev/null +++ b/benchmarks/competitors/servo-url/Cargo.lock @@ -0,0 +1,83 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "form_urlencoded" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "idna" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + +[[package]] +name = "libc" +version = "0.2.153" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" + +[[package]] +name = "percent-encoding" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" + +[[package]] +name = "servo-url" +version = "0.1.0" +dependencies = [ + "libc", + "url", +] + +[[package]] +name = "tinyvec" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "unicode-bidi" +version = "0.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d54675592c1dbefd78cbd98db9bacd89886e1ca50692a0692baefffdeb92dd58" + +[[package]] +name = "unicode-normalization" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "url" +version = "2.5.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] diff --git a/benchmarks/competitors/servo-url/Cargo.toml b/benchmarks/competitors/servo-url/Cargo.toml new file mode 100644 index 000000000..17aacb42c --- /dev/null +++ b/benchmarks/competitors/servo-url/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "servo-url" +version = "0.1.0" + +[lib] +path = "lib.rs" +crate-type = ["cdylib"] + +[dependencies] +url = "2.5.2" +libc = "0.2" + +[profile.release] +opt-level = 3 +debug = false +lto = true diff --git a/benchmarks/competitors/servo-url/README.md b/benchmarks/competitors/servo-url/README.md new file mode 100644 index 000000000..47a3c7823 --- /dev/null +++ b/benchmarks/competitors/servo-url/README.md @@ -0,0 +1,17 @@ +## Servo URL FFI + +This folder includes FFI bindings for servo/url. + +### Links + +- https://github.com/eqrion/cbindgen/blob/master/docs.md +- https://gist.github.com/zbraniecki/b251714d77ffebbc73c03447f2b2c69f +- https://michael-f-bryan.github.io/rust-ffi-guide/setting_up.html + +### Building + +- Generating cbindgen output + - Install dependencies with `brew install cbindgen` + - Generate with `cbindgen --config cbindgen.toml --crate servo-url --output servo_url.h` +- Building + - Run with `cargo build --release` diff --git a/benchmarks/competitors/servo-url/cbindgen.toml b/benchmarks/competitors/servo-url/cbindgen.toml new file mode 100644 index 000000000..0daad0cf8 --- /dev/null +++ b/benchmarks/competitors/servo-url/cbindgen.toml @@ -0,0 +1,12 @@ +autogen_warning = "/* Warning, this file is autogenerated by cbindgen. Don't modify this manually. 
*/" +include_version = true +braces = "SameLine" +line_length = 100 +tab_width = 2 +language = "C++" +namespaces = ["servo_url"] +include_guard = "servo_url_ffi_h" + +[parse] +parse_deps = true +include = ["url"] diff --git a/benchmarks/competitors/servo-url/lib.rs b/benchmarks/competitors/servo-url/lib.rs new file mode 100644 index 000000000..700ebf37e --- /dev/null +++ b/benchmarks/competitors/servo-url/lib.rs @@ -0,0 +1,44 @@ +use url::Url; +use std::slice; +use libc::{c_char, size_t}; + +extern crate url; +extern crate libc; + +#[no_mangle] +pub unsafe extern "C" fn parse_url(raw_input: *const c_char, raw_input_length: size_t) -> *mut Url { + let input = std::str::from_utf8_unchecked(slice::from_raw_parts(raw_input as *const u8, raw_input_length)); + // This code would assume that the URL is parsed successfully: + // let result = Url::parse(input).unwrap(); + // Box::into_raw(Box::new(result)) + // But we might get an invalid input. So we want to return null in case of + // error. We can do it in such a manner: + match Url::parse(input) { + Ok(result) => Box::into_raw(Box::new(result)), + Err(_) => std::ptr::null_mut(), + } +} + +#[no_mangle] +pub unsafe extern "C" fn parse_url_to_href(raw_input: *const c_char, raw_input_length: size_t) -> *const c_char { + let input = std::str::from_utf8_unchecked(slice::from_raw_parts(raw_input as *const u8, raw_input_length)); + match Url::parse(input) { + Ok(result) => std::ffi::CString::new(result.as_str()).unwrap().into_raw(), + Err(_) => std::ptr::null_mut(), + } +} + +#[no_mangle] +pub unsafe extern "C" fn free_url(raw: *mut Url) { + if raw.is_null() { + return; + } + + drop(Box::from_raw(raw)) +} + +#[no_mangle] +pub unsafe extern fn free_string(ptr: *const c_char) { + // Take the ownership back to rust and drop the owner + let _ = std::ffi::CString::from_raw(ptr as *mut _); +} diff --git a/benchmarks/competitors/servo-url/servo_url.h b/benchmarks/competitors/servo-url/servo_url.h new file mode 100644 index 
000000000..406d5fc2f --- /dev/null +++ b/benchmarks/competitors/servo-url/servo_url.h @@ -0,0 +1,30 @@ +#ifndef servo_url_ffi_h +#define servo_url_ffi_h + +/* This file was modified manually. */ + +#include +#include +#include +#include +#include + +namespace servo_url { + +/// A parsed URL record. +struct Url; + +extern "C" { + +Url *parse_url(const char *raw_input, size_t raw_input_length); + +void free_url(Url *raw); + +const char *parse_url_to_href(const char *raw_input, size_t raw_input_length); + +void free_string(const char *); +} // extern "C" + +} // namespace servo_url + +#endif // servo_url_ffi_h diff --git a/benchmarks/model_bench.cpp b/benchmarks/model_bench.cpp new file mode 100644 index 000000000..e9d1501d9 --- /dev/null +++ b/benchmarks/model_bench.cpp @@ -0,0 +1,280 @@ +#include +#include +#include +#include +#include +#include + +#include "ada.h" +#include "performancecounters/event_counter.h" +event_collector collector; + +bool file_exists(const char *filename) { + namespace fs = std::filesystem; + std::filesystem::path f{filename}; + if (std::filesystem::exists(filename)) { + return true; + } else { + std::cout << " file missing: " << filename << std::endl; + return false; + } +} + +std::string read_file(std::string filename) { + constexpr size_t read_size = 4096; + std::ifstream stream(filename.c_str()); + stream.exceptions(std::ios_base::badbit); + std::string out; + std::string buf(read_size, '\0'); + while (stream.read(&buf[0], read_size)) { + out.append(buf, 0, size_t(stream.gcount())); + } + out.append(buf, 0, size_t(stream.gcount())); + return out; +} + +std::vector split_string(const std::string &str) { + auto result = std::vector{}; + std::stringstream ss{str}; + for (std::string line; std::getline(ss, line, '\n');) { + std::string_view view = line; + // Some parsers like boost/url will refuse to parse a URL with trailing + // whitespace. 
+ while (!view.empty() && std::isspace(view.back())) { + view.remove_suffix(1); + } + while (!view.empty() && std::isspace(view.front())) { + view.remove_prefix(1); + } + if (!view.empty()) { + result.emplace_back(view); + } + } + return result; +} + +struct stat_numbers { + std::string url_string{}; + std::string href{}; + ada::url_components components{}; + event_aggregate counters{}; + bool is_valid = true; + bool has_port = false; + bool has_credentials = false; + bool has_fragment = false; + bool has_search = false; +}; + +size_t count_ascii_bytes(const std::string &s) { + size_t counter = 0; + for (uint8_t c : s) { + if (c < 128) { + counter++; + } + } + return counter; +} + +template +std::vector collect_values( + const std::vector &url_examples, size_t trials) { + std::vector numbers(url_examples.size()); + for (size_t i = 0; i < url_examples.size(); i++) { + numbers[i].url_string = url_examples[i]; + ada::result url = ada::parse(url_examples[i]); + if (url) { + numbers[i].is_valid = true; + numbers[i].href = url->get_href(); + numbers[i].components = url->get_components(); + numbers[i].has_port = url->has_port(); + numbers[i].has_credentials = url->has_credentials(); + numbers[i].has_fragment = url->has_hash(); + numbers[i].has_search = url->has_search(); + } else { + numbers[i].is_valid = false; + } + } + volatile size_t href_size = 0; + for (size_t i = 0; i < trials; i++) { + for (stat_numbers &n : numbers) { + std::atomic_thread_fence(std::memory_order_acquire); + collector.start(); + ada::result url = ada::parse(n.url_string); + if (url) { + href_size += url->get_href().size(); + } + std::atomic_thread_fence(std::memory_order_release); + event_count allocate_count = collector.end(); + n.counters << allocate_count; + } + } + return numbers; +} + +#ifdef ADA_URL_FILE +const char *default_file = ADA_URL_FILE; +#else +const char *default_file = nullptr; +#endif + +std::vector init_data(const char *input = default_file) { + std::vector input_urls; + if 
(input == nullptr) { + return input_urls; + } + + if (!file_exists(input)) { + std::cout << "File not found !" << input << std::endl; + return input_urls; + } else { + std::cout << "# Loading " << input << std::endl; + input_urls = split_string(read_file(input)); + } + return input_urls; +} + +void print(const stat_numbers &n) { + std::cout << std::setw(15) << n.url_string.size() << ","; + std::cout << std::setw(15) << n.counters.best.cycles() << "," << std::setw(15) + << size_t(n.counters.cycles()) << ","; + std::cout << std::setw(15) << n.counters.best.instructions() << "," + << std::setw(15) << n.counters.instructions() << ","; + std::cout << std::setw(15) << n.is_valid << ","; + + // hash size + + std::cout << std::setw(15) << n.href.size() << ","; + size_t end = n.href.size(); + if (n.components.hash_start != ada::url_components::omitted) { + std::cout << std::setw(15) << (end - n.components.hash_start) << ","; + end = n.components.hash_start; + } else { + std::cout << std::setw(15) << 0 << ","; + } + // search size + if (n.components.search_start != ada::url_components::omitted) { + std::cout << std::setw(15) << (end - n.components.search_start) << ","; + end = n.components.search_start; + } else { + std::cout << std::setw(15) << 0 << ","; + } + // path size + std::cout << std::setw(15) << (end - n.components.pathname_start) << ","; + end = n.components.pathname_start; + // port size + std::cout << std::setw(15) << (end - n.components.host_end) << ","; + end = n.components.host_end; + // host size + std::cout << std::setw(15) << (end - n.components.host_start) << ","; + end = n.components.host_start; + // user/pass size + std::cout << std::setw(15) << (end - n.components.protocol_end) << ","; + end = n.components.protocol_end; + // protocol type + ada::result url = + ada::parse(n.url_string); + if (url) { + std::cout << std::setw(15) << int(url->type); + } else { + std::cout << std::setw(15) << -1; + } + std::cout << ","; + std::cout << std::setw(15) << 
n.has_port << ","; + std::cout << std::setw(15) << n.has_credentials << ","; + std::cout << std::setw(15) << n.has_fragment << ","; + std::cout << std::setw(15) << n.has_search << ","; + std::cout << std::setw(15) + << (n.url_string.size() - count_ascii_bytes(n.url_string)) << ","; + std::cout << std::setw(15) << (n.href.size() - count_ascii_bytes(n.href)) + << ","; + std::cout << std::setw(15) + << (count_ascii_bytes(n.url_string) == n.url_string.size()) << ","; + std::cout << std::setw(15) << (n.href == n.url_string); +} +void print(const std::vector numbers) { + std::cout << std::setw(15) << "input_size" + << ","; + std::cout << std::setw(15) << "best_cycles" + << ","; + std::cout << std::setw(15) << "mean_cycles" + << ","; + std::cout << std::setw(15) << "best_instr" + << ","; + std::cout << std::setw(15) << "mean_instr" + << ","; + std::cout << std::setw(15) << "is_valid" + << ","; + std::cout << std::setw(15) << "href_size" + << ","; + std::cout << std::setw(15) << "hash_size" + << ","; + std::cout << std::setw(15) << "search_size" + << ","; + std::cout << std::setw(15) << "path_size" + << ","; + std::cout << std::setw(15) << "port_size" + << ","; + std::cout << std::setw(15) << "host_size" + << ","; + std::cout << std::setw(15) << "credential_size" + << ","; + std::cout << std::setw(15) << "protocol_type" + << ","; + std::cout << std::setw(15) << "has_port" + << ","; + std::cout << std::setw(15) << "has_authority" + << ","; + std::cout << std::setw(15) << "has_fragment" + << ","; + std::cout << std::setw(15) << "has_search" + << ","; + std::cout << std::setw(15) << "non_ascii_bytes" + << ","; + std::cout << std::setw(15) << "href_non_ascii_bytes" + << ","; + std::cout << std::setw(15) << "is_ascii" + << ","; + std::cout << std::setw(15) << "input_is_href"; + + std::cout << std::endl; + + for (const stat_numbers &n : numbers) { + print(n); + std::cout << std::endl; + } +} + +int main(int argc, char **argv) { + std::vector input_urls; + if (argc == 1) { + 
input_urls = init_data(); + } else { + input_urls = init_data(argv[1]); + } + if (input_urls.empty()) { + std::cout << "pass the path to a file containing a list of URL (one per " + "line) as a parameter." + << std::endl; + return EXIT_FAILURE; + } + if (!collector.has_events()) { + std::cout << "We require access to performance counters. (Try sudo.)" + << std::endl; + return EXIT_FAILURE; + } + std::string empty; + // We always start with a null URL for calibration. + input_urls.insert(input_urls.begin(), empty); + bool use_ada_url = (getenv("USE_URL") != nullptr); + size_t trials = 100; + std::cout << "# trials " << trials << std::endl; + if (use_ada_url) { + std::cout << "# ada::url" << std::endl; + print(collect_values(input_urls, trials)); + } else { + std::cout << "# ada::url_aggregator" << std::endl; + print(collect_values(input_urls, trials)); + } + + return EXIT_SUCCESS; +} diff --git a/benchmarks/percent_encode.cpp b/benchmarks/percent_encode.cpp new file mode 100644 index 000000000..97f969f54 --- /dev/null +++ b/benchmarks/percent_encode.cpp @@ -0,0 +1,266 @@ +#include + +#include "ada.h" +#include "ada/character_sets.h" +#include "ada/unicode.h" +#include "performancecounters/event_counter.h" +event_collector collector; +size_t N = 1000; + +#include + +std::string examples[] = {"á|", "other:9818274x1!!", + "ref=web-twc-ao-gbl-adsinfo&utm_source=twc&utm_", + "connect_timeout=10&application_name=myapp"}; + +void init_data() {} + +double examples_bytes = []() -> double { + size_t bytes{0}; + for (std::string& url_string : examples) { + bytes += url_string.size(); + } + return double(bytes); +}(); + +static void Fragment(benchmark::State& state) { + for (auto _ : state) { + for (std::string& url_string : examples) { + benchmark::DoNotOptimize(ada::unicode::percent_encode( + url_string, ada::character_sets::FRAGMENT_PERCENT_ENCODE)); + } + } + if (collector.has_events()) { + event_aggregate aggregate{}; + for (size_t i = 0; i < N; i++) { + 
std::atomic_thread_fence(std::memory_order_acquire); + collector.start(); + for (std::string& url_string : examples) { + benchmark::DoNotOptimize(ada::unicode::percent_encode( + url_string, ada::character_sets::FRAGMENT_PERCENT_ENCODE)); + } + std::atomic_thread_fence(std::memory_order_release); + event_count allocate_count = collector.end(); + aggregate << allocate_count; + } + state.counters["instructions/url"] = + aggregate.best.instructions() / std::size(examples); + state.counters["instructions/cycle"] = + aggregate.total.instructions() / aggregate.total.cycles(); + state.counters["instructions/byte"] = + aggregate.best.instructions() / examples_bytes; + state.counters["GHz"] = + aggregate.total.cycles() / aggregate.total.elapsed_ns(); + } + state.counters["time/byte"] = benchmark::Counter( + examples_bytes, benchmark::Counter::kIsIterationInvariantRate | + benchmark::Counter::kInvert); + state.counters["time/url"] = + benchmark::Counter(double(std::size(examples)), + benchmark::Counter::kIsIterationInvariantRate | + benchmark::Counter::kInvert); + state.counters["speed"] = benchmark::Counter( + examples_bytes, benchmark::Counter::kIsIterationInvariantRate); + state.counters["url/s"] = + benchmark::Counter(double(std::size(examples)), + benchmark::Counter::kIsIterationInvariantRate); +} +BENCHMARK(Fragment); + +static void Query(benchmark::State& state) { + for (auto _ : state) { + for (std::string& url_string : examples) { + benchmark::DoNotOptimize(ada::unicode::percent_encode( + url_string, ada::character_sets::QUERY_PERCENT_ENCODE)); + } + } + if (collector.has_events()) { + event_aggregate aggregate{}; + for (size_t i = 0; i < N; i++) { + std::atomic_thread_fence(std::memory_order_acquire); + collector.start(); + for (std::string& url_string : examples) { + benchmark::DoNotOptimize(ada::unicode::percent_encode( + url_string, ada::character_sets::QUERY_PERCENT_ENCODE)); + } + std::atomic_thread_fence(std::memory_order_release); + event_count allocate_count 
= collector.end(); + aggregate << allocate_count; + } + state.counters["instructions/url"] = + aggregate.best.instructions() / std::size(examples); + state.counters["instructions/cycle"] = + aggregate.total.instructions() / aggregate.total.cycles(); + state.counters["instructions/byte"] = + aggregate.best.instructions() / examples_bytes; + state.counters["GHz"] = + aggregate.total.cycles() / aggregate.total.elapsed_ns(); + } + state.counters["time/byte"] = benchmark::Counter( + examples_bytes, benchmark::Counter::kIsIterationInvariantRate | + benchmark::Counter::kInvert); + state.counters["time/url"] = + benchmark::Counter(double(std::size(examples)), + benchmark::Counter::kIsIterationInvariantRate | + benchmark::Counter::kInvert); + state.counters["speed"] = benchmark::Counter( + examples_bytes, benchmark::Counter::kIsIterationInvariantRate); + state.counters["url/s"] = + benchmark::Counter(double(std::size(examples)), + benchmark::Counter::kIsIterationInvariantRate); +} +BENCHMARK(Query); + +static void SpecialQuery(benchmark::State& state) { + for (auto _ : state) { + for (std::string& url_string : examples) { + benchmark::DoNotOptimize(ada::unicode::percent_encode( + url_string, ada::character_sets::SPECIAL_QUERY_PERCENT_ENCODE)); // same set as the counter pass below + } + } + if (collector.has_events()) { + event_aggregate aggregate{}; + for (size_t i = 0; i < N; i++) { + std::atomic_thread_fence(std::memory_order_acquire); + collector.start(); + for (std::string& url_string : examples) { + benchmark::DoNotOptimize(ada::unicode::percent_encode( + url_string, ada::character_sets::SPECIAL_QUERY_PERCENT_ENCODE)); + } + std::atomic_thread_fence(std::memory_order_release); + event_count allocate_count = collector.end(); + aggregate << allocate_count; + } + state.counters["instructions/url"] = + aggregate.best.instructions() / std::size(examples); + state.counters["instructions/cycle"] = + aggregate.total.instructions() / aggregate.total.cycles(); + state.counters["instructions/byte"] = + 
aggregate.best.instructions() / examples_bytes; + state.counters["GHz"] = + aggregate.total.cycles() / aggregate.total.elapsed_ns(); + } + state.counters["time/byte"] = benchmark::Counter( + examples_bytes, benchmark::Counter::kIsIterationInvariantRate | + benchmark::Counter::kInvert); + state.counters["time/url"] = + benchmark::Counter(double(std::size(examples)), + benchmark::Counter::kIsIterationInvariantRate | + benchmark::Counter::kInvert); + state.counters["speed"] = benchmark::Counter( + examples_bytes, benchmark::Counter::kIsIterationInvariantRate); + state.counters["url/s"] = + benchmark::Counter(double(std::size(examples)), + benchmark::Counter::kIsIterationInvariantRate); +} +BENCHMARK(SpecialQuery); + +static void UserInfo(benchmark::State& state) { + for (auto _ : state) { + for (std::string& url_string : examples) { + benchmark::DoNotOptimize(ada::unicode::percent_encode( + url_string, ada::character_sets::USERINFO_PERCENT_ENCODE)); + } + } + if (collector.has_events()) { + event_aggregate aggregate{}; + for (size_t i = 0; i < N; i++) { + std::atomic_thread_fence(std::memory_order_acquire); + collector.start(); + for (std::string& url_string : examples) { + benchmark::DoNotOptimize(ada::unicode::percent_encode( + url_string, ada::character_sets::USERINFO_PERCENT_ENCODE)); + } + std::atomic_thread_fence(std::memory_order_release); + event_count allocate_count = collector.end(); + aggregate << allocate_count; + } + state.counters["instructions/url"] = + aggregate.best.instructions() / std::size(examples); + state.counters["instructions/cycle"] = + aggregate.total.instructions() / aggregate.total.cycles(); + state.counters["instructions/byte"] = + aggregate.best.instructions() / examples_bytes; + state.counters["GHz"] = + aggregate.total.cycles() / aggregate.total.elapsed_ns(); + } + state.counters["time/byte"] = benchmark::Counter( + examples_bytes, benchmark::Counter::kIsIterationInvariantRate | + benchmark::Counter::kInvert); + 
state.counters["time/url"] = + benchmark::Counter(double(std::size(examples)), + benchmark::Counter::kIsIterationInvariantRate | + benchmark::Counter::kInvert); + state.counters["speed"] = benchmark::Counter( + examples_bytes, benchmark::Counter::kIsIterationInvariantRate); + state.counters["url/s"] = + benchmark::Counter(double(std::size(examples)), + benchmark::Counter::kIsIterationInvariantRate); +} +BENCHMARK(UserInfo); + +static void C0Control(benchmark::State& state) { + for (auto _ : state) { + for (std::string& url_string : examples) { + benchmark::DoNotOptimize(ada::unicode::percent_encode( + url_string, ada::character_sets::C0_CONTROL_PERCENT_ENCODE)); + } + } + if (collector.has_events()) { + event_aggregate aggregate{}; + for (size_t i = 0; i < N; i++) { + std::atomic_thread_fence(std::memory_order_acquire); + collector.start(); + for (std::string& url_string : examples) { + benchmark::DoNotOptimize(ada::unicode::percent_encode( + url_string, ada::character_sets::C0_CONTROL_PERCENT_ENCODE)); + } + std::atomic_thread_fence(std::memory_order_release); + event_count allocate_count = collector.end(); + aggregate << allocate_count; + } + state.counters["instructions/url"] = + aggregate.best.instructions() / std::size(examples); + state.counters["instructions/cycle"] = + aggregate.total.instructions() / aggregate.total.cycles(); + state.counters["instructions/byte"] = + aggregate.best.instructions() / examples_bytes; + state.counters["GHz"] = + aggregate.total.cycles() / aggregate.total.elapsed_ns(); + } + state.counters["time/byte"] = benchmark::Counter( + examples_bytes, benchmark::Counter::kIsIterationInvariantRate | + benchmark::Counter::kInvert); + state.counters["time/url"] = + benchmark::Counter(double(std::size(examples)), + benchmark::Counter::kIsIterationInvariantRate | + benchmark::Counter::kInvert); + state.counters["speed"] = benchmark::Counter( + examples_bytes, benchmark::Counter::kIsIterationInvariantRate); + state.counters["url/s"] = + 
benchmark::Counter(double(std::size(examples)), + benchmark::Counter::kIsIterationInvariantRate); +} +BENCHMARK(C0Control); + +int main(int argc, char** argv) { +#if defined(ADA_RUST_VERSION) + benchmark::AddCustomContext("rust version ", ADA_RUST_VERSION); +#endif +#if (__APPLE__ && __aarch64__) || defined(__linux__) + if (!collector.has_events()) { + benchmark::AddCustomContext("performance counters", + "No privileged access (sudo may help)."); + } +#else + if (!collector.has_events()) { + benchmark::AddCustomContext("performance counters", "Unsupported system."); + } +#endif + if (collector.has_events()) { + benchmark::AddCustomContext("performance counters", "Enabled"); + } + benchmark::Initialize(&argc, argv); + benchmark::RunSpecifiedBenchmarks(); + benchmark::Shutdown(); +} diff --git a/benchmarks/performancecounters/apple_arm_events.h b/benchmarks/performancecounters/apple_arm_events.h new file mode 100644 index 000000000..608817277 --- /dev/null +++ b/benchmarks/performancecounters/apple_arm_events.h @@ -0,0 +1,1110 @@ +/* clang-format off */ + +// Original design from: +// ============================================================================= +// XNU kperf/kpc +// Available for 64-bit Intel/Apple Silicon, macOS/iOS, with root privileges +// +// References: +// +// XNU source (since xnu 2422.1.72): +// https://github.com/apple/darwin-xnu/blob/main/osfmk/kern/kpc.h +// https://github.com/apple/darwin-xnu/blob/main/bsd/kern/kern_kpc.c +// +// Lightweight PET (Profile Every Thread, since xnu 3789.1.32): +// https://github.com/apple/darwin-xnu/blob/main/osfmk/kperf/pet.c +// https://github.com/apple/darwin-xnu/blob/main/osfmk/kperf/kperf_kpc.c +// +// System Private frameworks (since macOS 10.11, iOS 8.0): +// /System/Library/PrivateFrameworks/kperf.framework +// /System/Library/PrivateFrameworks/kperfdata.framework +// +// Xcode framework (since Xcode 7.0): +// /Applications/Xcode.app/Contents/SharedFrameworks/DVTInstrumentsFoundation.framework +// +// 
CPU database (plist files) +// macOS (since macOS 10.11): +// /usr/share/kpep/<name>.plist +// iOS (copied from Xcode, since iOS 10.0, Xcode 8.0): +// /Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform +// /DeviceSupport/<version>/DeveloperDiskImage.dmg/usr/share/kpep/<name>.plist +// +// +// Created by YaoYuan on 2021. +// Released into the public domain (unlicense.org). +// ============================================================================= + +#ifndef M1CYCLES_H +#define M1CYCLES_H + +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <dlfcn.h> // for dlopen() and dlsym() +#include <mach/mach_time.h> // for mach_absolute_time() +#include <sys/kdebug.h> // for kdebug trace decode +#include <sys/sysctl.h> // for sysctl() +#include <unistd.h> // for usleep() + +struct performance_counters { + double cycles; + double branches; + double missed_branches; + double instructions; + performance_counters(uint64_t c, uint64_t b, uint64_t m, uint64_t i) + : cycles(c), branches(b), missed_branches(m), instructions(i) {} + performance_counters(double c, double b, double m, double i) + : cycles(c), branches(b), missed_branches(m), instructions(i) {} + performance_counters(double init) + : cycles(init), + branches(init), + missed_branches(init), + instructions(init) {} + + inline performance_counters &operator-=(const performance_counters &other) { + cycles -= other.cycles; + branches -= other.branches; + missed_branches -= other.missed_branches; + instructions -= other.instructions; + return *this; + } + inline performance_counters &min(const performance_counters &other) { + cycles = other.cycles < cycles ? other.cycles : cycles; + branches = other.branches < branches ? other.branches : branches; + missed_branches = other.missed_branches < missed_branches + ? other.missed_branches + : missed_branches; + instructions = + other.instructions < instructions ? 
other.instructions : instructions; + return *this; + } + inline performance_counters &operator+=(const performance_counters &other) { + cycles += other.cycles; + branches += other.branches; + missed_branches += other.missed_branches; + instructions += other.instructions; + return *this; + } + + inline performance_counters &operator/=(double divisor) { + cycles /= divisor; + branches /= divisor; + missed_branches /= divisor; + instructions /= divisor; + return *this; + } +}; + +inline performance_counters operator-(const performance_counters &a, + const performance_counters &b) { + return performance_counters(a.cycles - b.cycles, a.branches - b.branches, + a.missed_branches - b.missed_branches, + a.instructions - b.instructions); +} + +typedef float f32; +typedef double f64; +typedef int8_t i8; +typedef uint8_t u8; +typedef int16_t i16; +typedef uint16_t u16; +typedef int32_t i32; +typedef uint32_t u32; +typedef int64_t i64; +typedef uint64_t u64; +typedef size_t usize; + +// ----------------------------------------------------------------------------- +// <kperf.framework> header (reverse engineered) +// This framework wraps some sysctl calls to communicate with the kpc in kernel. +// Most functions require root privileges, or the process must be "blessed". +// ----------------------------------------------------------------------------- + +// Cross-platform class constants. +#define KPC_CLASS_FIXED (0) +#define KPC_CLASS_CONFIGURABLE (1) +#define KPC_CLASS_POWER (2) +#define KPC_CLASS_RAWPMU (3) + +// Cross-platform class mask constants. +#define KPC_CLASS_FIXED_MASK (1u << KPC_CLASS_FIXED) // 1 +#define KPC_CLASS_CONFIGURABLE_MASK (1u << KPC_CLASS_CONFIGURABLE) // 2 +#define KPC_CLASS_POWER_MASK (1u << KPC_CLASS_POWER) // 4 +#define KPC_CLASS_RAWPMU_MASK (1u << KPC_CLASS_RAWPMU) // 8 + +// PMU version constants. 
+#define KPC_PMU_ERROR (0) // Error +#define KPC_PMU_INTEL_V3 (1) // Intel +#define KPC_PMU_ARM_APPLE (2) // ARM64 +#define KPC_PMU_INTEL_V2 (3) // Old Intel +#define KPC_PMU_ARM_V2 (4) // Old ARM + +// The maximum number of counters we could read from every class in one go. +// ARMV7: FIXED: 1, CONFIGURABLE: 4 +// ARM32: FIXED: 2, CONFIGURABLE: 6 +// ARM64: FIXED: 2, CONFIGURABLE: CORE_NCTRS - FIXED (6 or 8) +// x86: 32 +#define KPC_MAX_COUNTERS 32 + +// Bits for defining what to do on an action. +// Defined in https://github.com/apple/darwin-xnu/blob/main/osfmk/kperf/action.h +#define KPERF_SAMPLER_TH_INFO (1U << 0) +#define KPERF_SAMPLER_TH_SNAPSHOT (1U << 1) +#define KPERF_SAMPLER_KSTACK (1U << 2) +#define KPERF_SAMPLER_USTACK (1U << 3) +#define KPERF_SAMPLER_PMC_THREAD (1U << 4) +#define KPERF_SAMPLER_PMC_CPU (1U << 5) +#define KPERF_SAMPLER_PMC_CONFIG (1U << 6) +#define KPERF_SAMPLER_MEMINFO (1U << 7) +#define KPERF_SAMPLER_TH_SCHEDULING (1U << 8) +#define KPERF_SAMPLER_TH_DISPATCH (1U << 9) +#define KPERF_SAMPLER_TK_SNAPSHOT (1U << 10) +#define KPERF_SAMPLER_SYS_MEM (1U << 11) +#define KPERF_SAMPLER_TH_INSCYC (1U << 12) +#define KPERF_SAMPLER_TK_INFO (1U << 13) + +// Maximum number of kperf action ids. +#define KPERF_ACTION_MAX (32) + +// Maximum number of kperf timer ids. +#define KPERF_TIMER_MAX (8) + +// x86/arm config registers are 64-bit +typedef u64 kpc_config_t; + +/// Print current CPU identification string to the buffer (same as snprintf), +/// such as "cpu_7_8_10b282dc_46". This string can be used to locate the PMC +/// database in /usr/share/kpep. +/// @return string's length, or negative value if error occurs. +/// @note This method does not requires root privileges. +/// @details sysctl get(hw.cputype), get(hw.cpusubtype), +/// get(hw.cpufamily), get(machdep.cpu.model) +static int (*kpc_cpu_string)(char *buf, usize buf_size); + +/// Get the version of KPC that's being run. +/// @return See `PMU version constants` above. 
+/// @details sysctl get(kpc.pmu_version) +static u32 (*kpc_pmu_version)(void); + +/// Get running PMC classes. +/// @return See `class mask constants` above, +/// 0 if error occurs or no class is set. +/// @details sysctl get(kpc.counting) +static u32 (*kpc_get_counting)(void); + +/// Set PMC classes to enable counting. +/// @param classes See `class mask constants` above, set 0 to shutdown counting. +/// @return 0 for success. +/// @details sysctl set(kpc.counting) +static int (*kpc_set_counting)(u32 classes); + +/// Get running PMC classes for current thread. +/// @return See `class mask constants` above, +/// 0 if error occurs or no class is set. +/// @details sysctl get(kpc.thread_counting) +static u32 (*kpc_get_thread_counting)(void); + +/// Set PMC classes to enable counting for current thread. +/// @param classes See `class mask constants` above, set 0 to shutdown counting. +/// @return 0 for success. +/// @details sysctl set(kpc.thread_counting) +static int (*kpc_set_thread_counting)(u32 classes); + +/// Get how many config registers there are for a given mask. +/// For example: Intel may returns 1 for `KPC_CLASS_FIXED_MASK`, +/// returns 4 for `KPC_CLASS_CONFIGURABLE_MASK`. +/// @param classes See `class mask constants` above. +/// @return 0 if error occurs or no class is set. +/// @note This method does not requires root privileges. +/// @details sysctl get(kpc.config_count) +static u32 (*kpc_get_config_count)(u32 classes); + +/// Get config registers. +/// @param classes see `class mask constants` above. +/// @param config Config buffer to receive values, should not smaller than +/// kpc_get_config_count(classes) * sizeof(kpc_config_t). +/// @return 0 for success. +/// @details sysctl get(kpc.config_count), get(kpc.config) +static int (*kpc_get_config)(u32 classes, kpc_config_t *config); + +/// Set config registers. +/// @param classes see `class mask constants` above. 
+/// @param config Config buffer, should not smaller than +/// kpc_get_config_count(classes) * sizeof(kpc_config_t). +/// @return 0 for success. +/// @details sysctl get(kpc.config_count), set(kpc.config) +static int (*kpc_set_config)(u32 classes, kpc_config_t *config); + +/// Get how many counters there are for a given mask. +/// For example: Intel may returns 3 for `KPC_CLASS_FIXED_MASK`, +/// returns 4 for `KPC_CLASS_CONFIGURABLE_MASK`. +/// @param classes See `class mask constants` above. +/// @note This method does not requires root privileges. +/// @details sysctl get(kpc.counter_count) +static u32 (*kpc_get_counter_count)(u32 classes); + +/// Get counter accumulations. +/// If `all_cpus` is true, the buffer count should not smaller than +/// (cpu_count * counter_count). Otherwise, the buffer count should not smaller +/// than (counter_count). +/// @see kpc_get_counter_count(), kpc_cpu_count(). +/// @param all_cpus true for all CPUs, false for current cpu. +/// @param classes See `class mask constants` above. +/// @param curcpu A pointer to receive current cpu id, can be NULL. +/// @param buf Buffer to receive counter's value. +/// @return 0 for success. +/// @details sysctl get(hw.ncpu), get(kpc.counter_count), get(kpc.counters) +static int (*kpc_get_cpu_counters)(bool all_cpus, u32 classes, int *curcpu, + u64 *buf); + +/// Get counter accumulations for current thread. +/// @param tid Thread id, should be 0. +/// @param buf_count The number of buf's elements (not bytes), +/// should not smaller than kpc_get_counter_count(). +/// @param buf Buffer to receive counter's value. +/// @return 0 for success. +/// @details sysctl get(kpc.thread_counters) +static int (*kpc_get_thread_counters)(u32 tid, u32 buf_count, u64 *buf); + +/// Acquire/release the counters used by the Power Manager. +/// @param val 1:acquire, 0:release +/// @return 0 for success. 
+/// @details sysctl set(kpc.force_all_ctrs) +static int (*kpc_force_all_ctrs_set)(int val); + +/// Get the state of all_ctrs. +/// @return 0 for success. +/// @details sysctl get(kpc.force_all_ctrs) +static int (*kpc_force_all_ctrs_get)(int *val_out); + +/// Set number of actions, should be `KPERF_ACTION_MAX`. +/// @details sysctl set(kperf.action.count) +static int (*kperf_action_count_set)(u32 count); + +/// Get number of actions. +/// @details sysctl get(kperf.action.count) +static int (*kperf_action_count_get)(u32 *count); + +/// Set what to sample when a trigger fires an action, e.g. +/// `KPERF_SAMPLER_PMC_CPU`. +/// @details sysctl set(kperf.action.samplers) +static int (*kperf_action_samplers_set)(u32 actionid, u32 sample); + +/// Get what to sample when a trigger fires an action. +/// @details sysctl get(kperf.action.samplers) +static int (*kperf_action_samplers_get)(u32 actionid, u32 *sample); + +/// Apply a task filter to the action, -1 to disable filter. +/// @details sysctl set(kperf.action.filter_by_task) +static int (*kperf_action_filter_set_by_task)(u32 actionid, i32 port); + +/// Apply a pid filter to the action, -1 to disable filter. +/// @details sysctl set(kperf.action.filter_by_pid) +static int (*kperf_action_filter_set_by_pid)(u32 actionid, i32 pid); + +/// Set number of time triggers, should be `KPERF_TIMER_MAX`. +/// @details sysctl set(kperf.timer.count) +static int (*kperf_timer_count_set)(u32 count); + +/// Get number of time triggers. +/// @details sysctl get(kperf.timer.count) +static int (*kperf_timer_count_get)(u32 *count); + +/// Set timer number and period. +/// @details sysctl set(kperf.timer.period) +static int (*kperf_timer_period_set)(u32 actionid, u64 tick); + +/// Get timer number and period. +/// @details sysctl get(kperf.timer.period) +static int (*kperf_timer_period_get)(u32 actionid, u64 *tick); + +/// Set timer number and actionid. 
+/// @details sysctl set(kperf.timer.action) +static int (*kperf_timer_action_set)(u32 actionid, u32 timerid); + +/// Get timer number and actionid. +/// @details sysctl get(kperf.timer.action) +static int (*kperf_timer_action_get)(u32 actionid, u32 *timerid); + +/// Set which timer ID does PET (Profile Every Thread). +/// @details sysctl set(kperf.timer.pet_timer) +static int (*kperf_timer_pet_set)(u32 timerid); + +/// Get which timer ID does PET (Profile Every Thread). +/// @details sysctl get(kperf.timer.pet_timer) +static int (*kperf_timer_pet_get)(u32 *timerid); + +/// Enable or disable sampling. +/// @details sysctl set(kperf.sampling) +static int (*kperf_sample_set)(u32 enabled); + +/// Get is currently sampling. +/// @details sysctl get(kperf.sampling) +static int (*kperf_sample_get)(u32 *enabled); + +/// Reset kperf: stop sampling, kdebug, timers and actions. +/// @return 0 for success. +static int (*kperf_reset)(void); + +/// Nanoseconds to CPU ticks. +static u64 (*kperf_ns_to_ticks)(u64 ns); + +/// CPU ticks to nanoseconds. +static u64 (*kperf_ticks_to_ns)(u64 ticks); + +/// CPU ticks frequency (mach_absolute_time). +static u64 (*kperf_tick_frequency)(void); + +/// Get lightweight PET mode (not in kperf.framework). +static int kperf_lightweight_pet_get(u32 *enabled) { + if (!enabled) return -1; + usize size = 4; + return sysctlbyname("kperf.lightweight_pet", enabled, &size, NULL, 0); +} + +/// Set lightweight PET mode (not in kperf.framework). +static int kperf_lightweight_pet_set(u32 enabled) { + return sysctlbyname("kperf.lightweight_pet", NULL, NULL, &enabled, 4); +} + +// ----------------------------------------------------------------------------- +// header (reverse engineered) +// This framework provides some functions to access the local CPU database. +// These functions do not require root privileges. +// ----------------------------------------------------------------------------- + +// KPEP CPU architecture constants. 
+#define KPEP_ARCH_I386 0 +#define KPEP_ARCH_X86_64 1 +#define KPEP_ARCH_ARM 2 +#define KPEP_ARCH_ARM64 3 + +/// KPEP event (size: 48/28 bytes on 64/32 bit OS) +typedef struct kpep_event { + const char *name; ///< Unique name of a event, such as "INST_RETIRED.ANY". + const char *description; ///< Description for this event. + const char *errata; ///< Errata, currently NULL. + const char *alias; ///< Alias name, such as "Instructions", "Cycles". + const char *fallback; ///< Fallback event name for fixed counter. + u32 mask; + u8 number; + u8 umask; + u8 reserved; + u8 is_fixed; +} kpep_event; + +/// KPEP database (size: 144/80 bytes on 64/32 bit OS) +typedef struct kpep_db { + const char *name; ///< Database name, such as "haswell". + const char *cpu_id; ///< Plist name, such as "cpu_7_8_10b282dc". + const char *marketing_name; ///< Marketing name, such as "Intel Haswell". + void *plist_data; ///< Plist data (CFDataRef), currently NULL. + void *event_map; ///< All events (CFDict). + kpep_event + *event_arr; ///< Event struct buffer (sizeof(kpep_event) * events_count). + kpep_event **fixed_event_arr; ///< Fixed counter events (sizeof(kpep_event *) + ///< * fixed_counter_count) + void *alias_map; ///< All aliases (CFDict). + usize reserved_1; + usize reserved_2; + usize reserved_3; + usize event_count; ///< All events count. + usize alias_count; + usize fixed_counter_count; + usize config_counter_count; + usize power_counter_count; + u32 architecture; ///< see `KPEP CPU architecture constants` above. 
+ u32 fixed_counter_bits; + u32 config_counter_bits; + u32 power_counter_bits; +} kpep_db; + +/// KPEP config (size: 80/44 bytes on 64/32 bit OS) +typedef struct kpep_config { + kpep_db *db; + kpep_event **ev_arr; ///< (sizeof(kpep_event *) * counter_count), init NULL + usize *ev_map; ///< (sizeof(usize *) * counter_count), init 0 + usize *ev_idx; ///< (sizeof(usize *) * counter_count), init -1 + u32 *flags; ///< (sizeof(u32 *) * counter_count), init 0 + u64 *kpc_periods; ///< (sizeof(u64 *) * counter_count), init 0 + usize event_count; /// kpep_config_events_count() + usize counter_count; + u32 classes; ///< See `class mask constants` above. + u32 config_counter; + u32 power_counter; + u32 reserved; +} kpep_config; + +/// Error code for kpep_config_xxx() and kpep_db_xxx() functions. +typedef enum { + KPEP_CONFIG_ERROR_NONE = 0, + KPEP_CONFIG_ERROR_INVALID_ARGUMENT = 1, + KPEP_CONFIG_ERROR_OUT_OF_MEMORY = 2, + KPEP_CONFIG_ERROR_IO = 3, + KPEP_CONFIG_ERROR_BUFFER_TOO_SMALL = 4, + KPEP_CONFIG_ERROR_CUR_SYSTEM_UNKNOWN = 5, + KPEP_CONFIG_ERROR_DB_PATH_INVALID = 6, + KPEP_CONFIG_ERROR_DB_NOT_FOUND = 7, + KPEP_CONFIG_ERROR_DB_ARCH_UNSUPPORTED = 8, + KPEP_CONFIG_ERROR_DB_VERSION_UNSUPPORTED = 9, + KPEP_CONFIG_ERROR_DB_CORRUPT = 10, + KPEP_CONFIG_ERROR_EVENT_NOT_FOUND = 11, + KPEP_CONFIG_ERROR_CONFLICTING_EVENTS = 12, + KPEP_CONFIG_ERROR_COUNTERS_NOT_FORCED = 13, + KPEP_CONFIG_ERROR_EVENT_UNAVAILABLE = 14, + KPEP_CONFIG_ERROR_ERRNO = 15, + KPEP_CONFIG_ERROR_MAX +} kpep_config_error_code; + +/// Error description for kpep_config_error_code. 
+static const char *kpep_config_error_names[KPEP_CONFIG_ERROR_MAX] = { + "none", + "invalid argument", + "out of memory", + "I/O", + "buffer too small", + "current system unknown", + "database path invalid", + "database not found", + "database architecture unsupported", + "database version unsupported", + "database corrupt", + "event not found", + "conflicting events", + "all counters must be forced", + "event unavailable", + "check errno"}; + +/// Error description. +static const char *kpep_config_error_desc(int code) { + if (0 <= code && code < KPEP_CONFIG_ERROR_MAX) { + return kpep_config_error_names[code]; + } + return "unknown error"; +} + +/// Create a config. +/// @param db A kpep db, see kpep_db_create() +/// @param cfg_ptr A pointer to receive the new config. +/// @return kpep_config_error_code, 0 for success. +static int (*kpep_config_create)(kpep_db *db, kpep_config **cfg_ptr); + +/// Free the config. +static void (*kpep_config_free)(kpep_config *cfg); + +/// Add an event to config. +/// @param cfg The config. +/// @param ev_ptr A event pointer. +/// @param flag 0: all, 1: user space only +/// @param err Error bitmap pointer, can be NULL. +/// If return value is `CONFLICTING_EVENTS`, this bitmap contains +/// the conflicted event indices, e.g. "1 << 2" means index 2. +/// @return kpep_config_error_code, 0 for success. +static int (*kpep_config_add_event)(kpep_config *cfg, kpep_event **ev_ptr, + u32 flag, u32 *err); + +/// Remove event at index. +/// @return kpep_config_error_code, 0 for success. +static int (*kpep_config_remove_event)(kpep_config *cfg, usize idx); + +/// Force all counters. +/// @return kpep_config_error_code, 0 for success. +static int (*kpep_config_force_counters)(kpep_config *cfg); + +/// Get events count. +/// @return kpep_config_error_code, 0 for success. +static int (*kpep_config_events_count)(kpep_config *cfg, usize *count_ptr); + +/// Get all event pointers. +/// @param buf A buffer to receive event pointers. 
+/// @param buf_size The buffer's size in bytes, should not smaller than +/// kpep_config_events_count() * sizeof(void *). +/// @return kpep_config_error_code, 0 for success. +static int (*kpep_config_events)(kpep_config *cfg, kpep_event **buf, + usize buf_size); + +/// Get kpc register configs. +/// @param buf A buffer to receive kpc register configs. +/// @param buf_size The buffer's size in bytes, should not smaller than +/// kpep_config_kpc_count() * sizeof(kpc_config_t). +/// @return kpep_config_error_code, 0 for success. +static int (*kpep_config_kpc)(kpep_config *cfg, kpc_config_t *buf, + usize buf_size); + +/// Get kpc register config count. +/// @return kpep_config_error_code, 0 for success. +static int (*kpep_config_kpc_count)(kpep_config *cfg, usize *count_ptr); + +/// Get kpc classes. +/// @param classes See `class mask constants` above. +/// @return kpep_config_error_code, 0 for success. +static int (*kpep_config_kpc_classes)(kpep_config *cfg, u32 *classes_ptr); + +/// Get the index mapping from event to counter. +/// @param buf A buffer to receive indexes. +/// @param buf_size The buffer's size in bytes, should not smaller than +/// kpep_config_events_count() * sizeof(kpc_config_t). +/// @return kpep_config_error_code, 0 for success. +static int (*kpep_config_kpc_map)(kpep_config *cfg, usize *buf, usize buf_size); + +/// Open a kpep database file in "/usr/share/kpep/" or "/usr/local/share/kpep/". +/// @param name File name, for example "haswell", "cpu_100000c_1_92fb37c8". +/// Pass NULL for current CPU. +/// @return kpep_config_error_code, 0 for success. +static int (*kpep_db_create)(const char *name, kpep_db **db_ptr); + +/// Free the kpep database. +static void (*kpep_db_free)(kpep_db *db); + +/// Get the database's name. +/// @return kpep_config_error_code, 0 for success. +static int (*kpep_db_name)(kpep_db *db, const char **name); + +/// Get the event alias count. +/// @return kpep_config_error_code, 0 for success. 
+static int (*kpep_db_aliases_count)(kpep_db *db, usize *count); + +/// Get all alias. +/// @param buf A buffer to receive all alias strings. +/// @param buf_size The buffer's size in bytes, +/// should not smaller than kpep_db_aliases_count() * sizeof(void *). +/// @return kpep_config_error_code, 0 for success. +static int (*kpep_db_aliases)(kpep_db *db, const char **buf, usize buf_size); + +/// Get counters count for given classes. +/// @param classes 1: Fixed, 2: Configurable. +/// @return kpep_config_error_code, 0 for success. +static int (*kpep_db_counters_count)(kpep_db *db, u8 classes, usize *count); + +/// Get all event count. +/// @return kpep_config_error_code, 0 for success. +static int (*kpep_db_events_count)(kpep_db *db, usize *count); + +/// Get all events. +/// @param buf A buffer to receive all event pointers. +/// @param buf_size The buffer's size in bytes, +/// should not smaller than kpep_db_events_count() * sizeof(void *). +/// @return kpep_config_error_code, 0 for success. +static int (*kpep_db_events)(kpep_db *db, kpep_event **buf, usize buf_size); + +/// Get one event by name. +/// @return kpep_config_error_code, 0 for success. +static int (*kpep_db_event)(kpep_db *db, const char *name, kpep_event **ev_ptr); + +/// Get event's name. +/// @return kpep_config_error_code, 0 for success. +static int (*kpep_event_name)(kpep_event *ev, const char **name_ptr); + +/// Get event's alias. +/// @return kpep_config_error_code, 0 for success. +static int (*kpep_event_alias)(kpep_event *ev, const char **alias_ptr); + +/// Get event's description. +/// @return kpep_config_error_code, 0 for success. 
+static int (*kpep_event_description)(kpep_event *ev, const char **str_ptr); + +// ----------------------------------------------------------------------------- +// load kperf/kperfdata dynamic library +// ----------------------------------------------------------------------------- + +typedef struct { + const char *name; + void **impl; +} lib_symbol; + +#define lib_nelems(x) (sizeof(x) / sizeof((x)[0])) +#define lib_symbol_def(name) \ + { #name, (void **)&name } + +static const lib_symbol lib_symbols_kperf[] = { + lib_symbol_def(kpc_pmu_version), + lib_symbol_def(kpc_cpu_string), + lib_symbol_def(kpc_set_counting), + lib_symbol_def(kpc_get_counting), + lib_symbol_def(kpc_set_thread_counting), + lib_symbol_def(kpc_get_thread_counting), + lib_symbol_def(kpc_get_config_count), + lib_symbol_def(kpc_get_counter_count), + lib_symbol_def(kpc_set_config), + lib_symbol_def(kpc_get_config), + lib_symbol_def(kpc_get_cpu_counters), + lib_symbol_def(kpc_get_thread_counters), + lib_symbol_def(kpc_force_all_ctrs_set), + lib_symbol_def(kpc_force_all_ctrs_get), + lib_symbol_def(kperf_action_count_set), + lib_symbol_def(kperf_action_count_get), + lib_symbol_def(kperf_action_samplers_set), + lib_symbol_def(kperf_action_samplers_get), + lib_symbol_def(kperf_action_filter_set_by_task), + lib_symbol_def(kperf_action_filter_set_by_pid), + lib_symbol_def(kperf_timer_count_set), + lib_symbol_def(kperf_timer_count_get), + lib_symbol_def(kperf_timer_period_set), + lib_symbol_def(kperf_timer_period_get), + lib_symbol_def(kperf_timer_action_set), + lib_symbol_def(kperf_timer_action_get), + lib_symbol_def(kperf_sample_set), + lib_symbol_def(kperf_sample_get), + lib_symbol_def(kperf_reset), + lib_symbol_def(kperf_timer_pet_set), + lib_symbol_def(kperf_timer_pet_get), + lib_symbol_def(kperf_ns_to_ticks), + lib_symbol_def(kperf_ticks_to_ns), + lib_symbol_def(kperf_tick_frequency), +}; + +static const lib_symbol lib_symbols_kperfdata[] = { + lib_symbol_def(kpep_config_create), + 
lib_symbol_def(kpep_config_free), + lib_symbol_def(kpep_config_add_event), + lib_symbol_def(kpep_config_remove_event), + lib_symbol_def(kpep_config_force_counters), + lib_symbol_def(kpep_config_events_count), + lib_symbol_def(kpep_config_events), + lib_symbol_def(kpep_config_kpc), + lib_symbol_def(kpep_config_kpc_count), + lib_symbol_def(kpep_config_kpc_classes), + lib_symbol_def(kpep_config_kpc_map), + lib_symbol_def(kpep_db_create), + lib_symbol_def(kpep_db_free), + lib_symbol_def(kpep_db_name), + lib_symbol_def(kpep_db_aliases_count), + lib_symbol_def(kpep_db_aliases), + lib_symbol_def(kpep_db_counters_count), + lib_symbol_def(kpep_db_events_count), + lib_symbol_def(kpep_db_events), + lib_symbol_def(kpep_db_event), + lib_symbol_def(kpep_event_name), + lib_symbol_def(kpep_event_alias), + lib_symbol_def(kpep_event_description), +}; + +#define lib_path_kperf "/System/Library/PrivateFrameworks/kperf.framework/kperf" +#define lib_path_kperfdata \ + "/System/Library/PrivateFrameworks/kperfdata.framework/kperfdata" + +static bool lib_inited = false; +static bool lib_has_err = false; +static char lib_err_msg[256]; + +static void *lib_handle_kperf = NULL; +static void *lib_handle_kperfdata = NULL; + +static void lib_deinit(void) { + lib_inited = false; + lib_has_err = false; + if (lib_handle_kperf) dlclose(lib_handle_kperf); + if (lib_handle_kperfdata) dlclose(lib_handle_kperfdata); + lib_handle_kperf = NULL; + lib_handle_kperfdata = NULL; + for (usize i = 0; i < lib_nelems(lib_symbols_kperf); i++) { + const lib_symbol *symbol = &lib_symbols_kperf[i]; + *symbol->impl = NULL; + } + for (usize i = 0; i < lib_nelems(lib_symbols_kperfdata); i++) { + const lib_symbol *symbol = &lib_symbols_kperfdata[i]; + *symbol->impl = NULL; + } +} + +static bool lib_init(void) { +#define return_err() \ + do { \ + lib_deinit(); \ + lib_inited = true; \ + lib_has_err = true; \ + return false; \ + } while (false) + + if (lib_inited) return !lib_has_err; + + // load dynamic library + 
lib_handle_kperf = dlopen(lib_path_kperf, RTLD_LAZY); + if (!lib_handle_kperf) { + snprintf(lib_err_msg, sizeof(lib_err_msg), + "Failed to load kperf.framework, message: %s.", dlerror()); + return_err(); + } + lib_handle_kperfdata = dlopen(lib_path_kperfdata, RTLD_LAZY); + if (!lib_handle_kperfdata) { + snprintf(lib_err_msg, sizeof(lib_err_msg), + "Failed to load kperfdata.framework, message: %s.", dlerror()); + return_err(); + } + + // load symbol address from dynamic library + for (usize i = 0; i < lib_nelems(lib_symbols_kperf); i++) { + const lib_symbol *symbol = &lib_symbols_kperf[i]; + *symbol->impl = dlsym(lib_handle_kperf, symbol->name); + if (!*symbol->impl) { + snprintf(lib_err_msg, sizeof(lib_err_msg), + "Failed to load kperf function: %s.", symbol->name); + return_err(); + } + } + for (usize i = 0; i < lib_nelems(lib_symbols_kperfdata); i++) { + const lib_symbol *symbol = &lib_symbols_kperfdata[i]; + *symbol->impl = dlsym(lib_handle_kperfdata, symbol->name); + if (!*symbol->impl) { + snprintf(lib_err_msg, sizeof(lib_err_msg), + "Failed to load kperfdata function: %s.", symbol->name); + return_err(); + } + } + + lib_inited = true; + lib_has_err = false; + return true; + +#undef return_err +} + +// ----------------------------------------------------------------------------- +// kdebug private structs +// https://github.com/apple/darwin-xnu/blob/main/bsd/sys_private/kdebug_private.h +// ----------------------------------------------------------------------------- + +/* + * Ensure that both LP32 and LP64 variants of arm64 use the same kd_buf + * structure. 
+ */ +#if defined(__arm64__) +typedef uint64_t kd_buf_argtype; +#else +typedef uintptr_t kd_buf_argtype; +#endif + +typedef struct { + uint64_t timestamp; + kd_buf_argtype arg1; + kd_buf_argtype arg2; + kd_buf_argtype arg3; + kd_buf_argtype arg4; + kd_buf_argtype arg5; /* the thread ID */ + uint32_t debugid; /* see */ + +/* + * Ensure that both LP32 and LP64 variants of arm64 use the same kd_buf + * structure. + */ +#if defined(__LP64__) || defined(__arm64__) + uint32_t cpuid; /* cpu index, from 0 */ + kd_buf_argtype unused; +#endif +} kd_buf; + +/* bits for the type field of kd_regtype */ +#define KDBG_CLASSTYPE 0x10000 +#define KDBG_SUBCLSTYPE 0x20000 +#define KDBG_RANGETYPE 0x40000 +#define KDBG_TYPENONE 0x80000 +#define KDBG_CKTYPES 0xF0000 + +/* only trace at most 4 types of events, at the code granularity */ +#define KDBG_VALCHECK 0x00200000U + +typedef struct { + unsigned int type; + unsigned int value1; + unsigned int value2; + unsigned int value3; + unsigned int value4; +} kd_regtype; + +typedef struct { + /* number of events that can fit in the buffers */ + int nkdbufs; + /* set if trace is disabled */ + int nolog; + /* kd_ctrl_page.flags */ + unsigned int flags; + /* number of threads in thread map */ + int nkdthreads; + /* the owning pid */ + int bufid; +} kbufinfo_t; + +// ----------------------------------------------------------------------------- +// kdebug utils +// ----------------------------------------------------------------------------- + +/// Clean up trace buffers and reset ktrace/kdebug/kperf. +/// @return 0 on success. +static int kdebug_reset(void) { + int mib[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDREMOVE}; + return sysctl(mib, 3, NULL, NULL, NULL, 0); +} + +/// Disable and reinitialize the trace buffers. +/// @return 0 on success. +static int kdebug_reinit(void) { + int mib[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDSETUP}; + return sysctl(mib, 3, NULL, NULL, NULL, 0); +} + +/// Set debug filter. 
+/// @param kdr Filter to install; must not be NULL.
+/// @return 0 on success, -1 if `kdr` is NULL, otherwise the sysctl result.
+static int kdebug_setreg(kd_regtype *kdr) {
+  // Same guard as kdebug_get_bufinfo(): never hand a NULL buffer together
+  // with a non-zero length to the kernel.
+  if (!kdr) return -1;
+  int mib[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDSETREG};
+  usize size = sizeof(kd_regtype);
+  return sysctl(mib, 3, kdr, &size, NULL, 0);
+}
+
+/// Set maximum number of trace entries (kd_buf).
+/// Only allow allocation up to half the available memory (sane_size).
+/// @param nbufs Requested number of entries; passed as the 4th MIB component.
+/// @return 0 on success.
+static int kdebug_trace_setbuf(int nbufs) {
+  int mib[4] = {CTL_KERN, KERN_KDEBUG, KERN_KDSETBUF, nbufs};
+  return sysctl(mib, 4, NULL, NULL, NULL, 0);
+}
+
+/// Enable or disable kdebug trace.
+/// Trace buffer must already be initialized.
+/// @param enable true to start tracing, false to stop; passed as the 4th MIB
+///        component (1 or 0).
+/// @return 0 on success.
+static int kdebug_trace_enable(bool enable) {
+  int mib[4] = {CTL_KERN, KERN_KDEBUG, KERN_KDENABLE, enable};
+  // The old-length argument is a pointer; spell "none" as NULL like the
+  // sibling helpers do instead of the integer literal 0.
+  return sysctl(mib, 4, NULL, NULL, NULL, 0);
+}
+
+/// Retrieve trace buffer information from kernel.
+/// @param info Receives the buffer information; must not be NULL.
+/// @return 0 on success, -1 if `info` is NULL.
+static int kdebug_get_bufinfo(kbufinfo_t *info) {
+  if (!info) return -1;
+  int mib[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDGETBUF};
+  size_t needed = sizeof(kbufinfo_t);
+  return sysctl(mib, 3, info, &needed, NULL, 0);
+}
+
+/// Retrieve trace buffers from kernel.
+/// @param buf Memory to receive buffer data, array of `kd_buf`.
+/// @param len Length of `buf` in bytes.
+/// @param count Optional; receives the number of trace entries (kd_buf)
+///        obtained. It is reset to 0 on entry and may be NULL.
+/// @return 0 on success, -1 if `buf` is NULL or `len` is 0.
+static int kdebug_trace_read(void *buf, usize len, usize *count) {
+  if (count) *count = 0;
+  if (!buf || !len) return -1;
+
+  // Note: the input and output units are not the same.
+  // input: bytes
+  // output: number of kd_buf
+  int mib[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDREADTR};
+  int ret = sysctl(mib, 3, buf, &len, NULL, 0);
+  if (ret != 0) return ret;
+  // `count` was treated as optional above, so it must be optional here too;
+  // the unconditional store crashed when the caller passed NULL.
+  if (count) *count = len;
+  return 0;
+}
+
+/// Block until there are new buffers filled or `timeout_ms` have passed.
+/// @param timeout_ms timeout in milliseconds; must be non-zero. A value of 0
+///        (which would mean "wait forever") is rejected and -1 is returned.
+/// @param suc set true if new buffers filled.
+/// @return 0 on success.
+static int kdebug_wait(usize timeout_ms, bool *suc) {
+  // A zero timeout is refused up front rather than forwarded to the kernel.
+  if (timeout_ms == 0) return -1;
+  int mib[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDBUFWAIT};
+  // `val` carries the timeout in; it is read back after the call and a
+  // non-zero value is reported through `suc`.
+  usize val = timeout_ms;
+  int ret = sysctl(mib, 3, NULL, &val, NULL, 0);
+  if (suc) *suc = !!val;
+  return ret;
+}
+
+// -----------------------------------------------------------------------------
+// Demo
+// -----------------------------------------------------------------------------
+
+#define EVENT_NAME_MAX 8
+/// A printable event name plus the candidate database names that may
+/// implement it; candidates are tried in order, the list ends at the first
+/// NULL entry.
+typedef struct {
+  const char *alias;                  /// name for print
+  const char *names[EVENT_NAME_MAX];  /// name from pmc db
+} event_alias;
+
+/// Event names from /usr/share/kpep/<name>.plist
+/// The order of the entries matters: AppleEvents::get_counters() indexes
+/// counter_map with these positions (0 cycles, 1 instructions, 2 branches,
+/// 3 branch-misses).
+static const event_alias profile_events[] = {
+    {"cycles",
+     {
+         "FIXED_CYCLES",             // Apple A7-A15
+         "CPU_CLK_UNHALTED.THREAD",  // Intel Core 1st-10th
+         "CPU_CLK_UNHALTED.CORE",    // Intel Yonah, Merom
+     }},
+    {"instructions",
+     {
+         "FIXED_INSTRUCTIONS",  // Apple A7-A15
+         "INST_RETIRED.ANY"     // Intel Yonah, Merom, Core 1st-10th
+     }},
+    {"branches",
+     {
+         "INST_BRANCH",                   // Apple A7-A15
+         "BR_INST_RETIRED.ALL_BRANCHES",  // Intel Core 1st-10th
+         // Was "INST_RETIRED.ANY", i.e. the *instructions* event listed
+         // above, which would report instructions as branches.
+         // NOTE(review): name chosen to match BR_INST_RETIRED.MISPRED below;
+         // confirm against the Yonah/Merom kpep database.
+         "BR_INST_RETIRED.ANY",  // Intel Yonah, Merom
+     }},
+    {"branch-misses",
+     {
+         "BRANCH_MISPRED_NONSPEC",        // Apple A7-A15, since iOS 15, macOS 12
+         "BRANCH_MISPREDICT",             // Apple A7-A14
+         "BR_MISP_RETIRED.ALL_BRANCHES",  // Intel Core 2nd-10th
+         "BR_INST_RETIRED.MISPRED",       // Intel Yonah, Merom
+     }},
+};
+
+/// Look up the first candidate name of `alias` that exists in `db`.
+/// @return the matching event, or NULL when none of the names is known.
+static kpep_event *get_event(kpep_db *db, const event_alias *alias) {
+  for (usize j = 0; j < EVENT_NAME_MAX; j++) {
+    const char *name = alias->names[j];
+    if (!name) break;  // end of the candidate list
+    kpep_event *ev = NULL;
+    if (kpep_db_event(db, name, &ev) == 0) {
+      return ev;
+    }
+  }
+  return NULL;
+}
+
+/// Configures the kpc counters for the events in `profile_events` and reads
+/// the per-thread counter values.
+struct AppleEvents {
+  kpc_config_t regs[KPC_MAX_COUNTERS] = {0};
+  usize counter_map[KPC_MAX_COUNTERS] = {0};
+  u64 counters_0[KPC_MAX_COUNTERS] = {0};
+  u64 counters_1[KPC_MAX_COUNTERS] = {0};
+  static constexpr usize ev_count =
+      sizeof(profile_events) / sizeof(profile_events[0]);
+  bool init = false;    // setup has been attempted (successfully or not)
+  bool worked = false;  // outcome of that single attempt
+
+  /// One-time setup: load the frameworks, build a kpep configuration for
+  /// `profile_events`, push it to the kernel and start counting.
+  /// Later calls return the cached outcome of the first call.
+  /// @return true when the counters are usable.
+  inline bool setup_performance_counters() {
+    if (init) {
+      return worked;
+    }
+    init = true;
+
+    // load dylib
+    if (!lib_init()) {
+      printf("Error: %s\n", lib_err_msg);
+      return (worked = false);
+    }
+
+    // check permission
+    int force_ctrs = 0;
+    if (kpc_force_all_ctrs_get(&force_ctrs)) {
+      printf("Permission denied, xnu/kpc requires root privileges.\n");
+      return (worked = false);
+    }
+    int ret;
+    // load pmc db
+    kpep_db *db = NULL;
+    if ((ret = kpep_db_create(NULL, &db))) {
+      printf("Error: cannot load pmc database: %d.\n", ret);
+      return (worked = false);
+    }
+    printf("loaded db: %s (%s)\n", db->name, db->marketing_name);
+    // printf("number of fixed counters: %zu\n", db->fixed_counter_count);
+    // printf("number of configurable counters: %zu\n",
+    // db->config_counter_count);
+
+    // create a config
+    kpep_config *cfg = NULL;
+    if ((ret = kpep_config_create(db, &cfg))) {
+      printf("Failed to create kpep config: %d (%s).\n", ret,
+             kpep_config_error_desc(ret));
+      return (worked = false);
+    }
+    if ((ret = kpep_config_force_counters(cfg))) {
+      printf("Failed to force counters: %d (%s).\n", ret,
+             kpep_config_error_desc(ret));
+      return (worked = false);
+    }
+
+    // get events
+    kpep_event *ev_arr[ev_count] = {0};
+    for (usize i = 0; i < ev_count; i++) {
+      const event_alias *alias = profile_events + i;
+      ev_arr[i] = get_event(db, alias);
+      if (!ev_arr[i]) {
+        printf("Cannot find event: %s.\n", alias->alias);
+        return (worked = false);
+      }
+    }
+
+    // add event to config
+    for (usize i = 0; i < ev_count; i++) {
+      kpep_event *ev = ev_arr[i];
+      if ((ret = kpep_config_add_event(cfg, &ev, 0, NULL))) {
+        printf("Failed to add event: %d (%s).\n", ret,
+               kpep_config_error_desc(ret));
+        return (worked = false);
+      }
+    }
+
+    // prepare buffer and config
+    u32 classes = 0;
+    usize reg_count = 0;
+    if ((ret = kpep_config_kpc_classes(cfg, &classes))) {
+      printf("Failed get kpc classes: %d (%s).\n", ret,
+             kpep_config_error_desc(ret));
+      return (worked = false);
+    }
+    // The argument had been corrupted to "®_count" (HTML entity decoding of
+    // "&reg"); it is the address of reg_count.
+    if ((ret = kpep_config_kpc_count(cfg, &reg_count))) {
+      printf("Failed get kpc count: %d (%s).\n", ret,
+             kpep_config_error_desc(ret));
+      return (worked = false);
+    }
+    if ((ret = kpep_config_kpc_map(cfg, counter_map, sizeof(counter_map)))) {
+      printf("Failed get kpc map: %d (%s).\n", ret,
+             kpep_config_error_desc(ret));
+      return (worked = false);
+    }
+    if ((ret = kpep_config_kpc(cfg, regs, sizeof(regs)))) {
+      printf("Failed get kpc registers: %d (%s).\n", ret,
+             kpep_config_error_desc(ret));
+      return (worked = false);
+    }
+
+    // set config to kernel
+    if ((ret = kpc_force_all_ctrs_set(1))) {
+      printf("Failed force all ctrs: %d.\n", ret);
+      return (worked = false);
+    }
+    // Only push register values when a configurable class is in use and
+    // there is at least one register to program.
+    if ((classes & KPC_CLASS_CONFIGURABLE_MASK) && reg_count) {
+      if ((ret = kpc_set_config(classes, regs))) {
+        printf("Failed set kpc config: %d.\n", ret);
+        return (worked = false);
+      }
+    }
+
+    // start counting
+    if ((ret = kpc_set_counting(classes))) {
+      printf("Failed set counting: %d.\n", ret);
+      return (worked = false);
+    }
+    if ((ret = kpc_set_thread_counting(classes))) {
+      printf("Failed set thread counting: %d.\n", ret);
+      return (worked = false);
+    }
+
+    return (worked = true);
+  }
+
+  /// Read the current thread counters into `counters_0` and package the four
+  /// profiled events.
+  /// On failure a warning is printed once and `performance_counters` is built
+  /// from the value 1 (via a conversion declared outside this view).
+  inline performance_counters get_counters() {
+    static bool warned = false;
+    int ret;
+    // get counters before
+    if ((ret = kpc_get_thread_counters(0, KPC_MAX_COUNTERS, counters_0))) {
+      if (!warned) {
+        printf("Failed get thread counters before: %d.\n", ret);
+        warned = true;
+      }
+      return 1;
+    }
+    /*
+    // We could print it out this way if we wanted to:
+    printf("counters value:\n");
+    for (usize i = 0; i < ev_count; i++) {
+        const event_alias *alias = profile_events + i;
+        usize idx = counter_map[i];
+        u64 val = counters_1[idx] - counters_0[idx];
+        printf("%14s: %llu\n", alias->alias, val);
+    }*/
+    // counter_map is indexed by position in profile_events:
+    // 0 cycles, 1 instructions, 2 branches, 3 branch-misses.
+    // The third argument used counter_map[2] a second time, so the
+    // branch-misses counter was never read and "missed branches" mirrored
+    // "branches".
+    // NOTE(review): assumes performance_counters takes (cycles, branches,
+    // missed_branches, instructions) in that order - confirm against its
+    // declaration.
+    return performance_counters{
+        counters_0[counter_map[0]], counters_0[counter_map[2]],
+        counters_0[counter_map[3]], counters_0[counter_map[1]]};
+  }
+};
+
+#endif
diff --git 
a/benchmarks/performancecounters/event_counter.h b/benchmarks/performancecounters/event_counter.h new file mode 100644 index 000000000..4a1e5eca3 --- /dev/null +++ b/benchmarks/performancecounters/event_counter.h @@ -0,0 +1,155 @@ +#ifndef __EVENT_COUNTER_H +#define __EVENT_COUNTER_H + +#include +#ifndef _MSC_VER +#include +#endif +#include + +#include + +#include +#include + +#include "linux-perf-events.h" +#ifdef __linux__ +#include +#endif + +#if __APPLE__ && __aarch64__ +#include "apple_arm_events.h" +#endif + +struct event_count { + std::chrono::duration elapsed; + std::vector event_counts; + event_count() : elapsed(0), event_counts{0, 0, 0, 0, 0} {} + event_count(const std::chrono::duration _elapsed, + const std::vector _event_counts) + : elapsed(_elapsed), event_counts(_event_counts) {} + event_count(const event_count& other) + : elapsed(other.elapsed), event_counts(other.event_counts) {} + + // The types of counters (so we can read the getter more easily) + enum event_counter_types { + CPU_CYCLES, + INSTRUCTIONS, + BRANCH_MISSES = 2, + BRANCH = 4 + }; + + double elapsed_sec() const { + return std::chrono::duration(elapsed).count(); + } + double elapsed_ns() const { + return std::chrono::duration(elapsed).count(); + } + double cycles() const { + return static_cast(event_counts[CPU_CYCLES]); + } + double instructions() const { + return static_cast(event_counts[INSTRUCTIONS]); + } + double branches() const { return static_cast(event_counts[BRANCH]); } + double branch_misses() const { + return static_cast(event_counts[BRANCH_MISSES]); + } + event_count& operator=(const event_count& other) { + this->elapsed = other.elapsed; + this->event_counts = other.event_counts; + return *this; + } + event_count operator+(const event_count& other) const { + return event_count(elapsed + other.elapsed, + { + event_counts[0] + other.event_counts[0], + event_counts[1] + other.event_counts[1], + event_counts[2] + other.event_counts[2], + event_counts[3] + other.event_counts[3], + 
event_counts[4] + other.event_counts[4], + }); + } + + void operator+=(const event_count& other) { *this = *this + other; } +}; + +struct event_aggregate { + bool has_events = false; + int iterations = 0; + event_count total{}; + event_count best{}; + event_count worst{}; + + event_aggregate() = default; + + void operator<<(const event_count& other) { + if (iterations == 0 || other.elapsed < best.elapsed) { + best = other; + } + if (iterations == 0 || other.elapsed > worst.elapsed) { + worst = other; + } + iterations++; + total += other; + } + + double elapsed_sec() const { return total.elapsed_sec() / iterations; } + double elapsed_ns() const { return total.elapsed_ns() / iterations; } + double cycles() const { return total.cycles() / iterations; } + double instructions() const { return total.instructions() / iterations; } +}; + +struct event_collector { + event_count count{}; + std::chrono::time_point start_clock{}; + +#if defined(__linux__) + LinuxEvents linux_events; + event_collector() + : linux_events(std::vector{ + PERF_COUNT_HW_CPU_CYCLES, + PERF_COUNT_HW_INSTRUCTIONS, + }) {} + bool has_events() { return linux_events.is_working(); } +#elif __APPLE__ && __aarch64__ + AppleEvents apple_events; + performance_counters diff; + event_collector() : diff(0) { apple_events.setup_performance_counters(); } + bool has_events() { return apple_events.setup_performance_counters(); } +#else + event_collector() {} + bool has_events() { return false; } +#endif + + inline void start() { +#if defined(__linux) + linux_events.start(); +#elif __APPLE__ && __aarch64__ + if (has_events()) { + diff = apple_events.get_counters(); + } +#endif + start_clock = std::chrono::steady_clock::now(); + } + inline event_count& end() { + const auto end_clock = std::chrono::steady_clock::now(); +#if defined(__linux) + linux_events.end(count.event_counts); +#elif __APPLE__ && __aarch64__ + if (has_events()) { + performance_counters end = apple_events.get_counters(); + diff = end - diff; + } + 
count.event_counts[0] = diff.cycles; + count.event_counts[1] = diff.instructions; + count.event_counts[2] = diff.missed_branches; + count.event_counts[3] = 0; + count.event_counts[4] = diff.branches; +#endif + count.elapsed = end_clock - start_clock; + return count; + } +}; + +#endif diff --git a/benchmarks/performancecounters/linux-perf-events.h b/benchmarks/performancecounters/linux-perf-events.h new file mode 100644 index 000000000..7ed0f8d24 --- /dev/null +++ b/benchmarks/performancecounters/linux-perf-events.h @@ -0,0 +1,105 @@ +#pragma once +#ifdef __linux__ + +#include // for __NR_perf_event_open +#include // for perf event constants +#include // for ioctl +#include // for syscall + +#include // for errno +#include // for memset +#include + +#include +#include + +template +class LinuxEvents { + int fd; + bool working; + perf_event_attr attribs{}; + size_t num_events{}; + std::vector temp_result_vec{}; + std::vector ids{}; + + public: + explicit LinuxEvents(std::vector config_vec) : fd(0), working(true) { + memset(&attribs, 0, sizeof(attribs)); + attribs.type = TYPE; + attribs.size = sizeof(attribs); + attribs.disabled = 1; + attribs.exclude_kernel = 1; + attribs.exclude_hv = 1; + + attribs.sample_period = 0; + attribs.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID; + const int pid = 0; // the current process + const int cpu = -1; // all CPUs + const unsigned long flags = 0; + + int group = -1; // no group + num_events = config_vec.size(); + ids.resize(config_vec.size()); + uint32_t i = 0; + for (auto config : config_vec) { + attribs.config = config; + int _fd = static_cast( + syscall(__NR_perf_event_open, &attribs, pid, cpu, group, flags)); + if (_fd == -1) { + report_error("perf_event_open"); + } + ioctl(_fd, PERF_EVENT_IOC_ID, &ids[i++]); + if (group == -1) { + group = _fd; + fd = _fd; + } + } + + temp_result_vec.resize(num_events * 2 + 1); + } + + ~LinuxEvents() { + if (fd != -1) { + close(fd); + } + } + + inline void start() { + if (fd != -1) { + if 
(ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == -1) { + report_error("ioctl(PERF_EVENT_IOC_RESET)"); + } + + if (ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) == -1) { + report_error("ioctl(PERF_EVENT_IOC_ENABLE)"); + } + } + } + + inline void end(std::vector &results) { + if (fd != -1) { + if (ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) == -1) { + report_error("ioctl(PERF_EVENT_IOC_DISABLE)"); + } + + if (read(fd, temp_result_vec.data(), temp_result_vec.size() * 8) == -1) { + report_error("read"); + } + } + // our actual results are in slots 1,3,5, ... of this structure + for (uint32_t i = 1; i < temp_result_vec.size(); i += 2) { + results[i / 2] = temp_result_vec[i]; + } + for (uint32_t i = 2; i < temp_result_vec.size(); i += 2) { + if (ids[i / 2 - 1] != temp_result_vec[i]) { + report_error("event mismatch"); + } + } + } + + bool is_working() { return working; } + + private: + void report_error(const std::string &) { working = false; } +}; +#endif \ No newline at end of file diff --git a/benchmarks/wpt_bench.cpp b/benchmarks/wpt_bench.cpp new file mode 100644 index 000000000..2b37db119 --- /dev/null +++ b/benchmarks/wpt_bench.cpp @@ -0,0 +1,227 @@ +#include "benchmark_header.h" +#include "simdjson.h" + +using namespace simdjson; + +double url_examples_bytes{}; + +std::vector> url_examples; + +size_t init_data(const char *source) { + ondemand::parser parser; + std::vector> answer; + + if (!file_exists(source)) { + return 0; + } + padded_string json = padded_string::load(source); + ondemand::document doc = parser.iterate(json); + for (auto element : doc.get_array()) { + if (element.type() == ondemand::json_type::object) { + std::string_view input; + if (element["input"].get_string(true).get(input) != simdjson::SUCCESS) { + printf("missing input.\n"); + } + std::string_view base; + if (element["base"].get_string(true).get(base) != simdjson::SUCCESS) { + } + url_examples.push_back({std::string(input), std::string(base)}); + 
url_examples_bytes += input.size() + base.size(); + } + } + return url_examples.size(); +} + +template +static void BasicBench_AdaURL(benchmark::State &state) { + // volatile to prevent optimizations. + volatile size_t href_size = 0; + + for (auto _ : state) { + for (const std::pair &url_strings : + url_examples) { + ada::result base; + result *base_ptr = nullptr; + if (!url_strings.second.empty()) { + base = ada::parse(url_strings.second); + if (base) { + base_ptr = &*base; + } else { + continue; + } + } + auto url = ada::parse(url_strings.first, base_ptr); + if (url) { + href_size += url->get_href().size(); + } + } + } + if (collector.has_events()) { + event_aggregate aggregate{}; + for (size_t i = 0; i < N; i++) { + std::atomic_thread_fence(std::memory_order_acquire); + collector.start(); + for (const std::pair &url_strings : + url_examples) { + ada::result base; + result *base_ptr = nullptr; + if (!url_strings.second.empty()) { + base = ada::parse(url_strings.second); + if (base) { + base_ptr = &*base; + } else { + continue; + } + } + auto url = ada::parse(url_strings.first, base_ptr); + if (url) { + href_size += url->get_href().size(); + } + } + std::atomic_thread_fence(std::memory_order_release); + event_count allocate_count = collector.end(); + aggregate << allocate_count; + } + state.counters["cycles/url"] = + aggregate.best.cycles() / std::size(url_examples); + state.counters["instructions/url"] = + aggregate.best.instructions() / std::size(url_examples); + state.counters["instructions/cycle"] = + aggregate.best.instructions() / aggregate.best.cycles(); + state.counters["instructions/byte"] = + aggregate.best.instructions() / url_examples_bytes; + state.counters["instructions/ns"] = + aggregate.best.instructions() / aggregate.best.elapsed_ns(); + state.counters["GHz"] = + aggregate.best.cycles() / aggregate.best.elapsed_ns(); + state.counters["ns/url"] = + aggregate.best.elapsed_ns() / std::size(url_examples); + state.counters["cycle/byte"] = 
aggregate.best.cycles() / url_examples_bytes; + } + state.counters["time/byte"] = benchmark::Counter( + url_examples_bytes, benchmark::Counter::kIsIterationInvariantRate | + benchmark::Counter::kInvert); + state.counters["time/url"] = + benchmark::Counter(double(std::size(url_examples)), + benchmark::Counter::kIsIterationInvariantRate | + benchmark::Counter::kInvert); + state.counters["speed"] = benchmark::Counter( + url_examples_bytes, benchmark::Counter::kIsIterationInvariantRate); + state.counters["url/s"] = + benchmark::Counter(double(std::size(url_examples)), + benchmark::Counter::kIsIterationInvariantRate); +} +auto BasicBench_AdaURL_url = BasicBench_AdaURL; +BENCHMARK(BasicBench_AdaURL_url); +auto BasicBench_AdaURL_url_aggregator = BasicBench_AdaURL; +BENCHMARK(BasicBench_AdaURL_url_aggregator); + +#if ADA_url_whatwg_ENABLED + +#include + +static void BasicBench_whatwg(benchmark::State &state) { + volatile size_t success{}; + for (auto _ : state) { + for (const std::pair &url_strings : + url_examples) { + upa::url base; + upa::url *base_ptr = nullptr; + if (!url_strings.second.empty()) { + if (upa::success(base.parse(url_strings.second, nullptr))) { + base_ptr = &base; + } + } + upa::url url; + if (upa::success(url.parse(url_strings.first, base_ptr))) { + success++; + } + } + } + if (collector.has_events()) { + event_aggregate aggregate{}; + for (size_t i = 0; i < N; i++) { + std::atomic_thread_fence(std::memory_order_acquire); + collector.start(); + for (const std::pair &url_strings : + url_examples) { + upa::url base; + upa::url *base_ptr = nullptr; + if (!url_strings.second.empty()) { + if (upa::success(base.parse(url_strings.second, nullptr))) { + base_ptr = &base; + } + } + upa::url url; + if (upa::success(url.parse(url_strings.first, base_ptr))) { + success++; + } + } + std::atomic_thread_fence(std::memory_order_release); + event_count allocate_count = collector.end(); + aggregate << allocate_count; + } + (void)success; + state.counters["cycles/url"] = 
+ aggregate.best.cycles() / std::size(url_examples); + state.counters["instructions/url"] = + aggregate.best.instructions() / std::size(url_examples); + state.counters["instructions/cycle"] = + aggregate.best.instructions() / aggregate.best.cycles(); + state.counters["instructions/byte"] = + aggregate.best.instructions() / url_examples_bytes; + state.counters["instructions/ns"] = + aggregate.best.instructions() / aggregate.best.elapsed_ns(); + state.counters["GHz"] = + aggregate.best.cycles() / aggregate.best.elapsed_ns(); + state.counters["ns/url"] = + aggregate.best.elapsed_ns() / std::size(url_examples); + state.counters["cycle/byte"] = aggregate.best.cycles() / url_examples_bytes; + } + state.counters["time/byte"] = benchmark::Counter( + url_examples_bytes, benchmark::Counter::kIsIterationInvariantRate | + benchmark::Counter::kInvert); + state.counters["time/url"] = + benchmark::Counter(double(std::size(url_examples)), + benchmark::Counter::kIsIterationInvariantRate | + benchmark::Counter::kInvert); + state.counters["speed"] = benchmark::Counter( + url_examples_bytes, benchmark::Counter::kIsIterationInvariantRate); + state.counters["url/s"] = + benchmark::Counter(double(std::size(url_examples)), + benchmark::Counter::kIsIterationInvariantRate); +} +BENCHMARK(BasicBench_whatwg); +#endif // ADA_url_whatwg_ENABLED + +int main(int argc, char **argv) { + if (argc == 1 || !init_data(argv[1])) { + std::cout + << "pass the path to the file wpt/urltestdata.json as a parameter." 
+ << std::endl; + std::cout + << "E.g., './build/benchmarks/wpt_bench tests/wpt/urltestdata.json'" + << std::endl; + return EXIT_SUCCESS; + } +#if defined(ADA_RUST_VERSION) + benchmark::AddCustomContext("rust version ", ADA_RUST_VERSION); +#endif +#if (__APPLE__ && __aarch64__) || defined(__linux__) + if (!collector.has_events()) { + benchmark::AddCustomContext("performance counters", + "No privileged access (sudo may help)."); + } +#else + if (!collector.has_events()) { + benchmark::AddCustomContext("performance counters", "Unsupported system."); + } +#endif + + if (collector.has_events()) { + benchmark::AddCustomContext("performance counters", "Enabled"); + } + benchmark::Initialize(&argc, argv); + benchmark::RunSpecifiedBenchmarks(); + benchmark::Shutdown(); +} diff --git a/clang-format-ignore.txt b/clang-format-ignore.txt new file mode 100644 index 000000000..e69de29bb diff --git a/cmake/CPM.cmake b/cmake/CPM.cmake new file mode 100644 index 000000000..ad6b74a8b --- /dev/null +++ b/cmake/CPM.cmake @@ -0,0 +1,24 @@ +# SPDX-License-Identifier: MIT +# +# SPDX-FileCopyrightText: Copyright (c) 2019-2023 Lars Melchior and contributors + +set(CPM_DOWNLOAD_VERSION 0.38.6) +set(CPM_HASH_SUM "11c3fa5f1ba14f15d31c2fb63dbc8628ee133d81c8d764caad9a8db9e0bacb07") + +if(CPM_SOURCE_CACHE) + set(CPM_DOWNLOAD_LOCATION "${CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake") +elseif(DEFINED ENV{CPM_SOURCE_CACHE}) + set(CPM_DOWNLOAD_LOCATION "$ENV{CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake") +else() + set(CPM_DOWNLOAD_LOCATION "${CMAKE_BINARY_DIR}/cmake/CPM_${CPM_DOWNLOAD_VERSION}.cmake") +endif() + +# Expand relative path. 
This is important if the provided path contains a tilde (~)
+get_filename_component(CPM_DOWNLOAD_LOCATION ${CPM_DOWNLOAD_LOCATION} ABSOLUTE)
+
+file(DOWNLOAD
+     https://github.com/cpm-cmake/CPM.cmake/releases/download/v${CPM_DOWNLOAD_VERSION}/CPM.cmake
+     ${CPM_DOWNLOAD_LOCATION} EXPECTED_HASH SHA256=${CPM_HASH_SUM}
+)
+
+include(${CPM_DOWNLOAD_LOCATION})
diff --git a/cmake/ada-config.cmake.in b/cmake/ada-config.cmake.in
new file mode 100644
index 000000000..0c5d540b1
--- /dev/null
+++ b/cmake/ada-config.cmake.in
@@ -0,0 +1 @@
+include("${CMAKE_CURRENT_LIST_DIR}/ada_targets.cmake")
diff --git a/cmake/ada-flags.cmake b/cmake/ada-flags.cmake
new file mode 100644
index 000000000..40ba5ab50
--- /dev/null
+++ b/cmake/ada-flags.cmake
@@ -0,0 +1,62 @@
+# Project-wide build switches and defaults for ada.
+option(ADA_LOGGING "verbose output (useful for debugging)" OFF)
+option(ADA_DEVELOPMENT_CHECKS "development checks (useful for debugging)" OFF)
+option(ADA_SANITIZE "Sanitize addresses" OFF)
+# This switch is tested below but was never declared, so it did not show up
+# in the cache / GUI. Declaring it leaves a value given with -D untouched.
+option(ADA_SANITIZE_WITHOUT_LEAKS "Sanitize addresses (without leak detection)" OFF)
+if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+  option(ADA_SANITIZE_BOUNDS_STRICT "Sanitize bounds (strict): only for GCC" OFF)
+endif()
+option(ADA_SANITIZE_UNDEFINED "Sanitize undefined behaviour" OFF)
+if(ADA_SANITIZE)
+  message(STATUS "Address sanitizer enabled.")
+endif()
+if(ADA_SANITIZE_WITHOUT_LEAKS)
+  message(STATUS "Address sanitizer (but not leak) enabled.")
+endif()
+if(ADA_SANITIZE_UNDEFINED)
+  message(STATUS "Undefined sanitizer enabled.")
+endif()
+option(ADA_COVERAGE "Compute coverage" OFF)
+option(ADA_TOOLS "Build cli tools (adaparse)" ON)
+
+# CMAKE_BUILD_TYPE is only meaningful for single-configuration generators;
+# multi-config generators (Visual Studio, Xcode, Ninja Multi-Config) pick the
+# configuration at build time, so no default is forced for them.
+get_property(ada_is_multi_config GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
+
+if(ADA_COVERAGE)
+  message(STATUS "You want to compute coverage. We assume that you have installed gcovr.")
+  if(NOT ada_is_multi_config AND NOT CMAKE_BUILD_TYPE)
+    set(CMAKE_BUILD_TYPE Debug CACHE STRING "Choose the type of build." FORCE)
+  endif()
+  #######################
+  # You need to install gcovr. Under macos, you may do so with brew.
+  # brew install gcovr
+  # Then build...
+  # cmake -D ADA_COVERAGE=ON -B buildcoverage
+  # cmake --build buildcoverage
+  # cmake --build buildcoverage --target ada_coverage
+  #
+  # open buildcoverage/ada_coverage/index.html
+  #####################
+  include("${PROJECT_SOURCE_DIR}/cmake/codecoverage.cmake")
+  APPEND_COVERAGE_COMPILER_FLAGS()
+  setup_target_for_coverage_gcovr_html(
+    NAME ada_coverage
+    EXECUTABLE ctest
+    EXCLUDE
+      "${PROJECT_SOURCE_DIR}/dependencies/*"
+      "${PROJECT_SOURCE_DIR}/tools/*"
+      "${PROJECT_SOURCE_DIR}/singleheader/*"
+      "${PROJECT_SOURCE_DIR}/include/ada/common_defs.h"
+  )
+endif()
+
+# Default the build type only when the user did not choose one: Debug when a
+# sanitizer is requested, Release otherwise.
+if(NOT ada_is_multi_config AND NOT CMAKE_BUILD_TYPE)
+  if(ADA_SANITIZE OR ADA_SANITIZE_WITHOUT_LEAKS OR ADA_SANITIZE_BOUNDS_STRICT OR ADA_SANITIZE_UNDEFINED)
+    message(STATUS "No build type selected, default to Debug because you have sanitizers.")
+    set(CMAKE_BUILD_TYPE Debug CACHE STRING "Choose the type of build." FORCE)
+  else()
+    message(STATUS "No build type selected, default to Release")
+    set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE)
+  endif()
+endif()
+
+# Append rather than overwrite, so a module path set by the including project
+# or on the command line is kept.
+list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/tools/cmake")
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_EXTENSIONS OFF)
+
+# Use ccache when it is installed. CCACHE_FOUND holds the full path located by
+# find_program(); pass that path so the launcher is the binary that was found.
+find_program(CCACHE_FOUND ccache)
+if(CCACHE_FOUND)
+  message(STATUS "Ccache found using it as compiler launcher.")
+  set(CMAKE_C_COMPILER_LAUNCHER "${CCACHE_FOUND}")
+  set(CMAKE_CXX_COMPILER_LAUNCHER "${CCACHE_FOUND}")
+endif()
diff --git a/cmake/add-cpp-test.cmake b/cmake/add-cpp-test.cmake
new file mode 100644
index 000000000..4576ad2d3
--- /dev/null
+++ b/cmake/add-cpp-test.cmake
@@ -0,0 +1,70 @@
+# Helper so we don't have to repeat ourselves so much
+# Usage: add_cpp_test(testname [COMPILE_ONLY] [SOURCES a.cpp b.cpp ...] [LABELS acceptance per_implementation ...])
+# SOURCES defaults to testname.cpp if not specified.
+function(add_cpp_test TEST_NAME) + # Parse arguments + cmake_parse_arguments(PARSE_ARGV 1 ARGS "COMPILE_ONLY;LIBRARY;WILL_FAIL" "" "SOURCES;LABELS;DEPENDENCY_OF") + if (NOT ARGS_SOURCES) + list(APPEND ARGS_SOURCES ${TEST_NAME}.cpp) + endif() + if (ARGS_COMPILE_ONLY) + list(APPEND ${ARGS_LABELS} compile_only) + endif() + if(ADA_SANITIZE) + add_compile_options(-fsanitize=address -fno-omit-frame-pointer -fno-sanitize-recover=all) + add_compile_definitions(ASAN_OPTIONS=detect_leaks=1) + endif() + if(ADA_SANITIZE_WITHOUT_LEAKS) + add_compile_options(-fsanitize=address -fno-omit-frame-pointer -fno-sanitize-recover=all) + endif() + if(ADA_SANITIZE_BOUNDS_STRICT) + add_compile_options(-fsanitize=bounds-strict -fno-sanitize-recover=all) + add_link_options(-fsanitize=bounds-strict) + endif() + if(ADA_SANITIZE_UNDEFINED) + add_compile_options(-fsanitize=undefined -fno-sanitize-recover=all) + add_link_options(-fsanitize=undefined) + endif() + # Add the compile target + if (ARGS_LIBRARY) + add_library(${TEST_NAME} STATIC ${ARGS_SOURCES}) + else(ARGS_LIBRARY) + add_executable(${TEST_NAME} ${ARGS_SOURCES}) + endif(ARGS_LIBRARY) + + # Add test + if (ARGS_COMPILE_ONLY OR ARGS_LIBRARY) + add_test( + NAME ${TEST_NAME} + COMMAND ${CMAKE_COMMAND} --build . --target ${TEST_NAME} --config $ + WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + ) + set_target_properties(${TEST_NAME} PROPERTIES EXCLUDE_FROM_ALL TRUE EXCLUDE_FROM_DEFAULT_BUILD TRUE) + else() + add_test(${TEST_NAME} ${TEST_NAME}) + + # Add to