Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 17 additions & 16 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ jobs:
((github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name != github.repository) || (github.event.pull_request.merged == true && startsWith(github.event.pull_request.head.ref, vars.RELEASE_PR_BRANCH || 'create-pull-request'))) ||
(github.repository == 'darvid/python-hyperscan' && contains(github.event.head_commit.message, '[build]'))
run: |
echo "valid_event=true" >> $GITHUB_OUTPUT
echo "valid_event=true" >> "$GITHUB_OUTPUT"

check_changes:
name: Build pre-conditions check
Expand All @@ -76,36 +76,37 @@ jobs:

- name: Check if build is needed
id: check
env:
PR_TITLE: ${{ github.event.pull_request.title }}
run: |
if [[ "${{ inputs.force_build || false }}" == "true" ]]; then
echo "should_build=true" >> $GITHUB_OUTPUT
echo "should_build=true" >> "$GITHUB_OUTPUT"
echo "Running build because force_build is true"
exit 0
fi

# Check for [build] tag in commit messages or PR title
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
# For PRs, check if PR title contains [build]
PR_TITLE="${{ github.event.pull_request.title }}"
if [[ "$PR_TITLE" == *"[build]"* ]]; then
echo "should_build=true" >> $GITHUB_OUTPUT
echo "should_build=true" >> "$GITHUB_OUTPUT"
echo "Running build because PR title contains [build]"
exit 0
fi

# Also check all commits in the PR for [build]
BASE_SHA="${{ github.event.pull_request.base.sha }}"
HEAD_SHA="${{ github.event.pull_request.head.sha }}"
COMMIT_MSGS=$(git fetch origin $BASE_SHA $HEAD_SHA && git log --format=%B $BASE_SHA..$HEAD_SHA || echo "")
COMMIT_MSGS=$(git fetch origin "$BASE_SHA" "$HEAD_SHA" && git log --format=%B "${BASE_SHA}..${HEAD_SHA}" || echo "")
if echo "$COMMIT_MSGS" | grep -q "\[build\]"; then
echo "should_build=true" >> $GITHUB_OUTPUT
echo "should_build=true" >> "$GITHUB_OUTPUT"
echo "Running build because a commit in the PR contains [build]"
exit 0
fi
else
# For pushes, check if the head commit message contains [build]
if [[ "${{ contains(github.event.head_commit.message, '[build]') }}" == "true" ]]; then
echo "should_build=true" >> $GITHUB_OUTPUT
echo "should_build=true" >> "$GITHUB_OUTPUT"
echo "Running build because commit message contains [build]"
exit 0
fi
Expand All @@ -115,7 +116,7 @@ jobs:
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
BASE_SHA="${{ github.event.pull_request.base.sha }}"
HEAD_SHA="${{ github.event.pull_request.head.sha }}"
CHANGED_FILES=$(git fetch origin $BASE_SHA $HEAD_SHA && git diff --name-only $BASE_SHA $HEAD_SHA || echo "")
CHANGED_FILES=$(git fetch origin "$BASE_SHA" "$HEAD_SHA" && git diff --name-only "${BASE_SHA}" "${HEAD_SHA}" || echo "")
else
# For pushes, use the before/after SHAs or fallback to comparing with parent
BEFORE_SHA="${{ github.event.before }}"
Expand All @@ -126,12 +127,12 @@ jobs:
CHANGED_FILES=$(git diff --name-only HEAD^ || echo "")
else
# Try to fetch the commits first to make sure they exist
git fetch --depth=1 origin $BEFORE_SHA || true
git fetch --depth=1 origin $AFTER_SHA || true
git fetch --depth=1 origin "${BEFORE_SHA}" || true
git fetch --depth=1 origin "${AFTER_SHA}" || true

# Check if both SHAs exist in the repository
if git cat-file -e $BEFORE_SHA 2>/dev/null && git cat-file -e $AFTER_SHA 2>/dev/null; then
CHANGED_FILES=$(git diff --name-only $BEFORE_SHA $AFTER_SHA || echo "")
if git cat-file -e "${BEFORE_SHA}" 2>/dev/null && git cat-file -e "${AFTER_SHA}" 2>/dev/null; then
CHANGED_FILES=$(git diff --name-only "${BEFORE_SHA}" "${AFTER_SHA}" || echo "")
else
# Fallback to comparing with parent commit
echo "Cannot find one of the SHAs, falling back to HEAD^"
Expand All @@ -144,16 +145,16 @@ jobs:
RESULT=1
echo "$CHANGED_FILES" | grep -q -E '^(src/hyperscan/|README.md|CMakeLists.txt|pyproject.toml|MANIFEST.in|cmake/|build_tools/)' || RESULT=$?

if [[ $RESULT -eq 0 ]]; then
echo "should_build=true" >> $GITHUB_OUTPUT
if [[ "$RESULT" -eq 0 ]]; then
echo "should_build=true" >> "$GITHUB_OUTPUT"
echo "Running build because relevant files were changed"
else
echo "should_build=false" >> $GITHUB_OUTPUT
echo "should_build=false" >> "$GITHUB_OUTPUT"
echo "Skipping build because no relevant files were changed and commit doesn't have [build] tag"
fi
else
# For pull requests, always build (after checking for [build] tag above)
echo "should_build=true" >> $GITHUB_OUTPUT
echo "should_build=true" >> "$GITHUB_OUTPUT"
echo "Running build for pull request"
fi

Expand Down
13 changes: 10 additions & 3 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,18 @@ jobs:
src: "./src"
args: check --fix

- name: Validate GitHub workflows
uses: raven-actions/actionlint@v2

- name: Debug refs
env:
GITHUB_HEAD_REF: ${{ github.head_ref }}
GITHUB_REF: ${{ github.ref }}
GITHUB_SHA: ${{ github.sha }}
run: |
echo "github.ref: ${{ github.ref }}"
echo "github.head_ref: ${{ github.head_ref }}"
echo "github.sha: ${{ github.sha }}"
echo "github.ref: ${GITHUB_REF}"
echo "github.head_ref: ${GITHUB_HEAD_REF}"
echo "github.sha: ${GITHUB_SHA}"

- name: Commit formatting changes
uses: iarekylew00t/verified-bot-commit@v1
Expand Down
18 changes: 9 additions & 9 deletions .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
build:
name: Build source distribution and wheels
uses: ./.github/workflows/build.yml
if: github.event.pull_request.merged == true && startsWith(github.event.pull_request.head.ref, ${{ vars.RELEASE_PR_BRANCH || 'create-pull-request' }}) && github.repository == 'darvid/python-hyperscan'
if: github.event.pull_request.merged == true && startsWith(github.event.pull_request.head.ref, vars.RELEASE_PR_BRANCH || 'create-pull-request') && github.repository == 'darvid/python-hyperscan'
permissions:
contents: read
actions: write
Expand Down Expand Up @@ -49,23 +49,23 @@ jobs:
# Check if HEAD already has a release version tag (prevents redundant releases)
if git describe --exact-match --tags HEAD --match "v*" 2>/dev/null; then
EXISTING_TAG=$(git describe --exact-match --tags HEAD --match "v*" 2>/dev/null)
echo "HEAD already tagged with release version $EXISTING_TAG, no release needed"
echo "should_release=false" >> $GITHUB_OUTPUT
echo "HEAD already tagged with release version ${EXISTING_TAG}, no release needed"
echo "should_release=false" >> "$GITHUB_OUTPUT"
else
# Check if there are commits since last release
LATEST_TAG=$(git describe --tags --abbrev=0 --match "v*" 2>/dev/null || echo "")
if [[ -n "$LATEST_TAG" ]]; then
COMMITS_COUNT=$(git rev-list ${LATEST_TAG}..HEAD --count 2>/dev/null || echo "1")
COMMITS_COUNT=$(git rev-list "${LATEST_TAG}"..HEAD --count 2>/dev/null || echo "1")
if [[ "$COMMITS_COUNT" -eq 0 ]]; then
echo "No commits since last release $LATEST_TAG, no new content to release"
echo "should_release=false" >> $GITHUB_OUTPUT
echo "No commits since last release ${LATEST_TAG}, no new content to release"
echo "should_release=false" >> "$GITHUB_OUTPUT"
else
echo "Found $COMMITS_COUNT commits since $LATEST_TAG, proceeding with release"
echo "should_release=true" >> $GITHUB_OUTPUT
echo "Found ${COMMITS_COUNT} commits since ${LATEST_TAG}, proceeding with release"
echo "should_release=true" >> "$GITHUB_OUTPUT"
fi
else
echo "No previous release found, proceeding with initial release"
echo "should_release=true" >> $GITHUB_OUTPUT
echo "should_release=true" >> "$GITHUB_OUTPUT"
fi
fi

Expand Down
19 changes: 11 additions & 8 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,18 +35,18 @@ jobs:
CHANGED_FILES=""
fi
echo "Changed files:"
echo "$CHANGED_FILES"
echo "${CHANGED_FILES}"

CHANGES=0
echo "$CHANGED_FILES" | grep -c -E '^(src/hyperscan/|README.md|CMakeLists.txt|pyproject.toml|MANIFEST.in|cmake/|build_tools/)' || CHANGES=$?
echo "${CHANGED_FILES}" | grep -c -E '^(src/hyperscan/|README.md|CMakeLists.txt|pyproject.toml|MANIFEST.in|cmake/|build_tools/)' || CHANGES=$?

if [[ "$CHANGES" -gt 0 ]]; then
# The last commit already triggered a build, no need to force
echo "force_build=false" >> $GITHUB_OUTPUT
echo "force_build=false" >> "$GITHUB_OUTPUT"
echo "Last commit already triggered a build"
else
# The last commit didn't trigger a build, we need to force it
echo "force_build=true" >> $GITHUB_OUTPUT
echo "force_build=true" >> "$GITHUB_OUTPUT"
echo "Last commit didn't trigger a build, forcing build"
fi

Expand Down Expand Up @@ -79,6 +79,9 @@ jobs:
needs: [check_build, check_release]
if: github.repository == 'darvid/python-hyperscan' && !contains(github.event.head_commit.message, 'python-semantic-release') && (needs.check_build.outputs.is_build_needed == 'true' || needs.check_release.outputs.is_release_needed == 'true')
uses: ./.github/workflows/build.yml
permissions:
contents: read
actions: write
with:
force_build: "${{ needs.check_release.outputs.is_release_needed == 'true' || fromJSON(needs.check_build.outputs.is_build_needed) }}"

Expand Down Expand Up @@ -121,11 +124,11 @@ jobs:
if: needs.check_release.outputs.is_release_needed == 'true'
run: |
# Check if branch exists on remote and delete it if it does
if git ls-remote --heads origin ${RELEASE_PR_BRANCH} | grep -q ${RELEASE_PR_BRANCH}; then
git push origin --delete ${RELEASE_PR_BRANCH}
if git ls-remote --heads origin "${RELEASE_PR_BRANCH}" | grep -q "${RELEASE_PR_BRANCH}"; then
git push origin --delete "${RELEASE_PR_BRANCH}"
fi
# Create new branch
git switch -c ${RELEASE_PR_BRANCH}
git switch -c "${RELEASE_PR_BRANCH}"

- name: Semantic release
uses: python-semantic-release/python-semantic-release@v9.10.1
Expand All @@ -142,7 +145,7 @@ jobs:
- name: Create PR
if: needs.check_release.outputs.is_release_needed == 'true'
run: |
gh pr create -B main -H $RELEASE_PR_BRANCH \
gh pr create -B main -H "$RELEASE_PR_BRANCH" \
--title "$PR_TITLE" \
--body '🤖'
env:
Expand Down
4 changes: 4 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,7 @@ repos:
hooks:
- id: commitizen
stages: [commit-msg]
- repo: https://github.com/rhysd/actionlint
rev: v1.7.4
hooks:
- id: actionlint
Empty file added actionlint
Empty file.
23 changes: 23 additions & 0 deletions test_issue_207.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/usr/bin/env python3

import hyperscan

# Report which hyperscan build is being exercised.
print(f'hyperscan version: {hyperscan.__version__}')

# Patterns reproduced from GitHub issue #207: Arabic and Hebrew text inside a
# <span> element, each encoded to UTF-8 bytes before being compiled.
bla = [
    source.encode('utf8')
    for source in (
        r'<span\s+.*>السلام عليكم\s<\/span>',
        r'<span\s+.*>ועליכום הסלאם\s<\/span>',
    )
]

print(f'Testing patterns: {bla}')

try:
    rules_db = hyperscan.Database()
    # Compile with both the UTF-8 and UCP flags set, as in the issue report.
    utf8_ucp = hyperscan.HS_FLAG_UTF8 | hyperscan.HS_FLAG_UCP
    rules_db.compile(expressions=bla, flags=utf8_ucp)
    print('SUCCESS: Patterns compiled with HS_FLAG_UTF8 | HS_FLAG_UCP!')
except Exception as e:
    print(f'FAILED: {e}')
    # Distinguish the UTF-8 validation error from any other failure by
    # looking for its message text in the exception string.
    print(
        '*** THIS IS THE EXACT BUG FROM ISSUE #207! ***'
        if 'Expression is not valid UTF-8' in str(e)
        else '*** Different error ***'
    )
19 changes: 19 additions & 0 deletions test_unicode.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/usr/bin/env python3

import hyperscan
# Report which hyperscan build is being exercised.
print(f'hyperscan version: {hyperscan.__version__}')

# Unicode (Arabic and Hebrew) patterns, passed to compile() as plain str values.
patterns = [
    'السلام عليكم',
    'ועליכום הסלאם',
]
print(f'Testing unicode patterns: {patterns}')

try:
    db = hyperscan.Database()
    db.compile(expressions=patterns)
    print('SUCCESS: Unicode patterns compiled without errors!')
except Exception as e:
    print(f'FAILED: {str(e)}')
    # Classify the failure by looking for the UTF-8 validation message in the
    # exception text.
    is_utf8_bug = 'Expression is not valid UTF-8' in str(e)
    print(
        '*** THIS IS THE BUG - the fix is NOT working! ***'
        if is_utf8_bug
        else '*** Different error, not the unicode bug ***'
    )
99 changes: 99 additions & 0 deletions test_wheels/hyperscan-0.7.19.dist-info/METADATA
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
Metadata-Version: 2.2
Name: hyperscan
Version: 0.7.19
Summary: Python bindings for Hyperscan.
Keywords: regex,hypercan
Author-Email: David Gidwani <david.gidwani@atomweight.io>
License: MIT
Classifier: Development Status :: 4 - Beta
Classifier: Topic :: Software Development :: Libraries
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Topic :: Utilities
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Environment :: Console
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: POSIX :: Linux
Classifier: Operating System :: Unix
Classifier: Operating System :: MacOS
Classifier: Operating System :: Microsoft :: Windows
Project-URL: Homepage, https://github.com/darvid/python-hyperscan
Project-URL: Repository, https://github.com/darvid/python-hyperscan
Project-URL: Documentation, https://python-hyperscan.readthedocs.io/en/latest/
Requires-Python: <4.0,>=3.9
Description-Content-Type: text/markdown

# Hyperscan/Vectorscan for Python

![GitHub Actions Workflow Status](https://img.shields.io/github/actions/workflow/status/darvid/python-hyperscan/build.yml?style=plastic)
![PyPI - Version](https://img.shields.io/pypi/v/hyperscan?style=plastic)
![PyPI - Downloads](https://img.shields.io/pypi/dm/hyperscan?style=plastic)
![PyPI - Python Version](https://img.shields.io/pypi/pyversions/hyperscan.svg?style=plastic)
![PyPI - Wheel](https://img.shields.io/pypi/wheel/hyperscan.svg?style=plastic)
![PyPI - License](https://img.shields.io/pypi/l/hyperscan.svg?style=plastic)
[![Read the Docs](https://img.shields.io/readthedocs/python-hyperscan.svg?style=plastic)](https://python-hyperscan.readthedocs.io/en/latest/)

A CPython extension for [Vectorscan][7], an open source fork of
[Hyperscan][8], Intel's open source ([prior to version 5.4][9]),
high-performance multiple regex matching library.

* ✅ Binary [manylinux][12]-compatible wheels
* ✅ Statically linked (no need to build Hyperscan/Vectorscan)
* ✅ [Chimera][1] support

## Installation

```shell
# 🪄 Installing libhs is NOT required, because python-hyperscan is statically linked
pip install hyperscan
```

## Build Optimization

If you'd like to use Intel's Hyperscan rather than Vectorscan, or if
you'd like to enable native CPU detection to build optimized non-FAT
libraries ([default off in Vectorscan][11]), extending the
[manylinux-hyperscan][10] Docker image used to build the binary wheels
for this library should be fairly straightforward.

## API Support

``python-hyperscan`` currently exposes *most* of the C API, with the
following caveats or exceptions:

* No [stream compression][2] support.
* No [custom allocator][3] support.
* ``hs_expression_info``, ``hs_expression_ext_info``,
``hs_populate_platform``, and ``hs_serialized_database_info`` not
exposed yet.

See the [documentation][6] for more detailed build instructions.

## Resources

* [PyPI Project][13]
* [Documentation][6]
* [Hyperscan C API Documentation][14]

[1]: http://intel.github.io/hyperscan/dev-reference/chimera.html
[2]: http://intel.github.io/hyperscan/dev-reference/runtime.html#stream-compression
[3]: http://intel.github.io/hyperscan/dev-reference/runtime.html#custom-allocators
[4]: http://intel.github.io/hyperscan/dev-reference/compilation.html
[5]: https://github.com/darvid/python-hyperscan/issues
[6]: https://python-hyperscan.readthedocs.io
[7]: https://www.vectorcamp.gr/vectorscan/
[8]: https://www.hyperscan.io/
[9]: https://github.com/VectorCamp/vectorscan?tab=readme-ov-file#hyperscan-license-change-after-54
[10]: https://github.com/darvid/manylinux-hyperscan/
[11]: https://github.com/VectorCamp/vectorscan?tab=readme-ov-file#configure--build
[12]: https://github.com/pypa/manylinux
[13]: https://pypi.org/project/hyperscan/
[14]: http://intel.github.io/hyperscan/dev-reference/
10 changes: 10 additions & 0 deletions test_wheels/hyperscan-0.7.19.dist-info/RECORD
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
hyperscan-0.7.19.dist-info/METADATA,sha256=6NsPEGGFUJdhx_ulIMD4Ff-cgw4FlNm1rgaFDQ7yI2Q,4299
hyperscan-0.7.19.dist-info/RECORD,,
hyperscan-0.7.19.dist-info/WHEEL,sha256=6Dxtid-NXEnR7jvm4_GvErwSb88e3UzL8AWq9MWuAAE,156
hyperscan-0.7.19.dist-info/licenses/LICENSE,sha256=yvm4yRI_IxT-4iZOEl1Nx9I0Dm0JbAbmHt8OmKopiUA,1070
hyperscan/__init__.py,sha256=ImBXLA9RN8dJIx94n6R3iRUOBO7v1-q8vImzzKPVLbU,367
hyperscan/extension.c,sha256=xcYkpNIuIIYNGFWKC46lp9YYbOABu5EpDpSeW09AFgQ,47700
hyperscan/_version.py,sha256=-_OxJPv2D0J4Tap1QJZo4Z4XyBYoG9M_2-0CsJ35W-I,23
hyperscan/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
hyperscan/__init__.pyi,sha256=oRU1eShJUV5-mQheZfDCbZYTpVWPyS0dHrhmbT0ewiI,10768
hyperscan/_hs_ext.cpython-311-x86_64-linux-gnu.so,sha256=KIaQV29IP80Ed1uJgU38d8nX4nBhDvzbugfr5fX2XnE,7051104
6 changes: 6 additions & 0 deletions test_wheels/hyperscan-0.7.19.dist-info/WHEEL
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Wheel-Version: 1.0
Generator: scikit-build-core 0.11.5
Root-Is-Purelib: false
Tag: cp311-cp311-manylinux_2_17_x86_64
Tag: cp311-cp311-manylinux2014_x86_64

Loading
Loading