diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 1606316..942dc69 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -17,7 +17,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.7', '3.8', '3.9', '3.10', '3.11'] + python-version: ['3.8', '3.9', '3.10', '3.11'] steps: - uses: actions/checkout@v3 - name: Set up Python @@ -42,12 +42,6 @@ jobs: - name: Build package run: >- make build - - name: Publish package to Test PyPI (always) - uses: pypa/gh-action-pypi-publish@release/v1 - with: - password: ${{ secrets.TEST_PYPI_API_TOKEN }} - repository_url: https://test.pypi.org/legacy/ - skip_existing: true - name: Publish package to PyPI (only if pushing a tag) if: startsWith(github.ref, 'refs/tags') uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/README.md b/README.md index 8170fb5..328dc4d 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,8 @@ Signup to ScrapingBee to [get your API key](https://app.scrapingbee.com/account/ 'device': 'desktop', # Use some data extraction rules 'extract_rules': {'title': 'h1'}, + # Use AI to extract data from the page + 'ai_extract_rules': {'product_name': 'The name of the product', 'price': 'The price in USD'}, # Wrap response in JSON 'json_response': False, # Interact with the webpage you want to scrape diff --git a/requirements.txt b/requirements.txt index e907555..f90f773 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,17 +3,17 @@ certifi==2022.12.7 charset-normalizer==3.1.0 distlib==0.3.6 filelock==3.10.0 -flake8==3.9.2 +flake8==6.0.0 idna==3.4 iniconfig==2.0.0 -mccabe==0.6.1 +mccabe==0.7.0 more-itertools==9.1.0 packaging==23.0 platformdirs==3.1.1 pluggy==0.13.1 py==1.11.0 -pycodestyle==2.7.0 -pyflakes==2.3.1 +pycodestyle==2.10.0 +pyflakes==3.0.1 pytest==7.2.2 requests==2.28.2 six==1.16.0 diff --git a/scrapingbee/__version__.py b/scrapingbee/__version__.py index 159d48b..0309ae2 100644 --- a/scrapingbee/__version__.py +++ b/scrapingbee/__version__.py @@ -1 +1 @@ -__version__ = "2.0.1" +__version__ = "2.0.2" diff --git a/scrapingbee/utils.py b/scrapingbee/utils.py index adf5759..7501533 100644 --- a/scrapingbee/utils.py +++ b/scrapingbee/utils.py @@ -46,6 +46,8 @@ def process_params(params: dict) -> dict: new_params[k] = process_cookies(v) elif k == 'extract_rules': new_params[k] = process_json_stringify_param(v, 'extract_rules') + elif k == 'ai_extract_rules': + new_params[k] = process_json_stringify_param(v, 'ai_extract_rules') elif k == 'js_scenario': new_params[k] = process_json_stringify_param(v, 'js_scenario') else: diff --git a/setup.py b/setup.py index 4ee8299..94e8519 100644 --- a/setup.py +++ b/setup.py @@ -31,6 +31,6 @@ 'Programming Language :: Python :: 3.11', 'Topic :: Software Development :: Libraries :: Python Modules', ], - python_requires='>=3.7', + python_requires='>=3.8', install_requires=['requests'], ) diff --git a/tests/test_client.py b/tests/test_client.py index be84469..2a3d0f8 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -113,6 +113,27 @@ def test_get_with_js_scenario(mock_session, client): ) +@mock.patch('scrapingbee.client.Session') +def test_get_with_ai_extract_rules(mock_session, client): + '''It should format the ai_extract_rules and add them to the url''' + client.get('https://httpbin.org', params={ + 'ai_extract_rules': { + "product_name": "The name of the product", + "price": "The price in USD" + } + }) + + mock_session.return_value.request.assert_called_with( + 'GET', + 'https://app.scrapingbee.com/api/v1/' + '?api_key=API_KEY&url=https%3A%2F%2Fhttpbin.org&' + 'ai_extract_rules=%7B%22product_name%22%3A+%22The+name+of+the+product%22%2C+%22' + 'price%22%3A+%22The+price+in+USD%22%7D', + data=None, + headers=DEFAULT_HEADERS, + ) + + @mock.patch('scrapingbee.client.Session') def test_post(mock_session, client): '''It should make a POST request with some data''' diff --git a/tests/test_utils.py b/tests/test_utils.py index 9d1a8ea..583e497 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -18,7 +18,7 @@ def test_process_headers(): """It should add a Spb- prefix to header names""" output = process_headers({"Accept-Language": "En-US"}) assert output == { - "User-Agent": "ScrapingBee-Python/2.0.1", + "User-Agent": "ScrapingBee-Python/2.0.2", "Spb-Accept-Language": "En-US", } @@ -46,6 +46,13 @@ def test_process_js_scenario(): assert output == '{"instructions": [{"click": "#buttonId"}]}' +def test_process_ai_extract_rules(): + """It should format ai_extract_rules to a stringified JSON""" + output = process_json_stringify_param( + {"product_name": "The name of the product", "price": "The price in USD"}, "ai_extract_rules") + assert output == '{"product_name": "The name of the product", "price": "The price in USD"}' + + def test_process_params(): """It should keep boolean parameters""" output = process_params({"render_js": True})