diff --git a/crewai_tools/__init__.py b/crewai_tools/__init__.py index 4d2ea7e1..6cd3125f 100644 --- a/crewai_tools/__init__.py +++ b/crewai_tools/__init__.py @@ -1,5 +1,6 @@ from .tools import ( AIMindTool, + ApifyActorsTool, BraveSearchTool, BrowserbaseLoadTool, CodeDocsSearchTool, diff --git a/crewai_tools/tools/__init__.py b/crewai_tools/tools/__init__.py index 4a9786fe..b1c3b6d6 100644 --- a/crewai_tools/tools/__init__.py +++ b/crewai_tools/tools/__init__.py @@ -1,4 +1,5 @@ from .ai_mind_tool.ai_mind_tool import AIMindTool +from .apify_actors_tool.apify_actors_tool import ApifyActorsTool from .brave_search_tool.brave_search_tool import BraveSearchTool from .browserbase_load_tool.browserbase_load_tool import BrowserbaseLoadTool from .code_docs_search_tool.code_docs_search_tool import CodeDocsSearchTool diff --git a/crewai_tools/tools/apify_actors_tool/README.md b/crewai_tools/tools/apify_actors_tool/README.md new file mode 100644 index 00000000..c00891de --- /dev/null +++ b/crewai_tools/tools/apify_actors_tool/README.md @@ -0,0 +1,96 @@ +# ApifyActorsTool + +Integrate [Apify Actors](https://apify.com/actors) into your CrewAI workflows. + +## Description + +The `ApifyActorsTool` connects [Apify Actors](https://apify.com/actors), cloud-based programs for web scraping and automation, to your CrewAI workflows. +Use any of the 4,000+ Actors on [Apify Store](https://apify.com/store) for use cases such as extracting data from social media, search engines, online maps, e-commerce sites, travel portals, or general websites. + +For details, see the [Apify CrewAI integration](https://docs.apify.com/platform/integrations/crewai) in Apify documentation. + +## Installation + +To use `ApifyActorsTool`, install the necessary packages and set up your Apify API token. Follow the [Apify API documentation](https://docs.apify.com/platform/integrations/api) for steps to obtain the token. + +### Steps + +1. **Install dependencies** + Install `crewai[tools]` and `langchain-apify`: + ```bash + pip install 'crewai[tools]' langchain-apify + ``` + +2. **Set your API token** + Export the token as an environment variable: + ```bash + export APIFY_API_TOKEN='your-api-token-here' + ``` + +## Usage example + +Use the `ApifyActorsTool` manually to run the [RAG Web Browser Actor](https://apify.com/apify/rag-web-browser) to perform a web search: + +```python +from crewai_tools import ApifyActorsTool + +# Initialize the tool with an Apify Actor +tool = ApifyActorsTool(actor_name="apify/rag-web-browser") + +# Run the tool with input parameters +results = tool.run(run_input={"query": "What is CrewAI?", "maxResults": 5}) + +# Process the results +for result in results: + print(f"URL: {result['metadata']['url']}") + print(f"Content: {result.get('markdown', 'N/A')[:100]}...") +``` + +### Expected output + +Here is the output from running the code above: + +```text +URL: https://www.example.com/crewai-intro +Content: CrewAI is a framework for building AI-powered workflows... +URL: https://docs.crewai.com/ +Content: Official documentation for CrewAI... +``` + +The `ApifyActorsTool` automatically fetches the Actor definition and input schema from Apify using the provided `actor_name` and then constructs the tool description and argument schema. This means you need to specify only a valid `actor_name`, and the tool handles the rest when used with agents—no need to specify the `run_input`. Here's how it works: + +```python +from crewai import Agent +from crewai_tools import ApifyActorsTool + +rag_browser = ApifyActorsTool(actor_name="apify/rag-web-browser") + +agent = Agent( + role="Research Analyst", + goal="Find and summarize information about specific topics", + backstory="You are an experienced researcher with attention to detail", + tools=[rag_browser], +) +``` + +You can run other Actors from [Apify Store](https://apify.com/store) simply by changing the `actor_name` and, when using it manually, adjusting the `run_input` based on the Actor input schema. + +For an example of usage with agents, see the [CrewAI Actor template](https://apify.com/templates/python-crewai). + +## Configuration + +The `ApifyActorsTool` requires these inputs to work: + +- **`actor_name`** + The ID of the Apify Actor to run, e.g., `"apify/rag-web-browser"`. Browse all Actors on [Apify Store](https://apify.com/store). +- **`run_input`** + A dictionary of input parameters for the Actor when running the tool manually. + - For example, for the `apify/rag-web-browser` Actor: `{"query": "search term", "maxResults": 5}` + - See the Actor's [input schema](https://apify.com/apify/rag-web-browser/input-schema) for the list of input parameters. + +## Resources + +- **[Apify](https://apify.com/)**: Explore the Apify platform. +- **[How to build an AI agent on Apify](https://blog.apify.com/how-to-build-an-ai-agent/)** - A complete step-by-step guide to creating, publishing, and monetizing AI agents on the Apify platform. +- **[RAG Web Browser Actor](https://apify.com/apify/rag-web-browser)**: A popular Actor for web search for LLMs. +- **[CrewAI Integration Guide](https://docs.apify.com/platform/integrations/crewai)**: Follow the official guide for integrating Apify and CrewAI. diff --git a/crewai_tools/tools/apify_actors_tool/apify_actors_tool.py b/crewai_tools/tools/apify_actors_tool/apify_actors_tool.py new file mode 100644 index 00000000..37ae7312 --- /dev/null +++ b/crewai_tools/tools/apify_actors_tool/apify_actors_tool.py @@ -0,0 +1,92 @@ +from crewai.tools import BaseTool +from pydantic import Field +from typing import TYPE_CHECKING, Any, Dict, List +import os + +if TYPE_CHECKING: + from langchain_apify import ApifyActorsTool as _ApifyActorsTool + +class ApifyActorsTool(BaseTool): + """Tool that runs Apify Actors. + + To use, you should have the environment variable `APIFY_API_TOKEN` set + with your API key. + + For details, see https://docs.apify.com/platform/integrations/crewai + + Args: + actor_name (str): The name of the Apify Actor to run. + *args: Variable length argument list passed to BaseTool. + **kwargs: Arbitrary keyword arguments passed to BaseTool. + + Returns: + List[Dict[str, Any]]: Results from the Actor execution. + + Raises: + ValueError: If `APIFY_API_TOKEN` is not set or if the tool is not initialized. + ImportError: If `langchain_apify` package is not installed. + + Example: + .. code-block:: python + from crewai_tools import ApifyActorsTool + + tool = ApifyActorsTool(actor_name="apify/rag-web-browser") + + results = tool.run(run_input={"query": "What is CrewAI?", "maxResults": 5}) + for result in results: + print(f"URL: {result['metadata']['url']}") + print(f"Content: {result.get('markdown', 'N/A')[:100]}...") + """ + actor_tool: '_ApifyActorsTool' = Field(description="Apify Actor Tool") + + def __init__( + self, + actor_name: str, + *args: Any, + **kwargs: Any + ) -> None: + if not os.environ.get("APIFY_API_TOKEN"): + msg = ( + "APIFY_API_TOKEN environment variable is not set. " + "Please set it to your API key, to learn how to get it, " + "see https://docs.apify.com/platform/integrations/api" + ) + raise ValueError(msg) + + try: + from langchain_apify import ApifyActorsTool as _ApifyActorsTool + except ImportError: + raise ImportError( + "Could not import langchain_apify python package. " + "Please install it with `pip install langchain-apify` or `uv add langchain-apify`." + ) + actor_tool = _ApifyActorsTool(actor_name) + + kwargs.update( + { + "name": actor_tool.name, + "description": actor_tool.description, + "args_schema": actor_tool.args_schema, + "actor_tool": actor_tool, + } + ) + super().__init__(*args, **kwargs) + + def _run(self, run_input: Dict[str, Any]) -> List[Dict[str, Any]]: + """Run the Actor tool with the given input. + + Returns: + List[Dict[str, Any]]: Results from the Actor execution. + + Raises: + ValueError: If 'actor_tool' is not initialized. + """ + try: + return self.actor_tool._run(run_input) + except Exception as e: + msg = ( + f'Failed to run ApifyActorsTool {self.name}. ' + 'Please check your Apify account Actor run logs for more details.' + f'Error: {e}' + ) + raise RuntimeError(msg) from e diff --git a/pyproject.toml b/pyproject.toml index 24749368..2073ba21 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -88,6 +88,9 @@ exa-py = [ qdrant-client = [ "qdrant-client>=1.12.1", ] +apify = [ + "langchain-apify>=0.1.2,<1.0.0", +] diff --git a/uv.lock b/uv.lock index c4665207..42bcdf78 100644 --- a/uv.lock +++ b/uv.lock @@ -145,6 +145,29 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/46/eb/e7f063ad1fec6b3178a3cd82d1a3c4de82cccf283fc42746168188e1cdd5/anyio-4.8.0-py3-none-any.whl", hash = "sha256:b5011f270ab5eb0abf13385f851315585cc37ef330dd88e27ec3d34d651fd47a", size = 96041 }, ] +[[package]] +name = "apify-client" +version = "1.9.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "apify-shared" }, + { name = "httpx" }, + { name = "more-itertools" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/64/95/5456d114f83749f74969577824abc74fc3ab5e1722acab37f77b0b1800c6/apify_client-1.9.2.tar.gz", hash = "sha256:af76b78c3153263040615daec0619765e067466bbb82e569afe799ad72c53050", size = 46276 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c5/3d/d71b7c79c8da2ca2ccd2aa43cea0cfb2d17475ea8fafb4e7d7a78b24c02c/apify_client-1.9.2-py3-none-any.whl", hash = "sha256:a441fb59b5ec1c42aead73284c90304029442ddc26e764c151b8dc7f15e38600", size = 72505 }, +] + +[[package]] +name = "apify-shared" +version = "1.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d5/12/3c6639116aba2b851b38bfcd5d81bb91f733f041c12c1d547028bd075a08/apify_shared-1.2.1.tar.gz", hash = "sha256:986557e2b01c584aa57258fb4af83d32ecb6979c0d804ccbfda7c0e79a2d00b1", size = 13597 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/31/ff/9d60edc7602587a2e83f86687c2a1794d6ec7711599dbd0fb075e12d6abd/apify_shared-1.2.1-py3-none-any.whl", hash = "sha256:cee136729c41c215796d8ca9f7b2e5736995d44f9471663d61827d16f592df9b", size = 12404 }, +] + [[package]] name = "appdirs" version = "1.4.4" @@ -532,7 +555,7 @@ name = "click" version = "8.1.8" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "platform_system == 'Windows'" }, + { name = "colorama", marker = "sys_platform == 'win32'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/b9/2e/0090cbf739cee7d23781ad4b89a9894a41538e4fcf4c31dcdd705b78eb8b/click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a", size = 226593 } wheels = [ @@ -660,6 +683,9 @@ dependencies = [ ] [package.optional-dependencies] +apify = [ + { name = "langchain-apify" }, +] beautifulsoup4 = [ { name = "beautifulsoup4" }, ] @@ -721,6 +747,7 @@ weaviate-client = [ [package.dev-dependencies] dev = [ { name = "pytest" }, + { name = "pytest-asyncio" }, ] [package.metadata] @@ -738,6 +765,7 @@ requires-dist = [ { name = "firecrawl-py", marker = "extra == 'firecrawl-py'", specifier = ">=1.8.0" }, { name = "hyperbrowser", marker = "extra == 'hyperbrowser'", specifier = ">=0.18.0" }, { name = "lancedb", specifier = ">=0.5.4" }, + { name = "langchain-apify", marker = "extra == 'apify'", specifier = ">=0.1.2" }, { name = "linkup-sdk", marker = "extra == 'linkup-sdk'", specifier = ">=0.2.2" }, { name = "multion", marker = "extra == 'multion'", specifier = ">=1.1.0" }, { name = "openai", specifier = ">=1.12.0" }, @@ -759,7 +787,11 @@ requires-dist = [ ] [package.metadata.requires-dev] -dev = [{ name = "pytest", specifier = ">=8.3.4" }] +dev = [ + { name = "pytest", specifier = ">=8.0.0" }, + { name = "pytest", specifier = ">=8.3.4" }, + { name = "pytest-asyncio", specifier = ">=0.25.2" }, +] [[package]] name = "cryptography" @@ -922,6 +954,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c1/8b/5fe2cc11fee489817272089c4203e679c63b570a5aaeb18d852ae3cbba6a/et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa", size = 18059 }, ] +[[package]] +name = "eval-type-backport" +version = "0.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/30/ea/8b0ac4469d4c347c6a385ff09dc3c048c2d021696664e26c7ee6791631b5/eval_type_backport-0.2.2.tar.gz", hash = "sha256:f0576b4cf01ebb5bd358d02314d31846af5e07678387486e2c798af0e7d849c1", size = 9079 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ce/31/55cd413eaccd39125368be33c46de24a1f639f2e12349b0361b4678f3915/eval_type_backport-0.2.2-py3-none-any.whl", hash = "sha256:cb6ad7c393517f476f96d456d0412ea80f0a8cf96f6892834cd9340149111b0a", size = 5830 }, +] + [[package]] name = "exa-py" version = "1.8.7" @@ -2023,6 +2064,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3e/cb/a375aa0a8e5ab806766ea6f167088bbb9b8aea836031e5f80f6f492b1f8d/langchain-0.3.15-py3-none-any.whl", hash = "sha256:2657735184054cae8181ac43fce6cbc9ee64ca81a2ad2aed3ccd6e5d6fe1f19f", size = 1009589 }, ] +[[package]] +name = "langchain-apify" +version = "0.1.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "apify-client" }, + { name = "eval-type-backport" }, + { name = "langchain-core" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b7/4a/b08697398365925e8c135c4fa5ddc0c3fe579c75e4abe711c8fbf9ddd708/langchain_apify-0.1.2.tar.gz", hash = "sha256:be78f7dcc67fe2aaa83a3f7732303ee71b8b192b7bc949485f02135a5ff3ead1", size = 14571 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d4/03/443499dbc42f6157075f8368402ac90dbbf1fe612795e9de861d388b92e8/langchain_apify-0.1.2-py3-none-any.whl", hash = "sha256:316c2b86c0f9e66a436af55ebe9cfe17f71b17fb657cf280e2b718759aa43c93", size = 16166 }, +] + [[package]] name = "langchain-cohere" version = "0.3.5" @@ -2383,6 +2438,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9a/67/7e8406a29b6c45be7af7740456f7f37025f0506ae2e05fb9009a53946860/monotonic-1.6-py2.py3-none-any.whl", hash = "sha256:68687e19a14f11f26d140dd5c86f3dba4bf5df58003000ed467e0e2a69bca96c", size = 8154 }, ] +[[package]] +name = "more-itertools" +version = "10.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/88/3b/7fa1fe835e2e93fd6d7b52b2f95ae810cf5ba133e1845f726f5a992d62c2/more-itertools-10.6.0.tar.gz", hash = "sha256:2cd7fad1009c31cc9fb6a035108509e6547547a7a738374f10bd49a09eb3ee3b", size = 125009 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/23/62/0fe302c6d1be1c777cab0616e6302478251dfbf9055ad426f5d0def75c89/more_itertools-10.6.0-py3-none-any.whl", hash = "sha256:6eb054cb4b6db1473f6e15fcc676a08e4732548acd47c708f0e179c2c7c01e89", size = 63038 }, +] + [[package]] name = "mpmath" version = "1.3.0" @@ -3152,7 +3216,7 @@ name = "portalocker" version = "2.10.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "pywin32", marker = "platform_system == 'Windows'" }, + { name = "pywin32", marker = "sys_platform == 'win32'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/ed/d3/c6c64067759e87af98cc668c1cc75171347d0f1577fab7ca3749134e3cd4/portalocker-2.10.1.tar.gz", hash = "sha256:ef1bf844e878ab08aee7e40184156e1151f228f103aa5c6bd0724cc330960f8f", size = 40891 } wheels = [ @@ -3665,6 +3729,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/11/92/76a1c94d3afee238333bc0a42b82935dd8f9cf8ce9e336ff87ee14d9e1cf/pytest-8.3.4-py3-none-any.whl", hash = "sha256:50e16d954148559c9a74109af1eaf0c945ba2d8f30f0a3d3335edde19788b6f6", size = 343083 }, ] +[[package]] +name = "pytest-asyncio" +version = "0.25.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f2/a8/ecbc8ede70921dd2f544ab1cadd3ff3bf842af27f87bbdea774c7baa1d38/pytest_asyncio-0.25.3.tar.gz", hash = "sha256:fc1da2cf9f125ada7e710b4ddad05518d4cee187ae9412e9ac9271003497f07a", size = 54239 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/67/17/3493c5624e48fd97156ebaec380dcaafee9506d7e2c46218ceebbb57d7de/pytest_asyncio-0.25.3-py3-none-any.whl", hash = "sha256:9e89518e0f9bd08928f97a3482fdc4e244df17529460bc038291ccaf8f85c7c3", size = 19467 }, +] + [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -4571,7 +4647,7 @@ name = "tqdm" version = "4.67.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "platform_system == 'Windows'" }, + { name = "colorama", marker = "sys_platform == 'win32'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737 } wheels = [