diff --git a/README.md b/README.md index 02d2b110b..a2b832c42 100644 --- a/README.md +++ b/README.md @@ -58,38 +58,40 @@ uv run python examples/hello_world.py ## πŸ§ͺ Example Usage ```python -from typing import Optional from pydantic import BaseModel, Field +from agentics.core.transducible_functions import transducible, Transduce + +class ProductDescription(BaseModel): + name: str + features: str + price: float + +class ViralTweet(BaseModel): + tweet: str = Field(..., description="Engaging tweet under 280 characters") + hashtags: list[str] = Field(..., description="3-5 relevant hashtags") + hook: str = Field(..., description="Attention-grabbing opening line") + +@transducible() +async def generate_viral_tweet(product: ProductDescription) -> ViralTweet: + """Transform boring product descriptions into viral social media content.""" + return Transduce(product) + +# Transform a product into viral content +product = ProductDescription( + name="Agentics Framework", + features="Type-safe AI workflows with LLM-powered transductions", + price=0.0 # Open source! +) -from agentics.core.transducible_functions import Transduce, transducible - - -class Movie(BaseModel): - movie_name: Optional[str] = None - description: Optional[str] = None - year: Optional[int] = None - - -class Genre(BaseModel): - genre: Optional[str] = Field(None, description="e.g., comedy, drama, action") - - -@transducible(provide_explanation=True) -async def classify_genre(state: Movie) -> Genre: - """Classify the genre of the source Movie.""" - return Transduce(state) - +tweet = await generate_viral_tweet(product) +print(f"πŸ”₯ {tweet.tweet}") +print(f"πŸ“± {' '.join(tweet.hashtags)}") +``` -genre, explanation = await classify_genre( - Movie( - movie_name="The Godfather", - description=( - "The aging patriarch of an organized crime dynasty transfers control " - "of his clandestine empire to his reluctant son." - ), - year=1972, - ) -) +**Output:** +``` +πŸ”₯ Stop wrestling with unstructured LLM outputs! 
🎯 Agentics gives you type-safe AI workflows that just work. Build production-ready agents in minutes, not weeks. And it's FREE! 🚀 +📱 #AI #OpenSource #Python #LLM #DevTools ``` --- @@ -126,16 +128,24 @@ Apache 2.0 ## 👥 Authors -**Principal Investigator** +**Project Lead and Main Contributor** - Alfio Massimiliano Gliozzo (IBM Research) — gliozzo@us.ibm.com **Core Contributors** -- Nahuel Defosse (IBM Research) — nahuel.defosse@ibm.com -- Junkyu Lee (IBM Research) — Junkyu.Lee@ibm.com -- Naweed Aghmad Khan (IBM Research) — naweed.khan@ibm.com -- Christodoulos Constantinides (IBM Watson) — Christodoulos.Constantinides@ibm.com -- Mustafa Eyceoz (Red Hat) — Mustafa.Eyceoz@partner.ibm.com +- Junkyu Lee (IBM) — Junkyu.Lee@ibm.com +- Nahuel Defosse (IBM) — nahuel.defosse@ibm.com +- Naweed Aghmad Khan (IBM) — naweed.khan@ibm.com +**Community Contributors** +- Christodoulos Constantinides (IBM) — Christodoulos.Constantinides@ibm.com +- Nandana Mihindukulasooriya (IBM) — nandana@ibm.com +- Mustafa Eyceoz (Red Hat) — Mustafa.Eyceoz@partner.ibm.com +- Gaetano Rossiello (IBM) — gaetano.rossiello@ibm.com +- Agostino Capponi (Columbia University) — ac3827@columbia.edu +- Chunghyun Han (Columbia University) — ch4005@columbia.edu +- Abhinav Goel (Columbia University) — ag5252@columbia.edu +- Chaitya Shan (Columbia University) — cs4621@columbia.edu +- Brian Zi Qi Zhu (Columbia University) — bzz2101@columbia.edu --- diff --git a/docs/agentics.md b/docs/agentics.md index ea4388bff..b122867e3 100644 --- a/docs/agentics.md +++ b/docs/agentics.md @@ -1,11 +1,20 @@ -# Agentics +# AG (Agentics) -Agentics objects are wrappers around list of objects having the same Pydantic Type. +**AG** (short for "Agentics") objects are wrappers around lists of objects having the same Pydantic type. They are designed to enable async logical transduction among their instances. 
-Agentics enable us to think about AI workflows in terms of structured data transformations rather than agent behaviours, knowledge and tasks. +AG containers enable us to think about AI workflows in terms of structured data transformations rather than agent behaviors, knowledge, and tasks. -## The Agentics class -Agentics is a Python class that wraps a list of Pydantic objects and enables structured, type-driven logical transduction between them. +## The AG Class + +AG (Agentics) is a Python class that wraps a list of Pydantic objects and enables structured, type-driven logical transduction between them. + +**Import and Usage:** +```python +from agentics import AG # Recommended: use AG alias + +# Create a typed container +movies = AG(atype=Movie) +``` Internally, Agentics is implemented as a Pydantic model. It holds: • atype: a reference to the Pydantic class shared by all objects in the list. @@ -105,15 +114,16 @@ print(movies.states[0]) ``` -You can also modify and rebind an exiting Agentic. Similarly can also remove attributes. The following code is equivalent to the code before +You can also modify and rebind an existing AG. Similarly, you can also remove attributes. The following code is equivalent to the code before. 
```python movies = AG.from_csv("data/orders.csv") +print(movies[0]) movies.add_attribute("review",str) movies.add_attribute("quality_score",int,description="The quality of the movies in a scale 0 to 10") print(movies[0]) movies.subset_atype("title","genre","description") -print(movies[0]) ## note that movies[0] is equivalent to +print(movies[0]) ## note that movies[0] is a shorthand for movies.states[0] ``` @@ -138,8 +148,7 @@ async def main(): "What is the best F1 team in history?", ] - answers = await (AG(atype=Answer) \ - << input_questions) + answers = await (AG(atype=Answer) << input_questions) answers.pretty_print() @@ -149,9 +158,10 @@ asyncio.run(main()) ## Reference code -[explore this example](src/agentics/examples/agentics_basics.py) - +See the [examples directory](../examples/) for practical demonstrations of AG usage, including: +- `hello_world.py` - Basic transduction example +- `generate_tweets.py` - Content generation +- `emotion_extractor.py` - Text analysis -## See Next: Transduction - -Wrapping pydantic types into Agentics provides them with the ability to perform transduction, as described in the [next section](transduction.md) +## Go to Index +- πŸ‘‰ [Index](index.md) diff --git a/docs/core_concepts.md b/docs/core_concepts.md index 0068e764b..d05c9b45b 100644 --- a/docs/core_concepts.md +++ b/docs/core_concepts.md @@ -6,7 +6,7 @@ Agentics is built around a small set of concepts that work together: - **Transducible functions** – LLM-powered, type-safe transformations - **Typed state containers (AGs)** – collections of typed rows/documents - **Logical Transduction Algebra (LTA)** – the formal backbone -- **Map–Reduce** – the execution pattern for large workloads +- **Map–Reduce** – the programming model used to execute large-scale workloads This page gives you the mental model you need before diving into code. 
@@ -20,12 +20,13 @@ You describe your data using **Pydantic models**: ```python from pydantic import BaseModel +from typing import Optional class Product(BaseModel): - id: str | None = None - title: str | None = None - description: str | None = None - price: float | None = None + id: Optional[str] = None + title: Optional[str] = None + description: Optional[str] = None + price: Optional[float] = None ``` These models serve three roles: @@ -64,17 +65,17 @@ Example: from pydantic import BaseModel class Review(BaseModel): - text: str + text: Optional[str] = None class ReviewSummary(BaseModel): - sentiment: str - summary: str + sentiment: Optional[str] = None + summary: Optional[str] = None ``` A transducible function might be: ```python -fn: (Review) -> ReviewSummary +fn: Review -> ReviewSummary ``` with instructions like: @@ -92,20 +93,19 @@ You don’t call the LLM directly; you **call the transducible function**, which --- -## 3. Typed State Containers (AGs): Working with Collections 🗂️ +## 3. Typed State Containers (AG): Working with Collections 🗂️ Transformations rarely happen on a single object. You typically work with **collections** of items (rows, documents, events, etc.). -Agentics introduces **typed state containers** (AG) to: +Agentics introduces **typed state containers** (called **AG**, short for "Agentics") to: -- Hold a collection of instances of a given Pydantic type -- Preserve that type information across operations +- Hold a collection of instances of a given Pydantic type +- Preserve that type information across operations - Provide a uniform interface for Map–Reduce, filtering, joining, etc. Conceptually, you can think of an `AG[Source]` like a type-aware table: - ```text AG[Review] ├─ row 0: Review(text="…") @@ -113,18 +113,24 @@ AG[Review] └─ row n: Review(text="…") ``` -Applying a transducible function `(Review) -> ReviewSummary` over an `AG[Review]` conceptually yields an `AG[ReviewSummary]`. 
+Applying a transducible function `Review -> ReviewSummary` over an `AG` with atype `Review` conceptually yields an `AG` of type `ReviewSummary`. Typed state containers give you: -- **Clarity** – you always know what type you’re holding. -- **Safety** – operations can check types and schemas instead of guessing. +- **Clarity** – you always know what type you're holding. +- **Safety** – operations can check types and schemas instead of guessing. - **Composability** – containers can flow between functions and stages. -You can think of state containers as the **data plane** of Agentics. +You can think of state containers (AGs) as the **data plane** of Agentics. + + +```python +from agentics import AG # Recommended alias +movies = AG(atype=Movie) # Create a typed container +``` -Note: The name Agentics is derived as a legacy from the first version of Agentics, in which data models and transformations were blended into the same object. By introducing transducible functions as first class citizens, Agentics 2.0 uses AGs primarily as a data structure, although it is still possible to use them directly for transformations. See agentics v1.0 documentation to learn more. +**Historical Note:** In Agentics 1.0, data models and transformations were blended into the same object. Agentics 2.0 separates concerns by introducing transducible functions as first-class citizens, while AG containers focus on data management. The v1.0 API is still supported for backward compatibility. --- @@ -158,7 +164,7 @@ In short: Once you have: -- Typed collections (`AG[Source]`), and +- Typed collections (`AG[Source]`) and - Typed transformations (`Source -> Target`), you need a way to run these at scale. Agentics uses a familiar pattern: **Map–Reduce**. @@ -248,8 +254,9 @@ A typical workflow looks like this: - **Logical Transduction Algebra (LTA)** explains why these transformations compose and remain interpretable. 
- **Map–Reduce** provides the pattern for scaling these transductions to large datasets. -From here, you can explore: +## Next - 👉 [Transducible Functions](transducible_functions.md) for concrete examples of defining and using transducible functions -- 👉 `types_and_states.md` for data modeling patterns -- 👉 `mapreduce.md` to see how large-scale execution works in practice + +## Go to Index +- 👉 [Index](index.md) diff --git a/docs/getting_started.md b/docs/getting_started.md index 60c160697..d08582027 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -19,12 +19,11 @@ Agentics is a lightweight, Python-native framework for building structured, agen curl -LsSf https://astral.sh/uv/install.sh | sh ``` - Other installation options [here](curl -LsSf https://astral.sh/uv/install.sh | sh) + Other installation options [here](https://docs.astral.sh/uv/getting-started/installation/) * Install the dependencies ```bash - uv sync # Source the environment (optional, you can skip this and prepend uv run to the later lines) source .venv/bin/activate # bash/zsh 🐚 @@ -32,21 +31,21 @@ Agentics is a lightweight, Python-native framework for building structured, agen ``` -### 🎯 Set Environment Variables +### 🎯 Environment Variables Create a `.env` file in the root directory with your environment variables. See `.env.sample` for an example. -Set Up LLM provider, Chose one of the following: +Set up an LLM provider; choose one of the following: #### OpenAI - Obtain API key from [OpenAI](https://platform.openai.com/) - `OPENAI_API_KEY` - Your OpenAI APIKey -- `OPENAI_MODEL_ID` - Your favorute model, default to **openai/gpt-4** +- `OPENAI_MODEL_ID` - Selected model, default to **openai/gpt-4** #### Ollama (local) - Download and install [Ollama](https://ollama.com/) -- Download a Model. You should use a model that support reasoning and fit your GPU. So smaller are preferred. +- Download a model. You should use a model that supports reasoning and fits your GPU. 
So smaller are preferred. ``` ollama pull ollama/deepseek-r1:latest ``` @@ -59,11 +58,11 @@ ollama pull ollama/deepseek-r1:latest - `MODEL` - watsonx/meta-llama/llama-3-3-70b-instruct (or alternative supporting function call) -#### Google Gemini (offer free API key) +#### Google Gemini (offers free API key) -- `WATSONX_APIKEY` - WatsonX API key +- `GEMINI_API_KEY` - Your Google Gemini API key (get it from [Google AI Studio](https://aistudio.google.com/)) -- `MODEL` - watsonx/meta-llama/llama-3-3-70b-instruct (or alternative supporting function call) +- `MODEL` - `gemini/gemini-1.5-pro` or `gemini/gemini-1.5-flash` (or other Gemini models supporting function calling) #### VLLM (Need dedicated GPU server): @@ -72,209 +71,146 @@ ollama pull ollama/deepseek-r1:latest - `VLLM_URL` - - `VLLM_MODEL_ID` - Your model id (e.g. "hosted_vllm/meta-llama/Llama-3.3-70B-Instruct" ) -#### LiteLLM (100+ providers via single interface) - -LiteLLM provides a unified interface to access 100+ LLM providers. You can use models from OpenAI, Anthropic, Google, Cohere, Azure, Hugging Face, and more. - -**Basic Setup (Local LiteLLM)**: -- `LITELLM_MODEL` - Model in format `provider/model-name` (e.g., `openai/gpt-4`, `claude/claude-opus-4-5-20251101`, `gemini/gemini-2.0-flash`) -- The required API key for your provider should be in environment variables (e.g., `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, etc.) -- Optional: `LITELLM_TEMPERATURE` - Set temperature (default: varies by provider) -- Optional: `LITELLM_TOP_P` - Set top-p sampling (default: varies by provider) +## Test Installation -**Examples**: +Test hello world example (need to set up llm credentials first) -OpenAI via LiteLLM: ```bash -export LITELLM_MODEL="openai/gpt-4" -export OPENAI_API_KEY="sk-..." 
-``` +python examples/hello_world.py +python examples/self_transduction.py +python examples/agentics_web_search_report.py -Anthropic Claude via LiteLLM: -```bash -export LITELLM_MODEL="claude/claude-opus-4-5-20251101" -export ANTHROPIC_API_KEY="sk-ant-..." ``` -Google Gemini via LiteLLM: -```bash -export LITELLM_MODEL="gemini/gemini-2.0-flash" -export GOOGLE_API_KEY="..." -``` - -**LiteLLM Proxy Server** -If you have a self-hosted LiteLLM proxy server: +## Hello World -- `LITELLM_PROXY_URL` - Base URL of your LiteLLM proxy (e.g., `http://localhost:8000`) -- `LITELLM_PROXY_API_KEY` - API key for the proxy -- `LITELLM_PROXY_MODEL` - Model name in format `litellm_proxy/` (e.g., `litellm_proxy/gpt-4`) -- Optional: `LITELLM_PROXY_TEMPERATURE` - Set temperature -- Optional: `LITELLM_PROXY_TOP_P` - Set top-p sampling +Transform boring product descriptions into viral tweets in just a few lines: -**Example**: -```bash -export LITELLM_PROXY_URL="http://localhost:8000" -export LITELLM_PROXY_API_KEY="sk-proxy-key-123" -export LITELLM_PROXY_MODEL="litellm_proxy/my-model" -``` +```python +from pydantic import BaseModel, Field +from agentics.core.transducible_functions import transducible, Transduce -Also you can use the provided script for configuration in the git repo (⚠️not available -through `pip install`) +from typing import Optional -```bash -uv run tasks.py setup +class ProductDescription(BaseModel): + name: Optional[str] = None + features: Optional[str] = None + price: Optional[float] = None + +class ViralTweet(BaseModel): + tweet: Optional[str] = Field(None, description="Engaging tweet under 280 characters") + hashtags: Optional[list[str]] = Field(None, description="3-5 relevant hashtags") + hook: Optional[str] = Field(None, description="Attention-grabbing opening line") + +@transducible() +async def generate_viral_tweet(product: ProductDescription) -> ViralTweet: + """Transform boring product descriptions into viral social media content.""" + return 
Transduce(product) + +# Transform a product into viral content +product = ProductDescription( + name="Agentics Framework", + features="Type-safe AI workflows with LLM-powered transductions", + price=0.0 # Open source! +) + +tweet = await generate_viral_tweet(product) +print(f"πŸ”₯ {tweet.tweet}") +print(f"πŸ“± {' '.join(tweet.hashtags)}") ``` -**Checking LiteLLM Status** - -After configuration, you can check if your LiteLLM setup is working: - -```bash -show-llms +**Output:** ``` - -This will display a table showing the authentication status of all configured LLMs, including LiteLLM. - - -## Test Installation - -test hello world example (need to set up llm credentials first) - -```bash -uv run examples/hello_world.py -uv run examples/self_transduction.py -uv run examples/agentics_web_search_report.py - +πŸ”₯ Stop wrestling with unstructured LLM outputs! 🎯 Agentics gives you type-safe AI workflows that just work. Build production-ready agents in minutes, not weeks. And it's FREE! πŸš€ +πŸ“± #AI #OpenSource #Python #LLM #DevTools ``` +### Alternative: Using `<<` Operator -## Hello World +For quick one-off transductions, use `<<` operator: ```python -from typing import Optional -from pydantic import BaseModel, Field - -from agentics.core.transducible_functions import Transduce, transducible +from pydantic import BaseModel +from typing import Optional -class Movie(BaseModel): - movie_name: Optional[str] = None +class Product(BaseModel): + name: Optional[str] = None description: Optional[str] = None - year: Optional[int] = None +class Tweet(BaseModel): + content: Optional[str] = None -class Genre(BaseModel): - genre: Optional[str] = Field(None, description="e.g., comedy, drama, action") +# Create transduction on the fly +make_tweet = Tweet << Product -movie = Movie(movie_name="The Godfather") - -genre = await (Genre << Movie)(movie) +product = Product( + name="Agentics", + description="Type-safe AI framework for Python" +) +tweet = await make_tweet(product) 
+print(tweet.content) ``` -### Installation details - -=== "Poetry" - - Install poetry (skip if available) - - ```bash - curl -sSL https://install.python-poetry.org | python3 - - ``` - - Clone and install agentics - - ```bash - - poetry install - source $(poetry env info --path)/bin/activate - ``` - -=== "Python" - - > Ensure you have Python 3.11+ 🚨. - > - > ```shell - > python --version - > ``` - - * Create a virtual environment with Python's built in `venv` module. In linux, this - package may be required to be installed with the Operating System package manager. - ```shell - python -m venv .venv - ``` - - * Activate the virtual environment +This concise syntax is perfect for exploratory work and rapid prototyping! - ### Bash/Zsh +### Batch Processing: Multiple Products - `source .venv/bin/activate` +Transducible functions automatically support batch processing. Process multiple products at once in parallel: - ### Fish - - `source .venv/bin/activate.fish` - - ### VSCode - - Press `F1` key and start typing `> Select python` and select `Select Python Interpreter` - - * Install the package - ```bash - python -m pip install ./agentics - ``` - - -=== "uv" - - * Ensure `uv` is installed. - ```bash - command -v uv >/dev/null && curl -LsSf https://astral.sh/uv/install.sh | sh - # It's recommended to restart the shell afterwards - exec $SHELL - ``` - * `uv venv --python 3.11` - * `uv pip install ./agentics` or `uv add ./agentics` (recommended) - - -=== "uvx πŸƒπŸ½" - - > This is a way to run agentics temporarily or quick tests +```python - * Ensure `uv` is installed. 
- ```bash - command -v uv >/dev/null && curl -LsSf https://astral.sh/uv/install.sh | sh - # It's recommended to restart the shell afterwards - exec $SHELL - ``` - * uvx --verbose --from ./agentics ipython +products = [ + ProductDescription( + name="Agentics Framework", + features="Type-safe AI workflows with LLM-powered transductions", + price=0.0 + ), + ProductDescription( + name="Smart Coffee Maker", + features="AI-powered brewing with perfect temperature control", + price=299.99 + ), + ProductDescription( + name="Wireless Earbuds Pro", + features="Active noise cancellation and 30-hour battery life", + price=149.99 + ), +] + +# Automatically processes all products in parallel +tweets = await generate_viral_tweet(products) + +# Display results +for product, tweet in zip(products, tweets): + print(f"\nπŸ“¦ Product: {product.name}") + print(f"πŸ”₯ Tweet: {tweet.tweet}") + print(f"πŸ“± Tags: {' '.join(tweet.hashtags)}") +``` +**Output:** +``` +πŸ“¦ Product: Agentics Framework +πŸ”₯ Tweet: Stop wrestling with unstructured LLM outputs! 🎯 Agentics gives you type-safe AI workflows that just work. Build production-ready agents in minutes, not weeks. And it's FREE! πŸš€ +πŸ“± Tags: #AI #OpenSource #Python #LLM #DevTools -=== "Conda" +πŸ“¦ Product: Smart Coffee Maker +πŸ”₯ Tweet: Wake up to perfection! β˜• Our AI-powered coffee maker learns your taste and brews the perfect cup every time. Never settle for mediocre coffee again! πŸ€– +πŸ“± Tags: #SmartHome #Coffee #AI #Tech #MorningRoutine - 1. Create a conda environment: - ```bash - conda create -n agentics python=3.11 - ``` - In this example the name of the environment is `agetnics` but you can change - it to your personal preference. +πŸ“¦ Product: Wireless Earbuds Pro +πŸ”₯ Tweet: Silence the world, amplify your music! 🎧 30 hours of pure audio bliss with active noise cancellation. Your commute just got an upgrade! 
πŸ”‹ +πŸ“± Tags: #Audio #Tech #Wireless #Music #Productivity +``` +The same transducible function works seamlessly for both single items and batchesβ€”no code changes needed! - 2. Activate the environment - ```bash - conda activate agentics - ``` - 3. Install `agentics` from a folder or git reference - ```bash - pip install ./agentics - ``` -## Documentation +## Next +- πŸ‘‰ [Core Concepts](core_concepts.md) - Understanding the theoretical foundation -This documentation page is written using Mkdocs. -You can start the server to visualize this interactively. -```bash -mkdocs serve -``` -After started, documentation will be available here [http://127.0.0.1:8000/](http://127.0.0.1:8000/) +## Go to Index +- πŸ‘‰ [Index](index.md) diff --git a/docs/index.md b/docs/index.md index 7286b40e9..54568920a 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,144 +1,106 @@ # 🌐 Agentics -Agentics is a lightweight, Python-native framework for building **structured and massively parallel agentic workflows** using Pydantic models and **transducible functions** . +Agentics is a lightweight, Python-native framework for building **structured and massively parallel agentic workflows** using Pydantic models and **transducible functions**. --- ## πŸ“š Documentation Overview -- **[Getting Started](getting_started.md)** πŸš€ - Install Agentics, set up your environment, and run your first transducible function over a small dataset. - -- **[Core Concepts](core_concepts.md)** 🧠 - The mental model: Pydantic types, transducible functions, typed state containers, Logical Transduction Algebra (LTA), and Map–Reduce. - - -- **[Transducible Functions](transducible_functions.md)** βš™οΈ - How to define, configure, and invoke transducible functions; specifying instructions; controlling temperature, retries, and structured decoding. 
- -- **[Agentics](agentics.md)** 🧬 - Defining Pydantic models for inputs/outputs, working with `AG` containers, loading data from JSON/CSV/DataFrames, and preserving type information across the pipeline. - -- **[Logical Transduction Algebra](tutorials/logical_transduction_algebra.ipynb)** πŸ” - Chaining transducible functions, branching, fan-in/fan-out patterns, and building reusable pipeline components. - -- **[Async Map–Reduce Execution](tutorials/map_reduce.ipynb)** πŸš€ - Using `amap` and `areduce` for large-scale runs, batching strategies, handling failures, and performance considerations. - - -- **[Examples & Use Cases](./examples)** πŸ“˜ - End-to-end examples: text-to-SQL, data extraction and enrichment, classification, document workflows, evaluation pipelines, and more. - - - ---- - -## Transducible Functions - -A **transducible function** is an LLM-powered, type-safe transformation between Pydantic models. Agentics lets you: - -- Define these transformations **declaratively** -- Compose them into **pipelines** -- Execute them at scale using an asynchronous **Map–Reduce** execution engine βš™οΈ - -Under the hood, Agentics is grounded in **Logical Transduction Algebra (LTA)**, a logico-mathematical formalism that guarantees: - -- βœ… Composability -- βœ… Explainability -- βœ… Stability of LLM-based transformations - -The result is a way to build agentic systems that are: - -- **Typed** – every step has explicit input/output schemas πŸ“ -- **Composable** – pipelines are built from reusable transducible functions 🧩 -- **Traceable** – outputs carry evidence back to input fields πŸ” -- **Scalable** – async `amap` / `areduce` primitives support large workloads πŸš€ -- **Minimal** – no heavy orchestrators: just types, functions, and data πŸͺΆ - -Agentics code is **simple, predictable, and robust**, and is easy to embed into modern ecosystems (LangFlow, LangChain, CrewAI, MCP, etc.) 🀝. 
+### Core Documentation +- **[Getting Started](getting_started.md)** πŸš€ + Install Agentics, set up your environment, and run your first transducible function over a small dataset. +- **[Core Concepts](core_concepts.md)** 🧠 + Pydantic types, transducible functions, typed state containers, Logical Transduction Algebra (LTA), and Map–Reduce. -## πŸ”‘ Key Features +- **[Transducible Functions](transducible_functions.md)** βš™οΈ + How to define, configure, and execute transducible functions. + Understanding dynamic generation and composition of transducible functions, batch processing, and provenance of generation. -### βš™οΈ Transducible Functions (Core Abstraction) +- **[Map-Reduce Operations](map_reduce.md)** πŸ” + Scaling transducible functions with map and reduce operations, batch processing patterns, and best practices. -Define LLM-powered transformations as first-class functions: +- **[Semantic Operators](semantic_operators.md)** πŸ” + High-level declarative API for data transformations using natural language. Includes `sem_map`, `sem_filter`, `sem_agg`, and more LOTUS-style operations. -- 🧾 Typed input and output via Pydantic models -- πŸ›‘οΈ Automatic schema validation and type-constrained generation -- πŸͺœ Composable into higher-level workflows and chains +- **[Agentics (AG)](agentics.md)** 🧬 + Working with `AG` typed state containers, loading data from JSON/CSV/DataFrames, and preserving type information across the pipeline. ---- +### Advanced Topics -### 🧱 Typed State Containers - a.k.a. Agentics (AG) +- **[Performance Optimization](optimization.md)** ⚑ + Batch size tuning, persisting intermediate results, performance optimization strategies, performance benchmarking, error handling, and best practices. -Wrap data into typed state collections so that every row or document carries a concrete Pydantic type: +- **[Tool Integration](tool_integration.md)** πŸ”Œ + Using MCP tools, tool usage patterns, custom tools, and best practices. 
-- Safe, batch-level operations βœ… -- Clear semantics over datasets and intermediate states πŸ“Š -- Input/output from DBs, CSV and Json -- Ideal to represent tabular/structured data +### Tutorials & Examples ---- +- **[Logical Transduction Algebra](../tutorials/logical_transduction_algebra.ipynb)** πŸ” + Interactive tutorial: Chaining transducible functions, branching, fan-in/fan-out patterns, and building reusable pipeline components. -### πŸš€ Async Map–Reduce Execution +- **[Map-Reduce Tutorial](../tutorials/map_reduce.ipynb)** πŸš€ + Interactive tutorial: Using `amap` and `areduce` for large-scale runs, batching strategies, handling failures, and performance considerations. -Run transducible functions over large collections using: +- **[Examples & Use Cases](../examples)** πŸ“˜ + End-to-end examples: text-to-SQL, data extraction and enrichment, classification, document workflows, evaluation pipelines, and more. -- ⚑ `amap` for massively parallel application -- πŸ“‰ `areduce` for aggregations and global summaries -Designed to scale on multi-core or distributed execution backends πŸ–₯️πŸ–₯️πŸ–₯️. +## How to Cite Agentics +- **[References](references.md)** πŸ“š + Academic papers and research that form the foundation of Agentics, including transduction algebra, agentic AI, and applications. --- -### 🧩 Dynamic Type & Function Composition - -Create new workflows on the fly: - -- πŸ”„ Merge or refine types dynamically -- 🧬 Compose transducible functions declaratively -- πŸ”€ Build polymorphic or adaptive pipelines driven by data and instructions +## πŸ“– Glossary ---- +**AG (Agentics)** +Short for "Agentics". A typed state container that wraps a list of Pydantic objects, enabling structured transductions. Used as `AG[Type]` or simply `AG(atype=Type)`. The recommended way to work with collections of typed data. -### πŸ” Explainable & Traceable Inference +**Agentics** +The full name of the framework and the class name for typed state containers. 
In code, typically imported and used as `AG` for brevity. -Each generated attribute can be traced back to: +**Transducible Function** +A typed, explainable function that maps inputs of type `Source` to outputs of type `Target`. Defined using the `@transducible()` decorator or dynamically with the `<<` operator. Guarantees totality, local evidence, and slot-level provenance. -- Specific input fields 🧷 -- The specific transducible function or step that produced it 🧠 +**Transduction** +The process of transforming data from one typed structure to another using LLM-powered reasoning. Unlike simple mapping, transduction preserves semantic relationships and provides explainability. -This enables **auditable, debuggable** LLM reasoning across the pipeline. +**Logical Transduction Algebra (LTA)** +The formal mathematical framework underlying Agentics. Treats transductions as morphisms between types, enabling composition, explainability, and stability guarantees. ---- +**`<<` Operator (Left Shift)** +The transduction operator. `Target << Source` creates a transducible function that maps `Source` to `Target`. Can be used with types, instances, or existing functions for composition. -### πŸ›‘οΈ End-to-End Type Safety +**`With()` Function** +A helper that wraps a source type with configuration parameters. Used as `Target << With(Source, instructions="...", tools=[...])` to create configured transducible functions dynamically. -Pydantic models are enforced at every boundary: +**TransductionResult** +A wrapper object returned when `provide_explanation=True`. Supports automatic unpacking into `(value, explanation)` tuples or single value assignment. -- βœ… Validation on input loading -- βœ… Validation after each transducible function -- βœ… Predictable runtime behavior and clear failure modes +**AType** +Short for "Agentics Type". The Pydantic model class that defines the schema for all instances in an AG container. Accessed via `ag.atype`. 
---- +**Map-Reduce** +The execution pattern for scaling transductions. `amap` applies a function to each element in parallel; `areduce` aggregates results into a summary. -### πŸ”Œ Tool Integration +**MCP (Model Context Protocol)** +A standard protocol for exposing tools (web search, databases, APIs) to LLMs. Agentics supports MCP tools via the `tools` parameter. -Agentics is fully compatible with Model Context Protocol (MCP) and expose external tools and knowledge to transducible functions: +**Evidence** +The subset of input fields that contributed to generating a specific output field. Tracked automatically to enable explainability and provenance. -- 🌐 Web / search tools -- πŸ—„οΈ Databases & vector stores -- πŸ’» Code execution backends -- πŸ”— MCP-based tools +**Slot** +A field in a Pydantic model. "Slot-level provenance" means tracking which input slots contributed to each output slot. --- -### ✨ Minimalistic, Pythonic API - -The framework is intentionally small: +## Documentation -- 🚫 No custom DSL to learn -- 🐍 Just Python functions, Pydantic models, and a few core primitives -- πŸŒ‰ Easy to embed into existing stacks (LangFlow nodes, CrewAI agents, MCPs, etc.) +This documentation page is written using Mkdocs. +You can start the server to visualize this interactively. +```bash +mkdocs serve +``` +After started, documentation will be available here [http://127.0.0.1:8000/](http://127.0.0.1:8000/) diff --git a/docs/map_reduce.md b/docs/map_reduce.md new file mode 100644 index 000000000..ddb9ff0be --- /dev/null +++ b/docs/map_reduce.md @@ -0,0 +1,381 @@ +# πŸ” Map-Reduce Operations + +Map-Reduce is the execution pattern for scaling transducible functions to large datasets. Agentics provides built-in support for both **map** (parallel transformation) and **reduce** (aggregation) operations over typed collections. 
+ +--- + +## Overview + +When you define a transducible function, it automatically supports both single-item and batch processing: + +```python +from pydantic import BaseModel + +class UserMessage(BaseModel): + content: str + +class Email(BaseModel): + to: str + subject: str + body: str + +@transducible() +async def write_email(message: UserMessage) -> Email: + """Convert a message into a professional email.""" + return Transduce(message) + +# Single item +email = await write_email(UserMessage(content="Hi John, great progress!")) + +# Batch processing (automatic map) +messages = [ + UserMessage(content="Hi John, I made great progress with Agentics."), + UserMessage(content="Hi, I fixed the last blocking bug in the pipeline."), +] +emails = await write_email(messages) # Returns list[Email] +``` + +--- + +## The Map Operation + +The **map** operation applies a transducible function to each element independently, enabling concurrency and parallelism. + +### How Map Works + +```python +# Conceptually: +# amap(write_email, messages) -> list[Email] + +# Each element is processed independently +# Results maintain the same order as inputs +``` + +### Map Characteristics + +| Aspect | Description | +|--------|-------------| +| **Input** | Single item or list of items | +| **Output** | List of transformed items (one per input) | +| **Operation** | Independent transformation of each element | +| **Parallelization** | Fully parallel - elements processed concurrently | +| **Use Cases** | Enrichment, extraction, classification, normalization | + +### Map Examples + +**Example 1: Data Enrichment** + +```python +class Product(BaseModel): + name: str + category: str + +class EnrichedProduct(BaseModel): + name: str + category: str + description: str + keywords: list[str] + +@transducible() +async def enrich_product(product: Product) -> EnrichedProduct: + """Add description and keywords to product.""" + return Transduce(product) + +# Process entire catalog +products = load_products() 
# list[Product] +enriched = await enrich_product(products) # Parallel processing +``` + +**Example 2: Text Classification** + +```python +class Document(BaseModel): + text: str + +class ClassifiedDocument(BaseModel): + text: str + category: str + confidence: float + tags: list[str] + +@transducible(batch_size=20) +async def classify_document(doc: Document) -> ClassifiedDocument: + """Classify document into categories.""" + return Transduce(doc) + +documents = load_documents(1000) +classified = await classify_document(documents) # Processes in batches of 20 +``` + +--- + +## The Reduce Operation + +The **reduce** operation aggregates a collection of items into a single summary or consolidated result. + +### Using `transduction_type="areduce"` + +Specify the transduction type to create a reduce operation: + +```python +from typing import List + +class Review(BaseModel): + text: str + rating: int + +class ReviewSummary(BaseModel): + overall_sentiment: str + average_rating: float + key_themes: List[str] + total_reviews: int + +@transducible(transduction_type="areduce") +async def summarize_reviews(reviews: List[Review]) -> ReviewSummary: + """Aggregate multiple reviews into a single summary.""" + return Transduce(reviews) + +# Use it +reviews = [ + Review(text="Great product!", rating=5), + Review(text="Good value for money", rating=4), + Review(text="Not bad, could be better", rating=3), +] + +summary = await summarize_reviews(reviews) +print(f"Overall: {summary.overall_sentiment}") +print(f"Average: {summary.average_rating}") +``` + +### Reduce Characteristics + +| Aspect | Description | +|--------|-------------| +| **Input** | List of items | +| **Output** | Single aggregated result | +| **Operation** | Aggregation across all elements | +| **Parallelization** | Sequential or hierarchical | +| **Use Cases** | Summarization, statistics, consolidation, consensus | + +### Common Reduce Patterns + +**Pattern 1: Summarization** + +```python +class Document(BaseModel): + 
title: str + content: str + +class ExecutiveSummary(BaseModel): + main_points: List[str] + conclusion: str + word_count: int + +@transducible(transduction_type="areduce") +async def create_executive_summary(docs: List[Document]) -> ExecutiveSummary: + """Summarize multiple documents into key insights.""" + return Transduce(docs) +``` + +**Pattern 2: Statistical Aggregation** + +```python +class DataPoint(BaseModel): + value: float + category: str + timestamp: str + +class Statistics(BaseModel): + mean: float + median: float + categories: List[str] + trend: str # "increasing", "decreasing", "stable" + +@transducible(transduction_type="areduce") +async def analyze_data(points: List[DataPoint]) -> Statistics: + """Compute statistics and identify trends.""" + return Transduce(points) +``` + +**Pattern 3: Consensus Building** + +```python +class Opinion(BaseModel): + author: str + stance: str + reasoning: str + +class Consensus(BaseModel): + majority_view: str + key_arguments: List[str] + dissenting_views: List[str] + confidence: float + +@transducible(transduction_type="areduce") +async def build_consensus(opinions: List[Opinion]) -> Consensus: + """Find consensus across multiple opinions.""" + return Transduce(opinions) +``` + +--- + +## Dynamic Map-Reduce with `<<` Operator + +Create map and reduce operations on the fly: + +### Dynamic Map + +```python +# Create a map function dynamically +enrich = EnrichedProduct << Product + +products = [Product(name="Widget", category="Tools"), ...] 
+enriched = await enrich(products) # Automatic map +``` + +### Dynamic Reduce + +```python +from agentics import With + +# Create a reduce function on the fly +summarize = ReviewSummary << With( + List[Review], + transduction_type="areduce", + instructions="Analyze all reviews and provide comprehensive summary" +) + +summary = await summarize(reviews) +``` + +--- + +## Combining Map and Reduce + +Build complete Map-Reduce pipelines by chaining operations: + +```python +# Step 1: Map - Extract insights from each document +class Document(BaseModel): + text: str + +class Insight(BaseModel): + key_point: str + importance: int + +@transducible() +async def extract_insight(doc: Document) -> Insight: + """Extract key insight from a document.""" + return Transduce(doc) + +# Step 2: Reduce - Consolidate all insights +class Report(BaseModel): + top_insights: List[str] + overall_theme: str + +@transducible(transduction_type="areduce") +async def consolidate_insights(insights: List[Insight]) -> Report: + """Consolidate insights into a final report.""" + return Transduce(insights) + +# Execute the pipeline +documents = [Document(text="..."), Document(text="..."), ...] 
+insights = await extract_insight(documents) # Map phase +report = await consolidate_insights(insights) # Reduce phase +``` + +### Multi-Stage Pipeline Example + +```python +# Stage 1: Map - Clean and normalize +@transducible() +async def clean_data(raw: RawData) -> CleanData: + return Transduce(raw) + +# Stage 2: Map - Extract features +@transducible() +async def extract_features(clean: CleanData) -> Features: + return Transduce(clean) + +# Stage 3: Reduce - Aggregate statistics +@transducible(transduction_type="areduce") +async def compute_stats(features: List[Features]) -> Statistics: + return Transduce(features) + +# Execute pipeline +raw_data = load_raw_data() +clean = await clean_data(raw_data) +features = await extract_features(clean) +stats = await compute_stats(features) +``` + +--- + + +## Best Practices + +### For Map Operations + +1. **Use appropriate batch sizes** - Balance throughput and memory (see [Optimization](optimization.md)) +2. **Handle failures gracefully** - Individual items can fail without stopping the batch +3. **Monitor progress** - Use `verbose_transduction=True` for long-running operations +4. **Consider rate limits** - Adjust batch size for API rate limits + +### For Reduce Operations + +1. **Keep reduce operations focused** - Each reduce should have a clear aggregation goal +2. **Handle empty lists** - Consider what happens when the input list is empty +3. **Use hierarchical reduction** - For very large collections, reduce in stages +4. **Provide clear instructions** - Help the LLM understand the aggregation logic +5. **Consider token limits** - Large collections may exceed context windows +6. 
**Test with representative data** - Ensure reduce logic works across different input sizes + +### General Best Practices + +```python +# Good: Clear separation of concerns +@transducible() +async def extract(item: Raw) -> Processed: + """Map: Extract and normalize.""" + return Transduce(item) + +@transducible(transduction_type="areduce") +async def summarize(items: List[Processed]) -> Summary: + """Reduce: Aggregate results.""" + return Transduce(items) + +# Execute +processed = await extract(raw_items) +summary = await summarize(processed) +``` + +--- + +## Performance Considerations + +### Batch Size Tuning + +```python +# Small batches for complex operations +@transducible(batch_size=5) +async def complex_analysis(item: Data) -> Analysis: + return Transduce(item) + +# Large batches for simple operations +@transducible(batch_size=30) +async def simple_extraction(item: Data) -> Extract: + return Transduce(item) +``` + +### Parallel Execution + +Map operations are automatically parallelized based on `batch_size`. For more control, see [Optimization](optimization.md). + +--- + +## Next +- 👉 [Map-Reduce Tutorial](../tutorials/map_reduce.ipynb) to see how large-scale execution works in practice +- 👉 [Semantic Operators](semantic_operators.md) for performing data transformation tasks using natural language. + +## Go to Index +- 👉 [Index](index.md) diff --git a/docs/optimization.md b/docs/optimization.md new file mode 100644 index 000000000..bd2e085a9 --- /dev/null +++ b/docs/optimization.md @@ -0,0 +1,324 @@ +# ⚡ Performance Optimization + +Efficient batch processing and performance optimization are crucial for large-scale transductions. This guide covers strategies to maximize throughput, manage resources, and handle large datasets effectively. + +--- + +## Understanding Batch Size + +The `batch_size` parameter controls how many items are processed concurrently. Choosing the right batch size is critical for balancing throughput, memory usage, and reliability. 
+ +```python +# Small batches - lower memory, more overhead +@transducible(batch_size=5) +async def conservative_process(state: Item) -> Result: + return Transduce(state) + +# Large batches - higher throughput, more memory +@transducible(batch_size=25) +async def aggressive_process(state: Item) -> Result: + return Transduce(state) +``` + +### Choosing the Right Batch Size + +| Scenario | Recommended Batch Size | Reason | +|----------|----------------------|---------| +| Simple transformations (< 1s each) | 20-30 | Maximize throughput | +| Complex reasoning (> 5s each) | 5-10 | Avoid timeout issues | +| Large input/output objects | 10-15 | Manage memory usage | +| Rate-limited APIs | 5-15 | Stay within limits | +| Local LLM (Ollama) | 1-5 | Limited by GPU memory | + +--- + +## Persisting Intermediate Results + +Use `persist_output` to save results incrementally, enabling recovery from failures: + +```python +@transducible( + batch_size=20, + persist_output="./output/processed_batches" +) +async def process_large_dataset(state: DataItem) -> ProcessedItem: + """Results saved after each batch completes.""" + return Transduce(state) + +# Process 10,000 items +large_dataset = load_items(10000) +results = await process_large_dataset(large_dataset) + +# If interrupted, previously completed batches are saved +# Resume by loading saved batches and processing remaining items +``` + +### File Structure + +``` +output/processed_batches/ +β”œβ”€β”€ batch_0000.jsonl # First 20 items +β”œβ”€β”€ batch_0001.jsonl # Next 20 items +β”œβ”€β”€ batch_0002.jsonl # And so on... +└── ... +``` + +--- + +## Monitoring Progress + +Enable verbose logging to track batch processing: + +```python +@transducible( + batch_size=25, + verbose_transduction=True, # Show progress + verbose_agent=False # Hide detailed agent logs +) +async def monitored_process(state: Item) -> Result: + return Transduce(state) + +# Output shows: +# Processing batch 1/40 (25 items)... +# Processing batch 2/40 (25 items)... 
+# ... +``` + +--- + +## Performance Optimization Strategies + +### 1. Adaptive Batch Sizing + +Tune batch size based on item complexity: + +```python +# Adaptive batching based on input size +def get_batch_size(items): + avg_size = sum(len(str(item)) for item in items) / max(len(items), 1) # max() avoids ZeroDivisionError on an empty dataset + if avg_size < 500: + return 25 # Small items + elif avg_size < 2000: + return 15 # Medium items + else: + return 5 # Large items + +batch_size = get_batch_size(dataset) +process_fn = Result << With(Item, batch_size=batch_size) +``` + +### 2. Field-Specific Transduction + +Only transduce the fields you need: + +```python +@transducible( + transduce_fields=["summary", "category"], # Only these fields + batch_size=30 +) +async def focused_transform(state: FullData) -> PartialResult: + """Faster by ignoring unnecessary fields.""" + return Transduce(state) +``` + +### 3. Reduce Token Usage with Prompt Templates + +```python +# Custom template to reduce token count +compact_template = """ +Input: {input_data} +Task: {instructions} +Output format: {output_schema} +""" + +@transducible( + prompt_template=compact_template, + batch_size=40 +) +async def efficient_transform(state: Item) -> Result: + return Transduce(state) +``` + +### 4. 
Parallel Processing with Multiple Workers + +For extremely large datasets, consider splitting the work into chunks that are processed concurrently: + +```python +import asyncio +from concurrent.futures import ProcessPoolExecutor + +async def process_chunk(chunk, process_fn): + """Process a chunk of data.""" + return await process_fn(chunk) + +async def parallel_process(dataset, process_fn, num_workers=4): + """Split dataset across multiple workers.""" + chunk_size = max(1, -(-len(dataset) // num_workers)) # Ceiling division; at least 1 so range() gets a valid step + chunks = [dataset[i:i+chunk_size] for i in range(0, len(dataset), chunk_size)] + + tasks = [process_chunk(chunk, process_fn) for chunk in chunks] + results = await asyncio.gather(*tasks) + + # Flatten results + return [item for chunk_result in results for item in chunk_result] +``` + +--- + +## Performance Benchmarking + +Measure throughput for your specific use case: + +```python +import time + +async def benchmark_transduction(): + test_items = generate_test_data(100) + + start = time.perf_counter() + results = await process_fn(test_items) + elapsed = time.perf_counter() - start + + print(f"Processed {len(results)} items in {elapsed:.2f}s") + print(f"Throughput: {len(results)/elapsed:.2f} items/sec") + print(f"Average time per item: {elapsed/len(results):.2f}s") + +await benchmark_transduction() +``` + +### Profiling Memory Usage + +```python +import tracemalloc + +async def profile_memory(): + tracemalloc.start() + + # Your transduction + results = await process_fn(large_dataset) + + current, peak = tracemalloc.get_traced_memory() + print(f"Current memory: {current / 1024 / 1024:.2f} MB") + print(f"Peak memory: {peak / 1024 / 1024:.2f} MB") + + tracemalloc.stop() +``` + +--- + +## Error Handling & Retries + +### Automatic Retries + +Configure retry behavior for transient failures: + +```python +@transducible( + max_retries=3, # Retry up to 3 times + retry_delay=2.0, # Wait 2 seconds between retries + batch_size=20 +) +async def resilient_process(state: Item) -> Result: + return Transduce(state) +``` + +### 
Graceful Degradation + +Use optional fields to handle partial failures: + +```python +class RobustResult(BaseModel): + required_field: str + optional_field: Optional[str] = None # May be None if extraction fails + confidence: Optional[float] = None + +@transducible( + batch_size=25, + allow_partial=True # Continue even if some fields fail +) +async def robust_transform(state: Item) -> RobustResult: + return Transduce(state) +``` + +### Batch-Level Error Handling + +```python +async def process_with_error_handling(items): + results = [] + failed = [] + + for batch in chunk_items(items, batch_size=20): + try: + batch_results = await process_fn(batch) + results.extend(batch_results) + except Exception as e: + print(f"Batch failed: {e}") + failed.extend(batch) + + # Retry failed items with smaller batch size + if failed: + print(f"Retrying {len(failed)} failed items...") + retry_fn = Result << With(Item, batch_size=5) + retry_results = await retry_fn(failed) + results.extend(retry_results) + + return results +``` + +--- + +## Best Practices + +1. **Start with conservative batch sizes** - Increase gradually based on benchmarks +2. **Monitor memory usage** - Especially with large input/output objects +3. **Use persist_output for long-running jobs** - Protect against interruptions +4. **Profile before optimizing** - Measure to identify actual bottlenecks +5. **Consider API rate limits** - Adjust batch size and concurrency accordingly +6. **Test with representative data** - Performance varies with input complexity +7. **Use field-specific transduction** - Only process what you need +8. 
**Enable progress monitoring** - Track long-running operations + +--- + +## Common Performance Issues + +### Issue: High Memory Usage + +**Symptoms:** Process crashes or slows down with large datasets + +**Solutions:** +- Reduce batch size +- Use field-specific transduction +- Process in chunks with persistence +- Stream results instead of loading all at once + +### Issue: Slow Throughput + +**Symptoms:** Processing takes much longer than expected + +**Solutions:** +- Increase batch size (if memory allows) +- Reduce prompt complexity +- Use faster LLM models +- Optimize prompt templates +- Consider parallel processing + +### Issue: Frequent Timeouts + +**Symptoms:** Many requests timeout or fail + +**Solutions:** +- Reduce batch size +- Increase timeout value +- Simplify the transduction task +- Use faster models +- Check network connectivity + +--- + +## See Also + +- πŸ‘‰ [Transducible Functions](transducible_functions.md) - Core concepts and basic usage +- πŸ‘‰ [Tool Integration](tool_integration.md) - Using external tools +- πŸ‘‰ [Map-Reduce Tutorial](../tutorials/map_reduce.ipynb) - Large-scale execution patterns +- πŸ‘‰ [Index](index.md) diff --git a/docs/references.md b/docs/references.md new file mode 100644 index 000000000..be79a0237 --- /dev/null +++ b/docs/references.md @@ -0,0 +1,45 @@ +# References + +This page contains academic papers and research that form the foundation of Agentics. +If you use Agentics in your research or project, please cite the relevant papers listed below. Each entry includes BibTeX citations for easy integration into your bibliography. + +> **Note:** This list is actively maintained and updated as new papers are published. + +--- + +1. 
**Transduction is All You Need for Structured Data Workflows** (2025) + - Authors: Alfio Gliozzo, Naweed Khan, Christodoulos Constantinides, Nandana Mihindukulasooriya, Nahuel Defosse, Gaetano Rossiello, Junkyu Lee + - URL: https://arxiv.org/abs/2508.15610 + ```bibtex + @article{gliozzo2025transduction, + title={Transduction is All You Need for Structured Data Workflows}, + author={Gliozzo, Alfio and Khan, Naweed and Constantinides, Christodoulos and Mihindukulasooriya, Nandana and Defosse, Nahuel and Rossiello, Gaetano and Lee, Junkyu}, + journal={arXiv preprint arXiv:2508.15610}, + year={2025} + } + ``` + +2. **Semantic Trading: Agentic AI for Clustering and Relationship Discovery in Prediction Markets** (2025) + - Authors: Agostino Capponi, Alfio Gliozzo, Brian Zhu + - URL: https://arxiv.org/abs/2512.02436 + ```bibtex + @article{capponi2025semantic, + title={Semantic Trading: Agentic AI for Clustering and Relationship Discovery in Prediction Markets}, + author={Capponi, Agostino and Gliozzo, Alfio and Zhu, Brian}, + journal={arXiv preprint arXiv:2512.02436}, + year={2025} + } + ``` + +3. 
**DAO-AI: Evaluating Collective Decision-Making through Agentic AI in Decentralized Governance** (2026) + - Authors: Chunghyun Han, Alfio Gliozzo, Junkyu Lee, Agostino Capponi + - URL: https://arxiv.org/abs/2510.21117 + ```bibtex + @inproceedings{han2026daoai, + title={{DAO}-{AI}: Evaluating Collective Decision-Making through Agentic {AI} in Decentralized Governance}, + author={Chunghyun Han and Alfio Gliozzo and Junkyu Lee and Agostino Capponi}, + booktitle={AAAI'26 Workshop on Agentic AI in Financial Services}, + year={2026}, + url={https://arxiv.org/abs/2510.21117} + } + ``` diff --git a/docs/semantic_operators.md b/docs/semantic_operators.md new file mode 100644 index 000000000..35cf18ab7 --- /dev/null +++ b/docs/semantic_operators.md @@ -0,0 +1,346 @@ +# πŸ” Semantic Operators + +Semantic operators provide a high-level, declarative API for performing common data transformation tasks using natural language. Inspired by [LOTUS](https://lotus-data.github.io/)-style semantic operations, these operators enable you to work with structured and unstructured data using LLM-powered transformations. + +--- + +## Overview + +Agentics semantic operators bridge the gap between traditional data manipulation (like pandas operations) and LLM-powered semantic understanding. Each operator accepts either an `AG` (Agentics) or a pandas `DataFrame` as input and returns the same type, making them easy to integrate into existing data pipelines. + +### Available Operators + +| Operator | Description | +|----------|-------------| +| `sem_map` | Map each record using a natural language instruction | +| `sem_filter` | Keep records that match a natural language predicate | +| `sem_agg` | Aggregate across all records (e.g., for summarization) | + +--- + +## `sem_map` + +Transform each record in your dataset according to natural language instructions, mapping source data to a target schema. 
+ +### Signature + +```python +async def sem_map( + source: AG | pd.DataFrame, + target_type: Type[BaseModel] | str, + instructions: str, + merge_output: bool = True, + **kwargs, +) -> AG | pd.DataFrame +``` + +### Parameters + +- **`source`** (`AG | pd.DataFrame`): Input data to be mapped +- **`target_type`** (`Type[BaseModel] | str`): Target schema for the output + - If a Pydantic `BaseModel` subclass: used directly as the target type + - If a `str`: a Pydantic model is created dynamically with a single string field +- **`instructions`** (`str`): Natural language description of how to transform the data +- **`merge_output`** (`bool`, default=`True`): + - `True`: Merge mapped fields back into original source records + - `False`: Return only the mapped output +- **`**kwargs`**: Additional arguments forwarded to `AG()` constructor (e.g., model configuration, batching) + +### Returns + +- **`AG | pd.DataFrame`**: `AG` or `DataFrame` that contains the transformed data following `target_type` + +### Example: Basic Mapping + +```python +import pandas as pd +from agentics.core.semantic_operators import sem_map +from pydantic import BaseModel, Field + +# Sample data +df = pd.DataFrame({ + 'review': [ + 'This product is amazing! Best purchase ever.', + 'Terrible quality, broke after one day.', + 'It works okay, nothing special.' + ] +}) + +# Define target schema +class Sentiment(BaseModel): + sentiment: str | None = Field(None, description="The sentiment of the review (e.g., positive, negative, neutral)") + confidence: float | None = Field(None, description="Confidence score of the sentiment analysis between 0 and 1") + +# Map reviews to sentiment +result = await sem_map( + source=df, + target_type=Sentiment, + instructions="Analyze the sentiment of the review and provide a confidence score between 0 and 1." +) + +# Output includes original 'review' column plus 'sentiment' and 'confidence' columns + review sentiment confidence + 0 This product is amazing! Best purchase ever. 
positive 0.85 + 1 Terrible quality, broke after one day. negative 0.99 + 2 It works okay, nothing special. neutral 0.85 +``` + +### Example: String-based Target Type + +```python +# Using string target type for simpler cases +result = await sem_map( + source=df, + target_type="category", + instructions="Classify the review into one of: positive, negative, neutral" +) +``` + +--- + +## `sem_filter` + +Filter records based on a natural language predicate, keeping only those that satisfy the condition. + +### Signature + +```python +async def sem_filter( + source: AG | pd.DataFrame, + predicate_template: str, + **kwargs +) -> AG | pd.DataFrame +``` + +### Parameters + +- **`source`** (`AG | pd.DataFrame`): Input data to be filtered +- **`predicate_template`** (`str`): Natural language condition or LangChain-style template + - Can use `{field}` placeholders to reference source fields + - Or provide a plain text predicate +- **`**kwargs`**: Additional arguments forwarded to `AG()` constructor + +### Returns + +- **`AG | pd.DataFrame`**: Filtered data containing only records that satisfy the predicate + +### Example: Simple Predicate + +```python +from agentics.core.semantic_operators import sem_filter + +df = pd.DataFrame({ + 'product': ['Laptop', 'Phone', 'Tablet', 'Monitor'], + 'description': [ + 'High-performance gaming laptop with RGB keyboard', + 'Budget smartphone with basic features', + 'Premium tablet with stylus support', + '4K monitor for professional work' + ] +}) + +# Filter for premium/high-end products +result = await sem_filter( + source=df, + predicate_template="The product is premium or high-end" +) + +print(result) + product description +0 Laptop High-performance gaming laptop with RGB keyboard +1 Tablet Premium tablet with stylus support +2 Monitor 4K monitor for professional work +``` + +### Example: Template-based Filtering + +```python +# Use field placeholders in the predicate +result = await sem_filter( + source=df, + predicate_template="The 
{product} described as '{description}' is suitable for gaming" +) +``` + +--- + +## `sem_agg` + +Aggregate data across all records to produce a summary or consolidated output. + +### Signature + +```python +async def sem_agg( + source: AG | pd.DataFrame, + target_type: Type[BaseModel] | str, + instructions: str = None, + **kwargs, +) -> AG | pd.DataFrame +``` + +### Parameters + +- **`source`** (`AG | pd.DataFrame`): Input data to be aggregated +- **`target_type`** (`Type[BaseModel] | str`): Schema for the aggregated output +- **`instructions`** (`str`, optional): Natural language description of the aggregation +- **`**kwargs`**: Additional arguments forwarded to `AG()` constructor + +### Returns + +- **`AG | pd.DataFrame`**: Aggregated result (typically a single record or summary) + +### Example: Summarization + +```python +from agentics.core.semantic_operators import sem_agg +from pydantic import BaseModel + +df = pd.DataFrame({ + 'review': [ + 'Great product, very satisfied!', + 'Good quality but expensive', + 'Not worth the price', + 'Excellent, would buy again', + 'Decent but has some issues' + ] +}) + +class ReviewSummary(BaseModel): + overall_sentiment: str + key_themes: list[str] + recommendation: str + +# Aggregate all reviews into a summary +result = await sem_agg( + source=df, + target_type=ReviewSummary, + instructions="Summarize all reviews, identify key themes, and provide an overall recommendation" +) + +print(result) +# Returns a single record with aggregated insights +``` + +### Example: Statistical Summary + +```python +class Statistics(BaseModel): + total_count: int + positive_count: int + negative_count: int + average_sentiment: str + +result = await sem_agg( + source=df, + target_type=Statistics, + instructions="Count total reviews, positive reviews, negative reviews, and determine average sentiment" +) +``` + +--- + +## Best Practices + +### 1. 
Choose the Right Operator + +- **`sem_map`**: Use for 1:1 transformations (each input → one output) +- **`sem_filter`**: Use for selecting subsets based on conditions +- **`sem_agg`**: Use for many:1 transformations (all inputs → one summary) + +### 2. Write Clear Instructions + +```python +# ❌ Vague +instructions = "Process the data" + +# ✅ Clear and specific +instructions = """ +Extract the product name, price, and category from each description. +Normalize prices to USD. Categorize products as: Electronics, Clothing, or Home Goods. +""" +``` + +### 3. Use Appropriate Target Types + +```python +# For simple extractions, use string types +sem_map( + ... + target_type="category_name" + ... +) + + +# For structured outputs, use Pydantic models +class Product(BaseModel): + name: str + price: float + category: str + +sem_map( + ... + target_type=Product + ... +) +``` + +### 4. Batch Processing + +```python +# Configure batch size for large datasets +result = await sem_map( + source=large_df, + target_type=MyType, + instructions="...", + amap_batch_size=50 # Process 50 records at a time +) +``` + +### 5. 
Handle Both AG and DataFrame + +```python +# Operators work with both types +df_result = await sem_filter(df, "condition") # Returns DataFrame +ag_result = await sem_filter(ag, "condition") # Returns AG +``` + +--- + +## Performance Considerations + +### Batching + +Semantic operators support batching for efficient processing of large datasets: + +```python +result = await sem_map( + source=df, + target_type=MyType, + instructions="...", + amap_batch_size=20 # Process 20 records at a time (20 is the default for sem_filter) +) +``` + + +## Integration with Agentics Workflows + +Semantic operators integrate seamlessly with other Agentics features: + +### Chaining Operations + +```python +# Filter → Map → Aggregate pipeline +filtered = await sem_filter(df, "High-value customers") +mapped = await sem_map(filtered, CustomerProfile, "Extract profile details") +summary = await sem_agg(mapped, Summary, "Summarize customer segments") +``` + +--- + +## Next +- 👉 [Semantic Operators Tutorial](../tutorials/semantic_operators.ipynb) - Code examples +- 👉 [Agentics (AG)](agentics.md) for data modeling patterns and typed state containers + +## Go to Index +- 👉 [Index](index.md) diff --git a/docs/tool_integration.md b/docs/tool_integration.md new file mode 100644 index 000000000..5a33f05ee --- /dev/null +++ b/docs/tool_integration.md @@ -0,0 +1,403 @@ +# 🔌 Tool Integration + +Transducible functions can use external tools to enhance their capabilities through the Model Context Protocol (MCP). This enables LLM-powered workflows to access real-time data, execute code, query databases, and interact with external services. + +--- + +## What Are Tools? 
+ +Tools extend transducible functions with external capabilities: + +- **Web search** - Retrieve real-time information from the internet +- **Database queries** - Access structured data from SQL/NoSQL databases +- **API calls** - Integrate with external services and APIs +- **Code execution** - Run computations and scripts +- **File operations** - Read/write files and process documents +- **Custom functions** - Any Python function you define + +--- + +## Using MCP Tools + +MCP (Model Context Protocol) provides a standard way to expose tools to LLMs, making them discoverable and callable during transductions. + +### Defining an MCP Server + +First, create an MCP server with your tools (see `examples/mcp_server_example.py`): + +```python +from ddgs import DDGS +from mcp.server.fastmcp import FastMCP + +mcp = FastMCP("Search") + +@mcp.tool() +def web_search(query: str, max_results: int = 5) -> list[str]: + """Search the web using DuckDuckGo. + + Args: + query: Search query with optional operators + max_results: Number of results to return (5-20) + + Returns: + List of search result snippets with titles and URLs + """ + results = DDGS().text(query, max_results=max_results) + return [f"{r['title']}\n{r['body']}\n{r['href']}" for r in results] + +if __name__ == "__main__": + mcp.run(transport="stdio") +``` + +### Using Tools in Transductions + +There are several ways to import and use MCP tools in your transductions: + +#### Option 1: Connect to Remote/External MCP Server + +Use `MCPServerAdapter` from `crewai_tools` to connect to a remote or external MCP server (e.g., a server provided by a third party or running on another machine): + +```python +import os +from pydantic import BaseModel +from typing import Optional +from mcp import StdioServerParameters +from crewai_tools import MCPServerAdapter +from agentics.core.transducible_functions import transducible, Transduce + +class ResearchQuery(BaseModel): + topic: Optional[str] = None + focus_area: Optional[str] = None 
+ +class ResearchReport(BaseModel): + summary: Optional[str] = None + key_findings: Optional[list[str]] = None + sources: Optional[list[str]] = None + +# Configure connection to remote MCP server +# The server could be: +# - A third-party MCP server (e.g., from a service provider) +# - An MCP server running on another machine +# - A pre-existing MCP server script +server_params = StdioServerParameters( + command="python3", + args=["path/to/remote/mcp_server.py"], # Path to the MCP server + env={"UV_PYTHON": "3.12", **os.environ}, +) + +# Connect to the remote MCP server +with MCPServerAdapter(server_params) as server_tools: + print(f"Available tools from remote server: {[tool.name for tool in server_tools]}") + + @transducible( + tools=server_tools, # Use tools from remote MCP server + reasoning=True, + max_iter=5 + ) + async def research_topic(state: ResearchQuery) -> ResearchReport: + """Research a topic using tools from remote MCP server.""" + return Transduce(state) + + # Execute + query = ResearchQuery( + topic="Agentics framework", + focus_area="practical applications" + ) + report = await research_topic(query) +``` + +**Example: Connecting to a Third-Party MCP Server** + +```python +# Example: Connect to a hypothetical weather MCP server +weather_server_params = StdioServerParameters( + command="npx", # MCP servers can be in any language + args=["-y", "@weather/mcp-server"], # npm package +) + +with MCPServerAdapter(weather_server_params) as weather_tools: + print(f"Weather tools: {[tool.name for tool in weather_tools]}") + + @transducible(tools=weather_tools, reasoning=True) + async def get_weather_report(location: Location) -> WeatherReport: + return Transduce(location) +``` + +**Note:** The `MCPServerAdapter` handles the connection lifecycle - it starts the server process when entering the context and stops it when exiting. You don't manage the server manually. 
+ +#### Option 2: Use CrewAI Tools + +Agentics supports CrewAI tools, which provide a standardized interface for tool integration: + +```python +from pydantic import BaseModel +from typing import Optional +from agentics.core.transducible_functions import transducible, Transduce +from crewai_tools import tool + +class ResearchQuery(BaseModel): + topic: Optional[str] = None + focus_area: Optional[str] = None + +class ResearchReport(BaseModel): + summary: Optional[str] = None + key_findings: Optional[list[str]] = None + sources: Optional[list[str]] = None + +# Define CrewAI tool +@tool("Web Search Tool") +def web_search(query: str, max_results: int = 5) -> list[str]: + """Search the web using DuckDuckGo. + + Args: + query: Search query + max_results: Number of results to return + + Returns: + List of search results with title, snippet, and URL + """ + from ddgs import DDGS + results = DDGS().text(query, max_results=max_results) + return [f"{r['title']}\n{r['body']}\n{r['href']}" for r in results] + +@transducible( + tools=[web_search], # Pass CrewAI tool + reasoning=True, + max_iter=5 +) +async def research_topic(state: ResearchQuery) -> ResearchReport: + """Research a topic using web search and synthesize findings.""" + return Transduce(state) + +# Use it +query = ResearchQuery( + topic="Agentics framework", + focus_area="practical applications" +) +report = await research_topic(query) +``` + +You can also use pre-built CrewAI tools: + +```python +from crewai_tools import SerperDevTool, WebsiteSearchTool + +# Use existing CrewAI tools +search_tool = SerperDevTool() +website_tool = WebsiteSearchTool() + +@transducible( + tools=[search_tool, website_tool], + reasoning=True, + max_iter=5 +) +async def research_with_multiple_tools(state: ResearchQuery) -> ResearchReport: + """Research using multiple CrewAI tools.""" + return Transduce(state) +``` + +#### Option 3: Launch MCP Server as Subprocess + +You can launch the MCP server as a separate subprocess and connect to it. 
This approach starts the server automatically when needed: + +```python +from mcp import ClientSession, StdioServerParameters +from mcp.client.stdio import stdio_client + +# Launch MCP server as subprocess +# The server_params specify how to start the server process +server_params = StdioServerParameters( + command="python", # Command to run + args=["mcp_server.py"], # Server script to execute +) + +# stdio_client launches the server and manages the connection +async with stdio_client(server_params) as (read, write): + async with ClientSession(read, write) as session: + # Initialize connection and discover available tools + await session.initialize() + tools = await session.list_tools() + + # Use tools in transduction + @transducible( + tools=tools, + reasoning=True, + max_iter=5 + ) + async def research_topic(state: ResearchQuery) -> ResearchReport: + return Transduce(state) + + # Execute + query = ResearchQuery(topic="Agentics framework") + report = await research_topic(query) + +# Server process is automatically terminated when exiting the context +``` + +**When to use each option:** +- **Option 1** (Remote MCP Server): Best for connecting to third-party or external MCP servers; use when you want to leverage existing MCP services or servers running elsewhere +- **Option 2** (CrewAI tools): Best for leveraging the CrewAI ecosystem and pre-built tools; simplest for getting started with existing tools +- **Option 3** (MCP subprocess with ClientSession): Lower-level approach for advanced use cases; gives more control over server lifecycle and communication; use for custom server management + +--- + +## Tool Usage Patterns + +### Pattern 1: Information Retrieval + +Enrich data with external sources: + +```python +@transducible(tools=[web_search, database_query]) +async def enrich_data(state: BasicInfo) -> EnrichedInfo: + """Enrich basic info with external data sources.""" + return Transduce(state) +``` + +### Pattern 2: Verification + +Verify claims against external 
sources: + +```python +@transducible(tools=[fact_checker, web_search]) +async def verify_claims(state: Claims) -> VerifiedClaims: + """Verify claims against external sources.""" + return Transduce(state) +``` + +### Pattern 3: Multi-Step Reasoning + +Solve complex problems requiring multiple tools: + +```python +@transducible( + tools=[web_search, calculator, code_executor], + reasoning=True, + max_iter=10, + verbose_agent=True # See tool calls +) +async def solve_complex_problem(state: Problem) -> Solution: + """Solve problems requiring multiple tools and reasoning steps.""" + return Transduce(state) +``` + +--- + +## Creating Custom Tools + +Define your own tools following the MCP pattern: + +```python +from mcp.server.fastmcp import FastMCP + +mcp = FastMCP("CustomTools") + +@mcp.tool() +def calculate_metrics(data: dict) -> dict: + """Calculate statistical metrics from data. + + Args: + data: Dictionary with numeric values + + Returns: + Dictionary with mean, median, std, etc. + """ + import statistics + values = list(data.values()) + return { + "mean": statistics.mean(values), + "median": statistics.median(values), + "stdev": statistics.stdev(values) if len(values) > 1 else 0 + } + +@mcp.tool() +def fetch_database_record(record_id: str) -> dict: + """Fetch a record from the database. 
+ + Args: + record_id: Unique identifier for the record + + Returns: + Record data as dictionary + """ + # Your database logic here + return db.get(record_id) +``` + +--- + +## Tool Configuration + +Control how tools are used in your transductions: + +### Limiting Tool Usage + +```python +# Limit tool usage +@transducible( + tools=[expensive_api_tool], + max_iter=3, # Maximum 3 tool calls + timeout=120 # 2 minute timeout including tool calls +) +async def controlled_tool_usage(state: Input) -> Output: + return Transduce(state) +``` + +### Verbose Tool Logging + +```python +# Verbose tool logging +@transducible( + tools=[web_search], + verbose_agent=True, # Log each tool call + provide_explanation=True # Include tool usage in explanation +) +async def logged_tool_usage(state: Input) -> Output: + return Transduce(state) + +result, explanation = await logged_tool_usage(input_data) +print(f"Tools used: {explanation.tools_called}") +``` + +--- + +## Best Practices + +1. **Provide clear tool descriptions** - Help the LLM understand when to use each tool +2. **Limit tool iterations** - Prevent infinite loops with `max_iter` +3. **Handle tool failures gracefully** - Tools may timeout or return errors +4. **Use reasoning mode** - Enable `reasoning=True` for complex tool orchestration +5. **Monitor tool usage** - Use `verbose_agent=True` during development +6. 
**Cache tool results** - Avoid redundant API calls for the same queries + +### Example: Robust Tool Usage + +```python +@transducible( + tools=[web_search, database_query], + reasoning=True, + max_iter=5, + timeout=300, + verbose_agent=True, + provide_explanation=True +) +async def robust_research(state: Query) -> Report: + """Research with fallback strategies if tools fail.""" + return Transduce(state) + +try: + report, explanation = await robust_research(query) + print(f"Used tools: {explanation.tools_called}") + print(f"Tool call count: {len(explanation.tool_calls)}") +except TimeoutError: + print("Research timed out - try simpler query or increase timeout") +``` + +--- + +## See Also + +- 👉 [Transducible Functions](transducible_functions.md) - Core concepts and basic usage +- 👉 [Optimization](optimization.md) - Performance tuning and batch processing +- 👉 [Examples](../examples/mcp_server_example.py) - Complete MCP server example +- 👉 [Index](index.md) diff --git a/docs/transducible_functions.md b/docs/transducible_functions.md index 08340a8c6..bb48a86ab 100644 --- a/docs/transducible_functions.md +++ b/docs/transducible_functions.md @@ -4,7 +4,7 @@ Transducible functions are the *workhorse* of Agentics. They turn “call this LLM with a prompt” into: > **A typed, explainable transformation** -> `T: X → Y` with guarantees about how each output field was produced. +> `T: X → Y` with an explanation of how each output field was produced. This document explains what transducible functions are, how they work in Agentics, and how to use them in practice — including **dynamic generation** and **compositional patterns** using the `<<` operator. @@ -12,21 +12,16 @@ This document explains what transducible functions are, how they work in Agentic ## 1. What Is a Transducible Function?
-Formally, a **transducible function** \(T : X \to Y\) is an *explainable* function that satisfies: +Formally, a **transducible function** `T: X → Y` is an *explainable* function that satisfies: -1. **Totality** - For every valid input \(x \in \llbracket X \rrbracket\), the function produces a valid output of type \(Y\). - > No silent failures: the function always returns some well-typed `Y`. -2. **Local Evidence** - Each output slot \(y_i\) is computed only from its *evidence subset* \(\mathcal{E}_i(x)\). +1. **Local Evidence** + Each output slot **yᵢ** is computed only from its *evidence subset* **Eᵢ(x)**. > No field is generated “from nowhere”: if `subject` appears in the output, we know which inputs and instructions it depended on. -3. **Slot-Level Provenance** - The mapping between input and output slots is explicit: - \[ - \mathcal{T}(y_i) = \mathcal{E}_i - \] +2. **Slot-Level Provenance** + The mapping between input and output slots is explicit: **T(yᵢ) = Eᵢ** + This induces a bipartite graph between **input slots** and **output slots**, which acts as the *explainability trace* of the transduction. Intuitively: @@ -38,7 +33,7 @@ Transducible functions extend normal functions with **structural transparency at --- -## 2. Source and Target Types (X and Y) πŸ“ +## 2. Source and Target Types πŸ“ Agentics uses **Pydantic models** to represent the input type `X` and the output type `Y`. @@ -61,7 +56,7 @@ class Email(BaseModel): > **Recommendation** > In transduction scenarios, it is often useful to declare fields as `Optional[...] = None`. -> This gives the LLM the ability to say *“I don’t have enough evidence for this field”* by leaving it `null`, instead of hallucinating content. +> This gives an LLM the ability to say *“I don’t have enough evidence for this field”* by leaving it `null`, instead of hallucinating content.
The transducible function we will define next will transform exactly **one** `UserMessage` into **one** `Email` (and later, we’ll see how to scale to lists). @@ -79,12 +74,12 @@ They can be defined in two main ways: 1. Using the **`@transducible()` decorator** on an async Python function. 2. **Dynamically generating** them from source and target types (e.g., via builders or the `<<` operator), with instructions and parameters. -This section starts with the decorator pattern and then moves to dynamic generation and composition. --- ## 4. The `@transducible()` Decorator +This section starts with the decorator pattern and then moves to dynamic generation and composition. The decorator turns an ordinary async function into a transducible function. When decorated with `@transducible()`, your function can return either: - A **concrete instance of the target type** `Y` (pure Python logic), or @@ -246,7 +241,7 @@ Usage: ```python input_state = GenericInput( - content="Write a news story on Zoran Mandani winning the election in NYC and send it to Alfio" + content="Write a news story on the winner of Super Bowl in 2025 and send it to Alfio." ) mail = await write_mail(input_state) @@ -275,7 +270,7 @@ class Summary(BaseModel): ```python input_state = GenericInput( - content="Write news story on Zoran Mandani winning the election in NYC and send it to Alfio" + content="Write a news story on the winner of Super Bowl in 2025 and send it to Alfio." ) write_mail = Email << GenericInput # GenericInput β†’ Email @@ -346,7 +341,7 @@ summarize = Summary << With( ) input_state = GenericInput( - content="Zoran Mandani won the election in NYC. Draft a message to the press list." + content="Philadelphia Eagles won Super Bowl 2025. Draft a message to the press list." ) mail = await write_mail(input_state) @@ -393,29 +388,177 @@ This pattern generalizes: - `With(..., provide_explanation=True)` can be used with other source/target pairs. 
- Explanations can be logged, inspected, or surfaced in UI as **transparent justification** for the model’s decision. +### 6.5 `With()` Function Reference + +The `With()` function creates a `TransductionConfig` object that wraps a source model with configuration parameters. It's used with the `<<` operator to create configured transducible functions dynamically. + +**Signature:** +```python +def With(model: Type[BaseModel], **kwargs) -> TransductionConfig +``` + +**Parameters:** + +All parameters from the `@transducible()` decorator are supported: + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `instructions` | `str` | `""` | Custom instructions for the LLM on how to perform the transduction | +| `tools` | `list[Any]` | `[]` | List of tools (MCP, CrewAI, or LangChain) available during transduction | +| `enforce_output_type` | `bool` | `False` | If `True`, raises `TypeError` if output doesn't match target type | +| `llm` | `Any` | `AG.get_llm_provider()` | LLM provider to use (OpenAI, WatsonX, Ollama, etc.) 
| +| `reasoning` | `bool` | `False` | Enable reasoning mode for complex transductions | +| `max_iter` | `int` | `10` | Maximum iterations for agentic reasoning loops | +| `verbose_transduction` | `bool` | `True` | Print detailed transduction logs | +| `verbose_agent` | `bool` | `False` | Print agent-level execution logs | +| `batch_size` | `int` | `10` | Number of items to process in parallel batches | +| `provide_explanation` | `bool` | `False` | Return explanation alongside result (see Section 6.6) | +| `timeout` | `int` | `300` | Timeout in seconds for each transduction | +| `post_processing_function` | `Callable` | `None` | Function to apply to outputs after transduction | +| `persist_output` | `str` | `None` | Path to save intermediate batch results | +| `transduce_fields` | `list[str]` | `None` | Specific fields to use for transduction | +| `prompt_template` | `str` | `None` | Custom prompt template for the LLM | +| `areduce` | `bool` | `False` | Use reduce mode instead of map (for aggregations) | + +**Usage Patterns:** + +```python +# Basic usage with instructions +classify = Genre << With(Movie, instructions="Classify the movie genre") + +# Multiple parameters +enrich = EnrichedData << With( + RawData, + instructions="Enrich with external data", + tools=[web_search_tool], + batch_size=20, + timeout=600, + provide_explanation=True +) + +# Comparison: With() vs @transducible() +# These are equivalent: + +# Using With() +fn1 = TargetType << With(SourceType, instructions="Transform data") + +# Using decorator +@transducible(instructions="Transform data") +async def fn2(state: SourceType) -> TargetType: + return Transduce(state) +``` + +**When to use `With()` vs `@transducible()`:** + +- Use `With()` for **dynamic, one-off transductions** where you don't need a named function +- Use `@transducible()` for **reusable functions** that you'll call multiple times or compose into larger workflows +- `With()` is ideal for **exploratory work** and **inline 
transformations** +- `@transducible()` is better for **production code** with clear function names and documentation + +--- + +### 6.6 Result Unpacking with `TransductionResult` + +When you set `provide_explanation=True` (either in `@transducible()` or `With()`), the transduction returns a `TransductionResult` object that supports automatic unpacking. + +**The `TransductionResult` Class:** + +```python +class TransductionResult: + def __init__(self, value, explanation): + self.value = value # The actual transduced output + self.explanation = explanation # Explanation of how it was derived + + def __iter__(self): + yield self.value + yield self.explanation +``` + +**Automatic Unpacking Behavior:** + +The framework automatically detects how you assign the result: + +```python +# Single assignment - get only the value +result = await classify_genre(movie) +print(result.genre) # Access the Genre object directly + +# Tuple unpacking - get both value and explanation +genre, explanation = await classify_genre(movie) +print(genre.genre) # The Genre object +print(explanation.reasoning) # The explanation object +``` + +**Example with Decorator:** + +```python +@transducible(provide_explanation=True) +async def classify_genre(state: Movie) -> Genre: + """Classify the genre of the source Movie.""" + return Transduce(state) + +movie = Movie( + movie_name="The Godfather", + description="Crime family drama", + year=1972 +) + +# Get both result and explanation +genre, explanation = await classify_genre(movie) + +print(f"Genre: {genre.genre}") +print(f"Reasoning: {explanation.reasoning}") +print(f"Confidence: {explanation.confidence}") +``` + +**Example with `With()`:** + +```python +classify_genre = Genre << With( + Movie, + provide_explanation=True, + instructions="Classify based on plot and themes" +) + +# Tuple unpacking works the same way +genre, explanation = await classify_genre(movie) +``` + +**Batch Processing with Explanations:** + +When processing lists, each item gets 
its own explanation: + +```python +movies = [movie1, movie2, movie3] + +# Returns list of values and list of explanations +genres, explanations = await classify_genre(movies) + +for genre, explanation in zip(genres, explanations): + print(f"{genre.genre}: {explanation.reasoning}") +``` + +**Note:** If you don't need explanations, simply omit `provide_explanation=True` and the function returns only the transduced value(s). + +--- + --- -## 7. Map–Reduce: Scaling Transducible Functions πŸš€ +## 7. Batch Processing with Lists -When wrapped by `@transducible()` **or** created dynamically with `<<`, transducible functions are overloaded to accept **lists** of `X` as well. When called this way, they return a corresponding list of `Y`: +Transducible functions automatically support batch processing. When you pass a list of items, they are processed efficiently: ```python messages = [ UserMessage(content="Hi John, I made great progress with Agentics."), - UserMessage(content="Hi , I fixed the last blocking bug in the pipeline."), + UserMessage(content="Hi, I fixed the last blocking bug in the pipeline."), ] +# Automatically processes all messages emails = await write_email_with_llm(messages) ``` -Under the hood, Agentics uses an **asynchronous Map** operation: - -- Conceptually: - `amap(write_email_with_llm, messages) -> list[Email]` -- Each element is processed independently, enabling concurrency and parallelism. -- This pattern scales to **batch inference, dataset scans, and large evidence extraction tasks**. - -Later, you can combine this with **Reduce** operations (e.g., summarizing all emails into a single report), forming full Map–Reduce pipelines over typed states. +For detailed information on Map-Reduce operations, scaling to large datasets, and aggregation patterns, see the dedicated [Map-Reduce](map_reduce.md) documentation. 
--- @@ -423,8 +566,7 @@ Later, you can combine this with **Reduce** operations (e.g., summarizing all em Because transducible functions are defined over explicit types and carry evidence subsets, Agentics can: -- Track which input fields contributed to each output field. -- Represent this as a **bipartite graph** between input and output slots. +- Track which input fields contributed to the output. - Attach this trace as **metadata** to your states (depending on your Agentics configuration). For example, in the email examples: @@ -435,25 +577,24 @@ For example, in the email examples: This is critical when you: -- Need **auditable** LLM behavior. -- Want to debug why a particular field was generated. -- Need to enforce *β€œno hallucination from outside these inputs”* policies. +- Need **auditable** LLM behavior. +- Want to debug why a particular field was generated. +- Need to enforce *"no hallucination from outside these inputs"* policies. --- ## 9. When to Create a New Transducible Function -In a real system, you’ll typically end up with many small, focused transducible functions instead of one giant one. +In a real system, you'll typically end up with many small, focused transducible functions instead of one giant one. Good reasons to define a separate transducible function: -- You’re doing a logically distinct step: +- You're doing a logically distinct step: - e.g., *extract entities*, *normalize names*, *classify intent*, *summarize conversation*. - You want to **test** and **benchmark** that step independently. - You expect to **reuse** it across pipelines. - You need different **instructions, constraints, or safety properties** for that stage. -Think of transducible functions as the **operators** of your Logical Transduction Algebra. --- @@ -473,8 +614,11 @@ Think of transducible functions as the **operators** of your Logical Transductio - Expose structured explainability traces for each output field. 
- Compose into robust, interpretable, large-scale reasoning pipelines. -From here you can explore: +--- + +## Next +- 👉 **[Transducible Functions Tutorial](../tutorials/transducible_functions.ipynb)** to see how transducible functions work in practice +- 👉 **[Map-Reduce Operations](map_reduce.md)** - Scaling with map and reduce, batch processing patterns -- `core_concepts.md` – the broader mental model (types, states, LTA, Map–Reduce). -- `mapreduce.md` – how Agentics orchestrates large-scale transductions over typed state containers. -- `types_and_states.md` – how to design good schemas and manage collections of states. +## Go to Index +- 👉 [Index](index.md) diff --git a/examples/discovery/demo.py b/examples/discovery/demo.py index 0b821e40f..41319bbbc 100644 --- a/examples/discovery/demo.py +++ b/examples/discovery/demo.py @@ -65,8 +65,8 @@ if st.session_state.databases: for db in st.session_state.databases: st.markdown( - f"""### {db.name if db.name else ""} -N rows: {len(DataFrame(db.df)) if db.df else ""} + f"""### {db.name if db.name else ''} +N rows: {len(DataFrame(db.df)) if db.df else ''} """ ) @@ -86,6 +86,7 @@ st.success(f"{len(uploaded_files)} file(s) uploaded successfully.") st.session_state.databases = [] for file in uploaded_files: + try: db = AgenticDB() df = db.import_db_from_csv(file) # ✅ pass the buffer @@ -121,6 +122,7 @@ with st.spinner( "Agentics is reading your documents and generating intermediate evidence before answering your question. This might take some time ..."
): + question = Question( question=question, dbs=st.session_state.databases, diff --git a/examples/discovery/transductions.py b/examples/discovery/transductions.py index b7c160a2e..12d3780d2 100644 --- a/examples/discovery/transductions.py +++ b/examples/discovery/transductions.py @@ -18,6 +18,7 @@ @transducible() async def answer_question_from_data(state: Question) -> Question: + if state.question: source_data = [AG.from_dataframe(DataFrame(db.df)) for db in state.dbs] diff --git a/examples/hello_world.py b/examples/hello_world.py index 29bd03387..3958926a8 100644 --- a/examples/hello_world.py +++ b/examples/hello_world.py @@ -3,8 +3,7 @@ from pydantic import BaseModel, Field -from agentics import AG -from agentics.core.transducible_functions import Transduce, With, transducible +import agentics.core.transducible_functions class Movie(BaseModel): @@ -17,20 +16,18 @@ class Genre(BaseModel): genre: Optional[str] = Field(None, description="Provide one category only") -movie = Movie( - movie_name="The Godfather", - description="The aging patriarch of an organized crime dynasty transfers control of his clandestine empire to his reluctant son.", - year=1972, -) - - -## Using Transducible Decorator -@transducible(provide_explanation=True) -async def classify_genre(state: Movie) -> Genre: - """Classify the genre of the source Movie""" - return Transduce(state) - - -genre, explanation = asyncio.run(classify_genre(movie)) -print(genre.model_dump_json(indent=2)) -print(explanation.model_dump_json(indent=2)) +# Create movie instance +movies = [ + Movie( + movie_name="The Godfather", + description="The aging patriarch of an organized crime dynasty transfers control of his clandestine empire to his reluctant son.", + year=1972, + ), + Movie(movie_name="The Shawshank Redemption"), +] +# Create transduction function using << operator +classify_genre = Genre << Movie + +# Execute transduction +result = asyncio.run(classify_genre(movies)) +print(result) diff --git a/mkdocs.yml 
b/mkdocs.yml index 4d6365aab..e59dc6992 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -10,12 +10,14 @@ plugins: nav: - πŸ‘‹ Welcome to Agentics: index.md - - Getting Started: getting_started.md - - What is Agentics?: background.md - - Use Cases: use_cases.md - - Agentics: agentics.md - - Transduction: transduction.md - - Memory: memory.md + - πŸš€ Getting Started: getting_started.md + - 🧠 Core Concepts: core_concepts.md + - βš™οΈ Transducible Functions: transducible_functions.md + - πŸ” Map-Reduce Operations: map_reduce.md + - πŸ” Semantic Operators: semantic_operators.md + - 🧬 Agentics (AG): agentics.md + - ⚑ Performance Optimization: optimization.md + - πŸ”Œ Tool Integration: tool_integration.md markdown_extensions: - pymdownx.tabbed diff --git a/src/agentics/core/agentics.py b/src/agentics/core/agentics.py index 9b40c62df..b8d07f310 100644 --- a/src/agentics/core/agentics.py +++ b/src/agentics/core/agentics.py @@ -180,22 +180,21 @@ def timeout(self, value: float): def create_crewai_llm(**kwargs): return LLM(**kwargs) - async def generate_atype( - self, description: str, retry: int = 3 - ) -> Tuple[str, Type[BaseModel]] | None: + async def generate_atype(self, description: str, retry: int = 3): class GeneratedAtype(BaseModel): python_code: Optional[str] = Field( None, description="Python Code for the described Pydantic type" ) - methods: list[str] = Field(None, description="Methods for the class above") + # methods: list[str] = Field(None, description="Methods for the class above") i = 0 while i < retry: generated_atype_ag = await ( AG( atype=GeneratedAtype, - instructions="""Generate python code for the input nl type specs. - Make all fields Optional. Use only primitive types for the fields, avoiding nested. + instructions="""Generate Python code for the pydantic type following the input specs. + Make all fields Optional. + Use only primitive types for the fields, avoiding nested. 
Provide descriptions for the class and all its fields, using Field(None,description= "...") If the input nl type spec is a question, generate a pydantic type that can be used to represent the answer to that question. diff --git a/src/agentics/core/semantic_operators.py b/src/agentics/core/semantic_operators.py index 844ce846b..57dad9872 100644 --- a/src/agentics/core/semantic_operators.py +++ b/src/agentics/core/semantic_operators.py @@ -86,11 +86,10 @@ async def sem_map( if isinstance(target_type, str) else target_type ), + instructions=instructions, **kwargs, ) - ag_source.prompt_template = instructions - map_out = await (target_ag << ag_source) output_ag = None if merge_output: @@ -103,7 +102,10 @@ async def sem_map( async def sem_filter( - source: AG | pd.DataFrame, predicate_template: str, **kwargs + source: AG | pd.DataFrame, + predicate_template: str, + sensitivity: float = 0.8, + **kwargs, ) -> AG | pd.DataFrame: """ Agentics-native semantic filter over an `AG` using a LangChain-style condition template. @@ -113,7 +115,7 @@ async def sem_filter( predicate. It is an agentic analogue of LOTUS-style semantic filtering. The `predicate_template` is a **LangChain-style template** (e.g., using `{field}` - placeholders) that is rendered against each source state’s fields. The rendered + placeholders) that is rendered against each source state's fields. The rendered text is then passed to an LLM-based logical classifier which produces a boolean decision (`condition_true`) for that state. @@ -149,21 +151,40 @@ async def sem_filter( target_ag = AG( atype=create_pydantic_model( - [("condition_true", "bool", """Condition is True""", False)], name="filter" + [ + ( + "sentence_is_true", + "bool", + """Provide True if you think the input sentence is True, False otherwise""", + False, + ), + ( + "truth_score", + "float", + """Provide a number from 0 to 1 to assess the degree to which the input Sentence is True. 
If you are uncertain, provide a number in the range (0,1)""", + False, + ), + ], + name="filter", ), - instructions="""You are a Logical Classifier. You have been given an input sentence. - Read the input text and return true if the predicate is positive, false otherwise""", + instructions="""You have been given an input sentence. + Read the input text and return True if the sentence is true, False otherwise""", amap_batch_size=20, **kwargs, ) + # Keep the more sophisticated version that handles both cases + if "{" in predicate_template: + ag_source.prompt_template = predicate_template + else: + target_ag.instructions += f"\n\nPredicate: {predicate_template}" - ag_source.prompt_template = predicate_template map_out = await (target_ag << ag_source) target = ag_source.clone() target.states = [] - for i in range(len(map_out.states)): - if map_out[i].condition_true: - target.append(ag_source[i]) + + for map_out_c, source_c in zip(map_out.states, ag_source.states): + if map_out_c.truth_score and map_out_c.truth_score >= sensitivity: + target.append(source_c) if type(source) is pd.DataFrame: return target.to_dataframe() @@ -174,7 +195,7 @@ async def sem_filter( async def sem_agg( source: AG | pd.DataFrame, target_type: Type[BaseModel] | str, - instructions: str = None, + instructions: str | None = None, # merge_output: bool = True, ## Target, Merged **kwargs, ) -> AG | pd.DataFrame: @@ -188,13 +209,15 @@ async def sem_agg( if isinstance(target_type, str) else target_type ), + instructions=instructions, + transduction_type="areduce", **kwargs, ) - ag_source.prompt_template = instructions - ag_source.transduction_type = "areduce" - output_ag = await (target_ag << ag_source) if type(source) is pd.DataFrame: return output_ag.to_dataframe() return output_ag + + +# Made with Bob diff --git a/src/agentics/core/transducible_functions.py b/src/agentics/core/transducible_functions.py index 461e4aab2..42b02e879 100644 --- a/src/agentics/core/transducible_functions.py +++ 
b/src/agentics/core/transducible_functions.py @@ -545,21 +545,27 @@ async def semantic_merge(instance1: BaseModel, instance2: BaseModel) -> BaseMode async def generate_prototypical_instances( - type: Type[BaseModel], n_instances: int = 10, llm: Any = AG.get_llm_provider() + type: Type[BaseModel], + n_instances: int = 10, + llm: Any = AG.get_llm_provider(), + instructions: str = None, ) -> list[BaseModel]: DynamicModel = create_model( "ListOfObjectsOfGivenType", instances=(list[type] | None, None), # REQUIRED field ) + full_instructions = f""" + Generate list of {n_instances} random instances of the following type + {type.model_json_schema()}. + fill all attributed for each generated instance + """ + if instructions: + full_instructions += "Adhere to the following instructions \n" + instructions target = AG( atype=DynamicModel, - instructions=f""" - Generate list of {n_instances} random instances of the following type - {type.model_json_schema()}. - Try to fill most of the attributed for each generated instance as possible - """, + instructions=full_instructions, llm=llm, ) generated = await (target << " ") diff --git a/tutorials/agentics.ipynb b/tutorials/agentics.ipynb index 45e47c2f9..9b0d7c240 100644 --- a/tutorials/agentics.ipynb +++ b/tutorials/agentics.ipynb @@ -221,7 +221,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.14" + "version": "3.12.12" } }, "nbformat": 4, diff --git a/tutorials/agentics1.0/logical_transduction.ipynb b/tutorials/agentics1.0/logical_transduction.ipynb index e1df9cfea..f2f5dc36f 100644 --- a/tutorials/agentics1.0/logical_transduction.ipynb +++ b/tutorials/agentics1.0/logical_transduction.ipynb @@ -213,6 +213,7 @@ " atype=Answer,\n", " verbose_transduction=True, # Set to verbose to see transduction timings and other logs\n", " transduction_logs_path=\"/tmp/answers.jsonl\",\n", + " amap_batch_size=2,\n", ") # Optionally write longs of transductions on the specified path\n", 
"questions = [\n", " \"Where is Paris?\",\n", diff --git a/tutorials/atypes.ipynb b/tutorials/atypes.ipynb index 1c0833bde..f751393b6 100644 --- a/tutorials/atypes.ipynb +++ b/tutorials/atypes.ipynb @@ -91,6 +91,7 @@ "source": [ "from typing import Optional, List\n", "from pydantic import BaseModel, Field\n", + "import agentics\n", "\n", "class Movie(BaseModel):\n", " movie_name: Optional[str] = Field(None, description=\"Movie title.\")\n", @@ -192,6 +193,32 @@ "cell_type": "markdown", "id": "8", "metadata": {}, + "source": [ + "## Create type from definition" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9", + "metadata": {}, + "outputs": [], + "source": [ + "from agentics.core.atype import create_pydantic_model\n", + "Answer = create_pydantic_model([(\"answer\", \"str\", \"the answer for the question\",False), (\"confidence\", \"float\", None,False)])\n", + "\n", + "schema = [{'name': 'name', 'description': 'the name of the person', 'type': 'str', 'multiple': 'False'},\n", + " {'name': 'age', 'description': 'the age of the person', 'type': 'int', 'multiple': 'False'}]\n", + "new_schema = [(field[\"name\"] , field[\"description\"], field[\"type\"], False) for field in schema]\n", + "Person = create_pydantic_model(new_schema)\n", + "print(Answer(confidence=0.95, answer=\"Paris is the capital of France.\"))\n", + "print(Person(name=\"Alice\"))" + ] + }, + { + "cell_type": "markdown", + "id": "10", + "metadata": {}, "source": [ "### Synthetic Data Generation 🧪\n", "\n", @@ -219,7 +246,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9", + "id": "11", "metadata": {}, "outputs": [], "source": [ @@ -232,7 +259,7 @@ }, { "cell_type": "markdown", - "id": "10", + "id": "12", "metadata": {}, "source": [ "### Dynamic Type Generation ✨\n", @@ -257,7 +284,7 @@ { "cell_type": "code", "execution_count": null, - "id": "11", + "id": "13", "metadata": {}, "outputs": [], "source": [ @@ -270,7 +297,7 @@ }, { "cell_type": "markdown", - "id": 
"12", + "id": "14", "metadata": {}, "source": [ "## Logical Proximity (LP) 🧭\n", @@ -295,7 +322,7 @@ { "cell_type": "code", "execution_count": null, - "id": "13", + "id": "15", "metadata": {}, "outputs": [], "source": [ @@ -305,7 +332,7 @@ }, { "cell_type": "markdown", - "id": "14", + "id": "16", "metadata": {}, "source": [ "### Logical Proximity - Key Properties\n", @@ -349,7 +376,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.14" + "version": "3.12.12" } }, "nbformat": 4, diff --git a/tutorials/data/movies.jsonl b/tutorials/data/movies.jsonl deleted file mode 100644 index 6d157f171..000000000 --- a/tutorials/data/movies.jsonl +++ /dev/null @@ -1,20 +0,0 @@ -{"movie_name":"The Shawshank Redemption","genre":"Drama, Crime","description":"Imprisoned in the 1940s for the double murder of his wife and her lover, upstanding banker Andy Dufresne begins a new life at the Shawshank prison, where he puts his accounting skills to work for an amoral warden. During his long stretch in prison, Dufresne comes to be admired by the other inmates -- including an older prisoner named Red -- for his integrity and unquenchable sense of hope."} -{"movie_name":"The Godfather","genre":"Drama, Crime","description":"Spanning the years 1945 to 1955, a chronicle of the fictional Italian-American Corleone crime family. When organized crime family patriarch, Vito Corleone barely survives an attempt on his life, his youngest son, Michael steps in to take care of the would-be killers, launching a campaign of bloody revenge."} -{"movie_name":"The Godfather Part II","genre":"Drama, Crime","description":"In the continuing saga of the Corleone crime family, a young Vito Corleone grows up in Sicily and in 1910s New York. 
In the 1950s, Michael Corleone attempts to expand the family business into Las Vegas, Hollywood and Cuba."} -{"movie_name":"Schindler's List","genre":"Drama, History, War","description":"The true story of how businessman Oskar Schindler saved over a thousand Jewish lives from the Nazis while they worked as slaves in his factory during World War II."} -{"movie_name":"12 Angry Men","genre":"Drama","description":"The defense and the prosecution have rested and the jury is filing into the jury room to decide if a young Spanish-American is guilty or innocent of murdering his father. What begins as an open and shut case soon becomes a mini-drama of each of the jurors' prejudices and preconceptions about the trial, the accused, and each other."} -{"movie_name":"Spirited Away","genre":"Animation, Family, Fantasy","description":"A young girl, Chihiro, becomes trapped in a strange new world of spirits. When her parents undergo a mysterious transformation, she must call upon the courage she never knew she had to free her family."} -{"movie_name":"The Dark Knight","genre":"Drama, Action, Crime, Thriller","description":"Batman raises the stakes in his war on crime. With the help of Lt. Jim Gordon and District Attorney Harvey Dent, Batman sets out to dismantle the remaining criminal organizations that plague the streets. The partnership proves to be effective, but they soon find themselves prey to a reign of chaos unleashed by a rising criminal mastermind known to the terrified citizens of Gotham as the Joker."} -{"movie_name":"Dilwale Dulhania Le Jayenge","genre":"Comedy, Drama, Romance","description":"Raj is a rich, carefree, happy-go-lucky second generation NRI. Simran is the daughter of Chaudhary Baldev Singh, who in spite of being an NRI is very strict about adherence to Indian values. Simran has left for India to be married to her childhood fiancé. Raj leaves for India with a mission at his hands, to claim his lady love under the noses of her whole family. 
Thus begins a saga."} -{"movie_name":"The Green Mile","genre":"Fantasy, Drama, Crime","description":"A supernatural tale set on death row in a Southern prison, where gentle giant John Coffey possesses the mysterious power to heal people's ailments. When the cell block's head guard, Paul Edgecomb, recognizes Coffey's miraculous gift, he tries desperately to help stave off the condemned man's execution."} -{"movie_name":"Parasite","genre":"","description":"All unemployed, Ki-taek's family takes peculiar interest in the wealthy and glamorous Parks for their livelihood until they get entangled in an unexpected incident."} -{"movie_name":"Pulp Fiction","genre":"","description":"A burger-loving hit man, his philosophical partner, a drug-addled gangster's moll and a washed-up boxer converge in this sprawling, comedic crime caper. Their adventures unfurl in three stories that ingeniously trip back and forth in time."} -{"movie_name":"Your Name.","genre":"","description":"High schoolers Mitsuha and Taki are complete strangers living separate lives. But one night, they suddenly switch places. Mitsuha wakes up in Taki’s body, and he in hers. This bizarre occurrence continues to happen randomly, and the two must adjust their lives around each other."} -{"movie_name":"The Lord of the Rings: The Return of the King","genre":"","description":"As armies mass for a final battle that will decide the fate of the world--and powerful, ancient forces of Light and Dark compete to determine the outcome--one member of the Fellowship of the Ring is revealed as the noble heir to the throne of the Kings of Men. 
Yet, the sole hope for triumph over evil lies with a brave hobbit, Frodo, who, accompanied by his loyal friend Sam and the hideous, wretched Gollum, ventures deep into the very dark heart of Mordor on his seemingly impossible quest to destroy the Ring of Power.​"} -{"movie_name":"Forrest Gump","genre":"","description":"A man with a low IQ has accomplished great things in his life and been present during significant historic events—in each case, far exceeding what anyone imagined he could do. But despite all he has achieved, his one true love eludes him."} -{"movie_name":"The Good, the Bad and the Ugly","genre":"","description":"While the Civil War rages on between the Union and the Confederacy, three men – a quiet loner, a ruthless hitman, and a Mexican bandit – comb the American Southwest in search of a strongbox containing $200,000 in stolen gold."} -{"movie_name":"Seven Samurai","genre":"","description":"A samurai answers a village's request for protection after he falls on hard times. The town needs protection from bandits, so the samurai gathers six others to help him teach the people how to defend themselves, and the villagers provide the soldiers with food."} -{"movie_name":"GoodFellas","genre":"","description":"The true story of Henry Hill, a half-Irish, half-Sicilian Brooklyn kid who is adopted by neighbourhood gangsters at an early age and climbs the ranks of a Mafia family under the guidance of Jimmy Conway."} -{"movie_name":"Interstellar","genre":"","description":"The adventures of a group of explorers who make use of a newly discovered wormhole to surpass the limitations on human space travel and conquer the vast distances involved in an interstellar voyage."} -{"movie_name":"Grave of the Fireflies","genre":"","description":"In the final months of World War II, 14-year-old Seita and his sister Setsuko are orphaned when their mother is killed during an air raid in Kobe, Japan. 
After a falling out with their aunt, they move into an abandoned bomb shelter. With no surviving relatives and their emergency rations depleted, Seita and Setsuko struggle to survive."} -{"movie_name":"Life Is Beautiful","genre":"","description":"A touching story of an Italian book seller of Jewish ancestry who lives in his own little fairy tale. His creative and happy life would come to an abrupt halt when his entire family is deported to a concentration camp during World War II. While locked up he tries to convince his son that the whole thing is just a game."} diff --git a/tutorials/logical_transduction_algebra.ipynb b/tutorials/logical_transduction_algebra.ipynb index a0ea4054a..50022e5bd 100644 --- a/tutorials/logical_transduction_algebra.ipynb +++ b/tutorials/logical_transduction_algebra.ipynb @@ -112,7 +112,7 @@ "write_mail_to_alfio = Email<< With(\n", " GenericInput,\n", " instructions=\"Write an email to Alfio Gliozzo\",\n", - ")\n", + " prompyt_template=\"{content}\" )\n", "news = GenericInput(content=\"Zoran Mandani won the Election in NYC\")\n", "mail = await write_mail_to_alfio(news)\n", "print(mail)" @@ -241,7 +241,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.14" + "version": "3.12.12" } }, "nbformat": 4, diff --git a/tutorials/map_reduce.ipynb b/tutorials/map_reduce.ipynb index 414336b79..8fea9b224 100644 --- a/tutorials/map_reduce.ipynb +++ b/tutorials/map_reduce.ipynb @@ -26,13 +26,21 @@ "metadata": {}, "outputs": [], "source": [ - "from pydantic import BaseModel\n", "from typing import Optional\n", - "import agentics.core.transducible_functions \n", + "from agentics import AG\n", + "from pydantic import BaseModel, Field\n", + "from dotenv import load_dotenv\n", + "\n", + "# Load .env from project root (current working directory)\n", + "load_dotenv()\n", + "\n", + "llm=AG.get_llm_provider(\"litellm_proxy\")\n", + "\n", "class Number(BaseModel):\n", - " number:Optional[int]\n", + " 
number:Optional[int] = Field(None, description=\"An integer number\")\n", + "\n", "class RomanNumber(BaseModel):\n", - " roman_number:Optional[str] =None" + " roman_number:Optional[str] = Field(None, description=\"The Roman numeral representation of the number\")" ] }, { @@ -60,9 +68,12 @@ "metadata": {}, "outputs": [], "source": [ + "from agentics.core.transducible_functions import transducible, Transduce\n", + "\n", "to_roman_number = RomanNumber << Number\n", "roman_numbers = await to_roman_number([Number(number=i) for i in range(1,5)])\n", - "for roman in roman_numbers: print(roman)" + "for roman in roman_numbers: \n", + " print(roman)" ] }, { @@ -84,7 +95,6 @@ "\n", "Key points:\n", "- The call to to_roman_number([...]) still returns a list[RomanNumber].\n", - "- Additionally, every result is appended to /tmp/roman_numbers.jsonl as line-delimited JSON.\n", "- If the file already exists, new results are appended, not overwritten." ] }, @@ -98,11 +108,10 @@ "from agentics.core.transducible_functions import With\n", "to_roman_number = RomanNumber << With(Number,\n", " batch_size = 5,\n", - " timeout = 20,\n", - " persist_output = \"/tmp/roman_numbers.jsonl\")\n", + " timeout = 20)\n", "roman_numbers = await to_roman_number([Number(number=i) for i in range(1,20)])\n", - "print(open(\"/tmp/roman_numbers.jsonl\").read())\n", - "pass" + "for roman_number in roman_numbers: \n", + " print(roman_number)" ] }, { @@ -203,7 +212,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.14" + "version": "3.12.12" } }, "nbformat": 4, diff --git a/tutorials/semantic_operators.ipynb b/tutorials/semantic_operators.ipynb new file mode 100644 index 000000000..7a648dcfd --- /dev/null +++ b/tutorials/semantic_operators.ipynb @@ -0,0 +1,261 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0", + "metadata": {}, + "source": [ + "# Semantic Operators Tutorial\n", + "\n", + "**Semantic Operators** provide declarative API for 
performing common data transformation tasks using natural language.\n", + "Here, we show examples of `sem_map`, `sem_filter`, and `sem_agg` that are implemented in `Agentics`." + ] + }, + { + "cell_type": "markdown", + "id": "1", + "metadata": {}, + "source": [ + "### Semantic Map\n", + "\n", + "Transform each record in your dataset according to natural language instructions, mapping source data to a target schema." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", + "metadata": {}, + "outputs": [], + "source": [ + "from agentics import AG\n", + "from agentics.core.semantic_operators import sem_map, sem_filter, sem_agg\n", + "from typing import Optional\n", + "from pprint import pprint\n", + "\n", + "import pandas as pd\n", + "from pydantic import BaseModel, Field\n", + "\n", + "# Select a model \"watsonx\", \"gemini\", \"openai\", etc. \n", + "# Make sure to set up credentials for the model you choose in your environment variables.\n", + "my_llm = AG.get_llm_provider()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3", + "metadata": {}, + "outputs": [], + "source": [ + "# Sample data\n", + "df = pd.DataFrame({\n", + " 'review': [\n", + " 'This product is amazing! 
Best purchase ever.',\n", + " 'Terrible quality, broke after one day.',\n", + " 'It works okay, nothing special.'\n", + " ]\n", + "})\n", + "\n", + "# Define target schema\n", + "class Sentiment(BaseModel):\n", + " sentiment: Optional[str] = Field(None, description=\"The sentiment of the review (e.g., positive, negative, neutral)\")\n", + " confidence: Optional[float] = Field(None, description=\"Confidence score of the sentiment analysis between 0 and 1\")\n", + "\n", + "result = await sem_map(\n", + " source=df,\n", + " target_type=Sentiment,\n", + " instructions=\"Analyze the sentiment of the review and provide a confidence score between 0 and 1.\",\n", + " llm=my_llm)\n", + "print(result)\n", + "\n", + "result = await sem_map(\n", + " source=df,\n", + " target_type=\"category\",\n", + " instructions=\"Classify the review into one of: positive, negative, neutral\",\n", + " llm=my_llm)\n", + "print(result)" + ] + }, + { + "cell_type": "markdown", + "id": "4", + "metadata": {}, + "source": [ + "### Semantic Filter\n", + "\n", + "Filter records based on a natural language predicate, keeping only those that satisfy the condition." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame({\n", + " 'product': ['Laptop', 'Phone', 'Tablet', 'Monitor'],\n", + " 'description': [\n", + " 'High-performance gaming laptop with RGB keyboard',\n", + " 'Budget smartphone with basic features',\n", + " 'Premium tablet with stylus support',\n", + " '4K monitor for professional work'\n", + " ]\n", + "})\n", + "\n", + "# Filter for premium/high-end products\n", + "result = await sem_filter(\n", + " source=df,\n", + " predicate_template=\"The product is premium or high-end\",\n", + " llm=AG.get_llm_provider(\"litellm_proxy\"),\n", + " verbose_agent=False,\n", + " verbose_transduction=False\n", + ")\n", + "\n", + "print(result)\n", + "\n", + "# Use field placeholders in the predicate\n", + "result = await sem_filter(\n", + " source=df,\n", + " predicate_template=\"The {product} described as '{description}' is suitable for gaming\",\n", + " llm=my_llm)\n", + "\n", + "print(result)" + ] + }, + { + "cell_type": "markdown", + "id": "6", + "metadata": {}, + "source": [ + "### Semantic Aggregator\n", + "\n", + "Aggregate data across all records to produce a summary or consolidated output." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame({\n", + " 'review': [\n", + " 'Great product, very satisfied!',\n", + " 'Good quality but expensive',\n", + " 'Not worth the price',\n", + " 'Excellent, would buy again',\n", + " 'Decent but has some issues'\n", + " ]\n", + "})\n", + "\n", + "class ReviewSummary(BaseModel):\n", + " overall_sentiment: str\n", + " key_themes: list[str]\n", + " recommendation: str\n", + "\n", + "# Aggregate all reviews into a summary\n", + "result = await sem_agg(\n", + " source=df,\n", + " target_type=ReviewSummary,\n", + " instructions=\"Summarize all reviews, identify key themes, and provide an overall recommendation\",\n", + " llm=my_llm)\n", + "\n", + "pprint(result)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8", + "metadata": {}, + "outputs": [], + "source": [ + "class Statistics(BaseModel):\n", + " total_count: int\n", + " positive_count: int\n", + " negative_count: int\n", + " average_sentiment: str\n", + "\n", + "result = await sem_agg(\n", + " source=df,\n", + " target_type=Statistics,\n", + " instructions=\"Count total reviews, positive reviews, negative reviews, and determine average sentiment\",\n", + " llm=my_llm)\n", + "pprint(result)" + ] + }, + { + "cell_type": "markdown", + "id": "9", + "metadata": {}, + "source": [ + "### Integration with Agentics Workflows\n", + "\n", + "Chaining Semantic Operators." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame({\n", + " 'product': ['Laptop', 'Phone', 'Tablet', 'Monitor'],\n", + " 'description': [\n", + " 'High-performance gaming laptop with RGB keyboard',\n", + " 'Budget smartphone with basic features',\n", + " 'Premium tablet with stylus support',\n", + " '4K monitor for professional work'\n", + " ]\n", + "})\n", + "\n", + "filtered = await sem_filter(\n", + " source=df,\n", + " predicate_template=\"The product is premium or high-end\",\n", + " llm=AG.get_llm_provider(\"litellm_proxy\"),\n", + " verbose_agent=False,\n", + " verbose_transduction=False\n", + ")\n", + "pprint(filtered)\n", + "mapped = await sem_map(\n", + " source=filtered,\n", + " target_type=\"price_category\",\n", + " instructions=\"Classify the price of the product into one of: high, medium, low\", \n", + " llm=my_llm)\n", + "pprint(mapped)\n", + "result = await sem_agg(\n", + " source=mapped,\n", + " target_type=\"summary\",\n", + " instructions=\"Summarize all descriptions\",\n", + " llm=my_llm)\n", + "pprint(result)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tutorials/synthetic_data_generation.ipynb b/tutorials/synthetic_data_generation.ipynb index 676a253ad..bbb247ccc 100644 --- a/tutorials/synthetic_data_generation.ipynb +++ b/tutorials/synthetic_data_generation.ipynb @@ -29,6 +29,13 @@ "from pydantic import BaseModel, Field\n", "from typing import Optional\n", "from agentics.core.transducible_functions import generate_prototypical_instances\n", + "from agentics 
import AG\n", + "from dotenv import load_dotenv\n", + "\n", + "# Load .env from project root (current working directory)\n", + "load_dotenv()\n", + "\n", + "llm=AG.get_llm_provider()\n", "\n", "class ProductCatalogEntry(BaseModel):\n", " product_id: str = Field(None, description=\"Unique ID for the product\")\n", @@ -37,9 +44,7 @@ " category: str = Field(None, description=\"Product category\")\n", " price: float = Field(None, description=\"Retail price in USD\")\n", "\n", - "products = await generate_prototypical_instances(ProductCatalogEntry,\n", - " n_instances=10, \n", - " llm=\"watsonx/openai/gpt-oss-120b\")\n", + "products = await generate_prototypical_instances(ProductCatalogEntry, n_instances=10, llm=llm)\n", "\n", "for product in products:\n", " print(product.model_dump_json(indent=2))" @@ -54,7 +59,7 @@ "source": [ "from agentics.core.transducible_functions import generate_atype_from_description\n", "generated_type = await generate_atype_from_description(\"Person with demographic information including name, age, and email address\")\n", - "print(generated_type.model_dump_json(indent=2))\n" + "print(generated_type.model_dump_json(indent=2))" ] }, { @@ -80,7 +85,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.14" + "version": "3.12.12" } }, "nbformat": 4, diff --git a/tutorials/transducible_functions.ipynb b/tutorials/transducible_functions.ipynb index 8aaea046d..f417d22ea 100644 --- a/tutorials/transducible_functions.ipynb +++ b/tutorials/transducible_functions.ipynb @@ -51,6 +51,10 @@ "from agentics import AG\n", "from pydantic import BaseModel, Field\n", "from agentics.core.transducible_functions import transducible, Transduce\n", + "from dotenv import load_dotenv\n", + "\n", + "# Load .env from project root (current working directory)\n", + "load_dotenv()\n", "\n", "llm=AG.get_llm_provider()\n", "\n", @@ -86,11 +90,9 @@ "metadata": {}, "outputs": [], "source": [ - "\n", - 
"@transducible(provide_explanation=True, llm=llm)\n", + "@transducible(provide_explanation=False, llm=llm)\n", "async def write_an_email(state: GenericInput) -> Email:\n", - " \"\"\"Write an email about the provided content. \n", - " Elaborate on that and make up content as needed\"\"\"\n", + " \"\"\"Write an email about the provided content. Elaborate on that and make up content as needed\"\"\"\n", " return Transduce(state)\n", "\n", "## Transducible functions can be introspected to easily get their input , output , description and original function\n", @@ -100,9 +102,8 @@ "print(write_an_email.__original_fn__)\n", "\n", "\n", - "single_mail, explanation = await write_an_email(GenericInput(content=f\"Hi Lisa, I made great progress with the new release of Agentics 2.0\"))\n", + "single_mail, explanation = await write_an_email(GenericInput(content=\"Hi Lisa, I made great progress with the new release of Agentics 2.0\"))\n", "print(single_mail.model_dump_json(indent=2))\n", - "print(explanation)\n", "\n" ] }, @@ -134,7 +135,7 @@ "source": [ "import re\n", "\n", - "@transducible()\n", + "@transducible(provide_explanation=False, llm=llm)\n", "async def write_an_email_code_only(state: GenericInput) -> Email: \n", " match = re.match(r\"^(Hi|Dear|Hello|Hey)\\s+([^,]+),\\s*(.+)$\", state.content)\n", " if match:\n", @@ -142,7 +143,7 @@ " return Email(body= body, to=name, subject=\"\")\n", " else: return Email()\n", "\n", - "@transducible(provide_explanation=True, llm=llm)\n", + "@transducible(provide_explanation=False, llm=llm)\n", "async def write_an_email_to_lisa(state: GenericInput) -> Email:\n", " \"\"\"Write an email about the provided content. 
Elaborate on that and make up content as needed\"\"\"\n", " # example code to modify states before transduction\n", @@ -216,7 +217,7 @@ " relevant_sources:Optional[list[str]]=None\n", "\n", "\n", - "@transducible(tools=[web_search], llm=llm, reasoning=True, max_iter=20, provide_explanation=True)\n", + "@transducible(tools=[web_search], llm=llm, reasoning=True, max_iter=20, provide_explanation=False)\n", "async def answer_question_after_lookup(query: GenericInput) -> WebSearchResult:\n", " \"perform an extensive web search to provide an answer to the input question with supporting evidence. Use your tool to look it up\" \n", " return Transduce(query)\n", @@ -259,7 +260,7 @@ " \"\"\"Classify the genre of the source Movie \"\"\"\n", " return Transduce(state)\n", "\n", - "genre, explanation = await classify_genre(Movie(\n", + "genre = await classify_genre(Movie(\n", " movie_name=\"The Godfather\",\n", " description=\"The aging patriarch of an organized crime dynasty transfers control of his clandestine empire to his reluctant son.\",\n", " year=1972\n", @@ -286,7 +287,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.14" + "version": "3.12.12" } }, "nbformat": 4,