Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ dependencies = [
"langchain-google-calendar-tools==0.0.1",
"langchain-google-community==2.0.3",
"langchain-elasticsearch==0.3.0",
"langchain-ollama==0.2.1",
"langchain-ollama==0.3.10",
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could loosen this constraint if you want, e.g. >=0.3.10,<1.0.0. Not necessary, though.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll make this part of the follow-up task of moving the unit tests to the lfx test suite.

"langchain-sambanova==0.1.0",
"langchain-community>=0.3.21,<1.0.0",
"sqlalchemy[aiosqlite]>=2.0.38,<3.0.0",
Expand Down

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -1804,7 +1804,7 @@
},
{
"name": "langchain_ollama",
"version": "0.2.1"
"version": "0.3.10"
},
{
"name": "langchain_community",
Expand Down

Large diffs are not rendered by default.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you move this to the lfx test suite, or create a follow-up task to do so?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for sure!

Original file line number Diff line number Diff line change
Expand Up @@ -1038,3 +1038,174 @@ def model_dump(self, **__):
pytest.raises(ValueError, match="No structured output returned"),
):
component.build_structured_dataframe()

def test_fallback_to_langchain_on_trustcall_generic_exception(self):
    """Test that when trustcall fails with a generic exception, it falls back to langchain.

    ``create_extractor`` is patched to raise, so the component's own
    ``_extract_output_with_trustcall`` runs and has to swallow the error;
    mocking that method to return ``None`` would skip the exception path
    this test is named after.
    """
    component = StructuredOutputComponent(
        llm=MockLanguageModel(),
        input_value="Test input",
        schema_name="TestSchema",
        output_schema=[{"name": "field", "type": "str", "description": "A test field"}],
        system_prompt="Test system prompt",
    )

    with (
        patch(
            "lfx.components.processing.structured_output.create_extractor",
            side_effect=RuntimeError("Generic trustcall failure"),
        ) as mock_create_extractor,
        patch.object(
            component, "_extract_output_with_langchain", return_value=[{"field": "langchain_value"}]
        ) as mock_langchain,
    ):
        result = component.build_structured_output_base()

    # Verify fallback was successful
    assert isinstance(result, list)
    assert result == [{"field": "langchain_value"}]

    # Verify trustcall was attempted (and raised) before langchain was used
    mock_create_extractor.assert_called_once()
    mock_langchain.assert_called_once()

def test_fallback_both_methods_fail_raises_value_error(self):
    """Check that a ValueError surfaces when trustcall yields nothing and langchain raises."""
    structured = StructuredOutputComponent(
        llm=MockLanguageModel(),
        input_value="Test input",
        schema_name="TestSchema",
        output_schema=[{"name": "field", "type": "str", "description": "A test field"}],
        system_prompt="Test system prompt",
    )

    # Trustcall reports failure by returning None; the langchain fallback then raises.
    langchain_failure = ValueError(
        "Model does not support tool calling (trustcall failed) and fallback "
        "with_structured_output also failed: Langchain parsing error"
    )

    with (
        patch.object(structured, "_extract_output_with_trustcall", return_value=None),
        patch.object(structured, "_extract_output_with_langchain", side_effect=langchain_failure),
        pytest.raises(ValueError, match="trustcall failed") as raised,
    ):
        structured.build_structured_output_base()

    # The message must mention both the trustcall failure and the fallback failure.
    message = str(raised.value)
    expected_fragments = (
        "Model does not support tool calling",
        "trustcall failed",
        "fallback with_structured_output also failed",
        "Langchain parsing error",
    )
    for fragment in expected_fragments:
        assert fragment in message

def test_langchain_fallback_processes_basemodel_response(self):
    """Check that a list of dicts produced by the langchain fallback is returned unchanged.

    Both extraction helpers are mocked: trustcall returns ``None`` and the
    langchain helper returns an already-extracted list of objects.
    """
    structured = StructuredOutputComponent(
        llm=MockLanguageModel(),
        input_value="Test input",
        schema_name="TestSchema",
        output_schema=[{"name": "field", "type": "str", "description": "A test field"}],
        system_prompt="Test system prompt",
    )
    fallback_payload = [{"field": "test_value"}]

    with (
        patch.object(structured, "_extract_output_with_trustcall", return_value=None),
        patch.object(structured, "_extract_output_with_langchain", return_value=fallback_payload),
    ):
        extracted = structured.build_structured_output_base()

    # The fallback payload comes back as the extracted objects.
    assert isinstance(extracted, list)
    assert extracted == [{"field": "test_value"}]

def test_langchain_fallback_processes_dict_response(self):
    """Check that a plain dict from the langchain fallback is handed back as-is."""
    structured = StructuredOutputComponent(
        llm=MockLanguageModel(),
        input_value="Test input",
        schema_name="TestSchema",
        output_schema=[{"name": "field", "type": "str", "description": "A test field"}],
        system_prompt="Test system prompt",
    )
    fallback_payload = {"field": "dict_value"}

    # Trustcall is mocked to fail (None); langchain is mocked to return a bare dict.
    with (
        patch.object(structured, "_extract_output_with_trustcall", return_value=None),
        patch.object(structured, "_extract_output_with_langchain", return_value=fallback_payload),
    ):
        extracted = structured.build_structured_output_base()

    # No BaseModel conversion happens for a dict; it is returned untouched.
    assert extracted == {"field": "dict_value"}

def test_fallback_error_message_includes_both_errors(self):
    """Check that the raised error text carries context about both failed extraction paths."""
    structured = StructuredOutputComponent(
        llm=MockLanguageModel(),
        input_value="Test input",
        schema_name="TestSchema",
        output_schema=[{"name": "field", "type": "str", "description": "A test field"}],
        system_prompt="Test system prompt",
    )

    # Trustcall is mocked to fail (None); langchain is mocked to raise the full error message.
    trustcall_patch = patch.object(structured, "_extract_output_with_trustcall", return_value=None)
    langchain_patch = patch.object(
        structured,
        "_extract_output_with_langchain",
        side_effect=ValueError(
            "Model does not support tool calling (trustcall failed) and fallback "
            "with_structured_output also failed: Langchain parsing error"
        ),
    )

    with (
        trustcall_patch,
        langchain_patch,
        pytest.raises(ValueError, match="trustcall failed") as raised,
    ):
        structured.build_structured_output_base()

    # Each part of the combined message must be present.
    message = str(raised.value)
    assert "Model does not support tool calling" in message
    assert "trustcall failed" in message
    assert "fallback with_structured_output also failed" in message
    assert "Langchain parsing error" in message

def test_trustcall_success_no_fallback_attempted(self):
    """Check that the langchain fallback is left untouched when trustcall yields a result."""

    class MockBaseModel(BaseModel):
        # Stand-in response whose dump already has the "objects" wrapper.
        def model_dump(self, **__):
            return {"objects": [{"field": "trustcall_value"}]}

    def mock_get_chat_result(runnable, system_message, input_value, config):  # noqa: ARG001
        # Dict shaped like the trustcall result the component unpacks.
        return {
            "messages": ["mock_message"],
            "responses": [MockBaseModel()],
            "response_metadata": [{"id": "mock_id"}],
            "attempts": 1,
        }

    structured = StructuredOutputComponent(
        llm=MockLanguageModel(),
        input_value="Test input",
        schema_name="TestSchema",
        output_schema=[{"name": "field", "type": "str", "description": "A test field"}],
        system_prompt="Test system prompt",
    )

    with (
        patch("lfx.components.processing.structured_output.get_chat_result", mock_get_chat_result),
        patch.object(structured, "_extract_output_with_langchain") as fallback_spy,
    ):
        extracted = structured.build_structured_output_base()

    # Trustcall path produced the objects list.
    assert isinstance(extracted, list)
    assert extracted == [{"field": "trustcall_value"}]

    # The langchain fallback was never invoked.
    fallback_spy.assert_not_called()
2 changes: 1 addition & 1 deletion src/lfx/src/lfx/_assets/component_index.json

Large diffs are not rendered by default.

72 changes: 55 additions & 17 deletions src/lfx/src/lfx/components/processing/structured_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
Output,
TableInput,
)
from lfx.log.logger import logger
from lfx.schema.data import Data
from lfx.schema.dataframe import DataFrame
from lfx.schema.table import EditMode
Expand Down Expand Up @@ -136,30 +137,27 @@ def build_structured_output_base(self):
raise ValueError(msg)

output_model_ = build_model_from_schema(self.output_schema)

output_model = create_model(
schema_name,
__doc__=f"A list of {schema_name}.",
objects=(list[output_model_], Field(description=f"A list of {schema_name}.")), # type: ignore[valid-type]
objects=(
list[output_model_],
Field(
description=f"A list of {schema_name}.", # type: ignore[valid-type]
min_length=1, # help ensure non-empty output
),
),
)

try:
llm_with_structured_output = create_extractor(self.llm, tools=[output_model])
except NotImplementedError as exc:
msg = f"{self.llm.__class__.__name__} does not support structured output."
raise TypeError(msg) from exc

# Tracing config
config_dict = {
"run_name": self.display_name,
"project_name": self.get_project_name(),
"callbacks": self.get_langchain_callbacks(),
}
result = get_chat_result(
runnable=llm_with_structured_output,
system_message=self.system_prompt,
input_value=self.input_value,
config=config_dict,
)
# Generate structured output using Trustcall first, then fallback to Langchain if it fails
result = self._extract_output_with_trustcall(output_model, config_dict)
if result is None:
result = self._extract_output_with_langchain(output_model, config_dict)

# OPTIMIZATION NOTE: Simplified processing based on trustcall response structure
# Handle non-dict responses (shouldn't happen with trustcall, but defensive)
Expand All @@ -173,8 +171,9 @@ def build_structured_output_base(self):

# Convert BaseModel to dict (creates the "objects" key)
first_response = responses[0]
structured_data = first_response.model_dump() if isinstance(first_response, BaseModel) else first_response

structured_data = first_response
if isinstance(first_response, BaseModel):
structured_data = first_response.model_dump()
# Extract the objects array (guaranteed to exist due to our Pydantic model structure)
return structured_data.get("objects", structured_data)

Expand Down Expand Up @@ -204,3 +203,42 @@ def build_structured_dataframe(self) -> DataFrame:
# Multiple outputs - convert to DataFrame directly
return DataFrame(output)
return DataFrame()

def _extract_output_with_trustcall(self, schema: type[BaseModel], config_dict: dict) -> dict | None:
    """Attempt structured extraction through a trustcall extractor.

    Args:
        schema: Pydantic model *class* (not an instance). It is passed to the
            extractor as its only tool, and its ``__name__`` is passed as
            ``tool_choice``.
        config_dict: Config forwarded unchanged to ``get_chat_result``.

    Returns:
        The value returned by ``get_chat_result`` (expected to be a dict; the
        caller reads a ``"responses"`` entry from it), or ``None`` when
        extraction raised or produced a falsy result. ``None`` is what the
        caller checks to decide whether to try the Langchain fallback.
    """
    try:
        llm_with_structured_output = create_extractor(self.llm, tools=[schema], tool_choice=schema.__name__)
        result = get_chat_result(
            runnable=llm_with_structured_output,
            system_message=self.system_prompt,
            input_value=self.input_value,
            config=config_dict,
        )
    except Exception as e:  # noqa: BLE001
        # Deliberately broad: any failure here is reported as a warning and
        # converted to None so the caller can fall back instead of aborting.
        logger.warning(
            f"Trustcall extraction failed, falling back to Langchain: {e} "
            "(Note: This may not be an error—some models or configurations do not support tool calling. "
            "Falling back is normal in such cases.)"
        )
        return None
    # An empty/falsy result is treated the same as a failure.
    return result or None

def _extract_output_with_langchain(self, schema: type[BaseModel], config_dict: dict) -> list | dict | None:
    """Attempt structured extraction through ``self.llm.with_structured_output``.

    Args:
        schema: Pydantic model *class* (not an instance) passed to
            ``with_structured_output``.
        config_dict: Config forwarded unchanged to ``get_chat_result``.

    Returns:
        For a ``BaseModel`` result: the value under ``"objects"`` in its dump,
        or the whole dumped dict when that key is absent. Any other result is
        passed through as returned by ``get_chat_result``. A falsy result
        becomes ``None``.

    Raises:
        ValueError: If any step fails. The original exception is chained as
            the cause and its text is included in the message.
    """
    try:
        llm_with_structured_output = self.llm.with_structured_output(schema)
        result = get_chat_result(
            runnable=llm_with_structured_output,
            system_message=self.system_prompt,
            input_value=self.input_value,
            config=config_dict,
        )
        if isinstance(result, BaseModel):
            # Only a dumped model is a dict here, so the "objects" unwrapping
            # stays under this guard.
            result = result.model_dump()
            result = result.get("objects", result)
    except Exception as fallback_error:
        msg = (
            f"Model does not support tool calling (trustcall failed) "
            f"and fallback with_structured_output also failed: {fallback_error}"
        )
        raise ValueError(msg) from fallback_error

    return result or None
8 changes: 4 additions & 4 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading