diff --git a/src/backend/base/langflow/initial_setup/starter_projects/News Aggregator.json b/src/backend/base/langflow/initial_setup/starter_projects/News Aggregator.json index 5693b780be02..d1d497ce2b69 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/News Aggregator.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/News Aggregator.json @@ -1765,7 +1765,7 @@ "last_updated": "2025-09-30T16:16:26.172Z", "legacy": false, "metadata": { - "code_hash": "d593d2411385", + "code_hash": "df2a0603fe8f", "dependencies": { "dependencies": [ { @@ -1950,7 +1950,7 @@ "show": true, "title_case": false, "type": "code", - "value": "import json\nfrom collections.abc import AsyncIterator, Iterator\nfrom pathlib import Path\n\nimport orjson\nimport pandas as pd\nfrom fastapi import UploadFile\nfrom fastapi.encoders import jsonable_encoder\n\nfrom lfx.custom import Component\nfrom lfx.inputs import SortableListInput\nfrom lfx.io import DropdownInput, HandleInput, SecretStrInput, StrInput\nfrom lfx.schema import Data, DataFrame, Message\nfrom lfx.services.deps import get_settings_service, get_storage_service, session_scope\nfrom lfx.template.field.base import Output\n\n\nclass SaveToFileComponent(Component):\n display_name = \"Write File\"\n description = \"Save data to local file, AWS S3, or Google Drive in the selected format.\"\n documentation: str = \"https://docs.langflow.org/components-processing#save-file\"\n icon = \"file-text\"\n name = \"SaveToFile\"\n\n # File format options for different storage types\n LOCAL_DATA_FORMAT_CHOICES = [\"csv\", \"excel\", \"json\", \"markdown\"]\n LOCAL_MESSAGE_FORMAT_CHOICES = [\"txt\", \"json\", \"markdown\"]\n AWS_FORMAT_CHOICES = [\n \"txt\",\n \"json\",\n \"csv\",\n \"xml\",\n \"html\",\n \"md\",\n \"yaml\",\n \"log\",\n \"tsv\",\n \"jsonl\",\n \"parquet\",\n \"xlsx\",\n \"zip\",\n ]\n GDRIVE_FORMAT_CHOICES = [\"txt\", \"json\", \"csv\", \"xlsx\", \"slides\", \"docs\", \"jpg\", \"mp3\"]\n\n inputs = [\n # Storage location selection\n SortableListInput(\n name=\"storage_location\",\n display_name=\"Storage Location\",\n placeholder=\"Select Location\",\n info=\"Choose where to save the file.\",\n options=[\n {\"name\": \"Local\", \"icon\": \"hard-drive\"},\n {\"name\": \"AWS\", \"icon\": \"Amazon\"},\n {\"name\": \"Google Drive\", \"icon\": \"google\"},\n ],\n real_time_refresh=True,\n limit=1,\n ),\n # Common inputs\n HandleInput(\n name=\"input\",\n display_name=\"File Content\",\n info=\"The input to save.\",\n dynamic=True,\n input_types=[\"Data\", \"DataFrame\", \"Message\"],\n required=True,\n ),\n StrInput(\n name=\"file_name\",\n display_name=\"File Name\",\n info=\"Name file will be saved as (without extension).\",\n required=True,\n show=False,\n ),\n # Format inputs (dynamic based on storage location)\n DropdownInput(\n name=\"local_format\",\n display_name=\"File Format\",\n options=list(dict.fromkeys(LOCAL_DATA_FORMAT_CHOICES + LOCAL_MESSAGE_FORMAT_CHOICES)),\n info=\"Select the file format for local storage.\",\n value=\"json\",\n show=False,\n ),\n DropdownInput(\n name=\"aws_format\",\n display_name=\"File Format\",\n options=AWS_FORMAT_CHOICES,\n info=\"Select the file format for AWS S3 storage.\",\n value=\"txt\",\n show=False,\n ),\n DropdownInput(\n name=\"gdrive_format\",\n display_name=\"File Format\",\n options=GDRIVE_FORMAT_CHOICES,\n info=\"Select the file format for Google Drive storage.\",\n value=\"txt\",\n show=False,\n ),\n # AWS S3 specific inputs\n SecretStrInput(\n name=\"aws_access_key_id\",\n display_name=\"AWS Access Key ID\",\n info=\"AWS Access key ID.\",\n show=False,\n advanced=True,\n ),\n SecretStrInput(\n name=\"aws_secret_access_key\",\n display_name=\"AWS Secret Key\",\n info=\"AWS Secret Key.\",\n show=False,\n advanced=True,\n ),\n StrInput(\n name=\"bucket_name\",\n display_name=\"S3 Bucket Name\",\n info=\"Enter the name of the S3 bucket.\",\n show=False,\n advanced=True,\n ),\n StrInput(\n name=\"aws_region\",\n display_name=\"AWS Region\",\n info=\"AWS region (e.g., us-east-1, eu-west-1).\",\n show=False,\n advanced=True,\n ),\n StrInput(\n name=\"s3_prefix\",\n display_name=\"S3 Prefix\",\n info=\"Prefix for all files in S3.\",\n show=False,\n advanced=True,\n ),\n # Google Drive specific inputs\n SecretStrInput(\n name=\"service_account_key\",\n display_name=\"GCP Credentials Secret Key\",\n info=\"Your Google Cloud Platform service account JSON key as a secret string (complete JSON content).\",\n show=False,\n advanced=True,\n ),\n StrInput(\n name=\"folder_id\",\n display_name=\"Google Drive Folder ID\",\n info=(\n \"The Google Drive folder ID where the file will be uploaded. \"\n \"The folder must be shared with the service account email.\"\n ),\n show=False,\n advanced=True,\n ),\n ]\n\n outputs = [Output(display_name=\"File Path\", name=\"message\", method=\"save_to_file\")]\n\n def update_build_config(self, build_config, field_value, field_name=None):\n \"\"\"Update build configuration to show/hide fields based on storage location selection.\"\"\"\n if field_name != \"storage_location\":\n return build_config\n\n # Extract selected storage location\n selected = [location[\"name\"] for location in field_value] if isinstance(field_value, list) else []\n\n # Hide all dynamic fields first\n dynamic_fields = [\n \"file_name\", # Common fields (input is always visible)\n \"local_format\",\n \"aws_format\",\n \"gdrive_format\",\n \"aws_access_key_id\",\n \"aws_secret_access_key\",\n \"bucket_name\",\n \"aws_region\",\n \"s3_prefix\",\n \"service_account_key\",\n \"folder_id\",\n ]\n\n for f_name in dynamic_fields:\n if f_name in build_config:\n build_config[f_name][\"show\"] = False\n\n # Show fields based on selected storage location\n if len(selected) == 1:\n location = selected[0]\n\n # Show file_name when any storage location is selected (input is always visible)\n if \"file_name\" in build_config:\n build_config[\"file_name\"][\"show\"] = True\n\n if location == \"Local\":\n if \"local_format\" in build_config:\n build_config[\"local_format\"][\"show\"] = True\n\n elif location == \"AWS\":\n aws_fields = [\n \"aws_format\",\n \"aws_access_key_id\",\n \"aws_secret_access_key\",\n \"bucket_name\",\n \"aws_region\",\n \"s3_prefix\",\n ]\n for f_name in aws_fields:\n if f_name in build_config:\n build_config[f_name][\"show\"] = True\n\n elif location == \"Google Drive\":\n gdrive_fields = [\"gdrive_format\", \"service_account_key\", \"folder_id\"]\n for f_name in gdrive_fields:\n if f_name in build_config:\n build_config[f_name][\"show\"] = True\n\n return build_config\n\n async def save_to_file(self) -> Message:\n \"\"\"Save the input to a file and upload it, returning a confirmation message.\"\"\"\n # Validate inputs\n if not self.file_name:\n msg = \"File name must be provided.\"\n raise ValueError(msg)\n if not self._get_input_type():\n msg = \"Input type is not set.\"\n raise ValueError(msg)\n\n # Get selected storage location\n storage_location = self._get_selected_storage_location()\n if not storage_location:\n msg = \"Storage location must be selected.\"\n raise ValueError(msg)\n\n # Route to appropriate save method based on storage location\n if storage_location == \"Local\":\n return await self._save_to_local()\n if storage_location == \"AWS\":\n return await self._save_to_aws()\n if storage_location == \"Google Drive\":\n return await self._save_to_google_drive()\n msg = f\"Unsupported storage location: {storage_location}\"\n raise ValueError(msg)\n\n def _get_input_type(self) -> str:\n \"\"\"Determine the input type based on the provided input.\"\"\"\n # Use exact type checking (type() is) instead of isinstance() to avoid inheritance issues.\n # Since Message inherits from Data, isinstance(message, Data) would return True for Message objects,\n # causing Message inputs to be incorrectly identified as Data type.\n if type(self.input) is DataFrame:\n return \"DataFrame\"\n if type(self.input) is Message:\n return \"Message\"\n if type(self.input) is Data:\n return \"Data\"\n msg = f\"Unsupported input type: {type(self.input)}\"\n raise ValueError(msg)\n\n def _get_default_format(self) -> str:\n \"\"\"Return the default file format based on input type.\"\"\"\n if self._get_input_type() == \"DataFrame\":\n return \"csv\"\n if self._get_input_type() == \"Data\":\n return \"json\"\n if self._get_input_type() == \"Message\":\n return \"json\"\n return \"json\" # Fallback\n\n def _adjust_file_path_with_format(self, path: Path, fmt: str) -> Path:\n \"\"\"Adjust the file path to include the correct extension.\"\"\"\n file_extension = path.suffix.lower().lstrip(\".\")\n if fmt == \"excel\":\n return Path(f\"{path}.xlsx\").expanduser() if file_extension not in [\"xlsx\", \"xls\"] else path\n return Path(f\"{path}.{fmt}\").expanduser() if file_extension != fmt else path\n\n async def _upload_file(self, file_path: Path) -> None:\n \"\"\"Upload the saved file using the upload_user_file service.\"\"\"\n from langflow.api.v2.files import upload_user_file\n from langflow.services.database.models.user.crud import get_user_by_id\n\n # Ensure the file exists\n if not file_path.exists():\n msg = f\"File not found: {file_path}\"\n raise FileNotFoundError(msg)\n\n # Upload the file\n with file_path.open(\"rb\") as f:\n async with session_scope() as db:\n if not self.user_id:\n msg = \"User ID is required for file saving.\"\n raise ValueError(msg)\n current_user = await get_user_by_id(db, self.user_id)\n\n await upload_user_file(\n file=UploadFile(filename=file_path.name, file=f, size=file_path.stat().st_size),\n session=db,\n current_user=current_user,\n storage_service=get_storage_service(),\n settings_service=get_settings_service(),\n )\n\n def _save_dataframe(self, dataframe: DataFrame, path: Path, fmt: str) -> str:\n \"\"\"Save a DataFrame to the specified file format.\"\"\"\n if fmt == \"csv\":\n dataframe.to_csv(path, index=False)\n elif fmt == \"excel\":\n dataframe.to_excel(path, index=False, engine=\"openpyxl\")\n elif fmt == \"json\":\n dataframe.to_json(path, orient=\"records\", indent=2)\n elif fmt == \"markdown\":\n path.write_text(dataframe.to_markdown(index=False), encoding=\"utf-8\")\n else:\n msg = f\"Unsupported DataFrame format: {fmt}\"\n raise ValueError(msg)\n return f\"DataFrame saved successfully as '{path}'\"\n\n def _save_data(self, data: Data, path: Path, fmt: str) -> str:\n \"\"\"Save a Data object to the specified file format.\"\"\"\n if fmt == \"csv\":\n pd.DataFrame(data.data).to_csv(path, index=False)\n elif fmt == \"excel\":\n pd.DataFrame(data.data).to_excel(path, index=False, engine=\"openpyxl\")\n elif fmt == \"json\":\n path.write_text(\n orjson.dumps(jsonable_encoder(data.data), option=orjson.OPT_INDENT_2).decode(\"utf-8\"), encoding=\"utf-8\"\n )\n elif fmt == \"markdown\":\n path.write_text(pd.DataFrame(data.data).to_markdown(index=False), encoding=\"utf-8\")\n else:\n msg = f\"Unsupported Data format: {fmt}\"\n raise ValueError(msg)\n return f\"Data saved successfully as '{path}'\"\n\n async def _save_message(self, message: Message, path: Path, fmt: str) -> str:\n \"\"\"Save a Message to the specified file format, handling async iterators.\"\"\"\n content = \"\"\n if message.text is None:\n content = \"\"\n elif isinstance(message.text, AsyncIterator):\n async for item in message.text:\n content += str(item) + \" \"\n content = content.strip()\n elif isinstance(message.text, Iterator):\n content = \" \".join(str(item) for item in message.text)\n else:\n content = str(message.text)\n\n if fmt == \"txt\":\n path.write_text(content, encoding=\"utf-8\")\n elif fmt == \"json\":\n path.write_text(json.dumps({\"message\": content}, indent=2), encoding=\"utf-8\")\n elif fmt == \"markdown\":\n path.write_text(f\"**Message:**\\n\\n{content}\", encoding=\"utf-8\")\n else:\n msg = f\"Unsupported Message format: {fmt}\"\n raise ValueError(msg)\n return f\"Message saved successfully as '{path}'\"\n\n def _get_selected_storage_location(self) -> str:\n \"\"\"Get the selected storage location from the SortableListInput.\"\"\"\n if hasattr(self, \"storage_location\") and self.storage_location:\n if isinstance(self.storage_location, list) and len(self.storage_location) > 0:\n return self.storage_location[0].get(\"name\", \"\")\n if isinstance(self.storage_location, dict):\n return self.storage_location.get(\"name\", \"\")\n return \"\"\n\n def _get_file_format_for_location(self, location: str) -> str:\n \"\"\"Get the appropriate file format based on storage location.\"\"\"\n if location == \"Local\":\n return getattr(self, \"local_format\", None) or self._get_default_format()\n if location == \"AWS\":\n return getattr(self, \"aws_format\", \"txt\")\n if location == \"Google Drive\":\n return getattr(self, \"gdrive_format\", \"txt\")\n return self._get_default_format()\n\n async def _save_to_local(self) -> Message:\n \"\"\"Save file to local storage (original functionality).\"\"\"\n file_format = self._get_file_format_for_location(\"Local\")\n\n # Validate file format based on input type\n allowed_formats = (\n self.LOCAL_MESSAGE_FORMAT_CHOICES if self._get_input_type() == \"Message\" else self.LOCAL_DATA_FORMAT_CHOICES\n )\n if file_format not in allowed_formats:\n msg = f\"Invalid file format '{file_format}' for {self._get_input_type()}. Allowed: {allowed_formats}\"\n raise ValueError(msg)\n\n # Prepare file path\n file_path = Path(self.file_name).expanduser()\n if not file_path.parent.exists():\n file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path = self._adjust_file_path_with_format(file_path, file_format)\n\n # Save the input to file based on type\n if self._get_input_type() == \"DataFrame\":\n confirmation = self._save_dataframe(self.input, file_path, file_format)\n elif self._get_input_type() == \"Data\":\n confirmation = self._save_data(self.input, file_path, file_format)\n elif self._get_input_type() == \"Message\":\n confirmation = await self._save_message(self.input, file_path, file_format)\n else:\n msg = f\"Unsupported input type: {self._get_input_type()}\"\n raise ValueError(msg)\n\n # Upload the saved file\n await self._upload_file(file_path)\n\n # Return the final file path and confirmation message\n final_path = Path.cwd() / file_path if not file_path.is_absolute() else file_path\n return Message(text=f\"{confirmation} at {final_path}\")\n\n async def _save_to_aws(self) -> Message:\n \"\"\"Save file to AWS S3 using S3 functionality.\"\"\"\n # Validate AWS credentials\n if not getattr(self, \"aws_access_key_id\", None):\n msg = \"AWS Access Key ID is required for S3 storage\"\n raise ValueError(msg)\n if not getattr(self, \"aws_secret_access_key\", None):\n msg = \"AWS Secret Key is required for S3 storage\"\n raise ValueError(msg)\n if not getattr(self, \"bucket_name\", None):\n msg = \"S3 Bucket Name is required for S3 storage\"\n raise ValueError(msg)\n\n # Use S3 upload functionality\n try:\n import boto3\n except ImportError as e:\n msg = \"boto3 is not installed. Please install it using `uv pip install boto3`.\"\n raise ImportError(msg) from e\n\n # Create S3 client\n client_config = {\n \"aws_access_key_id\": self.aws_access_key_id,\n \"aws_secret_access_key\": self.aws_secret_access_key,\n }\n\n if hasattr(self, \"aws_region\") and self.aws_region:\n client_config[\"region_name\"] = self.aws_region\n\n s3_client = boto3.client(\"s3\", **client_config)\n\n # Extract content\n content = self._extract_content_for_upload()\n file_format = self._get_file_format_for_location(\"AWS\")\n\n # Generate file path\n file_path = f\"{self.file_name}.{file_format}\"\n if hasattr(self, \"s3_prefix\") and self.s3_prefix:\n file_path = f\"{self.s3_prefix.rstrip('/')}/{file_path}\"\n\n # Create temporary file\n import tempfile\n\n with tempfile.NamedTemporaryFile(mode=\"w\", suffix=f\".{file_format}\", delete=False) as temp_file:\n temp_file.write(content)\n temp_file_path = temp_file.name\n\n try:\n # Upload to S3\n s3_client.upload_file(temp_file_path, self.bucket_name, file_path)\n s3_url = f\"s3://{self.bucket_name}/{file_path}\"\n return Message(text=f\"File successfully uploaded to {s3_url}\")\n finally:\n # Clean up temp file\n if Path(temp_file_path).exists():\n Path(temp_file_path).unlink()\n\n async def _save_to_google_drive(self) -> Message:\n \"\"\"Save file to Google Drive using Google Drive functionality.\"\"\"\n # Validate Google Drive credentials\n if not getattr(self, \"service_account_key\", None):\n msg = \"GCP Credentials Secret Key is required for Google Drive storage\"\n raise ValueError(msg)\n if not getattr(self, \"folder_id\", None):\n msg = \"Google Drive Folder ID is required for Google Drive storage\"\n raise ValueError(msg)\n\n # Use Google Drive upload functionality\n try:\n import json\n import tempfile\n\n from google.oauth2 import service_account\n from googleapiclient.discovery import build\n from googleapiclient.http import MediaFileUpload\n except ImportError as e:\n msg = \"Google API client libraries are not installed. Please install them.\"\n raise ImportError(msg) from e\n\n # Parse credentials\n try:\n credentials_dict = json.loads(self.service_account_key)\n except json.JSONDecodeError as e:\n msg = f\"Invalid JSON in service account key: {e!s}\"\n raise ValueError(msg) from e\n\n # Create Google Drive service\n credentials = service_account.Credentials.from_service_account_info(\n credentials_dict, scopes=[\"https://www.googleapis.com/auth/drive.file\"]\n )\n drive_service = build(\"drive\", \"v3\", credentials=credentials)\n\n # Extract content and format\n content = self._extract_content_for_upload()\n file_format = self._get_file_format_for_location(\"Google Drive\")\n\n # Handle special Google Drive formats\n if file_format in [\"slides\", \"docs\"]:\n return await self._save_to_google_apps(drive_service, content, file_format)\n\n # Create temporary file\n file_path = f\"{self.file_name}.{file_format}\"\n with tempfile.NamedTemporaryFile(mode=\"w\", suffix=f\".{file_format}\", delete=False) as temp_file:\n temp_file.write(content)\n temp_file_path = temp_file.name\n\n try:\n # Upload to Google Drive\n file_metadata = {\"name\": file_path, \"parents\": [self.folder_id]}\n media = MediaFileUpload(temp_file_path, resumable=True)\n\n uploaded_file = drive_service.files().create(body=file_metadata, media_body=media, fields=\"id\").execute()\n\n file_id = uploaded_file.get(\"id\")\n file_url = f\"https://drive.google.com/file/d/{file_id}/view\"\n return Message(text=f\"File successfully uploaded to Google Drive: {file_url}\")\n finally:\n # Clean up temp file\n if Path(temp_file_path).exists():\n Path(temp_file_path).unlink()\n\n async def _save_to_google_apps(self, drive_service, content: str, app_type: str) -> Message:\n \"\"\"Save content to Google Apps (Slides or Docs).\"\"\"\n import time\n\n if app_type == \"slides\":\n from googleapiclient.discovery import build\n\n slides_service = build(\"slides\", \"v1\", credentials=drive_service._http.credentials)\n\n file_metadata = {\n \"name\": self.file_name,\n \"mimeType\": \"application/vnd.google-apps.presentation\",\n \"parents\": [self.folder_id],\n }\n\n created_file = drive_service.files().create(body=file_metadata, fields=\"id\").execute()\n presentation_id = created_file[\"id\"]\n\n time.sleep(2) # Wait for file to be available # noqa: ASYNC251\n\n presentation = slides_service.presentations().get(presentationId=presentation_id).execute()\n slide_id = presentation[\"slides\"][0][\"objectId\"]\n\n # Add content to slide\n requests = [\n {\n \"createShape\": {\n \"objectId\": \"TextBox_01\",\n \"shapeType\": \"TEXT_BOX\",\n \"elementProperties\": {\n \"pageObjectId\": slide_id,\n \"size\": {\n \"height\": {\"magnitude\": 3000000, \"unit\": \"EMU\"},\n \"width\": {\"magnitude\": 6000000, \"unit\": \"EMU\"},\n },\n \"transform\": {\n \"scaleX\": 1,\n \"scaleY\": 1,\n \"translateX\": 1000000,\n \"translateY\": 1000000,\n \"unit\": \"EMU\",\n },\n },\n }\n },\n {\"insertText\": {\"objectId\": \"TextBox_01\", \"insertionIndex\": 0, \"text\": content}},\n ]\n\n slides_service.presentations().batchUpdate(\n presentationId=presentation_id, body={\"requests\": requests}\n ).execute()\n file_url = f\"https://docs.google.com/presentation/d/{presentation_id}/edit\"\n\n elif app_type == \"docs\":\n from googleapiclient.discovery import build\n\n docs_service = build(\"docs\", \"v1\", credentials=drive_service._http.credentials)\n\n file_metadata = {\n \"name\": self.file_name,\n \"mimeType\": \"application/vnd.google-apps.document\",\n \"parents\": [self.folder_id],\n }\n\n created_file = drive_service.files().create(body=file_metadata, fields=\"id\").execute()\n document_id = created_file[\"id\"]\n\n time.sleep(2) # Wait for file to be available # noqa: ASYNC251\n\n # Add content to document\n requests = [{\"insertText\": {\"location\": {\"index\": 1}, \"text\": content}}]\n docs_service.documents().batchUpdate(documentId=document_id, body={\"requests\": requests}).execute()\n file_url = f\"https://docs.google.com/document/d/{document_id}/edit\"\n\n return Message(text=f\"File successfully created in Google {app_type.title()}: {file_url}\")\n\n def _extract_content_for_upload(self) -> str:\n \"\"\"Extract content from input for upload to cloud services.\"\"\"\n if self._get_input_type() == \"DataFrame\":\n return self.input.to_csv(index=False)\n if self._get_input_type() == \"Data\":\n if hasattr(self.input, \"data\") and self.input.data:\n if isinstance(self.input.data, dict):\n import json\n\n return json.dumps(self.input.data, indent=2, ensure_ascii=False)\n return str(self.input.data)\n return str(self.input)\n if self._get_input_type() == \"Message\":\n return str(self.input.text) if self.input.text else str(self.input)\n return str(self.input)\n" + "value": "import json\nfrom collections.abc import AsyncIterator, Iterator\nfrom pathlib import Path\n\nimport orjson\nimport pandas as pd\nfrom fastapi import UploadFile\nfrom fastapi.encoders import jsonable_encoder\n\nfrom lfx.custom import Component\nfrom lfx.inputs import SortableListInput\nfrom lfx.io import DropdownInput, HandleInput, SecretStrInput, StrInput\nfrom lfx.schema import Data, DataFrame, Message\nfrom lfx.services.deps import get_settings_service, get_storage_service, session_scope\nfrom lfx.template.field.base import Output\n\n\nclass SaveToFileComponent(Component):\n display_name = \"Write File\"\n description = \"Save data to local file, AWS S3, or Google Drive in the selected format.\"\n documentation: str = \"https://docs.langflow.org/components-processing#save-file\"\n icon = \"file-text\"\n name = \"SaveToFile\"\n\n # File format options for different storage types\n LOCAL_DATA_FORMAT_CHOICES = [\"csv\", \"excel\", \"json\", \"markdown\"]\n LOCAL_MESSAGE_FORMAT_CHOICES = [\"txt\", \"json\", \"markdown\"]\n AWS_FORMAT_CHOICES = [\n \"txt\",\n \"json\",\n \"csv\",\n \"xml\",\n \"html\",\n \"md\",\n \"yaml\",\n \"log\",\n \"tsv\",\n \"jsonl\",\n \"parquet\",\n \"xlsx\",\n \"zip\",\n ]\n GDRIVE_FORMAT_CHOICES = [\"txt\", \"json\", \"csv\", \"xlsx\", \"slides\", \"docs\", \"jpg\", \"mp3\"]\n\n inputs = [\n # Storage location selection\n SortableListInput(\n name=\"storage_location\",\n display_name=\"Storage Location\",\n placeholder=\"Select Location\",\n info=\"Choose where to save the file.\",\n options=[\n {\"name\": \"Local\", \"icon\": \"hard-drive\"},\n {\"name\": \"AWS\", \"icon\": \"Amazon\"},\n {\"name\": \"Google Drive\", \"icon\": \"google\"},\n ],\n real_time_refresh=True,\n limit=1,\n ),\n # Common inputs\n HandleInput(\n name=\"input\",\n display_name=\"File Content\",\n info=\"The input to save.\",\n dynamic=True,\n input_types=[\"Data\", \"DataFrame\", \"Message\"],\n required=True,\n ),\n StrInput(\n name=\"file_name\",\n display_name=\"File Name\",\n info=\"Name file will be saved as (without extension).\",\n required=True,\n show=False,\n tool_mode=True,\n ),\n # Format inputs (dynamic based on storage location)\n DropdownInput(\n name=\"local_format\",\n display_name=\"File Format\",\n options=list(dict.fromkeys(LOCAL_DATA_FORMAT_CHOICES + LOCAL_MESSAGE_FORMAT_CHOICES)),\n info=\"Select the file format for local storage.\",\n value=\"json\",\n show=False,\n ),\n DropdownInput(\n name=\"aws_format\",\n display_name=\"File Format\",\n options=AWS_FORMAT_CHOICES,\n info=\"Select the file format for AWS S3 storage.\",\n value=\"txt\",\n show=False,\n ),\n DropdownInput(\n name=\"gdrive_format\",\n display_name=\"File Format\",\n options=GDRIVE_FORMAT_CHOICES,\n info=\"Select the file format for Google Drive storage.\",\n value=\"txt\",\n show=False,\n ),\n # AWS S3 specific inputs\n SecretStrInput(\n name=\"aws_access_key_id\",\n display_name=\"AWS Access Key ID\",\n info=\"AWS Access key ID.\",\n show=False,\n advanced=True,\n ),\n SecretStrInput(\n name=\"aws_secret_access_key\",\n display_name=\"AWS Secret Key\",\n info=\"AWS Secret Key.\",\n show=False,\n advanced=True,\n ),\n StrInput(\n name=\"bucket_name\",\n display_name=\"S3 Bucket Name\",\n info=\"Enter the name of the S3 bucket.\",\n show=False,\n advanced=True,\n ),\n StrInput(\n name=\"aws_region\",\n display_name=\"AWS Region\",\n info=\"AWS region (e.g., us-east-1, eu-west-1).\",\n show=False,\n advanced=True,\n ),\n StrInput(\n name=\"s3_prefix\",\n display_name=\"S3 Prefix\",\n info=\"Prefix for all files in S3.\",\n show=False,\n advanced=True,\n ),\n # Google Drive specific inputs\n SecretStrInput(\n name=\"service_account_key\",\n display_name=\"GCP Credentials Secret Key\",\n info=\"Your Google Cloud Platform service account JSON key as a secret string (complete JSON content).\",\n show=False,\n advanced=True,\n ),\n StrInput(\n name=\"folder_id\",\n display_name=\"Google Drive Folder ID\",\n info=(\n \"The Google Drive folder ID where the file will be uploaded. \"\n \"The folder must be shared with the service account email.\"\n ),\n show=False,\n advanced=True,\n ),\n ]\n\n outputs = [Output(display_name=\"File Path\", name=\"message\", method=\"save_to_file\")]\n\n def update_build_config(self, build_config, field_value, field_name=None):\n \"\"\"Update build configuration to show/hide fields based on storage location selection.\"\"\"\n if field_name != \"storage_location\":\n return build_config\n\n # Extract selected storage location\n selected = [location[\"name\"] for location in field_value] if isinstance(field_value, list) else []\n\n # Hide all dynamic fields first\n dynamic_fields = [\n \"file_name\", # Common fields (input is always visible)\n \"local_format\",\n \"aws_format\",\n \"gdrive_format\",\n \"aws_access_key_id\",\n \"aws_secret_access_key\",\n \"bucket_name\",\n \"aws_region\",\n \"s3_prefix\",\n \"service_account_key\",\n \"folder_id\",\n ]\n\n for f_name in dynamic_fields:\n if f_name in build_config:\n build_config[f_name][\"show\"] = False\n\n # Show fields based on selected storage location\n if len(selected) == 1:\n location = selected[0]\n\n # Show file_name when any storage location is selected (input is always visible)\n if \"file_name\" in build_config:\n build_config[\"file_name\"][\"show\"] = True\n\n if location == \"Local\":\n if \"local_format\" in build_config:\n build_config[\"local_format\"][\"show\"] = True\n\n elif location == \"AWS\":\n aws_fields = [\n \"aws_format\",\n \"aws_access_key_id\",\n \"aws_secret_access_key\",\n \"bucket_name\",\n \"aws_region\",\n \"s3_prefix\",\n ]\n for f_name in aws_fields:\n if f_name in build_config:\n build_config[f_name][\"show\"] = True\n\n elif location == \"Google Drive\":\n gdrive_fields = [\"gdrive_format\", \"service_account_key\", \"folder_id\"]\n for f_name in gdrive_fields:\n if f_name in build_config:\n build_config[f_name][\"show\"] = True\n\n return build_config\n\n async def save_to_file(self) -> Message:\n \"\"\"Save the input to a file and upload it, returning a confirmation message.\"\"\"\n # Validate inputs\n if not self.file_name:\n msg = \"File name must be provided.\"\n raise ValueError(msg)\n if not self._get_input_type():\n msg = \"Input type is not set.\"\n raise ValueError(msg)\n\n # Get selected storage location\n storage_location = self._get_selected_storage_location()\n if not storage_location:\n msg = \"Storage location must be selected.\"\n raise ValueError(msg)\n\n # Route to appropriate save method based on storage location\n if storage_location == \"Local\":\n return await self._save_to_local()\n if storage_location == \"AWS\":\n return await self._save_to_aws()\n if storage_location == \"Google Drive\":\n return await self._save_to_google_drive()\n msg = f\"Unsupported storage location: {storage_location}\"\n raise ValueError(msg)\n\n def _get_input_type(self) -> str:\n \"\"\"Determine the input type based on the provided input.\"\"\"\n # Use exact type checking (type() is) instead of isinstance() to avoid inheritance issues.\n # Since Message inherits from Data, isinstance(message, Data) would return True for Message objects,\n # causing Message inputs to be incorrectly identified as Data type.\n if type(self.input) is DataFrame:\n return \"DataFrame\"\n if type(self.input) is Message:\n return \"Message\"\n if type(self.input) is Data:\n return \"Data\"\n msg = f\"Unsupported input type: {type(self.input)}\"\n raise ValueError(msg)\n\n def _get_default_format(self) -> str:\n \"\"\"Return the default file format based on input type.\"\"\"\n if self._get_input_type() == \"DataFrame\":\n return \"csv\"\n if self._get_input_type() == \"Data\":\n return \"json\"\n if self._get_input_type() == \"Message\":\n return \"json\"\n return \"json\" # Fallback\n\n def _adjust_file_path_with_format(self, path: Path, fmt: str) -> Path:\n \"\"\"Adjust the file path to include the correct extension.\"\"\"\n file_extension = path.suffix.lower().lstrip(\".\")\n if fmt == \"excel\":\n return Path(f\"{path}.xlsx\").expanduser() if file_extension not in [\"xlsx\", \"xls\"] else path\n return Path(f\"{path}.{fmt}\").expanduser() if file_extension != fmt else path\n\n async def _upload_file(self, file_path: Path) -> None:\n \"\"\"Upload the saved file using the upload_user_file service.\"\"\"\n from langflow.api.v2.files import upload_user_file\n from langflow.services.database.models.user.crud import get_user_by_id\n\n # Ensure the file exists\n if not file_path.exists():\n msg = f\"File not found: {file_path}\"\n raise FileNotFoundError(msg)\n\n # Upload the file\n with file_path.open(\"rb\") as f:\n async with session_scope() as db:\n if not self.user_id:\n msg = \"User ID is required for file saving.\"\n raise ValueError(msg)\n current_user = await get_user_by_id(db, self.user_id)\n\n await upload_user_file(\n file=UploadFile(filename=file_path.name, file=f, size=file_path.stat().st_size),\n session=db,\n current_user=current_user,\n storage_service=get_storage_service(),\n settings_service=get_settings_service(),\n )\n\n def _save_dataframe(self, dataframe: DataFrame, path: Path, fmt: str) -> str:\n \"\"\"Save a DataFrame to the specified file format.\"\"\"\n if fmt == \"csv\":\n dataframe.to_csv(path, index=False)\n elif fmt == \"excel\":\n dataframe.to_excel(path, index=False, engine=\"openpyxl\")\n elif fmt == \"json\":\n dataframe.to_json(path, orient=\"records\", indent=2)\n elif fmt == \"markdown\":\n path.write_text(dataframe.to_markdown(index=False), encoding=\"utf-8\")\n else:\n msg = f\"Unsupported DataFrame format: {fmt}\"\n raise ValueError(msg)\n return f\"DataFrame saved successfully as '{path}'\"\n\n def _save_data(self, data: Data, path: Path, fmt: str) -> str:\n \"\"\"Save a Data object to the specified file format.\"\"\"\n if fmt == \"csv\":\n pd.DataFrame(data.data).to_csv(path, index=False)\n elif fmt == \"excel\":\n pd.DataFrame(data.data).to_excel(path, index=False, engine=\"openpyxl\")\n elif fmt == \"json\":\n path.write_text(\n orjson.dumps(jsonable_encoder(data.data), option=orjson.OPT_INDENT_2).decode(\"utf-8\"), encoding=\"utf-8\"\n )\n elif fmt == \"markdown\":\n path.write_text(pd.DataFrame(data.data).to_markdown(index=False), encoding=\"utf-8\")\n else:\n msg = f\"Unsupported Data format: {fmt}\"\n raise ValueError(msg)\n return f\"Data saved successfully as '{path}'\"\n\n async def _save_message(self, message: Message, path: Path, fmt: str) -> str:\n \"\"\"Save a Message to the specified file format, handling async iterators.\"\"\"\n content = \"\"\n if message.text is None:\n content = \"\"\n elif isinstance(message.text, AsyncIterator):\n async for item in message.text:\n content += str(item) + \" \"\n content = content.strip()\n elif isinstance(message.text, Iterator):\n content = \" \".join(str(item) for item in message.text)\n else:\n content = str(message.text)\n\n if fmt == \"txt\":\n path.write_text(content, encoding=\"utf-8\")\n elif fmt == \"json\":\n path.write_text(json.dumps({\"message\": content}, indent=2), encoding=\"utf-8\")\n elif fmt == \"markdown\":\n path.write_text(f\"**Message:**\\n\\n{content}\", encoding=\"utf-8\")\n else:\n msg = f\"Unsupported Message format: {fmt}\"\n raise ValueError(msg)\n return f\"Message saved successfully as '{path}'\"\n\n def _get_selected_storage_location(self) -> str:\n \"\"\"Get the selected storage location from the SortableListInput.\"\"\"\n if hasattr(self, \"storage_location\") and self.storage_location:\n if isinstance(self.storage_location, list) and len(self.storage_location) > 0:\n return self.storage_location[0].get(\"name\", \"\")\n if isinstance(self.storage_location, dict):\n return self.storage_location.get(\"name\", \"\")\n return \"\"\n\n def _get_file_format_for_location(self, location: str) -> str:\n \"\"\"Get the appropriate file format based on storage location.\"\"\"\n if location == \"Local\":\n return getattr(self, \"local_format\", None) or self._get_default_format()\n if location == \"AWS\":\n return getattr(self, \"aws_format\", \"txt\")\n if location == \"Google Drive\":\n return getattr(self, \"gdrive_format\", \"txt\")\n return self._get_default_format()\n\n async def _save_to_local(self) -> Message:\n \"\"\"Save file to local storage (original functionality).\"\"\"\n file_format = self._get_file_format_for_location(\"Local\")\n\n # Validate file format based on input type\n allowed_formats = (\n self.LOCAL_MESSAGE_FORMAT_CHOICES if self._get_input_type() == \"Message\" else self.LOCAL_DATA_FORMAT_CHOICES\n )\n if file_format not in allowed_formats:\n msg = f\"Invalid file format '{file_format}' for {self._get_input_type()}. Allowed: {allowed_formats}\"\n raise ValueError(msg)\n\n # Prepare file path\n file_path = Path(self.file_name).expanduser()\n if not file_path.parent.exists():\n file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path = self._adjust_file_path_with_format(file_path, file_format)\n\n # Save the input to file based on type\n if self._get_input_type() == \"DataFrame\":\n confirmation = self._save_dataframe(self.input, file_path, file_format)\n elif self._get_input_type() == \"Data\":\n confirmation = self._save_data(self.input, file_path, file_format)\n elif self._get_input_type() == \"Message\":\n confirmation = await self._save_message(self.input, file_path, file_format)\n else:\n msg = f\"Unsupported input type: {self._get_input_type()}\"\n raise ValueError(msg)\n\n # Upload the saved file\n await self._upload_file(file_path)\n\n # Return the final file path and confirmation message\n final_path = Path.cwd() / file_path if not file_path.is_absolute() else file_path\n return Message(text=f\"{confirmation} at {final_path}\")\n\n async def _save_to_aws(self) -> Message:\n \"\"\"Save file to AWS S3 using S3 functionality.\"\"\"\n # Validate AWS credentials\n if not getattr(self, \"aws_access_key_id\", None):\n msg = \"AWS Access Key ID is required for S3 storage\"\n raise ValueError(msg)\n if not getattr(self, \"aws_secret_access_key\", None):\n msg = \"AWS Secret Key is required for S3 storage\"\n raise ValueError(msg)\n if not getattr(self, \"bucket_name\", None):\n msg = \"S3 Bucket Name is required for S3 storage\"\n raise ValueError(msg)\n\n # Use S3 upload functionality\n try:\n import boto3\n except ImportError as e:\n msg = \"boto3 is not installed. Please install it using `uv pip install boto3`.\"\n raise ImportError(msg) from e\n\n # Create S3 client\n client_config = {\n \"aws_access_key_id\": self.aws_access_key_id,\n \"aws_secret_access_key\": self.aws_secret_access_key,\n }\n\n if hasattr(self, \"aws_region\") and self.aws_region:\n client_config[\"region_name\"] = self.aws_region\n\n s3_client = boto3.client(\"s3\", **client_config)\n\n # Extract content\n content = self._extract_content_for_upload()\n file_format = self._get_file_format_for_location(\"AWS\")\n\n # Generate file path\n file_path = f\"{self.file_name}.{file_format}\"\n if hasattr(self, \"s3_prefix\") and self.s3_prefix:\n file_path = f\"{self.s3_prefix.rstrip('/')}/{file_path}\"\n\n # Create temporary file\n import tempfile\n\n with tempfile.NamedTemporaryFile(mode=\"w\", suffix=f\".{file_format}\", delete=False) as temp_file:\n temp_file.write(content)\n temp_file_path = temp_file.name\n\n try:\n # Upload to S3\n s3_client.upload_file(temp_file_path, self.bucket_name, file_path)\n s3_url = f\"s3://{self.bucket_name}/{file_path}\"\n return Message(text=f\"File successfully uploaded to {s3_url}\")\n finally:\n # Clean up temp file\n if Path(temp_file_path).exists():\n Path(temp_file_path).unlink()\n\n async def _save_to_google_drive(self) -> Message:\n \"\"\"Save file to Google Drive using Google Drive functionality.\"\"\"\n # Validate Google Drive credentials\n if not getattr(self, \"service_account_key\", None):\n msg = \"GCP Credentials Secret Key is required for Google Drive storage\"\n raise ValueError(msg)\n if not getattr(self, \"folder_id\", None):\n msg = \"Google Drive Folder ID is required for Google Drive storage\"\n raise ValueError(msg)\n\n # Use Google Drive upload functionality\n try:\n import json\n import tempfile\n\n from google.oauth2 import service_account\n from googleapiclient.discovery import build\n from googleapiclient.http import MediaFileUpload\n except ImportError as e:\n msg = \"Google API client libraries are not installed. Please install them.\"\n raise ImportError(msg) from e\n\n # Parse credentials\n try:\n credentials_dict = json.loads(self.service_account_key)\n except json.JSONDecodeError as e:\n msg = f\"Invalid JSON in service account key: {e!s}\"\n raise ValueError(msg) from e\n\n # Create Google Drive service\n credentials = service_account.Credentials.from_service_account_info(\n credentials_dict, scopes=[\"https://www.googleapis.com/auth/drive.file\"]\n )\n drive_service = build(\"drive\", \"v3\", credentials=credentials)\n\n # Extract content and format\n content = self._extract_content_for_upload()\n file_format = self._get_file_format_for_location(\"Google Drive\")\n\n # Handle special Google Drive formats\n if file_format in [\"slides\", \"docs\"]:\n return await self._save_to_google_apps(drive_service, content, file_format)\n\n # Create temporary file\n file_path = f\"{self.file_name}.{file_format}\"\n with tempfile.NamedTemporaryFile(mode=\"w\", suffix=f\".{file_format}\", delete=False) as temp_file:\n temp_file.write(content)\n temp_file_path = temp_file.name\n\n try:\n # Upload to Google Drive\n file_metadata = {\"name\": file_path, \"parents\": [self.folder_id]}\n media = MediaFileUpload(temp_file_path, resumable=True)\n\n uploaded_file = drive_service.files().create(body=file_metadata, media_body=media, fields=\"id\").execute()\n\n file_id = uploaded_file.get(\"id\")\n file_url = f\"https://drive.google.com/file/d/{file_id}/view\"\n return Message(text=f\"File successfully uploaded to Google Drive: {file_url}\")\n finally:\n # Clean up temp file\n if Path(temp_file_path).exists():\n Path(temp_file_path).unlink()\n\n async def _save_to_google_apps(self, drive_service, content: str, app_type: str) -> Message:\n \"\"\"Save content to Google Apps (Slides or Docs).\"\"\"\n import time\n\n if app_type == \"slides\":\n from googleapiclient.discovery import build\n\n slides_service = build(\"slides\", \"v1\", credentials=drive_service._http.credentials)\n\n file_metadata = {\n \"name\": self.file_name,\n \"mimeType\": \"application/vnd.google-apps.presentation\",\n \"parents\": [self.folder_id],\n }\n\n created_file = drive_service.files().create(body=file_metadata, fields=\"id\").execute()\n presentation_id = created_file[\"id\"]\n\n time.sleep(2) # Wait for file to be available # noqa: ASYNC251\n\n presentation = slides_service.presentations().get(presentationId=presentation_id).execute()\n slide_id = presentation[\"slides\"][0][\"objectId\"]\n\n # Add content to slide\n requests = [\n {\n \"createShape\": {\n \"objectId\": \"TextBox_01\",\n \"shapeType\": \"TEXT_BOX\",\n \"elementProperties\": {\n \"pageObjectId\": slide_id,\n \"size\": {\n \"height\": {\"magnitude\": 3000000, \"unit\": \"EMU\"},\n \"width\": {\"magnitude\": 6000000, \"unit\": \"EMU\"},\n },\n \"transform\": {\n \"scaleX\": 1,\n \"scaleY\": 1,\n \"translateX\": 1000000,\n \"translateY\": 1000000,\n \"unit\": \"EMU\",\n },\n },\n }\n },\n {\"insertText\": {\"objectId\": \"TextBox_01\", \"insertionIndex\": 0, \"text\": content}},\n ]\n\n slides_service.presentations().batchUpdate(\n presentationId=presentation_id, body={\"requests\": requests}\n ).execute()\n file_url = f\"https://docs.google.com/presentation/d/{presentation_id}/edit\"\n\n elif app_type == \"docs\":\n from googleapiclient.discovery import build\n\n docs_service = build(\"docs\", \"v1\", credentials=drive_service._http.credentials)\n\n file_metadata = {\n \"name\": self.file_name,\n \"mimeType\": \"application/vnd.google-apps.document\",\n \"parents\": [self.folder_id],\n }\n\n created_file = drive_service.files().create(body=file_metadata, fields=\"id\").execute()\n document_id = created_file[\"id\"]\n\n time.sleep(2) # Wait for file to be available # noqa: ASYNC251\n\n # Add content to document\n requests = [{\"insertText\": {\"location\": {\"index\": 1}, \"text\": content}}]\n docs_service.documents().batchUpdate(documentId=document_id, body={\"requests\": requests}).execute()\n file_url = f\"https://docs.google.com/document/d/{document_id}/edit\"\n\n return Message(text=f\"File successfully created in Google {app_type.title()}: {file_url}\")\n\n def _extract_content_for_upload(self) -> str:\n \"\"\"Extract content from input for upload to cloud services.\"\"\"\n if self._get_input_type() == \"DataFrame\":\n return self.input.to_csv(index=False)\n if self._get_input_type() == \"Data\":\n if hasattr(self.input, \"data\") and self.input.data:\n if isinstance(self.input.data, dict):\n import json\n\n return json.dumps(self.input.data, indent=2, ensure_ascii=False)\n return str(self.input.data)\n return str(self.input)\n if self._get_input_type() == \"Message\":\n return str(self.input.text) if self.input.text else str(self.input)\n return str(self.input)\n" }, "file_name": { "_input_type": "StrInput", diff --git a/src/lfx/src/lfx/base/data/base_file.py b/src/lfx/src/lfx/base/data/base_file.py index 4fe7eb8cae85..77aa2f875e40 100644 --- a/src/lfx/src/lfx/base/data/base_file.py +++ b/src/lfx/src/lfx/base/data/base_file.py @@ -130,6 +130,7 @@ def __init__(self, *args, **kwargs): required=False, list=True, value=[], + tool_mode=True, ), HandleInput( name="file_path", diff --git a/src/lfx/src/lfx/components/data/save_file.py b/src/lfx/src/lfx/components/data/save_file.py index 8e3811d249b7..24353d86485e 100644 --- a/src/lfx/src/lfx/components/data/save_file.py +++ b/src/lfx/src/lfx/components/data/save_file.py @@ -72,6 +72,7 @@ class SaveToFileComponent(Component): info="Name file will be saved as (without extension).", required=True, show=False, + tool_mode=True, ), # Format inputs (dynamic based on storage location) DropdownInput( diff --git a/src/lfx/src/lfx/inputs/inputs.py b/src/lfx/src/lfx/inputs/inputs.py index 76937e38bc8b..ab9d9ea0933a 100644 --- a/src/lfx/src/lfx/inputs/inputs.py +++ b/src/lfx/src/lfx/inputs/inputs.py @@ -617,7 +617,7 @@ def validate_value(cls, v: Any, _info): return v -class FileInput(BaseInputMixin, ListableInputMixin, FileMixin, MetadataTraceMixin): +class FileInput(BaseInputMixin, ListableInputMixin, FileMixin, MetadataTraceMixin, ToolModeMixin): """Represents a file field. This class represents a file input and provides functionality for handling file values.