/**
 * Sandbox listener used to try plugins out locally.
 *
 * Registers the RSS import plugin for the two demo sheets, configures a
 * "Sandbox" workbook whose sheets expose an `importRSSFeed` action, and
 * enables the Markdown extractor.
 *
 * @param listener - The Flatfile listener the plugins are attached to.
 */
export default async function (listener: FlatfileListener) {
  listener.use(
    // `rssImport` takes a single config object (`operation` + `feeds`), not a
    // bare array of feeds. It subscribes to jobs named `sheet:<operation>`, so
    // `operation` must equal the `operation` of the sheet actions declared
    // below, otherwise the action button never triggers an import.
    rssImport({
      operation: 'importRSSFeed',
      feeds: [
        {
          sheetSlug: 'rss-feed-1',
          rssFeedUrl: 'http://rss.cnn.com/rss/money_topstories.rss',
        },
        {
          sheetSlug: 'rss-feed-2',
          rssFeedUrl: 'http://rss.cnn.com/rss/money_news_companies.rss',
        },
      ],
    })
  )
  listener.use(
    configureSpace({
      workbooks: [
        {
          name: 'Sandbox',
          sheets: [
            {
              name: 'RSS Feed One',
              // Slug must match a `sheetSlug` in the `rssImport` feeds above.
              slug: 'rss-feed-1',
              fields: [
                {
                  key: 'title',
                  type: 'string',
                  label: 'Title',
                },
                {
                  key: 'link',
                  type: 'string',
                  label: 'Link',
                },
                {
                  key: 'pubDate',
                  type: 'string',
                  label: 'Pub Date',
                },
                {
                  key: 'content',
                  type: 'string',
                  label: 'Content',
                },
                {
                  key: 'guid',
                  type: 'string',
                  label: 'GUID',
                },
              ],
              actions: [
                {
                  operation: 'importRSSFeed',
                  label: 'Import RSS Feed',
                  description: 'Import data from an RSS feed into the workbook',
                  primary: true,
                  icon: 'rss_feed',
                  tooltip: 'Click to import data from an RSS feed',
                  mode: 'foreground',
                },
              ],
            },
            {
              name: 'RSS Feed Two',
              // Slug must match a `sheetSlug` in the `rssImport` feeds above.
              slug: 'rss-feed-2',
              fields: [
                {
                  key: 'title',
                  type: 'string',
                  label: 'Title',
                },
                {
                  key: 'link',
                  type: 'string',
                  label: 'Link',
                },
                {
                  key: 'pubDate',
                  type: 'string',
                  label: 'Pub Date',
                },
                {
                  key: 'content',
                  type: 'string',
                  label: 'Content',
                },
                {
                  key: 'guid',
                  type: 'string',
                  label: 'GUID',
                },
              ],
              actions: [
                {
                  operation: 'importRSSFeed',
                  label: 'Import RSS Feed',
                  description: 'Import data from an RSS feed into the workbook',
                  primary: true,
                  icon: 'rss_feed',
                  tooltip: 'Click to import data from an RSS feed',
                  mode: 'foreground',
                },
              ],
            },
          ],
        },
      ],
    })
  )
  listener.use(MarkdownExtractor())
}
+ +## Usage + +### Installation + +To install the plugin, run the following command: + +```bash +npm install @flatfile/plugin-import-rss +``` + +### JavaScript + +```javascript +import { FlatfileListener } from '@flatfile/listener'; +import { rssImport } from "@flatfile/plugin-import-rss"; + +const listener = new FlatfileListener(); + +listener.use( + rssImport({ + operation: "importRSSFeed", + feeds: [ + { + sheetSlug: "rss_feed_data", + rssFeedUrl: "https://example.com/rss-feed" + } + ] + }) +); + +listener.mount(); +``` + +### TypeScript + +```typescript +import { rssImport } from "@flatfile/plugin-import-rss"; + +listener.use(rssImport({ + operation: "importRSSFeed", + feeds: [ + { + sheetSlug: "rss_feed_data", + rssFeedUrl: "https://example.com/rss-feed" + } + ] +})); +``` + +## Example Usage + +Here's an example of how to use the plugin with multiple RSS feeds: + +```typescript listener.ts +import type { FlatfileListener } from '@flatfile/listener'; +import { rssImport } from "@flatfile/plugin-import-rss"; + +export default function (listener: FlatfileListener) { + listener.use( + rssImport({ + operation: "importRSSFeed", + feeds: [ + { + sheetSlug: "tech_news", + rssFeedUrl: "https://techcrunch.com/feed/" + }, + { + sheetSlug: "world_news", + rssFeedUrl: "https://rss.nytimes.com/services/xml/rss/nyt/World.xml" + } + ] + }) + ); +} +``` + +## API Calls + +The plugin uses the following Flatfile API calls: + +- `api.sheets.get` +- `api.jobs.ack` +- `api.jobs.complete` +- `api.jobs.fail` +- `api.records.insert` + +## Contributing + +Contributions to this plugin are welcome! Please follow these steps: + +1. Fork the repository +2. Create a new branch for your feature or bug fix +3. Make your changes and commit them with clear, descriptive messages +4. Push your changes to your fork +5. Submit a pull request with a clear description of your changes + +Please ensure your code adheres to the existing style and includes appropriate tests. 
\ No newline at end of file diff --git a/plugins/markdown-extractor/jest.config.js b/import/rss/jest.config.js similarity index 100% rename from plugins/markdown-extractor/jest.config.js rename to import/rss/jest.config.js diff --git a/import/rss/metadata.json b/import/rss/metadata.json new file mode 100644 index 000000000..2dba58441 --- /dev/null +++ b/import/rss/metadata.json @@ -0,0 +1,84 @@ +{ + "timestamp": "2024-09-25T16-37-16-776Z", + "task": "Create a RSS Feed Import Flatfile Action:\n - Implement a custom action to import new entries from an RSS feed into a Flatfile Sheet\n - Allow users to specify the RSS feed URL and update frequency\n - Parse the RSS feed and map feed items to appropriate sheet columns\n - Implement deduplication to avoid importing duplicate entries\n - Provide options for handling media enclosures and attachments\n - Allow users to configure filters for importing specific types of content\n - Implement error handling for feed parsing and network issues\n - Generate a report of newly imported entries and any import failures\n - Optionally, allow scheduling of regular imports", + "summary": "This code implements a Flatfile Action for importing RSS feed data, including error handling, scheduling, and reporting. 
It uses the Flatfile Listener to handle events, parses RSS feeds, maps data to sheet columns, generates reports, and schedules regular imports.", + "steps": [ + [ + "First, let's create a basic structure for our RSS Feed Import Action using Flatfile's Action framework.\n", + "#E1", + "PineconeAssistant", + "Example of a Flatfile Action structure for RSS Feed Import", + "Plan: First, let's create a basic structure for our RSS Feed Import Action using Flatfile's Action framework.\n#E1 = PineconeAssistant[Example of a Flatfile Action structure for RSS Feed Import]" + ], + [ + "Now, let's implement the main logic for parsing the RSS feed and mapping items to sheet columns.\n", + "#E2", + "PineconeAssistant", + "Example code for parsing RSS feed and mapping to Flatfile sheet columns", + "Plan: Now, let's implement the main logic for parsing the RSS feed and mapping items to sheet columns.\n#E2 = PineconeAssistant[Example code for parsing RSS feed and mapping to Flatfile sheet columns]" + ], + [ + "We need to implement deduplication to avoid importing duplicate entries. Let's use Flatfile's Record Hook for this.\n", + "#E3", + "PineconeAssistant", + "Example of using Record Hook in Flatfile for deduplication", + "Plan: We need to implement deduplication to avoid importing duplicate entries. 
Let's use Flatfile's Record Hook for this.\n#E3 = PineconeAssistant[Example of using Record Hook in Flatfile for deduplication]" + ], + [ + "Let's add functionality to handle media enclosures and attachments from the RSS feed.\n", + "#E4", + "PineconeAssistant", + "Example of handling media enclosures and attachments in Flatfile", + "Plan: Let's add functionality to handle media enclosures and attachments from the RSS feed.\n#E4 = PineconeAssistant[Example of handling media enclosures and attachments in Flatfile]" + ], + [ + "Implement user-configurable filters for importing specific types of content.\n", + "#E5", + "PineconeAssistant", + "Example of implementing content filters in Flatfile Action", + "Plan: Implement user-configurable filters for importing specific types of content.\n#E5 = PineconeAssistant[Example of implementing content filters in Flatfile Action]" + ], + [ + "Add error handling for feed parsing and network issues.\n", + "#E6", + "PineconeAssistant", + "Example of error handling in Flatfile Action for RSS feed parsing and network issues", + "Plan: Add error handling for feed parsing and network issues.\n#E6 = PineconeAssistant[Example of error handling in Flatfile Action for RSS feed parsing and network issues]" + ], + [ + "Generate a report of newly imported entries and any import failures.\n", + "#E7", + "PineconeAssistant", + "Example of generating import report in Flatfile Action", + "Plan: Generate a report of newly imported entries and any import failures.\n#E7 = PineconeAssistant[Example of generating import report in Flatfile Action]" + ], + [ + "Implement scheduling functionality for regular imports.\n", + "#E8", + "PineconeAssistant", + "Example of implementing scheduled imports in Flatfile", + "Plan: Implement scheduling functionality for regular imports.\n#E8 = PineconeAssistant[Example of implementing scheduled imports in Flatfile]" + ], + [ + "Combine all the components into a complete RSS Feed Import Action.\n", + "#E9", + "LLM", 
+ "Combine the following components into a complete Flatfile Action for RSS Feed Import: #E1, #E2, #E3, #E4, #E5, #E6, #E7, #E8. Ensure all imports are correctly included and the code is valid.", + "Plan: Combine all the components into a complete RSS Feed Import Action.\n#E9 = LLM[Combine the following components into a complete Flatfile Action for RSS Feed Import: #E1, #E2, #E3, #E4, #E5, #E6, #E7, #E8. Ensure all imports are correctly included and the code is valid.]" + ], + [ + "Validate the final Action code, checking for unused imports and ensuring all Event Topics are valid.\n", + "#E10", + "PineconeAssistant", + "Validate the following Flatfile Action code, remove unused imports, and confirm all Event Topics are valid: #E9", + "Plan: Validate the final Action code, checking for unused imports and ensuring all Event Topics are valid.\n#E10 = PineconeAssistant[Validate the following Flatfile Action code, remove unused imports, and confirm all Event Topics are valid: #E9]" + ] + ], + "metrics": { + "tokens": { + "plan": 4763, + "state": 5988, + "total": 10751 + } + } +} \ No newline at end of file diff --git a/import/rss/package.json b/import/rss/package.json new file mode 100644 index 000000000..3d52ce5ae --- /dev/null +++ b/import/rss/package.json @@ -0,0 +1,72 @@ +{ + "name": "@flatfile/plugin-import-rss", + "version": "0.0.0", + "url": "https://github.com/FlatFilers/flatfile-plugins/tree/main/import/rss", + "description": "A Flatfile plugin for importing RSS feed data", + "registryMetadata": { + "category": "import" + }, + "engines": { + "node": ">= 16" + }, + "browserslist": [ + "> 0.5%", + "last 2 versions", + "not dead" + ], + "browser": { + "./dist/index.cjs": "./dist/index.browser.cjs", + "./dist/index.mjs": "./dist/index.browser.mjs" + }, + "exports": { + "types": "./dist/index.d.ts", + "node": { + "import": "./dist/index.mjs", + "require": "./dist/index.cjs" + }, + "browser": { + "require": "./dist/index.browser.cjs", + "import": 
"./dist/index.browser.mjs" + }, + "default": "./dist/index.mjs" + }, + "main": "./dist/index.cjs", + "module": "./dist/index.mjs", + "source": "./src/index.ts", + "types": "./dist/index.d.ts", + "files": [ + "dist/**" + ], + "scripts": { + "build": "rollup -c", + "build:watch": "rollup -c --watch", + "build:prod": "NODE_ENV=production rollup -c", + "check": "tsc ./**/*.ts --noEmit --esModuleInterop", + "test": "jest src/*.spec.ts --detectOpenHandles", + "test:unit": "jest src/*.spec.ts --testPathIgnorePatterns=.*\\.e2e\\.spec\\.ts$ --detectOpenHandles", + "test:e2e": "jest src/*.e2e.spec.ts --detectOpenHandles" + }, + "keywords": [ + "flatfile-plugins", + "category-import" + ], + "author": "Flatfile, Inc.", + "repository": { + "type": "git", + "url": "https://github.com/FlatFilers/flatfile-plugins.git", + "directory": "import/rss" + }, + "license": "ISC", + "dependencies": { + "@flatfile/api": "^1.9.19", + "@flatfile/util-common": "^1.4.0", + "axios": "^1.7.7", + "rss-parser": "^3.13.0" + }, + "peerDependencies": { + "@flatfile/listener": "^1.0.1" + }, + "devDependencies": { + "@flatfile/rollup-config": "^0.1.1" + } +} \ No newline at end of file diff --git a/import/rss/rollup.config.mjs b/import/rss/rollup.config.mjs new file mode 100644 index 000000000..fafa813c6 --- /dev/null +++ b/import/rss/rollup.config.mjs @@ -0,0 +1,5 @@ +import { buildConfig } from '@flatfile/rollup-config' + +const config = buildConfig({}) + +export default config diff --git a/import/rss/src/import.rss.plugin.ts b/import/rss/src/import.rss.plugin.ts new file mode 100644 index 000000000..676784058 --- /dev/null +++ b/import/rss/src/import.rss.plugin.ts @@ -0,0 +1,50 @@ +import { FlatfileClient } from '@flatfile/api' +import type { FlatfileEvent, FlatfileListener } from '@flatfile/listener' +import { mapToSheetColumns, parseRSSFeed } from './import.rss.utils' + +const api = new FlatfileClient() + +export interface RSSImportConfig { + operation: string + feeds: { + sheetSlug: string + 
/** Tag used for log output; matches the package name declared in package.json. */
const PLUGIN_NAME = '@flatfile/plugin-import-rss'

/** Extracts a printable message from a caught value of unknown type. */
function errorMessage(error: unknown): string {
  return error instanceof Error ? error.message : String(error)
}

/**
 * Creates a listener plugin that imports RSS feed items into a sheet.
 *
 * The plugin subscribes to `job:ready` events for jobs named
 * `sheet:<config.operation>`. For each such job it loads the sheet, finds the
 * feed whose `sheetSlug` equals the sheet's slug, acknowledges the job, parses
 * the feed, inserts the items as records and completes the job. Any error is
 * reported by failing the job with a descriptive message.
 *
 * @param config - The operation name to listen for and the sheet/feed mapping.
 * @returns A function that registers the handler on a `FlatfileListener`.
 */
export function rssImport(config: RSSImportConfig) {
  return (listener: FlatfileListener) => {
    listener.on(
      'job:ready',
      { job: `sheet:${config.operation}` },
      async (event: FlatfileEvent) => {
        const { jobId, sheetId } = event.context
        try {
          const { data: sheet } = await api.sheets.get(sheetId)

          const feed = config.feeds.find((c) => c.sheetSlug === sheet.slug)
          // Fail with an explicit message instead of an opaque
          // "cannot read properties of undefined" when the action is run on
          // a sheet that has no feed mapped to it.
          if (!feed) {
            throw new Error(
              `No RSS feed is configured for sheet "${sheet.slug}"`
            )
          }

          await api.jobs.ack(jobId, {
            info: 'Starting job to import RSS feed data',
            progress: 10,
          })

          const records = await parseRSSFeed(feed.rssFeedUrl)
          await mapToSheetColumns(sheetId, records)

          await api.jobs.complete(jobId, {
            outcome: { message: 'RSS feed data imported successfully' },
          })
        } catch (error) {
          await api.jobs.fail(jobId, {
            outcome: {
              message: `Failed to import RSS feed data: ${errorMessage(error)}`,
            },
          })
        }
      }
    )
  }
}

const parser = new RSSParser()

/**
 * Downloads and parses an RSS feed.
 *
 * @param url - URL of the RSS feed.
 * @returns One object per feed item carrying its `title`, `link`, `pubDate`,
 *   `content` and `guid`.
 * @throws Rethrows whatever the parser throws, after logging it.
 */
export async function parseRSSFeed(url: string) {
  try {
    const feed = await parser.parseURL(url)
    return feed.items.map((item) => ({
      title: item.title,
      link: item.link,
      pubDate: item.pubDate,
      content: item.content,
      guid: item.guid,
    }))
  } catch (error) {
    logError(PLUGIN_NAME, `Error parsing RSS feed: ${errorMessage(error)}`)
    throw error
  }
}

/**
 * Inserts parsed feed items into a sheet.
 *
 * Each item is converted to a record whose `title`, `link`, `pubDate`,
 * `content` and `guid` cells are marked valid with no messages.
 *
 * @param sheetId - ID of the sheet that receives the records.
 * @param records - Items as returned by `parseRSSFeed`.
 * @throws Rethrows any error raised while inserting, after logging it.
 */
export async function mapToSheetColumns(
  sheetId: string,
  records: any[]
): Promise<void> {
  // Nothing to send: skip the API round trip for an empty feed.
  if (records.length === 0) {
    logInfo(PLUGIN_NAME, 'RSS feed contained no items; nothing to insert.')
    return
  }

  try {
    const formattedRecords = records.map((record) => ({
      title: { value: record.title, valid: true, messages: [] },
      link: { value: record.link, valid: true, messages: [] },
      pubDate: { value: record.pubDate, valid: true, messages: [] },
      content: { value: record.content, valid: true, messages: [] },
      guid: { value: record.guid, valid: true, messages: [] },
    }))

    const api = new FlatfileClient()

    await api.records.insert(sheetId, formattedRecords)
    logInfo(PLUGIN_NAME, 'Records successfully inserted into Flatfile sheet.')
  } catch (error) {
    logError(
      PLUGIN_NAME,
      `Error mapping records to sheet columns: ${errorMessage(error)}`
    )
    throw error
  }
}
"license": "MIT", + "version": "1.7.7", + "resolved": "https://registry.npmjs.org/axios/-/axios-1.7.7.tgz", + "integrity": "sha512-S4kL7XrjgBmvdGut0sN3yJxqYzrDOnivkBiN0OFs6hLiUam3UPvswUo0kqGyhqUZGEOytHyumEdXsAkgCOUf3Q==", "dependencies": { "follow-redirects": "^1.15.6", "form-data": "^4.0.0", @@ -17959,6 +17984,23 @@ "linux" ] }, + "node_modules/rss-parser": { + "version": "3.13.0", + "resolved": "https://registry.npmjs.org/rss-parser/-/rss-parser-3.13.0.tgz", + "integrity": "sha512-7jWUBV5yGN3rqMMj7CZufl/291QAhvrrGpDNE4k/02ZchL0npisiYYqULF71jCEKoIiHvK/Q2e6IkDwPziT7+w==", + "dependencies": { + "entities": "^2.0.3", + "xml2js": "^0.5.0" + } + }, + "node_modules/rss-parser/node_modules/entities": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-2.2.0.tgz", + "integrity": "sha512-p92if5Nz619I0w+akJrLZH0MX0Pb5DX39XOwQTtXSdQQOaYH03S1uIQp4mhOZtAXrxq4ViO67YTiLBo2638o9A==", + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, "node_modules/run-async": { "version": "3.0.0", "dev": true, @@ -20230,6 +20272,26 @@ "xml-js": "^1.6.11" } }, + "node_modules/xml2js": { + "version": "0.5.0", + "resolved": "https://registry.npmjs.org/xml2js/-/xml2js-0.5.0.tgz", + "integrity": "sha512-drPFnkQJik/O+uPKpqSgr22mpuFHqKdbS835iAQrUC73L2F5WkboIRd63ai/2Yg6I1jzifPFKH2NTK+cfglkIA==", + "dependencies": { + "sax": ">=0.6.0", + "xmlbuilder": "~11.0.0" + }, + "engines": { + "node": ">=4.0.0" + } + }, + "node_modules/xmlbuilder": { + "version": "11.0.1", + "resolved": "https://registry.npmjs.org/xmlbuilder/-/xmlbuilder-11.0.1.tgz", + "integrity": "sha512-fDlsI/kFEx7gLvbecc0/ohLG50fugQp8ryHzMTuW9vSa1GJ0XYWKnhsUx7oie3G98+r56aTQIUB4kht42R3JvA==", + "engines": { + "node": ">=4.0" + } + }, "node_modules/xtend": { "version": "4.0.2", "license": "MIT", @@ -21108,7 +21170,8 @@ } }, "validate/string": { - "version": "0.0.1", + "name": "@flatfile/plugin-validate-string", + "version": "0.0.0", "license": "ISC", "dependencies": { 
"@flatfile/plugin-record-hook": "^1.7.0" diff --git a/package.json b/package.json index 44103a600..87e35555b 100644 --- a/package.json +++ b/package.json @@ -10,6 +10,7 @@ "convert/*", "enrich/*", "flatfilers/*", + "import/*", "plugins/*", "support/*", "utils/*", diff --git a/plugins/markdown-extractor/jest.config.cjs b/plugins/markdown-extractor/jest.config.cjs new file mode 100644 index 000000000..e6d7ca40b --- /dev/null +++ b/plugins/markdown-extractor/jest.config.cjs @@ -0,0 +1,16 @@ +module.exports = { + testEnvironment: 'node', + + transform: { + '^.+\\.tsx?$': 'ts-jest', + }, + setupFiles: ['../../test/dotenv-config.js'], + setupFilesAfterEnv: [ + '../../test/betterConsoleLog.js', + '../../test/unit.cleanup.js', + ], + testTimeout: 60_000, + globalSetup: '../../test/setup-global.js', + forceExit: true, + passWithNoTests: true, +}