|
| 1 | +/** |
| 2 | + * ChatProcessor Interface and Implementation |
| 3 | + * |
| 4 | + * Defines the contract for chat processing that both the real implementation |
| 5 | + * and any mock implementations (in consuming apps) must follow. |
| 6 | + */ |
| 7 | + |
| 8 | +import { classifyMessages } from './classifier/index' |
| 9 | +import { extractCandidates as extractCandidatesImpl } from './extraction/index' |
| 10 | +import { geocodeActivities } from './geocoder/index' |
| 11 | +import { parseChatWithStats } from './parser/index' |
| 12 | +import type { |
| 13 | + CandidateMessage, |
| 14 | + ClassifiedActivity, |
| 15 | + ClassifierConfig, |
| 16 | + GeocodedActivity, |
| 17 | + GeocoderConfig, |
| 18 | + ParsedMessage |
| 19 | +} from './types' |
| 20 | +import { isMappable } from './types/classifier' |
| 21 | + |
/**
 * Output of the processor's parse stage.
 */
export interface ProcessorParseResult {
  /** Messages obtained from the chat export. */
  messages: ReadonlyArray<ParsedMessage>
  /** Message count for the export (RealChatProcessor forwards the parser's own count). */
  messageCount: number
}
| 29 | + |
/**
 * Output of the processor's candidate-extraction stage.
 */
export interface ProcessorCandidateResult {
  /** Messages selected as possible activity mentions. */
  candidates: ReadonlyArray<CandidateMessage>
  /** Number of candidates (RealChatProcessor sets this to `candidates.length`). */
  candidateCount: number
}
| 37 | + |
/**
 * Output of the processor's classification stage.
 */
export interface ProcessorClassifyResult {
  /** Activities identified among the candidates. */
  activities: ReadonlyArray<ClassifiedActivity>
  /** Estimated cost of the classification calls, in cents (may be fractional). */
  costCents: number
}
| 45 | + |
/**
 * Output of the processor's geocoding stage.
 */
export interface ProcessorGeocodeResult {
  /** Activities after the geocoding pass. */
  activities: ReadonlyArray<GeocodedActivity>
  /** How many activities were successfully geocoded. */
  geocodedCount: number
  /** Estimated cost of the geocoding calls, in cents (may be fractional). */
  costCents: number
}
| 54 | + |
/**
 * Results of every pipeline stage, keyed by stage name.
 */
export interface ProcessingStageResults {
  /** Result of the parse stage. */
  parse: ProcessorParseResult
  /** Result of the candidate-extraction stage. */
  extract: ProcessorCandidateResult
  /** Result of the classification stage. */
  classify: ProcessorClassifyResult
  /** Result of the geocoding stage. */
  geocode: ProcessorGeocodeResult
}
| 64 | + |
/**
 * Settings accepted by the processor stages. Every field is optional.
 */
export interface ProcessorConfig {
  /** Anthropic API key; RealChatProcessor returns no activities from `classify` when it is absent. */
  anthropicApiKey?: string
  /** OpenAI API key. NOTE(review): not read by RealChatProcessor in this file. */
  openaiApiKey?: string
  /** Google Maps API key; RealChatProcessor skips geocoding when it is absent. */
  googleMapsApiKey?: string
  /** Home country passed to the classifier; RealChatProcessor falls back to 'United States'. */
  homeCountry?: string
}
| 74 | + |
/**
 * Contract for chat processing.
 *
 * The real processor and any mock processor must both implement every
 * method below. All methods are asynchronous.
 */
export interface ChatProcessor {
  /**
   * Parse chat content supplied as a string.
   *
   * @param content - Chat export text.
   * @returns The parsed messages together with their count.
   */
  parse(content: string): Promise<ProcessorParseResult>

  /**
   * Pick out the messages that might contain activities.
   *
   * @param messages - Parsed messages to scan.
   * @returns The candidate messages together with their count.
   */
  extractCandidates(messages: readonly ParsedMessage[]): Promise<ProcessorCandidateResult>

  /**
   * Use AI classification to identify activities among the candidates.
   *
   * @param candidates - Candidate messages to classify.
   * @param config - Processor settings (API keys, home country).
   * @returns The identified activities and the cost in cents.
   */
  classify(candidates: readonly CandidateMessage[], config: ProcessorConfig): Promise<ProcessorClassifyResult>

  /**
   * Geocode activities to obtain coordinates.
   *
   * @param activities - Classified activities to geocode.
   * @param config - Processor settings (API keys, home country).
   * @returns The activities after geocoding, the geocoded count and the cost in cents.
   */
  geocode(activities: readonly ClassifiedActivity[], config: ProcessorConfig): Promise<ProcessorGeocodeResult>

  /**
   * Run the full processing pipeline on chat content.
   *
   * @param content - Chat export text.
   * @param config - Processor settings (API keys, home country).
   * @returns The result of each stage.
   */
  processAll(content: string, config: ProcessorConfig): Promise<ProcessingStageResults>
}
| 111 | + |
/**
 * Estimated price of a single API call, expressed in cents.
 */
const COST_ESTIMATES = {
  // Claude Haiku: roughly $0.0008 per message, i.e. 0.08 cents
  classificationPerMessage: 0.08,
  // Google Places geocoding: $5 per 1000 requests, i.e. 0.5 cents each
  geocodingPerRequest: 0.5
} as const
| 121 | + |
/**
 * Real ChatProcessor implementation using the chat-to-map library functions.
 *
 * Each stage delegates to the matching library function and reshapes its
 * output into the processor result types. Failures reported by the library
 * as a non-ok result are rethrown as `Error`.
 */
export class RealChatProcessor implements ChatProcessor {
  /**
   * Parse chat content with `parseChatWithStats`.
   *
   * @param content - Chat export text.
   * @returns The parsed messages and the message count reported by the parser.
   */
  async parse(content: string): Promise<ProcessorParseResult> {
    const result = parseChatWithStats(content)
    return {
      messages: result.messages,
      messageCount: result.messageCount
    }
  }

  /**
   * Extract the messages that might contain activities.
   *
   * @param messages - Parsed messages to scan.
   * @returns The candidates and their count.
   * @throws Error when the extractor reports a non-ok result.
   */
  async extractCandidates(messages: readonly ParsedMessage[]): Promise<ProcessorCandidateResult> {
    const result = await extractCandidatesImpl(messages)

    if (!result.ok) {
      throw new Error(`Candidate extraction failed: ${result.error.message}`)
    }

    return {
      candidates: result.value.candidates,
      candidateCount: result.value.candidates.length
    }
  }

  /**
   * Classify candidates with the Anthropic provider.
   *
   * Returns an empty, zero-cost result without calling the classifier when
   * `config.anthropicApiKey` is missing or when there are no candidates.
   *
   * @param candidates - Candidate messages to classify.
   * @param config - Supplies `anthropicApiKey` and, optionally, `homeCountry`
   *   (defaults to 'United States').
   * @returns The classified activities and an estimated cost in cents, charged
   *   per candidate submitted.
   * @throws Error when the classifier reports a non-ok result.
   */
  async classify(
    candidates: readonly CandidateMessage[],
    config: ProcessorConfig
  ): Promise<ProcessorClassifyResult> {
    if (!config.anthropicApiKey) {
      return { activities: [], costCents: 0 }
    }

    if (candidates.length === 0) {
      return { activities: [], costCents: 0 }
    }

    const classifierConfig: ClassifierConfig = {
      provider: 'anthropic',
      apiKey: config.anthropicApiKey,
      homeCountry: config.homeCountry ?? 'United States'
    }

    const result = await classifyMessages(candidates, classifierConfig)

    if (!result.ok) {
      throw new Error(`Classification failed: ${result.error.message}`)
    }

    const costCents = candidates.length * COST_ESTIMATES.classificationPerMessage

    return {
      activities: result.value,
      costCents
    }
  }

  /**
   * Geocode the mappable activities and merge them back into the full list.
   *
   * Returns the input activities unchanged, with zero count and cost, when
   * `config.googleMapsApiKey` is missing or when no activity is mappable.
   *
   * @param activities - Classified activities; only those accepted by
   *   `isMappable` are sent to the geocoder.
   * @param config - Supplies `googleMapsApiKey`.
   * @returns All activities in their original order (geocoded versions
   *   substituted by `activityId`), the number that came back with a latitude,
   *   and an estimated cost in cents.
   */
  async geocode(
    activities: readonly ClassifiedActivity[],
    config: ProcessorConfig
  ): Promise<ProcessorGeocodeResult> {
    if (!config.googleMapsApiKey) {
      return {
        activities: activities as readonly GeocodedActivity[],
        geocodedCount: 0,
        costCents: 0
      }
    }

    const mappable = activities.filter((a) => isMappable(a))

    if (mappable.length === 0) {
      return {
        activities: activities as readonly GeocodedActivity[],
        geocodedCount: 0,
        costCents: 0
      }
    }

    const geocoderConfig: GeocoderConfig = {
      apiKey: config.googleMapsApiKey
    }

    const geocoded = await geocodeActivities(mappable, geocoderConfig)

    const geocodedCount = geocoded.filter((a: GeocodedActivity) => a.latitude !== undefined).length

    // The rate is per request, so charge for every lookup attempted rather than
    // only the ones that returned coordinates; a miss is still a request.
    // NOTE(review): assumes geocodeActivities issues one request per mappable
    // activity — confirm against the geocoder implementation.
    const costCents = mappable.length * COST_ESTIMATES.geocodingPerRequest

    // Merge geocoded results back with non-mappable activities
    const geocodedMap = new Map(geocoded.map((g) => [g.activityId, g]))
    const result = activities.map((a) => geocodedMap.get(a.activityId) ?? (a as GeocodedActivity))

    return {
      activities: result,
      geocodedCount,
      costCents
    }
  }

  /**
   * Run parse, extractCandidates, classify and geocode in sequence, feeding
   * each stage the previous stage's output.
   *
   * @param content - Chat export text.
   * @param config - Processor settings passed to classify and geocode.
   * @returns The result of every stage.
   * @throws Error when extraction or classification reports a non-ok result.
   */
  async processAll(content: string, config: ProcessorConfig): Promise<ProcessingStageResults> {
    const parse = await this.parse(content)
    const extract = await this.extractCandidates(parse.messages)
    const classify = await this.classify(extract.candidates, config)
    const geocode = await this.geocode(classify.activities, config)

    return { parse, extract, classify, geocode }
  }
}
0 commit comments