diff --git a/app/api/auto-carousel/route.ts b/app/api/auto-carousel/route.ts index 250a6c3..3c42767 100644 --- a/app/api/auto-carousel/route.ts +++ b/app/api/auto-carousel/route.ts @@ -96,7 +96,8 @@ STEP 2 — CLEAN & FORMAT EACH SEGMENT FOR SLIDES: - Clean up the transcript text for each line: • Fix grammar errors and typos (e.g. "jarens" → "jargons", "I I don't" → "I don't"). • Remove filler words (uh, um, hmm) ONLY if it doesn't change meaning. - • Break long sentences into concise clauses. Each line should be 5-20 words — punchy and readable on a phone screen. + • Break long sentences into concise clauses. Each line should be 5-20 words, punchy and readable on a phone screen. + • Do NOT use em dashes (—) in any text. Use a comma, period, or rephrase instead. • Remove pure filler ("Yeah.", "Right.", "Uh huh.") — skip those moments entirely. • Do NOT invent new content. Only clean up what's in the transcript. - topTimestamp and bottomTimestamp must be actual timestamps from the transcript (the [Xs] values). diff --git a/package-lock.json b/package-lock.json index 9ba7483..52977aa 100644 --- a/package-lock.json +++ b/package-lock.json @@ -37,7 +37,8 @@ "remotion": "^4.0.451", "sharp": "^0.33.0", "wavefile": "^11.0.0", - "youtube-transcript": "^1.3.0" + "youtube-transcript": "^1.3.0", + "youtubei.js": "^17.0.1" }, "devDependencies": { "@babel/core": "^7.29.0", @@ -1614,6 +1615,12 @@ "dev": true, "license": "MIT" }, + "node_modules/@bufbuild/protobuf": { + "version": "2.12.0", + "resolved": "https://registry.npmjs.org/@bufbuild/protobuf/-/protobuf-2.12.0.tgz", + "integrity": "sha512-B/XlCaFIP8LOwzo+bz5uFzATYokcwCKQcghqnlfwSmM5eX/qTkvDBnDPs+gXtX/RyjxJ4DRikECcPJbyALA8FA==", + "license": "(Apache-2.0 AND BSD-3-Clause)" + }, "node_modules/@emnapi/core": { "version": "1.10.0", "resolved": "https://registry.npmjs.org/@emnapi/core/-/core-1.10.0.tgz", @@ -10457,6 +10464,15 @@ "node": ">= 8" } }, + "node_modules/meriyah": { + "version": "6.1.4", + "resolved": "https://registry.npmjs.org/meriyah/-/meriyah-6.1.4.tgz", + "integrity": "sha512-Sz8FzjzI0kN13GK/6MVEsVzMZEPvOhnmmI1lU5+/1cGOiK3QUahntrNNtdVeihrO7t9JpoH75iMNXg6R6uWflQ==", + "license": "ISC", + "engines": { + "node": ">=18.0.0" + } + }, "node_modules/micromatch": { "version": "4.0.8", "dev": true, @@ -14029,6 +14045,19 @@ "node": ">=18.0.0" } }, + "node_modules/youtubei.js": { + "version": "17.0.1", + "resolved": "https://registry.npmjs.org/youtubei.js/-/youtubei.js-17.0.1.tgz", + "integrity": "sha512-1lO4b8UqMDzE0oh2qEGzbBOd4UYRdxn/4PdpRM7BGTHxM6ddsEsKZTu90jp8V9FHVgC2h1UirQyqoqLiKwl+Zg==", + "funding": [ + "https://github.com/sponsors/LuanRT" + ], + "license": "MIT", + "dependencies": { + "@bufbuild/protobuf": "^2.0.0", + "meriyah": "^6.1.4" + } + }, "node_modules/zod": { "version": "4.3.6", "license": "MIT", diff --git a/package.json b/package.json index 9337208..67cc2c2 100644 --- a/package.json +++ b/package.json @@ -91,7 +91,8 @@ "remotion": "^4.0.451", "sharp": "^0.33.0", "wavefile": "^11.0.0", - "youtube-transcript": "^1.3.0" + "youtube-transcript": "^1.3.0", + "youtubei.js": "^17.0.1" }, "optionalDependencies": { "@img/sharp-libvips-linux-x64": "1.0.4", diff --git a/scripts/carousel/CaptionExtractor.js b/scripts/carousel/CaptionExtractor.js index ef62725..6438ecc 100644 --- a/scripts/carousel/CaptionExtractor.js +++ b/scripts/carousel/CaptionExtractor.js @@ -70,38 +70,48 @@ class CaptionExtractor { const captionTracks = playerResponse?.captions?.playerCaptionsTracklistRenderer?.captionTracks; - if (!captionTracks || captionTracks.length === 0) { - throw new Error('No caption tracks found for this video'); - } - - // Prefer auto-generated English - const rankedTracks = [...captionTracks].sort((a, b) => { - const score = (t) => { - let s = 0; - if (t.languageCode === 'en') s += 10; - if (t.kind === 'asr') s += 5; - return s; - }; - return score(b) - score(a); - }); - let captionData = null; - for (const track of rankedTracks) { - console.log(` Trying track: ${track.languageCode} (${track.kind || 'manual'})`); - captionData = await this.fetchCaptionData(track.baseUrl, cookieStr, videoId, track); - if (captionData) break; + + if (captionTracks && captionTracks.length > 0) { + // Prefer auto-generated English + const rankedTracks = [...captionTracks].sort((a, b) => { + const score = (t) => { + let s = 0; + if (t.languageCode === 'en') s += 10; + if (t.kind === 'asr') s += 5; + return s; + }; + return score(b) - score(a); + }); + + for (const track of rankedTracks) { + console.log(` Trying track: ${track.languageCode} (${track.kind || 'manual'})`); + captionData = await this.fetchCaptionData(track.baseUrl, cookieStr, videoId, track); + if (captionData) break; + } + } else { + console.log(' No caption tracks in page response (likely bot-detection) — trying InnerTube...'); } if (!captionData) { - console.log(' Page cookies failed, trying InnerTube ANDROID fallback...'); + console.log(' Trying InnerTube ANDROID/IOS fallback...'); captionData = await this.fetchViaInnertube(videoId); } if (!captionData) { - throw new Error('Could not fetch caption data'); + console.log(' Trying youtubei.js fallback...'); + const youtubeiSegments = await this.fetchViaYoutubei(videoId); + if (youtubeiSegments) return youtubeiSegments; } - // Step 3: Parse all segments into a unified timed array + if (process.env.SUPADATA_API_KEY) { + console.log(' Trying Supadata API fallback...'); + const supadata = await this.fetchViaSupadata(videoId); + if (supadata) return supadata; + } + + if (!captionData) throw new Error('Could not fetch caption data from any source'); + return this.parseAllSegments(captionData); } @@ -182,6 +192,31 @@ class CaptionExtractor { return null; } + async fetchViaSupadata(videoId) { + try { + const resp = await fetch( + `https://api.supadata.ai/v1/youtube/transcript?videoId=${videoId}&lang=en`, + { headers: { 'x-api-key': process.env.SUPADATA_API_KEY } } + ); + if (!resp.ok) { + console.log(`Supadata failed: ${resp.status}`); + return null; + } + const data = await resp.json(); + const content = data?.content; + if (!content || content.length === 0) return null; + console.log(`Supadata succeeded with ${content.length} segments`); + return content.map(s => ({ + startSec: (s.offset ?? 0) / 1000, + endSec: ((s.offset ?? 0) + (s.duration ?? 0)) / 1000, + text: s.text ?? '', + })).filter(s => s.text); + } catch (e) { + console.log(` Supadata error: ${e.message}`); + return null; + } + } + async fetchViaInnertube(videoId) { const clients = [ { clientName: 'ANDROID', clientVersion: '19.09.37', apiKey: 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w', ua: 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip' }, @@ -225,6 +260,45 @@ class CaptionExtractor { return null; } + async fetchViaYoutubei(videoId) { + try { + const { Innertube } = await import('youtubei.js'); + const yt = await Innertube.create(); + const info = await yt.getInfo(videoId); + + const captionTracks = info.captions?.caption_tracks; + if (!captionTracks || captionTracks.length === 0) { + console.log(' youtubei.js: no caption tracks in video info'); + return null; + } + + // Caption track URLs contain embedded auth tokens — fetchable from any IP + const track = + captionTracks.find(t => t.language_code === 'en' && t.is_autogenerated) || + captionTracks.find(t => t.language_code === 'en') || + captionTracks[0]; + + console.log(` youtubei.js: using track ${track.language_code} (${track.is_autogenerated ? 'auto' : 'manual'})`); + + for (const fmt of ['json3', 'srv3', '']) { + try { + const url = new URL(track.base_url); + if (fmt) url.searchParams.set('fmt', fmt); + const resp = await fetch(url.toString()); + if (!resp.ok) continue; + const text = await resp.text(); + if (!text || /^