Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion app/api/auto-carousel/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,8 @@ STEP 2 — CLEAN & FORMAT EACH SEGMENT FOR SLIDES:
- Clean up the transcript text for each line:
• Fix grammar errors and typos (e.g. "jarens" → "jargons", "I I don't" → "I don't").
• Remove filler words (uh, um, hmm) ONLY if it doesn't change meaning.
• Break long sentences into concise clauses. Each line should be 5-20 words — punchy and readable on a phone screen.
• Break long sentences into concise clauses. Each line should be 5-20 words, punchy and readable on a phone screen.
• Do NOT use em dashes (—) in any text. Use a comma, period, or rephrase instead.
• Remove pure filler ("Yeah.", "Right.", "Uh huh.") — skip those moments entirely.
• Do NOT invent new content. Only clean up what's in the transcript.
- topTimestamp and bottomTimestamp must be actual timestamps from the transcript (the [Xs] values).
Expand Down
31 changes: 30 additions & 1 deletion package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,8 @@
"remotion": "^4.0.451",
"sharp": "^0.33.0",
"wavefile": "^11.0.0",
"youtube-transcript": "^1.3.0"
"youtube-transcript": "^1.3.0",
"youtubei.js": "^17.0.1"
},
"optionalDependencies": {
"@img/sharp-libvips-linux-x64": "1.0.4",
Expand Down
118 changes: 96 additions & 22 deletions scripts/carousel/CaptionExtractor.js
Original file line number Diff line number Diff line change
Expand Up @@ -70,38 +70,48 @@ class CaptionExtractor {
const captionTracks =
playerResponse?.captions?.playerCaptionsTracklistRenderer?.captionTracks;

if (!captionTracks || captionTracks.length === 0) {
throw new Error('No caption tracks found for this video');
}

// Prefer auto-generated English
const rankedTracks = [...captionTracks].sort((a, b) => {
const score = (t) => {
let s = 0;
if (t.languageCode === 'en') s += 10;
if (t.kind === 'asr') s += 5;
return s;
};
return score(b) - score(a);
});

let captionData = null;
for (const track of rankedTracks) {
console.log(` Trying track: ${track.languageCode} (${track.kind || 'manual'})`);
captionData = await this.fetchCaptionData(track.baseUrl, cookieStr, videoId, track);
if (captionData) break;

if (captionTracks && captionTracks.length > 0) {
// Prefer auto-generated English
const rankedTracks = [...captionTracks].sort((a, b) => {
const score = (t) => {
let s = 0;
if (t.languageCode === 'en') s += 10;
if (t.kind === 'asr') s += 5;
return s;
};
return score(b) - score(a);
});

for (const track of rankedTracks) {
console.log(` Trying track: ${track.languageCode} (${track.kind || 'manual'})`);
captionData = await this.fetchCaptionData(track.baseUrl, cookieStr, videoId, track);
if (captionData) break;
}
} else {
console.log(' No caption tracks in page response (likely bot-detection) — trying InnerTube...');
}

if (!captionData) {
console.log(' Page cookies failed, trying InnerTube ANDROID fallback...');
console.log(' Trying InnerTube ANDROID/IOS fallback...');
captionData = await this.fetchViaInnertube(videoId);
}

if (!captionData) {
throw new Error('Could not fetch caption data');
console.log(' Trying youtubei.js fallback...');
const youtubeiSegments = await this.fetchViaYoutubei(videoId);
if (youtubeiSegments) return youtubeiSegments;
}

// Step 3: Parse all segments into a unified timed array
if (process.env.SUPADATA_API_KEY) {
console.log(' Trying Supadata API fallback...');
const supadata = await this.fetchViaSupadata(videoId);
if (supadata) return supadata;
}

if (!captionData) throw new Error('Could not fetch caption data from any source');

return this.parseAllSegments(captionData);
}

Expand Down Expand Up @@ -182,6 +192,31 @@ class CaptionExtractor {

return null;
}
async fetchViaSupadata(videoId) {
try {
const resp = await fetch(
`https://api.supadata.ai/v1/youtube/transcript?videoId=${videoId}&lang=en`,
{ headers: { 'x-api-key': process.env.SUPADATA_API_KEY } }
);
if (!resp.ok) {
console.log(`Supadata failed: ${resp.status}`);
return null;
}
const data = await resp.json();
const content = data?.content;
if (!content || content.length === 0) return null;
console.log(`Supadata succeeded with ${content.length} segments`);
return content.map(s => ({
startSec: (s.offset ?? 0) / 1000,
endSec: ((s.offset ?? 0) + (s.duration ?? 0)) / 1000,
text: s.text ?? '',
})).filter(s => s.text);
} catch (e) {
console.log(` Supadata error: ${e.message}`);
return null;
}
}

async fetchViaInnertube(videoId) {
const clients = [
{ clientName: 'ANDROID', clientVersion: '19.09.37', apiKey: 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w', ua: 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip' },
Expand Down Expand Up @@ -225,6 +260,45 @@ class CaptionExtractor {
return null;
}

async fetchViaYoutubei(videoId) {
try {
const { Innertube } = await import('youtubei.js');
const yt = await Innertube.create();
const info = await yt.getInfo(videoId);

const captionTracks = info.captions?.caption_tracks;
if (!captionTracks || captionTracks.length === 0) {
console.log(' youtubei.js: no caption tracks in video info');
return null;
}

// Caption track URLs contain embedded auth tokens — fetchable from any IP
const track =
captionTracks.find(t => t.language_code === 'en' && t.is_autogenerated) ||
captionTracks.find(t => t.language_code === 'en') ||
captionTracks[0];

console.log(` youtubei.js: using track ${track.language_code} (${track.is_autogenerated ? 'auto' : 'manual'})`);

for (const fmt of ['json3', 'srv3', '']) {
try {
const url = new URL(track.base_url);
if (fmt) url.searchParams.set('fmt', fmt);
const resp = await fetch(url.toString());
if (!resp.ok) continue;
const text = await resp.text();
if (!text || /^<!doctype html/i.test(text) || /^<html/i.test(text)) continue;
return this.parseAllSegments(text);
} catch { continue; }
}

return null;
} catch (e) {
console.log(` youtubei.js failed: ${e.message}`);
return null;
}
}

// Parse caption data (XML or JSON) into array of { startSec, endSec, text }
parseAllSegments(captionData) {
const trimmed = captionData.trimStart();
Expand Down
10 changes: 8 additions & 2 deletions scripts/carousel/CarouselGenerator.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,14 @@ class CarouselGenerator {
await fs.ensureDir(this.outputDir);
}

{
const { launch } = await import('puppeteer');
const { connect, launch } = await import('puppeteer');
const browserlessToken = process.env.BROWSERLESS_TOKEN;

if (browserlessToken) {
const wsEndpoint = `wss://production-sfo.browserless.io?token=${browserlessToken}`;
console.log('Connecting to Browserless.io...');
this.browser = await connect({ browserWSEndpoint: wsEndpoint });
} else {
this.browser = await launch({
headless: true,
args: [
Expand Down