diff --git a/.gitignore b/.gitignore index fc5ff52..eb29958 100644 --- a/.gitignore +++ b/.gitignore @@ -39,6 +39,7 @@ yarn-error.log* *.pem # Crawling +log/* crawlYoutubeSuccess.txt crawlYoutubeFailed.txt postByReleaseSuccess.txt diff --git a/packages/crawling/log/findKYByOpenSuccess.txt b/packages/crawling/log/findKYByOpenSuccess.txt new file mode 100644 index 0000000..bb443f7 --- /dev/null +++ b/packages/crawling/log/findKYByOpenSuccess.txt @@ -0,0 +1,6 @@ + +[2025. 05. 11. 00:06:18] + + +[2025. 05. 11. 00:06:18] + diff --git a/packages/crawling/src/crawlYoutube.ts b/packages/crawling/src/crawlYoutube.ts index 06de915..6a07e62 100644 --- a/packages/crawling/src/crawlYoutube.ts +++ b/packages/crawling/src/crawlYoutube.ts @@ -24,6 +24,7 @@ async function scrapeSongNumber(query: string) { const karaokeNumber = extractKaraokeNumber(title); // await browser.close(); + return karaokeNumber; } diff --git a/packages/crawling/src/findKYChatGPT.ts b/packages/crawling/src/findKYByGPT.ts similarity index 100% rename from packages/crawling/src/findKYChatGPT.ts rename to packages/crawling/src/findKYByGPT.ts diff --git a/packages/crawling/src/findKYByOpen.ts b/packages/crawling/src/findKYByOpen.ts new file mode 100644 index 0000000..062d9e0 --- /dev/null +++ b/packages/crawling/src/findKYByOpen.ts @@ -0,0 +1,78 @@ +import { getSong, getSinger } from "@repo/open-api"; +import { Song } from "./types"; +import { updateKYDB } from "./supabase/updateDB"; +import { getKYNULLDB } from "./supabase/getDB"; +import { logUnknownData } from "./logData"; + +const resultsLog = { + success: [] as Song[], + failed: [] as { song: Song; error: any }[], +}; + +const updateKYByOpen = async (song: Song) => { + const { title, artist } = song; + const trimTitle = title.trim(); + const trimArtist = artist.trim(); + // console.log(artist, "-", title); + + const response = await getSong({ title: trimTitle, brand: "kumyoung" }); + + if (!response || response.length === 0 || !Array.isArray(response)) { + resultsLog.failed.push({ song, error: "there is no kumyoung song" }); + return null; + } + + // 가수 일치하거나 비슷한지 조회 + console.log("금영 title 일치 개수 ", response.length, "개"); + + // console.log(response); + if (response && response.length > 1) { + // filter의 includes 만으로는 완벽 비교 불가. chatGPT API를 활용해야 할까...? + const filteredResponse = response.filter((item) => { + const artistName = item.singer.trim(); + return artistName.includes(trimArtist); + }); + console.log(filteredResponse); + + if (filteredResponse.length === 1) { + const kyNum = filteredResponse[0].no; + // console.log("filteredResponse kyNum", kyNum); + const result = await updateKYDB({ ...song, num_ky: kyNum }); + if (result) { + resultsLog.success.push({ ...song, num_ky: kyNum }); + } else { + resultsLog.failed.push({ song, error: "supabase update failed" }); + } + } else { + console.log("필터링 실패"); + } + } else { + const kyNum = response[0].no; + // console.log("response kyNum", kyNum); + const result = await updateKYDB({ ...song, num_ky: kyNum }); + if (result) { + resultsLog.success.push({ ...song, num_ky: kyNum }); + } else { + resultsLog.failed.push({ song, error: "supabase update failed" }); + } + } +}; + +const kyNullData = await getKYNULLDB(); +console.log("kyNullData", kyNullData.length); + +for (const song of kyNullData) { + await updateKYByOpen(song); +} + +// 1차 시도 +// 6079개 업데이트 + +console.log(` + 총 ${kyNullData.length}곡 중: + - 성공: ${resultsLog.success.length}곡 + - 실패: ${resultsLog.failed.length}곡 + `); + +logUnknownData(resultsLog.success, "log/findKYByOpenSuccess.txt"); +logUnknownData(resultsLog.failed, "log/findKYByOpenSuccess.txt"); diff --git a/packages/crawling/src/logData.ts b/packages/crawling/src/logData.ts index b81da5b..89a5d85 100644 --- a/packages/crawling/src/logData.ts +++ b/packages/crawling/src/logData.ts @@ -1,17 +1,35 @@ import fs from "fs"; import path from "path"; -export function logUnknownData(unknownData: T[], filename: string) { - if (unknownData.length === 0) return; +export function logUnknownData(unknownData: T[] | T, filename: string) { + if (!unknownData) return; const now = new Date(); - const timeString = now.toISOString(); + const timeString = now.toLocaleString("ko-KR", { + timeZone: "Asia/Seoul", + year: "numeric", + month: "2-digit", + day: "2-digit", + hour: "2-digit", + minute: "2-digit", + second: "2-digit", + hour12: false, + }); + const logPath = path.join(filename); - // 로그 문자열 생성 - const logString = - `\n[${timeString}]\n` + - unknownData.map((item) => JSON.stringify(item)).join("\n") + - "\n"; + if (unknownData instanceof Array) { + // 로그 문자열 생성 + const logString = + `\n[${timeString}]\n` + + unknownData.map((item) => JSON.stringify(item)).join("\n") + + "\n"; + + fs.appendFileSync(logPath, logString, "utf-8"); + } else { + // 로그 문자열 생성 + const logString = + `\n[${timeString}]\n` + JSON.stringify(unknownData) + "\n"; - fs.appendFileSync(logPath, logString, "utf-8"); + fs.appendFileSync(logPath, logString, "utf-8"); + } } diff --git a/packages/crawling/src/postByRelease.ts b/packages/crawling/src/postByRelease.ts index 86aa646..a778dae 100644 --- a/packages/crawling/src/postByRelease.ts +++ b/packages/crawling/src/postByRelease.ts @@ -2,6 +2,7 @@ import { getRelease } from "@repo/open-api"; import { Song } from "./types"; import { postDB } from "./supabase/postDB"; import { logUnknownData } from "./logData"; + const parseMonth = (month: number) => { return month < 10 ? `0${month}` : month; }; @@ -21,17 +22,21 @@ while (year <= 2025) { release: `${year}${parseMonth(month)}`, brand: "tj", }); - // console.log('response', response); - console.log("response", `${year}${parseMonth(month)}`, response?.length); + // console.log("response", response); + // console.log("response", `${year}${parseMonth(month)}`, response?.length); response?.forEach((item) => { - const { title, singer, no } = item; - songs.push({ title, artist: singer, num_tj: no, num_ky: null }); + const { title, singer, no, release } = item; + songs.push({ title, artist: singer, num_tj: no, num_ky: null, release }); }); month++; } year++; } +console.log("songs", songs.length); + +// TJ 2007~2025 38519곡 + const result = await postDB(songs); logUnknownData(result.success, "log/postByReleaseSuccess.txt"); diff --git a/packages/crawling/src/supabase/getDB.ts b/packages/crawling/src/supabase/getDB.ts index de407d7..b95ca64 100644 --- a/packages/crawling/src/supabase/getDB.ts +++ b/packages/crawling/src/supabase/getDB.ts @@ -39,10 +39,12 @@ export async function getKYNULLDB() { const { data, error } = await supabase .from("songs") .select("id, title, artist, num_tj, num_ky") - .order("title", { ascending: false }); + .order("title", { ascending: true }); if (error) throw error; + console.log("data", data.length); + const isKYNULLData: Song[] = []; data.forEach((song) => { diff --git a/packages/crawling/src/supabase/updateDB.ts b/packages/crawling/src/supabase/updateDB.ts index 8afb9d5..526be2f 100644 --- a/packages/crawling/src/supabase/updateDB.ts +++ b/packages/crawling/src/supabase/updateDB.ts @@ -4,7 +4,8 @@ import { Song, TransSong } from "../types"; export const updateJpnDB = async (song: TransSong) => { const supabase = getClient(); - if (song.isArtistJp || song.isTitleJp) { + // if (song.isArtistJp || song.isTitleJp) { + if (song.isTitleJp) { const { data, error } = await supabase .from("songs") .update({ title: song.title, artist: song.artist }) diff --git a/packages/crawling/src/transChatGPT.ts b/packages/crawling/src/transChatGPT.ts index b23b8a1..24e1bc6 100644 --- a/packages/crawling/src/transChatGPT.ts +++ b/packages/crawling/src/transChatGPT.ts @@ -13,16 +13,13 @@ class TranslationAssistant { this.messages = [ { role: "system", - content: `당신은 일본 음악 전문가입니다. 다음 규칙을 철저히 따르세요. - 1. 주어진 일본어 아티스트/곡 이름의 한국어 공식 번역을 제공하세요 - 2. 응답은 다음 형식을 반드시 따를 것: - 번역된 결과 (원문) - 3. 다음 우선순위로 번역을 결정하세요: - - 공식 한국 발매 시 사용된 이름 - - 한국 음악 사이트/미디어에서 통용되는 이름 - - 팬덤에서 일반적으로 사용하는 이름 - 4. 만약 이미 번역된 형태라면 그대로 반환하되, 형식이 다르거나 어색하다면 형식에 맞게 반환할 것 - 5. 확실하지 않은 경우 빈 문자열을 반환`, + content: `You are a Japanese music translator. Follow these rules: + 1. Translate song/artist names to Korean. + 2. Format: Translation (Original) + 3. Priority: Official KR release > Common Korean media name > Korean Fandom name + 4. If already translated, reformat only. + 5. If unsure, return an empty string. + `, }, ]; } diff --git a/packages/crawling/src/types.ts b/packages/crawling/src/types.ts index df33ca6..91000bc 100644 --- a/packages/crawling/src/types.ts +++ b/packages/crawling/src/types.ts @@ -12,6 +12,7 @@ export interface Song { artist: string; num_tj: string | null; num_ky: string | null; + release?: string; } export interface TransSong extends Song { diff --git a/packages/crawling/src/updateJpnSongs.ts b/packages/crawling/src/updateJpnSongs.ts index 3372b83..106810a 100644 --- a/packages/crawling/src/updateJpnSongs.ts +++ b/packages/crawling/src/updateJpnSongs.ts @@ -29,15 +29,15 @@ for (const song of data) { newSong.title = titleTrans; } } - if (song.isArtistJp) { - const artistTrans = await transChatGPT(song.artist); - if (!artistTrans || artistTrans.length === 0) { - unknownData.push({ ...song, type: "artist" }); - } else { - newSong.artist = artistTrans; - } - } - if (newSong.isTitleJp || newSong.isArtistJp) { + // if (song.isArtistJp) { + // const artistTrans = await transChatGPT(song.artist); + // if (!artistTrans || artistTrans.length === 0) { + // unknownData.push({ ...song, type: "artist" }); + // } else { + // newSong.artist = artistTrans; + // } + // } + if (newSong.isTitleJp) { transData.push(newSong); } }