From 1efc4b9916c8a64ea8e8ccd9144423d8a5036335 Mon Sep 17 00:00:00 2001 From: GulSam00 Date: Mon, 28 Apr 2025 23:46:03 +0900 Subject: [PATCH 1/4] =?UTF-8?q?fix=20:=20=EB=8D=B0=EC=9D=B4=ED=84=B0=20?= =?UTF-8?q?=ED=81=AC=EB=A1=A4=EB=A7=81=20=EC=88=98=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- packages/crawling/src/crawlYoutube.ts | 1 + packages/crawling/src/logData.ts | 12 +++++++++++- packages/crawling/src/supabase/getDB.ts | 2 +- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/packages/crawling/src/crawlYoutube.ts b/packages/crawling/src/crawlYoutube.ts index 06de915..6a07e62 100644 --- a/packages/crawling/src/crawlYoutube.ts +++ b/packages/crawling/src/crawlYoutube.ts @@ -24,6 +24,7 @@ async function scrapeSongNumber(query: string) { const karaokeNumber = extractKaraokeNumber(title); // await browser.close(); + return karaokeNumber; } diff --git a/packages/crawling/src/logData.ts b/packages/crawling/src/logData.ts index b81da5b..2f066c5 100644 --- a/packages/crawling/src/logData.ts +++ b/packages/crawling/src/logData.ts @@ -4,7 +4,17 @@ import path from "path"; export function logUnknownData(unknownData: T[], filename: string) { if (unknownData.length === 0) return; const now = new Date(); - const timeString = now.toISOString(); + const timeString = now.toLocaleString("ko-KR", { + timeZone: "Asia/Seoul", + year: "numeric", + month: "2-digit", + day: "2-digit", + hour: "2-digit", + minute: "2-digit", + second: "2-digit", + hour12: false, + }); + const logPath = path.join(filename); // 로그 문자열 생성 diff --git a/packages/crawling/src/supabase/getDB.ts b/packages/crawling/src/supabase/getDB.ts index de407d7..b552a07 100644 --- a/packages/crawling/src/supabase/getDB.ts +++ b/packages/crawling/src/supabase/getDB.ts @@ -39,7 +39,7 @@ export async function getKYNULLDB() { const { data, error } = await supabase .from("songs") .select("id, title, artist, num_tj, num_ky") - .order("title", { ascending: false }); + .order("title", { ascending: true }); if (error) throw error; From 81c2e500267dfd28c44905faa02bc6734d24903b Mon Sep 17 00:00:00 2001 From: GulSam00 Date: Wed, 7 May 2025 19:14:55 +0900 Subject: [PATCH 2/4] =?UTF-8?q?fix=20:=20=ED=94=84=EB=A1=AC=ED=94=84?= =?UTF-8?q?=ED=8A=B8=20=ED=86=A0=ED=81=B0=20171=20->=2067=20=ED=86=A0?= =?UTF-8?q?=ED=81=B0=EC=9C=BC=EB=A1=9C=20=EC=A4=84=EC=97=AC=20=EB=B9=84?= =?UTF-8?q?=EC=9A=A9=20=EC=A0=88=EA=B0=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- packages/crawling/src/postByRelease.ts | 15 ++++++++++----- packages/crawling/src/transChatGPT.ts | 17 +++++++---------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/packages/crawling/src/postByRelease.ts b/packages/crawling/src/postByRelease.ts index 86aa646..2e758d2 100644 --- a/packages/crawling/src/postByRelease.ts +++ b/packages/crawling/src/postByRelease.ts @@ -2,6 +2,7 @@ import { getRelease } from "@repo/open-api"; import { Song } from "./types"; import { postDB } from "./supabase/postDB"; import { logUnknownData } from "./logData"; + const parseMonth = (month: number) => { return month < 10 ? `0${month}` : month; }; @@ -21,8 +22,8 @@ while (year <= 2025) { release: `${year}${parseMonth(month)}`, brand: "tj", }); - // console.log('response', response); - console.log("response", `${year}${parseMonth(month)}`, response?.length); + // console.log("response", response); + // console.log("response", `${year}${parseMonth(month)}`, response?.length); response?.forEach((item) => { const { title, singer, no } = item; songs.push({ title, artist: singer, num_tj: no, num_ky: null }); @@ -32,7 +33,11 @@ while (year <= 2025) { year++; } -const result = await postDB(songs); +console.log("songs", songs.length); + +// TJ 2007~2025 38519곡 + +// const result = await postDB(songs); -logUnknownData(result.success, "log/postByReleaseSuccess.txt"); -logUnknownData(result.failed, "log/postByReleaseFailed.txt"); +// logUnknownData(result.success, "log/postByReleaseSuccess.txt"); +// logUnknownData(result.failed, "log/postByReleaseFailed.txt"); diff --git a/packages/crawling/src/transChatGPT.ts b/packages/crawling/src/transChatGPT.ts index b23b8a1..24e1bc6 100644 --- a/packages/crawling/src/transChatGPT.ts +++ b/packages/crawling/src/transChatGPT.ts @@ -13,16 +13,13 @@ class TranslationAssistant { this.messages = [ { role: "system", - content: `당신은 일본 음악 전문가입니다. 다음 규칙을 철저히 따르세요. - 1. 주어진 일본어 아티스트/곡 이름의 한국어 공식 번역을 제공하세요 - 2. 응답은 다음 형식을 반드시 따를 것: - 번역된 결과 (원문) - 3. 다음 우선순위로 번역을 결정하세요: - - 공식 한국 발매 시 사용된 이름 - - 한국 음악 사이트/미디어에서 통용되는 이름 - - 팬덤에서 일반적으로 사용하는 이름 - 4. 만약 이미 번역된 형태라면 그대로 반환하되, 형식이 다르거나 어색하다면 형식에 맞게 반환할 것 - 5. 확실하지 않은 경우 빈 문자열을 반환`, + content: `You are a Japanese music translator. Follow these rules: + 1. Translate song/artist names to Korean. + 2. Format: Translation (Original) + 3. Priority: Official KR release > Common Korean media name > Korean Fandom name + 4. If already translated, reformat only. + 5. If unsure, return an empty string. + `, }, ]; } From bd0ee9973cb9fd692b346aac8f536e1e3ca0d8bc Mon Sep 17 00:00:00 2001 From: GulSam00 Date: Sat, 10 May 2025 17:52:04 +0900 Subject: [PATCH 3/4] =?UTF-8?q?fix=20:=20=EB=8D=B0=EC=9D=B4=ED=84=B0=20?= =?UTF-8?q?=ED=83=80=EC=9E=85=EC=97=90=20release=20=EC=B6=94=EA=B0=80.=20T?= =?UTF-8?q?J=20=EA=B3=A1=20=EC=9D=BC=EA=B4=84=20=EC=97=85=EB=8D=B0?= =?UTF-8?q?=EC=9D=B4=ED=8A=B8.=20=EA=B8=88=EC=98=81=20=EC=B6=94=EA=B0=80?= =?UTF-8?q?=20=EB=A1=9C=EC=A7=81=20=EC=9E=91=EC=84=B1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/{findKYChatGPT.ts => findKYByGPT.ts} | 0 packages/crawling/src/findKYByOpen.ts | 25 +++++++++++++++++++ packages/crawling/src/postByRelease.ts | 10 ++++---- packages/crawling/src/supabase/getDB.ts | 2 ++ packages/crawling/src/supabase/updateDB.ts | 3 ++- packages/crawling/src/types.ts | 1 + packages/crawling/src/updateJpnSongs.ts | 18 ++++++------- 7 files changed, 44 insertions(+), 15 deletions(-) rename packages/crawling/src/{findKYChatGPT.ts => findKYByGPT.ts} (100%) create mode 100644 packages/crawling/src/findKYByOpen.ts diff --git a/packages/crawling/src/findKYChatGPT.ts b/packages/crawling/src/findKYByGPT.ts similarity index 100% rename from packages/crawling/src/findKYChatGPT.ts rename to packages/crawling/src/findKYByGPT.ts diff --git a/packages/crawling/src/findKYByOpen.ts b/packages/crawling/src/findKYByOpen.ts new file mode 100644 index 0000000..802b377 --- /dev/null +++ b/packages/crawling/src/findKYByOpen.ts @@ -0,0 +1,25 @@ +import { getSong } from "@repo/open-api"; +import { Song } from "./types"; +import { updateKYDB } from "./supabase/updateDB"; + +const getKYByOpen = async (song: Song) => { + const { title, artist } = song; + const response = await getSong({ title, brand: "kumyoung" }); + + const trimArtist = artist.trim(); + // 가수 일치하거나 비슷한지 조회 + if (response && response.length > 0) { + const filteredResponse = response.filter((item) => { + const artistName = item.singer.trim(); + return artistName.includes(trimArtist); + }); + + if (filteredResponse.length === 1) { + const kyNum = filteredResponse[0].no; + await updateKYDB({ ...song, num_ky: kyNum }); + // return { ...song, num_ky: kyNum }; + } + } + + return null; +}; diff --git a/packages/crawling/src/postByRelease.ts b/packages/crawling/src/postByRelease.ts index 2e758d2..a778dae 100644 --- a/packages/crawling/src/postByRelease.ts +++ b/packages/crawling/src/postByRelease.ts @@ -25,8 +25,8 @@ while (year <= 2025) { // console.log("response", response); // console.log("response", `${year}${parseMonth(month)}`, response?.length); response?.forEach((item) => { - const { title, singer, no } = item; - songs.push({ title, artist: singer, num_tj: no, num_ky: null }); + const { title, singer, no, release } = item; + songs.push({ title, artist: singer, num_tj: no, num_ky: null, release }); }); month++; } @@ -37,7 +37,7 @@ console.log("songs", songs.length); // TJ 2007~2025 38519곡 -// const result = await postDB(songs); +const result = await postDB(songs); -// logUnknownData(result.success, "log/postByReleaseSuccess.txt"); -// logUnknownData(result.failed, "log/postByReleaseFailed.txt"); +logUnknownData(result.success, "log/postByReleaseSuccess.txt"); +logUnknownData(result.failed, "log/postByReleaseFailed.txt"); diff --git a/packages/crawling/src/supabase/getDB.ts b/packages/crawling/src/supabase/getDB.ts index b552a07..b95ca64 100644 --- a/packages/crawling/src/supabase/getDB.ts +++ b/packages/crawling/src/supabase/getDB.ts @@ -43,6 +43,8 @@ export async function getKYNULLDB() { if (error) throw error; + console.log("data", data.length); + const isKYNULLData: Song[] = []; data.forEach((song) => { diff --git a/packages/crawling/src/supabase/updateDB.ts b/packages/crawling/src/supabase/updateDB.ts index 8afb9d5..526be2f 100644 --- a/packages/crawling/src/supabase/updateDB.ts +++ b/packages/crawling/src/supabase/updateDB.ts @@ -4,7 +4,8 @@ import { Song, TransSong } from "../types"; export const updateJpnDB = async (song: TransSong) => { const supabase = getClient(); - if (song.isArtistJp || song.isTitleJp) { + // if (song.isArtistJp || song.isTitleJp) { + if (song.isTitleJp) { const { data, error } = await supabase .from("songs") .update({ title: song.title, artist: song.artist }) diff --git a/packages/crawling/src/types.ts b/packages/crawling/src/types.ts index df33ca6..91000bc 100644 --- a/packages/crawling/src/types.ts +++ b/packages/crawling/src/types.ts @@ -12,6 +12,7 @@ export interface Song { artist: string; num_tj: string | null; num_ky: string | null; + release?: string; } export interface TransSong extends Song { diff --git a/packages/crawling/src/updateJpnSongs.ts b/packages/crawling/src/updateJpnSongs.ts index 3372b83..106810a 100644 --- a/packages/crawling/src/updateJpnSongs.ts +++ b/packages/crawling/src/updateJpnSongs.ts @@ -29,15 +29,15 @@ for (const song of data) { newSong.title = titleTrans; } } - if (song.isArtistJp) { - const artistTrans = await transChatGPT(song.artist); - if (!artistTrans || artistTrans.length === 0) { - unknownData.push({ ...song, type: "artist" }); - } else { - newSong.artist = artistTrans; - } - } - if (newSong.isTitleJp || newSong.isArtistJp) { + // if (song.isArtistJp) { + // const artistTrans = await transChatGPT(song.artist); + // if (!artistTrans || artistTrans.length === 0) { + // unknownData.push({ ...song, type: "artist" }); + // } else { + // newSong.artist = artistTrans; + // } + // } + if (newSong.isTitleJp) { transData.push(newSong); } } From 927fee320df8f66d35938445f2cb90b8b3b500f0 Mon Sep 17 00:00:00 2001 From: GulSam00 Date: Sun, 11 May 2025 00:29:34 +0900 Subject: [PATCH 4/4] =?UTF-8?q?fix=20:=20=EA=B8=88=EC=98=81=20=EA=B3=A1=20?= =?UTF-8?q?=EC=97=85=EB=8D=B0=EC=9D=B4=ED=8A=B8=20=EB=B0=A9=EC=8B=9D=20?= =?UTF-8?q?=EB=B3=80=EA=B2=BD.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 1 + packages/crawling/log/findKYByOpenSuccess.txt | 6 ++ packages/crawling/src/findKYByOpen.ts | 71 ++++++++++++++++--- packages/crawling/src/logData.ts | 24 ++++--- 4 files changed, 85 insertions(+), 17 deletions(-) create mode 100644 packages/crawling/log/findKYByOpenSuccess.txt diff --git a/.gitignore b/.gitignore index fc5ff52..eb29958 100644 --- a/.gitignore +++ b/.gitignore @@ -39,6 +39,7 @@ yarn-error.log* *.pem # Crawling +log/* crawlYoutubeSuccess.txt crawlYoutubeFailed.txt postByReleaseSuccess.txt diff --git a/packages/crawling/log/findKYByOpenSuccess.txt b/packages/crawling/log/findKYByOpenSuccess.txt new file mode 100644 index 0000000..bb443f7 --- /dev/null +++ b/packages/crawling/log/findKYByOpenSuccess.txt @@ -0,0 +1,6 @@ + +[2025. 05. 11. 00:06:18] + + +[2025. 05. 11. 00:06:18] + diff --git a/packages/crawling/src/findKYByOpen.ts b/packages/crawling/src/findKYByOpen.ts index 802b377..062d9e0 100644 --- a/packages/crawling/src/findKYByOpen.ts +++ b/packages/crawling/src/findKYByOpen.ts @@ -1,25 +1,78 @@ -import { getSong } from "@repo/open-api"; +import { getSong, getSinger } from "@repo/open-api"; import { Song } from "./types"; import { updateKYDB } from "./supabase/updateDB"; +import { getKYNULLDB } from "./supabase/getDB"; +import { logUnknownData } from "./logData"; -const getKYByOpen = async (song: Song) => { - const { title, artist } = song; - const response = await getSong({ title, brand: "kumyoung" }); +const resultsLog = { + success: [] as Song[], + failed: [] as { song: Song; error: any }[], +}; +const updateKYByOpen = async (song: Song) => { + const { title, artist } = song; + const trimTitle = title.trim(); const trimArtist = artist.trim(); + // console.log(artist, "-", title); + + const response = await getSong({ title: trimTitle, brand: "kumyoung" }); + + if (!response || response.length === 0 || !Array.isArray(response)) { + resultsLog.failed.push({ song, error: "there is no kumyoung song" }); + return null; + } + // 가수 일치하거나 비슷한지 조회 - if (response && response.length > 0) { + console.log("금영 title 일치 개수 ", response.length, "개"); + + // console.log(response); + if (response && response.length > 1) { + // filter의 includes 만으로는 완벽 비교 불가. chatGPT API를 활용해야 할까...? const filteredResponse = response.filter((item) => { const artistName = item.singer.trim(); return artistName.includes(trimArtist); }); + console.log(filteredResponse); if (filteredResponse.length === 1) { const kyNum = filteredResponse[0].no; - await updateKYDB({ ...song, num_ky: kyNum }); - // return { ...song, num_ky: kyNum }; + // console.log("filteredResponse kyNum", kyNum); + const result = await updateKYDB({ ...song, num_ky: kyNum }); + if (result) { + resultsLog.success.push({ ...song, num_ky: kyNum }); + } else { + resultsLog.failed.push({ song, error: "supabase update failed" }); + } + } else { + console.log("필터링 실패"); + } + } else { + const kyNum = response[0].no; + // console.log("response kyNum", kyNum); + const result = await updateKYDB({ ...song, num_ky: kyNum }); + if (result) { + resultsLog.success.push({ ...song, num_ky: kyNum }); + } else { + resultsLog.failed.push({ song, error: "supabase update failed" }); } } - - return null; }; + +const kyNullData = await getKYNULLDB(); +console.log("kyNullData", kyNullData.length); + +for (const song of kyNullData) { + await updateKYByOpen(song); +} + +// 1차 시도 +// 6079개 업데이트 + +console.log(` + 총 ${kyNullData.length}곡 중: + - 성공: ${resultsLog.success.length}곡 + - 실패: ${resultsLog.failed.length}곡 + `); + +logUnknownData(resultsLog.success, "log/findKYByOpenSuccess.txt"); +logUnknownData(resultsLog.failed, "log/findKYByOpenSuccess.txt"); diff --git a/packages/crawling/src/logData.ts b/packages/crawling/src/logData.ts index 2f066c5..89a5d85 100644 --- a/packages/crawling/src/logData.ts +++ b/packages/crawling/src/logData.ts @@ -1,8 +1,8 @@ import fs from "fs"; import path from "path"; -export function logUnknownData(unknownData: T[], filename: string) { - if (unknownData.length === 0) return; +export function logUnknownData(unknownData: T[] | T, filename: string) { + if (!unknownData) return; const now = new Date(); const timeString = now.toLocaleString("ko-KR", { timeZone: "Asia/Seoul", @@ -17,11 +17,19 @@ export function logUnknownData(unknownData: T[], filename: string) { const logPath = path.join(filename); - // 로그 문자열 생성 - const logString = - `\n[${timeString}]\n` + - unknownData.map((item) => JSON.stringify(item)).join("\n") + - "\n"; + if (unknownData instanceof Array) { + // 로그 문자열 생성 + const logString = + `\n[${timeString}]\n` + + unknownData.map((item) => JSON.stringify(item)).join("\n") + + "\n"; - fs.appendFileSync(logPath, logString, "utf-8"); + fs.appendFileSync(logPath, logString, "utf-8"); + } else { + // 로그 문자열 생성 + const logString = + `\n[${timeString}]\n` + JSON.stringify(unknownData) + "\n"; + + fs.appendFileSync(logPath, logString, "utf-8"); + } }