Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ yarn-error.log*
*.pem

# Crawling
log/*
crawlYoutubeSuccess.txt
crawlYoutubeFailed.txt
postByReleaseSuccess.txt
Expand Down
6 changes: 6 additions & 0 deletions packages/crawling/log/findKYByOpenSuccess.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@

[2025. 05. 11. 00:06:18]


[2025. 05. 11. 00:06:18]

1 change: 1 addition & 0 deletions packages/crawling/src/crawlYoutube.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ async function scrapeSongNumber(query: string) {
const karaokeNumber = extractKaraokeNumber(title);

// await browser.close();

return karaokeNumber;
}

Expand Down
78 changes: 78 additions & 0 deletions packages/crawling/src/findKYByOpen.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import { getSong, getSinger } from "@repo/open-api";
import { Song } from "./types";
import { updateKYDB } from "./supabase/updateDB";
import { getKYNULLDB } from "./supabase/getDB";
import { logUnknownData } from "./logData";

/**
 * In-memory record of this run's outcomes.
 *
 * - `success`: songs whose Kumyoung number was written to the database.
 * - `failed`: songs that could not be updated, paired with the reason.
 *   `error` is `unknown` so consumers must narrow it before use; entries
 *   pushed in this file use plain string messages.
 */
const resultsLog: {
  success: Song[];
  failed: { song: Song; error: unknown }[];
} = {
  success: [],
  failed: [],
};

/**
 * Persists a resolved Kumyoung number for `song` and records the outcome
 * in `resultsLog` (success when `updateKYDB` returns a truthy value,
 * failure otherwise).
 */
const saveKYNumber = async (
  song: Song,
  kyNum: Song["num_ky"],
): Promise<void> => {
  const updatedSong = { ...song, num_ky: kyNum };
  const result = await updateKYDB(updatedSong);
  if (result) {
    resultsLog.success.push(updatedSong);
  } else {
    resultsLog.failed.push({ song, error: "supabase update failed" });
  }
};

/**
 * Looks up the Kumyoung number for one song through the open API and
 * stores it in the database.
 *
 * Resolution rules:
 * 1. Search Kumyoung songs by the trimmed title.
 * 2. No result (or a non-array response) -> recorded as failed.
 * 3. Exactly one result -> its number is used as-is.
 *    NOTE(review): the artist is not compared in this case, so a
 *    same-titled song by another artist would be accepted — confirm intended.
 * 4. Several results -> keep those whose singer contains the trimmed artist;
 *    the number is used only when exactly one candidate remains, otherwise
 *    the song is recorded as failed instead of being silently dropped.
 *
 * @param song Song row to resolve; `title` and `artist` are trimmed before use.
 * @returns Resolves once the outcome has been recorded in `resultsLog`.
 */
const updateKYByOpen = async (song: Song): Promise<void> => {
  const trimTitle = song.title.trim();
  const trimArtist = song.artist.trim();

  const response = await getSong({ title: trimTitle, brand: "kumyoung" });

  // Check the shape first so `.length` is only read on a real array.
  if (!Array.isArray(response) || response.length === 0) {
    resultsLog.failed.push({ song, error: "there is no kumyoung song" });
    return;
  }

  console.log("금영 title 일치 개수 ", response.length, "개");

  if (response.length === 1) {
    await saveKYNumber(song, response[0].no);
    return;
  }

  // Substring matching is only an approximation of "same artist"; a fuzzier
  // comparison (e.g. via an LLM) may be needed for full accuracy.
  const filteredResponse = response.filter((item) =>
    item.singer.trim().includes(trimArtist),
  );
  console.log(filteredResponse);

  if (filteredResponse.length !== 1) {
    console.log("필터링 실패");
    resultsLog.failed.push({
      song,
      error: `artist filter matched ${filteredResponse.length} of ${response.length} kumyoung candidates`,
    });
    return;
  }

  await saveKYNumber(song, filteredResponse[0].no);
};

// Driver script: load the songs to resolve, process them, then report.
// NOTE(review): getKYNULLDB presumably returns only rows whose num_ky is
// null — confirm against its implementation.
const kyNullData = await getKYNULLDB();
console.log("kyNullData", kyNullData.length);

// Songs are processed one at a time; each call awaits the API lookup and
// the database update before the next song starts.
for (const song of kyNullData) {
  await updateKYByOpen(song);
}

// First attempt: 6079 rows updated.

console.log(`
총 ${kyNullData.length}곡 중:
- 성공: ${resultsLog.success.length}곡
- 실패: ${resultsLog.failed.length}곡
`);

// Successes and failures go to separate files so the failed list can be
// reviewed or retried without digging through the success entries.
logUnknownData(resultsLog.success, "log/findKYByOpenSuccess.txt");
logUnknownData(resultsLog.failed, "log/findKYByOpenFailed.txt");
36 changes: 27 additions & 9 deletions packages/crawling/src/logData.ts
Original file line number Diff line number Diff line change
@@ -1,17 +1,35 @@
import fs from "fs";
import path from "path";

/**
 * Appends a timestamped entry to a log file.
 *
 * Each entry has the form:
 *
 *     <blank line>
 *     [<timestamp in ko-KR format, Asia/Seoul time zone>]
 *     <one JSON document per item>
 *
 * Nothing is written when `unknownData` is falsy or an empty array, so runs
 * with no data do not leave timestamp-only entries in the log.
 *
 * @param unknownData A single value or an array of values; each one is
 *   serialized with `JSON.stringify` on its own line.
 * @param filename Path of the log file. Missing parent directories are
 *   created before appending.
 */
export function logUnknownData<T>(unknownData: T[] | T, filename: string): void {
  if (!unknownData) return;

  // Normalize to an array so both call shapes share one write path.
  const items: unknown[] = Array.isArray(unknownData)
    ? unknownData
    : [unknownData];
  if (items.length === 0) return;

  const timeString = new Date().toLocaleString("ko-KR", {
    timeZone: "Asia/Seoul",
    year: "numeric",
    month: "2-digit",
    day: "2-digit",
    hour: "2-digit",
    minute: "2-digit",
    second: "2-digit",
    hour12: false,
  });

  const logPath = path.join(filename);

  // appendFileSync creates the file but not its directory.
  fs.mkdirSync(path.dirname(logPath), { recursive: true });

  const logString =
    `\n[${timeString}]\n` +
    items.map((item) => JSON.stringify(item)).join("\n") +
    "\n";

  fs.appendFileSync(logPath, logString, "utf-8");
}
13 changes: 9 additions & 4 deletions packages/crawling/src/postByRelease.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { getRelease } from "@repo/open-api";
import { Song } from "./types";
import { postDB } from "./supabase/postDB";
import { logUnknownData } from "./logData";

/**
 * Formats a month number as a two-digit string (e.g. `3` -> `"03"`,
 * `11` -> `"11"`) for building `YYYYMM` release keys.
 *
 * Always returns a string, so callers get one consistent type regardless
 * of whether padding was needed.
 *
 * @param month Month number; expected to be an integer in 1-12.
 * @returns The month left-padded with `0` to at least two characters.
 */
const parseMonth = (month: number): string => String(month).padStart(2, "0");
Expand All @@ -21,17 +22,21 @@ while (year <= 2025) {
release: `${year}${parseMonth(month)}`,
brand: "tj",
});
// console.log('response', response);
console.log("response", `${year}${parseMonth(month)}`, response?.length);
// console.log("response", response);
// console.log("response", `${year}${parseMonth(month)}`, response?.length);
response?.forEach((item) => {
const { title, singer, no } = item;
songs.push({ title, artist: singer, num_tj: no, num_ky: null });
const { title, singer, no, release } = item;
songs.push({ title, artist: singer, num_tj: no, num_ky: null, release });
});
month++;
}
year++;
}

console.log("songs", songs.length);

// TJ 2007~2025 38519곡

const result = await postDB(songs);

logUnknownData(result.success, "log/postByReleaseSuccess.txt");
Expand Down
4 changes: 3 additions & 1 deletion packages/crawling/src/supabase/getDB.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,12 @@ export async function getKYNULLDB() {
const { data, error } = await supabase
.from("songs")
.select("id, title, artist, num_tj, num_ky")
.order("title", { ascending: false });
.order("title", { ascending: true });

if (error) throw error;

console.log("data", data.length);

const isKYNULLData: Song[] = [];

data.forEach((song) => {
Expand Down
3 changes: 2 additions & 1 deletion packages/crawling/src/supabase/updateDB.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ import { Song, TransSong } from "../types";
export const updateJpnDB = async (song: TransSong) => {
const supabase = getClient();

if (song.isArtistJp || song.isTitleJp) {
// if (song.isArtistJp || song.isTitleJp) {
if (song.isTitleJp) {
const { data, error } = await supabase
.from("songs")
.update({ title: song.title, artist: song.artist })
Expand Down
17 changes: 7 additions & 10 deletions packages/crawling/src/transChatGPT.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,13 @@ class TranslationAssistant {
this.messages = [
{
role: "system",
content: `당신은 일본 음악 전문가입니다. 다음 규칙을 철저히 따르세요.
1. 주어진 일본어 아티스트/곡 이름의 한국어 공식 번역을 제공하세요
2. 응답은 다음 형식을 반드시 따를 것:
번역된 결과 (원문)
3. 다음 우선순위로 번역을 결정하세요:
- 공식 한국 발매 시 사용된 이름
- 한국 음악 사이트/미디어에서 통용되는 이름
- 팬덤에서 일반적으로 사용하는 이름
4. 만약 이미 번역된 형태라면 그대로 반환하되, 형식이 다르거나 어색하다면 형식에 맞게 반환할 것
5. 확실하지 않은 경우 빈 문자열을 반환`,
content: `You are a Japanese music translator. Follow these rules:
1. Translate song/artist names to Korean.
2. Format: Translation (Original)
3. Priority: Official KR release > Common Korean media name > Korean Fandom name
4. If already translated, reformat only.
5. If unsure, return an empty string.
`,
},
];
}
Expand Down
1 change: 1 addition & 0 deletions packages/crawling/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ export interface Song {
artist: string;
num_tj: string | null;
num_ky: string | null;
release?: string;
}

export interface TransSong extends Song {
Expand Down
18 changes: 9 additions & 9 deletions packages/crawling/src/updateJpnSongs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,15 @@ for (const song of data) {
newSong.title = titleTrans;
}
}
if (song.isArtistJp) {
const artistTrans = await transChatGPT(song.artist);
if (!artistTrans || artistTrans.length === 0) {
unknownData.push({ ...song, type: "artist" });
} else {
newSong.artist = artistTrans;
}
}
if (newSong.isTitleJp || newSong.isArtistJp) {
// if (song.isArtistJp) {
// const artistTrans = await transChatGPT(song.artist);
// if (!artistTrans || artistTrans.length === 0) {
// unknownData.push({ ...song, type: "artist" });
// } else {
// newSong.artist = artistTrans;
// }
// }
if (newSong.isTitleJp) {
transData.push(newSong);
}
}
Expand Down