Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -39,5 +39,5 @@ yarn-error.log*
*.pem

# Crawling
**/logs/*.txt
**/log/*.txt

9 changes: 8 additions & 1 deletion apps/web/next.config.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
import withBundleAnalyzer from '@next/bundle-analyzer';
import type { NextConfig } from 'next';

const nextConfig: NextConfig = {
  /* config options here */
};

/**
 * Wraps the Next.js config with @next/bundle-analyzer.
 *
 * The analyzer is opt-in: it only runs (and only opens the report in a
 * browser) when the build is started with `ANALYZE=true`. Leaving it
 * unconditionally enabled would generate and open reports on every build,
 * including CI and production builds.
 */
const withBundle = withBundleAnalyzer({
  enabled: process.env.ANALYZE === 'true',
  openAnalyzer: true,
});

// A module may have only one default export: export the wrapped config.
export default withBundle(nextConfig);
1 change: 1 addition & 0 deletions apps/web/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
"@dnd-kit/modifiers": "^9.0.0",
"@dnd-kit/sortable": "^10.0.0",
"@dnd-kit/utilities": "^3.2.2",
"@next/bundle-analyzer": "^15.3.2",
"@radix-ui/react-checkbox": "^1.1.5",
"@radix-ui/react-dialog": "^1.1.6",
"@radix-ui/react-dropdown-menu": "^2.1.6",
Expand Down
6 changes: 6 additions & 0 deletions packages/crawling/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@
"exports": {
".": "./src/index.js"
},
"scripts": {
"dev-ky-open": "tsx src/findKYByOpen.ts",
"dev-ky-youtube": "tsx src/crawlYoutube.ts",
"lint": "eslint . --ext .ts,.js",
"test": "vitest run"
},
"dependencies": {
"@repo/open-api": "workspace:*",
"@supabase/supabase-js": "^2.49.1",
Expand Down
79 changes: 62 additions & 17 deletions packages/crawling/src/crawlYoutube.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,57 +5,102 @@ import { Song } from "./types";
import { updateDataLog } from "./logData";
import { updateKYDB } from "./supabase/updateDB";

// Songs resolved since the last flush; refreshData() uploads this buffer and then empties it.
const stackData: Song[] = [];
// Every song resolved during the run (pushed to alongside stackData, never cleared in this file).
const totalData: Song[] = [];

// Disabled SIGINT handler: on interrupt it would upload the pending batch and write the logs.
// NOTE(review): `failedCase` is not defined in the visible code and the success log path is
// misspelled ("crawlYodutubeSuccess") — fix both before re-enabling.
// process.on("SIGINT", async () => {
// console.log("Process is terminating. Updating the data collected so far...");
// console.log("stackData : ", stackData.length);
// const result = await updateKYDB(stackData);

// console.log(result);
// console.log("Process is terminating. Writing logs...");

// await Promise.all([
// updateDataLog(totalData, "log/crawlYodutubeSuccess.txt"),
// updateDataLog(failedCase, "log/crawlYoutubeFailed.txt"),
// ]);

// console.log("Log writing complete.");
// });

// One Puppeteer browser and page, shared by every scrapeSongNumber() call.
const browser = await puppeteer.launch();
const page = await browser.newPage();

// Search page of the @KARAOKEKY YouTube channel; the query string is appended per song.
const baseUrl = "https://www.youtube.com/@KARAOKEKY/search";

/**
 * Searches the KY karaoke YouTube channel for `query` and extracts the KY song
 * number from the title of the first video result.
 *
 * @param query - Search text, typically "<title>-<artist>".
 * @returns The KY number as a string of digits, or `null` when there is no
 *          result or the first result's title carries no "KY. <number>)" tag.
 */
const scrapeSongNumber = async (query: string): Promise<string | null> => {
  const searchUrl = `${baseUrl}?query=${encodeURIComponent(query)}`;

  // Navigate exactly once. The results are rendered client-side, so wait for
  // the network to go (almost) idle before reading the DOM; the default
  // waitUntil returned too early.
  // NOTE(review): `timeout: 0` disables the navigation timeout, so a stalled
  // page load blocks the crawl indefinitely — consider a finite timeout.
  await page.goto(searchUrl, {
    waitUntil: "networkidle2",
    timeout: 0,
  });

  const html = await page.content();
  const $ = cheerio.load(html);

  // First video entry in the search results.
  const firstItem = $("ytd-video-renderer").first();

  // The first yt-formatted-string inside that entry holds the video title.
  const title = firstItem.find("yt-formatted-string").first().text().trim();

  // The shared browser is deliberately not closed here: it is reused for
  // every query.
  return extractKaraokeNumber(title);
};

/**
 * Extracts the KY karaoke number from a video title.
 *
 * Looks for "KY." followed by optional whitespace, 2 to 5 digits and a closing
 * parenthesis, e.g. "Some Song (KY.12345)".
 *
 * @param title - Video title to inspect.
 * @returns The digits as a string, or `null` when the title has no such tag.
 */
const extractKaraokeNumber = (title: string): string | null => {
  // Find "KY." and capture the digits up to the closing ")".
  const matchResult = title.match(/KY\.\s*(\d{2,5})\)/);
  return matchResult?.[1] ?? null;
};

/**
 * Uploads the songs buffered in `stackData` to the database, logs the
 * per-song outcome, and leaves the buffer empty.
 *
 * The pending batch is detached from `stackData` *before* the first await.
 * Clearing the buffer after the upload would also discard songs pushed while
 * the upload was in flight (the crawl loop keeps running if this call is not
 * awaited), and those songs would never reach the database.
 *
 * @throws Whatever `updateKYDB` throws; in that case the batch is put back at
 *         the front of `stackData` so a later flush can retry it.
 */
const refreshData = async (): Promise<void> => {
  console.log("refreshData");

  // Take ownership of everything buffered so far; stackData is empty afterwards.
  const batch = stackData.splice(0, stackData.length);

  let result: Awaited<ReturnType<typeof updateKYDB>>;
  try {
    result = await updateKYDB(batch);
  } catch (error: unknown) {
    // Keep the songs for a later attempt, ahead of anything pushed meanwhile.
    stackData.unshift(...batch);
    throw error;
  }

  updateDataLog(result.success, "log/crawlYoutubeSuccess.txt");
  updateDataLog(result.failed, "log/crawlYoutubeFailed.txt");
};
// ์‚ฌ์šฉ

// Crawl driver: look up a KY number on YouTube for every song that has none,
// uploading the results in batches, then flush whatever is left.
const data = await getKYNULLDB();
console.log("getKYNULLDB : ", data.length);
let index = 0;

try {
  for (const song of data) {
    // Flush once more than 100 results are buffered. The flush is awaited so
    // that no song is pushed into the buffer while it is being uploaded and
    // cleared, and so that an upload failure surfaces here instead of as an
    // unhandled rejection.
    if (stackData.length > 100) {
      await refreshData();
    }

    const query = song.title + "-" + song.artist;
    console.log(song.title, " - ", song.artist);

    const result = await scrapeSongNumber(query);
    if (result) {
      console.log("success : ", result);
      stackData.push({ ...song, num_ky: result });
      totalData.push({ ...song, num_ky: result });
    }

    index++;
    console.log("scrapeSongNumber : ", index);
    console.log("stackData : ", stackData.length);
  }
} finally {
  // Always release the browser: an open Puppeteer browser keeps the Node
  // process alive after the crawl has finished or failed.
  await browser.close();
}

console.log("totalData : ", totalData.length);

// Upload the final, partially filled batch.
const result = await updateKYDB(stackData);

updateDataLog(result.success, "log/crawlYoutubeSuccess.txt");
updateDataLog(result.failed, "log/crawlYoutubeFailed.txt");

// 5.13 1st attempt
// 3507 of 5000 succeeded, 18906 registered in total

// 5.13 2nd attempt
2 changes: 1 addition & 1 deletion packages/crawling/src/findKYByOpen.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ for (const song of kyNullData) {
// 6079 updated

// 2nd attempt
// 15065 updated, title/artist name match issue
// 15065 updated, title/artist name mismatch issue

console.log(`
์ด ${kyNullData.length}๊ณก ์ค‘:
Expand Down
6 changes: 6 additions & 0 deletions packages/crawling/src/logData.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@ export function updateDataLog<T>(unknownData: T[] | T, filename: string) {
});

const logPath = path.join(filename);
const logDir = path.dirname(logPath); // ๋””๋ ‰ํ„ฐ๋ฆฌ ๊ฒฝ๋กœ ์ถ”์ถœ

// Create the directory if it does not exist
if (!fs.existsSync(logDir)) {
fs.mkdirSync(logDir, { recursive: true });
}

if (unknownData instanceof Array) {
// Build the log string
Expand Down
27 changes: 18 additions & 9 deletions packages/crawling/src/supabase/getDB.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,26 +32,35 @@ export async function getJapaneseDB() {
return hasJapaneseData;
}

/**
 * Fetches songs that do not have a KY karaoke number yet.
 *
 * The `num_ky IS NULL` filter and the row limit are applied in the query
 * itself rather than by filtering the whole table on the client.
 *
 * @param max - Maximum number of rows to request (default 50000). Per the
 *              original author's note, this cannot exceed the row limit that
 *              applies inside a Supabase query, so fewer rows may come back.
 * @returns Matching songs ordered by title, descending.
 * @throws The Supabase error object when the query fails.
 */
export async function getKYNULLDB(max: number = 50000): Promise<Song[]> {
  const supabase = getClient();

  const { data, error } = await supabase
    .from("songs")
    .select("id, title, artist, num_tj, num_ky")
    .is("num_ky", null) // only rows whose num_ky is null
    .order("title", { ascending: false })
    .limit(max);

  if (error) throw error;

  console.log("data", data.length);

  return data;
}
Loading