land_crawler/index.js

241 lines
7.0 KiB
JavaScript

const puppeteer = require("puppeteer");
const fs = require("fs");
const Axios = require("axios");
// Directory (relative to the working directory) that receives the JSON dumps.
const dirPath = "./articles";

// The config file is looked up next to this script, not in the working directory.
const configFileName = "config.json";
const configPath = `${__dirname}/${configFileName}`;

// Starts empty; main() replaces it with the parsed contents of the config file.
let config = {};
// Complexes to crawl. `id` is interpolated into the complex page URL, `name`
// is used for logs and the notification, and `target` lists the building
// (dong) names to keep — an empty list keeps every building.
const COMPLEX_IDS = [
  {
    id: 3286,
    name: "한가람세경",
    target: ["511동"],
  },
  {
    id: 1464,
    name: "한가람신라",
    target: ["407동", "408동"],
  },
  {
    id: 8775,
    name: "초원세경",
    target: [],
  },
  {
    id: 3022,
    name: "목련3단지",
    target: [],
  },
];

// Criteria a listing must satisfy to be kept by captureComplexArticles().
const FILTERS = {
  // Deal price window; the filter treats the lower bound as exclusive and
  // the upper bound as inclusive.
  minPrice: 500_000_000,
  maxPrice: 650_000_000,
  // At least one of these must appear in a listing description.
  keywords: ["입주", "급매"],
  // Minimum supply space (inclusive).
  minSupplySpace: 48,
};
/**
 * Collects the listings of one complex and returns those that pass FILTERS.
 *
 * Launches a headless browser, opens the complex's article tab on
 * fin.land.naver.com, scrolls to the bottom repeatedly, and records every JSON
 * response whose URL contains "front-api/v1/complex/article/list". The
 * listings found in those responses are merged, filtered by price, supply
 * space, keyword and (optionally) building name, written to
 * `${dirPath}/${id}.json`, and returned.
 *
 * @param {{id: number, name: string, target?: string[]}} complex - Complex to
 *   crawl. `target` lists the building names to keep; empty or missing keeps
 *   every building.
 * @returns {Promise<object[]>} The listings that passed every filter.
 * @throws Re-throws browser, navigation and file-system errors. The browser is
 *   closed in every case.
 */
async function captureComplexArticles(complex) {
  const { id, name, target = [] } = complex;
  console.log(`\n🏢 [${name}] (${id}) 데이터 수집 시작`);

  const browser = await puppeteer.launch({
    headless: true,
    args: [
      "--no-sandbox",
      "--disable-setuid-sandbox",
      "--disable-dev-shm-usage",
      "--disable-blink-features=AutomationControlled",
    ],
  });

  // try/finally: the browser process must be released even when navigation,
  // filtering or the file write throws.
  try {
    const page = await browser.newPage();
    await page.setUserAgent(
      "Mozilla/5.0 (iPhone; CPU iPhone OS 16_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.0 Mobile/15E148 Safari/604.1"
    );

    // Every article-list API response observed while the page loads/scrolls.
    const responses = [];
    page.on("response", async (response) => {
      const url = response.url();
      const status = response.status();
      const headers = response.headers();
      const isArticleList =
        url.includes("front-api/v1/complex/article/list") &&
        headers["content-type"]?.includes("application/json");
      if (!isArticleList) {
        return;
      }
      try {
        const data = await response.json();
        responses.push({ url, status, headers, data });
        console.log("📡 API 응답 감지:", url);
      } catch (e) {
        // Best effort: a body that cannot be read or parsed is skipped, but
        // the failure is reported instead of being silently dropped.
        console.warn("⚠️ API 응답 파싱 실패:", url, e);
      }
    });

    const targetUrl = `https://fin.land.naver.com/complexes/${id}?tab=article`;
    await page.goto(targetUrl, { waitUntil: "networkidle2" });
    await sleep(2000);

    // Scroll to the bottom and watch for newly captured responses; stop after
    // two consecutive scrolls that produced nothing new.
    const maxScrolls = 10;
    let prevCount = 0;
    let noNewData = 0;
    for (let i = 0; i < maxScrolls; i++) {
      console.log(`🔄 스크롤 ${i + 1}/${maxScrolls}`);
      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
      await sleep(2000);
      const currentCount = responses.length;
      if (currentCount === prevCount) {
        noNewData++;
        console.log(` ⚠️ 새 데이터 없음 (${noNewData}회)`);
        if (noNewData >= 2) break;
      } else {
        noNewData = 0;
        prevCount = currentCount;
      }
    }

    // Merge the listings of all captured responses. A response whose body is
    // null or lacks `result.list` contributes nothing instead of throwing.
    const allArticles = [];
    responses.forEach((res, i) => {
      const list = res.data?.result?.list;
      const articles = Array.isArray(list) ? list : [];
      console.log(`응답 ${i + 1}: ${articles.length}개 매물`);
      allArticles.push(...articles);
    });
    console.log(`✅ [${name}] 총 ${allArticles.length}개 매물 수집 완료`);
    console.log(target);

    // True when `text` is a string containing at least one filter keyword.
    // JSON delivers absent descriptions as null, so non-strings never match.
    const hasKeyword = (text) =>
      typeof text === "string" &&
      FILTERS.keywords.some((keyword) => text.includes(keyword));

    const filteredArticles = allArticles.filter((item) => {
      const info = item?.representativeArticleInfo;
      if (info == null) {
        return false;
      }
      const dealPrice = info.priceInfo?.dealPrice;
      const supplySpace = info.spaceInfo?.supplySpace;
      const description = info.articleDetail?.articleFeatureDescription ?? "";
      const dongName = info.dongName;

      // Descriptions of the duplicate postings of the same listing, if any.
      const duplicatedDescription = (
        item.duplicatedArticleInfo?.articleInfoList ?? []
      ).map((al) => al?.articleDetail?.articleFeatureDescription);

      const descriptionIsPass = duplicatedDescription.some((desc) =>
        hasKeyword(desc)
      );
      const mainDescriptionHasKeyword = hasKeyword(description);
      console.log(
        dealPrice,
        description,
        duplicatedDescription,
        descriptionIsPass
      );

      const useTarget = target.length === 0 || target.includes(dongName);
      // NOTE(review): the lower price bound is exclusive while the upper one
      // is inclusive — kept as-is; confirm this asymmetry is intended.
      return (
        dealPrice > FILTERS.minPrice &&
        dealPrice <= FILTERS.maxPrice &&
        (mainDescriptionHasKeyword || descriptionIsPass) &&
        supplySpace >= FILTERS.minSupplySpace &&
        useTarget
      );
    });

    if (!fs.existsSync(dirPath)) {
      fs.mkdirSync(dirPath, { recursive: true });
      console.log(`📁 폴더 생성 완료: ${dirPath}`);
    }

    // Save one file per complex.
    const filename = `${dirPath}/${id}.json`;
    fs.writeFileSync(
      filename,
      JSON.stringify(filteredArticles, null, 2),
      "utf-8"
    );
    console.log(`💾 저장 완료: ${filename}`);

    return filteredArticles;
  } finally {
    await browser.close();
  }
}
/**
 * Reads and parses the JSON config file at `configPath`.
 *
 * @returns {object} The parsed config.
 * @throws {Error} When the file does not exist or is not valid JSON.
 */
function loadConfig() {
  if (!fs.existsSync(configPath)) {
    throw new Error("config.json 파일이 존재하지 않습니다.");
  }
  return JSON.parse(fs.readFileSync(configPath, "utf-8"));
}

/**
 * Reduces the crawled listings to the fields shown in the notification.
 *
 * @param {{complexName: string, articles: object[]}[]} allComplexData
 * @returns {{complexName: string, quantity: number, articles: object[]}[]}
 */
function buildNotifyData(allComplexData) {
  return allComplexData.map((complex) => ({
    complexName: complex.complexName,
    quantity: complex.articles.length,
    articles: complex.articles.map((article) => {
      const info = article.representativeArticleInfo;
      return {
        dongName: info.dongName,
        floorInfo: info.articleDetail.floorInfo,
        dealPrice: info.priceInfo.dealPrice,
        supplySpace: info.spaceInfo.supplySpace,
        description: info.articleDetail.articleFeatureDescription,
      };
    }),
  }));
}

/**
 * Renders the notification text: a title, a separator, then one section per
 * complex made of a header line followed by its numbered listings.
 *
 * @param {{complexName: string, quantity: number, articles: object[]}[]} notifyData
 * @returns {string} The newline-joined message body.
 */
function buildNtfyMessage(notifyData) {
  const sections = notifyData.map((complex) => {
    const header = `🔹 ${complex.complexName} (${complex.quantity}개 매물)`;
    const articles = complex.articles.map((article, i) => {
      // Explicit locale so the digit grouping does not depend on the host.
      const price = Number(article.dealPrice).toLocaleString("ko-KR");
      const floor = article.floorInfo || "층수미상";
      const description = article.description || "";
      return ` ${i + 1}. ${article.dongName} ${floor}\n 📏 ${article.supplySpace}㎡ | 💰 ${price}\n 📝 ${description}`;
    });
    return [header, ...articles].join("\n");
  });
  return ["🏢 단지별 매물 알림", "========================", ...sections].join(
    "\n"
  );
}

/**
 * Crawls every complex in COMPLEX_IDS, stores the combined result in
 * `${dirPath}/all_complexes.json`, and posts a summary to the ntfy topic.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the config file is missing or has no `ntfyToken`; also
 *   re-throws crawl, file-system and HTTP errors.
 */
async function main() {
  // Fail fast: validate the config before the crawl, so a missing file or
  // token is reported immediately rather than after every complex was scraped.
  config = loadConfig();
  if (typeof config?.ntfyToken !== "string" || config.ntfyToken === "") {
    throw new Error("config.json에 ntfyToken 값이 없습니다.");
  }

  const allComplexData = [];
  for (const [index, complex] of COMPLEX_IDS.entries()) {
    const articles = await captureComplexArticles(complex);
    allComplexData.push({
      complexId: complex.id,
      complexName: complex.name,
      articles,
    });
    // Pause between complexes only; nothing follows the last one.
    if (index < COMPLEX_IDS.length - 1) {
      console.log(`\n⏸ 다음 단지로 이동 전 대기 중...`);
      await sleep(3000);
    }
  }

  const notifyData = buildNotifyData(allComplexData);
  const ntfyMessage = buildNtfyMessage(notifyData);

  // Save the combined file before notifying, so a failed HTTP request does
  // not discard the crawl result.
  fs.mkdirSync(dirPath, { recursive: true });
  fs.writeFileSync(
    `${dirPath}/all_complexes.json`,
    JSON.stringify(allComplexData, null, 2),
    "utf-8"
  );

  await Axios.post("https://ntfy.horoli.kr/land", ntfyMessage, {
    headers: {
      "Content-Type": "text/plain; charset=utf-8",
      Authorization: `Bearer ${config.ntfyToken}`,
    },
  });
  console.log(notifyData);
  console.log("\n🏁 모든 단지 데이터 수집 완료!");
}
/**
 * Returns a promise that settles once a timer of `ms` milliseconds fires.
 *
 * @param {number} ms - Delay handed to setTimeout.
 * @returns {Promise<void>} Resolves with no value.
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
// Entry point. The promise is handled explicitly so a failed run is reported
// and ends with a non-zero exit code instead of an unhandled rejection.
main().catch((err) => {
  console.error("❌ 실행 중 오류 발생:", err);
  process.exitCode = 1;
});