/*
 * d567c4bca9
 *
 * - Switch from Alpine to Debian slim for correct Chromium architecture
 *   (fixes ARM/Apple Silicon support)
 * - Upgrade Puppeteer 21 to 24, use system Chromium via PUPPETEER_EXECUTABLE_PATH
 * - Replace removed page.waitForTimeout() with setTimeout
 * - Set NODE_ENV=production in Dockerfile to prevent pino-pretty import
 * - Improve error logging with Pino's { err: error } pattern
 * - Add build: . to docker-compose for local development builds
 *
 * 319 lines, 10 KiB, JavaScript
 */
const { parseDate } = require('../services/rating-calculator');
const logger = require('../logger');
/**
 * Scrapes a player's rating history from the PDGA `/history` page.
 *
 * Best-effort: navigation and scraping failures are logged and an empty array
 * is returned instead of an exception. A failure while closing the page is
 * logged as well and never discards rows that were already scraped. A failure
 * of `browser.newPage()` itself is not caught here and rejects the promise.
 *
 * @param {object} browser - Browser handle exposing `newPage()` (Puppeteer-style).
 * @param {number|string} pdgaNumber - PDGA number interpolated into the player URL.
 * @returns {Promise<Array<{date: string, rating: number, tournament: string}>>}
 *   One entry per table row whose first cell looks like a date and whose
 *   second cell parses to a rating strictly between 800 and 1200.
 */
async function getOfficialRatingHistory(browser, pdgaNumber) {
  const page = await browser.newPage();
  let ratingHistory = [];

  try {
    const url = `https://www.pdga.com/player/${pdgaNumber}/history`;
    await page.goto(url, { waitUntil: 'networkidle2', timeout: 45000 });
    // Short fixed pause after navigation before reading the DOM.
    await new Promise((resolve) => setTimeout(resolve, 1000));

    // Runs inside the page: the callback must be self-contained and may only
    // use browser globals.
    ratingHistory = await page.evaluate(() => {
      const DATE_PATTERN = /^\d{4}-\d{2}-\d{2}$|^\d{1,2}-\w{3}-\d{4}$|^\w{3} \d{1,2}, \d{4}$/;
      // Tried in order; the first selector that yields any usable row wins.
      const selectors = ['table tbody tr', 'table tr', '.view-content tbody tr'];
      const history = [];

      for (const selector of selectors) {
        for (const row of document.querySelectorAll(selector)) {
          const cells = row.querySelectorAll('td');
          if (cells.length < 3) continue;

          const dateText = cells[0]?.innerText?.trim();
          const ratingText = cells[1]?.innerText?.trim();
          if (!dateText || !ratingText || !DATE_PATTERN.test(dateText)) continue;

          const rating = Number.parseInt(ratingText, 10);
          // Bounds are exclusive: 800 and 1200 themselves are rejected.
          if (Number.isNaN(rating) || rating <= 800 || rating >= 1200) continue;

          history.push({
            date: dateText,
            rating,
            tournament: cells[2]?.innerText?.trim() || 'Unknown',
          });
        }

        if (history.length > 0) break;
      }

      return history;
    });
  } catch (error) {
    logger.error('Error fetching official rating history: ' + error.message);
  } finally {
    // A throwing close() inside `finally` would replace the return value with
    // a rejection, so it is contained and only logged.
    try {
      await page.close();
    } catch (closeError) {
      logger.error('Error closing rating history page: ' + closeError.message);
    }
  }

  return ratingHistory;
}
/**
 * Scrapes rated rounds from a player's PDGA `/details` page.
 *
 * Each table row with at least four cells is scanned cell by cell for a date,
 * a rating (800-1200 inclusive) and a division code; the first cell is taken
 * as the tournament name. Rows lacking a name, a date or a rating are dropped.
 * The scraped date text is then converted to a `Date` with `parseDate`, with
 * the native `Date` constructor as a second attempt and the current time as
 * the last resort.
 *
 * Best-effort: navigation and scraping failures are logged and an empty array
 * is returned. A failure while closing the page is logged and never discards
 * rows that were already scraped. A failure of `browser.newPage()` is not
 * caught here.
 *
 * @param {object} browser - Browser handle exposing `newPage()` (Puppeteer-style).
 * @param {number|string} pdgaNumber - PDGA number interpolated into the player URL.
 * @returns {Promise<Array<{tournament: string, date: Date, rating: number,
 *   division: string, competition: string}>>}
 */
async function getPlayerTournamentDetails(browser, pdgaNumber) {
  // Turns scraped date text into a Date; never throws.
  const resolveRoundDate = (dateText) => {
    const fallback = new Date();
    if (!dateText) return fallback;

    try {
      const pdgaParsed = parseDate(dateText);
      if (pdgaParsed instanceof Date && !Number.isNaN(pdgaParsed.getTime())) {
        return pdgaParsed;
      }

      const nativeParsed = new Date(dateText);
      if (!Number.isNaN(nativeParsed.getTime())) {
        return nativeParsed;
      }

      // Make the substitution visible: a round stamped "now" looks like the
      // player's most recent round to anything that sorts by date.
      logger.info(`Could not parse date "${dateText}"; using current time instead`);
    } catch (e) {
      logger.info(`Date parsing failed for "${dateText}": ${e.message}`);
    }

    return fallback;
  };

  const page = await browser.newPage();
  let tournamentRounds = [];

  try {
    const url = `https://www.pdga.com/player/${pdgaNumber}/details`;
    await page.goto(url, { waitUntil: 'networkidle2', timeout: 45000 });
    // Short fixed pause after navigation before reading the DOM.
    await new Promise((resolve) => setTimeout(resolve, 1000));

    // Runs inside the page: the callback must be self-contained.
    const scrapedRounds = await page.evaluate(() => {
      const rounds = [];

      for (const row of document.querySelectorAll('table tbody tr')) {
        const cells = row.querySelectorAll('td');
        if (cells.length < 4) continue;

        const cellTexts = Array.from(cells, (cell) => cell.innerText.trim());
        const tournamentName = cellTexts[0];
        let dateText = '';
        let rating = 0;
        let division = '';

        // When several cells match the same pattern, the last match wins.
        for (const text of cellTexts) {
          // Also covers ranges such as "12-Apr to 14-Apr-2024", which end in
          // a full d-Mon-yyyy date.
          if (/\d{1,2}-\w{3}-\d{4}/.test(text)) {
            dateText = text;
          }
          if (/^\d{3,4}$/.test(text)) {
            const value = Number.parseInt(text, 10);
            if (value >= 800 && value <= 1200) {
              rating = value;
            }
          }
          // NOTE(review): this pattern accepts codes like "MA1" or "FA40" but
          // not three-letter codes such as "MPO" — confirm that is intended.
          if (/^M[A-Z]\d*$|^F[A-Z]\d*$/.test(text)) {
            division = text;
          }
        }

        if (tournamentName && dateText && rating > 0) {
          rounds.push({
            tournament: tournamentName,
            dateText,
            rating,
            division,
            competition: `${tournamentName} (${division})`,
          });
        }
      }

      return rounds;
    });

    tournamentRounds = scrapedRounds.map((round) => ({
      tournament: round.tournament,
      date: resolveRoundDate(round.dateText),
      rating: round.rating,
      division: round.division,
      competition: round.competition,
    }));
  } catch (error) {
    logger.error('Error fetching tournament details: ' + error.message);
  } finally {
    // A throwing close() inside `finally` would replace the return value with
    // a rejection, so it is contained and only logged.
    try {
      await page.close();
    } catch (closeError) {
      logger.error('Error closing tournament details page: ' + closeError.message);
    }
  }

  return tournamentRounds;
}
/**
 * Collects round ratings from tournaments listed on a player's PDGA profile
 * page that are dated after `afterDate`.
 *
 * The profile page's result tables are scanned for rows whose date (for a
 * range, the full d-Mon-yyyy date it contains) is later than `afterDate`.
 * Each such tournament page is then opened and the `td.round-rating` cells of
 * the first row mentioning the player's PDGA number are read.
 *
 * Best-effort: a failure on one tournament is logged and that tournament is
 * skipped; a failure on the profile page is logged and whatever was collected
 * so far is returned. A failure while closing the page is logged and never
 * discards collected rounds. A failure of `browser.newPage()` is not caught.
 *
 * @param {object} browser - Browser handle exposing `newPage()` (Puppeteer-style).
 * @param {number|string} pdgaNumber - PDGA number of the player.
 * @param {Date} afterDate - Only tournaments dated strictly after this are scraped.
 * @returns {Promise<Array<{rating: number, date: *, competition: string}>>}
 *   One entry per round rating; `date` is whatever `parseDate` returns for
 *   the tournament's date text.
 */
async function getNewTournamentRounds(browser, pdgaNumber, afterDate) {
  const page = await browser.newPage();
  const newRounds = [];

  try {
    const url = `https://www.pdga.com/player/${pdgaNumber}`;
    await page.goto(url, { waitUntil: 'networkidle2' });

    logger.info(`Looking for tournaments after ${afterDate.toDateString()}...`);

    // Runs inside the page: the callback must be self-contained. The cutoff
    // is passed as a timestamp because Date objects are not serialisable.
    const newTournamentUrls = await page.evaluate((afterTimestamp) => {
      const MONTHS = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec'];
      const cutoff = new Date(afterTimestamp);
      const urls = [];

      for (const table of document.querySelectorAll('table[id*="player-results"]')) {
        for (const row of table.querySelectorAll('tbody tr')) {
          const dateCell = row.querySelector('.dates');
          const tournamentLink = row.querySelector('.tournament a');
          if (!dateCell || !tournamentLink) continue;

          const dateMatch = dateCell.innerText.trim().match(/(\d{1,2})-([A-Za-z]{3})-(\d{4})/);
          if (!dateMatch) continue;

          // Build the date from its parts (local midnight) instead of relying
          // on engine-specific parsing of the non-ISO "d-Mon-yyyy" format.
          const [dateStr, day, monthName, year] = dateMatch;
          const monthIndex = MONTHS.indexOf(monthName.toLowerCase());
          if (monthIndex === -1) continue;
          const eventDate = new Date(Number(year), monthIndex, Number(day));
          if (!(eventDate > cutoff)) continue;

          const href = tournamentLink.getAttribute('href');
          if (!href) continue;

          urls.push({
            // Resolves relative and absolute hrefs alike.
            url: new URL(href, 'https://www.pdga.com').href,
            date: dateStr,
            name: tournamentLink.innerText.trim(),
          });
        }
      }

      return urls;
    }, afterDate.getTime());

    logger.info(`Found ${newTournamentUrls.length} new tournaments after ${afterDate.toDateString()}`);

    // Sequential on purpose: all tournaments are loaded in the same page.
    for (const tournamentData of newTournamentUrls) {
      try {
        logger.info(`Scraping new tournament: ${tournamentData.name} (${tournamentData.date})`);

        await page.goto(tournamentData.url, { waitUntil: 'domcontentloaded', timeout: 30000 });
        // Short fixed pause after navigation before reading the DOM.
        await new Promise((resolve) => setTimeout(resolve, 500));

        const roundRatings = await page.evaluate((pdgaNum) => {
          const target = String(pdgaNum).trim();

          for (const row of document.querySelectorAll('tr')) {
            // Compare against whole digit runs so that e.g. 12345 does not
            // match a row belonging to player 123456.
            const isPlayerRow = Array.from(row.querySelectorAll('td')).some((cell) =>
              Boolean(cell.innerText) && (cell.innerText.match(/\d+/g) ?? []).includes(target)
            );
            if (!isPlayerRow) continue;

            const ratings = [];
            for (const cell of row.querySelectorAll('td.round-rating')) {
              const rating = Number.parseInt(cell.innerText.trim(), 10);
              if (!Number.isNaN(rating) && rating > 0) {
                ratings.push(rating);
              }
            }

            // Only the first matching row is considered, even if it is empty.
            return ratings;
          }

          return [];
        }, pdgaNumber);

        if (roundRatings.length > 0) {
          const parsedDate = parseDate(tournamentData.date);
          for (const rating of roundRatings) {
            newRounds.push({
              rating,
              date: parsedDate,
              competition: tournamentData.name,
            });
          }

          logger.info(`Found ${roundRatings.length} round ratings for ${tournamentData.name}`);
        }
      } catch (error) {
        logger.error(`Error scraping tournament ${tournamentData.name}: ${error.message}`);
      }
    }
  } catch (error) {
    logger.error(`Error getting new tournament rounds for PDGA ${pdgaNumber}: ${error.message}`);
  } finally {
    // A throwing close() inside `finally` would replace the return value with
    // a rejection, so it is contained and only logged.
    try {
      await page.close();
    } catch (closeError) {
      logger.error(`Error closing page for PDGA ${pdgaNumber}: ${closeError.message}`);
    }
  }

  return newRounds;
}
/**
 * Builds a player's combined round list: the rounds from the `/details` page
 * plus the round ratings of any tournaments dated after the latest of them.
 *
 * The result is sorted by date, oldest first. Rounds whose date is missing or
 * invalid are placed at the end in their original relative order, so the
 * comparator never returns NaN. Any error is logged and yields an empty array.
 *
 * @param {object} browser - Browser handle passed through to the scrapers.
 * @param {number|string} pdgaNumber - PDGA number of the player.
 * @returns {Promise<Array<{rating: number, date: *, competition: string,
 *   source: 'official'|'new'}>>} Empty when no official rounds are found.
 */
async function getOptimizedPlayerRounds(browser, pdgaNumber) {
  // Numeric timestamp of a date-like value; NaN when missing or invalid.
  const toTimestamp = (date) => (date == null ? Number.NaN : Number(date));

  // Ascending by date, with undatable rounds after all datable ones.
  const byDateAscending = (a, b) => {
    const timeA = toTimestamp(a.date);
    const timeB = toTimestamp(b.date);
    const isInvalidA = Number.isNaN(timeA);
    const isInvalidB = Number.isNaN(timeB);
    if (isInvalidA || isInvalidB) {
      return Number(isInvalidA) - Number(isInvalidB);
    }
    return timeA - timeB;
  };

  logger.info(`Optimized Round Collection for PDGA ${pdgaNumber}`);

  try {
    logger.info('Getting official rating rounds from /details page...');
    const officialRounds = await getPlayerTournamentDetails(browser, pdgaNumber);

    if (officialRounds.length === 0) {
      logger.info('No official rounds found in details page');
      return [];
    }

    logger.info(`Found ${officialRounds.length} official rating rounds`);

    // Single pass for the newest date; the input array is left untouched.
    let latestOfficialDate = officialRounds[0].date;
    for (const round of officialRounds) {
      const latestTime = toTimestamp(latestOfficialDate);
      if (Number.isNaN(latestTime) || toTimestamp(round.date) > latestTime) {
        latestOfficialDate = round.date;
      }
    }
    logger.info(`Latest official round: ${latestOfficialDate.toDateString()}`);

    logger.info('Looking for new tournaments since latest official round...');
    const newRounds = await getNewTournamentRounds(browser, pdgaNumber, latestOfficialDate);

    if (newRounds.length > 0) {
      logger.info(`Found ${newRounds.length} new round ratings`);
    } else {
      logger.info('No new tournaments found since latest official round');
    }

    // Normalise both sources to the same shape and tag their origin.
    const tagRounds = (rounds, source) => rounds.map(({ rating, date, competition }) => ({
      rating,
      date,
      competition,
      source,
    }));

    const allRounds = [
      ...tagRounds(officialRounds, 'official'),
      ...tagRounds(newRounds, 'new'),
    ];
    allRounds.sort(byDateAscending);

    logger.info(`Summary: ${officialRounds.length} official + ${newRounds.length} new = ${allRounds.length} total rounds`);

    return allRounds;
  } catch (error) {
    logger.error('Error in optimized round collection: ' + error.message);
    return [];
  }
}
module.exports = {
|
|
getOfficialRatingHistory,
|
|
getPlayerTournamentDetails,
|
|
getNewTournamentRounds,
|
|
getOptimizedPlayerRounds
|
|
};