Compare commits

...

4 Commits

Author SHA1 Message Date
Release Bot 8ee5cc3861 1.4.1
Release / release (push) Successful in 5s
Build and deploy / build-and-push (push) Successful in 23s
Build and deploy / deploy (push) Successful in 8s
2026-06-01 07:04:42 +00:00
shcizo 2561ee12ef Merge pull request 'fix: parse latest tournament from recent-events list on player page (#24)' (#25) from fix/parse-recent-events-tournament-24 into main
Release / release (push) Successful in 25s
2026-06-01 09:04:13 +02:00
Samuel Enocsson 0d2f0fa3a8 fix: skip recent-events tournament when extracted date predates afterDate (#24) 2026-06-01 08:57:51 +02:00
Samuel Enocsson ec3ae872da fix: parse latest tournament from recent-events list on player page (#24) 2026-06-01 08:53:12 +02:00
3 changed files with 105 additions and 32 deletions
+2 -2
View File
@@ -1,12 +1,12 @@
{ {
"name": "pdga-ratings", "name": "pdga-ratings",
"version": "1.4.0", "version": "1.4.1",
"lockfileVersion": 3, "lockfileVersion": 3,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "pdga-ratings", "name": "pdga-ratings",
"version": "1.4.0", "version": "1.4.1",
"dependencies": { "dependencies": {
"ejs": "^4.0.1", "ejs": "^4.0.1",
"express": "^4.18.2", "express": "^4.18.2",
+1 -1
View File
@@ -1,6 +1,6 @@
{ {
"name": "pdga-ratings", "name": "pdga-ratings",
"version": "1.4.0", "version": "1.4.1",
"description": "PDGA rating scraper and display", "description": "PDGA rating scraper and display",
"main": "server.js", "main": "server.js",
"scripts": { "scripts": {
+102 -29
View File
@@ -156,81 +156,154 @@ async function getNewTournamentRounds(browser, pdgaNumber, afterDate) {
logger.info(`Looking for tournaments after ${afterDate.toDateString()}...`); logger.info(`Looking for tournaments after ${afterDate.toDateString()}...`);
const newTournamentUrls = await page.evaluate((afterTimestamp) => { const { urls: newTournamentUrls, counts } = await page.evaluate((afterTimestamp) => {
const afterDate = new Date(afterTimestamp); const afterDate = new Date(afterTimestamp);
const tables = document.querySelectorAll('table[id*="player-results"]'); const tables = document.querySelectorAll('table[id*="player-results"]');
const urls = []; const urls = [];
const seenUrls = new Set();
tables.forEach(table => { let table = 0;
const rows = table.querySelectorAll('tbody tr'); let recentEvents = 0;
let recentEventsAnchorsSeen = 0;
let recentEventsSkippedDuplicates = 0;
tables.forEach(tbl => {
const rows = tbl.querySelectorAll('tbody tr');
rows.forEach(row => { rows.forEach(row => {
const dateCell = row.querySelector('.dates'); const dateCell = row.querySelector('.dates');
const tournamentCell = row.querySelector('.tournament a'); const tournamentCell = row.querySelector('.tournament a');
if (dateCell && tournamentCell) { if (dateCell && tournamentCell) {
const dateText = dateCell.innerText.trim(); const dateText = dateCell.innerText.trim();
const dateMatch = dateText.match(/\d{1,2}-[A-Za-z]{3}-\d{4}/); const dateMatch = dateText.match(/\d{1,2}-[A-Za-z]{3}-\d{4}/);
if (dateMatch) { if (dateMatch) {
const dateStr = dateMatch[0]; const dateStr = dateMatch[0];
const date = new Date(dateStr); const date = new Date(dateStr);
if (date > afterDate) { if (date > afterDate) {
const href = tournamentCell.getAttribute('href'); const href = tournamentCell.getAttribute('href');
if (href) { if (href) {
urls.push({ const absoluteUrl = new URL(href, location.origin).href;
url: `https://www.pdga.com${href}`, if (!seenUrls.has(absoluteUrl)) {
date: dateStr, seenUrls.add(absoluteUrl);
name: tournamentCell.innerText.trim() urls.push({
}); url: absoluteUrl,
date: dateStr,
name: tournamentCell.innerText.trim(),
source: 'table'
});
table++;
}
} }
} }
} }
} }
}); });
}); });
return urls; const recentAnchors = document.querySelectorAll('.recent-events a[href*="/tour/event/"]');
recentAnchors.forEach(anchor => {
recentEventsAnchorsSeen++;
const href = anchor.getAttribute('href');
if (href) {
const absoluteUrl = new URL(href, location.origin).href;
if (seenUrls.has(absoluteUrl)) {
recentEventsSkippedDuplicates++;
} else {
seenUrls.add(absoluteUrl);
urls.push({
url: absoluteUrl,
date: null,
name: anchor.innerText.trim() || 'Recent event',
source: 'recent-events'
});
recentEvents++;
}
}
});
return { urls, counts: { table, recentEvents, recentEventsAnchorsSeen, recentEventsSkippedDuplicates } };
}, afterDate.getTime()); }, afterDate.getTime());
logger.info(`Found ${newTournamentUrls.length} new tournaments after ${afterDate.toDateString()}`); logger.info({
pdgaNumber,
afterDate: afterDate.toISOString(),
tableMatches: counts.table,
recentEventsMatches: counts.recentEvents,
recentEventsAnchorsSeen: counts.recentEventsAnchorsSeen,
recentEventsSkippedDuplicates: counts.recentEventsSkippedDuplicates,
totalUrlsToScrape: newTournamentUrls.length
}, 'new tournament URL discovery completed');
for (const tournamentData of newTournamentUrls) { for (const tournamentData of newTournamentUrls) {
try { try {
logger.info(`Scraping new tournament: ${tournamentData.name} (${tournamentData.date})`); if (tournamentData.source === 'recent-events') {
logger.debug({ pdgaNumber, url: tournamentData.url }, 'recent-events: scraping tournament');
} else {
logger.info(`Scraping new tournament: ${tournamentData.name} (${tournamentData.date})`);
}
await page.goto(tournamentData.url, { waitUntil: 'domcontentloaded', timeout: 30000 }); await page.goto(tournamentData.url, { waitUntil: 'domcontentloaded', timeout: 30000 });
await new Promise(r => setTimeout(r, 500)); await new Promise(r => setTimeout(r, 500));
let parsedDate;
if (tournamentData.date !== null) {
parsedDate = parseDate(tournamentData.date);
} else {
const eventDateStr = await page.evaluate(() => {
const body = document.body ? document.body.innerText : '';
const m = body.match(/\d{1,2}\s+to\s+\d{1,2}-[A-Za-z]{3}-\d{4}/)
|| body.match(/\d{1,2}-[A-Za-z]{3}-\d{4}/);
return m ? m[0] : null;
});
if (eventDateStr) {
parsedDate = parseDate(eventDateStr);
if (!(parsedDate > afterDate)) {
logger.warn({
pdgaNumber,
url: tournamentData.url,
eventDateStr,
parsedDate: parsedDate ? parsedDate.toISOString() : null,
afterDate: afterDate.toISOString()
}, 'recent-events: extracted event date is not newer than afterDate, likely captured a non-tournament date — skipping');
continue;
}
logger.debug({ pdgaNumber, url: tournamentData.url, eventDateStr }, 'recent-events: extracted date from event page');
} else {
logger.warn({ pdgaNumber, url: tournamentData.url }, 'recent-events: could not extract date from event page, skipping tournament');
continue;
}
}
const roundRatings = await page.evaluate((pdgaNum) => { const roundRatings = await page.evaluate((pdgaNum) => {
const rows = document.querySelectorAll('tr'); const rows = document.querySelectorAll('tr');
for (const row of rows) { for (const row of rows) {
const cells = row.querySelectorAll('td'); const cells = row.querySelectorAll('td');
const hasPlayerNumber = Array.from(cells).some(cell => const hasPlayerNumber = Array.from(cells).some(cell =>
cell.innerText && cell.innerText.includes(pdgaNum.toString()) cell.innerText && cell.innerText.includes(pdgaNum.toString())
); );
if (hasPlayerNumber) { if (hasPlayerNumber) {
const roundRatingCells = row.querySelectorAll('td.round-rating'); const roundRatingCells = row.querySelectorAll('td.round-rating');
const ratings = []; const ratings = [];
roundRatingCells.forEach(cell => { roundRatingCells.forEach(cell => {
const rating = parseInt(cell.innerText.trim()); const rating = parseInt(cell.innerText.trim());
if (!isNaN(rating) && rating > 0) { if (!isNaN(rating) && rating > 0) {
ratings.push(rating); ratings.push(rating);
} }
}); });
return ratings; return ratings;
} }
} }
return []; return [];
}, pdgaNumber); }, pdgaNumber);
if (roundRatings.length > 0) { if (roundRatings.length > 0) {
const parsedDate = parseDate(tournamentData.date);
roundRatings.forEach(rating => { roundRatings.forEach(rating => {
newRounds.push({ newRounds.push({
rating, rating,
@@ -238,10 +311,10 @@ async function getNewTournamentRounds(browser, pdgaNumber, afterDate) {
competition: tournamentData.name competition: tournamentData.name
}); });
}); });
logger.info(`Found ${roundRatings.length} round ratings for ${tournamentData.name}`); logger.info(`Found ${roundRatings.length} round ratings for ${tournamentData.name}`);
} }
} catch (error) { } catch (error) {
logger.error(`Error scraping tournament ${tournamentData.name}: ${error.message}`); logger.error(`Error scraping tournament ${tournamentData.name}: ${error.message}`);
} }