Files
pdga-rating/server.js
T
Samuel Enocsson 765917d495 Add automatic database population from PDGA numbers file at startup
- Read pdga-numbers.txt at server startup and check for missing players
- Automatically scrape and populate any missing players into database
- Maintain respectful 2-second delays between PDGA requests
- Add comprehensive logging for population process
- Include new API endpoints for manual database population and status checking
- Ensure database is fully populated before server accepts requests

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-18 09:49:54 +02:00

2253 lines
77 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
const express = require('express');
const puppeteer = require('puppeteer');
const https = require('https');
const fs = require('fs');
const path = require('path');
const sqlite3 = require('sqlite3').verbose();
// Express application instance for this server.
const app = express();
// Port number constant. NOTE(review): presumably passed to app.listen() further down the file — confirm.
const PORT = 3000;
// Serve static assets from the ./public directory.
app.use(express.static('public'));
// Initialize SQLite database
// Single shared connection to ./ratings.db; every DB helper below uses this handle.
const db = new sqlite3.Database('./ratings.db');
// Initialize database schema
/**
 * Create the SQLite schema (players, round_history, rating_history) if it
 * does not exist yet and apply the `last_round_update` column migration.
 *
 * Every step is awaited in order, so the returned promise only settles after
 * the migration check has finished. A failure creating any of the three
 * tables rejects the promise; a failure in the migration is logged and does
 * not abort start-up.
 *
 * @returns {Promise<void>} Resolves once all tables exist and the migration
 *   check has completed; rejects with the sqlite error of a failed
 *   CREATE TABLE statement.
 */
async function initializeDatabase() {
  // Small promise adapters over the callback-style sqlite3 API.
  const runSql = (sql) =>
    new Promise((resolve, reject) => {
      db.run(sql, (err) => (err ? reject(err) : resolve()));
    });
  const allRows = (sql) =>
    new Promise((resolve, reject) => {
      db.all(sql, (err, rows) => (err ? reject(err) : resolve(rows)));
    });

  // Create players table
  await runSql(`
    CREATE TABLE IF NOT EXISTS players (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      pdga_number INTEGER UNIQUE NOT NULL,
      name TEXT NOT NULL,
      current_rating INTEGER,
      rating_change INTEGER,
      last_updated DATETIME DEFAULT CURRENT_TIMESTAMP,
      last_round_update DATETIME DEFAULT NULL
    )
  `);

  // Migration: add last_round_update to databases created before the column
  // existed. Errors here are logged only, so an already-usable database does
  // not block start-up.
  try {
    const columns = await allRows('PRAGMA table_info(players)');
    const hasLastRoundUpdate = columns.some((col) => col.name === 'last_round_update');
    if (!hasLastRoundUpdate) {
      console.log('Adding last_round_update column to players table...');
      try {
        await runSql('ALTER TABLE players ADD COLUMN last_round_update DATETIME DEFAULT NULL');
        console.log('Successfully added last_round_update column');
      } catch (alterErr) {
        console.error('Error adding last_round_update column:', alterErr.message);
      }
    }
  } catch (schemaErr) {
    console.error('Error getting table info:', schemaErr);
  }

  // Create round_history table
  await runSql(`
    CREATE TABLE IF NOT EXISTS round_history (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      player_id INTEGER NOT NULL,
      date DATE NOT NULL,
      competition_name TEXT NOT NULL,
      rating INTEGER NOT NULL,
      FOREIGN KEY (player_id) REFERENCES players (id)
    )
  `);

  // Create rating_history table
  await runSql(`
    CREATE TABLE IF NOT EXISTS rating_history (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      player_id INTEGER NOT NULL,
      date DATE NOT NULL,
      rating INTEGER NOT NULL,
      FOREIGN KEY (player_id) REFERENCES players (id)
    )
  `);

  console.log('Database initialized successfully');
}
// Check and populate database from PDGA numbers file at startup
/**
 * Compare the PDGA numbers listed in `pdga-numbers.txt` (one per line) with
 * the players table and scrape every number that has no row yet.
 *
 * Requests are issued sequentially with a 2 second pause between them. Any
 * error (missing file, DB failure) is logged and swallowed so that start-up
 * can continue.
 *
 * @returns {Promise<void>}
 */
async function checkAndPopulateDatabase() {
  try {
    console.log('=== Checking database population against PDGA numbers file ===');
    const pdgaNumbers = fs.readFileSync('pdga-numbers.txt', 'utf-8')
      .split('\n')
      .map((num) => num.trim())
      .filter((num) => num);
    console.log(`Found ${pdgaNumbers.length} PDGA numbers in file`);

    // Check which players are missing from database. A Set keeps a number
    // that is listed twice in the file from being scraped twice.
    const missing = new Set();
    for (const pdgaNumber of pdgaNumbers) {
      const player = await getPlayerFromDB(pdgaNumber);
      if (!player) {
        missing.add(pdgaNumber);
      }
    }
    const missingPlayers = [...missing];

    if (missingPlayers.length === 0) {
      console.log('✓ All players from PDGA numbers file are already in database');
      return;
    }
    console.log(`Found ${missingPlayers.length} missing players: [${missingPlayers.join(', ')}]`);
    console.log('=== Starting automatic population of missing players ===');

    // Populate missing players
    for (const [index, pdgaNumber] of missingPlayers.entries()) {
      console.log(`[${index + 1}/${missingPlayers.length}] Scraping missing player PDGA ${pdgaNumber}...`);
      try {
        const playerData = await scrapePDGARating(pdgaNumber);
        // scrapePDGARating does not throw when its retries are exhausted; it
        // returns a placeholder whose name is 'Error'. Treat that as a
        // failure instead of reporting the player as added.
        if (!playerData || playerData.name === 'Error') {
          throw new Error('scrape did not return usable player data');
        }
        console.log(`✓ Added PDGA ${pdgaNumber}: ${playerData.name}`);
      } catch (error) {
        console.error(`✗ Failed to add PDGA ${pdgaNumber}: ${error.message}`);
      }
      // Delay between requests to be respectful to PDGA (also after failures).
      if (index < missingPlayers.length - 1) {
        console.log('Waiting 2s before next request...');
        await new Promise((resolve) => setTimeout(resolve, 2000));
      }
    }
    console.log('=== Database population complete ===');
  } catch (error) {
    console.error('Error during database population check:', error.message);
  }
}
// Database helper functions
/**
 * Fetch the players row whose pdga_number matches.
 *
 * @param {number|string} pdgaNumber PDGA number to look up.
 * @returns {Promise<object|undefined>} The row handed back by db.get (falsy
 *   when nothing matched); rejects with the sqlite error on failure.
 */
function getPlayerFromDB(pdgaNumber) {
  const lookupSql = 'SELECT * FROM players WHERE pdga_number = ?';
  return new Promise((resolve, reject) => {
    db.get(lookupSql, [pdgaNumber], (queryError, playerRow) => {
      if (queryError) {
        reject(queryError);
        return;
      }
      resolve(playerRow);
    });
  });
}
/**
 * Insert a player or update the existing row for the same PDGA number.
 *
 * Uses an UPSERT so that an existing player keeps its row id and its
 * `last_round_update` value. `INSERT OR REPLACE` would delete the row and
 * insert a fresh one, which assigns a new AUTOINCREMENT id (orphaning the
 * round_history / rating_history rows that reference `player_id`) and
 * resets `last_round_update` to NULL.
 *
 * Requires SQLite 3.24 or newer (UPSERT support).
 *
 * @param {{pdgaNumber: number|string, name: string, rating: number, ratingChange: ?number}} playerData
 * @returns {Promise<number|undefined>} The id of the player's row; rejects
 *   with the sqlite error on failure.
 */
function savePlayerToDB(playerData) {
  const upsertSql = `INSERT INTO players (pdga_number, name, current_rating, rating_change, last_updated)
    VALUES (?, ?, ?, ?, datetime('now'))
    ON CONFLICT(pdga_number) DO UPDATE SET
      name = excluded.name,
      current_rating = excluded.current_rating,
      rating_change = excluded.rating_change,
      last_updated = excluded.last_updated`;
  const upsertParams = [
    playerData.pdgaNumber,
    playerData.name,
    playerData.rating,
    playerData.ratingChange,
  ];

  return new Promise((resolve, reject) => {
    db.run(upsertSql, upsertParams, (err) => {
      if (err) {
        reject(err);
        return;
      }
      // lastID is only meaningful for a real insert, so look the id up
      // explicitly to return a correct value on the update path as well.
      db.get(
        'SELECT id FROM players WHERE pdga_number = ?',
        [playerData.pdgaNumber],
        (lookupErr, row) => {
          if (lookupErr) {
            reject(lookupErr);
            return;
          }
          resolve(row ? row.id : undefined);
        }
      );
    });
  });
}
/**
 * Load the stored rating history of a player, oldest entry first.
 *
 * @param {number|string} pdgaNumber PDGA number of the player.
 * @returns {Promise<?Array<object>>} rating_history rows ordered by date
 *   ascending, or null when no player with that PDGA number exists; rejects
 *   with the sqlite error if either query fails.
 */
function getRatingHistoryFromDB(pdgaNumber) {
  return new Promise((resolve, reject) => {
    // Second step: fetch the history rows for a known player id.
    const loadHistory = (playerId) => {
      db.all(
        'SELECT * FROM rating_history WHERE player_id = ? ORDER BY date ASC',
        [playerId],
        (historyError, historyRows) => {
          if (historyError) {
            reject(historyError);
          } else {
            resolve(historyRows);
          }
        }
      );
    };

    // First step: translate the PDGA number into the internal player id.
    db.get('SELECT id FROM players WHERE pdga_number = ?', [pdgaNumber], (lookupError, playerRow) => {
      if (lookupError) {
        reject(lookupError);
        return;
      }
      if (!playerRow) {
        resolve(null);
        return;
      }
      loadHistory(playerRow.id);
    });
  });
}
/**
 * Replace the stored rating history of one player with `history`.
 *
 * NOTE(review): a second `function saveRatingHistoryToDB` is declared further
 * down in this file. With duplicate function declarations in a script the
 * later one wins, so this version looks like dead code — confirm and remove
 * one of the two.
 *
 * @param {number|string} pdgaNumber PDGA number of the player.
 * @param {Iterable<{date: *, rating: *}>} history Entries to store; `date`
 *   is written as given (no parsing in this version).
 * @returns {Promise<void>} Resolves after the insert statement is finalized;
 *   rejects if the player lookup or the DELETE fails, if no player matches
 *   ('Player not found'), or if finalize reports an error.
 */
function saveRatingHistoryToDB(pdgaNumber, history) {
return new Promise((resolve, reject) => {
db.get('SELECT id FROM players WHERE pdga_number = ?', [pdgaNumber], (err, player) => {
if (err) return reject(err);
if (!player) return reject(new Error('Player not found'));
// Clear existing history for this player
db.run('DELETE FROM rating_history WHERE player_id = ?', [player.id], (err) => {
if (err) return reject(err);
// Insert new history
const stmt = db.prepare('INSERT INTO rating_history (player_id, date, rating) VALUES (?, ?, ?)');
for (const entry of history) {
// No callback is passed here, so a failure of an individual insert is not observed by this function.
stmt.run([player.id, entry.date, entry.rating]);
}
stmt.finalize((err) => {
if (err) reject(err);
else resolve();
});
});
});
});
}
/**
 * Load the stored round history of a player, newest round first.
 *
 * @param {number|string} pdgaNumber PDGA number of the player.
 * @returns {Promise<Array<object>>} round_history rows ordered by date
 *   descending; an empty array when the player is unknown. Rejects with the
 *   sqlite error if either query fails.
 */
function getRoundHistoryFromDB(pdgaNumber) {
  const playerIdSql = 'SELECT id FROM players WHERE pdga_number = ?';
  const roundsSql = 'SELECT * FROM round_history WHERE player_id = ? ORDER BY date DESC';

  return new Promise((resolve, reject) => {
    db.get(playerIdSql, [pdgaNumber], (lookupError, playerRow) => {
      if (lookupError) {
        reject(lookupError);
        return;
      }
      if (!playerRow) {
        // Unknown player: report "no rounds" rather than an error.
        resolve([]);
        return;
      }
      db.all(roundsSql, [playerRow.id], (roundsError, roundRows) => {
        if (roundsError) {
          reject(roundsError);
        } else {
          resolve(roundRows);
        }
      });
    });
  });
}
/**
 * Read the `last_round_update` value stored for a player.
 *
 * @param {number|string} pdgaNumber PDGA number of the player.
 * @returns {Promise<*>} The stored column value, or null when no player row
 *   matched; rejects with the sqlite error on failure.
 */
function getLastRoundUpdateDate(pdgaNumber) {
  const sql = 'SELECT last_round_update FROM players WHERE pdga_number = ?';
  return new Promise((resolve, reject) => {
    db.get(sql, [pdgaNumber], (queryError, playerRow) => {
      if (queryError) {
        reject(queryError);
        return;
      }
      if (playerRow) {
        resolve(playerRow.last_round_update);
      } else {
        resolve(null);
      }
    });
  });
}
/**
 * Stamp a player's `last_round_update` column with the current timestamp.
 *
 * @param {number|string} pdgaNumber PDGA number of the player.
 * @returns {Promise<void>} Resolves when the UPDATE has run (whether or not
 *   a row matched); rejects with the sqlite error on failure.
 */
function updateLastRoundUpdateDate(pdgaNumber) {
  const sql = 'UPDATE players SET last_round_update = CURRENT_TIMESTAMP WHERE pdga_number = ?';
  return new Promise((resolve, reject) => {
    db.run(sql, [pdgaNumber], (updateError) => {
      if (updateError) {
        reject(updateError);
        return;
      }
      resolve();
    });
  });
}
/**
 * Replace the stored rating history of one player with `ratingHistory`.
 *
 * Dates are converted with `parseDate` and stored as `YYYY-MM-DD`. The
 * conversion happens before the database is touched, so a bad date can
 * neither throw inside a sqlite callback (which would crash the process and
 * leave the promise pending) nor wipe the existing history without replacing
 * it. Entries whose date cannot be parsed are skipped with a warning; if a
 * non-empty input yields no usable entry the promise rejects and the stored
 * history is left untouched.
 *
 * @param {number|string} pdgaNumber PDGA number of the player.
 * @param {?Array<{date: string, rating: number}>} ratingHistory New history;
 *   null/undefined is treated like an empty array (history is cleared).
 * @returns {Promise<void>} Rejects when the player is unknown
 *   ('Player not found'), when no entry has a parseable date, or with the
 *   sqlite error of a failed statement.
 */
function saveRatingHistoryToDB(pdgaNumber, ratingHistory) {
  return new Promise((resolve, reject) => {
    const entries = ratingHistory ?? [];

    // Convert everything up front; an exception thrown here rejects the promise.
    const rows = [];
    for (const entry of entries) {
      const parsedDate = parseDate(entry.date);
      if (!(parsedDate instanceof Date) || Number.isNaN(parsedDate.getTime())) {
        console.warn(`Skipping rating history entry with unparseable date "${entry.date}" for PDGA ${pdgaNumber}`);
        continue;
      }
      rows.push([parsedDate.toISOString().split('T')[0], entry.rating]);
    }
    if (entries.length > 0 && rows.length === 0) {
      reject(new Error('No rating history entry has a parseable date'));
      return;
    }

    db.get('SELECT id FROM players WHERE pdga_number = ?', [pdgaNumber], (err, player) => {
      if (err) return reject(err);
      if (!player) return reject(new Error('Player not found'));

      // Clear existing rating history for this player
      db.run('DELETE FROM rating_history WHERE player_id = ?', [player.id], (deleteErr) => {
        if (deleteErr) return reject(deleteErr);
        if (rows.length === 0) {
          return resolve();
        }

        let pending = rows.length;
        let settled = false;
        for (const [isoDate, rating] of rows) {
          db.run(
            'INSERT INTO rating_history (player_id, date, rating) VALUES (?, ?, ?)',
            [player.id, isoDate, rating],
            (insertErr) => {
              if (settled) return;
              if (insertErr) {
                settled = true;
                reject(insertErr);
                return;
              }
              pending -= 1;
              if (pending === 0) {
                settled = true;
                resolve();
              }
            }
          );
        }
      });
    });
  });
}
/**
 * Store round ratings for a player and stamp `last_round_update`.
 *
 * In full-refresh mode (`isIncremental === false`) the player's existing
 * round_history rows are deleted first; in incremental mode the rounds are
 * only added. The `last_round_update` timestamp is refreshed even when there
 * is nothing to insert.
 *
 * Rounds are converted before the database is touched. A round whose date is
 * missing or invalid is skipped with a warning, so it cannot throw from
 * inside a sqlite callback.
 *
 * NOTE(review): the round_history table created in this file has no unique
 * constraint besides `id`, so `INSERT OR REPLACE` behaves like a plain
 * INSERT and repeated incremental saves of the same rounds will duplicate
 * rows — confirm whether de-duplication happens elsewhere.
 *
 * @param {number|string} pdgaNumber PDGA number of the player.
 * @param {Array<{date: Date, rating: number, competition: (string|undefined)}>} roundData
 * @param {boolean} [isIncremental=false] Keep existing rows when true.
 * @returns {Promise<void>} Rejects when the player is unknown
 *   ('Player not found') or with the sqlite error of a failed statement.
 */
function saveRoundHistoryToDB(pdgaNumber, roundData, isIncremental = false) {
  return new Promise((resolve, reject) => {
    // Convert up front; an exception thrown here (e.g. roundData is null)
    // rejects the promise instead of escaping from a database callback.
    const rows = [];
    for (const round of roundData) {
      const when = round.date instanceof Date ? round.date : new Date(round.date);
      if (Number.isNaN(when.getTime())) {
        console.warn(`Skipping round with invalid date for PDGA ${pdgaNumber}: ${round.competition || 'Unknown'}`);
        continue;
      }
      rows.push([when.toISOString().split('T')[0], round.competition || 'Unknown', round.rating]);
    }

    // Update last_round_update timestamp (also when no new rounds were stored).
    // 'now' is single-quoted: double quotes denote identifiers in SQL and are
    // only accepted as strings through a legacy SQLite fallback.
    const touchLastRoundUpdate = () => {
      db.run(
        "UPDATE players SET last_round_update = datetime('now') WHERE pdga_number = ?",
        [pdgaNumber],
        (updateErr) => {
          if (updateErr) reject(updateErr);
          else resolve();
        }
      );
    };

    db.get('SELECT id FROM players WHERE pdga_number = ?', [pdgaNumber], (err, player) => {
      if (err) return reject(err);
      if (!player) return reject(new Error('Player not found'));

      const processRounds = () => {
        if (rows.length === 0) {
          touchLastRoundUpdate();
          return;
        }
        // Insert new round history
        let firstRunError = null;
        const stmt = db.prepare('INSERT OR REPLACE INTO round_history (player_id, date, competition_name, rating) VALUES (?, ?, ?, ?)');
        for (const row of rows) {
          stmt.run([player.id, ...row], (runErr) => {
            if (runErr && !firstRunError) {
              firstRunError = runErr;
            }
          });
        }
        stmt.finalize((finalizeErr) => {
          const failure = firstRunError || finalizeErr;
          if (failure) {
            reject(failure);
            return;
          }
          touchLastRoundUpdate();
        });
      };

      if (isIncremental) {
        // For incremental updates, just add new rounds
        processRounds();
        return;
      }
      // Clear existing round history for full refresh
      db.run('DELETE FROM round_history WHERE player_id = ?', [player.id], (deleteErr) => {
        if (deleteErr) return reject(deleteErr);
        processRounds();
      });
    });
  });
}
// Legacy in-memory cache (will be phased out)
// NOTE(review): nothing in the visible part of this file reads or writes `cache` — confirm usage further down before removing it.
const cache = new Map();
// 24 hours expressed in milliseconds.
const CACHE_DURATION = 24 * 60 * 60 * 1000;
/**
 * Download the public PDGA player page for `pdgaNumber` over HTTPS.
 *
 * The response is decoded as UTF-8 by the stream (`setEncoding`) rather than
 * by concatenating raw chunks, so multi-byte characters (for example
 * accented letters in player names) that straddle a chunk boundary are not
 * corrupted.
 *
 * @param {number|string} pdgaNumber PDGA number of the player.
 * @returns {Promise<string>} The page HTML when the status code is 200.
 *   Rejects with `Error('HTTP <status>')` (carrying `rateLimitInfo` with the
 *   status code and response headers) for any other status, with
 *   `Error('Request timeout')` after 30 s of socket inactivity, or with the
 *   underlying network error.
 */
async function fetchPlayerDataHTTP(pdgaNumber) {
  const REQUEST_TIMEOUT_MS = 30000;
  // Headers worth surfacing when PDGA throttles us.
  const rateLimitHeaders = [
    ['retry-after', 'Retry-After header'],
    ['x-ratelimit-limit', 'Rate Limit'],
    ['x-ratelimit-remaining', 'Rate Limit Remaining'],
    ['x-ratelimit-reset', 'Rate Limit Reset'],
  ];

  return new Promise((resolve, reject) => {
    const options = {
      hostname: 'www.pdga.com',
      port: 443,
      path: `/player/${pdgaNumber}`,
      method: 'GET',
      headers: {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
      },
      timeout: REQUEST_TIMEOUT_MS
    };

    const req = https.request(options, (res) => {
      // Let the stream decode UTF-8 across chunk boundaries.
      res.setEncoding('utf8');
      let data = '';
      res.on('data', (chunk) => {
        data += chunk;
      });
      // A response stream that breaks mid-body must settle the promise too.
      res.on('error', reject);
      res.on('end', () => {
        if (res.statusCode === 200) {
          resolve(data);
          return;
        }
        // Log rate limiting information if available
        console.log(`PDGA Response Status for #${pdgaNumber}: ${res.statusCode}`);
        console.log('Response Headers:', JSON.stringify(res.headers, null, 2));
        for (const [headerName, label] of rateLimitHeaders) {
          if (res.headers[headerName]) {
            console.log(`${label}: ${res.headers[headerName]}`);
          }
        }
        const error = new Error(`HTTP ${res.statusCode}`);
        error.rateLimitInfo = {
          statusCode: res.statusCode,
          headers: res.headers
        };
        reject(error);
      });
    });

    req.on('error', (error) => {
      console.log(`Request error for PDGA #${pdgaNumber}:`, error.code, error.message);
      if (error.code === 'ECONNRESET') {
        console.log('Connection reset - likely rate limited by PDGA');
      }
      reject(error);
    });
    req.on('timeout', () => {
      // The 'timeout' event only signals inactivity; the request has to be
      // torn down explicitly.
      req.destroy();
      reject(new Error('Request timeout'));
    });
    req.setTimeout(REQUEST_TIMEOUT_MS);
    req.end();
  });
}
/**
 * Extract name, current rating and rating change from a PDGA player page.
 *
 * @param {string} html Raw HTML of the player page.
 * @param {number|string} pdgaNumber PDGA number, copied into the result.
 * @returns {{pdgaNumber: *, name: string, rating: number, ratingChange: ?number, predictedRating: null}}
 *   Missing pieces fall back to name 'Unknown', rating 0 and ratingChange
 *   null. If parsing throws (e.g. `html` is not a string) the error is
 *   logged and a placeholder with name 'Error' is returned.
 */
function parsePlayerData(html, pdgaNumber) {
  // Every return path produces the same object shape.
  const buildResult = (name, rating, ratingChange) => ({
    pdgaNumber,
    name,
    rating,
    ratingChange,
    predictedRating: null
  });

  try {
    // First capture group of `pattern`, or null when the page does not match.
    const firstGroup = (pattern) => {
      const hit = html.match(pattern);
      return hit ? hit[1] : null;
    };

    // Player name comes from the <title>, which ends with the site name.
    const titleText = firstGroup(/<title>([^<]+?)\s*\|\s*Professional Disc Golf Association/i);
    // Markup may sit between the "Current Rating:" label and the number.
    const ratingDigits = firstGroup(/Current Rating:[^>]*>\s*(\d+)/i);
    // Signed number between the label and the "(as of" date note.
    const changeDigits = firstGroup(/Current Rating:[\s\S]*?([+\-]\d+)[\s\S]*?\(as of/i);

    const rawName = titleText === null ? 'Unknown' : titleText.trim();
    // Drop a trailing " #12345" style suffix from the name.
    const displayName = rawName.replace(/\s*#\d+$/, '');

    return buildResult(
      displayName,
      ratingDigits === null ? 0 : parseInt(ratingDigits, 10),
      changeDigits === null ? null : parseInt(changeDigits, 10)
    );
  } catch (error) {
    console.error(`Error parsing data for PDGA ${pdgaNumber}:`, error.message);
    return buildResult('Error', 0, null);
  }
}
// Function to get player data from DB only (for page loads)
/**
 * Build the player summary used for page loads purely from the database
 * (no request to PDGA is made here).
 *
 * @param {number|string} pdgaNumber PDGA number of the player.
 * @returns {Promise<?{pdgaNumber: number, name: string, rating: number, ratingChange: ?number, predictedRating: ?number}>}
 *   The stored player data, or null when the player is not in the database
 *   or a database error occurred (the error is logged).
 */
async function getPlayerDataFromDB(pdgaNumber) {
  try {
    const playerRow = await getPlayerFromDB(pdgaNumber);
    if (!playerRow) {
      return null; // No data in DB
    }

    console.log(`Loading PDGA ${pdgaNumber} from DB (source of truth)`);
    const prediction = await getPredictedRatingFromDB(pdgaNumber);

    // A prediction that is not above 0 is reported as null.
    let predictedRating = null;
    if (prediction > 0) {
      predictedRating = prediction;
    }

    return {
      pdgaNumber: playerRow.pdga_number,
      name: playerRow.name,
      rating: playerRow.current_rating,
      ratingChange: playerRow.rating_change,
      predictedRating
    };
  } catch (err) {
    console.error(`Database error for PDGA ${pdgaNumber}:`, err.message);
    return null;
  }
}
// Function for explicit refresh (scrape PDGA + update DB)
/**
 * Explicit refresh: download the player's PDGA page, parse it and store the
 * result in the database, retrying on failure.
 *
 * This function does not throw. When every attempt fails (or `retries` is
 * not positive) it returns a placeholder object whose name is 'Error'. A
 * failure to save to the database is logged and does not fail the scrape.
 *
 * Retry delay: `2000 ms * attempt`, raised to the server's `Retry-After`
 * value plus one second when present, and to at least 10 s after a
 * connection reset. `Retry-After` is accepted both as a number of seconds
 * and as an HTTP date; an unparseable value is ignored, so the delay can
 * never become NaN (which would make `setTimeout` fire immediately).
 *
 * @param {number|string} pdgaNumber PDGA number of the player.
 * @param {number} [retries=3] Maximum number of attempts.
 * @returns {Promise<{pdgaNumber: *, name: string, rating: number, ratingChange: ?number, predictedRating: null}>}
 */
async function scrapePDGARating(pdgaNumber, retries = 3) {
  const failureResult = () => ({
    pdgaNumber,
    name: 'Error',
    rating: 0,
    ratingChange: null,
    predictedRating: null
  });

  // Convert a Retry-After header into a delay in ms (with 1 s of buffer),
  // or null when the header is absent or unparseable.
  const retryAfterToMs = (headerValue) => {
    if (headerValue == null || headerValue === '') {
      return null;
    }
    const text = String(headerValue).trim();
    if (/^\d+$/.test(text)) {
      return (Number.parseInt(text, 10) + 1) * 1000;
    }
    const retryAt = Date.parse(text);
    if (!Number.isNaN(retryAt)) {
      return Math.max(0, retryAt - Date.now()) + 1000;
    }
    return null;
  };

  console.log(`=== Refreshing PDGA ${pdgaNumber} from PDGA website ===`);
  for (let attempt = 1; attempt <= retries; attempt++) {
    try {
      console.log(`Attempt ${attempt}/${retries} for PDGA ${pdgaNumber} (using HTTP)`);
      const html = await fetchPlayerDataHTTP(pdgaNumber);
      const result = parsePlayerData(html, pdgaNumber);
      // Save to database (best effort: the scraped data is still returned).
      try {
        await savePlayerToDB(result);
        console.log(`Saved PDGA ${pdgaNumber} to database`);
      } catch (dbErr) {
        console.error(`Failed to save PDGA ${pdgaNumber} to database:`, dbErr.message);
      }
      console.log(`Successfully scraped PDGA ${pdgaNumber} on attempt ${attempt}`);
      return result;
    } catch (error) {
      console.error(`Attempt ${attempt}/${retries} failed for PDGA ${pdgaNumber}:`, error.message);
      if (attempt === retries) {
        return failureResult();
      }

      // Adaptive retry delay based on error type
      let retryDelay = 2000 * attempt; // Base delay
      const serverDelay = retryAfterToMs(error.rateLimitInfo?.headers?.['retry-after']);
      if (serverDelay !== null) {
        // If server tells us when to retry, use that + some buffer
        retryDelay = Math.max(retryDelay, serverDelay);
        console.log(`Using Retry-After header: waiting ${retryDelay/1000}s`);
      }
      if (error.code === 'ECONNRESET') {
        // Connection reset usually means rate limiting - wait longer
        retryDelay = Math.max(retryDelay, 10000);
        console.log(`Connection reset detected: waiting ${retryDelay/1000}s`);
      }
      await new Promise((resolve) => setTimeout(resolve, retryDelay));
    }
  }
  // Only reached when the loop never ran (retries <= 0).
  return failureResult();
}
/**
 * Scrape a player's recent round ratings and derive a predicted rating,
 * retrying when nothing usable comes back.
 *
 * @param {object} browser Browser handle passed on to getPlayerCompetitionRatings.
 * @param {number|string} pdgaNumber PDGA number of the player.
 * @param {number} [retries=2] Maximum number of attempts; 5 s pause between attempts.
 * @returns {Promise<number>} The first prediction above 0, or 0 when every
 *   attempt failed or produced no rating.
 */
async function getPredictedRating(browser, pdgaNumber, retries = 2) {
  const pauseBeforeRetry = () => new Promise((done) => setTimeout(done, 5000));

  for (let attempt = 1; attempt <= retries; attempt++) {
    const canRetry = attempt < retries;
    try {
      console.log(`Predicted rating attempt ${attempt}/${retries} for PDGA ${pdgaNumber}`);
      const rounds = await getPlayerCompetitionRatings(browser, pdgaNumber);
      const prediction = calculatePredictedRating(rounds);
      if (prediction > 0) {
        return prediction;
      }
      if (canRetry) {
        console.log(`No ratings found, waiting before retry...`);
        await pauseBeforeRetry();
      }
    } catch (error) {
      console.error(`Predicted rating attempt ${attempt}/${retries} failed for ${pdgaNumber}:`, error.message);
      if (canRetry) {
        await pauseBeforeRetry();
      }
    }
  }

  console.log(`All attempts failed for predicted rating of PDGA ${pdgaNumber}`);
  return 0;
}
/**
 * Compute the predicted rating from the rounds already stored in the
 * database (no scraping).
 *
 * @param {number|string} pdgaNumber PDGA number of the player.
 * @returns {Promise<number>} Result of calculatePredictedRating for the
 *   stored rounds, or 0 when there are no stored rounds or an error occurred
 *   (the error is logged).
 */
async function getPredictedRatingFromDB(pdgaNumber) {
  try {
    const storedRounds = await getRoundHistoryFromDB(pdgaNumber);
    if (storedRounds.length <= 0) {
      return 0;
    }
    console.log(`Using ${storedRounds.length} cached rounds for PDGA ${pdgaNumber} prediction`);

    // Convert to the format expected by calculatePredictedRating
    const roundRatings = [];
    for (const row of storedRounds) {
      roundRatings.push({ rating: row.rating, date: new Date(row.date) });
    }
    return calculatePredictedRating(roundRatings);
  } catch (err) {
    console.error(`Error getting predicted rating from DB for ${pdgaNumber}:`, err.message);
    return 0;
  }
}
/**
 * Scrape the rating history table from a player's /history page.
 *
 * The short settle delay uses a plain timer instead of
 * `page.waitForTimeout`, which no longer exists in current Puppeteer
 * releases; there the call threw a TypeError that the catch below swallowed,
 * making this function always return an empty array.
 *
 * @param {object} browser Puppeteer browser used to open a new page.
 * @param {number|string} pdgaNumber PDGA number of the player.
 * @returns {Promise<Array<{date: string, rating: number, tournament: string}>>}
 *   Entries as found on the page (date kept as page text); an empty array
 *   when nothing matched or an error occurred (the error is logged).
 */
async function getOfficialRatingHistory(browser, pdgaNumber) {
  const page = await browser.newPage();
  let ratingHistory = [];
  try {
    const url = `https://www.pdga.com/player/${pdgaNumber}/history`;
    await page.goto(url, { waitUntil: 'networkidle2', timeout: 45000 });
    // Brief pause to let late rendering settle.
    await new Promise((resolve) => setTimeout(resolve, 1000));

    // Extract the rating history data. This callback runs inside the page,
    // so it must not reference variables from the Node.js scope.
    ratingHistory = await page.evaluate(() => {
      const history = [];
      const datePattern = /^\d{4}-\d{2}-\d{2}$|^\d{1,2}-\w{3}-\d{4}$|^\w{3} \d{1,2}, \d{4}$/;
      // Try each selector until we find rating data
      const selectors = [
        'table tbody tr',
        'table tr',
        '.view-content tbody tr'
      ];
      for (const selector of selectors) {
        const rows = document.querySelectorAll(selector);
        for (const row of rows) {
          const cells = row.querySelectorAll('td');
          if (cells.length < 3) continue;
          const dateText = cells[0]?.innerText?.trim();
          const ratingText = cells[1]?.innerText?.trim();
          // Check if this looks like a date and rating
          if (!dateText || !ratingText || !datePattern.test(dateText)) continue;
          const rating = parseInt(ratingText, 10);
          // Values outside (800, 1200) are treated as not being a rating.
          if (!isNaN(rating) && rating > 800 && rating < 1200) {
            history.push({
              date: dateText,
              rating: rating,
              tournament: cells[2]?.innerText?.trim() || 'Unknown'
            });
          }
        }
        if (history.length > 0) break;
      }
      return history;
    });
  } catch (error) {
    console.error('Error fetching official rating history:', error.message);
  } finally {
    await page.close();
  }
  return ratingHistory;
}
/**
 * Scrape the rounds listed on a player's /details page.
 *
 * Each table row with at least four cells is scanned heuristically: any cell
 * that looks like a date, a rating in 800-1200 or a division code is picked
 * up, and the first cell is taken as the tournament name. Dates are parsed
 * with `parseDate`, then with the native `Date` parser; if both fail the
 * round keeps the current date as a fallback.
 *
 * The settle delay uses a plain timer instead of `page.waitForTimeout`,
 * which no longer exists in current Puppeteer releases; there the call threw
 * a TypeError that the catch below swallowed, making this function always
 * return an empty array.
 *
 * @param {object} browser Puppeteer browser used to open a new page.
 * @param {number|string} pdgaNumber PDGA number of the player.
 * @returns {Promise<Array<{tournament: string, date: Date, rating: number, division: string, competition: string}>>}
 *   Rounds found on the page; an empty array when nothing matched or an
 *   error occurred (the error is logged).
 */
async function getPlayerTournamentDetails(browser, pdgaNumber) {
  const page = await browser.newPage();
  let tournamentRounds = [];
  try {
    const url = `https://www.pdga.com/player/${pdgaNumber}/details`;
    await page.goto(url, { waitUntil: 'networkidle2', timeout: 45000 });
    // Brief pause to let late rendering settle.
    await new Promise((resolve) => setTimeout(resolve, 1000));

    // Extract individual tournament rounds with actual dates and ratings.
    // This callback runs inside the page: console.log here writes to the
    // browser console, and Node.js variables are not accessible.
    const scrapedRounds = await page.evaluate(() => {
      const rounds = [];
      const rows = document.querySelectorAll('table tbody tr');
      // Log first few rows to see structure
      console.log('First few table rows for debugging:');
      for (let i = 0; i < Math.min(3, rows.length); i++) {
        const cells = rows[i].querySelectorAll('td');
        const cellTexts = Array.from(cells).map(cell => cell.innerText.trim());
        console.log(`Row ${i}: [${cellTexts.join(' | ')}]`);
      }
      rows.forEach(row => {
        const cells = row.querySelectorAll('td');
        // Try to identify which columns contain date and rating information
        if (cells.length < 4) return;
        const cellTexts = Array.from(cells).map(cell => cell.innerText.trim());
        let tournamentName = '';
        let dateText = '';
        let rating = 0;
        let division = '';
        // Try to find date and rating in different column positions
        cellTexts.forEach((text, index) => {
          // Look for date patterns, including multi-day tournaments
          // Examples: "2-Sep-2023", "2-Sep to 3-Sep-2023", "2 to 3-Sep-2023"
          if (/\d{1,2}(-\w{3})?(\s+to\s+)\d{1,2}-\w{3}-\d{4}/.test(text) || /\d{1,2}-\w{3}-\d{4}/.test(text)) {
            dateText = text;
          }
          // Look for rating patterns (3-4 digit numbers between 800-1200)
          if (/^\d{3,4}$/.test(text)) {
            const value = parseInt(text, 10);
            if (value >= 800 && value <= 1200) {
              rating = value;
            }
          }
          // Look for division patterns (like MA3, MPO, etc.)
          if (/^M[A-Z]\d*$|^F[A-Z]\d*$/.test(text)) {
            division = text;
          }
          // First cell is usually tournament name
          if (index === 0) {
            tournamentName = text;
          }
        });
        if (tournamentName && dateText && rating > 0) {
          rounds.push({
            tournament: tournamentName,
            dateText: dateText,
            rating: rating,
            division: division,
            competition: `${tournamentName} (${division})`
          });
        }
      });
      return rounds;
    });

    // Parse dates properly after extraction
    tournamentRounds = scrapedRounds.map(round => {
      // Fallback when the date cannot be parsed at all: the current date.
      let validDate = new Date();
      if (round.dateText) {
        try {
          const pdgaParsed = parseDate(round.dateText);
          if (pdgaParsed instanceof Date && !isNaN(pdgaParsed.getTime())) {
            validDate = pdgaParsed;
          } else {
            const nativeParsed = new Date(round.dateText);
            if (!isNaN(nativeParsed.getTime())) {
              validDate = nativeParsed;
            }
          }
        } catch (e) {
          console.log(`Date parsing failed for "${round.dateText}": ${e.message}`);
        }
      }
      return {
        tournament: round.tournament,
        date: validDate,
        rating: round.rating,
        division: round.division,
        competition: round.competition
      };
    });
  } catch (error) {
    console.error('Error fetching tournament details:', error.message);
  } finally {
    await page.close();
  }
  return tournamentRounds;
}
// Get the most recent tournament date from /details page (official rating rounds)
/**
 * Find the date of the most recent round listed on the player's /details
 * page.
 *
 * @param {object} browser Browser handle passed on to getPlayerTournamentDetails.
 * @param {number|string} pdgaNumber PDGA number of the player.
 * @returns {Promise<?Date>} The latest round date, or null when no rounds
 *   were found or an error occurred (the error is logged).
 */
async function getLatestOfficialRoundDate(browser, pdgaNumber) {
  try {
    const rounds = await getPlayerTournamentDetails(browser, pdgaNumber);
    if (rounds.length === 0) {
      return null;
    }

    // Newest first; the head of the list carries the date we want.
    const newestFirst = [...rounds].sort((left, right) => right.date - left.date);
    const [{ date: latestDate }] = newestFirst;

    console.log(`Latest official round date for PDGA ${pdgaNumber}: ${latestDate.toDateString()}`);
    return latestDate;
  } catch (error) {
    console.error('Error getting latest official round date:', error.message);
    return null;
  }
}
// Get NEW tournament rounds (played after the latest official round)
/**
 * Collect round ratings from tournaments on the player's profile page that
 * took place after `afterDate`.
 *
 * The profile page is scanned for result rows dated after `afterDate`; each
 * matching tournament page is then opened and the `td.round-rating` cells of
 * the player's row are read. A failure on one tournament is logged and the
 * remaining tournaments are still processed.
 *
 * The settle delay uses a plain timer instead of `page.waitForTimeout`,
 * which no longer exists in current Puppeteer releases; there the call threw
 * a TypeError for every tournament, so no rounds were ever returned.
 *
 * @param {object} browser Puppeteer browser used to open a new page.
 * @param {number|string} pdgaNumber PDGA number of the player.
 * @param {Date} afterDate Only tournaments dated after this are considered.
 * @returns {Promise<Array<{rating: number, date: *, competition: string}>>}
 *   One entry per round rating, `date` being the result of `parseDate` on the
 *   tournament date; an empty array when nothing was found or the profile
 *   page could not be processed.
 */
async function getNewTournamentRounds(browser, pdgaNumber, afterDate) {
  const page = await browser.newPage();
  const newRounds = [];
  try {
    const url = `https://www.pdga.com/player/${pdgaNumber}`;
    await page.goto(url, { waitUntil: 'networkidle2' });
    console.log(`Looking for tournaments after ${afterDate.toDateString()}...`);

    // Get tournament URLs that are newer than afterDate. Runs inside the
    // page, so the cutoff is passed in as a timestamp argument.
    const newTournamentUrls = await page.evaluate((afterTimestamp) => {
      const cutoff = new Date(afterTimestamp);
      const tables = document.querySelectorAll('table[id*="player-results"]');
      const urls = [];
      tables.forEach(table => {
        const rows = table.querySelectorAll('tbody tr');
        rows.forEach(row => {
          const dateCell = row.querySelector('.dates');
          const tournamentCell = row.querySelector('.tournament a');
          if (!dateCell || !tournamentCell) return;
          const dateMatch = dateCell.innerText.trim().match(/\d{1,2}-[A-Za-z]{3}-\d{4}/);
          if (!dateMatch) return;
          const dateStr = dateMatch[0];
          const date = new Date(dateStr);
          // Only include tournaments AFTER the latest official round
          if (!(date > cutoff)) return;
          const href = tournamentCell.getAttribute('href');
          if (href) {
            urls.push({
              url: `https://www.pdga.com${href}`,
              date: dateStr,
              name: tournamentCell.innerText.trim()
            });
          }
        });
      });
      return urls;
    }, afterDate.getTime());
    console.log(`Found ${newTournamentUrls.length} new tournaments after ${afterDate.toDateString()}`);

    // Scrape individual round ratings from new tournaments
    for (const tournamentData of newTournamentUrls) {
      try {
        console.log(`Scraping new tournament: ${tournamentData.name} (${tournamentData.date})`);
        await page.goto(tournamentData.url, { waitUntil: 'domcontentloaded', timeout: 30000 });
        // Short pause only; just a few tournaments are scraped here.
        await new Promise((resolve) => setTimeout(resolve, 500));

        // NOTE(review): the player's row is located by substring match on
        // the PDGA number, so a longer number containing it would match
        // too — confirm whether an exact cell match is possible.
        const roundRatings = await page.evaluate((pdgaNum) => {
          const rows = document.querySelectorAll('tr');
          for (const row of rows) {
            const cells = row.querySelectorAll('td');
            const hasPlayerNumber = Array.from(cells).some(cell =>
              cell.innerText && cell.innerText.includes(pdgaNum.toString())
            );
            if (hasPlayerNumber) {
              const ratings = [];
              row.querySelectorAll('td.round-rating').forEach(cell => {
                const rating = parseInt(cell.innerText.trim(), 10);
                if (!isNaN(rating) && rating > 0) {
                  ratings.push(rating);
                }
              });
              return ratings;
            }
          }
          return [];
        }, pdgaNumber);

        if (roundRatings.length > 0) {
          const parsedDate = parseDate(tournamentData.date);
          for (const rating of roundRatings) {
            newRounds.push({
              rating,
              date: parsedDate,
              competition: tournamentData.name
            });
          }
          console.log(`✓ Found ${roundRatings.length} round ratings for ${tournamentData.name}`);
        }
      } catch (error) {
        console.error(`Error scraping tournament ${tournamentData.name}:`, error.message);
      }
    }
  } catch (error) {
    console.error(`Error getting new tournament rounds for PDGA ${pdgaNumber}:`, error);
  } finally {
    await page.close();
  }
  return newRounds;
}
// Optimized function: Get /details rounds + new tournaments only
/**
 * Collect a player's rounds by combining the /details page (official
 * rounds) with rounds from tournaments played after the latest official
 * round.
 *
 * @param {object} browser Browser handle passed on to the scraping helpers.
 * @param {number|string} pdgaNumber PDGA number of the player.
 * @returns {Promise<Array<{rating: number, date: Date, competition: string, source: string}>>}
 *   All rounds sorted oldest first, each tagged with source 'official' or
 *   'new'; an empty array when the details page yields no rounds or an error
 *   occurred (the error is logged).
 */
async function getOptimizedPlayerRounds(browser, pdgaNumber) {
  console.log(`=== Optimized Round Collection for PDGA ${pdgaNumber} ===`);

  // Reduce a scraped round to the common shape and tag where it came from.
  const tagWith = (source) => ({ rating, date, competition }) => ({
    rating,
    date,
    competition,
    source
  });

  try {
    // Step 1: official rating rounds from the /details page
    console.log('Step 1: Getting official rating rounds from /details page...');
    const officialRounds = await getPlayerTournamentDetails(browser, pdgaNumber);
    if (officialRounds.length === 0) {
      console.log('No official rounds found in details page');
      return [];
    }
    console.log(`✓ Found ${officialRounds.length} official rating rounds`);

    // Step 2: newest official round (the list is reordered newest-first in place)
    officialRounds.sort((left, right) => right.date - left.date);
    const latestOfficialDate = officialRounds[0].date;
    console.log(`Latest official round: ${latestOfficialDate.toDateString()}`);

    // Step 3: rounds from tournaments played after that date
    console.log('Step 2: Looking for NEW tournaments since latest official round...');
    const newRounds = await getNewTournamentRounds(browser, pdgaNumber, latestOfficialDate);
    if (newRounds.length > 0) {
      console.log(`✓ Found ${newRounds.length} new round ratings`);
    } else {
      console.log(' No new tournaments found since latest official round');
    }

    // Step 4: merge both sources and order oldest first
    const allRounds = officialRounds
      .map(tagWith('official')) // From /details page
      .concat(newRounds.map(tagWith('new'))); // From individual tournaments
    allRounds.sort((left, right) => left.date - right.date);

    console.log(`=== Summary: ${officialRounds.length} official + ${newRounds.length} new = ${allRounds.length} total rounds ===`);
    return allRounds;
  } catch (error) {
    console.error('Error in optimized round collection:', error.message);
    return [];
  }
}
// Legacy function - keep for backward compatibility but mark as deprecated
/**
 * Legacy scraper (deprecated, kept for backward compatibility): visit every
 * tournament on the player's profile page from the past year (and before
 * the next PDGA update date) and collect the player's round ratings.
 *
 * Rounds found for a tournament are saved to the database immediately
 * (incremental save), so partial progress survives a later failure. A
 * failure on one tournament is logged in detail and the loop continues.
 *
 * Fixes over the previous version:
 *  - `tournamentUrls` is declared outside the `try`, so the outer `catch`
 *    can report progress instead of throwing a ReferenceError that masked
 *    the original error.
 *  - The settle delay uses a plain timer instead of `page.waitForTimeout`,
 *    which no longer exists in current Puppeteer releases.
 *  - The completion rate is reported as 0% instead of NaN% when there are
 *    no tournaments.
 *
 * @param {object} browser Puppeteer browser used to open a new page.
 * @param {number|string} pdgaNumber PDGA number of the player.
 * @param {?Date} [sinceDate=null] When given, only tournaments dated after
 *   it are visited (incremental update).
 * @returns {Promise<Array<{rating: number, date: *, competition: string}>>}
 *   All rounds collected (possibly partial when an error occurred).
 */
async function getPlayerCompetitionRatings(browser, pdgaNumber, sinceDate = null) {
  const page = await browser.newPage();
  const allRatings = [];
  // Declared here (not inside the try) so the catch block can read it.
  let tournamentUrls = [];
  let tournamentCount = 0;
  let successfulTournaments = 0;

  // Log the interesting fields of an error in one structured line.
  const logErrorDetails = (label, err) => {
    console.error(label, {
      type: err.constructor.name,
      message: err.message,
      code: err.code,
      stack: err.stack?.split('\n')[0]
    });
  };

  try {
    const url = `https://www.pdga.com/player/${pdgaNumber}`;
    await page.goto(url, { waitUntil: 'networkidle2' });
    // Calculate the next PDGA update date to filter tournaments
    const nextUpdateDate = getNextPDGAUpdateDate();

    // Runs inside the page: dates are passed in as primitives.
    tournamentUrls = await page.evaluate((nextUpdateTimestamp, sinceDateString) => {
      const nextUpdateDate = new Date(nextUpdateTimestamp);
      const sinceDate = sinceDateString ? new Date(sinceDateString) : null;
      const oneYearAgo = new Date();
      oneYearAgo.setFullYear(oneYearAgo.getFullYear() - 1);
      const tables = document.querySelectorAll('table[id*="player-results"]');
      const urls = [];
      tables.forEach(table => {
        const rows = table.querySelectorAll('tbody tr');
        rows.forEach(row => {
          const dateCell = row.querySelector('.dates');
          const tournamentCell = row.querySelector('.tournament a');
          if (!dateCell || !tournamentCell) return;
          const dateMatch = dateCell.innerText.trim().match(/\d{1,2}-[A-Za-z]{3}-\d{4}/);
          if (!dateMatch) return;
          const dateStr = dateMatch[0];
          const date = new Date(dateStr);
          // Apply date filters
          const dateValid = date > oneYearAgo && date < nextUpdateDate;
          const isNewTournament = !sinceDate || date > sinceDate;
          if (!dateValid || !isNewTournament) return;
          const href = tournamentCell.getAttribute('href');
          if (href) {
            urls.push({
              url: `https://www.pdga.com${href}`,
              date: dateStr
            });
          }
        });
      });
      return urls; // Get all tournaments from the past year
    }, nextUpdateDate.getTime(), sinceDate ? sinceDate.toISOString() : null);

    const updateType = sinceDate ? `incremental (since ${sinceDate.toDateString()})` : 'full';
    console.log(`Found ${tournamentUrls.length} tournaments for PDGA ${pdgaNumber} (${updateType})`);

    for (const tournamentData of tournamentUrls) {
      tournamentCount++;
      const progress = `[${tournamentCount}/${tournamentUrls.length}]`;
      try {
        console.log(`${progress} Navigating to tournament: ${tournamentData.url}`);
        const navigationStart = Date.now();
        try {
          await page.goto(tournamentData.url, { waitUntil: 'domcontentloaded', timeout: 45000 });
          const navigationTime = Date.now() - navigationStart;
          console.log(`✓ Navigation completed in ${navigationTime}ms`);
        } catch (navError) {
          console.error(`✗ Navigation failed for ${tournamentData.url}:`);
          logErrorDetails('Navigation error details:', navError);
          throw navError; // Re-throw to be caught by outer try-catch
        }

        console.log(`Waiting 1s before scraping tournament data...`);
        await new Promise((resolve) => setTimeout(resolve, 1000));
        console.log(`Starting page evaluation for PDGA ${pdgaNumber}...`);

        let roundRatings;
        try {
          // NOTE(review): the player's row is located by substring match on
          // the PDGA number, so a longer number containing it would match
          // too — confirm whether an exact cell match is possible.
          roundRatings = await page.evaluate((pdgaNum) => {
            const rows = document.querySelectorAll('tr');
            for (const row of rows) {
              const cells = row.querySelectorAll('td');
              const hasPlayerNumber = Array.from(cells).some(cell =>
                cell.innerText && cell.innerText.includes(pdgaNum.toString())
              );
              if (hasPlayerNumber) {
                const ratings = [];
                row.querySelectorAll('td.round-rating').forEach(cell => {
                  const rating = parseInt(cell.innerText.trim(), 10);
                  if (!isNaN(rating) && rating > 0) {
                    ratings.push(rating);
                  }
                });
                return ratings;
              }
            }
            return [];
          }, pdgaNumber);
          console.log(`✓ Page evaluation completed, found ${roundRatings.length} round ratings`);
        } catch (evalError) {
          console.error(`✗ Page evaluation failed for ${tournamentData.url}:`);
          logErrorDetails('Evaluation error details:', evalError);
          throw evalError; // Re-throw to be caught by outer try-catch
        }

        if (roundRatings.length === 0) {
          console.log(`✗ ${progress} No round ratings found for ${tournamentData.url}`);
          continue;
        }

        const parsedDate = parseDate(tournamentData.date);
        // Extract tournament name from URL for better database storage
        const tournamentName = tournamentData.url.split('/').pop() || 'Unknown Tournament';
        const newRounds = roundRatings.map((rating) => ({
          rating,
          date: parsedDate,
          competition: tournamentName
        }));
        allRatings.push(...newRounds);
        successfulTournaments++;
        console.log(`✓ ${progress} Found ${roundRatings.length} round ratings for ${tournamentName}`);

        // Save rounds immediately to database (partial save)
        try {
          await saveRoundHistoryToDB(pdgaNumber, newRounds, true);
          console.log(`💾 Saved ${newRounds.length} rounds to database`);
        } catch (saveError) {
          console.error(`⚠️ Could not save rounds to DB: ${saveError.message}`);
        }
      } catch (error) {
        const message = String(error?.message ?? '');
        console.error(`✗ ${progress} Error scraping tournament ${tournamentData.url}:`);
        console.error('Tournament error type:', error.constructor.name);
        console.error('Tournament error message:', error.message);
        console.error('Tournament error code:', error.code);
        console.error('Tournament error name:', error.name);
        console.error('Tournament full error object:', JSON.stringify(error, Object.getOwnPropertyNames(error), 2));
        // Log the current state when error occurs
        console.error(`Tournament scraping progress: ${tournamentCount}/${tournamentUrls.length} (${successfulTournaments} successful so far)`);
        console.error(`Total rounds collected before this error: ${allRatings.length}`);
        if (message.includes('socket hang up')) {
          console.error('🔌 Socket hang up detected at tournament level - PDGA may be rate limiting');
          console.error('💡 Will continue trying remaining tournaments after this failure');
        }
        if (message.includes('Navigation timeout')) {
          console.error('⏰ Navigation timeout at tournament level - page took too long to load');
        }
        if (message.includes('net::ERR_CONNECTION_RESET')) {
          console.error('🚫 Connection reset at tournament level - PDGA blocking requests');
        }
        // Don't let individual tournament failures stop the whole process
        console.error('⚠️ Continuing with next tournament despite this error...');
      }
    }

    // Log summary of scraping results
    const completionRate = tournamentUrls.length > 0
      ? Math.round((successfulTournaments / tournamentUrls.length) * 100)
      : 0;
    console.log(`=== Scraping Summary for PDGA ${pdgaNumber} ===`);
    console.log(`Tournaments processed: ${tournamentCount}/${tournamentUrls.length}`);
    console.log(`Successful tournaments: ${successfulTournaments}`);
    console.log(`Total rounds found: ${allRatings.length}`);
    console.log(`Completion rate: ${completionRate}%`);
  } catch (error) {
    console.error(`Error getting competition ratings for PDGA ${pdgaNumber}:`, error);
    console.error(`=== Partial Results Before Error ===`);
    console.error(`Tournaments processed: ${tournamentCount}/${tournamentUrls.length || 0}`);
    console.error(`Successful tournaments: ${successfulTournaments}`);
    console.error(`Total rounds collected: ${allRatings.length}`);
    if (allRatings.length > 0) {
      console.error(`Rounds saved to database before error occurred`);
    }
  } finally {
    await page.close();
  }
  // Return all ratings from the last year (already filtered above)
  return allRatings;
}
/**
 * Parse a date string scraped from PDGA into a local-time Date.
 *
 * Recognised shapes:
 *   - multi-day ranges such as "2-Sep to 3-Sep-2023" or "2 to 3-Sep-2023"
 *     (the FIRST day of the range is returned);
 *   - single days such as "5-Oct-2023";
 *   - anything else is handed to `new Date(dateStr)` unchanged.
 *
 * @param {string} dateStr - Date text to parse.
 * @returns {Date} The parsed date. An unknown month abbreviation produces an
 *   Invalid Date; unrecognised shapes yield whatever `new Date(dateStr)` gives.
 */
function parseDate(dateStr) {
  const monthIndexByName = {
    jan: 0, feb: 1, mar: 2, apr: 3, may: 4, jun: 5,
    jul: 6, aug: 7, sep: 8, oct: 9, nov: 10, dec: 11
  };
  // Case-insensitive lookup. Unknown abbreviations map to undefined, which
  // makes the resulting Date invalid instead of silently wrong.
  const toMonthIndex = (name) => monthIndexByName[name.toLowerCase()];

  // Multi-day formats first, e.g. "2-Sep to 3-Sep-2023" or "2 to 3-Sep-2023".
  const rangeMatch = dateStr.match(/^(\d{1,2})(-([A-Za-z]{3}))?(\s+to\s+)(\d{1,2})-([A-Za-z]{3})-(\d{4})$/);
  if (rangeMatch) {
    const startDay = parseInt(rangeMatch[1], 10);
    const endMonth = toMonthIndex(rangeMatch[6]);
    // When the start has no month of its own it shares the end month.
    const startMonth = rangeMatch[3] ? toMonthIndex(rangeMatch[3]) : endMonth;
    let year = parseInt(rangeMatch[7], 10);
    // The year in the text belongs to the END of the range. A range such as
    // "30-Dec to 2-Jan-2024" therefore starts in the previous year.
    if (startMonth > endMonth) {
      year -= 1;
    }
    return new Date(year, startMonth, startDay);
  }

  // Single day in DD-Mon-YYYY form.
  const singleDayMatch = dateStr.match(/^(\d{1,2})-([A-Za-z]{3})-(\d{4})$/);
  if (singleDayMatch) {
    const [, dayText, monthText, yearText] = singleDayMatch;
    return new Date(parseInt(yearText, 10), toMonthIndex(monthText), parseInt(dayText, 10));
  }

  // Everything else (including MM/DD/YYYY) is left to the built-in parser.
  return new Date(dateStr);
}
/**
 * Return the next ratings-update date, which this code models as the second
 * Tuesday of a month.
 *
 * If `today` falls before or on this month's second Tuesday, that date is
 * returned; otherwise the second Tuesday of the following month is returned.
 * The comparison is made on calendar days, so any time of day on the second
 * Tuesday itself still counts as "on".
 *
 * @param {Date} [today=new Date()] - Reference date; defaults to now.
 * @returns {Date} Local midnight of the selected second Tuesday.
 */
function getNextPDGAUpdateDate(today = new Date()) {
  // Second Tuesday of the given month. A monthIndex of 12 rolls over into
  // January of the next year via the Date constructor.
  const secondTuesdayOf = (year, monthIndex) => {
    const firstOfMonth = new Date(year, monthIndex, 1);
    // getDay(): 0 = Sunday, 2 = Tuesday.
    const daysUntilTuesday = (2 - firstOfMonth.getDay() + 7) % 7;
    return new Date(year, monthIndex, 1 + daysUntilTuesday + 7);
  };

  const currentYear = today.getFullYear();
  const currentMonth = today.getMonth();
  // Drop the time of day so that "on the second Tuesday" compares as equal.
  const startOfToday = new Date(currentYear, currentMonth, today.getDate());

  const thisMonthsUpdate = secondTuesdayOf(currentYear, currentMonth);
  if (startOfToday <= thisMonthsUpdate) {
    return thisMonthsUpdate;
  }
  return secondTuesdayOf(currentYear, currentMonth + 1);
}
/**
 * Predict a player's next rating from individual round ratings, logging every
 * step of the calculation.
 *
 * Steps, as encoded here:
 *   1. keep rounds with a positive rating dated before the next update date;
 *   2. keep rounds from the 12 months before that update, widening to 24
 *      months when fewer than 8 rounds qualify;
 *   3. with 7 or more rounds, drop rounds more than 2.5 standard deviations or
 *      more than 100 points below the mean, unless fewer than 4 would remain;
 *   4. with 9 or more rounds, count the most recent 25% twice;
 *   5. return the rounded average.
 *
 * @param {Array<{rating: number, date: Date, competition: string}>} roundRatings
 *   Rounds to consider; the array itself is not modified.
 * @returns {number} Predicted rating, or 0 when no round qualifies.
 */
function calculatePredictedRating(roundRatings) {
  console.log('\n=== PDGA RATING CALCULATION (Following Official Rules) ===');
  if (!roundRatings || roundRatings.length === 0) {
    console.log('❌ No rounds provided for prediction');
    return 0;
  }
  console.log(`📊 Starting with ${roundRatings.length} total rounds`);

  // Only rounds played before the next update can be part of it.
  const nextUpdateDate = getNextPDGAUpdateDate();
  console.log(`🎯 PDGA Update Simulation: Next update date is ${nextUpdateDate.toDateString()}`);
  console.log(` Only including rounds played before ${nextUpdateDate.toDateString()}`);

  // Most recent first; filter() returns a copy, so the caller's array is untouched.
  const allSortedRounds = roundRatings
    .filter(r => r.rating > 0 && r.date < nextUpdateDate)
    .sort((a, b) => b.date - a.date);
  if (allSortedRounds.length === 0) {
    console.log('❌ No valid rounds after filtering for update date');
    return 0;
  }
  console.log(`📊 After update date filter: ${allSortedRounds.length} rounds`);

  const twelveMonthsBeforeUpdate = new Date(nextUpdateDate);
  twelveMonthsBeforeUpdate.setFullYear(twelveMonthsBeforeUpdate.getFullYear() - 1);
  const mostRecentDate = allSortedRounds[0].date;
  console.log(`📅 Most recent round: ${mostRecentDate.toDateString()}`);
  console.log(`📅 12-month cutoff: ${twelveMonthsBeforeUpdate.toDateString()} (1 year before update)`);

  let eligibleRounds = allSortedRounds.filter(r => r.date >= twelveMonthsBeforeUpdate);
  console.log(`\n🗓️ 12-MONTH FILTERING:`);
  console.log(`✅ Rounds in last 12 months: ${eligibleRounds.length}`);

  // Fewer than 8 rounds in the window: widen it to 24 months.
  if (eligibleRounds.length < 8) {
    const twentyFourMonthsBeforeUpdate = new Date(nextUpdateDate);
    twentyFourMonthsBeforeUpdate.setFullYear(twentyFourMonthsBeforeUpdate.getFullYear() - 2);
    eligibleRounds = allSortedRounds.filter(r => r.date >= twentyFourMonthsBeforeUpdate);
    console.log(`⚠️ Extended to 24 months before update (${twentyFourMonthsBeforeUpdate.toDateString()}) - now ${eligibleRounds.length} rounds`);
  }
  if (eligibleRounds.length === 0) {
    console.log('❌ No eligible rounds found');
    return 0;
  }
  console.log(`\n📈 ELIGIBLE ROUNDS: ${eligibleRounds.length}`);
  eligibleRounds.forEach((round, index) => {
    console.log(` ${index + 1}. ${round.date.toDateString()}: ${round.rating} (${round.competition})`);
  });

  // workingRounds and workingRatings stay index-aligned throughout.
  let workingRounds = [...eligibleRounds];
  let workingRatings = workingRounds.map(r => r.rating);

  if (workingRatings.length >= 7) {
    console.log(`\n🔍 OUTLIER EXCLUSION (≥7 rounds available):`);
    const mean = workingRatings.reduce((sum, r) => sum + r, 0) / workingRatings.length;
    const stdDev = calculateStandardDeviation(workingRatings);
    console.log(` Mean: ${mean.toFixed(1)}`);
    console.log(` Std Dev: ${stdDev.toFixed(1)}`);

    const stdDevCutoff = mean - 2.5 * stdDev;
    const hundredPointCutoff = mean - 100;
    console.log(` 2.5σ cutoff: ${stdDevCutoff.toFixed(1)}`);
    console.log(` 100-point cutoff: ${hundredPointCutoff.toFixed(1)}`);

    // Classify the round objects themselves. Looking a round up by its rating
    // value would report the wrong round when two rounds share a rating.
    const stdDevOutliers = workingRounds.filter(round => round.rating < stdDevCutoff);
    const hundredPointOutliers = workingRounds.filter(
      round => round.rating < hundredPointCutoff && round.rating >= stdDevCutoff
    );
    const keptRounds = workingRounds.filter(
      round => round.rating >= stdDevCutoff && round.rating >= hundredPointCutoff
    );

    if (stdDevOutliers.length > 0) {
      console.log(` ❌ 2.5σ outliers removed: ${stdDevOutliers.length} rounds`);
      stdDevOutliers.forEach(round => {
        console.log(` - ${round.rating} (${round.date.toDateString()}: ${round.competition})`);
      });
    }
    if (hundredPointOutliers.length > 0) {
      console.log(` ❌ 100-point outliers removed: ${hundredPointOutliers.length} rounds`);
      hundredPointOutliers.forEach(round => {
        console.log(` - ${round.rating} (${round.date.toDateString()}: ${round.competition})`);
      });
    }
    if (stdDevOutliers.length === 0 && hundredPointOutliers.length === 0) {
      console.log(` ✅ No outliers detected`);
    }

    // Only apply the exclusion when enough data is left afterwards.
    if (keptRounds.length >= 4) {
      workingRounds = keptRounds;
      workingRatings = keptRounds.map(round => round.rating);
      console.log(` ✅ Using ${keptRounds.length} rounds after outlier removal`);
    } else {
      console.log(` ⚠️ Too few rounds after outlier removal (${keptRounds.length}), keeping all rounds`);
    }
  } else {
    console.log(`\n⏭️ OUTLIER EXCLUSION SKIPPED (only ${workingRatings.length} rounds, need ≥7)`);
  }

  console.log(`\n⚖️ WEIGHTING (Most recent 25% count double if ≥9 rounds):`);
  const weightedRatings = [];
  if (workingRatings.length >= 9) {
    const recentCount = Math.round(workingRatings.length * 0.25);
    console.log(` ✅ Double-weighting most recent ${recentCount} rounds`);
    // Every rating once...
    weightedRatings.push(...workingRatings);
    // ...and the most recent quarter a second time (rounds are newest-first).
    for (let i = 0; i < recentCount; i++) {
      weightedRatings.push(workingRatings[i]);
      const round = workingRounds[i];
      console.log(` 2x weight: ${workingRatings[i]} (${round.date.toDateString()}: ${round.competition})`);
    }
    console.log(` 📊 Total values: ${workingRatings.length} + ${recentCount} double-weighted = ${weightedRatings.length}`);
  } else {
    console.log(` ➡️ No double weighting (${workingRatings.length} rounds, need ≥9)`);
    weightedRatings.push(...workingRatings);
  }

  const sum = weightedRatings.reduce((total, r) => total + r, 0);
  const average = sum / weightedRatings.length;
  const finalRating = Math.round(average);
  console.log(`\n🎯 FINAL CALCULATION:`);
  console.log(` Sum: ${sum}`);
  console.log(` Count: ${weightedRatings.length}`);
  console.log(` Average: ${average.toFixed(1)}`);
  console.log(` Final Rating: ${finalRating}`);
  console.log('=== END PDGA CALCULATION ===\n');
  return finalRating;
}
/**
 * Population standard deviation of a list of numbers.
 *
 * @param {number[]} ratings - Values to measure.
 * @returns {number} The standard deviation (dividing by N), or 0 for a
 *   missing or empty list.
 */
function calculateStandardDeviation(ratings) {
  if (!ratings || ratings.length === 0) {
    return 0;
  }
  const count = ratings.length;
  const add = (accumulated, value) => accumulated + value;
  const mean = ratings.reduce(add, 0) / count;
  const squaredDeviations = ratings.map(value => Math.pow(value - mean, 2));
  const variance = squaredDeviations.reduce(add, 0) / count;
  return Math.sqrt(variance);
}
/**
 * Load every player listed in pdga-numbers.txt from the database.
 *
 * Players missing from the database are skipped. A player whose lookup throws
 * is represented by a placeholder entry named 'Database Error'. Progress
 * events ('loading', then 'completed' / 'skipped' / 'error') are reported per
 * player when a callback is supplied.
 *
 * @param {?function(object): void} [progressCallback=null] - Receives
 *   `{ current, total, pdgaNumber, status, name? }` objects.
 * @returns {Promise<object[]>} Entries sorted by rating, highest first
 *   (missing ratings count as 0). Resolves to [] if anything outside the
 *   per-player lookup throws, e.g. the numbers file cannot be read.
 */
async function getAllRatingsFromDB(progressCallback = null) {
  // Forward a progress event only when the caller asked for them.
  const notify = (event) => {
    if (progressCallback) {
      progressCallback(event);
    }
  };

  try {
    const fileText = fs.readFileSync('pdga-numbers.txt', 'utf-8');
    const pdgaNumbers = [];
    for (const line of fileText.split('\n')) {
      const trimmed = line.trim();
      if (trimmed) {
        pdgaNumbers.push(trimmed);
      }
    }

    const total = pdgaNumbers.length;
    const ratings = [];
    for (const [index, pdgaNumber] of pdgaNumbers.entries()) {
      const current = index + 1;
      console.log(`Loading PDGA ${pdgaNumber} from database... (${current}/${total})`);
      notify({ current, total, pdgaNumber, status: 'loading' });

      try {
        // The database is the only source consulted here; nothing is scraped.
        const playerData = await getPlayerDataFromDB(pdgaNumber);
        if (!playerData) {
          console.log(`PDGA ${pdgaNumber} not found in DB - skipping (page load)`);
        } else {
          ratings.push(playerData);
        }
        notify({
          current,
          total,
          pdgaNumber,
          status: playerData ? 'completed' : 'skipped',
          name: playerData ? playerData.name : 'Not in DB'
        });
      } catch (error) {
        console.error(`Failed to load PDGA ${pdgaNumber} from database:`, error.message);
        ratings.push({
          pdgaNumber: parseInt(pdgaNumber),
          name: 'Database Error',
          rating: null,
          ratingChange: null,
          predictedRating: null
        });
        notify({ current, total, pdgaNumber, status: 'error', name: 'Database Error' });
      }
    }

    const byRatingDescending = (a, b) => (b.rating || 0) - (a.rating || 0);
    ratings.sort(byRatingDescending);
    return ratings;
  } catch (error) {
    console.error('Error reading PDGA numbers:', error);
    return [];
  }
}
// Root route: send index.html from the directory this file lives in.
app.get('/', (req, res) => {
  const indexPath = path.join(__dirname, 'index.html');
  res.sendFile(indexPath);
});
// GET /api/ratings: return all players found in the database as a JSON array,
// sorted by getAllRatingsFromDB. Responds 500 if loading throws.
app.get('/api/ratings', async (req, res) => {
  try {
    const ratings = await getAllRatingsFromDB();
    res.json(ratings);
  } catch (error) {
    // Log the cause; the client only receives a generic message.
    console.error('Error fetching ratings:', error);
    res.status(500).json({ error: 'Failed to fetch ratings' });
  }
});
// GET /api/ratings/progress: Server-Sent Events stream. Emits one event per
// progress update while players are loaded from the database, then a final
// { status: 'complete', ratings } or { status: 'error', error } event.
app.get('/api/ratings/progress', (req, res) => {
  res.writeHead(200, {
    'Content-Type': 'text/event-stream',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Access-Control-Allow-Origin': '*',
    'Access-Control-Allow-Headers': 'Cache-Control'
  });

  // Loading keeps running after the client disconnects, so every write is
  // guarded: writing to an ended response raises a write-after-end error.
  let connectionClosed = false;
  res.on('close', () => {
    connectionClosed = true;
  });

  const sendEvent = (payload) => {
    if (connectionClosed || res.writableEnded) {
      return;
    }
    res.write(`data: ${JSON.stringify(payload)}\n\n`);
  };

  // Send the last event (if anyone is still listening) and end the stream once.
  const finish = (payload) => {
    sendEvent(payload);
    if (!res.writableEnded) {
      res.end();
    }
  };

  getAllRatingsFromDB(sendEvent)
    .then(ratings => finish({ status: 'complete', ratings }))
    .catch(error => finish({ status: 'error', error: error.message }));
});
// POST /api/populate-database: scrape every player in the PDGA numbers file
// and stream progress as Server-Sent Events. The final event is
// { status: 'complete', ratings, message } or { status: 'error', message }.
app.post('/api/populate-database', (req, res) => {
  res.writeHead(200, {
    'Content-Type': 'text/event-stream',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Access-Control-Allow-Origin': '*',
  });

  // Scraping carries on even if the client goes away; only the writes to the
  // dead connection are skipped.
  let connectionClosed = false;
  res.on('close', () => {
    connectionClosed = true;
  });

  const sendEvent = (payload) => {
    if (connectionClosed || res.writableEnded) {
      return;
    }
    res.write(`data: ${JSON.stringify(payload)}\n\n`);
  };

  const finish = (payload) => {
    sendEvent(payload);
    if (!res.writableEnded) {
      res.end();
    }
  };

  console.log('=== Starting database population from PDGA numbers file ===');
  // Use the scraping function to populate database
  getAllRatingsWithScraping(sendEvent).then(ratings => {
    console.log(`=== Database population complete: ${ratings.length} players added ===`);
    finish({
      status: 'complete',
      ratings,
      message: `Successfully populated database with ${ratings.length} players`
    });
  }).catch(error => {
    console.error('Error populating database:', error);
    finish({ status: 'error', message: error.message });
  });
});
// GET /api/database-status: report how many of the players listed in
// pdga-numbers.txt are present in the database.
// Response: { totalExpected, playersInDB, needsPopulation, populationProgress }
// where needsPopulation is true only when no listed player is in the database
// and populationProgress is a whole-number percentage.
app.get('/api/database-status', async (req, res) => {
  try {
    const pdgaNumbers = fs.readFileSync('pdga-numbers.txt', 'utf-8')
      .split('\n')
      .map(num => num.trim())
      .filter(num => num);

    let playersInDB = 0;
    for (const pdgaNumber of pdgaNumbers) {
      const player = await getPlayerFromDB(pdgaNumber);
      if (player) playersInDB++;
    }

    const totalExpected = pdgaNumbers.length;
    // An empty numbers file would otherwise divide by zero and serialise the
    // resulting NaN as null.
    const populationProgress = totalExpected === 0
      ? 0
      : Math.round((playersInDB / totalExpected) * 100);

    res.json({
      totalExpected,
      playersInDB,
      needsPopulation: playersInDB === 0,
      populationProgress
    });
  } catch (error) {
    console.error('Error checking database status:', error);
    res.status(500).json({ error: 'Failed to check database status' });
  }
});
// GET /api/load-all-players: scrape every listed player and stream progress
// as Server-Sent Events, ending with { status: 'complete', ratings } or
// { status: 'error', error }.
app.get('/api/load-all-players', (req, res) => {
  res.writeHead(200, {
    'Content-Type': 'text/event-stream',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Access-Control-Allow-Origin': '*',
    'Access-Control-Allow-Headers': 'Cache-Control'
  });

  // The bulk scrape cannot be cancelled from here, so it keeps running after
  // a disconnect. Guard every write so none reaches an ended response.
  let connectionClosed = false;
  res.on('close', () => {
    connectionClosed = true;
  });

  const sendEvent = (payload) => {
    if (connectionClosed || res.writableEnded) {
      return;
    }
    res.write(`data: ${JSON.stringify(payload)}\n\n`);
  };

  const finish = (payload) => {
    sendEvent(payload);
    if (!res.writableEnded) {
      res.end();
    }
  };

  // Use the original scraping function for bulk loading
  getAllRatingsWithScraping(sendEvent)
    .then(ratings => finish({ status: 'complete', ratings }))
    .catch(error => finish({ status: 'error', error: error.message }));
});
/**
 * Scrape every player listed in pdga-numbers.txt, one at a time.
 *
 * A player whose scrape throws is represented by a placeholder entry named
 * 'Error' with rating 0. A pause is inserted between consecutive requests,
 * after failures as well as successes, and is skipped after the last player.
 *
 * @param {?function(object): void} [progressCallback=null] - Receives
 *   `{ current, total, pdgaNumber, status, name? }` objects.
 * @param {number} [delayMs=2000] - Pause between consecutive scrape requests.
 * @returns {Promise<object[]>} Entries sorted by rating, highest first.
 *   Resolves to [] if anything outside the per-player scrape throws, e.g. the
 *   numbers file cannot be read.
 */
async function getAllRatingsWithScraping(progressCallback = null, delayMs = 2000) {
  try {
    const pdgaNumbers = fs.readFileSync('pdga-numbers.txt', 'utf-8')
      .split('\n')
      .map(num => num.trim())
      .filter(num => num);
    const ratings = [];
    const total = pdgaNumbers.length;

    for (let i = 0; i < total; i++) {
      const pdgaNumber = pdgaNumbers[i];
      console.log(`Scraping PDGA ${pdgaNumber}... (${i + 1}/${total})`);
      if (progressCallback) {
        progressCallback({
          current: i + 1,
          total,
          pdgaNumber,
          status: 'loading'
        });
      }

      try {
        const playerData = await scrapePDGARating(pdgaNumber);
        ratings.push(playerData);
        if (progressCallback) {
          progressCallback({
            current: i + 1,
            total,
            pdgaNumber,
            status: 'completed',
            name: playerData.name
          });
        }
      } catch (error) {
        console.error(`Failed to scrape PDGA ${pdgaNumber}:`, error.message);
        const errorData = {
          pdgaNumber: parseInt(pdgaNumber, 10),
          name: 'Error',
          rating: 0,
          ratingChange: null,
          predictedRating: null
        };
        ratings.push(errorData);
        if (progressCallback) {
          progressCallback({
            current: i + 1,
            total,
            pdgaNumber,
            status: 'error',
            name: 'Error'
          });
        }
      }

      // Be respectful to PDGA: wait before the next request. This applies
      // after a failure too, since retrying immediately after a possible rate
      // limit only makes it worse. There is nothing to wait for after the
      // last player.
      const hasMorePlayers = i < total - 1;
      if (hasMorePlayers && delayMs > 0) {
        await new Promise(resolve => setTimeout(resolve, delayMs));
      }
    }

    return ratings.sort((a, b) => (b.rating || 0) - (a.rating || 0));
  } catch (error) {
    console.error('Error reading PDGA numbers:', error);
    return [];
  }
}
/**
 * Download the raw HTML of a player's rating-history page from pdga.com.
 *
 * @param {string|number} pdgaNumber - Player number used in the request path.
 * @returns {Promise<string>} Resolves with the response body on HTTP 200.
 *   Rejects with an Error carrying `statusCode` and `headers` on any other
 *   status, with the underlying error on network failure, or with
 *   Error('Request timeout') when the request times out.
 */
async function fetchRatingHistory(pdgaNumber) {
  const REQUEST_TIMEOUT_MS = 30000;
  return new Promise((resolve, reject) => {
    const options = {
      hostname: 'www.pdga.com',
      port: 443,
      path: `/player/${pdgaNumber}/history`,
      method: 'GET',
      headers: {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
      },
      timeout: REQUEST_TIMEOUT_MS
    };
    console.log(`Fetching rating history for PDGA #${pdgaNumber} from: https://www.pdga.com/player/${pdgaNumber}/history`);

    const req = https.request(options, (res) => {
      // Decode as UTF-8 at the stream level so a multi-byte character split
      // across two chunks is not corrupted by per-chunk string conversion.
      res.setEncoding('utf8');
      let data = '';
      res.on('data', (chunk) => {
        data += chunk;
      });
      // A connection dropped mid-body would otherwise leave the promise pending.
      res.on('error', reject);
      res.on('end', () => {
        if (res.statusCode === 200) {
          console.log(`Rating history request successful for PDGA #${pdgaNumber}`);
          resolve(data);
          return;
        }

        // Log detailed error information for rating history
        console.log(`Rating History Error for PDGA #${pdgaNumber}:`);
        console.log(`Status: ${res.statusCode}`);
        console.log('Response Headers:', JSON.stringify(res.headers, null, 2));
        // Check for rate limiting headers
        if (res.headers['retry-after']) {
          console.log(`Retry-After: ${res.headers['retry-after']} seconds`);
        }
        if (res.headers['x-ratelimit-limit']) {
          console.log(`Rate Limit: ${res.headers['x-ratelimit-limit']}`);
        }
        if (res.headers['x-ratelimit-remaining']) {
          console.log(`Rate Limit Remaining: ${res.headers['x-ratelimit-remaining']}`);
        }
        // Log partial response if available
        if (data.length > 0) {
          console.log(`Partial response received (${data.length} bytes):`, data.substring(0, 200));
        }
        const error = new Error(`HTTP ${res.statusCode} for rating history`);
        error.statusCode = res.statusCode;
        error.headers = res.headers;
        reject(error);
      });
    });

    req.on('error', (error) => {
      console.log(`Rating history request error for PDGA #${pdgaNumber}:`, {
        code: error.code,
        message: error.message,
        errno: error.errno,
        syscall: error.syscall
      });
      if (error.code === 'ECONNRESET') {
        console.log('Connection reset on rating history - likely rate limited by PDGA');
      }
      if (error.code === 'ECONNREFUSED') {
        console.log('Connection refused - PDGA server may be blocking requests');
      }
      if (error.code === 'ETIMEDOUT') {
        console.log('Request timed out - server may be overloaded');
      }
      reject(error);
    });

    // The 'timeout' event only signals inactivity; the request must be torn
    // down explicitly. Destroying it WITH the error routes the failure
    // through the 'error' handler above, so the promise is rejected once and
    // the log does not report a spurious connection reset.
    req.on('timeout', () => {
      console.log(`Rating history request timeout for PDGA #${pdgaNumber} after 30s`);
      req.destroy(new Error('Request timeout'));
    });

    req.end();
  });
}
/**
 * Extract rating-history entries from the HTML of a rating-history page.
 *
 * Every table row whose first cell contains a DD-Mon-YYYY date and whose
 * second cell starts with an integer becomes one entry. Header rows, rows
 * without cells and rows with an unknown month abbreviation are skipped.
 *
 * @param {string} html - Page markup.
 * @returns {Array<{date: string, rating: number, displayDate: string}>}
 *   Entries sorted oldest first. `date` is YYYY-MM-DD and `displayDate` is
 *   the text of the date cell.
 */
function parseRatingHistory(html) {
  const monthIndexByName = {
    jan: 0, feb: 1, mar: 2, apr: 3, may: 4, jun: 5,
    jul: 6, aug: 7, sep: 8, oct: 9, nov: 10, dec: 11
  };
  const stripTags = (markup) => markup.replace(/<[^>]*>/g, '').trim();

  const history = [];
  // Find all table rows with rating data
  const rowMatches = html.match(/<tr[^>]*>[\s\S]*?<\/tr>/gi) || [];

  for (const row of rowMatches) {
    // Skip header rows and empty rows
    if (row.includes('<th') || !row.includes('<td')) continue;

    // [\s\S] rather than "." so that a cell spanning several lines still matches.
    const cellMatches = row.match(/<td[^>]*>([\s\S]*?)<\/td>/gi);
    if (!cellMatches || cellMatches.length < 2) continue;

    const dateText = stripTags(cellMatches[0]);
    const rating = Number.parseInt(stripTags(cellMatches[1]), 10);
    if (Number.isNaN(rating)) continue;

    // Parse date (DD-Mon-YYYY format)
    const dateMatch = dateText.match(/(\d{1,2})-([A-Za-z]{3})-(\d{4})/);
    if (!dateMatch) continue;
    const [, day, month, year] = dateMatch;
    const monthIndex = monthIndexByName[month.toLowerCase()];
    // An unknown abbreviation would produce an Invalid Date, on which
    // toISOString() throws; skip the row instead of aborting the whole parse.
    if (monthIndex === undefined) continue;

    // Build the calendar date in UTC. A local-midnight Date converted with
    // toISOString() lands on the previous day in time zones ahead of UTC.
    const utcDate = new Date(Date.UTC(Number.parseInt(year, 10), monthIndex, Number.parseInt(day, 10)));
    history.push({
      date: utcDate.toISOString().split('T')[0], // YYYY-MM-DD format
      rating,
      displayDate: dateText
    });
  }

  // Sort by date (oldest first for chart display)
  return history.sort((a, b) => new Date(a.date) - new Date(b.date));
}
// GET /api/rating-history/:pdgaNumber: return a player's rating history as
// { pdgaNumber, history: [{ date, rating, displayDate }] }. History already
// stored in the database is served as-is; otherwise the history page is
// fetched from pdga.com, parsed, stored (best effort) and returned.
app.get('/api/rating-history/:pdgaNumber', async (req, res) => {
  const { pdgaNumber } = req.params;
  // The value is interpolated into the outbound pdga.com request path, so
  // accept plain digits only.
  if (!/^\d+$/.test(pdgaNumber)) {
    res.status(400).json({ error: 'Invalid PDGA number' });
    return;
  }

  try {
    // Check database first
    const cachedHistory = await getRatingHistoryFromDB(pdgaNumber);
    if (cachedHistory && cachedHistory.length > 0) {
      console.log(`Using cached rating history from DB for PDGA ${pdgaNumber}`);
      const formattedHistory = cachedHistory.map(row => ({
        date: row.date,
        rating: row.rating,
        displayDate: new Date(row.date).toLocaleDateString('en-US', {
          day: '2-digit',
          month: 'short',
          year: 'numeric'
        })
      }));
      res.json({
        pdgaNumber: parseInt(pdgaNumber, 10),
        history: formattedHistory
      });
      return;
    }

    console.log(`Fetching rating history for PDGA ${pdgaNumber}...`);
    const html = await fetchRatingHistory(pdgaNumber);
    const history = parseRatingHistory(html);

    // Save to database. A failed save is logged but does not fail the
    // request, because the freshly fetched history can still be returned.
    try {
      await saveRatingHistoryToDB(pdgaNumber, history);
      console.log(`Saved rating history for PDGA ${pdgaNumber} to database`);
    } catch (dbErr) {
      console.error(`Failed to save rating history to database:`, dbErr.message);
    }

    res.json({
      pdgaNumber: parseInt(pdgaNumber, 10),
      history
    });
  } catch (error) {
    console.error('Error fetching rating history:', error.message);
    res.status(500).json({ error: 'Failed to fetch rating history' });
  }
});
// POST /api/clear-cache: mark every player as stale and empty the in-memory
// cache. Responds { success, message } or 500 with an error.
app.post('/api/clear-cache', (req, res) => {
  try {
    // Back-date last_updated by 25 hours and clear last_round_update so
    // players are refreshed on the next request.
    // NOTE(review): 25 hours presumably exceeds the freshness window used
    // elsewhere in this file; confirm against the code that reads last_updated.
    // SQL string literals use single quotes: SQLite treats double-quoted
    // text as an identifier first and only falls back to a string literal
    // as a legacy quirk.
    const markAllStaleSql =
      "UPDATE players SET last_updated = datetime('now', '-25 hours'), last_round_update = NULL";
    db.run(markAllStaleSql, (err) => {
      if (err) {
        console.error('Error clearing database cache:', err);
        res.status(500).json({ error: 'Failed to clear database cache' });
        return;
      }
      // Also clear legacy in-memory cache
      const cacheSize = cache.size;
      cache.clear();
      console.log('Database cache cleared - all players will be refreshed on next request');
      res.json({
        success: true,
        message: `Cache cleared - database and ${cacheSize} memory entries reset`
      });
    });
  } catch (error) {
    console.error('Error clearing cache:', error);
    res.status(500).json({ error: 'Failed to clear cache' });
  }
});
// Individual player refresh endpoints
// POST /api/refresh-player/:pdgaNumber: fetch the player's page over HTTP,
// parse it, store the result and return it as { success, player }.
app.post('/api/refresh-player/:pdgaNumber', async (req, res) => {
  try {
    const pdgaNumber = req.params.pdgaNumber;
    console.log(`Manually refreshing player data for PDGA ${pdgaNumber}`);

    // Always goes to the network; no cached copy is consulted here.
    const pageHtml = await fetchPlayerDataHTTP(pdgaNumber);
    const refreshedPlayer = parsePlayerData(pageHtml, pdgaNumber);
    await savePlayerToDB(refreshedPlayer);

    const payload = { success: true, player: refreshedPlayer };
    res.json(payload);
  } catch (error) {
    console.error('Error refreshing player data:', error.message);
    res.status(500).json({ error: 'Failed to refresh player data' });
  }
});
// POST /api/refresh-rating-history/:pdgaNumber: re-download and re-parse the
// player's rating history, store it, and return { success, history }. Each
// phase (fetch, parse, save) is timed and logged.
app.post('/api/refresh-rating-history/:pdgaNumber', async (req, res) => {
  // Declared outside the try block because the catch block logs it too. A
  // const declared inside `try` is out of scope in `catch`, which would turn
  // every failure into a ReferenceError and leave the request unanswered.
  const { pdgaNumber } = req.params;
  try {
    console.log(`=== Manually refreshing rating history for PDGA ${pdgaNumber} ===`);

    const startTime = Date.now();
    const html = await fetchRatingHistory(pdgaNumber);
    const fetchTime = Date.now() - startTime;
    console.log(`HTML fetch completed in ${fetchTime}ms, received ${html.length} bytes`);

    const parseStartTime = Date.now();
    const history = parseRatingHistory(html);
    const parseTime = Date.now() - parseStartTime;
    console.log(`Parsing completed in ${parseTime}ms, found ${history.length} history entries`);
    if (history.length > 0) {
      console.log('Sample history entries:', history.slice(0, 3));
    } else {
      // An empty result usually means the markup was not what the parser
      // expects, so log the start of the page to help diagnose it.
      console.log('No history entries found. HTML sample:', html.substring(0, 500));
    }

    const dbStartTime = Date.now();
    await saveRatingHistoryToDB(pdgaNumber, history);
    const dbTime = Date.now() - dbStartTime;
    console.log(`Database save completed in ${dbTime}ms`);

    const formattedHistory = history.map(entry => ({
      date: entry.date,
      rating: entry.rating,
      displayDate: entry.displayDate
    }));
    console.log(`=== Rating history refresh completed for PDGA ${pdgaNumber} ===`);
    res.json({
      success: true,
      history: formattedHistory
    });
  } catch (error) {
    console.error(`=== Error refreshing rating history for PDGA ${pdgaNumber} ===`);
    console.error('Error type:', error.constructor.name);
    console.error('Error message:', error.message);
    console.error('Error code:', error.code);
    console.error('Status code:', error.statusCode);
    if (error.stack) {
      console.error('Stack trace:', error.stack);
    }
    res.status(500).json({
      error: 'Failed to refresh rating history',
      details: error.message,
      code: error.code
    });
  }
});
// POST /api/refresh-round-history/:pdgaNumber: rebuild a player's round
// history with a headless browser and return a fresh predicted rating.
//
// Flow: launch the browser (with a minimal-options fallback), fetch and store
// the official rating history (best effort), collect rounds via
// getOptimizedPlayerRounds and replace the stored rounds with them (best
// effort), then compute the prediction from whatever the database now holds.
//
// Success response: { success, predictedRating, totalRounds, officialRounds,
// newRounds, approach, message }. Failure response: HTTP 500 with error
// details and a suggestion for the caller.
app.post('/api/refresh-round-history/:pdgaNumber', async (req, res) => {
  let browser = null;
  const { pdgaNumber } = req.params;
  try {
    // Check when we last updated rounds for this player
    const lastRoundUpdate = await getLastRoundUpdateDate(pdgaNumber);
    const sinceDate = lastRoundUpdate ? new Date(lastRoundUpdate) : null;
    const isIncremental = !!sinceDate;
    console.log(`${isIncremental ? 'Incrementally updating' : 'Fully refreshing'} round history for PDGA ${pdgaNumber}${sinceDate ? ` since ${sinceDate.toDateString()}` : ''}`);

    try {
      browser = await puppeteer.launch({
        headless: "new",
        args: [
          '--no-sandbox',
          '--disable-setuid-sandbox',
          '--disable-dev-shm-usage',
          '--disable-accelerated-2d-canvas',
          '--no-first-run',
          '--no-zygote',
          '--disable-gpu'
        ]
      });
    } catch (launchError) {
      // Fallback with minimal options
      browser = await puppeteer.launch({
        headless: true,
        args: ['--no-sandbox', '--disable-dev-shm-usage']
      });
    }

    // Step 1: Get official rating history. A failure here is logged and does
    // not stop the round collection below.
    let officialHistory;
    try {
      officialHistory = await getOfficialRatingHistory(browser, pdgaNumber);
      if (officialHistory.length > 0) {
        await saveRatingHistoryToDB(pdgaNumber, officialHistory);
      }
    } catch (historyError) {
      console.error('Failed to fetch official history:', historyError.message);
      officialHistory = [];
    }

    // Step 2: Get optimized round collection (details + new tournaments only)
    let allRounds = [];
    try {
      console.log(`Using optimized approach: /details + new tournaments only for PDGA ${pdgaNumber}...`);
      allRounds = await getOptimizedPlayerRounds(browser, pdgaNumber);
      if (allRounds.length > 0) {
        // Convert to the format expected by saveRoundHistoryToDB
        const roundsForDB = allRounds.map(round => ({
          rating: round.rating,
          date: round.date,
          competition: round.competition
        }));
        // Save all rounds (replacing existing data with the complete optimized set)
        await saveRoundHistoryToDB(pdgaNumber, roundsForDB, false); // false = replace all
        console.log(`✓ Saved ${allRounds.length} rounds using optimized approach`);
        // Update timestamp to mark when we last did a full collection
        await updateLastRoundUpdateDate(pdgaNumber);
      } else {
        console.log(' No rounds found');
      }
    } catch (detailsError) {
      console.error('Failed to fetch rounds using optimized approach:', detailsError.message);
      allRounds = [];
    }

    // Release the browser as soon as scraping is done. The finally block
    // only has work left if we never get here.
    await browser.close();
    browser = null;

    // Calculate prediction from optimized round collection
    const dbRounds = await getRoundHistoryFromDB(pdgaNumber);
    const roundsForPrediction = dbRounds.map(round => ({
      rating: round.rating,
      date: new Date(round.date),
      competition: round.competition_name
    }));
    const predictedRating = calculatePredictedRating(roundsForPrediction);

    // Count official vs new rounds
    const officialCount = allRounds.filter(r => r.source === 'official').length;
    const newCount = allRounds.filter(r => r.source === 'new').length;
    res.json({
      success: true,
      predictedRating,
      totalRounds: roundsForPrediction.length,
      officialRounds: officialCount,
      newRounds: newCount,
      approach: 'optimized',
      message: `Used /details (${officialCount} rounds) + new tournaments (${newCount} rounds)`
    });
  } catch (error) {
    // Anything can be thrown or rejected with, not only Error instances.
    // Normalise once so this handler cannot itself throw, which would skip
    // the response and leave the request hanging.
    const errorMessage = String(error?.message ?? error);
    const errorType = error?.constructor?.name;
    const errorName = error?.name;
    let serializedError;
    try {
      serializedError = JSON.stringify(error, Object.getOwnPropertyNames(error), 2);
    } catch {
      serializedError = String(error);
    }

    console.error(`=== Error refreshing round history for PDGA ${pdgaNumber} ===`);
    console.error('Error type:', errorType);
    console.error('Error message:', errorMessage);
    console.error('Error code:', error?.code);
    console.error('Error name:', errorName);
    // Log all error properties for debugging
    console.error('Full error object:', serializedError);
    // Check if this is a puppeteer-specific error
    if (errorName) {
      console.error(`Specific error name: ${errorName}`);
    }
    // Log timing information
    const currentTime = new Date().toISOString();
    console.error(`Error occurred at: ${currentTime}`);
    // Check if we have browser information
    if (browser) {
      console.error('Browser was active when error occurred');
    } else {
      console.error('No active browser session');
    }
    if (errorMessage.includes('socket hang up')) {
      console.error('🔌 Socket hang up - likely rate limited by PDGA');
      console.error('💡 Try waiting a few minutes before attempting again');
      console.error('🔍 This usually happens when PDGA blocks too many rapid requests');
    }
    if (errorMessage.includes('Navigation timeout')) {
      console.error('⏰ Navigation timeout - PDGA pages loading slowly');
      console.error('💡 Try reducing the number of tournaments scraped');
    }
    if (errorMessage.includes('net::ERR_CONNECTION_RESET')) {
      console.error('🚫 Connection reset by PDGA server');
      console.error('💡 PDGA may be blocking or rate limiting requests');
    }
    if (error?.stack) {
      console.error('Full stack trace:');
      console.error(error.stack);
    } else {
      console.error('No stack trace available');
    }

    res.status(500).json({
      error: 'Failed to refresh round history',
      details: errorMessage,
      errorType,
      errorName,
      timestamp: new Date().toISOString(),
      suggestion: errorMessage.includes('socket hang up') ?
        'Rate limited by PDGA - try again in a few minutes. This happens when too many requests are made too quickly.' :
        errorMessage.includes('timeout') ?
          'PDGA pages are loading slowly - try again later when PDGA servers are less busy.' :
          'Tournament scraping failed - check server logs for detailed error information'
    });
  } finally {
    // Runs on every path. `browser` is non-null only when the normal close
    // above was never reached.
    if (browser) {
      try {
        await browser.close();
        console.log('Browser closed successfully');
      } catch (closeError) {
        console.error('Error closing browser:', closeError.message);
      }
    }
  }
});
// POST /api/predicted-rating/:pdgaNumber: return { pdgaNumber, predictedRating }.
// A positive prediction already derivable from the database is returned
// immediately. Otherwise rounds are scraped with a headless browser, stored,
// and the prediction is computed from everything the database then holds.
app.post('/api/predicted-rating/:pdgaNumber', async (req, res) => {
  const chromiumArgs = [
    '--no-sandbox',
    '--disable-setuid-sandbox',
    '--disable-dev-shm-usage',
    '--disable-accelerated-2d-canvas',
    '--no-first-run',
    '--no-zygote',
    '--disable-gpu'
  ];
  let browser = null;

  try {
    const pdgaNumber = req.params.pdgaNumber;

    // Always check database first (source of truth)
    const storedPrediction = await getPredictedRatingFromDB(pdgaNumber);
    if (storedPrediction > 0) {
      console.log(`Using DB round history for PDGA ${pdgaNumber} prediction (source of truth)`);
      res.json({ pdgaNumber: parseInt(pdgaNumber), predictedRating: storedPrediction });
      return;
    }

    browser = await puppeteer.launch({ headless: "new", args: chromiumArgs });
    console.log(`Calculating predicted rating for PDGA ${pdgaNumber}...`);

    // A previous round update means only newer rounds need to be scraped.
    const previousUpdate = await getLastRoundUpdateDate(pdgaNumber);
    const sinceDate = previousUpdate ? new Date(previousUpdate) : null;
    const isIncremental = sinceDate !== null;

    const scrapedRounds = await getPlayerCompetitionRatings(browser, pdgaNumber, sinceDate);

    // Scraping is finished: release the browser before touching the database.
    await browser.close();
    browser = null;

    await saveRoundHistoryToDB(pdgaNumber, scrapedRounds, isIncremental);

    // The prediction uses every stored round, not only the ones just scraped.
    const storedRounds = await getRoundHistoryFromDB(pdgaNumber);
    const roundRatings = [];
    for (const row of storedRounds) {
      roundRatings.push({
        rating: row.rating,
        date: new Date(row.date),
        competition: row.competition_name
      });
    }

    const predictedRating = calculatePredictedRating(roundRatings);
    res.json({ pdgaNumber: parseInt(pdgaNumber), predictedRating });
  } catch (error) {
    console.error('Error calculating predicted rating:', error.message || error);
    // Still non-null only if the failure happened while the browser was open.
    if (browser) {
      try {
        await browser.close();
      } catch (closeError) {
        console.error('Error closing browser:', closeError.message);
      }
    }
    res.status(500).json({ error: 'Failed to calculate predicted rating' });
  }
});
/**
 * Manual probe of PDGA rate limiting.
 *
 * Requests a fixed set of three PDGA numbers one after another through
 * fetchPlayerDataHTTP(), waiting 500 ms between consecutive requests, and
 * logs how long each request took (whether it succeeded or failed) followed
 * by the full list of timings. Resolves with no value; failures of
 * individual requests are logged, not thrown.
 */
async function testPDGARateLimit() {
  console.log('Testing PDGA rate limiting behavior...');

  const samplePlayers = ['60954', '178737', '251092']; // First few from our list
  const finalIndex = samplePlayers.length - 1;
  const requestTimes = [];
  const pause = (ms) => new Promise(resolve => setTimeout(resolve, ms));

  for (const [index, playerNumber] of samplePlayers.entries()) {
    const ordinal = index + 1;
    const startedAt = Date.now();

    let succeeded = true;
    let failure;
    try {
      console.log(`Test request ${ordinal}: PDGA #${playerNumber}`);
      await fetchPlayerDataHTTP(playerNumber);
    } catch (error) {
      succeeded = false;
      failure = error;
    }

    // Timing is recorded identically for successful and failed requests.
    const elapsedMs = Date.now() - startedAt;
    requestTimes.push(elapsedMs);

    if (succeeded) {
      console.log(`Request ${ordinal} completed in ${elapsedMs}ms`);
    } else {
      console.log(`Request ${ordinal} failed after ${elapsedMs}ms:`, failure.message);
    }

    // Small delay between test requests (none after the last one)
    if (index < finalIndex) {
      await pause(500);
    }
  }

  console.log('Rate limit test completed. Request times:', requestTimes);
}
// Uncomment the line below to run rate limit test on startup
// testPDGARateLimit();
// Startup sequence: prepare the database schema, fill in any players that
// are missing from it, and only then begin accepting HTTP requests. A
// failure in any of these steps is logged and terminates the process with
// exit code 1.
(async () => {
  try {
    await initializeDatabase();

    // Check and populate missing players from PDGA numbers file
    await checkAndPopulateDatabase();

    const announceReady = () => {
      console.log(`PDGA Ratings app running on http://localhost:${PORT}`);
    };
    app.listen(PORT, announceReady);
  } catch (err) {
    console.error('Failed to initialize database:', err);
    process.exit(1);
  }
})();