const express = require('express'); const puppeteer = require('puppeteer'); const https = require('https'); const fs = require('fs'); const path = require('path'); const sqlite3 = require('sqlite3').verbose(); const app = express(); const PORT = 3000; app.use(express.static('public')); // Initialize SQLite database const db = new sqlite3.Database('./ratings.db'); // Initialize database schema function initializeDatabase() { return new Promise((resolve, reject) => { db.serialize(() => { // Create players table db.run(` CREATE TABLE IF NOT EXISTS players ( id INTEGER PRIMARY KEY AUTOINCREMENT, pdga_number INTEGER UNIQUE NOT NULL, name TEXT NOT NULL, current_rating INTEGER, rating_change INTEGER, last_updated DATETIME DEFAULT CURRENT_TIMESTAMP, last_round_update DATETIME DEFAULT NULL ) `); // Migration: Add last_round_update column if it doesn't exist db.get("PRAGMA table_info(players)", (err, info) => { if (err) { console.error('Error checking table schema:', err); return; } // Check if column exists by querying table info db.all("PRAGMA table_info(players)", (err, columns) => { if (err) { console.error('Error getting table info:', err); return; } const hasLastRoundUpdate = columns.some(col => col.name === 'last_round_update'); if (!hasLastRoundUpdate) { console.log('Adding last_round_update column to players table...'); db.run(` ALTER TABLE players ADD COLUMN last_round_update DATETIME DEFAULT NULL `, (err) => { if (err) { console.error('Error adding last_round_update column:', err.message); } else { console.log('Successfully added last_round_update column'); } }); } }); }); // Create round_history table db.run(` CREATE TABLE IF NOT EXISTS round_history ( id INTEGER PRIMARY KEY AUTOINCREMENT, player_id INTEGER NOT NULL, date DATE NOT NULL, competition_name TEXT NOT NULL, rating INTEGER NOT NULL, FOREIGN KEY (player_id) REFERENCES players (id) ) `); // Create rating_history table db.run(` CREATE TABLE IF NOT EXISTS rating_history ( id INTEGER PRIMARY KEY AUTOINCREMENT, player_id INTEGER NOT NULL, date DATE NOT NULL, rating INTEGER NOT NULL, FOREIGN KEY (player_id) REFERENCES players (id) ) `, (err) => { if (err) { reject(err); } else { console.log('Database initialized successfully'); resolve(); } }); }); }); } // Check and populate database from PDGA numbers file at startup async function checkAndPopulateDatabase() { try { console.log('=== Checking database population against PDGA numbers file ==='); const pdgaNumbers = fs.readFileSync('pdga-numbers.txt', 'utf-8') .split('\n') .map(num => num.trim()) .filter(num => num); console.log(`Found ${pdgaNumbers.length} PDGA numbers in file`); const missingPlayers = []; // Check which players are missing from database for (const pdgaNumber of pdgaNumbers) { const player = await getPlayerFromDB(pdgaNumber); if (!player) { missingPlayers.push(pdgaNumber); } } if (missingPlayers.length === 0) { console.log('✓ All players from PDGA numbers file are already in database'); return; } console.log(`Found ${missingPlayers.length} missing players: [${missingPlayers.join(', ')}]`); console.log('=== Starting automatic population of missing players ==='); // Populate missing players for (let i = 0; i < missingPlayers.length; i++) { const pdgaNumber = missingPlayers[i]; console.log(`[${i + 1}/${missingPlayers.length}] Scraping missing player PDGA ${pdgaNumber}...`); try { const playerData = await scrapePDGARating(pdgaNumber); console.log(`✓ Added PDGA ${pdgaNumber}: ${playerData.name}`); // Delay between requests to be respectful to PDGA if (i < missingPlayers.length - 1) { console.log('Waiting 2s before next request...'); await new Promise(resolve => setTimeout(resolve, 2000)); } } catch (error) { console.error(`✗ Failed to add PDGA ${pdgaNumber}: ${error.message}`); } } console.log('=== Database population complete ==='); } catch (error) { console.error('Error during database population check:', error.message); } } // Database helper functions function getPlayerFromDB(pdgaNumber) { return new Promise((resolve, reject) => { db.get( 'SELECT * FROM players WHERE pdga_number = ?', [pdgaNumber], (err, row) => { if (err) reject(err); else resolve(row); } ); }); } function savePlayerToDB(playerData) { return new Promise((resolve, reject) => { db.run( `INSERT OR REPLACE INTO players (pdga_number, name, current_rating, rating_change, last_updated) VALUES (?, ?, ?, ?, datetime('now'))`, [playerData.pdgaNumber, playerData.name, playerData.rating, playerData.ratingChange], function(err) { if (err) reject(err); else resolve(this.lastID); } ); }); } function getRatingHistoryFromDB(pdgaNumber) { return new Promise((resolve, reject) => { db.get('SELECT id FROM players WHERE pdga_number = ?', [pdgaNumber], (err, player) => { if (err) return reject(err); if (!player) return resolve(null); db.all( 'SELECT * FROM rating_history WHERE player_id = ? ORDER BY date ASC', [player.id], (err, rows) => { if (err) reject(err); else resolve(rows); } ); }); }); } function saveRatingHistoryToDB(pdgaNumber, history) { return new Promise((resolve, reject) => { db.get('SELECT id FROM players WHERE pdga_number = ?', [pdgaNumber], (err, player) => { if (err) return reject(err); if (!player) return reject(new Error('Player not found')); // Clear existing history for this player db.run('DELETE FROM rating_history WHERE player_id = ?', [player.id], (err) => { if (err) return reject(err); // Insert new history const stmt = db.prepare('INSERT INTO rating_history (player_id, date, rating) VALUES (?, ?, ?)'); for (const entry of history) { stmt.run([player.id, entry.date, entry.rating]); } stmt.finalize((err) => { if (err) reject(err); else resolve(); }); }); }); }); } function getRoundHistoryFromDB(pdgaNumber) { return new Promise((resolve, reject) => { db.get('SELECT id FROM players WHERE pdga_number = ?', [pdgaNumber], (err, player) => { if (err) return reject(err); if (!player) return resolve([]); db.all( 'SELECT * FROM round_history WHERE player_id = ? ORDER BY date DESC', [player.id], (err, rows) => { if (err) reject(err); else resolve(rows); } ); }); }); } function getLastRoundUpdateDate(pdgaNumber) { return new Promise((resolve, reject) => { db.get( 'SELECT last_round_update FROM players WHERE pdga_number = ?', [pdgaNumber], (err, row) => { if (err) reject(err); else resolve(row ? row.last_round_update : null); } ); }); } function updateLastRoundUpdateDate(pdgaNumber) { return new Promise((resolve, reject) => { db.run( 'UPDATE players SET last_round_update = CURRENT_TIMESTAMP WHERE pdga_number = ?', [pdgaNumber], function(err) { if (err) reject(err); else resolve(); } ); }); } function saveRatingHistoryToDB(pdgaNumber, ratingHistory) { return new Promise((resolve, reject) => { db.get('SELECT id FROM players WHERE pdga_number = ?', [pdgaNumber], (err, player) => { if (err) return reject(err); if (!player) return reject(new Error('Player not found')); // Clear existing rating history for this player db.run('DELETE FROM rating_history WHERE player_id = ?', [player.id], (err) => { if (err) return reject(err); if (ratingHistory.length === 0) { return resolve(); } let completed = 0; const total = ratingHistory.length; ratingHistory.forEach(entry => { const parsedDate = parseDate(entry.date); db.run( 'INSERT INTO rating_history (player_id, date, rating) VALUES (?, ?, ?)', [player.id, parsedDate.toISOString().split('T')[0], entry.rating], (err) => { if (err) return reject(err); completed++; if (completed === total) { resolve(); } } ); }); }); }); }); } function saveRoundHistoryToDB(pdgaNumber, roundData, isIncremental = false) { return new Promise((resolve, reject) => { db.get('SELECT id FROM players WHERE pdga_number = ?', [pdgaNumber], (err, player) => { if (err) return reject(err); if (!player) return reject(new Error('Player not found')); const processRounds = () => { if (roundData.length === 0) { // Update last_round_update timestamp even if no new rounds db.run('UPDATE players SET last_round_update = datetime("now") WHERE pdga_number = ?', [pdgaNumber], (err) => { if (err) reject(err); else resolve(); }); return; } // Insert new round history const stmt = db.prepare('INSERT OR REPLACE INTO round_history (player_id, date, competition_name, rating) VALUES (?, ?, ?, ?)'); for (const round of roundData) { stmt.run([player.id, round.date.toISOString().split('T')[0], round.competition || 'Unknown', round.rating]); } stmt.finalize((err) => { if (err) { reject(err); } else { // Update last_round_update timestamp db.run('UPDATE players SET last_round_update = datetime("now") WHERE pdga_number = ?', [pdgaNumber], (updateErr) => { if (updateErr) reject(updateErr); else resolve(); }); } }); }; if (!isIncremental) { // Clear existing round history for full refresh db.run('DELETE FROM round_history WHERE player_id = ?', [player.id], (err) => { if (err) return reject(err); processRounds(); }); } else { // For incremental updates, just add new rounds processRounds(); } }); }); } // Legacy in-memory cache (will be phased out) const cache = new Map(); const CACHE_DURATION = 24 * 60 * 60 * 1000; async function fetchPlayerDataHTTP(pdgaNumber) { return new Promise((resolve, reject) => { const options = { hostname: 'www.pdga.com', port: 443, path: `/player/${pdgaNumber}`, method: 'GET', headers: { 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36' }, timeout: 30000 }; const req = https.request(options, (res) => { let data = ''; res.on('data', (chunk) => { data += chunk; }); res.on('end', () => { if (res.statusCode === 200) { resolve(data); } else { // Log rate limiting information if available const rateLimitInfo = { statusCode: res.statusCode, headers: res.headers }; console.log(`PDGA Response Status for #${pdgaNumber}: ${res.statusCode}`); console.log('Response Headers:', JSON.stringify(res.headers, null, 2)); // Check for common rate limiting headers if (res.headers['retry-after']) { console.log(`Retry-After header: ${res.headers['retry-after']}`); } if (res.headers['x-ratelimit-limit']) { console.log(`Rate Limit: ${res.headers['x-ratelimit-limit']}`); } if (res.headers['x-ratelimit-remaining']) { console.log(`Rate Limit Remaining: ${res.headers['x-ratelimit-remaining']}`); } if (res.headers['x-ratelimit-reset']) { console.log(`Rate Limit Reset: ${res.headers['x-ratelimit-reset']}`); } const error = new Error(`HTTP ${res.statusCode}`); error.rateLimitInfo = rateLimitInfo; reject(error); } }); }); req.on('error', (error) => { console.log(`Request error for PDGA #${pdgaNumber}:`, error.code, error.message); if (error.code === 'ECONNRESET') { console.log('Connection reset - likely rate limited by PDGA'); } reject(error); }); req.on('timeout', () => { req.destroy(); reject(new Error('Request timeout')); }); req.setTimeout(30000); req.end(); }); } function parsePlayerData(html, pdgaNumber) { try { // Extract player name from title const nameMatch = html.match(/([^<]+?)\s*\|\s*Professional Disc Golf Association/i); const name = nameMatch ? nameMatch[1].trim() : 'Unknown'; // Extract current rating - account for HTML tags between "Current Rating:" and the number const ratingMatch = html.match(/Current Rating:[^>]*>\s*(\d+)/i); const rating = ratingMatch ? parseInt(ratingMatch[1]) : 0; // Extract rating change - look for the +/- number in the rating context const changeMatch = html.match(/Current Rating:[\s\S]*?([+\-]\d+)[\s\S]*?\(as of/i); const ratingChange = changeMatch ? parseInt(changeMatch[1]) : null; return { pdgaNumber, name: name.replace(/\s*#\d+$/, ''), rating, ratingChange, predictedRating: null }; } catch (error) { console.error(`Error parsing data for PDGA ${pdgaNumber}:`, error.message); return { pdgaNumber, name: 'Error', rating: 0, ratingChange: null, predictedRating: null }; } } // Function to get player data from DB only (for page loads) async function getPlayerDataFromDB(pdgaNumber) { try { const cachedPlayer = await getPlayerFromDB(pdgaNumber); if (cachedPlayer) { console.log(`Loading PDGA ${pdgaNumber} from DB (source of truth)`); const predictedRating = await getPredictedRatingFromDB(pdgaNumber); return { pdgaNumber: cachedPlayer.pdga_number, name: cachedPlayer.name, rating: cachedPlayer.current_rating, ratingChange: cachedPlayer.rating_change, predictedRating: predictedRating > 0 ? predictedRating : null }; } return null; // No data in DB } catch (err) { console.error(`Database error for PDGA ${pdgaNumber}:`, err.message); return null; } } // Function for explicit refresh (scrape PDGA + update DB) async function scrapePDGARating(pdgaNumber, retries = 3) { console.log(`=== Refreshing PDGA ${pdgaNumber} from PDGA website ===`); for (let attempt = 1; attempt <= retries; attempt++) { try { console.log(`Attempt ${attempt}/${retries} for PDGA ${pdgaNumber} (using HTTP)`); const html = await fetchPlayerDataHTTP(pdgaNumber); const result = parsePlayerData(html, pdgaNumber); // Save to database try { await savePlayerToDB(result); console.log(`Saved PDGA ${pdgaNumber} to database`); } catch (dbErr) { console.error(`Failed to save PDGA ${pdgaNumber} to database:`, dbErr.message); } console.log(`Successfully scraped PDGA ${pdgaNumber} on attempt ${attempt}`); return result; } catch (error) { console.error(`Attempt ${attempt}/${retries} failed for PDGA ${pdgaNumber}:`, error.message); if (attempt === retries) { return { pdgaNumber, name: 'Error', rating: 0, ratingChange: null, predictedRating: null }; } // Adaptive retry delay based on error type let retryDelay = 2000 * attempt; // Base delay if (error.rateLimitInfo) { const retryAfter = error.rateLimitInfo.headers['retry-after']; if (retryAfter) { // If server tells us when to retry, use that + some buffer retryDelay = Math.max(retryDelay, (parseInt(retryAfter) + 1) * 1000); console.log(`Using Retry-After header: waiting ${retryDelay/1000}s`); } } if (error.code === 'ECONNRESET') { // Connection reset usually means rate limiting - wait longer retryDelay = Math.max(retryDelay, 10000); console.log(`Connection reset detected: waiting ${retryDelay/1000}s`); } await new Promise(resolve => setTimeout(resolve, retryDelay)); } } } async function getPredictedRating(browser, pdgaNumber, retries = 2) { for (let attempt = 1; attempt <= retries; attempt++) { try { console.log(`Predicted rating attempt ${attempt}/${retries} for PDGA ${pdgaNumber}`); const roundRatings = await getPlayerCompetitionRatings(browser, pdgaNumber); const result = calculatePredictedRating(roundRatings); if (result.rating > 0) { return result.rating; } if (attempt < retries) { console.log(`No ratings found, waiting before retry...`); await new Promise(resolve => setTimeout(resolve, 5000)); } } catch (error) { console.error(`Predicted rating attempt ${attempt}/${retries} failed for ${pdgaNumber}:`, error.message); if (attempt < retries) { await new Promise(resolve => setTimeout(resolve, 5000)); } } } console.log(`All attempts failed for predicted rating of PDGA ${pdgaNumber}`); return 0; } async function getPredictedRatingFromDB(pdgaNumber) { try { const roundHistory = await getRoundHistoryFromDB(pdgaNumber); if (roundHistory.length > 0) { console.log(`Using ${roundHistory.length} cached rounds for PDGA ${pdgaNumber} prediction`); // Convert to the format expected by calculatePredictedRating const roundRatings = roundHistory.map(round => ({ rating: round.rating, date: new Date(round.date), competition: round.competition_name || 'Unknown' })); const result = calculatePredictedRating(roundRatings); return result.rating; } return 0; } catch (err) { console.error(`Error getting predicted rating from DB for ${pdgaNumber}:`, err.message); return 0; } } async function getOfficialRatingHistory(browser, pdgaNumber) { const page = await browser.newPage(); let ratingHistory = []; try { const url = `https://www.pdga.com/player/${pdgaNumber}/history`; await page.goto(url, { waitUntil: 'networkidle2', timeout: 45000 }); await page.waitForTimeout(1000); // Reduced delay // Extract the rating history data ratingHistory = await page.evaluate(() => { const history = []; // Try each selector until we find rating data const selectors = [ 'table tbody tr', 'table tr', '.view-content tbody tr' ]; for (const selector of selectors) { const rows = document.querySelectorAll(selector); for (const row of rows) { const cells = row.querySelectorAll('td'); if (cells.length >= 3) { const dateText = cells[0]?.innerText?.trim(); const ratingText = cells[1]?.innerText?.trim(); // Check if this looks like a date and rating if (dateText && ratingText && /^\d{4}-\d{2}-\d{2}$|^\d{1,2}-\w{3}-\d{4}$|^\w{3} \d{1,2}, \d{4}$/.test(dateText)) { const rating = parseInt(ratingText); if (!isNaN(rating) && rating > 800 && rating < 1200) { history.push({ date: dateText, rating: rating, tournament: cells[2]?.innerText?.trim() || 'Unknown' }); } } } } if (history.length > 0) break; } return history; }); } catch (error) { console.error('Error fetching official rating history:', error.message); } finally { await page.close(); } return ratingHistory; } async function getPlayerTournamentDetails(browser, pdgaNumber) { const page = await browser.newPage(); let tournamentRounds = []; try { const url = `https://www.pdga.com/player/${pdgaNumber}/details`; await page.goto(url, { waitUntil: 'networkidle2', timeout: 45000 }); await page.waitForTimeout(1000); // Reduced delay // Extract individual tournament rounds with actual dates and ratings tournamentRounds = await page.evaluate(() => { const rounds = []; const rows = document.querySelectorAll('table tbody tr'); // Log first few rows to see structure console.log('First few table rows for debugging:'); for (let i = 0; i < Math.min(3, rows.length); i++) { const cells = rows[i].querySelectorAll('td'); const cellTexts = Array.from(cells).map(cell => cell.innerText.trim()); console.log(`Row ${i}: [${cellTexts.join(' | ')}]`); } rows.forEach(row => { const cells = row.querySelectorAll('td'); // Try to identify which columns contain date and rating information if (cells.length >= 4) { const cellTexts = Array.from(cells).map(cell => cell.innerText.trim()); // Look for patterns in the data let tournamentName = ''; let dateText = ''; let rating = 0; let division = ''; // Try to find date and rating in different column positions cellTexts.forEach((text, index) => { // Look for date patterns, including multi-day tournaments // Examples: "2-Sep-2023", "2-Sep to 3-Sep-2023", "2 to 3-Sep-2023" if (/\d{1,2}(-\w{3})?(\s+to\s+)\d{1,2}-\w{3}-\d{4}/.test(text) || /\d{1,2}-\w{3}-\d{4}/.test(text)) { dateText = text; } // Look for rating patterns (3-4 digit numbers between 800-1200) if (/^\d{3,4}$/.test(text) && parseInt(text) >= 800 && parseInt(text) <= 1200) { rating = parseInt(text); } // Look for division patterns (like MA3, MPO, etc.) if (/^M[A-Z]\d*$|^F[A-Z]\d*$/.test(text)) { division = text; } // First cell is usually tournament name if (index === 0) { tournamentName = text; } }); if (tournamentName && dateText && rating > 0) { rounds.push({ tournament: tournamentName, dateText: dateText, rating: rating, division: division, competition: `${tournamentName} (${division})` }); } } }); return rounds; }); // Parse dates properly after extraction const fixedRounds = tournamentRounds.map(round => { let validDate = new Date(); if (round.dateText) { try { const pdgaParsed = parseDate(round.dateText); if (pdgaParsed instanceof Date && !isNaN(pdgaParsed.getTime())) { validDate = pdgaParsed; } else { const nativeParsed = new Date(round.dateText); if (!isNaN(nativeParsed.getTime())) { validDate = nativeParsed; } } } catch (e) { console.log(`Date parsing failed for "${round.dateText}": ${e.message}`); } } return { tournament: round.tournament, date: validDate, rating: round.rating, division: round.division, competition: round.competition }; }); tournamentRounds = fixedRounds; } catch (error) { console.error('Error fetching tournament details:', error.message); } finally { await page.close(); } return tournamentRounds; } // Get the most recent tournament date from /details page (official rating rounds) async function getLatestOfficialRoundDate(browser, pdgaNumber) { try { const detailsRounds = await getPlayerTournamentDetails(browser, pdgaNumber); if (detailsRounds.length === 0) { return null; } // Find the most recent date from details page const sortedRounds = detailsRounds.sort((a, b) => b.date - a.date); const latestDate = sortedRounds[0].date; console.log(`Latest official round date for PDGA ${pdgaNumber}: ${latestDate.toDateString()}`); return latestDate; } catch (error) { console.error('Error getting latest official round date:', error.message); return null; } } // Get NEW tournament rounds (played after the latest official round) async function getNewTournamentRounds(browser, pdgaNumber, afterDate) { const page = await browser.newPage(); let newRounds = []; try { const url = `https://www.pdga.com/player/${pdgaNumber}`; await page.goto(url, { waitUntil: 'networkidle2' }); console.log(`Looking for tournaments after ${afterDate.toDateString()}...`); // Get tournament URLs that are newer than afterDate const newTournamentUrls = await page.evaluate((afterTimestamp) => { const afterDate = new Date(afterTimestamp); const tables = document.querySelectorAll('table[id*="player-results"]'); const urls = []; tables.forEach(table => { const rows = table.querySelectorAll('tbody tr'); rows.forEach(row => { const dateCell = row.querySelector('.dates'); const tournamentCell = row.querySelector('.tournament a'); if (dateCell && tournamentCell) { const dateText = dateCell.innerText.trim(); const dateMatch = dateText.match(/\d{1,2}-[A-Za-z]{3}-\d{4}/); if (dateMatch) { const dateStr = dateMatch[0]; const date = new Date(dateStr); // Only include tournaments AFTER the latest official round if (date > afterDate) { const href = tournamentCell.getAttribute('href'); if (href) { urls.push({ url: `https://www.pdga.com${href}`, date: dateStr, name: tournamentCell.innerText.trim() }); } } } } }); }); return urls; }, afterDate.getTime()); console.log(`Found ${newTournamentUrls.length} new tournaments after ${afterDate.toDateString()}`); // Scrape individual round ratings from new tournaments for (const tournamentData of newTournamentUrls) { try { console.log(`Scraping new tournament: ${tournamentData.name} (${tournamentData.date})`); await page.goto(tournamentData.url, { waitUntil: 'domcontentloaded', timeout: 30000 }); await page.waitForTimeout(500); // Reduced from 2s to 0.5s since we're only scraping a few tournaments const roundRatings = await page.evaluate((pdgaNum) => { const rows = document.querySelectorAll('tr'); for (const row of rows) { const cells = row.querySelectorAll('td'); const hasPlayerNumber = Array.from(cells).some(cell => cell.innerText && cell.innerText.includes(pdgaNum.toString()) ); if (hasPlayerNumber) { const roundRatingCells = row.querySelectorAll('td.round-rating'); const ratings = []; roundRatingCells.forEach(cell => { const rating = parseInt(cell.innerText.trim()); if (!isNaN(rating) && rating > 0) { ratings.push(rating); } }); return ratings; } } return []; }, pdgaNumber); if (roundRatings.length > 0) { const parsedDate = parseDate(tournamentData.date); roundRatings.forEach(rating => { newRounds.push({ rating, date: parsedDate, competition: tournamentData.name }); }); console.log(`✓ Found ${roundRatings.length} round ratings for ${tournamentData.name}`); } } catch (error) { console.error(`Error scraping tournament ${tournamentData.name}:`, error.message); } } } catch (error) { console.error(`Error getting new tournament rounds for PDGA ${pdgaNumber}:`, error); } finally { await page.close(); } return newRounds; } // Optimized function: Get /details rounds + new tournaments only async function getOptimizedPlayerRounds(browser, pdgaNumber) { console.log(`=== Optimized Round Collection for PDGA ${pdgaNumber} ===`); try { // Step 1: Get all official rating rounds from /details page console.log('Step 1: Getting official rating rounds from /details page...'); const officialRounds = await getPlayerTournamentDetails(browser, pdgaNumber); if (officialRounds.length === 0) { console.log('No official rounds found in details page'); return []; } console.log(`✓ Found ${officialRounds.length} official rating rounds`); // Step 2: Find the most recent official round date const sortedRounds = officialRounds.sort((a, b) => b.date - a.date); const latestOfficialDate = sortedRounds[0].date; console.log(`Latest official round: ${latestOfficialDate.toDateString()}`); // Step 3: Get NEW tournament rounds (after latest official round) console.log('Step 2: Looking for NEW tournaments since latest official round...'); const newRounds = await getNewTournamentRounds(browser, pdgaNumber, latestOfficialDate); if (newRounds.length > 0) { console.log(`✓ Found ${newRounds.length} new round ratings`); } else { console.log('ℹ No new tournaments found since latest official round'); } // Step 4: Combine official rounds + new rounds const allRounds = [ ...officialRounds.map(round => ({ rating: round.rating, date: round.date, competition: round.competition, source: 'official' // From /details page })), ...newRounds.map(round => ({ rating: round.rating, date: round.date, competition: round.competition, source: 'new' // From individual tournaments })) ]; // Sort by date (oldest first) allRounds.sort((a, b) => a.date - b.date); console.log(`=== Summary: ${officialRounds.length} official + ${newRounds.length} new = ${allRounds.length} total rounds ===`); return allRounds; } catch (error) { console.error('Error in optimized round collection:', error.message); return []; } } // Legacy function - keep for backward compatibility but mark as deprecated async function getPlayerCompetitionRatings(browser, pdgaNumber, sinceDate = null) { const page = await browser.newPage(); let allRatings = []; let tournamentCount = 0; let successfulTournaments = 0; try { const url = `https://www.pdga.com/player/${pdgaNumber}`; await page.goto(url, { waitUntil: 'networkidle2' }); // Calculate the next PDGA update date to filter tournaments const nextUpdateDate = getNextPDGAUpdateDate(); const tournamentUrls = await page.evaluate((nextUpdateTimestamp, sinceDateString) => { const nextUpdateDate = new Date(nextUpdateTimestamp); const sinceDate = sinceDateString ? new Date(sinceDateString) : null; const tables = document.querySelectorAll('table[id*="player-results"]'); const urls = []; tables.forEach(table => { const rows = table.querySelectorAll('tbody tr'); rows.forEach(row => { const dateCell = row.querySelector('.dates'); const tournamentCell = row.querySelector('.tournament a'); if (dateCell && tournamentCell) { const dateText = dateCell.innerText.trim(); const dateMatch = dateText.match(/\d{1,2}-[A-Za-z]{3}-\d{4}/); if (dateMatch) { const dateStr = dateMatch[0]; const date = new Date(dateStr); const oneYearAgo = new Date(); oneYearAgo.setFullYear(oneYearAgo.getFullYear() - 1); // Apply date filters const dateValid = date > oneYearAgo && date < nextUpdateDate; const isNewTournament = !sinceDate || date > sinceDate; if (dateValid && isNewTournament) { const href = tournamentCell.getAttribute('href'); if (href) { urls.push({ url: `https://www.pdga.com${href}`, date: dateStr }); } } } } }); }); return urls; // Get all tournaments from the past year }, nextUpdateDate.getTime(), sinceDate ? sinceDate.toISOString() : null); const updateType = sinceDate ? `incremental (since ${sinceDate.toDateString()})` : 'full'; console.log(`Found ${tournamentUrls.length} tournaments for PDGA ${pdgaNumber} (${updateType})`); for (const tournamentData of tournamentUrls) { tournamentCount++; try { console.log(`[${tournamentCount}/${tournamentUrls.length}] Navigating to tournament: ${tournamentData.url}`); const navigationStart = Date.now(); try { await page.goto(tournamentData.url, { waitUntil: 'domcontentloaded', timeout: 45000 }); const navigationTime = Date.now() - navigationStart; console.log(`✓ Navigation completed in ${navigationTime}ms`); } catch (navError) { console.error(`✗ Navigation failed for ${tournamentData.url}:`); console.error('Navigation error details:', { type: navError.constructor.name, message: navError.message, code: navError.code, stack: navError.stack?.split('\n')[0] }); throw navError; // Re-throw to be caught by outer try-catch } console.log(`Waiting 1s before scraping tournament data...`); await page.waitForTimeout(1000); // Reduced delay for optimized approach console.log(`Starting page evaluation for PDGA ${pdgaNumber}...`); let roundRatings; try { roundRatings = await page.evaluate((pdgaNum) => { const rows = document.querySelectorAll('tr'); for (const row of rows) { const cells = row.querySelectorAll('td'); const hasPlayerNumber = Array.from(cells).some(cell => cell.innerText && cell.innerText.includes(pdgaNum.toString()) ); if (hasPlayerNumber) { const roundRatingCells = row.querySelectorAll('td.round-rating'); const ratings = []; roundRatingCells.forEach(cell => { const rating = parseInt(cell.innerText.trim()); if (!isNaN(rating) && rating > 0) { ratings.push(rating); } }); return ratings; } } return []; }, pdgaNumber); console.log(`✓ Page evaluation completed, found ${roundRatings.length} round ratings`); } catch (evalError) { console.error(`✗ Page evaluation failed for ${tournamentData.url}:`); console.error('Evaluation error details:', { type: evalError.constructor.name, message: evalError.message, code: evalError.code, stack: evalError.stack?.split('\n')[0] }); throw evalError; // Re-throw to be caught by outer try-catch } if (roundRatings.length > 0) { const parsedDate = parseDate(tournamentData.date); // Extract tournament name from URL for better database storage const tournamentName = tournamentData.url.split('/').pop() || 'Unknown Tournament'; const newRounds = []; roundRatings.forEach(rating => { const roundData = { rating, date: parsedDate, competition: tournamentName }; allRatings.push(roundData); newRounds.push(roundData); }); successfulTournaments++; console.log(`✓ [${tournamentCount}/${tournamentUrls.length}] Found ${roundRatings.length} round ratings for ${tournamentName}`); // Save rounds immediately to database (partial save) try { await saveRoundHistoryToDB(pdgaNumber, newRounds, true); console.log(`💾 Saved ${newRounds.length} rounds to database`); } catch (saveError) { console.error(`⚠️ Could not save rounds to DB: ${saveError.message}`); } } else { console.log(`✗ [${tournamentCount}/${tournamentUrls.length}] No round ratings found for ${tournamentData.url}`); } } catch (error) { console.error(`✗ [${tournamentCount}/${tournamentUrls.length}] Error scraping tournament ${tournamentData.url}:`); console.error('Tournament error type:', error.constructor.name); console.error('Tournament error message:', error.message); console.error('Tournament error code:', error.code); console.error('Tournament error name:', error.name); console.error('Tournament full error object:', JSON.stringify(error, Object.getOwnPropertyNames(error), 2)); // Log the current state when error occurs console.error(`Tournament scraping progress: ${tournamentCount}/${tournamentUrls.length} (${successfulTournaments} successful so far)`); console.error(`Total rounds collected before this error: ${allRatings.length}`); if (error.message.includes('socket hang up')) { console.error('🔌 Socket hang up detected at tournament level - PDGA may be rate limiting'); console.error('💡 Will continue trying remaining tournaments after this failure'); } if (error.message.includes('Navigation timeout')) { console.error('⏰ Navigation timeout at tournament level - page took too long to load'); } if (error.message.includes('net::ERR_CONNECTION_RESET')) { console.error('🚫 Connection reset at tournament level - PDGA blocking requests'); } // Don't let individual tournament failures stop the whole process console.error('⚠️ Continuing with next tournament despite this error...'); } } // Log summary of scraping results console.log(`=== Scraping Summary for PDGA ${pdgaNumber} ===`); console.log(`Tournaments processed: ${tournamentCount}/${tournamentUrls.length}`); console.log(`Successful tournaments: ${successfulTournaments}`); console.log(`Total rounds found: ${allRatings.length}`); console.log(`Completion rate: ${Math.round((successfulTournaments / tournamentUrls.length) * 100)}%`); } catch (error) { console.error(`Error getting competition ratings for PDGA ${pdgaNumber}:`, error); console.error(`=== Partial Results Before Error ===`); console.error(`Tournaments processed: ${tournamentCount}/${tournamentUrls.length || 0}`); console.error(`Successful tournaments: ${successfulTournaments}`); console.error(`Total rounds collected: ${allRatings.length}`); if (allRatings.length > 0) { console.error(`Rounds saved to database before error occurred`); } } finally { await page.close(); } // Return all ratings from the last year (already filtered above) return allRatings; } function parseDate(dateStr) { // Handle multi-day tournament formats first // Examples: "2-Sep to 3-Sep-2023", "2 to 3-Sep-2023" const multiDayMatch = dateStr.match(/^(\d{1,2})(-([A-Za-z]{3}))?(\s+to\s+)(\d{1,2})-([A-Za-z]{3})-(\d{4})$/); if (multiDayMatch) { // Extract first day and use that as the tournament date const day = parseInt(multiDayMatch[1]); const month = multiDayMatch[3] || multiDayMatch[6]; // Use first month if available, otherwise second const year = parseInt(multiDayMatch[7]); const monthMap = { 'Jan': 0, 'Feb': 1, 'Mar': 2, 'Apr': 3, 'May': 4, 'Jun': 5, 'Jul': 6, 'Aug': 7, 'Sep': 8, 'Oct': 9, 'Nov': 10, 'Dec': 11 }; return new Date(year, monthMap[month], day); } const formats = [ /^(\d{1,2})-([A-Za-z]{3})-(\d{4})$/, /^(\d{1,2})\/(\d{1,2})\/(\d{4})$/ ]; for (const format of formats) { const match = dateStr.match(format); if (match) { if (format === formats[0]) { const monthMap = { 'Jan': 0, 'Feb': 1, 'Mar': 2, 'Apr': 3, 'May': 4, 'Jun': 5, 'Jul': 6, 'Aug': 7, 'Sep': 8, 'Oct': 9, 'Nov': 10, 'Dec': 11 }; const day = parseInt(match[1]); const month = monthMap[match[2]]; const year = parseInt(match[3]); return new Date(year, month, day); } } } return new Date(dateStr); } function getNextPDGAUpdateDate() { const today = new Date(); const currentMonth = today.getMonth(); const currentYear = today.getFullYear(); // Calculate 2nd Tuesday of current month const firstDayOfMonth = new Date(currentYear, currentMonth, 1); const firstTuesday = new Date(firstDayOfMonth); // Find first Tuesday (day 2 = Tuesday, 0 = Sunday) const daysUntilTuesday = (2 - firstDayOfMonth.getDay() + 7) % 7; firstTuesday.setDate(1 + daysUntilTuesday); // Second Tuesday is 7 days after first Tuesday const secondTuesday = new Date(firstTuesday); secondTuesday.setDate(firstTuesday.getDate() + 7); // If today is before or on the 2nd Tuesday of this month, use this month's date // Otherwise, use next month's 2nd Tuesday if (today <= secondTuesday) { return secondTuesday; } else { // Calculate 2nd Tuesday of next month const nextMonth = currentMonth === 11 ? 0 : currentMonth + 1; const nextYear = currentMonth === 11 ? currentYear + 1 : currentYear; const firstDayNextMonth = new Date(nextYear, nextMonth, 1); const firstTuesdayNext = new Date(firstDayNextMonth); const daysUntilTuesdayNext = (2 - firstDayNextMonth.getDay() + 7) % 7; firstTuesdayNext.setDate(1 + daysUntilTuesdayNext); const secondTuesdayNext = new Date(firstTuesdayNext); secondTuesdayNext.setDate(firstTuesdayNext.getDate() + 7); return secondTuesdayNext; } } function calculatePredictedRating(roundRatings) { const debugLog = []; debugLog.push('=== PDGA RATING CALCULATION (Following Official Rules) ==='); if (!roundRatings || roundRatings.length === 0) { debugLog.push('❌ No rounds provided for prediction'); return { rating: 0, debugLog }; } debugLog.push(`📊 Starting with ${roundRatings.length} total rounds`); // PDGA Simulation: Only include rounds that would be rated by next update const nextUpdateDate = getNextPDGAUpdateDate(); debugLog.push(`🎯 PDGA Update Simulation: Next update date is ${nextUpdateDate.toDateString()}`); debugLog.push(` Only including rounds played before ${nextUpdateDate.toDateString()}`); // Sort all rounds by date (most recent first), but only include rounds before next update const allSortedRounds = roundRatings .filter(r => r.rating > 0 && r.date < nextUpdateDate) .sort((a, b) => b.date - a.date); if (allSortedRounds.length === 0) { debugLog.push('❌ No valid rounds after filtering for update date'); return { rating: 0, debugLog }; } debugLog.push(`📊 After update date filter: ${allSortedRounds.length} rounds`); // PDGA Rule: Use rounds from 12 months prior to next update date const twelveMonthsBeforeUpdate = new Date(nextUpdateDate); twelveMonthsBeforeUpdate.setFullYear(twelveMonthsBeforeUpdate.getFullYear() - 1); const mostRecentDate = allSortedRounds[0].date; debugLog.push(`📅 Most recent round: ${mostRecentDate.toDateString()}`); debugLog.push(`📅 12-month cutoff: ${twelveMonthsBeforeUpdate.toDateString()} (1 year before update)`); // Step 1: Get rounds from last 12 months before update let eligibleRounds = allSortedRounds.filter(r => r.date >= twelveMonthsBeforeUpdate); debugLog.push('🗓️ 12-MONTH FILTERING:'); debugLog.push(`✅ Rounds in last 12 months: ${eligibleRounds.length}`); // PDGA Rule: If fewer than 8 rounds in 12 months, extend to 24 months before update if (eligibleRounds.length < 8) { const twentyFourMonthsBeforeUpdate = new Date(nextUpdateDate); twentyFourMonthsBeforeUpdate.setFullYear(twentyFourMonthsBeforeUpdate.getFullYear() - 2); eligibleRounds = allSortedRounds.filter(r => r.date >= twentyFourMonthsBeforeUpdate); debugLog.push(`⚠️ Extended to 24 months before update (${twentyFourMonthsBeforeUpdate.toDateString()}) - now ${eligibleRounds.length} rounds`); } if (eligibleRounds.length === 0) { debugLog.push('❌ No eligible rounds found'); return { rating: 0, debugLog }; } debugLog.push(`📈 ELIGIBLE ROUNDS: ${eligibleRounds.length}`); eligibleRounds.forEach((round, index) => { debugLog.push(` ${index + 1}. ${round.date.toDateString()}: ${round.rating} (${round.competition})`); }); let workingRounds = [...eligibleRounds]; let workingRatings = workingRounds.map(r => r.rating); // PDGA Rule: Apply outlier exclusion if ≥7 rounds if (workingRatings.length >= 7) { debugLog.push('🔍 OUTLIER EXCLUSION (≥7 rounds available):'); const mean = workingRatings.reduce((sum, r) => sum + r, 0) / workingRatings.length; const stdDev = calculateStandardDeviation(workingRatings); debugLog.push(` Mean: ${mean.toFixed(1)}`); debugLog.push(` Std Dev: ${stdDev.toFixed(1)}`); // Two PDGA exclusion rules: // 1. More than 2.5 standard deviations below average const stdDevCutoff = mean - 2.5 * stdDev; // 2. More than 100 points below average const hundredPointCutoff = mean - 100; debugLog.push(` 2.5σ cutoff: ${stdDevCutoff.toFixed(1)}`); debugLog.push(` 100-point cutoff: ${hundredPointCutoff.toFixed(1)}`); const filteredByStdDev = workingRatings.filter(rating => rating >= stdDevCutoff); const filteredBy100Points = workingRatings.filter(rating => rating >= hundredPointCutoff); // Apply both exclusion rules const filteredRatings = workingRatings.filter(rating => rating >= stdDevCutoff && rating >= hundredPointCutoff ); const stdDevOutliers = workingRatings.filter(rating => rating < stdDevCutoff); const hundredPointOutliers = workingRatings.filter(rating => rating < hundredPointCutoff && rating >= stdDevCutoff); if (stdDevOutliers.length > 0) { debugLog.push(` ❌ 2.5σ outliers removed: ${stdDevOutliers.length} rounds`); stdDevOutliers.forEach(rating => { const round = workingRounds.find(r => r.rating === rating); debugLog.push(` - ${rating} (${round.date.toDateString()}: ${round.competition})`); }); } if (hundredPointOutliers.length > 0) { debugLog.push(` ❌ 100-point outliers removed: ${hundredPointOutliers.length} rounds`); hundredPointOutliers.forEach(rating => { const round = workingRounds.find(r => r.rating === rating); debugLog.push(` - ${rating} (${round.date.toDateString()}: ${round.competition})`); }); } if (stdDevOutliers.length === 0 && hundredPointOutliers.length === 0) { debugLog.push(` ✅ No outliers detected`); } // Keep filtered rounds only if we still have enough data if (filteredRatings.length >= 4) { workingRounds = workingRounds.filter(round => round.rating >= stdDevCutoff && round.rating >= hundredPointCutoff ); workingRatings = filteredRatings; debugLog.push(` ✅ Using ${filteredRatings.length} rounds after outlier removal`); } else { debugLog.push(` ⚠️ Too few rounds after outlier removal (${filteredRatings.length}), keeping all rounds`); } } else { debugLog.push(`⏭️ OUTLIER EXCLUSION SKIPPED (only ${workingRatings.length} rounds, need ≥7)`); } // PDGA Rule: Most recent 25% of rounds get double weight if ≥9 rounds debugLog.push('⚖️ WEIGHTING (Most recent 25% count double if ≥9 rounds):'); const weightedRatings = []; if (workingRatings.length >= 9) { const recentCount = Math.round(workingRatings.length * 0.25); debugLog.push(` ✅ Double-weighting most recent ${recentCount} rounds`); // Add all ratings once weightedRatings.push(...workingRatings); // Add the most recent 25% again (double weight) for (let i = 0; i < recentCount; i++) { weightedRatings.push(workingRatings[i]); const round = workingRounds[i]; debugLog.push(` 2x weight: ${workingRatings[i]} (${round.date.toDateString()}: ${round.competition})`); } debugLog.push(` 📊 Total values: ${workingRatings.length} + ${recentCount} double-weighted = ${weightedRatings.length}`); } else { debugLog.push(` ➡️ No double weighting (${workingRatings.length} rounds, need ≥9)`); weightedRatings.push(...workingRatings); } // Calculate final rating const sum = weightedRatings.reduce((sum, r) => sum + r, 0); const average = sum / weightedRatings.length; const finalRating = Math.round(average); debugLog.push('🎯 FINAL CALCULATION:'); debugLog.push(` Sum: ${sum}`); debugLog.push(` Count: ${weightedRatings.length}`); debugLog.push(` Average: ${average.toFixed(1)}`); debugLog.push(` Final Rating: ${finalRating}`); debugLog.push('=== END PDGA CALCULATION ==='); return { rating: finalRating, debugLog }; } function calculateStandardDeviation(ratings) { if (!ratings || ratings.length === 0) return 0; const mean = ratings.reduce((sum, r) => sum + r, 0) / ratings.length; const variance = ratings.reduce((sum, r) => sum + Math.pow(r - mean, 2), 0) / ratings.length; return Math.sqrt(variance); } async function getAllRatingsFromDB(progressCallback = null) { try { const pdgaNumbers = fs.readFileSync('pdga-numbers.txt', 'utf-8') .split('\n') .map(num => num.trim()) .filter(num => num); const ratings = []; const total = pdgaNumbers.length; for (let i = 0; i < pdgaNumbers.length; i++) { const pdgaNumber = pdgaNumbers[i]; console.log(`Loading PDGA ${pdgaNumber} from database... (${i + 1}/${total})`); if (progressCallback) { progressCallback({ current: i + 1, total, pdgaNumber, status: 'loading' }); } try { // Load from database only (source of truth) const playerData = await getPlayerDataFromDB(pdgaNumber); if (playerData) { ratings.push(playerData); } else { console.log(`PDGA ${pdgaNumber} not found in DB - skipping (page load)`); // Skip players not in DB for page loads } if (progressCallback) { progressCallback({ current: i + 1, total, pdgaNumber, status: playerData ? 'completed' : 'skipped', name: playerData ? playerData.name : 'Not in DB' }); } } catch (error) { console.error(`Failed to load PDGA ${pdgaNumber} from database:`, error.message); const errorData = { pdgaNumber: parseInt(pdgaNumber), name: 'Database Error', rating: null, ratingChange: null, predictedRating: null }; ratings.push(errorData); if (progressCallback) { progressCallback({ current: i + 1, total, pdgaNumber, status: 'error', name: 'Database Error' }); } } } return ratings.sort((a, b) => (b.rating || 0) - (a.rating || 0)); } catch (error) { console.error('Error reading PDGA numbers:', error); return []; } } app.get('/', (req, res) => { res.sendFile(path.join(__dirname, 'index.html')); }); app.get('/api/ratings', async (req, res) => { try { const ratings = await getAllRatingsFromDB(); res.json(ratings); } catch (error) { res.status(500).json({ error: 'Failed to fetch ratings' }); } }); app.get('/api/ratings/progress', (req, res) => { res.writeHead(200, { 'Content-Type': 'text/event-stream', 'Cache-Control': 'no-cache', 'Connection': 'keep-alive', 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Headers': 'Cache-Control' }); const progressCallback = (progress) => { res.write(`data: ${JSON.stringify(progress)}\n\n`); }; getAllRatingsFromDB(progressCallback).then(ratings => { res.write(`data: ${JSON.stringify({ status: 'complete', ratings })}\n\n`); res.end(); }).catch(error => { res.write(`data: ${JSON.stringify({ status: 'error', error: error.message })}\n\n`); res.end(); }); req.on('close', () => { res.end(); }); }); // Endpoint to populate database from PDGA numbers file app.post('/api/populate-database', (req, res) => { res.writeHead(200, { 'Content-Type': 'text/event-stream', 'Cache-Control': 'no-cache', 'Connection': 'keep-alive', 'Access-Control-Allow-Origin': '*', }); const progressCallback = (progress) => { res.write(`data: ${JSON.stringify(progress)}\n\n`); }; console.log('=== Starting database population from PDGA numbers file ==='); // Use the scraping function to populate database getAllRatingsWithScraping(progressCallback).then(ratings => { console.log(`=== Database population complete: ${ratings.length} players added ===`); res.write(`data: ${JSON.stringify({ status: 'complete', ratings, message: `Successfully populated database with ${ratings.length} players` })}\n\n`); res.end(); }).catch(error => { console.error('Error populating database:', error); res.write(`data: ${JSON.stringify({ status: 'error', message: error.message })}\n\n`); res.end(); }); }); // Simple endpoint to check if database needs population app.get('/api/database-status', async (req, res) => { try { const pdgaNumbers = fs.readFileSync('pdga-numbers.txt', 'utf-8') .split('\n') .map(num => num.trim()) .filter(num => num); let playersInDB = 0; for (const pdgaNumber of pdgaNumbers) { const player = await getPlayerFromDB(pdgaNumber); if (player) playersInDB++; } res.json({ totalExpected: pdgaNumbers.length, playersInDB: playersInDB, needsPopulation: playersInDB === 0, populationProgress: Math.round((playersInDB / pdgaNumbers.length) * 100) }); } catch (error) { res.status(500).json({ error: 'Failed to check database status' }); } }); app.get('/api/load-all-players', (req, res) => { res.writeHead(200, { 'Content-Type': 'text/event-stream', 'Cache-Control': 'no-cache', 'Connection': 'keep-alive', 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Headers': 'Cache-Control' }); const progressCallback = (progress) => { res.write(`data: ${JSON.stringify(progress)}\n\n`); }; // Use the original scraping function for bulk loading getAllRatingsWithScraping(progressCallback).then(ratings => { res.write(`data: ${JSON.stringify({ status: 'complete', ratings })}\n\n`); res.end(); }).catch(error => { res.write(`data: ${JSON.stringify({ status: 'error', error: error.message })}\n\n`); res.end(); }); req.on('close', () => { res.end(); }); }); // Original scraping function for bulk loading async function getAllRatingsWithScraping(progressCallback = null) { try { const pdgaNumbers = fs.readFileSync('pdga-numbers.txt', 'utf-8') .split('\n') .map(num => num.trim()) .filter(num => num); const ratings = []; const total = pdgaNumbers.length; for (let i = 0; i < pdgaNumbers.length; i++) { const pdgaNumber = pdgaNumbers[i]; console.log(`Scraping PDGA ${pdgaNumber}... (${i + 1}/${total})`); if (progressCallback) { progressCallback({ current: i + 1, total, pdgaNumber, status: 'loading' }); } try { const playerData = await scrapePDGARating(pdgaNumber); ratings.push(playerData); if (progressCallback) { progressCallback({ current: i + 1, total, pdgaNumber, status: 'completed', name: playerData.name }); } // Delay between PDGA scraping requests to be respectful await new Promise(resolve => setTimeout(resolve, 2000)); } catch (error) { console.error(`Failed to scrape PDGA ${pdgaNumber}:`, error.message); const errorData = { pdgaNumber: parseInt(pdgaNumber), name: 'Error', rating: 0, ratingChange: null, predictedRating: null }; ratings.push(errorData); if (progressCallback) { progressCallback({ current: i + 1, total, pdgaNumber, status: 'error', name: 'Error' }); } } } return ratings.sort((a, b) => (b.rating || 0) - (a.rating || 0)); } catch (error) { console.error('Error reading PDGA numbers:', error); return []; } } async function fetchRatingHistory(pdgaNumber) { return new Promise((resolve, reject) => { const options = { hostname: 'www.pdga.com', port: 443, path: `/player/${pdgaNumber}/history`, method: 'GET', headers: { 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36' }, timeout: 30000 }; console.log(`Fetching rating history for PDGA #${pdgaNumber} from: https://www.pdga.com/player/${pdgaNumber}/history`); const req = https.request(options, (res) => { let data = ''; res.on('data', (chunk) => { data += chunk; }); res.on('end', () => { if (res.statusCode === 200) { console.log(`Rating history request successful for PDGA #${pdgaNumber}`); resolve(data); } else { // Log detailed error information for rating history console.log(`Rating History Error for PDGA #${pdgaNumber}:`); console.log(`Status: ${res.statusCode}`); console.log('Response Headers:', JSON.stringify(res.headers, null, 2)); // Check for rate limiting headers if (res.headers['retry-after']) { console.log(`Retry-After: ${res.headers['retry-after']} seconds`); } if (res.headers['x-ratelimit-limit']) { console.log(`Rate Limit: ${res.headers['x-ratelimit-limit']}`); } if (res.headers['x-ratelimit-remaining']) { console.log(`Rate Limit Remaining: ${res.headers['x-ratelimit-remaining']}`); } // Log partial response if available if (data.length > 0) { console.log(`Partial response received (${data.length} bytes):`, data.substring(0, 200)); } const error = new Error(`HTTP ${res.statusCode} for rating history`); error.statusCode = res.statusCode; error.headers = res.headers; reject(error); } }); }); req.on('error', (error) => { console.log(`Rating history request error for PDGA #${pdgaNumber}:`, { code: error.code, message: error.message, errno: error.errno, syscall: error.syscall }); if (error.code === 'ECONNRESET') { console.log('Connection reset on rating history - likely rate limited by PDGA'); } if (error.code === 'ECONNREFUSED') { console.log('Connection refused - PDGA server may be blocking requests'); } if (error.code === 'ETIMEDOUT') { console.log('Request timed out - server may be overloaded'); } reject(error); }); req.on('timeout', () => { console.log(`Rating history request timeout for PDGA #${pdgaNumber} after 30s`); req.destroy(); reject(new Error('Request timeout')); }); req.setTimeout(30000); req.end(); }); } function parseRatingHistory(html) { const history = []; // Find all table rows with rating data const rowMatches = html.match(/<tr[^>]*>[\s\S]*?<\/tr>/gi); if (rowMatches) { for (const row of rowMatches) { // Skip header rows and empty rows if (row.includes('<th') || !row.includes('<td')) continue; // Extract date, rating, and rounds from table cells const cellMatches = row.match(/<td[^>]*>(.*?)<\/td>/gi); if (cellMatches && cellMatches.length >= 2) { const dateText = cellMatches[0].replace(/<[^>]*>/g, '').trim(); const ratingText = cellMatches[1].replace(/<[^>]*>/g, '').trim(); // Parse date (DD-Mon-YYYY format) const dateMatch = dateText.match(/(\d{1,2})-([A-Za-z]{3})-(\d{4})/); if (dateMatch && !isNaN(parseInt(ratingText))) { const [, day, month, year] = dateMatch; const monthMap = { 'Jan': 0, 'Feb': 1, 'Mar': 2, 'Apr': 3, 'May': 4, 'Jun': 5, 'Jul': 6, 'Aug': 7, 'Sep': 8, 'Oct': 9, 'Nov': 10, 'Dec': 11 }; const date = new Date(parseInt(year), monthMap[month], parseInt(day)); history.push({ date: date.toISOString().split('T')[0], // YYYY-MM-DD format rating: parseInt(ratingText), displayDate: dateText }); } } } } // Sort by date (oldest first for chart display) return history.sort((a, b) => new Date(a.date) - new Date(b.date)); } app.get('/api/rating-history/:pdgaNumber', async (req, res) => { try { const { pdgaNumber } = req.params; // Check database first const cachedHistory = await getRatingHistoryFromDB(pdgaNumber); if (cachedHistory && cachedHistory.length > 0) { console.log(`Using cached rating history from DB for PDGA ${pdgaNumber}`); const formattedHistory = cachedHistory.map(row => ({ date: row.date, rating: row.rating, displayDate: new Date(row.date).toLocaleDateString('en-US', { day: '2-digit', month: 'short', year: 'numeric' }) })); res.json({ pdgaNumber: parseInt(pdgaNumber), history: formattedHistory }); return; } console.log(`Fetching rating history for PDGA ${pdgaNumber}...`); const html = await fetchRatingHistory(pdgaNumber); const history = parseRatingHistory(html); // Save to database try { await saveRatingHistoryToDB(pdgaNumber, history); console.log(`Saved rating history for PDGA ${pdgaNumber} to database`); } catch (dbErr) { console.error(`Failed to save rating history to database:`, dbErr.message); } res.json({ pdgaNumber: parseInt(pdgaNumber), history }); } catch (error) { console.error('Error fetching rating history:', error.message); res.status(500).json({ error: 'Failed to fetch rating history' }); } }); app.post('/api/clear-cache', (req, res) => { try { // Clear database cache by updating timestamps to force refresh db.run('UPDATE players SET last_updated = datetime("now", "-25 hours"), last_round_update = NULL', (err) => { if (err) { console.error('Error clearing database cache:', err); res.status(500).json({ error: 'Failed to clear database cache' }); return; } // Also clear legacy in-memory cache const cacheSize = cache.size; cache.clear(); console.log('Database cache cleared - all players will be refreshed on next request'); res.json({ success: true, message: `Cache cleared - database and ${cacheSize} memory entries reset` }); }); } catch (error) { console.error('Error clearing cache:', error); res.status(500).json({ error: 'Failed to clear cache' }); } }); // Individual player refresh endpoints app.post('/api/refresh-player/:pdgaNumber', async (req, res) => { try { const { pdgaNumber } = req.params; console.log(`Manually refreshing player data for PDGA ${pdgaNumber}`); // Force refresh by bypassing cache const html = await fetchPlayerDataHTTP(pdgaNumber); const playerData = parsePlayerData(html, pdgaNumber); // Save to database await savePlayerToDB(playerData); res.json({ success: true, player: playerData }); } catch (error) { console.error('Error refreshing player data:', error.message); res.status(500).json({ error: 'Failed to refresh player data' }); } }); app.post('/api/refresh-rating-history/:pdgaNumber', async (req, res) => { try { const { pdgaNumber } = req.params; console.log(`=== Manually refreshing rating history for PDGA ${pdgaNumber} ===`); const startTime = Date.now(); const html = await fetchRatingHistory(pdgaNumber); const fetchTime = Date.now() - startTime; console.log(`HTML fetch completed in ${fetchTime}ms, received ${html.length} bytes`); const parseStartTime = Date.now(); const history = parseRatingHistory(html); const parseTime = Date.now() - parseStartTime; console.log(`Parsing completed in ${parseTime}ms, found ${history.length} history entries`); if (history.length > 0) { console.log('Sample history entries:', history.slice(0, 3)); } else { console.log('No history entries found. HTML sample:', html.substring(0, 500)); } const dbStartTime = Date.now(); await saveRatingHistoryToDB(pdgaNumber, history); const dbTime = Date.now() - dbStartTime; console.log(`Database save completed in ${dbTime}ms`); const formattedHistory = history.map(entry => ({ date: entry.date, rating: entry.rating, displayDate: entry.displayDate })); console.log(`=== Rating history refresh completed for PDGA ${pdgaNumber} ===`); res.json({ success: true, history: formattedHistory }); } catch (error) { console.error(`=== Error refreshing rating history for PDGA ${pdgaNumber} ===`); console.error('Error type:', error.constructor.name); console.error('Error message:', error.message); console.error('Error code:', error.code); console.error('Status code:', error.statusCode); if (error.stack) { console.error('Stack trace:', error.stack); } res.status(500).json({ error: 'Failed to refresh rating history', details: error.message, code: error.code }); } }); app.post('/api/refresh-round-history/:pdgaNumber', async (req, res) => { let browser = null; const { pdgaNumber } = req.params; try { // Check when we last updated rounds for this player const lastRoundUpdate = await getLastRoundUpdateDate(pdgaNumber); const sinceDate = lastRoundUpdate ? new Date(lastRoundUpdate) : null; const isIncremental = !!sinceDate; console.log(`${isIncremental ? 'Incrementally updating' : 'Fully refreshing'} round history for PDGA ${pdgaNumber}${sinceDate ? ` since ${sinceDate.toDateString()}` : ''}`); try { browser = await puppeteer.launch({ headless: "new", args: [ '--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage', '--disable-accelerated-2d-canvas', '--no-first-run', '--no-zygote', '--disable-gpu' ] }); } catch (launchError) { // Fallback with minimal options browser = await puppeteer.launch({ headless: true, args: ['--no-sandbox', '--disable-dev-shm-usage'] }); } // Step 1: Get official rating history let officialHistory; try { officialHistory = await getOfficialRatingHistory(browser, pdgaNumber); if (officialHistory.length > 0) { await saveRatingHistoryToDB(pdgaNumber, officialHistory); } } catch (historyError) { console.error('Failed to fetch official history:', historyError.message); officialHistory = []; } // Step 2: Get optimized round collection (details + new tournaments only) let allRounds = []; try { console.log(`Using optimized approach: /details + new tournaments only for PDGA ${pdgaNumber}...`); allRounds = await getOptimizedPlayerRounds(browser, pdgaNumber); if (allRounds.length > 0) { // Convert to the format expected by saveRoundHistoryToDB const roundsForDB = allRounds.map(round => ({ rating: round.rating, date: round.date, competition: round.competition })); // Save all rounds (replacing existing data with the complete optimized set) await saveRoundHistoryToDB(pdgaNumber, roundsForDB, false); // false = replace all console.log(`✓ Saved ${allRounds.length} rounds using optimized approach`); // Update timestamp to mark when we last did a full collection await updateLastRoundUpdateDate(pdgaNumber); } else { console.log('ℹ No rounds found'); } } catch (detailsError) { console.error('Failed to fetch rounds using optimized approach:', detailsError.message); allRounds = []; } await browser.close(); browser = null; // Calculate prediction from optimized round collection const dbRounds = await getRoundHistoryFromDB(pdgaNumber); const roundsForPrediction = dbRounds.map(round => ({ rating: round.rating, date: new Date(round.date), competition: round.competition_name })); const result = calculatePredictedRating(roundsForPrediction); // Count official vs new rounds const officialCount = allRounds.filter(r => r.source === 'official').length; const newCount = allRounds.filter(r => r.source === 'new').length; res.json({ success: true, predictedRating: result.rating, debugLog: result.debugLog, totalRounds: roundsForPrediction.length, officialRounds: officialCount, newRounds: newCount, approach: 'optimized', message: `Used /details (${officialCount} rounds) + new tournaments (${newCount} rounds)` }); } catch (error) { console.error(`=== Error refreshing round history for PDGA ${pdgaNumber} ===`); console.error('Error type:', error.constructor.name); console.error('Error message:', error.message); console.error('Error code:', error.code); console.error('Error name:', error.name); // Log all error properties for debugging console.error('Full error object:', JSON.stringify(error, Object.getOwnPropertyNames(error), 2)); // Check if this is a puppeteer-specific error if (error.name) { console.error(`Specific error name: ${error.name}`); } // Log timing information const currentTime = new Date().toISOString(); console.error(`Error occurred at: ${currentTime}`); // Check if we have browser information if (browser) { console.error('Browser was active when error occurred'); } else { console.error('No active browser session'); } if (error.message.includes('socket hang up')) { console.error('🔌 Socket hang up - likely rate limited by PDGA'); console.error('💡 Try waiting a few minutes before attempting again'); console.error('🔍 This usually happens when PDGA blocks too many rapid requests'); } if (error.message.includes('Navigation timeout')) { console.error('⏰ Navigation timeout - PDGA pages loading slowly'); console.error('💡 Try reducing the number of tournaments scraped'); } if (error.message.includes('net::ERR_CONNECTION_RESET')) { console.error('🚫 Connection reset by PDGA server'); console.error('💡 PDGA may be blocking or rate limiting requests'); } if (error.stack) { console.error('Full stack trace:'); console.error(error.stack); } else { console.error('No stack trace available'); } if (browser) { try { await browser.close(); console.log('Browser closed successfully'); } catch (closeError) { console.error('Error closing browser:', closeError.message); } } res.status(500).json({ error: 'Failed to refresh round history', details: error.message, errorType: error.constructor.name, errorName: error.name, timestamp: new Date().toISOString(), suggestion: error.message.includes('socket hang up') ? 'Rate limited by PDGA - try again in a few minutes. This happens when too many requests are made too quickly.' : error.message.includes('timeout') ? 'PDGA pages are loading slowly - try again later when PDGA servers are less busy.' : 'Tournament scraping failed - check server logs for detailed error information' }); } }); app.post('/api/predicted-rating/:pdgaNumber', async (req, res) => { let browser = null; try { const { pdgaNumber } = req.params; // Always check database first (source of truth) const cachedPrediction = await getPredictedRatingFromDB(pdgaNumber); if (cachedPrediction > 0) { console.log(`Using DB round history for PDGA ${pdgaNumber} prediction (source of truth)`); res.json({ pdgaNumber: parseInt(pdgaNumber), predictedRating: cachedPrediction }); return; } browser = await puppeteer.launch({ headless: "new", args: [ '--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage', '--disable-accelerated-2d-canvas', '--no-first-run', '--no-zygote', '--disable-gpu' ] }); console.log(`Calculating predicted rating for PDGA ${pdgaNumber}...`); // Check for incremental update const lastRoundUpdate = await getLastRoundUpdateDate(pdgaNumber); const sinceDate = lastRoundUpdate ? new Date(lastRoundUpdate) : null; const isIncremental = !!sinceDate; // Get round ratings and calculate prediction const newRoundRatings = await getPlayerCompetitionRatings(browser, pdgaNumber, sinceDate); await browser.close(); browser = null; // Save new round history to database await saveRoundHistoryToDB(pdgaNumber, newRoundRatings, isIncremental); // Get all rounds for prediction calculation const allRounds = await getRoundHistoryFromDB(pdgaNumber); const roundRatings = allRounds.map(round => ({ rating: round.rating, date: new Date(round.date), competition: round.competition_name })); const result = calculatePredictedRating(roundRatings); res.json({ pdgaNumber: parseInt(pdgaNumber), predictedRating: result.rating, debugLog: result.debugLog }); } catch (error) { console.error('Error calculating predicted rating:', error.message || error); if (browser) { try { await browser.close(); } catch (closeError) { console.error('Error closing browser:', closeError.message); } } res.status(500).json({ error: 'Failed to calculate predicted rating' }); } }); // Test function to probe PDGA rate limiting async function testPDGARateLimit() { console.log('Testing PDGA rate limiting behavior...'); const testPdgaNumbers = ['60954', '178737', '251092']; // First few from our list const requestTimes = []; for (let i = 0; i < testPdgaNumbers.length; i++) { const startTime = Date.now(); try { console.log(`Test request ${i + 1}: PDGA #${testPdgaNumbers[i]}`); await fetchPlayerDataHTTP(testPdgaNumbers[i]); const endTime = Date.now(); requestTimes.push(endTime - startTime); console.log(`Request ${i + 1} completed in ${endTime - startTime}ms`); } catch (error) { const endTime = Date.now(); requestTimes.push(endTime - startTime); console.log(`Request ${i + 1} failed after ${endTime - startTime}ms:`, error.message); } // Small delay between test requests if (i < testPdgaNumbers.length - 1) { await new Promise(resolve => setTimeout(resolve, 500)); } } console.log('Rate limit test completed. Request times:', requestTimes); } // Uncomment the line below to run rate limit test on startup // testPDGARateLimit(); // Initialize database and start server initializeDatabase().then(async () => { // Check and populate missing players from PDGA numbers file await checkAndPopulateDatabase(); app.listen(PORT, () => { console.log(`PDGA Ratings app running on http://localhost:${PORT}`); }); }).catch(err => { console.error('Failed to initialize database:', err); process.exit(1); });