diff --git a/.gitignore b/.gitignore index 5fa362b..b903b59 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ node_modules/ .env .cache/ -*.log \ No newline at end of file +*.log +*.db \ No newline at end of file diff --git a/server.js b/server.js index e8b4a1c..cff026f 100644 --- a/server.js +++ b/server.js @@ -1,9 +1,10 @@ const express = require('express'); -const puppeteer = require('puppeteer'); -const https = require('https'); -const fs = require('fs'); const path = require('path'); -const sqlite3 = require('sqlite3').verbose(); + +const { initializeDatabase, checkAndPopulateDatabase } = require('./src/db'); +const playerRoutes = require('./src/routes/players'); +const courseRoutes = require('./src/routes/courses'); +const pageRoutes = require('./src/routes/pages'); const app = express(); const PORT = 3000; @@ -11,3400 +12,17 @@ const PORT = 3000; app.use(express.static('public')); app.use(express.json()); -// Initialize SQLite database -const dbPath = process.env.DB_PATH || './ratings.db'; -const db = new sqlite3.Database(dbPath); +app.use(playerRoutes); +app.use(courseRoutes); +app.use(pageRoutes); -// In-memory cache for layout-division-event mapping -const layoutEventCache = new Map(); // key: courseId, value: array of {name, par, divisions, eventUrl} - -// Request locking to prevent concurrent scrapes of the same resource -const activeScrapes = new Map(); // key: resourceId, value: Promise - -// Initialize database schema -function initializeDatabase() { - return new Promise((resolve, reject) => { - db.serialize(() => { - // Create players table - db.run(` - CREATE TABLE IF NOT EXISTS players ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - pdga_number INTEGER UNIQUE NOT NULL, - name TEXT NOT NULL, - current_rating INTEGER, - rating_change INTEGER, - last_updated DATETIME DEFAULT CURRENT_TIMESTAMP, - last_round_update DATETIME DEFAULT NULL - ) - `); - - // Migration: Add last_round_update column if it doesn't exist - db.get("PRAGMA table_info(players)", (err, info) => { - if (err) { - console.error('Error checking table schema:', err); - return; - } - - // Check if columns exist by querying table info - db.all("PRAGMA table_info(players)", (err, columns) => { - if (err) { - console.error('Error getting table info:', err); - return; - } - - const hasLastRoundUpdate = columns.some(col => col.name === 'last_round_update'); - const hasPredictedRating = columns.some(col => col.name === 'predicted_rating'); - const hasStdDev = columns.some(col => col.name === 'std_dev'); - - if (!hasLastRoundUpdate) { - console.log('Adding last_round_update column to players table...'); - db.run(` - ALTER TABLE players ADD COLUMN last_round_update DATETIME DEFAULT NULL - `, (err) => { - if (err) { - console.error('Error adding last_round_update column:', err.message); - } else { - console.log('Successfully added last_round_update column'); - } - }); - } - - if (!hasPredictedRating) { - console.log('Adding predicted_rating column to players table...'); - db.run(` - ALTER TABLE players ADD COLUMN predicted_rating INTEGER DEFAULT NULL - `, (err) => { - if (err) { - console.error('Error adding predicted_rating column:', err.message); - } else { - console.log('Successfully added predicted_rating column'); - } - }); - } - - if (!hasStdDev) { - console.log('Adding std_dev column to players table...'); - db.run(` - ALTER TABLE players ADD COLUMN std_dev INTEGER DEFAULT NULL - `, (err) => { - if (err) { - console.error('Error adding std_dev column:', err.message); - } else { - console.log('Successfully added std_dev column'); - } - }); - } - }); - }); - - // Create round_history table - db.run(` - CREATE TABLE IF NOT EXISTS round_history ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - player_id INTEGER NOT NULL, - date DATE NOT NULL, - competition_name TEXT NOT NULL, - rating INTEGER NOT NULL, - FOREIGN KEY (player_id) REFERENCES players (id) - ) - `); - - // Create rating_history table - db.run(` - CREATE TABLE IF NOT EXISTS rating_history ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - player_id INTEGER NOT NULL, - date DATE NOT NULL, - rating INTEGER NOT NULL, - FOREIGN KEY (player_id) REFERENCES players (id) - ) - `); - - // Create courses table - db.run(` - CREATE TABLE IF NOT EXISTS courses ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - name TEXT NOT NULL, - link TEXT UNIQUE NOT NULL, - city TEXT, - last_updated DATETIME DEFAULT CURRENT_TIMESTAMP - ) - `); - - // Create layouts table - db.run(` - CREATE TABLE IF NOT EXISTS layouts ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - course_id INTEGER NOT NULL, - name TEXT NOT NULL, - par INTEGER NOT NULL, - mean_rating INTEGER, - rating_count INTEGER DEFAULT 0, - last_calculated DATETIME, - FOREIGN KEY (course_id) REFERENCES courses (id), - UNIQUE(course_id, name, par) - ) - `, (err) => { - if (err) { - reject(err); - } else { - // Add missing columns if they don't exist (migration) - db.run(`ALTER TABLE layouts ADD COLUMN mean_rating INTEGER`, () => { - // Ignore error if column already exists - db.run(`ALTER TABLE layouts ADD COLUMN rating_count INTEGER DEFAULT 0`, () => { - // Ignore error if column already exists - db.run(`ALTER TABLE layouts ADD COLUMN last_calculated DATETIME`, () => { - // Ignore error if column already exists - db.run(`ALTER TABLE layouts ADD COLUMN last_played DATE`, () => { - // Ignore error if column already exists - console.log('Database initialized successfully'); - resolve(); - }); - }); - }); - }); - } - }); - }); - }); -} - -// Check and populate database from PDGA numbers file at startup (only if DB is empty) -async function checkAndPopulateDatabase() { - try { - // Check if database has any players - const playerCount = await new Promise((resolve, reject) => { - db.get('SELECT COUNT(*) as count FROM players', [], (err, row) => { - if (err) reject(err); - else resolve(row.count); - }); - }); - - if (playerCount > 0) { - console.log(`✓ Database already has ${playerCount} players - skipping text file import`); - console.log('📝 Note: pdga-numbers.txt is only used when database is empty'); - return; - } - - console.log('=== Database is empty - populating from PDGA numbers file ==='); - - const pdgaNumbers = fs.readFileSync('pdga-numbers.txt', 'utf-8') - .split('\n') - .map(num => num.trim()) - .filter(num => num); - - console.log(`Found ${pdgaNumbers.length} PDGA numbers in file`); - - if (pdgaNumbers.length === 0) { - console.log('⚠ No PDGA numbers found in file'); - return; - } - - console.log('Populating database with players from file...'); - - for (let i = 0; i < pdgaNumbers.length; i++) { - const pdgaNumber = pdgaNumbers[i]; - console.log(`[${i + 1}/${pdgaNumbers.length}] Adding PDGA ${pdgaNumber}...`); - - try { - const playerData = await scrapePDGARating(pdgaNumber); - console.log(` ✓ Added ${playerData.name}`); - - // Delay between requests to be respectful to PDGA - if (i < pdgaNumbers.length - 1) { - await new Promise(resolve => setTimeout(resolve, 2000)); - } - } catch (error) { - console.error(` ✗ Failed to add PDGA ${pdgaNumber}:`, error.message); - } - } - - console.log('=== Database population complete ==='); - - } catch (error) { - console.error('Error during database population check:', error.message); - } -} - -// Database helper functions -function getPlayerFromDB(pdgaNumber) { - return new Promise((resolve, reject) => { - db.get( - 'SELECT * FROM players WHERE pdga_number = ?', - [pdgaNumber], - (err, row) => { - if (err) reject(err); - else resolve(row); - } - ); - }); -} - -function savePlayerToDB(playerData) { - return new Promise((resolve, reject) => { - db.run( - `INSERT OR REPLACE INTO players (pdga_number, name, current_rating, rating_change, last_updated) - VALUES (?, ?, ?, ?, datetime('now'))`, - [playerData.pdgaNumber, playerData.name, playerData.rating, playerData.ratingChange], - function(err) { - if (err) reject(err); - else resolve(this.lastID); - } - ); - }); -} - -function getRatingHistoryFromDB(pdgaNumber) { - return new Promise((resolve, reject) => { - db.get('SELECT id FROM players WHERE pdga_number = ?', [pdgaNumber], (err, player) => { - if (err) return reject(err); - if (!player) return resolve(null); - - db.all( - 'SELECT * FROM rating_history WHERE player_id = ? ORDER BY date ASC', - [player.id], - (err, rows) => { - if (err) reject(err); - else resolve(rows); - } - ); - }); - }); -} - -function saveRatingHistoryToDB(pdgaNumber, history) { - return new Promise((resolve, reject) => { - db.get('SELECT id FROM players WHERE pdga_number = ?', [pdgaNumber], (err, player) => { - if (err) return reject(err); - if (!player) return reject(new Error('Player not found')); - - // Clear existing history for this player - db.run('DELETE FROM rating_history WHERE player_id = ?', [player.id], (err) => { - if (err) return reject(err); - - // Insert new history - const stmt = db.prepare('INSERT INTO rating_history (player_id, date, rating) VALUES (?, ?, ?)'); - - for (const entry of history) { - stmt.run([player.id, entry.date, entry.rating]); - } - - stmt.finalize((err) => { - if (err) reject(err); - else resolve(); - }); - }); - }); - }); -} - -function getRoundHistoryFromDB(pdgaNumber) { - return new Promise((resolve, reject) => { - db.get('SELECT id FROM players WHERE pdga_number = ?', [pdgaNumber], (err, player) => { - if (err) return reject(err); - if (!player) return resolve([]); - - db.all( - 'SELECT * FROM round_history WHERE player_id = ? ORDER BY date DESC', - [player.id], - (err, rows) => { - if (err) reject(err); - else resolve(rows); - } - ); - }); - }); -} - -function getLastRoundUpdateDate(pdgaNumber) { - return new Promise((resolve, reject) => { - db.get( - 'SELECT last_round_update FROM players WHERE pdga_number = ?', - [pdgaNumber], - (err, row) => { - if (err) reject(err); - else resolve(row ? row.last_round_update : null); - } - ); - }); -} - -function updateLastRoundUpdateDate(pdgaNumber) { - return new Promise((resolve, reject) => { - db.run( - 'UPDATE players SET last_round_update = CURRENT_TIMESTAMP WHERE pdga_number = ?', - [pdgaNumber], - function(err) { - if (err) reject(err); - else resolve(); - } - ); - }); -} - -function saveRatingHistoryToDB(pdgaNumber, ratingHistory) { - return new Promise((resolve, reject) => { - db.get('SELECT id FROM players WHERE pdga_number = ?', [pdgaNumber], (err, player) => { - if (err) return reject(err); - if (!player) return reject(new Error('Player not found')); - - // Clear existing rating history for this player - db.run('DELETE FROM rating_history WHERE player_id = ?', [player.id], (err) => { - if (err) return reject(err); - - if (ratingHistory.length === 0) { - return resolve(); - } - - let completed = 0; - const total = ratingHistory.length; - - ratingHistory.forEach(entry => { - const parsedDate = parseDate(entry.date); - - db.run( - 'INSERT INTO rating_history (player_id, date, rating) VALUES (?, ?, ?)', - [player.id, parsedDate.toISOString().split('T')[0], entry.rating], - (err) => { - if (err) return reject(err); - - completed++; - if (completed === total) { - resolve(); - } - } - ); - }); - }); - }); - }); -} - -function saveRoundHistoryToDB(pdgaNumber, roundData, isIncremental = false) { - return new Promise((resolve, reject) => { - db.get('SELECT id FROM players WHERE pdga_number = ?', [pdgaNumber], (err, player) => { - if (err) return reject(err); - if (!player) return reject(new Error('Player not found')); - - const processRounds = () => { - if (roundData.length === 0) { - // Update last_round_update timestamp even if no new rounds - db.run('UPDATE players SET last_round_update = datetime("now") WHERE pdga_number = ?', [pdgaNumber], (err) => { - if (err) reject(err); - else resolve(); - }); - return; - } - - // Insert new round history - const stmt = db.prepare('INSERT OR REPLACE INTO round_history (player_id, date, competition_name, rating) VALUES (?, ?, ?, ?)'); - - for (const round of roundData) { - stmt.run([player.id, round.date.toISOString().split('T')[0], round.competition || 'Unknown', round.rating]); - } - - stmt.finalize((err) => { - if (err) { - reject(err); - } else { - // Update last_round_update timestamp - db.run('UPDATE players SET last_round_update = datetime("now") WHERE pdga_number = ?', [pdgaNumber], (updateErr) => { - if (updateErr) reject(updateErr); - else resolve(); - }); - } - }); - }; - - if (!isIncremental) { - // Clear existing round history for full refresh - db.run('DELETE FROM round_history WHERE player_id = ?', [player.id], (err) => { - if (err) return reject(err); - processRounds(); - }); - } else { - // For incremental updates, just add new rounds - processRounds(); - } - }); - }); -} - -// Legacy in-memory cache (will be phased out) -const cache = new Map(); -const CACHE_DURATION = 24 * 60 * 60 * 1000; - -async function fetchPlayerDataHTTP(pdgaNumber) { - return new Promise((resolve, reject) => { - const options = { - hostname: 'www.pdga.com', - port: 443, - path: `/player/${pdgaNumber}`, - method: 'GET', - headers: { - 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36' - }, - timeout: 30000 - }; - - const req = https.request(options, (res) => { - let data = ''; - res.on('data', (chunk) => { - data += chunk; - }); - - res.on('end', () => { - if (res.statusCode === 200) { - resolve(data); - } else { - // Log rate limiting information if available - const rateLimitInfo = { - statusCode: res.statusCode, - headers: res.headers - }; - - console.log(`PDGA Response Status for #${pdgaNumber}: ${res.statusCode}`); - console.log('Response Headers:', JSON.stringify(res.headers, null, 2)); - - // Check for common rate limiting headers - if (res.headers['retry-after']) { - console.log(`Retry-After header: ${res.headers['retry-after']}`); - } - if (res.headers['x-ratelimit-limit']) { - console.log(`Rate Limit: ${res.headers['x-ratelimit-limit']}`); - } - if (res.headers['x-ratelimit-remaining']) { - console.log(`Rate Limit Remaining: ${res.headers['x-ratelimit-remaining']}`); - } - if (res.headers['x-ratelimit-reset']) { - console.log(`Rate Limit Reset: ${res.headers['x-ratelimit-reset']}`); - } - - const error = new Error(`HTTP ${res.statusCode}`); - error.rateLimitInfo = rateLimitInfo; - reject(error); - } - }); - }); - - req.on('error', (error) => { - console.log(`Request error for PDGA #${pdgaNumber}:`, error.code, error.message); - if (error.code === 'ECONNRESET') { - console.log('Connection reset - likely rate limited by PDGA'); - } - reject(error); - }); - - req.on('timeout', () => { - req.destroy(); - reject(new Error('Request timeout')); - }); - - req.setTimeout(30000); - req.end(); - }); -} - -function parsePlayerData(html, pdgaNumber) { - try { - // Extract player name from title - const nameMatch = html.match(/([^<]+?)\s*\|\s*Professional Disc Golf Association/i); - const name = nameMatch ? nameMatch[1].trim() : 'Unknown'; - - // Extract current rating - account for HTML tags between "Current Rating:" and the number - const ratingMatch = html.match(/Current Rating:[^>]*>\s*(\d+)/i); - const rating = ratingMatch ? parseInt(ratingMatch[1]) : 0; - - // Extract rating change - look for the +/- number in the rating context - const changeMatch = html.match(/Current Rating:[\s\S]*?([+\-]\d+)[\s\S]*?\(as of/i); - const ratingChange = changeMatch ? parseInt(changeMatch[1]) : null; - - return { - pdgaNumber, - name: name.replace(/\s*#\d+$/, ''), - rating, - ratingChange, - predictedRating: null - }; - } catch (error) { - console.error(`Error parsing data for PDGA ${pdgaNumber}:`, error.message); - return { - pdgaNumber, - name: 'Error', - rating: 0, - ratingChange: null, - predictedRating: null - }; - } -} - -// Function to get player data from DB only (for page loads) -async function getPlayerDataFromDB(pdgaNumber) { - try { - const cachedPlayer = await getPlayerFromDB(pdgaNumber); - if (cachedPlayer) { - console.log(`Loading PDGA ${pdgaNumber} from DB (source of truth)`); - - // Use stored predicted_rating if available, otherwise calculate it from round history - let predictedRating = cachedPlayer.predicted_rating; - let stdDev = cachedPlayer.std_dev; - if (!predictedRating || predictedRating === 0) { - predictedRating = await getPredictedRatingFromDB(pdgaNumber); - // After calculation, re-fetch to get the updated std_dev - const updatedPlayer = await getPlayerFromDB(pdgaNumber); - stdDev = updatedPlayer?.std_dev; - } - - return { - pdgaNumber: cachedPlayer.pdga_number, - name: cachedPlayer.name, - rating: cachedPlayer.current_rating, - ratingChange: cachedPlayer.rating_change, - predictedRating: predictedRating > 0 ? predictedRating : null, - stdDev: stdDev > 0 ? stdDev : null - }; - } - return null; // No data in DB - } catch (err) { - console.error(`Database error for PDGA ${pdgaNumber}:`, err.message); - return null; - } -} - -// Function for explicit refresh (scrape PDGA + update DB) -async function scrapePDGARating(pdgaNumber, retries = 3) { - console.log(`=== Refreshing PDGA ${pdgaNumber} from PDGA website ===`); - - for (let attempt = 1; attempt <= retries; attempt++) { - try { - console.log(`Attempt ${attempt}/${retries} for PDGA ${pdgaNumber} (using HTTP)`); - - const html = await fetchPlayerDataHTTP(pdgaNumber); - const result = parsePlayerData(html, pdgaNumber); - - // Save to database - try { - await savePlayerToDB(result); - console.log(`Saved PDGA ${pdgaNumber} to database`); - } catch (dbErr) { - console.error(`Failed to save PDGA ${pdgaNumber} to database:`, dbErr.message); - } - - console.log(`Successfully scraped PDGA ${pdgaNumber} on attempt ${attempt}`); - return result; - - } catch (error) { - console.error(`Attempt ${attempt}/${retries} failed for PDGA ${pdgaNumber}:`, error.message); - - if (attempt === retries) { - return { - pdgaNumber, - name: 'Error', - rating: 0, - ratingChange: null, - predictedRating: null - }; - } - - // Adaptive retry delay based on error type - let retryDelay = 2000 * attempt; // Base delay - - if (error.rateLimitInfo) { - const retryAfter = error.rateLimitInfo.headers['retry-after']; - if (retryAfter) { - // If server tells us when to retry, use that + some buffer - retryDelay = Math.max(retryDelay, (parseInt(retryAfter) + 1) * 1000); - console.log(`Using Retry-After header: waiting ${retryDelay/1000}s`); - } - } - - if (error.code === 'ECONNRESET') { - // Connection reset usually means rate limiting - wait longer - retryDelay = Math.max(retryDelay, 10000); - console.log(`Connection reset detected: waiting ${retryDelay/1000}s`); - } - - await new Promise(resolve => setTimeout(resolve, retryDelay)); - } - } -} - -async function getPredictedRating(browser, pdgaNumber, retries = 2) { - for (let attempt = 1; attempt <= retries; attempt++) { - try { - console.log(`Predicted rating attempt ${attempt}/${retries} for PDGA ${pdgaNumber}`); - const roundRatings = await getPlayerCompetitionRatings(browser, pdgaNumber); - const result = calculatePredictedRating(roundRatings); - - if (result.rating > 0) { - return result.rating; - } - - if (attempt < retries) { - console.log(`No ratings found, waiting before retry...`); - await new Promise(resolve => setTimeout(resolve, 5000)); - } - } catch (error) { - console.error(`Predicted rating attempt ${attempt}/${retries} failed for ${pdgaNumber}:`, error.message); - if (attempt < retries) { - await new Promise(resolve => setTimeout(resolve, 5000)); - } - } - } - - console.log(`All attempts failed for predicted rating of PDGA ${pdgaNumber}`); - return 0; -} - -async function getPredictedRatingFromDB(pdgaNumber) { - try { - const roundHistory = await getRoundHistoryFromDB(pdgaNumber); - if (roundHistory.length > 0) { - console.log(`Using ${roundHistory.length} cached rounds for PDGA ${pdgaNumber} prediction`); - - // Convert to the format expected by calculatePredictedRating - const roundRatings = roundHistory.map(round => ({ - rating: round.rating, - date: new Date(round.date), - competition: round.competition_name || 'Unknown' - })); - - const result = calculatePredictedRating(roundRatings); - - // Save the calculated prediction to database - await savePredictedRatingToDB(pdgaNumber, result.rating, result.stdDev); - - return result.rating; - } - return 0; - } catch (err) { - console.error(`Error getting predicted rating from DB for ${pdgaNumber}:`, err.message); - return 0; - } -} - -function savePredictedRatingToDB(pdgaNumber, predictedRating, stdDev = null) { - return new Promise((resolve, reject) => { - db.run( - 'UPDATE players SET predicted_rating = ?, std_dev = ? WHERE pdga_number = ?', - [predictedRating, stdDev, pdgaNumber], - function(err) { - if (err) reject(err); - else resolve(); - } - ); - }); -} - -async function getOfficialRatingHistory(browser, pdgaNumber) { - const page = await browser.newPage(); - let ratingHistory = []; - - try { - const url = `https://www.pdga.com/player/${pdgaNumber}/history`; - await page.goto(url, { waitUntil: 'networkidle2', timeout: 45000 }); - await page.waitForTimeout(1000); // Reduced delay - - // Extract the rating history data - ratingHistory = await page.evaluate(() => { - const history = []; - - // Try each selector until we find rating data - const selectors = [ - 'table tbody tr', - 'table tr', - '.view-content tbody tr' - ]; - - for (const selector of selectors) { - const rows = document.querySelectorAll(selector); - - for (const row of rows) { - const cells = row.querySelectorAll('td'); - if (cells.length >= 3) { - const dateText = cells[0]?.innerText?.trim(); - const ratingText = cells[1]?.innerText?.trim(); - - // Check if this looks like a date and rating - if (dateText && ratingText && /^\d{4}-\d{2}-\d{2}$|^\d{1,2}-\w{3}-\d{4}$|^\w{3} \d{1,2}, \d{4}$/.test(dateText)) { - const rating = parseInt(ratingText); - if (!isNaN(rating) && rating > 800 && rating < 1200) { - history.push({ - date: dateText, - rating: rating, - tournament: cells[2]?.innerText?.trim() || 'Unknown' - }); - } - } - } - } - - if (history.length > 0) break; - } - - return history; - }); - - - } catch (error) { - console.error('Error fetching official rating history:', error.message); - } finally { - await page.close(); - } - - return ratingHistory; -} - -async function getPlayerTournamentDetails(browser, pdgaNumber) { - const page = await browser.newPage(); - let tournamentRounds = []; - - try { - const url = `https://www.pdga.com/player/${pdgaNumber}/details`; - await page.goto(url, { waitUntil: 'networkidle2', timeout: 45000 }); - await page.waitForTimeout(1000); // Reduced delay - - // Extract individual tournament rounds with actual dates and ratings - tournamentRounds = await page.evaluate(() => { - const rounds = []; - const rows = document.querySelectorAll('table tbody tr'); - - // Log first few rows to see structure - console.log('First few table rows for debugging:'); - for (let i = 0; i < Math.min(3, rows.length); i++) { - const cells = rows[i].querySelectorAll('td'); - const cellTexts = Array.from(cells).map(cell => cell.innerText.trim()); - console.log(`Row ${i}: [${cellTexts.join(' | ')}]`); - } - - rows.forEach(row => { - const cells = row.querySelectorAll('td'); - - // Try to identify which columns contain date and rating information - if (cells.length >= 4) { - const cellTexts = Array.from(cells).map(cell => cell.innerText.trim()); - - // Look for patterns in the data - let tournamentName = ''; - let dateText = ''; - let rating = 0; - let division = ''; - - // Try to find date and rating in different column positions - cellTexts.forEach((text, index) => { - // Look for date patterns, including multi-day tournaments - // Examples: "2-Sep-2023", "2-Sep to 3-Sep-2023", "2 to 3-Sep-2023" - if (/\d{1,2}(-\w{3})?(\s+to\s+)\d{1,2}-\w{3}-\d{4}/.test(text) || /\d{1,2}-\w{3}-\d{4}/.test(text)) { - dateText = text; - } - // Look for rating patterns (3-4 digit numbers between 800-1200) - if (/^\d{3,4}$/.test(text) && parseInt(text) >= 800 && parseInt(text) <= 1200) { - rating = parseInt(text); - } - // Look for division patterns (like MA3, MPO, etc.) - if (/^M[A-Z]\d*$|^F[A-Z]\d*$/.test(text)) { - division = text; - } - // First cell is usually tournament name - if (index === 0) { - tournamentName = text; - } - }); - - if (tournamentName && dateText && rating > 0) { - rounds.push({ - tournament: tournamentName, - dateText: dateText, - rating: rating, - division: division, - competition: `${tournamentName} (${division})` - }); - } - } - }); - - return rounds; - }); - - // Parse dates properly after extraction - const fixedRounds = tournamentRounds.map(round => { - let validDate = new Date(); - if (round.dateText) { - try { - const pdgaParsed = parseDate(round.dateText); - if (pdgaParsed instanceof Date && !isNaN(pdgaParsed.getTime())) { - validDate = pdgaParsed; - } else { - const nativeParsed = new Date(round.dateText); - if (!isNaN(nativeParsed.getTime())) { - validDate = nativeParsed; - } - } - } catch (e) { - console.log(`Date parsing failed for "${round.dateText}": ${e.message}`); - } - } - return { - tournament: round.tournament, - date: validDate, - rating: round.rating, - division: round.division, - competition: round.competition - }; - }); - tournamentRounds = fixedRounds; - - } catch (error) { - console.error('Error fetching tournament details:', error.message); - } finally { - await page.close(); - } - - return tournamentRounds; -} - -// Get the most recent tournament date from /details page (official rating rounds) -async function getLatestOfficialRoundDate(browser, pdgaNumber) { - try { - const detailsRounds = await getPlayerTournamentDetails(browser, pdgaNumber); - if (detailsRounds.length === 0) { - return null; - } - - // Find the most recent date from details page - const sortedRounds = detailsRounds.sort((a, b) => b.date - a.date); - const latestDate = sortedRounds[0].date; - - console.log(`Latest official round date for PDGA ${pdgaNumber}: ${latestDate.toDateString()}`); - return latestDate; - } catch (error) { - console.error('Error getting latest official round date:', error.message); - return null; - } -} - -// Get NEW tournament rounds (played after the latest official round) -async function getNewTournamentRounds(browser, pdgaNumber, afterDate) { - const page = await browser.newPage(); - let newRounds = []; - - try { - const url = `https://www.pdga.com/player/${pdgaNumber}`; - await page.goto(url, { waitUntil: 'networkidle2' }); - - console.log(`Looking for tournaments after ${afterDate.toDateString()}...`); - - // Get tournament URLs that are newer than afterDate - const newTournamentUrls = await page.evaluate((afterTimestamp) => { - const afterDate = new Date(afterTimestamp); - const tables = document.querySelectorAll('table[id*="player-results"]'); - const urls = []; - - tables.forEach(table => { - const rows = table.querySelectorAll('tbody tr'); - rows.forEach(row => { - const dateCell = row.querySelector('.dates'); - const tournamentCell = row.querySelector('.tournament a'); - - if (dateCell && tournamentCell) { - const dateText = dateCell.innerText.trim(); - const dateMatch = dateText.match(/\d{1,2}-[A-Za-z]{3}-\d{4}/); - - if (dateMatch) { - const dateStr = dateMatch[0]; - const date = new Date(dateStr); - - // Only include tournaments AFTER the latest official round - if (date > afterDate) { - const href = tournamentCell.getAttribute('href'); - if (href) { - urls.push({ - url: `https://www.pdga.com${href}`, - date: dateStr, - name: tournamentCell.innerText.trim() - }); - } - } - } - } - }); - }); - - return urls; - }, afterDate.getTime()); - - console.log(`Found ${newTournamentUrls.length} new tournaments after ${afterDate.toDateString()}`); - - // Scrape individual round ratings from new tournaments - for (const tournamentData of newTournamentUrls) { - try { - console.log(`Scraping new tournament: ${tournamentData.name} (${tournamentData.date})`); - - await page.goto(tournamentData.url, { waitUntil: 'domcontentloaded', timeout: 30000 }); - await page.waitForTimeout(500); // Reduced from 2s to 0.5s since we're only scraping a few tournaments - - const roundRatings = await page.evaluate((pdgaNum) => { - const rows = document.querySelectorAll('tr'); - - for (const row of rows) { - const cells = row.querySelectorAll('td'); - const hasPlayerNumber = Array.from(cells).some(cell => - cell.innerText && cell.innerText.includes(pdgaNum.toString()) - ); - - if (hasPlayerNumber) { - const roundRatingCells = row.querySelectorAll('td.round-rating'); - const ratings = []; - - roundRatingCells.forEach(cell => { - const rating = parseInt(cell.innerText.trim()); - if (!isNaN(rating) && rating > 0) { - ratings.push(rating); - } - }); - - return ratings; - } - } - - return []; - }, pdgaNumber); - - if (roundRatings.length > 0) { - const parsedDate = parseDate(tournamentData.date); - roundRatings.forEach(rating => { - newRounds.push({ - rating, - date: parsedDate, - competition: tournamentData.name - }); - }); - - console.log(`✓ Found ${roundRatings.length} round ratings for ${tournamentData.name}`); - } - - } catch (error) { - console.error(`Error scraping tournament ${tournamentData.name}:`, error.message); - } - } - - } catch (error) { - console.error(`Error getting new tournament rounds for PDGA ${pdgaNumber}:`, error); - } finally { - await page.close(); - } - - return newRounds; -} - -// Optimized function: Get /details rounds + new tournaments only -async function getOptimizedPlayerRounds(browser, pdgaNumber) { - console.log(`=== Optimized Round Collection for PDGA ${pdgaNumber} ===`); - - try { - // Step 1: Get all official rating rounds from /details page - console.log('Step 1: Getting official rating rounds from /details page...'); - const officialRounds = await getPlayerTournamentDetails(browser, pdgaNumber); - - if (officialRounds.length === 0) { - console.log('No official rounds found in details page'); - return []; - } - - console.log(`✓ Found ${officialRounds.length} official rating rounds`); - - // Step 2: Find the most recent official round date - const sortedRounds = officialRounds.sort((a, b) => b.date - a.date); - const latestOfficialDate = sortedRounds[0].date; - console.log(`Latest official round: ${latestOfficialDate.toDateString()}`); - - // Step 3: Get NEW tournament rounds (after latest official round) - console.log('Step 2: Looking for NEW tournaments since latest official round...'); - const newRounds = await getNewTournamentRounds(browser, pdgaNumber, latestOfficialDate); - - if (newRounds.length > 0) { - console.log(`✓ Found ${newRounds.length} new round ratings`); - } else { - console.log('ℹ No new tournaments found since latest official round'); - } - - // Step 4: Combine official rounds + new rounds - const allRounds = [ - ...officialRounds.map(round => ({ - rating: round.rating, - date: round.date, - competition: round.competition, - source: 'official' // From /details page - })), - ...newRounds.map(round => ({ - rating: round.rating, - date: round.date, - competition: round.competition, - source: 'new' // From individual tournaments - })) - ]; - - // Sort by date (oldest first) - allRounds.sort((a, b) => a.date - b.date); - - console.log(`=== Summary: ${officialRounds.length} official + ${newRounds.length} new = ${allRounds.length} total rounds ===`); - - return allRounds; - - } catch (error) { - console.error('Error in optimized round collection:', error.message); - return []; - } -} - -// Legacy function - keep for backward compatibility but mark as deprecated -async function getPlayerCompetitionRatings(browser, pdgaNumber, sinceDate = null) { - const page = await browser.newPage(); - let allRatings = []; - let tournamentCount = 0; - let successfulTournaments = 0; - - try { - const url = `https://www.pdga.com/player/${pdgaNumber}`; - await page.goto(url, { waitUntil: 'networkidle2' }); - - // Calculate the next PDGA update date to filter tournaments - const nextUpdateDate = getNextPDGAUpdateDate(); - - const tournamentUrls = await page.evaluate((nextUpdateTimestamp, sinceDateString) => { - const nextUpdateDate = new Date(nextUpdateTimestamp); - const sinceDate = sinceDateString ? new Date(sinceDateString) : null; - const tables = document.querySelectorAll('table[id*="player-results"]'); - const urls = []; - - tables.forEach(table => { - const rows = table.querySelectorAll('tbody tr'); - rows.forEach(row => { - const dateCell = row.querySelector('.dates'); - const tournamentCell = row.querySelector('.tournament a'); - - if (dateCell && tournamentCell) { - const dateText = dateCell.innerText.trim(); - const dateMatch = dateText.match(/\d{1,2}-[A-Za-z]{3}-\d{4}/); - - if (dateMatch) { - const dateStr = dateMatch[0]; - const date = new Date(dateStr); - const oneYearAgo = new Date(); - oneYearAgo.setFullYear(oneYearAgo.getFullYear() - 1); - - // Apply date filters - const dateValid = date > oneYearAgo && date < nextUpdateDate; - const isNewTournament = !sinceDate || date > sinceDate; - - if (dateValid && isNewTournament) { - const href = tournamentCell.getAttribute('href'); - if (href) { - urls.push({ - url: `https://www.pdga.com${href}`, - date: dateStr - }); - } - } - } - } - }); - }); - - return urls; // Get all tournaments from the past year - }, nextUpdateDate.getTime(), sinceDate ? sinceDate.toISOString() : null); - - const updateType = sinceDate ? `incremental (since ${sinceDate.toDateString()})` : 'full'; - console.log(`Found ${tournamentUrls.length} tournaments for PDGA ${pdgaNumber} (${updateType})`); - - for (const tournamentData of tournamentUrls) { - tournamentCount++; - try { - console.log(`[${tournamentCount}/${tournamentUrls.length}] Navigating to tournament: ${tournamentData.url}`); - const navigationStart = Date.now(); - - try { - await page.goto(tournamentData.url, { waitUntil: 'domcontentloaded', timeout: 45000 }); - const navigationTime = Date.now() - navigationStart; - console.log(`✓ Navigation completed in ${navigationTime}ms`); - } catch (navError) { - console.error(`✗ Navigation failed for ${tournamentData.url}:`); - console.error('Navigation error details:', { - type: navError.constructor.name, - message: navError.message, - code: navError.code, - stack: navError.stack?.split('\n')[0] - }); - throw navError; // Re-throw to be caught by outer try-catch - } - - console.log(`Waiting 1s before scraping tournament data...`); - await page.waitForTimeout(1000); // Reduced delay for optimized approach - - console.log(`Starting page evaluation for PDGA ${pdgaNumber}...`); - let roundRatings; - try { - roundRatings = await page.evaluate((pdgaNum) => { - const rows = document.querySelectorAll('tr'); - - for (const row of rows) { - const cells = row.querySelectorAll('td'); - const hasPlayerNumber = Array.from(cells).some(cell => - cell.innerText && cell.innerText.includes(pdgaNum.toString()) - ); - - if (hasPlayerNumber) { - const roundRatingCells = row.querySelectorAll('td.round-rating'); - const ratings = []; - - roundRatingCells.forEach(cell => { - const rating = parseInt(cell.innerText.trim()); - if (!isNaN(rating) && rating > 0) { - ratings.push(rating); - } - }); - - return ratings; - } - } - - return []; - }, pdgaNumber); - console.log(`✓ Page evaluation completed, found ${roundRatings.length} round ratings`); - } catch (evalError) { - console.error(`✗ Page evaluation failed for ${tournamentData.url}:`); - console.error('Evaluation error details:', { - type: evalError.constructor.name, - message: evalError.message, - code: evalError.code, - stack: evalError.stack?.split('\n')[0] - }); - throw evalError; // Re-throw to be caught by outer try-catch - } - - if (roundRatings.length > 0) { - const parsedDate = parseDate(tournamentData.date); - // Extract tournament name from URL for better database storage - const tournamentName = tournamentData.url.split('/').pop() || 'Unknown Tournament'; - - const newRounds = []; - roundRatings.forEach(rating => { - const roundData = { - rating, - date: parsedDate, - competition: tournamentName - }; - allRatings.push(roundData); - newRounds.push(roundData); - }); - - successfulTournaments++; - console.log(`✓ [${tournamentCount}/${tournamentUrls.length}] Found ${roundRatings.length} round ratings for ${tournamentName}`); - - // Save rounds immediately to database (partial save) - try { - await saveRoundHistoryToDB(pdgaNumber, newRounds, true); - console.log(`💾 Saved ${newRounds.length} rounds to database`); - } catch (saveError) { - console.error(`âš ī¸ Could not save rounds to DB: ${saveError.message}`); - } - - } else { - console.log(`✗ [${tournamentCount}/${tournamentUrls.length}] No round ratings found for ${tournamentData.url}`); - } - - } catch (error) { - console.error(`✗ [${tournamentCount}/${tournamentUrls.length}] Error scraping tournament ${tournamentData.url}:`); - console.error('Tournament error type:', error.constructor.name); - console.error('Tournament error message:', error.message); - console.error('Tournament error code:', error.code); - console.error('Tournament error name:', error.name); - console.error('Tournament full error object:', JSON.stringify(error, Object.getOwnPropertyNames(error), 2)); - - // Log the current state when error occurs - console.error(`Tournament scraping progress: ${tournamentCount}/${tournamentUrls.length} (${successfulTournaments} successful so far)`); - console.error(`Total rounds collected before this error: ${allRatings.length}`); - - if (error.message.includes('socket hang up')) { - console.error('🔌 Socket hang up detected at tournament level - PDGA may be rate limiting'); - console.error('💡 Will continue trying remaining tournaments after this failure'); - } - if (error.message.includes('Navigation timeout')) { - console.error('⏰ Navigation timeout at tournament level - page took too long to load'); - } - if (error.message.includes('net::ERR_CONNECTION_RESET')) { - console.error('đŸšĢ Connection reset at tournament level - PDGA blocking requests'); - } - - // Don't let individual tournament failures stop the whole process - console.error('âš ī¸ Continuing with next tournament despite this error...'); - } - } - - // Log summary of scraping results - console.log(`=== Scraping Summary for PDGA ${pdgaNumber} ===`); - console.log(`Tournaments processed: ${tournamentCount}/${tournamentUrls.length}`); - console.log(`Successful tournaments: ${successfulTournaments}`); - console.log(`Total rounds found: ${allRatings.length}`); - console.log(`Completion rate: ${Math.round((successfulTournaments / tournamentUrls.length) * 100)}%`); - - } catch (error) { - console.error(`Error getting competition ratings for PDGA ${pdgaNumber}:`, error); - console.error(`=== Partial Results Before Error ===`); - console.error(`Tournaments processed: ${tournamentCount}/${tournamentUrls.length || 0}`); - console.error(`Successful tournaments: ${successfulTournaments}`); - console.error(`Total rounds collected: ${allRatings.length}`); - if (allRatings.length > 0) { - console.error(`Rounds saved to database before error occurred`); - } - } finally { - await page.close(); - } - - // Return all ratings from the last year (already filtered above) - return allRatings; -} - -function parseDate(dateStr) { - // Handle multi-day tournament formats first - // Examples: "2-Sep to 3-Sep-2023", "2 to 3-Sep-2023" - const multiDayMatch = dateStr.match(/^(\d{1,2})(-([A-Za-z]{3}))?(\s+to\s+)(\d{1,2})-([A-Za-z]{3})-(\d{4})$/); - if (multiDayMatch) { - // Extract first day and use that as the tournament date - const day = parseInt(multiDayMatch[1]); - const month = multiDayMatch[3] || multiDayMatch[6]; // Use first month if available, otherwise second - const year = parseInt(multiDayMatch[7]); - - const monthMap = { - 'Jan': 0, 'Feb': 1, 'Mar': 2, 'Apr': 3, 'May': 4, 'Jun': 5, - 'Jul': 6, 'Aug': 7, 'Sep': 8, 'Oct': 9, 'Nov': 10, 'Dec': 11 - }; - - return new Date(year, monthMap[month], day); - } - - const formats = [ - /^(\d{1,2})-([A-Za-z]{3})-(\d{4})$/, - /^(\d{1,2})\/(\d{1,2})\/(\d{4})$/ - ]; - - for (const format of formats) { - const match = dateStr.match(format); - if (match) { - if (format === formats[0]) { - const monthMap = { - 'Jan': 0, 'Feb': 1, 'Mar': 2, 'Apr': 3, 'May': 4, 'Jun': 5, - 'Jul': 6, 'Aug': 7, 'Sep': 8, 'Oct': 9, 'Nov': 10, 'Dec': 11 - }; - const day = parseInt(match[1]); - const month = monthMap[match[2]]; - const year = parseInt(match[3]); - return new Date(year, month, day); - } - } - } - - return new Date(dateStr); -} - -function getNextPDGAUpdateDate() { - const today = new Date(); - const currentMonth = today.getMonth(); - const currentYear = today.getFullYear(); - - // Calculate 2nd Tuesday of current month - const firstDayOfMonth = new Date(currentYear, currentMonth, 1); - const firstTuesday = new Date(firstDayOfMonth); - - // Find first Tuesday (day 2 = Tuesday, 0 = Sunday) - const daysUntilTuesday = (2 - firstDayOfMonth.getDay() + 7) % 7; - firstTuesday.setDate(1 + daysUntilTuesday); - - // Second Tuesday is 7 days after first Tuesday - const secondTuesday = new Date(firstTuesday); - secondTuesday.setDate(firstTuesday.getDate() + 7); - - // If today is before or on the 2nd Tuesday of this month, use this month's date - // Otherwise, use next month's 2nd Tuesday - if (today <= secondTuesday) { - return secondTuesday; - } else { - // Calculate 2nd Tuesday of next month - const nextMonth = currentMonth === 11 ? 0 : currentMonth + 1; - const nextYear = currentMonth === 11 ? currentYear + 1 : currentYear; - - const firstDayNextMonth = new Date(nextYear, nextMonth, 1); - const firstTuesdayNext = new Date(firstDayNextMonth); - - const daysUntilTuesdayNext = (2 - firstDayNextMonth.getDay() + 7) % 7; - firstTuesdayNext.setDate(1 + daysUntilTuesdayNext); - - const secondTuesdayNext = new Date(firstTuesdayNext); - secondTuesdayNext.setDate(firstTuesdayNext.getDate() + 7); - - return secondTuesdayNext; - } -} - -function calculatePredictedRating(roundRatings) { - const debugLog = []; - debugLog.push('=== PDGA RATING CALCULATION (Following Official Rules) ==='); - - if (!roundRatings || roundRatings.length === 0) { - debugLog.push('❌ No rounds provided for prediction'); - return { rating: 0, debugLog }; - } - - debugLog.push(`📊 Starting with ${roundRatings.length} total rounds`); - - // PDGA Simulation: Only include rounds that would be rated by next update - const nextUpdateDate = getNextPDGAUpdateDate(); - debugLog.push(`đŸŽ¯ PDGA Update Simulation: Next update date is ${nextUpdateDate.toDateString()}`); - debugLog.push(` Only including rounds played before ${nextUpdateDate.toDateString()}`); - - // Sort all rounds by date (most recent first), but only include rounds before next update - const allSortedRounds = roundRatings - .filter(r => r.rating > 0 && r.date < nextUpdateDate) - .sort((a, b) => b.date - a.date); - - if (allSortedRounds.length === 0) { - debugLog.push('❌ No valid rounds after filtering for update date'); - return { rating: 0, debugLog }; - } - - debugLog.push(`📊 After update date filter: ${allSortedRounds.length} rounds`); - - // PDGA Rule: Use rounds from 12 months prior to next update date - const twelveMonthsBeforeUpdate = new Date(nextUpdateDate); - twelveMonthsBeforeUpdate.setFullYear(twelveMonthsBeforeUpdate.getFullYear() - 1); - - const mostRecentDate = allSortedRounds[0].date; - debugLog.push(`📅 Most recent round: ${mostRecentDate.toDateString()}`); - debugLog.push(`📅 12-month cutoff: ${twelveMonthsBeforeUpdate.toDateString()} (1 year before update)`); - - // Step 1: Get rounds from last 12 months before update - let eligibleRounds = allSortedRounds.filter(r => r.date >= twelveMonthsBeforeUpdate); - - debugLog.push('đŸ—“ī¸ 12-MONTH FILTERING:'); - debugLog.push(`✅ Rounds in last 12 months: ${eligibleRounds.length}`); - - // PDGA Rule: If fewer than 8 rounds in 12 months, extend to 24 months before update - if (eligibleRounds.length < 8) { - const twentyFourMonthsBeforeUpdate = new Date(nextUpdateDate); - twentyFourMonthsBeforeUpdate.setFullYear(twentyFourMonthsBeforeUpdate.getFullYear() - 2); - - eligibleRounds = allSortedRounds.filter(r => r.date >= twentyFourMonthsBeforeUpdate); - debugLog.push(`âš ī¸ Extended to 24 months before update (${twentyFourMonthsBeforeUpdate.toDateString()}) - now ${eligibleRounds.length} rounds`); - } - - if (eligibleRounds.length === 0) { - debugLog.push('❌ No eligible rounds found'); - return { rating: 0, debugLog }; - } - - debugLog.push(`📈 ELIGIBLE ROUNDS: ${eligibleRounds.length}`); - eligibleRounds.forEach((round, index) => { - debugLog.push(` ${index + 1}. ${round.date.toDateString()}: ${round.rating} (${round.competition})`); - }); - - let workingRounds = [...eligibleRounds]; - let workingRatings = workingRounds.map(r => r.rating); - - // PDGA Rule: Apply outlier exclusion if â‰Ĩ7 rounds - if (workingRatings.length >= 7) { - debugLog.push('🔍 OUTLIER EXCLUSION (â‰Ĩ7 rounds available):'); - - const mean = workingRatings.reduce((sum, r) => sum + r, 0) / workingRatings.length; - const stdDev = calculateStandardDeviation(workingRatings); - - debugLog.push(` Mean: ${mean.toFixed(1)}`); - debugLog.push(` Std Dev: ${stdDev.toFixed(1)}`); - - // Two PDGA exclusion rules: - // 1. More than 2.5 standard deviations below average - const stdDevCutoff = mean - 2.5 * stdDev; - // 2. More than 100 points below average - const hundredPointCutoff = mean - 100; - - debugLog.push(` 2.5΃ cutoff: ${stdDevCutoff.toFixed(1)}`); - debugLog.push(` 100-point cutoff: ${hundredPointCutoff.toFixed(1)}`); - - const filteredByStdDev = workingRatings.filter(rating => rating >= stdDevCutoff); - const filteredBy100Points = workingRatings.filter(rating => rating >= hundredPointCutoff); - - // Apply both exclusion rules - const filteredRatings = workingRatings.filter(rating => - rating >= stdDevCutoff && rating >= hundredPointCutoff - ); - - const stdDevOutliers = workingRatings.filter(rating => rating < stdDevCutoff); - const hundredPointOutliers = workingRatings.filter(rating => rating < hundredPointCutoff && rating >= stdDevCutoff); - - if (stdDevOutliers.length > 0) { - debugLog.push(` ❌ 2.5΃ outliers removed: ${stdDevOutliers.length} rounds`); - stdDevOutliers.forEach(rating => { - const round = workingRounds.find(r => r.rating === rating); - debugLog.push(` - ${rating} (${round.date.toDateString()}: ${round.competition})`); - }); - } - - if (hundredPointOutliers.length > 0) { - debugLog.push(` ❌ 100-point outliers removed: ${hundredPointOutliers.length} rounds`); - hundredPointOutliers.forEach(rating => { - const round = workingRounds.find(r => r.rating === rating); - debugLog.push(` - ${rating} (${round.date.toDateString()}: ${round.competition})`); - }); - } - - if (stdDevOutliers.length === 0 && hundredPointOutliers.length === 0) { - debugLog.push(` ✅ No outliers detected`); - } - - // Keep filtered rounds only if we still have enough data - if (filteredRatings.length >= 4) { - workingRounds = workingRounds.filter(round => - round.rating >= stdDevCutoff && round.rating >= hundredPointCutoff - ); - workingRatings = filteredRatings; - debugLog.push(` ✅ Using ${filteredRatings.length} rounds after outlier removal`); - } else { - debugLog.push(` âš ī¸ Too few rounds after outlier removal (${filteredRatings.length}), keeping all rounds`); - } - } else { - debugLog.push(`â­ī¸ OUTLIER EXCLUSION SKIPPED (only ${workingRatings.length} rounds, need â‰Ĩ7)`); - } - - // PDGA Rule: Most recent 25% of rounds get double weight if â‰Ĩ9 rounds - debugLog.push('âš–ī¸ WEIGHTING (Most recent 25% count double if â‰Ĩ9 rounds):'); - const weightedRatings = []; - - if (workingRatings.length >= 9) { - const recentCount = Math.round(workingRatings.length * 0.25); - debugLog.push(` ✅ Double-weighting most recent ${recentCount} rounds`); - - // Add all ratings once - weightedRatings.push(...workingRatings); - - // Add the most recent 25% again (double weight) - for (let i = 0; i < recentCount; i++) { - weightedRatings.push(workingRatings[i]); - const round = workingRounds[i]; - debugLog.push(` 2x weight: ${workingRatings[i]} (${round.date.toDateString()}: ${round.competition})`); - } - - debugLog.push(` 📊 Total values: ${workingRatings.length} + ${recentCount} double-weighted = ${weightedRatings.length}`); - } else { - debugLog.push(` âžĄī¸ No double weighting (${workingRatings.length} rounds, need â‰Ĩ9)`); - weightedRatings.push(...workingRatings); - } - - // Calculate final rating - const sum = weightedRatings.reduce((sum, r) => sum + r, 0); - const average = sum / weightedRatings.length; - const finalRating = Math.round(average); - - // Calculate standard deviation of the weighted ratings - const stdDev = calculateStandardDeviation(weightedRatings); - - debugLog.push('đŸŽ¯ FINAL CALCULATION:'); - debugLog.push(` Sum: ${sum}`); - debugLog.push(` Count: ${weightedRatings.length}`); - debugLog.push(` Average: ${average.toFixed(1)}`); - debugLog.push(` Standard Deviation: ${stdDev.toFixed(1)}`); - debugLog.push(` Final Rating: ${finalRating}`); - debugLog.push('=== END PDGA CALCULATION ==='); - - return { rating: finalRating, stdDev: Math.round(stdDev), debugLog }; -} - -function calculateStandardDeviation(ratings) { - if (!ratings || ratings.length === 0) return 0; - - const mean = ratings.reduce((sum, r) => sum + r, 0) / ratings.length; - const variance = ratings.reduce((sum, r) => sum + Math.pow(r - mean, 2), 0) / ratings.length; - - return Math.sqrt(variance); -} - -// Database helper functions for courses and layouts -function saveCourseToDB(courseData) { - return new Promise((resolve, reject) => { - db.run( - `INSERT OR REPLACE INTO courses (name, link, city, last_updated) - VALUES (?, ?, ?, datetime('now'))`, - [courseData.name, courseData.link, courseData.city], - function(err) { - if (err) reject(err); - else resolve(this.lastID); - } - ); - }); -} - -function getCourseFromDB(link) { - return new Promise((resolve, reject) => { - db.get( - 'SELECT * FROM courses WHERE link = ?', - [link], - (err, row) => { - if (err) reject(err); - else resolve(row); - } - ); - }); -} - -function getAllCoursesFromDB() { - return new Promise((resolve, reject) => { - db.all( - 'SELECT * FROM courses ORDER BY name ASC', - [], - (err, rows) => { - if (err) reject(err); - else resolve(rows); - } - ); - }); -} - -function saveLayoutToDB(courseId, layoutData) { - return new Promise((resolve, reject) => { - db.run( - `INSERT OR IGNORE INTO layouts (course_id, name, par) - VALUES (?, ?, ?)`, - [courseId, layoutData.name, layoutData.par], - function(err) { - if (err) reject(err); - else resolve(this.lastID); - } - ); - }); -} - -function getLayoutsForCourse(courseId) { - return new Promise((resolve, reject) => { - db.all( - 'SELECT * FROM layouts WHERE course_id = ? ORDER BY last_played DESC, name ASC', - [courseId], - (err, rows) => { - if (err) reject(err); - else resolve(rows); - } - ); - }); -} - -function updateLayoutRating(courseId, layoutName, par, meanRating, ratingCount, lastPlayed = null) { - return new Promise((resolve, reject) => { - db.run( - `UPDATE layouts - SET mean_rating = ?, rating_count = ?, last_calculated = datetime('now'), last_played = ? - WHERE course_id = ? AND name = ? AND par = ?`, - [meanRating, ratingCount, lastPlayed, courseId, layoutName, par], - function(err) { - if (err) reject(err); - else resolve(this.changes); - } - ); - }); -} - -// Course scraping functions -async function scrapeCourseDirectory(browser) { - console.log('=== Scraping Swedish courses from PDGA course directory ==='); - const page = await browser.newPage(); - const allCourses = []; - let pageNumber = 0; - let hasMorePages = true; - - try { - while (hasMorePages) { - const url = `https://www.pdga.com/course-directory/advanced?title=&field_course_location_country=SE&field_course_location_locality=&field_course_location_administrative_area=All&field_course_location_postal_code=&field_course_type_value=All&rating_value=All&field_course_holes_value=18-100&field_course_total_length_value=All&field_course_target_type_value=All&field_course_tee_type_value=All&field_location_type_value=All&field_course_camping_value=All&field_course_facilities_value=All&field_course_fees_value=All&field_course_handicap_value=All&field_course_private_value=All&field_course_signage_value=All&field_cart_friendly_value=All&page=${pageNumber}`; - - console.log(`Scraping page ${pageNumber}...`); - await page.goto(url, { waitUntil: 'networkidle2', timeout: 45000 }); - await page.waitForTimeout(1000); - - // Extract course data - const courses = await page.evaluate(() => { - const courseData = []; - const rows = document.querySelectorAll('table tbody tr'); - - rows.forEach(row => { - const titleCell = row.querySelector('td.views-field-title'); - const locationCell = row.querySelector('td.views-field-field-course-location'); - - if (titleCell) { - const link = titleCell.querySelector('a'); - if (link) { - courseData.push({ - name: link.innerText.trim(), - link: 'https://www.pdga.com' + link.getAttribute('href'), - city: locationCell ? locationCell.innerText.trim() : 'Unknown' - }); - } - } - }); - - return courseData; - }); - - if (courses.length === 0) { - console.log(`No courses found on page ${pageNumber}, stopping pagination`); - hasMorePages = false; - } else { - console.log(`Found ${courses.length} courses on page ${pageNumber}`); - allCourses.push(...courses); - - // Save courses to database - for (const course of courses) { - try { - await saveCourseToDB(course); - console.log(`✓ Saved course: ${course.name} (${course.city})`); - } catch (err) { - console.error(`Error saving course ${course.name}:`, err.message); - } - } - - pageNumber++; - - // Delay between pages to be respectful - if (hasMorePages) { - console.log('Waiting 2s before next page...'); - await new Promise(resolve => setTimeout(resolve, 2000)); - } - } - } - - console.log(`✓ Total courses scraped: ${allCourses.length} across ${pageNumber} pages`); - - } catch (error) { - console.error('Error scraping course directory:', error.message); - } finally { - await page.close(); - } - - return allCourses; -} - -async function scrapeCourseLayouts(browser, courseLink, courseId) { - console.log(`\n=== Scraping layouts from: ${courseLink} ===`); - const page = await browser.newPage(); - const layouts = []; - - try { - await page.goto(courseLink, { waitUntil: 'networkidle2', timeout: 45000 }); - await page.waitForTimeout(1000); - - // Click on Layouts tab - const layoutsTabClicked = await page.evaluate(() => { - const selectors = [ - 'a.quicktabs-tab-course_node-2', - 'li.quicktabs-tab-course_node-2 a', - 'a[href*="layouts"]', - '.quicktabs-tabs a', - 'ul.quicktabs-tabs a', - '.quicktabs-wrapper a' - ]; - - for (const selector of selectors) { - const tabs = document.querySelectorAll(selector); - for (const tab of tabs) { - const text = tab.innerText?.trim(); - if (text && (text.includes('Layouts') || text.includes('Layout'))) { - tab.click(); - return true; - } - } - } - return false; - }); - - if (layoutsTabClicked) { - console.log('✓ Layouts tab found and clicked'); - await page.waitForTimeout(3000); - } else { - console.warn('âš ī¸ Layouts tab not found - may be on a single-layout course page'); - } - - // Extract layouts from the page - const extractedLayouts = await page.evaluate(() => { - const layoutData = []; - const tournamentsDiv = document.querySelector('div.tournaments'); - - if (!tournamentsDiv) { - console.warn('No div.tournaments found on page'); - return layoutData; - } - - const tournamentCourses = tournamentsDiv.querySelectorAll('details.tournament-course'); - - tournamentCourses.forEach((details) => { - // Get the event results URL from div.results - const resultsDiv = details.querySelector('div.results'); - const resultsLink = resultsDiv ? resultsDiv.querySelector('a') : null; - const eventUrl = resultsLink ? resultsLink.getAttribute('href') : null; - const fullEventUrl = eventUrl ? 'https://www.pdga.com' + eventUrl : null; - - const layoutsDiv = details.querySelector('div.layouts'); - if (!layoutsDiv) { - return; - } - - const layoutDivs = layoutsDiv.querySelectorAll('div.layout'); - - layoutDivs.forEach((layoutDiv) => { - const h4WithClass = layoutDiv.querySelector('h4.title'); - const h4Any = layoutDiv.querySelector('h4'); - - let layoutName = ''; - if (h4WithClass) { - layoutName = (h4WithClass.textContent || h4WithClass.innerText || '').trim(); - } else if (h4Any) { - layoutName = (h4Any.textContent || h4Any.innerText || '').trim(); - } - - const allText = layoutDiv.textContent || layoutDiv.innerText || ''; - - const parPatterns = [ - /Par[:\s]+(\d+)/i, - /Par\s*=\s*(\d+)/i, - /\(Par\s+(\d+)\)/i, - /Total Par:\s*(\d+)/i - ]; - - let par = null; - for (const pattern of parPatterns) { - const match = allText.match(pattern); - if (match) { - par = parseInt(match[1]); - break; - } - } - - // Extract divisions from li.divisions - const divisionsLi = layoutDiv.querySelector('li.divisions'); - let divisions = []; - if (divisionsLi) { - const divisionsText = (divisionsLi.textContent || '').replace('Divisions:', '').trim(); - divisions = divisionsText.split(/[,\s]+/).filter(d => d.length > 0); - } - - if (layoutName && par && !isNaN(par) && par > 0) { - layoutData.push({ - name: layoutName, - par: par, - divisions: divisions, - eventUrl: fullEventUrl - }); - } else if (layoutName) { - // Log skipped layouts for debugging - console.warn(`âš ī¸ Skipped layout "${layoutName}" - Par: ${par}, Text sample: ${allText.substring(0, 200)}`); - } - }); - }); - - return layoutData; - }); - - if (extractedLayouts.length === 0) { - console.warn('âš ī¸ No layouts extracted from page'); - } - - layouts.push(...extractedLayouts); - - // Store all layout data in memory cache - const courseIdInt = typeof courseId === 'string' ? parseInt(courseId) : courseId; - layoutEventCache.set(courseIdInt, layouts); - - console.log(`✓ Successfully parsed ${layouts.length} layouts from course page`); - - // Deduplicate for database: same name + same par = same layout - const uniqueLayouts = []; - const seen = new Set(); - - for (const layout of layouts) { - const key = `${layout.name}|${layout.par}`; - if (!seen.has(key)) { - seen.add(key); - uniqueLayouts.push(layout); - } - } - - if (uniqueLayouts.length < layouts.length) { - console.log(`â„šī¸ Deduplicated to ${uniqueLayouts.length} unique layouts`); - } - - // Save layouts to database - for (const layout of uniqueLayouts) { - try { - await saveLayoutToDB(courseId, layout); - console.log(` ✓ Saved layout: ${layout.name} (Par ${layout.par})`); - } catch (err) { - console.error(` ✗ Error saving layout ${layout.name}:`, err.message); - } - } - - } catch (error) { - console.error('Error scraping course layouts:', error.message); - } finally { - await page.close(); - } - - return layouts; -} - -async function scrapeEventResults(browser, eventUrl, layoutsWithDivisions) { - const page = await browser.newPage(); - const layoutRatings = {}; // key: layout name+par, value: array of ratings - - try { - await page.goto(eventUrl, { waitUntil: 'networkidle2', timeout: 45000 }); - await page.waitForTimeout(1000); - - // Extract event date by searching for date pattern in page text - const eventDateRaw = await page.evaluate(() => { - const allText = document.body.textContent; - const datePattern = /\d{1,2}-[A-Z][a-z]{2}-\d{4}/; - const match = allText.match(datePattern); - return match ? match[0] : null; - }); - - // Parse date from format like "29-Aug-2025" to ISO format "2025-08-29" - let eventDate = null; - if (eventDateRaw) { - try { - const parsedDate = new Date(eventDateRaw); - if (!isNaN(parsedDate.getTime())) { - eventDate = parsedDate.toISOString().split('T')[0]; // Get YYYY-MM-DD format - } - } catch (e) { - // Ignore date parsing errors - } - } - - // Process each layout - for (const layout of layoutsWithDivisions) { - const layoutKey = `${layout.name}|${layout.par}`; - const ratingsForLayout = []; - - // For each division in this layout - for (const division of layout.divisions) { - const divisionData = await page.evaluate((divisionName, targetPar) => { - // Find the details tag that contains h3 with the matching division ID - const divisionH3 = document.querySelector(`h3#${divisionName}`); - if (!divisionH3) { - return { found: false, ratings: [] }; - } - - // Find the parent details tag - const detailsTag = divisionH3.closest('details'); - if (!detailsTag) { - return { found: false, ratings: [] }; - } - - // Find the table.results inside this details tag - const table = detailsTag.querySelector('table.results'); - if (!table) { - return { found: false, ratings: [] }; - } - - // Find all rows with results matching target par - const ratings = []; - const rows = table.querySelectorAll('tbody tr'); - - rows.forEach(row => { - // Get all round scores and their ratings - const roundCells = row.querySelectorAll('td.round'); - - roundCells.forEach(roundCell => { - const scoreText = (roundCell.textContent || '').trim(); - const scoreMatch = scoreText.match(/^(\d+)$/); - - if (scoreMatch) { - const scoreValue = parseInt(scoreMatch[1]); - - // Check if this round score matches target par - if (scoreValue === targetPar) { - // Get the next sibling which should be td.round-rating - const ratingCell = roundCell.nextElementSibling; - - if (ratingCell && ratingCell.classList.contains('round-rating')) { - const ratingText = (ratingCell.textContent || '').trim(); - const rating = parseInt(ratingText); - - if (!isNaN(rating) && rating > 0) { - ratings.push(rating); - } - } - } - } - }); - }); - - return { found: true, ratings: ratings }; - }, division, layout.par); - - if (divisionData.found && divisionData.ratings.length > 0) { - ratingsForLayout.push(...divisionData.ratings); - } - } - - if (ratingsForLayout.length > 0) { - const meanRating = ratingsForLayout.reduce((sum, r) => sum + r, 0) / ratingsForLayout.length; - layoutRatings[layoutKey] = { - name: layout.name, - par: layout.par, - ratings: ratingsForLayout, - count: ratingsForLayout.length, - meanRating: Math.round(meanRating), - eventDate: eventDate - }; - } - } - - } catch (error) { - console.error('Error scraping event results:', error.message); - } finally { - await page.close(); - } - - return layoutRatings; -} - -async function getAllRatingsFromDB(progressCallback = null) { - try { - // Get all players from database instead of text file - const allPlayers = await new Promise((resolve, reject) => { - db.all( - 'SELECT pdga_number, name, current_rating, rating_change FROM players ORDER BY pdga_number', - [], - (err, rows) => { - if (err) reject(err); - else resolve(rows || []); - } - ); - }); - - console.log(`Loading ${allPlayers.length} players from database...`); - - const ratings = []; - const total = allPlayers.length; - - for (let i = 0; i < allPlayers.length; i++) { - const player = allPlayers[i]; - const pdgaNumber = player.pdga_number; - - if (progressCallback) { - progressCallback({ - current: i + 1, - total, - pdgaNumber, - status: 'loading' - }); - } - - try { - // Load full player data from database - const playerData = await getPlayerDataFromDB(pdgaNumber); - - if (playerData) { - ratings.push(playerData); - } - - if (progressCallback) { - progressCallback({ - current: i + 1, - total, - pdgaNumber, - status: 'completed', - name: playerData ? playerData.name : player.name - }); - } - } catch (error) { - console.error(`Failed to load PDGA ${pdgaNumber} from database:`, error.message); - const errorData = { - pdgaNumber: parseInt(pdgaNumber), - name: player.name || 'Database Error', - rating: player.current_rating, - ratingChange: player.rating_change, - predictedRating: null - }; - ratings.push(errorData); - - if (progressCallback) { - progressCallback({ - current: i + 1, - total, - pdgaNumber, - status: 'error', - name: player.name || 'Database Error' - }); - } - } - } - - return ratings.sort((a, b) => (b.rating || 0) - (a.rating || 0)); - } catch (error) { - console.error('Error loading players from database:', error); - return []; - } -} - -app.get('/', (req, res) => { - res.sendFile(path.join(__dirname, 'index.html')); -}); - -app.get('/courses.html', (req, res) => { - res.sendFile(path.join(__dirname, 'courses.html')); -}); - -app.get('/api/ratings', async (req, res) => { - try { - const ratings = await getAllRatingsFromDB(); - res.json(ratings); - } catch (error) { - res.status(500).json({ error: 'Failed to fetch ratings' }); - } -}); - -app.get('/api/ratings/progress', (req, res) => { - res.writeHead(200, { - 'Content-Type': 'text/event-stream', - 'Cache-Control': 'no-cache', - 'Connection': 'keep-alive', - 'Access-Control-Allow-Origin': '*', - 'Access-Control-Allow-Headers': 'Cache-Control' - }); - - const progressCallback = (progress) => { - res.write(`data: ${JSON.stringify(progress)}\n\n`); - }; - - getAllRatingsFromDB(progressCallback).then(ratings => { - res.write(`data: ${JSON.stringify({ status: 'complete', ratings })}\n\n`); - res.end(); - }).catch(error => { - res.write(`data: ${JSON.stringify({ status: 'error', error: error.message })}\n\n`); - res.end(); - }); - - req.on('close', () => { - res.end(); - }); -}); - -// Endpoint to populate database from PDGA numbers file -app.post('/api/populate-database', (req, res) => { - res.writeHead(200, { - 'Content-Type': 'text/event-stream', - 'Cache-Control': 'no-cache', - 'Connection': 'keep-alive', - 'Access-Control-Allow-Origin': '*', - }); - - const progressCallback = (progress) => { - res.write(`data: ${JSON.stringify(progress)}\n\n`); - }; - - console.log('=== Starting database population from database players ==='); - - // Populate database by refreshing all players in database - refreshAllPlayersInDB(progressCallback).then(ratings => { - console.log(`=== Database population complete: ${ratings.length} players refreshed ===`); - res.write(`data: ${JSON.stringify({ status: 'complete', ratings, message: `Successfully refreshed ${ratings.length} players` })}\n\n`); - res.end(); - }).catch(error => { - console.error('Error populating database:', error); - res.write(`data: ${JSON.stringify({ status: 'error', message: error.message })}\n\n`); - res.end(); - }); -}); - -// Simple endpoint to check database status -app.get('/api/database-status', async (req, res) => { - try { - const playerCount = await new Promise((resolve, reject) => { - db.get('SELECT COUNT(*) as count FROM players', [], (err, row) => { - if (err) reject(err); - else resolve(row.count); - }); - }); - - res.json({ - playersInDB: playerCount, - needsPopulation: playerCount === 0 - }); - } catch (error) { - res.status(500).json({ error: 'Failed to check database status' }); - } -}); - -app.get('/api/load-all-players', (req, res) => { - res.writeHead(200, { - 'Content-Type': 'text/event-stream', - 'Cache-Control': 'no-cache', - 'Connection': 'keep-alive', - 'Access-Control-Allow-Origin': '*', - 'Access-Control-Allow-Headers': 'Cache-Control' - }); - - const progressCallback = (progress) => { - res.write(`data: ${JSON.stringify(progress)}\n\n`); - }; - - // Refresh all players currently in database - refreshAllPlayersInDB(progressCallback).then(ratings => { - res.write(`data: ${JSON.stringify({ status: 'complete', ratings })}\n\n`); - res.end(); - }).catch(error => { - res.write(`data: ${JSON.stringify({ status: 'error', error: error.message })}\n\n`); - res.end(); - }); - - req.on('close', () => { - res.end(); - }); -}); - -// Original scraping function for bulk loading -async function getAllRatingsWithScraping(progressCallback = null) { - try { - const pdgaNumbers = fs.readFileSync('pdga-numbers.txt', 'utf-8') - .split('\n') - .map(num => num.trim()) - .filter(num => num); - - const ratings = []; - const total = pdgaNumbers.length; - - for (let i = 0; i < pdgaNumbers.length; i++) { - const pdgaNumber = pdgaNumbers[i]; - console.log(`Scraping PDGA ${pdgaNumber}... (${i + 1}/${total})`); - - if (progressCallback) { - progressCallback({ - current: i + 1, - total, - pdgaNumber, - status: 'loading' - }); - } - - try { - const playerData = await scrapePDGARating(pdgaNumber); - ratings.push(playerData); - - if (progressCallback) { - progressCallback({ - current: i + 1, - total, - pdgaNumber, - status: 'completed', - name: playerData.name - }); - } - - // Delay between PDGA scraping requests to be respectful - await new Promise(resolve => setTimeout(resolve, 2000)); - } catch (error) { - console.error(`Failed to scrape PDGA ${pdgaNumber}:`, error.message); - const errorData = { - pdgaNumber: parseInt(pdgaNumber), - name: 'Error', - rating: 0, - ratingChange: null, - predictedRating: null - }; - ratings.push(errorData); - - if (progressCallback) { - progressCallback({ - current: i + 1, - total, - pdgaNumber, - status: 'error', - name: 'Error' - }); - } - } - } - - return ratings.sort((a, b) => (b.rating || 0) - (a.rating || 0)); - } catch (error) { - console.error('Error reading PDGA numbers:', error); - return []; - } -} - -// Refresh all players currently in database -async function refreshAllPlayersInDB(progressCallback = null) { - try { - // Get all players from database - const allPlayers = await new Promise((resolve, reject) => { - db.all( - 'SELECT pdga_number, name FROM players ORDER BY pdga_number', - [], - (err, rows) => { - if (err) reject(err); - else resolve(rows || []); - } - ); - }); - - console.log(`Refreshing ${allPlayers.length} players from database...`); - - const ratings = []; - const total = allPlayers.length; - - for (let i = 0; i < allPlayers.length; i++) { - const player = allPlayers[i]; - const pdgaNumber = player.pdga_number; - - console.log(`Refreshing PDGA ${pdgaNumber}... (${i + 1}/${total})`); - - if (progressCallback) { - progressCallback({ - current: i + 1, - total, - pdgaNumber, - status: 'loading' - }); - } - - try { - const playerData = await scrapePDGARating(pdgaNumber); - ratings.push(playerData); - - if (progressCallback) { - progressCallback({ - current: i + 1, - total, - pdgaNumber, - status: 'completed', - name: playerData.name - }); - } - - // Delay between PDGA scraping requests to be respectful - await new Promise(resolve => setTimeout(resolve, 2000)); - } catch (error) { - console.error(`Failed to refresh PDGA ${pdgaNumber}:`, error.message); - const errorData = { - pdgaNumber: parseInt(pdgaNumber), - name: player.name || 'Error', - rating: 0, - ratingChange: null, - predictedRating: null - }; - ratings.push(errorData); - - if (progressCallback) { - progressCallback({ - current: i + 1, - total, - pdgaNumber, - status: 'error', - name: player.name || 'Error' - }); - } - } - } - - return ratings.sort((a, b) => (b.rating || 0) - (a.rating || 0)); - } catch (error) { - console.error('Error refreshing all players:', error); - return []; - } -} - -async function fetchRatingHistory(pdgaNumber) { - return new Promise((resolve, reject) => { - const options = { - hostname: 'www.pdga.com', - port: 443, - path: `/player/${pdgaNumber}/history`, - method: 'GET', - headers: { - 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36' - }, - timeout: 30000 - }; - - console.log(`Fetching rating history for PDGA #${pdgaNumber} from: https://www.pdga.com/player/${pdgaNumber}/history`); - - const req = https.request(options, (res) => { - let data = ''; - res.on('data', (chunk) => { - data += chunk; - }); - - res.on('end', () => { - if (res.statusCode === 200) { - console.log(`Rating history request successful for PDGA #${pdgaNumber}`); - resolve(data); - } else { - // Log detailed error information for rating history - console.log(`Rating History Error for PDGA #${pdgaNumber}:`); - console.log(`Status: ${res.statusCode}`); - console.log('Response Headers:', JSON.stringify(res.headers, null, 2)); - - // Check for rate limiting headers - if (res.headers['retry-after']) { - console.log(`Retry-After: ${res.headers['retry-after']} seconds`); - } - if (res.headers['x-ratelimit-limit']) { - console.log(`Rate Limit: ${res.headers['x-ratelimit-limit']}`); - } - if (res.headers['x-ratelimit-remaining']) { - console.log(`Rate Limit Remaining: ${res.headers['x-ratelimit-remaining']}`); - } - - // Log partial response if available - if (data.length > 0) { - console.log(`Partial response received (${data.length} bytes):`, data.substring(0, 200)); - } - - const error = new Error(`HTTP ${res.statusCode} for rating history`); - error.statusCode = res.statusCode; - error.headers = res.headers; - reject(error); - } - }); - }); - - req.on('error', (error) => { - console.log(`Rating history request error for PDGA #${pdgaNumber}:`, { - code: error.code, - message: error.message, - errno: error.errno, - syscall: error.syscall - }); - - if (error.code === 'ECONNRESET') { - console.log('Connection reset on rating history - likely rate limited by PDGA'); - } - if (error.code === 'ECONNREFUSED') { - console.log('Connection refused - PDGA server may be blocking requests'); - } - if (error.code === 'ETIMEDOUT') { - console.log('Request timed out - server may be overloaded'); - } - - reject(error); - }); - - req.on('timeout', () => { - console.log(`Rating history request timeout for PDGA #${pdgaNumber} after 30s`); - req.destroy(); - reject(new Error('Request timeout')); - }); - - req.setTimeout(30000); - req.end(); - }); -} - -function parseRatingHistory(html) { - const history = []; - - // Find all table rows with rating data - const rowMatches = html.match(/<tr[^>]*>[\s\S]*?<\/tr>/gi); - - if (rowMatches) { - for (const row of rowMatches) { - // Skip header rows and empty rows - if (row.includes('<th') || !row.includes('<td')) continue; - - // Extract date, rating, and rounds from table cells - const cellMatches = row.match(/<td[^>]*>(.*?)<\/td>/gi); - - if (cellMatches && cellMatches.length >= 2) { - const dateText = cellMatches[0].replace(/<[^>]*>/g, '').trim(); - const ratingText = cellMatches[1].replace(/<[^>]*>/g, '').trim(); - - // Parse date (DD-Mon-YYYY format) - const dateMatch = dateText.match(/(\d{1,2})-([A-Za-z]{3})-(\d{4})/); - if (dateMatch && !isNaN(parseInt(ratingText))) { - const [, day, month, year] = dateMatch; - const monthMap = { - 'Jan': 0, 'Feb': 1, 'Mar': 2, 'Apr': 3, 'May': 4, 'Jun': 5, - 'Jul': 6, 'Aug': 7, 'Sep': 8, 'Oct': 9, 'Nov': 10, 'Dec': 11 - }; - - const date = new Date(parseInt(year), monthMap[month], parseInt(day)); - - history.push({ - date: date.toISOString().split('T')[0], // YYYY-MM-DD format - rating: parseInt(ratingText), - displayDate: dateText - }); - } - } - } - } - - // Sort by date (oldest first for chart display) - return history.sort((a, b) => new Date(a.date) - new Date(b.date)); -} - -app.get('/api/rating-history/:pdgaNumber', async (req, res) => { - try { - const { pdgaNumber } = req.params; - - // Check database first - const cachedHistory = await getRatingHistoryFromDB(pdgaNumber); - if (cachedHistory && cachedHistory.length > 0) { - console.log(`Using cached rating history from DB for PDGA ${pdgaNumber}`); - const formattedHistory = cachedHistory.map(row => ({ - date: row.date, - rating: row.rating, - displayDate: new Date(row.date).toLocaleDateString('en-US', { - day: '2-digit', - month: 'short', - year: 'numeric' - }) - })); - - res.json({ - pdgaNumber: parseInt(pdgaNumber), - history: formattedHistory - }); - return; - } - - console.log(`Fetching rating history for PDGA ${pdgaNumber}...`); - const html = await fetchRatingHistory(pdgaNumber); - const history = parseRatingHistory(html); - - // Save to database - try { - await saveRatingHistoryToDB(pdgaNumber, history); - console.log(`Saved rating history for PDGA ${pdgaNumber} to database`); - } catch (dbErr) { - console.error(`Failed to save rating history to database:`, dbErr.message); - } - - res.json({ - pdgaNumber: parseInt(pdgaNumber), - history - }); - } catch (error) { - console.error('Error fetching rating history:', error.message); - res.status(500).json({ error: 'Failed to fetch rating history' }); - } -}); - -app.post('/api/clear-cache', (req, res) => { - try { - // Clear database cache by updating timestamps to force refresh - db.run('UPDATE players SET last_updated = datetime("now", "-25 hours"), last_round_update = NULL', (err) => { - if (err) { - console.error('Error clearing database cache:', err); - res.status(500).json({ error: 'Failed to clear database cache' }); - return; - } - - // Also clear legacy in-memory cache - const cacheSize = cache.size; - cache.clear(); - - console.log('Database cache cleared - all players will be refreshed on next request'); - res.json({ - success: true, - message: `Cache cleared - database and ${cacheSize} memory entries reset` - }); - }); - } catch (error) { - console.error('Error clearing cache:', error); - res.status(500).json({ error: 'Failed to clear cache' }); - } -}); - -// Individual player refresh endpoints -// Search for a player (check if exists in DB and fetch from PDGA) -app.get('/api/search-player/:pdgaNumber', async (req, res) => { - try { - const { pdgaNumber } = req.params; - console.log(`Searching for player with PDGA number ${pdgaNumber}`); - - // Check if player already exists in database - const existingPlayer = await getPlayerFromDB(pdgaNumber); - if (existingPlayer) { - return res.json({ - alreadyExists: true, - player: { - pdgaNumber: existingPlayer.pdga_number, - name: existingPlayer.name, - rating: existingPlayer.current_rating, - ratingChange: existingPlayer.rating_change - } - }); - } - - // Fetch player data from PDGA - const html = await fetchPlayerDataHTTP(pdgaNumber); - const playerData = parsePlayerData(html, pdgaNumber); - - // Check if player was found (name shouldn't be 'Unknown') - if (playerData.name === 'Unknown' || !playerData.name) { - return res.status(404).json({ error: 'Player not found' }); - } - - res.json({ - alreadyExists: false, - player: playerData - }); - } catch (error) { - console.error('Error searching for player:', error.message); - res.status(500).json({ error: 'Failed to search for player' }); - } -}); - -// Add a new player to the database -app.post('/api/add-player', async (req, res) => { - try { - const { pdgaNumber } = req.body; - - if (!pdgaNumber) { - return res.status(400).json({ error: 'PDGA number is required' }); - } - - console.log(`Adding player with PDGA number ${pdgaNumber}`); - - // Check if player already exists - const existingPlayer = await getPlayerFromDB(pdgaNumber); - if (existingPlayer) { - return res.status(409).json({ - error: 'Player already exists', - player: { - pdgaNumber: existingPlayer.pdga_number, - name: existingPlayer.name, - rating: existingPlayer.current_rating - } - }); - } - - // Fetch player data from PDGA - const html = await fetchPlayerDataHTTP(pdgaNumber); - const playerData = parsePlayerData(html, pdgaNumber); - - // Verify player was found - if (playerData.name === 'Unknown' || !playerData.name) { - return res.status(404).json({ error: 'Player not found' }); - } - - // Save to database - await savePlayerToDB(playerData); - - console.log(`Successfully added player: ${playerData.name} (#${pdgaNumber})`); - - res.json({ - success: true, - player: playerData - }); - } catch (error) { - console.error('Error adding player:', error.message); - res.status(500).json({ error: 'Failed to add player' }); - } -}); - -app.post('/api/refresh-player/:pdgaNumber', async (req, res) => { - try { - const { pdgaNumber } = req.params; - console.log(`Manually refreshing player data for PDGA ${pdgaNumber}`); - - // Force refresh by bypassing cache - const html = await fetchPlayerDataHTTP(pdgaNumber); - const playerData = parsePlayerData(html, pdgaNumber); - - // Save to database - await savePlayerToDB(playerData); - - res.json({ - success: true, - player: playerData - }); - } catch (error) { - console.error('Error refreshing player data:', error.message); - res.status(500).json({ error: 'Failed to refresh player data' }); - } -}); - -app.post('/api/refresh-rating-history/:pdgaNumber', async (req, res) => { - try { - const { pdgaNumber } = req.params; - console.log(`=== Manually refreshing rating history for PDGA ${pdgaNumber} ===`); - - const startTime = Date.now(); - const html = await fetchRatingHistory(pdgaNumber); - const fetchTime = Date.now() - startTime; - - console.log(`HTML fetch completed in ${fetchTime}ms, received ${html.length} bytes`); - - const parseStartTime = Date.now(); - const history = parseRatingHistory(html); - const parseTime = Date.now() - parseStartTime; - - console.log(`Parsing completed in ${parseTime}ms, found ${history.length} history entries`); - - if (history.length > 0) { - console.log('Sample history entries:', history.slice(0, 3)); - } else { - console.log('No history entries found. HTML sample:', html.substring(0, 500)); - } - - const dbStartTime = Date.now(); - await saveRatingHistoryToDB(pdgaNumber, history); - const dbTime = Date.now() - dbStartTime; - - console.log(`Database save completed in ${dbTime}ms`); - - const formattedHistory = history.map(entry => ({ - date: entry.date, - rating: entry.rating, - displayDate: entry.displayDate - })); - - console.log(`=== Rating history refresh completed for PDGA ${pdgaNumber} ===`); - - res.json({ - success: true, - history: formattedHistory - }); - } catch (error) { - console.error(`=== Error refreshing rating history for PDGA ${pdgaNumber} ===`); - console.error('Error type:', error.constructor.name); - console.error('Error message:', error.message); - console.error('Error code:', error.code); - console.error('Status code:', error.statusCode); - if (error.stack) { - console.error('Stack trace:', error.stack); - } - - res.status(500).json({ - error: 'Failed to refresh rating history', - details: error.message, - code: error.code - }); - } -}); - -app.post('/api/refresh-round-history/:pdgaNumber', async (req, res) => { - // Increase timeout for tournament scraping - req.setTimeout(600000); // 10 minutes - res.setTimeout(600000); - - let browser = null; - const { pdgaNumber } = req.params; - try { - - // Check when we last updated rounds for this player - const lastRoundUpdate = await getLastRoundUpdateDate(pdgaNumber); - const sinceDate = lastRoundUpdate ? new Date(lastRoundUpdate) : null; - - // Rate limit: Only allow refresh once every 24 hours - if (sinceDate) { - const hoursSinceUpdate = (Date.now() - sinceDate.getTime()) / (1000 * 60 * 60); - if (hoursSinceUpdate < 24) { - const hoursRemaining = Math.ceil(24 - hoursSinceUpdate); - return res.status(429).json({ - error: 'Rate limit exceeded', - message: `Prediction can only be refreshed once every 24 hours. Please try again in ${hoursRemaining} hour(s).`, - lastUpdate: sinceDate.toISOString(), - hoursRemaining: hoursRemaining - }); - } - } - - const isIncremental = !!sinceDate; - - console.log(`${isIncremental ? 'Incrementally updating' : 'Fully refreshing'} round history for PDGA ${pdgaNumber}${sinceDate ? ` since ${sinceDate.toDateString()}` : ''}`); - - try { - browser = await puppeteer.launch({ - headless: "new", - args: [ - '--no-sandbox', - '--disable-setuid-sandbox', - '--disable-dev-shm-usage', - '--disable-accelerated-2d-canvas', - '--no-first-run', - '--no-zygote', - '--disable-gpu' - ] - }); - } catch (launchError) { - // Fallback with minimal options - browser = await puppeteer.launch({ - headless: true, - args: ['--no-sandbox', '--disable-dev-shm-usage'] - }); - } - - // Step 1: Get official rating history - let officialHistory; - try { - officialHistory = await getOfficialRatingHistory(browser, pdgaNumber); - if (officialHistory.length > 0) { - await saveRatingHistoryToDB(pdgaNumber, officialHistory); - } - } catch (historyError) { - console.error('Failed to fetch official history:', historyError.message); - officialHistory = []; - } - - // Step 2: Get optimized round collection (details + new tournaments only) - let allRounds = []; - try { - console.log(`Using optimized approach: /details + new tournaments only for PDGA ${pdgaNumber}...`); - allRounds = await getOptimizedPlayerRounds(browser, pdgaNumber); - - if (allRounds.length > 0) { - // Convert to the format expected by saveRoundHistoryToDB - const roundsForDB = allRounds.map(round => ({ - rating: round.rating, - date: round.date, - competition: round.competition - })); - - // Save all rounds (replacing existing data with the complete optimized set) - await saveRoundHistoryToDB(pdgaNumber, roundsForDB, false); // false = replace all - console.log(`✓ Saved ${allRounds.length} rounds using optimized approach`); - - // Update timestamp to mark when we last did a full collection - await updateLastRoundUpdateDate(pdgaNumber); - } else { - console.log('ℹ No rounds found'); - } - } catch (detailsError) { - console.error('Failed to fetch rounds using optimized approach:', detailsError.message); - allRounds = []; - } - - await browser.close(); - browser = null; - - // Calculate prediction from optimized round collection - const dbRounds = await getRoundHistoryFromDB(pdgaNumber); - const roundsForPrediction = dbRounds.map(round => ({ - rating: round.rating, - date: new Date(round.date), - competition: round.competition_name - })); - - const result = calculatePredictedRating(roundsForPrediction); - - // Save the predicted rating to database for persistence - await savePredictedRatingToDB(pdgaNumber, result.rating, result.stdDev); - - // Count official vs new rounds - const officialCount = allRounds.filter(r => r.source === 'official').length; - const newCount = allRounds.filter(r => r.source === 'new').length; - - res.json({ - success: true, - predictedRating: result.rating, - stdDev: result.stdDev, - debugLog: result.debugLog, - totalRounds: roundsForPrediction.length, - officialRounds: officialCount, - newRounds: newCount, - approach: 'optimized', - message: `Used /details (${officialCount} rounds) + new tournaments (${newCount} rounds)` - }); - } catch (error) { - console.error(`=== Error refreshing round history for PDGA ${pdgaNumber} ===`); - console.error('Error type:', error.constructor.name); - console.error('Error message:', error.message); - console.error('Error code:', error.code); - console.error('Error name:', error.name); - - // Log all error properties for debugging - console.error('Full error object:', JSON.stringify(error, Object.getOwnPropertyNames(error), 2)); - - // Check if this is a puppeteer-specific error - if (error.name) { - console.error(`Specific error name: ${error.name}`); - } - - // Log timing information - const currentTime = new Date().toISOString(); - console.error(`Error occurred at: ${currentTime}`); - - // Check if we have browser information - if (browser) { - console.error('Browser was active when error occurred'); - } else { - console.error('No active browser session'); - } - - if (error.message.includes('socket hang up')) { - console.error('🔌 Socket hang up - likely rate limited by PDGA'); - console.error('💡 Try waiting a few minutes before attempting again'); - console.error('🔍 This usually happens when PDGA blocks too many rapid requests'); - } - - if (error.message.includes('Navigation timeout')) { - console.error('⏰ Navigation timeout - PDGA pages loading slowly'); - console.error('💡 Try reducing the number of tournaments scraped'); - } - - if (error.message.includes('net::ERR_CONNECTION_RESET')) { - console.error('đŸšĢ Connection reset by PDGA server'); - console.error('💡 PDGA may be blocking or rate limiting requests'); - } - - if (error.stack) { - console.error('Full stack trace:'); - console.error(error.stack); - } else { - console.error('No stack trace available'); - } - - if (browser) { - try { - await browser.close(); - console.log('Browser closed successfully'); - } catch (closeError) { - console.error('Error closing browser:', closeError.message); - } - } - - res.status(500).json({ - error: 'Failed to refresh round history', - details: error.message, - errorType: error.constructor.name, - errorName: error.name, - timestamp: new Date().toISOString(), - suggestion: error.message.includes('socket hang up') ? - 'Rate limited by PDGA - try again in a few minutes. This happens when too many requests are made too quickly.' : - error.message.includes('timeout') ? - 'PDGA pages are loading slowly - try again later when PDGA servers are less busy.' : - 'Tournament scraping failed - check server logs for detailed error information' - }); - } -}); - -// Course API endpoints -app.get('/api/courses', async (req, res) => { - try { - const courses = await getAllCoursesFromDB(); - res.json(courses); - } catch (error) { - console.error('Error fetching courses:', error.message); - res.status(500).json({ error: 'Failed to fetch courses' }); - } -}); - -app.get('/api/layouts/:courseId', async (req, res) => { - try { - const { courseId } = req.params; - const layouts = await getLayoutsForCourse(courseId); - res.json(layouts); - } catch (error) { - console.error('Error fetching layouts:', error.message); - res.status(500).json({ error: 'Failed to fetch layouts' }); - } -}); - -app.post('/api/scrape-courses', async (req, res) => { - // Increase timeout for course directory scraping - req.setTimeout(600000); // 10 minutes - res.setTimeout(600000); - - let browser = null; - try { - console.log('Starting course directory scraping...'); - - browser = await puppeteer.launch({ - headless: "new", - args: [ - '--no-sandbox', - '--disable-setuid-sandbox', - '--disable-dev-shm-usage', - '--disable-accelerated-2d-canvas', - '--no-first-run', - '--no-zygote', - '--disable-gpu' - ] - }); - - const courses = await scrapeCourseDirectory(browser); - - await browser.close(); - browser = null; - - res.json({ - success: true, - coursesFound: courses.length, - message: `Successfully scraped ${courses.length} courses` - }); - } catch (error) { - console.error('Error scraping courses:', error.message); - if (browser) { - try { - await browser.close(); - } catch (closeError) { - console.error('Error closing browser:', closeError.message); - } - } - res.status(500).json({ error: 'Failed to scrape courses' }); - } -}); - -app.post('/api/scrape-layouts/:courseId', async (req, res) => { - // Increase timeout for this endpoint since scraping can take several minutes - req.setTimeout(600000); // 10 minutes - res.setTimeout(600000); - - const { courseId } = req.params; - const lockKey = `layout-${courseId}`; - - // Check if there's already a scrape in progress for this course - if (activeScrapes.has(lockKey)) { - console.log(`âš ī¸ Scrape already in progress for course ${courseId}`); - return res.status(409).json({ - error: 'Scrape already in progress for this course', - message: 'Please wait for the current scrape to complete' - }); - } - - let browser = null; - - // Create a promise for this scrape operation - const scrapePromise = (async () => { - try { - // Get course from database - const course = await new Promise((resolve, reject) => { - db.get('SELECT * FROM courses WHERE id = ?', [courseId], (err, row) => { - if (err) reject(err); - else resolve(row); - }); - }); - - if (!course) { - throw new Error('Course not found'); - } - - console.log(`Starting layout scraping for course: ${course.name}`); - - browser = await puppeteer.launch({ - headless: "new", - args: [ - '--no-sandbox', - '--disable-setuid-sandbox', - '--disable-dev-shm-usage', - '--disable-accelerated-2d-canvas', - '--no-first-run', - '--no-zygote', - '--disable-gpu' - ] - }); - - const layouts = await scrapeCourseLayouts(browser, course.link, courseId); - - console.log(`\n=== Starting event results scraping for ${course.name} ===`); - - // Get layout data from cache - const courseIdInt = parseInt(courseId); - const layoutData = layoutEventCache.get(courseIdInt); - - if (!layoutData || layoutData.length === 0) { - console.log('No event data found in cache, skipping event results scraping'); - await browser.close(); - browser = null; - - return res.json({ - success: true, - layoutsFound: layouts.length, - message: `Successfully scraped ${layouts.length} layouts for ${course.name} (no events found)` - }); - } - - // Group layouts by event URL - const eventGroups = {}; - layoutData.forEach(layout => { - if (layout.eventUrl) { - if (!eventGroups[layout.eventUrl]) { - eventGroups[layout.eventUrl] = []; - } - eventGroups[layout.eventUrl].push(layout); - } - }); - - - // Process all events and accumulate ratings by unique layout - const allLayoutRatings = {}; // key: "layoutName|par", value: array of all ratings - - let eventCount = 0; - for (const eventUrl in eventGroups) { - eventCount++; - const eventLayouts = eventGroups[eventUrl]; - - const results = await scrapeEventResults(browser, eventUrl, eventLayouts); - - // Accumulate ratings for each layout - for (const layoutKey in results) { - const layoutDataResult = results[layoutKey]; - - if (!allLayoutRatings[layoutKey]) { - allLayoutRatings[layoutKey] = { - name: layoutDataResult.name, - par: layoutDataResult.par, - allRatings: [], - latestDate: layoutDataResult.eventDate - }; - } else { - // Update to latest date if this event is more recent - if (layoutDataResult.eventDate && (!allLayoutRatings[layoutKey].latestDate || - new Date(layoutDataResult.eventDate) > new Date(allLayoutRatings[layoutKey].latestDate))) { - allLayoutRatings[layoutKey].latestDate = layoutDataResult.eventDate; - } - } - - // Add all ratings from this event to the accumulated ratings - allLayoutRatings[layoutKey].allRatings.push(...layoutDataResult.ratings); - } - - // Small delay between events - await new Promise(resolve => setTimeout(resolve, 2000)); - } - - console.log(`\n=== Calculating final ratings for all layouts ===`); - - // Calculate mean ratings and save to database - let savedCount = 0; - for (const layoutKey in allLayoutRatings) { - const layoutDataResult = allLayoutRatings[layoutKey]; - - if (layoutDataResult.allRatings.length > 0) { - const meanRating = Math.round( - layoutDataResult.allRatings.reduce((sum, r) => sum + r, 0) / layoutDataResult.allRatings.length - ); - - console.log(`Layout: ${layoutDataResult.name} (Par ${layoutDataResult.par})`); - console.log(` Total ratings collected: ${layoutDataResult.allRatings.length}`); - console.log(` Mean rating: ${meanRating}`); - console.log(` Last played: ${layoutDataResult.latestDate || 'Unknown'}`); - - try { - const changes = await updateLayoutRating( - courseIdInt, - layoutDataResult.name, - layoutDataResult.par, - meanRating, - layoutDataResult.allRatings.length, - layoutDataResult.latestDate - ); - if (changes > 0) { - console.log(` ✓ Updated in database`); - savedCount++; - } - } catch (err) { - console.error(` Error updating layout ${layoutDataResult.name}:`, err.message); - } - } - } - - await browser.close(); - browser = null; - - return { - success: true, - layoutsFound: layouts.length, - eventsProcessed: Object.keys(eventGroups).length, - layoutsWithRatings: savedCount, - message: `Successfully scraped ${layouts.length} layouts and processed ${Object.keys(eventGroups).length} events for ${course.name}` - }; - } catch (error) { - console.error('Error scraping layouts:', error.message); - if (browser) { - try { - await browser.close(); - } catch (closeError) { - console.error('Error closing browser:', closeError.message); - } - } - throw error; - } - })(); - - // Store the promise in activeScrapes - activeScrapes.set(lockKey, scrapePromise); - - try { - // Wait for the scrape to complete - const result = await scrapePromise; - res.json(result); - } catch (error) { - res.status(500).json({ - error: 'Failed to scrape layouts', - message: error.message - }); - } finally { - // Always remove from active scrapes when done - activeScrapes.delete(lockKey); - console.log(`✓ Released lock for course ${courseId}`); - } -}); - -app.post('/api/scrape-event-results/:courseId', async (req, res) => { - // Increase timeout for scraping operations - req.setTimeout(600000); // 10 minutes - res.setTimeout(600000); - - let browser = null; - try { - const { courseId } = req.params; - const courseIdInt = parseInt(courseId); - - // Get layout data from cache - const layoutData = layoutEventCache.get(courseIdInt); - - if (!layoutData || layoutData.length === 0) { - return res.status(404).json({ - error: 'No layout data found in cache. Please scrape layouts first.' - }); - } - - browser = await puppeteer.launch({ - headless: "new", - args: [ - '--no-sandbox', - '--disable-setuid-sandbox', - '--disable-dev-shm-usage', - '--disable-accelerated-2d-canvas', - '--no-first-run', - '--no-zygote', - '--disable-gpu' - ] - }); - - // Group layouts by event URL - const eventGroups = {}; - layoutData.forEach(layout => { - if (layout.eventUrl) { - if (!eventGroups[layout.eventUrl]) { - eventGroups[layout.eventUrl] = []; - } - eventGroups[layout.eventUrl].push(layout); - } - }); - - - // Process all events and accumulate ratings by unique layout - const allLayoutRatings = {}; // key: "layoutName|par", value: array of all ratings - - let eventCount = 0; - for (const eventUrl in eventGroups) { - eventCount++; - const eventLayouts = eventGroups[eventUrl]; - - const results = await scrapeEventResults(browser, eventUrl, eventLayouts); - - // Accumulate ratings for each layout - for (const layoutKey in results) { - const layoutData = results[layoutKey]; - - if (!allLayoutRatings[layoutKey]) { - allLayoutRatings[layoutKey] = { - name: layoutData.name, - par: layoutData.par, - allRatings: [], - latestDate: layoutData.eventDate - }; - } else { - // Update to latest date if this event is more recent - if (layoutData.eventDate && (!allLayoutRatings[layoutKey].latestDate || - new Date(layoutData.eventDate) > new Date(allLayoutRatings[layoutKey].latestDate))) { - allLayoutRatings[layoutKey].latestDate = layoutData.eventDate; - } - } - - // Add all ratings from this event to the accumulated ratings - allLayoutRatings[layoutKey].allRatings.push(...layoutData.ratings); - } - - // Small delay between events - await new Promise(resolve => setTimeout(resolve, 2000)); - } - - await browser.close(); - browser = null; - - console.log(`\n=== Calculating final ratings for all layouts ===`); - - // Calculate mean ratings and save to database - let savedCount = 0; - for (const layoutKey in allLayoutRatings) { - const layoutData = allLayoutRatings[layoutKey]; - - if (layoutData.allRatings.length > 0) { - const meanRating = Math.round( - layoutData.allRatings.reduce((sum, r) => sum + r, 0) / layoutData.allRatings.length - ); - - console.log(`Layout: ${layoutData.name} (Par ${layoutData.par})`); - console.log(` Total ratings collected: ${layoutData.allRatings.length}`); - console.log(` Mean rating: ${meanRating}`); - console.log(` Last played: ${layoutData.latestDate || 'Unknown'}`); - - try { - const changes = await updateLayoutRating( - courseIdInt, - layoutData.name, - layoutData.par, - meanRating, - layoutData.allRatings.length, - layoutData.latestDate - ); - if (changes > 0) { - console.log(` ✓ Updated in database`); - savedCount++; - } - } catch (err) { - console.error(` Error updating layout ${layoutData.name}:`, err.message); - } - } - } - - res.json({ - success: true, - eventsProcessed: Object.keys(eventGroups).length, - uniqueLayouts: Object.keys(allLayoutRatings).length, - layoutsSaved: savedCount, - message: `Processed ${Object.keys(eventGroups).length} events, updated ${savedCount} layouts` - }); - } catch (error) { - console.error('Error scraping event results:', error.message); - if (browser) { - try { - await browser.close(); - } catch (closeError) { - console.error('Error closing browser:', closeError.message); - } - } - res.status(500).json({ error: 'Failed to scrape event results' }); - } -}); - -app.post('/api/predicted-rating/:pdgaNumber', async (req, res) => { - let browser = null; - try { - const { pdgaNumber } = req.params; - - // Always check database first (source of truth) - const cachedPrediction = await getPredictedRatingFromDB(pdgaNumber); - if (cachedPrediction > 0) { - console.log(`Using DB round history for PDGA ${pdgaNumber} prediction (source of truth)`); - res.json({ - pdgaNumber: parseInt(pdgaNumber), - predictedRating: cachedPrediction - }); - return; - } - - browser = await puppeteer.launch({ - headless: "new", - args: [ - '--no-sandbox', - '--disable-setuid-sandbox', - '--disable-dev-shm-usage', - '--disable-accelerated-2d-canvas', - '--no-first-run', - '--no-zygote', - '--disable-gpu' - ] - }); - - console.log(`Calculating predicted rating for PDGA ${pdgaNumber}...`); - - // Check for incremental update - const lastRoundUpdate = await getLastRoundUpdateDate(pdgaNumber); - const sinceDate = lastRoundUpdate ? new Date(lastRoundUpdate) : null; - const isIncremental = !!sinceDate; - - // Get round ratings and calculate prediction - const newRoundRatings = await getPlayerCompetitionRatings(browser, pdgaNumber, sinceDate); - - await browser.close(); - browser = null; - - // Save new round history to database - await saveRoundHistoryToDB(pdgaNumber, newRoundRatings, isIncremental); - - // Get all rounds for prediction calculation - const allRounds = await getRoundHistoryFromDB(pdgaNumber); - const roundRatings = allRounds.map(round => ({ - rating: round.rating, - date: new Date(round.date), - competition: round.competition_name - })); - - const result = calculatePredictedRating(roundRatings); - - res.json({ - pdgaNumber: parseInt(pdgaNumber), - predictedRating: result.rating, - stdDev: result.stdDev, - debugLog: result.debugLog - }); - } catch (error) { - console.error('Error calculating predicted rating:', error.message || error); - if (browser) { - try { - await browser.close(); - } catch (closeError) { - console.error('Error closing browser:', closeError.message); - } - } - res.status(500).json({ error: 'Failed to calculate predicted rating' }); - } -}); - -// Test function to probe PDGA rate limiting -async function testPDGARateLimit() { - console.log('Testing PDGA rate limiting behavior...'); - - const testPdgaNumbers = ['60954', '178737', '251092']; // First few from our list - const requestTimes = []; - - for (let i = 0; i < testPdgaNumbers.length; i++) { - const startTime = Date.now(); - try { - console.log(`Test request ${i + 1}: PDGA #${testPdgaNumbers[i]}`); - await fetchPlayerDataHTTP(testPdgaNumbers[i]); - const endTime = Date.now(); - requestTimes.push(endTime - startTime); - console.log(`Request ${i + 1} completed in ${endTime - startTime}ms`); - } catch (error) { - const endTime = Date.now(); - requestTimes.push(endTime - startTime); - console.log(`Request ${i + 1} failed after ${endTime - startTime}ms:`, error.message); - } - - // Small delay between test requests - if (i < testPdgaNumbers.length - 1) { - await new Promise(resolve => setTimeout(resolve, 500)); - } - } - - console.log('Rate limit test completed. Request times:', requestTimes); -} - -// Uncomment the line below to run rate limit test on startup -// testPDGARateLimit(); - -// Initialize database and start server initializeDatabase().then(async () => { - // Check and populate missing players from PDGA numbers file await checkAndPopulateDatabase(); - + app.listen(PORT, () => { console.log(`PDGA Ratings app running on http://localhost:${PORT}`); }); }).catch(err => { console.error('Failed to initialize database:', err); process.exit(1); -}); \ No newline at end of file +}); diff --git a/src/db.js b/src/db.js new file mode 100644 index 0000000..7be1f64 --- /dev/null +++ b/src/db.js @@ -0,0 +1,183 @@ +const sqlite3 = require('sqlite3').verbose(); + +const dbPath = process.env.DB_PATH || './ratings.db'; +const db = new sqlite3.Database(dbPath); + +function initializeDatabase() { + return new Promise((resolve, reject) => { + db.serialize(() => { + db.run(` + CREATE TABLE IF NOT EXISTS players ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + pdga_number INTEGER UNIQUE NOT NULL, + name TEXT NOT NULL, + current_rating INTEGER, + rating_change INTEGER, + last_updated DATETIME DEFAULT CURRENT_TIMESTAMP, + last_round_update DATETIME DEFAULT NULL + ) + `); + + db.get("PRAGMA table_info(players)", (err, info) => { + if (err) { + console.error('Error checking table schema:', err); + return; + } + + db.all("PRAGMA table_info(players)", (err, columns) => { + if (err) { + console.error('Error getting table info:', err); + return; + } + + const hasLastRoundUpdate = columns.some(col => col.name === 'last_round_update'); + const hasPredictedRating = columns.some(col => col.name === 'predicted_rating'); + const hasStdDev = columns.some(col => col.name === 'std_dev'); + + if (!hasLastRoundUpdate) { + console.log('Adding last_round_update column to players table...'); + db.run(`ALTER TABLE players ADD COLUMN last_round_update DATETIME DEFAULT NULL`, (err) => { + if (err) console.error('Error adding last_round_update column:', err.message); + else console.log('Successfully added last_round_update column'); + }); + } + + if (!hasPredictedRating) { + console.log('Adding predicted_rating column to players table...'); + db.run(`ALTER TABLE players ADD COLUMN predicted_rating INTEGER DEFAULT NULL`, (err) => { + if (err) console.error('Error adding predicted_rating column:', err.message); + else console.log('Successfully added predicted_rating column'); + }); + } + + if (!hasStdDev) { + console.log('Adding std_dev column to players table...'); + db.run(`ALTER TABLE players ADD COLUMN std_dev INTEGER DEFAULT NULL`, (err) => { + if (err) console.error('Error adding std_dev column:', err.message); + else console.log('Successfully added std_dev column'); + }); + } + }); + }); + + db.run(` + CREATE TABLE IF NOT EXISTS round_history ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + player_id INTEGER NOT NULL, + date DATE NOT NULL, + competition_name TEXT NOT NULL, + rating INTEGER NOT NULL, + FOREIGN KEY (player_id) REFERENCES players (id) + ) + `); + + db.run(` + CREATE TABLE IF NOT EXISTS rating_history ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + player_id INTEGER NOT NULL, + date DATE NOT NULL, + rating INTEGER NOT NULL, + FOREIGN KEY (player_id) REFERENCES players (id) + ) + `); + + db.run(` + CREATE TABLE IF NOT EXISTS courses ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL, + link TEXT UNIQUE NOT NULL, + city TEXT, + last_updated DATETIME DEFAULT CURRENT_TIMESTAMP + ) + `); + + db.run(` + CREATE TABLE IF NOT EXISTS layouts ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + course_id INTEGER NOT NULL, + name TEXT NOT NULL, + par INTEGER NOT NULL, + mean_rating INTEGER, + rating_count INTEGER DEFAULT 0, + last_calculated DATETIME, + FOREIGN KEY (course_id) REFERENCES courses (id), + UNIQUE(course_id, name, par) + ) + `, (err) => { + if (err) { + reject(err); + } else { + db.run(`ALTER TABLE layouts ADD COLUMN mean_rating INTEGER`, () => { + db.run(`ALTER TABLE layouts ADD COLUMN rating_count INTEGER DEFAULT 0`, () => { + db.run(`ALTER TABLE layouts ADD COLUMN last_calculated DATETIME`, () => { + db.run(`ALTER TABLE layouts ADD COLUMN last_played DATE`, () => { + console.log('Database initialized successfully'); + resolve(); + }); + }); + }); + }); + } + }); + }); + }); +} + +async function checkAndPopulateDatabase() { + const fs = require('fs'); + const { scrapePDGARating } = require('./services/player-service'); + + try { + const playerCount = await new Promise((resolve, reject) => { + db.get('SELECT COUNT(*) as count FROM players', [], (err, row) => { + if (err) reject(err); + else resolve(row.count); + }); + }); + + if (playerCount > 0) { + console.log(`✓ Database already has ${playerCount} players - skipping text file import`); + console.log('📝 Note: pdga-numbers.txt is only used when database is empty'); + return; + } + + console.log('=== Database is empty - populating from PDGA numbers file ==='); + + const pdgaNumbers = fs.readFileSync('pdga-numbers.txt', 'utf-8') + .split('\n') + .map(num => num.trim()) + .filter(num => num); + + console.log(`Found ${pdgaNumbers.length} PDGA numbers in file`); + + if (pdgaNumbers.length === 0) { + console.log('⚠ No PDGA numbers found in file'); + return; + } + + console.log('Populating database with players from file...'); + + for (let i = 0; i < pdgaNumbers.length; i++) { + const pdgaNumber = pdgaNumbers[i]; + console.log(`[${i + 1}/${pdgaNumbers.length}] Adding PDGA ${pdgaNumber}...`); + + try { + const playerData = await scrapePDGARating(pdgaNumber); + console.log(` ✓ Added ${playerData.name}`); + + if (i < pdgaNumbers.length - 1) { + await new Promise(resolve => setTimeout(resolve, 2000)); + } + } catch (error) { + console.error(` ✗ Failed to add PDGA ${pdgaNumber}:`, error.message); + } + } + + console.log('=== Database population complete ==='); + + } catch (error) { + console.error('Error during database population check:', error.message); + } +} + +module.exports = { db, initializeDatabase, checkAndPopulateDatabase }; diff --git a/src/models/course.js b/src/models/course.js new file mode 100644 index 0000000..e035e59 --- /dev/null +++ b/src/models/course.js @@ -0,0 +1,78 @@ +const { db } = require('../db'); + +function saveCourseToDB(courseData) { + return new Promise((resolve, reject) => { + db.run( + `INSERT OR REPLACE INTO courses (name, link, city, last_updated) + VALUES (?, ?, ?, datetime('now'))`, + [courseData.name, courseData.link, courseData.city], + function(err) { + if (err) reject(err); + else resolve(this.lastID); + } + ); + }); +} + +function getAllCoursesFromDB() { + return new Promise((resolve, reject) => { + db.all( + 'SELECT * FROM courses ORDER BY name ASC', + [], + (err, rows) => { + if (err) reject(err); + else resolve(rows); + } + ); + }); +} + +function saveLayoutToDB(courseId, layoutData) { + return new Promise((resolve, reject) => { + db.run( + `INSERT OR IGNORE INTO layouts (course_id, name, par) + VALUES (?, ?, ?)`, + [courseId, layoutData.name, layoutData.par], + function(err) { + if (err) reject(err); + else resolve(this.lastID); + } + ); + }); +} + +function getLayoutsForCourse(courseId) { + return new Promise((resolve, reject) => { + db.all( + 'SELECT * FROM layouts WHERE course_id = ? ORDER BY last_played DESC, name ASC', + [courseId], + (err, rows) => { + if (err) reject(err); + else resolve(rows); + } + ); + }); +} + +function updateLayoutRating(courseId, layoutName, par, meanRating, ratingCount, lastPlayed = null) { + return new Promise((resolve, reject) => { + db.run( + `UPDATE layouts + SET mean_rating = ?, rating_count = ?, last_calculated = datetime('now'), last_played = ? + WHERE course_id = ? AND name = ? AND par = ?`, + [meanRating, ratingCount, lastPlayed, courseId, layoutName, par], + function(err) { + if (err) reject(err); + else resolve(this.changes); + } + ); + }); +} + +module.exports = { + saveCourseToDB, + getAllCoursesFromDB, + saveLayoutToDB, + getLayoutsForCourse, + updateLayoutRating +}; diff --git a/src/models/player.js b/src/models/player.js new file mode 100644 index 0000000..856122c --- /dev/null +++ b/src/models/player.js @@ -0,0 +1,198 @@ +const { db } = require('../db'); +const { parseDate } = require('../services/rating-calculator'); + +function getPlayerFromDB(pdgaNumber) { + return new Promise((resolve, reject) => { + db.get( + 'SELECT * FROM players WHERE pdga_number = ?', + [pdgaNumber], + (err, row) => { + if (err) reject(err); + else resolve(row); + } + ); + }); +} + +function savePlayerToDB(playerData) { + return new Promise((resolve, reject) => { + db.run( + `INSERT OR REPLACE INTO players (pdga_number, name, current_rating, rating_change, last_updated) + VALUES (?, ?, ?, ?, datetime('now'))`, + [playerData.pdgaNumber, playerData.name, playerData.rating, playerData.ratingChange], + function(err) { + if (err) reject(err); + else resolve(this.lastID); + } + ); + }); +} + +function getRatingHistoryFromDB(pdgaNumber) { + return new Promise((resolve, reject) => { + db.get('SELECT id FROM players WHERE pdga_number = ?', [pdgaNumber], (err, player) => { + if (err) return reject(err); + if (!player) return resolve(null); + + db.all( + 'SELECT * FROM rating_history WHERE player_id = ? ORDER BY date ASC', + [player.id], + (err, rows) => { + if (err) reject(err); + else resolve(rows); + } + ); + }); + }); +} + +function saveRatingHistoryToDB(pdgaNumber, ratingHistory) { + return new Promise((resolve, reject) => { + db.get('SELECT id FROM players WHERE pdga_number = ?', [pdgaNumber], (err, player) => { + if (err) return reject(err); + if (!player) return reject(new Error('Player not found')); + + db.run('DELETE FROM rating_history WHERE player_id = ?', [player.id], (err) => { + if (err) return reject(err); + + if (ratingHistory.length === 0) { + return resolve(); + } + + let completed = 0; + const total = ratingHistory.length; + + ratingHistory.forEach(entry => { + const parsedDate = parseDate(entry.date); + + db.run( + 'INSERT INTO rating_history (player_id, date, rating) VALUES (?, ?, ?)', + [player.id, parsedDate.toISOString().split('T')[0], entry.rating], + (err) => { + if (err) return reject(err); + + completed++; + if (completed === total) { + resolve(); + } + } + ); + }); + }); + }); + }); +} + +function getRoundHistoryFromDB(pdgaNumber) { + return new Promise((resolve, reject) => { + db.get('SELECT id FROM players WHERE pdga_number = ?', [pdgaNumber], (err, player) => { + if (err) return reject(err); + if (!player) return resolve([]); + + db.all( + 'SELECT * FROM round_history WHERE player_id = ? ORDER BY date DESC', + [player.id], + (err, rows) => { + if (err) reject(err); + else resolve(rows); + } + ); + }); + }); +} + +function getLastRoundUpdateDate(pdgaNumber) { + return new Promise((resolve, reject) => { + db.get( + 'SELECT last_round_update FROM players WHERE pdga_number = ?', + [pdgaNumber], + (err, row) => { + if (err) reject(err); + else resolve(row ? row.last_round_update : null); + } + ); + }); +} + +function updateLastRoundUpdateDate(pdgaNumber) { + return new Promise((resolve, reject) => { + db.run( + 'UPDATE players SET last_round_update = CURRENT_TIMESTAMP WHERE pdga_number = ?', + [pdgaNumber], + function(err) { + if (err) reject(err); + else resolve(); + } + ); + }); +} + +function saveRoundHistoryToDB(pdgaNumber, roundData, isIncremental = false) { + return new Promise((resolve, reject) => { + db.get('SELECT id FROM players WHERE pdga_number = ?', [pdgaNumber], (err, player) => { + if (err) return reject(err); + if (!player) return reject(new Error('Player not found')); + + const processRounds = () => { + if (roundData.length === 0) { + db.run('UPDATE players SET last_round_update = datetime("now") WHERE pdga_number = ?', [pdgaNumber], (err) => { + if (err) reject(err); + else resolve(); + }); + return; + } + + const stmt = db.prepare('INSERT OR REPLACE INTO round_history (player_id, date, competition_name, rating) VALUES (?, ?, ?, ?)'); + + for (const round of roundData) { + stmt.run([player.id, round.date.toISOString().split('T')[0], round.competition || 'Unknown', round.rating]); + } + + stmt.finalize((err) => { + if (err) { + reject(err); + } else { + db.run('UPDATE players SET last_round_update = datetime("now") WHERE pdga_number = ?', [pdgaNumber], (updateErr) => { + if (updateErr) reject(updateErr); + else resolve(); + }); + } + }); + }; + + if (!isIncremental) { + db.run('DELETE FROM round_history WHERE player_id = ?', [player.id], (err) => { + if (err) return reject(err); + processRounds(); + }); + } else { + processRounds(); + } + }); + }); +} + +function savePredictedRatingToDB(pdgaNumber, predictedRating, stdDev = null) { + return new Promise((resolve, reject) => { + db.run( + 'UPDATE players SET predicted_rating = ?, std_dev = ? WHERE pdga_number = ?', + [predictedRating, stdDev, pdgaNumber], + function(err) { + if (err) reject(err); + else resolve(); + } + ); + }); +} + +module.exports = { + getPlayerFromDB, + savePlayerToDB, + getRatingHistoryFromDB, + saveRatingHistoryToDB, + getRoundHistoryFromDB, + getLastRoundUpdateDate, + updateLastRoundUpdateDate, + saveRoundHistoryToDB, + savePredictedRatingToDB +}; diff --git a/src/routes/courses.js b/src/routes/courses.js new file mode 100644 index 0000000..69dbd01 --- /dev/null +++ b/src/routes/courses.js @@ -0,0 +1,341 @@ +const express = require('express'); +const router = express.Router(); +const { db } = require('../db'); +const { getAllCoursesFromDB, getLayoutsForCourse, updateLayoutRating } = require('../models/course'); +const { launchBrowser } = require('../scrapers/browser'); +const { layoutEventCache, scrapeCourseDirectory, scrapeCourseLayouts, scrapeEventResults } = require('../scrapers/course-puppeteer'); + +// Request locking to prevent concurrent scrapes of the same resource +const activeScrapes = new Map(); + +router.get('/api/courses', async (req, res) => { + try { + const courses = await getAllCoursesFromDB(); + res.json(courses); + } catch (error) { + console.error('Error fetching courses:', error.message); + res.status(500).json({ error: 'Failed to fetch courses' }); + } +}); + +router.get('/api/layouts/:courseId', async (req, res) => { + try { + const { courseId } = req.params; + const layouts = await getLayoutsForCourse(courseId); + res.json(layouts); + } catch (error) { + console.error('Error fetching layouts:', error.message); + res.status(500).json({ error: 'Failed to fetch layouts' }); + } +}); + +router.post('/api/scrape-courses', async (req, res) => { + req.setTimeout(600000); + res.setTimeout(600000); + + let browser = null; + try { + console.log('Starting course directory scraping...'); + + browser = await launchBrowser(); + + const courses = await scrapeCourseDirectory(browser); + + await browser.close(); + browser = null; + + res.json({ + success: true, + coursesFound: courses.length, + message: `Successfully scraped ${courses.length} courses` + }); + } catch (error) { + console.error('Error scraping courses:', error.message); + if (browser) { + try { await browser.close(); } catch (e) {} + } + res.status(500).json({ error: 'Failed to scrape courses' }); + } +}); + +router.post('/api/scrape-layouts/:courseId', async (req, res) => { + req.setTimeout(600000); + res.setTimeout(600000); + + const { courseId } = req.params; + const lockKey = `layout-${courseId}`; + + if (activeScrapes.has(lockKey)) { + console.log(`âš ī¸ Scrape already in progress for course ${courseId}`); + return res.status(409).json({ + error: 'Scrape already in progress for this course', + message: 'Please wait for the current scrape to complete' + }); + } + + let browser = null; + + const scrapePromise = (async () => { + try { + const course = await new Promise((resolve, reject) => { + db.get('SELECT * FROM courses WHERE id = ?', [courseId], (err, row) => { + if (err) reject(err); + else resolve(row); + }); + }); + + if (!course) { + throw new Error('Course not found'); + } + + console.log(`Starting layout scraping for course: ${course.name}`); + + browser = await launchBrowser(); + + const layouts = await scrapeCourseLayouts(browser, course.link, courseId); + + console.log(`\n=== Starting event results scraping for ${course.name} ===`); + + const courseIdInt = parseInt(courseId); + const layoutData = layoutEventCache.get(courseIdInt); + + if (!layoutData || layoutData.length === 0) { + console.log('No event data found in cache, skipping event results scraping'); + await browser.close(); + browser = null; + + return { + success: true, + layoutsFound: layouts.length, + message: `Successfully scraped ${layouts.length} layouts for ${course.name} (no events found)` + }; + } + + const eventGroups = {}; + layoutData.forEach(layout => { + if (layout.eventUrl) { + if (!eventGroups[layout.eventUrl]) { + eventGroups[layout.eventUrl] = []; + } + eventGroups[layout.eventUrl].push(layout); + } + }); + + const allLayoutRatings = {}; + + let eventCount = 0; + for (const eventUrl in eventGroups) { + eventCount++; + const eventLayouts = eventGroups[eventUrl]; + + const results = await scrapeEventResults(browser, eventUrl, eventLayouts); + + for (const layoutKey in results) { + const layoutDataResult = results[layoutKey]; + + if (!allLayoutRatings[layoutKey]) { + allLayoutRatings[layoutKey] = { + name: layoutDataResult.name, + par: layoutDataResult.par, + allRatings: [], + latestDate: layoutDataResult.eventDate + }; + } else { + if (layoutDataResult.eventDate && (!allLayoutRatings[layoutKey].latestDate || + new Date(layoutDataResult.eventDate) > new Date(allLayoutRatings[layoutKey].latestDate))) { + allLayoutRatings[layoutKey].latestDate = layoutDataResult.eventDate; + } + } + + allLayoutRatings[layoutKey].allRatings.push(...layoutDataResult.ratings); + } + + await new Promise(resolve => setTimeout(resolve, 2000)); + } + + console.log(`\n=== Calculating final ratings for all layouts ===`); + + let savedCount = 0; + for (const layoutKey in allLayoutRatings) { + const layoutDataResult = allLayoutRatings[layoutKey]; + + if (layoutDataResult.allRatings.length > 0) { + const meanRating = Math.round( + layoutDataResult.allRatings.reduce((sum, r) => sum + r, 0) / layoutDataResult.allRatings.length + ); + + console.log(`Layout: ${layoutDataResult.name} (Par ${layoutDataResult.par})`); + console.log(` Total ratings collected: ${layoutDataResult.allRatings.length}`); + console.log(` Mean rating: ${meanRating}`); + console.log(` Last played: ${layoutDataResult.latestDate || 'Unknown'}`); + + try { + const changes = await updateLayoutRating( + courseIdInt, + layoutDataResult.name, + layoutDataResult.par, + meanRating, + layoutDataResult.allRatings.length, + layoutDataResult.latestDate + ); + if (changes > 0) { + console.log(` ✓ Updated in database`); + savedCount++; + } + } catch (err) { + console.error(` Error updating layout ${layoutDataResult.name}:`, err.message); + } + } + } + + await browser.close(); + browser = null; + + return { + success: true, + layoutsFound: layouts.length, + eventsProcessed: Object.keys(eventGroups).length, + layoutsWithRatings: savedCount, + message: `Successfully scraped ${layouts.length} layouts and processed ${Object.keys(eventGroups).length} events for ${course.name}` + }; + } catch (error) { + console.error('Error scraping layouts:', error.message); + if (browser) { + try { await browser.close(); } catch (e) {} + } + throw error; + } + })(); + + activeScrapes.set(lockKey, scrapePromise); + + try { + const result = await scrapePromise; + res.json(result); + } catch (error) { + res.status(500).json({ + error: 'Failed to scrape layouts', + message: error.message + }); + } finally { + activeScrapes.delete(lockKey); + console.log(`✓ Released lock for course ${courseId}`); + } +}); + +router.post('/api/scrape-event-results/:courseId', async (req, res) => { + req.setTimeout(600000); + res.setTimeout(600000); + + let browser = null; + try { + const { courseId } = req.params; + const courseIdInt = parseInt(courseId); + + const layoutData = layoutEventCache.get(courseIdInt); + + if (!layoutData || layoutData.length === 0) { + return res.status(404).json({ + error: 'No layout data found in cache. Please scrape layouts first.' + }); + } + + browser = await launchBrowser(); + + const eventGroups = {}; + layoutData.forEach(layout => { + if (layout.eventUrl) { + if (!eventGroups[layout.eventUrl]) { + eventGroups[layout.eventUrl] = []; + } + eventGroups[layout.eventUrl].push(layout); + } + }); + + const allLayoutRatings = {}; + + let eventCount = 0; + for (const eventUrl in eventGroups) { + eventCount++; + const eventLayouts = eventGroups[eventUrl]; + + const results = await scrapeEventResults(browser, eventUrl, eventLayouts); + + for (const layoutKey in results) { + const ld = results[layoutKey]; + + if (!allLayoutRatings[layoutKey]) { + allLayoutRatings[layoutKey] = { + name: ld.name, + par: ld.par, + allRatings: [], + latestDate: ld.eventDate + }; + } else { + if (ld.eventDate && (!allLayoutRatings[layoutKey].latestDate || + new Date(ld.eventDate) > new Date(allLayoutRatings[layoutKey].latestDate))) { + allLayoutRatings[layoutKey].latestDate = ld.eventDate; + } + } + + allLayoutRatings[layoutKey].allRatings.push(...ld.ratings); + } + + await new Promise(resolve => setTimeout(resolve, 2000)); + } + + await browser.close(); + browser = null; + + console.log(`\n=== Calculating final ratings for all layouts ===`); + + let savedCount = 0; + for (const layoutKey in allLayoutRatings) { + const ld = allLayoutRatings[layoutKey]; + + if (ld.allRatings.length > 0) { + const meanRating = Math.round( + ld.allRatings.reduce((sum, r) => sum + r, 0) / ld.allRatings.length + ); + + console.log(`Layout: ${ld.name} (Par ${ld.par})`); + console.log(` Total ratings collected: ${ld.allRatings.length}`); + console.log(` Mean rating: ${meanRating}`); + console.log(` Last played: ${ld.latestDate || 'Unknown'}`); + + try { + const changes = await updateLayoutRating( + courseIdInt, + ld.name, + ld.par, + meanRating, + ld.allRatings.length, + ld.latestDate + ); + if (changes > 0) { + console.log(` ✓ Updated in database`); + savedCount++; + } + } catch (err) { + console.error(` Error updating layout ${ld.name}:`, err.message); + } + } + } + + res.json({ + success: true, + eventsProcessed: Object.keys(eventGroups).length, + uniqueLayouts: Object.keys(allLayoutRatings).length, + layoutsSaved: savedCount, + message: `Processed ${Object.keys(eventGroups).length} events, updated ${savedCount} layouts` + }); + } catch (error) { + console.error('Error scraping event results:', error.message); + if (browser) { + try { await browser.close(); } catch (e) {} + } + res.status(500).json({ error: 'Failed to scrape event results' }); + } +}); + +module.exports = router; diff --git a/src/routes/pages.js b/src/routes/pages.js new file mode 100644 index 0000000..33cfb9b --- /dev/null +++ b/src/routes/pages.js @@ -0,0 +1,13 @@ +const express = require('express'); +const path = require('path'); +const router = express.Router(); + +router.get('/', (req, res) => { + res.sendFile(path.join(__dirname, '../../index.html')); +}); + +router.get('/courses.html', (req, res) => { + res.sendFile(path.join(__dirname, '../../courses.html')); +}); + +module.exports = router; diff --git a/src/routes/players.js b/src/routes/players.js new file mode 100644 index 0000000..209cf85 --- /dev/null +++ b/src/routes/players.js @@ -0,0 +1,452 @@ +const express = require('express'); +const router = express.Router(); +const { db } = require('../db'); +const { getPlayerFromDB, savePlayerToDB, getRatingHistoryFromDB, saveRatingHistoryToDB, getRoundHistoryFromDB, getLastRoundUpdateDate, updateLastRoundUpdateDate, saveRoundHistoryToDB, savePredictedRatingToDB } = require('../models/player'); +const { fetchPlayerDataHTTP, parsePlayerData, fetchRatingHistory, parseRatingHistory } = require('../scrapers/player-http'); +const { getOfficialRatingHistory, getOptimizedPlayerRounds } = require('../scrapers/player-puppeteer'); +const { launchBrowser } = require('../scrapers/browser'); +const { getPlayerDataFromDB, scrapePDGARating, getAllRatingsFromDB, refreshAllPlayersInDB, getPredictedRatingFromDB } = require('../services/player-service'); +const { calculatePredictedRating } = require('../services/rating-calculator'); + +router.get('/api/ratings', async (req, res) => { + try { + const ratings = await getAllRatingsFromDB(); + res.json(ratings); + } catch (error) { + res.status(500).json({ error: 'Failed to fetch ratings' }); + } +}); + +router.get('/api/ratings/progress', (req, res) => { + res.writeHead(200, { + 'Content-Type': 'text/event-stream', + 'Cache-Control': 'no-cache', + 'Connection': 'keep-alive', + 'Access-Control-Allow-Origin': '*', + 'Access-Control-Allow-Headers': 'Cache-Control' + }); + + const progressCallback = (progress) => { + res.write(`data: ${JSON.stringify(progress)}\n\n`); + }; + + getAllRatingsFromDB(progressCallback).then(ratings => { + res.write(`data: ${JSON.stringify({ status: 'complete', ratings })}\n\n`); + res.end(); + }).catch(error => { + res.write(`data: ${JSON.stringify({ status: 'error', error: error.message })}\n\n`); + res.end(); + }); + + req.on('close', () => { + res.end(); + }); +}); + +router.post('/api/populate-database', (req, res) => { + res.writeHead(200, { + 'Content-Type': 'text/event-stream', + 'Cache-Control': 'no-cache', + 'Connection': 'keep-alive', + 'Access-Control-Allow-Origin': '*', + }); + + const progressCallback = (progress) => { + res.write(`data: ${JSON.stringify(progress)}\n\n`); + }; + + console.log('=== Starting database population from database players ==='); + + refreshAllPlayersInDB(progressCallback).then(ratings => { + console.log(`=== Database population complete: ${ratings.length} players refreshed ===`); + res.write(`data: ${JSON.stringify({ status: 'complete', ratings, message: `Successfully refreshed ${ratings.length} players` })}\n\n`); + res.end(); + }).catch(error => { + console.error('Error populating database:', error); + res.write(`data: ${JSON.stringify({ status: 'error', message: error.message })}\n\n`); + res.end(); + }); +}); + +router.get('/api/database-status', async (req, res) => { + try { + const playerCount = await new Promise((resolve, reject) => { + db.get('SELECT COUNT(*) as count FROM players', [], (err, row) => { + if (err) reject(err); + else resolve(row.count); + }); + }); + + res.json({ + playersInDB: playerCount, + needsPopulation: playerCount === 0 + }); + } catch (error) { + res.status(500).json({ error: 'Failed to check database status' }); + } +}); + +router.get('/api/load-all-players', (req, res) => { + res.writeHead(200, { + 'Content-Type': 'text/event-stream', + 'Cache-Control': 'no-cache', + 'Connection': 'keep-alive', + 'Access-Control-Allow-Origin': '*', + 'Access-Control-Allow-Headers': 'Cache-Control' + }); + + const progressCallback = (progress) => { + res.write(`data: ${JSON.stringify(progress)}\n\n`); + }; + + refreshAllPlayersInDB(progressCallback).then(ratings => { + res.write(`data: ${JSON.stringify({ status: 'complete', ratings })}\n\n`); + res.end(); + }).catch(error => { + res.write(`data: ${JSON.stringify({ status: 'error', error: error.message })}\n\n`); + res.end(); + }); + + req.on('close', () => { + res.end(); + }); +}); + +router.get('/api/rating-history/:pdgaNumber', async (req, res) => { + try { + const { pdgaNumber } = req.params; + + const cachedHistory = await getRatingHistoryFromDB(pdgaNumber); + if (cachedHistory && cachedHistory.length > 0) { + console.log(`Using cached rating history from DB for PDGA ${pdgaNumber}`); + const formattedHistory = cachedHistory.map(row => ({ + date: row.date, + rating: row.rating, + displayDate: new Date(row.date).toLocaleDateString('en-US', { + day: '2-digit', + month: 'short', + year: 'numeric' + }) + })); + + res.json({ + pdgaNumber: parseInt(pdgaNumber), + history: formattedHistory + }); + return; + } + + console.log(`Fetching rating history for PDGA ${pdgaNumber}...`); + const html = await fetchRatingHistory(pdgaNumber); + const history = parseRatingHistory(html); + + try { + await saveRatingHistoryToDB(pdgaNumber, history); + console.log(`Saved rating history for PDGA ${pdgaNumber} to database`); + } catch (dbErr) { + console.error(`Failed to save rating history to database:`, dbErr.message); + } + + res.json({ + pdgaNumber: parseInt(pdgaNumber), + history + }); + } catch (error) { + console.error('Error fetching rating history:', error.message); + res.status(500).json({ error: 'Failed to fetch rating history' }); + } +}); + +router.post('/api/clear-cache', (req, res) => { + try { + db.run('UPDATE players SET last_updated = datetime("now", "-25 hours"), last_round_update = NULL', (err) => { + if (err) { + console.error('Error clearing database cache:', err); + res.status(500).json({ error: 'Failed to clear database cache' }); + return; + } + + console.log('Database cache cleared - all players will be refreshed on next request'); + res.json({ + success: true, + message: 'Cache cleared - database reset' + }); + }); + } catch (error) { + console.error('Error clearing cache:', error); + res.status(500).json({ error: 'Failed to clear cache' }); + } +}); + +router.get('/api/search-player/:pdgaNumber', async (req, res) => { + try { + const { pdgaNumber } = req.params; + console.log(`Searching for player with PDGA number ${pdgaNumber}`); + + const existingPlayer = await getPlayerFromDB(pdgaNumber); + if (existingPlayer) { + return res.json({ + alreadyExists: true, + player: { + pdgaNumber: existingPlayer.pdga_number, + name: existingPlayer.name, + rating: existingPlayer.current_rating, + ratingChange: existingPlayer.rating_change + } + }); + } + + const html = await fetchPlayerDataHTTP(pdgaNumber); + const playerData = parsePlayerData(html, pdgaNumber); + + if (playerData.name === 'Unknown' || !playerData.name) { + return res.status(404).json({ error: 'Player not found' }); + } + + res.json({ + alreadyExists: false, + player: playerData + }); + } catch (error) { + console.error('Error searching for player:', error.message); + res.status(500).json({ error: 'Failed to search for player' }); + } +}); + +router.post('/api/add-player', async (req, res) => { + try { + const { pdgaNumber } = req.body; + + if (!pdgaNumber) { + return res.status(400).json({ error: 'PDGA number is required' }); + } + + console.log(`Adding player with PDGA number ${pdgaNumber}`); + + const existingPlayer = await getPlayerFromDB(pdgaNumber); + if (existingPlayer) { + return res.status(409).json({ + error: 'Player already exists', + player: { + pdgaNumber: existingPlayer.pdga_number, + name: existingPlayer.name, + rating: existingPlayer.current_rating + } + }); + } + + const html = await fetchPlayerDataHTTP(pdgaNumber); + const playerData = parsePlayerData(html, pdgaNumber); + + if (playerData.name === 'Unknown' || !playerData.name) { + return res.status(404).json({ error: 'Player not found' }); + } + + await savePlayerToDB(playerData); + + console.log(`Successfully added player: ${playerData.name} (#${pdgaNumber})`); + + res.json({ + success: true, + player: playerData + }); + } catch (error) { + console.error('Error adding player:', error.message); + res.status(500).json({ error: 'Failed to add player' }); + } +}); + +router.post('/api/refresh-player/:pdgaNumber', async (req, res) => { + try { + const { pdgaNumber } = req.params; + console.log(`Manually refreshing player data for PDGA ${pdgaNumber}`); + + const html = await fetchPlayerDataHTTP(pdgaNumber); + const playerData = parsePlayerData(html, pdgaNumber); + + await savePlayerToDB(playerData); + + res.json({ + success: true, + player: playerData + }); + } catch (error) { + console.error('Error refreshing player data:', error.message); + res.status(500).json({ error: 'Failed to refresh player data' }); + } +}); + +router.post('/api/refresh-rating-history/:pdgaNumber', async (req, res) => { + try { + const { pdgaNumber } = req.params; + console.log(`=== Manually refreshing rating history for PDGA ${pdgaNumber} ===`); + + const startTime = Date.now(); + const html = await fetchRatingHistory(pdgaNumber); + const fetchTime = Date.now() - startTime; + + console.log(`HTML fetch completed in ${fetchTime}ms, received ${html.length} bytes`); + + const parseStartTime = Date.now(); + const history = parseRatingHistory(html); + const parseTime = Date.now() - parseStartTime; + + console.log(`Parsing completed in ${parseTime}ms, found ${history.length} history entries`); + + if (history.length > 0) { + console.log('Sample history entries:', history.slice(0, 3)); + } else { + console.log('No history entries found. HTML sample:', html.substring(0, 500)); + } + + const dbStartTime = Date.now(); + await saveRatingHistoryToDB(pdgaNumber, history); + const dbTime = Date.now() - dbStartTime; + + console.log(`Database save completed in ${dbTime}ms`); + + const formattedHistory = history.map(entry => ({ + date: entry.date, + rating: entry.rating, + displayDate: entry.displayDate + })); + + console.log(`=== Rating history refresh completed for PDGA ${pdgaNumber} ===`); + + res.json({ + success: true, + history: formattedHistory + }); + } catch (error) { + console.error(`=== Error refreshing rating history for PDGA ${req.params.pdgaNumber} ===`); + console.error('Error type:', error.constructor.name); + console.error('Error message:', error.message); + + res.status(500).json({ + error: 'Failed to refresh rating history', + details: error.message, + code: error.code + }); + } +}); + +router.post('/api/refresh-round-history/:pdgaNumber', async (req, res) => { + req.setTimeout(600000); + res.setTimeout(600000); + + let browser = null; + const { pdgaNumber } = req.params; + try { + const lastRoundUpdate = await getLastRoundUpdateDate(pdgaNumber); + const sinceDate = lastRoundUpdate ? new Date(lastRoundUpdate) : null; + + if (sinceDate) { + const hoursSinceUpdate = (Date.now() - sinceDate.getTime()) / (1000 * 60 * 60); + if (hoursSinceUpdate < 24) { + const hoursRemaining = Math.ceil(24 - hoursSinceUpdate); + return res.status(429).json({ + error: 'Rate limit exceeded', + message: `Prediction can only be refreshed once every 24 hours. Please try again in ${hoursRemaining} hour(s).`, + lastUpdate: sinceDate.toISOString(), + hoursRemaining: hoursRemaining + }); + } + } + + const isIncremental = !!sinceDate; + + console.log(`${isIncremental ? 'Incrementally updating' : 'Fully refreshing'} round history for PDGA ${pdgaNumber}${sinceDate ? ` since ${sinceDate.toDateString()}` : ''}`); + + browser = await launchBrowser(); + + let officialHistory; + try { + officialHistory = await getOfficialRatingHistory(browser, pdgaNumber); + if (officialHistory.length > 0) { + await saveRatingHistoryToDB(pdgaNumber, officialHistory); + } + } catch (historyError) { + console.error('Failed to fetch official history:', historyError.message); + officialHistory = []; + } + + let allRounds = []; + try { + console.log(`Using optimized approach: /details + new tournaments only for PDGA ${pdgaNumber}...`); + allRounds = await getOptimizedPlayerRounds(browser, pdgaNumber); + + if (allRounds.length > 0) { + const roundsForDB = allRounds.map(round => ({ + rating: round.rating, + date: round.date, + competition: round.competition + })); + + await saveRoundHistoryToDB(pdgaNumber, roundsForDB, false); + console.log(`✓ Saved ${allRounds.length} rounds using optimized approach`); + + await updateLastRoundUpdateDate(pdgaNumber); + } else { + console.log('ℹ No rounds found'); + } + } catch (detailsError) { + console.error('Failed to fetch rounds using optimized approach:', detailsError.message); + allRounds = []; + } + + await browser.close(); + browser = null; + + const dbRounds = await getRoundHistoryFromDB(pdgaNumber); + const roundsForPrediction = dbRounds.map(round => ({ + rating: round.rating, + date: new Date(round.date), + competition: round.competition_name + })); + + const result = calculatePredictedRating(roundsForPrediction); + + await savePredictedRatingToDB(pdgaNumber, result.rating, result.stdDev); + + const officialCount = allRounds.filter(r => r.source === 'official').length; + const newCount = allRounds.filter(r => r.source === 'new').length; + + res.json({ + success: true, + predictedRating: result.rating, + stdDev: result.stdDev, + debugLog: result.debugLog, + totalRounds: roundsForPrediction.length, + officialRounds: officialCount, + newRounds: newCount, + approach: 'optimized', + message: `Used /details (${officialCount} rounds) + new tournaments (${newCount} rounds)` + }); + } catch (error) { + console.error(`=== Error refreshing round history for PDGA ${pdgaNumber} ===`); + console.error('Error type:', error.constructor.name); + console.error('Error message:', error.message); + + if (browser) { + try { + await browser.close(); + } catch (closeError) { + console.error('Error closing browser:', closeError.message); + } + } + + res.status(500).json({ + error: 'Failed to refresh round history', + details: error.message, + errorType: error.constructor.name, + timestamp: new Date().toISOString(), + suggestion: error.message.includes('socket hang up') ? + 'Rate limited by PDGA - try again in a few minutes.' : + error.message.includes('timeout') ? + 'PDGA pages are loading slowly - try again later.' : + 'Tournament scraping failed - check server logs for details' + }); + } +}); + +module.exports = router; diff --git a/src/scrapers/browser.js b/src/scrapers/browser.js new file mode 100644 index 0000000..0887a82 --- /dev/null +++ b/src/scrapers/browser.js @@ -0,0 +1,25 @@ +const puppeteer = require('puppeteer'); + +async function launchBrowser() { + try { + return await puppeteer.launch({ + headless: "new", + args: [ + '--no-sandbox', + '--disable-setuid-sandbox', + '--disable-dev-shm-usage', + '--disable-accelerated-2d-canvas', + '--no-first-run', + '--no-zygote', + '--disable-gpu' + ] + }); + } catch (err) { + return await puppeteer.launch({ + headless: true, + args: ['--no-sandbox', '--disable-dev-shm-usage'] + }); + } +} + +module.exports = { launchBrowser }; diff --git a/src/scrapers/course-puppeteer.js b/src/scrapers/course-puppeteer.js new file mode 100644 index 0000000..04240f9 --- /dev/null +++ b/src/scrapers/course-puppeteer.js @@ -0,0 +1,349 @@ +const { saveCourseToDB, saveLayoutToDB } = require('../models/course'); + +// In-memory cache for layout-division-event mapping +const layoutEventCache = new Map(); + +function getLayoutEventCache() { + return layoutEventCache; +} + +async function scrapeCourseDirectory(browser) { + console.log('=== Scraping Swedish courses from PDGA course directory ==='); + const page = await browser.newPage(); + const allCourses = []; + let pageNumber = 0; + let hasMorePages = true; + + try { + while (hasMorePages) { + const url = `https://www.pdga.com/course-directory/advanced?title=&field_course_location_country=SE&field_course_location_locality=&field_course_location_administrative_area=All&field_course_location_postal_code=&field_course_type_value=All&rating_value=All&field_course_holes_value=18-100&field_course_total_length_value=All&field_course_target_type_value=All&field_course_tee_type_value=All&field_location_type_value=All&field_course_camping_value=All&field_course_facilities_value=All&field_course_fees_value=All&field_course_handicap_value=All&field_course_private_value=All&field_course_signage_value=All&field_cart_friendly_value=All&page=${pageNumber}`; + + console.log(`Scraping page ${pageNumber}...`); + await page.goto(url, { waitUntil: 'networkidle2', timeout: 45000 }); + await page.waitForTimeout(1000); + + const courses = await page.evaluate(() => { + const courseData = []; + const rows = document.querySelectorAll('table tbody tr'); + + rows.forEach(row => { + const titleCell = row.querySelector('td.views-field-title'); + const locationCell = row.querySelector('td.views-field-field-course-location'); + + if (titleCell) { + const link = titleCell.querySelector('a'); + if (link) { + courseData.push({ + name: link.innerText.trim(), + link: 'https://www.pdga.com' + link.getAttribute('href'), + city: locationCell ? locationCell.innerText.trim() : 'Unknown' + }); + } + } + }); + + return courseData; + }); + + if (courses.length === 0) { + console.log(`No courses found on page ${pageNumber}, stopping pagination`); + hasMorePages = false; + } else { + console.log(`Found ${courses.length} courses on page ${pageNumber}`); + allCourses.push(...courses); + + for (const course of courses) { + try { + await saveCourseToDB(course); + console.log(`✓ Saved course: ${course.name} (${course.city})`); + } catch (err) { + console.error(`Error saving course ${course.name}:`, err.message); + } + } + + pageNumber++; + + if (hasMorePages) { + console.log('Waiting 2s before next page...'); + await new Promise(resolve => setTimeout(resolve, 2000)); + } + } + } + + console.log(`✓ Total courses scraped: ${allCourses.length} across ${pageNumber} pages`); + + } catch (error) { + console.error('Error scraping course directory:', error.message); + } finally { + await page.close(); + } + + return allCourses; +} + +async function scrapeCourseLayouts(browser, courseLink, courseId) { + console.log(`\n=== Scraping layouts from: ${courseLink} ===`); + const page = await browser.newPage(); + const layouts = []; + + try { + await page.goto(courseLink, { waitUntil: 'networkidle2', timeout: 45000 }); + await page.waitForTimeout(1000); + + const layoutsTabClicked = await page.evaluate(() => { + const selectors = [ + 'a.quicktabs-tab-course_node-2', + 'li.quicktabs-tab-course_node-2 a', + 'a[href*="layouts"]', + '.quicktabs-tabs a', + 'ul.quicktabs-tabs a', + '.quicktabs-wrapper a' + ]; + + for (const selector of selectors) { + const tabs = document.querySelectorAll(selector); + for (const tab of tabs) { + const text = tab.innerText?.trim(); + if (text && (text.includes('Layouts') || text.includes('Layout'))) { + tab.click(); + return true; + } + } + } + return false; + }); + + if (layoutsTabClicked) { + console.log('✓ Layouts tab found and clicked'); + await page.waitForTimeout(3000); + } else { + console.warn('âš ī¸ Layouts tab not found - may be on a single-layout course page'); + } + + const extractedLayouts = await page.evaluate(() => { + const layoutData = []; + const tournamentsDiv = document.querySelector('div.tournaments'); + + if (!tournamentsDiv) { + return layoutData; + } + + const tournamentCourses = tournamentsDiv.querySelectorAll('details.tournament-course'); + + tournamentCourses.forEach((details) => { + const resultsDiv = details.querySelector('div.results'); + const resultsLink = resultsDiv ? resultsDiv.querySelector('a') : null; + const eventUrl = resultsLink ? resultsLink.getAttribute('href') : null; + const fullEventUrl = eventUrl ? 'https://www.pdga.com' + eventUrl : null; + + const layoutsDiv = details.querySelector('div.layouts'); + if (!layoutsDiv) { + return; + } + + const layoutDivs = layoutsDiv.querySelectorAll('div.layout'); + + layoutDivs.forEach((layoutDiv) => { + const h4WithClass = layoutDiv.querySelector('h4.title'); + const h4Any = layoutDiv.querySelector('h4'); + + let layoutName = ''; + if (h4WithClass) { + layoutName = (h4WithClass.textContent || h4WithClass.innerText || '').trim(); + } else if (h4Any) { + layoutName = (h4Any.textContent || h4Any.innerText || '').trim(); + } + + const allText = layoutDiv.textContent || layoutDiv.innerText || ''; + + const parPatterns = [ + /Par[:\s]+(\d+)/i, + /Par\s*=\s*(\d+)/i, + /\(Par\s+(\d+)\)/i, + /Total Par:\s*(\d+)/i + ]; + + let par = null; + for (const pattern of parPatterns) { + const match = allText.match(pattern); + if (match) { + par = parseInt(match[1]); + break; + } + } + + const divisionsLi = layoutDiv.querySelector('li.divisions'); + let divisions = []; + if (divisionsLi) { + const divisionsText = (divisionsLi.textContent || '').replace('Divisions:', '').trim(); + divisions = divisionsText.split(/[,\s]+/).filter(d => d.length > 0); + } + + if (layoutName && par && !isNaN(par) && par > 0) { + layoutData.push({ + name: layoutName, + par: par, + divisions: divisions, + eventUrl: fullEventUrl + }); + } + }); + }); + + return layoutData; + }); + + layouts.push(...extractedLayouts); + + const courseIdInt = typeof courseId === 'string' ? parseInt(courseId) : courseId; + layoutEventCache.set(courseIdInt, layouts); + + console.log(`✓ Successfully parsed ${layouts.length} layouts from course page`); + + const uniqueLayouts = []; + const seen = new Set(); + + for (const layout of layouts) { + const key = `${layout.name}|${layout.par}`; + if (!seen.has(key)) { + seen.add(key); + uniqueLayouts.push(layout); + } + } + + if (uniqueLayouts.length < layouts.length) { + console.log(`â„šī¸ Deduplicated to ${uniqueLayouts.length} unique layouts`); + } + + for (const layout of uniqueLayouts) { + try { + await saveLayoutToDB(courseId, layout); + console.log(` ✓ Saved layout: ${layout.name} (Par ${layout.par})`); + } catch (err) { + console.error(` ✗ Error saving layout ${layout.name}:`, err.message); + } + } + + } catch (error) { + console.error('Error scraping course layouts:', error.message); + } finally { + await page.close(); + } + + return layouts; +} + +async function scrapeEventResults(browser, eventUrl, layoutsWithDivisions) { + const page = await browser.newPage(); + const layoutRatings = {}; + + try { + await page.goto(eventUrl, { waitUntil: 'networkidle2', timeout: 45000 }); + await page.waitForTimeout(1000); + + const eventDateRaw = await page.evaluate(() => { + const allText = document.body.textContent; + const datePattern = /\d{1,2}-[A-Z][a-z]{2}-\d{4}/; + const match = allText.match(datePattern); + return match ? match[0] : null; + }); + + let eventDate = null; + if (eventDateRaw) { + try { + const parsedDate = new Date(eventDateRaw); + if (!isNaN(parsedDate.getTime())) { + eventDate = parsedDate.toISOString().split('T')[0]; + } + } catch (e) { + // Ignore date parsing errors + } + } + + for (const layout of layoutsWithDivisions) { + const layoutKey = `${layout.name}|${layout.par}`; + const ratingsForLayout = []; + + for (const division of layout.divisions) { + const divisionData = await page.evaluate((divisionName, targetPar) => { + const divisionH3 = document.querySelector(`h3#${divisionName}`); + if (!divisionH3) { + return { found: false, ratings: [] }; + } + + const detailsTag = divisionH3.closest('details'); + if (!detailsTag) { + return { found: false, ratings: [] }; + } + + const table = detailsTag.querySelector('table.results'); + if (!table) { + return { found: false, ratings: [] }; + } + + const ratings = []; + const rows = table.querySelectorAll('tbody tr'); + + rows.forEach(row => { + const roundCells = row.querySelectorAll('td.round'); + + roundCells.forEach(roundCell => { + const scoreText = (roundCell.textContent || '').trim(); + const scoreMatch = scoreText.match(/^(\d+)$/); + + if (scoreMatch) { + const scoreValue = parseInt(scoreMatch[1]); + + if (scoreValue === targetPar) { + const ratingCell = roundCell.nextElementSibling; + + if (ratingCell && ratingCell.classList.contains('round-rating')) { + const ratingText = (ratingCell.textContent || '').trim(); + const rating = parseInt(ratingText); + + if (!isNaN(rating) && rating > 0) { + ratings.push(rating); + } + } + } + } + }); + }); + + return { found: true, ratings: ratings }; + }, division, layout.par); + + if (divisionData.found && divisionData.ratings.length > 0) { + ratingsForLayout.push(...divisionData.ratings); + } + } + + if (ratingsForLayout.length > 0) { + const meanRating = ratingsForLayout.reduce((sum, r) => sum + r, 0) / ratingsForLayout.length; + layoutRatings[layoutKey] = { + name: layout.name, + par: layout.par, + ratings: ratingsForLayout, + count: ratingsForLayout.length, + meanRating: Math.round(meanRating), + eventDate: eventDate + }; + } + } + + } catch (error) { + console.error('Error scraping event results:', error.message); + } finally { + await page.close(); + } + + return layoutRatings; +} + +module.exports = { + layoutEventCache, + getLayoutEventCache, + scrapeCourseDirectory, + scrapeCourseLayouts, + scrapeEventResults +}; diff --git a/src/scrapers/player-http.js b/src/scrapers/player-http.js new file mode 100644 index 0000000..2d6f2be --- /dev/null +++ b/src/scrapers/player-http.js @@ -0,0 +1,224 @@ +const https = require('https'); + +async function fetchPlayerDataHTTP(pdgaNumber) { + return new Promise((resolve, reject) => { + const options = { + hostname: 'www.pdga.com', + port: 443, + path: `/player/${pdgaNumber}`, + method: 'GET', + headers: { + 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36' + }, + timeout: 30000 + }; + + const req = https.request(options, (res) => { + let data = ''; + res.on('data', (chunk) => { + data += chunk; + }); + + res.on('end', () => { + if (res.statusCode === 200) { + resolve(data); + } else { + const rateLimitInfo = { + statusCode: res.statusCode, + headers: res.headers + }; + + console.log(`PDGA Response Status for #${pdgaNumber}: ${res.statusCode}`); + console.log('Response Headers:', JSON.stringify(res.headers, null, 2)); + + if (res.headers['retry-after']) { + console.log(`Retry-After header: ${res.headers['retry-after']}`); + } + if (res.headers['x-ratelimit-limit']) { + console.log(`Rate Limit: ${res.headers['x-ratelimit-limit']}`); + } + if (res.headers['x-ratelimit-remaining']) { + console.log(`Rate Limit Remaining: ${res.headers['x-ratelimit-remaining']}`); + } + if (res.headers['x-ratelimit-reset']) { + console.log(`Rate Limit Reset: ${res.headers['x-ratelimit-reset']}`); + } + + const error = new Error(`HTTP ${res.statusCode}`); + error.rateLimitInfo = rateLimitInfo; + reject(error); + } + }); + }); + + req.on('error', (error) => { + console.log(`Request error for PDGA #${pdgaNumber}:`, error.code, error.message); + if (error.code === 'ECONNRESET') { + console.log('Connection reset - likely rate limited by PDGA'); + } + reject(error); + }); + + req.on('timeout', () => { + req.destroy(); + reject(new Error('Request timeout')); + }); + + req.setTimeout(30000); + req.end(); + }); +} + +function parsePlayerData(html, pdgaNumber) { + try { + const nameMatch = html.match(/<title>([^<]+?)\s*\|\s*Professional Disc Golf Association/i); + const name = nameMatch ? nameMatch[1].trim() : 'Unknown'; + + const ratingMatch = html.match(/Current Rating:[^>]*>\s*(\d+)/i); + const rating = ratingMatch ? parseInt(ratingMatch[1]) : 0; + + const changeMatch = html.match(/Current Rating:[\s\S]*?([+\-]\d+)[\s\S]*?\(as of/i); + const ratingChange = changeMatch ? parseInt(changeMatch[1]) : null; + + return { + pdgaNumber, + name: name.replace(/\s*#\d+$/, ''), + rating, + ratingChange, + predictedRating: null + }; + } catch (error) { + console.error(`Error parsing data for PDGA ${pdgaNumber}:`, error.message); + return { + pdgaNumber, + name: 'Error', + rating: 0, + ratingChange: null, + predictedRating: null + }; + } +} + +async function fetchRatingHistory(pdgaNumber) { + return new Promise((resolve, reject) => { + const options = { + hostname: 'www.pdga.com', + port: 443, + path: `/player/${pdgaNumber}/history`, + method: 'GET', + headers: { + 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36' + }, + timeout: 30000 + }; + + console.log(`Fetching rating history for PDGA #${pdgaNumber} from: https://www.pdga.com/player/${pdgaNumber}/history`); + + const req = https.request(options, (res) => { + let data = ''; + res.on('data', (chunk) => { + data += chunk; + }); + + res.on('end', () => { + if (res.statusCode === 200) { + console.log(`Rating history request successful for PDGA #${pdgaNumber}`); + resolve(data); + } else { + console.log(`Rating History Error for PDGA #${pdgaNumber}:`); + console.log(`Status: ${res.statusCode}`); + console.log('Response Headers:', JSON.stringify(res.headers, null, 2)); + + if (res.headers['retry-after']) { + console.log(`Retry-After: ${res.headers['retry-after']} seconds`); + } + if (res.headers['x-ratelimit-limit']) { + console.log(`Rate Limit: ${res.headers['x-ratelimit-limit']}`); + } + if (res.headers['x-ratelimit-remaining']) { + console.log(`Rate Limit Remaining: ${res.headers['x-ratelimit-remaining']}`); + } + + if (data.length > 0) { + console.log(`Partial response received (${data.length} bytes):`, data.substring(0, 200)); + } + + const error = new Error(`HTTP ${res.statusCode} for rating history`); + error.statusCode = res.statusCode; + error.headers = res.headers; + reject(error); + } + }); + }); + + req.on('error', (error) => { + console.log(`Rating history request error for PDGA #${pdgaNumber}:`, { + code: error.code, + message: error.message, + errno: error.errno, + syscall: error.syscall + }); + + if (error.code === 'ECONNRESET') { + console.log('Connection reset on rating history - likely rate limited by PDGA'); + } + if (error.code === 'ECONNREFUSED') { + console.log('Connection refused - PDGA server may be blocking requests'); + } + if (error.code === 'ETIMEDOUT') { + console.log('Request timed out - server may be overloaded'); + } + + reject(error); + }); + + req.on('timeout', () => { + console.log(`Rating history request timeout for PDGA #${pdgaNumber} after 30s`); + req.destroy(); + reject(new Error('Request timeout')); + }); + + req.setTimeout(30000); + req.end(); + }); +} + +function parseRatingHistory(html) { + const history = []; + + const rowMatches = html.match(/<tr[^>]*>[\s\S]*?<\/tr>/gi); + + if (rowMatches) { + for (const row of rowMatches) { + if (row.includes('<th') || !row.includes('<td')) continue; + + const cellMatches = row.match(/<td[^>]*>(.*?)<\/td>/gi); + + if (cellMatches && cellMatches.length >= 2) { + const dateText = cellMatches[0].replace(/<[^>]*>/g, '').trim(); + const ratingText = cellMatches[1].replace(/<[^>]*>/g, '').trim(); + + const dateMatch = dateText.match(/(\d{1,2})-([A-Za-z]{3})-(\d{4})/); + if (dateMatch && !isNaN(parseInt(ratingText))) { + const [, day, month, year] = dateMatch; + const monthMap = { + 'Jan': 0, 'Feb': 1, 'Mar': 2, 'Apr': 3, 'May': 4, 'Jun': 5, + 'Jul': 6, 'Aug': 7, 'Sep': 8, 'Oct': 9, 'Nov': 10, 'Dec': 11 + }; + + const date = new Date(parseInt(year), monthMap[month], parseInt(day)); + + history.push({ + date: date.toISOString().split('T')[0], + rating: parseInt(ratingText), + displayDate: dateText + }); + } + } + } + } + + return history.sort((a, b) => new Date(a.date) - new Date(b.date)); +} + +module.exports = { fetchPlayerDataHTTP, parsePlayerData, fetchRatingHistory, parseRatingHistory }; diff --git a/src/scrapers/player-puppeteer.js b/src/scrapers/player-puppeteer.js new file mode 100644 index 0000000..6ae6bec --- /dev/null +++ b/src/scrapers/player-puppeteer.js @@ -0,0 +1,317 @@ +const { parseDate } = require('../services/rating-calculator'); + +async function getOfficialRatingHistory(browser, pdgaNumber) { + const page = await browser.newPage(); + let ratingHistory = []; + + try { + const url = `https://www.pdga.com/player/${pdgaNumber}/history`; + await page.goto(url, { waitUntil: 'networkidle2', timeout: 45000 }); + await page.waitForTimeout(1000); + + ratingHistory = await page.evaluate(() => { + const history = []; + + const selectors = [ + 'table tbody tr', + 'table tr', + '.view-content tbody tr' + ]; + + for (const selector of selectors) { + const rows = document.querySelectorAll(selector); + + for (const row of rows) { + const cells = row.querySelectorAll('td'); + if (cells.length >= 3) { + const dateText = cells[0]?.innerText?.trim(); + const ratingText = cells[1]?.innerText?.trim(); + + if (dateText && ratingText && /^\d{4}-\d{2}-\d{2}$|^\d{1,2}-\w{3}-\d{4}$|^\w{3} \d{1,2}, \d{4}$/.test(dateText)) { + const rating = parseInt(ratingText); + if (!isNaN(rating) && rating > 800 && rating < 1200) { + history.push({ + date: dateText, + rating: rating, + tournament: cells[2]?.innerText?.trim() || 'Unknown' + }); + } + } + } + } + + if (history.length > 0) break; + } + + return history; + }); + + } catch (error) { + console.error('Error fetching official rating history:', error.message); + } finally { + await page.close(); + } + + return ratingHistory; +} + +async function getPlayerTournamentDetails(browser, pdgaNumber) { + const page = await browser.newPage(); + let tournamentRounds = []; + + try { + const url = `https://www.pdga.com/player/${pdgaNumber}/details`; + await page.goto(url, { waitUntil: 'networkidle2', timeout: 45000 }); + await page.waitForTimeout(1000); + + tournamentRounds = await page.evaluate(() => { + const rounds = []; + const rows = document.querySelectorAll('table tbody tr'); + + rows.forEach(row => { + const cells = row.querySelectorAll('td'); + + if (cells.length >= 4) { + const cellTexts = Array.from(cells).map(cell => cell.innerText.trim()); + + let tournamentName = ''; + let dateText = ''; + let rating = 0; + let division = ''; + + cellTexts.forEach((text, index) => { + if (/\d{1,2}(-\w{3})?(\s+to\s+)\d{1,2}-\w{3}-\d{4}/.test(text) || /\d{1,2}-\w{3}-\d{4}/.test(text)) { + dateText = text; + } + if (/^\d{3,4}$/.test(text) && parseInt(text) >= 800 && parseInt(text) <= 1200) { + rating = parseInt(text); + } + if (/^M[A-Z]\d*$|^F[A-Z]\d*$/.test(text)) { + division = text; + } + if (index === 0) { + tournamentName = text; + } + }); + + if (tournamentName && dateText && rating > 0) { + rounds.push({ + tournament: tournamentName, + dateText: dateText, + rating: rating, + division: division, + competition: `${tournamentName} (${division})` + }); + } + } + }); + + return rounds; + }); + + const fixedRounds = tournamentRounds.map(round => { + let validDate = new Date(); + if (round.dateText) { + try { + const pdgaParsed = parseDate(round.dateText); + if (pdgaParsed instanceof Date && !isNaN(pdgaParsed.getTime())) { + validDate = pdgaParsed; + } else { + const nativeParsed = new Date(round.dateText); + if (!isNaN(nativeParsed.getTime())) { + validDate = nativeParsed; + } + } + } catch (e) { + console.log(`Date parsing failed for "${round.dateText}": ${e.message}`); + } + } + return { + tournament: round.tournament, + date: validDate, + rating: round.rating, + division: round.division, + competition: round.competition + }; + }); + tournamentRounds = fixedRounds; + + } catch (error) { + console.error('Error fetching tournament details:', error.message); + } finally { + await page.close(); + } + + return tournamentRounds; +} + +async function getNewTournamentRounds(browser, pdgaNumber, afterDate) { + const page = await browser.newPage(); + let newRounds = []; + + try { + const url = `https://www.pdga.com/player/${pdgaNumber}`; + await page.goto(url, { waitUntil: 'networkidle2' }); + + console.log(`Looking for tournaments after ${afterDate.toDateString()}...`); + + const newTournamentUrls = await page.evaluate((afterTimestamp) => { + const afterDate = new Date(afterTimestamp); + const tables = document.querySelectorAll('table[id*="player-results"]'); + const urls = []; + + tables.forEach(table => { + const rows = table.querySelectorAll('tbody tr'); + rows.forEach(row => { + const dateCell = row.querySelector('.dates'); + const tournamentCell = row.querySelector('.tournament a'); + + if (dateCell && tournamentCell) { + const dateText = dateCell.innerText.trim(); + const dateMatch = dateText.match(/\d{1,2}-[A-Za-z]{3}-\d{4}/); + + if (dateMatch) { + const dateStr = dateMatch[0]; + const date = new Date(dateStr); + + if (date > afterDate) { + const href = tournamentCell.getAttribute('href'); + if (href) { + urls.push({ + url: `https://www.pdga.com${href}`, + date: dateStr, + name: tournamentCell.innerText.trim() + }); + } + } + } + } + }); + }); + + return urls; + }, afterDate.getTime()); + + console.log(`Found ${newTournamentUrls.length} new tournaments after ${afterDate.toDateString()}`); + + for (const tournamentData of newTournamentUrls) { + try { + console.log(`Scraping new tournament: ${tournamentData.name} (${tournamentData.date})`); + + await page.goto(tournamentData.url, { waitUntil: 'domcontentloaded', timeout: 30000 }); + await page.waitForTimeout(500); + + const roundRatings = await page.evaluate((pdgaNum) => { + const rows = document.querySelectorAll('tr'); + + for (const row of rows) { + const cells = row.querySelectorAll('td'); + const hasPlayerNumber = Array.from(cells).some(cell => + cell.innerText && cell.innerText.includes(pdgaNum.toString()) + ); + + if (hasPlayerNumber) { + const roundRatingCells = row.querySelectorAll('td.round-rating'); + const ratings = []; + + roundRatingCells.forEach(cell => { + const rating = parseInt(cell.innerText.trim()); + if (!isNaN(rating) && rating > 0) { + ratings.push(rating); + } + }); + + return ratings; + } + } + + return []; + }, pdgaNumber); + + if (roundRatings.length > 0) { + const parsedDate = parseDate(tournamentData.date); + roundRatings.forEach(rating => { + newRounds.push({ + rating, + date: parsedDate, + competition: tournamentData.name + }); + }); + + console.log(`✓ Found ${roundRatings.length} round ratings for ${tournamentData.name}`); + } + + } catch (error) { + console.error(`Error scraping tournament ${tournamentData.name}:`, error.message); + } + } + + } catch (error) { + console.error(`Error getting new tournament rounds for PDGA ${pdgaNumber}:`, error); + } finally { + await page.close(); + } + + return newRounds; +} + +async function getOptimizedPlayerRounds(browser, pdgaNumber) { + console.log(`=== Optimized Round Collection for PDGA ${pdgaNumber} ===`); + + try { + console.log('Step 1: Getting official rating rounds from /details page...'); + const officialRounds = await getPlayerTournamentDetails(browser, pdgaNumber); + + if (officialRounds.length === 0) { + console.log('No official rounds found in details page'); + return []; + } + + console.log(`✓ Found ${officialRounds.length} official rating rounds`); + + const sortedRounds = officialRounds.sort((a, b) => b.date - a.date); + const latestOfficialDate = sortedRounds[0].date; + console.log(`Latest official round: ${latestOfficialDate.toDateString()}`); + + console.log('Step 2: Looking for NEW tournaments since latest official round...'); + const newRounds = await getNewTournamentRounds(browser, pdgaNumber, latestOfficialDate); + + if (newRounds.length > 0) { + console.log(`✓ Found ${newRounds.length} new round ratings`); + } else { + console.log('ℹ No new tournaments found since latest official round'); + } + + const allRounds = [ + ...officialRounds.map(round => ({ + rating: round.rating, + date: round.date, + competition: round.competition, + source: 'official' + })), + ...newRounds.map(round => ({ + rating: round.rating, + date: round.date, + competition: round.competition, + source: 'new' + })) + ]; + + allRounds.sort((a, b) => a.date - b.date); + + console.log(`=== Summary: ${officialRounds.length} official + ${newRounds.length} new = ${allRounds.length} total rounds ===`); + + return allRounds; + + } catch (error) { + console.error('Error in optimized round collection:', error.message); + return []; + } +} + +module.exports = { + getOfficialRatingHistory, + getPlayerTournamentDetails, + getNewTournamentRounds, + getOptimizedPlayerRounds +}; diff --git a/src/services/player-service.js b/src/services/player-service.js new file mode 100644 index 0000000..4a62188 --- /dev/null +++ b/src/services/player-service.js @@ -0,0 +1,275 @@ +const { db } = require('../db'); +const { getPlayerFromDB, getRoundHistoryFromDB, savePredictedRatingToDB, savePlayerToDB } = require('../models/player'); +const { fetchPlayerDataHTTP, parsePlayerData } = require('../scrapers/player-http'); +const { calculatePredictedRating } = require('./rating-calculator'); + +async function getPlayerDataFromDB(pdgaNumber) { + try { + const cachedPlayer = await getPlayerFromDB(pdgaNumber); + if (cachedPlayer) { + console.log(`Loading PDGA ${pdgaNumber} from DB (source of truth)`); + + let predictedRating = cachedPlayer.predicted_rating; + let stdDev = cachedPlayer.std_dev; + if (!predictedRating || predictedRating === 0) { + predictedRating = await getPredictedRatingFromDB(pdgaNumber); + const updatedPlayer = await getPlayerFromDB(pdgaNumber); + stdDev = updatedPlayer?.std_dev; + } + + return { + pdgaNumber: cachedPlayer.pdga_number, + name: cachedPlayer.name, + rating: cachedPlayer.current_rating, + ratingChange: cachedPlayer.rating_change, + predictedRating: predictedRating > 0 ? predictedRating : null, + stdDev: stdDev > 0 ? stdDev : null + }; + } + return null; + } catch (err) { + console.error(`Database error for PDGA ${pdgaNumber}:`, err.message); + return null; + } +} + +async function scrapePDGARating(pdgaNumber, retries = 3) { + console.log(`=== Refreshing PDGA ${pdgaNumber} from PDGA website ===`); + + for (let attempt = 1; attempt <= retries; attempt++) { + try { + console.log(`Attempt ${attempt}/${retries} for PDGA ${pdgaNumber} (using HTTP)`); + + const html = await fetchPlayerDataHTTP(pdgaNumber); + const result = parsePlayerData(html, pdgaNumber); + + try { + await savePlayerToDB(result); + console.log(`Saved PDGA ${pdgaNumber} to database`); + } catch (dbErr) { + console.error(`Failed to save PDGA ${pdgaNumber} to database:`, dbErr.message); + } + + console.log(`Successfully scraped PDGA ${pdgaNumber} on attempt ${attempt}`); + return result; + + } catch (error) { + console.error(`Attempt ${attempt}/${retries} failed for PDGA ${pdgaNumber}:`, error.message); + + if (attempt === retries) { + return { + pdgaNumber, + name: 'Error', + rating: 0, + ratingChange: null, + predictedRating: null + }; + } + + let retryDelay = 2000 * attempt; + + if (error.rateLimitInfo) { + const retryAfter = error.rateLimitInfo.headers['retry-after']; + if (retryAfter) { + retryDelay = Math.max(retryDelay, (parseInt(retryAfter) + 1) * 1000); + console.log(`Using Retry-After header: waiting ${retryDelay/1000}s`); + } + } + + if (error.code === 'ECONNRESET') { + retryDelay = Math.max(retryDelay, 10000); + console.log(`Connection reset detected: waiting ${retryDelay/1000}s`); + } + + await new Promise(resolve => setTimeout(resolve, retryDelay)); + } + } +} + +async function getPredictedRatingFromDB(pdgaNumber) { + try { + const roundHistory = await getRoundHistoryFromDB(pdgaNumber); + if (roundHistory.length > 0) { + console.log(`Using ${roundHistory.length} cached rounds for PDGA ${pdgaNumber} prediction`); + + const roundRatings = roundHistory.map(round => ({ + rating: round.rating, + date: new Date(round.date), + competition: round.competition_name || 'Unknown' + })); + + const result = calculatePredictedRating(roundRatings); + + await savePredictedRatingToDB(pdgaNumber, result.rating, result.stdDev); + + return result.rating; + } + return 0; + } catch (err) { + console.error(`Error getting predicted rating from DB for ${pdgaNumber}:`, err.message); + return 0; + } +} + +async function getAllRatingsFromDB(progressCallback = null) { + try { + const allPlayers = await new Promise((resolve, reject) => { + db.all( + 'SELECT pdga_number, name, current_rating, rating_change FROM players ORDER BY pdga_number', + [], + (err, rows) => { + if (err) reject(err); + else resolve(rows || []); + } + ); + }); + + console.log(`Loading ${allPlayers.length} players from database...`); + + const ratings = []; + const total = allPlayers.length; + + for (let i = 0; i < allPlayers.length; i++) { + const player = allPlayers[i]; + const pdgaNumber = player.pdga_number; + + if (progressCallback) { + progressCallback({ + current: i + 1, + total, + pdgaNumber, + status: 'loading' + }); + } + + try { + const playerData = await getPlayerDataFromDB(pdgaNumber); + + if (playerData) { + ratings.push(playerData); + } + + if (progressCallback) { + progressCallback({ + current: i + 1, + total, + pdgaNumber, + status: 'completed', + name: playerData ? playerData.name : player.name + }); + } + } catch (error) { + console.error(`Failed to load PDGA ${pdgaNumber} from database:`, error.message); + const errorData = { + pdgaNumber: parseInt(pdgaNumber), + name: player.name || 'Database Error', + rating: player.current_rating, + ratingChange: player.rating_change, + predictedRating: null + }; + ratings.push(errorData); + + if (progressCallback) { + progressCallback({ + current: i + 1, + total, + pdgaNumber, + status: 'error', + name: player.name || 'Database Error' + }); + } + } + } + + return ratings.sort((a, b) => (b.rating || 0) - (a.rating || 0)); + } catch (error) { + console.error('Error loading players from database:', error); + return []; + } +} + +async function refreshAllPlayersInDB(progressCallback = null) { + try { + const allPlayers = await new Promise((resolve, reject) => { + db.all( + 'SELECT pdga_number, name FROM players ORDER BY pdga_number', + [], + (err, rows) => { + if (err) reject(err); + else resolve(rows || []); + } + ); + }); + + console.log(`Refreshing ${allPlayers.length} players from database...`); + + const ratings = []; + const total = allPlayers.length; + + for (let i = 0; i < allPlayers.length; i++) { + const player = allPlayers[i]; + const pdgaNumber = player.pdga_number; + + console.log(`Refreshing PDGA ${pdgaNumber}... (${i + 1}/${total})`); + + if (progressCallback) { + progressCallback({ + current: i + 1, + total, + pdgaNumber, + status: 'loading' + }); + } + + try { + const playerData = await scrapePDGARating(pdgaNumber); + ratings.push(playerData); + + if (progressCallback) { + progressCallback({ + current: i + 1, + total, + pdgaNumber, + status: 'completed', + name: playerData.name + }); + } + + await new Promise(resolve => setTimeout(resolve, 2000)); + } catch (error) { + console.error(`Failed to refresh PDGA ${pdgaNumber}:`, error.message); + const errorData = { + pdgaNumber: parseInt(pdgaNumber), + name: player.name || 'Error', + rating: 0, + ratingChange: null, + predictedRating: null + }; + ratings.push(errorData); + + if (progressCallback) { + progressCallback({ + current: i + 1, + total, + pdgaNumber, + status: 'error', + name: player.name || 'Error' + }); + } + } + } + + return ratings.sort((a, b) => (b.rating || 0) - (a.rating || 0)); + } catch (error) { + console.error('Error refreshing all players:', error); + return []; + } +} + +module.exports = { + getPlayerDataFromDB, + scrapePDGARating, + getPredictedRatingFromDB, + getAllRatingsFromDB, + refreshAllPlayersInDB +}; diff --git a/src/services/rating-calculator.js b/src/services/rating-calculator.js new file mode 100644 index 0000000..f915102 --- /dev/null +++ b/src/services/rating-calculator.js @@ -0,0 +1,234 @@ +function parseDate(dateStr) { + const multiDayMatch = dateStr.match(/^(\d{1,2})(-([A-Za-z]{3}))?(\s+to\s+)(\d{1,2})-([A-Za-z]{3})-(\d{4})$/); + if (multiDayMatch) { + const day = parseInt(multiDayMatch[1]); + const month = multiDayMatch[3] || multiDayMatch[6]; + const year = parseInt(multiDayMatch[7]); + + const monthMap = { + 'Jan': 0, 'Feb': 1, 'Mar': 2, 'Apr': 3, 'May': 4, 'Jun': 5, + 'Jul': 6, 'Aug': 7, 'Sep': 8, 'Oct': 9, 'Nov': 10, 'Dec': 11 + }; + + return new Date(year, monthMap[month], day); + } + + const formats = [ + /^(\d{1,2})-([A-Za-z]{3})-(\d{4})$/, + /^(\d{1,2})\/(\d{1,2})\/(\d{4})$/ + ]; + + for (const format of formats) { + const match = dateStr.match(format); + if (match) { + if (format === formats[0]) { + const monthMap = { + 'Jan': 0, 'Feb': 1, 'Mar': 2, 'Apr': 3, 'May': 4, 'Jun': 5, + 'Jul': 6, 'Aug': 7, 'Sep': 8, 'Oct': 9, 'Nov': 10, 'Dec': 11 + }; + const day = parseInt(match[1]); + const month = monthMap[match[2]]; + const year = parseInt(match[3]); + return new Date(year, month, day); + } + } + } + + return new Date(dateStr); +} + +function getNextPDGAUpdateDate() { + const today = new Date(); + const currentMonth = today.getMonth(); + const currentYear = today.getFullYear(); + + const firstDayOfMonth = new Date(currentYear, currentMonth, 1); + const firstTuesday = new Date(firstDayOfMonth); + + const daysUntilTuesday = (2 - firstDayOfMonth.getDay() + 7) % 7; + firstTuesday.setDate(1 + daysUntilTuesday); + + const secondTuesday = new Date(firstTuesday); + secondTuesday.setDate(firstTuesday.getDate() + 7); + + if (today <= secondTuesday) { + return secondTuesday; + } else { + const nextMonth = currentMonth === 11 ? 0 : currentMonth + 1; + const nextYear = currentMonth === 11 ? currentYear + 1 : currentYear; + + const firstDayNextMonth = new Date(nextYear, nextMonth, 1); + const firstTuesdayNext = new Date(firstDayNextMonth); + + const daysUntilTuesdayNext = (2 - firstDayNextMonth.getDay() + 7) % 7; + firstTuesdayNext.setDate(1 + daysUntilTuesdayNext); + + const secondTuesdayNext = new Date(firstTuesdayNext); + secondTuesdayNext.setDate(firstTuesdayNext.getDate() + 7); + + return secondTuesdayNext; + } +} + +function calculateStandardDeviation(ratings) { + if (!ratings || ratings.length === 0) return 0; + + const mean = ratings.reduce((sum, r) => sum + r, 0) / ratings.length; + const variance = ratings.reduce((sum, r) => sum + Math.pow(r - mean, 2), 0) / ratings.length; + + return Math.sqrt(variance); +} + +function calculatePredictedRating(roundRatings) { + const debugLog = []; + debugLog.push('=== PDGA RATING CALCULATION (Following Official Rules) ==='); + + if (!roundRatings || roundRatings.length === 0) { + debugLog.push('❌ No rounds provided for prediction'); + return { rating: 0, debugLog }; + } + + debugLog.push(`📊 Starting with ${roundRatings.length} total rounds`); + + const nextUpdateDate = getNextPDGAUpdateDate(); + debugLog.push(`đŸŽ¯ PDGA Update Simulation: Next update date is ${nextUpdateDate.toDateString()}`); + debugLog.push(` Only including rounds played before ${nextUpdateDate.toDateString()}`); + + const allSortedRounds = roundRatings + .filter(r => r.rating > 0 && r.date < nextUpdateDate) + .sort((a, b) => b.date - a.date); + + if (allSortedRounds.length === 0) { + debugLog.push('❌ No valid rounds after filtering for update date'); + return { rating: 0, debugLog }; + } + + debugLog.push(`📊 After update date filter: ${allSortedRounds.length} rounds`); + + const twelveMonthsBeforeUpdate = new Date(nextUpdateDate); + twelveMonthsBeforeUpdate.setFullYear(twelveMonthsBeforeUpdate.getFullYear() - 1); + + const mostRecentDate = allSortedRounds[0].date; + debugLog.push(`📅 Most recent round: ${mostRecentDate.toDateString()}`); + debugLog.push(`📅 12-month cutoff: ${twelveMonthsBeforeUpdate.toDateString()} (1 year before update)`); + + let eligibleRounds = allSortedRounds.filter(r => r.date >= twelveMonthsBeforeUpdate); + + debugLog.push('đŸ—“ī¸ 12-MONTH FILTERING:'); + debugLog.push(`✅ Rounds in last 12 months: ${eligibleRounds.length}`); + + if (eligibleRounds.length < 8) { + const twentyFourMonthsBeforeUpdate = new Date(nextUpdateDate); + twentyFourMonthsBeforeUpdate.setFullYear(twentyFourMonthsBeforeUpdate.getFullYear() - 2); + + eligibleRounds = allSortedRounds.filter(r => r.date >= twentyFourMonthsBeforeUpdate); + debugLog.push(`âš ī¸ Extended to 24 months before update (${twentyFourMonthsBeforeUpdate.toDateString()}) - now ${eligibleRounds.length} rounds`); + } + + if (eligibleRounds.length === 0) { + debugLog.push('❌ No eligible rounds found'); + return { rating: 0, debugLog }; + } + + debugLog.push(`📈 ELIGIBLE ROUNDS: ${eligibleRounds.length}`); + eligibleRounds.forEach((round, index) => { + debugLog.push(` ${index + 1}. ${round.date.toDateString()}: ${round.rating} (${round.competition})`); + }); + + let workingRounds = [...eligibleRounds]; + let workingRatings = workingRounds.map(r => r.rating); + + if (workingRatings.length >= 7) { + debugLog.push('🔍 OUTLIER EXCLUSION (â‰Ĩ7 rounds available):'); + + const mean = workingRatings.reduce((sum, r) => sum + r, 0) / workingRatings.length; + const stdDev = calculateStandardDeviation(workingRatings); + + debugLog.push(` Mean: ${mean.toFixed(1)}`); + debugLog.push(` Std Dev: ${stdDev.toFixed(1)}`); + + const stdDevCutoff = mean - 2.5 * stdDev; + const hundredPointCutoff = mean - 100; + + debugLog.push(` 2.5΃ cutoff: ${stdDevCutoff.toFixed(1)}`); + debugLog.push(` 100-point cutoff: ${hundredPointCutoff.toFixed(1)}`); + + const filteredRatings = workingRatings.filter(rating => + rating >= stdDevCutoff && rating >= hundredPointCutoff + ); + + const stdDevOutliers = workingRatings.filter(rating => rating < stdDevCutoff); + const hundredPointOutliers = workingRatings.filter(rating => rating < hundredPointCutoff && rating >= stdDevCutoff); + + if (stdDevOutliers.length > 0) { + debugLog.push(` ❌ 2.5΃ outliers removed: ${stdDevOutliers.length} rounds`); + stdDevOutliers.forEach(rating => { + const round = workingRounds.find(r => r.rating === rating); + debugLog.push(` - ${rating} (${round.date.toDateString()}: ${round.competition})`); + }); + } + + if (hundredPointOutliers.length > 0) { + debugLog.push(` ❌ 100-point outliers removed: ${hundredPointOutliers.length} rounds`); + hundredPointOutliers.forEach(rating => { + const round = workingRounds.find(r => r.rating === rating); + debugLog.push(` - ${rating} (${round.date.toDateString()}: ${round.competition})`); + }); + } + + if (stdDevOutliers.length === 0 && hundredPointOutliers.length === 0) { + debugLog.push(` ✅ No outliers detected`); + } + + if (filteredRatings.length >= 4) { + workingRounds = workingRounds.filter(round => + round.rating >= stdDevCutoff && round.rating >= hundredPointCutoff + ); + workingRatings = filteredRatings; + debugLog.push(` ✅ Using ${filteredRatings.length} rounds after outlier removal`); + } else { + debugLog.push(` âš ī¸ Too few rounds after outlier removal (${filteredRatings.length}), keeping all rounds`); + } + } else { + debugLog.push(`â­ī¸ OUTLIER EXCLUSION SKIPPED (only ${workingRatings.length} rounds, need â‰Ĩ7)`); + } + + debugLog.push('âš–ī¸ WEIGHTING (Most recent 25% count double if â‰Ĩ9 rounds):'); + const weightedRatings = []; + + if (workingRatings.length >= 9) { + const recentCount = Math.round(workingRatings.length * 0.25); + debugLog.push(` ✅ Double-weighting most recent ${recentCount} rounds`); + + weightedRatings.push(...workingRatings); + + for (let i = 0; i < recentCount; i++) { + weightedRatings.push(workingRatings[i]); + const round = workingRounds[i]; + debugLog.push(` 2x weight: ${workingRatings[i]} (${round.date.toDateString()}: ${round.competition})`); + } + + debugLog.push(` 📊 Total values: ${workingRatings.length} + ${recentCount} double-weighted = ${weightedRatings.length}`); + } else { + debugLog.push(` âžĄī¸ No double weighting (${workingRatings.length} rounds, need â‰Ĩ9)`); + weightedRatings.push(...workingRatings); + } + + const sum = weightedRatings.reduce((sum, r) => sum + r, 0); + const average = sum / weightedRatings.length; + const finalRating = Math.round(average); + + const stdDev = calculateStandardDeviation(weightedRatings); + + debugLog.push('đŸŽ¯ FINAL CALCULATION:'); + debugLog.push(` Sum: ${sum}`); + debugLog.push(` Count: ${weightedRatings.length}`); + debugLog.push(` Average: ${average.toFixed(1)}`); + debugLog.push(` Standard Deviation: ${stdDev.toFixed(1)}`); + debugLog.push(` Final Rating: ${finalRating}`); + debugLog.push('=== END PDGA CALCULATION ==='); + + return { rating: finalRating, stdDev: Math.round(stdDev), debugLog }; +} + +module.exports = { parseDate, getNextPDGAUpdateDate, calculatePredictedRating, calculateStandardDeviation };