const express = require('express');
const puppeteer = require('puppeteer');
const https = require('https');
const fs = require('fs');
const path = require('path');

const app = express();
const PORT = 3000;

app.use(express.static('public'));

// In-memory cache shared by every endpoint. Keys used in this file are
// `player-<n>`, `history-<n>` and `predicted-<n>`; values are
// `{ data, timestamp }` where `timestamp` is a `Date.now()` reading.
const cache = new Map();
const CACHE_DURATION = 24 * 60 * 60 * 1000;

const PDGA_HOST = 'www.pdga.com';
const REQUEST_TIMEOUT_MS = 30000;
const USER_AGENT =
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';

// Month abbreviation -> zero-based month index, as expected by `new Date(y, m, d)`.
const MONTH_INDEX = Object.freeze({
  Jan: 0,
  Feb: 1,
  Mar: 2,
  Apr: 3,
  May: 4,
  Jun: 5,
  Jul: 6,
  Aug: 7,
  Sep: 8,
  Oct: 9,
  Nov: 10,
  Dec: 11,
});

/**
 * Resolve after `ms` milliseconds.
 * @param {number} ms
 * @returns {Promise<void>}
 */
function delay(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Look up a three-letter month abbreviation, ignoring letter case.
 * @param {string} abbrev - e.g. "Jan", "jan", "JAN".
 * @returns {number|undefined} Zero-based month index, or undefined when unknown.
 */
function monthIndexOf(abbrev) {
  const key = abbrev.charAt(0).toUpperCase() + abbrev.slice(1).toLowerCase();
  return MONTH_INDEX[key];
}

/**
 * Return the cache entry for `key` if it exists and is younger than CACHE_DURATION.
 * @param {string} key
 * @returns {{data: *, timestamp: number}|undefined}
 */
function getFreshCacheEntry(key) {
  const entry = cache.get(key);
  if (entry && Date.now() - entry.timestamp < CACHE_DURATION) {
    return entry;
  }
  return undefined;
}

/**
 * Placeholder record used whenever a player could not be scraped or parsed.
 * @param {string|number} pdgaNumber
 * @returns {{pdgaNumber: (string|number), name: string, rating: number, ratingChange: null, predictedRating: null}}
 */
function createErrorPlayer(pdgaNumber) {
  return { pdgaNumber, name: 'Error', rating: 0, ratingChange: null, predictedRating: null };
}

/**
 * True when `value` consists only of decimal digits. Route parameters are
 * interpolated into the outbound request path, so anything else is rejected.
 * @param {string} value
 * @returns {boolean}
 */
function isValidPdgaNumber(value) {
  return /^\d+$/.test(String(value));
}

/**
 * GET a page from www.pdga.com over HTTPS and resolve with its body as text.
 *
 * @param {string} requestPath - Path component of the URL, e.g. `/player/12345`.
 * @returns {Promise<string>} Response body for a 200 response.
 * @throws {Error} `HTTP <status>` for non-200 responses, `Request timeout` when the
 *   socket is idle for REQUEST_TIMEOUT_MS, or the underlying network error.
 */
function fetchPdgaPage(requestPath) {
  return new Promise((resolve, reject) => {
    const options = {
      hostname: PDGA_HOST,
      port: 443,
      path: requestPath,
      method: 'GET',
      headers: { 'User-Agent': USER_AGENT },
      timeout: REQUEST_TIMEOUT_MS,
    };

    const req = https.request(options, (res) => {
      // Decode as UTF-8 at the stream level so a multi-byte character that is
      // split across two chunks is not corrupted by per-chunk string conversion.
      res.setEncoding('utf8');

      let body = '';
      res.on('data', (chunk) => {
        body += chunk;
      });
      res.on('error', reject);
      res.on('end', () => {
        if (res.statusCode === 200) {
          resolve(body);
        } else {
          reject(new Error(`HTTP ${res.statusCode}`));
        }
      });
    });

    req.on('error', reject);
    req.on('timeout', () => {
      // Reject first so the caller sees the timeout rather than the
      // "socket hang up" error that destroying the request produces.
      reject(new Error('Request timeout'));
      req.destroy();
    });
    req.setTimeout(REQUEST_TIMEOUT_MS);
    req.end();
  });
}

/**
 * Fetch the HTML of a player's profile page.
 * @param {string|number} pdgaNumber
 * @returns {Promise<string>} Page HTML; rejects as described on fetchPdgaPage.
 */
async function fetchPlayerDataHTTP(pdgaNumber) {
  return fetchPdgaPage(`/player/${pdgaNumber}`);
}

/**
 * Extract name, current rating and rating change from a player page.
 *
 * @param {string} html - Player page HTML.
 * @param {string|number} pdgaNumber - Echoed back in the result.
 * @returns {{pdgaNumber: (string|number), name: string, rating: number, ratingChange: (number|null), predictedRating: null}}
 *   `name` is 'Unknown' and `rating` is 0 when the respective pattern is not
 *   found; an error placeholder is returned if parsing throws.
 */
function parsePlayerData(html, pdgaNumber) {
  try {
    // Player name: the text that precedes "| Professional Disc Golf Association".
    const nameMatch = html.match(/([^<]+?)\s*\|\s*Professional Disc Golf Association/i);
    const name = nameMatch ? nameMatch[1].trim() : 'Unknown';

    // Current rating: allow for markup between the label and the number.
    const ratingMatch = html.match(/Current Rating:[^>]*>\s*(\d+)/i);
    const rating = ratingMatch ? Number.parseInt(ratingMatch[1], 10) : 0;

    // Rating change: the signed number between the label and "(as of".
    const changeMatch = html.match(/Current Rating:[\s\S]*?([+\-]\d+)[\s\S]*?\(as of/i);
    const ratingChange = changeMatch ? Number.parseInt(changeMatch[1], 10) : null;

    return {
      pdgaNumber,
      // Drop a trailing " #12345" suffix from the name.
      name: name.replace(/\s*#\d+$/, ''),
      rating,
      ratingChange,
      predictedRating: null,
    };
  } catch (error) {
    console.error(`Error parsing data for PDGA ${pdgaNumber}:`, error.message);
    return createErrorPlayer(pdgaNumber);
  }
}

/**
 * Get a player's rating data, served from cache when fresh, otherwise fetched
 * over HTTPS with retries and a linearly growing back-off (2s, 4s, ...).
 *
 * @param {string|number} pdgaNumber
 * @param {number} [retries=3] - Maximum number of fetch attempts.
 * @returns {Promise<object>} Parsed player record, or the error placeholder
 *   when every attempt fails. Never rejects.
 */
async function scrapePDGARating(pdgaNumber, retries = 3) {
  const cacheKey = `player-${pdgaNumber}`;
  const cached = getFreshCacheEntry(cacheKey);
  if (cached) {
    console.log(`Using cached data for PDGA ${pdgaNumber}`);
    return cached.data;
  }

  for (let attempt = 1; attempt <= retries; attempt++) {
    try {
      console.log(`Attempt ${attempt}/${retries} for PDGA ${pdgaNumber} (using HTTP)`);
      const html = await fetchPlayerDataHTTP(pdgaNumber);
      const result = parsePlayerData(html, pdgaNumber);
      cache.set(cacheKey, { data: result, timestamp: Date.now() });
      console.log(`Successfully scraped PDGA ${pdgaNumber} on attempt ${attempt}`);
      return result;
    } catch (error) {
      console.error(`Attempt ${attempt}/${retries} failed for PDGA ${pdgaNumber}:`, error.message);
      if (attempt === retries) {
        break;
      }
      // Wait before retrying.
      await delay(2000 * attempt);
    }
  }

  // Reached when all attempts failed, and also when `retries` < 1 so that the
  // function never resolves to undefined.
  return createErrorPlayer(pdgaNumber);
}

/**
 * Compute a predicted rating from a player's recent round ratings, retrying
 * when no ratings were found or scraping threw.
 *
 * @param {object} browser - Puppeteer browser instance used to open pages.
 * @param {string|number} pdgaNumber
 * @param {number} [retries=2] - Maximum number of attempts.
 * @returns {Promise<number>} Predicted rating, or 0 when every attempt failed.
 */
async function getPredictedRating(browser, pdgaNumber, retries = 2) {
  for (let attempt = 1; attempt <= retries; attempt++) {
    try {
      console.log(`Predicted rating attempt ${attempt}/${retries} for PDGA ${pdgaNumber}`);
      const roundRatings = await getPlayerCompetitionRatings(browser, pdgaNumber);
      const predictedRating = calculatePredictedRating(roundRatings);

      if (predictedRating > 0) {
        return predictedRating;
      }

      if (attempt < retries) {
        console.log(`No ratings found, waiting before retry...`);
        await delay(5000);
      }
    } catch (error) {
      console.error(`Predicted rating attempt ${attempt}/${retries} failed for ${pdgaNumber}:`, error.message);
      if (attempt < retries) {
        await delay(5000);
      }
    }
  }

  console.log(`All attempts failed for predicted rating of PDGA ${pdgaNumber}`);
  return 0;
}

/**
 * Collect the player's round ratings from tournaments dated within the last year.
 *
 * Opens the player page, takes up to 8 tournament links from the results
 * tables, then visits each tournament page and reads the `td.round-rating`
 * cells of the first row that mentions the player's PDGA number.
 *
 * @param {object} browser - Puppeteer browser instance.
 * @param {string|number} pdgaNumber
 * @returns {Promise<Array<{rating: number, date: Date}>>} One entry per round.
 *   Errors are logged and yield whatever was collected so far (possibly []).
 */
async function getPlayerCompetitionRatings(browser, pdgaNumber) {
  const page = await browser.newPage();
  const allRatings = [];

  try {
    const url = `https://www.pdga.com/player/${pdgaNumber}`;
    await page.goto(url, { waitUntil: 'networkidle2' });

    // This callback is serialized and run inside the page, so it cannot close
    // over Node-side helpers; the month table is passed in as an argument.
    const tournamentUrls = await page.evaluate((monthIndex) => {
      const oneYearAgo = new Date();
      oneYearAgo.setFullYear(oneYearAgo.getFullYear() - 1);

      const urls = [];
      for (const table of document.querySelectorAll('table[id*="player-results"]')) {
        for (const row of table.querySelectorAll('tbody tr')) {
          const dateCell = row.querySelector('.dates');
          const tournamentLink = row.querySelector('.tournament a');
          if (!dateCell || !tournamentLink) continue;

          const dateMatch = dateCell.innerText.trim().match(/(\d{1,2})-([A-Za-z]{3})-(\d{4})/);
          if (!dateMatch) continue;

          // Build the date from its parts instead of relying on the engine's
          // implementation-defined parsing of a non-ISO string.
          const [dateStr, day, monthAbbrev, year] = dateMatch;
          const monthKey = monthAbbrev.charAt(0).toUpperCase() + monthAbbrev.slice(1).toLowerCase();
          const month = monthIndex[monthKey];
          if (month === undefined) continue;

          const date = new Date(Number(year), month, Number(day));
          if (date > oneYearAgo) {
            const href = tournamentLink.getAttribute('href');
            if (href) {
              urls.push({ url: `https://www.pdga.com${href}`, date: dateStr });
            }
          }
        }
      }
      // Limit the number of tournaments to scrape.
      return urls.slice(0, 8);
    }, MONTH_INDEX);

    console.log(`Found ${tournamentUrls.length} recent tournaments for PDGA ${pdgaNumber}`);

    for (const tournamentData of tournamentUrls) {
      try {
        await page.goto(tournamentData.url, { waitUntil: 'domcontentloaded', timeout: 45000 });
        // Pause between requests. A plain timer is used because
        // page.waitForTimeout is not available in every Puppeteer version.
        await delay(3000);

        const roundRatings = await page.evaluate((pdgaNum) => {
          // Match the number only as a whole token so that, for example,
          // 123 does not match a cell containing 1234.
          const escaped = String(pdgaNum).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
          const wholeNumber = new RegExp(`(^|\\D)${escaped}(\\D|$)`);

          for (const row of document.querySelectorAll('tr')) {
            const cells = Array.from(row.querySelectorAll('td'));
            const isPlayerRow = cells.some((cell) => cell.innerText && wholeNumber.test(cell.innerText));
            if (!isPlayerRow) continue;

            const ratings = [];
            row.querySelectorAll('td.round-rating').forEach((cell) => {
              const rating = parseInt(cell.innerText.trim(), 10);
              if (!Number.isNaN(rating) && rating > 0) {
                ratings.push(rating);
              }
            });
            return ratings;
          }
          return [];
        }, String(pdgaNumber));

        if (roundRatings.length > 0) {
          const parsedDate = parseDate(tournamentData.date);
          roundRatings.forEach((rating) => {
            allRatings.push({ rating, date: parsedDate });
          });
          console.log(`Found ${roundRatings.length} round ratings for ${tournamentData.url}`);
        }
      } catch (error) {
        console.error(`Error scraping tournament ${tournamentData.url}:`, error);
      }
    }
  } catch (error) {
    console.error(`Error getting competition ratings for PDGA ${pdgaNumber}:`, error);
  } finally {
    await page.close();
  }

  // All ratings come from tournaments within the last year (filtered above).
  return allRatings;
}

/**
 * Parse a `DD-Mon-YYYY` date string into a local-time Date.
 *
 * @param {string} dateStr
 * @returns {Date} The parsed date; any other format (or an unknown month
 *   abbreviation) falls back to `new Date(dateStr)`, which may be an Invalid Date.
 */
function parseDate(dateStr) {
  const match = /^(\d{1,2})-([A-Za-z]{3})-(\d{4})$/.exec(dateStr);
  if (match) {
    const month = monthIndexOf(match[2]);
    if (month !== undefined) {
      return new Date(Number.parseInt(match[3], 10), month, Number.parseInt(match[1], 10));
    }
  }
  return new Date(dateStr);
}

/**
 * Estimate a rating from round ratings.
 *
 * Steps (the thresholds follow the PDGA rules cited in the original comments):
 *  1. Order rounds most-recent first and drop non-positive ratings.
 *  2. With 7+ rounds, drop rounds more than 2.5 standard deviations below the
 *     mean, provided at least 4 rounds remain.
 *  3. With 9+ rounds, count the most recent 25% of rounds twice.
 *  4. Return the rounded average.
 *
 * The input array is not modified.
 *
 * @param {Array<{rating: number, date: (Date|number)}>} roundRatings
 * @returns {number} Rounded predicted rating, or 0 when there is no usable round.
 */
function calculatePredictedRating(roundRatings) {
  if (!roundRatings || roundRatings.length === 0) return 0;

  // Copy before sorting: Array.prototype.sort works in place and would
  // otherwise reorder the caller's array.
  let ratings = [...roundRatings]
    .sort((a, b) => b.date - a.date)
    .map((round) => round.rating)
    .filter((rating) => rating > 0);

  if (ratings.length === 0) return 0;

  // At least 7 rounds are needed to apply outlier exclusion.
  if (ratings.length >= 7) {
    const mean = ratings.reduce((sum, r) => sum + r, 0) / ratings.length;
    const cutoff = mean - 2.5 * calculateStandardDeviation(ratings);
    const withoutOutliers = ratings.filter((rating) => rating >= cutoff);

    // Only use the filtered set if enough data is left.
    if (withoutOutliers.length >= 4) {
      ratings = withoutOutliers;
    }
  }

  // With 9+ rounds the most recent quarter is counted a second time.
  const doubled = ratings.length >= 9 ? ratings.slice(0, Math.round(ratings.length * 0.25)) : [];
  const weighted = [...ratings, ...doubled];

  const average = weighted.reduce((sum, r) => sum + r, 0) / weighted.length;
  return Math.round(average);
}

/**
 * Population standard deviation (divides by N, not N - 1).
 * @param {number[]} ratings
 * @returns {number} 0 for a missing or empty array.
 */
function calculateStandardDeviation(ratings) {
  if (!ratings || ratings.length === 0) return 0;

  const mean = ratings.reduce((sum, r) => sum + r, 0) / ratings.length;
  const variance = ratings.reduce((sum, r) => sum + (r - mean) ** 2, 0) / ratings.length;
  return Math.sqrt(variance);
}

/**
 * Scrape every PDGA number listed (one per line) in `pdga-numbers.txt`.
 *
 * @param {?function(object): void} [progressCallback=null] - Called with
 *   `{ current, total, pdgaNumber, status }` before each player ('loading') and
 *   after it ('completed' or 'error', with `name`).
 * @returns {Promise<object[]>} Player records sorted by rating, highest first;
 *   [] when the numbers file cannot be read.
 */
async function getAllRatings(progressCallback = null) {
  try {
    const fileContents = await fs.promises.readFile('pdga-numbers.txt', 'utf-8');
    const pdgaNumbers = fileContents
      .split('\n')
      .map((num) => num.trim())
      .filter((num) => num);

    const ratings = [];
    const total = pdgaNumbers.length;

    for (let i = 0; i < pdgaNumbers.length; i++) {
      const pdgaNumber = pdgaNumbers[i];
      console.log(`Scraping PDGA ${pdgaNumber}... (${i + 1}/${total})`);

      if (progressCallback) {
        progressCallback({ current: i + 1, total, pdgaNumber, status: 'loading' });
      }

      try {
        // Record whether the data is cached BEFORE scraping, since scraping
        // populates the cache.
        const wasFromCache = getFreshCacheEntry(`player-${pdgaNumber}`) !== undefined;

        const playerData = await scrapePDGARating(pdgaNumber);
        ratings.push(playerData);

        if (progressCallback) {
          progressCallback({
            current: i + 1,
            total,
            pdgaNumber,
            status: 'completed',
            name: playerData.name,
          });
        }

        if (!wasFromCache) {
          // Delay only after fresh scrapes to avoid overwhelming the server.
          await delay(1000);
        }
      } catch (error) {
        console.error(`Failed to scrape PDGA ${pdgaNumber}:`, error.message);
        ratings.push(createErrorPlayer(pdgaNumber));

        if (progressCallback) {
          progressCallback({ current: i + 1, total, pdgaNumber, status: 'error', name: 'Error' });
        }
      }
    }

    return ratings.sort((a, b) => b.rating - a.rating);
  } catch (error) {
    console.error('Error reading PDGA numbers:', error);
    return [];
  }
}

app.get('/', (req, res) => {
  res.sendFile(path.join(__dirname, 'index.html'));
});

// Returns the full ratings list as JSON once every player has been processed.
app.get('/api/ratings', async (req, res) => {
  try {
    const ratings = await getAllRatings();
    res.json(ratings);
  } catch (error) {
    res.status(500).json({ error: 'Failed to fetch ratings' });
  }
});

// Server-sent events stream: one message per progress update, then a final
// `{ status: 'complete', ratings }` (or `{ status: 'error', error }`) message.
app.get('/api/ratings/progress', (req, res) => {
  res.writeHead(200, {
    'Content-Type': 'text/event-stream',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Access-Control-Allow-Origin': '*',
    'Access-Control-Allow-Headers': 'Cache-Control',
  });

  // Scraping keeps running after the client disconnects, so every write is
  // guarded: writing to an ended response raises a write-after-end error.
  let clientGone = false;
  res.on('close', () => {
    clientGone = true;
  });

  const send = (payload) => {
    if (clientGone || res.writableEnded) return;
    res.write(`data: ${JSON.stringify(payload)}\n\n`);
  };

  const finish = () => {
    if (!res.writableEnded) {
      res.end();
    }
  };

  getAllRatings(send)
    .then((ratings) => {
      send({ status: 'complete', ratings });
      finish();
    })
    .catch((error) => {
      send({ status: 'error', error: error.message });
      finish();
    });
});

/**
 * Fetch the HTML of a player's rating-history page.
 * @param {string|number} pdgaNumber
 * @returns {Promise<string>} Page HTML; rejects as described on fetchPdgaPage.
 */
async function fetchRatingHistory(pdgaNumber) {
  return fetchPdgaPage(`/player/${pdgaNumber}/history`);
}

/**
 * Extract rating history rows from the history page HTML.
 *
 * A row is used when it has at least two `<td>` cells, the first containing a
 * `DD-Mon-YYYY` date and the second starting with an integer rating.
 *
 * @param {string} html
 * @returns {Array<{date: string, rating: number, displayDate: string}>}
 *   Entries sorted oldest first; `date` is `YYYY-MM-DD`.
 */
function parseRatingHistory(html) {
  const history = [];

  // Find all table rows.
  const rowMatches = html.match(/<tr[^>]*>[\s\S]*?<\/tr>/gi);
  if (!rowMatches) {
    return history;
  }

  for (const row of rowMatches) {
    // Skip header rows and rows without data cells.
    if (row.includes('<th') || !row.includes('<td')) continue;

    // [\s\S] rather than `.` so cells whose content spans lines still match.
    const cellMatches = row.match(/<td[^>]*>([\s\S]*?)<\/td>/gi);
    if (!cellMatches || cellMatches.length < 2) continue;

    const dateText = cellMatches[0].replace(/<[^>]*>/g, '').trim();
    const ratingText = cellMatches[1].replace(/<[^>]*>/g, '').trim();

    const dateMatch = dateText.match(/(\d{1,2})-([A-Za-z]{3})-(\d{4})/);
    const rating = Number.parseInt(ratingText, 10);
    if (!dateMatch || Number.isNaN(rating)) continue;

    const [, day, monthAbbrev, year] = dateMatch;
    const month = monthIndexOf(monthAbbrev);
    if (month === undefined) continue;

    // Format from the parsed parts. Going through a local-time Date and
    // toISOString() converts to UTC, which moves the day back by one in
    // timezones ahead of UTC.
    const isoDate = `${year}-${String(month + 1).padStart(2, '0')}-${day.padStart(2, '0')}`;

    history.push({
      date: isoDate, // YYYY-MM-DD format
      rating,
      displayDate: dateText,
    });
  }

  // Sort by date (oldest first for chart display).
  return history.sort((a, b) => new Date(a.date) - new Date(b.date));
}

// Returns `{ pdgaNumber, history }` for one player, cached for CACHE_DURATION.
app.get('/api/rating-history/:pdgaNumber', async (req, res) => {
  const { pdgaNumber } = req.params;
  if (!isValidPdgaNumber(pdgaNumber)) {
    res.status(400).json({ error: 'Invalid PDGA number' });
    return;
  }

  try {
    const cacheKey = `history-${pdgaNumber}`;
    const cached = getFreshCacheEntry(cacheKey);

    // Check cache first.
    if (cached) {
      console.log(`Using cached rating history for PDGA ${pdgaNumber}`);
      res.json({ pdgaNumber: Number.parseInt(pdgaNumber, 10), history: cached.data });
      return;
    }

    console.log(`Fetching rating history for PDGA ${pdgaNumber}...`);
    const html = await fetchRatingHistory(pdgaNumber);
    const history = parseRatingHistory(html);

    cache.set(cacheKey, { data: history, timestamp: Date.now() });

    res.json({ pdgaNumber: Number.parseInt(pdgaNumber, 10), history });
  } catch (error) {
    console.error('Error fetching rating history:', error.message);
    res.status(500).json({ error: 'Failed to fetch rating history' });
  }
});

// Empties the in-memory cache and reports how many entries were removed.
app.post('/api/clear-cache', (req, res) => {
  try {
    const cacheSize = cache.size;
    cache.clear();
    console.log(`Cache cleared - removed ${cacheSize} entries`);
    res.json({ success: true, message: `Cache cleared - ${cacheSize} entries removed` });
  } catch (error) {
    console.error('Error clearing cache:', error);
    res.status(500).json({ error: 'Failed to clear cache' });
  }
});

// Returns `{ pdgaNumber, predictedRating }` for one player. Launches a
// headless browser per uncached request; the result is cached for CACHE_DURATION.
app.post('/api/predicted-rating/:pdgaNumber', async (req, res) => {
  const { pdgaNumber } = req.params;
  if (!isValidPdgaNumber(pdgaNumber)) {
    res.status(400).json({ error: 'Invalid PDGA number' });
    return;
  }

  let browser = null;
  try {
    const cacheKey = `predicted-${pdgaNumber}`;
    const cached = getFreshCacheEntry(cacheKey);

    // Check cache first.
    if (cached) {
      console.log(`Using cached predicted rating for PDGA ${pdgaNumber}`);
      res.json({ pdgaNumber: Number.parseInt(pdgaNumber, 10), predictedRating: cached.data });
      return;
    }

    browser = await puppeteer.launch({
      headless: "new",
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        '--disable-accelerated-2d-canvas',
        '--no-first-run',
        '--no-zygote',
        '--disable-gpu',
      ],
    });

    console.log(`Calculating predicted rating for PDGA ${pdgaNumber}...`);
    const predictedRating = await getPredictedRating(browser, pdgaNumber);

    cache.set(cacheKey, { data: predictedRating, timestamp: Date.now() });

    res.json({ pdgaNumber: Number.parseInt(pdgaNumber, 10), predictedRating });
  } catch (error) {
    console.error('Error calculating predicted rating:', error.message || error);
    if (!res.headersSent) {
      res.status(500).json({ error: 'Failed to calculate predicted rating' });
    }
  } finally {
    // Always release the browser, whichever path was taken above.
    if (browser) {
      try {
        await browser.close();
      } catch (closeError) {
        console.error('Error closing browser:', closeError.message);
      }
    }
  }
});

app.listen(PORT, () => {
  console.log(`PDGA Ratings app running on http://localhost:${PORT}`);
});