const express = require('express');
const puppeteer = require('puppeteer');
const https = require('https');
const fs = require('fs');
const path = require('path');

const app = express();
// Listen on $PORT when it holds a usable number, otherwise on 3000.
const PORT = Number.parseInt(process.env.PORT, 10) || 3000;

app.use(express.static('public'));

// In-memory cache of scraped player summaries, keyed by `player-<pdgaNumber>`.
const cache = new Map();
const CACHE_DURATION = 24 * 60 * 60 * 1000; // 24 hours, in milliseconds

const REQUEST_TIMEOUT_MS = 30000;
const USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';

// Three-letter month abbreviations mapped to the 0-based month index used by Date.
const MONTH_INDEX = Object.freeze({
  Jan: 0, Feb: 1, Mar: 2, Apr: 3, May: 4, Jun: 5,
  Jul: 6, Aug: 7, Sep: 8, Oct: 9, Nov: 10, Dec: 11
});

/**
 * Resolve after `ms` milliseconds.
 * @param {number} ms
 * @returns {Promise<void>}
 */
function delay(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Check that a value consists only of decimal digits (1 to 10 of them).
 * Used to reject route parameters before they are placed into a URL.
 * @param {*} value
 * @returns {boolean}
 */
function isValidPdgaNumber(value) {
  return /^\d{1,10}$/.test(String(value));
}

/**
 * Build the placeholder record returned when a player cannot be scraped.
 * @param {string|number} pdgaNumber
 * @returns {{pdgaNumber: (string|number), name: string, rating: number, ratingChange: null, predictedRating: null}}
 */
function createErrorPlayer(pdgaNumber) {
  return { pdgaNumber, name: 'Error', rating: 0, ratingChange: null, predictedRating: null };
}

/**
 * Look up a month abbreviation regardless of letter case.
 * @param {string} abbreviation - e.g. "Jan", "JAN" or "jan".
 * @returns {number|undefined} 0-based month index, or undefined when unknown.
 */
function monthIndexOf(abbreviation) {
  const normalized = abbreviation.charAt(0).toUpperCase() + abbreviation.slice(1).toLowerCase();
  return Object.prototype.hasOwnProperty.call(MONTH_INDEX, normalized)
    ? MONTH_INDEX[normalized]
    : undefined;
}

/**
 * GET a path on www.pdga.com over HTTPS and resolve with the response body.
 * @param {string} requestPath - Path component of the URL, starting with "/".
 * @returns {Promise<string>} Body of a 200 response.
 *   Rejects with Error(`HTTP <status>`) for any other status, with
 *   Error('Request timeout') when the socket times out, and with the
 *   underlying error on network failures.
 */
function fetchPdgaPage(requestPath) {
  return new Promise((resolve, reject) => {
    const options = {
      hostname: 'www.pdga.com',
      port: 443,
      path: requestPath,
      method: 'GET',
      headers: { 'User-Agent': USER_AGENT },
      timeout: REQUEST_TIMEOUT_MS
    };

    const req = https.request(options, (res) => {
      // Decode as a stream so a multi-byte character split across two chunks
      // is not corrupted by per-chunk string conversion.
      res.setEncoding('utf8');
      let data = '';
      res.on('data', (chunk) => {
        data += chunk;
      });
      res.on('end', () => {
        if (res.statusCode === 200) {
          resolve(data);
        } else {
          reject(new Error(`HTTP ${res.statusCode}`));
        }
      });
      res.on('error', reject);
    });

    req.on('error', reject);
    // Destroying with an error routes the timeout through the 'error' handler,
    // so there is a single rejection path.
    req.on('timeout', () => {
      req.destroy(new Error('Request timeout'));
    });
    req.end();
  });
}

/**
 * Fetch the HTML of a player's profile page.
 * @param {string|number} pdgaNumber
 * @returns {Promise<string>} Page HTML; rejects as described on fetchPdgaPage.
 */
async function fetchPlayerDataHTTP(pdgaNumber) {
  return fetchPdgaPage(`/player/${encodeURIComponent(pdgaNumber)}`);
}

/**
 * Extract name, current rating and rating change from a player page.
 * Missing fields fall back to 'Unknown', 0 and null respectively.
 * @param {string} html - Player page HTML.
 * @param {string|number} pdgaNumber - Echoed back in the result.
 * @returns {{pdgaNumber: (string|number), name: string, rating: number, ratingChange: (number|null), predictedRating: null}}
 *   The createErrorPlayer() record when parsing throws.
 */
function parsePlayerData(html, pdgaNumber) {
  try {
    // Extract player name from title. Anchoring on the <title> tag keeps the
    // tag text itself ("title>") out of the captured name; the second pattern
    // is a fallback for pages where the text appears outside a <title> tag.
    const nameMatch =
      html.match(/<title[^>]*>\s*([^<]+?)\s*\|\s*Professional Disc Golf Association/i) ||
      html.match(/([^<>]+?)\s*\|\s*Professional Disc Golf Association/i);
    const name = nameMatch ? nameMatch[1].trim() : 'Unknown';

    // Extract current rating - account for HTML tags between "Current Rating:" and the number
    const ratingMatch = html.match(/Current Rating:[^>]*>\s*(\d+)/i);
    const rating = ratingMatch ? Number.parseInt(ratingMatch[1], 10) : 0;

    // Extract rating change - look for the +/- number in the rating context
    const changeMatch = html.match(/Current Rating:[\s\S]*?([+\-]\d+)[\s\S]*?\(as of/i);
    const ratingChange = changeMatch ? Number.parseInt(changeMatch[1], 10) : null;

    return {
      pdgaNumber,
      // Drop a trailing " #12345" suffix from the title text.
      name: name.replace(/\s*#\d+$/, ''),
      rating,
      ratingChange,
      predictedRating: null
    };
  } catch (error) {
    console.error(`Error parsing data for PDGA ${pdgaNumber}:`, error.message);
    return createErrorPlayer(pdgaNumber);
  }
}

/**
 * Return a player's summary, from cache when fresh, otherwise by fetching and
 * parsing the profile page with retries and a growing delay between attempts.
 * @param {string|number} pdgaNumber
 * @param {number} [retries=3] - Maximum number of fetch attempts.
 * @returns {Promise<Object>} Player record; the createErrorPlayer() record when
 *   every attempt fails (or when `retries` is less than 1). Never rejects for
 *   fetch failures.
 */
async function scrapePDGARating(pdgaNumber, retries = 3) {
  const cacheKey = `player-${pdgaNumber}`;
  const cached = cache.get(cacheKey);

  if (cached && Date.now() - cached.timestamp < CACHE_DURATION) {
    console.log(`Using cached data for PDGA ${pdgaNumber}`);
    return cached.data;
  }

  for (let attempt = 1; attempt <= retries; attempt++) {
    try {
      console.log(`Attempt ${attempt}/${retries} for PDGA ${pdgaNumber} (using HTTP)`);

      const html = await fetchPlayerDataHTTP(pdgaNumber);
      const result = parsePlayerData(html, pdgaNumber);

      // Do not pin a parse failure in the cache for a whole day.
      if (result.name !== 'Error') {
        cache.set(cacheKey, { data: result, timestamp: Date.now() });
      }

      console.log(`Successfully scraped PDGA ${pdgaNumber} on attempt ${attempt}`);
      return result;
    } catch (error) {
      console.error(`Attempt ${attempt}/${retries} failed for PDGA ${pdgaNumber}:`, error.message);

      if (attempt < retries) {
        // Wait before retry
        await delay(2000 * attempt);
      }
    }
  }

  // Reached when all attempts failed, and also when retries < 1.
  return createErrorPlayer(pdgaNumber);
}

/**
 * Compute a predicted rating from the player's recent round ratings.
 * @param {Object} browser - Puppeteer browser instance.
 * @param {string|number} pdgaNumber
 * @returns {Promise<number>} Predicted rating, or 0 when anything fails.
 */
async function getPredictedRating(browser, pdgaNumber) {
  try {
    const roundRatings = await getPlayerCompetitionRatings(browser, pdgaNumber);
    return calculatePredictedRating(roundRatings);
  } catch (error) {
    console.error(`Error getting predicted rating for ${pdgaNumber}:`, error);
    return 0;
  }
}

/**
 * Collect round ratings from up to 15 tournaments of the last two years that
 * are linked from the player's profile page.
 * @param {Object} browser - Puppeteer browser instance; a page is opened on it
 *   and closed again before returning.
 * @param {string|number} pdgaNumber
 * @returns {Promise<Array<{rating: number, date: Date}>>} Ratings from the last
 *   year when there are more than 8 of them, otherwise everything collected.
 *   Scraping errors are logged and yield whatever was collected so far.
 */
async function getPlayerCompetitionRatings(browser, pdgaNumber) {
  const page = await browser.newPage();
  const allRatings = [];

  try {
    const url = `https://www.pdga.com/player/${encodeURIComponent(pdgaNumber)}`;
    await page.goto(url, { waitUntil: 'networkidle2' });

    // Runs inside the browser page, so it cannot use anything from this module.
    const tournamentUrls = await page.evaluate(() => {
      const monthIndex = {
        jan: 0, feb: 1, mar: 2, apr: 3, may: 4, jun: 5,
        jul: 6, aug: 7, sep: 8, oct: 9, nov: 10, dec: 11
      };
      const twoYearsAgo = new Date();
      twoYearsAgo.setFullYear(twoYearsAgo.getFullYear() - 2);

      const urls = [];
      document.querySelectorAll('table[id*="player-results"]').forEach((table) => {
        table.querySelectorAll('tbody tr').forEach((row) => {
          const dateCell = row.querySelector('.dates');
          const tournamentLink = row.querySelector('.tournament a');
          if (!dateCell || !tournamentLink) return;

          const dateMatch = dateCell.innerText.trim().match(/(\d{1,2})-([A-Za-z]{3})-(\d{4})/);
          if (!dateMatch) return;

          // Build the date from its parts instead of relying on the engine's
          // parsing of a non-ISO string.
          const month = monthIndex[dateMatch[2].toLowerCase()];
          if (month === undefined) return;
          const date = new Date(Number(dateMatch[3]), month, Number(dateMatch[1]));
          if (!(date > twoYearsAgo)) return;

          const href = tournamentLink.getAttribute('href');
          if (href) {
            // Resolving against the site root handles relative and absolute hrefs.
            urls.push({ url: new URL(href, 'https://www.pdga.com').href, date: dateMatch[0] });
          }
        });
      });

      return urls.slice(0, 15);
    });

    console.log(`Found ${tournamentUrls.length} recent tournaments for PDGA ${pdgaNumber}`);

    for (const tournamentData of tournamentUrls) {
      try {
        await page.goto(tournamentData.url, { waitUntil: 'networkidle2' });
        // Plain timer rather than page.waitForTimeout, which newer Puppeteer
        // releases no longer provide.
        await delay(2000);

        const roundRatings = await page.evaluate((pdgaNum) => {
          const target = String(pdgaNum).trim();
          if (target === '') return [];

          // Match the number only when it is not part of a longer digit run,
          // so that e.g. 123 does not select the row of player 1234.
          const escaped = target.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
          const numberPattern = new RegExp(`(?<!\\d)${escaped}(?!\\d)`);

          for (const row of document.querySelectorAll('tr')) {
            const cells = Array.from(row.querySelectorAll('td'));
            const hasPlayerNumber = cells.some(
              (cell) => cell.innerText && numberPattern.test(cell.innerText)
            );
            if (!hasPlayerNumber) continue;

            const ratings = [];
            row.querySelectorAll('td.round-rating').forEach((cell) => {
              const rating = Number.parseInt(cell.innerText.trim(), 10);
              if (!Number.isNaN(rating) && rating > 0) {
                ratings.push(rating);
              }
            });
            // Only the first matching row is used.
            return ratings;
          }

          return [];
        }, pdgaNumber);

        if (roundRatings.length > 0) {
          const parsedDate = parseDate(tournamentData.date);
          roundRatings.forEach((rating) => {
            allRatings.push({ rating, date: parsedDate });
          });
          console.log(`Found ${roundRatings.length} round ratings for ${tournamentData.url}`);
        }
      } catch (error) {
        // One bad tournament page must not abort the remaining ones.
        console.error(`Error scraping tournament ${tournamentData.url}:`, error);
      }
    }
  } catch (error) {
    console.error(`Error getting competition ratings for PDGA ${pdgaNumber}:`, error);
  } finally {
    await page.close();
  }

  const oneYearAgo = new Date();
  oneYearAgo.setFullYear(oneYearAgo.getFullYear() - 1);

  const recentRatings = allRatings.filter((r) => r.date > oneYearAgo);
  return recentRatings.length > 8 ? recentRatings : allRatings;
}

/**
 * Parse "D-Mon-YYYY" (month abbreviation in any letter case) or "M/D/YYYY"
 * into a local-time Date. Anything else is handed to the Date constructor.
 * @param {string} dateStr
 * @returns {Date} May be an Invalid Date when the text cannot be parsed.
 */
function parseDate(dateStr) {
  const dayMonthYear = dateStr.match(/^(\d{1,2})-([A-Za-z]{3})-(\d{4})$/);
  if (dayMonthYear) {
    const month = monthIndexOf(dayMonthYear[2]);
    if (month !== undefined) {
      return new Date(
        Number.parseInt(dayMonthYear[3], 10),
        month,
        Number.parseInt(dayMonthYear[1], 10)
      );
    }
  }

  const slashDate = dateStr.match(/^(\d{1,2})\/(\d{1,2})\/(\d{4})$/);
  if (slashDate) {
    const month = Number.parseInt(slashDate[1], 10);
    const day = Number.parseInt(slashDate[2], 10);
    // Out-of-range parts fall through to the generic constructor below.
    if (month >= 1 && month <= 12 && day >= 1 && day <= 31) {
      return new Date(Number.parseInt(slashDate[3], 10), month - 1, day);
    }
  }

  return new Date(dateStr);
}

/**
 * Predict a rating from round ratings: newest rounds first, the newest quarter
 * counted twice when there are more than 9 rounds, then values further from the
 * mean than min(2.5 * standard deviation, 100) are dropped before averaging.
 * @param {Array<{rating: number, date: Date}>} roundRatings - Not modified.
 * @returns {number} Rounded predicted rating, or 0 when there is no usable input.
 */
function calculatePredictedRating(roundRatings) {
  if (!roundRatings || roundRatings.length === 0) return 0;

  // Sort a copy so the caller's array keeps its order.
  const ratings = [...roundRatings]
    .sort((a, b) => b.date - a.date)
    .map((r) => r.rating);

  // -1 disables the double weighting for small samples.
  const oneFourth = ratings.length > 9 ? Math.round(ratings.length * 0.25) : -1;

  const weightedRatings = [];
  for (let i = 0; i < ratings.length; i++) {
    const rating = ratings[i];
    weightedRatings.push(rating);
    if (i < oneFourth) {
      weightedRatings.push(rating);
    }
  }

  const validRatings = weightedRatings.filter((r) => r > 0);
  if (validRatings.length === 0) return 0;

  const mean = validRatings.reduce((sum, r) => sum + r, 0) / validRatings.length;
  // The spread is measured on the unweighted ratings.
  const stdDev = calculateStandardDeviation(ratings);
  const deviation = Math.min(stdDev * 2.5, 100);

  const filteredRatings = validRatings.filter((rating) => Math.abs(mean - rating) < deviation);

  // Happens e.g. when every rating is identical (deviation is 0).
  if (filteredRatings.length === 0) return Math.round(mean);

  return Math.round(filteredRatings.reduce((sum, r) => sum + r, 0) / filteredRatings.length);
}

/**
 * Population standard deviation of a list of numbers.
 * @param {number[]} ratings
 * @returns {number} 0 for a missing or empty list.
 */
function calculateStandardDeviation(ratings) {
  if (!ratings || ratings.length === 0) return 0;

  const mean = ratings.reduce((sum, r) => sum + r, 0) / ratings.length;
  const variance = ratings.reduce((sum, r) => sum + Math.pow(r - mean, 2), 0) / ratings.length;
  return Math.sqrt(variance);
}

/**
 * Scrape every PDGA number listed (one per line) in pdga-numbers.txt, pausing
 * one second after each player.
 * @param {?function(Object): void} [progressCallback=null] - Receives
 *   { current, total, pdgaNumber, status, name? } before and after each player.
 * @param {?function(): boolean} [shouldStop=null] - Checked before each player;
 *   when it returns true the remaining players are skipped.
 * @returns {Promise<Object[]>} Player records sorted by rating, highest first;
 *   an empty array when the list cannot be read.
 */
async function getAllRatings(progressCallback = null, shouldStop = null) {
  try {
    const fileContents = await fs.promises.readFile('pdga-numbers.txt', 'utf-8');
    const pdgaNumbers = fileContents
      .split('\n')
      .map((num) => num.trim())
      .filter((num) => num);

    const ratings = [];
    const total = pdgaNumbers.length;

    for (let i = 0; i < pdgaNumbers.length; i++) {
      if (shouldStop && shouldStop()) {
        console.log(`Scrape cancelled after ${i} of ${total} players`);
        break;
      }

      const pdgaNumber = pdgaNumbers[i];
      console.log(`Scraping PDGA ${pdgaNumber}... (${i + 1}/${total})`);

      if (progressCallback) {
        progressCallback({ current: i + 1, total, pdgaNumber, status: 'loading' });
      }

      try {
        const playerData = await scrapePDGARating(pdgaNumber);
        ratings.push(playerData);

        if (progressCallback) {
          progressCallback({
            current: i + 1,
            total,
            pdgaNumber,
            status: 'completed',
            name: playerData.name
          });
        }

        // Longer delay to avoid overwhelming the server
        await delay(1000);
      } catch (error) {
        console.error(`Failed to scrape PDGA ${pdgaNumber}:`, error.message);
        ratings.push(createErrorPlayer(pdgaNumber));

        if (progressCallback) {
          progressCallback({ current: i + 1, total, pdgaNumber, status: 'error', name: 'Error' });
        }
      }
    }

    return ratings.sort((a, b) => b.rating - a.rating);
  } catch (error) {
    console.error('Error reading PDGA numbers:', error);
    return [];
  }
}

app.get('/', (req, res) => {
  res.sendFile(path.join(__dirname, 'index.html'));
});

// Returns all player records as one JSON array once scraping has finished.
app.get('/api/ratings', async (req, res) => {
  try {
    const ratings = await getAllRatings();
    res.json(ratings);
  } catch (error) {
    res.status(500).json({ error: 'Failed to fetch ratings' });
  }
});

// Server-sent events: one message per progress update, then a final
// { status: 'complete', ratings } message.
app.get('/api/ratings/progress', (req, res) => {
  res.writeHead(200, {
    'Content-Type': 'text/event-stream',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Access-Control-Allow-Origin': '*',
    'Access-Control-Allow-Headers': 'Cache-Control'
  });

  // Set once the response is closed (client went away, or we ended it), so we
  // stop writing to it and stop scraping on its behalf.
  let clientGone = false;
  res.on('close', () => {
    clientGone = true;
  });

  const send = (payload) => {
    if (!clientGone && !res.writableEnded) {
      res.write(`data: ${JSON.stringify(payload)}\n\n`);
    }
  };

  getAllRatings(send, () => clientGone)
    .then((ratings) => {
      send({ status: 'complete', ratings });
      res.end();
    })
    .catch((error) => {
      send({ status: 'error', error: error.message });
      res.end();
    });
});

/**
 * Fetch the HTML of a player's rating history page.
 * @param {string|number} pdgaNumber
 * @returns {Promise<string>} Page HTML; rejects as described on fetchPdgaPage.
 */
async function fetchRatingHistory(pdgaNumber) {
  return fetchPdgaPage(`/player/${encodeURIComponent(pdgaNumber)}/history`);
}

/**
 * Extract (date, rating) pairs from the table rows of a rating history page.
 * In each data row the first cell is read as a "D-Mon-YYYY" date and the second
 * as the rating; rows that do not fit are skipped.
 * @param {string} html
 * @returns {Array<{date: string, rating: number, displayDate: string}>}
 *   Entries sorted oldest first; `date` is formatted YYYY-MM-DD.
 */
function parseRatingHistory(html) {
  const history = [];

  // Find all table rows with rating data
  const rowMatches = html.match(/<tr[^>]*>[\s\S]*?<\/tr>/gi);
  if (!rowMatches) return history;

  for (const row of rowMatches) {
    // Skip header rows and empty rows
    if (row.includes('<th') || !row.includes('<td')) continue;

    // [\s\S] rather than "." so a cell whose markup spans several lines still counts.
    const cellMatches = row.match(/<td[^>]*>([\s\S]*?)<\/td>/gi);
    if (!cellMatches || cellMatches.length < 2) continue;

    const dateText = cellMatches[0].replace(/<[^>]*>/g, '').trim();
    const ratingText = cellMatches[1].replace(/<[^>]*>/g, '').trim();

    // Parse date (DD-Mon-YYYY format)
    const dateMatch = dateText.match(/(\d{1,2})-([A-Za-z]{3})-(\d{4})/);
    const rating = Number.parseInt(ratingText, 10);
    if (!dateMatch || Number.isNaN(rating)) continue;

    const [, day, month, year] = dateMatch;
    const monthNumber = monthIndexOf(month);
    if (monthNumber === undefined) continue;

    // Build the date in UTC: a local-midnight Date converted with toISOString()
    // lands on the previous day for hosts east of UTC.
    const date = new Date(Date.UTC(Number.parseInt(year, 10), monthNumber, Number.parseInt(day, 10)));

    history.push({
      date: date.toISOString().split('T')[0], // YYYY-MM-DD format
      rating,
      displayDate: dateText
    });
  }

  // Sort by date (oldest first for chart display)
  return history.sort((a, b) => new Date(a.date) - new Date(b.date));
}

app.get('/api/rating-history/:pdgaNumber', async (req, res) => {
  const { pdgaNumber } = req.params;

  // The parameter ends up in an outgoing request path; accept digits only.
  if (!isValidPdgaNumber(pdgaNumber)) {
    res.status(400).json({ error: 'Invalid PDGA number' });
    return;
  }

  try {
    console.log(`Fetching rating history for PDGA ${pdgaNumber}...`);

    const html = await fetchRatingHistory(pdgaNumber);
    const history = parseRatingHistory(html);

    res.json({ pdgaNumber: Number.parseInt(pdgaNumber, 10), history });
  } catch (error) {
    console.error('Error fetching rating history:', error.message);
    res.status(500).json({ error: 'Failed to fetch rating history' });
  }
});

app.post('/api/predicted-rating/:pdgaNumber', async (req, res) => {
  const { pdgaNumber } = req.params;

  // The parameter ends up in a URL the browser navigates to; accept digits only.
  if (!isValidPdgaNumber(pdgaNumber)) {
    res.status(400).json({ error: 'Invalid PDGA number' });
    return;
  }

  let browser = null;
  try {
    browser = await puppeteer.launch({
      headless: "new",
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        '--disable-accelerated-2d-canvas',
        '--no-first-run',
        '--no-zygote',
        '--disable-gpu'
      ]
    });

    console.log(`Calculating predicted rating for PDGA ${pdgaNumber}...`);
    const predictedRating = await getPredictedRating(browser, pdgaNumber);

    res.json({ pdgaNumber: Number.parseInt(pdgaNumber, 10), predictedRating });
  } catch (error) {
    console.error('Error calculating predicted rating:', error.message || error);
    res.status(500).json({ error: 'Failed to calculate predicted rating' });
  } finally {
    // Single cleanup path for both success and failure.
    if (browser) {
      try {
        await browser.close();
      } catch (closeError) {
        console.error('Error closing browser:', closeError.message);
      }
    }
  }
});

app.listen(PORT, () => {
  console.log(`PDGA Ratings app running on http://localhost:${PORT}`);
});