const express = require('express');
const puppeteer = require('puppeteer');
const https = require('https');
const fs = require('fs');
const path = require('path');
// Express application instance; every route below is registered on it.
const app = express();
// TCP port passed to app.listen() at the bottom of the file.
const PORT = 3000;
// Serve static files from the 'public' directory.
app.use(express.static('public'));
// In-memory cache shared by the scraping helpers and the routes. Entries are
// stored as { data, timestamp } under string keys prefixed with `player-`,
// `history-` or `predicted-`.
const cache = new Map();
// Lifetime of a cache entry in milliseconds (24 hours).
const CACHE_DURATION = 24 * 60 * 60 * 1000;
/**
 * Downloads the raw HTML of a player's profile page from www.pdga.com.
 *
 * @param {string|number} pdgaNumber - PDGA number of the player. It is
 *   URL-encoded before being placed in the request path.
 * @returns {Promise<string>} Resolves with the response body when the server
 *   answers with HTTP 200.
 * @throws {Error} The promise rejects with `HTTP <status>` for any other
 *   status code, with `Request timeout` when the 30 second socket timeout
 *   fires, or with the underlying request/response error.
 */
async function fetchPlayerDataHTTP(pdgaNumber) {
  const options = {
    hostname: 'www.pdga.com',
    port: 443,
    // Encode the identifier so it can never add path segments or characters
    // that are illegal in a request path.
    path: `/player/${encodeURIComponent(String(pdgaNumber))}`,
    method: 'GET',
    headers: {
      'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    },
    // Socket inactivity timeout; triggers the 'timeout' event handled below.
    timeout: 30000,
  };

  return new Promise((resolve, reject) => {
    const req = https.request(options, (res) => {
      // Decode at the stream level so a multi-byte UTF-8 character that is
      // split across two chunks is not corrupted by per-chunk conversion.
      res.setEncoding('utf8');

      let body = '';
      res.on('data', (chunk) => {
        body += chunk;
      });
      res.on('error', reject);
      res.on('end', () => {
        if (res.statusCode === 200) {
          resolve(body);
        } else {
          reject(new Error(`HTTP ${res.statusCode}`));
        }
      });
    });

    req.on('error', reject);
    req.on('timeout', () => {
      // Reject first so callers see the timeout rather than the socket error
      // that destroying the request produces afterwards.
      reject(new Error('Request timeout'));
      req.destroy();
    });
    req.end();
  });
}
/**
 * Extracts a player's name, current rating and latest rating change from the
 * HTML of a PDGA profile page.
 *
 * @param {string} html - Raw HTML of the profile page.
 * @param {string|number} pdgaNumber - PDGA number, copied into the result.
 * @returns {{pdgaNumber: (string|number), name: string, rating: number,
 *   ratingChange: (number|null), predictedRating: null}} Parsed record.
 *   `name` is 'Unknown' and `rating` is 0 when the page does not contain the
 *   expected markers; `name` is 'Error' when parsing itself throws (for
 *   example when `html` is not a string).
 */
function parsePlayerData(html, pdgaNumber) {
  try {
    // The name is the part of the <title> that precedes the site suffix.
    const titleMatch = html.match(
      /<title[^>]*>\s*([^<]+?)\s*\|\s*Professional Disc Golf Association/i
    );
    const rawName = titleMatch ? titleMatch[1].trim() : 'Unknown';

    // Skip over the closing tag that follows the "Current Rating:" label.
    const ratingMatch = html.match(/Current Rating:[^>]*>\s*(\d+)/i);
    const rating = ratingMatch ? Number.parseInt(ratingMatch[1], 10) : 0;

    // First signed number between the label and the "(as of ...)" date note.
    const changeMatch = html.match(/Current Rating:[\s\S]*?([+\-]\d+)[\s\S]*?\(as of/i);
    const ratingChange = changeMatch ? Number.parseInt(changeMatch[1], 10) : null;

    return {
      pdgaNumber,
      // Drop a trailing "#<number>" suffix from the title text.
      name: rawName.replace(/\s*#\d+$/, ''),
      rating,
      ratingChange,
      predictedRating: null,
    };
  } catch (error) {
    console.error(`Error parsing data for PDGA ${pdgaNumber}:`, error.message);
    return {
      pdgaNumber,
      name: 'Error',
      rating: 0,
      ratingChange: null,
      predictedRating: null,
    };
  }
}
/**
 * Returns the profile record for one player, using the shared cache when a
 * fresh entry exists and otherwise fetching and parsing the profile page.
 *
 * Failed attempts are retried after a pause of 2 seconds multiplied by the
 * attempt number. When the last attempt fails, a placeholder record with
 * name 'Error' is returned instead of throwing.
 *
 * @param {string|number} pdgaNumber - PDGA number of the player.
 * @param {number} [retries=3] - Maximum number of fetch attempts.
 * @returns {Promise<object>} The parsed (or placeholder) player record.
 */
async function scrapePDGARating(pdgaNumber, retries = 3) {
  const key = `player-${pdgaNumber}`;
  const entry = cache.get(key);
  const isFresh = entry && Date.now() - entry.timestamp < CACHE_DURATION;
  if (isFresh) {
    console.log(`Using cached data for PDGA ${pdgaNumber}`);
    return entry.data;
  }

  let attempt = 1;
  while (attempt <= retries) {
    try {
      console.log(`Attempt ${attempt}/${retries} for PDGA ${pdgaNumber} (using HTTP)`);
      const pageHtml = await fetchPlayerDataHTTP(pdgaNumber);
      const playerRecord = parsePlayerData(pageHtml, pdgaNumber);
      cache.set(key, { data: playerRecord, timestamp: Date.now() });
      console.log(`Successfully scraped PDGA ${pdgaNumber} on attempt ${attempt}`);
      return playerRecord;
    } catch (error) {
      console.error(`Attempt ${attempt}/${retries} failed for PDGA ${pdgaNumber}:`, error.message);
      const outOfAttempts = attempt === retries;
      if (outOfAttempts) {
        return {
          pdgaNumber,
          name: 'Error',
          rating: 0,
          ratingChange: null,
          predictedRating: null
        };
      }
      // Back off a little longer after every failed attempt.
      const pauseMs = 2000 * attempt;
      await new Promise((wake) => setTimeout(wake, pauseMs));
    }
    attempt += 1;
  }
}
/**
 * Computes a predicted rating for a player from their recent round ratings,
 * retrying when scraping fails or yields no usable ratings.
 *
 * @param {object} browser - Browser handle forwarded to
 *   getPlayerCompetitionRatings.
 * @param {string|number} pdgaNumber - PDGA number of the player.
 * @param {number} [retries=2] - Maximum number of attempts; a 5 second pause
 *   separates consecutive attempts.
 * @returns {Promise<number>} The predicted rating, or 0 when every attempt
 *   failed or produced no positive rating.
 */
async function getPredictedRating(browser, pdgaNumber, retries = 2) {
  const pause = (ms) => new Promise((wake) => setTimeout(wake, ms));

  let attempt = 0;
  while (attempt < retries) {
    attempt += 1;
    const canRetry = attempt < retries;
    try {
      console.log(`Predicted rating attempt ${attempt}/${retries} for PDGA ${pdgaNumber}`);
      const rounds = await getPlayerCompetitionRatings(browser, pdgaNumber);
      const estimate = calculatePredictedRating(rounds);
      if (estimate > 0) {
        return estimate;
      }
      if (canRetry) {
        console.log(`No ratings found, waiting before retry...`);
        await pause(5000);
      }
    } catch (error) {
      console.error(`Predicted rating attempt ${attempt}/${retries} failed for ${pdgaNumber}:`, error.message);
      if (canRetry) {
        await pause(5000);
      }
    }
  }

  console.log(`All attempts failed for predicted rating of PDGA ${pdgaNumber}`);
  return 0;
}
/**
 * Collects a player's round ratings from the tournaments listed on their
 * PDGA profile page that took place within the last year.
 *
 * A new page is opened on `browser`, the profile is loaded, and each recent
 * tournament page is visited in turn. Errors are logged and never thrown:
 * a failure on one tournament skips that tournament, a failure on the profile
 * page yields whatever was collected so far. The page is always closed.
 *
 * @param {object} browser - Object exposing `newPage()`; the returned page
 *   must provide `goto`, `evaluate` and `close` (a Puppeteer browser in the
 *   callers visible in this file).
 * @param {string|number} pdgaNumber - PDGA number of the player.
 * @param {object} [options] - Optional tuning knobs.
 * @param {number} [options.maxTournaments=8] - Maximum number of tournament
 *   pages to visit.
 * @param {number} [options.delayMs=3000] - Pause after loading each
 *   tournament page, in milliseconds.
 * @returns {Promise<Array<{rating: number, date: Date}>>} One entry per round
 *   rating found, dated with the tournament's date.
 */
async function getPlayerCompetitionRatings(
  browser,
  pdgaNumber,
  { maxTournaments = 8, delayMs = 3000 } = {}
) {
  const page = await browser.newPage();
  const allRatings = [];
  try {
    const profileUrl = `https://www.pdga.com/player/${encodeURIComponent(String(pdgaNumber))}`;
    await page.goto(profileUrl, { waitUntil: 'networkidle2' });

    // NOTE: the callback below runs inside the page, so it must not reference
    // anything from this module's scope.
    const tournamentUrls = await page.evaluate((limit) => {
      const MONTHS = {
        jan: 0, feb: 1, mar: 2, apr: 3, may: 4, jun: 5,
        jul: 6, aug: 7, sep: 8, oct: 9, nov: 10, dec: 11,
      };
      // Computed once instead of once per table row.
      const oneYearAgo = new Date();
      oneYearAgo.setFullYear(oneYearAgo.getFullYear() - 1);

      const urls = [];
      document.querySelectorAll('table[id*="player-results"]').forEach((table) => {
        table.querySelectorAll('tbody tr').forEach((row) => {
          const dateCell = row.querySelector('.dates');
          const link = row.querySelector('.tournament a');
          if (!dateCell || !link) return;

          const dateMatch = dateCell.innerText.trim().match(/(\d{1,2})-([A-Za-z]{3})-(\d{4})/);
          if (!dateMatch) return;

          // Build the date from its parts rather than relying on the
          // engine-specific parsing of a non-ISO date string.
          const month = MONTHS[dateMatch[2].toLowerCase()];
          if (month === undefined) return;
          const date = new Date(Number(dateMatch[3]), month, Number(dateMatch[1]));
          if (!(date > oneYearAgo)) return;

          const href = link.getAttribute('href');
          if (href) {
            urls.push({ url: `https://www.pdga.com${href}`, date: dateMatch[0] });
          }
        });
      });
      return urls.slice(0, limit);
    }, maxTournaments);

    console.log(`Found ${tournamentUrls.length} recent tournaments for PDGA ${pdgaNumber}`);

    for (const tournamentData of tournamentUrls) {
      try {
        await page.goto(tournamentData.url, { waitUntil: 'domcontentloaded', timeout: 45000 });
        // Plain timer instead of page.waitForTimeout(), which is deprecated
        // and no longer present in recent Puppeteer releases.
        await new Promise((resolve) => setTimeout(resolve, delayMs));

        const roundRatings = await page.evaluate((pdgaNum) => {
          const wanted = String(pdgaNum);
          for (const row of document.querySelectorAll('tr')) {
            const cells = Array.from(row.querySelectorAll('td'));
            // Compare whole digit runs so that, e.g., 123 does not match the
            // row of player 51234.
            const isPlayerRow = cells.some(
              (cell) => cell.innerText && cell.innerText.split(/\D+/).includes(wanted)
            );
            if (!isPlayerRow) continue;

            const ratings = [];
            row.querySelectorAll('td.round-rating').forEach((cell) => {
              const rating = Number.parseInt(cell.innerText.trim(), 10);
              if (!Number.isNaN(rating) && rating > 0) {
                ratings.push(rating);
              }
            });
            return ratings;
          }
          return [];
        }, pdgaNumber);

        if (roundRatings.length > 0) {
          const parsedDate = parseDate(tournamentData.date);
          roundRatings.forEach((rating) => {
            allRatings.push({ rating, date: parsedDate });
          });
          console.log(`Found ${roundRatings.length} round ratings for ${tournamentData.url}`);
        }
      } catch (error) {
        // One broken tournament page must not abort the remaining ones.
        console.error(`Error scraping tournament ${tournamentData.url}:`, error);
      }
    }
  } catch (error) {
    console.error(`Error getting competition ratings for PDGA ${pdgaNumber}:`, error);
  } finally {
    await page.close();
  }
  // Only tournaments from the last year were visited, so no further
  // filtering is needed here.
  return allRatings;
}
/**
 * Converts a date string into a Date.
 *
 * Two formats are parsed explicitly, both as local midnight:
 *   - `D-Mon-YYYY` (e.g. `12-Mar-2024`; the month name is case-insensitive)
 *   - `M/D/YYYY`   (e.g. `3/12/2024`)
 * Anything else is handed to the Date constructor unchanged.
 *
 * @param {string} dateStr - Date text to parse.
 * @returns {Date} The parsed date; an Invalid Date when a recognised format
 *   carries an unknown month name or out-of-range month/day numbers.
 */
function parseDate(dateStr) {
  const MONTH_INDEX = {
    jan: 0, feb: 1, mar: 2, apr: 3, may: 4, jun: 5,
    jul: 6, aug: 7, sep: 8, oct: 9, nov: 10, dec: 11,
  };
  const text = String(dateStr).trim();

  const named = text.match(/^(\d{1,2})-([A-Za-z]{3})-(\d{4})$/);
  if (named) {
    const [, day, monthName, year] = named;
    const month = MONTH_INDEX[monthName.toLowerCase()];
    if (month === undefined) {
      return new Date(NaN);
    }
    return new Date(Number(year), month, Number(day));
  }

  const numeric = text.match(/^(\d{1,2})\/(\d{1,2})\/(\d{4})$/);
  if (numeric) {
    const month = Number(numeric[1]);
    const day = Number(numeric[2]);
    const year = Number(numeric[3]);
    // Reject values the Date constructor would silently roll over.
    if (month < 1 || month > 12 || day < 1 || day > 31) {
      return new Date(NaN);
    }
    return new Date(year, month - 1, day);
  }

  return new Date(text);
}
/**
 * Estimates a player's rating from individual round ratings.
 *
 * Steps (the thresholds are the ones this file attributes to PDGA rules):
 *   1. Order rounds from newest to oldest and keep positive ratings only.
 *   2. With 7 or more rounds, drop rounds more than 2.5 standard deviations
 *      below the mean, provided at least 4 rounds remain afterwards.
 *   3. With 9 or more rounds, count the most recent 25% twice.
 *   4. Return the rounded average of the resulting list.
 *
 * The input array is not modified.
 *
 * @param {Array<{rating: number, date: Date}>} roundRatings - Round ratings
 *   with the date they were played.
 * @returns {number} The rounded predicted rating, or 0 when there is no
 *   usable rating.
 */
function calculatePredictedRating(roundRatings) {
  if (!roundRatings || roundRatings.length === 0) return 0;

  // Sort a copy: Array.prototype.sort works in place and would otherwise
  // reorder the caller's array.
  let ratings = [...roundRatings]
    .sort((a, b) => b.date - a.date)
    .map((round) => round.rating)
    .filter((rating) => rating > 0);
  if (ratings.length === 0) return 0;

  if (ratings.length >= 7) {
    const mean = ratings.reduce((sum, rating) => sum + rating, 0) / ratings.length;
    const cutoff = mean - 2.5 * calculateStandardDeviation(ratings);
    const withoutOutliers = ratings.filter((rating) => rating >= cutoff);
    // Only apply the exclusion when enough rounds survive it.
    if (withoutOutliers.length >= 4) {
      ratings = withoutOutliers;
    }
  }

  // `ratings` is newest-first, so the leading slice is the most recent part.
  const doubledCount = ratings.length >= 9 ? Math.round(ratings.length * 0.25) : 0;
  const weighted = [...ratings, ...ratings.slice(0, doubledCount)];

  const average = weighted.reduce((sum, rating) => sum + rating, 0) / weighted.length;
  return Math.round(average);
}
/**
 * Computes the population standard deviation of a list of numbers
 * (the squared deviations are divided by the number of values).
 *
 * @param {number[]} ratings - Values to measure.
 * @returns {number} The standard deviation, or 0 for a missing or empty list.
 */
function calculateStandardDeviation(ratings) {
  if (!ratings || ratings.length === 0) {
    return 0;
  }
  const count = ratings.length;

  let total = 0;
  ratings.forEach((value) => {
    total += value;
  });
  const average = total / count;

  let squaredDeviationTotal = 0;
  ratings.forEach((value) => {
    squaredDeviationTotal += (value - average) ** 2;
  });

  return Math.sqrt(squaredDeviationTotal / count);
}
/**
 * Loads the profile record of every player listed in `pdga-numbers.txt`
 * (one PDGA number per line, blank lines ignored) and returns the records
 * sorted by rating, highest first.
 *
 * Players are processed one after another. A 1 second pause follows every
 * player whose data was not already cached, to avoid flooding the remote
 * server. A player that cannot be loaded is represented by a placeholder
 * record with name 'Error'.
 *
 * @param {?function(object): void} [progressCallback=null] - Called with
 *   `{ current, total, pdgaNumber, status }` where status is 'loading',
 *   'completed' (plus `name`) or 'error' (plus `name`). An exception thrown
 *   by the callback is logged and otherwise ignored.
 * @returns {Promise<object[]>} Player records sorted by descending rating,
 *   or an empty array when the number list cannot be read.
 */
async function getAllRatings(progressCallback = null) {
  // Progress reporting is best effort: a failing listener must neither abort
  // the run nor cause a player to be recorded twice.
  const notify = (update) => {
    if (!progressCallback) return;
    try {
      progressCallback(update);
    } catch (error) {
      console.error('Progress callback failed:', error.message);
    }
  };

  let pdgaNumbers;
  try {
    pdgaNumbers = fs.readFileSync('pdga-numbers.txt', 'utf-8')
      .split('\n')
      .map((line) => line.trim())
      .filter((line) => line);
  } catch (error) {
    console.error('Error reading PDGA numbers:', error);
    return [];
  }

  const ratings = [];
  const total = pdgaNumbers.length;

  for (const [index, pdgaNumber] of pdgaNumbers.entries()) {
    const current = index + 1;
    console.log(`Scraping PDGA ${pdgaNumber}... (${current}/${total})`);
    notify({ current, total, pdgaNumber, status: 'loading' });

    // Must be sampled BEFORE scraping: a successful scrape refreshes the
    // cache entry and would make every player look cached.
    const cached = cache.get(`player-${pdgaNumber}`);
    const wasFromCache = Boolean(cached) && Date.now() - cached.timestamp < CACHE_DURATION;

    let playerData;
    try {
      playerData = await scrapePDGARating(pdgaNumber);
      if (!playerData) {
        throw new Error('No data returned');
      }
    } catch (error) {
      console.error(`Failed to scrape PDGA ${pdgaNumber}:`, error.message);
      ratings.push({
        pdgaNumber,
        name: 'Error',
        rating: 0,
        ratingChange: null,
        predictedRating: null,
      });
      notify({ current, total, pdgaNumber, status: 'error', name: 'Error' });
      continue;
    }

    ratings.push(playerData);
    notify({ current, total, pdgaNumber, status: 'completed', name: playerData.name });

    if (!wasFromCache) {
      // Delay only after fresh scrapes to avoid overwhelming the server.
      await new Promise((resolve) => setTimeout(resolve, 1000));
    }
  }

  return ratings.sort((a, b) => b.rating - a.rating);
}
// GET / — send index.html from the directory this script lives in.
app.get('/', function serveIndexPage(req, res) {
  const indexFile = path.join(__dirname, 'index.html');
  res.sendFile(indexFile);
});
// GET /api/ratings — respond with the full, sorted list of player records as
// JSON, or with a 500 and a generic error body when that fails.
app.get('/api/ratings', async function sendAllRatings(req, res) {
  try {
    res.json(await getAllRatings());
  } catch (error) {
    const failure = { error: 'Failed to fetch ratings' };
    res.status(500).json(failure);
  }
});
// GET /api/ratings/progress — Server-Sent Events stream. Every progress
// update from getAllRatings is forwarded as one `data:` event, followed by a
// final event with status 'complete' (carrying the ratings) or 'error'.
app.get('/api/ratings/progress', (req, res) => {
  res.writeHead(200, {
    'Content-Type': 'text/event-stream',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Access-Control-Allow-Origin': '*',
    'Access-Control-Allow-Headers': 'Cache-Control'
  });

  // Tracks whether the stream may still be written to, so that nothing is
  // written (and end() is not called a second time) after the client left.
  let streamOpen = true;

  const closeStream = () => {
    if (!streamOpen) return;
    streamOpen = false;
    res.end();
  };

  const sendEvent = (payload) => {
    if (!streamOpen) return;
    res.write(`data: ${JSON.stringify(payload)}\n\n`);
  };

  // Listen on the response: its 'close' event fires when the connection goes
  // away (and also after our own end(), which closeStream tolerates).
  // NOTE(review): the request's 'close' event timing differs between Node
  // versions — confirm against the Node version in use before reverting.
  res.on('close', closeStream);

  getAllRatings(sendEvent)
    .then((ratings) => {
      sendEvent({ status: 'complete', ratings });
    })
    .catch((error) => {
      sendEvent({ status: 'error', error: error.message });
    })
    .then(closeStream);
});
/**
 * Downloads the raw HTML of a player's rating-history page from www.pdga.com.
 *
 * @param {string|number} pdgaNumber - PDGA number of the player. The value can
 *   originate from a request parameter, so it is URL-encoded before being
 *   placed in the request path.
 * @returns {Promise<string>} Resolves with the response body when the server
 *   answers with HTTP 200.
 * @throws {Error} The promise rejects with `HTTP <status>` for any other
 *   status code, with `Request timeout` when the 30 second socket timeout
 *   fires, or with the underlying request/response error.
 */
async function fetchRatingHistory(pdgaNumber) {
  const options = {
    hostname: 'www.pdga.com',
    port: 443,
    // Encoding prevents the identifier from adding path segments or from
    // containing characters that are illegal in a request path.
    path: `/player/${encodeURIComponent(String(pdgaNumber))}/history`,
    method: 'GET',
    headers: {
      'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    },
    // Socket inactivity timeout; triggers the 'timeout' event handled below.
    timeout: 30000,
  };

  return new Promise((resolve, reject) => {
    const req = https.request(options, (res) => {
      // Decode at the stream level so a multi-byte UTF-8 character that is
      // split across two chunks is not corrupted by per-chunk conversion.
      res.setEncoding('utf8');

      let body = '';
      res.on('data', (chunk) => {
        body += chunk;
      });
      res.on('error', reject);
      res.on('end', () => {
        if (res.statusCode === 200) {
          resolve(body);
        } else {
          reject(new Error(`HTTP ${res.statusCode}`));
        }
      });
    });

    req.on('error', reject);
    req.on('timeout', () => {
      // Reject first so callers see the timeout rather than the socket error
      // that destroying the request produces afterwards.
      reject(new Error('Request timeout'));
      req.destroy();
    });
    req.end();
  });
}
/**
 * Extracts rating-history entries from the HTML of a rating-history page.
 *
 * Every table row without header cells is inspected; the first cell is read
 * as a `D-Mon-YYYY` date and the second as the rating. Rows that do not fit
 * that shape are skipped.
 *
 * @param {string} html - Raw HTML of the history page.
 * @returns {Array<{date: string, rating: number, displayDate: string}>}
 *   Entries sorted oldest first. `date` is `YYYY-MM-DD`, `displayDate` is the
 *   date text as it appeared in the page. Empty when `html` is not a
 *   non-empty string.
 */
function parseRatingHistory(html) {
  if (typeof html !== 'string' || html === '') {
    return [];
  }

  const MONTH_INDEX = {
    jan: 0, feb: 1, mar: 2, apr: 3, may: 4, jun: 5,
    jul: 6, aug: 7, sep: 8, oct: 9, nov: 10, dec: 11,
  };
  const twoDigits = (value) => String(value).padStart(2, '0');
  const stripTags = (fragment) => fragment.replace(/<[^>]*>/g, '').trim();

  const history = [];
  // Find all table rows with rating data.
  const rows = html.match(/<tr\b[^>]*>[\s\S]*?<\/tr>/gi) || [];
  for (const row of rows) {
    // Skip header rows.
    if (/<th[\s>]/i.test(row)) continue;

    const cells = row.match(/<td\b[^>]*>[\s\S]*?<\/td>/gi);
    if (!cells || cells.length < 2) continue;

    const dateText = stripTags(cells[0]);
    const rating = Number.parseInt(stripTags(cells[1]), 10);

    // Parse date (DD-Mon-YYYY format).
    const dateMatch = dateText.match(/(\d{1,2})-([A-Za-z]{3})-(\d{4})/);
    if (!dateMatch || Number.isNaN(rating)) continue;

    const day = Number(dateMatch[1]);
    const month = MONTH_INDEX[dateMatch[2].toLowerCase()];
    const year = dateMatch[3];
    if (month === undefined || day < 1 || day > 31) continue;

    history.push({
      // Assembled from the parts directly: converting a local-midnight Date
      // with toISOString() shifts the day in timezones ahead of UTC.
      date: `${year}-${twoDigits(month + 1)}-${twoDigits(day)}`, // YYYY-MM-DD format
      rating,
      displayDate: dateText,
    });
  }

  // Sort by date (oldest first for chart display); YYYY-MM-DD strings order
  // chronologically when compared as text.
  return history.sort((a, b) => {
    if (a.date < b.date) return -1;
    if (a.date > b.date) return 1;
    return 0;
  });
}
// GET /api/rating-history/:pdgaNumber — respond with the player's rating
// history as { pdgaNumber, history }. Results are cached for CACHE_DURATION.
// Responds 400 for a non-numeric PDGA number and 500 when fetching fails.
app.get('/api/rating-history/:pdgaNumber', async (req, res) => {
  const { pdgaNumber } = req.params;

  // The parameter is untrusted: accept digits only before it is used in a
  // cache key and in an outbound request path.
  if (!/^\d{1,10}$/.test(pdgaNumber)) {
    res.status(400).json({ error: 'Invalid PDGA number' });
    return;
  }
  const playerId = Number.parseInt(pdgaNumber, 10);

  try {
    const cacheKey = `history-${pdgaNumber}`;
    const cached = cache.get(cacheKey);

    // Check cache first (24 hour cache for rating history).
    if (cached && Date.now() - cached.timestamp < CACHE_DURATION) {
      console.log(`Using cached rating history for PDGA ${pdgaNumber}`);
      res.json({ pdgaNumber: playerId, history: cached.data });
      return;
    }

    console.log(`Fetching rating history for PDGA ${pdgaNumber}...`);
    const html = await fetchRatingHistory(pdgaNumber);
    const history = parseRatingHistory(html);

    // Cache the result.
    cache.set(cacheKey, { data: history, timestamp: Date.now() });

    res.json({ pdgaNumber: playerId, history });
  } catch (error) {
    console.error('Error fetching rating history:', error.message);
    res.status(500).json({ error: 'Failed to fetch rating history' });
  }
});
// POST /api/clear-cache — empty the shared in-memory cache and report how
// many entries were dropped; responds 500 if clearing throws.
app.post('/api/clear-cache', function clearCacheHandler(req, res) {
  try {
    const removedCount = cache.size;
    cache.clear();
    console.log(`Cache cleared - removed ${removedCount} entries`);

    const summary = {
      success: true,
      message: `Cache cleared - ${removedCount} entries removed`
    };
    res.json(summary);
  } catch (error) {
    console.error('Error clearing cache:', error);
    res.status(500).json({ error: 'Failed to clear cache' });
  }
});
// POST /api/predicted-rating/:pdgaNumber — respond with
// { pdgaNumber, predictedRating }. A headless browser is launched per
// uncached request and always closed afterwards. Results are cached for
// CACHE_DURATION. Responds 400 for a non-numeric PDGA number and 500 when the
// calculation fails.
app.post('/api/predicted-rating/:pdgaNumber', async (req, res) => {
  const { pdgaNumber } = req.params;

  // The parameter is untrusted: accept digits only before it is used in a
  // cache key and in URLs the browser navigates to.
  if (!/^\d{1,10}$/.test(pdgaNumber)) {
    res.status(400).json({ error: 'Invalid PDGA number' });
    return;
  }
  const playerId = Number.parseInt(pdgaNumber, 10);

  let browser = null;
  try {
    const cacheKey = `predicted-${pdgaNumber}`;
    const cached = cache.get(cacheKey);

    // Check cache first (24 hour cache for predicted ratings).
    if (cached && Date.now() - cached.timestamp < CACHE_DURATION) {
      console.log(`Using cached predicted rating for PDGA ${pdgaNumber}`);
      res.json({ pdgaNumber: playerId, predictedRating: cached.data });
      return;
    }

    browser = await puppeteer.launch({
      headless: "new",
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        '--disable-accelerated-2d-canvas',
        '--no-first-run',
        '--no-zygote',
        '--disable-gpu'
      ]
    });

    console.log(`Calculating predicted rating for PDGA ${pdgaNumber}...`);
    const predictedRating = await getPredictedRating(browser, pdgaNumber);

    // Cache the result.
    cache.set(cacheKey, { data: predictedRating, timestamp: Date.now() });

    res.json({ pdgaNumber: playerId, predictedRating });
  } catch (error) {
    console.error('Error calculating predicted rating:', error.message || error);
    // Avoid a second response if the failure happened after one was sent.
    if (!res.headersSent) {
      res.status(500).json({ error: 'Failed to calculate predicted rating' });
    }
  } finally {
    // Single cleanup path for success and failure. A close failure is logged
    // only, so it cannot turn a computed result into an error response.
    if (browser) {
      try {
        await browser.close();
      } catch (closeError) {
        console.error('Error closing browser:', closeError.message);
      }
    }
  }
});
// Start the HTTP server on PORT and log the local URL once it is listening.
app.listen(PORT, () => {
  console.log(`PDGA Ratings app running on http://localhost:${PORT}`);
});