55188a8269
- Update predicted rating algorithm to match PDGA rating guide - Focus on tournaments from last 12 months only (improved accuracy) - Add proper outlier exclusion: rounds >2.5 std dev below average - Implement double weighting for most recent 25% of rounds (9+ rounds) - Apply PDGA minimum data requirements (7 rounds for outlier exclusion) - Improve error handling and rate limiting for tournament scraping - Add user-friendly error messages for failed calculations - Reduce tournament scraping from 15 to 8 tournaments to avoid rate limits 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
640 lines
19 KiB
JavaScript
640 lines
19 KiB
JavaScript
const express = require('express');
|
|
const puppeteer = require('puppeteer');
|
|
const https = require('https');
|
|
const fs = require('fs');
|
|
const path = require('path');
|
|
|
|
// In-memory cache shared by the scraping helpers and API routes. Entries are
// stored as `{ data, timestamp }` and are considered stale after CACHE_DURATION.
const cache = new Map();
const CACHE_DURATION = 1000 * 60 * 60 * 24; // 24 hours, in milliseconds

// HTTP server configuration.
const PORT = 3000;
const app = express();

// Expose everything under ./public as static assets.
app.use(express.static('public'));
|
|
|
|
/**
 * Downloads the raw HTML of a player's PDGA profile page.
 *
 * @param {string|number} pdgaNumber - PDGA membership number; interpolated
 *   into the request path as-is.
 * @returns {Promise<string>} Resolves with the response body when the server
 *   answers 200. Rejects with `Error('HTTP <status>')` for any other status,
 *   with `Error('Request timeout')` after 30 s of socket inactivity, or with
 *   the underlying network/stream error.
 */
async function fetchPlayerDataHTTP(pdgaNumber) {
  return new Promise((resolve, reject) => {
    const options = {
      hostname: 'www.pdga.com',
      port: 443,
      path: `/player/${pdgaNumber}`,
      method: 'GET',
      headers: {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
      },
      // Socket inactivity timeout; triggers the 'timeout' event handled below.
      timeout: 30000
    };

    const req = https.request(options, (res) => {
      // Decode at the stream level so a multi-byte UTF-8 character that is
      // split across two chunks is not corrupted by per-chunk conversion.
      res.setEncoding('utf8');

      let body = '';
      res.on('data', (chunk) => {
        body += chunk;
      });

      // A connection dropped mid-body never emits 'end'; without this the
      // promise would stay pending forever.
      res.on('error', reject);

      res.on('end', () => {
        if (res.statusCode === 200) {
          resolve(body);
        } else {
          reject(new Error(`HTTP ${res.statusCode}`));
        }
      });
    });

    req.on('error', reject);

    req.on('timeout', () => {
      // 'timeout' only signals inactivity; the request must be torn down
      // explicitly. The later 'error' from destroy() is a no-op because the
      // promise is already rejected.
      req.destroy();
      reject(new Error('Request timeout'));
    });

    req.end();
  });
}
|
|
|
|
/**
 * Extracts a player's name, current rating and most recent rating change from
 * the HTML of a PDGA profile page.
 *
 * @param {string} html - Page markup to scan.
 * @param {string|number} pdgaNumber - Echoed back in the result and used in
 *   the error log.
 * @returns {{pdgaNumber: *, name: string, rating: number,
 *   ratingChange: (number|null), predictedRating: null}}
 *   Missing pieces fall back to `'Unknown'`, `0` and `null` respectively. If
 *   extraction throws (for example `html` is not a string) the error is logged
 *   and a record named `'Error'` is returned instead.
 */
function parsePlayerData(html, pdgaNumber) {
  try {
    // The page title has the form "<name> #<number> | Professional Disc Golf Association".
    let name = 'Unknown';
    const titleHit = html.match(/<title>([^<]+?)\s*\|\s*Professional Disc Golf Association/i);
    if (titleHit) {
      name = titleHit[1].trim();
    }

    // The number follows the closing tag that wraps the "Current Rating:" label.
    let rating = 0;
    const ratingHit = html.match(/Current Rating:[^>]*>\s*(\d+)/i);
    if (ratingHit) {
      rating = Number.parseInt(ratingHit[1], 10);
    }

    // First signed number between the label and the "(as of ...)" date note.
    let ratingChange = null;
    const changeHit = html.match(/Current Rating:[\s\S]*?([+\-]\d+)[\s\S]*?\(as of/i);
    if (changeHit) {
      ratingChange = Number.parseInt(changeHit[1], 10);
    }

    // Drop a trailing " #12345" membership suffix from the displayed name.
    const displayName = name.replace(/\s*#\d+$/, '');

    return {
      pdgaNumber,
      name: displayName,
      rating,
      ratingChange,
      predictedRating: null
    };
  } catch (error) {
    console.error(`Error parsing data for PDGA ${pdgaNumber}:`, error.message);
    return {
      pdgaNumber,
      name: 'Error',
      rating: 0,
      ratingChange: null,
      predictedRating: null
    };
  }
}
|
|
|
|
/**
 * Returns basic rating data for one player, served from the in-memory cache
 * when a fresh entry exists and otherwise fetched over HTTP with retries.
 *
 * @param {string|number} pdgaNumber - PDGA membership number.
 * @param {number} [retries=3] - Maximum number of fetch attempts.
 * @returns {Promise<object>} The parsed player record, or a placeholder record
 *   named `'Error'` once the final attempt has failed.
 */
async function scrapePDGARating(pdgaNumber, retries = 3) {
  const cacheKey = `player-${pdgaNumber}`;
  const entry = cache.get(cacheKey);
  const isFresh = entry && Date.now() - entry.timestamp < CACHE_DURATION;

  if (isFresh) {
    console.log(`Using cached data for PDGA ${pdgaNumber}`);
    return entry.data;
  }

  let attempt = 1;
  while (attempt <= retries) {
    try {
      console.log(`Attempt ${attempt}/${retries} for PDGA ${pdgaNumber} (using HTTP)`);

      const html = await fetchPlayerDataHTTP(pdgaNumber);
      const result = parsePlayerData(html, pdgaNumber);

      cache.set(cacheKey, { data: result, timestamp: Date.now() });

      console.log(`Successfully scraped PDGA ${pdgaNumber} on attempt ${attempt}`);
      return result;
    } catch (error) {
      console.error(`Attempt ${attempt}/${retries} failed for PDGA ${pdgaNumber}:`, error.message);

      const outOfAttempts = attempt === retries;
      if (outOfAttempts) {
        return {
          pdgaNumber,
          name: 'Error',
          rating: 0,
          ratingChange: null,
          predictedRating: null
        };
      }

      // Linear back-off: 2 s after the first failure, 4 s after the second, ...
      const backoffMs = 2000 * attempt;
      await new Promise((resolve) => setTimeout(resolve, backoffMs));
    }

    attempt += 1;
  }
}
|
|
|
|
/**
 * Computes a player's predicted rating from scraped round ratings, retrying
 * when scraping throws or yields nothing usable.
 *
 * @param {object} browser - Browser handle forwarded to
 *   `getPlayerCompetitionRatings`.
 * @param {string|number} pdgaNumber - PDGA membership number.
 * @param {number} [retries=2] - Maximum number of attempts.
 * @returns {Promise<number>} The predicted rating, or `0` when every attempt
 *   failed or produced no positive rating.
 */
async function getPredictedRating(browser, pdgaNumber, retries = 2) {
  const RETRY_DELAY_MS = 5000;
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  for (let attempt = 1; attempt <= retries; attempt++) {
    const canRetry = attempt < retries;

    try {
      console.log(`Predicted rating attempt ${attempt}/${retries} for PDGA ${pdgaNumber}`);
      const rounds = await getPlayerCompetitionRatings(browser, pdgaNumber);
      const predicted = calculatePredictedRating(rounds);

      if (predicted > 0) {
        return predicted;
      }

      if (canRetry) {
        console.log(`No ratings found, waiting before retry...`);
      }
    } catch (error) {
      console.error(`Predicted rating attempt ${attempt}/${retries} failed for ${pdgaNumber}:`, error.message);
    }

    // Both the "nothing found" and the "threw" outcomes wait before retrying.
    if (canRetry) {
      await pause(RETRY_DELAY_MS);
    }
  }

  console.log(`All attempts failed for predicted rating of PDGA ${pdgaNumber}`);
  return 0;
}
|
|
|
|
/**
 * Scrapes the individual round ratings a player earned in tournaments played
 * during the last twelve months.
 *
 * Opens the player's profile page, collects up to eight tournament links whose
 * date falls within the past year, then visits each tournament page and reads
 * the `td.round-rating` cells from the player's row.
 *
 * Errors are logged and swallowed on purpose (best effort): a failing
 * tournament page is skipped, and a failing profile page yields an empty list.
 *
 * @param {object} browser - Puppeteer browser used to open a new page.
 * @param {string|number} pdgaNumber - PDGA membership number.
 * @returns {Promise<Array<{rating: number, date: Date}>>} One entry per round.
 */
async function getPlayerCompetitionRatings(browser, pdgaNumber) {
  const page = await browser.newPage();
  const allRatings = [];

  try {
    const url = `https://www.pdga.com/player/${pdgaNumber}`;
    await page.goto(url, { waitUntil: 'networkidle2' });

    // NOTE: this callback runs inside the browser page, so it cannot use any
    // helper defined in this file.
    const tournamentUrls = await page.evaluate(() => {
      // The cutoff is the same for every row; compute it once.
      const oneYearAgo = new Date();
      oneYearAgo.setFullYear(oneYearAgo.getFullYear() - 1);

      const urls = [];
      const tables = document.querySelectorAll('table[id*="player-results"]');

      tables.forEach((table) => {
        table.querySelectorAll('tbody tr').forEach((row) => {
          const dateCell = row.querySelector('.dates');
          const tournamentCell = row.querySelector('.tournament a');
          if (!dateCell || !tournamentCell) return;

          // A cell may hold a date range; the pattern picks up the first
          // complete "DD-Mon-YYYY" it contains.
          const dateMatch = dateCell.innerText.trim().match(/\d{1,2}-[A-Za-z]{3}-\d{4}/);
          if (!dateMatch) return;

          const dateStr = dateMatch[0];
          // Relies on the browser's lenient parsing of "DD-Mon-YYYY".
          if (!(new Date(dateStr) > oneYearAgo)) return;

          const href = tournamentCell.getAttribute('href');
          if (href) {
            urls.push({
              url: `https://www.pdga.com${href}`,
              date: dateStr
            });
          }
        });
      });

      return urls.slice(0, 8); // Reduce number of tournaments to scrape
    });

    console.log(`Found ${tournamentUrls.length} recent tournaments for PDGA ${pdgaNumber}`);

    for (const tournamentData of tournamentUrls) {
      try {
        await page.goto(tournamentData.url, { waitUntil: 'domcontentloaded', timeout: 45000 });
        // Longer delay between requests. A plain timer is used because
        // `page.waitForTimeout` is deprecated and absent from newer Puppeteer.
        await new Promise((resolve) => setTimeout(resolve, 3000));

        const roundRatings = await page.evaluate((pdgaNum) => {
          const wanted = String(pdgaNum);
          const rows = Array.from(document.querySelectorAll('tr'));
          const cellTexts = (row) =>
            Array.from(row.querySelectorAll('td')).map((cell) => (cell.innerText || '').trim());

          // Prefer a row with a cell that IS the PDGA number. A substring test
          // alone would also match longer numbers, scores or prize amounts
          // that merely contain the digits. The substring test is kept only as
          // a fallback for layouts where the number shares a cell with other text.
          const playerRow =
            rows.find((row) => cellTexts(row).includes(wanted)) ||
            rows.find((row) => cellTexts(row).some((text) => text.includes(wanted)));

          if (!playerRow) return [];

          const ratings = [];
          playerRow.querySelectorAll('td.round-rating').forEach((cell) => {
            const rating = parseInt(cell.innerText.trim(), 10);
            if (!isNaN(rating) && rating > 0) {
              ratings.push(rating);
            }
          });
          return ratings;
        }, pdgaNumber);

        if (roundRatings.length > 0) {
          // Every round of a tournament shares the tournament's date.
          const parsedDate = parseDate(tournamentData.date);
          roundRatings.forEach((rating) => {
            allRatings.push({ rating, date: parsedDate });
          });
          console.log(`Found ${roundRatings.length} round ratings for ${tournamentData.url}`);
        }
      } catch (error) {
        console.error(`Error scraping tournament ${tournamentData.url}:`, error);
      }
    }
  } catch (error) {
    console.error(`Error getting competition ratings for PDGA ${pdgaNumber}:`, error);
  } finally {
    await page.close();
  }

  // Return all ratings from the last year (already filtered above)
  return allRatings;
}
|
|
|
|
/**
 * Converts a date string into a `Date` at local midnight.
 *
 * Recognised explicitly:
 *   - `DD-Mon-YYYY` (e.g. `5-Jan-2024`); the month abbreviation is matched
 *     case-insensitively.
 *   - `MM/DD/YYYY` (e.g. `3/15/2024`).
 * Anything else is handed to the `Date` constructor unchanged, which may
 * produce an Invalid Date.
 *
 * @param {string} dateStr - Date text; surrounding whitespace is ignored.
 * @returns {Date} Parsed date (possibly invalid for unrecognised input).
 */
function parseDate(dateStr) {
  const MONTH_INDEX = new Map([
    ['jan', 0], ['feb', 1], ['mar', 2], ['apr', 3], ['may', 4], ['jun', 5],
    ['jul', 6], ['aug', 7], ['sep', 8], ['oct', 9], ['nov', 10], ['dec', 11]
  ]);

  const text = dateStr.trim();

  const dayMonthYear = text.match(/^(\d{1,2})-([A-Za-z]{3})-(\d{4})$/);
  if (dayMonthYear) {
    // The pattern accepts any letter case, so the lookup must as well;
    // otherwise "05-JAN-2024" would produce an Invalid Date.
    const month = MONTH_INDEX.get(dayMonthYear[2].toLowerCase());
    if (month !== undefined) {
      const day = Number.parseInt(dayMonthYear[1], 10);
      const year = Number.parseInt(dayMonthYear[3], 10);
      return new Date(year, month, day);
    }
  }

  const slashDate = text.match(/^(\d{1,2})\/(\d{1,2})\/(\d{4})$/);
  if (slashDate) {
    const month = Number.parseInt(slashDate[1], 10);
    const day = Number.parseInt(slashDate[2], 10);
    const year = Number.parseInt(slashDate[3], 10);
    // Only build the date when the parts are plausible; out-of-range values
    // fall through to the generic parser instead of silently overflowing.
    if (month >= 1 && month <= 12 && day >= 1 && day <= 31) {
      return new Date(year, month - 1, day);
    }
  }

  return new Date(text);
}
|
|
|
|
/**
 * Estimates a player's next rating from individual round ratings.
 *
 * Steps, following the rules stated in the original implementation:
 *   1. Order rounds most-recent-first and discard non-positive ratings.
 *   2. With 7 or more rounds, drop rounds more than 2.5 standard deviations
 *      below the mean, but only if at least 4 rounds remain afterwards.
 *   3. With 9 or more rounds, count the most recent 25% (rounded) twice.
 *   4. Return the rounded average of the resulting list.
 *
 * The input array is not modified.
 *
 * @param {Array<{rating: number, date: Date}>} roundRatings - Rounds to average.
 * @returns {number} Rounded predicted rating, or `0` when there is no usable round.
 */
function calculatePredictedRating(roundRatings) {
  if (!roundRatings || roundRatings.length === 0) return 0;

  // Sort a copy: Array.prototype.sort works in place and would otherwise
  // reorder the caller's array as a side effect.
  let ratings = [...roundRatings]
    .sort((a, b) => b.date - a.date)
    .map((round) => round.rating)
    .filter((rating) => rating > 0);

  if (ratings.length === 0) return 0;

  // PDGA Rule: Need at least 7 rounds to apply outlier exclusion
  if (ratings.length >= 7) {
    const mean = ratings.reduce((sum, r) => sum + r, 0) / ratings.length;
    const stdDev = calculateStandardDeviation(ratings);

    // PDGA Rule: Exclude rounds more than 2.5 standard deviations below average
    const cutoff = mean - 2.5 * stdDev;
    const withoutOutliers = ratings.filter((rating) => rating >= cutoff);

    // Use filtered ratings if we still have enough data
    if (withoutOutliers.length >= 4) {
      ratings = withoutOutliers;
    }
  }

  // PDGA Rule: Most recent 25% of rounds count double if 9+ rounds.
  // `ratings` is ordered most-recent-first, so the head of the list is repeated.
  const doubledCount = ratings.length >= 9 ? Math.round(ratings.length * 0.25) : 0;
  const weightedRatings = [...ratings, ...ratings.slice(0, doubledCount)];

  const total = weightedRatings.reduce((sum, r) => sum + r, 0);
  return Math.round(total / weightedRatings.length);
}
|
|
|
|
/**
 * Population standard deviation of a list of numbers (divides by N, not N - 1).
 *
 * @param {number[]} ratings - Values to measure.
 * @returns {number} The standard deviation, or `0` for a missing or empty list.
 */
function calculateStandardDeviation(ratings) {
  if (!ratings || ratings.length === 0) {
    return 0;
  }

  const count = ratings.length;

  let total = 0;
  ratings.forEach((value) => {
    total += value;
  });
  const mean = total / count;

  let squaredDeviations = 0;
  ratings.forEach((value) => {
    squaredDeviations += Math.pow(value - mean, 2);
  });

  return Math.sqrt(squaredDeviations / count);
}
|
|
|
|
/**
 * Loads the PDGA numbers listed in `pdga-numbers.txt` (one per line, relative
 * to the working directory) and scrapes each player in turn.
 *
 * @param {?function(object): void} [progressCallback=null] - Optional listener
 *   invoked with `{ current, total, pdgaNumber, status }` before each player
 *   (`status: 'loading'`) and again afterwards with `status: 'completed'` or
 *   `'error'` plus the player's `name`.
 * @returns {Promise<object[]>} Player records sorted by rating, highest first;
 *   an empty array if the list cannot be read or another unexpected error
 *   escapes the per-player handling.
 */
async function getAllRatings(progressCallback = null) {
  // Forwards an event only when a listener was supplied.
  const report = (event) => {
    if (progressCallback) {
      progressCallback(event);
    }
  };

  try {
    const fileText = fs.readFileSync('pdga-numbers.txt', 'utf-8');
    const pdgaNumbers = fileText
      .split('\n')
      .map((line) => line.trim())
      .filter((line) => line);

    const total = pdgaNumbers.length;
    const ratings = [];

    for (const [index, pdgaNumber] of pdgaNumbers.entries()) {
      const current = index + 1;
      console.log(`Scraping PDGA ${pdgaNumber}... (${current}/${total})`);

      report({ current, total, pdgaNumber, status: 'loading' });

      try {
        const playerData = await scrapePDGARating(pdgaNumber);
        ratings.push(playerData);

        report({
          current,
          total,
          pdgaNumber,
          status: 'completed',
          name: playerData.name
        });

        // Pause between players so the remote server is not hammered.
        await new Promise((resolve) => setTimeout(resolve, 1000));
      } catch (error) {
        console.error(`Failed to scrape PDGA ${pdgaNumber}:`, error.message);

        ratings.push({
          pdgaNumber,
          name: 'Error',
          rating: 0,
          ratingChange: null,
          predictedRating: null
        });

        report({
          current,
          total,
          pdgaNumber,
          status: 'error',
          name: 'Error'
        });
      }
    }

    ratings.sort((a, b) => b.rating - a.rating);
    return ratings;
  } catch (error) {
    console.error('Error reading PDGA numbers:', error);
    return [];
  }
}
|
|
|
|
// Landing page: send index.html from the directory that contains this script.
app.get('/', (req, res) => {
  const indexFile = path.join(__dirname, 'index.html');
  res.sendFile(indexFile);
});
|
|
|
|
// Returns the full list of player ratings as JSON, sorted by getAllRatings().
// Responds with HTTP 500 and a generic message if the lookup rejects.
app.get('/api/ratings', async (req, res) => {
  try {
    const ratings = await getAllRatings();
    res.json(ratings);
  } catch (error) {
    // Record the cause before answering; previously the error was discarded,
    // leaving nothing in the server log to diagnose a 500.
    console.error('Error fetching ratings:', error.message || error);
    res.status(500).json({ error: 'Failed to fetch ratings' });
  }
});
|
|
|
|
// Server-Sent Events endpoint: streams one `data:` message per progress event
// while all players are scraped, then a final `{ status: 'complete', ratings }`
// (or `{ status: 'error', error }`) message before closing the stream.
app.get('/api/ratings/progress', (req, res) => {
  res.writeHead(200, {
    'Content-Type': 'text/event-stream',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Access-Control-Allow-Origin': '*',
    'Access-Control-Allow-Headers': 'Cache-Control'
  });

  // Set once the response is finished or the client has gone away. The
  // response's own 'close' event is used because it covers both a completed
  // response and a prematurely terminated connection.
  let streamClosed = false;
  res.on('close', () => {
    streamClosed = true;
  });

  // Writes one SSE message, unless the stream can no longer accept data.
  // Scraping keeps running after a disconnect, so without this guard every
  // later progress event would write to an ended response.
  const send = (payload) => {
    if (streamClosed || res.writableEnded) return;
    res.write(`data: ${JSON.stringify(payload)}\n\n`);
  };

  const finish = () => {
    if (!res.writableEnded) {
      res.end();
    }
  };

  getAllRatings(send)
    .then((ratings) => {
      send({ status: 'complete', ratings });
      finish();
    })
    .catch((error) => {
      send({ status: 'error', error: error.message });
      finish();
    });
});
|
|
|
|
/**
 * Downloads the raw HTML of a player's PDGA rating-history page.
 *
 * @param {string|number} pdgaNumber - PDGA membership number; interpolated
 *   into the request path as-is.
 * @returns {Promise<string>} Resolves with the response body when the server
 *   answers 200. Rejects with `Error('HTTP <status>')` for any other status,
 *   with `Error('Request timeout')` after 30 s of socket inactivity, or with
 *   the underlying network/stream error.
 */
async function fetchRatingHistory(pdgaNumber) {
  return new Promise((resolve, reject) => {
    const options = {
      hostname: 'www.pdga.com',
      port: 443,
      path: `/player/${pdgaNumber}/history`,
      method: 'GET',
      headers: {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
      },
      // Socket inactivity timeout; triggers the 'timeout' event handled below.
      timeout: 30000
    };

    const req = https.request(options, (res) => {
      // Decode at the stream level so a multi-byte UTF-8 character that is
      // split across two chunks is not corrupted by per-chunk conversion.
      res.setEncoding('utf8');

      let body = '';
      res.on('data', (chunk) => {
        body += chunk;
      });

      // A connection dropped mid-body never emits 'end'; without this the
      // promise would stay pending forever.
      res.on('error', reject);

      res.on('end', () => {
        if (res.statusCode === 200) {
          resolve(body);
        } else {
          reject(new Error(`HTTP ${res.statusCode}`));
        }
      });
    });

    req.on('error', reject);

    req.on('timeout', () => {
      // 'timeout' only signals inactivity; the request must be torn down
      // explicitly. The later 'error' from destroy() is a no-op because the
      // promise is already rejected.
      req.destroy();
      reject(new Error('Request timeout'));
    });

    req.end();
  });
}
|
|
|
|
/**
 * Extracts rating-history entries from the HTML of a rating-history page.
 *
 * Every table row that has at least two `<td>` cells, a `DD-Mon-YYYY` date in
 * the first cell and an integer in the second becomes one entry. Header rows
 * and rows that do not fit (no date, unknown month abbreviation, non-numeric
 * rating) are skipped.
 *
 * @param {string} html - Page markup to scan.
 * @returns {Array<{date: string, rating: number, displayDate: string}>}
 *   Entries sorted oldest first. `date` is `YYYY-MM-DD`; `displayDate` is the
 *   first cell's text as shown on the page.
 */
function parseRatingHistory(html) {
  const MONTH_INDEX = new Map([
    ['jan', 0], ['feb', 1], ['mar', 2], ['apr', 3], ['may', 4], ['jun', 5],
    ['jul', 6], ['aug', 7], ['sep', 8], ['oct', 9], ['nov', 10], ['dec', 11]
  ]);
  const stripTags = (fragment) => fragment.replace(/<[^>]*>/g, '').trim();

  const history = [];
  const rows = html.match(/<tr[^>]*>[\s\S]*?<\/tr>/gi) || [];

  for (const row of rows) {
    // Skip header rows and empty rows
    if (row.includes('<th') || !row.includes('<td')) continue;

    // [\s\S] rather than "." so a cell whose content spans lines still matches.
    const cells = row.match(/<td[^>]*>[\s\S]*?<\/td>/gi);
    if (!cells || cells.length < 2) continue;

    const dateText = stripTags(cells[0]);
    const rating = Number.parseInt(stripTags(cells[1]), 10);

    // Parse date (DD-Mon-YYYY format)
    const dateMatch = dateText.match(/(\d{1,2})-([A-Za-z]{3})-(\d{4})/);
    if (!dateMatch || Number.isNaN(rating)) continue;

    const [, day, month, year] = dateMatch;
    // An unrecognised abbreviation would otherwise yield an Invalid Date and
    // make toISOString() throw, discarding the whole history.
    const monthIndex = MONTH_INDEX.get(month.toLowerCase());
    if (monthIndex === undefined) continue;

    // Build the date in UTC: a local-midnight Date converted with
    // toISOString() lands on the previous day in time zones ahead of UTC.
    const utcDate = new Date(
      Date.UTC(Number.parseInt(year, 10), monthIndex, Number.parseInt(day, 10))
    );

    history.push({
      date: utcDate.toISOString().slice(0, 10), // YYYY-MM-DD format
      rating,
      displayDate: dateText
    });
  }

  // Sort by date (oldest first for chart display)
  return history.sort((a, b) => new Date(a.date) - new Date(b.date));
}
|
|
|
|
// Returns `{ pdgaNumber, history }` for one player, where `history` is the
// list produced by parseRatingHistory(). Results are cached in memory for
// CACHE_DURATION. Responds 400 for a non-numeric PDGA number and 500 when the
// history page cannot be fetched or parsed.
app.get('/api/rating-history/:pdgaNumber', async (req, res) => {
  const { pdgaNumber } = req.params;

  // The parameter is client-controlled and is interpolated into an outbound
  // request path and a cache key, so accept digits only.
  if (!/^\d+$/.test(pdgaNumber)) {
    res.status(400).json({ error: 'Invalid PDGA number' });
    return;
  }

  try {
    const cacheKey = `history-${pdgaNumber}`;
    const cached = cache.get(cacheKey);

    // Check cache first (24 hour cache for rating history)
    if (cached && Date.now() - cached.timestamp < CACHE_DURATION) {
      console.log(`Using cached rating history for PDGA ${pdgaNumber}`);
      res.json({
        pdgaNumber: parseInt(pdgaNumber, 10),
        history: cached.data
      });
      return;
    }

    console.log(`Fetching rating history for PDGA ${pdgaNumber}...`);
    const html = await fetchRatingHistory(pdgaNumber);
    const history = parseRatingHistory(html);

    // Cache the result
    cache.set(cacheKey, {
      data: history,
      timestamp: Date.now()
    });

    res.json({
      pdgaNumber: parseInt(pdgaNumber, 10),
      history
    });
  } catch (error) {
    console.error('Error fetching rating history:', error.message);
    res.status(500).json({ error: 'Failed to fetch rating history' });
  }
});
|
|
|
|
// Calculates `{ pdgaNumber, predictedRating }` for one player by scraping
// recent tournament results in a headless browser. Successful results are
// cached in memory for CACHE_DURATION. Responds 400 for a non-numeric PDGA
// number and 500 when the calculation throws.
app.post('/api/predicted-rating/:pdgaNumber', async (req, res) => {
  const { pdgaNumber } = req.params;

  // The parameter is client-controlled and ends up in scraped URLs and a
  // cache key, so accept digits only.
  if (!/^\d+$/.test(pdgaNumber)) {
    res.status(400).json({ error: 'Invalid PDGA number' });
    return;
  }

  const cacheKey = `predicted-${pdgaNumber}`;
  const cached = cache.get(cacheKey);

  // Check cache first (24 hour cache for predicted ratings)
  if (cached && Date.now() - cached.timestamp < CACHE_DURATION) {
    console.log(`Using cached predicted rating for PDGA ${pdgaNumber}`);
    res.json({
      pdgaNumber: parseInt(pdgaNumber, 10),
      predictedRating: cached.data
    });
    return;
  }

  let browser = null;
  try {
    browser = await puppeteer.launch({
      headless: "new",
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        '--disable-accelerated-2d-canvas',
        '--no-first-run',
        '--no-zygote',
        '--disable-gpu'
      ]
    });

    console.log(`Calculating predicted rating for PDGA ${pdgaNumber}...`);
    const predictedRating = await getPredictedRating(browser, pdgaNumber);

    // getPredictedRating() returns 0 when every attempt failed. Caching that
    // would pin a transient failure (e.g. rate limiting) for the whole cache
    // lifetime, so only real results are stored.
    if (predictedRating > 0) {
      cache.set(cacheKey, {
        data: predictedRating,
        timestamp: Date.now()
      });
    }

    res.json({
      pdgaNumber: parseInt(pdgaNumber, 10),
      predictedRating
    });
  } catch (error) {
    console.error('Error calculating predicted rating:', error.message || error);
    res.status(500).json({ error: 'Failed to calculate predicted rating' });
  } finally {
    // Always release the browser process, on success and on failure alike.
    if (browser) {
      try {
        await browser.close();
      } catch (closeError) {
        console.error('Error closing browser:', closeError.message);
      }
    }
  }
});
|
|
|
|
// Start accepting connections and print the local URL once the port is bound.
app.listen(PORT, function onListening() {
  console.log(`PDGA Ratings app running on http://localhost:${PORT}`);
});