Add course layouts scraping and rating calculation system
Features added:
- Course directory scraping with pagination for Swedish courses
- Layout scraping from course detail pages (AJAX tabs)
- Event results scraping to calculate layout ratings
- Mean rating calculation based on players who shot par
- Last played date tracking for each layout (extracted from event pages)
- Multi-event aggregation for accurate ratings across tournaments

Database:
- Added courses table (name, link, city, last_updated)
- Added layouts table (name, par, mean_rating, rating_count, last_played)
- Added database migrations for new columns
- Foreign key relationship between courses and layouts

API endpoints:
- POST /api/scrape-courses - scrape course directory
- POST /api/scrape-layouts/:courseId - scrape layouts and events (combined)
- POST /api/scrape-all-layouts - bulk scrape all courses
- POST /api/scrape-event-results/:courseId - process event results
- GET /api/courses - fetch all courses
- GET /api/layouts/:courseId - fetch layouts for course

UI:
- New courses.html page for course/layout management
- Expandable course rows showing layouts
- Display layout par, mean rating, and last played date
- Layouts sorted by most recently played (newest first)
- Individual and bulk scraping controls

Technical details:
- Date extraction using regex pattern matching from event pages
- Proper detection of division results in details/table.results structure
- Round score and rating extraction from td.round/td.round-rating pairs
- Course location from td.views-field-field-course-location

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -11,7 +11,11 @@ const PORT = 3000;
|
||||
app.use(express.static('public'));
|
||||
|
||||
// Initialize SQLite database
|
||||
const db = new sqlite3.Database('./ratings.db');
|
||||
const dbPath = process.env.DB_PATH || './ratings.db';
|
||||
const db = new sqlite3.Database(dbPath);
|
||||
|
||||
// In-memory cache for layout-division-event mapping
|
||||
const layoutEventCache = new Map(); // key: courseId, value: array of {name, par, divisions, eventUrl}
|
||||
|
||||
// Initialize database schema
|
||||
function initializeDatabase() {
|
||||
@@ -73,7 +77,7 @@ function initializeDatabase() {
|
||||
)
|
||||
`);
|
||||
|
||||
// Create rating_history table
|
||||
// Create rating_history table
|
||||
db.run(`
|
||||
CREATE TABLE IF NOT EXISTS rating_history (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
@@ -82,12 +86,51 @@ function initializeDatabase() {
|
||||
rating INTEGER NOT NULL,
|
||||
FOREIGN KEY (player_id) REFERENCES players (id)
|
||||
)
|
||||
`);
|
||||
|
||||
// Create courses table
|
||||
db.run(`
|
||||
CREATE TABLE IF NOT EXISTS courses (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
name TEXT NOT NULL,
|
||||
link TEXT UNIQUE NOT NULL,
|
||||
city TEXT,
|
||||
last_updated DATETIME DEFAULT CURRENT_TIMESTAMP
|
||||
)
|
||||
`);
|
||||
|
||||
// Create layouts table
|
||||
db.run(`
|
||||
CREATE TABLE IF NOT EXISTS layouts (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
course_id INTEGER NOT NULL,
|
||||
name TEXT NOT NULL,
|
||||
par INTEGER NOT NULL,
|
||||
mean_rating INTEGER,
|
||||
rating_count INTEGER DEFAULT 0,
|
||||
last_calculated DATETIME,
|
||||
FOREIGN KEY (course_id) REFERENCES courses (id),
|
||||
UNIQUE(course_id, name, par)
|
||||
)
|
||||
`, (err) => {
|
||||
if (err) {
|
||||
reject(err);
|
||||
} else {
|
||||
console.log('Database initialized successfully');
|
||||
resolve();
|
||||
// Add missing columns if they don't exist (migration)
|
||||
db.run(`ALTER TABLE layouts ADD COLUMN mean_rating INTEGER`, () => {
|
||||
// Ignore error if column already exists
|
||||
db.run(`ALTER TABLE layouts ADD COLUMN rating_count INTEGER DEFAULT 0`, () => {
|
||||
// Ignore error if column already exists
|
||||
db.run(`ALTER TABLE layouts ADD COLUMN last_calculated DATETIME`, () => {
|
||||
// Ignore error if column already exists
|
||||
db.run(`ALTER TABLE layouts ADD COLUMN last_played DATE`, () => {
|
||||
// Ignore error if column already exists
|
||||
console.log('Database initialized successfully');
|
||||
resolve();
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
});
|
||||
});
|
||||
@@ -1419,6 +1462,431 @@ function calculateStandardDeviation(ratings) {
|
||||
return Math.sqrt(variance);
|
||||
}
|
||||
|
||||
// Database helper functions for courses and layouts
|
||||
/**
 * Insert a course, or refresh it in place when its link is already stored.
 *
 * The previous `INSERT OR REPLACE` deleted the conflicting row and inserted a
 * new one, which hands out a fresh AUTOINCREMENT id on every re-scrape and
 * leaves `layouts.course_id` pointing at ids that no longer exist. An upsert
 * keyed on the UNIQUE `link` column keeps the id stable.
 * NOTE(review): `ON CONFLICT ... DO UPDATE` needs SQLite 3.24 or newer —
 * confirm the bundled SQLite version.
 *
 * @param {{name: string, link: string, city: (string|null)}} courseData
 *   Course fields written to the `courses` table.
 * @returns {Promise<number|undefined>} Resolves with the id of the stored
 *   row; rejects with the sqlite error.
 */
function saveCourseToDB(courseData) {
  const { name, link, city } = courseData;

  return new Promise((resolve, reject) => {
    db.run(
      `INSERT INTO courses (name, link, city, last_updated)
       VALUES (?, ?, ?, datetime('now'))
       ON CONFLICT(link) DO UPDATE SET
         name = excluded.name,
         city = excluded.city,
         last_updated = excluded.last_updated`,
      [name, link, city],
      (err) => {
        if (err) {
          reject(err);
          return;
        }

        // lastID is only meaningful for a fresh INSERT, so read the id back
        // by link; this is correct for both the insert and the update path.
        db.get('SELECT id FROM courses WHERE link = ?', [link], (lookupErr, row) => {
          if (lookupErr) {
            reject(lookupErr);
            return;
          }
          resolve(row ? row.id : undefined);
        });
      }
    );
  });
}
|
||||
|
||||
/**
 * Look up a single course by its stored link.
 *
 * @param {string} link - Value compared against the `courses.link` column.
 * @returns {Promise<object|undefined>} Resolves with whatever row `db.get`
 *   yields for the query; rejects with the sqlite error.
 */
function getCourseFromDB(link) {
  return new Promise((resolve, reject) => {
    db.get('SELECT * FROM courses WHERE link = ?', [link], (err, row) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(row);
    });
  });
}
|
||||
|
||||
/**
 * Fetch every stored course, ordered alphabetically by name.
 *
 * @returns {Promise<object[]>} Resolves with the rows returned by `db.all`;
 *   rejects with the sqlite error.
 */
function getAllCoursesFromDB() {
  return new Promise((resolve, reject) => {
    db.all('SELECT * FROM courses ORDER BY name ASC', [], (err, rows) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(rows);
    });
  });
}
|
||||
|
||||
/**
 * Store a layout for a course unless the same (course, name, par) row
 * already exists.
 *
 * `INSERT OR IGNORE` silently skips rows that violate the UNIQUE constraint;
 * in that case `this.lastID` does not describe the layout, so the existing
 * row's id is looked up instead of returning a stale value.
 *
 * @param {number|string} courseId - Value written to `layouts.course_id`.
 * @param {{name: string, par: number}} layoutData - Layout name and par.
 * @returns {Promise<number|undefined>} Resolves with the id of the inserted
 *   or already-present row; rejects with the sqlite error.
 */
function saveLayoutToDB(courseId, layoutData) {
  const { name, par } = layoutData;

  return new Promise((resolve, reject) => {
    db.run(
      `INSERT OR IGNORE INTO layouts (course_id, name, par)
       VALUES (?, ?, ?)`,
      [courseId, name, par],
      // Must be a regular function: sqlite3 exposes lastID/changes on `this`.
      function onInserted(err) {
        if (err) {
          reject(err);
          return;
        }

        if (this.changes > 0) {
          resolve(this.lastID);
          return;
        }

        // Insert was ignored: the row already exists, fetch its id.
        db.get(
          'SELECT id FROM layouts WHERE course_id = ? AND name = ? AND par = ?',
          [courseId, name, par],
          (lookupErr, row) => {
            if (lookupErr) {
              reject(lookupErr);
              return;
            }
            resolve(row ? row.id : undefined);
          }
        );
      }
    );
  });
}
|
||||
|
||||
/**
 * Fetch the layouts of one course, most recently played first and then by
 * name.
 *
 * @param {number|string} courseId - Value compared against `layouts.course_id`.
 * @returns {Promise<object[]>} Resolves with the rows returned by `db.all`;
 *   rejects with the sqlite error.
 */
function getLayoutsForCourse(courseId) {
  return new Promise((resolve, reject) => {
    db.all(
      'SELECT * FROM layouts WHERE course_id = ? ORDER BY last_played DESC, name ASC',
      [courseId],
      (err, rows) => {
        if (err) {
          reject(err);
          return;
        }
        resolve(rows);
      }
    );
  });
}
|
||||
|
||||
/**
 * Write the computed rating figures for one layout.
 *
 * `last_played` is only replaced when a date is supplied: with the default
 * `lastPlayed = null` the previous statement overwrote an already-known date
 * with NULL, so `COALESCE` now keeps the stored value in that case.
 *
 * @param {number} courseId - Course the layout belongs to.
 * @param {string} layoutName - Layout name (part of the row identity).
 * @param {number} par - Layout par (part of the row identity).
 * @param {number} meanRating - Value stored in `mean_rating`.
 * @param {number} ratingCount - Value stored in `rating_count`.
 * @param {string|null} [lastPlayed=null] - Date stored in `last_played`, or
 *   null to leave the stored date untouched.
 * @returns {Promise<number>} Resolves with the number of rows changed;
 *   rejects with the sqlite error.
 */
function updateLayoutRating(courseId, layoutName, par, meanRating, ratingCount, lastPlayed = null) {
  return new Promise((resolve, reject) => {
    db.run(
      `UPDATE layouts
       SET mean_rating = ?,
           rating_count = ?,
           last_calculated = datetime('now'),
           last_played = COALESCE(?, last_played)
       WHERE course_id = ? AND name = ? AND par = ?`,
      [meanRating, ratingCount, lastPlayed, courseId, layoutName, par],
      // Must be a regular function: sqlite3 exposes `changes` on `this`.
      function onUpdated(err) {
        if (err) {
          reject(err);
          return;
        }
        resolve(this.changes);
      }
    );
  });
}
|
||||
|
||||
// Course scraping functions
|
||||
/**
 * Walk the paginated PDGA course directory (Swedish courses, 18+ holes) and
 * store every course found.
 *
 * Pagination stops when a page yields no course rows, or when a page yields
 * only links that were already collected (guards against a site that keeps
 * serving its last page for out-of-range page numbers). Scrape errors are
 * logged and end the walk; whatever was collected so far is still returned.
 *
 * @param {object} browser - Puppeteer browser; only `newPage()` is used.
 * @returns {Promise<Array<{name: string, link: string, city: string}>>}
 *   All distinct courses collected across the visited pages.
 */
async function scrapeCourseDirectory(browser) {
  const SITE_ORIGIN = 'https://www.pdga.com';
  // Plain timer instead of page.waitForTimeout(), which newer Puppeteer
  // releases no longer provide.
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  console.log('=== Scraping Swedish courses from PDGA course directory ===');
  const page = await browser.newPage();
  const allCourses = [];
  const seenLinks = new Set();
  let pageNumber = 0;

  try {
    for (;;) {
      const url = `https://www.pdga.com/course-directory/advanced?title=&field_course_location_country=SE&field_course_location_locality=&field_course_location_administrative_area=All&field_course_location_postal_code=&field_course_type_value=All&rating_value=All&field_course_holes_value=18-100&field_course_total_length_value=All&field_course_target_type_value=All&field_course_tee_type_value=All&field_location_type_value=All&field_course_camping_value=All&field_course_facilities_value=All&field_course_fees_value=All&field_course_handicap_value=All&field_course_private_value=All&field_course_signage_value=All&field_cart_friendly_value=All&page=${pageNumber}`;

      console.log(`Scraping page ${pageNumber}...`);
      await page.goto(url, { waitUntil: 'networkidle2', timeout: 45000 });
      await pause(1000);

      // Runs inside the page: one entry per table row that has a title link.
      const courses = await page.evaluate((origin) => {
        const courseData = [];

        for (const row of document.querySelectorAll('table tbody tr')) {
          const anchor = row.querySelector('td.views-field-title a');
          if (!anchor) {
            continue;
          }

          const locationCell = row.querySelector('td.views-field-field-course-location');
          courseData.push({
            name: anchor.innerText.trim(),
            // Resolving against the origin also copes with absolute hrefs.
            link: new URL(anchor.getAttribute('href'), origin).href,
            city: locationCell ? locationCell.innerText.trim() : 'Unknown',
          });
        }

        return courseData;
      }, SITE_ORIGIN);

      if (courses.length === 0) {
        console.log(`No courses found on page ${pageNumber}, stopping pagination`);
        break;
      }

      const freshCourses = courses.filter((course) => !seenLinks.has(course.link));
      if (freshCourses.length === 0) {
        console.log(`Page ${pageNumber} only repeats known courses, stopping pagination`);
        break;
      }

      console.log(`Found ${courses.length} courses on page ${pageNumber}`);

      for (const course of freshCourses) {
        seenLinks.add(course.link);
        allCourses.push(course);

        // A failed save must not abort the rest of the page.
        try {
          await saveCourseToDB(course);
          console.log(`✓ Saved course: ${course.name} (${course.city})`);
        } catch (err) {
          console.error(`Error saving course ${course.name}:`, err.message);
        }
      }

      pageNumber++;

      // Delay between pages to be respectful
      console.log('Waiting 2s before next page...');
      await pause(2000);
    }

    console.log(`✓ Total courses scraped: ${allCourses.length} across ${pageNumber} pages`);
  } catch (error) {
    console.error('Error scraping course directory:', error.message);
  } finally {
    await page.close();
  }

  return allCourses;
}
|
||||
|
||||
/**
 * Scrape the layouts listed on a course page, cache them with their event
 * links and store the distinct (name, par) pairs.
 *
 * The full list (one entry per layout per tournament, including divisions
 * and event URL) goes into `layoutEventCache` under the integer course id;
 * only one row per name+par is written to the database. Errors are logged
 * and whatever was collected so far is returned.
 *
 * @param {object} browser - Puppeteer browser; only `newPage()` is used.
 * @param {string} courseLink - URL of the course detail page.
 * @param {number|string} courseId - Course id; parsed to an integer for the
 *   cache key and passed through unchanged to `saveLayoutToDB`.
 * @returns {Promise<Array<{name: string, par: number, divisions: string[], eventUrl: (string|null)}>>}
 *   Every layout entry found on the page (not de-duplicated).
 */
async function scrapeCourseLayouts(browser, courseLink, courseId) {
  const SITE_ORIGIN = 'https://www.pdga.com';
  // Plain timer instead of page.waitForTimeout(), which newer Puppeteer
  // releases no longer provide.
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  const page = await browser.newPage();
  const layouts = [];

  try {
    await page.goto(courseLink, { waitUntil: 'networkidle2', timeout: 45000 });
    await pause(1000);

    // Click the first tab whose label mentions "Layout", trying the most
    // specific selectors first.
    const layoutsTabClicked = await page.evaluate(() => {
      const selectors = [
        'a.quicktabs-tab-course_node-2',
        'li.quicktabs-tab-course_node-2 a',
        'a[href*="layouts"]',
        '.quicktabs-tabs a',
        'ul.quicktabs-tabs a',
        '.quicktabs-wrapper a',
      ];

      for (const selector of selectors) {
        for (const tab of document.querySelectorAll(selector)) {
          const text = tab.innerText?.trim();
          if (text && text.includes('Layout')) {
            tab.click();
            return true;
          }
        }
      }
      return false;
    });

    if (layoutsTabClicked) {
      // Give the tab content time to load after the click.
      await pause(3000);
    }

    // Runs inside the page: collect every layout of every tournament block.
    const scraped = await page.evaluate((origin) => {
      const layoutData = [];
      const tournamentsDiv = document.querySelector('div.tournaments');
      if (!tournamentsDiv) {
        return layoutData;
      }

      const parPatterns = [
        /Par[:\s]+(\d+)/i,
        /Par\s*=\s*(\d+)/i,
        /\(Par\s+(\d+)\)/i,
        /Total Par:\s*(\d+)/i,
      ];

      for (const details of tournamentsDiv.querySelectorAll('details.tournament-course')) {
        // Event results link, shared by all layouts of this tournament.
        const resultsLink = details.querySelector('div.results')?.querySelector('a') ?? null;
        const eventHref = resultsLink ? resultsLink.getAttribute('href') : null;
        const fullEventUrl = eventHref ? new URL(eventHref, origin).href : null;

        const layoutsDiv = details.querySelector('div.layouts');
        if (!layoutsDiv) {
          continue;
        }

        for (const layoutDiv of layoutsDiv.querySelectorAll('div.layout')) {
          const heading = layoutDiv.querySelector('h4.title') || layoutDiv.querySelector('h4');
          const layoutName = heading
            ? (heading.textContent || heading.innerText || '').trim()
            : '';

          const allText = layoutDiv.textContent || layoutDiv.innerText || '';

          // First pattern that matches wins.
          let par = null;
          for (const pattern of parPatterns) {
            const match = allText.match(pattern);
            if (match) {
              par = Number.parseInt(match[1], 10);
              break;
            }
          }

          // Divisions are listed as comma/space separated codes.
          const divisionsLi = layoutDiv.querySelector('li.divisions');
          let divisions = [];
          if (divisionsLi) {
            const divisionsText = (divisionsLi.textContent || '').replace('Divisions:', '').trim();
            divisions = divisionsText.split(/[,\s]+/).filter((d) => d.length > 0);
          }

          if (layoutName && Number.isInteger(par) && par > 0) {
            layoutData.push({
              name: layoutName,
              par,
              divisions,
              eventUrl: fullEventUrl,
            });
          }
        }
      }

      return layoutData;
    }, SITE_ORIGIN);

    layouts.push(...scraped);

    // Store all layout data in memory cache
    const courseIdInt = typeof courseId === 'string' ? Number.parseInt(courseId, 10) : courseId;
    layoutEventCache.set(courseIdInt, layouts);

    // Deduplicate for database: same name + same par = same layout
    const seen = new Set();
    const uniqueLayouts = layouts.filter((layout) => {
      const key = `${layout.name}|${layout.par}`;
      if (seen.has(key)) {
        return false;
      }
      seen.add(key);
      return true;
    });

    for (const layout of uniqueLayouts) {
      // A failed save must not abort the remaining layouts.
      try {
        await saveLayoutToDB(courseId, layout);
      } catch (err) {
        console.error(`Error saving layout ${layout.name}:`, err.message);
      }
    }
  } catch (error) {
    console.error('Error scraping course layouts:', error.message);
  } finally {
    await page.close();
  }

  return layouts;
}
|
||||
|
||||
/**
 * Convert a date such as "29-Aug-2025" into "2025-08-29".
 *
 * Parsed by hand: `new Date("29-Aug-2025")` is implementation-defined and, in
 * V8, yields local midnight, so `toISOString()` reported the previous day on
 * any server running east of UTC.
 *
 * @param {string|null} raw - Text in D-Mon-YYYY / DD-Mon-YYYY form.
 * @returns {string|null} ISO calendar date, or null when `raw` is missing or
 *   not a real date.
 */
function parsePdgaEventDate(raw) {
  const MONTHS = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'];
  const match = /^(\d{1,2})-([A-Z][a-z]{2})-(\d{4})$/.exec(raw ?? '');
  if (!match) {
    return null;
  }

  const day = Number.parseInt(match[1], 10);
  const monthIndex = MONTHS.indexOf(match[2]);
  const year = Number.parseInt(match[3], 10);
  if (monthIndex === -1) {
    return null;
  }

  // Round-trip through UTC to reject impossible days such as 31-Feb.
  const probe = new Date(Date.UTC(year, monthIndex, day));
  if (probe.getUTCMonth() !== monthIndex || probe.getUTCDate() !== day) {
    return null;
  }

  return `${match[3]}-${String(monthIndex + 1).padStart(2, '0')}-${String(day).padStart(2, '0')}`;
}

/**
 * Collect, for each layout played at an event, the round ratings of every
 * round whose score equals the layout's par.
 *
 * For each layout the divisions that played it are looked up on the event
 * page (an `h3` whose id is the division code, inside a `details` element
 * that holds a `table.results`). Errors are logged and the results gathered
 * so far are returned.
 *
 * @param {object} browser - Puppeteer browser; only `newPage()` is used.
 * @param {string} eventUrl - URL of the event results page.
 * @param {Array<{name: string, par: number, divisions: string[]}>} layoutsWithDivisions
 *   Layouts played at this event.
 * @param {{settleMs?: number}} [options] - `settleMs` is the pause after
 *   navigation before reading the page (default 1000 ms).
 * @returns {Promise<Object<string, {name: string, par: number, ratings: number[], count: number, meanRating: number, eventDate: (string|null)}>>}
 *   Keyed by "name|par"; layouts without any matching round are omitted.
 */
async function scrapeEventResults(browser, eventUrl, layoutsWithDivisions, { settleMs = 1000 } = {}) {
  // Plain timer instead of page.waitForTimeout(), which newer Puppeteer
  // releases no longer provide.
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  const page = await browser.newPage();
  const layoutRatings = {}; // key: layout name+par

  try {
    await page.goto(eventUrl, { waitUntil: 'networkidle2', timeout: 45000 });
    await pause(settleMs);

    // First D-Mon-YYYY looking token anywhere in the page text.
    const eventDateRaw = await page.evaluate(() => {
      const match = document.body.textContent.match(/\d{1,2}-[A-Z][a-z]{2}-\d{4}/);
      return match ? match[0] : null;
    });
    const eventDate = parsePdgaEventDate(eventDateRaw);

    for (const layout of layoutsWithDivisions) {
      const ratingsForLayout = [];

      for (const division of layout.divisions) {
        // Runs inside the page.
        const divisionData = await page.evaluate((divisionName, targetPar) => {
          // Compare ids directly instead of building `h3#<name>`: a division
          // token that is not a valid CSS identifier would make querySelector
          // throw and abort the whole event.
          const divisionH3 = Array.from(document.querySelectorAll('h3'))
            .find((heading) => heading.id === divisionName);
          const table = divisionH3?.closest('details')?.querySelector('table.results');
          if (!table) {
            return { found: false, ratings: [] };
          }

          const ratings = [];
          for (const roundCell of table.querySelectorAll('tbody tr td.round')) {
            const scoreText = (roundCell.textContent || '').trim();
            // Only plain integer scores count; skip blanks, DNF markers etc.
            if (!/^\d+$/.test(scoreText) || Number.parseInt(scoreText, 10) !== targetPar) {
              continue;
            }

            // The rating of a round sits in the cell right after its score.
            const ratingCell = roundCell.nextElementSibling;
            if (!ratingCell || !ratingCell.classList.contains('round-rating')) {
              continue;
            }

            const rating = Number.parseInt((ratingCell.textContent || '').trim(), 10);
            if (!Number.isNaN(rating) && rating > 0) {
              ratings.push(rating);
            }
          }

          return { found: true, ratings };
        }, division, layout.par);

        if (divisionData.found && divisionData.ratings.length > 0) {
          ratingsForLayout.push(...divisionData.ratings);
        }
      }

      if (ratingsForLayout.length > 0) {
        const total = ratingsForLayout.reduce((sum, r) => sum + r, 0);
        layoutRatings[`${layout.name}|${layout.par}`] = {
          name: layout.name,
          par: layout.par,
          ratings: ratingsForLayout,
          count: ratingsForLayout.length,
          meanRating: Math.round(total / ratingsForLayout.length),
          eventDate,
        };
      }
    }
  } catch (error) {
    console.error('Error scraping event results:', error.message);
  } finally {
    await page.close();
  }

  return layoutRatings;
}
|
||||
|
||||
async function getAllRatingsFromDB(progressCallback = null) {
|
||||
try {
|
||||
const pdgaNumbers = fs.readFileSync('pdga-numbers.txt', 'utf-8')
|
||||
@@ -1496,6 +1964,10 @@ app.get('/', (req, res) => {
|
||||
res.sendFile(path.join(__dirname, 'index.html'));
|
||||
});
|
||||
|
||||
// Serve the course/layout management page that sits next to this server file.
app.get('/courses.html', (req, res) => {
  res.sendFile(path.join(__dirname, 'courses.html'));
});
|
||||
|
||||
app.get('/api/ratings', async (req, res) => {
|
||||
try {
|
||||
const ratings = await getAllRatingsFromDB();
|
||||
@@ -2138,6 +2610,427 @@ app.post('/api/refresh-round-history/:pdgaNumber', async (req, res) => {
|
||||
}
|
||||
});
|
||||
|
||||
// Course API endpoints
|
||||
// GET /api/courses — respond with every stored course as JSON; 500 on failure.
app.get('/api/courses', async (req, res) => {
  try {
    const courses = await getAllCoursesFromDB();
    res.json(courses);
  } catch (error) {
    console.error('Error fetching courses:', error.message);
    res.status(500).json({ error: 'Failed to fetch courses' });
  }
});
|
||||
|
||||
// GET /api/layouts/:courseId — respond with the stored layouts of one course
// as JSON; 500 on failure.
app.get('/api/layouts/:courseId', async (req, res) => {
  try {
    const { courseId } = req.params;
    const layouts = await getLayoutsForCourse(courseId);
    res.json(layouts);
  } catch (error) {
    console.error('Error fetching layouts:', error.message);
    res.status(500).json({ error: 'Failed to fetch layouts' });
  }
});
|
||||
|
||||
// POST /api/scrape-courses — scrape the course directory with a fresh
// headless browser and report how many courses were found; 500 on failure.
app.post('/api/scrape-courses', async (req, res) => {
  let browser = null;
  try {
    console.log('Starting course directory scraping...');

    browser = await puppeteer.launch({
      headless: "new",
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        '--disable-accelerated-2d-canvas',
        '--no-first-run',
        '--no-zygote',
        '--disable-gpu',
      ],
    });

    const courses = await scrapeCourseDirectory(browser);

    res.json({
      success: true,
      coursesFound: courses.length,
      message: `Successfully scraped ${courses.length} courses`,
    });
  } catch (error) {
    console.error('Error scraping courses:', error.message);
    res.status(500).json({ error: 'Failed to scrape courses' });
  } finally {
    // Single place that releases the browser on both success and failure.
    if (browser) {
      try {
        await browser.close();
      } catch (closeError) {
        console.error('Error closing browser:', closeError.message);
      }
    }
  }
});
|
||||
|
||||
// POST /api/scrape-layouts/:courseId — scrape one course's layouts, then
// visit every linked event, pool the par-round ratings per layout (name+par)
// across events and store the mean, the sample size and the latest event date.
// Responds 404 for an unknown course and 500 on failure.
app.post('/api/scrape-layouts/:courseId', async (req, res) => {
  let browser = null;
  try {
    const { courseId } = req.params;
    const courseIdInt = Number.parseInt(courseId, 10);

    // Get course from database
    const course = await new Promise((resolve, reject) => {
      db.get('SELECT * FROM courses WHERE id = ?', [courseId], (err, row) => {
        if (err) reject(err);
        else resolve(row);
      });
    });

    if (!course) {
      return res.status(404).json({ error: 'Course not found' });
    }

    console.log(`Starting layout scraping for course: ${course.name}`);

    browser = await puppeteer.launch({
      headless: "new",
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        '--disable-accelerated-2d-canvas',
        '--no-first-run',
        '--no-zygote',
        '--disable-gpu',
      ],
    });

    const layouts = await scrapeCourseLayouts(browser, course.link, courseId);

    console.log(`\n=== Starting event results scraping for ${course.name} ===`);

    // Use the list that was just scraped rather than re-reading the cache:
    // if this scrape failed, the cache could still hold entries from an
    // earlier run and stale events would be processed.
    if (layouts.length === 0) {
      console.log('No event data found in cache, skipping event results scraping');
      return res.json({
        success: true,
        layoutsFound: layouts.length,
        message: `Successfully scraped ${layouts.length} layouts for ${course.name} (no events found)`,
      });
    }

    // Group layouts by event URL so each event page is visited once.
    const eventGroups = new Map();
    for (const layout of layouts) {
      if (!layout.eventUrl) {
        continue;
      }
      if (!eventGroups.has(layout.eventUrl)) {
        eventGroups.set(layout.eventUrl, []);
      }
      eventGroups.get(layout.eventUrl).push(layout);
    }

    // Pool ratings per unique layout; key: "layoutName|par".
    const allLayoutRatings = new Map();

    for (const [eventUrl, eventLayouts] of eventGroups) {
      const results = await scrapeEventResults(browser, eventUrl, eventLayouts);

      for (const [layoutKey, eventResult] of Object.entries(results)) {
        let pooled = allLayoutRatings.get(layoutKey);
        if (!pooled) {
          pooled = { name: eventResult.name, par: eventResult.par, allRatings: [], latestDate: null };
          allLayoutRatings.set(layoutKey, pooled);
        }

        // Keep the most recent event date seen for this layout.
        if (eventResult.eventDate && (!pooled.latestDate ||
            new Date(eventResult.eventDate) > new Date(pooled.latestDate))) {
          pooled.latestDate = eventResult.eventDate;
        }

        pooled.allRatings.push(...eventResult.ratings);
      }

      // Small delay between events
      await new Promise((resolve) => setTimeout(resolve, 2000));
    }

    console.log(`\n=== Calculating final ratings for all layouts ===`);

    // Calculate mean ratings and save to database
    let savedCount = 0;
    for (const pooled of allLayoutRatings.values()) {
      if (pooled.allRatings.length === 0) {
        continue;
      }

      const meanRating = Math.round(
        pooled.allRatings.reduce((sum, r) => sum + r, 0) / pooled.allRatings.length
      );

      console.log(`Layout: ${pooled.name} (Par ${pooled.par})`);
      console.log(` Total ratings collected: ${pooled.allRatings.length}`);
      console.log(` Mean rating: ${meanRating}`);
      console.log(` Last played: ${pooled.latestDate || 'Unknown'}`);

      // A failed update must not abort the remaining layouts.
      try {
        const changes = await updateLayoutRating(
          courseIdInt,
          pooled.name,
          pooled.par,
          meanRating,
          pooled.allRatings.length,
          pooled.latestDate
        );
        if (changes > 0) {
          console.log(` ✓ Updated in database`);
          savedCount++;
        }
      } catch (err) {
        console.error(` Error updating layout ${pooled.name}:`, err.message);
      }
    }

    res.json({
      success: true,
      layoutsFound: layouts.length,
      eventsProcessed: eventGroups.size,
      layoutsWithRatings: savedCount,
      message: `Successfully scraped ${layouts.length} layouts and processed ${eventGroups.size} events for ${course.name}`,
    });
  } catch (error) {
    console.error('Error scraping layouts:', error.message);
    res.status(500).json({ error: 'Failed to scrape layouts' });
  } finally {
    // Single place that releases the browser on every exit path.
    if (browser) {
      try {
        await browser.close();
      } catch (closeError) {
        console.error('Error closing browser:', closeError.message);
      }
    }
  }
});
|
||||
|
||||
// POST /api/scrape-event-results/:courseId — using the layouts cached by an
// earlier layout scrape, visit every linked event, pool the par-round ratings
// per layout (name+par) and store the mean, the sample size and the latest
// event date. Responds 404 when nothing is cached and 500 on failure.
app.post('/api/scrape-event-results/:courseId', async (req, res) => {
  let browser = null;
  try {
    const { courseId } = req.params;
    const courseIdInt = Number.parseInt(courseId, 10);

    // Get layout data from cache
    const cachedLayouts = layoutEventCache.get(courseIdInt);

    if (!cachedLayouts || cachedLayouts.length === 0) {
      return res.status(404).json({
        error: 'No layout data found in cache. Please scrape layouts first.',
      });
    }

    browser = await puppeteer.launch({
      headless: "new",
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        '--disable-accelerated-2d-canvas',
        '--no-first-run',
        '--no-zygote',
        '--disable-gpu',
      ],
    });

    // Group layouts by event URL so each event page is visited once.
    const eventGroups = new Map();
    for (const layout of cachedLayouts) {
      if (!layout.eventUrl) {
        continue;
      }
      if (!eventGroups.has(layout.eventUrl)) {
        eventGroups.set(layout.eventUrl, []);
      }
      eventGroups.get(layout.eventUrl).push(layout);
    }

    // Pool ratings per unique layout; key: "layoutName|par".
    const allLayoutRatings = new Map();

    for (const [eventUrl, eventLayouts] of eventGroups) {
      const results = await scrapeEventResults(browser, eventUrl, eventLayouts);

      for (const [layoutKey, eventResult] of Object.entries(results)) {
        let pooled = allLayoutRatings.get(layoutKey);
        if (!pooled) {
          pooled = { name: eventResult.name, par: eventResult.par, allRatings: [], latestDate: null };
          allLayoutRatings.set(layoutKey, pooled);
        }

        // Keep the most recent event date seen for this layout.
        if (eventResult.eventDate && (!pooled.latestDate ||
            new Date(eventResult.eventDate) > new Date(pooled.latestDate))) {
          pooled.latestDate = eventResult.eventDate;
        }

        pooled.allRatings.push(...eventResult.ratings);
      }

      // Small delay between events
      await new Promise((resolve) => setTimeout(resolve, 2000));
    }

    console.log(`\n=== Calculating final ratings for all layouts ===`);

    // Calculate mean ratings and save to database
    let savedCount = 0;
    for (const pooled of allLayoutRatings.values()) {
      if (pooled.allRatings.length === 0) {
        continue;
      }

      const meanRating = Math.round(
        pooled.allRatings.reduce((sum, r) => sum + r, 0) / pooled.allRatings.length
      );

      console.log(`Layout: ${pooled.name} (Par ${pooled.par})`);
      console.log(` Total ratings collected: ${pooled.allRatings.length}`);
      console.log(` Mean rating: ${meanRating}`);
      console.log(` Last played: ${pooled.latestDate || 'Unknown'}`);

      // A failed update must not abort the remaining layouts.
      try {
        const changes = await updateLayoutRating(
          courseIdInt,
          pooled.name,
          pooled.par,
          meanRating,
          pooled.allRatings.length,
          pooled.latestDate
        );
        if (changes > 0) {
          console.log(` ✓ Updated in database`);
          savedCount++;
        }
      } catch (err) {
        console.error(` Error updating layout ${pooled.name}:`, err.message);
      }
    }

    res.json({
      success: true,
      eventsProcessed: eventGroups.size,
      uniqueLayouts: allLayoutRatings.size,
      layoutsSaved: savedCount,
      message: `Processed ${eventGroups.size} events, updated ${savedCount} layouts`,
    });
  } catch (error) {
    console.error('Error scraping event results:', error.message);
    res.status(500).json({ error: 'Failed to scrape event results' });
  } finally {
    // Single place that releases the browser on every exit path.
    if (browser) {
      try {
        await browser.close();
      } catch (closeError) {
        console.error('Error closing browser:', closeError.message);
      }
    }
  }
});
|
||||
|
||||
// POST /api/scrape-all-layouts — scrape the layouts of every stored course
// with one shared browser, pausing between courses, and report the totals.
// A failure on one course is logged and does not stop the run; 500 is only
// returned when the run as a whole fails.
app.post('/api/scrape-all-layouts', async (req, res) => {
  let browser = null;
  try {
    console.log('Starting bulk layout scraping for all courses...');

    const courses = await getAllCoursesFromDB();
    console.log(`Found ${courses.length} courses to scrape`);

    browser = await puppeteer.launch({
      headless: "new",
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        '--disable-accelerated-2d-canvas',
        '--no-first-run',
        '--no-zygote',
        '--disable-gpu',
      ],
    });

    let totalLayouts = 0;
    for (const [index, course] of courses.entries()) {
      console.log(`[${index + 1}/${courses.length}] Scraping layouts for: ${course.name}`);

      try {
        const layouts = await scrapeCourseLayouts(browser, course.link, course.id);
        totalLayouts += layouts.length;

        // Delay between requests to be respectful (not needed after the last).
        if (index < courses.length - 1) {
          console.log('Waiting 2s before next request...');
          await new Promise((resolve) => setTimeout(resolve, 2000));
        }
      } catch (error) {
        console.error(`Error scraping layouts for ${course.name}:`, error.message);
      }
    }

    res.json({
      success: true,
      coursesProcessed: courses.length,
      totalLayouts: totalLayouts,
      message: `Successfully scraped layouts for ${courses.length} courses (${totalLayouts} total layouts)`,
    });
  } catch (error) {
    console.error('Error scraping all layouts:', error.message);
    res.status(500).json({ error: 'Failed to scrape all layouts' });
  } finally {
    // Single place that releases the browser on both success and failure.
    if (browser) {
      try {
        await browser.close();
      } catch (closeError) {
        console.error('Error closing browser:', closeError.message);
      }
    }
  }
});
|
||||
|
||||
app.post('/api/predicted-rating/:pdgaNumber', async (req, res) => {
|
||||
let browser = null;
|
||||
try {
|
||||
|
||||
Reference in New Issue
Block a user