Add request locking, extended timeouts, and inactive layouts accordion
- Add request locking system to prevent concurrent scrapes of same course
- Extend HTTP timeouts (10-30 min) for long-running scraping operations
- Add comprehensive logging for layout parsing to debug silent failures
- Implement accordion UI to hide layouts not played within 365 days
- Return 409 status when scrape already in progress for a course
- Add visual indicators for active vs inactive layouts

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -17,6 +17,9 @@ const db = new sqlite3.Database(dbPath);
|
||||
// In-memory cache for layout-division-event mapping
|
||||
const layoutEventCache = new Map(); // key: courseId, value: array of {name, par, divisions, eventUrl}
|
||||
|
||||
// Request locking to prevent concurrent scrapes of the same resource
|
||||
const activeScrapes = new Map(); // key: resourceId, value: Promise
|
||||
|
||||
// Initialize database schema
|
||||
function initializeDatabase() {
|
||||
return new Promise((resolve, reject) => {
|
||||
@@ -1624,6 +1627,7 @@ async function scrapeCourseDirectory(browser) {
|
||||
}
|
||||
|
||||
async function scrapeCourseLayouts(browser, courseLink, courseId) {
|
||||
console.log(`\n=== Scraping layouts from: ${courseLink} ===`);
|
||||
const page = await browser.newPage();
|
||||
const layouts = [];
|
||||
|
||||
@@ -1656,15 +1660,19 @@ async function scrapeCourseLayouts(browser, courseLink, courseId) {
|
||||
});
|
||||
|
||||
if (layoutsTabClicked) {
|
||||
console.log('✓ Layouts tab found and clicked');
|
||||
await page.waitForTimeout(3000);
|
||||
} else {
|
||||
console.warn('⚠️ Layouts tab not found - may be on a single-layout course page');
|
||||
}
|
||||
|
||||
// Extract layouts from the page
|
||||
layouts.push(...await page.evaluate(() => {
|
||||
const extractedLayouts = await page.evaluate(() => {
|
||||
const layoutData = [];
|
||||
const tournamentsDiv = document.querySelector('div.tournaments');
|
||||
|
||||
if (!tournamentsDiv) {
|
||||
console.warn('No div.tournaments found on page');
|
||||
return layoutData;
|
||||
}
|
||||
|
||||
@@ -1728,17 +1736,28 @@ async function scrapeCourseLayouts(browser, courseLink, courseId) {
|
||||
divisions: divisions,
|
||||
eventUrl: fullEventUrl
|
||||
});
|
||||
} else if (layoutName) {
|
||||
// Log skipped layouts for debugging
|
||||
console.warn(`⚠️ Skipped layout "${layoutName}" - Par: ${par}, Text sample: ${allText.substring(0, 200)}`);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
return layoutData;
|
||||
}));
|
||||
});
|
||||
|
||||
if (extractedLayouts.length === 0) {
|
||||
console.warn('⚠️ No layouts extracted from page');
|
||||
}
|
||||
|
||||
layouts.push(...extractedLayouts);
|
||||
|
||||
// Store all layout data in memory cache
|
||||
const courseIdInt = typeof courseId === 'string' ? parseInt(courseId) : courseId;
|
||||
layoutEventCache.set(courseIdInt, layouts);
|
||||
|
||||
console.log(`✓ Successfully parsed ${layouts.length} layouts from course page`);
|
||||
|
||||
// Deduplicate for database: same name + same par = same layout
|
||||
const uniqueLayouts = [];
|
||||
const seen = new Set();
|
||||
@@ -1751,12 +1770,17 @@ async function scrapeCourseLayouts(browser, courseLink, courseId) {
|
||||
}
|
||||
}
|
||||
|
||||
if (uniqueLayouts.length < layouts.length) {
|
||||
console.log(`ℹ️ Deduplicated to ${uniqueLayouts.length} unique layouts`);
|
||||
}
|
||||
|
||||
// Save layouts to database
|
||||
for (const layout of uniqueLayouts) {
|
||||
try {
|
||||
await saveLayoutToDB(courseId, layout);
|
||||
console.log(` ✓ Saved layout: ${layout.name} (Par ${layout.par})`);
|
||||
} catch (err) {
|
||||
console.error(`Error saving layout ${layout.name}:`, err.message);
|
||||
console.error(` ✗ Error saving layout ${layout.name}:`, err.message);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2438,10 +2462,14 @@ app.post('/api/refresh-rating-history/:pdgaNumber', async (req, res) => {
|
||||
});
|
||||
|
||||
app.post('/api/refresh-round-history/:pdgaNumber', async (req, res) => {
|
||||
// Increase timeout for tournament scraping
|
||||
req.setTimeout(600000); // 10 minutes
|
||||
res.setTimeout(600000);
|
||||
|
||||
let browser = null;
|
||||
const { pdgaNumber } = req.params;
|
||||
try {
|
||||
|
||||
|
||||
// Check when we last updated rounds for this player
|
||||
const lastRoundUpdate = await getLastRoundUpdateDate(pdgaNumber);
|
||||
const sinceDate = lastRoundUpdate ? new Date(lastRoundUpdate) : null;
|
||||
@@ -2633,6 +2661,10 @@ app.get('/api/layouts/:courseId', async (req, res) => {
|
||||
});
|
||||
|
||||
app.post('/api/scrape-courses', async (req, res) => {
|
||||
// Increase timeout for course directory scraping
|
||||
req.setTimeout(600000); // 10 minutes
|
||||
res.setTimeout(600000);
|
||||
|
||||
let browser = null;
|
||||
try {
|
||||
console.log('Starting course directory scraping...');
|
||||
@@ -2674,23 +2706,40 @@ app.post('/api/scrape-courses', async (req, res) => {
|
||||
});
|
||||
|
||||
app.post('/api/scrape-layouts/:courseId', async (req, res) => {
|
||||
let browser = null;
|
||||
try {
|
||||
const { courseId } = req.params;
|
||||
// Increase timeout for this endpoint since scraping can take several minutes
|
||||
req.setTimeout(600000); // 10 minutes
|
||||
res.setTimeout(600000);
|
||||
|
||||
// Get course from database
|
||||
const course = await new Promise((resolve, reject) => {
|
||||
db.get('SELECT * FROM courses WHERE id = ?', [courseId], (err, row) => {
|
||||
if (err) reject(err);
|
||||
else resolve(row);
|
||||
});
|
||||
const { courseId } = req.params;
|
||||
const lockKey = `layout-${courseId}`;
|
||||
|
||||
// Check if there's already a scrape in progress for this course
|
||||
if (activeScrapes.has(lockKey)) {
|
||||
console.log(`⚠️ Scrape already in progress for course ${courseId}`);
|
||||
return res.status(409).json({
|
||||
error: 'Scrape already in progress for this course',
|
||||
message: 'Please wait for the current scrape to complete'
|
||||
});
|
||||
}
|
||||
|
||||
if (!course) {
|
||||
return res.status(404).json({ error: 'Course not found' });
|
||||
}
|
||||
let browser = null;
|
||||
|
||||
console.log(`Starting layout scraping for course: ${course.name}`);
|
||||
// Create a promise for this scrape operation
|
||||
const scrapePromise = (async () => {
|
||||
try {
|
||||
// Get course from database
|
||||
const course = await new Promise((resolve, reject) => {
|
||||
db.get('SELECT * FROM courses WHERE id = ?', [courseId], (err, row) => {
|
||||
if (err) reject(err);
|
||||
else resolve(row);
|
||||
});
|
||||
});
|
||||
|
||||
if (!course) {
|
||||
throw new Error('Course not found');
|
||||
}
|
||||
|
||||
console.log(`Starting layout scraping for course: ${course.name}`);
|
||||
|
||||
browser = await puppeteer.launch({
|
||||
headless: "new",
|
||||
@@ -2810,30 +2859,53 @@ app.post('/api/scrape-layouts/:courseId', async (req, res) => {
|
||||
}
|
||||
}
|
||||
|
||||
await browser.close();
|
||||
browser = null;
|
||||
await browser.close();
|
||||
browser = null;
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
layoutsFound: layouts.length,
|
||||
eventsProcessed: Object.keys(eventGroups).length,
|
||||
layoutsWithRatings: savedCount,
|
||||
message: `Successfully scraped ${layouts.length} layouts and processed ${Object.keys(eventGroups).length} events for ${course.name}`
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('Error scraping layouts:', error.message);
|
||||
if (browser) {
|
||||
try {
|
||||
await browser.close();
|
||||
} catch (closeError) {
|
||||
console.error('Error closing browser:', closeError.message);
|
||||
return {
|
||||
success: true,
|
||||
layoutsFound: layouts.length,
|
||||
eventsProcessed: Object.keys(eventGroups).length,
|
||||
layoutsWithRatings: savedCount,
|
||||
message: `Successfully scraped ${layouts.length} layouts and processed ${Object.keys(eventGroups).length} events for ${course.name}`
|
||||
};
|
||||
} catch (error) {
|
||||
console.error('Error scraping layouts:', error.message);
|
||||
if (browser) {
|
||||
try {
|
||||
await browser.close();
|
||||
} catch (closeError) {
|
||||
console.error('Error closing browser:', closeError.message);
|
||||
}
|
||||
}
|
||||
throw error;
|
||||
}
|
||||
res.status(500).json({ error: 'Failed to scrape layouts' });
|
||||
})();
|
||||
|
||||
// Store the promise in activeScrapes
|
||||
activeScrapes.set(lockKey, scrapePromise);
|
||||
|
||||
try {
|
||||
// Wait for the scrape to complete
|
||||
const result = await scrapePromise;
|
||||
res.json(result);
|
||||
} catch (error) {
|
||||
res.status(500).json({
|
||||
error: 'Failed to scrape layouts',
|
||||
message: error.message
|
||||
});
|
||||
} finally {
|
||||
// Always remove from active scrapes when done
|
||||
activeScrapes.delete(lockKey);
|
||||
console.log(`✓ Released lock for course ${courseId}`);
|
||||
}
|
||||
});
|
||||
|
||||
app.post('/api/scrape-event-results/:courseId', async (req, res) => {
|
||||
// Increase timeout for scraping operations
|
||||
req.setTimeout(600000); // 10 minutes
|
||||
res.setTimeout(600000);
|
||||
|
||||
let browser = null;
|
||||
try {
|
||||
const { courseId } = req.params;
|
||||
@@ -2970,6 +3042,10 @@ app.post('/api/scrape-event-results/:courseId', async (req, res) => {
|
||||
});
|
||||
|
||||
app.post('/api/scrape-all-layouts', async (req, res) => {
|
||||
// Increase timeout for bulk scraping operations
|
||||
req.setTimeout(1800000); // 30 minutes for bulk operations
|
||||
res.setTimeout(1800000);
|
||||
|
||||
let browser = null;
|
||||
try {
|
||||
console.log('Starting bulk layout scraping for all courses...');
|
||||
|
||||
Reference in New Issue
Block a user