Add request locking, extended timeouts, and inactive layouts accordion

- Add a request-locking system to prevent concurrent scrapes of the same course
- Extend HTTP timeouts for long-running scraping operations (10 min for individual scraping endpoints, 30 min for bulk layout scraping)
- Add comprehensive logging for layout parsing to debug silent failures
- Implement accordion UI to hide layouts not played within 365 days
- Return HTTP 409 (Conflict) when a scrape is already in progress for a course
- Add visual indicators for active vs inactive layouts

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Samuel Enocsson
2025-10-01 22:25:20 +02:00
parent 858143d149
commit 4cd00e35aa
2 changed files with 263 additions and 49 deletions
+110 -34
View File
@@ -17,6 +17,9 @@ const db = new sqlite3.Database(dbPath);
// In-memory cache for layout-division-event mapping.
// scrapeCourseLayouts stores the full list of parsed layouts here (before the
// name+par deduplication it applies for the database), keyed by the course id
// converted to an integer when it arrives as a string.
const layoutEventCache = new Map(); // key: courseId, value: array of {name, par, divisions, eventUrl}
// Request locking to prevent concurrent scrapes of the same resource.
// The /api/scrape-layouts/:courseId handler answers 409 while an entry for the
// course exists, registers its in-flight scrape promise here, and deletes the
// entry in a `finally` once the scrape settles (success or failure).
const activeScrapes = new Map(); // key: lock key string such as `layout-${courseId}`, value: Promise of the in-flight scrape
// Initialize database schema
function initializeDatabase() {
return new Promise((resolve, reject) => {
@@ -1624,6 +1627,7 @@ async function scrapeCourseDirectory(browser) {
}
async function scrapeCourseLayouts(browser, courseLink, courseId) {
console.log(`\n=== Scraping layouts from: ${courseLink} ===`);
const page = await browser.newPage();
const layouts = [];
@@ -1656,15 +1660,19 @@ async function scrapeCourseLayouts(browser, courseLink, courseId) {
});
if (layoutsTabClicked) {
console.log('✓ Layouts tab found and clicked');
await page.waitForTimeout(3000);
} else {
console.warn('⚠️ Layouts tab not found - may be on a single-layout course page');
}
// Extract layouts from the page
layouts.push(...await page.evaluate(() => {
const extractedLayouts = await page.evaluate(() => {
const layoutData = [];
const tournamentsDiv = document.querySelector('div.tournaments');
if (!tournamentsDiv) {
console.warn('No div.tournaments found on page');
return layoutData;
}
@@ -1728,17 +1736,28 @@ async function scrapeCourseLayouts(browser, courseLink, courseId) {
divisions: divisions,
eventUrl: fullEventUrl
});
} else if (layoutName) {
// Log skipped layouts for debugging
console.warn(`⚠️ Skipped layout "${layoutName}" - Par: ${par}, Text sample: ${allText.substring(0, 200)}`);
}
});
});
return layoutData;
}));
});
if (extractedLayouts.length === 0) {
console.warn('⚠️ No layouts extracted from page');
}
layouts.push(...extractedLayouts);
// Store all layout data in memory cache
const courseIdInt = typeof courseId === 'string' ? parseInt(courseId) : courseId;
layoutEventCache.set(courseIdInt, layouts);
console.log(`✓ Successfully parsed ${layouts.length} layouts from course page`);
// Deduplicate for database: same name + same par = same layout
const uniqueLayouts = [];
const seen = new Set();
@@ -1751,12 +1770,17 @@ async function scrapeCourseLayouts(browser, courseLink, courseId) {
}
}
if (uniqueLayouts.length < layouts.length) {
console.log(`️ Deduplicated to ${uniqueLayouts.length} unique layouts`);
}
// Save layouts to database
for (const layout of uniqueLayouts) {
try {
await saveLayoutToDB(courseId, layout);
console.log(` ✓ Saved layout: ${layout.name} (Par ${layout.par})`);
} catch (err) {
console.error(`Error saving layout ${layout.name}:`, err.message);
console.error(`Error saving layout ${layout.name}:`, err.message);
}
}
@@ -2438,10 +2462,14 @@ app.post('/api/refresh-rating-history/:pdgaNumber', async (req, res) => {
});
app.post('/api/refresh-round-history/:pdgaNumber', async (req, res) => {
// Increase timeout for tournament scraping
req.setTimeout(600000); // 10 minutes
res.setTimeout(600000);
let browser = null;
const { pdgaNumber } = req.params;
try {
// Check when we last updated rounds for this player
const lastRoundUpdate = await getLastRoundUpdateDate(pdgaNumber);
const sinceDate = lastRoundUpdate ? new Date(lastRoundUpdate) : null;
@@ -2633,6 +2661,10 @@ app.get('/api/layouts/:courseId', async (req, res) => {
});
app.post('/api/scrape-courses', async (req, res) => {
// Increase timeout for course directory scraping
req.setTimeout(600000); // 10 minutes
res.setTimeout(600000);
let browser = null;
try {
console.log('Starting course directory scraping...');
@@ -2674,23 +2706,40 @@ app.post('/api/scrape-courses', async (req, res) => {
});
app.post('/api/scrape-layouts/:courseId', async (req, res) => {
let browser = null;
try {
const { courseId } = req.params;
// Increase timeout for this endpoint since scraping can take several minutes
req.setTimeout(600000); // 10 minutes
res.setTimeout(600000);
// Get course from database
const course = await new Promise((resolve, reject) => {
db.get('SELECT * FROM courses WHERE id = ?', [courseId], (err, row) => {
if (err) reject(err);
else resolve(row);
});
const { courseId } = req.params;
const lockKey = `layout-${courseId}`;
// Check if there's already a scrape in progress for this course
if (activeScrapes.has(lockKey)) {
console.log(`⚠️ Scrape already in progress for course ${courseId}`);
return res.status(409).json({
error: 'Scrape already in progress for this course',
message: 'Please wait for the current scrape to complete'
});
}
if (!course) {
return res.status(404).json({ error: 'Course not found' });
}
let browser = null;
console.log(`Starting layout scraping for course: ${course.name}`);
// Create a promise for this scrape operation
const scrapePromise = (async () => {
try {
// Get course from database
const course = await new Promise((resolve, reject) => {
db.get('SELECT * FROM courses WHERE id = ?', [courseId], (err, row) => {
if (err) reject(err);
else resolve(row);
});
});
if (!course) {
throw new Error('Course not found');
}
console.log(`Starting layout scraping for course: ${course.name}`);
browser = await puppeteer.launch({
headless: "new",
@@ -2810,30 +2859,53 @@ app.post('/api/scrape-layouts/:courseId', async (req, res) => {
}
}
await browser.close();
browser = null;
await browser.close();
browser = null;
res.json({
success: true,
layoutsFound: layouts.length,
eventsProcessed: Object.keys(eventGroups).length,
layoutsWithRatings: savedCount,
message: `Successfully scraped ${layouts.length} layouts and processed ${Object.keys(eventGroups).length} events for ${course.name}`
});
} catch (error) {
console.error('Error scraping layouts:', error.message);
if (browser) {
try {
await browser.close();
} catch (closeError) {
console.error('Error closing browser:', closeError.message);
return {
success: true,
layoutsFound: layouts.length,
eventsProcessed: Object.keys(eventGroups).length,
layoutsWithRatings: savedCount,
message: `Successfully scraped ${layouts.length} layouts and processed ${Object.keys(eventGroups).length} events for ${course.name}`
};
} catch (error) {
console.error('Error scraping layouts:', error.message);
if (browser) {
try {
await browser.close();
} catch (closeError) {
console.error('Error closing browser:', closeError.message);
}
}
throw error;
}
res.status(500).json({ error: 'Failed to scrape layouts' });
})();
// Store the promise in activeScrapes
activeScrapes.set(lockKey, scrapePromise);
try {
// Wait for the scrape to complete
const result = await scrapePromise;
res.json(result);
} catch (error) {
res.status(500).json({
error: 'Failed to scrape layouts',
message: error.message
});
} finally {
// Always remove from active scrapes when done
activeScrapes.delete(lockKey);
console.log(`✓ Released lock for course ${courseId}`);
}
});
app.post('/api/scrape-event-results/:courseId', async (req, res) => {
// Increase timeout for scraping operations
req.setTimeout(600000); // 10 minutes
res.setTimeout(600000);
let browser = null;
try {
const { courseId } = req.params;
@@ -2970,6 +3042,10 @@ app.post('/api/scrape-event-results/:courseId', async (req, res) => {
});
app.post('/api/scrape-all-layouts', async (req, res) => {
// Increase timeout for bulk scraping operations
req.setTimeout(1800000); // 30 minutes for bulk operations
res.setTimeout(1800000);
let browser = null;
try {
console.log('Starting bulk layout scraping for all courses...');