Add automatic database population from PDGA numbers file at startup

- Read pdga-numbers.txt at server startup and check for missing players
- Automatically scrape and populate any missing players into database
- Maintain respectful 2-second delays between PDGA requests
- Add comprehensive logging for population process
- Include new API endpoints for manual database population and status checking
- Run the database population check before the server starts accepting requests (players whose scrape fails are logged and skipped)

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Samuel Enocsson
2025-08-18 09:49:54 +02:00
parent 1a5b3b9fb4
commit 765917d495
+119 -12
View File
@@ -94,6 +94,62 @@ function initializeDatabase() {
});
}
/**
 * Check the database against pdga-numbers.txt and scrape any missing players.
 *
 * Reads pdga-numbers.txt (one PDGA number per line, blank lines ignored),
 * looks each distinct number up with getPlayerFromDB, and calls
 * scrapePDGARating for every number that has no entry. Requests are issued
 * one at a time with a 2-second pause between them.
 *
 * This function never writes to the database itself; scrapePDGARating is
 * presumably what persists the scraped player — TODO confirm.
 *
 * Best-effort: a failure for a single player is logged and the loop moves on,
 * and any other error (e.g. the numbers file cannot be read) is logged rather
 * than rethrown.
 *
 * @returns {Promise<void>}
 */
async function checkAndPopulateDatabase() {
  // Pause between consecutive PDGA requests, in milliseconds.
  const REQUEST_DELAY_MS = 2000;

  try {
    console.log('=== Checking database population against PDGA numbers file ===');

    const pdgaNumbers = fs.readFileSync('pdga-numbers.txt', 'utf-8')
      .split('\n')
      .map(num => num.trim())
      .filter(num => num);

    console.log(`Found ${pdgaNumbers.length} PDGA numbers in file`);

    // Check which players are missing from database. Iterating a Set means a
    // number listed twice in the file is looked up (and later scraped) once.
    const missingPlayers = [];
    for (const pdgaNumber of new Set(pdgaNumbers)) {
      const player = await getPlayerFromDB(pdgaNumber);
      if (!player) {
        missingPlayers.push(pdgaNumber);
      }
    }

    if (missingPlayers.length === 0) {
      console.log('✓ All players from PDGA numbers file are already in database');
      return;
    }

    console.log(`Found ${missingPlayers.length} missing players: [${missingPlayers.join(', ')}]`);
    console.log('=== Starting automatic population of missing players ===');

    // Populate missing players
    for (let i = 0; i < missingPlayers.length; i++) {
      const pdgaNumber = missingPlayers[i];
      console.log(`[${i + 1}/${missingPlayers.length}] Scraping missing player PDGA ${pdgaNumber}...`);

      try {
        const playerData = await scrapePDGARating(pdgaNumber);
        console.log(`✓ Added PDGA ${pdgaNumber}: ${playerData.name}`);
      } catch (error) {
        console.error(`✗ Failed to add PDGA ${pdgaNumber}: ${error.message}`);
      }

      // Delay between requests to be respectful to PDGA. This sits outside the
      // try/catch so a failed request is also followed by a pause instead of
      // an immediate retry against the next number.
      if (i < missingPlayers.length - 1) {
        console.log('Waiting 2s before next request...');
        await new Promise(resolve => setTimeout(resolve, REQUEST_DELAY_MS));
      }
    }

    console.log('=== Database population complete ===');
  } catch (error) {
    console.error('Error during database population check:', error.message);
  }
}
// Database helper functions
function getPlayerFromDB(pdgaNumber) {
return new Promise((resolve, reject) => {
@@ -1399,8 +1455,8 @@ async function getAllRatingsFromDB(progressCallback = null) {
current: i + 1,
total,
pdgaNumber,
status: 'completed',
name: playerData.name
status: playerData ? 'completed' : 'skipped',
name: playerData ? playerData.name : 'Not in DB'
});
}
} catch (error) {
@@ -1472,6 +1528,58 @@ app.get('/api/ratings/progress', (req, res) => {
});
});
// Endpoint to populate database from PDGA numbers file.
// Responds as a Server-Sent Events stream: every progress object reported by
// getAllRatingsWithScraping is forwarded as one `data:` frame, followed by a
// final frame with status 'complete' or 'error', after which the response ends.
app.post('/api/populate-database', (req, res) => {
  // Serialise a payload as a single SSE `data:` frame on this response.
  const sendEvent = (payload) => {
    res.write(`data: ${JSON.stringify(payload)}\n\n`);
  };

  // Success path: report the resulting ratings and close the stream.
  const onPopulated = (ratings) => {
    console.log(`=== Database population complete: ${ratings.length} players added ===`);
    sendEvent({
      status: 'complete',
      ratings,
      message: `Successfully populated database with ${ratings.length} players`
    });
    res.end();
  };

  // Failure path: also reached if the success handler itself throws.
  const onFailure = (error) => {
    console.error('Error populating database:', error);
    sendEvent({ status: 'error', message: error.message });
    res.end();
  };

  res.writeHead(200, {
    'Content-Type': 'text/event-stream',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Access-Control-Allow-Origin': '*',
  });

  console.log('=== Starting database population from PDGA numbers file ===');

  // Use the scraping function to populate database; progress updates are
  // streamed to the client through sendEvent.
  getAllRatingsWithScraping(sendEvent)
    .then(onPopulated)
    .catch(onFailure);
});
// Simple endpoint to check if database needs population.
//
// Reads pdga-numbers.txt (one PDGA number per line, blank lines ignored),
// looks each number up with getPlayerFromDB, and responds with JSON:
//   totalExpected      - count of numbers listed in the file
//   playersInDB        - how many of those lookups returned a player
//   needsPopulation    - true when none of the listed players were found
//   populationProgress - playersInDB as a whole-number percentage of totalExpected
// Responds with HTTP 500 and an `error` message if the check fails.
app.get('/api/database-status', async (req, res) => {
  try {
    const pdgaNumbers = fs.readFileSync('pdga-numbers.txt', 'utf-8')
      .split('\n')
      .map(num => num.trim())
      .filter(num => num);

    let playersInDB = 0;
    for (const pdgaNumber of pdgaNumbers) {
      const player = await getPlayerFromDB(pdgaNumber);
      if (player) playersInDB++;
    }

    const totalExpected = pdgaNumbers.length;

    // Guard the division: with an empty numbers file 0 / 0 is NaN, which
    // JSON serialises as null. Report 0% instead.
    const populationProgress = totalExpected === 0
      ? 0
      : Math.round((playersInDB / totalExpected) * 100);

    res.json({
      totalExpected,
      playersInDB,
      // NOTE(review): only true for a completely empty result; a partially
      // populated database reports false. Confirm this is what clients expect.
      needsPopulation: playersInDB === 0,
      populationProgress
    });
  } catch (error) {
    // Log the cause server-side; the client only gets a generic message.
    console.error('Failed to check database status:', error.message);
    res.status(500).json({ error: 'Failed to check database status' });
  }
});
app.get('/api/load-all-players', (req, res) => {
res.writeHead(200, {
'Content-Type': 'text/event-stream',
@@ -1524,25 +1632,21 @@ async function getAllRatingsWithScraping(progressCallback = null) {
}
try {
const playerData = await getPlayerDataFromDB(pdgaNumber);
if (playerData) {
const playerData = await scrapePDGARating(pdgaNumber);
ratings.push(playerData);
} else {
console.log(`PDGA ${pdgaNumber} not found in DB - skipping`);
// Skip players not in DB instead of scraping
}
if (progressCallback) {
progressCallback({
current: i + 1,
total,
pdgaNumber,
status: playerData ? 'completed' : 'skipped',
name: playerData ? playerData.name : 'Not in DB'
status: 'completed',
name: playerData.name
});
}
// No delay needed - just reading from DB
// Delay between PDGA scraping requests to be respectful
await new Promise(resolve => setTimeout(resolve, 2000));
} catch (error) {
console.error(`Failed to scrape PDGA ${pdgaNumber}:`, error.message);
const errorData = {
@@ -2136,7 +2240,10 @@ async function testPDGARateLimit() {
// testPDGARateLimit();
// Initialize database and start server
initializeDatabase().then(() => {
initializeDatabase().then(async () => {
// Check and populate missing players from PDGA numbers file
await checkAndPopulateDatabase();
app.listen(PORT, () => {
console.log(`PDGA Ratings app running on http://localhost:${PORT}`);
});