fix: prevent course ID changes on re-scrape and add layout repair script
saveCourseToDB now uses ON CONFLICT DO UPDATE instead of INSERT OR REPLACE, which preserves the course ID and prevents orphaning of layout foreign keys. Added scripts/repair-layouts.js to reassign orphaned layouts to their correct courses by detecting the ID offset from re-scraping.
This commit is contained in:
@@ -0,0 +1,145 @@
|
|||||||
|
#!/usr/bin/env node
|
||||||
|
|
||||||
|
// Repairs orphaned layouts by reassigning them to the correct course.
|
||||||
|
//
|
||||||
|
// The problem: saveCourseToDB used INSERT OR REPLACE which deletes and
|
||||||
|
// re-inserts courses with new IDs. Layouts still reference the old IDs.
|
||||||
|
//
|
||||||
|
// Strategy: Courses were likely re-scraped in the same order, so old and new
// course IDs should differ by a fixed offset. Find the offset that maps the
// most orphaned course_ids onto existing courses, then reassign each orphaned
// layout to the course at old_id + offset. If that course already has a layout
// with the same name + par, the orphaned copy is deleted as a duplicate.
|
||||||
|
|
||||||
|
const path = require('path');
|
||||||
|
const dbPath = process.env.DB_PATH || './ratings.db';
|
||||||
|
const sqlite3 = require('sqlite3').verbose();
|
||||||
|
const db = new sqlite3.Database(dbPath);
|
||||||
|
|
||||||
|
/**
 * Promise wrapper around `db.all`.
 *
 * @param {string} sql - SQL statement to execute.
 * @param {Array} [params] - Bound parameters; a falsy value is replaced by an empty array.
 * @returns {Promise<Array>} Resolves with the rows handed to the callback,
 *   or rejects with the error handed to the callback.
 */
function all(sql, params) {
  return new Promise((resolve, reject) => {
    const bindings = params ? params : [];
    const onDone = (error, rows) => {
      if (error) {
        reject(error);
        return;
      }
      resolve(rows);
    };
    db.all(sql, bindings, onDone);
  });
}
|
||||||
|
|
||||||
|
/**
 * Promise wrapper around `db.run`.
 *
 * @param {string} sql - SQL statement to execute.
 * @param {Array} [params] - Bound parameters; a falsy value is replaced by an empty array.
 * @returns {Promise<number>} Resolves with `this.changes` as seen inside the
 *   completion callback, or rejects with the error handed to the callback.
 */
function run(sql, params) {
  return new Promise((resolve, reject) => {
    const bindings = params ? params : [];
    // Must stay a regular function (not an arrow): the result is read from
    // the callback's own `this`.
    db.run(sql, bindings, function onDone(error) {
      if (error) {
        reject(error);
        return;
      }
      resolve(this.changes);
    });
  });
}
|
||||||
|
|
||||||
|
/**
 * Finds the ID shift that maps the most orphaned course IDs onto existing
 * course IDs. Offsets are tried from -maxOffset to +maxOffset and only a
 * strictly better match count replaces the current best, so ties keep the
 * smallest offset.
 *
 * @param {number[]} oldIds - Distinct course IDs referenced by orphaned layouts.
 * @param {Set<number>} courseIdSet - IDs of the courses that currently exist.
 * @param {number} [maxOffset=1000] - Largest absolute shift to try.
 * @returns {{bestOffset: number, bestMatches: number}} The winning shift and
 *   how many old IDs it maps onto an existing course (0 matches => offset 0).
 */
function findBestOffset(oldIds, courseIdSet, maxOffset = 1000) {
  let bestOffset = 0;
  let bestMatches = 0;

  for (let offset = -maxOffset; offset <= maxOffset; offset++) {
    let matches = 0;
    for (const oldId of oldIds) {
      if (courseIdSet.has(oldId + offset)) matches++;
    }
    if (matches > bestMatches) {
      bestMatches = matches;
      bestOffset = offset;
    }
  }

  return { bestOffset, bestMatches };
}

/**
 * Reassigns orphaned layouts (layouts whose course_id matches no row in
 * courses) to the course found at old course_id + detected offset.
 *
 * For each orphaned layout whose shifted course exists:
 *   - if that course already has a layout with the same name and par, the
 *     orphaned row is deleted as a duplicate;
 *   - otherwise its course_id is updated to the shifted ID.
 * Layouts whose shifted ID matches no course, or whose course_id is not an
 * integer (e.g. NULL), are left untouched and counted as "No match found".
 *
 * All DELETE/UPDATE statements run inside one transaction: if any statement
 * fails, the transaction is rolled back and the error is rethrown, so the
 * table is never left half-repaired.
 *
 * Logs progress and a summary to stdout and ends the process with exit code 0
 * on success.
 *
 * @returns {Promise<void>} Rejects if any query fails.
 */
async function repair() {
  // Find all orphaned layouts (course_id not in courses table)
  const orphaned = await all(`
    SELECT l.id, l.course_id, l.name, l.par, l.mean_rating, l.rating_count, l.last_calculated, l.last_played
    FROM layouts l
    LEFT JOIN courses c ON l.course_id = c.id
    WHERE c.id IS NULL
  `);

  console.log('Orphaned layouts:', orphaned.length);

  if (orphaned.length === 0) {
    console.log('Nothing to repair!');
    process.exit(0);
    return;
  }

  // Get all valid courses
  const courses = await all('SELECT id, name, link FROM courses ORDER BY id');
  console.log('Valid courses:', courses.length);

  // Get all valid layouts (to avoid duplicates)
  const validLayouts = await all(`
    SELECT l.course_id, l.name, l.par
    FROM layouts l
    JOIN courses c ON l.course_id = c.id
  `);

  const layoutKey = (courseId, layout) => `${courseId}|${layout.name}|${layout.par}`;
  const validSet = new Set(validLayouts.map((l) => layoutKey(l.course_id, l)));

  let repaired = 0;
  let skippedDuplicate = 0;
  let noMatch = 0;

  // Group orphaned layouts by old course_id. A Map keeps numeric keys numeric.
  // Layouts whose course_id is missing or not an integer cannot be shifted by
  // an offset, so they are counted as unmatched instead of being grouped
  // (grouping them under a "null"/NaN key used to crash the repair loop).
  const byOldId = new Map();
  for (const layout of orphaned) {
    const rawId = layout.course_id;
    const oldId = rawId === null || rawId === undefined || rawId === '' ? NaN : Number(rawId);
    if (!Number.isInteger(oldId)) {
      noMatch++;
      continue;
    }
    if (!byOldId.has(oldId)) byOldId.set(oldId, []);
    byOldId.get(oldId).push(layout);
  }

  console.log('Unique orphaned course_ids:', byOldId.size);

  // If courses were re-scraped in order, old_id and new_id have a fixed
  // offset: pick the shift that lands the most old IDs on existing courses.
  const oldIds = [...byOldId.keys()].sort((a, b) => a - b);
  const courseIdSet = new Set(courses.map((c) => c.id));
  const { bestOffset, bestMatches } = findBestOffset(oldIds, courseIdSet);

  console.log('Best offset:', bestOffset, '(matches', bestMatches, 'of', oldIds.length, 'orphaned course_ids)');

  await run('BEGIN');
  try {
    for (const oldId of oldIds) {
      const newId = oldId + bestOffset;
      const layouts = byOldId.get(oldId);

      if (!courseIdSet.has(newId)) {
        noMatch += layouts.length;
        continue;
      }

      for (const layout of layouts) {
        const key = layoutKey(newId, layout);
        if (validSet.has(key)) {
          // Duplicate — delete the orphaned one
          await run('DELETE FROM layouts WHERE id = ?', [layout.id]);
          skippedDuplicate++;
        } else {
          // Reassign to correct course
          await run('UPDATE layouts SET course_id = ? WHERE id = ?', [newId, layout.id]);
          // Remember the key so a second orphan with the same name + par
          // mapping to this course is treated as a duplicate.
          validSet.add(key);
          repaired++;
        }
      }
    }
    await run('COMMIT');
  } catch (err) {
    try {
      await run('ROLLBACK');
    } catch (rollbackErr) {
      // Report but do not mask the original failure.
      console.error('Rollback failed:', rollbackErr);
    }
    throw err;
  }

  console.log('\nResults:');
  console.log('  Repaired:', repaired);
  console.log('  Deleted (duplicates):', skippedDuplicate);
  console.log('  No match found:', noMatch);

  // Verify
  const remaining = await all(`
    SELECT COUNT(*) as c FROM layouts l
    LEFT JOIN courses c ON l.course_id = c.id
    WHERE c.id IS NULL
  `);
  console.log('  Remaining orphans:', remaining[0].c);

  process.exit(0);
}
|
||||||
|
|
||||||
|
/**
 * Reports a failed repair on stderr and terminates with exit code 1.
 *
 * @param {*} failure - Rejection reason from `repair()`.
 */
function reportFailure(failure) {
  console.error('Error:', failure);
  process.exit(1);
}

// Script entry point: start the repair; any rejection ends the process non-zero.
repair().catch(reportFailure);
|
||||||
@@ -3,8 +3,9 @@ const { db } = require('../db');
|
|||||||
function saveCourseToDB(courseData) {
|
function saveCourseToDB(courseData) {
|
||||||
return new Promise((resolve, reject) => {
|
return new Promise((resolve, reject) => {
|
||||||
db.run(
|
db.run(
|
||||||
`INSERT OR REPLACE INTO courses (name, link, city, last_updated)
|
`INSERT INTO courses (name, link, city, last_updated)
|
||||||
VALUES (?, ?, ?, datetime('now'))`,
|
VALUES (?, ?, ?, datetime('now'))
|
||||||
|
ON CONFLICT(link) DO UPDATE SET name = excluded.name, city = excluded.city, last_updated = datetime('now')`,
|
||||||
[courseData.name, courseData.link, courseData.city],
|
[courseData.name, courseData.link, courseData.city],
|
||||||
function(err) {
|
function(err) {
|
||||||
if (err) reject(err);
|
if (err) reject(err);
|
||||||
|
|||||||
Reference in New Issue
Block a user