speed up database pruning for throwaway copies
Disable crash-safety pragmas (synchronous=OFF, journal_mode=MEMORY), use a 200MB page cache, disable foreign keys to avoid per-row CASCADE overhead, bulk-delete junction tables explicitly, and wrap all deletes in a single transaction.
This commit is contained in:
parent
c31484f0cf
commit
3c890f0b83
@ -16,7 +16,9 @@ func PruneDatabase(dbPath string, targetDay time.Time) error {
|
|||||||
|
|
||||||
slog.Info("pruning database", "keep_from", dayStart, "keep_until", dayEnd)
|
slog.Info("pruning database", "keep_from", dayStart, "keep_until", dayEnd)
|
||||||
|
|
||||||
db, err := sql.Open("sqlite", dbPath+"?_pragma=journal_mode(WAL)&_pragma=foreign_keys(ON)")
|
// No foreign_keys — we handle junction tables manually to avoid per-row CASCADE overhead.
|
||||||
|
// journal_mode=MEMORY and synchronous=OFF are safe because this is a throwaway copy.
|
||||||
|
db, err := sql.Open("sqlite", dbPath+"?_pragma=journal_mode(MEMORY)&_pragma=synchronous(OFF)&_pragma=cache_size(-200000)")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("opening database: %w", err)
|
return fmt.Errorf("opening database: %w", err)
|
||||||
}
|
}
|
||||||
@ -42,18 +44,42 @@ func PruneDatabase(dbPath string, targetDay time.Time) error {
|
|||||||
targetDay.Format("2006-01-02"))
|
targetDay.Format("2006-01-02"))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Delete posts outside target day (CASCADE handles junction tables)
|
tx, err := db.Begin()
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("beginning transaction: %w", err)
|
||||||
|
}
|
||||||
|
defer tx.Rollback()
|
||||||
|
|
||||||
|
// Delete junction table rows for posts outside target day (bulk, no CASCADE overhead)
|
||||||
|
slog.Info("deleting junction table rows for non-target posts")
|
||||||
|
keepSubquery := "SELECT id FROM posts WHERE timestamp >= ? AND timestamp < ?"
|
||||||
|
|
||||||
|
result, err := tx.Exec("DELETE FROM posts_hashtags WHERE post_id NOT IN ("+keepSubquery+")", dayStart, dayEnd)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("deleting posts_hashtags: %w", err)
|
||||||
|
}
|
||||||
|
deleted, _ := result.RowsAffected()
|
||||||
|
slog.Info("deleted posts_hashtags rows", "count", deleted)
|
||||||
|
|
||||||
|
result, err = tx.Exec("DELETE FROM posts_urls WHERE post_id NOT IN ("+keepSubquery+")", dayStart, dayEnd)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("deleting posts_urls: %w", err)
|
||||||
|
}
|
||||||
|
deleted, _ = result.RowsAffected()
|
||||||
|
slog.Info("deleted posts_urls rows", "count", deleted)
|
||||||
|
|
||||||
|
// Delete posts outside target day
|
||||||
slog.Info("deleting posts outside target day")
|
slog.Info("deleting posts outside target day")
|
||||||
result, err := db.Exec("DELETE FROM posts WHERE timestamp < ? OR timestamp >= ?", dayStart, dayEnd)
|
result, err = tx.Exec("DELETE FROM posts WHERE timestamp < ? OR timestamp >= ?", dayStart, dayEnd)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("deleting posts: %w", err)
|
return fmt.Errorf("deleting posts: %w", err)
|
||||||
}
|
}
|
||||||
deleted, _ := result.RowsAffected()
|
deleted, _ = result.RowsAffected()
|
||||||
slog.Info("deleted posts", "count", deleted)
|
slog.Info("deleted posts", "count", deleted)
|
||||||
|
|
||||||
// Clean up orphaned hashtags
|
// Clean up orphaned hashtags
|
||||||
slog.Info("cleaning orphaned hashtags")
|
slog.Info("cleaning orphaned hashtags")
|
||||||
result, err = db.Exec("DELETE FROM hashtags WHERE id NOT IN (SELECT DISTINCT hashtag_id FROM posts_hashtags)")
|
result, err = tx.Exec("DELETE FROM hashtags WHERE id NOT IN (SELECT DISTINCT hashtag_id FROM posts_hashtags)")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("deleting orphaned hashtags: %w", err)
|
return fmt.Errorf("deleting orphaned hashtags: %w", err)
|
||||||
}
|
}
|
||||||
@ -62,16 +88,16 @@ func PruneDatabase(dbPath string, targetDay time.Time) error {
|
|||||||
|
|
||||||
// Clean up orphaned urls
|
// Clean up orphaned urls
|
||||||
slog.Info("cleaning orphaned urls")
|
slog.Info("cleaning orphaned urls")
|
||||||
result, err = db.Exec("DELETE FROM urls WHERE id NOT IN (SELECT DISTINCT url_id FROM posts_urls)")
|
result, err = tx.Exec("DELETE FROM urls WHERE id NOT IN (SELECT DISTINCT url_id FROM posts_urls)")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("deleting orphaned urls: %w", err)
|
return fmt.Errorf("deleting orphaned urls: %w", err)
|
||||||
}
|
}
|
||||||
deleted, _ = result.RowsAffected()
|
deleted, _ = result.RowsAffected()
|
||||||
slog.Info("deleted orphaned urls", "count", deleted)
|
slog.Info("deleted orphaned urls", "count", deleted)
|
||||||
|
|
||||||
// Clear media (no FK to posts, not useful in daily segment)
|
// Clear media (no FK to posts, can't prune until post_id migration lands)
|
||||||
slog.Info("clearing media table")
|
slog.Info("clearing media table")
|
||||||
result, err = db.Exec("DELETE FROM media")
|
result, err = tx.Exec("DELETE FROM media")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("deleting media: %w", err)
|
return fmt.Errorf("deleting media: %w", err)
|
||||||
}
|
}
|
||||||
@ -80,13 +106,17 @@ func PruneDatabase(dbPath string, targetDay time.Time) error {
|
|||||||
|
|
||||||
// Clean up orphaned users
|
// Clean up orphaned users
|
||||||
slog.Info("cleaning orphaned users")
|
slog.Info("cleaning orphaned users")
|
||||||
result, err = db.Exec("DELETE FROM users WHERE did NOT IN (SELECT DISTINCT user_did FROM posts)")
|
result, err = tx.Exec("DELETE FROM users WHERE did NOT IN (SELECT DISTINCT user_did FROM posts)")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("deleting orphaned users: %w", err)
|
return fmt.Errorf("deleting orphaned users: %w", err)
|
||||||
}
|
}
|
||||||
deleted, _ = result.RowsAffected()
|
deleted, _ = result.RowsAffected()
|
||||||
slog.Info("deleted orphaned users", "count", deleted)
|
slog.Info("deleted orphaned users", "count", deleted)
|
||||||
|
|
||||||
|
if err := tx.Commit(); err != nil {
|
||||||
|
return fmt.Errorf("committing transaction: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
// Verify remaining count
|
// Verify remaining count
|
||||||
var remaining int64
|
var remaining int64
|
||||||
if err := db.QueryRow("SELECT COUNT(*) FROM posts").Scan(&remaining); err != nil {
|
if err := db.QueryRow("SELECT COUNT(*) FROM posts").Scan(&remaining); err != nil {
|
||||||
@ -98,7 +128,7 @@ func PruneDatabase(dbPath string, targetDay time.Time) error {
|
|||||||
slog.Info("posts remaining after prune", "count", remaining)
|
slog.Info("posts remaining after prune", "count", remaining)
|
||||||
|
|
||||||
// VACUUM to reclaim space
|
// VACUUM to reclaim space
|
||||||
slog.Info("running VACUUM (this may take a while)")
|
slog.Info("running VACUUM")
|
||||||
if _, err := db.Exec("VACUUM"); err != nil {
|
if _, err := db.Exec("VACUUM"); err != nil {
|
||||||
return fmt.Errorf("vacuum: %w", err)
|
return fmt.Errorf("vacuum: %w", err)
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user