aboutsummaryrefslogtreecommitdiff
path: root/backend/hist_data
diff options
context:
space:
mode:
Diffstat (limited to 'backend/hist_data')
-rw-r--r-- backend/hist_data/README.md | 3
-rwxr-xr-x backend/hist_data/reduce_event_data.py | 39
2 files changed, 42 insertions, 0 deletions
diff --git a/backend/hist_data/README.md b/backend/hist_data/README.md
index 96c31b5..bfecc1e 100644
--- a/backend/hist_data/README.md
+++ b/backend/hist_data/README.md
@@ -73,3 +73,6 @@ Some of the scripts use third-party packages:
1. Additional events can be described in `picked/events.json`, with images for them put
in `picked` (see the README for details).
1. Can run `gen_picked_data.py` to add those described events to the database.
+
+## Remove Events Without Images/Descs
+1. Run `reduce_event_data.py` to remove data for events that have no image (and therefore no description).
diff --git a/backend/hist_data/reduce_event_data.py b/backend/hist_data/reduce_event_data.py
new file mode 100755
index 0000000..15c2ab5
--- /dev/null
+++ b/backend/hist_data/reduce_event_data.py
@@ -0,0 +1,39 @@
+#!/usr/bin/python3
+
+"""
+Delete extraneous events from the database that have no image (and consequently no description)
+"""
+
+import argparse
+import sqlite3
+
+DB_FILE = 'data.db'  # Database path passed to reduceData() when this file is run as a script
+
+def reduceData(dbFile: str) -> None:
+    """Delete each event with no 'event_imgs' row, and its 'pop' row, from the database at dbFile."""
+    dbCon = sqlite3.connect(dbFile)
+    try:  # Release the connection even if a query raises
+        dbCur = dbCon.cursor()
+        #
+        print('Getting events to delete')
+        query = 'SELECT events.id FROM events LEFT JOIN event_imgs ON events.id = event_imgs.id WHERE event_imgs.id IS NULL'
+        # Collect the IDs up front, as the same cursor is reused for the DELETE statements below
+        eventsToDel = {eventId for (eventId,) in dbCur.execute(query)}
+        #
+        print('Deleting events')
+        for iterNum, eventId in enumerate(eventsToDel, 1):
+            if iterNum % 1e5 == 0:
+                print(f'At iteration {iterNum}')
+            #
+            dbCur.execute('DELETE from events where id = ?', (eventId,))
+            dbCur.execute('DELETE from pop where id = ?', (eventId,))
+        #
+        dbCon.commit()  # The only commit: deletions are not saved if an error occurs before this line
+    finally:
+        dbCon.close()
+
+if __name__ == '__main__':
+    # No options are defined; parsing the command line still provides --help and rejects stray arguments
+    argparse.ArgumentParser(
+        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter).parse_args()
+    reduceData(DB_FILE)