aboutsummaryrefslogtreecommitdiff
path: root/backend/hist_data/reduce_event_data.py
diff options
context:
space:
mode:
author Terry Truong <terry06890@gmail.com> 2022-10-05 22:45:52 +1100
committer Terry Truong <terry06890@gmail.com> 2022-10-05 22:54:51 +1100
commit 141f310d87f4fd7c3a4c728bf278c10fadc19606 (patch)
tree 8ad4954c04d0b01bd5ef894c626ccaec8158cfc4 /backend/hist_data/reduce_event_data.py
parent df14a1112e28597483de86619dcbd57dc5b15db7 (diff)
Add reduce_event_data.py
Add unit test, update README
Diffstat (limited to 'backend/hist_data/reduce_event_data.py')
-rwxr-xr-x backend/hist_data/reduce_event_data.py 39
1 files changed, 39 insertions, 0 deletions
diff --git a/backend/hist_data/reduce_event_data.py b/backend/hist_data/reduce_event_data.py
new file mode 100755
index 0000000..15c2ab5
--- /dev/null
+++ b/backend/hist_data/reduce_event_data.py
@@ -0,0 +1,39 @@
+#!/usr/bin/python3
+
+"""
+Delete extraneous events from the database that have no image (and consequently no description)
+"""
+
+import argparse
+import sqlite3
+
+DB_FILE = 'data.db'
+
+def reduceData(dbFile: str) -> None:
+    """
+    Deletes each event that has no matching 'event_imgs' row from the sqlite database at 'dbFile',
+    along with any 'pop' row having the same id. All deletions are committed together at the end.
+    """
+    dbCon = sqlite3.connect(dbFile)
+    try: # Close the connection even if a statement fails (nothing is committed in that case)
+        dbCur = dbCon.cursor()
+        print('Getting events to delete')
+        query = 'SELECT events.id FROM events LEFT JOIN event_imgs ON events.id = event_imgs.id WHERE event_imgs.id IS NULL'
+        eventsToDel = {eventId for (eventId,) in dbCur.execute(query)}
+        #
+        print('Deleting events')
+        for iterNum, eventId in enumerate(eventsToDel, start=1):
+            if iterNum % 100_000 == 0: # Progress report; integer modulus rather than the float 1e5
+                print(f'At iteration {iterNum}')
+            dbCur.execute('DELETE from events where id = ?', (eventId,))
+            dbCur.execute('DELETE from pop where id = ?', (eventId,))
+        #
+        dbCon.commit()
+    finally:
+        dbCon.close()
+
+if __name__ == '__main__':
+    # Arguments are parsed only to support -h/--help (which shows the module docstring); no options are defined
+    argparse.ArgumentParser(
+        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter).parse_args()
+    reduceData(DB_FILE)