about | summary | refs | log | tree | commit | diff
path: root/backend/hist_data/reduce_event_data.py
diff options
context:
space:
mode:
Diffstat (limited to 'backend/hist_data/reduce_event_data.py')
-rwxr-xr-x backend/hist_data/reduce_event_data.py 43
1 files changed, 34 insertions, 9 deletions
diff --git a/backend/hist_data/reduce_event_data.py b/backend/hist_data/reduce_event_data.py
index 15c2ab5..5801f4d 100755
--- a/backend/hist_data/reduce_event_data.py
+++ b/backend/hist_data/reduce_event_data.py
@@ -1,23 +1,44 @@
#!/usr/bin/python3
"""
-Delete extraneous events from the database that have no image (and consequently no description)
+Delete events from the database that have no image
"""
+# Code used in unit testing (for resolving imports of modules within this directory)
+import os, sys
+parentDir = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(parentDir)
+# Standard imports
import argparse
import sqlite3
+# Local imports
+from cal import SCALES, dbDateToHistDate, dateToUnit
DB_FILE = 'data.db'
-def reduceData(dbFile: str) -> None:
+def reduceData(dbFile: str, scales: list[int]) -> None:
dbCon = sqlite3.connect(dbFile)
dbCur = dbCon.cursor()
#
print('Getting events to delete')
- eventsToDel = set()
- query = 'SELECT events.id FROM events LEFT JOIN event_imgs ON events.id = event_imgs.id WHERE event_imgs.id IS NULL'
- for (eventId,) in dbCur.execute(query):
- eventsToDel.add(eventId)
+ eventsToDel: list[int] = []
+ scaleUnitToDelCount: dict[tuple[int, int], int] = {} # Stores counts to subtract from entries in 'dist'
+ query = 'SELECT events.id, events.start, events.fmt FROM events' \
+ ' LEFT JOIN event_imgs ON events.id = event_imgs.id WHERE event_imgs.id IS NULL'
+ iterNum = 0
+ for (eventId, start, fmt) in dbCur.execute(query):
+ if iterNum % 1e5 == 0:
+ print(f'At iteration {iterNum}')
+ #
+ eventsToDel.append(eventId)
+ date = dbDateToHistDate(start, fmt)
+ for scale in scales:
+ unit = dateToUnit(date, scale)
+ if (scale, unit) not in scaleUnitToDelCount:
+ scaleUnitToDelCount[(scale, unit)] = 1
+ else:
+ scaleUnitToDelCount[(scale, unit)] += 1
+ print(f'Found {len(eventsToDel)}')
#
print('Deleting events')
iterNum = 0
@@ -26,8 +47,12 @@ def reduceData(dbFile: str) -> None:
if iterNum % 1e5 == 0:
print(f'At iteration {iterNum}')
#
- dbCur.execute('DELETE from events where id = ?', (eventId,))
- dbCur.execute('DELETE from pop where id = ?', (eventId,))
+ dbCur.execute('DELETE FROM events WHERE id = ?', (eventId,))
+ dbCur.execute('DELETE FROM pop WHERE id = ?', (eventId,))
+ dbCur.execute('DELETE FROM event_disp WHERE id = ?', (eventId,))
+ for (scale, unit), delCount in scaleUnitToDelCount.items():
+ dbCur.execute('UPDATE dist SET count = count - ? WHERE scale = ? AND unit = ?', (delCount, scale, unit))
+ dbCur.execute('DELETE FROM dist WHERE count < 1')
#
dbCon.commit()
dbCon.close()
@@ -36,4 +61,4 @@ if __name__ == '__main__':
parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
args = parser.parse_args()
#
- reduceData(DB_FILE)
+ reduceData(DB_FILE, SCALES)