aboutsummaryrefslogtreecommitdiff
path: root/backend/hist_data/gen_disp_data.py
diff options
context:
space:
mode:
Diffstat (limited to 'backend/hist_data/gen_disp_data.py')
-rwxr-xr-x[-rw-r--r--]backend/hist_data/gen_disp_data.py53
1 files changed, 44 insertions, 9 deletions
diff --git a/backend/hist_data/gen_disp_data.py b/backend/hist_data/gen_disp_data.py
index a81263f..d796d92 100644..100755
--- a/backend/hist_data/gen_disp_data.py
+++ b/backend/hist_data/gen_disp_data.py
@@ -1,15 +1,18 @@
#!/usr/bin/python3
"""
-Adds data about event distribution and displayability to the database.
+Adds data about event distribution to the database,
+and removes events that are not eligible for display.
"""
-# Enable unit testing code to, when running this script, resolve imports of modules within this directory
+# Add this directory to sys.path so that unit tests can resolve imports of modules within it
import os, sys
parentDir = os.path.dirname(os.path.realpath(__file__))
sys.path.append(parentDir)
-
+# Standard imports
+import argparse
import sqlite3
+# Local imports
from cal import SCALES, dbDateToHistDate, dateToUnit
MAX_DISPLAYED_PER_UNIT = 4
@@ -23,12 +26,12 @@ def genData(dbFile: str, scales: list[int], maxDisplayedPerUnit: int) -> None:
scaleUnitToCounts: dict[tuple[int, int], list[int]] = {}
# Maps scale and unit to two counts (num events in that unit, num events displayable for that unit)
# Only includes events with popularity values
- idScales: set[tuple[int, int]] = set() # Maps event ids to scales they are displayable on
+ idScales: dict[int, list[int]] = {} # Maps event ids to scales they are displayable on
iterNum = 0
query = 'SELECT events.id, start, fmt FROM events INNER JOIN pop ON events.id = pop.id ORDER BY pop.pop DESC'
for eventId, eventStart, fmt in dbCur.execute(query):
iterNum += 1
- if iterNum % 1e3 == 0:
+ if iterNum % 1e5 == 0:
print(f'At iteration {iterNum}')
# For each scale
for scale in scales:
@@ -42,23 +45,55 @@ def genData(dbFile: str, scales: list[int], maxDisplayedPerUnit: int) -> None:
counts = [1, 0]
if counts[1] < maxDisplayedPerUnit:
counts[1] += 1
- idScales.add((eventId, scale))
+ if eventId not in idScales:
+ idScales[eventId] = []
+ idScales[eventId].append(scale)
scaleUnitToCounts[(scale, unit)] = counts
+ print(f'Results: {len(idScales)} displayable events')
+ #
+ print('Looking for non-displayable events')
+ eventsToDel: list[int] = []
+ for eventId, eventStart, fmt in dbCur.execute(query):
+ if eventId in idScales:
+ continue
+ eventsToDel.append(eventId)
+ # Exclude this event from the counts that will be written to the 'dist' table
+ for scale in scales:
+ unit = dateToUnit(dbDateToHistDate(eventStart, fmt), scale)
+ count = scaleUnitToCounts[(scale, unit)][0] - 1
+ if count == 0:
+ del scaleUnitToCounts[(scale, unit)]
+ else:
+ scaleUnitToCounts[(scale, unit)][0] = count
+ for (eventId,) in dbCur.execute( # Find events without scores
+ 'SELECT events.id FROM events LEFT JOIN pop ON events.id = pop.id WHERE pop.id IS NULL'):
+ eventsToDel.append(eventId)
+ print(f'Found {len(eventsToDel)}')
+ #
+ print(f'Deleting {len(eventsToDel)} events')
+ iterNum = 0
+ for eventId in eventsToDel:
+ iterNum += 1
+ if iterNum % 1e5 == 0:
+ print(f'At iteration {iterNum}')
+ #
+ dbCur.execute('DELETE FROM events WHERE id = ?', (eventId,))
+ dbCur.execute('DELETE FROM pop WHERE id = ?', (eventId,))
#
print('Writing to db')
dbCur.execute('CREATE TABLE dist (scale INT, unit INT, count INT, PRIMARY KEY (scale, unit))')
for (scale, unit), (count, _) in scaleUnitToCounts.items():
dbCur.execute('INSERT INTO dist VALUES (?, ?, ?)', (scale, unit, count))
dbCur.execute('CREATE TABLE event_disp (id INT, scale INT, PRIMARY KEY (id, scale))')
- for eventId, scale in idScales:
- dbCur.execute('INSERT INTO event_disp VALUES (?, ?)', (eventId, scale))
+ for eventId, scales in idScales.items():
+ for scale in scales:
+ dbCur.execute('INSERT INTO event_disp VALUES (?, ?)', (eventId, scale))
#
print('Closing db')
dbCon.commit()
dbCon.close()
if __name__ == '__main__':
- import argparse
parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
args = parser.parse_args()
#