diff options
Diffstat (limited to 'backend/hist_data/gen_disp_data.py')
| -rw-r--r-- | backend/hist_data/gen_disp_data.py | 45 |
1 files changed, 39 insertions, 6 deletions
diff --git a/backend/hist_data/gen_disp_data.py b/backend/hist_data/gen_disp_data.py index a81263f..e771e57 100644 --- a/backend/hist_data/gen_disp_data.py +++ b/backend/hist_data/gen_disp_data.py @@ -1,7 +1,7 @@ #!/usr/bin/python3 """ -Adds data about event distribution and displayability to the database. +Adds data about event distribution to the database, and removes events not eligible for display. """ # Enable unit testing code to, when running this script, resolve imports of modules within this directory @@ -23,12 +23,12 @@ def genData(dbFile: str, scales: list[int], maxDisplayedPerUnit: int) -> None: scaleUnitToCounts: dict[tuple[int, int], list[int]] = {} # Maps scale and unit to two counts (num events in that unit, num events displayable for that unit) # Only includes events with popularity values - idScales: set[tuple[int, int]] = set() # Maps event ids to scales they are displayable on + idScales: dict[int, list[int]] = {} # Maps event ids to scales they are displayable on iterNum = 0 query = 'SELECT events.id, start, fmt FROM events INNER JOIN pop ON events.id = pop.id ORDER BY pop.pop DESC' for eventId, eventStart, fmt in dbCur.execute(query): iterNum += 1 - if iterNum % 1e3 == 0: + if iterNum % 1e5 == 0: print(f'At iteration {iterNum}') # For each scale for scale in scales: @@ -42,16 +42,49 @@ def genData(dbFile: str, scales: list[int], maxDisplayedPerUnit: int) -> None: counts = [1, 0] if counts[1] < maxDisplayedPerUnit: counts[1] += 1 - idScales.add((eventId, scale)) + if eventId not in idScales: + idScales[eventId] = [] + idScales[eventId].append(scale) scaleUnitToCounts[(scale, unit)] = counts + print(f'Results: {len(idScales)} displayable events') + # + print('Looking for non-displayable events') + eventsToDel: list[int] = [] + for eventId, eventStart, fmt in dbCur.execute(query): + if eventId in idScales: + continue + eventsToDel.append(eventId) + # Remove from data to be added to 'dist' + for scale in scales: + unit = dateToUnit(dbDateToHistDate(eventStart, fmt), scale) + count = scaleUnitToCounts[(scale, unit)][0] - 1 + if count == 0: + del scaleUnitToCounts[(scale, unit)] + else: + scaleUnitToCounts[(scale, unit)][0] = count + query2 = 'SELECT events.id FROM events LEFT JOIN pop ON events.id = pop.id WHERE pop.id IS NULL' + for (eventId,) in dbCur.execute(query2): # Include events without scores + eventsToDel.append(eventId) + print(f'Found {len(eventsToDel)}') + # + print(f'Deleting {len(eventsToDel)} events') + iterNum = 0 + for eventId in eventsToDel: + iterNum += 1 + if iterNum % 1e5 == 0: + print(f'At iteration {iterNum}') + # + dbCur.execute('DELETE FROM events WHERE id = ?', (eventId,)) + dbCur.execute('DELETE FROM pop WHERE id = ?', (eventId,)) # print('Writing to db') dbCur.execute('CREATE TABLE dist (scale INT, unit INT, count INT, PRIMARY KEY (scale, unit))') for (scale, unit), (count, _) in scaleUnitToCounts.items(): dbCur.execute('INSERT INTO dist VALUES (?, ?, ?)', (scale, unit, count)) dbCur.execute('CREATE TABLE event_disp (id INT, scale INT, PRIMARY KEY (id, scale))') - for eventId, scale in idScales: - dbCur.execute('INSERT INTO event_disp VALUES (?, ?)', (eventId, scale)) + for eventId, scales in idScales.items(): + for scale in scales: + dbCur.execute('INSERT INTO event_disp VALUES (?, ?)', (eventId, scale)) # print('Closing db') dbCon.commit() |
