From ebe74b640adaed5382046a49b1c2d6a48b31ebfd Mon Sep 17 00:00:00 2001 From: Terry Truong Date: Sat, 14 Jan 2023 22:03:57 +1100 Subject: Add img_dist and img_disp db tables Having separate event counts for events with images resolves some redundant server querying, and makes event count indicators moree representative. --- backend/hist_data/README.md | 7 +++++++ backend/hist_data/gen_disp_data.py | 41 ++++++++++++++++++++++---------------- 2 files changed, 31 insertions(+), 17 deletions(-) (limited to 'backend/hist_data') diff --git a/backend/hist_data/README.md b/backend/hist_data/README.md index c947aa6..50108e0 100644 --- a/backend/hist_data/README.md +++ b/backend/hist_data/README.md @@ -27,6 +27,10 @@ This directory holds files used to generate the history database data.db. - `event_disp`:
Format: `id INT, scale INT, unit INT, PRIMARY KEY (id, scale)`
Maps events to scales+units they are 'displayable' on (used to make displayed events more uniform across time). +- `img_dist`:
+ Like `dist`, but only counts events with images. +- `img_disp`:
+ Like `events_disp`, but only counts events with images. - `images`:
Format: `id INT PRIMARY KEY, url TEXT, license TEXT, artist TEXT, credit TEXT`
Holds metadata for available images @@ -84,3 +88,6 @@ Some of the scripts use third-party packages: 1. Additional events can be described in `picked/events.json`, with images for them put in `picked` (see the README for details). 1. Can run `gen_picked_data.py` to add those described events to the database. + +## Generation Event Image Display Data +1. Run `gen_disp_data.py img`, which adds the `img_dist` and `img_disp` tables. diff --git a/backend/hist_data/gen_disp_data.py b/backend/hist_data/gen_disp_data.py index e8b2bf4..193adbb 100755 --- a/backend/hist_data/gen_disp_data.py +++ b/backend/hist_data/gen_disp_data.py @@ -18,7 +18,7 @@ from cal import SCALES, dbDateToHistDate, dateToUnit MAX_DISPLAYED_PER_UNIT = 4 DB_FILE = 'data.db' -def genData(dbFile: str, scales: list[int], maxDisplayedPerUnit: int) -> None: +def genData(dbFile: str, scales: list[int], maxDisplayedPerUnit: int, forImageTables: bool) -> None: dbCon = sqlite3.connect(dbFile) dbCur = dbCon.cursor() # @@ -28,7 +28,9 @@ def genData(dbFile: str, scales: list[int], maxDisplayedPerUnit: int) -> None: # Only includes events with popularity values idScales: dict[int, list[tuple[int, int]]] = {} # Maps event ids to scales+units they are displayable on iterNum = 0 - query = 'SELECT events.id, start, fmt FROM events INNER JOIN pop ON events.id = pop.id ORDER BY pop.pop DESC' + query = 'SELECT events.id, start, fmt FROM events INNER JOIN pop ON events.id = pop.id' \ + + ('' if not forImageTables else ' INNER JOIN event_imgs ON events.id = event_imgs.id') \ + + ' ORDER BY pop.pop DESC' for eventId, eventStart, fmt in dbCur.execute(query): iterNum += 1 if iterNum % 1e5 == 0: @@ -70,25 +72,28 @@ def genData(dbFile: str, scales: list[int], maxDisplayedPerUnit: int) -> None: eventsToDel.append(eventId) print(f'Found {len(eventsToDel)}') # - print(f'Deleting {len(eventsToDel)} events') - iterNum = 0 - for eventId in eventsToDel: - iterNum += 1 - if iterNum % 1e5 == 0: - print(f'At iteration {iterNum}') - # - dbCur.execute('DELETE FROM events WHERE id = ?', (eventId,)) - dbCur.execute('DELETE FROM pop WHERE id = ?', (eventId,)) + if not forImageTables: + print(f'Deleting {len(eventsToDel)} events') + iterNum = 0 + for eventId in eventsToDel: + iterNum += 1 + if iterNum % 1e5 == 0: + print(f'At iteration {iterNum}') + # + dbCur.execute('DELETE FROM events WHERE id = ?', (eventId,)) + dbCur.execute('DELETE FROM pop WHERE id = ?', (eventId,)) # print('Writing to db') - dbCur.execute('CREATE TABLE dist (scale INT, unit INT, count INT, PRIMARY KEY (scale, unit))') + distTable = 'dist' if not forImageTables else 'img_dist' + dispTable = 'event_disp' if not forImageTables else 'img_disp' + dbCur.execute(f'CREATE TABLE {distTable} (scale INT, unit INT, count INT, PRIMARY KEY (scale, unit))') for (scale, unit), (count, _) in scaleUnitToCounts.items(): - dbCur.execute('INSERT INTO dist VALUES (?, ?, ?)', (scale, unit, count)) - dbCur.execute('CREATE TABLE event_disp (id INT, scale INT, unit INT, PRIMARY KEY (id, scale))') - dbCur.execute('CREATE INDEX event_disp_scale_unit_idx ON event_disp(scale, unit)') + dbCur.execute(f'INSERT INTO {distTable} VALUES (?, ?, ?)', (scale, unit, count)) + dbCur.execute(f'CREATE TABLE {dispTable} (id INT, scale INT, unit INT, PRIMARY KEY (id, scale))') + dbCur.execute(f'CREATE INDEX {dispTable}_scale_unit_idx ON event_disp(scale, unit)') for eventId, scaleUnits in idScales.items(): for [scale, unit] in scaleUnits: - dbCur.execute('INSERT INTO event_disp VALUES (?, ?, ?)', (eventId, scale, unit)) + dbCur.execute(f'INSERT INTO {dispTable} VALUES (?, ?, ?)', (eventId, scale, unit)) # print('Closing db') dbCon.commit() @@ -96,6 +101,8 @@ def genData(dbFile: str, scales: list[int], maxDisplayedPerUnit: int) -> None: if __name__ == '__main__': parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument( + 'type', nargs='?', choices=['event', 'img'], default='event', help='The type of tables to generate') args = parser.parse_args() # - genData(DB_FILE, SCALES, MAX_DISPLAYED_PER_UNIT) + genData(DB_FILE, SCALES, MAX_DISPLAYED_PER_UNIT, args.type == 'img') -- cgit v1.2.3