diff options
| author | Terry Truong <terry06890@gmail.com> | 2023-01-14 22:03:57 +1100 |
|---|---|---|
| committer | Terry Truong <terry06890@gmail.com> | 2023-01-14 22:03:57 +1100 |
| commit | ebe74b640adaed5382046a49b1c2d6a48b31ebfd (patch) | |
| tree | 8ca2b824d3667d9a6e5ef4e8b2d6b739f6692aaf | |
| parent | a8ea534f13cc23dfab25c6d856f9d9833dd000ba (diff) | |
Add img_dist and img_disp db tables
Having separate event counts for events with images resolves some
redundant server querying, and makes event count indicators moree
representative.
| -rw-r--r-- | backend/hist_data/README.md | 7 | ||||
| -rwxr-xr-x | backend/hist_data/gen_disp_data.py | 41 | ||||
| -rwxr-xr-x | backend/histplorer.py | 24 | ||||
| -rw-r--r-- | backend/tests/test_gen_disp_data.py | 49 |
4 files changed, 92 insertions, 29 deletions
diff --git a/backend/hist_data/README.md b/backend/hist_data/README.md index c947aa6..50108e0 100644 --- a/backend/hist_data/README.md +++ b/backend/hist_data/README.md @@ -27,6 +27,10 @@ This directory holds files used to generate the history database data.db. - `event_disp`: <br> Format: `id INT, scale INT, unit INT, PRIMARY KEY (id, scale)` <br> Maps events to scales+units they are 'displayable' on (used to make displayed events more uniform across time). +- `img_dist`: <br> + Like `dist`, but only counts events with images. +- `img_disp`: <br> + Like `events_disp`, but only counts events with images. - `images`: <br> Format: `id INT PRIMARY KEY, url TEXT, license TEXT, artist TEXT, credit TEXT` <br> Holds metadata for available images @@ -84,3 +88,6 @@ Some of the scripts use third-party packages: 1. Additional events can be described in `picked/events.json`, with images for them put in `picked` (see the README for details). 1. Can run `gen_picked_data.py` to add those described events to the database. + +## Generation Event Image Display Data +1. Run `gen_disp_data.py img`, which adds the `img_dist` and `img_disp` tables. diff --git a/backend/hist_data/gen_disp_data.py b/backend/hist_data/gen_disp_data.py index e8b2bf4..193adbb 100755 --- a/backend/hist_data/gen_disp_data.py +++ b/backend/hist_data/gen_disp_data.py @@ -18,7 +18,7 @@ from cal import SCALES, dbDateToHistDate, dateToUnit MAX_DISPLAYED_PER_UNIT = 4 DB_FILE = 'data.db' -def genData(dbFile: str, scales: list[int], maxDisplayedPerUnit: int) -> None: +def genData(dbFile: str, scales: list[int], maxDisplayedPerUnit: int, forImageTables: bool) -> None: dbCon = sqlite3.connect(dbFile) dbCur = dbCon.cursor() # @@ -28,7 +28,9 @@ def genData(dbFile: str, scales: list[int], maxDisplayedPerUnit: int) -> None: # Only includes events with popularity values idScales: dict[int, list[tuple[int, int]]] = {} # Maps event ids to scales+units they are displayable on iterNum = 0 - query = 'SELECT events.id, start, fmt FROM events INNER JOIN pop ON events.id = pop.id ORDER BY pop.pop DESC' + query = 'SELECT events.id, start, fmt FROM events INNER JOIN pop ON events.id = pop.id' \ + + ('' if not forImageTables else ' INNER JOIN event_imgs ON events.id = event_imgs.id') \ + + ' ORDER BY pop.pop DESC' for eventId, eventStart, fmt in dbCur.execute(query): iterNum += 1 if iterNum % 1e5 == 0: @@ -70,25 +72,28 @@ def genData(dbFile: str, scales: list[int], maxDisplayedPerUnit: int) -> None: eventsToDel.append(eventId) print(f'Found {len(eventsToDel)}') # - print(f'Deleting {len(eventsToDel)} events') - iterNum = 0 - for eventId in eventsToDel: - iterNum += 1 - if iterNum % 1e5 == 0: - print(f'At iteration {iterNum}') - # - dbCur.execute('DELETE FROM events WHERE id = ?', (eventId,)) - dbCur.execute('DELETE FROM pop WHERE id = ?', (eventId,)) + if not forImageTables: + print(f'Deleting {len(eventsToDel)} events') + iterNum = 0 + for eventId in eventsToDel: + iterNum += 1 + if iterNum % 1e5 == 0: + print(f'At iteration {iterNum}') + # + dbCur.execute('DELETE FROM events WHERE id = ?', (eventId,)) + dbCur.execute('DELETE FROM pop WHERE id = ?', (eventId,)) # print('Writing to db') - dbCur.execute('CREATE TABLE dist (scale INT, unit INT, count INT, PRIMARY KEY (scale, unit))') + distTable = 'dist' if not forImageTables else 'img_dist' + dispTable = 'event_disp' if not forImageTables else 'img_disp' + dbCur.execute(f'CREATE TABLE {distTable} (scale INT, unit INT, count INT, PRIMARY KEY (scale, unit))') for (scale, unit), (count, _) in scaleUnitToCounts.items(): - dbCur.execute('INSERT INTO dist VALUES (?, ?, ?)', (scale, unit, count)) - dbCur.execute('CREATE TABLE event_disp (id INT, scale INT, unit INT, PRIMARY KEY (id, scale))') - dbCur.execute('CREATE INDEX event_disp_scale_unit_idx ON event_disp(scale, unit)') + dbCur.execute(f'INSERT INTO {distTable} VALUES (?, ?, ?)', (scale, unit, count)) + dbCur.execute(f'CREATE TABLE {dispTable} (id INT, scale INT, unit INT, PRIMARY KEY (id, scale))') + dbCur.execute(f'CREATE INDEX {dispTable}_scale_unit_idx ON event_disp(scale, unit)') for eventId, scaleUnits in idScales.items(): for [scale, unit] in scaleUnits: - dbCur.execute('INSERT INTO event_disp VALUES (?, ?, ?)', (eventId, scale, unit)) + dbCur.execute(f'INSERT INTO {dispTable} VALUES (?, ?, ?)', (eventId, scale, unit)) # print('Closing db') dbCon.commit() @@ -96,6 +101,8 @@ def genData(dbFile: str, scales: list[int], maxDisplayedPerUnit: int) -> None: if __name__ == '__main__': parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument( + 'type', nargs='?', choices=['event', 'img'], default='event', help='The type of tables to generate') args = parser.parse_args() # - genData(DB_FILE, SCALES, MAX_DISPLAYED_PER_UNIT) + genData(DB_FILE, SCALES, MAX_DISPLAYED_PER_UNIT, args.type == 'img') diff --git a/backend/histplorer.py b/backend/histplorer.py index b42b731..f25e727 100755 --- a/backend/histplorer.py +++ b/backend/histplorer.py @@ -192,7 +192,7 @@ def handleEventsReq(params: dict[str, str], dbCur: sqlite3.Cursor) -> EventRespo imgonly = 'imgonly' in params # events = lookupEvents(start, end, scale, incl, resultLimit, ctgs, imgonly, dbCur) - unitCounts = lookupUnitCounts(start, end, scale, dbCur) + unitCounts = lookupUnitCounts(start, end, scale, imgonly, dbCur) return EventResponse(events, unitCounts) def reqParamToHistDate(s: str): """ Produces a HistDate from strings like '2010-10-3', '-8000', and '' (throws ValueError if invalid) """ @@ -210,27 +210,27 @@ def lookupEvents( ctgs: list[str] | None, imgonly: bool, dbCur: sqlite3.Cursor) -> list[Event]: """ Looks for events within a date range, in given scale, restricted by event category, an optional particular inclusion, and a result limit """ - imgJoin = 'INNER JOIN' if imgonly else 'LEFT JOIN' + dispTable = 'event_disp' if not imgonly else 'img_disp' query = \ 'SELECT events.id, title, start, start_upper, end, end_upper, fmt, ctg, images.id, pop.pop FROM events' \ - ' INNER JOIN event_disp ON events.id = event_disp.id' \ + f' INNER JOIN {dispTable} ON events.id = {dispTable}.id' \ ' INNER JOIN pop ON events.id = pop.id' \ - f' {imgJoin} event_imgs ON events.id = event_imgs.id' \ - f' {imgJoin} images ON event_imgs.img_id = images.id' - constraints = ['event_disp.scale = ?'] + ' LEFT JOIN event_imgs ON events.id = event_imgs.id' \ + ' LEFT JOIN images ON event_imgs.img_id = images.id' + constraints = [f'{dispTable}.scale = ?'] params: list[str | int] = [scale] # Constrain by start/end startUnit = dateToUnit(start, scale) if start is not None else None endUnit = dateToUnit(end, scale) if end is not None else None if startUnit is not None and startUnit == endUnit: - constraints.append('event_disp.unit = ?') + constraints.append(f'{dispTable}.unit = ?') params.append(startUnit) else: if startUnit is not None: - constraints.append('event_disp.unit >= ?') + constraints.append(f'{dispTable}.unit >= ?') params.append(startUnit) if endUnit is not None: - constraints.append('event_disp.unit < ?') + constraints.append(f'{dispTable}.unit < ?') params.append(endUnit) # Constrain by event category if ctgs is not None: @@ -272,9 +272,11 @@ def eventEntryToResults( # return Event(eventId, title, cast(HistDate, newDates[0]), newDates[1], newDates[2], newDates[3], ctg, imageId, pop) def lookupUnitCounts( - start: HistDate | None, end: HistDate | None, scale: int, dbCur: sqlite3.Cursor) -> dict[int, int] | None: + start: HistDate | None, end: HistDate | None, scale: int, + imgonly: bool, dbCur: sqlite3.Cursor) -> dict[int, int] | None: # Build query - query = 'SELECT unit, count FROM dist WHERE scale = ?' + distTable = 'dist' if not imgonly else 'img_dist' + query = f'SELECT unit, count FROM {distTable} WHERE scale = ?' params = [scale] if start: query += ' AND unit >= ?' diff --git a/backend/tests/test_gen_disp_data.py b/backend/tests/test_gen_disp_data.py index 792a9b2..0d54eb0 100644 --- a/backend/tests/test_gen_disp_data.py +++ b/backend/tests/test_gen_disp_data.py @@ -46,8 +46,21 @@ class TestGenData(unittest.TestCase): (11, 45), } ) + createTestDbTable( + dbFile, + 'CREATE TABLE event_imgs (id INT PRIMARY KEY, img_id INT)', + 'INSERT INTO event_imgs VALUES (?, ?)', + { + (1, 10), + (4, 40), + (5, 50), + (6, 60), + (7, 70), + } + ) # Run - genData(dbFile, [10, 1, MONTH_SCALE, DAY_SCALE], 2) + genData(dbFile, [10, 1, MONTH_SCALE, DAY_SCALE], 2, False) + genData(dbFile, [10, 1, MONTH_SCALE, DAY_SCALE], 2, True) # Check self.assertEqual( readTestDbTable(dbFile, 'SELECT * FROM events'), @@ -117,3 +130,37 @@ class TestGenData(unittest.TestCase): (2, DAY_SCALE, 2452607), } ) + self.assertEqual( + readTestDbTable(dbFile, 'SELECT scale, unit, count FROM img_dist'), + { + (10, 190, 5), + (1, 1900, 4), + (1, 1901, 1), + (MONTH_SCALE, gregorianToJdn(1900, 1, 1), 3), + (MONTH_SCALE, gregorianToJdn(1901, 1, 1), 1), + (MONTH_SCALE, julianToJdn(1900, 10, 1), 1), + (DAY_SCALE, gregorianToJdn(1900, 1, 1), 2), + (DAY_SCALE, gregorianToJdn(1900, 1, 10), 1), + (DAY_SCALE, julianToJdn(1900, 10, 1), 1), + (DAY_SCALE, gregorianToJdn(1901, 1, 1), 1), + } + ) + self.assertEqual( + readTestDbTable(dbFile, 'SELECT id, scale, unit FROM img_disp'), + { + (5, 10, 190), + (7, 10, 190), + (5, 1, 1900), + (7, 1, 1900), + (4, 1, 1901), + (1, MONTH_SCALE, 2415021), + (7, MONTH_SCALE, 2415021), + (4, MONTH_SCALE, 2415386), + (5, MONTH_SCALE, 2415307), + (1, DAY_SCALE, 2415021), + (7, DAY_SCALE, 2415021), + (6, DAY_SCALE, 2415030), + (4, DAY_SCALE, 2415386), + (5, DAY_SCALE, 2415307), + } + ) |
