diff options
Diffstat (limited to 'backend')
| -rw-r--r-- | backend/hist_data/README.md | 10 | ||||
| -rw-r--r-- | backend/hist_data/gen_disp_data.py (renamed from backend/hist_data/gen_score_data.py) | 16 | ||||
| -rwxr-xr-x | backend/histplorer.py | 15 | ||||
| -rw-r--r-- | backend/tests/test_gen_disp_data.py (renamed from backend/tests/test_gen_score_data.py) | 38 | ||||
| -rw-r--r-- | backend/tests/test_histplorer.py | 18 |
5 files changed, 49 insertions, 48 deletions
diff --git a/backend/hist_data/README.md b/backend/hist_data/README.md index 1a832ef..b557b14 100644 --- a/backend/hist_data/README.md +++ b/backend/hist_data/README.md @@ -33,9 +33,9 @@ This directory holds files used to generate the history database data.db. - `dist`: <br> Format: `scale INT, unit INT, count INT, PRIMARY KEY (scale, unit)` <br> Maps scale units to event counts. -- `scores`: <br> - Format: `id INT, scale INT, score INT, PRIMARY KEY (id, scale)` <br> - Maps events to score values for each scale (used to show events by popularity and uniformity across scale). +- `event_disp`: <br> + Format: `id INT, scale INT, PRIMARY KEY (id, scale)` <br> + Maps events to scales they are 'displayable' on (used to make displayed events more uniform across time). # Generating the Database @@ -83,5 +83,5 @@ Some of the scripts use third-party packages: ## Remove Events Without Images/Descs 1. Run `reduce_event_data.py` to remove data for events that have no image/description. -## Generate Distribution and Score Data -1. Run `gen_score_data.py`, which add the `dist` and `scores` tables. +## Generate Distribution and Displayability Data +1. Run `gen_disp_data.py`, which add the `dist` and `event_disp` tables. diff --git a/backend/hist_data/gen_score_data.py b/backend/hist_data/gen_disp_data.py index 4ea66c3..e425efc 100644 --- a/backend/hist_data/gen_score_data.py +++ b/backend/hist_data/gen_disp_data.py @@ -1,7 +1,7 @@ #!/usr/bin/python3 """ -Adds data about event distribution and scores to the database. +Adds data about event distribution and displayability to the database. """ # Enable unit testing code to, when running this script, resolve imports of modules within this directory @@ -27,10 +27,10 @@ def genData(dbFile: str, scales: list[int], maxDisplayedPerUnit: int) -> None: scaleUnitToCounts: dict[tuple[int, int], list[int]] = {} # Maps scale and unit to two counts (num events in that unit, num events displayable for that unit) # Only includes events with popularity values - idAndScaleToScore: dict[tuple[int, int], int] = {} # Maps event id and scale to score + idScales: set[tuple[int, int]] = set() # Maps event ids to scales they are displayable on iterNum = 0 - query = 'SELECT events.id, start, fmt, pop FROM events INNER JOIN pop ON events.id = pop.id ORDER BY pop.pop DESC' - for eventId, eventStart, fmt, pop in dbCur.execute(query): + query = 'SELECT events.id, start, fmt FROM events INNER JOIN pop ON events.id = pop.id ORDER BY pop.pop DESC' + for eventId, eventStart, fmt in dbCur.execute(query): iterNum += 1 if iterNum % 1e3 == 0: print(f'At iteration {iterNum}') @@ -57,16 +57,16 @@ def genData(dbFile: str, scales: list[int], maxDisplayedPerUnit: int) -> None: counts = [1, 0] if counts[1] < maxDisplayedPerUnit: counts[1] += 1 - idAndScaleToScore[(eventId, scale)] = pop + idScales.add((eventId, scale)) scaleUnitToCounts[(scale, unit)] = counts # print('Writing to db') dbCur.execute('CREATE TABLE dist (scale INT, unit INT, count INT, PRIMARY KEY (scale, unit))') - dbCur.execute('CREATE TABLE scores (id INT, scale INT, score INT, PRIMARY KEY (id, scale))') for (scale, unit), (count, _) in scaleUnitToCounts.items(): dbCur.execute('INSERT INTO dist VALUES (?, ?, ?)', (scale, unit, count)) - for (eventId, scale), score in idAndScaleToScore.items(): - dbCur.execute('INSERT INTO scores VALUES (?, ?, ?)', (eventId, scale, score)) + dbCur.execute('CREATE TABLE event_disp (id INT, scale INT, PRIMARY KEY (id, scale))') + for eventId, scale in idScales: + dbCur.execute('INSERT INTO event_disp VALUES (?, ?)', (eventId, scale)) # print('Closing db') dbCon.commit() diff --git a/backend/histplorer.py b/backend/histplorer.py index 09aabd8..72ef88e 100755 --- a/backend/histplorer.py +++ b/backend/histplorer.py @@ -11,7 +11,7 @@ Expected HTTP query parameters: Examples: range=1000.1910-10-09 means '1000 CE to 09/10/1910 (inclusive)' range=-13000. means '13000 BCE onwards' -- scale: With type=events, specifies a date scale (matched against 'scale' column in 'scores' table) +- scale: With type=events, specifies a date scale (matched against 'scale' column in 'event_disp' table) - incl: With type=events, specifies an event to include, as an event ID - event: With type=info, specifies the event to get info for - input: With type=sugg, specifies a search string to suggest for @@ -214,11 +214,12 @@ def lookupEvents(start: HistDate | None, end: HistDate | None, scale: int, ctg: """ Looks for events within a date range, in given scale, restricted by event category, an optional particular inclusion, and a result limit """ query = \ - 'SELECT events.id, title, start, start_upper, end, end_upper, fmt, ctg, images.id, scores.score FROM events' \ - ' INNER JOIN scores ON events.id = scores.id' \ + 'SELECT events.id, title, start, start_upper, end, end_upper, fmt, ctg, images.id, pop.pop FROM events' \ + ' INNER JOIN event_disp ON events.id = event_disp.id' \ + ' INNER JOIN pop ON events.id = pop.id' \ ' INNER JOIN event_imgs ON events.id = event_imgs.id' \ ' INNER JOIN images ON event_imgs.img_id = images.id' - constraints = ['scores.scale = ?'] + constraints = ['event_disp.scale = ?'] params: list[str | int] = [scale] # Constrain by start/end if start is not None: @@ -251,7 +252,7 @@ def lookupEvents(start: HistDate | None, end: HistDate | None, scale: int, ctg: query2 = query if constraints: query2 += ' WHERE ' + ' AND '.join(constraints) - query2 += ' ORDER BY scores.score DESC' + query2 += ' ORDER BY pop.pop DESC' query2 += f' LIMIT {resultLimit}' # Run query results: list[Event] = [] @@ -270,7 +271,7 @@ def lookupEvents(start: HistDate | None, end: HistDate | None, scale: int, ctg: return results def eventEntryToResults( row: tuple[int, str, int, int | None, int | None, int | None, int, str, int, int | None]) -> Event: - eventId, title, start, startUpper, end, endUpper, fmt, ctg, imageId, score = row + eventId, title, start, startUpper, end, endUpper, fmt, ctg, imageId, pop = row """ Helper for converting an 'events' db entry into an Event object """ # Convert dates dateVals: list[int | None] = [start, startUpper, end, endUpper] @@ -293,7 +294,7 @@ def eventEntryToResults( else: newDates[i] = HistDate(True, *jdnToGregorian(n)) # - return Event(eventId, title, newDates[0], newDates[1], newDates[2], newDates[3], ctg, imageId, score) + return Event(eventId, title, newDates[0], newDates[1], newDates[2], newDates[3], ctg, imageId, pop) # For type=info def handleInfoReq(params: dict[str, str], dbCur: sqlite3.Cursor): diff --git a/backend/tests/test_gen_score_data.py b/backend/tests/test_gen_disp_data.py index a466679..b806958 100644 --- a/backend/tests/test_gen_score_data.py +++ b/backend/tests/test_gen_disp_data.py @@ -2,7 +2,7 @@ import unittest import tempfile, os from tests.common import createTestDbTable, readTestDbTable -from hist_data.gen_score_data import genData, MONTH_SCALE, DAY_SCALE +from hist_data.gen_disp_data import genData, MONTH_SCALE, DAY_SCALE from hist_data.cal import gregorianToJdn class TestGenData(unittest.TestCase): @@ -59,24 +59,24 @@ class TestGenData(unittest.TestCase): } ) self.assertEqual( - readTestDbTable(dbFile, 'SELECT id, scale, score FROM scores'), + readTestDbTable(dbFile, 'SELECT id, scale FROM event_disp'), { - (5, 10, 50), - (1, 10, 11), - (2, 10, 21), - (5, 1, 50), - (1, 1, 11), - (4, 1, 5), - (2, 1, 21), - (1, MONTH_SCALE, 11), - (6, MONTH_SCALE, 10), - (4, MONTH_SCALE, 5), - (5, MONTH_SCALE, 50), - (2, MONTH_SCALE, 21), - (1, DAY_SCALE, 11), - (4, DAY_SCALE, 5), - (5, DAY_SCALE, 50), - (6, DAY_SCALE, 10), - (2, DAY_SCALE, 21), + (5, 10), + (1, 10), + (2, 10), + (5, 1), + (1, 1), + (4, 1), + (2, 1), + (1, MONTH_SCALE), + (6, MONTH_SCALE), + (4, MONTH_SCALE), + (5, MONTH_SCALE), + (2, MONTH_SCALE), + (1, DAY_SCALE), + (4, DAY_SCALE), + (5, DAY_SCALE), + (6, DAY_SCALE), + (2, DAY_SCALE), } ) diff --git a/backend/tests/test_histplorer.py b/backend/tests/test_histplorer.py index eb89f50..a2b4623 100644 --- a/backend/tests/test_histplorer.py +++ b/backend/tests/test_histplorer.py @@ -34,16 +34,16 @@ def initTestDb(dbFile: str) -> None: ) createTestDbTable( dbFile, - 'CREATE TABLE scores (id INT, scale INT, score INT, PRIMARY KEY (id, scale))', - 'INSERT INTO scores VALUES (?, ?, ?)', + 'CREATE TABLE event_disp (id INT, scale INT, PRIMARY KEY (id, scale))', + 'INSERT INTO event_disp VALUES (?, ?)', { - (1, 1, 11), - (1, 10, 11), - (2, 1, 21), - (3, 1, 0), - (4, 1, 1000), - (5, 1, 51), - (6, 10, 60), + (1, 1), + (1, 10), + (2, 1), + (3, 1), + (4, 1), + (5, 1), + (6, 10), } ) createTestDbTable( |
