diff options
Diffstat (limited to 'backend/hist_data')
| -rw-r--r-- | backend/hist_data/README.md | 10 | ||||
| -rw-r--r-- | backend/hist_data/gen_disp_data.py (renamed from backend/hist_data/gen_score_data.py) | 16 |
2 files changed, 13 insertions, 13 deletions
diff --git a/backend/hist_data/README.md b/backend/hist_data/README.md index 1a832ef..b557b14 100644 --- a/backend/hist_data/README.md +++ b/backend/hist_data/README.md @@ -33,9 +33,9 @@ This directory holds files used to generate the history database data.db. - `dist`: <br> Format: `scale INT, unit INT, count INT, PRIMARY KEY (scale, unit)` <br> Maps scale units to event counts. -- `scores`: <br> - Format: `id INT, scale INT, score INT, PRIMARY KEY (id, scale)` <br> - Maps events to score values for each scale (used to show events by popularity and uniformity across scale). +- `event_disp`: <br> + Format: `id INT, scale INT, PRIMARY KEY (id, scale)` <br> + Maps events to scales they are 'displayable' on (used to make displayed events more uniform across time). # Generating the Database @@ -83,5 +83,5 @@ Some of the scripts use third-party packages: ## Remove Events Without Images/Descs 1. Run `reduce_event_data.py` to remove data for events that have no image/description. -## Generate Distribution and Score Data -1. Run `gen_score_data.py`, which add the `dist` and `scores` tables. +## Generate Distribution and Displayability Data +1. Run `gen_disp_data.py`, which adds the `dist` and `event_disp` tables. diff --git a/backend/hist_data/gen_score_data.py b/backend/hist_data/gen_disp_data.py index 4ea66c3..e425efc 100644 --- a/backend/hist_data/gen_score_data.py +++ b/backend/hist_data/gen_disp_data.py @@ -1,7 +1,7 @@ #!/usr/bin/python3 """ -Adds data about event distribution and scores to the database. +Adds data about event distribution and displayability to the database. 
""" # Enable unit testing code to, when running this script, resolve imports of modules within this directory @@ -27,10 +27,10 @@ def genData(dbFile: str, scales: list[int], maxDisplayedPerUnit: int) -> None: scaleUnitToCounts: dict[tuple[int, int], list[int]] = {} # Maps scale and unit to two counts (num events in that unit, num events displayable for that unit) # Only includes events with popularity values - idAndScaleToScore: dict[tuple[int, int], int] = {} # Maps event id and scale to score + idScales: set[tuple[int, int]] = set() # Maps event ids to scales they are displayable on iterNum = 0 - query = 'SELECT events.id, start, fmt, pop FROM events INNER JOIN pop ON events.id = pop.id ORDER BY pop.pop DESC' - for eventId, eventStart, fmt, pop in dbCur.execute(query): + query = 'SELECT events.id, start, fmt FROM events INNER JOIN pop ON events.id = pop.id ORDER BY pop.pop DESC' + for eventId, eventStart, fmt in dbCur.execute(query): iterNum += 1 if iterNum % 1e3 == 0: print(f'At iteration {iterNum}') @@ -57,16 +57,16 @@ def genData(dbFile: str, scales: list[int], maxDisplayedPerUnit: int) -> None: counts = [1, 0] if counts[1] < maxDisplayedPerUnit: counts[1] += 1 - idAndScaleToScore[(eventId, scale)] = pop + idScales.add((eventId, scale)) scaleUnitToCounts[(scale, unit)] = counts # print('Writing to db') dbCur.execute('CREATE TABLE dist (scale INT, unit INT, count INT, PRIMARY KEY (scale, unit))') - dbCur.execute('CREATE TABLE scores (id INT, scale INT, score INT, PRIMARY KEY (id, scale))') for (scale, unit), (count, _) in scaleUnitToCounts.items(): dbCur.execute('INSERT INTO dist VALUES (?, ?, ?)', (scale, unit, count)) - for (eventId, scale), score in idAndScaleToScore.items(): - dbCur.execute('INSERT INTO scores VALUES (?, ?, ?)', (eventId, scale, score)) + dbCur.execute('CREATE TABLE event_disp (id INT, scale INT, PRIMARY KEY (id, scale))') + for eventId, scale in idScales: + dbCur.execute('INSERT INTO event_disp VALUES (?, ?)', (eventId, scale)) # 
print('Closing db') dbCon.commit() |
