aboutsummaryrefslogtreecommitdiff
path: root/backend
diff options
context:
space:
mode:
authorTerry Truong <terry06890@gmail.com>2023-01-14 22:03:57 +1100
committerTerry Truong <terry06890@gmail.com>2023-01-14 22:03:57 +1100
commitebe74b640adaed5382046a49b1c2d6a48b31ebfd (patch)
tree8ca2b824d3667d9a6e5ef4e8b2d6b739f6692aaf /backend
parenta8ea534f13cc23dfab25c6d856f9d9833dd000ba (diff)
Add img_dist and img_disp db tables
Having separate event counts for events with images resolves some redundant server querying, and makes event count indicators moree representative.
Diffstat (limited to 'backend')
-rw-r--r--backend/hist_data/README.md7
-rwxr-xr-xbackend/hist_data/gen_disp_data.py41
-rwxr-xr-xbackend/histplorer.py24
-rw-r--r--backend/tests/test_gen_disp_data.py49
4 files changed, 92 insertions, 29 deletions
diff --git a/backend/hist_data/README.md b/backend/hist_data/README.md
index c947aa6..50108e0 100644
--- a/backend/hist_data/README.md
+++ b/backend/hist_data/README.md
@@ -27,6 +27,10 @@ This directory holds files used to generate the history database data.db.
- `event_disp`: <br>
Format: `id INT, scale INT, unit INT, PRIMARY KEY (id, scale)` <br>
Maps events to scales+units they are 'displayable' on (used to make displayed events more uniform across time).
+- `img_dist`: <br>
+ Like `dist`, but only counts events with images.
+- `img_disp`: <br>
+ Like `events_disp`, but only counts events with images.
- `images`: <br>
Format: `id INT PRIMARY KEY, url TEXT, license TEXT, artist TEXT, credit TEXT` <br>
Holds metadata for available images
@@ -84,3 +88,6 @@ Some of the scripts use third-party packages:
1. Additional events can be described in `picked/events.json`, with images for them put
in `picked` (see the README for details).
1. Can run `gen_picked_data.py` to add those described events to the database.
+
+## Generation Event Image Display Data
+1. Run `gen_disp_data.py img`, which adds the `img_dist` and `img_disp` tables.
diff --git a/backend/hist_data/gen_disp_data.py b/backend/hist_data/gen_disp_data.py
index e8b2bf4..193adbb 100755
--- a/backend/hist_data/gen_disp_data.py
+++ b/backend/hist_data/gen_disp_data.py
@@ -18,7 +18,7 @@ from cal import SCALES, dbDateToHistDate, dateToUnit
MAX_DISPLAYED_PER_UNIT = 4
DB_FILE = 'data.db'
-def genData(dbFile: str, scales: list[int], maxDisplayedPerUnit: int) -> None:
+def genData(dbFile: str, scales: list[int], maxDisplayedPerUnit: int, forImageTables: bool) -> None:
dbCon = sqlite3.connect(dbFile)
dbCur = dbCon.cursor()
#
@@ -28,7 +28,9 @@ def genData(dbFile: str, scales: list[int], maxDisplayedPerUnit: int) -> None:
# Only includes events with popularity values
idScales: dict[int, list[tuple[int, int]]] = {} # Maps event ids to scales+units they are displayable on
iterNum = 0
- query = 'SELECT events.id, start, fmt FROM events INNER JOIN pop ON events.id = pop.id ORDER BY pop.pop DESC'
+ query = 'SELECT events.id, start, fmt FROM events INNER JOIN pop ON events.id = pop.id' \
+ + ('' if not forImageTables else ' INNER JOIN event_imgs ON events.id = event_imgs.id') \
+ + ' ORDER BY pop.pop DESC'
for eventId, eventStart, fmt in dbCur.execute(query):
iterNum += 1
if iterNum % 1e5 == 0:
@@ -70,25 +72,28 @@ def genData(dbFile: str, scales: list[int], maxDisplayedPerUnit: int) -> None:
eventsToDel.append(eventId)
print(f'Found {len(eventsToDel)}')
#
- print(f'Deleting {len(eventsToDel)} events')
- iterNum = 0
- for eventId in eventsToDel:
- iterNum += 1
- if iterNum % 1e5 == 0:
- print(f'At iteration {iterNum}')
- #
- dbCur.execute('DELETE FROM events WHERE id = ?', (eventId,))
- dbCur.execute('DELETE FROM pop WHERE id = ?', (eventId,))
+ if not forImageTables:
+ print(f'Deleting {len(eventsToDel)} events')
+ iterNum = 0
+ for eventId in eventsToDel:
+ iterNum += 1
+ if iterNum % 1e5 == 0:
+ print(f'At iteration {iterNum}')
+ #
+ dbCur.execute('DELETE FROM events WHERE id = ?', (eventId,))
+ dbCur.execute('DELETE FROM pop WHERE id = ?', (eventId,))
#
print('Writing to db')
- dbCur.execute('CREATE TABLE dist (scale INT, unit INT, count INT, PRIMARY KEY (scale, unit))')
+ distTable = 'dist' if not forImageTables else 'img_dist'
+ dispTable = 'event_disp' if not forImageTables else 'img_disp'
+ dbCur.execute(f'CREATE TABLE {distTable} (scale INT, unit INT, count INT, PRIMARY KEY (scale, unit))')
for (scale, unit), (count, _) in scaleUnitToCounts.items():
- dbCur.execute('INSERT INTO dist VALUES (?, ?, ?)', (scale, unit, count))
- dbCur.execute('CREATE TABLE event_disp (id INT, scale INT, unit INT, PRIMARY KEY (id, scale))')
- dbCur.execute('CREATE INDEX event_disp_scale_unit_idx ON event_disp(scale, unit)')
+ dbCur.execute(f'INSERT INTO {distTable} VALUES (?, ?, ?)', (scale, unit, count))
+ dbCur.execute(f'CREATE TABLE {dispTable} (id INT, scale INT, unit INT, PRIMARY KEY (id, scale))')
+ dbCur.execute(f'CREATE INDEX {dispTable}_scale_unit_idx ON event_disp(scale, unit)')
for eventId, scaleUnits in idScales.items():
for [scale, unit] in scaleUnits:
- dbCur.execute('INSERT INTO event_disp VALUES (?, ?, ?)', (eventId, scale, unit))
+ dbCur.execute(f'INSERT INTO {dispTable} VALUES (?, ?, ?)', (eventId, scale, unit))
#
print('Closing db')
dbCon.commit()
@@ -96,6 +101,8 @@ def genData(dbFile: str, scales: list[int], maxDisplayedPerUnit: int) -> None:
if __name__ == '__main__':
parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+ parser.add_argument(
+ 'type', nargs='?', choices=['event', 'img'], default='event', help='The type of tables to generate')
args = parser.parse_args()
#
- genData(DB_FILE, SCALES, MAX_DISPLAYED_PER_UNIT)
+ genData(DB_FILE, SCALES, MAX_DISPLAYED_PER_UNIT, args.type == 'img')
diff --git a/backend/histplorer.py b/backend/histplorer.py
index b42b731..f25e727 100755
--- a/backend/histplorer.py
+++ b/backend/histplorer.py
@@ -192,7 +192,7 @@ def handleEventsReq(params: dict[str, str], dbCur: sqlite3.Cursor) -> EventRespo
imgonly = 'imgonly' in params
#
events = lookupEvents(start, end, scale, incl, resultLimit, ctgs, imgonly, dbCur)
- unitCounts = lookupUnitCounts(start, end, scale, dbCur)
+ unitCounts = lookupUnitCounts(start, end, scale, imgonly, dbCur)
return EventResponse(events, unitCounts)
def reqParamToHistDate(s: str):
""" Produces a HistDate from strings like '2010-10-3', '-8000', and '' (throws ValueError if invalid) """
@@ -210,27 +210,27 @@ def lookupEvents(
ctgs: list[str] | None, imgonly: bool, dbCur: sqlite3.Cursor) -> list[Event]:
""" Looks for events within a date range, in given scale,
restricted by event category, an optional particular inclusion, and a result limit """
- imgJoin = 'INNER JOIN' if imgonly else 'LEFT JOIN'
+ dispTable = 'event_disp' if not imgonly else 'img_disp'
query = \
'SELECT events.id, title, start, start_upper, end, end_upper, fmt, ctg, images.id, pop.pop FROM events' \
- ' INNER JOIN event_disp ON events.id = event_disp.id' \
+ f' INNER JOIN {dispTable} ON events.id = {dispTable}.id' \
' INNER JOIN pop ON events.id = pop.id' \
- f' {imgJoin} event_imgs ON events.id = event_imgs.id' \
- f' {imgJoin} images ON event_imgs.img_id = images.id'
- constraints = ['event_disp.scale = ?']
+ ' LEFT JOIN event_imgs ON events.id = event_imgs.id' \
+ ' LEFT JOIN images ON event_imgs.img_id = images.id'
+ constraints = [f'{dispTable}.scale = ?']
params: list[str | int] = [scale]
# Constrain by start/end
startUnit = dateToUnit(start, scale) if start is not None else None
endUnit = dateToUnit(end, scale) if end is not None else None
if startUnit is not None and startUnit == endUnit:
- constraints.append('event_disp.unit = ?')
+ constraints.append(f'{dispTable}.unit = ?')
params.append(startUnit)
else:
if startUnit is not None:
- constraints.append('event_disp.unit >= ?')
+ constraints.append(f'{dispTable}.unit >= ?')
params.append(startUnit)
if endUnit is not None:
- constraints.append('event_disp.unit < ?')
+ constraints.append(f'{dispTable}.unit < ?')
params.append(endUnit)
# Constrain by event category
if ctgs is not None:
@@ -272,9 +272,11 @@ def eventEntryToResults(
#
return Event(eventId, title, cast(HistDate, newDates[0]), newDates[1], newDates[2], newDates[3], ctg, imageId, pop)
def lookupUnitCounts(
- start: HistDate | None, end: HistDate | None, scale: int, dbCur: sqlite3.Cursor) -> dict[int, int] | None:
+ start: HistDate | None, end: HistDate | None, scale: int,
+ imgonly: bool, dbCur: sqlite3.Cursor) -> dict[int, int] | None:
# Build query
- query = 'SELECT unit, count FROM dist WHERE scale = ?'
+ distTable = 'dist' if not imgonly else 'img_dist'
+ query = f'SELECT unit, count FROM {distTable} WHERE scale = ?'
params = [scale]
if start:
query += ' AND unit >= ?'
diff --git a/backend/tests/test_gen_disp_data.py b/backend/tests/test_gen_disp_data.py
index 792a9b2..0d54eb0 100644
--- a/backend/tests/test_gen_disp_data.py
+++ b/backend/tests/test_gen_disp_data.py
@@ -46,8 +46,21 @@ class TestGenData(unittest.TestCase):
(11, 45),
}
)
+ createTestDbTable(
+ dbFile,
+ 'CREATE TABLE event_imgs (id INT PRIMARY KEY, img_id INT)',
+ 'INSERT INTO event_imgs VALUES (?, ?)',
+ {
+ (1, 10),
+ (4, 40),
+ (5, 50),
+ (6, 60),
+ (7, 70),
+ }
+ )
# Run
- genData(dbFile, [10, 1, MONTH_SCALE, DAY_SCALE], 2)
+ genData(dbFile, [10, 1, MONTH_SCALE, DAY_SCALE], 2, False)
+ genData(dbFile, [10, 1, MONTH_SCALE, DAY_SCALE], 2, True)
# Check
self.assertEqual(
readTestDbTable(dbFile, 'SELECT * FROM events'),
@@ -117,3 +130,37 @@ class TestGenData(unittest.TestCase):
(2, DAY_SCALE, 2452607),
}
)
+ self.assertEqual(
+ readTestDbTable(dbFile, 'SELECT scale, unit, count FROM img_dist'),
+ {
+ (10, 190, 5),
+ (1, 1900, 4),
+ (1, 1901, 1),
+ (MONTH_SCALE, gregorianToJdn(1900, 1, 1), 3),
+ (MONTH_SCALE, gregorianToJdn(1901, 1, 1), 1),
+ (MONTH_SCALE, julianToJdn(1900, 10, 1), 1),
+ (DAY_SCALE, gregorianToJdn(1900, 1, 1), 2),
+ (DAY_SCALE, gregorianToJdn(1900, 1, 10), 1),
+ (DAY_SCALE, julianToJdn(1900, 10, 1), 1),
+ (DAY_SCALE, gregorianToJdn(1901, 1, 1), 1),
+ }
+ )
+ self.assertEqual(
+ readTestDbTable(dbFile, 'SELECT id, scale, unit FROM img_disp'),
+ {
+ (5, 10, 190),
+ (7, 10, 190),
+ (5, 1, 1900),
+ (7, 1, 1900),
+ (4, 1, 1901),
+ (1, MONTH_SCALE, 2415021),
+ (7, MONTH_SCALE, 2415021),
+ (4, MONTH_SCALE, 2415386),
+ (5, MONTH_SCALE, 2415307),
+ (1, DAY_SCALE, 2415021),
+ (7, DAY_SCALE, 2415021),
+ (6, DAY_SCALE, 2415030),
+ (4, DAY_SCALE, 2415386),
+ (5, DAY_SCALE, 2415307),
+ }
+ )