diff options
Diffstat (limited to 'backend')
| -rw-r--r-- | backend/hist_data/README.md | 3 | ||||
| -rwxr-xr-x | backend/hist_data/reduce_event_data.py | 64 | ||||
| -rwxr-xr-x | backend/histplorer.py | 42 |
3 files changed, 26 insertions, 83 deletions
diff --git a/backend/hist_data/README.md b/backend/hist_data/README.md index 2a9475a..c947aa6 100644 --- a/backend/hist_data/README.md +++ b/backend/hist_data/README.md @@ -84,6 +84,3 @@ Some of the scripts use third-party packages: 1. Additional events can be described in `picked/events.json`, with images for them put in `picked` (see the README for details). 1. Can run `gen_picked_data.py` to add those described events to the database. - -## Remove Events Without Images/Descs -1. Run `reduce_event_data.py` to remove data for events that have no image. diff --git a/backend/hist_data/reduce_event_data.py b/backend/hist_data/reduce_event_data.py deleted file mode 100755 index 5801f4d..0000000 --- a/backend/hist_data/reduce_event_data.py +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/python3 - -""" -Delete events from the database that have no image -""" - -# Code used in unit testing (for resolving imports of modules within this directory) -import os, sys -parentDir = os.path.dirname(os.path.realpath(__file__)) -sys.path.append(parentDir) -# Standard imports -import argparse -import sqlite3 -# Local imports -from cal import SCALES, dbDateToHistDate, dateToUnit - -DB_FILE = 'data.db' - -def reduceData(dbFile: str, scales: list[int]) -> None: - dbCon = sqlite3.connect(dbFile) - dbCur = dbCon.cursor() - # - print('Getting events to delete') - eventsToDel: list[int] = [] - scaleUnitToDelCount: dict[tuple[int, int], int] = {} # Stores counts to subtract from entries in 'dist' - query = 'SELECT events.id, events.start, events.fmt FROM events' \ - ' LEFT JOIN event_imgs ON events.id = event_imgs.id WHERE event_imgs.id IS NULL' - iterNum = 0 - for (eventId, start, fmt) in dbCur.execute(query): - if iterNum % 1e5 == 0: - print(f'At iteration {iterNum}') - # - eventsToDel.append(eventId) - date = dbDateToHistDate(start, fmt) - for scale in scales: - unit = dateToUnit(date, scale) - if (scale, unit) not in scaleUnitToDelCount: - scaleUnitToDelCount[(scale, unit)] = 1 - else: - scaleUnitToDelCount[(scale, unit)] += 1 - print(f'Found {len(eventsToDel)}') - # - print('Deleting events') - iterNum = 0 - for eventId in eventsToDel: - iterNum += 1 - if iterNum % 1e5 == 0: - print(f'At iteration {iterNum}') - # - dbCur.execute('DELETE FROM events WHERE id = ?', (eventId,)) - dbCur.execute('DELETE FROM pop WHERE id = ?', (eventId,)) - dbCur.execute('DELETE FROM event_disp WHERE id = ?', (eventId,)) - for (scale, unit), delCount in scaleUnitToDelCount.items(): - dbCur.execute('UPDATE dist SET count = count - ? WHERE scale = ? AND unit = ?', (delCount, scale, unit)) - dbCur.execute('DELETE FROM dist WHERE count < 1') - # - dbCon.commit() - dbCon.close() - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) - args = parser.parse_args() - # - reduceData(DB_FILE, SCALES) diff --git a/backend/histplorer.py b/backend/histplorer.py index 7d02f30..b42b731 100755 --- a/backend/histplorer.py +++ b/backend/histplorer.py @@ -18,9 +18,10 @@ Expected HTTP query parameters: - limit: With type=events or type=sugg, specifies the max number of results - ctgs: With type=events|info|sugg, specifies event categories to restrict results to Interpreted as a period-separated list of category names (eg: person.place). An empty string is ignored. +- imgonly: With type=events|info|sugg, if present, restricts results to events with images. """ -from typing import Iterable +from typing import Iterable, cast import sys, re import urllib.parse, sqlite3 import gzip, jsonpickle @@ -171,8 +172,6 @@ def handleEventsReq(params: dict[str, str], dbCur: sqlite3.Cursor) -> EventRespo except ValueError: print('INFO: Invalid scale value', file=sys.stderr) return None - # Get event category - ctgs = params['ctgs'].split('.') if 'ctgs' in params else None # Get incl value try: incl = int(params['incl']) if 'incl' in params else None @@ -189,7 +188,10 @@ def handleEventsReq(params: dict[str, str], dbCur: sqlite3.Cursor) -> EventRespo print(f'INFO: Invalid results limit {resultLimit}', file=sys.stderr) return None # - events = lookupEvents(start, end, scale, ctgs, incl, resultLimit, dbCur) + ctgs = params['ctgs'].split('.') if 'ctgs' in params else None + imgonly = 'imgonly' in params + # + events = lookupEvents(start, end, scale, incl, resultLimit, ctgs, imgonly, dbCur) unitCounts = lookupUnitCounts(start, end, scale, dbCur) return EventResponse(events, unitCounts) def reqParamToHistDate(s: str): @@ -203,16 +205,18 @@ def reqParamToHistDate(s: str): return HistDate(None, int(m.group(1))) else: return HistDate(True, int(m.group(1)), int(m.group(2)), int(m.group(3))) -def lookupEvents(start: HistDate | None, end: HistDate | None, scale: int, ctgs: list[str] | None, - incl: int | None, resultLimit: int, dbCur: sqlite3.Cursor) -> list[Event]: +def lookupEvents( + start: HistDate | None, end: HistDate | None, scale: int, incl: int | None, resultLimit: int, + ctgs: list[str] | None, imgonly: bool, dbCur: sqlite3.Cursor) -> list[Event]: """ Looks for events within a date range, in given scale, restricted by event category, an optional particular inclusion, and a result limit """ + imgJoin = 'INNER JOIN' if imgonly else 'LEFT JOIN' query = \ 'SELECT events.id, title, start, start_upper, end, end_upper, fmt, ctg, images.id, pop.pop FROM events' \ ' INNER JOIN event_disp ON events.id = event_disp.id' \ ' INNER JOIN pop ON events.id = pop.id' \ - ' LEFT JOIN event_imgs ON events.id = event_imgs.id' \ - ' LEFT JOIN images ON event_imgs.img_id = images.id' + f' {imgJoin} event_imgs ON events.id = event_imgs.id' \ + f' {imgJoin} images ON event_imgs.img_id = images.id' constraints = ['event_disp.scale = ?'] params: list[str | int] = [scale] # Constrain by start/end @@ -266,7 +270,7 @@ def eventEntryToResults( if n is not None: newDates[i] = dbDateToHistDate(n, fmt, i < 2) # - return Event(eventId, title, newDates[0], newDates[1], newDates[2], newDates[3], ctg, imageId, pop) + return Event(eventId, title, cast(HistDate, newDates[0]), newDates[1], newDates[2], newDates[3], ctg, imageId, pop) def lookupUnitCounts( start: HistDate | None, end: HistDate | None, scale: int, dbCur: sqlite3.Cursor) -> dict[int, int] | None: # Build query @@ -292,16 +296,18 @@ def handleInfoReq(params: dict[str, str], dbCur: sqlite3.Cursor): print('INFO: No \'event\' parameter for type=info request', file=sys.stderr) return None ctgs = params['ctgs'].split('.') if 'ctgs' in params else None - return lookupEventInfo(params['event'], ctgs, dbCur) -def lookupEventInfo(eventTitle: str, ctgs: list[str] | None, dbCur: sqlite3.Cursor) -> EventInfo | None: + imgonly = 'imgonly' in params + return lookupEventInfo(params['event'], ctgs, imgonly, dbCur) +def lookupEventInfo(eventTitle: str, ctgs: list[str] | None, imgonly: bool, dbCur: sqlite3.Cursor) -> EventInfo | None: """ Look up an event with given title, and return a descriptive EventInfo """ + imgJoin = 'INNER JOIN' if imgonly else 'LEFT JOIN' query = \ 'SELECT events.id, title, start, start_upper, end, end_upper, fmt, ctg, images.id, pop.pop, ' \ ' descs.desc, descs.wiki_id, ' \ ' images.url, images.license, images.artist, images.credit FROM events' \ ' INNER JOIN pop ON events.id = pop.id' \ - ' LEFT JOIN event_imgs ON events.id = event_imgs.id' \ - ' LEFT JOIN images ON event_imgs.img_id = images.id' \ + f' {imgJoin} event_imgs ON events.id = event_imgs.id' \ + f' {imgJoin} images ON event_imgs.img_id = images.id' \ ' LEFT JOIN descs ON events.id = descs.id' \ ' WHERE events.title = ? COLLATE NOCASE' row = dbCur.execute(query, (eventTitle,)).fetchone() @@ -336,11 +342,15 @@ def handleSuggReq(params: dict[str, str], dbCur: sqlite3.Cursor): return None # ctgs = params['ctgs'].split('.') if 'ctgs' in params else None - return lookupSuggs(searchStr, resultLimit, ctgs, dbCur) -def lookupSuggs(searchStr: str, resultLimit: int, ctgs: list[str] | None, dbCur: sqlite3.Cursor) -> SuggResponse: + imgonly = 'imgonly' in params + return lookupSuggs(searchStr, resultLimit, ctgs, imgonly, dbCur) +def lookupSuggs( + searchStr: str, resultLimit: int, ctgs: list[str] | None, imgonly: bool, dbCur: sqlite3.Cursor) -> SuggResponse: """ For a search string, returns a SuggResponse describing search suggestions """ tempLimit = resultLimit + 1 # For determining if 'more suggestions exist' - query = 'SELECT title FROM events LEFT JOIN pop ON events.id = pop.id WHERE title LIKE ?' + query = 'SELECT title FROM events LEFT JOIN pop ON events.id = pop.id' \ + + (' INNER JOIN event_imgs ON events.id = event_imgs.id' if imgonly else '') \ + + ' WHERE title LIKE ?' if ctgs is not None: query += ' AND ctg IN (' + ','.join('?' * len(ctgs)) + ')' query += f' ORDER BY pop.pop DESC LIMIT {tempLimit}' |
