diff options
| -rw-r--r-- | backend/hist_data/README.md | 5 | ||||
| -rwxr-xr-x | backend/hist_data/enwiki/download_imgs.py | 2 | ||||
| -rwxr-xr-x | backend/hist_data/gen_imgs.py | 8 | ||||
| -rwxr-xr-x | backend/histplorer.py | 56 | ||||
| -rw-r--r-- | src/components/TimeLine.vue | 4 |
5 files changed, 36 insertions, 39 deletions
diff --git a/backend/hist_data/README.md b/backend/hist_data/README.md index 4cc7103..2a9475a 100644 --- a/backend/hist_data/README.md +++ b/backend/hist_data/README.md @@ -68,10 +68,13 @@ Some of the scripts use third-party packages: USER_AGENT variable applies here as well. 1. Run `gen_imgs.py`, which creates resized/cropped images in img/, from images in enwiki/imgs/. Adds the `images` and `event_imgs` tables. <br> - The output images may need additional manual changes: + The output images might need additional manual changes: - An input image might have no output produced, possibly due to data incompatibilities, memory limits, etc. - An input x.gif might produce x-1.jpg, x-2.jpg, etc, instead of x.jpg. + - An input image might produce output with unexpected dimensions. + This seems to happen when the image is very large, and triggers a + decompression bomb warning. ## Generate Description Data 1. In enwiki/, run `gen_desc_data.py`, which extracts page descriptions into a database. diff --git a/backend/hist_data/enwiki/download_imgs.py b/backend/hist_data/enwiki/download_imgs.py index 7dd0771..378de7f 100755 --- a/backend/hist_data/enwiki/download_imgs.py +++ b/backend/hist_data/enwiki/download_imgs.py @@ -9,6 +9,8 @@ The program can be re-run to continue downloading, and looks in the output directory to decide what to skip. """ +# Took about a week to download about 60k images + import argparse import re, os, time, signal import sqlite3 diff --git a/backend/hist_data/gen_imgs.py b/backend/hist_data/gen_imgs.py index 6d57180..46cf6ee 100755 --- a/backend/hist_data/gen_imgs.py +++ b/backend/hist_data/gen_imgs.py @@ -10,6 +10,8 @@ processing. It uses already-existing database entries to decide what to skip. 
""" +# Took about 10 hours to process about 60k images + import argparse import os, subprocess, signal import sqlite3, urllib.parse @@ -44,7 +46,7 @@ def genImgs(imgDir: str, imgDb: str, outDir: str, dbFile: str): imgsDone.add(imgId) print(f'Found {len(eventsDone)} events and {len(imgsDone)} images to skip') # - print('Processing images from eol and enwiki') + print('Processing images') processImgs(imgDir, imgDb, outDir, dbCur, eventsDone, imgsDone) # dbCon.commit() @@ -89,8 +91,7 @@ def processImgs(imgDir: str, imgDb: str, outDir: str, dbCur: sqlite3.Cursor, if not success: flag = True break - # Add entry to db - if imgId not in imgsDone: + # Add image to db row = imgDbCur.execute('SELECT name, license, artist, credit FROM imgs WHERE id = ?', (imgId,)).fetchone() if row is None: print(f'ERROR: No image record for ID {imgId}') @@ -99,6 +100,7 @@ def processImgs(imgDir: str, imgDb: str, outDir: str, dbCur: sqlite3.Cursor, name, license, artist, credit = row url = 'https://en.wikipedia.org/wiki/File:' + urllib.parse.quote(name) dbCur.execute('INSERT INTO images VALUES (?, ?, ?, ?, ?)', (imgId, url, license, artist, credit)) + # Add event association to db for eventId in eventIds: dbCur.execute('INSERT INTO event_imgs VALUES (?, ?)', (eventId, imgId)) imgDbCon.close() diff --git a/backend/histplorer.py b/backend/histplorer.py index a553f88..6822427 100755 --- a/backend/histplorer.py +++ b/backend/histplorer.py @@ -207,16 +207,12 @@ def lookupEvents(start: HistDate | None, end: HistDate | None, scale: int, ctgs: incl: int | None, resultLimit: int, dbCur: sqlite3.Cursor) -> list[Event]: """ Looks for events within a date range, in given scale, restricted by event category, an optional particular inclusion, and a result limit """ - #query = \ - # 'SELECT events.id, title, start, start_upper, end, end_upper, fmt, ctg, images.id, pop.pop FROM events' \ - # ' INNER JOIN event_disp ON events.id = event_disp.id' \ - # ' INNER JOIN pop ON events.id = pop.id' \ - # ' INNER JOIN 
event_imgs ON events.id = event_imgs.id' \ - # ' INNER JOIN images ON event_imgs.img_id = images.id' query = \ - 'SELECT events.id, title, start, start_upper, end, end_upper, fmt, ctg, pop.pop FROM events' \ + 'SELECT events.id, title, start, start_upper, end, end_upper, fmt, ctg, images.id, pop.pop FROM events' \ ' INNER JOIN event_disp ON events.id = event_disp.id' \ - ' INNER JOIN pop ON events.id = pop.id' + ' INNER JOIN pop ON events.id = pop.id' \ + ' INNER JOIN event_imgs ON events.id = event_imgs.id' \ + ' INNER JOIN images ON event_imgs.img_id = images.id' constraints = ['event_disp.scale = ?'] params: list[str | int] = [scale] # Constrain by start/end @@ -260,10 +256,8 @@ def lookupEvents(start: HistDate | None, end: HistDate | None, scale: int, ctgs: # return results def eventEntryToResults( - #row: tuple[int, str, int, int | None, int | None, int | None, int, str, int, int]) -> Event: - row: tuple[int, str, int, int | None, int | None, int | None, int, str, int]) -> Event: - #eventId, title, start, startUpper, end, endUpper, fmt, ctg, imageId, pop = row - eventId, title, start, startUpper, end, endUpper, fmt, ctg, pop = row + row: tuple[int, str, int, int | None, int | None, int | None, int, str, int, int]) -> Event: + eventId, title, start, startUpper, end, endUpper, fmt, ctg, imageId, pop = row """ Helper for converting an 'events' db entry into an Event object """ # Convert dates dateVals: list[int | None] = [start, startUpper, end, endUpper] @@ -272,8 +266,7 @@ def eventEntryToResults( if n is not None: newDates[i] = dbDateToHistDate(n, fmt, i < 2) # - #return Event(eventId, title, newDates[0], newDates[1], newDates[2], newDates[3], ctg, imageId, pop) - return Event(eventId, title, newDates[0], newDates[1], newDates[2], newDates[3], ctg, 0, pop) + return Event(eventId, title, newDates[0], newDates[1], newDates[2], newDates[3], ctg, imageId, pop) def lookupUnitCounts( start: HistDate | None, end: HistDate | None, scale: int, dbCur: sqlite3.Cursor) -> 
dict[int, int] | None: # Build query @@ -301,25 +294,22 @@ def handleInfoReq(params: dict[str, str], dbCur: sqlite3.Cursor): return lookupEventInfo(params['event'], dbCur) def lookupEventInfo(eventTitle: str, dbCur: sqlite3.Cursor) -> EventInfo | None: """ Look up an event with given title, and return a descriptive EventInfo """ - return EventInfo( - Event(1, eventTitle, HistDate(True, 2000, 10, 1), None, None, None, 'event', 10, 100), - f'DESC for {eventTitle}', 1, ImgInfo(f'http://example.org/{eventTitle}', 'license', 'artist', 'credit')) - #query = \ - # 'SELECT events.id, title, start, start_upper, end, end_upper, fmt, ctg, images.id, pop.pop, ' \ - # ' descs.desc, descs.wiki_id, ' \ - # ' images.url, images.license, images.artist, images.credit FROM events' \ - # ' INNER JOIN pop ON events.id = pop.id' \ - # ' INNER JOIN descs ON events.id = descs.id' \ - # ' INNER JOIN event_imgs ON events.id = event_imgs.id' \ - # ' INNER JOIN images ON event_imgs.img_id = images.id' \ - # ' WHERE events.title = ? COLLATE NOCASE' - #row = dbCur.execute(query, (eventTitle,)).fetchone() - #if row is not None: - # event = eventEntryToResults(row[:10]) - # desc, wikiId, url, license, artist, credit = row[10:] - # return EventInfo(event, desc, wikiId, ImgInfo(url, license, artist, credit)) - #else: - # return None + query = \ + 'SELECT events.id, title, start, start_upper, end, end_upper, fmt, ctg, images.id, pop.pop, ' \ + ' descs.desc, descs.wiki_id, ' \ + ' images.url, images.license, images.artist, images.credit FROM events' \ + ' INNER JOIN pop ON events.id = pop.id' \ + ' INNER JOIN descs ON events.id = descs.id' \ + ' INNER JOIN event_imgs ON events.id = event_imgs.id' \ + ' INNER JOIN images ON event_imgs.img_id = images.id' \ + ' WHERE events.title = ? 
COLLATE NOCASE' + row = dbCur.execute(query, (eventTitle,)).fetchone() + if row is not None: + event = eventEntryToResults(row[:10]) + desc, wikiId, url, license, artist, credit = row[10:] + return EventInfo(event, desc, wikiId, ImgInfo(url, license, artist, credit)) + else: + return None # For type=sugg def handleSuggReq(params: dict[str, str], dbCur: sqlite3.Cursor): diff --git a/src/components/TimeLine.vue b/src/components/TimeLine.vue index e46472b..6d0f632 100644 --- a/src/components/TimeLine.vue +++ b/src/components/TimeLine.vue @@ -80,7 +80,7 @@ import CloseIcon from './icon/CloseIcon.vue'; import {WRITING_MODE_HORZ, MIN_DATE, MAX_DATE, MONTH_SCALE, DAY_SCALE, SCALES, MONTH_NAMES, MIN_CAL_DATE, getDaysInMonth, HistDate, stepDate, getScaleRatio, getNumSubUnits, getUnitDiff, getEventPrecision, dateToUnit, dateToScaleDate, - moduloPositive, TimelineState, HistEvent} from '../lib'; + moduloPositive, TimelineState, HistEvent, getImagePath} from '../lib'; import {useStore} from '../store'; import {RBTree} from '../rbtree'; @@ -1288,7 +1288,7 @@ function eventImgStyles(eventId: number){ return { width: store.eventImgSz + 'px', height: store.eventImgSz + 'px', - //backgroundImage: `url(${getImagePath(event.imgId)})`, + backgroundImage: `url(${getImagePath(event.imgId)})`, backgroundColor: 'black', backgroundSize: 'cover', borderColor: color, |
