diff options
| author | Terry Truong <terry06890@gmail.com> | 2022-10-04 23:58:08 +1100 |
|---|---|---|
| committer | Terry Truong <terry06890@gmail.com> | 2022-10-04 23:58:08 +1100 |
| commit | b1d4c709cb2793745e61d85c337514b9c6c85603 (patch) | |
| tree | 5e26d56ec90e810862d1aba8d0ce03abb0e8cc27 /backend/hist_data | |
| parent | 07b7ef49b07242014f288652980f5b15bfc087f1 (diff) | |
Add gen_picked_data.py
Add unit test
Update READMEs and .gitignore
Diffstat (limited to 'backend/hist_data')
| -rw-r--r-- | backend/hist_data/README.md | 5 | ||||
| -rwxr-xr-x | backend/hist_data/gen_picked_data.py | 62 | ||||
| -rw-r--r-- | backend/hist_data/picked/README.md | 29 |
3 files changed, 96 insertions, 0 deletions
#!/usr/bin/python3

"""
Adds additional manually-picked events to the database
"""

# Enable unit testing code to, when running this script, resolve imports of modules within this directory
import os
import sys
parentDir = os.path.dirname(os.path.realpath(__file__))
sys.path.append(parentDir)

import json
import sqlite3
from contextlib import closing
from typing import Callable, Optional

PICKED_DIR = 'picked'
PICKED_EVT_FILE = 'events.json'
DB_FILE = 'data.db'
IMG_OUT_DIR = 'img'


def genData(
        pickedDir: str, pickedEvtFile: str, dbFile: str, imgOutDir: str,
        convertImageFn: Optional[Callable[[str, str], bool]] = None) -> None:
    """Add the events described in a JSON file to the database.

    Each event gets a fresh negative ID, which is used as both its event ID
    and its image ID. Events whose title is already present in the `events`
    table are skipped with a warning.

    Args:
        pickedDir: Directory holding the events file and the source images.
        pickedEvtFile: Name of the JSON file within `pickedDir`. It holds an
            array of objects with the keys `title`, `start`, `start_upper`,
            `end`, `end_upper`, `fmt`, `ctg`, `image` (an object with `file`,
            `url`, `license`, `artist`, `credit`), `desc`, and `pop`.
        dbFile: Path of the sqlite database to update.
        imgOutDir: Directory that converted images are written into, as
            `<id>.jpg`.
        convertImageFn: Called as `convertImageFn(srcPath, destPath)`, and
            expected to return a truthy value on success. Defaults to
            `gen_imgs.convertImage`.

    Raises:
        KeyError: If an event object lacks one of the expected keys. Nothing
            is committed in that case.

    If an image conversion fails, processing stops at that event. Events
    added before it are committed, and no rows are written for the failing
    event.
    """
    if convertImageFn is None:
        # Imported lazily so a caller that supplies its own converter does not need gen_imgs
        from gen_imgs import convertImage as convertImageFn
    #
    with open(os.path.join(pickedDir, pickedEvtFile), encoding='utf-8') as f:
        eventsToAdd = json.load(f)
    # closing() guarantees the connection is released on error, in which case uncommitted rows are discarded
    with closing(sqlite3.connect(dbFile)) as dbCon:
        dbCur = dbCon.cursor()
        # Continue below the lowest ID already in use, so re-running with extra events does not reuse
        # an ID (and image file name) assigned to an earlier picked event
        minId = dbCur.execute('SELECT MIN(id) FROM events').fetchone()[0]
        nextId = -1 if minId is None else min(minId, 0) - 1
        for event in eventsToAdd:
            title = event['title']
            row = dbCur.execute('SELECT id from events where title = ?', (title,)).fetchone()
            if row is not None:
                print(f'WARNING: Event "{title}" already exists, and will be skipped')
                continue
            print(f'Adding event {title}')
            # Read every field up front, so a malformed entry fails before any side effect
            image = event['image']
            eventRow = (
                nextId, title, event['start'], event['start_upper'], event['end'], event['end_upper'],
                event['fmt'], event['ctg'])
            imageRow = (nextId, image['url'], image['license'], image['artist'], image['credit'])
            descRow = (nextId, nextId, event['desc'])
            popRow = (nextId, event['pop'])
            srcPath = os.path.join(pickedDir, image['file'])
            destPath = os.path.join(imgOutDir, str(nextId) + '.jpg')
            # Convert before inserting, so a failure cannot leave a half-described event to be committed
            print('- Converting image file')
            if not convertImageFn(srcPath, destPath):
                break
            print("- Updating 'events'")
            dbCur.execute('INSERT INTO events VALUES (?, ?, ?, ?, ?, ?, ?, ?)', eventRow)
            print("- Updating 'images'")
            dbCur.execute('INSERT INTO images VALUES (?, ?, ?, ?, ?)', imageRow)
            print("- Updating 'event_imgs'")
            dbCur.execute('INSERT INTO event_imgs VALUES (?, ?)', (nextId, nextId))
            print("- Updating 'descs'")
            dbCur.execute('INSERT INTO descs VALUES (?, ?, ?)', descRow)
            print("- Updating 'pop'")
            dbCur.execute('INSERT INTO pop VALUES (?, ?)', popRow)
            #
            nextId -= 1
        #
        dbCon.commit()


if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
    args = parser.parse_args()
    #
    genData(PICKED_DIR, PICKED_EVT_FILE, DB_FILE, IMG_OUT_DIR)
+ For example: + + [{ + "title": "COVID-19 Pandemic", + "start": 2458919, + "start_upper": null, + "end": null, + "end_upper": null, + "fmt": 2, + "ctg": "event", + "image": { + "file": "covid.jpg", + "url": "https://en.wikipedia.org/wiki/File:Covid-19_SP_-_UTI_V._Nova_Cachoeirinha.jpg", + "license": "cc-by-sa 4.0", + "artist": "Gustavo Basso", + "credit": "" + }, + "desc": "Global pandemic caused by the virus SARS-CoV-2", + "pop": 100 + }] + + The `image.file` field should name an image file in this directory. + Other fields correspond to those in the `events`, `images`, `descs`, and `pop` tables (see `../README.md`). |
