aboutsummaryrefslogtreecommitdiff
path: root/backend/hist_data
diff options
context:
space:
mode:
authorTerry Truong <terry06890@gmail.com>2022-10-04 23:58:08 +1100
committerTerry Truong <terry06890@gmail.com>2022-10-04 23:58:08 +1100
commitb1d4c709cb2793745e61d85c337514b9c6c85603 (patch)
tree5e26d56ec90e810862d1aba8d0ce03abb0e8cc27 /backend/hist_data
parent07b7ef49b07242014f288652980f5b15bfc087f1 (diff)
Add gen_picked_data.py
Add unit test. Update READMEs and .gitignore.
Diffstat (limited to 'backend/hist_data')
-rw-r--r--backend/hist_data/README.md5
-rwxr-xr-xbackend/hist_data/gen_picked_data.py62
-rw-r--r--backend/hist_data/picked/README.md29
3 files changed, 96 insertions, 0 deletions
diff --git a/backend/hist_data/README.md b/backend/hist_data/README.md
index 517259c..3fcb8df 100644
--- a/backend/hist_data/README.md
+++ b/backend/hist_data/README.md
@@ -68,3 +68,8 @@ Some of the scripts use third-party packages:
1. In enwiki/, run `gen_desc_data.py`, which extracts page descriptions into a database.
1. Run `gen_desc_data.py`, which adds the `descs` table, using data in enwiki/,
and the `events` and `images` tables (only adds descriptions for events with images).
+
+## Optionally Add Extra Event Data
+1. Additional events can be described in `picked/events.json`, with their images placed
+ in `picked/` (see `picked/README.md` for details).
+1. Run `gen_picked_data.py` to add the described events to the database.
diff --git a/backend/hist_data/gen_picked_data.py b/backend/hist_data/gen_picked_data.py
new file mode 100755
index 0000000..7d6071a
--- /dev/null
+++ b/backend/hist_data/gen_picked_data.py
@@ -0,0 +1,62 @@
+#!/usr/bin/python3
+
+"""
+Adds additional manually-picked events to the database
+"""
+
+# Let unit-testing code that runs this script resolve imports of modules within this directory
+import os, sys
+parentDir = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(parentDir)
+
+import json, sqlite3
+from gen_imgs import convertImage
+
# Default arguments for genData() when this file is run as a script.
# All are relative paths, so they resolve against the current working directory.
PICKED_DIR = 'picked'  # Directory holding the events file and the source images
PICKED_EVT_FILE = 'events.json'  # Events file, looked up inside PICKED_DIR
DB_FILE = 'data.db'  # sqlite database that the events are added to
IMG_OUT_DIR = 'img'  # Directory that converted images are written to
+
def genData(pickedDir: str, pickedEvtFile: str, dbFile: str, imgOutDir: str) -> None:
    """
    Adds the events described in a JSON file to the database, converting their images.

    pickedDir: directory holding the events file and the source image files
    pickedEvtFile: name of the JSON file (inside pickedDir) encoding an array of event objects
    dbFile: path of the sqlite database to update
    imgOutDir: directory that converted images are written to, named '<id>.jpg'

    Events whose title is already present in 'events' are skipped with a warning.
    Each new event gets a negative ID, which is also used as its image ID.
    If an image fails to convert, processing stops at that event (nothing is stored for it),
    and the events added before it are still committed.
    """
    # Read the input first, so a missing or malformed file fails before the database is touched
    with open(os.path.join(pickedDir, pickedEvtFile), encoding='utf-8') as f:
        eventsToAdd = json.load(f)
    #
    dbCon = sqlite3.connect(dbFile)
    try:
        dbCur = dbCon.cursor()
        # Continue below the lowest ID in use, so that re-running this script after appending
        # new events doesn't reuse an ID taken by a previously added picked event
        (minId,) = dbCur.execute('SELECT MIN(id) FROM events').fetchone()
        nextId = -1 if minId is None else min(minId, 0) - 1
        #
        for event in eventsToAdd:
            title = event['title']
            row = dbCur.execute('SELECT id from events where title = ?', (title,)).fetchone()
            if row is not None:
                print(f'WARNING: Event "{title}" already exists, and will be skipped')
                continue
            print(f'Adding event {title}')
            # Convert the image before inserting anything, so a failed conversion can't leave
            # behind an 'events' row with no image, description, or popularity data
            print('- Converting image file')
            image = event['image']
            success = convertImage(
                os.path.join(pickedDir, image['file']), os.path.join(imgOutDir, str(nextId) + '.jpg'))
            if not success:
                break
            print("- Updating 'events'")
            dbCur.execute('INSERT INTO events VALUES (?, ?, ?, ?, ?, ?, ?, ?)',
                (nextId, title, event['start'], event['start_upper'], event['end'], event['end_upper'],
                    event['fmt'], event['ctg']))
            print("- Updating 'images'")
            dbCur.execute('INSERT INTO images VALUES (?, ?, ?, ?, ?)',
                (nextId, image['url'], image['license'], image['artist'], image['credit']))
            print("- Updating 'event_imgs'")
            dbCur.execute('INSERT INTO event_imgs VALUES (?, ?)', (nextId, nextId))
            print("- Updating 'descs'")
            dbCur.execute('INSERT INTO descs VALUES (?, ?, ?)', (nextId, nextId, event['desc']))
            print("- Updating 'pop'")
            dbCur.execute('INSERT INTO pop VALUES (?, ?)', (nextId, event['pop']))
            #
            nextId -= 1
        #
        dbCon.commit()
    finally:
        # Also runs when an exception propagates; uncommitted changes are then discarded
        dbCon.close()
+
if __name__ == '__main__':
    import argparse
    # No options are defined: parsing only provides '--help' and rejects unexpected arguments
    argParser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    argParser.parse_args()
    # Add the picked events, using the default file locations
    genData(PICKED_DIR, PICKED_EVT_FILE, DB_FILE, IMG_OUT_DIR)
diff --git a/backend/hist_data/picked/README.md b/backend/hist_data/picked/README.md
new file mode 100644
index 0000000..becbd24
--- /dev/null
+++ b/backend/hist_data/picked/README.md
@@ -0,0 +1,29 @@
+This directory holds data for additional events
+
+Files
+=====
+- events.json <br>
+ Encodes an array of objects, each describing an event to add.
+ For example:
+
+ [{
+ "title": "COVID-19 Pandemic",
+ "start": 2458919,
+ "start_upper": null,
+ "end": null,
+ "end_upper": null,
+ "fmt": 2,
+ "ctg": "event",
+ "image": {
+ "file": "covid.jpg",
+ "url": "https://en.wikipedia.org/wiki/File:Covid-19_SP_-_UTI_V._Nova_Cachoeirinha.jpg",
+ "license": "cc-by-sa 4.0",
+ "artist": "Gustavo Basso",
+ "credit": ""
+ },
+ "desc": "Global pandemic caused by the virus SARS-CoV-2",
+ "pop": 100
+ }]
+
+ The `image.file` field should name an image file in this directory.
+ Other fields correspond to those in the `events`, `images`, `descs`, and `pop` tables (see `../README.md`).