aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--backend/hist_data/README.md18
-rw-r--r--backend/hist_data/cal.py30
-rw-r--r--backend/hist_data/enwiki/README.md8
-rwxr-xr-xbackend/hist_data/enwiki/download_img_license_info.py5
-rwxr-xr-xbackend/hist_data/enwiki/download_imgs.py9
-rwxr-xr-xbackend/hist_data/enwiki/gen_desc_data.py7
-rwxr-xr-xbackend/hist_data/enwiki/gen_dump_index_db.py8
-rwxr-xr-xbackend/hist_data/enwiki/gen_img_data.py8
-rwxr-xr-xbackend/hist_data/gen_desc_data.py5
-rwxr-xr-x[-rw-r--r--]backend/hist_data/gen_disp_data.py14
-rwxr-xr-xbackend/hist_data/gen_events_data.py101
-rwxr-xr-xbackend/hist_data/gen_imgs.py5
-rwxr-xr-xbackend/hist_data/gen_picked_data.py7
-rwxr-xr-xbackend/hist_data/gen_pop_data.py2
-rw-r--r--backend/hist_data/picked/README.md2
-rwxr-xr-xbackend/hist_data/reduce_event_data.py7
-rwxr-xr-xbackend/histplorer.py46
-rw-r--r--backend/tests/enwiki/test_gen_img_data.py10
-rw-r--r--backend/tests/test_cal.py4
-rw-r--r--backend/tests/test_gen_desc_data.py2
-rw-r--r--backend/tests/test_gen_disp_data.py14
-rw-r--r--backend/tests/test_gen_events_data.py26
-rw-r--r--backend/tests/test_gen_imgs.py2
-rw-r--r--backend/tests/test_histplorer.py4
-rw-r--r--backend/tests/test_reduce_event_data.py2
-rw-r--r--src/components/TimeLine.vue6
-rw-r--r--src/lib.ts6
27 files changed, 185 insertions, 173 deletions
diff --git a/backend/hist_data/README.md b/backend/hist_data/README.md
index d05016c..a3ae6c1 100644
--- a/backend/hist_data/README.md
+++ b/backend/hist_data/README.md
@@ -11,13 +11,13 @@ This directory holds files used to generate the history database data.db.
If `start_upper` is present, it and `start` denote an uncertain range of start times.
Similarly for 'end' and 'end_upper'.
- `fmt` indicates format info for `start`, `start_upper`, `end`, and `end_upper`.
+ - If 0, they denote a number of years AD (if positive) or BC (if negative).
- If 1, they denote a Julian date number.
- This allows simple comparison of events with day-level precision, but only goes back to 4713 BCE.
- - If 2, same as 1, but dates are preferably displayed using the Gregorian calendar, not the Julian calendar.
+ This allows simple comparison of events with day-level precision, but only goes back to 4713 BC.
+ - If 2, same as 1, but with a preference for display using the Julian calendar, not the Gregorian calendar.
For example, William Shakespeare's birth appears 'preferably Julian', but Samuel Johnson's does not.
- - If 3, same as 1, but 'end' and 'end_upper' are 'preferably Gregorian'.
+ - If 3, same as 2, but where 'start' and 'start_upper' are 'preferably Julian'.
For example, Galileo Galilei's birth date appears 'preferably Julian', but his death date does not.
- - If 0, they denote a number of years CE (if positive) or BCE (if negative).
- `pop`: <br>
Format: `id INT PRIMARY KEY, pop INT` <br>
Associates each event with a popularity measure (currently an average monthly viewcount)
@@ -49,6 +49,7 @@ Some of the scripts use third-party packages:
## Generate Event Data
1. Obtain a Wikidata JSON dump in wikidata/, as specified in its README.
1. Run `gen_events_data.py`, which creates `data.db`, and adds the `events` table.
+ You might want to set WIKIDATA_FILE in the script to the dump file's name.
## Generate Popularity Data
1. Obtain an enwiki dump and 'page view files' in enwiki/, as specified in the README.
@@ -61,11 +62,14 @@ Some of the scripts use third-party packages:
1. In enwiki/, run `gen_img_data.py` which looks at pages in the dump that match entries in `events`,
looks for infobox image names, and stores them in an image database.
1. In enwiki/, run `download_img_license_info.py`, which downloads licensing info for found
- images, and adds them to the image database.
-1. In enwiki/, run `download_imgs.py`, which downloads images into enwiki/imgs/.
+ images, and adds them to the image database. You should probably first change the USER_AGENT
+ script variable to identify yourself to the online API (this is expected
+ [best practice](https://www.mediawiki.org/wiki/API:Etiquette)).
+1. In enwiki/, run `download_imgs.py`, which downloads images into enwiki/imgs/. Setting the
+ USER_AGENT variable applies here as well.
1. Run `gen_imgs.py`, which creates resized/cropped images in img/, from images in enwiki/imgs/.
Adds the `imgs` and `event_imgs` tables. <br>
- The outputs will likely need additional manual changes:
+ The output images may need additional manual changes:
- An input image might have no output produced, possibly due to
data incompatibilities, memory limits, etc.
- An input x.gif might produce x-1.jpg, x-2.jpg, etc, instead of x.jpg.
diff --git a/backend/hist_data/cal.py b/backend/hist_data/cal.py
index 3b65205..550303e 100644
--- a/backend/hist_data/cal.py
+++ b/backend/hist_data/cal.py
@@ -1,14 +1,14 @@
"""
Provides date conversion functions, HistDate, and date scales.
-Algorithms for converting between calendars and Julian day number values were obtained from
-https://en.wikipedia.org/wiki/Julian_day#Converting_Gregorian_calendar_date_to_Julian_Day_Number.
"""
+# For conversion between calendars and Julian day numbers. Algorithms were obtained from
+# https://en.wikipedia.org/wiki/Julian_day#Converting_Gregorian_calendar_date_to_Julian_Day_Number.
def gregorianToJdn(year: int, month: int, day: int) -> int:
"""
Converts a Gregorian calendar date to a Julian day number,
denoting the noon-to-noon 'Julian day' that starts within the input day.
- A year of 1 means 1 CE, and -1 means 1 BC (0 is treated like -1).
+ A year of 1 means 1 AD, and -1 means 1 BC (0 is treated like -1).
A month of 1 means January. Can use a month of 13 and a day of 0.
Valid for dates from 24th Nov 4714 BC onwards.
"""
@@ -20,7 +20,6 @@ def gregorianToJdn(year: int, month: int, day: int) -> int:
jdn -= int((3 * int((year + 4900 + x) / 100)) / 4)
jdn += day - 32075
return jdn
-
def julianToJdn(year: int, month: int, day: int) -> int:
"""
Like gregorianToJdn(), but converts a Julian calendar date.
@@ -33,7 +32,6 @@ def julianToJdn(year: int, month: int, day: int) -> int:
jdn += int(275 * month / 9)
jdn += day + 1729777
return jdn
-
def jdnToGregorian(jdn: int) -> tuple[int, int, int]:
"""
Converts a Julian day number to a Gregorian calendar date, denoting the
@@ -50,7 +48,6 @@ def jdnToGregorian(jdn: int) -> tuple[int, int, int]:
if Y <= 0:
Y -= 1
return Y, M, D
-
def jdnToJulian(jdn: int) -> tuple[int, int, int]:
""" Like jdnToGregorian(), but converts to a Julian calendar date """
f = jdn + 1401
@@ -63,26 +60,25 @@ def jdnToJulian(jdn: int) -> tuple[int, int, int]:
if Y <= 0:
Y -= 1
return Y, M, D
-
def julianToGregorian(year: int, month: int, day: int) -> tuple[int, int, int]:
return jdnToGregorian(julianToJdn(year, month, day))
-
def gregorianToJulian(year: int, month: int, day: int) -> tuple[int, int, int]:
return jdnToJulian(gregorianToJdn(year, month, day))
-MIN_CAL_YEAR = -4713 # Disallow within-year dates before this year
+# For date representation
+MIN_CAL_YEAR = -4713 # Year before which JDNs are not usable
MONTH_SCALE = -1;
DAY_SCALE = -2;
-SCALES: list[int] = [int(x) for x in [1e9, 1e8, 1e7, 1e6, 1e5, 1e4, 1e3, 100, 10, 1, MONTH_SCALE, DAY_SCALE]];
+SCALES: list[int] = [int(s) for s in [1e9, 1e8, 1e7, 1e6, 1e5, 1e4, 1e3, 100, 10, 1, MONTH_SCALE, DAY_SCALE]];
class HistDate:
"""
Represents a historical date
- - 'year' may be negative (-1 means 1 BCE)
+ - 'year' may be negative (-1 means 1 BC)
- 'month' and 'day' are at least 1, if given
- 'gcal' may be:
- True: Indicates a Gregorian calendar date
- False: Means the date should, for display, be converted to a Julian calendar date
- - None: 'month' and 'day' are 1 (used for dates before the Julian period starting year 4713 BCE)
+ - None: 'month' and 'day' are 1 (required for dates before MIN_CAL_YEAR)
"""
def __init__(self, gcal: bool | None, year: int, month=1, day=1):
self.gcal = gcal
@@ -96,22 +92,24 @@ class HistDate:
def __repr__(self):
return str(self.__dict__)
def dbDateToHistDate(n: int, fmt: int, end=False) -> HistDate:
+ """ Converts a start/start_upper/etc and fmt value in the 'events' db table, into a HistDate """
if fmt == 0: # year
if n >= MIN_CAL_YEAR:
return HistDate(True, n, 1, 1)
else:
return HistDate(None, n)
- elif fmt == 1 or fmt == 3 and not end: # jdn for julian calendar
- return HistDate(False, *jdnToJulian(n))
- else: # fmt == 2 or fmt == 3 and end
+ elif fmt == 1 or fmt == 3 and end: # jdn for gregorian calendar
return HistDate(True, *jdnToGregorian(n))
+ else: # fmt == 2 or fmt == 3 and not end
+ return HistDate(False, *jdnToJulian(n))
def dateToUnit(date: HistDate, scale: int) -> int:
+ """ Converts a date to an int representing a unit on a scale """
if scale >= 1:
return date.year // scale
elif scale == MONTH_SCALE:
if date.gcal == False:
return julianToJdn(date.year, date.month, 1)
- else:
+ else: # True or None
return gregorianToJdn(date.year, date.month, 1)
else: # scale == DAY_SCALE
if date.gcal == False:
diff --git a/backend/hist_data/enwiki/README.md b/backend/hist_data/enwiki/README.md
index 262ebdb..76d33e5 100644
--- a/backend/hist_data/enwiki/README.md
+++ b/backend/hist_data/enwiki/README.md
@@ -33,11 +33,11 @@ This directory holds files obtained/derived from [English Wikipedia](https://en.
# Image Files
- `gen_img_data.py` <br>
- Used to find infobox image names for page IDs, and store them into a database.
+ Finds infobox image names for page IDs, and stores them into a database.
- `download_img_license_info.py` <br>
- Used to download licensing metadata for image names, via wikipedia's online API, and store them into a database.
+	Downloads licensing metadata for image names, via Wikipedia's online API, and stores them into a database.
- `img_data.db` <br>
- Used to hold metadata about infobox images for a set of page IDs.
+ Holds metadata about infobox images for a set of page IDs.
Generated using `gen_img_data.py` and `download_img_license_info.py`. <br>
Tables: <br>
- `page_imgs`: `page_id INT PRIMARY KEY, title TEXT UNIQUE, img_name TEXT` <br>
@@ -47,7 +47,7 @@ This directory holds files obtained/derived from [English Wikipedia](https://en.
<br>
Might lack some matches for `img_name` in `page_imgs`, due to licensing info unavailability.
- `download_imgs.py` <br>
- Used to download image files into imgs/.
+ Downloads image files into imgs/.
# Description Files
- `gen_desc_data.py` <br>
diff --git a/backend/hist_data/enwiki/download_img_license_info.py b/backend/hist_data/enwiki/download_img_license_info.py
index 1217caf..43f2c43 100755
--- a/backend/hist_data/enwiki/download_img_license_info.py
+++ b/backend/hist_data/enwiki/download_img_license_info.py
@@ -9,10 +9,10 @@ The program can be re-run to continue downloading, and looks
at already-processed names to decide what to skip.
"""
-import re
+import argparse
+import re, time, signal
import sqlite3, urllib.parse, html
import requests
-import time, signal
IMG_DB = 'img_data.db'
#
@@ -150,7 +150,6 @@ def downloadInfo(imgDb: str) -> None:
dbCon.close()
if __name__ == '__main__':
- import argparse
parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
parser.parse_args()
#
diff --git a/backend/hist_data/enwiki/download_imgs.py b/backend/hist_data/enwiki/download_imgs.py
index bbd2cda..7dd0771 100755
--- a/backend/hist_data/enwiki/download_imgs.py
+++ b/backend/hist_data/enwiki/download_imgs.py
@@ -9,10 +9,10 @@ The program can be re-run to continue downloading, and looks
in the output directory to decide what to skip.
"""
-import re, os
+import argparse
+import re, os, time, signal
import sqlite3
import urllib.parse, requests
-import time, signal
IMG_DB = 'img_data.db' # About 130k image names
OUT_DIR = 'imgs'
@@ -22,7 +22,7 @@ USER_AGENT = 'terryt.dev (terry06890@gmail.com)'
TIMEOUT = 1
# https://en.wikipedia.org/wiki/Wikipedia:Database_download says to 'throttle to 1 cache miss per sec'
# It's unclear how to properly check for cache misses, so we just aim for 1 per sec
-BACKOFF = False # If True, double the timeout each time a download error occurs (otherwise just exit)
+EXP_BACKOFF = False # If True, double the timeout each time a download error occurs (otherwise just exit)
def downloadImgs(imgDb: str, outDir: str, timeout: int) -> None:
if not os.path.exists(outDir):
@@ -84,7 +84,7 @@ def downloadImgs(imgDb: str, outDir: str, timeout: int) -> None:
time.sleep(timeout)
except Exception as e:
print(f'Error while downloading to {outFile}: {e}')
- if not BACKOFF:
+ if not EXP_BACKOFF:
return
else:
timeout *= 2
@@ -94,7 +94,6 @@ def downloadImgs(imgDb: str, outDir: str, timeout: int) -> None:
dbCon.close()
if __name__ == '__main__':
- import argparse
parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
parser.parse_args()
#
diff --git a/backend/hist_data/enwiki/gen_desc_data.py b/backend/hist_data/enwiki/gen_desc_data.py
index b3fde52..bb2b845 100755
--- a/backend/hist_data/enwiki/gen_desc_data.py
+++ b/backend/hist_data/enwiki/gen_desc_data.py
@@ -7,14 +7,14 @@ and adds them to a database
# In testing, this script took over 10 hours to run, and generated about 5GB
+import argparse
import sys, os, re
-import bz2
-import html, mwxml, mwparserfromhell
+import bz2, html, mwxml, mwparserfromhell
import sqlite3
DUMP_FILE = 'enwiki-20220501-pages-articles-multistream.xml.bz2' # Had about 22e6 pages
DB_FILE = 'desc_data.db'
-
+# Regexps
DESC_LINE_REGEX = re.compile('^ *[A-Z\'"]')
EMBEDDED_HTML_REGEX = re.compile(r'<[^<]+/>|<!--[^<]+-->|<[^</]+>([^<]*|[^<]*<[^<]+>[^<]*)</[^<]+>|<[^<]+$')
# Recognises a self-closing HTML tag, a tag with 0 children, tag with 1 child with 0 children, or unclosed tag
@@ -119,7 +119,6 @@ def convertTitle(title: str) -> str:
return html.unescape(title).replace('_', ' ')
if __name__ == '__main__':
- import argparse
parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
parser.parse_args()
#
diff --git a/backend/hist_data/enwiki/gen_dump_index_db.py b/backend/hist_data/enwiki/gen_dump_index_db.py
index 5778680..6be8bc5 100755
--- a/backend/hist_data/enwiki/gen_dump_index_db.py
+++ b/backend/hist_data/enwiki/gen_dump_index_db.py
@@ -1,11 +1,12 @@
#!/usr/bin/python3
"""
-Adds data from the wiki dump index-file into a database
+Adds data from the wiki-dump index-file into a database
"""
+
+import argparse
import sys, os, re
-import bz2
-import sqlite3
+import bz2, sqlite3
INDEX_FILE = 'enwiki-20220501-pages-articles-multistream-index.txt.bz2' # Had about 22e6 lines
DB_FILE = 'dump_index.db'
@@ -53,7 +54,6 @@ def genData(indexFile: str, dbFile: str) -> None:
dbCon.close()
if __name__ == '__main__':
- import argparse
parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
parser.parse_args()
#
diff --git a/backend/hist_data/enwiki/gen_img_data.py b/backend/hist_data/enwiki/gen_img_data.py
index 922b893..9aa3863 100755
--- a/backend/hist_data/enwiki/gen_img_data.py
+++ b/backend/hist_data/enwiki/gen_img_data.py
@@ -8,15 +8,15 @@ The program can be re-run with an updated set of page IDs, and
will skip already-processed page IDs.
"""
-import re
-import os, bz2, html, urllib.parse
+import os, re
+import bz2, html, urllib.parse
import sqlite3
DUMP_FILE = 'enwiki-20220501-pages-articles-multistream.xml.bz2'
INDEX_DB = 'dump_index.db'
IMG_DB = 'img_data.db' # The database to create
DB_FILE = os.path.join('..', 'data.db')
-#
+# Regexps
ID_LINE_REGEX = re.compile(r'<id>(.*)</id>')
IMG_LINE_REGEX = re.compile(r'.*\| *image *= *([^|]*)')
BRACKET_IMG_REGEX = re.compile(r'\[\[(File:[^|]*).*]]')
@@ -33,7 +33,7 @@ def genData(pageIds: set[int], dumpFile: str, indexDb: str, imgDb: str) -> None:
if imgDbCur.execute('SELECT name FROM sqlite_master WHERE type="table" AND name="page_imgs"').fetchone() is None:
# Create tables if not present
imgDbCur.execute('CREATE TABLE page_imgs (page_id INT PRIMARY KEY, title TEXT UNIQUE, img_name TEXT)')
- # 'img_name' may be NULL
+ # 'img_name' values are set to NULL to indicate page IDs where no image was found
imgDbCur.execute('CREATE INDEX page_imgs_idx ON page_imgs(img_name)')
else:
# Check for already-processed page IDs
diff --git a/backend/hist_data/gen_desc_data.py b/backend/hist_data/gen_desc_data.py
index 0d7ee88..6c9fee2 100755
--- a/backend/hist_data/gen_desc_data.py
+++ b/backend/hist_data/gen_desc_data.py
@@ -1,10 +1,10 @@
#!/usr/bin/python3
"""
-Maps events to short descriptions from Wikipedia,
-and stores them in the database.
+Maps events to short descriptions from Wikipedia, and stores them in the database.
"""
+import argparse
import os, sqlite3
ENWIKI_DB = os.path.join('enwiki', 'desc_data.db')
@@ -52,7 +52,6 @@ def genData(enwikiDb: str, dbFile: str) -> None:
dbCon.close()
if __name__ == '__main__':
- import argparse
parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
args = parser.parse_args()
#
diff --git a/backend/hist_data/gen_disp_data.py b/backend/hist_data/gen_disp_data.py
index e771e57..d796d92 100644..100755
--- a/backend/hist_data/gen_disp_data.py
+++ b/backend/hist_data/gen_disp_data.py
@@ -1,15 +1,18 @@
#!/usr/bin/python3
"""
-Adds data about event distribution to the database, and removes events not eligible for display.
+Adds data about event distribution to the database,
+and removes events not eligible for display
"""
-# Enable unit testing code to, when running this script, resolve imports of modules within this directory
+# Code used in unit testing (for resolving imports of modules within this directory)
import os, sys
parentDir = os.path.dirname(os.path.realpath(__file__))
sys.path.append(parentDir)
-
+# Standard imports
+import argparse
import sqlite3
+# Local imports
from cal import SCALES, dbDateToHistDate, dateToUnit
MAX_DISPLAYED_PER_UNIT = 4
@@ -62,8 +65,8 @@ def genData(dbFile: str, scales: list[int], maxDisplayedPerUnit: int) -> None:
del scaleUnitToCounts[(scale, unit)]
else:
scaleUnitToCounts[(scale, unit)][0] = count
- query2 = 'SELECT events.id FROM events LEFT JOIN pop ON events.id = pop.id WHERE pop.id IS NULL'
- for (eventId,) in dbCur.execute(query2): # Include events without scores
+ for (eventId,) in dbCur.execute( # Find events without scores
+ 'SELECT events.id FROM events LEFT JOIN pop ON events.id = pop.id WHERE pop.id IS NULL'):
eventsToDel.append(eventId)
print(f'Found {len(eventsToDel)}')
#
@@ -91,7 +94,6 @@ def genData(dbFile: str, scales: list[int], maxDisplayedPerUnit: int) -> None:
dbCon.close()
if __name__ == '__main__':
- import argparse
parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
args = parser.parse_args()
#
diff --git a/backend/hist_data/gen_events_data.py b/backend/hist_data/gen_events_data.py
index f054f76..51d6940 100755
--- a/backend/hist_data/gen_events_data.py
+++ b/backend/hist_data/gen_events_data.py
@@ -2,8 +2,8 @@
"""
Reads a Wikidata JSON dump, looking for entities usable as historical events. For each such
-entity, finds a start date (may be a range), optional end date, and event category (eg: normal
-event, person with birth/death date, country, etc). Writes the results into a database.
+entity, finds a start date (may be a range), optional end date, and event category (eg: discovery,
+person with birth/death date, etc). Writes the results into a database.
The JSON dump contains an array of objects, each of which describes a Wikidata item item1,
and takes up its own line.
@@ -12,11 +12,11 @@ and takes up it's own line.
- Getting a property statement value: item1['claims'][prop1][idx1]['mainsnak']['datavalue']
'idx1' indexes an array of statements
-Value objects have a 'type' and 'value' field.
+'datavalue' objects have a 'type' and 'value' field.
Info about objects with type 'time' can be found at: https://www.wikidata.org/wiki/Help:Dates
An example:
{"value":{
- "time":"+1830-10-04T00:00:00Z", # The year is always signed and padded to 4-16 digits (-0001 means 1 BCE)
+ "time":"+1830-10-04T00:00:00Z", # The year is always signed and padded to 4-16 digits (-0001 means 1 BC)
"timezone":0, # Unused
"before":0, # Unused
"after":0, # Unused
@@ -52,30 +52,31 @@ Info about objects with type 'quantity' can be found at: https://www.wikidata.or
"http://www.wikidata.org/entity/Q524410" - gigaannum (1e9 yrs)
"""
-# On Linux, running on the full dataset seems to make the processes hang when done. This was resolved by:
-# - Storing subprocess results in temp files. Apparently passing large objects through pipes can cause deadlock.
-# - Using set_start_method('spawn'). Apparently 'fork' can cause unexpected copying of lock/semaphore/etc state.
+# On Linux, running on the full dataset seems to make the processes hang when done. This was resolved by:
+# - Storing subprocess results in temp files. Apparently passing large objects through pipes can cause deadlock.
+# - Using set_start_method('spawn'). Apparently 'fork' can cause unexpected copying of lock/semaphore/etc state.
# Related: https://bugs.python.org/issue6721
# - Using pool.map() instead of pool.imap_unordered(), which seems to hang in some cases (was using python 3.8).
# Possibly related: https://github.com/python/cpython/issues/72882
-# Enable unit testing code to, when running this script, resolve imports of modules within this directory
+# Code used in unit testing (for resolving imports of modules within this directory)
import os, sys
parentDir = os.path.dirname(os.path.realpath(__file__))
sys.path.append(parentDir)
+# Standard imports
+import argparse
+import math, re
+import io, bz2, json, sqlite3
+import indexed_bzip2, pickle, multiprocessing, tempfile
+# Local imports
+from cal import gregorianToJdn, julianToJdn, MIN_CAL_YEAR
-import io, math, re, argparse
-import bz2, json, sqlite3
-import multiprocessing, indexed_bzip2, pickle, tempfile
-# Modules in this directory
-from cal import gregorianToJdn, julianToJdn
-
+# Constants
WIKIDATA_FILE = os.path.join('wikidata', 'latest-all.json.bz2')
DUMP_YEAR = 2022 # Used for converting 'age' values into dates
OFFSETS_FILE = os.path.join('wikidata', 'offsets.dat')
DB_FILE = 'data.db'
-N_PROCS = 6
-
+N_PROCS = 6 # Number of processes to use
# For getting Wikidata entity IDs
INSTANCE_OF = 'P31'
EVENT_CTG: dict[str, dict[str, str]] = {
@@ -91,31 +92,33 @@ EVENT_CTG: dict[str, dict[str, str]] = {
'recurring event': 'Q15275719',
'event sequence': 'Q15900616',
'incident': 'Q18669875',
+ 'project': 'Q170584',
+ 'number of deaths': 'P1120',
},
- 'human': {
- 'human': 'Q5',
- },
- 'country': {
+ 'place': {
'country': 'Q6256',
'state': 'Q7275',
'sovereign state': 'Q3624078',
+ 'city': 'Q515',
+ 'tourist attraction': 'Q570116',
+ 'heritage site': 'Q358',
+ 'terrestrial planet': 'Q128207',
+ 'navigational star': 'Q108171565',
+ 'G-type main-sequence star': 'Q5864',
+ },
+ 'organism': {
+ 'taxon': 'Q16521',
+ },
+ 'person': {
+ 'human': 'Q5',
+ },
+ 'work': {
+ 'creator': 'P170',
+ 'genre': 'P136',
},
'discovery': {
'time of discovery or invention': 'P575',
},
- 'media': {
- 'work of art': 'Q4502142',
- 'literary work': 'Q7725634',
- 'comic book series': 'Q14406742',
- 'painting': 'Q3305213',
- 'musical work/composition': 'Q105543609',
- 'film': 'Q11424',
- 'animated film': 'Q202866',
- 'television series': 'Q16401',
- 'anime television series': 'Q63952888',
- 'video game': 'Q7889',
- 'video game series': 'Q7058673',
- },
}
ID_TO_CTG = {id: ctg for ctg, nmToId in EVENT_CTG.items() for name, id in nmToId.items()}
EVENT_PROP: dict[str, str] = {
@@ -148,14 +151,14 @@ PROP_RULES: list[tuple[str] | tuple[str, str] | tuple[str, str, bool]] = [
('time of discovery or invention',),
('publication date',),
]
-UNIT_TO_SCALE: dict[str, int] = { # Maps 'unit' values (found in type=quantity value objects) to numbers of years
+UNIT_TO_SCALE: dict[str, int] = {
+ # Maps 'unit' values (found in 'datavalue' objects with type=quantity) to numbers of years
'http://www.wikidata.org/entity/Q577': 1, # 'year'
'http://www.wikidata.org/entity/Q24564698': 1, # 'years old'
'http://www.wikidata.org/entity/Q3013059': 10**3, # 'kiloannum' (1e3 yrs)
'http://www.wikidata.org/entity/Q20764': 10**6, # 'megaannum' (1e6 yrs)
'http://www.wikidata.org/entity/Q524410': 10**9, # 'gigaannum' (1e9 yrs)
}
-
# For filtering lines before parsing JSON
TYPE_ID_REGEX = ('"id":(?:"' + '"|"'.join([id for id in ID_TO_CTG if id.startswith('Q')]) + '")').encode()
PROP_ID_REGEX = ('(?:"' + '"|"'.join([id for id in ID_TO_CTG if id.startswith('P')]) + '"):\[{"mainsnak"').encode()
@@ -183,12 +186,12 @@ def genData(wikidataFile: str, offsetsFile: str, dbFile: str, nProcs: int) -> No
# The 'OR IGNORE' is for a few entries that share the same title (and seem like redirects)
else:
if not os.path.exists(offsetsFile):
- print('Creating offsets file') # For indexed access for multiprocessing (creation took about 6.7 hours)
+ print('Creating offsets file') # For indexed access used in multiprocessing (may take about 7 hours)
with indexed_bzip2.open(wikidataFile) as file:
with open(offsetsFile, 'wb') as file2:
pickle.dump(file.block_offsets(), file2)
print('Allocating file into chunks')
- fileSz: int # About 1.4 TB
+ fileSz: int # Was about 1.4 TB
with indexed_bzip2.open(wikidataFile) as file:
with open(offsetsFile, 'rb') as file2:
file.set_block_offsets(pickle.load(file2))
@@ -206,15 +209,15 @@ def genData(wikidataFile: str, offsetsFile: str, dbFile: str, nProcs: int) -> No
chunkIdxs[i], chunkIdxs[i+1]) for i in range(nProcs)]):
# Add entries from subprocess output file
with open(outFile, 'rb') as file:
- for entry in pickle.load(file):
- dbCur.execute('INSERT OR IGNORE INTO events VALUES (?, ?, ?, ?, ?, ?, ?, ?)', entry)
+ for item in pickle.load(file):
+ dbCur.execute('INSERT OR IGNORE INTO events VALUES (?, ?, ?, ?, ?, ?, ?, ?)', item)
dbCon.commit()
dbCon.close()
# For data extraction
def readDumpLine(lineBytes: bytes) -> tuple[int, str, int, int | None, int | None, int | None, int, str] | None:
""" Parses a Wikidata dump line, returning an entry to add to the db """
- # Check with regex
+ # Check with regexes
if re.search(TYPE_ID_REGEX, lineBytes) is None and re.search(PROP_ID_REGEX, lineBytes) is None:
return None
# Decode
@@ -283,7 +286,7 @@ def readDumpLine(lineBytes: bytes) -> tuple[int, str, int, int | None, int | Non
#
return (itemId, itemTitle, start, startUpper, end, endUpper, timeFmt, eventCtg)
def getTimeData(startVal, endVal, timeType: str) -> tuple[int, int | None, int | None, int | None, int] | None:
- """ Obtains event start+end data from value objects with type 'time', according to 'timeType' """
+ """ Obtains event start+end data from 'datavalue' objects with type 'time', according to 'timeType' """
# Values to return
start: int
startUpper: int | None = None
@@ -317,7 +320,7 @@ def getTimeData(startVal, endVal, timeType: str) -> tuple[int, int | None, int |
else:
start = DUMP_YEAR - upperBound * scale
startUpper = DUMP_YEAR - lowerBound * scale
- # Account for non-existence of 0 CE
+ # Account for non-existence of 0 AD
if start <= 0:
start -= 1
if startUpper is not None and startUpper <= 0:
@@ -342,7 +345,7 @@ def getTimeData(startVal, endVal, timeType: str) -> tuple[int, int | None, int |
return None
end, _, timeFmt2 = endTimeVals
if timeFmt != timeFmt2:
- if timeFmt == 1 and timeFmt2 == 2:
+ if timeFmt == 2 and timeFmt2 == 1:
timeFmt = 3
else:
return None
@@ -359,13 +362,13 @@ def getTimeData(startVal, endVal, timeType: str) -> tuple[int, int | None, int |
return None
end, endUpper, timeFmt2 = endTimeVals
if timeFmt != timeFmt2:
- if timeFmt == 1 and timeFmt2 == 2:
+ if timeFmt == 2 and timeFmt2 == 1:
timeFmt = 3
else:
return None
return start, startUpper, end, endUpper, timeFmt
def getEventTime(dataVal) -> tuple[int, int | None, int] | None:
- """ Obtains event start (or end) data from a value object with type 'time' """
+ """ Obtains event start (or end) data from a 'datavalue' object with type 'time' """
if 'type' not in dataVal or dataVal['type'] != 'time':
return None
# Get time data
@@ -385,20 +388,20 @@ def getEventTime(dataVal) -> tuple[int, int | None, int] | None:
startUpper: int | None = None
timeFmt: int
if precision in [10, 11]: # 'month' or 'day' precision
- if year < -4713: # If before 4713 BCE (start of valid julian date period)
- print(f'WARNING: Skipping sub-year-precision date before 4713 BCE: {json.dumps(dataVal)}')
+ if year < MIN_CAL_YEAR: # If before start of valid julian date period
+ print(f'WARNING: Skipping sub-year-precision date before {-MIN_CAL_YEAR} BC: {json.dumps(dataVal)}')
return None
day = max(day, 1) # With month-precision, entry may have a 'day' of 0
if calendarmodel == 'http://www.wikidata.org/entity/Q1985727': # 'proleptic gregorian calendar'
start = gregorianToJdn(year, month, day)
if precision == 10:
startUpper = gregorianToJdn(year, month+1, 0)
- timeFmt = 2
+ timeFmt = 1
else: # "http://www.wikidata.org/entity/Q1985786" ('proleptic julian calendar')
start = julianToJdn(year, month, day)
if precision == 10:
startUpper = julianToJdn(year, month+1, 0)
- timeFmt = 1
+ timeFmt = 2
elif 0 <= precision < 10: # 'year' to 'gigaannum' precision
scale: int = 10 ** (9 - precision)
start = year // scale * scale
diff --git a/backend/hist_data/gen_imgs.py b/backend/hist_data/gen_imgs.py
index 817de03..bf3bcd0 100755
--- a/backend/hist_data/gen_imgs.py
+++ b/backend/hist_data/gen_imgs.py
@@ -10,9 +10,9 @@ processing. It uses already-existing database entries to decide what
to skip.
"""
-import os, math, subprocess
+import argparse
+import os, math, subprocess, signal
import sqlite3, urllib.parse
-import signal
from PIL import Image
IMG_DIR = os.path.join('enwiki', 'imgs')
@@ -147,7 +147,6 @@ def convertImage(imgPath: str, outPath: str):
return True
if __name__ == '__main__':
- import argparse
parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
parser.parse_args()
#
diff --git a/backend/hist_data/gen_picked_data.py b/backend/hist_data/gen_picked_data.py
index 7d6071a..933af24 100755
--- a/backend/hist_data/gen_picked_data.py
+++ b/backend/hist_data/gen_picked_data.py
@@ -4,12 +4,14 @@
Adds additional manually-picked events to the database
"""
-# Enable unit testing code to, when running this script, resolve imports of modules within this directory
+# Code used in unit testing (for resolving imports of modules within this directory)
import os, sys
parentDir = os.path.dirname(os.path.realpath(__file__))
sys.path.append(parentDir)
-
+# Standard imports
+import argparse
import json, sqlite3
+# Local imports
from gen_imgs import convertImage
PICKED_DIR = 'picked'
@@ -55,7 +57,6 @@ def genData(pickedDir: str, pickedEvtFile: str, dbFile: str, imgOutDir: str) ->
dbCon.close()
if __name__ == '__main__':
- import argparse
parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
args = parser.parse_args()
#
diff --git a/backend/hist_data/gen_pop_data.py b/backend/hist_data/gen_pop_data.py
index 8eaa142..aaaf69d 100755
--- a/backend/hist_data/gen_pop_data.py
+++ b/backend/hist_data/gen_pop_data.py
@@ -1,7 +1,7 @@
#!/usr/bin/python3
"""
-Adds Wikipedia page view info to the database as popularity values.
+Adds Wikipedia page view info to the database as popularity values
"""
import os, sqlite3
diff --git a/backend/hist_data/picked/README.md b/backend/hist_data/picked/README.md
index becbd24..395fd9d 100644
--- a/backend/hist_data/picked/README.md
+++ b/backend/hist_data/picked/README.md
@@ -1,4 +1,4 @@
-This directory holds data for additional events
+This directory holds data for additional manually-picked events.
Files
=====
diff --git a/backend/hist_data/reduce_event_data.py b/backend/hist_data/reduce_event_data.py
index c061f90..5801f4d 100755
--- a/backend/hist_data/reduce_event_data.py
+++ b/backend/hist_data/reduce_event_data.py
@@ -1,16 +1,17 @@
#!/usr/bin/python3
"""
-Delete events from the database that have no image.
+Delete events from the database that have no image
"""
-# Enable unit testing code to, when running this script, resolve imports of modules within this directory
+# Code used in unit testing (for resolving imports of modules within this directory)
import os, sys
parentDir = os.path.dirname(os.path.realpath(__file__))
sys.path.append(parentDir)
-
+# Standard imports
import argparse
import sqlite3
+# Local imports
from cal import SCALES, dbDateToHistDate, dateToUnit
DB_FILE = 'data.db'
diff --git a/backend/histplorer.py b/backend/histplorer.py
index d397c17..20c63de 100755
--- a/backend/histplorer.py
+++ b/backend/histplorer.py
@@ -1,29 +1,29 @@
"""
 WSGI script that serves historical data
Expected HTTP query parameters:
- type:
- If 'events', reply with list of event objects, within a date range, for a given scale
+ If 'events', reply with information on events within a date range, for a given scale
If 'info', reply with information about a given event
If 'sugg', reply with search suggestions for an event search string
- range: With type=events, specifies a historical-date range
- If absent, the default is 'all of time'
+ If absent, the default is 'all of time'.
Examples:
- range=1000.1910-10-09 means '1000 CE to 09/10/1910 (inclusive)'
- range=-13000. means '13000 BCE onwards'
-- scale: With type=events, specifies a date scale (matched against 'scale' column in 'event_disp' table)
+ range=1000.1910-10-09 means '1000 AD to 09/10/1910 (inclusive)'
+ range=-13000. means '13000 BC onwards'
+- scale: With type=events, specifies a date scale
- incl: With type=events, specifies an event to include, as an event ID
- event: With type=info, specifies the event to get info for
- input: With type=sugg, specifies a search string to suggest for
- limit: With type=events or type=sugg, specifies the max number of results
-- ctg: With type=events or type=sugg, specifies event categories to restrict results to
+- ctg: With type=events or type=sugg, specifies an event category to restrict results to
"""
from typing import Iterable
import sys, re
import urllib.parse, sqlite3
import gzip, jsonpickle
-from hist_data.cal import gregorianToJdn, HistDate, dbDateToHistDate, dateToUnit
+from hist_data.cal import gregorianToJdn, HistDate, MIN_CAL_YEAR, dbDateToHistDate, dateToUnit
DB_FILE = 'hist_data/data.db'
MAX_REQ_EVENTS = 500
@@ -32,7 +32,7 @@ DEFAULT_REQ_EVENTS = 20
MAX_REQ_SUGGS = 50
DEFAULT_REQ_SUGGS = 5
-# Classes for objects sent as responses
+# Classes for values sent as responses
class Event:
""" Represents an historical event """
def __init__(
@@ -146,17 +146,6 @@ def handleReq(dbFile: str, environ: dict[str, str]) -> None | EventResponse | Ev
elif reqType == 'sugg':
return handleSuggReq(params, dbCur)
return None
-def reqParamToHistDate(s: str):
- """ Produces a HistDate from strings like '2010-10-3', '-8000', and '' (throws ValueError if invalid) """
- if not s:
- return None
- m = re.match(r'(-?\d+)(?:-(\d+)-(\d+))?', s)
- if m is None:
- raise ValueError('Invalid HistDate string')
- if m.lastindex == 1:
- return HistDate(None, int(m.group(1)))
- else:
- return HistDate(True, int(m.group(1)), int(m.group(2)), int(m.group(3)))
# For type=events
def handleEventsReq(params: dict[str, str], dbCur: sqlite3.Cursor) -> EventResponse | None:
@@ -201,6 +190,17 @@ def handleEventsReq(params: dict[str, str], dbCur: sqlite3.Cursor) -> EventRespo
events = lookupEvents(start, end, scale, ctg, incl, resultLimit, dbCur)
unitCounts = lookupUnitCounts(start, end, scale, dbCur)
return EventResponse(events, unitCounts)
+def reqParamToHistDate(s: str):
+ """ Produces a HistDate from strings like '2010-10-3', '-8000', and '' (throws ValueError if invalid) """
+ if not s:
+ return None
+ m = re.match(r'(-?\d+)(?:-(\d+)-(\d+))?', s)
+ if m is None:
+ raise ValueError('Invalid HistDate string')
+ if m.lastindex == 1:
+ return HistDate(None, int(m.group(1)))
+ else:
+ return HistDate(True, int(m.group(1)), int(m.group(2)), int(m.group(3)))
def lookupEvents(start: HistDate | None, end: HistDate | None, scale: int, ctg: str | None,
incl: int | None, resultLimit: int, dbCur: sqlite3.Cursor) -> list[Event]:
""" Looks for events within a date range, in given scale,
@@ -217,7 +217,7 @@ def lookupEvents(start: HistDate | None, end: HistDate | None, scale: int, ctg:
if start is not None:
constraint = '(start >= ? AND fmt > 0 OR start >= ? AND fmt = 0)'
if start.gcal is None:
- startJdn = gregorianToJdn(start.year, 1, 1) if start.year >= -4713 else 0
+ startJdn = gregorianToJdn(start.year, 1, 1) if start.year >= MIN_CAL_YEAR else 0
constraints.append(constraint)
params.extend([startJdn, start.year])
else:
@@ -228,7 +228,7 @@ def lookupEvents(start: HistDate | None, end: HistDate | None, scale: int, ctg:
if end is not None:
constraint = '(start <= ? AND fmt > 0 OR start <= ? AND fmt = 0)'
if end.gcal is None:
- endJdn = gregorianToJdn(end.year, 1, 1) if end.year >= -4713 else -1
+ endJdn = gregorianToJdn(end.year, 1, 1) if end.year >= MIN_CAL_YEAR else -1
constraints.append(constraint)
params.extend([endJdn, end.year])
else:
@@ -269,7 +269,7 @@ def eventEntryToResults(
dateVals: list[int | None] = [start, startUpper, end, endUpper]
newDates: list[HistDate | None] = [None for n in dateVals]
for i, n in enumerate(dateVals):
- if n:
+ if n is not None:
newDates[i] = dbDateToHistDate(n, fmt, i < 2)
#
return Event(eventId, title, newDates[0], newDates[1], newDates[2], newDates[3], ctg, imageId, pop)
diff --git a/backend/tests/enwiki/test_gen_img_data.py b/backend/tests/enwiki/test_gen_img_data.py
index 04fdd69..d18dddf 100644
--- a/backend/tests/enwiki/test_gen_img_data.py
+++ b/backend/tests/enwiki/test_gen_img_data.py
@@ -17,11 +17,11 @@ class TestGetInputPageIdsFromDb(unittest.TestCase):
'start INT, start_upper INT, end INT, end_upper INT, fmt INT, ctg TEXT)',
'INSERT INTO events VALUES (?, ?, ?, ?, ?, ?, ?, ?)',
{
- (1, 'Belgium', 2389729, None, None, None, 2, 'country'),
- (2, 'George Washington', 2353711, None, 2378478, None, 2, 'human'),
- (3, 'Douglas Adams', 2434082, None, 2452040, None, 2, 'human'),
- (4, 'World War II', 2429507, None, 2431700, None, 2, 'event'),
- (5, 'Marie Curie', 2403277, None, 2427622, None, 2, 'human'),
+ (1, 'Belgium', 2389729, None, None, None, 1, 'country'),
+ (2, 'George Washington', 2353711, None, 2378478, None, 1, 'human'),
+ (3, 'Douglas Adams', 2434082, None, 2452040, None, 1, 'human'),
+ (4, 'World War II', 2429507, None, 2431700, None, 1, 'event'),
+ (5, 'Marie Curie', 2403277, None, 2427622, None, 1, 'human'),
}
)
# Create temp dump-index db
diff --git a/backend/tests/test_cal.py b/backend/tests/test_cal.py
index d5f2860..78b2c8b 100644
--- a/backend/tests/test_cal.py
+++ b/backend/tests/test_cal.py
@@ -30,8 +30,8 @@ class TestCal(unittest.TestCase):
self.assertEqual(julianToGregorian(1616, 4, 23), (1616, 5, 3))
def test_db_to_hist_date(self):
self.assertEqual(dbDateToHistDate(2001, 0), HistDate(True, 2001, 1, 1))
- self.assertEqual(dbDateToHistDate(1721455, 1), HistDate(False, 1, 2, 1))
- self.assertEqual(dbDateToHistDate(1356438, 2), HistDate(True, -1000, 9, 13))
+ self.assertEqual(dbDateToHistDate(1356438, 1), HistDate(True, -1000, 9, 13))
+ self.assertEqual(dbDateToHistDate(1721455, 2), HistDate(False, 1, 2, 1))
self.assertEqual(dbDateToHistDate(2268942, 3, False), HistDate(False, 1500, 1, 10))
self.assertEqual(dbDateToHistDate(2268933, 3, True), HistDate(True, 1500, 1, 10))
def test_date_to_unit(self):
diff --git a/backend/tests/test_gen_desc_data.py b/backend/tests/test_gen_desc_data.py
index eabe644..4c902ad 100644
--- a/backend/tests/test_gen_desc_data.py
+++ b/backend/tests/test_gen_desc_data.py
@@ -50,7 +50,7 @@ class TestGenData(unittest.TestCase):
(10, 'I', 100, None, None, None, 0, 'event'),
(20, 'II', 200, None, None, None, 0, 'discovery'),
(30, 'III', 300, None, 350, None, 0, 'event'),
- (50, 'V', 5, 10, None, None, 1, 'human'),
+ (50, 'V', 5, 10, None, None, 2, 'human'),
}
)
# Run
diff --git a/backend/tests/test_gen_disp_data.py b/backend/tests/test_gen_disp_data.py
index c39c962..db6ddc0 100644
--- a/backend/tests/test_gen_disp_data.py
+++ b/backend/tests/test_gen_disp_data.py
@@ -20,13 +20,13 @@ class TestGenData(unittest.TestCase):
(2, 'event two', 2452607, None, 2455369, None, 3, 'human'), # 15/11/2002
(3, 'event three', 1900, None, 2000, None, 0, 'event'), # version of 1 without pop score
(4, 'event four', 1901, None, 2000, 2010, 0, 'event'),
- (5, 'event five', 2415307, None, None, None, 1, 'event'), # 01/10/1900
- (6, 'event six', 2415030, None, None, None, 2, 'event'), # 10/01/1900
+ (5, 'event five', 2415307, None, None, None, 2, 'event'), # 01/10/1900
+ (6, 'event six', 2415030, None, None, None, 1, 'event'), # 10/01/1900
(7, 'event seven', 1900, None, None, None, 0, 'event'), # popular version of 1
(8, 'event eight', 1900, None, None, None, 0, 'event'), # less popular version of 1
(9, 'event nine', 1900, None, None, None, 0, 'event'), # less popular version of 1
- (10, 'event ten', 2415307, None, None, None, 1, 'event'), # less popular version of 5
- (11, 'event eleven', 2415307, None, None, None, 1, 'event'), # slightly less popular version of 5
+ (10, 'event ten', 2415307, None, None, None, 2, 'event'), # less popular version of 5
+ (11, 'event eleven', 2415307, None, None, None, 2, 'event'), # slightly less popular version of 5
}
)
createTestDbTable(
@@ -55,10 +55,10 @@ class TestGenData(unittest.TestCase):
(1, 'event one', 1900, None, None, None, 0, 'event'),
(2, 'event two', 2452607, None, 2455369, None, 3, 'human'),
(4, 'event four', 1901, None, 2000, 2010, 0, 'event'),
- (5, 'event five', 2415307, None, None, None, 1, 'event'),
- (6, 'event six', 2415030, None, None, None, 2, 'event'),
+ (5, 'event five', 2415307, None, None, None, 2, 'event'),
+ (6, 'event six', 2415030, None, None, None, 1, 'event'),
(7, 'event seven', 1900, None, None, None, 0, 'event'),
- (11, 'event eleven', 2415307, None, None, None, 1, 'event'), # 01/10/1900
+ (11, 'event eleven', 2415307, None, None, None, 2, 'event'), # 01/10/1900
}
)
self.assertEqual(
diff --git a/backend/tests/test_gen_events_data.py b/backend/tests/test_gen_events_data.py
index 0f298ca..b3dfddc 100644
--- a/backend/tests/test_gen_events_data.py
+++ b/backend/tests/test_gen_events_data.py
@@ -1,6 +1,6 @@
import unittest
import tempfile, os, json, bz2, pickle, indexed_bzip2
-
+# Local imports
from tests.common import readTestDbTable
from hist_data.gen_events_data import genData
@@ -115,6 +115,7 @@ class TestGenData(unittest.TestCase):
'id': 'Q6',
'claims': {
'P31': [{'mainsnak': {'datavalue': {'value': {'id': 'Q7725634'}}}}], # 'instance of' 'literary work'
+ 'P170': [{'mainsnak': {'datavalue': {'value': {'id': 'Q180'}}}}], # 'creator'
'P1319': [{'mainsnak': {'datavalue': {'type': 'time', 'value': { # 'earliest date'
'time':'-0020-08-01T00:00:00Z',
'precision':11, # day precision
@@ -132,6 +133,7 @@ class TestGenData(unittest.TestCase):
'id': 'Q7',
'claims': {
'P31': [{'mainsnak': {'datavalue': {'value': {'id': 'Q11424'}}}}], # 'instance of' 'film'
+ 'P136': [{'mainsnak': {'datavalue': {'value': {'id': 'Q157394'}}}}], # 'genre'
'P577': [{'mainsnak': {'datavalue': {'type': 'time', 'value': { # 'publication date'
'time':'-2103-00-00T00:00:00Z',
'precision':7, # century precision
@@ -144,18 +146,24 @@ class TestGenData(unittest.TestCase):
'id': 'Q8',
'claims': {
'P31': [{'mainsnak': {'datavalue': {'value': {'id': 'Q16521'}}}}], # 'instance of' 'taxon'
- }
- # No title
+ 'P571': [{'mainsnak': {'datavalue': {'type': 'time', 'value': { # 'inception'
+ 'time':'-400000000-00-00T00:00:01Z',
+ 'precision':1, # hundred million years precision
+ 'calendarmodel':'http://www.wikidata.org/entity/Q1985727' # 'proleptic gregorian calendar'
+ }}}}],
+ },
+ 'sitelinks': {'enwiki': {'title': 'organism one'}},
},
]
self.expectedRows = {
- (1, 'event one', 2433617, 2433647, None, None, 2, 'event'),
- (2, 'Human One', 2452594, None, 2455369, None, 3, 'human'),
- (3, 'country one', -1001, None, -99, None, 0, 'country'),
- (4, 'country two', -9000, -7000, None, None, 0, 'country'),
+ (1, 'event one', 2433617, 2433647, None, None, 1, 'event'),
+ (2, 'Human One', 2452594, None, 2455369, None, 3, 'person'),
+ (3, 'country one', -1001, None, -99, None, 0, 'place'),
+ (4, 'country two', -9000, -7000, None, None, 0, 'place'),
(5, 'discovery one', 1, 1000, None, None, 0, 'discovery'),
- (6, 'media one', 1714331, None, 1714362, None, 1, 'media'),
- (7, 'media two', -2199, -2100, None, None, 0, 'media'),
+ (6, 'media one', 1714331, None, 1714362, None, 2, 'work'),
+ (7, 'media two', -2199, -2100, None, None, 0, 'work'),
+ (8, 'organism one', -400000000, -300000001, None, None, 0, 'organism'),
}
def test_wikiItems(self):
rows = runGenData(self.testWikiItems, False, 1)
diff --git a/backend/tests/test_gen_imgs.py b/backend/tests/test_gen_imgs.py
index f8bfeb6..ea4bd70 100644
--- a/backend/tests/test_gen_imgs.py
+++ b/backend/tests/test_gen_imgs.py
@@ -50,7 +50,7 @@ class TestGenImgs(unittest.TestCase):
{
(10, 'first', 100, 1000, None, None, 0, 'event'),
(20, 'second', 10, 20, None, None, 0, 'event'),
- (30, 'third', 1, 20, 30, 40, 2, 'event'),
+ (30, 'third', 1, 20, 30, 40, 1, 'event'),
}
)
# Run
diff --git a/backend/tests/test_histplorer.py b/backend/tests/test_histplorer.py
index be01a90..592d534 100644
--- a/backend/tests/test_histplorer.py
+++ b/backend/tests/test_histplorer.py
@@ -13,8 +13,8 @@ def initTestDb(dbFile: str) -> None:
{
(1, 'event one', 1900, None, None, None, 0, 'event'),
(2, 'event two', 2452594, None, 2455369, None, 3, 'human'), # 2/11/2002 to 21/06/2010
- (3, 'event three', 2448175, 2451828, None, None, 2, 'discovery'), # 10/10/1990 til 10/10/2000
- (4, 'event four', 991206, None, 1721706, None, 1, 'event'), # 10/10/-2000 to 10/10/1
+		(3, 'event three', 2448175, 2451828, None, None, 1, 'discovery'), # 10/10/1990 to 10/10/2000
+ (4, 'event four', 991206, None, 1721706, None, 2, 'event'), # 10/10/-2000 to 10/10/1
(5, 'event five', 2000, None, 2001, None, 0, 'event'),
(6, 'event six', 1900, None, 2000, None, 0, 'event'),
}
diff --git a/backend/tests/test_reduce_event_data.py b/backend/tests/test_reduce_event_data.py
index 7f1ce73..22fe204 100644
--- a/backend/tests/test_reduce_event_data.py
+++ b/backend/tests/test_reduce_event_data.py
@@ -18,7 +18,7 @@ class TestReduceData(unittest.TestCase):
{
(1, 'event one', 1900, None, None, None, 0, 'event'),
(2, 'event two', 2452594, None, 2455369, None, 3, 'human'), # 2/11/2002
- (3, 'event three', 2448175, 2448200, None, None, 2, 'discovery'), # 10/10/1990
+ (3, 'event three', 2448175, 2448200, None, None, 1, 'discovery'), # 10/10/1990
(4, 'event four', 1900, None, None, None, 0, 'event'), # Copy of 1
(5, 'event five', 2452595, None, 2455369, None, 3, 'human'), # Day after 2
}
diff --git a/src/components/TimeLine.vue b/src/components/TimeLine.vue
index 1e78a87..c869aa3 100644
--- a/src/components/TimeLine.vue
+++ b/src/components/TimeLine.vue
@@ -245,7 +245,7 @@ class Tick {
}
function getNumDisplayUnits({inclOffsets=true} = {}): number { // Get num major units in display range
let unitDiff = Math.ceil(getUnitDiff(startDate.value, endDate.value, scale.value));
- // Note: Rounding up due to cases like 1 CE to 10 CE with 10-year scale
+ // Note: Rounding up due to cases like 1 AD to 10 AD with 10-year scale
if (inclOffsets){
unitDiff += startOffset.value + endOffset.value;
}
@@ -886,7 +886,7 @@ function zoomTimeline(zoomRatio: number){
newStartOffset /= oldUnitsPerNew;
newEndOffset /= oldUnitsPerNew;
// Shift starting and ending points to align with new scale
- // Note: There is some distortion due to not fully accounting for no year 0 CE here,
+ // Note: There is some distortion due to not fully accounting for no year 0 AD here,
// but the result seems tolerable, and resolving it adds a fair bit of code complexity
let newStartSubUnits =
(scale.value == DAY_SCALE) ? getDaysInMonth(newStart.year, newStart.month) :
@@ -929,7 +929,7 @@ function zoomTimeline(zoomRatio: number){
} else {
newStart.year = Math.floor(newStart.year / newScale) * newScale;
newEnd.year = Math.floor(newEnd.year / newScale) * newScale;
- // Account for no 0 CE
+ // Account for no 0 AD
if (newStart.year == 0){
newStart.year = 1;
}
diff --git a/src/lib.ts b/src/lib.ts
index d9d5867..e2d65f9 100644
--- a/src/lib.ts
+++ b/src/lib.ts
@@ -150,7 +150,7 @@ export class HistDate {
}
getYearDiff(other: HistDate){
let yearDiff = Math.abs(this.year - other.year);
- if (this.year * other.year < 0){ // Account for no 0 CE
+ if (this.year * other.year < 0){ // Account for no 0 AD
yearDiff -= 1;
}
return yearDiff;
@@ -418,7 +418,7 @@ export function stepDate( // If stepping by month or years, leaves day value unc
let newYear;
if (forward){
newYear = newDate.year + count*scale;
- if (newYear == 0){ // Account for there being no 0 CE
+ if (newYear == 0){ // Account for there being no 0 AD
newYear = 1;
} else if (newDate.year == 1 && scale > 1){
newYear -= 1;
@@ -471,7 +471,7 @@ export function getNumSubUnits(date: HistDate, scaleIdx: number){
} else if (scale == 1){
return 12;
} else {
- return scale / SCALES[scaleIdx + 1] - (date.year == 1 ? 1 : 0); // Account for lack of 0 CE
+ return scale / SCALES[scaleIdx + 1] - (date.year == 1 ? 1 : 0); // Account for lack of 0 AD
}
}
export function getUnitDiff(date: HistDate, date2: HistDate, scale: number): number {