about | summary | refs | log | tree | commit | diff
path: root/backend/tests/wikidata
diff options
context:
space:
mode:
Diffstat (limited to 'backend/tests/wikidata')
-rw-r--r-- backend/tests/wikidata/test_gen_events_data.py 171
1 files changed, 0 insertions, 171 deletions
diff --git a/backend/tests/wikidata/test_gen_events_data.py b/backend/tests/wikidata/test_gen_events_data.py
deleted file mode 100644
index faa19c9..0000000
--- a/backend/tests/wikidata/test_gen_events_data.py
+++ /dev/null
@@ -1,171 +0,0 @@
-import unittest
-import tempfile, os, json, bz2, pickle, indexed_bzip2
-
-from tests.common import readTestDbTable
-from hist_data.wikidata.gen_events_data import genData
-
-def runGenData(wikiItemArray: str, preGenOffsets: bool, nProcs: int):
- """ Sets up wikidata file to be read by genData(), runs it, and returns the output database's contents.
- If 'preGenOffsets' is True, generates a bz2 offsets file before running genData(). """
- with tempfile.TemporaryDirectory() as tempDir:
- # Create temp wikidata file
- wikidataFile = os.path.join(tempDir, 'dump.json.bz2')
- with bz2.open(wikidataFile, mode='wb') as file:
- file.write(b'[\n')
- for i in range(len(wikiItemArray)):
- file.write(json.dumps(wikiItemArray[i], separators=(',',':')).encode())
- if i < len(wikiItemArray) - 1:
- file.write(b',')
- file.write(b'\n')
- file.write(b']\n')
- # Create temp offsets file if requested
- offsetsFile = os.path.join(tempDir, 'offsets.dat')
- if preGenOffsets:
- with indexed_bzip2.open(wikidataFile) as file:
- with open(offsetsFile, 'wb') as file2:
- pickle.dump(file.block_offsets(), file2)
- # Run genData()
- dbFile = os.path.join(tempDir, 'events.db')
- genData(wikidataFile, offsetsFile, dbFile, nProcs)
- # Read db
- return readTestDbTable(dbFile, 'SELECT * FROM events')
-
-class TestGenData(unittest.TestCase):
- def setUp(self):
- self.maxDiff = None # Remove output-diff size limit
- self.testWikiItems = [
- {
- 'id': 'Q1',
- 'claims': {
- 'P31': [{'mainsnak': {'datavalue': {'value': {'id': 'Q1656682'}}}}], # 'instance of' 'event'
- 'P585': [{'mainsnak': {'datavalue': {'type': 'time', 'value': { # 'point in time'
- 'time':'+1950-12-00T00:00:00Z',
- 'timezone':0,
- 'before':0,
- 'after':0,
- 'precision':10, # month precision
- 'calendarmodel':'http://www.wikidata.org/entity/Q1985727' # 'proleptic gregorian calendar'
- }}}}],
- 'P141': [{'mainsnak': {'datavalue': {'value': {'id': 'Q211005'}}}}], # Other random property
- },
- 'sitelinks': {'enwiki': {'title': 'event one'}},
- },
- {
- 'id': 'Q2',
- 'claims': {
- 'P31': [{'mainsnak': {'datavalue': {'value': {'id': 'Q5'}}}}], # 'instance of' 'human'
- 'P569': [{'mainsnak': {'datavalue': {'type': 'time', 'value': { # 'date of birth'
- 'time':'+2002-11-02T00:00:00Z',
- 'precision':11, # day precision
- 'calendarmodel':'http://www.wikidata.org/entity/Q1985786' # 'proleptic julian calendar'
- }}}}],
- 'P570': [{'mainsnak': {'datavalue': {'type': 'time', 'value': { # 'date of death'
- 'time':'+2010-06-21T00:00:01Z',
- 'timezone':1,
- 'precision':11,
- 'calendarmodel':'http://www.wikidata.org/entity/Q1985727' # 'proleptic gregorian calendar'
- }}}}],
- },
- 'sitelinks': {'enwiki': {'title': 'Human One'}},
- },
- {
- 'id': 'Q3',
- 'claims': {
- 'P31': [{'mainsnak': {'datavalue': {'value': {'id': 'Q7275'}}}}], # 'instance of' 'state'
- 'P580': [{'mainsnak': {'datavalue': {'type': 'time', 'value': { # 'start time'
- 'time':'-1001-00-00T00:00:00Z',
- 'precision':9, # year precision
- 'calendarmodel':'http://www.wikidata.org/entity/Q1985727'
- }}}}],
- 'P582': [{'mainsnak': {'datavalue': {'type': 'time', 'value': { # 'end time'
- 'time':'-99-00-00T00:00:01Z',
- 'precision':9,
- 'calendarmodel':'http://www.wikidata.org/entity/Q1985786'
- }}}}],
- },
- 'sitelinks': {'enwiki': {'title': 'country one'}},
- },
- {
- 'id': 'Q4',
- 'claims': {
- 'P31': [{'mainsnak': {'datavalue': {'value': {'id': 'Q6256'}}}}], # 'instance of' 'country'
- 'P7584': [{'mainsnak': {'datavalue': {'type': 'quantity', 'value': {
- # 'age estimated by a dating method'
- "amount":"+10.9",
- "unit":"http://www.wikidata.org/entity/Q3013059", # kiloannum
- "lowerBound":"+9",
- "upperBound":"+11",
- }}}}],
- },
- 'sitelinks': {'enwiki': {'title': 'country two'}},
- },
- {
- 'id': 'Q5',
- 'claims': {
- 'P31': [{'mainsnak': {'datavalue': {'value': {'id': 'Q11019'}}}}], # 'instance of' 'machine'
- 'P575': [{'mainsnak': {'datavalue': {'type': 'time', 'value': { # 'time of discovery or invention'
- 'time':'+0101-00-00T00:00:01Z',
- 'precision':6, # millenium precision
- 'calendarmodel':'http://www.wikidata.org/entity/Q1985786'
- }}}}],
- },
- 'sitelinks': {'enwiki': {'title': 'discovery one'}},
- },
- {
- 'id': 'Q6',
- 'claims': {
- 'P31': [{'mainsnak': {'datavalue': {'value': {'id': 'Q7725634'}}}}], # 'instance of' 'literary work'
- 'P1319': [{'mainsnak': {'datavalue': {'type': 'time', 'value': { # 'earliest date'
- 'time':'-0020-08-01T00:00:00Z',
- 'precision':11, # day precision
- 'calendarmodel':'http://www.wikidata.org/entity/Q1985786' # 'proleptic julian calendar'
- }}}}],
- 'P1326': [{'mainsnak': {'datavalue': {'type': 'time', 'value': { # 'latest date'
- 'time':'-0020-09-01T00:00:00Z',
- 'precision':11,
- 'calendarmodel':'http://www.wikidata.org/entity/Q1985786' # 'proleptic julian calendar'
- }}}}],
- },
- 'sitelinks': {'enwiki': {'title': 'media one'}},
- },
- {
- 'id': 'Q7',
- 'claims': {
- 'P31': [{'mainsnak': {'datavalue': {'value': {'id': 'Q11424'}}}}], # 'instance of' 'film'
- 'P577': [{'mainsnak': {'datavalue': {'type': 'time', 'value': { # 'publication date'
- 'time':'-2103-00-00T00:00:00Z',
- 'precision':7, # century precision
- 'calendarmodel':'http://www.wikidata.org/entity/Q1985727'
- }}}}],
- },
- 'sitelinks': {'enwiki': {'title': 'media two'}},
- },
- {
- 'id': 'Q8',
- 'claims': {
- 'P31': [{'mainsnak': {'datavalue': {'value': {'id': 'Q16521'}}}}], # 'instance of' 'taxon'
- }
- # No title
- },
- ]
- self.expectedRows = {
- (1, 'event one', 2433616, 2433646, None, None, 2, 'event'),
- (2, 'Human One', 2452593, None, 2455368, None, 3, 'human'),
- (3, 'country one', -1001, None, -99, None, 0, 'country'),
- (4, 'country two', -9000, -7000, None, None, 0, 'country'),
- (5, 'discovery one', 1, 1000, None, None, 0, 'discovery'),
- (6, 'media one', 1713965, None, 1713996, None, 1, 'media'),
- (7, 'media two', -2199, -2100, None, None, 0, 'media'),
- }
- def test_wikiItems(self):
- rows = runGenData(self.testWikiItems, False, 1)
- self.assertEqual(rows, self.expectedRows)
- def test_empty_dump(self):
- rows = runGenData([{}], False, 1)
- self.assertEqual(rows, set())
- def test_multiprocessing(self):
- rows = runGenData(self.testWikiItems, False, 4)
- self.assertEqual(rows, self.expectedRows)
- def test_existing_offsets(self):
- rows = runGenData(self.testWikiItems, True, 3)
- self.assertEqual(rows, self.expectedRows)