diff options
Diffstat (limited to 'backend/tests/wikidata/test_gen_events_data.py')
| -rw-r--r-- | backend/tests/wikidata/test_gen_events_data.py | 171 |
1 files changed, 0 insertions, 171 deletions
diff --git a/backend/tests/wikidata/test_gen_events_data.py b/backend/tests/wikidata/test_gen_events_data.py deleted file mode 100644 index faa19c9..0000000 --- a/backend/tests/wikidata/test_gen_events_data.py +++ /dev/null @@ -1,171 +0,0 @@ -import unittest -import tempfile, os, json, bz2, pickle, indexed_bzip2 - -from tests.common import readTestDbTable -from hist_data.wikidata.gen_events_data import genData - -def runGenData(wikiItemArray: str, preGenOffsets: bool, nProcs: int): - """ Sets up wikidata file to be read by genData(), runs it, and returns the output database's contents. - If 'preGenOffsets' is True, generates a bz2 offsets file before running genData(). """ - with tempfile.TemporaryDirectory() as tempDir: - # Create temp wikidata file - wikidataFile = os.path.join(tempDir, 'dump.json.bz2') - with bz2.open(wikidataFile, mode='wb') as file: - file.write(b'[\n') - for i in range(len(wikiItemArray)): - file.write(json.dumps(wikiItemArray[i], separators=(',',':')).encode()) - if i < len(wikiItemArray) - 1: - file.write(b',') - file.write(b'\n') - file.write(b']\n') - # Create temp offsets file if requested - offsetsFile = os.path.join(tempDir, 'offsets.dat') - if preGenOffsets: - with indexed_bzip2.open(wikidataFile) as file: - with open(offsetsFile, 'wb') as file2: - pickle.dump(file.block_offsets(), file2) - # Run genData() - dbFile = os.path.join(tempDir, 'events.db') - genData(wikidataFile, offsetsFile, dbFile, nProcs) - # Read db - return readTestDbTable(dbFile, 'SELECT * FROM events') - -class TestGenData(unittest.TestCase): - def setUp(self): - self.maxDiff = None # Remove output-diff size limit - self.testWikiItems = [ - { - 'id': 'Q1', - 'claims': { - 'P31': [{'mainsnak': {'datavalue': {'value': {'id': 'Q1656682'}}}}], # 'instance of' 'event' - 'P585': [{'mainsnak': {'datavalue': {'type': 'time', 'value': { # 'point in time' - 'time':'+1950-12-00T00:00:00Z', - 'timezone':0, - 'before':0, - 'after':0, - 'precision':10, # month precision - 'calendarmodel':'http://www.wikidata.org/entity/Q1985727' # 'proleptic gregorian calendar' - }}}}], - 'P141': [{'mainsnak': {'datavalue': {'value': {'id': 'Q211005'}}}}], # Other random property - }, - 'sitelinks': {'enwiki': {'title': 'event one'}}, - }, - { - 'id': 'Q2', - 'claims': { - 'P31': [{'mainsnak': {'datavalue': {'value': {'id': 'Q5'}}}}], # 'instance of' 'human' - 'P569': [{'mainsnak': {'datavalue': {'type': 'time', 'value': { # 'date of birth' - 'time':'+2002-11-02T00:00:00Z', - 'precision':11, # day precision - 'calendarmodel':'http://www.wikidata.org/entity/Q1985786' # 'proleptic julian calendar' - }}}}], - 'P570': [{'mainsnak': {'datavalue': {'type': 'time', 'value': { # 'date of death' - 'time':'+2010-06-21T00:00:01Z', - 'timezone':1, - 'precision':11, - 'calendarmodel':'http://www.wikidata.org/entity/Q1985727' # 'proleptic gregorian calendar' - }}}}], - }, - 'sitelinks': {'enwiki': {'title': 'Human One'}}, - }, - { - 'id': 'Q3', - 'claims': { - 'P31': [{'mainsnak': {'datavalue': {'value': {'id': 'Q7275'}}}}], # 'instance of' 'state' - 'P580': [{'mainsnak': {'datavalue': {'type': 'time', 'value': { # 'start time' - 'time':'-1001-00-00T00:00:00Z', - 'precision':9, # year precision - 'calendarmodel':'http://www.wikidata.org/entity/Q1985727' - }}}}], - 'P582': [{'mainsnak': {'datavalue': {'type': 'time', 'value': { # 'end time' - 'time':'-99-00-00T00:00:01Z', - 'precision':9, - 'calendarmodel':'http://www.wikidata.org/entity/Q1985786' - }}}}], - }, - 'sitelinks': {'enwiki': {'title': 'country one'}}, - }, - { - 'id': 'Q4', - 'claims': { - 'P31': [{'mainsnak': {'datavalue': {'value': {'id': 'Q6256'}}}}], # 'instance of' 'country' - 'P7584': [{'mainsnak': {'datavalue': {'type': 'quantity', 'value': { - # 'age estimated by a dating method' - "amount":"+10.9", - "unit":"http://www.wikidata.org/entity/Q3013059", # kiloannum - "lowerBound":"+9", - "upperBound":"+11", - }}}}], - }, - 'sitelinks': {'enwiki': {'title': 'country two'}}, - }, - { - 'id': 'Q5', - 'claims': { - 'P31': [{'mainsnak': {'datavalue': {'value': {'id': 'Q11019'}}}}], # 'instance of' 'machine' - 'P575': [{'mainsnak': {'datavalue': {'type': 'time', 'value': { # 'time of discovery or invention' - 'time':'+0101-00-00T00:00:01Z', - 'precision':6, # millenium precision - 'calendarmodel':'http://www.wikidata.org/entity/Q1985786' - }}}}], - }, - 'sitelinks': {'enwiki': {'title': 'discovery one'}}, - }, - { - 'id': 'Q6', - 'claims': { - 'P31': [{'mainsnak': {'datavalue': {'value': {'id': 'Q7725634'}}}}], # 'instance of' 'literary work' - 'P1319': [{'mainsnak': {'datavalue': {'type': 'time', 'value': { # 'earliest date' - 'time':'-0020-08-01T00:00:00Z', - 'precision':11, # day precision - 'calendarmodel':'http://www.wikidata.org/entity/Q1985786' # 'proleptic julian calendar' - }}}}], - 'P1326': [{'mainsnak': {'datavalue': {'type': 'time', 'value': { # 'latest date' - 'time':'-0020-09-01T00:00:00Z', - 'precision':11, - 'calendarmodel':'http://www.wikidata.org/entity/Q1985786' # 'proleptic julian calendar' - }}}}], - }, - 'sitelinks': {'enwiki': {'title': 'media one'}}, - }, - { - 'id': 'Q7', - 'claims': { - 'P31': [{'mainsnak': {'datavalue': {'value': {'id': 'Q11424'}}}}], # 'instance of' 'film' - 'P577': [{'mainsnak': {'datavalue': {'type': 'time', 'value': { # 'publication date' - 'time':'-2103-00-00T00:00:00Z', - 'precision':7, # century precision - 'calendarmodel':'http://www.wikidata.org/entity/Q1985727' - }}}}], - }, - 'sitelinks': {'enwiki': {'title': 'media two'}}, - }, - { - 'id': 'Q8', - 'claims': { - 'P31': [{'mainsnak': {'datavalue': {'value': {'id': 'Q16521'}}}}], # 'instance of' 'taxon' - } - # No title - }, - ] - self.expectedRows = { - (1, 'event one', 2433616, 2433646, None, None, 2, 'event'), - (2, 'Human One', 2452593, None, 2455368, None, 3, 'human'), - (3, 'country one', -1001, None, -99, None, 0, 'country'), - (4, 'country two', -9000, -7000, None, None, 0, 'country'), - (5, 'discovery one', 1, 1000, None, None, 0, 'discovery'), - (6, 'media one', 1713965, None, 1713996, None, 1, 'media'), - (7, 'media two', -2199, -2100, None, None, 0, 'media'), - } - def test_wikiItems(self): - rows = runGenData(self.testWikiItems, False, 1) - self.assertEqual(rows, self.expectedRows) - def test_empty_dump(self): - rows = runGenData([{}], False, 1) - self.assertEqual(rows, set()) - def test_multiprocessing(self): - rows = runGenData(self.testWikiItems, False, 4) - self.assertEqual(rows, self.expectedRows) - def test_existing_offsets(self): - rows = runGenData(self.testWikiItems, True, 3) - self.assertEqual(rows, self.expectedRows) |
