From 0e5e46cedaaeacf59cfd0f2e30c1ae6923466870 Mon Sep 17 00:00:00 2001 From: Terry Truong Date: Fri, 30 Dec 2022 23:28:09 +1100 Subject: Generate event_disp data before image-generation Make gen_disp_data.py delete non-displayable events Make reduce_event_data.py also delete from 'dist' and 'event_disp' Remove MAX_IMGS_PER_CTG from enwiki/gen_img_data.py Make gen_desc_data.py include events without images --- backend/tests/enwiki/test_gen_img_data.py | 21 ++----- backend/tests/test_gen_desc_data.py | 13 ----- backend/tests/test_gen_disp_data.py | 61 +++++++++++++++++---- backend/tests/test_reduce_event_data.py | 91 +++++++++++++++++++++++++++---- 4 files changed, 134 insertions(+), 52 deletions(-) (limited to 'backend/tests') diff --git a/backend/tests/enwiki/test_gen_img_data.py b/backend/tests/enwiki/test_gen_img_data.py index 93bb196..04fdd69 100644 --- a/backend/tests/enwiki/test_gen_img_data.py +++ b/backend/tests/enwiki/test_gen_img_data.py @@ -9,7 +9,7 @@ TEST_DUMP_FILE = os.path.join(os.path.dirname(__file__), 'sample_enwiki_pages_ar class TestGetInputPageIdsFromDb(unittest.TestCase): def test_get(self): with tempfile.TemporaryDirectory() as tempDir: - # Create temp tree-of-life db + # Create temp history db dbFile = os.path.join(tempDir, 'data.db') createTestDbTable( dbFile, @@ -24,19 +24,6 @@ class TestGetInputPageIdsFromDb(unittest.TestCase): (5, 'Marie Curie', 2403277, None, 2427622, None, 2, 'human'), } ) - # Create temp pageviews db - pageviewDb = os.path.join(tempDir, 'pageview_data.db') - createTestDbTable( - pageviewDb, - 'CREATE TABLE views (title TEXT PRIMARY KEY, id INT, views INT)', - 'INSERT INTO views VALUES (?, ?, ?)', - { - ('George Washington', 2, 8), - ('Marie Curie', 5, 10), - ('Douglas Adams', 3, 5), - ('Belgium', 1, 100), - } - ) # Create temp dump-index db indexDb = os.path.join(tempDir, 'dump_index.db') createTestDbTable( @@ -46,15 +33,15 @@ class TestGetInputPageIdsFromDb(unittest.TestCase): { ('Belgium',10,0,-1), ('George Washington',20,0,-1), - ('Douglas Adamns',30,0,-1), + ('Douglas Adams',30,0,-1), ('Marie Curie',50,0,-1), ('Autism',25,0,-1), } ) # Run - pageIds = getInputPageIdsFromDb(dbFile, pageviewDb, indexDb, 2) + pageIds = getInputPageIdsFromDb(dbFile, indexDb) # Check - self.assertEqual(pageIds, {50, 20, 10}) + self.assertEqual(pageIds, {10, 20, 30, 50}) class TestGenData(unittest.TestCase): def test_gen(self): diff --git a/backend/tests/test_gen_desc_data.py b/backend/tests/test_gen_desc_data.py index 6f321b4..eabe644 100644 --- a/backend/tests/test_gen_desc_data.py +++ b/backend/tests/test_gen_desc_data.py @@ -18,7 +18,6 @@ class TestGenData(unittest.TestCase): (3, 'III'), (4, 'IV'), (5, 'V'), - (6, 'VI'), } ) createTestDbTable( @@ -38,7 +37,6 @@ class TestGenData(unittest.TestCase): (3, 'Three'), (4, 'Four'), (5, 'Five'), - (6, 'Six'), } ) # Create temp history db @@ -53,17 +51,6 @@ class TestGenData(unittest.TestCase): (20, 'II', 200, None, None, None, 0, 'discovery'), (30, 'III', 300, None, 350, None, 0, 'event'), (50, 'V', 5, 10, None, None, 1, 'human'), - (60, 'VI', 6000, None, None, None, None, 'event'), - } - ) - createTestDbTable( - dbFile, - 'CREATE TABLE event_imgs (id INT PRIMARY KEY, img_id INT)', - 'INSERT INTO event_imgs VALUES (?, ?)', - { - (10, 100), - (30, 300), - (50, 500), } ) # Run diff --git a/backend/tests/test_gen_disp_data.py b/backend/tests/test_gen_disp_data.py index 464405a..c39c962 100644 --- a/backend/tests/test_gen_disp_data.py +++ b/backend/tests/test_gen_disp_data.py @@ -17,11 +17,16 @@ class TestGenData(unittest.TestCase): 'INSERT INTO events VALUES (?, ?, ?, ?, ?, ?, ?, ?)', { (1, 'event one', 1900, None, None, None, 0, 'event'), - (2, 'event two', 2452607, None, 2455369, None, 3, 'human'), # 15/11/2002 to 21/06/2010 - (3, 'event three', 1900, None, 2000, None, 0, 'event'), + (2, 'event two', 2452607, None, 2455369, None, 3, 'human'), # 15/11/2002 + (3, 'event three', 1900, None, 2000, None, 0, 'event'), # version of 1 without pop score (4, 'event four', 1901, None, 2000, 2010, 0, 'event'), (5, 'event five', 2415307, None, None, None, 1, 'event'), # 01/10/1900 (6, 'event six', 2415030, None, None, None, 2, 'event'), # 10/01/1900 + (7, 'event seven', 1900, None, None, None, 0, 'event'), # popular version of 1 + (8, 'event eight', 1900, None, None, None, 0, 'event'), # less popular version of 1 + (9, 'event nine', 1900, None, None, None, 0, 'event'), # less popular version of 1 + (10, 'event ten', 2415307, None, None, None, 1, 'event'), # less popular version of 5 + (11, 'event eleven', 2415307, None, None, None, 1, 'event'), # slightly less popular version of 5 } ) createTestDbTable( @@ -34,26 +39,55 @@ class TestGenData(unittest.TestCase): (4, 5), (5, 50), (6, 10), + (7, 100), + (8, 1), + (9, 2), + (10, 40), + (11, 45), } ) # Run genData(dbFile, [10, 1, MONTH_SCALE, DAY_SCALE], 2) # Check + self.assertEqual( + readTestDbTable(dbFile, 'SELECT * FROM events'), + { + (1, 'event one', 1900, None, None, None, 0, 'event'), + (2, 'event two', 2452607, None, 2455369, None, 3, 'human'), + (4, 'event four', 1901, None, 2000, 2010, 0, 'event'), + (5, 'event five', 2415307, None, None, None, 1, 'event'), + (6, 'event six', 2415030, None, None, None, 2, 'event'), + (7, 'event seven', 1900, None, None, None, 0, 'event'), + (11, 'event eleven', 2415307, None, None, None, 1, 'event'), # 01/10/1900 + } + ) + self.assertEqual( + readTestDbTable(dbFile, 'SELECT * FROM pop'), + { + (1, 11), + (2, 21), + (4, 5), + (5, 50), + (6, 10), + (7, 100), + (11, 45), + } + ) self.assertEqual( readTestDbTable(dbFile, 'SELECT scale, unit, count FROM dist'), { - (10, 190, 4), + (10, 190, 6), (10, 200, 1), - (1, 1900, 3), + (1, 1900, 5), (1, 1901, 1), (1, 2002, 1), - (MONTH_SCALE, gregorianToJdn(1900, 1, 1), 2), + (MONTH_SCALE, gregorianToJdn(1900, 1, 1), 3), (MONTH_SCALE, gregorianToJdn(1901, 1, 1), 1), - (MONTH_SCALE, julianToJdn(1900, 10, 1), 1), + (MONTH_SCALE, julianToJdn(1900, 10, 1), 2), (MONTH_SCALE, julianToJdn(2002, 11, 1), 1), - (DAY_SCALE, gregorianToJdn(1900, 1, 1), 1), + (DAY_SCALE, gregorianToJdn(1900, 1, 1), 2), (DAY_SCALE, gregorianToJdn(1900, 1, 10), 1), - (DAY_SCALE, julianToJdn(1900, 10, 1), 1), + (DAY_SCALE, julianToJdn(1900, 10, 1), 2), (DAY_SCALE, gregorianToJdn(1901, 1, 1), 1), (DAY_SCALE, julianToJdn(2002, 11, 15), 1), } @@ -62,21 +96,24 @@ class TestGenData(unittest.TestCase): readTestDbTable(dbFile, 'SELECT id, scale FROM event_disp'), { (5, 10), - (1, 10), + (7, 10), (2, 10), (5, 1), - (1, 1), + (7, 1), (4, 1), (2, 1), (1, MONTH_SCALE), - (6, MONTH_SCALE), + (7, MONTH_SCALE), (4, MONTH_SCALE), (5, MONTH_SCALE), + (11, MONTH_SCALE), (2, MONTH_SCALE), (1, DAY_SCALE), + (7, DAY_SCALE), + (6, DAY_SCALE), (4, DAY_SCALE), (5, DAY_SCALE), - (6, DAY_SCALE), + (11, DAY_SCALE), (2, DAY_SCALE), } ) diff --git a/backend/tests/test_reduce_event_data.py b/backend/tests/test_reduce_event_data.py index c879150..7f1ce73 100644 --- a/backend/tests/test_reduce_event_data.py +++ b/backend/tests/test_reduce_event_data.py @@ -3,6 +3,7 @@ import tempfile, os from tests.common import createTestDbTable, readTestDbTable from hist_data.reduce_event_data import reduceData +from hist_data.cal import gregorianToJdn, julianToJdn, MONTH_SCALE, DAY_SCALE class TestReduceData(unittest.TestCase): def test_reduce(self): @@ -16,8 +17,10 @@ class TestReduceData(unittest.TestCase): 'INSERT INTO events VALUES (?, ?, ?, ?, ?, ?, ?, ?)', { (1, 'event one', 1900, None, None, None, 0, 'event'), - (2, 'event two', 2452594, None, 2455369, None, 3, 'human'), # 2/11/2002 to 21/06/2010 - (3, 'event three', 2448175, 2451828, None, None, 2, 'discovery'), # 10/10/1990 to 10/10/2000 + (2, 'event two', 2452594, None, 2455369, None, 3, 'human'), # 2/11/2002 + (3, 'event three', 2448175, 2448200, None, None, 2, 'discovery'), # 10/10/1990 + (4, 'event four', 1900, None, None, None, 0, 'event'), # Copy of 1 + (5, 'event five', 2452595, None, 2455369, None, 3, 'human'), # Day after 2 } ) createTestDbTable( @@ -25,8 +28,50 @@ class TestReduceData(unittest.TestCase): 'CREATE TABLE pop (id INT PRIMARY KEY, pop INT)', 'INSERT INTO pop VALUES (?, ?)', { - (1, 11), - (2, 21), + (1, 10), + (2, 20), + (3, 30), + (4, 40), + (5, 50), + } + ) + createTestDbTable( + dbFile, + 'CREATE TABLE dist (scale INT, unit INT, count INT, PRIMARY KEY (scale, unit))', + 'INSERT INTO dist VALUES (?, ?, ?)', + { + (1, 1900, 2), + (1, 1990, 1), + (1, 2002, 2), + (MONTH_SCALE, gregorianToJdn(1900, 1, 1), 2), + (MONTH_SCALE, gregorianToJdn(1990, 10, 1), 1), + (MONTH_SCALE, julianToJdn(2002, 11, 1), 2), + (DAY_SCALE, gregorianToJdn(1900, 1, 1), 2), + (DAY_SCALE, gregorianToJdn(1990, 10, 10), 1), + (DAY_SCALE, 2452594, 1), + (DAY_SCALE, 2452595, 1), + } + ) + createTestDbTable( + dbFile, + 'CREATE TABLE event_disp (id INT, scale INT, PRIMARY KEY (id, scale))', + 'INSERT INTO event_disp VALUES (?, ?)', + { + (1, 1), + (1, MONTH_SCALE), + (1, DAY_SCALE), + (2, 1), + (2, MONTH_SCALE), + (2, DAY_SCALE), + (3, 1), + (3, MONTH_SCALE), + (3, DAY_SCALE), + (4, 1), + (4, MONTH_SCALE), + (4, DAY_SCALE), + (5, 1), + (5, MONTH_SCALE), + (5, DAY_SCALE), } ) createTestDbTable( @@ -34,7 +79,8 @@ class TestReduceData(unittest.TestCase): 'CREATE TABLE event_imgs (id INT PRIMARY KEY, img_id INT)', 'INSERT INTO event_imgs VALUES (?, ?)', { - (1, 10), + (1, 11), + (2, 21), } ) createTestDbTable( @@ -42,7 +88,8 @@ class TestReduceData(unittest.TestCase): 'CREATE TABLE images (id INT PRIMARY KEY, url TEXT, license TEXT, artist TEXT, credit TEXT)', 'INSERT INTO images VALUES (?, ?, ?, ?, ?)', { - (10, 'example.com/1', 'cc0', 'artist one', 'credits one'), + (11, 'example.com/1', 'cc0', 'artist one', 'credits one'), + (21, 'example.com/1', 'cc0', 'artist two', 'credits two'), } ) createTestDbTable( @@ -54,17 +101,41 @@ class TestReduceData(unittest.TestCase): } ) # Run - reduceData(dbFile) + reduceData(dbFile, [1, MONTH_SCALE, DAY_SCALE]) # Check self.assertEqual( - readTestDbTable(dbFile, 'SELECT id, title, start, start_upper, end, end_upper, fmt, ctg FROM events'), + readTestDbTable(dbFile, 'SELECT * FROM events'), { (1, 'event one', 1900, None, None, None, 0, 'event'), + (2, 'event two', 2452594, None, 2455369, None, 3, 'human'), } ) self.assertEqual( - readTestDbTable(dbFile, 'SELECT id, pop from pop'), + readTestDbTable(dbFile, 'SELECT * from pop'), { - (1, 11), + (1, 10), + (2, 20), + } + ) + self.assertEqual( + readTestDbTable(dbFile, 'SELECT * from dist'), + { + (1, 1900, 1), + (1, 2002, 1), + (MONTH_SCALE, gregorianToJdn(1900, 1, 1), 1), + (MONTH_SCALE, julianToJdn(2002, 11, 1), 1), + (DAY_SCALE, gregorianToJdn(1900, 1, 1), 1), + (DAY_SCALE, 2452594, 1), + } + ) + self.assertEqual( + readTestDbTable(dbFile, 'SELECT * from event_disp'), + { + (1, 1), + (1, MONTH_SCALE), + (1, DAY_SCALE), + (2, 1), + (2, MONTH_SCALE), + (2, DAY_SCALE), } ) -- cgit v1.2.3