aboutsummaryrefslogtreecommitdiff
path: root/backend/tests/enwiki/test_gen_img_data.py
diff options
context:
space:
mode:
author: Terry Truong <terry06890@gmail.com>, 2022-09-11 14:55:42 +1000
committer: Terry Truong <terry06890@gmail.com>, 2022-09-11 15:04:14 +1000
commit: 5de5fb93e50fe9006221b30ac4a66f1be0db82e7 (patch)
tree: 2567c25c902dbb40d44419805cebb38171df47fa /backend/tests/enwiki/test_gen_img_data.py
parent: daccbbd9c73a5292ea9d6746560d7009e5aa666d (diff)
Add backend unit tests
- Add unit testing code in backend/tests/
- Change to snake-case for script/file/directory names
- Use os.path.join() instead of '/'
- Refactor script code into function defs and a main-guard
- Make global vars all-caps

Some fixes:
- For getting descriptions, some wiki redirects weren't properly resolved
- Linked images were sub-optimally propagated
- Generation of reduced trees assumed a wiki-id association implied a description
- Tilo.py had potential null dereferences by not always using a reduced node set
- EOL image downloading didn't properly wait for all threads to end when finishing
Diffstat (limited to 'backend/tests/enwiki/test_gen_img_data.py')
-rw-r--r-- backend/tests/enwiki/test_gen_img_data.py | 64
1 file changed, 64 insertions, 0 deletions
diff --git a/backend/tests/enwiki/test_gen_img_data.py b/backend/tests/enwiki/test_gen_img_data.py
new file mode 100644
index 0000000..1703b78
--- /dev/null
+++ b/backend/tests/enwiki/test_gen_img_data.py
@@ -0,0 +1,64 @@
+import unittest
+import tempfile, os
+
+from tests.common import createTestDbTable, readTestDbTable
+from tol_data.enwiki.gen_img_data import getInputPageIdsFromDb, genData
+
+TEST_DUMP_FILE = os.path.join(os.path.dirname(__file__), 'sample_enwiki_pages_articles.xml.bz2')  # Sample dump, looked up in the same directory as this test module
+
+class TestGetInputPageIdsFromDb(unittest.TestCase):
+ def test_get(self):
+ with tempfile.TemporaryDirectory() as tempDir:
+ # Create a temp tree-of-life db whose wiki_ids table holds two name/id rows (ids 1 and 2)
+ dbFile = os.path.join(tempDir, 'data.db')
+ createTestDbTable(
+ dbFile,
+ 'CREATE TABLE wiki_ids (name TEXT PRIMARY KEY, id INT)',
+ 'INSERT INTO wiki_ids VALUES (?, ?)',
+ {
+ ('one', 1),
+ ('and another', 2),
+ }
+ )
+ # Run the function under test against the db file created above
+ pageIds = getInputPageIdsFromDb(dbFile)
+ # Check: the result must equal the set of ids inserted into wiki_ids
+ self.assertEqual(pageIds, {1, 2})
+
+class TestGenData(unittest.TestCase):
+ def test_gen(self):
+ with tempfile.TemporaryDirectory() as tempDir:
+ # Create a temp dump-index db: an offsets table with one (title, id, offset, next_offset) row per page; every row uses offset 0 and next_offset -1 (presumably "block runs to end of dump" - TODO confirm)
+ indexDb = os.path.join(tempDir, 'dump_index.db')
+ createTestDbTable(
+ indexDb,
+ 'CREATE TABLE offsets (title TEXT PRIMARY KEY, id INT UNIQUE, offset INT, next_offset INT)',
+ 'INSERT INTO offsets VALUES (?, ?, ?, ?)',
+ {
+ ('AccessibleComputing',10,0,-1),
+ ('AfghanistanHistory',13,0,-1),
+ ('Autism',25,0,-1),
+ }
+ )
+ # Run for page ids 10 and 25 against the sample dump, writing to an image-data db path inside the temp dir
+ imgDb = os.path.join(tempDir, 'imgData.db')
+ genData({10, 25}, TEST_DUMP_FILE, indexDb, imgDb)
+ # Check: page_imgs has one row per requested id; page 10 has no image name (None), page 25 has one
+ self.assertEqual(
+ readTestDbTable(imgDb, 'SELECT page_id, img_name from page_imgs'),
+ {
+ (10, None),
+ (25, 'Autism-stacking-cans 2nd edit.jpg'),
+ }
+ )
+ # Run again on the same image-data db with a different id set (13 is new, 10 was already requested)
+ genData({13, 10}, TEST_DUMP_FILE, indexDb, imgDb)
+ # Check: page 13 is added with no image name, and the rows from the first run (including 25) are still present
+ self.assertEqual(
+ readTestDbTable(imgDb, 'SELECT page_id, img_name from page_imgs'),
+ {
+ (10, None),
+ (13, None),
+ (25, 'Autism-stacking-cans 2nd edit.jpg'),
+ }
+ )