about | summary | refs | log | tree | commit | diff
path: root/backend/tests/enwiki/test_gen_dump_index_db.py
diff options
context:
space:
mode:
Diffstat (limited to 'backend/tests/enwiki/test_gen_dump_index_db.py')
-rw-r--r-- backend/tests/enwiki/test_gen_dump_index_db.py | 39
1 files changed, 39 insertions, 0 deletions
diff --git a/backend/tests/enwiki/test_gen_dump_index_db.py b/backend/tests/enwiki/test_gen_dump_index_db.py
new file mode 100644
index 0000000..e0715f3
--- /dev/null
+++ b/backend/tests/enwiki/test_gen_dump_index_db.py
@@ -0,0 +1,39 @@
+import unittest
+import tempfile, os
+
+from tests.common import createTestBz2, readTestDbTable
+from tol_data.enwiki.gen_dump_index_db import genData
+
+def runGenData(indexFileContents: str):
+    """ Writes indexFileContents to a temporary bz2 index file, runs genData() on it, and returns
+        the (title, id, offset, next_offset) rows that readTestDbTable() reads from 'offsets'.
+        The result is read inside the 'with' block, before the temporary directory is deleted. """
+    with tempfile.TemporaryDirectory() as tempDir:
+        indexFile = os.path.join(tempDir, 'index.txt.bz2')
+        dbFile = os.path.join(tempDir, 'data.db')
+        createTestBz2(indexFile, indexFileContents)
+        genData(indexFile, dbFile)
+        # Query while dbFile still exists
+        return readTestDbTable(dbFile, 'SELECT title, id, offset, next_offset FROM offsets')
+
+class TestGenData(unittest.TestCase):
+    """ Checks the rows that genData() stores in the 'offsets' table for a given dump index file. """
+    def setUp(self):
+        self.maxDiff = None # Remove output-diff size limit
+    def test_index_file(self):
+        # Input lines are 'offset:id:title'; expected rows are (title, id, offset, next_offset)
+        offsets = runGenData(
+            '100:10:apple\n'
+            '100:11:ant\n'
+            '300:99:banana ice-cream\n'
+            '1000:2030:Custard!\n'
+        )
+        self.assertEqual(offsets, {
+            ('apple', 10, 100, 300), # Shares offset 100 with 'ant', so both expect next_offset 300
+            ('ant', 11, 100, 300),
+            ('banana ice-cream', 99, 300, 1000),
+            ('Custard!', 2030, 1000, -1), # Last offset in the index: next_offset is expected to be -1
+        })
+    def test_emp_index(self):
+        # An empty index file should produce no rows
+        self.assertEqual(runGenData(''), set())