1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
|
import unittest
import tempfile, os
from tests.common import createTestBz2, readTestDbTable
from hist_data.enwiki.gen_dump_index_db import genData
def runGenData(indexFileContents: str):
    """ Runs genData() on a throwaway index file and returns the rows of the resulting 'offsets' table.

    The given contents are handed to createTestBz2() to create 'index.txt.bz2' inside a temporary
    directory, genData() is called with that file and a 'data.db' path in the same directory, and
    the title/id/offset/next_offset columns are read back with readTestDbTable().
    """
    offsetsQuery = 'SELECT title, id, offset, next_offset FROM offsets'
    with tempfile.TemporaryDirectory() as workDir:
        indexPath = os.path.join(workDir, 'index.txt.bz2')
        dbPath = os.path.join(workDir, 'data.db')
        # Create the index file that genData() will read
        createTestBz2(indexPath, indexFileContents)
        # Generate the database
        genData(indexPath, dbPath)
        # Read the table before the temporary directory is removed
        offsetRows = readTestDbTable(dbPath, offsetsQuery)
    return offsetRows
class TestGenData(unittest.TestCase):
    """ Tests genData() by way of runGenData(), comparing the generated 'offsets' rows. """

    def setUp(self):
        self.maxDiff = None # Remove output-diff size limit

    def test_index_file(self):
        # Each index line is 'offset:id:title'. Each expected row is
        # (title, id, offset, next_offset), where next_offset is the next larger
        # offset appearing in the index, or -1 for entries at the final offset.
        indexFileContents = (
            '100:10:apple\n'
            '100:11:ant\n'
            '300:99:banana ice-cream\n'
            '1000:2030:Custard!\n'
        )
        offsets = runGenData(indexFileContents)
        self.assertEqual(offsets, {
            ('apple', 10, 100, 300),
            ('ant', 11, 100, 300),
            ('banana ice-cream', 99, 300, 1000),
            ('Custard!', 2030, 1000, -1),
        })

    def test_emp_index(self):
        # An empty index file should produce an empty 'offsets' table
        offsets = runGenData('')
        self.assertEqual(offsets, set())
|