# path: root/backend/tests/enwiki/test_gen_dump_index_db.py
# blob: 5281911704c4ac8514e6482a26b5868fa017e4ad (plain)
import unittest
import tempfile
import os

from tests.common import createTestBz2, readTestDbTable
from hist_data.enwiki.gen_dump_index_db import genData

def runGenData(indexFileContents: str):
	"""
	Runs genData() on a temporary bz2 index file holding `indexFileContents`,
	and returns what readTestDbTable() reads from the resulting database's 'offsets' table.

	The query selects (title, id, offset, next_offset) for each row.
	Both files live in a temporary directory, which is removed when the function finishes.
	"""
	with tempfile.TemporaryDirectory() as workDir:
		indexPath = os.path.join(workDir, 'index.txt.bz2')
		dbPath = os.path.join(workDir, 'data.db')

		# Write the index input, then build the database from it
		createTestBz2(indexPath, indexFileContents)
		genData(indexPath, dbPath)

		# Query the database while the temp directory still exists
		offsetRows = readTestDbTable(dbPath, 'SELECT title, id, offset, next_offset FROM offsets')
	return offsetRows

class TestGenData(unittest.TestCase):
	""" Checks the 'offsets' rows that genData() produces, via runGenData(), for given index file contents. """
	def setUp(self):
		self.maxDiff = None # Remove output-diff size limit

	def test_index_file(self):
		# Index lines have the form 'offset:id:title'. Each expected row is
		# (title, id, offset, next_offset), where next_offset is the next
		# distinct offset in the index, or -1 for entries at the last offset.
		indexFileContents = (
			'100:10:apple\n'
			'100:11:ant\n'
			'300:99:banana ice-cream\n'
			'1000:2030:Custard!\n'
		)
		offsetRows = runGenData(indexFileContents)
		self.assertEqual(offsetRows, {
			('apple', 10, 100, 300),
			('ant', 11, 100, 300),
			('banana ice-cream', 99, 300, 1000),
			('Custard!', 2030, 1000, -1),
		})

	def test_emp_index(self):
		# An empty index file should produce an empty offsets table
		offsetRows = runGenData('')
		self.assertEqual(offsetRows, set())