aboutsummaryrefslogtreecommitdiff
path: root/backend/tests/enwiki/test_gen_desc_data.py
blob: f6d4250f92c5bb958bf81ed4376082435dd44f15 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import unittest
import os, tempfile

from tests.common import readTestDbTable
from hist_data.enwiki.gen_desc_data import genData

# Path of the sample dump file, resolved relative to this test module's own directory
# so the test does not depend on the current working directory.
TEST_DUMP_FILE = os.path.join(
	os.path.dirname(__file__),
	'sample_enwiki_pages_articles.xml.bz2',
)

class TestGenData(unittest.TestCase):
	"""Checks the database tables written by genData() for the sample dump file."""
	def test_gen(self):
		"""Run genData() into a temporary database and verify the pages, redirects and descs tables."""
		with tempfile.TemporaryDirectory() as tempDir:
			# Run
			dbFile = os.path.join(tempDir, 'descData.db')
			genData(TEST_DUMP_FILE, dbFile)
			# Check
			self.assertEqual(
				readTestDbTable(dbFile, 'SELECT id, title FROM pages'),
				{
					(10, 'AccessibleComputing'),
					(13, 'AfghanistanHistory'),
					(25, 'Autism'),
				}
			)
			self.assertEqual(
				readTestDbTable(dbFile, 'SELECT id, target FROM redirects'),
				{
					(10, 'Computer accessibility'),
					(13, 'History of Afghanistan'),
				}
			)
			# Descriptions are long, so only their leading text is compared
			descsRows = readTestDbTable(dbFile, 'SELECT id, desc FROM descs')
			expectedDescPrefixes = {
				25: 'Kanner autism, or classic autism, is a neurodevelopmental disorder',
			}
			# Exactly the expected pages must have a description; once this passes,
			# every row's id is known to be a key of expectedDescPrefixes
			self.assertEqual({row[0] for row in descsRows}, set(expectedDescPrefixes))
			for pageId, desc in descsRows:
				with self.subTest(pageId=pageId):
					expectedPrefix = expectedDescPrefixes[pageId]
					# Same condition as desc.startswith(expectedPrefix), but a failure
					# shows the differing text instead of 'False is not true'
					self.assertEqual(desc[:len(expectedPrefix)], expectedPrefix)