diff options
| author | Terry Truong <terry06890@gmail.com> | 2022-10-01 21:07:59 +1000 |
|---|---|---|
| committer | Terry Truong <terry06890@gmail.com> | 2022-10-01 21:07:59 +1000 |
| commit | a0b1e1a8a303504dd2cc743ab72937aee7f60f4d (patch) | |
| tree | 8dfe88637c4c1f7830bb0f089ff630f8582310b2 /backend/tests/enwiki/test_download_imgs.py | |
| parent | de9d6642ad2a57830f559fce22e36e3d68c5c70f (diff) | |
Add unit tests for Wikipedia extraction
Diffstat (limited to 'backend/tests/enwiki/test_download_imgs.py')
| -rw-r--r-- | backend/tests/enwiki/test_download_imgs.py | 54 |
1 files changed, 54 insertions, 0 deletions
diff --git a/backend/tests/enwiki/test_download_imgs.py b/backend/tests/enwiki/test_download_imgs.py
new file mode 100644
index 0000000..823ac37
--- /dev/null
+++ b/backend/tests/enwiki/test_download_imgs.py
@@ -0,0 +1,54 @@
+import unittest
+from unittest.mock import Mock, patch
+import tempfile, os
+
+from tests.common import readTestFile, createTestDbTable
+from hist_data.enwiki.download_imgs import downloadImgs
+
+class TestDownloadInfo(unittest.TestCase):
+    @patch('requests.get', autospec=True)
+    def test_download(self, requestsGetMock):
+        requestsGetMock.side_effect = lambda url, **kwargs: Mock(content=('img:' + url).encode())
+        with tempfile.TemporaryDirectory() as tempDir:
+            # Create temp image-data db
+            imgDb = os.path.join(tempDir, 'img_data.db')
+            createTestDbTable(
+                imgDb,
+                'CREATE TABLE page_imgs (page_id INT PRIMARY KEY, img_name TEXT)',
+                'INSERT into page_imgs VALUES (?, ?)',
+                {
+                    (1, 'one'),
+                    (2, 'two'),
+                    (3, 'three'),
+                    (4, 'four'),
+                    (5, 'five'),
+                    (6, 'six'),
+                    (7, 'seven'),
+                }
+            )
+            createTestDbTable(
+                imgDb,
+                'CREATE TABLE imgs (id INT PRIMARY KEY, name TEXT UNIQUE, ' \
+                    'license TEXT, artist TEXT, credit TEXT, restrictions TEXT, url TEXT)',
+                'INSERT INTO imgs VALUES (?, ?, ?, ?, ?, ?, ?)',
+                {
+                    (11, 'one','cc-by','alice','anna','','https://upload.wikimedia.org/1.jpg'),
+                    (12, 'two','???','bob','barbara','','https://upload.wikimedia.org/2.png'),
+                    (13, 'three','cc-by-sa','clare','File:?','','https://upload.wikimedia.org/3.gif'),
+                    (14, 'four','cc-by-sa 4.0','dave','dan','all','https://upload.wikimedia.org/4.jpeg'),
+                    (15, 'five','cc0','eve','eric',None,'https://upload.wikimedia.org/5.png'),
+                    (16, 'six','cc-by','','fred','','https://upload.wikimedia.org/6.png'),
+                }
+            )
+            # Create temp output directory
+            with tempfile.TemporaryDirectory() as outDir:
+                # Run
+                downloadImgs(imgDb, outDir, 0)
+                # Check
+                expectedImgs = {
+                    '11.jpg': 'img:https://upload.wikimedia.org/1.jpg',
+                    '15.png': 'img:https://upload.wikimedia.org/5.png',
+                }
+                self.assertEqual(set(os.listdir(outDir)), set(expectedImgs.keys()))
+                for imgName, content in expectedImgs.items():
+                    self.assertEqual(readTestFile(os.path.join(outDir, imgName)), content)
