From 5de5fb93e50fe9006221b30ac4a66f1be0db82e7 Mon Sep 17 00:00:00 2001 From: Terry Truong Date: Sun, 11 Sep 2022 14:55:42 +1000 Subject: Add backend unit tests - Add unit testing code in backend/tests/ - Change to snake-case for script/file/directory names - Use os.path.join() instead of '/' - Refactor script code into function defs and a main-guard - Make global vars all-caps Some fixes: - For getting descriptions, some wiki redirects weren't properly resolved - Linked images were sub-optimally propagated - Generation of reduced trees assumed a wiki-id association implied a description - Tilo.py had potential null dereferences by not always using a reduced node set - EOL image downloading didn't properly wait for all threads to end when finishing --- backend/tolData/eol/genImagesListDb.py | 34 ---------------------------------- 1 file changed, 34 deletions(-) delete mode 100755 backend/tolData/eol/genImagesListDb.py (limited to 'backend/tolData/eol/genImagesListDb.py') diff --git a/backend/tolData/eol/genImagesListDb.py b/backend/tolData/eol/genImagesListDb.py deleted file mode 100755 index 808292d..0000000 --- a/backend/tolData/eol/genImagesListDb.py +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/python3 - -import os, re -import csv -import sqlite3 - -import argparse -parser = argparse.ArgumentParser(description=""" -Generates a sqlite db from a directory of CSV files holding EOL image data -""", formatter_class=argparse.RawDescriptionHelpFormatter) -parser.parse_args() - -imagesListDir = 'imagesList/' -dbFile = 'imagesList.db' - -print('Creating database') -dbCon = sqlite3.connect(dbFile) -dbCur = dbCon.cursor() -dbCur.execute('CREATE TABLE images' \ - ' (content_id INT PRIMARY KEY, page_id INT, source_url TEXT, copy_url TEXT, license TEXT, copyright_owner TEXT)') -dbCur.execute('CREATE INDEX images_pid_idx ON images(page_id)') -print('Reading CSV files') -csvFilenames = os.listdir(imagesListDir) -for filename in csvFilenames: - print(f'Processing {imagesListDir}{filename}') - with open(imagesListDir + filename, newline='') as file: - for contentId, pageId, sourceUrl, copyUrl, license, owner in csv.reader(file): - if re.match(r'^[a-zA-Z]', contentId): # Skip header line - continue - dbCur.execute('INSERT INTO images VALUES (?, ?, ?, ?, ?, ?)', - (int(contentId), int(pageId), sourceUrl, copyUrl, license, owner)) -print('Closing database') -dbCon.commit() -dbCon.close() -- cgit v1.2.3