From 0a9b2c2e5eca8a04e37fbdd423379882863237c2 Mon Sep 17 00:00:00 2001 From: Terry Truong Date: Sat, 21 Jan 2023 12:21:03 +1100 Subject: Adjust backend coding style Increase line spacing, add section comments, etc --- backend/hist_data/gen_desc_data.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'backend/hist_data/gen_desc_data.py') diff --git a/backend/hist_data/gen_desc_data.py b/backend/hist_data/gen_desc_data.py index 6c9fee2..bcd8870 100755 --- a/backend/hist_data/gen_desc_data.py +++ b/backend/hist_data/gen_desc_data.py @@ -5,7 +5,8 @@ Maps events to short descriptions from Wikipedia, and stores them in the databas """ import argparse -import os, sqlite3 +import os +import sqlite3 ENWIKI_DB = os.path.join('enwiki', 'desc_data.db') DB_FILE = 'data.db' @@ -15,12 +16,12 @@ def genData(enwikiDb: str, dbFile: str) -> None: dbCon = sqlite3.connect(dbFile) dbCur = dbCon.cursor() dbCur.execute('CREATE TABLE descs (id INT PRIMARY KEY, wiki_id INT, desc TEXT)') - # + print('Getting events') titleToId: dict[str, int] = {} for eventId, title in dbCur.execute('SELECT id, title FROM events'): titleToId[title] = eventId - # + print('Getting Wikipedia descriptions') enwikiCon = sqlite3.connect(enwikiDb) enwikiCur = enwikiCon.cursor() @@ -29,11 +30,13 @@ def genData(enwikiDb: str, dbFile: str) -> None: iterNum += 1 if iterNum % 1e4 == 0: print(f'At iteration {iterNum}') + # Get wiki ID row = enwikiCur.execute('SELECT id FROM pages WHERE title = ?', (title,)).fetchone() if row is None: continue wikiId = row[0] + # Check for redirect wikiIdToGet = wikiId query = \ @@ -41,12 +44,13 @@ def genData(enwikiDb: str, dbFile: str) -> None: row = enwikiCur.execute(query, (wikiId,)).fetchone() if row is not None: wikiIdToGet = row[0] + # Get desc row = enwikiCur.execute('SELECT desc FROM descs where id = ?', (wikiIdToGet,)).fetchone() if row is None: continue dbCur.execute('INSERT INTO descs VALUES (?, ?, ?)', (eventId, wikiId, row[0])) - # + print('Closing databases') dbCon.commit() dbCon.close() @@ -54,5 +58,5 @@ def genData(enwikiDb: str, dbFile: str) -> None: if __name__ == '__main__': parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) args = parser.parse_args() - # + genData(ENWIKI_DB, DB_FILE) -- cgit v1.2.3