From daccbbd9c73a5292ea9d6746560d7009e5aa666d Mon Sep 17 00:00:00 2001 From: Terry Truong Date: Wed, 7 Sep 2022 11:37:37 +1000 Subject: Add python type annotations Also use consistent quote symbols Also use 'is None' instead of '== None' Also use 'if list1' instead of 'if len(list1) > 0' --- backend/tolData/enwiki/lookupPage.py | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'backend/tolData/enwiki/lookupPage.py') diff --git a/backend/tolData/enwiki/lookupPage.py b/backend/tolData/enwiki/lookupPage.py index e7b95f0..427aa7a 100755 --- a/backend/tolData/enwiki/lookupPage.py +++ b/backend/tolData/enwiki/lookupPage.py @@ -1,6 +1,6 @@ #!/usr/bin/python3 -import sys, re +import sys import bz2 import sqlite3 @@ -12,24 +12,24 @@ db, and prints the corresponding <page>. parser.add_argument("title", help="The title to look up") args = parser.parse_args() -dumpFile = "enwiki-20220501-pages-articles-multistream.xml.bz2" -indexDb = "dumpIndex.db" -pageTitle = args.title.replace("_", " ") +dumpFile = 'enwiki-20220501-pages-articles-multistream.xml.bz2' +indexDb = 'dumpIndex.db' +pageTitle = args.title.replace('_', ' ') -print("Looking up offset in index db") +print('Looking up offset in index db') dbCon = sqlite3.connect(indexDb) dbCur = dbCon.cursor() -query = "SELECT title, offset, next_offset FROM offsets WHERE title = ?" +query = 'SELECT title, offset, next_offset FROM offsets WHERE title = ?' 
row = dbCur.execute(query, (pageTitle,)).fetchone() -if row == None: - print("Title not found") +if row is None: + print('Title not found') sys.exit(0) _, pageOffset, endOffset = row dbCon.close() -print(f"Found chunk at offset {pageOffset}") +print(f'Found chunk at offset {pageOffset}') -print("Reading from wiki dump") -content = [] +print('Reading from wiki dump') +content: list[str] = [] with open(dumpFile, mode='rb') as file: # Get uncompressed chunk file.seek(pageOffset) @@ -42,25 +42,25 @@ with open(dumpFile, mode='rb') as file: pageNum = 0 while not found: line = lines[lineIdx] - if line.lstrip() == "<page>": + if line.lstrip() == '<page>': pageNum += 1 if pageNum > 100: - print("ERROR: Did not find title after 100 pages") + print('ERROR: Did not find title after 100 pages') break lineIdx += 1 titleLine = lines[lineIdx] if titleLine.lstrip() == '<title>' + pageTitle + '</title>': found = True - print(f"Found title in chunk as page {pageNum}") + print(f'Found title in chunk as page {pageNum}') content.append(line) content.append(titleLine) while True: lineIdx += 1 line = lines[lineIdx] content.append(line) - if line.lstrip() == "</page>": + if line.lstrip() == '</page>': break lineIdx += 1 -print("Content: ") -print("\n".join(content)) +print('Content: ') +print('\n'.join(content)) -- cgit v1.2.3