From daccbbd9c73a5292ea9d6746560d7009e5aa666d Mon Sep 17 00:00:00 2001 From: Terry Truong Date: Wed, 7 Sep 2022 11:37:37 +1000 Subject: Add python type annotations Also use consistent quote symbols Also use 'is None' instead of '== None' Also use 'if list1' instead of 'if len(list1) > 0' --- backend/tolData/enwiki/genPageviewData.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'backend/tolData/enwiki/genPageviewData.py') diff --git a/backend/tolData/enwiki/genPageviewData.py b/backend/tolData/enwiki/genPageviewData.py index f0901b2..6a5d79c 100755 --- a/backend/tolData/enwiki/genPageviewData.py +++ b/backend/tolData/enwiki/genPageviewData.py @@ -5,10 +5,10 @@ from collections import defaultdict import bz2, sqlite3 import argparse -parser = argparse.ArgumentParser(description=''' +parser = argparse.ArgumentParser(description=""" Reads through wikimedia files containing pageview counts, computes average counts, and adds them to a database -''', formatter_class=argparse.RawDescriptionHelpFormatter) +""", formatter_class=argparse.RawDescriptionHelpFormatter) args = parser.parse_args() pageviewFiles = glob.glob('./pageviews/pageviews-*-user.bz2') @@ -26,7 +26,7 @@ if os.path.exists(dbFile): # platform (eg: mobile-web), monthly view count, # hourly count string (eg: A1B2 means 1 view on day 1 and 2 views on day 2) namespaceRegex = re.compile(r'[a-zA-Z]+:') -titleToViews = defaultdict(int) +titleToViews: dict[str, int] = defaultdict(int) linePrefix = b'en.wikipedia ' for filename in pageviewFiles: print(f'Reading from {filename}') @@ -40,7 +40,7 @@ for filename in pageviewFiles: line = line[len(linePrefix):line.rfind(b' ')] # Remove first and last fields title = line[:line.find(b' ')].decode('utf-8') viewCount = int(line[line.rfind(b' ')+1:]) - if namespaceRegex.match(title) != None: + if namespaceRegex.match(title) is not None: continue # Update map titleToViews[title] += viewCount @@ -54,7 +54,7 @@ idbCur = idbCon.cursor() dbCur.execute('CREATE TABLE views (title TEXT PRIMARY KEY, id INT, views INT)') for title, views in titleToViews.items(): row = idbCur.execute('SELECT id FROM offsets WHERE title = ?', (title,)).fetchone() - if row != None: + if row is not None: wikiId = int(row[0]) dbCur.execute('INSERT INTO views VALUES (?, ?, ?)', (title, wikiId, math.floor(views / len(pageviewFiles)))) dbCon.commit() -- cgit v1.2.3