diff options
| author | Terry Truong <terry06890@gmail.com> | 2022-09-07 11:37:37 +1000 |
|---|---|---|
| committer | Terry Truong <terry06890@gmail.com> | 2022-09-07 11:37:37 +1000 |
| commit | daccbbd9c73a5292ea9d6746560d7009e5aa666d (patch) | |
| tree | 9156bf011ab6302eb3c0d219d40587d594f51841 /backend/tolData/enwiki/genPageviewData.py | |
| parent | 1a7fe33edafa68a6f759d124bdeee673ff9cf9ff (diff) | |
Add python type annotations
Also use consistent quote symbols
Also use 'is None' instead of '== None'
Also use 'if list1' instead of 'if len(list1) > 0'
Diffstat (limited to 'backend/tolData/enwiki/genPageviewData.py')
| -rwxr-xr-x | backend/tolData/enwiki/genPageviewData.py | 10 |
1 file changed, 5 insertions, 5 deletions
```diff
diff --git a/backend/tolData/enwiki/genPageviewData.py b/backend/tolData/enwiki/genPageviewData.py
index f0901b2..6a5d79c 100755
--- a/backend/tolData/enwiki/genPageviewData.py
+++ b/backend/tolData/enwiki/genPageviewData.py
@@ -5,10 +5,10 @@ from collections import defaultdict
 import bz2, sqlite3
 
 import argparse
-parser = argparse.ArgumentParser(description='''
+parser = argparse.ArgumentParser(description="""
 Reads through wikimedia files containing pageview counts,
 computes average counts, and adds them to a database
-''', formatter_class=argparse.RawDescriptionHelpFormatter)
+""", formatter_class=argparse.RawDescriptionHelpFormatter)
 args = parser.parse_args()
 
 pageviewFiles = glob.glob('./pageviews/pageviews-*-user.bz2')
@@ -26,7 +26,7 @@ if os.path.exists(dbFile):
 # platform (eg: mobile-web), monthly view count,
 # hourly count string (eg: A1B2 means 1 view on day 1 and 2 views on day 2)
 namespaceRegex = re.compile(r'[a-zA-Z]+:')
-titleToViews = defaultdict(int)
+titleToViews: dict[str, int] = defaultdict(int)
 linePrefix = b'en.wikipedia '
 for filename in pageviewFiles:
     print(f'Reading from {filename}')
@@ -40,7 +40,7 @@ for filename in pageviewFiles:
             line = line[len(linePrefix):line.rfind(b' ')] # Remove first and last fields
             title = line[:line.find(b' ')].decode('utf-8')
             viewCount = int(line[line.rfind(b' ')+1:])
-            if namespaceRegex.match(title) != None:
+            if namespaceRegex.match(title) is not None:
                 continue
             # Update map
             titleToViews[title] += viewCount
@@ -54,7 +54,7 @@ idbCur = idbCon.cursor()
 dbCur.execute('CREATE TABLE views (title TEXT PRIMARY KEY, id INT, views INT)')
 for title, views in titleToViews.items():
     row = idbCur.execute('SELECT id FROM offsets WHERE title = ?', (title,)).fetchone()
-    if row != None:
+    if row is not None:
         wikiId = int(row[0])
         dbCur.execute('INSERT INTO views VALUES (?, ?, ?)', (title, wikiId, math.floor(views / len(pageviewFiles))))
 dbCon.commit()
```
