about | summary | refs | log | tree | commit | diff
path: root/backend/tolData/enwiki/genPageviewData.py
diff options
context:
space:
mode:
Diffstat (limited to 'backend/tolData/enwiki/genPageviewData.py')
-rwxr-xr-x backend/tolData/enwiki/genPageviewData.py 10
1 files changed, 5 insertions, 5 deletions
diff --git a/backend/tolData/enwiki/genPageviewData.py b/backend/tolData/enwiki/genPageviewData.py
index f0901b2..6a5d79c 100755
--- a/backend/tolData/enwiki/genPageviewData.py
+++ b/backend/tolData/enwiki/genPageviewData.py
@@ -5,10 +5,10 @@ from collections import defaultdict
import bz2, sqlite3
import argparse
-parser = argparse.ArgumentParser(description='''
+parser = argparse.ArgumentParser(description="""
Reads through wikimedia files containing pageview counts,
computes average counts, and adds them to a database
-''', formatter_class=argparse.RawDescriptionHelpFormatter)
+""", formatter_class=argparse.RawDescriptionHelpFormatter)
args = parser.parse_args()
pageviewFiles = glob.glob('./pageviews/pageviews-*-user.bz2')
@@ -26,7 +26,7 @@ if os.path.exists(dbFile):
# platform (eg: mobile-web), monthly view count,
# hourly count string (eg: A1B2 means 1 view on day 1 and 2 views on day 2)
namespaceRegex = re.compile(r'[a-zA-Z]+:')
-titleToViews = defaultdict(int)
+titleToViews: dict[str, int] = defaultdict(int)
linePrefix = b'en.wikipedia '
for filename in pageviewFiles:
print(f'Reading from {filename}')
@@ -40,7 +40,7 @@ for filename in pageviewFiles:
line = line[len(linePrefix):line.rfind(b' ')] # Remove first and last fields
title = line[:line.find(b' ')].decode('utf-8')
viewCount = int(line[line.rfind(b' ')+1:])
- if namespaceRegex.match(title) != None:
+ if namespaceRegex.match(title) is not None:
continue
# Update map
titleToViews[title] += viewCount
@@ -54,7 +54,7 @@ idbCur = idbCon.cursor()
dbCur.execute('CREATE TABLE views (title TEXT PRIMARY KEY, id INT, views INT)')
for title, views in titleToViews.items():
row = idbCur.execute('SELECT id FROM offsets WHERE title = ?', (title,)).fetchone()
- if row != None:
+ if row is not None:
wikiId = int(row[0])
dbCur.execute('INSERT INTO views VALUES (?, ?, ?)', (title, wikiId, math.floor(views / len(pageviewFiles))))
dbCon.commit()