aboutsummaryrefslogtreecommitdiff
path: root/backend/hist_data/gen_pop_data.py
diff options
context:
space:
mode:
author Terry Truong <terry06890@gmail.com> 2023-01-21 12:21:03 +1100
committer Terry Truong <terry06890@gmail.com> 2023-01-21 12:32:01 +1100
commit 0a9b2c2e5eca8a04e37fbdd423379882863237c2 (patch)
tree 1812bdb6bb13e4f76fdd7ef04075b291f775c213 /backend/hist_data/gen_pop_data.py
parent 8321e2f92dbc073b8f1de87895d6620a2021b22e (diff)
Adjust backend coding style
Increase line spacing, add section comments, etc.
Diffstat (limited to 'backend/hist_data/gen_pop_data.py')
-rwxr-xr-x backend/hist_data/gen_pop_data.py 17
1 file changed, 9 insertions, 8 deletions
diff --git a/backend/hist_data/gen_pop_data.py b/backend/hist_data/gen_pop_data.py
index aaaf69d..8d50b6b 100755
--- a/backend/hist_data/gen_pop_data.py
+++ b/backend/hist_data/gen_pop_data.py
@@ -4,7 +4,9 @@
Adds Wikipedia page view info to the database as popularity values
"""
-import os, sqlite3
+import argparse
+import os
+import sqlite3
PAGEVIEWS_DB = os.path.join('enwiki', 'pageview_data.db')
DB_FILE = 'data.db'
@@ -12,12 +14,12 @@ DB_FILE = 'data.db'
def genData(pageviewsDb: str, dbFile: str) -> None:
dbCon = sqlite3.connect(dbFile)
dbCur = dbCon.cursor()
- #
+
print('Getting event data')
titleToId: dict[str, int] = {}
for eventId, title in dbCur.execute('SELECT id, title FROM events'):
titleToId[title] = eventId
- #
+
print('Getting view counts')
pdbCon = sqlite3.connect(pageviewsDb)
pdbCur = pdbCon.cursor()
@@ -27,24 +29,23 @@ def genData(pageviewsDb: str, dbFile: str) -> None:
iterNum += 1
if iterNum % 1e6 == 0:
print(f'At iteration {iterNum}')
- #
+
if title not in titleToId:
continue
titleToViews[title] = views
pdbCon.close()
- #
+
print(f'Result: {len(titleToViews)} out of {len(titleToId)}')
dbCur.execute('CREATE TABLE pop (id INT PRIMARY KEY, pop INT)')
dbCur.execute('CREATE INDEX pop_idx ON pop(pop)')
for title, views in titleToViews.items():
dbCur.execute('INSERT INTO pop VALUES (?, ?)', (titleToId[title], views))
- #
+
dbCon.commit()
dbCon.close()
if __name__ == '__main__':
- import argparse
parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
args = parser.parse_args()
- #
+
genData(PAGEVIEWS_DB, DB_FILE)