about summary refs log tree commit diff
path: root/backend/tolData/genPopData.py
diff options
context:
space:
mode:
Diffstat (limited to 'backend/tolData/genPopData.py')
-rwxr-xr-x backend/tolData/genPopData.py 40
1 files changed, 40 insertions, 0 deletions
diff --git a/backend/tolData/genPopData.py b/backend/tolData/genPopData.py
new file mode 100755
index 0000000..9c5382c
--- /dev/null
+++ b/backend/tolData/genPopData.py
@@ -0,0 +1,40 @@
+#!/usr/bin/python3
+
+import sys
+import sqlite3
+
+import argparse
+parser = argparse.ArgumentParser(description='''
+Reads enwiki page view info from a database, and stores it
+as node popularity values in the database.
+''', formatter_class=argparse.RawDescriptionHelpFormatter)
+args = parser.parse_args()
+
+pageviewsDb = 'enwiki/pageviewData.db'
+dbFile = 'data.db'
+
+dbCon = sqlite3.connect(dbFile)
+dbCur = dbCon.cursor()
+
+print('Getting view counts')
+pdbCon = sqlite3.connect(pageviewsDb)
+pdbCur = pdbCon.cursor()
+nodeToViews = {} # Maps node names to counts
+iterNum = 0
+for wikiId, views in pdbCur.execute('SELECT id, views from views'):
+ iterNum += 1
+ if iterNum % 1e4 == 0:
+ print(f'At iteration {iterNum}') # Reached 1.6e6
+ #
+ row = dbCur.execute('SELECT name FROM wiki_ids WHERE id = ?', (wikiId,)).fetchone()
+ if row != None:
+ nodeToViews[row[0]] = views
+pdbCon.close()
+
+print(f'Writing {len(nodeToViews)} entries to db')
+dbCur.execute('CREATE TABLE node_pop (name TEXT PRIMARY KEY, pop INT)')
+for nodeName, views in nodeToViews.items():
+ dbCur.execute('INSERT INTO node_pop VALUES (?, ?)', (nodeName, views))
+
+dbCon.commit()
+dbCon.close()