about | summary | refs | log | tree | commit | diff
path: root/backend/tol_data/enwiki/gen_pageview_data.py
diff options
context:
space:
mode:
author: Terry Truong <terry06890@gmail.com> 2023-01-23 18:00:43 +1100
committer: Terry Truong <terry06890@gmail.com> 2023-01-23 18:01:13 +1100
commit: 94a8ad9b067e5a2c442ce47ce72d1a53eb444160 (patch)
tree: 2056373ee56b8b2f8269ac3e94d40f8f0e6eec0d /backend/tol_data/enwiki/gen_pageview_data.py
parent: 796c4e5660b1006575b8f2af9d99e2ce592c767a (diff)
Clean up some docs and naming inconsistencies
Diffstat (limited to 'backend/tol_data/enwiki/gen_pageview_data.py')
-rwxr-xr-x backend/tol_data/enwiki/gen_pageview_data.py | 3
1 files changed, 2 insertions, 1 deletions
diff --git a/backend/tol_data/enwiki/gen_pageview_data.py b/backend/tol_data/enwiki/gen_pageview_data.py
index ce3b674..8aee1cc 100755
--- a/backend/tol_data/enwiki/gen_pageview_data.py
+++ b/backend/tol_data/enwiki/gen_pageview_data.py
@@ -12,7 +12,7 @@ from collections import defaultdict
import bz2, sqlite3
PAGEVIEW_FILES = glob.glob('./pageviews/pageviews-*-user.bz2')
-DUMP_INDEX_DB = 'dumpIndex.db'
+DUMP_INDEX_DB = 'dump_index.db'
DB_FILE = 'pageview_data.db'
def genData(pageviewFiles: list[str], dumpIndexDb: str, dbFile: str) -> None:
@@ -42,6 +42,7 @@ def genData(pageviewFiles: list[str], dumpIndexDb: str, dbFile: str) -> None:
if namespaceRegex.match(title) is not None:
continue
# Update map
+ title = title.replace('_', ' ')
titleToViews[title] += viewCount
print(f'Found {len(titleToViews)} titles')
#