diff options
| author | Terry Truong <terry06890@gmail.com> | 2023-01-02 14:51:53 +1100 |
|---|---|---|
| committer | Terry Truong <terry06890@gmail.com> | 2023-01-02 14:51:53 +1100 |
| commit | 56369bccd977ac726bef70895883e79da4e1edd8 (patch) | |
| tree | 67a894fe1579f2da150f0162ccbdc8a0a19ef9be /backend/hist_data/enwiki/gen_dump_index_db.py | |
| parent | 0e5e46cedaaeacf59cfd0f2e30c1ae6923466870 (diff) | |
Adjust wikidata event specifiers
Do minor refactors:
- Swap fmt=1 and fmt=2 in 'events' table
- Make documentation consistently use BC and AD
- Import argparse at start of scripts
Diffstat (limited to 'backend/hist_data/enwiki/gen_dump_index_db.py')
| -rwxr-xr-x | backend/hist_data/enwiki/gen_dump_index_db.py | 8 |
1 file changed, 4 insertions, 4 deletions
```diff
diff --git a/backend/hist_data/enwiki/gen_dump_index_db.py b/backend/hist_data/enwiki/gen_dump_index_db.py
index 5778680..6be8bc5 100755
--- a/backend/hist_data/enwiki/gen_dump_index_db.py
+++ b/backend/hist_data/enwiki/gen_dump_index_db.py
@@ -1,11 +1,12 @@
 #!/usr/bin/python3
 
 """
-Adds data from the wiki dump index-file into a database
+Adds data from the wiki-dump index-file into a database
 """
+
+import argparse
 import sys, os, re
-import bz2
-import sqlite3
+import bz2, sqlite3
 
 INDEX_FILE = 'enwiki-20220501-pages-articles-multistream-index.txt.bz2' # Had about 22e6 lines
 DB_FILE = 'dump_index.db'
@@ -53,7 +54,6 @@ def genData(indexFile: str, dbFile: str) -> None:
 	dbCon.close()
 
 if __name__ == '__main__':
-	import argparse
 	parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
 	parser.parse_args()
 	#
```
