diff options
| author | Terry Truong <terry06890@gmail.com> | 2023-02-05 13:55:00 +1100 |
|---|---|---|
| committer | Terry Truong <terry06890@gmail.com> | 2023-02-05 13:55:00 +1100 |
| commit | ff638e269d439c853b5182b68ff10777f12597f1 (patch) | |
| tree | 2cc62068a501884821969b6ebeba3686df8f4044 /backend/hist_data/enwiki/gen_img_data.py | |
| parent | cc79c17fbc05bddc8b08f2734e721bc241123a4e (diff) | |
Use relative imports between data generation scripts [package-imports]
Avoids the need for code that modifies sys.path, but requires
running the scripts using 'python -m' with backend/ as the cwd.
Also expects constants like DB_FILE to be non-relative, because the
scripts are run from backend/.
Diffstat (limited to 'backend/hist_data/enwiki/gen_img_data.py')
| -rwxr-xr-x | backend/hist_data/enwiki/gen_img_data.py | 9 |
1 files changed, 5 insertions, 4 deletions
```diff
diff --git a/backend/hist_data/enwiki/gen_img_data.py b/backend/hist_data/enwiki/gen_img_data.py
index 05df63d..044e5a0 100755
--- a/backend/hist_data/enwiki/gen_img_data.py
+++ b/backend/hist_data/enwiki/gen_img_data.py
@@ -16,10 +16,11 @@ import html
 import urllib.parse
 import sqlite3
 
-DUMP_FILE = 'enwiki-20220501-pages-articles-multistream.xml.bz2'
-INDEX_DB = 'dump_index.db'
-IMG_DB = 'img_data.db' # The database to create
-DB_FILE = os.path.join('..', 'data.db')
+ENWIKI_DIR = os.path.dirname(os.path.realpath(__file__))
+DUMP_FILE = os.path.join(ENWIKI_DIR, 'enwiki-20220501-pages-articles-multistream.xml.bz2')
+INDEX_DB = os.path.join(ENWIKI_DIR, 'dump_index.db')
+IMG_DB = os.path.join(ENWIKI_DIR, 'img_data.db') # The database to create
+DB_FILE = os.path.join(ENWIKI_DIR, '..', 'data.db')
 
 ID_LINE_REGEX = re.compile(r'<id>(.*)</id>')
 IMG_LINE_REGEX = re.compile(r'.*\| *image *= *([^|]*)')
```
