From ff638e269d439c853b5182b68ff10777f12597f1 Mon Sep 17 00:00:00 2001
From: Terry Truong
Date: Sun, 5 Feb 2023 13:55:00 +1100
Subject: Use relative imports between data generation scripts

Avoids the need for code that modifies sys.path, but requires running of
the scripts using 'python -m' with backend/ as cwd. Also expects constants
like DB_FILE to be non-relative, due to running from backend/.
---
 backend/hist_data/enwiki/gen_desc_data.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'backend/hist_data/enwiki/gen_desc_data.py')

diff --git a/backend/hist_data/enwiki/gen_desc_data.py b/backend/hist_data/enwiki/gen_desc_data.py
index 194afe8..b866c1e 100755
--- a/backend/hist_data/enwiki/gen_desc_data.py
+++ b/backend/hist_data/enwiki/gen_desc_data.py
@@ -18,8 +18,9 @@ import html
 import mwxml
 import mwparserfromhell
-DUMP_FILE = 'enwiki-20220501-pages-articles-multistream.xml.bz2' # Had about 22e6 pages
-DB_FILE = 'desc_data.db'
+ENWIKI_DIR = os.path.dirname(os.path.realpath(__file__))
+DUMP_FILE = os.path.join(ENWIKI_DIR, 'enwiki-20220501-pages-articles-multistream.xml.bz2') # Had about 22e6 pages
+DB_FILE = os.path.join(ENWIKI_DIR, 'desc_data.db')
 DESC_LINE_REGEX = re.compile('^ *[A-Z\'"]')
 EMBEDDED_HTML_REGEX = re.compile(r'<[^<]+/>||<[^([^<]*|[^<]*<[^<]+>[^<]*)|<[^<]+$')
--
cgit v1.2.3