From 811946498edc472d91e5ca8d41a4a0568e0d6e8f Mon Sep 17 00:00:00 2001 From: Terry Truong Date: Fri, 3 Jun 2022 11:03:25 +1000 Subject: Adjust enwiki dump-index-db and lookup script to include wiki-ids --- backend/data/enwiki/genData.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'backend/data/enwiki/genData.py') diff --git a/backend/data/enwiki/genData.py b/backend/data/enwiki/genData.py index f439d11..3e60bb5 100755 --- a/backend/data/enwiki/genData.py +++ b/backend/data/enwiki/genData.py @@ -18,7 +18,7 @@ enwikiDb = "enwikiData.db" # Some regexps and functions for parsing wikitext descLineRegex = re.compile("^ *[A-Z'\"]") embeddedHtmlRegex = re.compile(r"<[^<]+/>||<[^([^<]*|[^<]*<[^<]+>[^<]*)|<[^<]+$") - # Recognises a self-closing HTML tag, a tag 0 children, tag with 1 child with 0 children, or unclosed tag + # Recognises a self-closing HTML tag, a tag with 0 children, tag with 1 child with 0 children, or unclosed tag convertTemplateRegex = re.compile(r"{{convert\|(\d[^|]*)\|(?:(to|-)\|(\d[^|]*)\|)?([a-z][^|}]*)[^}]*}}") parensGrpRegex = re.compile(r" \([^()]*\)") leftoverBraceRegex = re.compile(r"(?:{\||{{).*") -- cgit v1.2.3