about | summary | refs | log | tree | commit | diff
path: root/backend/data/enwiki/genData.py
diff options
context:
space:
mode:
author Terry Truong <terry06890@gmail.com> 2022-06-03 11:03:25 +1000
committer Terry Truong <terry06890@gmail.com> 2022-06-03 11:03:25 +1000
commit 811946498edc472d91e5ca8d41a4a0568e0d6e8f (patch)
tree 3ec4f0950950bc7b3cff782f4f9c2b13f9b51cb3 /backend/data/enwiki/genData.py
parent 515e02b9453f7740d7429ad7e11d913e32e5ffdb (diff)
Adjust enwiki dump-index-db and lookup script to include wiki-ids
Diffstat (limited to 'backend/data/enwiki/genData.py')
-rwxr-xr-x backend/data/enwiki/genData.py 2
1 file changed, 1 insertion, 1 deletion
diff --git a/backend/data/enwiki/genData.py b/backend/data/enwiki/genData.py
index f439d11..3e60bb5 100755
--- a/backend/data/enwiki/genData.py
+++ b/backend/data/enwiki/genData.py
@@ -18,7 +18,7 @@ enwikiDb = "enwikiData.db"
# Some regexps and functions for parsing wikitext
descLineRegex = re.compile("^ *[A-Z'\"]")
embeddedHtmlRegex = re.compile(r"<[^<]+/>|<!--[^<]+-->|<[^</]+>([^<]*|[^<]*<[^<]+>[^<]*)</[^<]+>|<[^<]+$")
- # Recognises a self-closing HTML tag, a tag 0 children, tag with 1 child with 0 children, or unclosed tag
+ # Recognises a self-closing HTML tag, a tag with 0 children, tag with 1 child with 0 children, or unclosed tag
convertTemplateRegex = re.compile(r"{{convert\|(\d[^|]*)\|(?:(to|-)\|(\d[^|]*)\|)?([a-z][^|}]*)[^}]*}}")
parensGrpRegex = re.compile(r" \([^()]*\)")
leftoverBraceRegex = re.compile(r"(?:{\||{{).*")