aboutsummaryrefslogtreecommitdiff
path: root/backend/data/dbpedia/genData.py
diff options
context:
space:
mode:
author Terry Truong <terry06890@gmail.com> 2022-06-10 13:24:18 +1000
committer Terry Truong <terry06890@gmail.com> 2022-06-10 13:24:18 +1000
commit 354466c05753b6bee0e154972b9a640c55533871 (patch)
tree 9bb50c22f6c2d3eb5f12d73ef4c086f64ae710a7 /backend/data/dbpedia/genData.py
parent f8fa9ae3dd1571fa2912067b6eed010ea5d928e9 (diff)
Add script for merging eol and enwiki image sets
Diffstat (limited to 'backend/data/dbpedia/genData.py')
-rwxr-xr-x backend/data/dbpedia/genData.py 3
1 files changed, 2 insertions, 1 deletions
diff --git a/backend/data/dbpedia/genData.py b/backend/data/dbpedia/genData.py
index 7b48ac4..41c48a8 100755
--- a/backend/data/dbpedia/genData.py
+++ b/backend/data/dbpedia/genData.py
@@ -24,7 +24,8 @@ dbCur = dbCon.cursor()
# Read/store labels
print("Reading/storing label data")
dbCur.execute("CREATE TABLE labels (iri TEXT PRIMARY KEY, label TEXT)")
-dbCur.execute("CREATE INDEX labels_idx ON labels(label COLLATE NOCASE)")
+dbCur.execute("CREATE INDEX labels_idx ON labels(label)")
+dbCur.execute("CREATE INDEX labels_idx_nc ON labels(label COLLATE NOCASE)")
labelLineRegex = re.compile(r'<([^>]+)> <[^>]+> "((?:[^"]|\\")+)"@en \.\n')
lineNum = 0
with bz2.open(labelsFile, mode='rt') as file: