diff options
| author | Terry Truong <terry06890@gmail.com> | 2022-06-10 13:24:18 +1000 |
|---|---|---|
| committer | Terry Truong <terry06890@gmail.com> | 2022-06-10 13:24:18 +1000 |
| commit | 354466c05753b6bee0e154972b9a640c55533871 (patch) | |
| tree | 9bb50c22f6c2d3eb5f12d73ef4c086f64ae710a7 /backend/data/dbpedia/genData.py | |
| parent | f8fa9ae3dd1571fa2912067b6eed010ea5d928e9 (diff) | |
Add script for merging eol and enwiki image sets
Diffstat (limited to 'backend/data/dbpedia/genData.py')
| -rwxr-xr-x | backend/data/dbpedia/genData.py | 3 |
1 file changed, 2 insertions, 1 deletion
```diff
diff --git a/backend/data/dbpedia/genData.py b/backend/data/dbpedia/genData.py
index 7b48ac4..41c48a8 100755
--- a/backend/data/dbpedia/genData.py
+++ b/backend/data/dbpedia/genData.py
@@ -24,7 +24,8 @@ dbCur = dbCon.cursor()
 # Read/store labels
 print("Reading/storing label data")
 dbCur.execute("CREATE TABLE labels (iri TEXT PRIMARY KEY, label TEXT)")
-dbCur.execute("CREATE INDEX labels_idx ON labels(label COLLATE NOCASE)")
+dbCur.execute("CREATE INDEX labels_idx ON labels(label)")
+dbCur.execute("CREATE INDEX labels_idx_nc ON labels(label COLLATE NOCASE)")
 labelLineRegex = re.compile(r'<([^>]+)> <[^>]+> "((?:[^"]|\\")+)"@en \.\n')
 lineNum = 0
 with bz2.open(labelsFile, mode='rt') as file:
```
