From e8e58a3bb9dc233dacf573973457c5b48d369503 Mon Sep 17 00:00:00 2001
From: Terry Truong
Date: Tue, 30 Aug 2022 12:27:42 +1000
Subject: Add scripts for generating eol/enwiki mappings

- New data sources: OTOL taxonomy, EOL provider-ids, Wikidata dump
- Add 'node_iucn' table
- Remove 'redirected' field from 'wiki_ids' table
- Make 'eol_ids' table have 'name' as the primary key
- Combine name-generation scripts into genNameData.py
- Combine description-generation scripts into genDescData.py
---
 backend/tolData/dbpedia/genDescData.py | 1 +
 1 file changed, 1 insertion(+)

(limited to 'backend/tolData/dbpedia')

diff --git a/backend/tolData/dbpedia/genDescData.py b/backend/tolData/dbpedia/genDescData.py
index a23199d..8756a40 100755
--- a/backend/tolData/dbpedia/genDescData.py
+++ b/backend/tolData/dbpedia/genDescData.py
@@ -41,6 +41,7 @@ with bz2.open(labelsFile, mode='rt') as file:
 
 print("Reading/storing wiki page ids")
 dbCur.execute("CREATE TABLE ids (iri TEXT PRIMARY KEY, id INT)")
+dbCur.execute("CREATE INDEX ids_idx ON ids(id)")
 idLineRegex = re.compile(r'<([^>]+)> <[^>]+> "(\d+)".*\n')
 lineNum = 0
 with bz2.open(idsFile, mode='rt') as file:
-- 
cgit v1.2.3