about summary refs log tree commit diff
path: root/backend/data/genDbpData.py
diff options
context:
space:
mode:
author Terry Truong <terry06890@gmail.com> 2022-06-15 20:48:31 +1000
committer Terry Truong <terry06890@gmail.com> 2022-06-15 20:48:31 +1000
commit eabec97d80e5f43e4710dd4c8327d0bc8519ab8c (patch)
tree 0fd6d93f306b6dc7db2b9233ce63b65ebdc994bc /backend/data/genDbpData.py
parent 3402f3d6e906afb500b95448d7d0b136b6b5ee86 (diff)
Separate node-wiki_id association from 'descs' table
Diffstat (limited to 'backend/data/genDbpData.py')
-rwxr-xr-x backend/data/genDbpData.py 9
1 files changed, 5 insertions, 4 deletions
diff --git a/backend/data/genDbpData.py b/backend/data/genDbpData.py
index 3755145..e921b6c 100755
--- a/backend/data/genDbpData.py
+++ b/backend/data/genDbpData.py
@@ -221,8 +221,8 @@ for (name, iri) in nodeToIri.items():
redirectingIriSet.add(name)
# Find descriptions, and add to db
print("Adding node description data")
-dbCur.execute("CREATE TABLE descs (name TEXT PRIMARY KEY, desc TEXT, redirected INT, wiki_id INT, from_dbp INT)")
-dbCur.execute("CREATE INDEX descs_id_idx ON descs(wiki_id)") # wiki_id intentionally left non-unique
+dbCur.execute("CREATE TABLE wiki_ids (name TEXT PRIMARY KEY, id INT, redirected INT)")
+dbCur.execute("CREATE TABLE descs (wiki_id INT PRIMARY KEY, desc TEXT, from_dbp INT)")
iterNum = 0
for (name, iri) in nodeToIri.items():
iterNum += 1
@@ -232,8 +232,9 @@ for (name, iri) in nodeToIri.items():
query = "SELECT abstract, id FROM abstracts INNER JOIN ids ON abstracts.iri = ids.iri WHERE ids.iri = ?"
row = dbpCur.execute(query, (iri,)).fetchone()
if row != None:
- dbCur.execute("INSERT INTO descs VALUES (?, ?, ?, ?, ?)",
- (name, row[0], 1 if name in redirectingIriSet else 0, row[1], 1))
+ (desc, wikiId) = row
+ dbCur.execute("INSERT INTO wiki_ids VALUES (?, ?, ?)", (name, wikiId, 1 if name in redirectingIriSet else 0))
+ dbCur.execute("INSERT OR IGNORE INTO descs VALUES (?, ?, ?)", (wikiId, desc, 1))
# Close dbs
dbCon.commit()
dbCon.close()