about | summary | refs | log | tree | commit | diff
path: root/backend/tolData/genDescData.py
diff options
context:
space:
mode:
Diffstat (limited to 'backend/tolData/genDescData.py')
-rwxr-xr-x  backend/tolData/genDescData.py  19
1 files changed, 9 insertions, 10 deletions
diff --git a/backend/tolData/genDescData.py b/backend/tolData/genDescData.py
index 28971f4..bb1cbc8 100755
--- a/backend/tolData/genDescData.py
+++ b/backend/tolData/genDescData.py
@@ -1,13 +1,12 @@
#!/usr/bin/python3
-import sys, os, re
import sqlite3
import argparse
-parser = argparse.ArgumentParser(description='''
+parser = argparse.ArgumentParser(description="""
Maps nodes to short descriptions, using data from DBpedia and
Wikipedia, and stores results in the database.
-''', formatter_class=argparse.RawDescriptionHelpFormatter)
+""", formatter_class=argparse.RawDescriptionHelpFormatter)
args = parser.parse_args()
dbpediaDb = 'dbpedia/descData.db'
@@ -20,7 +19,7 @@ dbCur = dbCon.cursor()
dbCur.execute('CREATE TABLE descs (wiki_id INT PRIMARY KEY, desc TEXT, from_dbp INT)')
print('Getting node mappings')
-nodeToWikiId = {}
+nodeToWikiId: dict[str, int] = {}
for name, wikiId in dbCur.execute('SELECT name, id from wiki_ids'):
nodeToWikiId[name] = wikiId
@@ -28,7 +27,7 @@ print('Reading data from DBpedia')
dbpCon = sqlite3.connect(dbpediaDb)
dbpCur = dbpCon.cursor()
print('Getting node IRIs')
-nodeToIri = {}
+nodeToIri: dict[str, str] = {}
iterNum = 0
for name, wikiId in nodeToWikiId.items():
iterNum += 1
@@ -36,7 +35,7 @@ for name, wikiId in nodeToWikiId.items():
print(f'At iteration {iterNum}')
#
row = dbpCur.execute('SELECT iri FROM ids where id = ?', (wikiId,)).fetchone()
- if row != None:
+ if row is not None:
nodeToIri[name] = row[0]
print('Resolving redirects')
iterNum = 0
@@ -46,7 +45,7 @@ for name, iri in nodeToIri.items():
print(f'At iteration {iterNum}')
#
row = dbpCur.execute('SELECT target FROM redirects where iri = ?', (iri,)).fetchone()
- if row != None:
+ if row is not None:
nodeToIri[name] = row[0]
print('Adding descriptions')
iterNum = 0
@@ -56,7 +55,7 @@ for name, iri in nodeToIri.items():
print(f'At iteration {iterNum}')
#
row = dbpCur.execute('SELECT abstract FROM abstracts WHERE iri = ?', (iri,)).fetchone()
- if row != None:
+ if row is not None:
dbCur.execute('INSERT OR IGNORE INTO descs VALUES (?, ?, ?)', (nodeToWikiId[name], row[0], 1))
del nodeToWikiId[name]
dbpCon.close()
@@ -73,7 +72,7 @@ for name, wikiId in nodeToWikiId.items():
#
query = 'SELECT pages.id FROM redirects INNER JOIN pages ON redirects.target = pages.title WHERE redirects.id = ?'
row = enwikiCur.execute(query, (wikiId,)).fetchone()
- if row != None:
+ if row is not None:
nodeToWikiId[name] = row[0]
print('Adding descriptions')
iterNum = 0
@@ -83,7 +82,7 @@ for name, wikiId in nodeToWikiId.items():
print(f'At iteration {iterNum}')
#
row = enwikiCur.execute('SELECT desc FROM descs where id = ?', (wikiId,)).fetchone()
- if row != None:
+ if row is not None:
dbCur.execute('INSERT OR IGNORE INTO descs VALUES (?, ?, ?)', (wikiId, row[0], 0))
print('Closing databases')