about summary refs log tree commit diff
path: root/backend/data/genEolNameData.py
diff options
context:
space:
mode:
author Terry Truong <terry06890@gmail.com> 2022-05-01 17:09:04 +1000
committer Terry Truong <terry06890@gmail.com> 2022-05-01 17:11:34 +1000
commit 882cd54fa955b4fada612574ef13bdab1608f1de (patch)
tree 7e12da668c92e334d4b921248da7ae4ad892c85c /backend/data/genEolNameData.py
parent 391987ac31afeffee7ba5f82b31d095cd0c9f59f (diff)
Add fuzzy-search via sqlite extension spellfix1
Also add delay between client search-suggestion requests when search input undergoes multiple quick changes
Diffstat (limited to 'backend/data/genEolNameData.py')
-rwxr-xr-x backend/data/genEolNameData.py 11
1 files changed, 7 insertions, 4 deletions
diff --git a/backend/data/genEolNameData.py b/backend/data/genEolNameData.py
index 5070fd7..46e109a 100755
--- a/backend/data/genEolNameData.py
+++ b/backend/data/genEolNameData.py
@@ -1,7 +1,5 @@
#!/usr/bin/python3
-#
-
import sys, re
import csv, sqlite3
@@ -13,6 +11,9 @@ usageInfo += "\n"
usageInfo += "Expects a CSV header describing lines with format:\n"
usageInfo += " page_id, canonical_form, vernacular_string, language_code,\n"
usageInfo += " resource_name, is_preferred_by_resource, is_preferred_by_eol\n"
+if len(sys.argv) > 1:
+ print(usageInfo, file=sys.stderr)
+ sys.exit(1)
vnamesFile = "eol/vernacularNames.csv"
dbFile = "data.db"
@@ -54,6 +55,7 @@ with open(vnamesFile, newline="") as csvfile:
# Add to maps
updateMaps(name1, pid, True, False)
updateMaps(name2, pid, False, preferred)
+
# Open db connection
dbCon = sqlite3.connect(dbFile)
dbCur = dbCon.cursor()
@@ -62,9 +64,9 @@ dbCur.execute("CREATE TABLE names(name TEXT, alt_name TEXT, eol_id INT, pref_alt
# Iterate through 'nodes' table, resolving to canonical-names
usedPids = set()
unresolvedNodeNames = set()
-cur2 = dbCon.cursor()
+dbCur2 = dbCon.cursor()
iterationNum = 0
-for row in cur2.execute("SELECT name FROM nodes"):
+for row in dbCur2.execute("SELECT name FROM nodes"):
name = row[0]
iterationNum += 1
if iterationNum % 10000 == 0:
@@ -108,5 +110,6 @@ for name in unresolvedNodeNames:
for n in altNames:
isPreferred = 1 if (n == preferredName) else 0
dbCur.execute("INSERT INTO names VALUES (?, ?, ?, ?)", (name, n, pidToUse, isPreferred))
+# Close db
dbCon.commit()
dbCon.close()