diff options
| author | Terry Truong <terry06890@gmail.com> | 2022-05-01 17:09:04 +1000 |
|---|---|---|
| committer | Terry Truong <terry06890@gmail.com> | 2022-05-01 17:11:34 +1000 |
| commit | 882cd54fa955b4fada612574ef13bdab1608f1de (patch) | |
| tree | 7e12da668c92e334d4b921248da7ae4ad892c85c /backend/data/genEolNameData.py | |
| parent | 391987ac31afeffee7ba5f82b31d095cd0c9f59f (diff) | |
Add fuzzy-search via sqlite extension spellfix1
Also add delay between client search-suggestion requests when search
input undergoes multiple quick changes
Diffstat (limited to 'backend/data/genEolNameData.py')
| -rwxr-xr-x | backend/data/genEolNameData.py | 11 |
1 file changed, 7 insertions, 4 deletions
```diff
diff --git a/backend/data/genEolNameData.py b/backend/data/genEolNameData.py
index 5070fd7..46e109a 100755
--- a/backend/data/genEolNameData.py
+++ b/backend/data/genEolNameData.py
@@ -1,7 +1,5 @@
 #!/usr/bin/python3
 
-#
-
 import sys, re
 import csv, sqlite3
 
@@ -13,6 +11,9 @@ usageInfo += "\n"
 usageInfo += "Expects a CSV header describing lines with format:\n"
 usageInfo += " page_id, canonical_form, vernacular_string, language_code,\n"
 usageInfo += " resource_name, is_preferred_by_resource, is_preferred_by_eol\n"
+if len(sys.argv) > 1:
+	print(usageInfo, file=sys.stderr)
+	sys.exit(1)
 
 vnamesFile = "eol/vernacularNames.csv"
 dbFile = "data.db"
@@ -54,6 +55,7 @@ with open(vnamesFile, newline="") as csvfile:
 		# Add to maps
 		updateMaps(name1, pid, True, False)
 		updateMaps(name2, pid, False, preferred)
+
 # Open db connection
 dbCon = sqlite3.connect(dbFile)
 dbCur = dbCon.cursor()
@@ -62,9 +64,9 @@ dbCur.execute("CREATE TABLE names(name TEXT, alt_name TEXT, eol_id INT, pref_alt
 # Iterate through 'nodes' table, resolving to canonical-names
 usedPids = set()
 unresolvedNodeNames = set()
-cur2 = dbCon.cursor()
+dbCur2 = dbCon.cursor()
 iterationNum = 0
-for row in cur2.execute("SELECT name FROM nodes"):
+for row in dbCur2.execute("SELECT name FROM nodes"):
 	name = row[0]
 	iterationNum += 1
 	if iterationNum % 10000 == 0:
@@ -108,5 +110,6 @@ for name in unresolvedNodeNames:
 	for n in altNames:
 		isPreferred = 1 if (n == preferredName) else 0
 		dbCur.execute("INSERT INTO names VALUES (?, ?, ?, ?)", (name, n, pidToUse, isPreferred))
+# Close db
 dbCon.commit()
 dbCon.close()
```
