aboutsummaryrefslogtreecommitdiff
path: root/backend/data
diff options
context:
space:
mode:
Diffstat (limited to 'backend/data')
-rw-r--r-- backend/data/README.md 3
-rwxr-xr-x backend/data/genEolNameData.py 19
2 files changed, 20 insertions, 2 deletions
diff --git a/backend/data/README.md b/backend/data/README.md
index 2a6344c..6ec629a 100644
--- a/backend/data/README.md
+++ b/backend/data/README.md
@@ -8,7 +8,8 @@ File Generation Process
2 Name Data for Search
1 Obtain data in eol/, as specified in its README.
2 Run genEolNameData.py, which adds 'names' and 'eol\_ids' tables to data.db,
- using data in eol/vernacularNames.csv and the 'nodes' table.
+ using data in eol/vernacularNames.csv and the 'nodes' table, and possibly
+ genEolNameDataPickedIds.txt.
3 Image Data
1 In eol/, run downloadImgs.py to download EOL images into eol/imgsForReview/.
It uses data in eol/imagesList.db, and the 'eol\_ids' table.
diff --git a/backend/data/genEolNameData.py b/backend/data/genEolNameData.py
index 4945386..1b80dd8 100755
--- a/backend/data/genEolNameData.py
+++ b/backend/data/genEolNameData.py
@@ -1,6 +1,6 @@
#!/usr/bin/python3
-import sys, re
+import sys, re, os
import html, csv, sqlite3
usageInfo = f"usage: {sys.argv[0]}\n"
@@ -18,6 +18,7 @@ if len(sys.argv) > 1:
vnamesFile = "eol/vernacularNames.csv"
dbFile = "data.db"
NAMES_TO_SKIP = {"unknown", "unknown species", "unidentified species"}
+pickedIdsFile = "genEolNameDataPickedIds.txt"
# Read in vernacular-names data
# Note: Canonical-names may have multiple pids
@@ -62,6 +63,15 @@ with open(vnamesFile, newline="") as csvfile:
updateMaps(name1, pid, True, False)
if lang == "eng" and name2 != "":
updateMaps(name2, pid, False, preferred)
+# Check for manually-picked pids
+print("Checking for manually-picked pids")
+nameToPickedPid = {}
+if os.path.exists(pickedIdsFile):
+ with open(pickedIdsFile) as file:
+ for line in file:
+ (name, _, eolId) = line.rstrip().partition("|")
+ nameToPickedPid[name] = None if eolId == "" else int(eolId)
+print(f"Found {len(nameToPickedPid)}")
# Open db connection
dbCon = sqlite3.connect(dbFile)
dbCur = dbCon.cursor()
@@ -85,11 +95,18 @@ def addToDb(nodeName, pidToUse):
for n in altNames:
isPreferred = 1 if (n == preferredName) else 0
dbCur.execute("INSERT INTO names VALUES (?, ?, ?)", (nodeName, n, isPreferred))
+for name in nameToPickedPid: # Add manually-picked pids
+ pickedPid = nameToPickedPid[name]
+ usedPids.add(pickedPid)
+ if pickedPid != None:
+ addToDb(name, pickedPid)
iterationNum = 0
for (name,) in dbCur2.execute("SELECT name FROM nodes"):
iterationNum += 1
if iterationNum % 10000 == 0:
print(f"Loop 1 iteration {iterationNum}")
+ if name in nameToPickedPid:
+ continue
# If name matches a canonical-name, add alt-name entries to 'names' table
if name in canonicalNameToPids:
pidToUse = None