diff options
| author | Terry Truong <terry06890@gmail.com> | 2022-04-27 00:29:20 +1000 |
|---|---|---|
| committer | Terry Truong <terry06890@gmail.com> | 2022-04-27 01:38:04 +1000 |
| commit | 6f52cd6b07970010c40270003d63aa74f84f6ae9 (patch) | |
| tree | f23c0a9ebc50db89463621516ab643d5c76c1590 /backend | |
| parent | 55e281a57c2ac9acb18836ea7a48f5a553d924e2 (diff) | |
Use EOL vernacular-names data for searching
Add data/eolNamesToSqlite.py to read EOL vernacular-names data and add
to sqlite db. Adjust server.py to handle search requests, and adjust
SearchModal to make them.
Diffstat (limited to 'backend')
| -rw-r--r-- | backend/data/eol/README.md | 8 | ||||
| -rwxr-xr-x | backend/data/eolNamesToSqlite.py | 56 | ||||
| -rw-r--r-- | backend/data/otol/README.md | 6 | ||||
| -rwxr-xr-x | backend/data/otolToSqlite.py | 2 | ||||
| -rwxr-xr-x | backend/server.py | 32 |
5 files changed, 92 insertions, 12 deletions
diff --git a/backend/data/eol/README.md b/backend/data/eol/README.md
index e69de29..ed970d2 100644
--- a/backend/data/eol/README.md
+++ b/backend/data/eol/README.md
@@ -0,0 +1,8 @@
+Files
+=====
+- images\_list.tgz
+  Obtained from https://opendata.eol.org/dataset/images-list on 24/04/2022
+  Listed as being last updated on 05/02/2020
+- vernacular\_names.csv
+  Obtained from https://opendata.eol.org/dataset/vernacular-names on 24/04/2022
+  Listed as being last updated on 27/10/2020
diff --git a/backend/data/eolNamesToSqlite.py b/backend/data/eolNamesToSqlite.py
new file mode 100755
index 0000000..4013a4c
--- /dev/null
+++ b/backend/data/eolNamesToSqlite.py
@@ -0,0 +1,56 @@
+#!/usr/bin/python3
+
+import sys, re
+import csv, sqlite3
+
+vnamesFile = "eol/vernacular_names.csv"
+dbFile = "data.db"
+
+# Read in vernacular-names data
+nameToPids = {}
+pidToNames = {}
+def updateMaps(name, pid):
+	if name not in nameToPids:
+		nameToPids[name] = {pid}
+	elif pid not in nameToPids[name]:
+		nameToPids[name].add(pid)
+	if pid not in pidToNames:
+		pidToNames[pid] = {name}
+	elif name not in pidToNames[pid]:
+		pidToNames[pid].add(name)
+with open(vnamesFile, newline="") as csvfile:
+	reader = csv.reader(csvfile)
+	lineNum = 0
+	for row in reader:
+		lineNum += 1
+		if lineNum == 1:
+			continue
+		pid = int(row[0])
+		name1 = re.sub(r"<[^>]+>", "", row[1].lower())
+		name2 = row[2].lower()
+		# Add to maps
+		updateMaps(name1, pid)
+		updateMaps(name2, pid)
+# Open db connection
+dbCon = sqlite3.connect(dbFile)
+cur = dbCon.cursor()
+# Create 'names' table
+cur.execute("CREATE TABLE names(name TEXT, alt_name TEXT, eol_id INT, PRIMARY KEY(name, alt_name))")
+# Iterate through 'nodes' table
+cur2 = dbCon.cursor()
+iterationNum = 0
+for row in cur2.execute("SELECT name FROM nodes"):
+	name = row[0]
+	iterationNum += 1
+	if iterationNum % 10000 == 0:
+		print("Iteration {}".format(iterationNum))
+	# If name matches a vernacular-names name, add alt-name entries to the 'names' table
+	if name in nameToPids:
+		altNames = {name}
+		for pid in nameToPids[name]:
+			for n in pidToNames[pid]:
+				altNames.add(n)
+		for n in altNames:
+			cur.execute("INSERT INTO names VALUES (?, ?, ?)", (name, n, pid))
+dbCon.commit()
+dbCon.close()
diff --git a/backend/data/otol/README.md b/backend/data/otol/README.md
index e69de29..f720772 100644
--- a/backend/data/otol/README.md
+++ b/backend/data/otol/README.md
@@ -0,0 +1,6 @@
+Files
+=====
+- labelled\_supertree\_ottnames.tre
+  Obtained from https://tree.opentreeoflife.org/about/synthesis-release/v13.4
+- annotations.json
+  Obtained from https://tree.opentreeoflife.org/about/synthesis-release/v13.4
diff --git a/backend/data/otolToSqlite.py b/backend/data/otolToSqlite.py
index 93ed294..2ee47b7 100755
--- a/backend/data/otolToSqlite.py
+++ b/backend/data/otolToSqlite.py
@@ -29,7 +29,7 @@ if len(sys.argv) > 1:
 treeFile = "otol/labelled_supertree_ottnames.tre"
 annFile = "otol/annotations.json"
-dbFile = "otol.db"
+dbFile = "data.db"
 
 nodeMap = {} # Maps node names to node objects
 idToName = {} # Maps node IDs to names
diff --git a/backend/server.py b/backend/server.py
index ec43ff3..ded74d6 100755
--- a/backend/server.py
+++ b/backend/server.py
@@ -6,10 +6,9 @@ import urllib.parse
 
 hostname = "localhost"
 port = 8000
-dbFile = "data/otol.db"
-tolnodeReqDepth = 2
-	# For a /tolnode/name1 request, respond with name1's node, and descendent nodes in a subtree to some depth
-	# A depth of 0 means only respond with one node
+dbFile = "data/data.db"
+tolnodeReqDepth = 1
+	# For a /node?name=name1 request, respond with name1's node, and descendent nodes in a subtree to some depth > 0
 
 usageInfo = f"usage: {sys.argv[0]}\n"
 usageInfo += "Starts a server that listens for GET requests to http://" + hostname + ":" + str(port) + ".\n"
@@ -20,13 +19,19 @@ usageInfo += "    Responds with a map from names to node objects, representing\n
 usageInfo += "    nodes name1, and child nodes up to depth " + str(tolnodeReqDepth) + ".\n"
 usageInfo += "If type1 is 'children': Like 'node', but excludes node name1.\n"
 usageInfo += "If type1 is 'chain': Like 'node', but gets nodes from name1 up to the root, and their direct children.\n"
+usageInfo += "If type1 is 'search': Responds with a tolnode name that has alt-name name1, or null.\n"
 
 dbCon = sqlite3.connect(dbFile)
-def lookupName(name):
+def lookupNode(name):
 	cur = dbCon.cursor()
 	cur.execute("SELECT name, data FROM nodes WHERE name = ?", (name,))
 	row = cur.fetchone()
 	return row[1] if row != None else None
+def lookupName(name):
+	cur = dbCon.cursor()
+	cur.execute("SELECT name, alt_name FROM names WHERE alt_name = ?", (name,))
+	row = cur.fetchone()
+	return json.dumps(row[0]) if row != None else None
 
 class DbServer(BaseHTTPRequestHandler):
 	def do_GET(self):
@@ -42,19 +47,19 @@ class DbServer(BaseHTTPRequestHandler):
 			print(name)
 			# Check query string
 			if reqType == "node":
-				nodeJson = lookupName(name)
+				nodeJson = lookupNode(name)
 				if nodeJson != None:
 					results = []
 					getResultsUntilDepth(name, nodeJson, tolnodeReqDepth, results)
 					self.respondJson(nodeResultsToJSON(results))
 					return
 			elif reqType == "children":
-				nodeJson = lookupName(name)
+				nodeJson = lookupNode(name)
 				if nodeJson != None:
 					obj = json.loads(nodeJson)
 					results = []
 					for childName in obj["children"]:
-						nodeJson = lookupName(childName)
+						nodeJson = lookupNode(childName)
 						if nodeJson != None:
 							getResultsUntilDepth(childName, nodeJson, tolnodeReqDepth, results)
 					self.respondJson(nodeResultsToJSON(results))
@@ -63,7 +68,7 @@ class DbServer(BaseHTTPRequestHandler):
 				results = []
 				ranOnce = False
 				while True:
-					jsonResult = lookupName(name)
+					jsonResult = lookupNode(name)
 					if jsonResult == None:
 						if ranOnce:
 							print("ERROR: Parent-chain node {} not found".format(name), file=sys.stderr)
@@ -76,7 +81,7 @@ class DbServer(BaseHTTPRequestHandler):
 					else:
 						internalFail = False
 						for childName in obj["children"]:
-							jsonResult = lookupName(childName)
+							jsonResult = lookupNode(childName)
 							if jsonResult == None:
 								print("ERROR: Parent-chain-child node {} not found".format(name), file=sys.stderr)
 								internalFail = True
@@ -90,6 +95,11 @@ class DbServer(BaseHTTPRequestHandler):
 						return
 					else:
 						name = obj["parent"]
+			elif reqType == "search":
+				nameJson = lookupName(name)
+				if nameJson != None:
+					self.respondJson(nameJson)
+					return
 		self.send_response(404)
 		self.end_headers()
 		self.end_headers()
@@ -104,7 +114,7 @@ def getResultsUntilDepth(name, nodeJson, depth, results):
 	if depth > 0:
 		obj = json.loads(nodeJson)
 		for childName in obj["children"]:
-			childJson = lookupName(childName)
+			childJson = lookupNode(childName)
 			if childJson != None:
 				getResultsUntilDepth(childName, childJson, depth-1, results)
 def nodeResultsToJSON(results):
