aboutsummaryrefslogtreecommitdiff
path: root/backend
diff options
context:
space:
mode:
Diffstat (limited to 'backend')
-rw-r--r--backend/data/eol/README.md8
-rwxr-xr-xbackend/data/eolNamesToSqlite.py56
-rw-r--r--backend/data/otol/README.md6
-rwxr-xr-xbackend/data/otolToSqlite.py2
-rwxr-xr-xbackend/server.py32
5 files changed, 92 insertions, 12 deletions
diff --git a/backend/data/eol/README.md b/backend/data/eol/README.md
index e69de29..ed970d2 100644
--- a/backend/data/eol/README.md
+++ b/backend/data/eol/README.md
@@ -0,0 +1,8 @@
+Files
+=====
+- images\_list.tgz
+ Obtained from https://opendata.eol.org/dataset/images-list on 24/04/2022
+ Listed as being last updated on 05/02/2020
+- vernacular\_names.csv
+ Obtained from https://opendata.eol.org/dataset/vernacular-names on 24/04/2022
+ Listed as being last updated on 27/10/2020
diff --git a/backend/data/eolNamesToSqlite.py b/backend/data/eolNamesToSqlite.py
new file mode 100755
index 0000000..4013a4c
--- /dev/null
+++ b/backend/data/eolNamesToSqlite.py
@@ -0,0 +1,56 @@
+#!/usr/bin/python3
+
+import sys, re
+import csv, sqlite3
+
+vnamesFile = "eol/vernacular_names.csv" # Input CSV of names, opened relative to the working directory
+dbFile = "data.db" # Sqlite db whose 'nodes' table is read, and in which a 'names' table gets created
+
+# Read in vernacular-names data
+nameToPids = {} # Maps each name to the set of IDs it was listed with
+pidToNames = {} # Maps each ID to the set of names it was listed with
+def updateMaps(name, pid):
+    """ Records, in both lookup maps, that 'name' and 'pid' were listed together """
+    # setdefault() supplies an empty set the first time a key is seen,
+    # and set.add() is a no-op for members that are already present,
+    # so no explicit membership checks are needed
+    pidsOfName = nameToPids.setdefault(name, set())
+    pidsOfName.add(pid)
+    namesOfPid = pidToNames.setdefault(pid, set())
+    namesOfPid.add(name)
+with open(vnamesFile, newline="") as csvfile:
+    rows = csv.reader(csvfile)
+    # The first row is skipped rather than parsed, so consume it up front
+    next(rows, None)
+    for fields in rows:
+        # Column 0 holds the integer ID that both name columns are filed under
+        pid = int(fields[0])
+        # Column 1 is lower-cased, and has anything of the form <...> stripped out
+        name1 = re.sub(r"<[^>]+>", "", fields[1].lower())
+        name2 = fields[2].lower()
+        # Add to maps
+        updateMaps(name1, pid)
+        updateMaps(name2, pid)
+# Open db connection
+dbCon = sqlite3.connect(dbFile)
+cur = dbCon.cursor()
+# Create 'names' table, which holds one row per (node name, alt-name) pair
+cur.execute("CREATE TABLE names(name TEXT, alt_name TEXT, eol_id INT, PRIMARY KEY(name, alt_name))")
+# Iterate through 'nodes' table, using a second cursor so the inserts don't reset the SELECT's results
+cur2 = dbCon.cursor()
+for (iterationNum, row) in enumerate(cur2.execute("SELECT name FROM nodes"), 1):
+    name = row[0]
+    if iterationNum % 10000 == 0:
+        print("Iteration {}".format(iterationNum))
+    # If name matches a vernacular-names name, add alt-name entries to the 'names' table
+    if name in nameToPids:
+        # Map each alt-name to an ID that actually lists it (the smallest one, for reproducible output),
+        # instead of tagging every row with whichever ID the loop below happened to visit last
+        altNameToPid = {}
+        for pid in sorted(nameToPids[name]):
+            for n in pidToNames[pid]:
+                altNameToPid.setdefault(n, pid)
+        for (n, pid) in altNameToPid.items():
+            cur.execute("INSERT INTO names VALUES (?, ?, ?)", (name, n, pid))
+dbCon.commit()
+dbCon.close()
diff --git a/backend/data/otol/README.md b/backend/data/otol/README.md
index e69de29..f720772 100644
--- a/backend/data/otol/README.md
+++ b/backend/data/otol/README.md
@@ -0,0 +1,6 @@
+Files
+=====
+- labelled\_supertree\_ottnames.tre
+ Obtained from https://tree.opentreeoflife.org/about/synthesis-release/v13.4
+- annotations.json
+ Obtained from https://tree.opentreeoflife.org/about/synthesis-release/v13.4
diff --git a/backend/data/otolToSqlite.py b/backend/data/otolToSqlite.py
index 93ed294..2ee47b7 100755
--- a/backend/data/otolToSqlite.py
+++ b/backend/data/otolToSqlite.py
@@ -29,7 +29,7 @@ if len(sys.argv) > 1:
treeFile = "otol/labelled_supertree_ottnames.tre"
annFile = "otol/annotations.json"
-dbFile = "otol.db"
+dbFile = "data.db"
nodeMap = {} # Maps node names to node objects
idToName = {} # Maps node IDs to names
diff --git a/backend/server.py b/backend/server.py
index ec43ff3..ded74d6 100755
--- a/backend/server.py
+++ b/backend/server.py
@@ -6,10 +6,9 @@ import urllib.parse
hostname = "localhost"
port = 8000
-dbFile = "data/otol.db"
-tolnodeReqDepth = 2
- # For a /tolnode/name1 request, respond with name1's node, and descendent nodes in a subtree to some depth
- # A depth of 0 means only respond with one node
+dbFile = "data/data.db"
+tolnodeReqDepth = 1
+ # For a /node?name=name1 request, respond with name1's node, and descendent nodes in a subtree to some depth > 0
usageInfo = f"usage: {sys.argv[0]}\n"
usageInfo += "Starts a server that listens for GET requests to http://" + hostname + ":" + str(port) + ".\n"
@@ -20,13 +19,19 @@ usageInfo += " Responds with a map from names to node objects, representing\n
usageInfo += " nodes name1, and child nodes up to depth " + str(tolnodeReqDepth) + ".\n"
usageInfo += "If type1 is 'children': Like 'node', but excludes node name1.\n"
usageInfo += "If type1 is 'chain': Like 'node', but gets nodes from name1 up to the root, and their direct children.\n"
+usageInfo += "If type1 is 'search': Responds with a tolnode name that has alt-name name1, or null.\n"
dbCon = sqlite3.connect(dbFile)
-def lookupName(name):
+def lookupNode(name):
cur = dbCon.cursor()
cur.execute("SELECT name, data FROM nodes WHERE name = ?", (name,))
row = cur.fetchone()
return row[1] if row != None else None
+def lookupName(name):
+    """ Returns the JSON-encoded name of a node that has alt-name 'name', or None if there isn't one """
+    query = "SELECT name, alt_name FROM names WHERE alt_name = ?"
+    match = dbCon.execute(query, (name,)).fetchone()
+    return None if match is None else json.dumps(match[0])
class DbServer(BaseHTTPRequestHandler):
def do_GET(self):
@@ -42,19 +47,19 @@ class DbServer(BaseHTTPRequestHandler):
print(name)
# Check query string
if reqType == "node":
- nodeJson = lookupName(name)
+ nodeJson = lookupNode(name)
if nodeJson != None:
results = []
getResultsUntilDepth(name, nodeJson, tolnodeReqDepth, results)
self.respondJson(nodeResultsToJSON(results))
return
elif reqType == "children":
- nodeJson = lookupName(name)
+ nodeJson = lookupNode(name)
if nodeJson != None:
obj = json.loads(nodeJson)
results = []
for childName in obj["children"]:
- nodeJson = lookupName(childName)
+ nodeJson = lookupNode(childName)
if nodeJson != None:
getResultsUntilDepth(childName, nodeJson, tolnodeReqDepth, results)
self.respondJson(nodeResultsToJSON(results))
@@ -63,7 +68,7 @@ class DbServer(BaseHTTPRequestHandler):
results = []
ranOnce = False
while True:
- jsonResult = lookupName(name)
+ jsonResult = lookupNode(name)
if jsonResult == None:
if ranOnce:
print("ERROR: Parent-chain node {} not found".format(name), file=sys.stderr)
@@ -76,7 +81,7 @@ class DbServer(BaseHTTPRequestHandler):
else:
internalFail = False
for childName in obj["children"]:
- jsonResult = lookupName(childName)
+ jsonResult = lookupNode(childName)
if jsonResult == None:
print("ERROR: Parent-chain-child node {} not found".format(name), file=sys.stderr)
internalFail = True
@@ -90,6 +95,11 @@ class DbServer(BaseHTTPRequestHandler):
return
else:
name = obj["parent"]
+ elif reqType == "search":
+ nameJson = lookupName(name)
+ if nameJson != None:
+ self.respondJson(nameJson)
+ return
self.send_response(404)
self.end_headers()
self.end_headers()
@@ -104,7 +114,7 @@ def getResultsUntilDepth(name, nodeJson, depth, results):
if depth > 0:
obj = json.loads(nodeJson)
for childName in obj["children"]:
- childJson = lookupName(childName)
+ childJson = lookupNode(childName)
if childJson != None:
getResultsUntilDepth(childName, childJson, depth-1, results)
def nodeResultsToJSON(results):