aboutsummaryrefslogtreecommitdiff
path: root/backend
diff options
context:
space:
mode:
authorTerry Truong <terry06890@gmail.com>2022-06-28 02:10:17 +1000
committerTerry Truong <terry06890@gmail.com>2022-06-28 02:18:27 +1000
commitd4431d3d200a77f7abfa00e5adc848c633597352 (patch)
tree7e2fa4d1e2d64ed0d570c46af78a2363b959f9af /backend
parent96bb515a603499abb016d381f0bdb5bd51ebda92 (diff)
Convert server code to CGI scriptdeployable
Also adjust server-client API, for clarity/consistency
Diffstat (limited to 'backend')
-rw-r--r--backend/README.md9
-rwxr-xr-xbackend/cgi-bin/data.py (renamed from backend/server.py)268
2 files changed, 132 insertions, 145 deletions
diff --git a/backend/README.md b/backend/README.md
index 20263fd..3efa585 100644
--- a/backend/README.md
+++ b/backend/README.md
@@ -1,3 +1,8 @@
# Files
-- server.py: Runs the server
-- data/: For generating the server's tree-of-life database
+- cgi-bin/data.py: CGI script for providing tree-of-life data to client
+- data: Contains scripts for generating the tree-of-life database
+
+# During development
+Having generated the database as data/data.db, and with this as the current
+directory, running `python3 -m http.server --cgi 8000` allows the client to access
+the CGI script, and hence the database, via `localhost:8000/cgi-bin/data.py`.
diff --git a/backend/server.py b/backend/cgi-bin/data.py
index a1f1585..05ed81b 100755
--- a/backend/server.py
+++ b/backend/cgi-bin/data.py
@@ -1,15 +1,12 @@
#!/usr/bin/python3
-import sys, re, sqlite3, time
-import os.path
-from http.server import HTTPServer, BaseHTTPRequestHandler
+import sys, os.path, re, time
import urllib.parse
+import sqlite3
import gzip, jsonpickle
-hostname = "localhost"
-port = 8000
dbFile = "data/data.db"
-imgDir = "../public/img/"
+imgDir = "data/img/"
DEFAULT_SUGG_LIM = 5
MAX_SUGG_LIM = 50
CORS_ANY_ORIGIN = True # Used during development to avoid Cross-Origin Resource Sharing restrictions
@@ -17,16 +14,19 @@ CORS_ANY_ORIGIN = True # Used during development to avoid Cross-Origin Resource
usageInfo = f"""
Usage: {sys.argv[0]}
-Starts a server that listens for GET requests to http://" + hostname + ":" + str(port) + ".
-Responds to path+query /data/type1?name=name1 with JSON data.
-The 'name' parameter can be omitted, which specifies the root node.
-An additional query parameter tree=reduced is usable to get reduced-tree data.
+CGI script that provides tree-of-life data, in JSON form.
-If type1 is 'node': Responds with a name-to-TolNode map, describing the node and it's children.
-If type1 is 'chain': Like 'node', but also gets nodes upward to the root, and their direct children.
-If type1 is 'search': Responds with a SearchSuggResponse.
- A query parameter 'limit=n1' specifies the max number of suggestions (default 5).
-If type1 is 'info': Responds with an InfoResponse.
+Query parameters:
+- name: Provides a name, or partial name, of a tree-of-life node. If absent, the root node is used.
+- type: Specifies what data to reply with.
+ If 'node', reply with a name-to-TolNode map, describing the named node and it's children.
+ If 'sugg', reply with a SearchSuggResponse, describing search suggestions for the possibly-partial name.
+ If 'info', reply with an InfoResponse, describing the named node.
+- toroot: Used with type=node, and causes inclusion of nodes upward to the root, and their children.
+ If specified, the value should be 'true'.
+- limit: Used with type=sugg to specify the max number of suggestions.
+- rtree: Specifies whether the reduced tree should be used.
+ If specified, the value should be 'true'.
"""
if len(sys.argv) > 1:
print(usageInfo, file=sys.stderr)
@@ -49,7 +49,7 @@ class SearchSugg:
self.name = name # string
self.canonicalName = canonicalName # string | null
class SearchSuggResponse:
- " Sent as responses to 'search' requests "
+ " Sent as responses to 'sugg' requests "
def __init__(self, searchSuggs, hasMore):
self.suggs = searchSuggs # SearchSugg[]
self.hasMore = hasMore # boolean
@@ -81,54 +81,45 @@ class InfoResponse:
self.nodeInfo = nodeInfo # NodeInfo
self.subNodesInfo = subNodesInfo # [] | [NodeInfo, NodeInfo]
-# Connect to db
-dbCon = sqlite3.connect(dbFile)
-# Get root node
-dbCur = dbCon.cursor()
-query = "SELECT name FROM nodes LEFT JOIN edges ON nodes.name = edges.child WHERE edges.parent IS NULL LIMIT 1"
-(rootName,) = dbCur.execute(query).fetchone()
-
-# Some helper functions
-def lookupNodes(names, useReducedTree):
+# For data lookup
+def lookupNodes(names, useReducedTree, dbCur):
" For a set of node names, returns a name-to-TolNode map that describes those nodes "
- global dbCon
- cur = dbCon.cursor()
# Get node info
nameToNodes = {}
nodesTable = "nodes" if not useReducedTree else "r_nodes"
edgesTable = "edges" if not useReducedTree else "r_edges"
queryParamStr = ",".join(["?"] * len(names))
query = f"SELECT name, id, tips FROM {nodesTable} WHERE name IN ({queryParamStr})"
- for (nodeName, otolId, tips) in cur.execute(query, names):
+ for (nodeName, otolId, tips) in dbCur.execute(query, names):
nameToNodes[nodeName] = TolNode(otolId, [], tips=tips)
# Get child info
query = f"SELECT parent, child FROM {edgesTable} WHERE parent IN ({queryParamStr})"
- for (nodeName, childName) in cur.execute(query, names):
+ for (nodeName, childName) in dbCur.execute(query, names):
nameToNodes[nodeName].children.append(childName)
# Order children by tips
for (nodeName, node) in nameToNodes.items():
childToTips = {}
query = "SELECT name, tips FROM {} WHERE name IN ({})"
query = query.format(nodesTable, ",".join(["?"] * len(node.children)))
- for (n, tips) in cur.execute(query, node.children):
+ for (n, tips) in dbCur.execute(query, node.children):
childToTips[n] = tips
node.children.sort(key=lambda n: childToTips[n], reverse=True)
# Get parent info
query = f"SELECT parent, child, p_support FROM {edgesTable} WHERE child IN ({queryParamStr})"
- for (nodeName, childName, pSupport) in cur.execute(query, names):
+ for (nodeName, childName, pSupport) in dbCur.execute(query, names):
nameToNodes[childName].parent = nodeName
nameToNodes[childName].pSupport = (pSupport == 1)
# Get image names
idsToNames = {nameToNodes[n].otolId: n for n in nameToNodes.keys()}
query = "SELECT nodes.id from nodes INNER JOIN node_imgs ON nodes.name = node_imgs.name" \
" WHERE nodes.id IN ({})".format(",".join(["?"] * len(idsToNames)))
- for (otolId,) in cur.execute(query, list(idsToNames.keys())):
+ for (otolId,) in dbCur.execute(query, list(idsToNames.keys())):
nameToNodes[idsToNames[otolId]].imgName = otolId + ".jpg"
# Get 'linked' images for unresolved names
unresolvedNames = [n for n in nameToNodes if nameToNodes[n].imgName == None]
query = "SELECT name, otol_ids from linked_imgs WHERE name IN ({})"
query = query.format(",".join(["?"] * len(unresolvedNames)))
- for (name, otolIds) in cur.execute(query, unresolvedNames):
+ for (name, otolIds) in dbCur.execute(query, unresolvedNames):
if "," not in otolIds:
nameToNodes[name].imgName = otolIds + ".jpg"
else:
@@ -139,14 +130,12 @@ def lookupNodes(names, useReducedTree):
]
# Get preferred-name info
query = f"SELECT name, alt_name FROM names WHERE pref_alt = 1 AND name IN ({queryParamStr})"
- for (name, altName) in cur.execute(query, names):
+ for (name, altName) in dbCur.execute(query, names):
nameToNodes[name].commonName = altName
#
return nameToNodes
-def lookupName(searchStr, suggLimit, useReducedTree):
+def lookupSuggs(searchStr, suggLimit, useReducedTree, dbCur):
" For a search string, returns a SearchSuggResponse describing search suggestions "
- global dbCon
- cur = dbCon.cursor()
results = []
hasMore = False
# Get node names and alt-names
@@ -164,21 +153,21 @@ def lookupName(searchStr, suggLimit, useReducedTree):
" WHERE alt_name LIKE ? ORDER BY length(alt_name) LIMIT ?"
# Join results, and get shortest
suggs = []
- for (nodeName,) in cur.execute(query1, (searchStr + "%", suggLimit + 1)):
+ for (nodeName,) in dbCur.execute(query1, (searchStr + "%", suggLimit + 1)):
suggs.append(SearchSugg(nodeName))
- for (altName, nodeName) in cur.execute(query2, (searchStr + "%", suggLimit + 1)):
+ for (altName, nodeName) in dbCur.execute(query2, (searchStr + "%", suggLimit + 1)):
suggs.append(SearchSugg(altName, nodeName))
# If insufficient results, try substring-search
foundNames = {n.name for n in suggs}
if len(suggs) < suggLimit:
newLim = suggLimit + 1 - len(suggs)
- for (nodeName,) in cur.execute(query1, ("%" + searchStr + "%", newLim)):
+ for (nodeName,) in dbCur.execute(query1, ("%" + searchStr + "%", newLim)):
if nodeName not in foundNames:
suggs.append(SearchSugg(nodeName))
foundNames.add(nodeName)
if len(suggs) < suggLimit:
newLim = suggLimit + 1 - len(suggs)
- for (altName, nodeName) in cur.execute(query2, ("%" + searchStr + "%", suggLimit + 1)):
+ for (altName, nodeName) in dbCur.execute(query2, ("%" + searchStr + "%", suggLimit + 1)):
if altName not in foundNames:
suggs.append(SearchSugg(altName, nodeName))
foundNames.add(altName)
@@ -191,12 +180,10 @@ def lookupName(searchStr, suggLimit, useReducedTree):
hasMore = True
#
return SearchSuggResponse(results, hasMore)
-def lookupNodeInfo(name, useReducedTree):
+def lookupInfo(name, useReducedTree, dbCur):
" For a node name, returns an InfoResponse, or None "
- global dbCon
- cur = dbCon.cursor()
# Get node info
- nameToNodes = lookupNodes([name], useReducedTree)
+ nameToNodes = lookupNodes([name], useReducedTree, dbCur)
tolNode = nameToNodes[name] if name in nameToNodes else None
if tolNode == None:
return None
@@ -204,7 +191,7 @@ def lookupNodeInfo(name, useReducedTree):
match = re.fullmatch(r"\[(.+) \+ (.+)]", name)
subNames = [match.group(1), match.group(2)] if match != None else []
if len(subNames) > 0:
- nameToSubNodes = lookupNodes(subNames, useReducedTree)
+ nameToSubNodes = lookupNodes(subNames, useReducedTree, dbCur)
if len(nameToSubNodes) < 2:
print(f"ERROR: Unable to find sub-names entries for {name}", file=sys.stderr)
return None
@@ -215,7 +202,7 @@ def lookupNodeInfo(name, useReducedTree):
query = "SELECT name, desc, wiki_id, redirected, from_dbp FROM" \
" wiki_ids INNER JOIN descs ON wiki_ids.id = descs.wiki_id" \
" WHERE wiki_ids.name IN ({})".format(",".join(["?"] * len(namesToLookup)))
- for (nodeName, desc, wikiId, redirected, fromDbp) in cur.execute(query, namesToLookup):
+ for (nodeName, desc, wikiId, redirected, fromDbp) in dbCur.execute(query, namesToLookup):
nameToDescInfo[nodeName] = DescInfo(desc, wikiId, redirected == 1, fromDbp == 1)
# Get image info
nameToImgInfo = {}
@@ -225,7 +212,7 @@ def lookupNodeInfo(name, useReducedTree):
" nodes INNER JOIN node_imgs ON nodes.name = node_imgs.name" \
" INNER JOIN images ON node_imgs.img_id = images.id AND node_imgs.src = images.src" \
" WHERE nodes.id IN ({})".format(",".join(["?"] * len(idsToLookup)))
- for (id, imgId, imgSrc, url, license, artist, credit) in cur.execute(query, idsToLookup):
+ for (id, imgId, imgSrc, url, license, artist, credit) in dbCur.execute(query, idsToLookup):
nameToImgInfo[idsToNames[id]] = ImgInfo(imgId, imgSrc, url, license, artist, credit)
# Construct response
nodeInfoObjs = [
@@ -237,102 +224,97 @@ def lookupNodeInfo(name, useReducedTree):
]
return InfoResponse(nodeInfoObjs[0], nodeInfoObjs[1:])
-class DbServer(BaseHTTPRequestHandler):
- " Provides handlers for requests to the server "
- def do_GET(self):
- global rootName
- # Parse URL
- urlParts = urllib.parse.urlparse(self.path)
- path = urllib.parse.unquote(urlParts.path)
- queryDict = urllib.parse.parse_qs(urlParts.query)
- # Check first element of path
- match = re.match(r"/([^/]+)/(.+)", path)
- if match != None and match.group(1) == "data" and \
- ("tree" not in queryDict or queryDict["tree"][0] == "reduced"):
- reqType = match.group(2)
- name = queryDict["name"][0] if "name" in queryDict else rootName
- useReducedTree = "tree" in queryDict
- # Check query string
- if reqType == "node":
- tolNodes = lookupNodes([name], useReducedTree)
- if len(tolNodes) > 0:
- tolNode = tolNodes[name]
- childNodeObjs = lookupNodes(tolNode.children, useReducedTree)
- childNodeObjs[name] = tolNode
- self.respondJson(childNodeObjs)
- return
- elif reqType == "chain":
- results = {}
- ranOnce = False
- while True:
- # Get node
- tolNodes = lookupNodes([name], useReducedTree)
- if len(tolNodes) == 0:
- if not ranOnce:
- self.respondJson(results)
- return
- print(f"ERROR: Parent-chain node {name} not found", file=sys.stderr)
- break
- tolNode = tolNodes[name]
- results[name] = tolNode
- # Potentially add children
+# For handling request
+def respondJson(val):
+ content = jsonpickle.encode(val, unpicklable=False).encode("utf-8")
+ print("Content-type: application/json")
+ if "HTTP_ACCEPT_ENCODING" in os.environ and "gzip" in os.environ["HTTP_ACCEPT_ENCODING"]:
+ if len(content) > 100:
+ content = gzip.compress(content, compresslevel=5)
+ print(f"Content-length: {len(content)}")
+ print(f"Content-encoding: gzip")
+ if CORS_ANY_ORIGIN:
+ print("Access-Control-Allow-Origin: *")
+ print()
+ sys.stdout.flush()
+ sys.stdout.buffer.write(content)
+def handleReq(dbCur):
+ # Get query params
+ queryStr = os.environ["QUERY_STRING"] if "QUERY_STRING" in os.environ else ""
+ queryDict = urllib.parse.parse_qs(queryStr)
+ # Set vars from params
+ name = queryDict["name"][0] if "name" in queryDict else None
+ if name == None: # Get root node
+ query = "SELECT name FROM nodes LEFT JOIN edges ON nodes.name = edges.child WHERE edges.parent IS NULL LIMIT 1"
+ (name,) = dbCur.execute(query).fetchone()
+ reqType = queryDict["type"][0] if "type" in queryDict else None
+ useReducedTree = "rtree" in queryDict and queryDict["rtree"][0] == "true"
+ # Get data of requested type
+ if reqType == "node":
+ toroot = "toroot" in queryDict and queryDict["toroot"][0] == "true"
+ if not toroot:
+ tolNodes = lookupNodes([name], useReducedTree, dbCur)
+ if len(tolNodes) > 0:
+ tolNode = tolNodes[name]
+ childNodeObjs = lookupNodes(tolNode.children, useReducedTree, dbCur)
+ childNodeObjs[name] = tolNode
+ respondJson(childNodeObjs)
+ return
+ else:
+ results = {}
+ ranOnce = False
+ while True:
+ # Get node
+ tolNodes = lookupNodes([name], useReducedTree, dbCur)
+ if len(tolNodes) == 0:
if not ranOnce:
- ranOnce = True
- else:
- childNamesToAdd = []
- for childName in tolNode.children:
- if childName not in results:
- childNamesToAdd.append(childName)
- childNodeObjs = lookupNodes(childNamesToAdd, useReducedTree)
- results.update(childNodeObjs)
- # Check if root
- if tolNode.parent == None:
- self.respondJson(results)
+ respondJson(results)
return
- else:
- name = tolNode.parent
- elif reqType == "search":
- # Check for suggestion-limit
- suggLimit = None
- invalidLimit = False
- try:
- suggLimit = int(queryDict["limit"][0]) if "limit" in queryDict else DEFAULT_SUGG_LIM
- if suggLimit <= 0 or suggLimit > MAX_SUGG_LIM:
- invalidLimit = True
- except ValueError:
- invalidLimit = True
- print(f"Invalid limit {suggLimit}")
- # Get search suggestions
- if not invalidLimit:
- self.respondJson(lookupName(name, suggLimit, useReducedTree))
- return
- elif reqType == "info":
- infoResponse = lookupNodeInfo(name, useReducedTree)
- if infoResponse != None:
- self.respondJson(infoResponse)
+ print(f"ERROR: Parent-chain node {name} not found", file=sys.stderr)
+ break
+ tolNode = tolNodes[name]
+ results[name] = tolNode
+ # Potentially add children
+ if not ranOnce:
+ ranOnce = True
+ else:
+ childNamesToAdd = []
+ for childName in tolNode.children:
+ if childName not in results:
+ childNamesToAdd.append(childName)
+ childNodeObjs = lookupNodes(childNamesToAdd, useReducedTree, dbCur)
+ results.update(childNodeObjs)
+ # Check if root
+ if tolNode.parent == None:
+ respondJson(results)
return
- self.send_response(404)
- def respondJson(self, val):
- global CORS_ANY_ORIGIN
- content = jsonpickle.encode(val, unpicklable=False).encode("utf-8")
- self.send_response(200)
- self.send_header("Content-type", "application/json")
- if CORS_ANY_ORIGIN:
- self.send_header("Access-Control-Allow-Origin", "*")
- if "accept-encoding" in self.headers and "gzip" in self.headers["accept-encoding"]:
- if len(content) > 100:
- content = gzip.compress(content, compresslevel=5)
- self.send_header("Content-length", str(len(content)))
- self.send_header("Content-encoding", "gzip")
- self.end_headers()
- self.wfile.write(content)
+ else:
+ name = tolNode.parent
+ elif reqType == "sugg":
+ # Check for suggestion-limit
+ suggLimit = None
+ invalidLimit = False
+ try:
+ suggLimit = int(queryDict["limit"][0]) if "limit" in queryDict else DEFAULT_SUGG_LIM
+ if suggLimit <= 0 or suggLimit > MAX_SUGG_LIM:
+ invalidLimit = True
+ except ValueError:
+ invalidLimit = True
+ print(f"INFO: Invalid limit {suggLimit}", file=sys.stderr)
+ # Get search suggestions
+ if not invalidLimit:
+ respondJson(lookupSuggs(name, suggLimit, useReducedTree, dbCur))
+ return
+ elif reqType == "info":
+ infoResponse = lookupInfo(name, useReducedTree, dbCur)
+ if infoResponse != None:
+ respondJson(infoResponse)
+ return
+ # On failure, provide empty response
+ respondJson(None)
-server = HTTPServer((hostname, port), DbServer)
-print(f"Server started at http://{hostname}:{port}")
-try:
- server.serve_forever()
-except KeyboardInterrupt:
- pass
-server.server_close()
-dbCon.close()
-print("Server stopped")
+# Open db
+dbCon = sqlite3.connect(dbFile)
+dbCur = dbCon.cursor()
+# Handle request
+handleReq(dbCur)