diff options
Diffstat (limited to 'backend')
| -rw-r--r-- | backend/data/README.md | 2 | ||||
| -rwxr-xr-x | backend/data/genLinkedImgs.py | 55 | ||||
| -rwxr-xr-x | backend/server.py | 60 |
3 files changed, 89 insertions, 28 deletions
diff --git a/backend/data/README.md b/backend/data/README.md index 438188c..a1bc287 100644 --- a/backend/data/README.md +++ b/backend/data/README.md @@ -39,7 +39,7 @@ data.db Tables - names: name TEXT, alt\_name TEXT, pref\_alt INT, PRIMARY KEY(name, alt\_name) - eol\_ids: id INT PRIMARY KEY, name TEXT - images: eol\_id INT PRIMARY KEY, source\_url TEXT, license TEXT, copyright\_owner TEXT -- linked\_imgs: name TEXT PRIMARY KEY, eol\_id INT +- linked\_imgs: name TEXT PRIMARY KEY, eol\_id INT, eol\_id2 INT - descs: name TEXT PRIMARY KEY, desc TEXT, redirected INT, wiki\_id INT, from\_dbp INT - r\_nodes: name TEXT PRIMARY KEY, tips INT - r\_edges: node TEXT, child TEXT, p\_support INT, PRIMARY KEY (node, child) diff --git a/backend/data/genLinkedImgs.py b/backend/data/genLinkedImgs.py index 5f49ffc..d32196e 100755 --- a/backend/data/genLinkedImgs.py +++ b/backend/data/genLinkedImgs.py @@ -1,6 +1,6 @@ #!/usr/bin/python3 -import sys +import sys, re import sqlite3 usageInfo = f"usage: {sys.argv[0]}\n" @@ -11,11 +11,14 @@ if len(sys.argv) > 1: sys.exit(1) dbFile = "data.db" +compoundNameRegex = re.compile(r"\[(.+) \+ (.+)]") +upPropagateCompoundImgs = False # Open db dbCon = sqlite3.connect(dbFile) dbCur = dbCon.cursor() -dbCur.execute("CREATE TABLE linked_imgs (name TEXT PRIMARY KEY, eol_id INT)") +dbCur.execute("CREATE TABLE linked_imgs (name TEXT PRIMARY KEY, eol_id INT, eol_id2 INT)") + # The eol_id2 column is used to provide the second part of a compound-image # Get nodes with images print("Getting nodes with images") resolvedNodes = {} # Will map node names to eol IDs with a usable image @@ -28,7 +31,8 @@ print("Got {} nodes".format(len(resolvedNodes))) # Iterate through resolved nodes, resolving ancestors where able print("Resolving ancestor nodes") nodesToResolve = {} -processedNodes = set() +processedNodes = {} +parentToChosenTips = {} iterNum = 0 while len(resolvedNodes) > 0: iterNum += 1 @@ -36,7 +40,7 @@ while len(resolvedNodes) > 0: print("At iteration {}".format(iterNum)) # Get next node (nodeName, eolId) = resolvedNodes.popitem() - processedNodes.add(nodeName) + processedNodes[nodeName] = eolId # Traverse upwards, resolving ancestors if able while True: # Get parent @@ -56,9 +60,10 @@ while len(resolvedNodes) > 0: # Check if highest-tips child if (childObjs[0]["name"] == nodeName): # Resolve parent, and continue from it - dbCur.execute("INSERT INTO linked_imgs VALUES (?, ?)", (parent, eolId)) + dbCur.execute("INSERT INTO linked_imgs VALUES (?, ?, ?)", (parent, eolId, None)) del nodesToResolve[parent] - processedNodes.add(parent) + processedNodes[parent] = eolId + parentToChosenTips[parent] = childObjs[0]["tips"] nodeName = parent continue else: @@ -71,8 +76,44 @@ while len(resolvedNodes) > 0: for (name, childObjs) in nodesToResolve.items(): childObj = next(c for c in childObjs if c["eolId"] != None) resolvedNodes[name] = childObj["eolId"] - dbCur.execute("INSERT INTO linked_imgs VALUES (?, ?)", (name, childObj["eolId"])) + parentToChosenTips[name] = childObj["tips"] + dbCur.execute("INSERT INTO linked_imgs VALUES (?, ?, ?)", (name, childObj["eolId"], None)) nodesToResolve.clear() +# Iterate through processed nodes with compound names +print("Replacing images for compound-name nodes") +iterNum = 0 +for nodeName in processedNodes.keys(): + iterNum += 1 + if iterNum % 1e3 == 0: + print("At iteration {}".format(iterNum)) + # + match = compoundNameRegex.fullmatch(nodeName) + if match != None: + # Replace associated image with subname images + (subName1, subName2) = match.group(1,2) + eolIdPair = [0, 0] + if subName1 in processedNodes: + eolIdPair[0] = processedNodes[subName1] + if subName2 in processedNodes: + eolIdPair[1] = processedNodes[subName2] + dbCur.execute("UPDATE linked_imgs SET eol_id = ?, eol_id2 = ? WHERE name = ?", + (eolIdPair[0], eolIdPair[1], nodeName,)) + if upPropagateCompoundImgs: + # Repeat operation for parents, where needed + while True: + # Get parent + row = dbCur.execute("SELECT node FROM edges WHERE child = ?", (nodeName,)).fetchone() + if row != None: + parent = row[0] + # Check num tips + (numTips,) = dbCur.execute("SELECT tips from nodes WHERE name = ?", (nodeName,)).fetchone() + if parent in parentToChosenTips and parentToChosenTips[parent] <= numTips: + # Replace associated image + dbCur.execute("UPDATE linked_imgs SET eol_id = ?, eol_id2 = ? WHERE name = ?", + (eolIdPair[0], eolIdPair[1], parent)) + nodeName = parent + continue + break # Close db dbCon.commit() dbCon.close() diff --git a/backend/server.py b/backend/server.py index f15c95f..5152503 100755 --- a/backend/server.py +++ b/backend/server.py @@ -16,9 +16,11 @@ usageInfo += "Starts a server that listens for GET requests to http://" + hostna usageInfo += "Responds to path+query /data/type1?name=name1 with JSON data.\n" usageInfo += "An additional query parameter tree=reduced is usable to get reduced-tree data\n" usageInfo += "\n" -usageInfo += "If type1 is 'node': Responds with map from names to objects representing node name1 and it's children.\n" +usageInfo += "If type1 is 'node': Responds with map from names to TolNode objects for node name1 and it's children.\n" usageInfo += "If type1 is 'chain': Like 'node', but gets nodes from name1 up to the root, and their direct children.\n" -usageInfo += "If type1 is 'search': Responds with a tolnode name that has alt-name name1, or null.\n" +usageInfo += "If type1 is 'search': Responds with a SearchSuggResponse object.\n" +usageInfo += "If type1 is 'info': Responds with a TileInfoResponse object.\n" +usageInfo += "(Object type information can be found in src/)\n" if len(sys.argv) > 1: print(usageInfo, file=sys.stderr) sys.exit(1) @@ -59,9 +61,16 @@ def lookupNodes(names, useReducedTree): nodeObjs[name]["imgName"] = str(eolId) + ".jpg" # Get 'linked' images for unresolved names unresolvedNames = [n for n in nodeObjs if nodeObjs[n]["imgName"] == None] - query = "SELECT name, eol_id from linked_imgs WHERE name IN ({})".format(",".join(["?"] * len(unresolvedNames))) - for (name, eolId) in cur.execute(query, unresolvedNames): - nodeObjs[name]["imgName"] = str(eolId) + ".jpg" + query = "SELECT name, eol_id, eol_id2 from linked_imgs WHERE name IN ({})" + query = query.format(",".join(["?"] * len(unresolvedNames))) + for (name, eolId, eolId2) in cur.execute(query, unresolvedNames): + if eolId2 == None: + nodeObjs[name]["imgName"] = str(eolId) + ".jpg" + else: + nodeObjs[name]["imgName"] = [ + str(eolId) + ".jpg" if eolId != 0 else None, + str(eolId2) + ".jpg" if eolId2 != 0 else None, + ] # Get preferred-name info query = "SELECT name, alt_name FROM names WHERE pref_alt = 1 AND name IN ({})".format(queryParamStr) for (name, altName) in cur.execute(query, names): @@ -106,30 +115,41 @@ def lookupNodeInfo(name, useReducedTree): nodeObj = temp[name] if name in temp else None # Get node desc descData = None - query = "SELECT desc, redirected, wiki_id, from_dbp from descs WHERE descs.name = ?" match = re.fullmatch(r"\[(.+) \+ (.+)]", name) if match == None: + query = "SELECT desc, redirected, wiki_id, from_dbp from descs WHERE descs.name = ?" row = cur.execute(query, (name,)).fetchone() if row != None: descData = {"text": row[0], "fromRedirect": row[1] == 1, "wikiId": row[2], "fromDbp": row[3] == 1} else: - # Get descs for compound-node elements + # Get descs for compound-node element descData = [None, None] - row = cur.execute(query, (match.group(1),)).fetchone() - if row != None: - descData[0] = {"text": row[0], "fromRedirect": row[1] == 1, "wikiId": row[2], "fromDbp": row[3] == 1} - row = cur.execute(query, (match.group(2),)).fetchone() - if row != None: - descData[1] = {"text": row[0], "fromRedirect": row[1] == 1, "wikiId": row[2], "fromDbp": row[3] == 1} + query = "SELECT name, desc, redirected, wiki_id, from_dbp from descs WHERE descs.name IN (?, ?)" + for row in cur.execute(query, match.group(1,2)): + if row[0] == match.group(1): + descData[0] = {"text": row[1], "fromRedirect": row[2] == 1, "wikiId": row[3], "fromDbp": row[4] == 1} + else: + descData[1] = {"text": row[1], "fromRedirect": row[2] == 1, "wikiId": row[3], "fromDbp": row[4] == 1} # Get img info - imgInfo = None - if nodeObj != None and nodeObj["imgName"] != None: - eolId = int(nodeObj["imgName"][:-4]) # Convert filename excluding .jpg suffix - imgInfoQuery = "SELECT eol_id, source_url, license, copyright_owner FROM images WHERE eol_id = ?" - row = cur.execute(imgInfoQuery, (eolId,)).fetchone() - imgInfo = {"eolId": row[0], "sourceUrl": row[1], "license": row[2], "copyrightOwner": row[3]} + imgData = None + if nodeObj != None: + if isinstance(nodeObj["imgName"], str): + eolId = int(nodeObj["imgName"][:-4]) # Convert filename excluding .jpg suffix + query = "SELECT eol_id, source_url, license, copyright_owner FROM images WHERE eol_id = ?" + row = cur.execute(query, (eolId,)).fetchone() + imgData = {"eolId": row[0], "sourceUrl": row[1], "license": row[2], "copyrightOwner": row[3]} + elif isinstance(nodeObj["imgName"], list): + # Get info for compound-image parts + imgData = [None, None] + idsToLookup = [int(n[:-4]) for n in nodeObj["imgName"] if n != None] + query = "SELECT eol_id, source_url, license, copyright_owner FROM images WHERE eol_id IN (?, ?)" + for row in cur.execute(query, idsToLookup): + if str(row[0]) == nodeObj["imgName"][0][:-4]: + imgData[0] = {"eolId": row[0], "sourceUrl": row[1], "license": row[2], "copyrightOwner": row[3]} + else: + imgData[1] = {"eolId": row[0], "sourceUrl": row[1], "license": row[2], "copyrightOwner": row[3]} # - return {"descData": descData, "imgInfo": imgInfo, "nodeObj": nodeObj} + return {"descData": descData, "imgData": imgData, "nodeObj": nodeObj} class DbServer(BaseHTTPRequestHandler): def do_GET(self): |
