From 703750ea5de00354c6bb807f95e8bd9685b3dddd Mon Sep 17 00:00:00 2001 From: Terry Truong Date: Sun, 22 May 2022 20:20:38 +1000 Subject: Up-propagate images for usage in parents without images Also adjust vite config to avoid apparent redundant processing when images are being downloaded. --- backend/data/README.md | 26 +++++++++------ backend/data/genImgsForWeb.py | 2 +- backend/data/genLinkedImgs.py | 78 +++++++++++++++++++++++++++++++++++++++++++ backend/server.py | 49 ++++++--------------------- vite.config.js | 2 +- 5 files changed, 106 insertions(+), 51 deletions(-) create mode 100755 backend/data/genLinkedImgs.py diff --git a/backend/data/README.md b/backend/data/README.md index 44219da..438188c 100644 --- a/backend/data/README.md +++ b/backend/data/README.md @@ -10,12 +10,15 @@ File Generation Process 2 Run genEolNameData.py, which adds 'names' and 'eol\_ids' tables to data.db, using data in eol/vernacularNames.csv and the 'nodes' table. 3 Image Data - 1 Use downloadImgsForReview.py to download EOL images into imgsForReview/. + 1 Run downloadImgsForReview.py to download EOL images into imgsForReview/. It uses data in eol/imagesList.db, and the 'eol\_ids' table. - 2 Use reviewImgs.py to filter images in imgsForReview/ into EOL-id-unique + 2 Run reviewImgs.py to filter images in imgsForReview/ into EOL-id-unique images in imgsReviewed/ (uses 'names' and 'eol\_ids' to display extra info). - 3 Use genImgsForWeb.py to create cropped/resized images in img/, using + 3 Run genImgsForWeb.py to create cropped/resized images in img/, using images in imgsReviewed, and also to add an 'images' table to data.db. + 4 Run genLinkedImgs.py to add a 'linked_imgs' table to data.db, + which uses 'nodes', 'edges', 'eol_ids', and 'images', to associate + nodes without images to child images. 4 Node Description Data - Using DBpedia 1 Obtain data in dbpedia/, as specified in it's README. 
@@ -31,14 +34,15 @@ File Generation Process data.db Tables ============== -- nodes: name TEXT PRIMARY KEY, tips INT -- edges: node TEXT, child TEXT, p\_support INT, PRIMARY KEY (node, child) -- names: name TEXT, alt\_name TEXT, pref\_alt INT, PRIMARY KEY(name, alt\_name) -- eol\_ids: id INT PRIMARY KEY, name TEXT -- images: eol\_id INT PRIMARY KEY, source\_url TEXT, license TEXT, copyright\_owner TEXT -- descs: name TEXT PRIMARY KEY, desc TEXT, redirected INT, wiki\_id INT, from\_dbp INT -- r\_nodes: name TEXT PRIMARY KEY, tips INT -- r\_edges: node TEXT, child TEXT, p\_support INT, PRIMARY KEY (node, child) +- nodes: name TEXT PRIMARY KEY, tips INT +- edges: node TEXT, child TEXT, p\_support INT, PRIMARY KEY (node, child) +- names: name TEXT, alt\_name TEXT, pref\_alt INT, PRIMARY KEY(name, alt\_name) +- eol\_ids: id INT PRIMARY KEY, name TEXT +- images: eol\_id INT PRIMARY KEY, source\_url TEXT, license TEXT, copyright\_owner TEXT +- linked\_imgs: name TEXT PRIMARY KEY, eol\_id INT +- descs: name TEXT PRIMARY KEY, desc TEXT, redirected INT, wiki\_id INT, from\_dbp INT +- r\_nodes: name TEXT PRIMARY KEY, tips INT +- r\_edges: node TEXT, child TEXT, p\_support INT, PRIMARY KEY (node, child) Other Files =========== diff --git a/backend/data/genImgsForWeb.py b/backend/data/genImgsForWeb.py index 14583d6..91a1cde 100755 --- a/backend/data/genImgsForWeb.py +++ b/backend/data/genImgsForWeb.py @@ -28,7 +28,7 @@ if not os.path.exists(outDir): # Open images-list db imagesListDbCon = sqlite3.connect(imagesListDb) imagesListCur = imagesListDbCon.cursor() -# Create/open data db +# Open data db dbCon = sqlite3.connect(dbFile) dbCur = dbCon.cursor() if dbCur.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='images'").fetchone() == None: diff --git a/backend/data/genLinkedImgs.py b/backend/data/genLinkedImgs.py new file mode 100755 index 0000000..5f49ffc --- /dev/null +++ b/backend/data/genLinkedImgs.py @@ -0,0 +1,78 @@ +#!/usr/bin/python3 + +import sys 
+import sqlite3 + +usageInfo = f"usage: {sys.argv[0]}\n" +usageInfo += "Adds a table to data.db, associating nodes without images to\n" +usageInfo += "usable child images.\n" +if len(sys.argv) > 1: + print(usageInfo, file=sys.stderr) + sys.exit(1) + +dbFile = "data.db" + +# Open db +dbCon = sqlite3.connect(dbFile) +dbCur = dbCon.cursor() +dbCur.execute("CREATE TABLE linked_imgs (name TEXT PRIMARY KEY, eol_id INT)") +# Get nodes with images +print("Getting nodes with images") +resolvedNodes = {} # Will map node names to eol IDs with a usable image +query = "SELECT nodes.name, eol_ids.id FROM" \ + " nodes INNER JOIN eol_ids ON nodes.name = eol_ids.name" \ + " INNER JOIN images ON eol_ids.id = images.eol_id" +for (name, eolId) in dbCur.execute(query): + resolvedNodes[name] = eolId +print("Got {} nodes".format(len(resolvedNodes))) +# Iterate through resolved nodes, resolving ancestors where able +print("Resolving ancestor nodes") +nodesToResolve = {} +processedNodes = set() +iterNum = 0 +while len(resolvedNodes) > 0: + iterNum += 1 + if iterNum % 1e3 == 0: + print("At iteration {}".format(iterNum)) + # Get next node + (nodeName, eolId) = resolvedNodes.popitem() + processedNodes.add(nodeName) + # Traverse upwards, resolving ancestors if able + while True: + # Get parent + row = dbCur.execute("SELECT node FROM edges WHERE child = ?", (nodeName,)).fetchone() + if row == None or row[0] in processedNodes or row[0] in resolvedNodes: + break + parent = row[0] + # Get parent data + if parent not in nodesToResolve: + childNames = [row[0] for row in dbCur.execute("SELECT child FROM edges WHERE node = ?", (parent,))] + query = "SELECT name, tips FROM nodes WHERE name IN ({})".format(",".join(["?"] * len(childNames))) + childObjs = [{"name": row[0], "tips": row[1], "eolId": None} for row in dbCur.execute(query, childNames)] + childObjs.sort(key=lambda x: x["tips"], reverse=True) + nodesToResolve[parent] = childObjs + else: + childObjs = nodesToResolve[parent] + # Check if 
highest-tips child + if (childObjs[0]["name"] == nodeName): + # Resolve parent, and continue from it + dbCur.execute("INSERT INTO linked_imgs VALUES (?, ?)", (parent, eolId)) + del nodesToResolve[parent] + processedNodes.add(parent) + nodeName = parent + continue + else: + # Add potential EOL ID to parent + childObj = next(c for c in childObjs if c["name"] == nodeName) + childObj["eolId"] = eolId + break + # When out of resolved nodes, resolve any nodesToResolve nodes + if len(resolvedNodes) == 0: + for (name, childObjs) in nodesToResolve.items(): + childObj = next(c for c in childObjs if c["eolId"] != None) + resolvedNodes[name] = childObj["eolId"] + dbCur.execute("INSERT INTO linked_imgs VALUES (?, ?)", (name, childObj["eolId"])) + nodesToResolve.clear() +# Close db +dbCon.commit() +dbCon.close() diff --git a/backend/server.py b/backend/server.py index a64a145..a232c48 100755 --- a/backend/server.py +++ b/backend/server.py @@ -50,36 +50,18 @@ def lookupNodes(names, useReducedTree): for (nodeName, childName, pSupport) in cur.execute(query, names): nodeObjs[childName]["parent"] = None if nodeName == "" else nodeName nodeObjs[childName]["pSupport"] = (pSupport == 1) - # Get names for image files - namesForImgs = [] - firstSubnames = {} - secondSubnames = {} - for (name, nodeObj) in nodeObjs.items(): - match = re.fullmatch(r"\[(.+) \+ (.+)]", name) - if match == None: - namesForImgs.append(name) - else: - name1 = match.group(1) - name2 = match.group(2) - namesForImgs.extend([name1, name2]) - firstSubnames[name1] = name - secondSubnames[name2] = name # Get image names - query = "SELECT name, id FROM eol_ids WHERE" \ - " name IN ({})".format(",".join(["?"] * len(namesForImgs))) - for [n, id] in cur.execute(query, namesForImgs): - filename = str(id) + ".jpg" - if not os.path.exists(imgDir + filename): - continue - if n in firstSubnames: - nodeName = firstSubnames[n] - nodeObjs[nodeName]["imgName"] = filename - elif n in secondSubnames: - nodeName = secondSubnames[n] - 
if nodeObjs[nodeName]["imgName"] == None: - nodeObjs[nodeName]["imgName"] = filename - else: - nodeObjs[n]["imgName"] = filename + query = "SELECT nodes.name, eol_id FROM" \ + " nodes INNER JOIN eol_ids ON nodes.name = eol_ids.name" \ + " INNER JOIN images ON eol_ids.id = images.eol_id WHERE" \ + " nodes.name IN ({})".format(",".join(["?"] * len(nodeObjs))) + for (name, eolId) in cur.execute(query, list(nodeObjs.keys())): + nodeObjs[name]["imgName"] = str(eolId) + ".jpg" + # Get 'linked' images for unresolved names + unresolvedNames = [n for n in nodeObjs if nodeObjs[n]["imgName"] == None] + query = "SELECT name, eol_id from linked_imgs WHERE name IN ({})".format(",".join(["?"] * len(unresolvedNames))) + for (name, eolId) in cur.execute(query, unresolvedNames): + nodeObjs[name]["imgName"] = str(eolId) + ".jpg" # Get preferred-name info query = "SELECT name, alt_name FROM names WHERE pref_alt = 1 AND name IN ({})".format(queryParamStr) for (name, altName) in cur.execute(query, names): @@ -87,15 +69,6 @@ def lookupNodes(names, useReducedTree): nodeObjs[name]["commonName"] = altName # return nodeObjs -def getNodeImg(name): - cur = dbCon.cursor() - row = cur.execute("SELECT name, id FROM eol_ids WHERE name = ?", (name,)).fetchone() - if row != None: - eolId = row[1] - filename = str(eolId) + ".jpg" - if os.path.exists(imgDir + filename): - return filename - return None def lookupName(name, useReducedTree): cur = dbCon.cursor() results = [] diff --git a/vite.config.js b/vite.config.js index 25119dd..2e470fc 100644 --- a/vite.config.js +++ b/vite.config.js @@ -8,7 +8,7 @@ export default defineConfig({ '/data': 'http://localhost:8000', }, watch: { - ignored: ['**/imgsForReview/*', '**/imgsReviewed/*', '**/img/*'] + ignored: ['**/backend', '**/public'] }, }, //server: {open: true} //open browser when dev server starts -- cgit v1.2.3