aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--backend/data/README.md26
-rwxr-xr-xbackend/data/genImgsForWeb.py2
-rwxr-xr-xbackend/data/genLinkedImgs.py78
-rwxr-xr-xbackend/server.py49
-rw-r--r--vite.config.js2
5 files changed, 106 insertions, 51 deletions
diff --git a/backend/data/README.md b/backend/data/README.md
index 44219da..438188c 100644
--- a/backend/data/README.md
+++ b/backend/data/README.md
@@ -10,12 +10,15 @@ File Generation Process
2 Run genEolNameData.py, which adds 'names' and 'eol\_ids' tables to data.db,
using data in eol/vernacularNames.csv and the 'nodes' table.
3 Image Data
- 1 Use downloadImgsForReview.py to download EOL images into imgsForReview/.
+ 1 Run downloadImgsForReview.py to download EOL images into imgsForReview/.
It uses data in eol/imagesList.db, and the 'eol\_ids' table.
- 2 Use reviewImgs.py to filter images in imgsForReview/ into EOL-id-unique
+ 2 Run reviewImgs.py to filter images in imgsForReview/ into EOL-id-unique
images in imgsReviewed/ (uses 'names' and 'eol\_ids' to display extra info).
- 3 Use genImgsForWeb.py to create cropped/resized images in img/, using
+ 3 Run genImgsForWeb.py to create cropped/resized images in img/, using
images in imgsReviewed, and also to add an 'images' table to data.db.
+ 4 Run genLinkedImgs.py to add a 'linked\_imgs' table to data.db. It uses
+ 'nodes', 'edges', 'eol\_ids', and 'images' to associate nodes that have no
+ image of their own with an image from one of their descendants.
4 Node Description Data
- Using DBpedia
1 Obtain data in dbpedia/, as specified in its README.
@@ -31,14 +34,15 @@ File Generation Process
data.db Tables
==============
-- nodes: name TEXT PRIMARY KEY, tips INT
-- edges: node TEXT, child TEXT, p\_support INT, PRIMARY KEY (node, child)
-- names: name TEXT, alt\_name TEXT, pref\_alt INT, PRIMARY KEY(name, alt\_name)
-- eol\_ids: id INT PRIMARY KEY, name TEXT
-- images: eol\_id INT PRIMARY KEY, source\_url TEXT, license TEXT, copyright\_owner TEXT
-- descs: name TEXT PRIMARY KEY, desc TEXT, redirected INT, wiki\_id INT, from\_dbp INT
-- r\_nodes: name TEXT PRIMARY KEY, tips INT
-- r\_edges: node TEXT, child TEXT, p\_support INT, PRIMARY KEY (node, child)
+- nodes: name TEXT PRIMARY KEY, tips INT
+- edges: node TEXT, child TEXT, p\_support INT, PRIMARY KEY (node, child)
+- names: name TEXT, alt\_name TEXT, pref\_alt INT, PRIMARY KEY(name, alt\_name)
+- eol\_ids: id INT PRIMARY KEY, name TEXT
+- images: eol\_id INT PRIMARY KEY, source\_url TEXT, license TEXT, copyright\_owner TEXT
+- linked\_imgs: name TEXT PRIMARY KEY, eol\_id INT
+- descs: name TEXT PRIMARY KEY, desc TEXT, redirected INT, wiki\_id INT, from\_dbp INT
+- r\_nodes: name TEXT PRIMARY KEY, tips INT
+- r\_edges: node TEXT, child TEXT, p\_support INT, PRIMARY KEY (node, child)
Other Files
===========
diff --git a/backend/data/genImgsForWeb.py b/backend/data/genImgsForWeb.py
index 14583d6..91a1cde 100755
--- a/backend/data/genImgsForWeb.py
+++ b/backend/data/genImgsForWeb.py
@@ -28,7 +28,7 @@ if not os.path.exists(outDir):
# Open images-list db
imagesListDbCon = sqlite3.connect(imagesListDb)
imagesListCur = imagesListDbCon.cursor()
-# Create/open data db
+# Open data db
dbCon = sqlite3.connect(dbFile)
dbCur = dbCon.cursor()
if dbCur.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='images'").fetchone() == None:
diff --git a/backend/data/genLinkedImgs.py b/backend/data/genLinkedImgs.py
new file mode 100755
index 0000000..5f49ffc
--- /dev/null
+++ b/backend/data/genLinkedImgs.py
@@ -0,0 +1,87 @@
+#!/usr/bin/python3
+
+import sys
+import sqlite3
+
+usageInfo = f"usage: {sys.argv[0]}\n"
+usageInfo += "Adds a table to data.db, associating nodes without images to\n"
+usageInfo += "usable child images.\n"
+if len(sys.argv) > 1:
+    print(usageInfo, file=sys.stderr)
+    sys.exit(1)
+
+dbFile = "data.db"
+
+# Open db
+dbCon = sqlite3.connect(dbFile)
+dbCur = dbCon.cursor()
+# Note: This fails if 'linked_imgs' already exists
+dbCur.execute("CREATE TABLE linked_imgs (name TEXT PRIMARY KEY, eol_id INT)")
+# Get nodes with images
+print("Getting nodes with images")
+resolvedNodes = {} # Will map node names to eol IDs with a usable image
+query = "SELECT nodes.name, eol_ids.id FROM" \
+    " nodes INNER JOIN eol_ids ON nodes.name = eol_ids.name" \
+    " INNER JOIN images ON eol_ids.id = images.eol_id"
+for (name, eolId) in dbCur.execute(query):
+    resolvedNodes[name] = eolId
+print("Got {} nodes".format(len(resolvedNodes)))
+# Iterate through resolved nodes, resolving ancestors where able
+# A parent gets the image of its highest-tips child if that child is resolved,
+# and otherwise waits in nodesToResolve until no resolved nodes are left
+print("Resolving ancestor nodes")
+nodesToResolve = {} # Maps unresolved parent names to lists of child objects, sorted by descending tips
+processedNodes = set() # Names of nodes that have been traversed upward from
+iterNum = 0
+while resolvedNodes:
+    iterNum += 1
+    if iterNum % 1000 == 0:
+        print("At iteration {}".format(iterNum))
+    # Get next node
+    (nodeName, eolId) = resolvedNodes.popitem()
+    processedNodes.add(nodeName)
+    # Traverse upwards, resolving ancestors if able
+    while True:
+        # Get parent
+        row = dbCur.execute("SELECT node FROM edges WHERE child = ?", (nodeName,)).fetchone()
+        if row is None or row[0] in processedNodes or row[0] in resolvedNodes:
+            break
+        parent = row[0]
+        # Get parent data
+        if parent not in nodesToResolve:
+            childNames = [r[0] for r in dbCur.execute("SELECT child FROM edges WHERE node = ?", (parent,))]
+            query = "SELECT name, tips FROM nodes WHERE name IN ({})".format(",".join(["?"] * len(childNames)))
+            childObjs = [{"name": r[0], "tips": r[1], "eolId": None} for r in dbCur.execute(query, childNames)]
+            childObjs.sort(key=lambda x: x["tips"], reverse=True)
+            nodesToResolve[parent] = childObjs
+        else:
+            childObjs = nodesToResolve[parent]
+        # Find this node among the parent's children
+        childObj = next((c for c in childObjs if c["name"] == nodeName), None)
+        if childObj is None:
+            # The child has an 'edges' row but no 'nodes' row, so it can't be ranked
+            break
+        # Check if highest-tips child
+        if childObjs[0]["name"] == nodeName:
+            # Resolve parent, and continue from it
+            dbCur.execute("INSERT INTO linked_imgs VALUES (?, ?)", (parent, eolId))
+            del nodesToResolve[parent]
+            processedNodes.add(parent)
+            nodeName = parent
+            continue
+        # Add potential EOL ID to parent
+        childObj["eolId"] = eolId
+        break
+    # When out of resolved nodes, resolve any nodesToResolve nodes
+    if not resolvedNodes:
+        for (name, childObjs) in nodesToResolve.items():
+            # Use the highest-tips child that has a potential EOL ID
+            linkedId = next((c["eolId"] for c in childObjs if c["eolId"] is not None), None)
+            if linkedId is None:
+                continue
+            resolvedNodes[name] = linkedId
+            dbCur.execute("INSERT INTO linked_imgs VALUES (?, ?)", (name, linkedId))
+        nodesToResolve.clear()
+# Close db
+dbCon.commit()
+dbCon.close()
diff --git a/backend/server.py b/backend/server.py
index a64a145..a232c48 100755
--- a/backend/server.py
+++ b/backend/server.py
@@ -50,36 +50,18 @@ def lookupNodes(names, useReducedTree):
for (nodeName, childName, pSupport) in cur.execute(query, names):
nodeObjs[childName]["parent"] = None if nodeName == "" else nodeName
nodeObjs[childName]["pSupport"] = (pSupport == 1)
- # Get names for image files
- namesForImgs = []
- firstSubnames = {}
- secondSubnames = {}
- for (name, nodeObj) in nodeObjs.items():
- match = re.fullmatch(r"\[(.+) \+ (.+)]", name)
- if match == None:
- namesForImgs.append(name)
- else:
- name1 = match.group(1)
- name2 = match.group(2)
- namesForImgs.extend([name1, name2])
- firstSubnames[name1] = name
- secondSubnames[name2] = name
# Get image names
- query = "SELECT name, id FROM eol_ids WHERE" \
- " name IN ({})".format(",".join(["?"] * len(namesForImgs)))
- for [n, id] in cur.execute(query, namesForImgs):
- filename = str(id) + ".jpg"
- if not os.path.exists(imgDir + filename):
- continue
- if n in firstSubnames:
- nodeName = firstSubnames[n]
- nodeObjs[nodeName]["imgName"] = filename
- elif n in secondSubnames:
- nodeName = secondSubnames[n]
- if nodeObjs[nodeName]["imgName"] == None:
- nodeObjs[nodeName]["imgName"] = filename
- else:
- nodeObjs[n]["imgName"] = filename
+ query = "SELECT nodes.name, eol_id FROM" \
+ " nodes INNER JOIN eol_ids ON nodes.name = eol_ids.name" \
+ " INNER JOIN images ON eol_ids.id = images.eol_id WHERE" \
+ " nodes.name IN ({})".format(",".join(["?"] * len(nodeObjs)))
+ for (name, eolId) in cur.execute(query, list(nodeObjs.keys())):
+ nodeObjs[name]["imgName"] = str(eolId) + ".jpg"
+ # Get 'linked' images for unresolved names
+ unresolvedNames = [n for n in nodeObjs if nodeObjs[n]["imgName"] == None]
+ query = "SELECT name, eol_id from linked_imgs WHERE name IN ({})".format(",".join(["?"] * len(unresolvedNames)))
+ for (name, eolId) in cur.execute(query, unresolvedNames):
+ nodeObjs[name]["imgName"] = str(eolId) + ".jpg"
# Get preferred-name info
query = "SELECT name, alt_name FROM names WHERE pref_alt = 1 AND name IN ({})".format(queryParamStr)
for (name, altName) in cur.execute(query, names):
@@ -87,15 +69,6 @@ def lookupNodes(names, useReducedTree):
nodeObjs[name]["commonName"] = altName
#
return nodeObjs
-def getNodeImg(name):
- cur = dbCon.cursor()
- row = cur.execute("SELECT name, id FROM eol_ids WHERE name = ?", (name,)).fetchone()
- if row != None:
- eolId = row[1]
- filename = str(eolId) + ".jpg"
- if os.path.exists(imgDir + filename):
- return filename
- return None
def lookupName(name, useReducedTree):
cur = dbCon.cursor()
results = []
diff --git a/vite.config.js b/vite.config.js
index 25119dd..2e470fc 100644
--- a/vite.config.js
+++ b/vite.config.js
@@ -8,7 +8,7 @@ export default defineConfig({
'/data': 'http://localhost:8000',
},
watch: {
- ignored: ['**/imgsForReview/*', '**/imgsReviewed/*', '**/img/*']
+ ignored: ['**/backend', '**/public']
},
},
//server: {open: true} //open browser when dev server starts