diff options
Diffstat (limited to 'backend')
| -rw-r--r-- | backend/data/README.md | 2 | ||||
| -rwxr-xr-x | backend/data/genImgsForWeb.py | 9 | ||||
| -rwxr-xr-x | backend/data/genLinkedImgs.py | 4 | ||||
| -rwxr-xr-x | backend/data/trimTree.py | 2 | ||||
| -rwxr-xr-x | backend/server.py | 15 |
5 files changed, 18 insertions, 14 deletions
diff --git a/backend/data/README.md b/backend/data/README.md index 87cf58d..fb3ca16 100644 --- a/backend/data/README.md +++ b/backend/data/README.md @@ -66,8 +66,8 @@ data.db Tables - names: name TEXT, alt\_name TEXT, pref\_alt INT, src TEXT, PRIMARY KEY(name, alt\_name) - eol\_ids: id INT PRIMARY KEY, name TEXT - descs: name TEXT PRIMARY KEY, desc TEXT, redirected INT, wiki\_id INT, from\_dbp INT +- node\_imgs: name TEXT PRIMARY KEY, img\_id INT, src TEXT - images: id INT, src TEXT, url TEXT, license TEXT, artist TEXT, credit TEXT, PRIMARY KEY (id, src) -- node\_imgs: id TEXT PRIMARY KEY, img\_id INT, src TEXT - linked\_imgs: name TEXT PRIMARY KEY, otol\_id INT, otol\_id2 INT - r\_nodes: name TEXT PRIMARY KEY, tips INT - r\_edges: node TEXT, child TEXT, p\_support INT, PRIMARY KEY (node, child) diff --git a/backend/data/genImgsForWeb.py b/backend/data/genImgsForWeb.py index 2c4f58d..8bf1435 100755 --- a/backend/data/genImgsForWeb.py +++ b/backend/data/genImgsForWeb.py @@ -36,12 +36,12 @@ enwikiCur = enwikiCon.cursor() nodesDone = set() imgsDone = set() if dbCur.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='node_imgs'").fetchone() == None: - dbCur.execute("CREATE TABLE node_imgs (id TEXT PRIMARY KEY, img_id INT, src TEXT)") + dbCur.execute("CREATE TABLE node_imgs (name TEXT PRIMARY KEY, img_id INT, src TEXT)") dbCur.execute("CREATE TABLE images" \ " (id INT, src TEXT, url TEXT, license TEXT, artist TEXT, credit TEXT, PRIMARY KEY (id, src))") else: # Get existing node-associations - for (otolId,) in dbCur.execute("SELECT id from node_imgs"): + for (otolId,) in dbCur.execute("SELECT nodes.id FROM node_imgs INNER JOIN nodes ON node_imgs.name = nodes.name"): nodesDone.add(otolId) # And images for (imgId, imgSrc) in dbCur.execute("SELECT id, src from images"): @@ -85,6 +85,7 @@ with open(imgListFile) as file: print(f"ERROR: smartcrop had exit status {completedProcess.returncode}") break # Add entry to db + nodeName = dbCur.execute("SELECT name FROM nodes WHERE id = ?", (otolId,)) fromEol = imgPath.startswith("eol/") imgName = os.path.basename(os.path.normpath(imgPath)) # Get last path component imgName = os.path.splitext(imgName)[0] # Remove extension @@ -101,7 +102,7 @@ with open(imgListFile) as file: dbCur.execute("INSERT INTO images VALUES (?, ?, ?, ?, ?, ?)", (eolId, "eol", url, license, owner, "")) imgsDone.add((eolId, "eol")) - dbCur.execute("INSERT INTO node_imgs VALUES (?, ?, ?)", (otolId, eolId, "eol")) + dbCur.execute("INSERT INTO node_imgs VALUES (?, ?, ?)", (nodeName, eolId, "eol")) else: enwikiId = int(imgName) if (enwikiId, "enwiki") not in imgsDone: @@ -117,7 +118,7 @@ with open(imgListFile) as file: dbCur.execute("INSERT INTO images VALUES (?, ?, ?, ?, ?, ?)", (enwikiId, "enwiki", url, license, artist, credit)) imgsDone.add((enwikiId, "enwiki")) - dbCur.execute("INSERT INTO node_imgs VALUES (?, ?, ?)", (otolId, enwikiId, "enwiki")) + dbCur.execute("INSERT INTO node_imgs VALUES (?, ?, ?)", (nodeName, enwikiId, "enwiki")) # Close dbs dbCon.commit() dbCon.close() diff --git a/backend/data/genLinkedImgs.py b/backend/data/genLinkedImgs.py index 4ad1a8d..bcc91c5 100755 --- a/backend/data/genLinkedImgs.py +++ b/backend/data/genLinkedImgs.py @@ -22,7 +22,7 @@ dbCur.execute("CREATE TABLE linked_imgs (name TEXT PRIMARY KEY, otol_id TEXT, ot # Get nodes with images print("Getting nodes with images") resolvedNodes = {} # Will map node names to otol IDs with a usable image -query = "SELECT name, nodes.id FROM nodes INNER JOIN node_imgs ON nodes.id = node_imgs.id" +query = "SELECT nodes.name, nodes.id FROM nodes INNER JOIN node_imgs ON nodes.name = node_imgs.name" for (name, otolId) in dbCur.execute(query): resolvedNodes[name] = otolId print(f"Got {len(resolvedNodes)} nodes") @@ -82,7 +82,7 @@ print("Replacing images for compound-name nodes") iterNum = 0 for nodeName in processedNodes.keys(): iterNum += 1 - if iterNum % 1e3 == 0: + if iterNum % 1e4 == 0: print(f"At iteration {iterNum}") # match = compoundNameRegex.fullmatch(nodeName) diff --git a/backend/data/trimTree.py b/backend/data/trimTree.py index 1291c14..3c98ae8 100755 --- a/backend/data/trimTree.py +++ b/backend/data/trimTree.py @@ -24,7 +24,7 @@ print("\tFinding nodes with descs") for (name,) in dbCur.execute("SELECT name FROM descs"): nodesToKeep.add(name) print("\tFinding nodes with images") -for (name,) in dbCur.execute("SELECT name FROM nodes INNER JOIN node_imgs ON nodes.id = node_imgs.id"): +for (name,) in dbCur.execute("SELECT name FROM node_imgs"): nodesToKeep.add(name) print("\tFinding nodes in reduced-tree") for (name,) in dbCur.execute("SELECT name from r_nodes"): diff --git a/backend/server.py b/backend/server.py index 999da25..e252d5e 100755 --- a/backend/server.py +++ b/backend/server.py @@ -55,7 +55,8 @@ def lookupNodes(names, useReducedTree): nodeObjs[childName]["pSupport"] = (pSupport == 1) # Get image names idsToNames = {nodeObjs[n]["otolId"]: n for n in nodeObjs.keys()} - query = "SELECT id FROM node_imgs WHERE id IN ({})".format(",".join(["?"] * len(idsToNames))) + query = "SELECT nodes.id from nodes INNER JOIN node_imgs ON nodes.name = node_imgs.name" \ + " WHERE nodes.id IN ({})".format(",".join(["?"] * len(idsToNames))) for (otolId,) in cur.execute(query, list(idsToNames.keys())): nodeObjs[idsToNames[otolId]]["imgName"] = otolId + ".jpg" # Get 'linked' images for unresolved names @@ -136,8 +137,9 @@ def lookupNodeInfo(name, useReducedTree): otolId = nodeObj["imgName"][:-4] # Convert filename excluding .jpg suffix print(otolId) query = "SELECT images.id, images.src, url, license, artist, credit FROM" \ - " node_imgs INNER JOIN images ON node_imgs.img_id = images.id AND node_imgs.src = images.src" \ - " WHERE node_imgs.id = ?" + " nodes INNER JOIN node_imgs ON nodes.name = node_imgs.name" \ + " INNER JOIN images ON node_imgs.img_id = images.id AND node_imgs.src = images.src" \ + " WHERE nodes.id = ?" (imgId, imgSrc, url, license, artist, credit) = cur.execute(query, (otolId,)).fetchone() imgData = {"imgId": imgId, "imgSrc": imgSrc, "url": url, "license": license, "artist": artist, "credit": credit} @@ -145,9 +147,10 @@ def lookupNodeInfo(name, useReducedTree): # Get info for compound-image parts imgData = [None, None] idsToLookup = [n[:-4] for n in nodeObj["imgName"] if n != None] - query = "SELECT node_imgs.id, images.id, images.src, url, license, artist, credit FROM" \ - " node_imgs INNER JOIN images ON node_imgs.img_id = images.id AND node_imgs.src = images.src" \ - " WHERE node_imgs.id IN ({})".format(",".join(["?"] * len(idsToLookup))) + query = "SELECT nodes.id, images.id, images.src, url, license, artist, credit FROM" \ + " nodes INNER JOIN node_imgs ON nodes.name = node_imgs.name" \ + " INNER JOIN images ON node_imgs.img_id = images.id AND node_imgs.src = images.src" \ + " WHERE nodes.id IN ({})".format(",".join(["?"] * len(idsToLookup))) for (imgOtolId, imgId, imgSrc, url, license, artist, credit) in cur.execute(query, idsToLookup): imgDataVal = {"imgId": imgId, "imgSrc": imgSrc, "url": url, "license": license, "artist": artist, "credit": credit} |
