diff options
Diffstat (limited to 'backend/data')
| -rw-r--r-- | backend/data/README.md | 2 | ||||
| -rwxr-xr-x | backend/data/genImgsForWeb.py | 9 | ||||
| -rwxr-xr-x | backend/data/genLinkedImgs.py | 4 | ||||
| -rwxr-xr-x | backend/data/trimTree.py | 2 |
4 files changed, 9 insertions, 8 deletions
diff --git a/backend/data/README.md b/backend/data/README.md index 87cf58d..fb3ca16 100644 --- a/backend/data/README.md +++ b/backend/data/README.md @@ -66,8 +66,8 @@ data.db Tables - names: name TEXT, alt\_name TEXT, pref\_alt INT, src TEXT, PRIMARY KEY(name, alt\_name) - eol\_ids: id INT PRIMARY KEY, name TEXT - descs: name TEXT PRIMARY KEY, desc TEXT, redirected INT, wiki\_id INT, from\_dbp INT +- node\_imgs: name TEXT PRIMARY KEY, img\_id INT, src TEXT - images: id INT, src TEXT, url TEXT, license TEXT, artist TEXT, credit TEXT, PRIMARY KEY (id, src) -- node\_imgs: id TEXT PRIMARY KEY, img\_id INT, src TEXT - linked\_imgs: name TEXT PRIMARY KEY, otol\_id INT, otol\_id2 INT - r\_nodes: name TEXT PRIMARY KEY, tips INT - r\_edges: node TEXT, child TEXT, p\_support INT, PRIMARY KEY (node, child) diff --git a/backend/data/genImgsForWeb.py b/backend/data/genImgsForWeb.py index 2c4f58d..8bf1435 100755 --- a/backend/data/genImgsForWeb.py +++ b/backend/data/genImgsForWeb.py @@ -36,12 +36,12 @@ enwikiCur = enwikiCon.cursor() nodesDone = set() imgsDone = set() if dbCur.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='node_imgs'").fetchone() == None: - dbCur.execute("CREATE TABLE node_imgs (id TEXT PRIMARY KEY, img_id INT, src TEXT)") + dbCur.execute("CREATE TABLE node_imgs (name TEXT PRIMARY KEY, img_id INT, src TEXT)") dbCur.execute("CREATE TABLE images" \ " (id INT, src TEXT, url TEXT, license TEXT, artist TEXT, credit TEXT, PRIMARY KEY (id, src))") else: # Get existing node-associations - for (otolId,) in dbCur.execute("SELECT id from node_imgs"): + for (otolId,) in dbCur.execute("SELECT nodes.id FROM node_imgs INNER JOIN nodes ON node_imgs.name = nodes.name"): nodesDone.add(otolId) # And images for (imgId, imgSrc) in dbCur.execute("SELECT id, src from images"): @@ -85,6 +85,7 @@ with open(imgListFile) as file: print(f"ERROR: smartcrop had exit status {completedProcess.returncode}") break # Add entry to db + nodeName = dbCur.execute("SELECT name FROM nodes WHERE id = ?", (otolId,)) fromEol = imgPath.startswith("eol/") imgName = os.path.basename(os.path.normpath(imgPath)) # Get last path component imgName = os.path.splitext(imgName)[0] # Remove extension @@ -101,7 +102,7 @@ with open(imgListFile) as file: dbCur.execute("INSERT INTO images VALUES (?, ?, ?, ?, ?, ?)", (eolId, "eol", url, license, owner, "")) imgsDone.add((eolId, "eol")) - dbCur.execute("INSERT INTO node_imgs VALUES (?, ?, ?)", (otolId, eolId, "eol")) + dbCur.execute("INSERT INTO node_imgs VALUES (?, ?, ?)", (nodeName, eolId, "eol")) else: enwikiId = int(imgName) if (enwikiId, "enwiki") not in imgsDone: @@ -117,7 +118,7 @@ with open(imgListFile) as file: dbCur.execute("INSERT INTO images VALUES (?, ?, ?, ?, ?, ?)", (enwikiId, "enwiki", url, license, artist, credit)) imgsDone.add((enwikiId, "enwiki")) - dbCur.execute("INSERT INTO node_imgs VALUES (?, ?, ?)", (otolId, enwikiId, "enwiki")) + dbCur.execute("INSERT INTO node_imgs VALUES (?, ?, ?)", (nodeName, enwikiId, "enwiki")) # Close dbs dbCon.commit() dbCon.close() diff --git a/backend/data/genLinkedImgs.py b/backend/data/genLinkedImgs.py index 4ad1a8d..bcc91c5 100755 --- a/backend/data/genLinkedImgs.py +++ b/backend/data/genLinkedImgs.py @@ -22,7 +22,7 @@ dbCur.execute("CREATE TABLE linked_imgs (name TEXT PRIMARY KEY, otol_id TEXT, ot # Get nodes with images print("Getting nodes with images") resolvedNodes = {} # Will map node names to otol IDs with a usable image -query = "SELECT name, nodes.id FROM nodes INNER JOIN node_imgs ON nodes.id = node_imgs.id" +query = "SELECT nodes.name, nodes.id FROM nodes INNER JOIN node_imgs ON nodes.name = node_imgs.name" for (name, otolId) in dbCur.execute(query): resolvedNodes[name] = otolId print(f"Got {len(resolvedNodes)} nodes") @@ -82,7 +82,7 @@ print("Replacing images for compound-name nodes") iterNum = 0 for nodeName in processedNodes.keys(): iterNum += 1 - if iterNum % 1e3 == 0: + if iterNum % 1e4 == 0: print(f"At iteration {iterNum}") # match = compoundNameRegex.fullmatch(nodeName) diff --git a/backend/data/trimTree.py b/backend/data/trimTree.py index 1291c14..3c98ae8 100755 --- a/backend/data/trimTree.py +++ b/backend/data/trimTree.py @@ -24,7 +24,7 @@ print("\tFinding nodes with descs") for (name,) in dbCur.execute("SELECT name FROM descs"): nodesToKeep.add(name) print("\tFinding nodes with images") -for (name,) in dbCur.execute("SELECT name FROM nodes INNER JOIN node_imgs ON nodes.id = node_imgs.id"): +for (name,) in dbCur.execute("SELECT name FROM node_imgs"): nodesToKeep.add(name) print("\tFinding nodes in reduced-tree") for (name,) in dbCur.execute("SELECT name from r_nodes"): |
