aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore2
-rwxr-xr-xbackend/cgi-bin/data.py59
-rw-r--r--backend/data/README.md23
-rwxr-xr-xbackend/data/genReducedTreeData.py177
-rwxr-xr-xbackend/data/genReducedTrees.py324
-rwxr-xr-xbackend/data/trimTree.py143
-rw-r--r--src/App.vue10
-rw-r--r--src/components/SearchModal.vue4
-rw-r--r--src/components/SettingsModal.vue15
-rw-r--r--src/components/TileInfoModal.vue2
-rw-r--r--src/lib.ts4
11 files changed, 388 insertions, 375 deletions
diff --git a/.gitignore b/.gitignore
index 789dadd..99bd1e5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -28,5 +28,5 @@
/backend/data/pickedEnwikiNamesToSkip.txt
/backend/data/pickedDbpLabels.txt
/backend/data/pickedEnwikiLabels.txt
-/backend/data/pickedReducedNodes.txt
+/backend/data/pickedNodes.txt
/backend/data/pickedNames.txt
diff --git a/backend/cgi-bin/data.py b/backend/cgi-bin/data.py
index b152c14..3815715 100755
--- a/backend/cgi-bin/data.py
+++ b/backend/cgi-bin/data.py
@@ -24,8 +24,10 @@ Query parameters:
- toroot: Used with type=node, and causes inclusion of nodes upward to the root, and their children.
If specified, the value should be 'true'.
- limit: Used with type=sugg to specify the max number of suggestions.
-- rtree: Specifies whether the reduced tree should be used.
- If specified, the value should be 'true'.
+- tree: Specifies which tree should be used.
+ May be 'trimmed', 'images', or 'picked', corresponding to the
+ weakly-trimmed, images-only, and picked-nodes trees. The default
+ is 'images'.
"""
if len(sys.argv) > 1:
print(usageInfo, file=sys.stderr)
@@ -81,12 +83,13 @@ class InfoResponse:
self.subNodesInfo = subNodesInfo # [] | [NodeInfo, NodeInfo]
# For data lookup
-def lookupNodes(names, useReducedTree, dbCur):
+def lookupNodes(names, tree, dbCur):
" For a set of node names, returns a name-to-TolNode map that describes those nodes "
# Get node info
nameToNodes = {}
- nodesTable = "nodes" if not useReducedTree else "r_nodes"
- edgesTable = "edges" if not useReducedTree else "r_edges"
+ tblSuffix = "t" if tree == "trimmed" else "i" if tree == "images" else "p"
+ nodesTable = f"nodes_{tblSuffix}"
+ edgesTable = f"edges_{tblSuffix}"
queryParamStr = ",".join(["?"] * len(names))
query = f"SELECT name, id, tips FROM {nodesTable} WHERE name IN ({queryParamStr})"
for (nodeName, otolId, tips) in dbCur.execute(query, names):
@@ -133,23 +136,20 @@ def lookupNodes(names, useReducedTree, dbCur):
nameToNodes[name].commonName = altName
#
return nameToNodes
-def lookupSuggs(searchStr, suggLimit, useReducedTree, dbCur):
+def lookupSuggs(searchStr, suggLimit, tree, dbCur):
" For a search string, returns a SearchSuggResponse describing search suggestions "
+ time1 = time.time()
results = []
hasMore = False
# Get node names and alt-names
query1, query2 = (None, None)
- if not useReducedTree:
- query1 = "SELECT DISTINCT name FROM nodes" \
- " WHERE name LIKE ? AND name NOT LIKE '[%' ORDER BY length(name) LIMIT ?"
- query2 = "SELECT DISTINCT alt_name, name FROM names" \
- " WHERE alt_name LIKE ? ORDER BY length(alt_name) LIMIT ?"
- else:
- query1 = "SELECT DISTINCT name FROM r_nodes" \
- " WHERE name LIKE ? AND name NOT LIKE '[%' ORDER BY length(name) LIMIT ?"
- query2 = "SELECT DISTINCT alt_name, names.name FROM" \
- " names INNER JOIN r_nodes ON names.name = r_nodes.name" \
- " WHERE alt_name LIKE ? ORDER BY length(alt_name) LIMIT ?"
+ tblSuffix = "t" if tree == "trimmed" else "i" if tree == "images" else "p"
+ nodesTable = f"nodes_{tblSuffix}"
+ query1 = f"SELECT DISTINCT name FROM {nodesTable}" \
+ f" WHERE name LIKE ? AND name NOT LIKE '[%' ORDER BY length(name) LIMIT ?"
+ query2 = f"SELECT DISTINCT alt_name, names.name FROM" \
+ f" names INNER JOIN {nodesTable} ON names.name = {nodesTable}.name" \
+ f" WHERE alt_name LIKE ? ORDER BY length(alt_name) LIMIT ?"
# Join results, and get shortest
suggs = []
for (nodeName,) in dbCur.execute(query1, (searchStr + "%", suggLimit + 1)):
@@ -178,11 +178,12 @@ def lookupSuggs(searchStr, suggLimit, useReducedTree, dbCur):
if len(suggs) > suggLimit:
hasMore = True
#
+ print(time.time() - time1, file=sys.stderr)
return SearchSuggResponse(results, hasMore)
-def lookupInfo(name, useReducedTree, dbCur):
+def lookupInfo(name, tree, dbCur):
" For a node name, returns an InfoResponse, or None "
# Get node info
- nameToNodes = lookupNodes([name], useReducedTree, dbCur)
+ nameToNodes = lookupNodes([name], tree, dbCur)
tolNode = nameToNodes[name] if name in nameToNodes else None
if tolNode == None:
return None
@@ -190,7 +191,7 @@ def lookupInfo(name, useReducedTree, dbCur):
match = re.fullmatch(r"\[(.+) \+ (.+)]", name)
subNames = [match.group(1), match.group(2)] if match != None else []
if len(subNames) > 0:
- nameToSubNodes = lookupNodes(subNames, useReducedTree, dbCur)
+ nameToSubNodes = lookupNodes(subNames, tree, dbCur)
if len(nameToSubNodes) < 2:
print(f"ERROR: Unable to find sub-names entries for {name}", file=sys.stderr)
return None
@@ -247,15 +248,19 @@ def handleReq(dbCur):
query = "SELECT name FROM nodes LEFT JOIN edges ON nodes.name = edges.child WHERE edges.parent IS NULL LIMIT 1"
(name,) = dbCur.execute(query).fetchone()
reqType = queryDict["type"][0] if "type" in queryDict else None
- useReducedTree = "rtree" in queryDict and queryDict["rtree"][0] == "true"
+ tree = queryDict["tree"][0] if "tree" in queryDict else "images"
+ # Check for valid 'tree'
+ if tree != None and re.fullmatch(r"trimmed|images|picked", tree) == None:
+ respondJson(None)
+ return
# Get data of requested type
if reqType == "node":
toroot = "toroot" in queryDict and queryDict["toroot"][0] == "true"
if not toroot:
- tolNodes = lookupNodes([name], useReducedTree, dbCur)
+ tolNodes = lookupNodes([name], tree, dbCur)
if len(tolNodes) > 0:
tolNode = tolNodes[name]
- childNodeObjs = lookupNodes(tolNode.children, useReducedTree, dbCur)
+ childNodeObjs = lookupNodes(tolNode.children, tree, dbCur)
childNodeObjs[name] = tolNode
respondJson(childNodeObjs)
return
@@ -264,7 +269,7 @@ def handleReq(dbCur):
ranOnce = False
while True:
# Get node
- tolNodes = lookupNodes([name], useReducedTree, dbCur)
+ tolNodes = lookupNodes([name], tree, dbCur)
if len(tolNodes) == 0:
if not ranOnce:
respondJson(results)
@@ -281,7 +286,7 @@ def handleReq(dbCur):
for childName in tolNode.children:
if childName not in results:
childNamesToAdd.append(childName)
- childNodeObjs = lookupNodes(childNamesToAdd, useReducedTree, dbCur)
+ childNodeObjs = lookupNodes(childNamesToAdd, tree, dbCur)
results.update(childNodeObjs)
# Check if root
if tolNode.parent == None:
@@ -302,10 +307,10 @@ def handleReq(dbCur):
print(f"INFO: Invalid limit {suggLimit}", file=sys.stderr)
# Get search suggestions
if not invalidLimit:
- respondJson(lookupSuggs(name, suggLimit, useReducedTree, dbCur))
+ respondJson(lookupSuggs(name, suggLimit, tree, dbCur))
return
elif reqType == "info":
- infoResponse = lookupInfo(name, useReducedTree, dbCur)
+ infoResponse = lookupInfo(name, tree, dbCur)
if infoResponse != None:
respondJson(infoResponse)
return
diff --git a/backend/data/README.md b/backend/data/README.md
index 776ff17..13aeb89 100644
--- a/backend/data/README.md
+++ b/backend/data/README.md
@@ -38,13 +38,11 @@ This directory holds files used to generate data.db, which contains tree-of-life
Associates a node with an image from another node.
`otol_ids` can be an otol ID, or two comma-separated otol IDs or empty strings.
The latter is used for compound nodes.
-## Reduced-tree data
-- `r_nodes` <br>
- Format: `name TEXT PRIMARY KEY, tips INT` <br>
- Like `nodes`, but for a reduced tree.
-- `r_edges` <br>
- Format: `node TEXT, child TEXT, p_support INT, PRIMARY KEY (node, child)` <br>
- Like `edges` but for a reduced tree.
+## Reduced tree data
+- `nodes_t`, `nodes_i`, `nodes_p` <br>
+ These are like `nodes`, but describe the nodes for various reduced trees.
+- `edges_t`, `edges_i`, `edges_p` <br>
+ Like `edges` but for reduced trees.
# Generating the Database
@@ -147,9 +145,8 @@ Some of the python scripts require third-party packages:
- pickedNames.txt: Has lines of the form `nodeName1|altName1|prefAlt1`.
These correspond to entries in the `names` table. `prefAlt` should be 1 or 0.
A line like `name1|name1|1` causes a node to have no preferred alt-name.
-3. Run genReducedTreeData.py, which generates a second, reduced version of the tree,
- adding the `r_nodes` and `r_edges` tables, using `nodes` and `names`. Reads from
- pickedReducedNodes.txt, which lists names of nodes that must be included (1 per line).
-4. Optionally run trimTree.py, which tries to remove some 'low significance' nodes,
- for the sake of performance and content-relevance. Otherwise, some nodes may have
- over 10k children, which can take a while to render (took over a minute in testing).
+3. Run genReducedTrees.py, which generates multiple reduced versions of the tree,
+ adding the `nodes_*` and `edges_*` tables, using `nodes` and `names`. Reads from
+ pickedNodes.txt, which lists names of nodes that must be included (1 per line).
+ The original tree isn't used for web-queries, as some nodes would have over
+ 10k children, which can take a while to render (took over a minute in testing).
diff --git a/backend/data/genReducedTreeData.py b/backend/data/genReducedTreeData.py
deleted file mode 100755
index 2e56bba..0000000
--- a/backend/data/genReducedTreeData.py
+++ /dev/null
@@ -1,177 +0,0 @@
-#!/usr/bin/python3
-
-import sys, os.path, re
-import json, sqlite3
-
-usageInfo = f"""
-Usage: {sys.argv[0]}
-
-Creates a reduced version of the tree in the database.
-Reads a subset of the node names from a file, and creates a
-minimal tree that contains them, possibly with a few extras.
-"""
-if len(sys.argv) > 1:
- print(usageInfo, file=sys.stderr)
- sys.exit(1)
-
-dbFile = "data.db"
-nodeNamesFile = "pickedReducedNodes.txt"
-minimalNames = set()
-nodeMap = {} # Maps node names to node objects
-PREF_NUM_CHILDREN = 3 # Attempt inclusion of children up to this limit
-compNameRegex = re.compile(r"\[.+ \+ .+]") # Used to recognise composite nodes
-
-class Node:
- " Represents a node from the database "
- def __init__(self, id, children, parent, tips, pSupport):
- self.id = id
- self.children = children
- self.parent = parent
- self.tips = tips
- self.pSupport = pSupport
-
-print("Opening database")
-dbCon = sqlite3.connect(dbFile)
-dbCur = dbCon.cursor()
-
-print("Getting minimal name set")
-iterNum = 0
-with open(nodeNamesFile) as file:
- for line in file:
- iterNum += 1
- if iterNum % 100 == 0:
- print(f"At iteration {iterNum}")
- #
- name = line.rstrip()
- row = dbCur.execute("SELECT name from nodes WHERE name = ?", (name,)).fetchone()
- if row == None:
- row = dbCur.execute("SELECT name from names WHERE alt_name = ?", (name,)).fetchone()
- if row != None:
- minimalNames.add(row[0])
-if len(minimalNames) == 0:
- print("No names found")
- sys.exit(0)
-print(f"Result has {len(minimalNames)} names")
-
-print("Getting ancestor nodes")
-rootName = None
-iterNum = 0
-for name in minimalNames:
- iterNum += 1
- if iterNum % 100 == 0:
- print(f"At iteration {iterNum}")
- #
- prevName = None
- while name != None:
- if name not in nodeMap:
- (id, tips) = dbCur.execute("SELECT id, tips from nodes where name = ?", (name,)).fetchone()
- row = dbCur.execute("SELECT parent, p_support from edges where child = ?", (name,)).fetchone()
- parent = None if row == None or row[0] == "" else row[0]
- pSupport = row == None or row[1] == 1
- children = [] if prevName == None else [prevName]
- nodeMap[name] = Node(id, children, parent, 0, pSupport)
- prevName = name
- name = parent
- else:
- if prevName != None:
- nodeMap[name].children.append(prevName)
- break
- if name == None:
- rootName = prevName
-print(f"Result has {len(nodeMap)} nodes")
-
-print("Merging-upward composite nodes")
-namesToRemove = set()
-for (name, node) in nodeMap.items():
- parent = node.parent
- if parent != None and compNameRegex.fullmatch(name) != None:
- # Connect children to parent
- nodeMap[parent].children.remove(name)
- nodeMap[parent].children.extend(node.children)
- for n in node.children:
- nodeMap[n].parent = parent
- nodeMap[n].pSupport &= node.pSupport
- # Remember for removal
- namesToRemove.add(name)
-for name in namesToRemove:
- del nodeMap[name]
-print(f"Result has {len(nodeMap)} nodes")
-
-print("Removing 'chain collapsible' nodes")
-namesToRemove2 = set()
-for (name, node) in nodeMap.items():
- hasOneChild = len(node.children) == 1
- isOnlyChild = node.parent != None and len(nodeMap[node.parent].children) == 1
- if name not in minimalNames and (hasOneChild or isOnlyChild):
- parent = node.parent
- # Connect parent and children
- nodeMap[parent].children.remove(name)
- nodeMap[parent].children.extend(node.children)
- for n in node.children:
- nodeMap[n].parent = parent
- nodeMap[n].pSupport &= node.pSupport
- # Remember for removal
- namesToRemove2.add(name)
-for name in namesToRemove2:
- del nodeMap[name]
- namesToRemove.add(name)
-print(f"Result has {len(nodeMap)} nodes")
-
-print("Adding some additional nearby children")
-namesToAdd = []
-iterNum = 0
-for (name, node) in nodeMap.items():
- iterNum += 1
- if iterNum % 100 == 0:
- print(f"At iteration {iterNum}")
- #
- numChildren = len(node.children)
- if numChildren < PREF_NUM_CHILDREN:
- children = [row[0] for row in dbCur.execute("SELECT child FROM edges where parent = ?", (name,))]
- newChildren = []
- for n in children:
- if n in nodeMap or n in namesToRemove:
- continue
- if compNameRegex.fullmatch(name) != None:
- continue
- if dbCur.execute("SELECT name from node_imgs WHERE name = ?", (n,)).fetchone() == None:
- continue
- if dbCur.execute("SELECT name from linked_imgs WHERE name = ?", (n,)).fetchone() == None:
- continue
- newChildren.append(n)
- newChildNames = newChildren[:max(0, PREF_NUM_CHILDREN - numChildren)]
- node.children.extend(newChildNames)
- namesToAdd.extend(newChildNames)
-for name in namesToAdd:
- parent, pSupport = dbCur.execute("SELECT parent, p_support from edges WHERE child = ?", (name,)).fetchone()
- (id,) = dbCur.execute("SELECT id FROM nodes WHERE name = ?", (name,)).fetchone()
- parent = None if parent == "" else parent
- nodeMap[name] = Node(id, [], parent, 0, pSupport == 1)
-print(f"Result has {len(nodeMap)} nodes")
-
-print("Setting 'tips' values")
-def setTips(nodeName):
- node = nodeMap[nodeName]
- if len(node.children) == 0:
- node.tips = 1
- return 1
- tips = sum([setTips(childName) for childName in node.children])
- node.tips = tips
- return tips
-setTips(rootName)
-
-print("Adding reduced tree to database")
-dbCur.execute("CREATE TABLE r_nodes (name TEXT PRIMARY KEY, id TEXT UNIQUE, tips INT)")
-dbCur.execute("CREATE INDEX r_nodes_idx_nc ON r_nodes(name COLLATE NOCASE)")
-dbCur.execute("CREATE TABLE r_edges (parent TEXT, child TEXT, p_support INT, PRIMARY KEY (parent, child))")
-dbCur.execute("CREATE INDEX r_edges_child_idx ON r_edges(child)")
-for (name, node) in nodeMap.items():
- parentName = "" if node.parent == None else node.parent
- dbCur.execute("INSERT INTO r_nodes VALUES (?, ?, ?)", (name, node.id, node.tips))
- for childName in node.children:
- pSupport = 1 if nodeMap[childName].pSupport else 0
- dbCur.execute("INSERT INTO r_edges VALUES (?, ?, ?)", (name, childName, pSupport))
-
-print("Closing database")
-dbCon.commit()
-dbCon.close()
diff --git a/backend/data/genReducedTrees.py b/backend/data/genReducedTrees.py
new file mode 100755
index 0000000..17a66f2
--- /dev/null
+++ b/backend/data/genReducedTrees.py
@@ -0,0 +1,324 @@
+#!/usr/bin/python3
+
+import sys, os.path, re
+import json, sqlite3
+
+usageInfo = f"""
+Usage: {sys.argv[0]} [tree1]
+
+Creates reduced versions of the tree in the database:
+- A 'picked nodes' tree:
+ Created from a minimal set of node names read from a file,
+ possibly with some extra randmly-picked children.
+- An 'images only' tree:
+ Created by removing nodes without an image or presence in the
+ 'picked' tree.
+- A 'weakly trimmed' tree:
+ Created by removing nodes that lack an image or description, or
+ presence in the 'picked' tree. And, for nodes with 'many' children,
+ removing some more, despite any node descriptions.
+
+If tree1 is specified, as 'picked', 'images', or 'trimmed', only that
+tree is generated.
+"""
+if len(sys.argv) > 2 or len(sys.argv) == 2 and re.fullmatch(r"picked|images|trimmed", sys.argv[1]) == None:
+ print(usageInfo, file=sys.stderr)
+ sys.exit(1)
+
+tree = sys.argv[1] if len(sys.argv) > 1 else None
+dbFile = "data.db"
+pickedNodesFile = "pickedNodes.txt"
+COMP_NAME_REGEX = re.compile(r"\[.+ \+ .+]") # Used to recognise composite nodes
+PREF_NUM_CHILDREN = 3 # When generating the picked-nodes tree, include extra children up to this limit
+SOFT_CHILD_LIMIT = 500 # When generating the weakly-trimmed tree, this indicates when a node has 'many' children
+
+class Node:
+ def __init__(self, id, children, parent, tips, pSupport):
+ self.id = id
+ self.children = children
+ self.parent = parent
+ self.tips = tips
+ self.pSupport = pSupport
+
+print("Opening database")
+dbCon = sqlite3.connect(dbFile)
+dbCur = dbCon.cursor()
+
+def genPickedNodeTree(dbCur, pickedNames, rootName):
+ global COMP_NAME_REGEX, PREF_NUM_CHILDREN
+ nodeMap = {} # Maps node names to Nodes
+ print("Getting ancestors")
+ nodeMap = genNodeMap(dbCur, pickedNames, 100)
+ print(f"Result has {len(nodeMap)} nodes")
+ print("Removing composite nodes")
+ removedNames = removeCompositeNodes(nodeMap)
+ print(f"Result has {len(nodeMap)} nodes")
+ print("Removing 'chain collapsible' nodes")
+ temp = removeCollapsibleNodes(nodeMap, pickedNames)
+ removedNames.update(temp)
+ print(f"Result has {len(nodeMap)} nodes")
+ print("Adding some additional nearby children")
+ namesToAdd = []
+ iterNum = 0
+ for (name, node) in nodeMap.items():
+ iterNum += 1
+ if iterNum % 100 == 0:
+ print(f"At iteration {iterNum}")
+ #
+ numChildren = len(node.children)
+ if numChildren < PREF_NUM_CHILDREN:
+ children = [row[0] for row in dbCur.execute("SELECT child FROM edges where parent = ?", (name,))]
+ newChildren = []
+ for n in children:
+ if n in nodeMap or n in removedNames:
+ continue
+ if COMP_NAME_REGEX.fullmatch(n) != None:
+ continue
+ if dbCur.execute("SELECT name from node_imgs WHERE name = ?", (n,)).fetchone() == None and \
+ dbCur.execute("SELECT name from linked_imgs WHERE name = ?", (n,)).fetchone() == None:
+ continue
+ newChildren.append(n)
+ newChildNames = newChildren[:(PREF_NUM_CHILDREN - numChildren)]
+ node.children.extend(newChildNames)
+ namesToAdd.extend(newChildNames)
+ for name in namesToAdd:
+ parent, pSupport = dbCur.execute("SELECT parent, p_support from edges WHERE child = ?", (name,)).fetchone()
+ (id,) = dbCur.execute("SELECT id FROM nodes WHERE name = ?", (name,)).fetchone()
+ parent = None if parent == "" else parent
+ nodeMap[name] = Node(id, [], parent, 0, pSupport == 1)
+ print(f"Result has {len(nodeMap)} nodes")
+ print("Setting 'tips' values")
+ updateTips(rootName, nodeMap)
+ print("Creating table")
+ addTreeTables(nodeMap, dbCur, "p")
+def genImagesOnlyTree(dbCur, nodesWithImgOrPicked, rootName):
+ print("Getting ancestors")
+ nodeMap = genNodeMap(dbCur, nodesWithImgOrPicked, 1e4)
+ print(f"Result has {len(nodeMap)} nodes")
+ print("Removing 'collapsible' nodes")
+ removeCollapsibleNodes(nodeMap, nodesWithImgOrPicked)
+ print(f"Result has {len(nodeMap)} nodes")
+ print(f"Setting 'tips' values")
+ updateTips(rootName, nodeMap)
+ print("Creating table")
+ addTreeTables(nodeMap, dbCur, "i")
+def genWeaklyTrimmedTree(dbCur, nodesWithImgDescOrPicked, nodesWithImgOrPicked, rootName):
+ global SOFT_CHILD_LIMIT
+ print("Getting ancestors")
+ nodeMap = genNodeMap(dbCur, nodesWithImgDescOrPicked, 1e5)
+ print(f"Result has {len(nodeMap)} nodes")
+ print("Getting nodes to 'strongly keep'")
+ iterNum = 0
+ nodesFromImgOrPicked = set()
+ for name in nodesWithImgOrPicked:
+ iterNum += 1
+ if iterNum % 1e4 == 0:
+ print(f"At iteration {iterNum}")
+ #
+ while name != None:
+ if name not in nodesFromImgOrPicked:
+ nodesFromImgOrPicked.add(name)
+ name = nodeMap[name].parent
+ else:
+ break
+ print(f"Node set has {len(nodesFromImgOrPicked)} nodes")
+ print("Removing 'collapsible' nodes")
+ removeCollapsibleNodes(nodeMap, nodesWithImgDescOrPicked)
+ print(f"Result has {len(nodeMap)} nodes")
+ print(f"Finding nodes with 'many' children")
+ namesToRemove = set()
+ iterNum = 0
+ def findTrimmables(nodeName):
+ nonlocal nodeMap, nodesFromImgOrPicked, iterNum
+ iterNum += 1
+ if iterNum % 1e5 == 0:
+ print(f"At iteration {iterNum}")
+ #
+ node = nodeMap[nodeName]
+ if len(node.children) > SOFT_CHILD_LIMIT:
+ numToTrim = len(node.children) - SOFT_CHILD_LIMIT
+ # Try removing weakly-kept nodes, preferring those with less tips
+ candidatesToTrim = [n for n in node.children if n not in nodesFromImgOrPicked]
+ childToTips = {n: nodeMap[n].tips for n in candidatesToTrim}
+ candidatesToTrim.sort(key=lambda n: childToTips[n], reverse=True)
+ childrenToRemove = set(candidatesToTrim[-numToTrim:])
+ node.children = [n for n in node.children if n not in childrenToRemove]
+ # Mark nodes for deletion
+ for n in childrenToRemove:
+ markForRemoval(n)
+ # Recurse on children
+ for n in node.children:
+ findTrimmables(n)
+ def markForRemoval(nodeName):
+ nonlocal nodeMap, namesToRemove
+ namesToRemove.add(nodeName)
+ for child in nodeMap[nodeName].children:
+ markForRemoval(child)
+ findTrimmables(rootName)
+ print(f"Deleting {len(namesToRemove)} nodes")
+ for nodeName in namesToRemove:
+ del nodeMap[nodeName]
+ print(f"Setting 'tips' values")
+ updateTips(rootName, nodeMap)
+ print("Creating table")
+ addTreeTables(nodeMap, dbCur, "t")
+# Helper functions
+def genNodeMap(dbCur, nameSet, itersBeforePrint = 1):
+ " Returns a subtree that includes nodes in 'nameSet', as a name-to-Node map "
+ nodeMap = {}
+ iterNum = 0
+ for name in nameSet:
+ iterNum += 1
+ if iterNum % itersBeforePrint == 0:
+ print(f"At iteration {iterNum}")
+ #
+ prevName = None
+ while name != None:
+ if name not in nodeMap:
+ # Add node
+ (id, tips) = dbCur.execute("SELECT id, tips from nodes where name = ?", (name,)).fetchone()
+ row = dbCur.execute("SELECT parent, p_support from edges where child = ?", (name,)).fetchone()
+ parent = None if row == None or row[0] == "" else row[0]
+ pSupport = row == None or row[1] == 1
+ children = [] if prevName == None else [prevName]
+ nodeMap[name] = Node(id, children, parent, 0, pSupport)
+ # Iterate to parent
+ prevName = name
+ name = parent
+ else:
+ # Just add as child
+ if prevName != None:
+ nodeMap[name].children.append(prevName)
+ break
+ return nodeMap
+def removeCompositeNodes(nodeMap):
+ " Given a tree, removes composite-name nodes, and returns the removed nodes' names "
+ global COMP_NAME_REGEX
+ namesToRemove = set()
+ for (name, node) in nodeMap.items():
+ parent = node.parent
+ if parent != None and COMP_NAME_REGEX.fullmatch(name) != None:
+ # Connect children to parent
+ nodeMap[parent].children.remove(name)
+ nodeMap[parent].children.extend(node.children)
+ for n in node.children:
+ nodeMap[n].parent = parent
+ nodeMap[n].pSupport &= node.pSupport
+ # Remember for removal
+ namesToRemove.add(name)
+ for name in namesToRemove:
+ del nodeMap[name]
+ return namesToRemove
+def removeCollapsibleNodes(nodeMap, nodesToKeep = {}):
+ """ Given a tree, removes single-child parents, then only-childs,
+ with given exceptions, and returns the set of removed nodes' names """
+ namesToRemove = set()
+ # Remove single-child parents
+ for (name, node) in nodeMap.items():
+ if len(node.children) == 1 and node.parent != None and name not in nodesToKeep:
+ # Connect parent and children
+ parent = node.parent
+ child = node.children[0]
+ nodeMap[parent].children.remove(name)
+ nodeMap[parent].children.append(child)
+ nodeMap[child].parent = parent
+ nodeMap[child].pSupport &= node.pSupport
+ # Remember for removal
+ namesToRemove.add(name)
+ for name in namesToRemove:
+ del nodeMap[name]
+ # Remove only-childs (not redundant because 'nodesToKeep' can cause single-child parents to be kept)
+ namesToRemove.clear()
+ for (name, node) in nodeMap.items():
+ isOnlyChild = node.parent != None and len(nodeMap[node.parent].children) == 1
+ if isOnlyChild and name not in nodesToKeep:
+ # Connect parent and children
+ parent = node.parent
+ nodeMap[parent].children = node.children
+ for n in node.children:
+ nodeMap[n].parent = parent
+ nodeMap[n].pSupport &= node.pSupport
+ # Remember for removal
+ namesToRemove.add(name)
+ for name in namesToRemove:
+ del nodeMap[name]
+ #
+ return namesToRemove
+def updateTips(nodeName, nodeMap):
+ " Updates the 'tips' values for a node and it's descendants, returning the node's new 'tips' value "
+ node = nodeMap[nodeName]
+ tips = sum([updateTips(childName, nodeMap) for childName in node.children])
+ tips = max(1, tips)
+ node.tips = tips
+ return tips
+def addTreeTables(nodeMap, dbCur, suffix):
+ " Adds a tree to the database, as tables nodes_X and edges_X, where X is the given suffix "
+ nodesTbl = f"nodes_{suffix}"
+ edgesTbl = f"edges_{suffix}"
+ dbCur.execute(f"CREATE TABLE {nodesTbl} (name TEXT PRIMARY KEY, id TEXT UNIQUE, tips INT)")
+ dbCur.execute(f"CREATE INDEX {nodesTbl}_idx_nc ON {nodesTbl}(name COLLATE NOCASE)")
+ dbCur.execute(f"CREATE TABLE {edgesTbl} (parent TEXT, child TEXT, p_support INT, PRIMARY KEY (parent, child))")
+ dbCur.execute(f"CREATE INDEX {edgesTbl}_child_idx ON {edgesTbl}(child)")
+ for (name, node) in nodeMap.items():
+ dbCur.execute(f"INSERT INTO {nodesTbl} VALUES (?, ?, ?)", (name, node.id, node.tips))
+ for childName in node.children:
+ pSupport = 1 if nodeMap[childName].pSupport else 0
+ dbCur.execute(f"INSERT INTO {edgesTbl} VALUES (?, ?, ?)", (name, childName, pSupport))
+
+print(f"Finding root node")
+query = "SELECT name FROM nodes LEFT JOIN edges ON nodes.name = edges.child WHERE edges.parent IS NULL LIMIT 1"
+(rootName,) = dbCur.execute(query).fetchone()
+print(f"Found \"{rootName}\"")
+
+print('=== Getting picked-nodes ===')
+pickedNames = set()
+pickedTreeExists = False
+if dbCur.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='nodes_p'").fetchone() == None:
+ print(f"Reading from {pickedNodesFile}")
+ with open(pickedNodesFile) as file:
+ for line in file:
+ name = line.rstrip()
+ row = dbCur.execute("SELECT name from nodes WHERE name = ?", (name,)).fetchone()
+ if row == None:
+ row = dbCur.execute("SELECT name from names WHERE alt_name = ?", (name,)).fetchone()
+ if row != None:
+ pickedNames.add(row[0])
+ if len(pickedNames) == 0:
+ raise Exception("ERROR: No picked names found")
+else:
+ pickedTreeExists = True
+ print("Picked-node tree already exists")
+ if tree == 'picked':
+ sys.exit()
+ for (name,) in dbCur.execute("SELECT name FROM nodes_p"):
+ pickedNames.add(name)
+print(f"Found {len(pickedNames)} names")
+
+if (tree == 'picked' or tree == None) and not pickedTreeExists:
+ print("=== Generating picked-nodes tree ===")
+ genPickedNodeTree(dbCur, pickedNames, rootName)
+if tree != 'picked':
+ print("=== Finding 'non-low significance' nodes ===")
+ nodesWithImgOrPicked = set()
+ nodesWithImgDescOrPicked = set()
+ print("Finding nodes with descs")
+ for (name,) in dbCur.execute("SELECT name FROM wiki_ids"): # Can assume the wiki_id has a desc
+ nodesWithImgDescOrPicked.add(name)
+ print("Finding nodes with images")
+ for (name,) in dbCur.execute("SELECT name FROM node_imgs"):
+ nodesWithImgDescOrPicked.add(name)
+ nodesWithImgOrPicked.add(name)
+ print("Adding picked nodes")
+ for name in pickedNames:
+ nodesWithImgDescOrPicked.add(name)
+ nodesWithImgOrPicked.add(name)
+ if tree == 'images' or tree == None:
+ print("=== Generating images-only tree ===")
+ genImagesOnlyTree(dbCur, nodesWithImgOrPicked, rootName)
+ if tree == 'trimmed' or tree == None:
+ print("=== Generating weakly-trimmed tree ===")
+ genWeaklyTrimmedTree(dbCur, nodesWithImgDescOrPicked, nodesWithImgOrPicked, rootName)
+
+print("Closing database")
+dbCon.commit()
+dbCon.close()
diff --git a/backend/data/trimTree.py b/backend/data/trimTree.py
deleted file mode 100755
index fa269d8..0000000
--- a/backend/data/trimTree.py
+++ /dev/null
@@ -1,143 +0,0 @@
-#!/usr/bin/python3
-
-import sys
-import sqlite3
-
-usageInfo = f"""
-Usage: {sys.argv[0]}
-
-Tries to remove 'low significance' nodes from the database. Currently
-removes nodes that don't have an image or description, or a presence in
-the reduced tree. Also, for nodes with 'many' children, trims some more,
-ignoring the presence of node descriptions.
-"""
-if len(sys.argv) > 1:
- print(usageInfo, file=sys.stderr)
- sys.exit(1)
-
-dbFile = "data.db"
-softChildLimit = 500 # Used to determine when a node has 'many' children
-
-print("Opening database")
-dbCon = sqlite3.connect(dbFile)
-dbCur = dbCon.cursor()
-
-print("Finding nodes to keep")
-nodesToKeep = set()
-nodesToStronglyKeep = set()
-print("\tFinding nodes with descs")
-for (name,) in dbCur.execute("SELECT name FROM wiki_ids"): # Can assume the wiki_id has a desc
- nodesToKeep.add(name)
-print("\tFinding nodes with images")
-for (name,) in dbCur.execute("SELECT name FROM node_imgs"):
- nodesToKeep.add(name)
- nodesToStronglyKeep.add(name)
-print("\tFinding nodes in reduced-tree")
-for (name,) in dbCur.execute("SELECT name from r_nodes"):
- nodesToKeep.add(name)
- nodesToStronglyKeep.add(name)
-print("\tFinding ancestors")
-ancestors = set()
-iterNum = 0
-for name in nodesToKeep:
- iterNum += 1
- if iterNum % 1e4 == 0:
- print(f"\tAt iteration {iterNum}")
- #
- while True:
- row = dbCur.execute("SELECT parent FROM edges WHERE child = ?", (name,)).fetchone()
- if row != None:
- parent = row[0]
- if parent not in nodesToKeep and parent not in ancestors:
- ancestors.add(parent)
- if name not in nodesToStronglyKeep:
- nodesToStronglyKeep.add(parent)
- name = parent
- continue
- break
-nodesToKeep.update(ancestors)
-print(f"Result: {len(nodesToKeep)} nodes to keep")
-
-# Find root node
-query = "SELECT name FROM nodes LEFT JOIN edges ON nodes.name = edges.child WHERE edges.parent IS NULL LIMIT 1"
-(rootName,) = dbCur.execute(query).fetchone()
-print(f"Found root node \"{rootName}\"")
-
-print("Looking for trimmable nodes")
-nodeToTipsChg = {} # Used to update 'tips' values after trimming
-nodesToDelete = set()
-iterNum = 0
-def findTrimmables(nodeName):
- global iterNum
- iterNum += 1
- if iterNum % 1e4 == 0:
- print(f"At iteration {iterNum}")
- #
- childNames = [row[0] for row in dbCur.execute("SELECT child FROM edges WHERE parent = ?", (nodeName,))]
- childrenToKeep, otherChildren = set(), set()
- for n in childNames:
- if n in nodesToKeep:
- childrenToKeep.add(n)
- else:
- otherChildren.add(n)
- tipsRemoved = 0
- # Check soft limit
- if len(childrenToKeep) > softChildLimit:
- numToTrim = len(childrenToKeep) - softChildLimit
- # Try removing weakly-kept nodes, preferring those with less tips
- candidatesToTrim = [n for n in childrenToKeep if n not in nodesToStronglyKeep]
- childToTips = {}
- query = "SELECT name, tips FROM nodes WHERE name IN ({})".format(",".join(["?"] * len(candidatesToTrim)))
- for (n, tips) in dbCur.execute(query, candidatesToTrim):
- childToTips[n] = tips
- candidatesToTrim.sort(key=lambda n: childToTips[n], reverse=True)
- otherChildren.update(candidatesToTrim[-numToTrim:])
- childrenToKeep.difference_update(candidatesToTrim[-numToTrim:])
- # Mark nodes for deletion
- for n in otherChildren:
- tipsRemoved += markForDeletion(n)
- # Recurse on children
- for n in childrenToKeep:
- tipsRemoved += findTrimmables(n)
- # Store info for updating num-tips later
- nodeToTipsChg[nodeName] = tipsRemoved
- return tipsRemoved
-def markForDeletion(nodeName):
- nodesToDelete.add(nodeName)
- childNames = [row[0] for row in dbCur.execute("SELECT child FROM edges WHERE parent = ?", (nodeName,))]
- if len(childNames) == 0:
- return 1
- else:
- tipsRemoved = 0
- for n in childNames:
- tipsRemoved += markForDeletion(n)
- return tipsRemoved
-findTrimmables(rootName)
-
-print(f"Deleting {len(nodesToDelete)} nodes")
-iterNum = 0
-for nodeName in nodesToDelete:
- iterNum += 1
- if iterNum % 1e4 == 0:
- print(f"At iteration {iterNum}")
- #
- dbCur.execute("DELETE FROM nodes WHERE name = ?", (nodeName,))
- dbCur.execute("DELETE FROM edges WHERE parent = ?", (nodeName,))
- dbCur.execute("DELETE FROM edges WHERE child = ?", (nodeName,))
- dbCur.execute("DELETE FROM names WHERE name = ?", (nodeName,))
- # Could also delete from 'eol_ids', 'wiki_ids', and 'descs', but this
- # makes it much harder to restore the original data if needed, and
- # the memory savings didn't seem significant.
-
-print(f"Updating num-tips for {len(nodeToTipsChg)} nodes")
-iterNum = 0
-for (nodeName, tipsChg) in nodeToTipsChg.items():
- iterNum += 1
- if iterNum % 1e5 == 0:
- print(f"At iteration {iterNum}")
- #
- dbCur.execute("UPDATE nodes SET tips = tips - ? WHERE name = ?", (tipsChg, nodeName))
-
-print("Closing database")
-dbCon.commit()
-dbCon.close()
diff --git a/src/App.vue b/src/App.vue
index f54cb31..13da971 100644
--- a/src/App.vue
+++ b/src/App.vue
@@ -326,7 +326,7 @@ export default defineComponent({
let tolNode = this.tolMap.get(layoutNode.name)!;
if (!this.tolMap.has(tolNode.children[0])){
let urlParams = 'type=node&name=' + encodeURIComponent(layoutNode.name);
- urlParams += this.uiOpts.useReducedTree ? '&rtree=true' : '';
+ urlParams += '&tree=' + this.uiOpts.tree;
let responseObj: {[x: string]: TolNode} = await getServerResponse(urlParams);
if (responseObj == null){
return false;
@@ -421,7 +421,7 @@ export default defineComponent({
let tolNode = this.tolMap.get(layoutNode.name)!;
if (!this.tolMap.has(tolNode.children[0])){
let urlParams = 'type=node&name=' + encodeURIComponent(layoutNode.name);
- urlParams += this.uiOpts.useReducedTree ? '&rtree=true' : '';
+ urlParams += '&tree=' + this.uiOpts.tree;
let responseObj: {[x: string]: TolNode} = await getServerResponse(urlParams);
if (responseObj == null){
return false;
@@ -490,7 +490,7 @@ export default defineComponent({
}
// Query server for tol-node info
let urlParams = 'type=info&name=' + encodeURIComponent(nodeName);
- urlParams += this.uiOpts.useReducedTree ? '&rtree=true' : '';
+ urlParams += '&tree=' + this.uiOpts.tree;
let responseObj: InfoResponse = await getServerResponse(urlParams);
if (responseObj == null){
return;
@@ -819,7 +819,7 @@ export default defineComponent({
//Note: Using JSON.stringify here to roughly deep-compare values
if (item == null && JSON.stringify(this.uiOpts[prop]) != JSON.stringify(uiOpts[prop])){
this.uiOpts[prop] = uiOpts[prop];
- if (prop == 'useReducedTree'){
+ if (prop == 'tree'){
changedTree = true;
}
}
@@ -875,7 +875,7 @@ export default defineComponent({
async initTreeFromServer(){
// Query server
let urlParams = 'type=node';
- urlParams += this.uiOpts.useReducedTree ? '&rtree=true' : '';
+ urlParams += '&tree=' + this.uiOpts.tree;
let responseObj: {[x: string]: TolNode} = await getServerResponse(urlParams);
if (responseObj == null){
return;
diff --git a/src/components/SearchModal.vue b/src/components/SearchModal.vue
index 2e8b9a2..4d39772 100644
--- a/src/components/SearchModal.vue
+++ b/src/components/SearchModal.vue
@@ -117,7 +117,7 @@ export default defineComponent({
// Get URL params to use for querying search-suggestions
let urlParams = 'type=sugg&name=' + encodeURIComponent(input.value);
urlParams += '&limit=' + this.uiOpts.searchSuggLimit;
- urlParams += this.uiOpts.useReducedTree ? '&rtree=true' : '';
+ urlParams += '&tree=' + this.uiOpts.tree;
// Query server, delaying/skipping if a request was recently sent
this.pendingSuggReqParams = urlParams;
this.pendingSuggInput = input.value;
@@ -186,7 +186,7 @@ export default defineComponent({
}
// Ask server for nodes in parent-chain, updates tolMap, then emits search event
let urlParams = 'type=node&toroot=true&name=' + encodeURIComponent(tolNodeName);
- urlParams += this.uiOpts.useReducedTree ? '&rtree=true' : '';
+ urlParams += '&tree=' + this.uiOpts.tree;
let responseObj: {[x: string]: TolNode} = await getServerResponse(urlParams);
if (responseObj == null){
return;
diff --git a/src/components/SettingsModal.vue b/src/components/SettingsModal.vue
index 16351c4..315ddb4 100644
--- a/src/components/SettingsModal.vue
+++ b/src/components/SettingsModal.vue
@@ -78,8 +78,15 @@
<div class="border rounded p-1">
<h2 class="text-center">Other</h2>
<div>
- <label> <input type="checkbox" v-model="uiOpts.useReducedTree"
- @change="onSettingChg('UI', 'useReducedTree')"/> Use simplified tree </label>
+ <div>Tree to use</div>
+ <ul>
+ <li> <label> <input type="radio" v-model="uiOpts.tree" value="trimmed"
+ @change="onSettingChg('UI', 'tree')"/> Comprehensive </label> </li>
+ <li> <label> <input type="radio" v-model="uiOpts.tree" value="images"
+ @change="onSettingChg('UI', 'tree')"/> Visual </label> </li>
+ <li> <label> <input type="radio" v-model="uiOpts.tree" value="picked"
+ @change="onSettingChg('UI', 'tree')"/> Minimal </label> </li>
+ </ul>
</div>
<div>
<label> <input type="checkbox" v-model="uiOpts.searchJumpMode"
@@ -161,7 +168,7 @@ export default defineComponent({
}
// Notify App
this.$emit('setting-chg', optionType, option,
- {relayout: optionType == 'LYT', reinit: optionType == 'UI' && option == 'useReducedTree'});
+ {relayout: optionType == 'LYT', reinit: optionType == 'UI' && option == 'tree'});
// Possibly make saved-indicator appear/animate
if (!this.saved){
this.saved = true;
@@ -205,7 +212,7 @@ export default defineComponent({
// Restore default options
let defaultLytOpts = getDefaultLytOpts();
let defaultUiOpts = getDefaultUiOpts(defaultLytOpts);
- let needReinit = this.uiOpts.useReducedTree != defaultUiOpts.useReducedTree;
+ let needReinit = this.uiOpts.tree != defaultUiOpts.tree;
Object.assign(this.lytOpts, defaultLytOpts);
Object.assign(this.uiOpts, defaultUiOpts);
// Notify App
diff --git a/src/components/TileInfoModal.vue b/src/components/TileInfoModal.vue
index 129e861..3593600 100644
--- a/src/components/TileInfoModal.vue
+++ b/src/components/TileInfoModal.vue
@@ -70,7 +70,7 @@ import CloseIcon from './icon/CloseIcon.vue';
import ExternalLinkIcon from './icon/ExternalLinkIcon.vue';
import {TolNode, TolMap} from '../tol';
import {LayoutNode, LayoutOptions} from '../layout';
-import {getServerResponse, getImagePath, DescInfo, ImgInfo, NodeInfo, InfoResponse, UiOptions} from '../lib';
+import {getImagePath, DescInfo, ImgInfo, NodeInfo, InfoResponse, UiOptions} from '../lib';
import {capitalizeWords} from '../util';
export default defineComponent({
diff --git a/src/lib.ts b/src/lib.ts
index bbeb02a..2b0aabc 100644
--- a/src/lib.ts
+++ b/src/lib.ts
@@ -106,7 +106,7 @@ export type UiOptions = {
touchDevice: boolean,
breakpoint: Breakpoint,
// Other
- useReducedTree: boolean,
+ tree: 'trimmed' | 'images' | 'picked',
searchSuggLimit: number, // Max number of search suggestions
searchJumpMode: boolean,
tutorialSkip: boolean,
@@ -169,7 +169,7 @@ export function getDefaultUiOpts(lytOpts: LayoutOptions): UiOptions {
touchDevice: onTouchDevice(),
breakpoint: getBreakpoint(),
// Other
- useReducedTree: false,
+ tree: 'images',
searchSuggLimit: 10,
searchJumpMode: false,
tutorialSkip: false,