diff options
| author | Terry Truong <terry06890@gmail.com> | 2022-07-01 19:28:12 +1000 |
|---|---|---|
| committer | Terry Truong <terry06890@gmail.com> | 2022-07-01 19:28:12 +1000 |
| commit | 551fbe163b90cc1f318612c167fbdfe738dd7132 (patch) | |
| tree | 00286538d754fdf686751a3d4c1689d799ecd65e /backend/data/genReducedTreeData.py | |
| parent | c2b9a8b7a706cdca58dab7f4a980401e1c20a602 (diff) | |
Generate 3 reduced trees, keeping the original, and serve only those
Generate a 'trimmed' reduced tree instead of changing the original.
Generate an 'images-only' reduced tree, and use it as the default.
Combine 'picked' reduced tree code with that of other reduced trees.
Adapt server API to allow selecting between more than 2 trees.
Add client setting for selecting between 3 trees.
Diffstat (limited to 'backend/data/genReducedTreeData.py')
| -rwxr-xr-x | backend/data/genReducedTreeData.py | 177 |
1 files changed, 0 insertions, 177 deletions
#!/usr/bin/python3
# Contents of backend/data/genReducedTreeData.py (blob 2e56bba, mode 100755).
"""Create a reduced version of the tree stored in the database.

Reads a subset of the node names from a file, and creates a minimal tree
that contains them, possibly with a few extras. The result is written to
the 'r_nodes' and 'r_edges' tables of the same database.
"""

import sys
import os.path
import re
import json
import sqlite3

usageInfo = f"""
Usage: {sys.argv[0]}

Creates a reduced version of the tree in the database.
Reads a subset of the node names from a file, and creates a
minimal tree that contains them, possibly with a few extras.
"""

dbFile = "data.db"
nodeNamesFile = "pickedReducedNodes.txt"
minimalNames = set()
nodeMap = {} # Maps node names to node objects
PREF_NUM_CHILDREN = 3 # Attempt inclusion of children up to this limit
compNameRegex = re.compile(r"\[.+ \+ .+]") # Used to recognise composite nodes

class Node:
    " Represents a node from the database "
    def __init__(self, id, children, parent, tips, pSupport):
        self.id = id # Value of the 'id' column of the node's 'nodes' row
        self.children = children # List of child node names
        self.parent = parent # Parent node name, or None for a node without a parent
        self.tips = tips # Number of leaf nodes below this node (see setTips())
        self.pSupport = pSupport # Boolean form of the 'p_support' value of the edge to the parent

def getMinimalNames(dbCur, lines):
    """Resolve picked names into names of nodes that exist in the database.

    dbCur: cursor for a database with 'nodes' and 'names' tables.
    lines: iterable of strings, one picked name per item (trailing whitespace is stripped).
    Returns the set of matching node names. A picked name that isn't a node name
    is looked up as an alternative name. Unresolvable names are ignored.
    """
    names = set()
    for iterNum, line in enumerate(lines, 1):
        if iterNum % 100 == 0:
            print(f"At iteration {iterNum}")
        name = line.rstrip()
        row = dbCur.execute("SELECT name from nodes WHERE name = ?", (name,)).fetchone()
        if row is None:
            row = dbCur.execute("SELECT name from names WHERE alt_name = ?", (name,)).fetchone()
        if row is not None:
            names.add(row[0])
    return names

def addAncestors(dbCur, names, nodes):
    """Add each named node, and all of its ancestors, to 'nodes'.

    dbCur: cursor for a database with 'nodes' and 'edges' tables.
    names: iterable of node names to include.
    nodes: dict mapping node names to Node objects, which gets added to.
    Returns the name of the last node found to have no parent, or None if no walk reached one.
    """
    rootName = None
    for iterNum, name in enumerate(names, 1):
        if iterNum % 100 == 0:
            print(f"At iteration {iterNum}")
        # Walk upward, stopping at a node that was already added
        prevName = None
        while name is not None:
            if name in nodes:
                if prevName is not None:
                    nodes[name].children.append(prevName)
                break
            nodeRow = dbCur.execute("SELECT id, tips from nodes where name = ?", (name,)).fetchone()
            edgeRow = dbCur.execute("SELECT parent, p_support from edges where child = ?", (name,)).fetchone()
            # A missing edge row, or an empty parent name, marks a node without a parent
            parent = None if edgeRow is None or edgeRow[0] == "" else edgeRow[0]
            pSupport = edgeRow is None or edgeRow[1] == 1
            children = [] if prevName is None else [prevName]
            nodes[name] = Node(nodeRow[0], children, parent, 0, pSupport)
            prevName = name
            name = parent
        if name is None:
            rootName = prevName
    return rootName

def _spliceOut(nodes, name):
    " Connects a node's children to its parent (the node itself stays in 'nodes') "
    node = nodes[name]
    siblings = nodes[node.parent].children
    siblings.remove(name)
    siblings.extend(node.children)
    for childName in node.children:
        child = nodes[childName]
        child.parent = node.parent
        child.pSupport &= node.pSupport

def mergeCompositeNodes(nodes):
    """Remove each composite-named node that has a parent, moving its children up to that parent.

    nodes: dict mapping node names to Node objects, which gets modified.
    Returns the set of removed names.
    """
    removed = set()
    for name, node in nodes.items():
        if node.parent is None or compNameRegex.fullmatch(name) is None:
            continue
        _spliceOut(nodes, name)
        removed.add(name)
    # Deletion is deferred, as the dict can't change size while being iterated over
    for name in removed:
        del nodes[name]
    return removed

def collapseChains(nodes, names):
    """Remove nodes not in 'names' that have exactly one child, or are an only-child.

    nodes: dict mapping node names to Node objects, which gets modified.
    names: set of node names that must be kept.
    Returns the set of removed names.
    """
    removed = set()
    for name, node in nodes.items():
        # A node without a parent has nothing to attach its children to, so is always kept
        if node.parent is None or name in names:
            continue
        hasOneChild = len(node.children) == 1
        isOnlyChild = len(nodes[node.parent].children) == 1
        if hasOneChild or isOnlyChild:
            _spliceOut(nodes, name)
            removed.add(name)
    for name in removed:
        del nodes[name]
    return removed

def addNearbyChildren(dbCur, nodes, removedNames, prefNumChildren=PREF_NUM_CHILDREN):
    """Give nodes with fewer than 'prefNumChildren' children some extra children from the database.

    dbCur: cursor for a database with 'nodes', 'edges', 'node_imgs', and 'linked_imgs' tables.
    nodes: dict mapping node names to Node objects, which gets modified.
    removedNames: names of nodes that were deliberately removed, and must not be re-added.
    prefNumChildren: extra children are only added up to this number of children.
    Returns a list of the added node names.
    """
    namesToAdd = []
    for iterNum, (name, node) in enumerate(nodes.items(), 1):
        if iterNum % 100 == 0:
            print(f"At iteration {iterNum}")
        numMissing = prefNumChildren - len(node.children)
        if numMissing <= 0:
            continue
        # Get all rows before looping, as the cursor gets reused by the queries below
        candidates = [row[0] for row in dbCur.execute("SELECT child FROM edges where parent = ?", (name,))]
        newChildNames = []
        for n in candidates:
            if len(newChildNames) == numMissing:
                break
            if n in nodes or n in removedNames:
                continue
            if compNameRegex.fullmatch(n) is not None: # Don't add composite nodes
                continue
            # NOTE(review): requires an entry in both image tables. Confirm 'either' wasn't intended.
            if dbCur.execute("SELECT name from node_imgs WHERE name = ?", (n,)).fetchone() is None:
                continue
            if dbCur.execute("SELECT name from linked_imgs WHERE name = ?", (n,)).fetchone() is None:
                continue
            newChildNames.append(n)
        node.children.extend(newChildNames)
        namesToAdd.extend(newChildNames)
    # Nodes are added afterwards, as the dict can't change size while being iterated over
    for name in namesToAdd:
        parent, pSupport = dbCur.execute("SELECT parent, p_support from edges WHERE child = ?", (name,)).fetchone()
        (nodeId,) = dbCur.execute("SELECT id FROM nodes WHERE name = ?", (name,)).fetchone()
        parent = None if parent == "" else parent
        nodes[name] = Node(nodeId, [], parent, 0, pSupport == 1)
    return namesToAdd

def setTips(nodeName, nodes=None):
    """Set 'tips' for a node and its descendants, and return the node's value.

    nodeName: name of the node to start at.
    nodes: dict mapping node names to Node objects (defaults to the module-level nodeMap).
    A node without children gets 1, and any other node gets the sum of its children's values.
    """
    if nodes is None:
        nodes = nodeMap
    # Uses an explicit stack, so deep trees don't exceed the recursion limit
    stack = [(nodeName, False)]
    while stack:
        name, childrenDone = stack.pop()
        node = nodes[name]
        if not node.children:
            node.tips = 1
        elif childrenDone:
            node.tips = sum(nodes[childName].tips for childName in node.children)
        else:
            stack.append((name, True)) # Revisit after the children have been handled
            stack.extend((childName, False) for childName in node.children)
    return nodes[nodeName].tips

def writeReducedTree(dbCur, nodes):
    """Create the 'r_nodes' and 'r_edges' tables, and fill them from 'nodes'.

    dbCur: cursor for the database to write to.
    nodes: dict mapping node names to Node objects.
    """
    dbCur.execute("CREATE TABLE r_nodes (name TEXT PRIMARY KEY, id TEXT UNIQUE, tips INT)")
    dbCur.execute("CREATE INDEX r_nodes_idx_nc ON r_nodes(name COLLATE NOCASE)")
    dbCur.execute("CREATE TABLE r_edges (parent TEXT, child TEXT, p_support INT, PRIMARY KEY (parent, child))")
    dbCur.execute("CREATE INDEX r_edges_child_idx ON r_edges(child)")
    for name, node in nodes.items():
        dbCur.execute("INSERT INTO r_nodes VALUES (?, ?, ?)", (name, node.id, node.tips))
        for childName in node.children:
            pSupport = 1 if nodes[childName].pSupport else 0
            dbCur.execute("INSERT INTO r_edges VALUES (?, ?, ?)", (name, childName, pSupport))

def main():
    " Generates the reduced tree, and adds it to the database "
    if len(sys.argv) > 1:
        print(usageInfo, file=sys.stderr)
        sys.exit(1)

    print("Opening database")
    dbCon = sqlite3.connect(dbFile)
    try:
        dbCur = dbCon.cursor()

        print("Getting minimal name set")
        with open(nodeNamesFile) as file:
            minimalNames.update(getMinimalNames(dbCur, file))
        if not minimalNames:
            print("No names found")
            sys.exit(0)
        print(f"Result has {len(minimalNames)} names")

        print("Getting ancestor nodes")
        rootName = addAncestors(dbCur, minimalNames, nodeMap)
        print(f"Result has {len(nodeMap)} nodes")

        print("Merging-upward composite nodes")
        namesToRemove = mergeCompositeNodes(nodeMap)
        print(f"Result has {len(nodeMap)} nodes")

        print("Removing 'chain collapsible' nodes")
        namesToRemove |= collapseChains(nodeMap, minimalNames)
        print(f"Result has {len(nodeMap)} nodes")

        print("Adding some additional nearby children")
        addNearbyChildren(dbCur, nodeMap, namesToRemove)
        print(f"Result has {len(nodeMap)} nodes")

        print("Setting 'tips' values")
        setTips(rootName)

        print("Adding reduced tree to database")
        writeReducedTree(dbCur, nodeMap)

        print("Closing database")
        dbCon.commit()
    finally:
        # Also closes the connection on the early exit, and on errors
        dbCon.close()

if __name__ == "__main__":
    main()
