From 834dab545931a3f224ef336530a890a7349b100a Mon Sep 17 00:00:00 2001 From: Terry Truong Date: Fri, 8 Jul 2022 14:19:49 +1000 Subject: Add ancestors_* tables, for faster 'toroot' lookup The speedup seemed minor, and only applied in a narrow range of situations. It also roughly quadrupled the database size. --- backend/data/genReducedTrees.py | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) (limited to 'backend/data/genReducedTrees.py') diff --git a/backend/data/genReducedTrees.py b/backend/data/genReducedTrees.py index a921be4..ad73d3c 100755 --- a/backend/data/genReducedTrees.py +++ b/backend/data/genReducedTrees.py @@ -88,8 +88,8 @@ def genPickedNodeTree(dbCur, pickedNames, rootName): print(f"Result has {len(nodeMap)} nodes") print("Updating 'tips' values") updateTips(rootName, nodeMap) - print("Creating table") - addTreeTables(nodeMap, dbCur, "p") + print("Creating tables") + addTreeTables(nodeMap, dbCur, "p", rootName) def genImagesOnlyTree(dbCur, nodesWithImgOrPicked, pickedNames, rootName): print("Getting ancestors") nodeMap = genNodeMap(dbCur, nodesWithImgOrPicked, 1e4) @@ -107,8 +107,8 @@ def genImagesOnlyTree(dbCur, nodesWithImgOrPicked, pickedNames, rootName): print(f"Result has {len(nodeMap)} nodes") print(f"Updating 'tips' values") updateTips(rootName, nodeMap) - print("Creating table") - addTreeTables(nodeMap, dbCur, "i") + print("Creating tables") + addTreeTables(nodeMap, dbCur, "i", rootName) def genWeaklyTrimmedTree(dbCur, nodesWithImgDescOrPicked, nodesWithImgOrPicked, rootName): print("Getting ancestors") nodeMap = genNodeMap(dbCur, nodesWithImgDescOrPicked, 1e5) @@ -139,7 +139,7 @@ def genWeaklyTrimmedTree(dbCur, nodesWithImgDescOrPicked, nodesWithImgOrPicked, print(f"Updating 'tips' values") updateTips(rootName, nodeMap) print("Creating table") - addTreeTables(nodeMap, dbCur, "t") + addTreeTables(nodeMap, dbCur, "t", rootName) # Helper functions def genNodeMap(dbCur, nameSet, itersBeforePrint = 1): " Returns a subtree
that includes nodes in 'nameSet', as a name-to-Node map " @@ -256,8 +256,9 @@ def updateTips(nodeName, nodeMap): tips = max(1, tips) node.tips = tips return tips -def addTreeTables(nodeMap, dbCur, suffix): +def addTreeTables(nodeMap, dbCur, suffix, rootName): " Adds a tree to the database, as tables nodes_X and edges_X, where X is the given suffix " + print("Creating nodes and edges tables") nodesTbl = f"nodes_{suffix}" edgesTbl = f"edges_{suffix}" dbCur.execute(f"CREATE TABLE {nodesTbl} (name TEXT PRIMARY KEY, id TEXT UNIQUE, tips INT)") @@ -269,6 +270,25 @@ def addTreeTables(nodeMap, dbCur, suffix): for childName in node.children: pSupport = 1 if nodeMap[childName].pSupport else 0 dbCur.execute(f"INSERT INTO {edgesTbl} VALUES (?, ?, ?)", (name, childName, pSupport)) + print("Creating ancestors table") + ancestorsTbl = f"ancestors_{suffix}" + dbCur.execute(f"CREATE TABLE {ancestorsTbl} (name TEXT, ancestor TEXT, PRIMARY KEY (name, ancestor))") + dbCur.execute(f"CREATE INDEX {ancestorsTbl}_idx ON {ancestorsTbl}(name)") + iterNum = 0 + def addAncestors(nodeName, ancestors = []): + nonlocal nodeMap, dbCur, iterNum + iterNum += 1 + if iterNum % 1e4 == 0: + print(f"At iteration {iterNum}") + # + node = nodeMap[nodeName] + for ancestor in ancestors: + dbCur.execute(f"INSERT INTO {ancestorsTbl} VALUES (?, ?)", (nodeName, ancestor)) + ancestors.append(nodeName) + for childName in node.children: + addAncestors(childName, ancestors) + ancestors.pop() + addAncestors(rootName) print(f"Finding root node") query = "SELECT name FROM nodes LEFT JOIN edges ON nodes.name = edges.child WHERE edges.parent IS NULL LIMIT 1" -- cgit v1.2.3