diff options
| author | Terry Truong <terry06890@gmail.com> | 2022-06-30 23:38:27 +1000 |
|---|---|---|
| committer | Terry Truong <terry06890@gmail.com> | 2022-06-30 23:38:27 +1000 |
| commit | 0bd6af883e4cd4b4d1532975e36ad1e8ad6d8f6a (patch) | |
| tree | 6cf3e6b837ee0183138fa3cc85ce22339e8cd588 | |
| parent | c591233a4085176854b110fbffeae12d8568fe54 (diff) | |
Trim single-child no-img/desc/etc kept-node ancestors as well (ref: trim-one-child-ancestors)
| -rwxr-xr-x | backend/data/trimTree.py | 32 |
1 file changed, 31 insertions, 1 deletion
diff --git a/backend/data/trimTree.py b/backend/data/trimTree.py index fa269d8..5864161 100755 --- a/backend/data/trimTree.py +++ b/backend/data/trimTree.py @@ -25,6 +25,7 @@ dbCur = dbCon.cursor() print("Finding nodes to keep") nodesToKeep = set() nodesToStronglyKeep = set() +ancestorsToTrim = set() print("\tFinding nodes with descs") for (name,) in dbCur.execute("SELECT name FROM wiki_ids"): # Can assume the wiki_id has a desc nodesToKeep.add(name) @@ -55,8 +56,16 @@ for name in nodesToKeep: name = parent continue break +# For ancestors that would end up with 1 child, mark for trimming +for n in ancestors: + children = [row[0] for row in dbCur.execute("SELECT child FROM edges WHERE parent = ?", (n,))] + children = [n for n in children if n in nodesToKeep or n in ancestors] + if len(children) == 1: + ancestorsToTrim.add(n) +print(f"Found {len(ancestorsToTrim)} extra ancestors to trim") +# nodesToKeep.update(ancestors) -print(f"Result: {len(nodesToKeep)} nodes to keep") +print(f"Result: {len(nodesToKeep) - len(ancestorsToTrim)} nodes to keep") # Find root node query = "SELECT name FROM nodes LEFT JOIN edges ON nodes.name = edges.child WHERE edges.parent IS NULL LIMIT 1" @@ -104,6 +113,7 @@ def findTrimmables(nodeName): return tipsRemoved def markForDeletion(nodeName): nodesToDelete.add(nodeName) + ancestorsToTrim.discard(nodeName) childNames = [row[0] for row in dbCur.execute("SELECT child FROM edges WHERE parent = ?", (nodeName,))] if len(childNames) == 0: return 1 @@ -138,6 +148,26 @@ for (nodeName, tipsChg) in nodeToTipsChg.items(): # dbCur.execute("UPDATE nodes SET tips = tips - ? 
WHERE name = ?", (tipsChg, nodeName)) +print(f"Deleting {len(ancestorsToTrim)} single-child ancestor nodes") +iterNum = 0 +for nodeName in ancestorsToTrim: + iterNum += 1 + if iterNum % 1e2 == 0: + print(f"At iteration {iterNum}") + # Get parent and child + row = dbCur.execute("SELECT parent FROM edges WHERE child = ?", (nodeName,)).fetchone() + if row == None: + print("ERROR: Root node was marked for deletion") + sys.exit() + parent = row[0] + (child,) = dbCur.execute("SELECT child FROM edges WHERE parent = ?", (nodeName,)).fetchone() + # Connect parent and child + dbCur.execute("UPDATE edges SET parent = ? WHERE child = ?", (parent, child)) + dbCur.execute("DELETE FROM edges WHERE child = ?", (nodeName,)) + # Delete + dbCur.execute("DELETE FROM nodes WHERE name = ?", (nodeName,)) + dbCur.execute("DELETE FROM names WHERE name = ?", (nodeName,)) + print("Closing database") dbCon.commit() dbCon.close() |
