aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Terry Truong <terry06890@gmail.com> 2022-06-30 23:38:27 +1000
committer: Terry Truong <terry06890@gmail.com> 2022-06-30 23:38:27 +1000
commit: 0bd6af883e4cd4b4d1532975e36ad1e8ad6d8f6a (patch)
tree: 6cf3e6b837ee0183138fa3cc85ce22339e8cd588
parent: c591233a4085176854b110fbffeae12d8568fe54 (diff)
Trim single-child no-img/desc/etc kept-node ancestors as well
ref: trim-one-child-ancestors
-rwxr-xr-x backend/data/trimTree.py 32
1 file changed, 31 insertions, 1 deletion
diff --git a/backend/data/trimTree.py b/backend/data/trimTree.py
index fa269d8..5864161 100755
--- a/backend/data/trimTree.py
+++ b/backend/data/trimTree.py
@@ -25,6 +25,7 @@ dbCur = dbCon.cursor()
print("Finding nodes to keep")
nodesToKeep = set()
nodesToStronglyKeep = set()
+ancestorsToTrim = set()
print("\tFinding nodes with descs")
for (name,) in dbCur.execute("SELECT name FROM wiki_ids"): # Can assume the wiki_id has a desc
nodesToKeep.add(name)
@@ -55,8 +56,16 @@ for name in nodesToKeep:
name = parent
continue
break
+# For ancestors that would end up with 1 child, mark for trimming
+for n in ancestors:
+ children = [row[0] for row in dbCur.execute("SELECT child FROM edges WHERE parent = ?", (n,))]
+ children = [n for n in children if n in nodesToKeep or n in ancestors]
+ if len(children) == 1:
+ ancestorsToTrim.add(n)
+print(f"Found {len(ancestorsToTrim)} extra ancestors to trim")
+#
nodesToKeep.update(ancestors)
-print(f"Result: {len(nodesToKeep)} nodes to keep")
+print(f"Result: {len(nodesToKeep) - len(ancestorsToTrim)} nodes to keep")
# Find root node
query = "SELECT name FROM nodes LEFT JOIN edges ON nodes.name = edges.child WHERE edges.parent IS NULL LIMIT 1"
@@ -104,6 +113,7 @@ def findTrimmables(nodeName):
return tipsRemoved
def markForDeletion(nodeName):
nodesToDelete.add(nodeName)
+ ancestorsToTrim.discard(nodeName)
childNames = [row[0] for row in dbCur.execute("SELECT child FROM edges WHERE parent = ?", (nodeName,))]
if len(childNames) == 0:
return 1
@@ -138,6 +148,26 @@ for (nodeName, tipsChg) in nodeToTipsChg.items():
#
dbCur.execute("UPDATE nodes SET tips = tips - ? WHERE name = ?", (tipsChg, nodeName))
+print(f"Deleting {len(ancestorsToTrim)} single-child ancestor nodes")
+iterNum = 0
+for nodeName in ancestorsToTrim:
+ iterNum += 1
+ if iterNum % 1e2 == 0:
+ print(f"At iteration {iterNum}")
+ # Get parent and child
+ row = dbCur.execute("SELECT parent FROM edges WHERE child = ?", (nodeName,)).fetchone()
+ if row == None:
+ print("ERROR: Root node was marked for deletion")
+ sys.exit()
+ parent = row[0]
+ (child,) = dbCur.execute("SELECT child FROM edges WHERE parent = ?", (nodeName,)).fetchone()
+ # Connect parent and child
+ dbCur.execute("UPDATE edges SET parent = ? WHERE child = ?", (parent, child))
+ dbCur.execute("DELETE FROM edges WHERE child = ?", (nodeName,))
+ # Delete
+ dbCur.execute("DELETE FROM nodes WHERE name = ?", (nodeName,))
+ dbCur.execute("DELETE FROM names WHERE name = ?", (nodeName,))
+
print("Closing database")
dbCon.commit()
dbCon.close()