Skip to content

Commit

Permalink
Some fixes and a cleaner, more general and more efficient t.ladderize().
Browse files Browse the repository at this point in the history
Added "topological" argument to allow for a ladderization with
respect to the actual branch lengths or the number of descendants,
in accordance with other functions that have the same "topological"
argument.

The old function produced a strange ladderization when comparing a node
with few but distant descendants against a node with many shallow
descendants (the latter would appear after, not really looking like a
ladder). This was caused by using "sum" instead of "max" when computing
the size of a node. This version fixes it.

This version no longer returns a value (the size it used to return
made no sense to the user who called the function).

Also, it is iterative instead of recursive, so it is more efficient and
cannot hit the maximum recursion depth. And it frees memory as it goes,
so it should work well even for huge trees.
  • Loading branch information
jordibc committed Oct 21, 2023
1 parent e680db8 commit 57a0833
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 14 deletions.
35 changes: 23 additions & 12 deletions ete4/core/tree.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1202,9 +1202,13 @@ cdef class Tree(object):

return new_node

def ladderize(self, reverse=False):
def ladderize(self, topological=False, reverse=False):
"""Sort branches according to the size of each partition.
:param topological: If True, the distance between nodes will be the
number of nodes between them (instead of the sum of branch lengths).
:param reverse: If True, sort with biggest partitions first.
Example::
t = Tree('(f,((d,((a,b),c)),e));')
Expand All @@ -1225,18 +1229,25 @@ cdef class Tree(object):
# ╰──┬╴a
# ╰╴b
"""
if not self.is_leaf:
n2s = {}
for n in self.get_children():
s = n.ladderize(reverse=reverse)
n2s[n] = s

self.children.sort(key=lambda x: n2s[x], reverse=reverse)
size = sum(n2s.values())
else:
size = 1
sizes = {} # sizes of the nodes

# Key function for the sort order. Sort by size, then by # of children.
key = lambda node: (sizes[node], len(node.children))

# Distance function (branch length to consider for each node).
dist = ((lambda node: 1) if topological else
(lambda node: float(node.props.get('dist', 1))))

for node in self.traverse('postorder'):
if node.is_leaf:
sizes[node] = dist(node)
else:
node.children.sort(key=key, reverse=reverse) # time to sort!

sizes[node] = dist(node) + max(sizes[n] for n in node.children)

return size
for n in node.children:
sizes.pop(n) # free memory, no need to keep all the sizes

def sort_descendants(self, prop='name'):
"""Sort branches by node names.
Expand Down
4 changes: 2 additions & 2 deletions tests/test_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,8 +440,8 @@ def test_tree_manipulation(self):
t1 = Tree('((A,B),(C,D,E,F), (G,H,I));')
t1.ladderize()
self.assertEqual(list(t1.leaf_names()), [_ for _ in 'ABGHICDEF'])
t1.ladderize(direction=1)
self.assertEqual(list(t1.leaf_names()), [_ for _ in 'FEDCIHGBA'])
t1.ladderize(reverse=True)
self.assertEqual(list(t1.leaf_names()), [_ for _ in 'CDEFGHIAB'])
t1.sort_descendants()
self.assertEqual(list(t1.leaf_names()), [_ for _ in 'ABCDEFGHI'])

Expand Down

0 comments on commit 57a0833

Please sign in to comment.