Skip to content

Commit

Permalink
newick.pyx: Fix overflow when reading very big trees.
Browse files Browse the repository at this point in the history
When reading a tree from a file larger than about 5 GB, storing
the position in a C int would overflow. It happened, for example,
with K01181_GTDB_rooted.faa.alg_annotated.nw.

Changing the type of the position from int to long (in Cython) fixes
the problem and has no appreciable negative impact on performance.
  • Loading branch information
jordibc committed Mar 27, 2024
1 parent 08d45f9 commit bee42aa
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions ete4/parser/newick.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ def loads(tree_text, parser=None, tree_class=Tree):
return tree_class(props, nodes)


def read_nodes(nodes_text, parser, int pos=0, tree_class=Tree):
def read_nodes(nodes_text, parser, long pos=0, tree_class=Tree):
"""Return a list of nodes and the position in the text where they end."""
# nodes_text looks like '(a,b,c)', where any element can be a list of nodes
if nodes_text[pos] != '(':
Expand All @@ -256,7 +256,7 @@ def read_nodes(nodes_text, parser, int pos=0, tree_class=Tree):
return nodes, pos+1


def skip_spaces_and_comments(text, int pos):
def skip_spaces_and_comments(text, long pos):
"""Return position in text after pos and all whitespaces and comments."""
# text = '... [this is a comment] node1...'
# ^-- pos ^-- pos (returned)
Expand All @@ -274,7 +274,7 @@ def skip_spaces_and_comments(text, int pos):
return pos


def read_content(str text, int pos, endings=',);'):
def read_content(str text, long pos, endings=',);'):
"""Return content starting at position pos in text, and where it ends."""
# text = '...(node_1:0.5[&&NHX:p=a],...' -> 'node_1:0.5[&&NHX:p=a]'
# ^-- pos ^-- pos (returned)
Expand All @@ -291,7 +291,7 @@ def read_content(str text, int pos, endings=',);'):
return text[start:pos], pos


def skip_quoted_name(str text, int pos):
def skip_quoted_name(str text, long pos):
"""Return the position where a quoted name ends."""
# text = "... 'node ''2'' in tree' ..."
# ^-- pos ^-- pos (returned)
Expand Down

0 comments on commit bee42aa

Please sign in to comment.