Skip to content

Commit

Permalink
{AH} set MAX_POS to (1 << 31) - 1, fixes #741 and #732
Browse files Browse the repository at this point in the history
  • Loading branch information
AndreasHeger committed Jan 2, 2019
1 parent fc3f522 commit a1b5cef
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 18 deletions.
12 changes: 7 additions & 5 deletions pysam/libcalignmentfile.pyx
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

# cython: embedsignature=True
# cython: profile=True
########################################################
Expand Down Expand Up @@ -62,6 +61,8 @@ import warnings
import array
from libc.errno cimport errno, EPIPE
from libc.string cimport strcmp, strpbrk, strerror
from libc.stdint cimport INT32_MAX

from cpython cimport array as c_array
from cpython.version cimport PY_MAJOR_VERSION

Expand Down Expand Up @@ -94,7 +95,8 @@ IndexStats = collections.namedtuple("IndexStats",
########################################################
## global variables
# maximum genomic coordinate
cdef uint32_t MAX_POS = 2 << 31
# for some reason, using 'int' causes overflow
cdef int MAX_POS = (1 << 31) - 1

# valid types for SAM headers
VALID_HEADER_TYPES = {"HD" : collections.Mapping,
Expand Down Expand Up @@ -1314,8 +1316,9 @@ cdef class AlignmentFile(HTSFile):
an iterator over genomic positions.
"""
cdef int rtid, rstart, rstop, has_coord

cdef int rtid, has_coord
cdef int32_t rstart, rstop

if not self.is_open:
raise ValueError("I/O operation on closed file")

Expand Down Expand Up @@ -2054,7 +2057,6 @@ cdef class IteratorRowRegion(IteratorRow):

IteratorRow.__init__(self, samfile,
multiple_iterators=multiple_iterators)

with nogil:
self.iter = sam_itr_queryi(
self.index,
Expand Down
2 changes: 1 addition & 1 deletion pysam/libcbcf.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ __all__ = ['VariantFile',
## Constants
########################################################################

cdef uint32_t MAX_POS = 2 << 31
cdef int MAX_POS = (1 << 31) - 1
cdef tuple VALUE_TYPES = ('Flag', 'Integer', 'Float', 'String')
cdef tuple METADATA_TYPES = ('FILTER', 'INFO', 'FORMAT', 'CONTIG', 'STRUCTURED', 'GENERIC')
cdef tuple METADATA_LENGTHS = ('FIXED', 'VARIABLE', 'A', 'G', 'R')
Expand Down
13 changes: 6 additions & 7 deletions pysam/libchtslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,9 @@

from posix.unistd cimport dup
from libc.errno cimport errno
from libc.stdint cimport INT32_MAX
from cpython cimport PyBytes_FromStringAndSize

from pysam.libchtslib cimport *

from pysam.libcutils cimport force_bytes, force_str, charptr_to_str, charptr_to_str_w_len
from pysam.libcutils cimport encode_filename, from_string_and_size

Expand All @@ -25,7 +24,7 @@ from pysam.libcutils cimport encode_filename, from_string_and_size
import os
import io
import re
from warnings import warn
from warnings import warn


########################################################################
Expand All @@ -41,7 +40,7 @@ DEF SEEK_CUR = 1
DEF SEEK_END = 2

# maximum genomic coordinate
cdef uint32_t MAX_POS = 2 << 31
cdef int MAX_POS = (1 << 31) - 1

cdef tuple FORMAT_CATEGORIES = ('UNKNOWN', 'ALIGNMENTS', 'VARIANTS', 'INDEX', 'REGIONS')
cdef tuple FORMATS = ('UNKNOWN', 'BINARY_FORMAT', 'TEXT_FORMAT', 'SAM', 'BAM', 'BAI', 'CRAM', 'CRAI',
Expand Down Expand Up @@ -630,8 +629,8 @@ cdef class HTSFile(object):
"""
cdef int rtid
cdef long long rstart
cdef long long rstop
cdef int32_t rstart
cdef int32_t rstop

if reference is not None:
if contig is not None:
Expand All @@ -644,7 +643,7 @@ cdef class HTSFile(object):
stop = end

if contig is None and tid is None and region is None:
return 0, 0, 0, 0
return 0, 0, 0, MAX_POS

rtid = -1
rstart = 0
Expand Down
3 changes: 2 additions & 1 deletion pysam/libcutils.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ from cpython cimport PyBytes_Check, PyUnicode_Check
from cpython cimport array as c_array
from libc.stdlib cimport calloc, free
from libc.string cimport strncpy
from libc.stdint cimport INT32_MAX, int32_t
from libc.stdio cimport fprintf, stderr, fflush
from libc.stdio cimport stdout as c_stdout
from posix.fcntl cimport open as c_open, O_WRONLY
Expand All @@ -24,7 +25,7 @@ from libcbcftools cimport bcftools_main, bcftools_set_stdout, bcftools_set_stder

#####################################################################
# hard-coded constants
cdef uint32_t MAX_POS = 2 << 31
cdef int MAX_POS = (1 << 31) - 1

#################################################################
# Utility functions for quality string conversions
Expand Down
23 changes: 19 additions & 4 deletions tests/AlignmentFileHeader_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ def test_header_constructed_from_references_without_text(self):
self.compare_headers(header, self.header_without_text)
self.check_name_mapping(header)


class TestHeaderSAM(unittest.TestCase):
"""testing header manipulation"""

Expand Down Expand Up @@ -287,6 +288,7 @@ def compare_headers(self, a, header_b):
def check_read_write(self, flag_write, header):

fn = get_temp_filename()
print(fn)
with pysam.AlignmentFile(
fn,
flag_write,
Expand All @@ -299,17 +301,30 @@ def check_read_write(self, flag_write, header):
with pysam.AlignmentFile(fn) as inf:
read_header = inf.header

os.unlink(fn)
# os.unlink(fn)
self.compare_headers(header, read_header)
expected_lengths = dict([(x["SN"], x["LN"]) for x in header["SQ"]])
self.assertEqual(expected_lengths,
dict(zip(read_header.references,
read_header.lengths)))

def test_SAM(self):
    # Write/read round trip of the fixture header using the "wh" mode
    # flag (the SAM case, going by the test name).
    self.check_read_write(flag_write="wh", header=self.header)

def test_BAM(self):
    # Write/read round trip of the fixture header using the "wb" mode
    # flag (the BAM case, going by the test name).
    self.check_read_write(flag_write="wb", header=self.header)

def test_CRAM(self):
    # Write/read round trip of the fixture header using the "wc" mode
    # flag (the CRAM case, going by the test name).
    #
    # Work on a deep copy: the header is a nested structure, and a shallow
    # copy would share the inner 'PG' entries with self.header, so the
    # escaping below would leak into the fixture seen by other tests.
    header = copy.deepcopy(self.header)
    # Not every fixture header carries a 'PG' section, so only touch it
    # when it is present.
    if "PG" in header:
        # for CRAM, \t needs to be quoted:
        header['PG'][1]['CL'] = re.sub(r"\t", r"\\\\t", header['PG'][1]['CL'])
    self.check_read_write("wc", header)


class TestHeaderLargeContigs(TestHeaderWriteRead):
    """Header fixture with a very large contig (see issue 741).

    Reuses the tests inherited from TestHeaderWriteRead with a header
    whose first contig length is 2147483647, i.e. (1 << 31) - 1.
    """

    header = {
        'SQ': [
            {'LN': 2147483647, 'SN': 'chr1'},
            {'LN': 1584, 'SN': 'chr2'},
        ],
        'HD': {'VN': '1.0'},
    }

0 comments on commit a1b5cef

Please sign in to comment.