Merge pull request #70 from HadrienG/1.2.1

1.2.1
HadrienG · Sep 19, 2018 · 8194554 · 8194554
2 parents 161a2e0 + 1d0c1b9
commit 8194554
Show file tree

Hide file tree

Showing 11 changed files with 119 additions and 109 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -1,12 +1,13 @@
 language: python
-python:
-    - '2.7'
-    - '3.5'
-    - '3.6'
-    - '3.7-dev'
 matrix:
-    allow_failures:
-        - python: '3.7-dev'
+  include:
+    - python: 2.7
+    - python: 3.4
+    - python: 3.5
+    - python: 3.6
+    - python: 3.7
+      dist: xenial
+      sudo: true
 install:
     - pip install pipenv
     - pipenv install --dev

diff --git a/Pipfile b/Pipfile
@@ -1,27 +1,20 @@
 [[source]]
-
 url = "https://pypi.python.org/simple"
 verify_ssl = true
 name = "pypi"
 
-
 [packages]
-
 future = "*"
 numpy = "*"
 scipy = "*"
 biopython = "*"
-pysam = "*"
 joblib = "*"
-
+pysam = "==0.15.1"
 
 [dev-packages]
-
 nose = "*"
 codecov = "*"
 
-
 [scripts]
-
 iss = "python -m iss"
 tests = "nosetests --with-coverage --cover-package=iss"
diff --git a/Pipfile.lock b/Pipfile.lock
diff --git a/README.md b/README.md
@@ -31,7 +31,7 @@ pip install InSilicoSeq
 Alternatively, with docker:
 
 ```shell
-docker pull hadrieng/insilicoseq:1.2.0
+docker pull hadrieng/insilicoseq:1.2.1
 ```
 
 For more installation options, please refer to the full [documentation](http://insilicoseq.readthedocs.io)
@@ -121,6 +121,6 @@ We welcome contributions from the community! See our [Contributing](CONTRIBUTING
 
 ## Citation
 
-A paper will be on its way. In the meantime if you use InSilicoSeq in your research, please cite the poster
+If you use our software, please cite us!
 
-> Gourlé, Hadrien (2017): Simulating Illumina data with InSilicoSeq. figshare. https://doi.org/10.6084/m9.figshare.5053327.v1
+> Gourlé H, Karlsson-Lindsjö O, Hayer J and Bongcam-Rudloff E, Simulating Illumina data with InSilicoSeq. *Bioinformatics* (2018) doi:10.1093/bioinformatics/bty630
diff --git a/doc/iss/install.rst b/doc/iss/install.rst
@@ -63,15 +63,15 @@ If you wish to use InSilicoSeq using docker
 
 .. code-block:: bash
 
-    docker pull hadrieng/insilicoseq:1.2.0
+    docker pull hadrieng/insilicoseq:1.2.1
 
 To use InSilicoSeq with docker, you need to provide a `volume` to the ``docker run`` command.
 Given with the ``-v`` option, the volume is your way of exchanging data (in this case, your input and output files) with the docker container.
 
 .. code-block:: bash
 
     docker run -v /Users/hadrien/data:/mnt/data -it --rm \
-        hadrieng/insilicoseq:1.2.0 iss generate \
+        hadrieng/insilicoseq:1.2.1 iss generate \
         --genomes /mnt/data/genomes.fasta -m miseq \
         -o /mnt/data/reads
 

diff --git a/iss/bam.py b/iss/bam.py
@@ -35,7 +35,8 @@ def read_bam(bam_file, n_reads=1000000):
         random_fraction = n_reads / total_records
         bam = pysam.AlignmentFile(bam_file, 'rb')  # reopen the file
 
-    except (IOError, ValueError, pysam.utils.SamtoolsError) as e:
+    except (IOError, ValueError,
+            ZeroDivisionError, pysam.utils.SamtoolsError) as e:
         logger.error('Failed to read bam file: %s' % e)
         sys.exit(1)
     else:
@@ -197,7 +198,13 @@ def to_model(bam_path, output):
 
     hists_f = modeller.quality_bins_to_histogram(quality_bins_f)
     hists_r = modeller.quality_bins_to_histogram(quality_bins_r)
-    read_length = len(hists_f[-1])  # the first low quality bin might be empty
+
+    # modern Illumina instruments return reads of the same length.
+    # In case our bam file contains aligned reads of different lengths,
+    # we coerce the model's read length to that of the shortest read
+    length_forward = min((len(x) for x in hists_f if len(x) > 1))
+    length_reverse = min((len(x) for x in hists_r if len(x) > 1))
+    read_length = min(length_forward, length_reverse)
 
     # now we can resize the substitution and indel matrices before
     # doing operations on them

diff --git a/iss/error_models/kde.py b/iss/error_models/kde.py
@@ -86,7 +86,7 @@ def gen_phred_scores(self, cdfs, orientation):
         for cdf in cdfs_bin:
             random_quality = np.searchsorted(cdf, np.random.rand())
             phred_list.append(random_quality)
-        return phred_list
+        return phred_list[:self.read_length]
 
     def random_insert_size(self):
         """Draw a random insert size from the insert size cdf

diff --git a/iss/generator.py b/iss/generator.py
@@ -70,7 +70,8 @@ def reads(record, ErrorModel, n_pairs, cpu_number, output, gc_bias=False):
 
 
 def simulate_read(record, ErrorModel, i):
-    """From a read pair from one genome (or sequence) according to an ErrorModel
+    """Form a read pair from one genome (or sequence) according to an
+    ErrorModel
 
     Each read is a SeqRecord object
     returns a tuple containing the forward and reverse read.

diff --git a/iss/modeller.py b/iss/modeller.py
@@ -79,7 +79,7 @@ def quality_bins_to_histogram(bin_lists):
     cdf_bins = []
     i = 0
     for qual_bin in bin_lists:
-        if len(qual_bin) > 0:
+        if len(qual_bin) > 1:
             logger.debug('Transposing matrix for mean cluster #%s' % i)
             # quals = np.asarray(qual_bin).T  # seems to make clunkier models
             quals = [q for q in zip(*qual_bin)]
@@ -88,7 +88,7 @@ def quality_bins_to_histogram(bin_lists):
             cdfs_list = raw_qualities_to_histogram(quals)
             cdf_bins.append(cdfs_list)
         else:
-            logger.debug('Mean quality bin #%s of length 0. Skipping' % i)
+            logger.debug('Mean quality bin #%s of length < 1. Skipping' % i)
             cdf_bins.append([])
         i += 1
     return cdf_bins
@@ -327,6 +327,9 @@ def dispatch_indels(read):
                 #     '%s not in dispatch: %s' % (deletion, e), exc_info=True)
                 position -= cigar_length
                 continue
+        else:
+            logger.debug("CIGAR %s. Skipping read." % cigar_type)
+            continue
         yield dispatch_tuple
 
 

diff --git a/iss/version.py b/iss/version.py
@@ -1 +1 @@
-__version__ = '1.2.0'
+__version__ = '1.2.1'
diff --git a/setup.py b/setup.py
@@ -28,7 +28,7 @@
     packages=find_packages(),
 
     tests_require=['nose'],
-    install_requires=['numpy', 'scipy', 'biopython', 'pysam', 'future',
+    install_requires=['numpy', 'scipy', 'biopython', 'pysam>=0.15.1', 'future',
                       'joblib'],
     include_package_data=True,