Feature/remove nnls patch (#43)
* ✨ remove patched nnls (using fixed scipy version now)
* 💄 linting; line breaks

Co-authored-by: Elmar Zander <[email protected]>
gessulat and ezander authored Jul 29, 2024
1 parent 3d0e592 commit 653179f
Showing 7 changed files with 33 additions and 208 deletions.
183 changes: 0 additions & 183 deletions mokapot/_nnls.py

This file was deleted.

16 changes: 11 additions & 5 deletions mokapot/confidence_writer.py
@@ -80,13 +80,18 @@ def write_confidences(
     data_iterator : Iterator[pd.DataFrame]
         An iterator that yields chunks of data as pandas DataFrames.
     q_value_iterator : Iterable[np.array]
-        A iterator that yields numpy arrays containing the q-values for each data chunk.
+        A iterator that yields numpy arrays containing the q-values for each
+        data chunk.
     pep_iterator : Iterable[np.array]
-        A iterator that yields numpy arrays containing the posterior error probabilities for each data chunk.
+        A iterator that yields numpy arrays containing the posterior error
+        probabilities for each data chunk.
     target_iterator : Iterable[np.array]
-        A iterator that yields numpy arrays indicating whether each data point is a target or decoy for each data chunk.
+        A iterator that yields numpy arrays indicating whether each data point
+        is a target or decoy for each data chunk.
     out_paths : list[Path]
-        A list of output file paths where the confidence data will be written. The first element contains the path for the targets and the second those for the decoys.
+        A list of output file paths where the confidence data will be written.
+        The first element contains the path for the targets and the second
+        those for the decoys.
     decoys : bool
         A boolean flag indicating whether to include decoy data in the output.
     level : str
@@ -96,7 +101,8 @@ def write_confidences(
     qvalue_column : str, optional
         The name of the column to store the q-values. Default is 'q_value'.
     pep_column : str, optional
-        The name of the column to store the posterior error probabilities. Default is 'posterior_error_prob'.
+        The name of the column to store the posterior error probabilities.
+        Default is 'posterior_error_prob'.

     Returns
     -------
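To make the iterator contract above concrete, here is a hypothetical sketch (not the actual write_confidences implementation) of how the four chunk iterators line up: each data chunk is annotated with its q-values and PEPs, then split into target and decoy rows. Every name other than the documented parameters is invented for illustration.

import pandas as pd

def annotate_chunks(data_iterator, q_value_iterator, pep_iterator,
                    target_iterator, qvalue_column="q_value",
                    pep_column="posterior_error_prob"):
    # Pair each data chunk with its q-values, PEPs, and target flags.
    for chunk, qvals, peps, targets in zip(
        data_iterator, q_value_iterator, pep_iterator, target_iterator
    ):
        chunk[qvalue_column] = qvals
        chunk[pep_column] = peps
        # Boolean mask: True rows belong in the targets output,
        # False rows in the decoys output.
        yield chunk[targets], chunk[~targets]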
6 changes: 3 additions & 3 deletions mokapot/config.py
@@ -390,9 +390,9 @@ def _parser():
         "--sqlite_db_path",
         default=None,
         type=Path,
-        help="Optionally, sets a path to an MSAID sqlite result database for writing "
-        "outputs to. If not set (None), results are written in the standard TSV "
-        "format.",
+        help="Optionally, sets a path to an MSAID sqlite result database "
+        "for writing outputs to. If not set (None), results are "
+        "written in the standard TSV format.",
     )

     return parser
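The help-string rewrap above relies on Python's compile-time concatenation of adjacent string literals, so argparse receives exactly one string either way. A quick illustration:

# Adjacent string literals are joined at compile time, so the wrapped
# form of the help text is identical to the original single-line string.
help_text = (
    "Optionally, sets a path to an MSAID sqlite result database "
    "for writing outputs to. If not set (None), results are "
    "written in the standard TSV format."
)
assert "database for writing outputs to" in help_text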
6 changes: 1 addition & 5 deletions mokapot/peps.py
@@ -3,11 +3,7 @@
 import matplotlib.pyplot as plt
 from triqler import qvality

-# TODO: Remove the next and uncomment the 2nd next line when
-# scipy.optimize.nnls is fixed (see _nnls.py for explanation)
-from ._nnls import nnls
-
-# from scipy.optimize import nnls
+from scipy.optimize import nnls

 PEP_ALGORITHM = {
     "qvality": lambda scores, targets: peps_from_scores_qvality(
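With the vendored _nnls.py gone, peps.py imports nnls straight from scipy.optimize (the scipy>=1.13.0 pin added below includes the upstream fix). A minimal, self-contained sketch of the call, using made-up data rather than anything from mokapot:

import numpy as np
from scipy.optimize import nnls  # requires the scipy>=1.13.0 pin below

# Toy example: solve min ||Ax - b|| subject to x >= 0.
A = np.array([[1.0, 0.0],
              [1.0, 1.0],
              [0.0, 1.0]])
b = np.array([2.0, 1.0, 1.0])

x, rnorm = nnls(A, b)  # non-negative solution and residual norm
print(x, rnorm)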
8 changes: 4 additions & 4 deletions mokapot/streaming.py
@@ -151,10 +151,10 @@ def get_chunked_data_iterator(
 @typechecked
 class MergedTabularDataReader(TabularDataReader):
     """
-    Merges data from multiple tabular data sources vertically into a single data
-    source, ordering the rows (one by one) by the value of a priority column.
-    I.e. for each output row, the row of the input readers with the highest
-    value of the priority column is picked.
+    Merges data from multiple tabular data sources vertically into a single
+    data source, ordering the rows (one by one) by the value of a priority
+    column. I.e. for each output row, the row of the input readers with the
+    highest value of the priority column is picked.

     Attributes:
     -----------
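The picking rule described in that docstring can be sketched with heapq.merge, which interleaves several sorted iterators. This hypothetical snippet is not the actual MergedTabularDataReader, and it assumes each input already yields rows in descending priority order:

import heapq
from typing import Iterator

def merge_by_priority(readers, priority_column: str) -> Iterator[dict]:
    # Always emit the row with the highest priority value among the
    # readers' current head rows.
    return heapq.merge(
        *readers, key=lambda row: row[priority_column], reverse=True
    )

merged = merge_by_priority(
    [iter([{"score": 9}, {"score": 4}]), iter([{"score": 7}, {"score": 1}])],
    "score",
)
print([row["score"] for row in merged])  # 9, 7, 4, 1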
21 changes: 13 additions & 8 deletions mokapot/tabular_data.py
@@ -53,8 +53,8 @@ def get_score_column_type(suffix):
 @typechecked
 class TabularDataReader(ABC):
     """
-    An abstract class that represents a source for tabular data that can be read
-    in either completely or chunk-wise.
+    An abstract class that represents a source for tabular data that can be
+    read in either completely or chunk-wise.

     Implementations can be classes that either read from files, from memory
     (e.g. data frames), combine or modify other readers or represent computed
@@ -118,7 +118,8 @@ class ColumnMappedReader(TabularDataReader):
     reader : TabularDataReader
         The underlying reader for the original data.
     column_map : dict[str, str]
-        A dictionary that maps the original column names to the new column names.
+        A dictionary that maps the original column names to the new
+        column names.
     """
     def __init__(self, reader: TabularDataReader, column_map: dict[str, str]):
         self.reader = reader
@@ -437,8 +438,9 @@ def auto_finalize(writers: list[TabularDataWriter]):
 @typechecked
 class BufferedWriter(TabularDataWriter):
     """
-    This class represents a buffered writer for tabular data. It allows writing data to a tabular data writer in
-    batches, reducing the number of write operations.
+    This class represents a buffered writer for tabular data. It allows
+    writing data to a tabular data writer in batches, reducing the
+    number of write operations.

     Attributes:
     -----------
@@ -447,9 +449,11 @@ class BufferedWriter(TabularDataWriter):
     buffer_size : int
         The number of records to buffer before writing to the writer.
     buffer_type : TableType
-        The type of buffer being used. Can be one of TableType.DataFrame, TableType.Dicts, or TableType.Records.
+        The type of buffer being used. Can be one of TableType.DataFrame,
+        TableType.Dicts, or TableType.Records.
     buffer : pd.DataFrame or list of dictionaries or np.recarray or None
-        The buffer containing the tabular data to be written. The buffer type depends on the buffer_type attribute.
+        The buffer containing the tabular data to be written.
+        The buffer type depends on the buffer_type attribute.
     """
     writer: TabularDataWriter
     buffer_size: int
@@ -554,7 +558,8 @@ class CSVFileWriter(TabularDataWriter):
         The file path where the CSV file will be written.
     sep : str, optional
-        The separator string used to separate fields in the CSV file. Default is tab character ("\t").
+        The separator string used to separate fields in the CSV file.
+        Default is tab character ("\t").
     """

     file_name: Path
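As a rough illustration of the batching idea in the BufferedWriter docstring (hypothetical code, not mokapot's implementation): rows accumulate in memory and the underlying writer is hit once per batch instead of once per row.

import pandas as pd

class TinyBufferedWriter:
    """Hypothetical sketch: buffer rows, flush to the real writer in batches."""

    def __init__(self, writer, buffer_size: int = 1000):
        self.writer = writer          # assumed to expose append_data(DataFrame)
        self.buffer_size = buffer_size
        self.buffer: list[dict] = []

    def append_data(self, row: dict) -> None:
        self.buffer.append(row)
        if len(self.buffer) >= self.buffer_size:
            self.flush()              # one write per batch, not per row

    def flush(self) -> None:
        if self.buffer:
            self.writer.append_data(pd.DataFrame(self.buffer))
            self.buffer = []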
1 change: 1 addition & 0 deletions pyproject.toml
@@ -26,6 +26,7 @@ dependencies = [
     "importlib-metadata>=5.1.0",
     "typeguard>=4.1.5",
     "pyarrow>=15.0.0",
+    "scipy>=1.13.0",
 ]

 dynamic = ["version"]
