Feature/remove nnls patch (#43)
* ✨ remove patched nnls (using fixed scipy version now)
* 💄 linting; line breaks

Co-authored-by: Elmar Zander <[email protected]>
gessulat and ezander authored Jul 29, 2024
1 parent 3d0e592 commit 653179f
Showing 7 changed files with 33 additions and 208 deletions.
183 changes: 0 additions & 183 deletions mokapot/_nnls.py

This file was deleted.

16 changes: 11 additions & 5 deletions mokapot/confidence_writer.py
@@ -80,13 +80,18 @@ def write_confidences(
     data_iterator : Iterator[pd.DataFrame]
         An iterator that yields chunks of data as pandas DataFrames.
     q_value_iterator : Iterable[np.array]
-        A iterator that yields numpy arrays containing the q-values for each data chunk.
+        A iterator that yields numpy arrays containing the q-values for each
+        data chunk.
     pep_iterator : Iterable[np.array]
-        A iterator that yields numpy arrays containing the posterior error probabilities for each data chunk.
+        A iterator that yields numpy arrays containing the posterior error
+        probabilities for each data chunk.
     target_iterator : Iterable[np.array]
-        A iterator that yields numpy arrays indicating whether each data point is a target or decoy for each data chunk.
+        A iterator that yields numpy arrays indicating whether each data point
+        is a target or decoy for each data chunk.
     out_paths : list[Path]
-        A list of output file paths where the confidence data will be written. The first element contains the path for the targets and the second those for the decoys.
+        A list of output file paths where the confidence data will be written.
+        The first element contains the path for the targets and the second
+        those for the decoys.
     decoys : bool
         A boolean flag indicating whether to include decoy data in the output.
     level : str
@@ -96,7 +101,8 @@ def write_confidences(
     qvalue_column : str, optional
         The name of the column to store the q-values. Default is 'q_value'.
     pep_column : str, optional
-        The name of the column to store the posterior error probabilities. Default is 'posterior_error_prob'.
+        The name of the column to store the posterior error probabilities.
+        Default is 'posterior_error_prob'.

     Returns
     -------
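To make the iterator contract above concrete, here is a hypothetical sketch (not the actual write_confidences implementation) of how the four chunk iterators line up: each data chunk is annotated with its q-values and PEPs, then split into target and decoy rows. Every name other than the documented parameters is invented for illustration.

import pandas as pd

def annotate_chunks(data_iterator, q_value_iterator, pep_iterator,
                    target_iterator, qvalue_column="q_value",
                    pep_column="posterior_error_prob"):
    # Pair each data chunk with its q-values, PEPs, and target flags.
    for chunk, qvals, peps, targets in zip(
        data_iterator, q_value_iterator, pep_iterator, target_iterator
    ):
        chunk[qvalue_column] = qvals
        chunk[pep_column] = peps
        # Boolean mask: True rows belong in the targets output,
        # False rows in the decoys output.
        yield chunk[targets], chunk[~targets]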
6 changes: 3 additions & 3 deletions mokapot/config.py
@@ -390,9 +390,9 @@ def _parser():
         "--sqlite_db_path",
         default=None,
         type=Path,
-        help="Optionally, sets a path to an MSAID sqlite result database for writing "
-        "outputs to. If not set (None), results are written in the standard TSV "
-        "format.",
+        help="Optionally, sets a path to an MSAID sqlite result database "
+        "for writing outputs to. If not set (None), results are "
+        "written in the standard TSV format.",
     )

     return parser
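The help-string rewrap above relies on Python's compile-time concatenation of adjacent string literals, so argparse receives exactly one string either way. A quick illustration:

# Adjacent string literals are joined at compile time, so the wrapped
# form of the help text is identical to the original single-line string.
help_text = (
    "Optionally, sets a path to an MSAID sqlite result database "
    "for writing outputs to. If not set (None), results are "
    "written in the standard TSV format."
)
assert "database for writing outputs to" in help_text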
6 changes: 1 addition & 5 deletions mokapot/peps.py
@@ -3,11 +3,7 @@
 import matplotlib.pyplot as plt
 from triqler import qvality

-# TODO: Remove the next and uncomment the 2nd next line when
-# scipy.optimize.nnls is fixed (see _nnls.py for explanation)
-from ._nnls import nnls
-
-# from scipy.optimize import nnls
+from scipy.optimize import nnls

 PEP_ALGORITHM = {
     "qvality": lambda scores, targets: peps_from_scores_qvality(
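With the vendored _nnls.py gone, peps.py imports nnls straight from scipy.optimize (the scipy>=1.13.0 pin added below includes the upstream fix). A minimal, self-contained sketch of the call, using made-up data rather than anything from mokapot:

import numpy as np
from scipy.optimize import nnls  # requires the scipy>=1.13.0 pin below

# Toy example: solve min ||Ax - b|| subject to x >= 0.
A = np.array([[1.0, 0.0],
              [1.0, 1.0],
              [0.0, 1.0]])
b = np.array([2.0, 1.0, 1.0])

x, rnorm = nnls(A, b)  # non-negative solution and residual norm
print(x, rnorm)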
8 changes: 4 additions & 4 deletions mokapot/streaming.py
@@ -151,10 +151,10 @@ def get_chunked_data_iterator(
 @typechecked
 class MergedTabularDataReader(TabularDataReader):
     """
-    Merges data from multiple tabular data sources vertically into a single data
-    source, ordering the rows (one by one) by the value of a priority column.
-    I.e. for each output row, the row of the input readers with the highest
-    value of the priority column is picked.
+    Merges data from multiple tabular data sources vertically into a single
+    data source, ordering the rows (one by one) by the value of a priority
+    column. I.e. for each output row, the row of the input readers with the
+    highest value of the priority column is picked.

     Attributes:
     -----------
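The picking rule described in that docstring can be sketched with heapq.merge, which interleaves several sorted iterators. This hypothetical snippet is not the actual MergedTabularDataReader, and it assumes each input already yields rows in descending priority order:

import heapq
from typing import Iterator

def merge_by_priority(readers, priority_column: str) -> Iterator[dict]:
    # Always emit the row with the highest priority value among the
    # readers' current head rows.
    return heapq.merge(
        *readers, key=lambda row: row[priority_column], reverse=True
    )

merged = merge_by_priority(
    [iter([{"score": 9}, {"score": 4}]), iter([{"score": 7}, {"score": 1}])],
    "score",
)
print([row["score"] for row in merged])  # 9, 7, 4, 1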
21 changes: 13 additions & 8 deletions mokapot/tabular_data.py
@@ -53,8 +53,8 @@ def get_score_column_type(suffix):
 @typechecked
 class TabularDataReader(ABC):
     """
-    An abstract class that represents a source for tabular data that can be read
-    in either completely or chunk-wise.
+    An abstract class that represents a source for tabular data that can be
+    read in either completely or chunk-wise.

     Implementations can be classes that either read from files, from memory
     (e.g. data frames), combine or modify other readers or represent computed
@@ -118,7 +118,8 @@ class ColumnMappedReader(TabularDataReader):
     reader : TabularDataReader
         The underlying reader for the original data.
     column_map : dict[str, str]
-        A dictionary that maps the original column names to the new column names.
+        A dictionary that maps the original column names to the new
+        column names.
     """
     def __init__(self, reader: TabularDataReader, column_map: dict[str, str]):
         self.reader = reader
@@ -437,8 +438,9 @@ def auto_finalize(writers: list[TabularDataWriter]):
 @typechecked
 class BufferedWriter(TabularDataWriter):
     """
-    This class represents a buffered writer for tabular data. It allows writing data to a tabular data writer in
-    batches, reducing the number of write operations.
+    This class represents a buffered writer for tabular data. It allows
+    writing data to a tabular data writer in batches, reducing the
+    number of write operations.

     Attributes:
     -----------
@@ -447,9 +449,11 @@ class BufferedWriter(TabularDataWriter):
     buffer_size : int
         The number of records to buffer before writing to the writer.
     buffer_type : TableType
-        The type of buffer being used. Can be one of TableType.DataFrame, TableType.Dicts, or TableType.Records.
+        The type of buffer being used. Can be one of TableType.DataFrame,
+        TableType.Dicts, or TableType.Records.
     buffer : pd.DataFrame or list of dictionaries or np.recarray or None
-        The buffer containing the tabular data to be written. The buffer type depends on the buffer_type attribute.
+        The buffer containing the tabular data to be written.
+        The buffer type depends on the buffer_type attribute.
     """
     writer: TabularDataWriter
     buffer_size: int
@@ -554,7 +558,8 @@ class CSVFileWriter(TabularDataWriter):
         The file path where the CSV file will be written.
     sep : str, optional
-        The separator string used to separate fields in the CSV file. Default is tab character ("\t").
+        The separator string used to separate fields in the CSV file.
+        Default is tab character ("\t").
     """

     file_name: Path
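As a rough illustration of the batching idea in the BufferedWriter docstring (hypothetical code, not mokapot's implementation): rows accumulate in memory and the underlying writer is hit once per batch instead of once per row.

import pandas as pd

class TinyBufferedWriter:
    """Hypothetical sketch: buffer rows, flush to the real writer in batches."""

    def __init__(self, writer, buffer_size: int = 1000):
        self.writer = writer          # assumed to expose append_data(DataFrame)
        self.buffer_size = buffer_size
        self.buffer: list[dict] = []

    def append_data(self, row: dict) -> None:
        self.buffer.append(row)
        if len(self.buffer) >= self.buffer_size:
            self.flush()              # one write per batch, not per row

    def flush(self) -> None:
        if self.buffer:
            self.writer.append_data(pd.DataFrame(self.buffer))
            self.buffer = []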
1 change: 1 addition & 0 deletions pyproject.toml
@@ -26,6 +26,7 @@ dependencies = [
     "importlib-metadata>=5.1.0",
     "typeguard>=4.1.5",
     "pyarrow>=15.0.0",
+    "scipy>=1.13.0",
 ]

 dynamic = ["version"]
