Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MsPathFinderT result reading #775

Merged
merged 16 commits into from
Jul 19, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion mzLib/Chemistry/ChemicalFormula.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,11 @@
using MzLibUtil;
using System;
using System.Collections.Generic;
using System.ComponentModel.DataAnnotations.Schema;
using System.Globalization;
using System.Linq;
using System.Text;
using System.Text.Json.Serialization;
using System.Text.RegularExpressions;

namespace Chemistry
Expand Down Expand Up @@ -65,7 +67,7 @@ public ChemicalFormula(IHasChemicalFormula capFormula)
Elements = new Dictionary<Element, int>(capFormula.ThisChemicalFormula.Elements);
}

public ChemicalFormula ThisChemicalFormula => this;
[JsonIgnore] public ChemicalFormula ThisChemicalFormula => this;

/// <summary>
/// Gets the average mass of this chemical formula
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
using CsvHelper.Configuration.Attributes;
using CsvHelper.Configuration;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Easy.Common.Extensions;

namespace Readers
{
    /// <summary>
    /// A single peptide-level record read from a tab-delimited result table.
    /// Columns are bound to properties by header name through CsvHelper attributes;
    /// properties marked <c>[Optional]</c> keep their default value when the column is absent.
    /// </summary>
    public class MsFraggerPeptide
    {
        /// <summary>
        /// CsvHelper configuration used to read this record type: tab-delimited with a header row,
        /// blank lines skipped, fields trimmed, and bad data / missing fields / header mismatches
        /// not reported (their callbacks are set to null).
        /// </summary>
        public static CsvConfiguration CsvConfiguration = new CsvConfiguration(CultureInfo.InvariantCulture)
        {
            Delimiter = "\t",
            HasHeaderRecord = true,
            IgnoreBlankLines = true,
            TrimOptions = TrimOptions.Trim,
            BadDataFound = null,
            MissingFieldFound = null,
            HeaderValidated = null,
        };

        [Name("Peptide", "Sequence")] public string BaseSequence { get; set; }

        [Name("Prev AA")] [Optional] public char PreviousAminoAcid { get; set; }

        [Name("Next AA")] [Optional] public char NextAminoAcid { get; set; }

        [Ignore] private int _peptideLength;

        /// <summary>
        /// Value of the "Peptide Length" column. When that value was never set (still the default),
        /// the length of <see cref="BaseSequence"/> is returned instead, or 0 if the sequence is null.
        /// </summary>
        [Name("Peptide Length")]
        [Optional]
        public int PeptideLength
        {
            // BaseSequence can be null because missing fields are tolerated by the configuration,
            // so the fallback must not dereference it unconditionally.
            get => _peptideLength.IsDefault() ? (BaseSequence?.Length ?? 0) : _peptideLength;
            set => _peptideLength = value;
        }

        [Name("Protein Start")] [Optional] public int OneBasedStartResidueInProtein { get; set; }

        [Name("Protein End")] [Optional] public int OneBasedEndResidueInProtein { get; set; }

        [Name("Charges", "Charge States")]
        [TypeConverter(typeof(CommaDelimitedToIntegerArrayTypeConverter))]
        public int[] Charge { get; set; }

        [Name("Probability")] public double Probability { get; set; }

        [Name("Spectral Count")] [Optional] public int SpectralCount { get; set; }

        [Name("Intensity")] [Optional] public double Intensity { get; set; }

        [Name("Assigned Modifications")]
        [TypeConverter(typeof(CommaDelimitedToStringArrayTypeConverter))]
        public string[] AssignedModifications { get; set; }

        [Name("Observed Modifications")]
        [Optional]
        [TypeConverter(typeof(CommaDelimitedToStringArrayTypeConverter))]
        public string[] ObservedModifications { get; set; }

        [Name("Protein")] public string Protein { get; set; }

        [Name("Protein ID")] [Optional] public string ProteinAccession { get; set; }

        [Ignore] private string _proteinName;

        /// <summary>
        /// Value of the "Entry Name" column. When that value was never set, the text after the last
        /// '|' in <see cref="Protein"/> (trimmed) is returned instead, or null if
        /// <see cref="Protein"/> is itself null.
        /// </summary>
        [Name("Entry Name")]
        [Optional]
        public string ProteinName
        {
            // Protein can be null for the same reason as BaseSequence above; the null-conditional
            // short-circuits the whole Split/Last/Trim chain in that case.
            get => _proteinName.IsDefault() ? Protein?.Split('|').Last().Trim() : _proteinName;
            set => _proteinName = value;
        }

        [Name("Gene")]
        public string Gene { get; set; }

        [Name("Protein Description")]
        public string ProteinDescription { get; set; }

        [Name("Mapped Genes")]
        [Optional]
        [TypeConverter(typeof(CommaDelimitedToStringArrayTypeConverter))]
        public string[] MappedGenes { get; set; }

        [Name("Mapped Proteins")]
        [Optional]
        [TypeConverter(typeof(CommaDelimitedToStringArrayTypeConverter))]
        public string[] MappedProteins { get; set; }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
using CsvHelper.Configuration.Attributes;
using CsvHelper.Configuration;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace Readers
{
    /// <summary>
    /// A single protein-level record read from a tab-delimited result table.
    /// Each property is bound to one or more accepted column headers via CsvHelper attributes;
    /// the three string-array properties start out as empty arrays.
    /// </summary>
    public class MsFraggerProtein
    {
        /// <summary>
        /// Builds a fresh CsvHelper configuration on every access: tab-delimited with a header row,
        /// blank lines skipped, fields trimmed, and bad-data / missing-field callbacks disabled.
        /// </summary>
        public static CsvConfiguration CsvConfiguration
        {
            get
            {
                var configuration = new CsvConfiguration(System.Globalization.CultureInfo.InvariantCulture)
                {
                    Delimiter = "\t",
                    HasHeaderRecord = true,
                    IgnoreBlankLines = true,
                    TrimOptions = TrimOptions.Trim,
                    BadDataFound = null,
                    MissingFieldFound = null,
                };
                return configuration;
            }
        }

        // ---- Protein identity ----

        [Name("Protein")] public string Protein { get; set; }

        [Name("Protein ID")] public string Accession { get; set; }

        [Name("Entry Name")] public string AccessionOrganism { get; set; }

        [Name("Gene")] public string Gene { get; set; }

        [Name("Length", "Protein Length")] public int Length { get; set; }

        [Name("Organism")] public string Organism { get; set; }

        [Name("Protein Description", "Description")] public string Description { get; set; }

        [Name("Protein Existence")] public string ProteinExistence { get; set; }

        // ---- Scores and coverage ----

        [Name("Coverage"), Optional] public double Coverage { get; set; }

        [Name("Protein Probability")] public double ProteinProbability { get; set; }

        [Name("Top Peptide Probability")] public double TopPeptideProbability { get; set; }

        // ---- Peptide counts ----

        [Name("Total Peptides", "Combined Total Peptides")] public int TotalPeptides { get; set; }

        [Name("Unique Peptides"), Optional] public int UniquePeptides { get; set; }

        [Name("Razor Peptides"), Optional] public int RazorPeptides { get; set; }

        // ---- Spectral counts ----

        [Name("Total Spectral Count", "Combined Total Spectral Count")] public int TotalSpectralCount { get; set; }

        [Name("Unique Spectral Count", "Combined Unique Spectral Count")] public int UniqueSpectralCount { get; set; }

        [Name("Razor Spectral Count"), Optional] public int RazorSpectralCount { get; set; }

        // ---- Intensities ----

        [Name("Total Intensity"), Optional] public double TotalIntensity { get; set; }

        [Name("Unique Intensity"), Optional] public double UniqueIntensity { get; set; }

        [Name("Razor Intensity"), Optional] public double RazorIntensity { get; set; }

        // ---- Comma-delimited list columns (each instance gets its own empty array by default) ----

        [Name("Razor Assigned Modifications"), TypeConverter(typeof(CommaDelimitedToStringArrayTypeConverter)), Optional]
        public string[] RazorAssignedModifications { get; set; } = new string[] { };

        [Name("Razor Observed Modifications"), Optional, TypeConverter(typeof(CommaDelimitedToStringArrayTypeConverter))]
        public string[] RazorObservedModifications { get; set; } = new string[] { };

        [Name("Indistinguishable Proteins"), TypeConverter(typeof(CommaDelimitedToStringArrayTypeConverter))]
        public string[] IndistinguishableProteins { get; set; } = new string[] { };

        /// <summary>
        /// Creates an empty record; the array-valued properties are already initialised to empty arrays.
        /// </summary>
        public MsFraggerProtein()
        {
        }
    }
}
159 changes: 159 additions & 0 deletions mzLib/Readers/ExternalResults/IndividualResultRecords/MsFraggerPsm.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading.Tasks;
using System.Xml.Linq;
using CsvHelper.Configuration;
using CsvHelper.Configuration.Attributes;
using MassSpectrometry;
using Omics.Modifications;
using Proteomics.AminoAcidPolymer;
using Proteomics;
using static System.Net.Mime.MediaTypeNames;
using ThermoFisher.CommonCore.Data.Interfaces;

namespace Readers
{
    /// <summary>
    /// A single peptide-spectrum-match record read from a tab-delimited result table.
    /// The "MsFragger Fields" region maps one property per column header; the
    /// "Interpreted Fields" region derives values from <see cref="Spectrum"/> and caches them.
    /// </summary>
    public class MsFraggerPsm
    {
        /// <summary>
        /// CsvHelper configuration used to read this record type: tab-delimited with a header row,
        /// blank lines skipped, fields trimmed, and the bad-data callback disabled.
        /// </summary>
        public static CsvConfiguration CsvConfiguration = new CsvConfiguration(CultureInfo.InvariantCulture)
        {
            Delimiter = "\t",
            HasHeaderRecord = true,
            IgnoreBlankLines = true,
            TrimOptions = TrimOptions.Trim,
            BadDataFound = null,
        };

        #region MsFragger Fields

        /// <summary>
        /// Dot-separated spectrum identifier; the interpreted fields below are parsed from it.
        /// </summary>
        [Name("Spectrum")]
        public string Spectrum { get; set; }

        [Name("Spectrum File")]
        public string SpectrumFilePath { get; set; }

        [Name("Peptide")]
        public string BaseSequence { get; set; }

        [Name("Modified Peptide")]
        public string FullSequence { get; set; }

        [Name("Extended Peptide")]
        public string ExtendedSequence { get; set; }

        [Name("Prev AA")]
        public char PreviousAminoAcid { get; set; }

        [Name("Next AA")]
        public char NextAminoAcid { get; set; }

        [Name("Peptide Length")]
        public int PeptideLength { get; set; }

        [Name("Charge")]
        public int Charge { get; set; }

        [Name("Retention")]
        public double RetentionTime { get; set; }

        [Name("Observed Mass")]
        public double ObservedMass { get; set; }

        [Name("Calibrated Observed Mass")]
        public double CalibratedObservedMass { get; set; }

        [Name("Observed M/Z")]
        public double ObservedMz { get; set; }

        [Name("Calibrated Observed M/Z")]
        public double CalibratedObservedMz { get; set; }

        [Name("Calculated Peptide Mass")]
        public double CalculatedPeptideMass { get; set; }

        [Name("Calculated M/Z")]
        public double CalculatedMz { get; set; }

        [Name("Delta Mass")]
        public double DeltaMass { get; set; }

        [Name("Expectation")]
        public double Expectation { get; set; }

        [Name("Hyperscore")]
        public double HyperScore { get; set; }

        [Name("Nextscore")]
        public double NextScore { get; set; }

        [Name("PeptideProphet Probability")]
        public double PeptideProphetProbability { get; set; }

        [Name("Number of Enzymatic Termini")]
        public int NumberOfEnzymaticTermini { get; set; }

        [Name("Number of Missed Cleavages")]
        public int NumberOfMissedCleavages { get; set; }

        [Name("Protein Start")]
        public int ProteinStart { get; set; }

        [Name("Protein End")]
        public int ProteinEnd { get; set; }

        [Name("Intensity")]
        public double Intensity { get; set; }

        [Name("Assigned Modifications")]
        public string AssignedModifications { get; set; }

        [Name("Observed Modifications")]
        public string ObservedModifications { get; set; }

        [Name("Purity")]
        public double Purity { get; set; }

        [Name("Is Unique")]
        public bool IsUnique { get; set; }

        [Name("Protein")]
        public string Protein { get; set; }

        [Name("Protein ID")]
        public string ProteinAccession { get; set; }

        [Name("Entry Name")]
        public string EntryName { get; set; }

        [Name("Gene")]
        public string Gene { get; set; }

        [Name("Protein Description")]
        public string ProteinDescription { get; set; }

        [Name("Mapped Genes")]
        public string MappedGenes { get; set; }

        [Name("Mapped Proteins")]
        public string MappedProteins { get; set; }

        #endregion

        #region Interpreted Fields

        // Number of trailing dot-separated tokens that follow the file name in Spectrum
        // (assumed layout: name.startScan.endScan.charge).
        private const int TrailingSpectrumTokens = 3;

        [Ignore] private string _fileNameWithoutExtension;

        /// <summary>
        /// File-name portion of <see cref="Spectrum"/>, computed on first access and cached.
        /// When the identifier has more than four dot-separated tokens, everything before the
        /// last three tokens is treated as the file name so that names containing dots survive
        /// intact; otherwise the first token is used.
        /// </summary>
        [Ignore]
        public string FileNameWithoutExtension =>
            _fileNameWithoutExtension ??= ParseFileName(Spectrum);

        [Ignore] private int? _oneBasedScanNumber;

        /// <summary>
        /// Scan number parsed from <see cref="Spectrum"/>, computed on first access and cached.
        /// When the identifier has more than four dot-separated tokens, the third token from the
        /// end is used; otherwise the second token is used.
        /// </summary>
        /// <exception cref="FormatException">The scan token is not an integer.</exception>
        /// <exception cref="IndexOutOfRangeException">The identifier contains no '.' separator.</exception>
        [Ignore]
        public int OneBasedScanNumber => _oneBasedScanNumber ??= ParseScanNumber(Spectrum);

        /// <summary>Extracts the file-name part of a dot-separated spectrum identifier.</summary>
        private static string ParseFileName(string spectrum)
        {
            string[] tokens = spectrum.Split('.');

            // With exactly four (or fewer) tokens the file name is the first token, as before.
            // Extra tokens mean the file name itself contained dots, so re-join the leading part.
            return tokens.Length > TrailingSpectrumTokens + 1
                ? string.Join(".", tokens, 0, tokens.Length - TrailingSpectrumTokens)
                : tokens[0];
        }

        /// <summary>Extracts the scan number from a dot-separated spectrum identifier.</summary>
        private static int ParseScanNumber(string spectrum)
        {
            string[] tokens = spectrum.Split('.');

            // Index from the end only when the file name contributed extra tokens; for the
            // common four-token form this is the same position (index 1) the original code used.
            int scanIndex = tokens.Length > TrailingSpectrumTokens + 1
                ? tokens.Length - TrailingSpectrumTokens
                : 1;

            // Scan numbers are machine-written digits, so parse culture-independently.
            return int.Parse(tokens[scanIndex], NumberStyles.Integer, CultureInfo.InvariantCulture);
        }

        #endregion
    }
}
Loading
Loading