Skip to content

Commit

Permalink
Re-added the new stratifiers to the this PR branch
Browse files Browse the repository at this point in the history
- This seems simpler as they depend on some indel error features
  • Loading branch information
michaelgatzen committed Aug 7, 2019
1 parent cdf839b commit cde82fb
Show file tree
Hide file tree
Showing 2 changed files with 134 additions and 22 deletions.
114 changes: 113 additions & 1 deletion src/main/java/picard/sam/SamErrorMetric/ReadBaseStratification.java
Original file line number Diff line number Diff line change
Expand Up @@ -483,6 +483,74 @@ public String getSuffix() {
}
}

/**
* Stratifies according to the number of matching cigar operators (from CIGAR string) that the read has.
*/
public static class CigarOperatorsInReadStratifier extends RecordStratifier<Integer> {

private CigarOperator operator;

public CigarOperatorsInReadStratifier(final CigarOperator op) {
operator = op;
}

@Override
public Integer stratify(final SAMRecord samRecord) {
try {
return samRecord.getCigar().getCigarElements().stream()
.filter(ce -> ce.getOperator().equals(operator))
.mapToInt(CigarElement::getLength)
.sum();
} catch (final Exception ex) {
return null;
}
}

@Override
public String getSuffix() {
return "cigar_elements_" + operator.name() + "_in_read";
}
}

/**
* Stratifies according to the number of indel bases (from CIGAR string) that the read has.
*/
public static class IndelsInReadStratifier extends RecordStratifier<Integer> {

@Override
public Integer stratify(final SAMRecord samRecord) {
try {
return samRecord.getCigar().getCigarElements().stream()
.filter(ce -> (ce.getOperator().equals(CigarOperator.I) || ce.getOperator().equals(CigarOperator.D)))
.mapToInt(CigarElement::getLength)
.sum();
} catch (final Exception ex) {
return null;
}
}

@Override
public String getSuffix() {
return "indels_in_read";
}
}

/**
* Stratifies according to the length of an insertion or deletion.
*/
public static class IndelLengthStratifier implements RecordAndOffsetStratifier {

@Override
public Integer stratify(final RecordAndOffset recordAndOffset, final SAMLocusAndReference locusInfo, final CollectSamErrorMetrics.BaseOperation operation) {
return stratifyIndelLength(recordAndOffset, locusInfo, operation);
}

@Override
public String getSuffix() {
return "indel_length";
}
}


/* ************ Instances of stratifiers so that they do not need to be allocated every time ****************/

Expand Down Expand Up @@ -647,6 +715,26 @@ public String getSuffix() {
*/
public static final NsInReadStratifier nsInReadStratifier = new NsInReadStratifier();

/**
* Stratify by Insertions in the read cigars.
*/
public static final CigarOperatorsInReadStratifier insertionsInReadStratifier = new CigarOperatorsInReadStratifier(CigarOperator.I);

/**
* Stratify by Deletions in the read cigars.
*/
public static final CigarOperatorsInReadStratifier deletionsInReadStratifier = new CigarOperatorsInReadStratifier(CigarOperator.D);

/**
* Stratify by Indels in the read cigars.
*/
public static final IndelsInReadStratifier indelsInReadStratifier = new IndelsInReadStratifier();

/**
* Stratifies into the number of bases in an insertion
*/
public static final IndelLengthStratifier indelLengthStratifier = new IndelLengthStratifier();

/* *************** enums **************/

/**
Expand Down Expand Up @@ -685,7 +773,11 @@ enum Stratifier implements CommandLineParser.ClpEnum {
ONE_BASE_PADDED_CONTEXT(() -> oneBasePaddedContextStratifier, "The current reference base and a one base padded region from the read resulting in a 3-base context."),
TWO_BASE_PADDED_CONTEXT(() -> twoBasePaddedContextStratifier, "The current reference base and a two base padded region from the read resulting in a 5-base context."),
CONSENSUS(() -> consensusStratifier, "Whether or not duplicate reads were used to form a consensus read. This stratifier makes use of the aD, bD, and cD tags for duplex consensus reads. If the reads are single index consensus, only the cD tags are used."),
NS_IN_READ(() -> nsInReadStratifier, "The number of Ns in the read.");
NS_IN_READ(() -> nsInReadStratifier, "The number of Ns in the read."),
INSERTIONS_IN_READ(() -> insertionsInReadStratifier, "The number of Insertions in the read cigar."),
DELETIONS_IN_READ(() -> deletionsInReadStratifier, "The number of Deletions in the read cigar."),
INDELS_IN_READ(() -> indelsInReadStratifier, "The number of INDELs in the read cigar."),
INDEL_LENGTH(() -> indelLengthStratifier, "The number of bases in an indel");

private final String docString;

Expand Down Expand Up @@ -1023,6 +1115,26 @@ private static String stratifyReadGroup(final SAMRecord sam) {
return sam.getReadGroup().getReadGroupId();
}

private static Integer stratifyIndelLength(final RecordAndOffset recordAndOffset, final SAMLocusAndReference locusInfo, final CollectSamErrorMetrics.BaseOperation operation) {
// If the base is not an indel, stratify it as a length of 0
if (operation != CollectSamErrorMetrics.BaseOperation.Insertion && operation != CollectSamErrorMetrics.BaseOperation.Deletion) {
return 0;
}

final CigarElement cigarElement = getIndelElement(recordAndOffset, operation);
if (cigarElement == null) {
// No CIGAR operation for given position.
return null;
}
if (operation == CollectSamErrorMetrics.BaseOperation.Insertion && cigarElement.getOperator() != CigarOperator.I) {
throw new IllegalStateException("Wrong CIGAR operator for the given position.");
}
if (operation == CollectSamErrorMetrics.BaseOperation.Deletion && cigarElement.getOperator() != CigarOperator.D) {
throw new IllegalStateException("Wrong CIGAR operator for the given position.");
}
return cigarElement.getLength();
}

public static CigarElement getIndelElement(final RecordAndOffset recordAndOffset, final CollectSamErrorMetrics.BaseOperation operation) {
final SAMRecord record = recordAndOffset.getRecord();
final int offset = recordAndOffset.getOffset();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -599,26 +599,26 @@ public Object[][] provideForTestIndelErrors() {
{ new String[] { "20M2D2I" }, ".indel_error_by_all", new IndelErrorMetric("all", 22, 1, 2, 1,2) },
{ new String[] { "20M2I2D" }, ".indel_error_by_all", new IndelErrorMetric("all", 22, 1, 2, 1,2) },

// // indel_length:
// // insertions
// { new String[] { "50M1I50M" }, ".indel_error_by_indel_length", new IndelErrorMetric("1", 1, 1, 1, 0, 0) },
// { new String[] { "50M2I50M" }, ".indel_error_by_indel_length", new IndelErrorMetric("2", 2, 1, 2, 0, 0) },
// { new String[] { "20M2I20M2I20M" }, ".indel_error_by_indel_length", new IndelErrorMetric("2", 4, 2, 4, 0, 0) },
// { new String[] { "1I10M" }, ".indel_error_by_indel_length", new IndelErrorMetric("1", 1, 1, 1, 0, 0) },
// { new String[] { "10M1I" }, ".indel_error_by_indel_length", new IndelErrorMetric("1", 1, 1, 1, 0, 0) },
// // deletions
// { new String[] { "50M1D50M" }, ".indel_error_by_indel_length", new IndelErrorMetric("1", 0, 0, 0, 1, 1) },
// { new String[] { "50M2D50M" }, ".indel_error_by_indel_length", new IndelErrorMetric("2", 0, 0, 0, 1, 2) },
// { new String[] { "20M2D20M2D20M" }, ".indel_error_by_indel_length", new IndelErrorMetric("2", 0, 0, 0, 2, 4) },
// { new String[] { "1D10M" }, ".indel_error_by_indel_length", new IndelErrorMetric("1", 0, 0, 0, 1, 1) },
// { new String[] { "10M1D" }, ".indel_error_by_indel_length", new IndelErrorMetric("1", 0, 0, 0, 1, 1) },
// // insertions & deletions
// { new String[] { "20M2I20M3D20M" }, ".indel_error_by_indel_length", new IndelErrorMetric("2", 2, 1, 2, 0, 0) },
// { new String[] { "20M2I20M3D20M" }, ".indel_error_by_indel_length", new IndelErrorMetric("3", 0, 0, 0, 1, 3) },
// { new String[] { "2I2D20M" }, ".indel_error_by_indel_length", new IndelErrorMetric("2", 2, 1, 2,1, 2) },
// { new String[] { "2D2I20M" }, ".indel_error_by_indel_length", new IndelErrorMetric("2", 2, 1, 2, 1, 2) },
// { new String[] { "2M2D2I" }, ".indel_error_by_indel_length", new IndelErrorMetric("2", 2, 1, 2, 1, 2) },
// { new String[] { "20M2I2D" }, ".indel_error_by_indel_length", new IndelErrorMetric("2", 2, 1, 2, 1, 2) },
// indel_length:
// insertions
{ new String[] { "50M1I50M" }, ".indel_error_by_indel_length", new IndelErrorMetric("1", 1, 1, 1, 0, 0) },
{ new String[] { "50M2I50M" }, ".indel_error_by_indel_length", new IndelErrorMetric("2", 2, 1, 2, 0, 0) },
{ new String[] { "20M2I20M2I20M" }, ".indel_error_by_indel_length", new IndelErrorMetric("2", 4, 2, 4, 0, 0) },
{ new String[] { "1I10M" }, ".indel_error_by_indel_length", new IndelErrorMetric("1", 1, 1, 1, 0, 0) },
{ new String[] { "10M1I" }, ".indel_error_by_indel_length", new IndelErrorMetric("1", 1, 1, 1, 0, 0) },
// deletions
{ new String[] { "50M1D50M" }, ".indel_error_by_indel_length", new IndelErrorMetric("1", 0, 0, 0, 1, 1) },
{ new String[] { "50M2D50M" }, ".indel_error_by_indel_length", new IndelErrorMetric("2", 0, 0, 0, 1, 2) },
{ new String[] { "20M2D20M2D20M" }, ".indel_error_by_indel_length", new IndelErrorMetric("2", 0, 0, 0, 2, 4) },
{ new String[] { "1D10M" }, ".indel_error_by_indel_length", new IndelErrorMetric("1", 0, 0, 0, 1, 1) },
{ new String[] { "10M1D" }, ".indel_error_by_indel_length", new IndelErrorMetric("1", 0, 0, 0, 1, 1) },
// insertions & deletions
{ new String[] { "20M2I20M3D20M" }, ".indel_error_by_indel_length", new IndelErrorMetric("2", 2, 1, 2, 0, 0) },
{ new String[] { "20M2I20M3D20M" }, ".indel_error_by_indel_length", new IndelErrorMetric("3", 0, 0, 0, 1, 3) },
{ new String[] { "2I2D20M" }, ".indel_error_by_indel_length", new IndelErrorMetric("2", 2, 1, 2,1, 2) },
{ new String[] { "2D2I20M" }, ".indel_error_by_indel_length", new IndelErrorMetric("2", 2, 1, 2, 1, 2) },
{ new String[] { "2M2D2I" }, ".indel_error_by_indel_length", new IndelErrorMetric("2", 2, 1, 2, 1, 2) },
{ new String[] { "20M2I2D" }, ".indel_error_by_indel_length", new IndelErrorMetric("2", 2, 1, 2, 1, 2) },
};
}

Expand Down Expand Up @@ -664,7 +664,7 @@ public void testIndelErrors(final String[] readCigars, final String errorSubscri
"OUTPUT=" + outputBaseFileName,
"REFERENCE_SEQUENCE=" + referenceFile.getAbsolutePath(),
"ERROR_METRICS=" + "INDEL_ERROR",
// "ERROR_METRICS=" + "INDEL_ERROR:INDEL_LENGTH",
"ERROR_METRICS=" + "INDEL_ERROR:INDEL_LENGTH",
"VCF=" + vcf.getAbsolutePath()
};

Expand Down

0 comments on commit cde82fb

Please sign in to comment.