Skip to content

Commit

Permalink
Added TransientAttributes to GATKRead api (#5664)
Browse files Browse the repository at this point in the history
  • Loading branch information
jamesemery authored Apr 22, 2019
1 parent 2f39f26 commit aa19925
Show file tree
Hide file tree
Showing 6 changed files with 56 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ public static JavaRDD<GATKRead> mark(final JavaRDD<GATKRead> reads, final SAMFil
if (markUnmappedMates || !read.isUnmapped()) {
int dupCount = namesOfNonDuplicateReadsAndOpticalCounts.replace(read.getName(), NO_OPTICAL_MARKER);
if (dupCount > -1) {
((SAMRecordToGATKReadAdapter) read).setTransientAttribute(MarkDuplicatesSparkUtils.OPTICAL_DUPLICATE_TOTAL_ATTRIBUTE_NAME, dupCount);
read.setTransientAttribute(MarkDuplicatesSparkUtils.OPTICAL_DUPLICATE_TOTAL_ATTRIBUTE_NAME, dupCount);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -406,10 +406,10 @@ static JavaPairRDD<String, GATKDuplicationMetrics> generateMetrics(final SAMFile
metrics.updateMetrics(read);
// NOTE: we use the SAMRecord transientAttribute field here specifically so that the already
// serialized read does not have to be parsed again, for performance reasons.
if (((SAMRecordToGATKReadAdapter) read).getTransientAttribute(OPTICAL_DUPLICATE_TOTAL_ATTRIBUTE_NAME)!=null) {
if (read.getTransientAttribute(OPTICAL_DUPLICATE_TOTAL_ATTRIBUTE_NAME)!=null) {
// NOTE: there is a safety check above in getReadsGroupedByName()
metrics.READ_PAIR_OPTICAL_DUPLICATES +=
(int)((SAMRecordToGATKReadAdapter) read).getTransientAttribute(OPTICAL_DUPLICATE_TOTAL_ATTRIBUTE_NAME);
(int)(read.getTransientAttribute(OPTICAL_DUPLICATE_TOTAL_ATTRIBUTE_NAME));
}
return new Tuple2<>(library, metrics);
})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -505,7 +505,7 @@ private static boolean readHasNoPlausibleIdealsOfSize(final GATKRead read,
final int refStart,
final int maxIndelSize,
final boolean useCachedResults) {
BitSet cachedResult = (BitSet) ((SAMRecordToGATKReadAdapter)read).getTransientAttribute(INDEL_INFORMATIVE_BASES_CACHE_ATTRIBUTE_NAME);
BitSet cachedResult = (BitSet) read.getTransientAttribute(INDEL_INFORMATIVE_BASES_CACHE_ATTRIBUTE_NAME);
if (cachedResult == null || !useCachedResults) {
Utils.validate(readStart >= 0, "readStart must >= 0");
Utils.validate(refStart >= 0, "refStart must >= 0");
Expand Down Expand Up @@ -595,7 +595,7 @@ private static boolean readHasNoPlausibleIdealsOfSize(final GATKRead read,
}
}
cachedResult = informativeBases;
((SAMRecordToGATKReadAdapter)read).setTransientAttribute(INDEL_INFORMATIVE_BASES_CACHE_ATTRIBUTE_NAME, informativeBases);
read.setTransientAttribute(INDEL_INFORMATIVE_BASES_CACHE_ATTRIBUTE_NAME, informativeBases);
}
return cachedResult.get(readStart);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -667,6 +667,14 @@ default int numCigarElements(){
*/
byte[] getAttributeAsByteArray( final String attributeName );

/**
 * Retrieve a value from the transient attribute store provided by the underlying implementation.
 * Transient attributes will not be serialized or written out with a record.
 *
 * NOTE: This is an advanced use case for GATKRead and you should probably use getAttribute() instead
 * @param key key whose associated value is to be retrieved
 * @return the value stored under {@code key}, or {@code null} if no value is present
 *         (callers null-check the result before using it)
 */
Object getTransientAttribute(final Object key);

/**
* Set an integer-valued attribute on the read.
*
Expand Down Expand Up @@ -694,6 +702,16 @@ default int numCigarElements(){
*/
void setAttribute( final String attributeName, final byte[] attributeValue );

/**
 * Store a value in the transient attribute store of the underlying data type. The store is meant for temporary
 * attributes and cached data that will not be serialized or written out as a record.
 *
 * NOTE: This is an advanced use case for GATKRead and you should probably use setAttribute() instead
 * @param key key under which the value will be stored
 * @param value value to store
 */
void setTransientAttribute(final Object key, final Object value);

/**
* Clear an individual attribute on the read.
*
Expand All @@ -707,6 +725,14 @@ default int numCigarElements(){
*/
void clearAttributes();

/**
 * Clear an individual transient attribute on the read.
 *
 * NOTE(review): this description previously required a name that is legal according to
 * {@link ReadUtils#assertAttributeNameIsLegal} and promised an IllegalArgumentException otherwise.
 * The SAMRecord-backed implementation performs no such check before removing the entry, so that
 * requirement is not stated here; confirm the intended contract before relying on validation.
 *
 * @param attributeName key of the transient attribute to clear
 */
void clearTransientAttribute( final String attributeName );

/**
* Return a copy of this read.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -698,6 +698,12 @@ public void clearAttributes() {
samRecord.clearAttributes();
}

/**
 * Removes a single transient attribute from the wrapped samRecord.
 *
 * @param attributeName key of the transient attribute to remove; passed through unvalidated
 */
@Override
public void clearTransientAttribute( final String attributeName ) {
// NOTE(review): cached values are reset before the removal even though only a transient attribute
// changes; presumably this mirrors the other mutators in this class. Confirm whether it is needed.
clearCachedValues();
samRecord.removeTransientAttribute(attributeName);
}

@Override
public GATKRead copy() {
// Produces a shallow but "safe to use" copy.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,23 @@ public void testChangingContigsOnHeaderlessGATKRead(){
final GATKRead roundTrippedRead2 = SparkTestUtils.roundTripInKryo(read, GATKRead.class, conf);
Assert.assertEquals(roundTrippedRead2, read);
}

@Test
public void testTransientAttributeSerializationClearing(){
    // Keys used for the attribute that is kept and the one that is explicitly cleared.
    final String keptKey = "test";
    final String clearedKey = "removed";

    // Populate two transient attributes on a headerless SAM-backed read, then clear one of them.
    final GATKRead original = ArtificialReadUtils.createHeaderlessSamBackedRead("read1", "1", 100, 50);
    original.setTransientAttribute(keptKey, 1);
    original.setTransientAttribute(clearedKey, 2);
    original.clearTransientAttribute(clearedKey);

    // Before serialization only the attribute that was not cleared is still present.
    Assert.assertEquals(original.getTransientAttribute(keptKey), 1);
    Assert.assertNull(original.getTransientAttribute(clearedKey));

    // A Kryo round trip serializes the read; transient attributes are expected not to travel with it.
    final SparkConf kryoConf = new SparkConf().set("spark.kryo.registrator",
            "org.broadinstitute.hellbender.engine.spark.SAMRecordToGATKReadAdapterSerializerUnitTest$TestGATKRegistrator");
    final GATKRead afterRoundTrip = SparkTestUtils.roundTripInKryo(original, GATKRead.class, kryoConf);

    Assert.assertNull(afterRoundTrip.getTransientAttribute(keptKey));
    Assert.assertNull(afterRoundTrip.getTransientAttribute(clearedKey));
}
}

0 comments on commit aa19925

Please sign in to comment.