Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change SAMTextHeaderCodec to no longer accumulate the entire text of … #1361

Merged
merged 2 commits into from
May 17, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 2 additions & 6 deletions src/main/java/htsjdk/samtools/BAMFileWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -187,16 +187,12 @@ protected static void writeHeader(final BinaryCodec outputBinaryCodec, final SAM
}

/**
* Writes a header to a BAM file. Might need to regenerate the String version of the header, if one already has both the
* samFileHeader and the String, use the version of this method which takes both.
* Writes a header to a BAM file.
*/
protected static void writeHeader(final BinaryCodec outputBinaryCodec, final SAMFileHeader samFileHeader) {
// Do not use SAMFileHeader.getTextHeader() as it is not updated when changes to the underlying object are made
final String headerString;
final Writer stringWriter = new StringWriter();
new SAMTextHeaderCodec().encode(stringWriter, samFileHeader, true);
headerString = stringWriter.toString();

final String headerString = stringWriter.toString();
writeHeader(outputBinaryCodec, samFileHeader, headerString);
}

Expand Down
22 changes: 4 additions & 18 deletions src/main/java/htsjdk/samtools/SAMFileHeader.java
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,6 @@ public enum GroupOrder {
private final Map<String, SAMProgramRecord> mProgramRecordMap = new HashMap<>();
private SAMSequenceDictionary mSequenceDictionary = new SAMSequenceDictionary();
final private List<String> mComments = new ArrayList<>();
private String textHeader;
private final List<SAMValidationError> mValidationErrors = new ArrayList<>();

public SAMFileHeader() {
Expand Down Expand Up @@ -322,24 +321,11 @@ public void setAttribute(final String key, final String value) {
super.setAttribute(key, tempVal);
}

/**
* If this SAMHeader was read from a file, this property contains the header
* as it appeared in the file, otherwise it is null. Note that this is not a toString()
* operation. Changes to the SAMFileHeader object after reading from the file are not reflected in this value.
*
* In addition this value is only set if one of the following is true:
* - The size of the header is < 1,048,576 characters (1MB ascii, 2MB unicode)
* - There are either validation or parsing errors associated with the header
*
* Invalid header lines may appear in value but are not stored in the SAMFileHeader object.
*/
public String getTextHeader() {
return textHeader;
}
/** @deprecated since May 1st 2019 - text version of header is no longer stored. */
@Deprecated public String getTextHeader() { return null; }

public void setTextHeader(final String textHeader) {
this.textHeader = textHeader;
}
/** @deprecated since May 1st 2019 - text version of header is no longer stored. */
@Deprecated public void setTextHeader(final String textHeader) { }

public List<String> getComments() {
return Collections.unmodifiableList(mComments);
Expand Down
18 changes: 4 additions & 14 deletions src/main/java/htsjdk/samtools/SAMTextHeaderCodec.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
package htsjdk.samtools;

import htsjdk.samtools.SAMFileHeader.SortOrder;
import htsjdk.samtools.SAMValidationError.Type;
import htsjdk.samtools.util.DateParser;
import htsjdk.samtools.util.LineReader;
import htsjdk.samtools.util.RuntimeIOException;
Expand Down Expand Up @@ -55,8 +56,6 @@ public class SAMTextHeaderCodec {
private String mSource;
private List<SAMSequenceRecord> sequences;
private List<SAMReadGroupRecord> readGroups;
// Accumulate header while reading it from input.
private final StringBuilder textHeader = new StringBuilder();

// For error reporting when parsing
private ValidationStringency validationStringency = ValidationStringency.SILENT;
Expand Down Expand Up @@ -124,23 +123,14 @@ public SAMFileHeader decode(final LineReader reader, final String source) {
mFileHeader.setSequenceDictionary(new SAMSequenceDictionary(sequences));
mFileHeader.setReadGroups(readGroups);

// Only store the header text if there was a parsing error or the it's less than 1MB on disk / 2MB in mem
if (!mFileHeader.getValidationErrors().isEmpty() || textHeader.length() < (1024 * 1024)) {
mFileHeader.setTextHeader(textHeader.toString());
}

SAMUtils.processValidationErrors(mFileHeader.getValidationErrors(), -1, validationStringency);
return mFileHeader;
}

private String advanceLine() {
final int nextChar = mReader.peek();
if (nextChar != '@') {
return null;
}
mCurrentLine = mReader.readLine();
textHeader.append(mCurrentLine).append('\n');
return mCurrentLine;
this.mCurrentLine = (nextChar == '@') ? mReader.readLine() : null;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be better if there was a named constant for the magic character/string @.

return this.mCurrentLine;
}

/**
Expand Down Expand Up @@ -547,4 +537,4 @@ public void setValidationStringency(final ValidationStringency validationStringe
}
this.validationStringency = validationStringency;
}
}
}