From 660f5686e87e9d9164c3537b42d94e546d88461c Mon Sep 17 00:00:00 2001 From: tfenne Date: Wed, 1 May 2019 14:30:51 -0600 Subject: [PATCH 1/2] Change SAMTextHeaderCodec to no longer accumulate the entire text of the header into memory. --- .../java/htsjdk/samtools/SAMFileHeader.java | 22 ++++--------------- .../htsjdk/samtools/SAMTextHeaderCodec.java | 18 ++++----------- 2 files changed, 8 insertions(+), 32 deletions(-) diff --git a/src/main/java/htsjdk/samtools/SAMFileHeader.java b/src/main/java/htsjdk/samtools/SAMFileHeader.java index fced09a96c..a0d7d070a1 100644 --- a/src/main/java/htsjdk/samtools/SAMFileHeader.java +++ b/src/main/java/htsjdk/samtools/SAMFileHeader.java @@ -101,7 +101,6 @@ public enum GroupOrder { private final Map mProgramRecordMap = new HashMap<>(); private SAMSequenceDictionary mSequenceDictionary = new SAMSequenceDictionary(); final private List mComments = new ArrayList<>(); - private String textHeader; private final List mValidationErrors = new ArrayList<>(); public SAMFileHeader() { @@ -322,24 +321,11 @@ public void setAttribute(final String key, final String value) { super.setAttribute(key, tempVal); } - /** - * If this SAMHeader was read from a file, this property contains the header - * as it appeared in the file, otherwise it is null. Note that this is not a toString() - * operation. Changes to the SAMFileHeader object after reading from the file are not reflected in this value. - * - * In addition this value is only set if one of the following is true: - * - The size of the header is < 1,048,576 characters (1MB ascii, 2MB unicode) - * - There are either validation or parsing errors associated with the header - * - * Invalid header lines may appear in value but are not stored in the SAMFileHeader object. - */ - public String getTextHeader() { - return textHeader; - } + /** @deprecated since May 1st 2019 - text version of header is no longer stored. */ + @Deprecated public String getTextHeader() { return null; } - public void setTextHeader(final String textHeader) { - this.textHeader = textHeader; - } + /** @deprecated since May 1st 2019 - text version of header is no longer stored. */ + @Deprecated public void setTextHeader(final String textHeader) { } public List getComments() { return Collections.unmodifiableList(mComments); diff --git a/src/main/java/htsjdk/samtools/SAMTextHeaderCodec.java b/src/main/java/htsjdk/samtools/SAMTextHeaderCodec.java index 11d5dfd71a..d29365369d 100644 --- a/src/main/java/htsjdk/samtools/SAMTextHeaderCodec.java +++ b/src/main/java/htsjdk/samtools/SAMTextHeaderCodec.java @@ -24,6 +24,7 @@ package htsjdk.samtools; import htsjdk.samtools.SAMFileHeader.SortOrder; +import htsjdk.samtools.SAMValidationError.Type; import htsjdk.samtools.util.DateParser; import htsjdk.samtools.util.LineReader; import htsjdk.samtools.util.RuntimeIOException; @@ -55,8 +56,6 @@ public class SAMTextHeaderCodec { private String mSource; private List sequences; private List readGroups; - // Accumulate header while reading it from input. - private final StringBuilder textHeader = new StringBuilder(); // For error reporting when parsing private ValidationStringency validationStringency = ValidationStringency.SILENT; @@ -124,23 +123,14 @@ public SAMFileHeader decode(final LineReader reader, final String source) { mFileHeader.setSequenceDictionary(new SAMSequenceDictionary(sequences)); mFileHeader.setReadGroups(readGroups); - // Only store the header text if there was a parsing error or the it's less than 1MB on disk / 2MB in mem - if (!mFileHeader.getValidationErrors().isEmpty() || textHeader.length() < (1024 * 1024)) { - mFileHeader.setTextHeader(textHeader.toString()); - } - SAMUtils.processValidationErrors(mFileHeader.getValidationErrors(), -1, validationStringency); return mFileHeader; } private String advanceLine() { final int nextChar = mReader.peek(); - if (nextChar != '@') { - return null; - } - mCurrentLine = mReader.readLine(); - textHeader.append(mCurrentLine).append('\n'); - return mCurrentLine; + this.mCurrentLine = (nextChar == '@') ? mReader.readLine() : null; + return this.mCurrentLine; } /** @@ -547,4 +537,4 @@ public void setValidationStringency(final ValidationStringency validationStringe } this.validationStringency = validationStringency; } -} \ No newline at end of file +} From 188bd23edac0a745665c2bb24096bc2cef5acd50 Mon Sep 17 00:00:00 2001 From: tfenne Date: Fri, 17 May 2019 06:59:24 -0600 Subject: [PATCH 2/2] Removed comment in BAMFileWriter. --- src/main/java/htsjdk/samtools/BAMFileWriter.java | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/main/java/htsjdk/samtools/BAMFileWriter.java b/src/main/java/htsjdk/samtools/BAMFileWriter.java index 061e86c09e..4bdf950145 100644 --- a/src/main/java/htsjdk/samtools/BAMFileWriter.java +++ b/src/main/java/htsjdk/samtools/BAMFileWriter.java @@ -187,16 +187,12 @@ protected static void writeHeader(final BinaryCodec outputBinaryCodec, final SAM } /** - * Writes a header to a BAM file. Might need to regenerate the String version of the header, if one already has both the - * samFileHeader and the String, use the version of this method which takes both. + * Writes a header to a BAM file. */ protected static void writeHeader(final BinaryCodec outputBinaryCodec, final SAMFileHeader samFileHeader) { - // Do not use SAMFileHeader.getTextHeader() as it is not updated when changes to the underlying object are made - final String headerString; final Writer stringWriter = new StringWriter(); new SAMTextHeaderCodec().encode(stringWriter, samFileHeader, true); - headerString = stringWriter.toString(); - + final String headerString = stringWriter.toString(); writeHeader(outputBinaryCodec, samFileHeader, headerString); }