Skip to content

Commit c37e041

Browse files
committed
Release 1.14
2 parents 911cb8e + b136e5d commit c37e041

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+2017
-188
lines changed

.cirrus.yml

+3-1
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ test_template: &TEST
4848
test_script: |
4949
make test-shlib-exports
5050
make test
51-
51+
if test "x$DO_UNTRACKED_FILE_CHECK" = "xyes"; then make check-untracked ; fi
5252
5353
#--------------------------------------------------
5454
# Task: linux builds.
@@ -68,6 +68,7 @@ gcc_task:
6868
matrix:
6969
- environment:
7070
DO_MAINTAINER_CHECKS: yes
71+
DO_UNTRACKED_FILE_CHECK: yes
7172
USE_CONFIG: no
7273
- environment:
7374
USE_CONFIG: yes
@@ -96,6 +97,7 @@ ubuntu_task:
9697
matrix:
9798
- environment:
9899
USE_CONFIG: yes
100+
DO_UNTRACKED_FILE_CHECK: yes
99101
- environment:
100102
USE_CONFIG: yes
101103
CFLAGS: -g -Wall -O3 -fsanitize=address

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ shlib-exports-*.txt
4949
/test/hts_endian
5050
/test/longrefs/*.tmp.*
5151
/test/pileup
52+
/test/pileup_mod
5253
/test/plugins-dlhts
5354
/test/sam
5455
/test/tabix/*.tmp.*
@@ -61,6 +62,7 @@ shlib-exports-*.txt
6162
/test/test_introspection
6263
/test/test_kfunc
6364
/test/test_kstring
65+
/test/test_mod
6466
/test/test-parse-reg
6567
/test/test_realn
6668
/test/test-regidx

Makefile

+22-4
Original file line numberDiff line numberDiff line change
@@ -74,12 +74,14 @@ BUILT_TEST_PROGRAMS = \
7474
test/fieldarith \
7575
test/hfile \
7676
test/pileup \
77+
test/pileup_mod \
7778
test/plugins-dlhts \
7879
test/sam \
7980
test/test_bgzf \
8081
test/test_expr \
8182
test/test_kfunc \
8283
test/test_kstring \
84+
test/test_mod \
8385
test/test_realn \
8486
test/test-regidx \
8587
test/test_str2int \
@@ -129,8 +131,8 @@ LIBHTS_SOVERSION = 3
129131
# is not strictly necessary and should be removed the next time
130132
# LIBHTS_SOVERSION is bumped (see #1144 and
131133
# https://developer.apple.com/library/archive/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html#//apple_ref/doc/uid/TP40002013-SW23)
132-
MACH_O_COMPATIBILITY_VERSION = 3.1.13
133-
MACH_O_CURRENT_VERSION = 3.1.13
134+
MACH_O_COMPATIBILITY_VERSION = 3.1.14
135+
MACH_O_CURRENT_VERSION = 3.1.14
134136

135137
# $(NUMERIC_VERSION) is for items that must have a numeric X.Y.Z string
136138
# even if this is a dirty or untagged Git working tree.
@@ -363,7 +365,7 @@ hfile_gcs.o hfile_gcs.pico: hfile_gcs.c config.h $(htslib_hts_h) $(htslib_kstrin
363365
hfile_libcurl.o hfile_libcurl.pico: hfile_libcurl.c config.h $(hfile_internal_h) $(htslib_hts_h) $(htslib_kstring_h) $(htslib_khash_h)
364366
hfile_s3_write.o hfile_s3_write.pico: hfile_s3_write.c config.h $(hfile_internal_h) $(htslib_hts_h) $(htslib_kstring_h) $(htslib_khash_h)
365367
hfile_s3.o hfile_s3.pico: hfile_s3.c config.h $(hfile_internal_h) $(htslib_hts_h) $(htslib_kstring_h)
366-
hts.o hts.pico: hts.c config.h $(htslib_hts_expr_h) $(htslib_hts_h) $(htslib_bgzf_h) $(cram_h) $(htslib_hfile_h) $(htslib_hts_endian_h) version.h config_vars.h $(hts_internal_h) $(hfile_internal_h) $(sam_internal_h) $(htslib_hts_os_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_ksort_h) $(htslib_tbx_h) $(htscodecs_htscodecs_h)
368+
hts.o hts.pico: hts.c config.h os/lzma_stub.h $(htslib_hts_h) $(htslib_bgzf_h) $(cram_h) $(htslib_hfile_h) $(htslib_hts_endian_h) version.h config_vars.h $(hts_internal_h) $(hfile_internal_h) $(sam_internal_h) $(htslib_hts_expr_h) $(htslib_hts_os_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_ksort_h) $(htslib_tbx_h) $(htscodecs_htscodecs_h)
367369
hts_expr.o hts_expr.pico: hts_expr.c config.h $(htslib_hts_expr_h) $(textutils_internal_h)
368370
hts_os.o hts_os.pico: hts_os.c config.h $(htslib_hts_defs_h) os/rand.c
369371
vcf.o vcf.pico: vcf.c config.h $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(htslib_hfile_h) $(hts_internal_h) $(htslib_khash_str2int_h) $(htslib_kstring_h) $(htslib_sam_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_hts_endian_h)
@@ -471,6 +473,13 @@ maintainer-check:
471473
test/maintainer/check_copyright.pl .
472474
test/maintainer/check_spaces.pl .
473475

476+
# Look for untracked files in the git repository.
477+
check-untracked:
478+
@if test -e .git && git status --porcelain | grep '^\?'; then \
479+
echo 'Untracked files detected (see above). Please either clean up, add to .gitignore, or for test output files consider naming them to match *.tmp or *.tmp.*' ; \
480+
false ; \
481+
fi
482+
474483
# Create a shorthand. We use $(SRC) or $(srcprefix) rather than $(srcdir)/
475484
# for brevity in test and install rules, and so that build logs do not have
476485
# ./ sprinkled throughout.
@@ -497,6 +506,7 @@ check test: $(BUILT_PROGRAMS) $(BUILT_TEST_PROGRAMS) $(BUILT_PLUGINS) $(HTSCODEC
497506
cd test/tabix && ./test-tabix.sh tabix.tst
498507
cd test/mpileup && ./test-pileup.sh mpileup.tst
499508
cd test/fastq && ./test-fastq.sh
509+
cd test/base_mods && ./base-mods.sh base-mods.tst
500510
REF_PATH=: test/sam test/ce.fa test/faidx.fa test/fastqs.fq
501511
test/test-regidx
502512
cd test && REF_PATH=: ./test.pl $${TEST_OPTS:-}
@@ -516,6 +526,9 @@ test/hfile: test/hfile.o libhts.a
516526
test/pileup: test/pileup.o libhts.a
517527
$(CC) $(LDFLAGS) -o $@ test/pileup.o libhts.a $(LIBS) -lpthread
518528

529+
test/pileup_mod: test/pileup_mod.o libhts.a
530+
$(CC) $(LDFLAGS) -o $@ test/pileup_mod.o libhts.a $(LIBS) -lpthread
531+
519532
test/plugins-dlhts: test/plugins-dlhts.o
520533
$(CC) $(LDFLAGS) -o $@ test/plugins-dlhts.o $(LIBS)
521534

@@ -534,6 +547,9 @@ test/test_kfunc: test/test_kfunc.o libhts.a
534547
test/test_kstring: test/test_kstring.o libhts.a
535548
$(CC) $(LDFLAGS) -o $@ test/test_kstring.o libhts.a -lz $(LIBS) -lpthread
536549

550+
test/test_mod: test/test_mod.o libhts.a
551+
$(CC) $(LDFLAGS) -o $@ test/test_mod.o libhts.a $(LIBS) -lpthread
552+
537553
test/test_realn: test/test_realn.o libhts.a
538554
$(CC) $(LDFLAGS) -o $@ test/test_realn.o libhts.a $(LIBS) -lpthread
539555

@@ -622,12 +638,14 @@ test/fuzz/hts_open_fuzzer.o: test/fuzz/hts_open_fuzzer.c config.h $(htslib_hfile
622638
test/fieldarith.o: test/fieldarith.c config.h $(htslib_sam_h)
623639
test/hfile.o: test/hfile.c config.h $(htslib_hfile_h) $(htslib_hts_defs_h) $(htslib_kstring_h)
624640
test/pileup.o: test/pileup.c config.h $(htslib_sam_h) $(htslib_kstring_h)
641+
test/pileup_mod.o: test/pileup_mod.c config.h $(htslib_sam_h)
625642
test/plugins-dlhts.o: test/plugins-dlhts.c config.h
626643
test/sam.o: test/sam.c config.h $(htslib_hts_defs_h) $(htslib_sam_h) $(htslib_faidx_h) $(htslib_khash_h) $(htslib_hts_log_h)
627644
test/test_bgzf.o: test/test_bgzf.c config.h $(htslib_bgzf_h) $(htslib_hfile_h) $(hfile_internal_h)
628645
test/test_expr.o: test/test_expr.c config.h $(htslib_hts_expr_h)
629646
test/test_kfunc.o: test/test_kfunc.c config.h $(htslib_kfunc_h)
630647
test/test_kstring.o: test/test_kstring.c config.h $(htslib_kstring_h)
648+
test/test_mod.o: test/test_mod.c config.h $(htslib_sam_h)
631649
test/test-parse-reg.o: test/test-parse-reg.c config.h $(htslib_hts_h) $(htslib_sam_h)
632650
test/test_realn.o: test/test_realn.c config.h $(htslib_hts_h) $(htslib_sam_h) $(htslib_faidx_h)
633651
test/test-regidx.o: test/test-regidx.c config.h $(htslib_kstring_h) $(htslib_regidx_h) $(htslib_hts_defs_h) $(textutils_internal_h)
@@ -790,7 +808,7 @@ distdir:
790808
force:
791809

792810

793-
.PHONY: all check clean distclean distdir force
811+
.PHONY: all check check-untracked clean distclean distdir force
794812
.PHONY: install install-pkgconfig installdirs lib-shared lib-static
795813
.PHONY: maintainer-check maintainer-clean mostlyclean plugins
796814
.PHONY: print-config print-version show-version tags

NEWS

+94-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,96 @@
1+
Noteworthy changes in release 1.14 (22nd October 2021)
2+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3+
4+
Features and Updates
5+
--------------------
6+
7+
* Added a keep option to bgzip to leave the original file untouched. This
8+
brings bgzip into line with gzip. (PR #1331, thanks to Alex Petty)
9+
10+
* "endpos" has been added to the filter language, giving the position
11+
of the rightmost mapped base as measured by the CIGAR string. For
12+
unmapped reads it is the same as "pos". (PR #1307, thanks to John Marshall)
13+
14+
* Interfaces have been added to interpret the new base modification tags
15+
added to the SAMtags document in samtools/hts-specs#418. (PR #1132)
16+
17+
* New API functions hts_flush()/sam_flush()/bcf_flush() for flushing output
18+
htsFile/samFile/vcfFile streams. (PR #1326, thanks to John Marshall)
19+
20+
* The synced_bcf_reader now sorts lines with symbolic alleles by END tag as
21+
well as POS. (PR #1321)
22+
23+
* Added synced_bcf_reader options BCF_SR_REGIONS_OVERLAP and
24+
BCF_SR_TARGETS_OVERLAP for better control of how records that start outside
25+
the desired region but overlap it are handled. Fixes samtools/bcftools#1420
26+
and samtools/bcftools#1421 raised by John Marshall. (PR #1327)
27+
28+
* HTSlib will now accept long-cigar CG:B: tags made by htsjdk which don't
29+
quite follow the specification properly (using signed values instead of
30+
unsigned). Thanks to Colin Diesh for reporting an example file. (PR #1317)
31+
32+
* The warning printed when the BGZF reader finds a file with no EOF block
33+
has been changed to be less alarming. Unfortunately some third-party
34+
BGZF encoders don't write EOF blocks at the end of files. Thanks to
35+
Keiran Raine for reporting an example file. (PR #1323)
36+
37+
* The FASTA and FASTQ readers get an option to skip over the first item on
38+
the header line, and use the second as the read name. It allows the original
39+
name to be restored on some of the fastq files served from the European
40+
Nucleotide Archive (ENA). (PR #1325)
41+
42+
* HTSlib is now more strict when parsing the VCF samples line (beginning
43+
#CHROM). It will only accept tabs between the mandatory field names, and
44+
sample names must be separated with tabs. (PR #1328)
45+
46+
* HTSlib will now warn if it looks like the header has been corrupted
47+
by diagnostic messages from the program that made it. This can happen when
48+
using `nohup`, which by default mixes stdout and stderr into the same
49+
stream. (PR #1339, thanks to John Marshall)
50+
51+
* File format detection will now recognise signatures for XZ, Zstd and D4
52+
files (note that HTSlib will not read them yet). (PR #1340, thanks to
53+
John Marshall)
54+
55+
Build changes
56+
-------------
57+
58+
These are compiler, configuration and makefile based changes.
59+
60+
* Some redundant tests have been removed from the test harness, speeding it up.
61+
(PR #1308)
62+
63+
* The version.sh script now works better on shallow checkouts. (PR #1324)
64+
65+
* A check-untracked Makefile target has been added to catch untracked files
66+
(mostly) left by the test harness. (PR #1324)
67+
68+
Bug fixes
69+
---------
70+
71+
* Fixed a case where flushing the thread pool could very occasionally cause
72+
a deadlock. (PR #1309)
73+
74+
* Fixed a bug where some CRAM files could fail to decode if the required_fields
75+
option was in use. Thanks to Matt Sexton for reporting the issue.
76+
(PR #1314, fixes samtools/samtools#1475)
77+
78+
* Fixed a regression where the S3 plugin could not read public files unless
79+
you supplied some Amazon credentials. Thanks to Chris Saunders for reporting.
80+
(PR #1332, fixes samtools/samtools#1491)
81+
82+
* Fixed a possible CRAM thread deadlock discovered by @ryancaicse.
83+
(PR #1330, fixes #1329)
84+
85+
* Some set-but-unused variables have been removed. (PR #1334)
86+
87+
* Fixed a bug which prevented "flag.read2" from working in the filter
88+
language unless it was at the end of the expression. Thanks to Vamsi Kodali
89+
for reporting the issue. (PR #1342)
90+
91+
* Fixed a memory leak that could happen if CRAM fails to inflate an LZMA
92+
block. (PR #1340, thanks to John Marshall)
93+
194
Noteworthy changes in release 1.13 (7th July 2021)
295
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
396

@@ -519,7 +612,7 @@ Bug fixes
519612

520613
* Fixed potential integer overflows in the VCF parser and ensured that
521614
the total length of FORMAT fields cannot go over 2Gbytes. [fuzz] (#1044,
522-
#1104)
615+
#1104; latter is CVE-2020-36403 affecting HTSlib versions 1.10 to 1.10.2)
523616

524617
* Download index files atomically in idx_test_and_fetch(). This prevents
525618
corruption when running parallel jobs on S3 files. Thanks to John Marshall.

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ HTSlib implements a generalized BAM index, with file extension `.csi`
1212
(coordinate-sorted index). The HTSlib file reader first looks for the new index
1313
and then for the old if the new index is absent.
1414

15-
This project also includes the popular tabix indexer, which indexes both `.tbi`
15+
This project also includes the popular tabix indexer, which creates both `.tbi`
1616
and `.csi` formats, and the bgzip compression utility.
1717

1818
[1]: http://samtools.github.io/hts-specs/

bcf_sr_sort.c

+24-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
Copyright (C) 2017-2019 Genome Research Ltd.
2+
Copyright (C) 2017-2021 Genome Research Ltd.
33
44
Author: Petr Danecek <[email protected]>
55
@@ -259,6 +259,7 @@ static int cmpstringp(const void *p1, const void *p2)
259259
return strcmp(* (char * const *) p1, * (char * const *) p2);
260260
}
261261

262+
#define DEBUG_VSETS 0
262263
#if DEBUG_VSETS
263264
void debug_vsets(sr_sort_t *srt)
264265
{
@@ -280,6 +281,7 @@ void debug_vsets(sr_sort_t *srt)
280281
}
281282
#endif
282283

284+
#define DEBUG_VBUF 0
283285
#if DEBUG_VBUF
284286
void debug_vbuf(sr_sort_t *srt)
285287
{
@@ -380,13 +382,33 @@ static int bcf_sr_sort_set(bcf_srs_t *readers, sr_sort_t *srt, const char *chr,
380382

381383
if ( srt->str.l ) kputc(';',&srt->str);
382384
srt->off[srt->noff++] = srt->str.l;
383-
size_t beg = srt->str.l;
385+
size_t beg = srt->str.l;
386+
int end_pos = -1;
384387
for (ivar=1; ivar<line->n_allele; ivar++)
385388
{
386389
if ( ivar>1 ) kputc(',',&srt->str);
387390
kputs(line->d.allele[0],&srt->str);
388391
kputc('>',&srt->str);
389392
kputs(line->d.allele[ivar],&srt->str);
393+
394+
// If symbolic allele, check also the END tag in case there are multiple events,
395+
// such as <DEL>s, starting at the same positions
396+
if ( line->d.allele[ivar][0]=='<' )
397+
{
398+
if ( end_pos==-1 )
399+
{
400+
bcf_info_t *end_info = bcf_get_info(reader->header,line,"END");
401+
if ( end_info )
402+
end_pos = (int)end_info->v1.i; // this is only to create a unique id, we don't mind a potential int64 overflow
403+
else
404+
end_pos = 0;
405+
}
406+
if ( end_pos )
407+
{
408+
kputc('/',&srt->str);
409+
kputw(end_pos, &srt->str);
410+
}
411+
}
390412
}
391413
if ( line->n_allele==1 )
392414
{

bgzf.c

+3-5
Original file line numberDiff line numberDiff line change
@@ -1022,7 +1022,7 @@ int bgzf_read_block(BGZF *fp)
10221022
if (j->hit_eof) {
10231023
if (!fp->last_block_eof && !fp->no_eof_block) {
10241024
fp->no_eof_block = 1;
1025-
hts_log_warning("EOF marker is absent. The input is probably truncated");
1025+
hts_log_warning("EOF marker is absent. The input may be truncated");
10261026
}
10271027
fp->mt->hit_eof = 1;
10281028
}
@@ -1124,7 +1124,7 @@ int bgzf_read_block(BGZF *fp)
11241124
if (count == 0) { // no data read
11251125
if (!fp->last_block_eof && !fp->no_eof_block && !fp->is_gzip) {
11261126
fp->no_eof_block = 1;
1127-
hts_log_warning("EOF marker is absent. The input is probably truncated");
1127+
hts_log_warning("EOF marker is absent. The input may be truncated");
11281128
}
11291129
fp->block_length = 0;
11301130
return 0;
@@ -1467,7 +1467,7 @@ static void *bgzf_mt_writer(void *vp) {
14671467
int bgzf_mt_read_block(BGZF *fp, bgzf_job *j)
14681468
{
14691469
uint8_t header[BLOCK_HEADER_LENGTH], *compressed_block;
1470-
int count, size = 0, block_length, remaining;
1470+
int count, block_length, remaining;
14711471

14721472
// NOTE: Guaranteed to be compressed as we block multi-threading in
14731473
// uncompressed mode. However it may be gzip compression instead
@@ -1496,7 +1496,6 @@ int bgzf_mt_read_block(BGZF *fp, bgzf_job *j)
14961496
if (count != sizeof(header)) // no data read
14971497
return -1;
14981498

1499-
size = count;
15001499
block_length = unpackInt16((uint8_t*)&header[16]) + 1; // +1 because when writing this number, we used "-1"
15011500
if (block_length < BLOCK_HEADER_LENGTH) {
15021501
j->errcode |= BGZF_ERR_HEADER;
@@ -1510,7 +1509,6 @@ int bgzf_mt_read_block(BGZF *fp, bgzf_job *j)
15101509
j->errcode |= BGZF_ERR_IO;
15111510
return -1;
15121511
}
1513-
size += count;
15141512
j->comp_len = block_length;
15151513
j->uncomp_len = BGZF_MAX_BLOCK_SIZE;
15161514
j->block_address = block_address;

0 commit comments

Comments
 (0)