Skip to content

Commit

Permalink
Allow adding lowercase sequences (#480)
Browse files Browse the repository at this point in the history
  • Loading branch information
ctb authored and luizirber committed May 27, 2018
1 parent 26cafa1 commit 06ffeac
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 1 deletion.
4 changes: 3 additions & 1 deletion sourmash/kmer_min_hash.hh
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,9 @@ public:
if (strlen(sequence) < ksize) {
return;
}
const std::string seq = sequence;
std::string seq = sequence;
transform(seq.begin(), seq.end(), seq.begin(), ::toupper);

if (!is_protein) {
for (unsigned int i = 0; i < seq.length() - ksize + 1; i++) {
const std::string kmer = seq.substr(i, ksize);
Expand Down
13 changes: 13 additions & 0 deletions tests/test__minhash.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,19 @@ def test_basic_dna_bad_force_2(track_abundance):
assert len(mh.get_mins()) == 2 # (only 2 hashes should be there)


def test_consume_lowercase(track_abundance):
    """A lowercased sequence and its uppercase original must compare as 1.0."""
    dna = 'TGCCGCCCAGCACCGGGTGACTAGGTTGAGCCATGATTAACCTGCAATGA'

    lower_mh = MinHash(20, 10, track_abundance=track_abundance)
    upper_mh = MinHash(20, 10, track_abundance=track_abundance)

    # Feed the same sequence to both sketches, differing only in letter case.
    lower_mh.add_sequence(dna.lower())
    upper_mh.add_sequence(dna)

    # Check both cross-comparisons and both self-comparisons.
    pairings = (
        (lower_mh, upper_mh),
        (upper_mh, upper_mh),
        (upper_mh, lower_mh),
        (lower_mh, lower_mh),
    )
    for left, right in pairings:
        assert left.compare(right) == 1.0


def test_compare_1(track_abundance):
a = MinHash(20, 10, track_abundance=track_abundance)
b = MinHash(20, 10, track_abundance=track_abundance)
Expand Down

0 comments on commit 06ffeac

Please sign in to comment.