-
Notifications
You must be signed in to change notification settings - Fork 80
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add a HLL impl, move alphabet stuff to encodings
hll ffi expose cardinality, add_hash and add_sequence
- Loading branch information
Showing
25 changed files
with
5,186 additions
and
687 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
# -*- coding: UTF-8 -*- | ||
|
||
import sys | ||
from tempfile import NamedTemporaryFile | ||
|
||
from ._lowlevel import ffi, lib | ||
from .utils import RustObject, rustcall, decode_str | ||
from .exceptions import SourmashError | ||
from .minhash import to_bytes, MinHash | ||
|
||
|
||
class HLL(RustObject):
    """Python wrapper around the native HyperLogLog sketch exposed via ``lib``.

    Every operation is forwarded to an ``lib.hll_*`` function through
    ``self._methodcall``, which is not defined in this class (presumably
    inherited from ``RustObject`` -- confirm in ``.utils``).
    """

    # Native function paired with the pointer held in ``_objptr``; presumably
    # invoked by RustObject when the wrapper is released -- TODO confirm.
    __dealloc_func__ = lib.hll_free

    def __init__(self, error_rate, ksize):
        """Create a native sketch from ``error_rate`` and ``ksize``.

        Both arguments are passed unchanged to ``lib.hll_with_error_rate``;
        the returned pointer is stored in ``self._objptr``.
        """
        self._objptr = lib.hll_with_error_rate(error_rate, ksize)

    def __len__(self):
        """Return the same value as :meth:`cardinality`."""
        return self.cardinality()

    def cardinality(self):
        """Return the cardinality reported by ``lib.hll_cardinality``."""
        return self._methodcall(lib.hll_cardinality)

    @property
    def ksize(self):
        """The k-mer size reported by ``lib.hll_ksize`` for this sketch."""
        return self._methodcall(lib.hll_ksize)

    def add_sequence(self, sequence, force=False):
        """Add a sequence into the sketch.

        ``sequence`` is converted with ``to_bytes`` and handed to
        ``lib.hll_add_sequence`` together with its length and ``force``.
        """
        # Convert once and measure the converted buffer: the native call
        # needs the length of what it actually receives. len(sequence) counts
        # characters for a str, which differs from the encoded size as soon
        # as the text contains non-ASCII characters.
        # NOTE(review): assumes to_bytes returns a bytes-like object that
        # supports len() -- confirm against .minhash.
        data = to_bytes(sequence)
        self._methodcall(lib.hll_add_sequence, data, len(data), force)

    def add_kmer(self, kmer):
        """Add a kmer into the sketch.

        Raises:
            ValueError: if ``len(kmer)`` is not equal to ``self.ksize``.
        """
        if len(kmer) != self.ksize:
            raise ValueError("kmer to add is not {} in length".format(self.ksize))
        self.add_sequence(kmer)

    def add(self, h):
        """Add one item: a ``str`` is added as a k-mer, anything else as a hash.

        Non-``str`` values are forwarded unchanged to ``lib.hll_add_hash``.
        """
        if isinstance(h, str):
            return self.add_kmer(h)
        return self._methodcall(lib.hll_add_hash, h)

    def update(self, other):
        """Fold the contents of another sketch into this one.

        An ``HLL`` is combined through ``lib.hll_merge`` and a ``MinHash``
        through ``lib.hll_update_mh``; in both cases the other object's
        ``_objptr`` is what gets passed to the native call.

        Raises:
            TypeError: if ``other`` is neither an ``HLL`` nor a ``MinHash``.
        """
        if isinstance(other, HLL):
            return self._methodcall(lib.hll_merge, other._objptr)
        elif isinstance(other, MinHash):
            return self._methodcall(lib.hll_update_mh, other._objptr)
        else:
            # FIXME: we could take sets here too (or anything that can be
            # converted to a list of ints...)
            raise TypeError("Must be a HyperLogLog or MinHash")
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.