-
-
Notifications
You must be signed in to change notification settings - Fork 177
Find similar images
JLuc edited this page Feb 21, 2020
·
1 revision
Best of https://github.com/jenssegers/imagehash/issues/45
@rw4lll : I have 3 million hashes in the DB and need to compare my current hash against each of them to find all similar images. Is there any solution other than direct comparison, such as using a distance function in the database?
@Alex-Gramm : select * from images where BIT_COUNT(X'3c3e0e1a3a1e1e1e' ^ images.hash) < 5
@rw4lll : We are using Postgres, which doesn't have a bit_count function, so I wrote a custom function for that:
-- hash_distance(hash1, hash2)
--
-- Hamming distance between two hashes supplied as strings of binary digits
-- ('0' / '1'). When the strings differ in length, the shorter one is
-- left-padded with '0' to the length of the longer one before comparison.
--
-- Parameters:
--   hash1, hash2  binary-digit strings; anything that is not a valid bit
--                 string makes the cast to bit varying raise an error.
-- Returns:
--   number of bit positions in which the two (padded) hashes differ;
--   NULL if either argument is NULL (STRICT).
--
-- IMMUTABLE: the result depends only on the arguments, so the planner may
-- pre-evaluate constant calls and the function can be used in expression
-- indexes. PARALLEL SAFE (PostgreSQL 9.6+) lets large table scans that call
-- it run in parallel workers.
--
-- NOTE(review): PostgreSQL 14+ ships a built-in bit_count() for bit strings
-- that could replace the replace()/length() counting below -- confirm the
-- minimum server version before switching.
CREATE OR REPLACE FUNCTION hash_distance(hash1 varchar, hash2 varchar) RETURNS integer AS $$
DECLARE
    -- Common width both hashes are brought to before XOR-ing.
    width int := GREATEST(length(hash1), length(hash2));
    diff  bit varying;
BEGIN
    -- lpad() leaves a string that is already `width` characters long
    -- unchanged, so no per-argument length check is needed. The # operator
    -- (bitwise XOR) requires both operands to have the same length.
    diff := lpad(hash1, width, '0')::bit varying
          # lpad(hash2, width, '0')::bit varying;

    -- Count the set bits: strip every '0' and measure what is left.
    RETURN length(replace(diff::text, '0', ''));
END;
$$ LANGUAGE plpgsql IMMUTABLE STRICT PARALLEL SAFE;
-- Example: '100000' is left-padded to '00100000'; XOR with '11010011'
-- gives '11110011', which has 6 set bits, so this returns 6.
SELECT hash_distance('100000', '11010011');