diff --git a/mteb/encoder_interface.py b/mteb/encoder_interface.py
index 5a66330cdc..abb88f3ac4 100644
--- a/mteb/encoder_interface.py
+++ b/mteb/encoder_interface.py
@@ -179,29 +179,24 @@ def encode(  # current a 1-1 match with Encoder.encode
     ) -> np.ndarray:
         pass
 
-    def get_image_embeddings(  # Seems like sentence transformers use a singular encode for both images and text. Not sure if we want to do the same.
-        # If not it might be ideal to redefine Encoder.encode
+    def get_image_embeddings(
         self,
         images: list[Image.Image] | DataLoader,
         **kwargs,
-        # removed batch_size, it is not required that it will accept kwargs
-    ) -> np.ndarray:  # added standard output (I believe we actually expect tensors in the code, but would like to be consistent)
+    ) -> np.ndarray:
         pass
 
-    def get_text_embeddings(  # any reason for this?
+    def get_text_embeddings(
         self,
         texts: list[str],
         **kwargs,
     ) -> np.ndarray:
         pass
 
-    def get_fused_embeddings(  # hmm what if I have a document with images at specific positions?
+    def get_fused_embeddings(
         self,
         texts: list[str] | None = None,
-        images: list[Image.Image]
-        | DataLoader
-        | None = None,  # the requirement for these two to be the same seems odd (docs without images, images without associated text, docs with multiple images)
-        # fusion_mode: str="sum", # will remove this as it should be required in the interface
+        images: list[Image.Image] | DataLoader | None = None,
         **kwargs: Any,
     ) -> np.ndarray:
         pass
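
For illustration only (not part of the patch), here is a minimal sketch of a model class that would satisfy the cleaned-up protocol. The class name `DummyImageTextModel`, the random-embedding "backbone", and the sum-based fusion are assumptions made for this example; the interface itself only fixes the three method signatures shown in the diff.

```python
from __future__ import annotations

from typing import Any

import numpy as np
from PIL import Image
from torch.utils.data import DataLoader


class DummyImageTextModel:
    """Toy model returning random embeddings; shows the expected signatures."""

    def __init__(self, dim: int = 32, seed: int = 0) -> None:
        self.dim = dim
        self.rng = np.random.default_rng(seed)

    def get_text_embeddings(self, texts: list[str], **kwargs: Any) -> np.ndarray:
        return self.rng.normal(size=(len(texts), self.dim)).astype(np.float32)

    def get_image_embeddings(
        self, images: list[Image.Image] | DataLoader, **kwargs: Any
    ) -> np.ndarray:
        # Assumes a map-style dataset when a DataLoader is passed.
        n = len(images) if isinstance(images, list) else len(images.dataset)
        return self.rng.normal(size=(n, self.dim)).astype(np.float32)

    def get_fused_embeddings(
        self,
        texts: list[str] | None = None,
        images: list[Image.Image] | DataLoader | None = None,
        **kwargs: Any,
    ) -> np.ndarray:
        # Sum fusion is just one possible choice; the interface leaves the
        # fusion strategy to the implementation (the patch drops the
        # fusion_mode parameter from the signature).
        parts = []
        if texts is not None:
            parts.append(self.get_text_embeddings(texts, **kwargs))
        if images is not None:
            parts.append(self.get_image_embeddings(images, **kwargs))
        if not parts:
            raise ValueError("Provide texts, images, or both.")
        return np.sum(parts, axis=0)
```

Because the protocol methods are `pass`-only stubs, any object exposing these three methods with matching signatures checks structurally against it. Note that sum fusion, as sketched here, requires the text and image batches to have the same length, which is the coupling the removed inline comment questions.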