Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 5 additions & 10 deletions mteb/encoder_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,29 +179,24 @@ def encode( # current a 1-1 match with Encoder.encode
) -> np.ndarray:
pass

def get_image_embeddings( # Seems like sentence transformers use a singular encode for both images and text. Not sure if we want to do the same.
# If not it might be ideal to redefine Encoder.encode
def get_image_embeddings(
self,
images: list[Image.Image] | DataLoader,
**kwargs,
# removed batch_size, it is not required that it will accept kwargs
) -> np.ndarray: # added standard output (I believe we actually expect tensors in the code, but would like to be consistent)
) -> np.ndarray:
pass

def get_text_embeddings( # any reason for this?
def get_text_embeddings(
self,
texts: list[str],
**kwargs,
) -> np.ndarray:
pass

def get_fused_embeddings( # hmm what if I have a document with images at specific positions?
def get_fused_embeddings(
self,
texts: list[str] | None = None,
images: list[Image.Image]
| DataLoader
| None = None, # the requirement for these two to be the same seems odd (docs without images, images without associated text, docs with multiple images)
# fusion_mode: str="sum", # will remove this as it should be required in the interface
images: list[Image.Image] | DataLoader | None = None,
**kwargs: Any,
) -> np.ndarray:
pass