FlagOpen · dream-tentacle · Sep 23, 2024
diff --git a/FlagEmbedding/flag_models.py b/FlagEmbedding/flag_models.py
@@ -237,7 +237,7 @@ def __init__(
     def encode_queries(self, queries: Union[List[str], str],
                        batch_size: int = 256,
                        max_length: int = 512,
-                       convert_to_numpy: bool = True) -> np.ndarray:
+                       convert_to_numpy: bool = True) -> Union[np.ndarray, torch.Tensor]: 
         '''
         This function will be used for retrieval task
         if there is a instruction for queries, we will add it to the query text
@@ -252,7 +252,7 @@ def encode_corpus(self,
                       corpus: Union[List[str], str],
                       batch_size: int = 256,
                       max_length: int = 512,
-                      convert_to_numpy: bool = True) -> np.ndarray:
+                      convert_to_numpy: bool = True) -> Union[np.ndarray, torch.Tensor]:
         '''
         This function will be used for retrieval task
         encode corpus for retrieval task
@@ -264,7 +264,7 @@ def encode(self,
                sentences: Union[List[str], str],
                batch_size: int = 256,
                max_length: int = 512,
-               convert_to_numpy: bool = True) -> np.ndarray:
+               convert_to_numpy: bool = True) -> Union[np.ndarray, torch.Tensor]:
         if self.num_gpus > 0:
             batch_size = batch_size * self.num_gpus
         self.model.eval()
@@ -341,7 +341,7 @@ def __init__(
     def encode_queries(self, queries: Union[List[str], str],
                        batch_size: int = 256,
                        max_length: int = 512,
-                       convert_to_numpy: bool = True) -> np.ndarray:
+                       convert_to_numpy: bool = True) -> Union[np.ndarray, torch.Tensor]:
         '''
         This function will be used for retrieval task
         if there is a instruction for queries, we will add it to the query text
@@ -350,7 +350,7 @@ def encode_queries(self, queries: Union[List[str], str],
             if isinstance(queries, str):
                 input_texts = self.query_instruction_for_retrieval + queries
             else:
-                input_texts = ['{}{}'.format(self.query_instruction_for_retrieval, q) for q in queries]
+                input_texts = [(self.query_instruction_for_retrieval + q) for q in queries]
         else:
             input_texts = queries
         return self.encode(input_texts, batch_size=batch_size, max_length=max_length, convert_to_numpy=convert_to_numpy)
@@ -359,7 +359,7 @@ def encode_corpus(self,
                       corpus: Union[List[str], str],
                       batch_size: int = 256,
                       max_length: int = 512,
-                      convert_to_numpy: bool = True) -> np.ndarray:
+                      convert_to_numpy: bool = True) -> Union[np.ndarray, torch.Tensor]:
         '''
         This function will be used for retrieval task
         encode corpus for retrieval task
@@ -371,7 +371,7 @@ def encode(self,
                sentences: Union[List[str], str],
                batch_size: int = 256,
                max_length: int = 512,
-               convert_to_numpy: bool = True) -> np.ndarray:
+               convert_to_numpy: bool = True) -> Union[np.ndarray, torch.Tensor]:
         if self.num_gpus > 0:
             batch_size = batch_size * self.num_gpus
         self.model.eval()

diff --git a/Tutorials/1_Embedding/1.2.1_BGE_Series.ipynb b/Tutorials/1_Embedding/1.2.1_BGE_Series.ipynb
@@ -266,7 +266,7 @@
     "```\n",
     "LLMEmbedder.encode_keys(keys, batch_size=256, max_length=512, task='qa')\n",
     "```\n",
-    "Similarly, *encode_keys()* also calls *_encode()* and automatically add instructions according to given task."
+    "Similarly, *encode_keys()* also calls *_encode()* and automatically adds instructions according to the given task."
    ]
   },
   {