diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index ad5b98aba..0b759bc1a 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -14,18 +14,13 @@ Once you're done, someone will review your PR shortly (see the section "Who can Fixes # (issue) - ## Before submitting -- [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). -- [ ] Did you read the [contributor guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests), - Pull Request section? -- [ ] Was this discussed/approved via a Github issue or the [forum](https://discuss.huggingface.co/)? Please add a link - to it if that's the case. -- [ ] Did you make sure to update the documentation with your changes? Here are the - [documentation guidelines](https://github.com/huggingface/transformers/tree/main/docs), and - [here are tips on formatting docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation). -- [ ] Did you write any new necessary tests? +- [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). +- [ ] Did you read the [contributor guideline](https://github.com/huggingface/text-embeddings-inference/blob/main/CONTRIBUTING.md)? +- [ ] Was this discussed/approved via a GitHub issue or the [forum](https://discuss.huggingface.co/)? Please add a link to it if that's the case. +- [ ] Did you make sure to update the documentation with your changes? Here are the [documentation guidelines](https://github.com/huggingface/transformers/tree/main/docs). +- [ ] Did you write any new necessary tests? If applicable, did you include or update the `insta` snapshots? ## Who can review? @@ -34,7 +29,6 @@ members/contributors who may be interested in your PR. 
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index b23f3150a..5db1b1d37 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,4 +1,3 @@ - # Contributor Covenant Code of Conduct ## Our Pledge diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 579011647..7c6234654 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. --> -# Contribute to text-embeddings-inference +# Contribute to Text Embeddings Inference (TEI) Everyone is welcome to contribute, and we value everybody's contribution. Code contributions are not the only way to help the community. Answering questions, helping @@ -31,7 +31,7 @@ However you choose to contribute, please be mindful and respect our ## Ways to contribute -There are several ways you can contribute to text-embeddings-inference. +There are several ways you can contribute to Text Embeddings Inference (TEI). * Fix outstanding issues with the existing code. * Submit issues related to bugs or desired new features. @@ -52,7 +52,7 @@ feedback. ### Did you find a bug? -The text-embeddings-inference library is robust and reliable thanks to users who report the problems they encounter. +The Text Embeddings Inference (TEI) solution is robust and reliable thanks to users who report the problems they encounter. Before you report an issue, we would really appreciate it if you could **make sure the bug was not already reported** (use the search bar on GitHub under Issues). Your issue should also be related to bugs in the @@ -68,7 +68,7 @@ we can quickly resolve it: ### Do you want a new feature? -If there is a new feature you'd like to see in text-embeddings-inference, please open an issue and describe: +If there is a new feature you'd like to see in Text Embeddings Inference (TEI), please open an issue and describe: 1. What is the *motivation* behind this feature? Is it related to a problem or frustration with the library? 
Is it a feature related to something you need for a project? Is it something you worked on and think it could benefit @@ -94,7 +94,7 @@ New models are constantly released and if you want to implement a new model, ple * Link to the implementation if it is open-sourced. * Link to the model weights if they are available. -If you are willing to contribute the model yourself, let us know so we can help you add it to text-embeddings-inference! +If you are willing to contribute the model yourself, let us know so we can help you add it to Text Embeddings Inference (TEI)! ## Do you want to add documentation? @@ -104,8 +104,8 @@ happy to make the changes or help you make a contribution if you're interested! ## I want to become a maintainer of the project. How do I get there? -TGI is a project led and managed by Hugging Face as it powers our internal services. However, we are happy to have -motivated individuals from other organizations join us as maintainers with the goal of making TGI the best inference -service. +Text Embeddings Inference (TEI) is a project led and managed by Hugging Face as it powers our internal services. However, we are happy to have +motivated individuals from other organizations join us as maintainers with the goal of making TEI the best inference +service for embedding models in production. If you are such an individual (or organization), please reach out to us and let's collaborate. 
diff --git a/backends/grpc-client/src/client.rs b/backends/grpc-client/src/client.rs index 2f4868f55..1f6036eed 100644 --- a/backends/grpc-client/src/client.rs +++ b/backends/grpc-client/src/client.rs @@ -6,7 +6,7 @@ use grpc_metadata::InjectTelemetryContext; use tonic::transport::{Channel, Uri}; use tracing::instrument; -/// Text Generation Inference gRPC client +/// Text Embeddings Inference gRPC client #[derive(Debug, Clone)] pub struct Client { stub: EmbeddingServiceClient, diff --git a/backends/python/server/text_embeddings_server/utils/flash_attn.py b/backends/python/server/text_embeddings_server/utils/flash_attn.py index 3fb6b06d3..35efe2ddf 100644 --- a/backends/python/server/text_embeddings_server/utils/flash_attn.py +++ b/backends/python/server/text_embeddings_server/utils/flash_attn.py @@ -1,9 +1,10 @@ import os -import torch -from text_embeddings_server.utils.device import use_ipex, is_hpu +import torch from loguru import logger +from text_embeddings_server.utils.device import is_hpu, use_ipex + if os.getenv("USE_FLASH_ATTENTION", "").lower() == "false": raise ImportError("`USE_FLASH_ATTENTION` is false.") @@ -30,7 +31,7 @@ except ImportError: raise ImportError( "Flash Attention V2 is not installed.\n" - "Use the official Docker image (ghcr.io/huggingface/text-generation-inference:latest) " + "Use the official Docker image (ghcr.io/huggingface/text-embeddings-inference:cuda-latest) " "or install flash attention v2 with `cd server && make install install-flash-attention-v2`" ) if not (is_sm8x or is_sm90): @@ -45,7 +46,7 @@ except ImportError: raise ImportError( "Flash Attention is not installed.\n" - "Use the official Docker image (ghcr.io/huggingface/text-generation-inference:latest) " + "Use the official Docker image (ghcr.io/huggingface/text-embeddings-inference:cuda-latest) " "or install flash attention with `cd server && make install install-flash-attention`" ) from e diff --git a/backends/src/lib.rs b/backends/src/lib.rs index 073f94f4a..85f26e6c0 
100644 --- a/backends/src/lib.rs +++ b/backends/src/lib.rs @@ -179,7 +179,12 @@ impl Backend { } #[instrument(skip_all)] - pub fn create_warmup_batch(&self, shape: (u32, u32), max_token: u32, seq_bucket_size: u32) -> Batch { + pub fn create_warmup_batch( + &self, + shape: (u32, u32), + max_token: u32, + seq_bucket_size: u32, + ) -> Batch { let (batch_size, length) = shape; let min_length = length.saturating_sub(seq_bucket_size).saturating_add(1); let tmp_length = if min_length < length {