Fix broken AsyncInferenceClient on [DONE] signal #2458

Merged
merged 3 commits on Aug 19, 2024
6 changes: 3 additions & 3 deletions src/huggingface_hub/inference/_common.py
@@ -296,7 +296,7 @@ def _format_text_generation_stream_output(
     if not byte_payload.startswith(b"data:"):
         return None  # empty line

-    if byte_payload == b"data: [DONE]":
+    if byte_payload.strip() == b"data: [DONE]":
         raise StopIteration("[DONE] signal received.")

     # Decode payload
@@ -344,7 +344,7 @@ def _format_chat_completion_stream_output(
     if not byte_payload.startswith(b"data:"):
         return None  # empty line

-    if byte_payload == b"data: [DONE]":
+    if byte_payload.strip() == b"data: [DONE]":
         raise StopIteration("[DONE] signal received.")

     # Decode payload
@@ -355,7 +355,7 @@ def _format_chat_completion_stream_output(

 async def _async_yield_from(client: "ClientSession", response: "ClientResponse") -> AsyncIterable[bytes]:
     async for byte_payload in response.content:
-        yield byte_payload
+        yield byte_payload.strip()
@cordawyn commented on Aug 19, 2024:
@Wauplin Btw, the two other byte_payload.strip() calls (above) may not be necessary: _format_text_generation_stream_output and _format_chat_completion_stream_output receive "[DONE]" without a trailing "\n" from the regular client (which uses .iter_lines and strips newlines naturally), while the async client's byte_payload passes through _async_yield_from here first.

Maybe just a micro-optimization, in case performance ever becomes an issue here.
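To make this asymmetry concrete, here is a minimal sketch; the endpoint URL is hypothetical and the printed payloads are illustrative, but the line-splitting behavior of requests and aiohttp is the documented one:

```python
import asyncio

import aiohttp
import requests

URL = "http://localhost:8080/stream"  # hypothetical SSE endpoint, for illustration only


def sync_lines() -> None:
    # requests' iter_lines() removes the trailing newline from each line,
    # so the sync client's formatters already receive b"data: [DONE]" exactly.
    with requests.get(URL, stream=True) as response:
        for line in response.iter_lines():
            print(repr(line))  # e.g. b'data: [DONE]'


async def async_lines() -> None:
    # aiohttp's response.content yields raw lines that keep the trailing b"\n",
    # so before this PR the async client compared b"data: [DONE]\n" against
    # b"data: [DONE]" and never detected the stop signal.
    async with aiohttp.ClientSession() as session:
        async with session.get(URL) as response:
            async for line in response.content:
                print(repr(line))  # e.g. b'data: [DONE]\n'


if __name__ == "__main__":
    sync_lines()
    asyncio.run(async_lines())
```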

@Wauplin (Contributor, Author) replied:

Yes, I saw that, but it's better to be safe. Each method, taken individually, should be able to accept unstripped data and return stripped data. In any case, inference time is usually much higher than the parsing overhead.

     await client.close()


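To illustrate the defensive parsing discussed above, here is a minimal, self-contained sketch; it condenses the two _format_*_stream_output helpers into one hypothetical function and is not the library's exact code:

```python
import json
from typing import Optional


def format_stream_output(byte_payload: bytes) -> Optional[dict]:
    """Hypothetical condensation of the _format_*_stream_output helpers."""
    # Ignore anything that is not an SSE data line (empty lines, keep-alives).
    if not byte_payload.startswith(b"data:"):
        return None
    # strip() makes the comparison robust whether or not the upstream iterator
    # (sync .iter_lines() or the patched _async_yield_from) already removed
    # the trailing newline.
    if byte_payload.strip() == b"data: [DONE]":
        raise StopIteration("[DONE] signal received.")
    # Decode payload: drop the "data:" prefix and parse the JSON chunk.
    return json.loads(byte_payload.decode("utf-8").removeprefix("data:"))


# Both the stripped stop signal (sync path) and the unstripped variants
# (async path before this PR) now reach the StopIteration branch:
for payload in (b"data: [DONE]", b"data: [DONE]\n", b"data: [DONE] "):
    try:
        format_stream_output(payload)
    except StopIteration:
        print(f"stop signal detected in {payload!r}")
```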
53 changes: 22 additions & 31 deletions tests/cassettes/test_async_chat_completion_with_stream.yaml
@@ -3,80 +3,71 @@ interactions:
body: null
headers:
user-agent:
- - unknown/None; hf_hub/0.22.0.dev0; python/3.10.12; torch/2.2.0; tensorflow/2.15.0.post1;
+ - unknown/None; hf_hub/0.25.0.dev0; python/3.10.12; torch/2.3.1; tensorflow/2.17.0;
fastcore/1.5.23
method: POST
uri: https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta/v1/chat/completions
response:
body:
- string: 'data:{"id":"","object":"text_completion","created":1710439508,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"1.4.3-sha-e6bb3ff","choices":[{"index":0,"delta":{"role":"assistant","content":"Deep"},"logprobs":null,"finish_reason":null}]}
+ string: 'data:{"id":"","object":"text_completion","created":1724071633,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"2.0.4-sha-f426a33","choices":[{"index":0,"delta":{"role":"assistant","content":"Deep"},"logprobs":null,"finish_reason":null}]}


- data:{"id":"","object":"text_completion","created":1710439508,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"1.4.3-sha-e6bb3ff","choices":[{"index":0,"delta":{"role":"assistant","content":"
- Learning"},"logprobs":null,"finish_reason":null}]}
+ data:{"id":"","object":"text_completion","created":1724071633,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"2.0.4-sha-f426a33","choices":[{"index":0,"delta":{"role":"assistant","content":"
+ learning"},"logprobs":null,"finish_reason":null}]}


- data:{"id":"","object":"text_completion","created":1710439508,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"1.4.3-sha-e6bb3ff","choices":[{"index":0,"delta":{"role":"assistant","content":"
+ data:{"id":"","object":"text_completion","created":1724071633,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"2.0.4-sha-f426a33","choices":[{"index":0,"delta":{"role":"assistant","content":"
is"},"logprobs":null,"finish_reason":null}]}


- data:{"id":"","object":"text_completion","created":1710439508,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"1.4.3-sha-e6bb3ff","choices":[{"index":0,"delta":{"role":"assistant","content":"
+ data:{"id":"","object":"text_completion","created":1724071633,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"2.0.4-sha-f426a33","choices":[{"index":0,"delta":{"role":"assistant","content":"
a"},"logprobs":null,"finish_reason":null}]}


- data:{"id":"","object":"text_completion","created":1710439508,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"1.4.3-sha-e6bb3ff","choices":[{"index":0,"delta":{"role":"assistant","content":"
+ data:{"id":"","object":"text_completion","created":1724071633,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"2.0.4-sha-f426a33","choices":[{"index":0,"delta":{"role":"assistant","content":"
sub"},"logprobs":null,"finish_reason":null}]}


- data:{"id":"","object":"text_completion","created":1710439508,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"1.4.3-sha-e6bb3ff","choices":[{"index":0,"delta":{"role":"assistant","content":"field"},"logprobs":null,"finish_reason":null}]}
+ data:{"id":"","object":"text_completion","created":1724071633,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"2.0.4-sha-f426a33","choices":[{"index":0,"delta":{"role":"assistant","content":"field"},"logprobs":null,"finish_reason":null}]}


- data:{"id":"","object":"text_completion","created":1710439508,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"1.4.3-sha-e6bb3ff","choices":[{"index":0,"delta":{"role":"assistant","content":"
+ data:{"id":"","object":"text_completion","created":1724071633,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"2.0.4-sha-f426a33","choices":[{"index":0,"delta":{"role":"assistant","content":"
of"},"logprobs":null,"finish_reason":null}]}


- data:{"id":"","object":"text_completion","created":1710439508,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"1.4.3-sha-e6bb3ff","choices":[{"index":0,"delta":{"role":"assistant","content":"
- Machine"},"logprobs":null,"finish_reason":null}]}
+ data:{"id":"","object":"text_completion","created":1724071633,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"2.0.4-sha-f426a33","choices":[{"index":0,"delta":{"role":"assistant","content":"
+ machine"},"logprobs":null,"finish_reason":null}]}


- data:{"id":"","object":"text_completion","created":1710439508,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"1.4.3-sha-e6bb3ff","choices":[{"index":0,"delta":{"role":"assistant","content":"
- Learning"},"logprobs":null,"finish_reason":null}]}
+ data:{"id":"","object":"text_completion","created":1724071633,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"2.0.4-sha-f426a33","choices":[{"index":0,"delta":{"role":"assistant","content":"
+ learning"},"logprobs":null,"finish_reason":null}]}


- data:{"id":"","object":"text_completion","created":1710439508,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"1.4.3-sha-e6bb3ff","choices":[{"index":0,"delta":{"role":"assistant","content":"
- that"},"logprobs":null,"finish_reason":null}]}
+ data:{"id":"","object":"text_completion","created":1724071633,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"2.0.4-sha-f426a33","choices":[{"index":0,"delta":{"role":"assistant","content":"
+ that"},"logprobs":null,"finish_reason":"length"}]}


- data:{"id":"","object":"text_completion","created":1710439508,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"1.4.3-sha-e6bb3ff","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"length"}]}
'
headers:
Access-Control-Allow-Credentials:
- 'true'
Access-Control-Allow-Origin:
- '*'
Cache-Control:
- no-cache
Connection:
- keep-alive
Content-Length:
- '2526'
Content-Type:
- text/event-stream
Date:
- Thu, 14 Mar 2024 18:05:08 GMT
Transfer-Encoding:
- chunked
- Mon, 19 Aug 2024 13:01:29 GMT
Vary:
- origin, Origin, Access-Control-Request-Method, Access-Control-Request-Headers
x-accel-buffering:
- 'no'
x-compute-characters:
- '103'
- Origin, Access-Control-Request-Method, Access-Control-Request-Headers
x-compute-type:
- 1-a10-g
- cache
x-request-id:
- idvh81inTm9FBUT-za5t7
- w9oS4KSPCoEAOi6QV7-cX
x-sha:
- b70e0c9a2d9e14bd1e812d3c398e5f313e93b473
status:
code: 200
message: OK
url: https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta/v1/chat/completions
version: 1
8 changes: 3 additions & 5 deletions tests/test_inference_async_client.py
@@ -212,18 +212,16 @@ async def test_async_chat_completion_with_stream() -> None:

     all_items = [item async for item in output]
     generated_text = ""
-    for item in all_items[:-1]:  # all but last item
+    for item in all_items:
         assert isinstance(item, ChatCompletionStreamOutput)
         assert len(item.choices) == 1
         generated_text += item.choices[0].delta.content
     last_item = all_items[-1]

-    assert generated_text == "Deep Learning is a subfield of Machine Learning that"
+    assert generated_text == "Deep learning is a subfield of machine learning that"

-    # Last item has a finish reason but no role/content delta
+    # Last item has a finish reason
     assert last_item.choices[0].finish_reason == "length"
-    assert last_item.choices[0].delta.role is None
-    assert last_item.choices[0].delta.content is None

@pytest.mark.vcr
10 changes: 6 additions & 4 deletions tests/test_inference_client.py
@@ -937,13 +937,14 @@ def test_chat_completion_base_url_works_with_v1(base_url: str):
     assert post_mock.call_args_list[0].kwargs["model"] == "http://0.0.0.0:8080/v1/chat/completions"


-def test_stream_text_generation_response():
+@pytest.mark.parametrize("stop_signal", [b"data: [DONE]", b"data: [DONE]\n", b"data: [DONE] "])
+def test_stream_text_generation_response(stop_signal: bytes):
     data = [
         b'data: {"index":1,"token":{"id":4560,"text":" trying","logprob":-2.078125,"special":false},"generated_text":null,"details":null}',
         b"",  # Empty line is skipped
         b"\n",  # Newline is skipped
         b'data: {"index":2,"token":{"id":311,"text":" to","logprob":-0.026245117,"special":false},"generated_text":" trying to","details":null}',
-        b"data: [DONE]",  # Stop signal
+        stop_signal,  # Stop signal
         # Won't parse after
         b'data: {"index":2,"token":{"id":311,"text":" to","logprob":-0.026245117,"special":false},"generated_text":" trying to","details":null}',
     ]
@@ -952,13 +953,14 @@ def test_stream_text_generation_response():
     assert output == [" trying", " to"]


-def test_stream_chat_completion_response():
+@pytest.mark.parametrize("stop_signal", [b"data: [DONE]", b"data: [DONE]\n", b"data: [DONE] "])
+def test_stream_chat_completion_response(stop_signal: bytes):
     data = [
         b'data: {"object":"chat.completion.chunk","id":"","created":1721737661,"model":"","system_fingerprint":"2.1.2-dev0-sha-5fca30e","choices":[{"index":0,"delta":{"role":"assistant","content":"Both"},"logprobs":null,"finish_reason":null}]}',
         b"",  # Empty line is skipped
         b"\n",  # Newline is skipped
         b'data: {"object":"chat.completion.chunk","id":"","created":1721737661,"model":"","system_fingerprint":"2.1.2-dev0-sha-5fca30e","choices":[{"index":0,"delta":{"role":"assistant","content":" Rust"},"logprobs":null,"finish_reason":null}]}',
-        b"data: [DONE]",  # Stop signal
+        stop_signal,  # Stop signal
         # Won't parse after
         b'data: {"index":2,"token":{"id":311,"text":" to","logprob":-0.026245117,"special":false},"generated_text":" trying to","details":null}',
     ]