@@ -64,46 +64,133 @@ The API supports:
curl 'https://api-inference.huggingface.co/models/google/gemma-2-2b-it/v1/chat/completions' \
-H "Authorization: Bearer hf_***" \
-H 'Content-Type: application/json' \
- -d '{
-     "model": "google/gemma-2-2b-it",
-     "messages": [{"role": "user", "content": "What is the capital of France?"}],
-     "max_tokens": 500,
-     "stream": false
+ --data '{
+     "model": "google/gemma-2-2b-it",
+     "messages": [
+         {
+             "role": "user",
+             "content": "What is the capital of France?"
+         }
+     ],
+     "max_tokens": 500,
+     "stream": true
}'
-
```
</curl>

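Since the updated request sets `"stream": true`, the endpoint responds with a stream of server-sent events rather than a single JSON body. A minimal sketch of consuming that stream in plain Python, assuming the OpenAI-style `data: ...` framing with a `[DONE]` sentinel (the exact framing is an assumption here, not something this page specifies):

```py
import json
import requests

API_URL = "https://api-inference.huggingface.co/models/google/gemma-2-2b-it/v1/chat/completions"
headers = {"Authorization": "Bearer hf_***", "Content-Type": "application/json"}
payload = {
    "model": "google/gemma-2-2b-it",
    "messages": [{"role": "user", "content": "What is the capital of France?"}],
    "max_tokens": 500,
    "stream": True,
}

# Read the response incrementally and decode each SSE "data:" line as one chunk.
with requests.post(API_URL, headers=headers, json=payload, stream=True) as resp:
    resp.raise_for_status()
    for line in resp.iter_lines():
        if not line.startswith(b"data:"):
            continue
        data = line[len(b"data:"):].strip()
        if data == b"[DONE]":  # assumed end-of-stream sentinel
            break
        chunk = json.loads(data)
        print(chunk["choices"][0]["delta"].get("content", ""), end="")
```
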
<python>
+ With huggingface_hub client:
```py
from huggingface_hub import InferenceClient

client = InferenceClient(api_key="hf_***")

- for message in client.chat_completion(
-     model="google/gemma-2-2b-it",
-     messages=[{"role": "user", "content": "What is the capital of France?"}],
+ messages = [
+     {
+         "role": "user",
+         "content": "What is the capital of France?"
+     }
+ ]
+
+ stream = client.chat.completions.create(
+     model="google/gemma-2-2b-it",
+     messages=messages,
    max_tokens=500,
-     stream=True,
- ):
-     print(message.choices[0].delta.content, end="")
+     stream=True
+ )
+
+ for chunk in stream:
+     print(chunk.choices[0].delta.content, end="")
+ ```
+
+ With openai client:
+ ```py
+ from openai import OpenAI
+
+ client = OpenAI(
+     base_url="https://api-inference.huggingface.co/v1/",
+     api_key="hf_***"
+ )
+
+ messages = [
+     {
+         "role": "user",
+         "content": "What is the capital of France?"
+     }
+ ]
+
+ stream = client.chat.completions.create(
+     model="google/gemma-2-2b-it",
+     messages=messages,
+     max_tokens=500,
+     stream=True
+ )
+
+ for chunk in stream:
+     print(chunk.choices[0].delta.content, end="")
```

To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion).
</python>

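If you don't need token-by-token output, the same call without `stream=True` blocks and returns one finished completion, which is the behavior the old `stream=False` examples showed. A brief sketch using the same `chat.completions.create` API this diff introduces:

```py
from huggingface_hub import InferenceClient

client = InferenceClient(api_key="hf_***")

# Without stream=True the client returns the complete response in one object.
response = client.chat.completions.create(
    model="google/gemma-2-2b-it",
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    max_tokens=500,
)

print(response.choices[0].message.content)
```
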
<js>
+ With huggingface_hub client:
```js
- import { HfInference } from "@huggingface/inference";
+ import { HfInference } from "@huggingface/inference"

- const inference = new HfInference("hf_***");
+ const client = new HfInference("hf_***")

- for await (const chunk of inference.chatCompletionStream({
+ let out = "";
+
+ const stream = client.chatCompletionStream({
    model: "google/gemma-2-2b-it",
-     messages: [{ role: "user", content: "What is the capital of France?" }],
+     messages: [
+         {
+             role: "user",
+             content: "What is the capital of France?"
+         }
+     ],
+     max_tokens: 500
+ });
+
+ for await (const chunk of stream) {
+     if (chunk.choices && chunk.choices.length > 0) {
+         const newContent = chunk.choices[0].delta.content;
+         out += newContent;
+         console.log(newContent);
+     }
+ }
+ ```
+
+ With openai client:
+ ```js
+ import { OpenAI } from "openai"
+
+ const client = new OpenAI({
+     baseURL: "https://api-inference.huggingface.co/v1/",
+     apiKey: "hf_***"
+ })
+
+ let out = "";
+
+ const stream = await client.chat.completions.create({
+     model: "google/gemma-2-2b-it",
+     messages: [
+         {
+             role: "user",
+             content: "What is the capital of France?"
+         }
+     ],
    max_tokens: 500,
- })) {
-     process.stdout.write(chunk.choices[0]?.delta?.content || "");
+     stream: true,
+ });
+
+ for await (const chunk of stream) {
+     if (chunk.choices && chunk.choices.length > 0) {
+         const newContent = chunk.choices[0].delta.content;
+         out += newContent;
+         console.log(newContent);
+     }
}
```

@@ -124,73 +211,188 @@ To use the JavaScript client, see `huggingface.js`'s [package reference](https:/
curl 'https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-11B-Vision-Instruct/v1/chat/completions' \
-H "Authorization: Bearer hf_***" \
-H 'Content-Type: application/json' \
- -d '{
-     "model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
-     "messages": [
+ --data '{
+     "model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
+     "messages": [
        {
            "role": "user",
            "content": [
-                 {"type": "image_url", "image_url": {"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"}},
-                 {"type": "text", "text": "Describe this image in one sentence."}
+                 {
+                     "type": "text",
+                     "text": "Describe this image in one sentence."
+                 },
+                 {
+                     "type": "image_url",
+                     "image_url": {
+                         "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+                     }
+                 }
            ]
        }
    ],
-     "max_tokens": 500,
-     "stream": false
+     "max_tokens": 500,
+     "stream": true
}'
-
```
</curl>

<python>
+ With huggingface_hub client:
```py
from huggingface_hub import InferenceClient

client = InferenceClient(api_key="hf_***")

- image_url = "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+ messages = [
+     {
+         "role": "user",
+         "content": [
+             {
+                 "type": "text",
+                 "text": "Describe this image in one sentence."
+             },
+             {
+                 "type": "image_url",
+                 "image_url": {
+                     "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+                 }
+             }
+         ]
+     }
+ ]
+
+ stream = client.chat.completions.create(
+     model="meta-llama/Llama-3.2-11B-Vision-Instruct",
+     messages=messages,
+     max_tokens=500,
+     stream=True
+ )

- for message in client.chat_completion(
-     model="meta-llama/Llama-3.2-11B-Vision-Instruct",
-     messages=[
-         {
-             "role": "user",
-             "content": [
-                 {"type": "image_url", "image_url": {"url": image_url}},
-                 {"type": "text", "text": "Describe this image in one sentence."},
-             ],
-         }
-     ],
+ for chunk in stream:
+     print(chunk.choices[0].delta.content, end="")
+ ```
+
+ With openai client:
+ ```py
+ from openai import OpenAI
+
+ client = OpenAI(
+     base_url="https://api-inference.huggingface.co/v1/",
+     api_key="hf_***"
+ )
+
+ messages = [
+     {
+         "role": "user",
+         "content": [
+             {
+                 "type": "text",
+                 "text": "Describe this image in one sentence."
+             },
+             {
+                 "type": "image_url",
+                 "image_url": {
+                     "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+                 }
+             }
+         ]
+     }
+ ]
+
+ stream = client.chat.completions.create(
+     model="meta-llama/Llama-3.2-11B-Vision-Instruct",
+     messages=messages,
    max_tokens=500,
-     stream=True,
- ):
-     print(message.choices[0].delta.content, end="")
+     stream=True
+ )
+
+ for chunk in stream:
+     print(chunk.choices[0].delta.content, end="")
```

To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion).
</python>

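The vision examples above all reference a publicly reachable image URL. For a local file, one common option with OpenAI-compatible endpoints is to inline the image as a base64 `data:` URL; a hedged sketch (the data-URL support and the `cat.jpg` path are assumptions, not shown on this page):

```py
import base64

from huggingface_hub import InferenceClient

client = InferenceClient(api_key="hf_***")

# Encode a local image ("cat.jpg" is a hypothetical path) as a base64 data URL.
with open("cat.jpg", "rb") as f:
    b64 = base64.b64encode(f.read()).decode("utf-8")

messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe this image in one sentence."},
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{b64}"}},
        ],
    }
]

stream = client.chat.completions.create(
    model="meta-llama/Llama-3.2-11B-Vision-Instruct",
    messages=messages,
    max_tokens=500,
    stream=True,
)

for chunk in stream:
    print(chunk.choices[0].delta.content, end="")
```
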
<js>
+ With huggingface_hub client:
```js
- import { HfInference } from "@huggingface/inference";
+ import { HfInference } from "@huggingface/inference"

- const inference = new HfInference("hf_***");
- const imageUrl = "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg";
+ const client = new HfInference("hf_***")

- for await (const chunk of inference.chatCompletionStream({
+ let out = "";
+
+ const stream = client.chatCompletionStream({
    model: "meta-llama/Llama-3.2-11B-Vision-Instruct",
    messages: [
        {
-             "role": "user",
-             "content": [
-                 {"type": "image_url", "image_url": {"url": imageUrl}},
-                 {"type": "text", "text": "Describe this image in one sentence."},
-             ],
+             role: "user",
+             content: [
+                 {
+                     type: "text",
+                     text: "Describe this image in one sentence."
+                 },
+                 {
+                     type: "image_url",
+                     image_url: {
+                         url: "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+                     }
+                 }
+             ]
+         }
+     ],
+     max_tokens: 500
+ });
+
+ for await (const chunk of stream) {
+     if (chunk.choices && chunk.choices.length > 0) {
+         const newContent = chunk.choices[0].delta.content;
+         out += newContent;
+         console.log(newContent);
+     }
+ }
+ ```
+
+ With openai client:
+ ```js
+ import { OpenAI } from "openai"
+
+ const client = new OpenAI({
+     baseURL: "https://api-inference.huggingface.co/v1/",
+     apiKey: "hf_***"
+ })
+
+ let out = "";
+
+ const stream = await client.chat.completions.create({
+     model: "meta-llama/Llama-3.2-11B-Vision-Instruct",
+     messages: [
+         {
+             role: "user",
+             content: [
+                 {
+                     type: "text",
+                     text: "Describe this image in one sentence."
+                 },
+                 {
+                     type: "image_url",
+                     image_url: {
+                         url: "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+                     }
+                 }
+             ]
        }
    ],
    max_tokens: 500,
- })) {
-     process.stdout.write(chunk.choices[0]?.delta?.content || "");
+     stream: true,
+ });
+
+ for await (const chunk of stream) {
+     if (chunk.choices && chunk.choices.length > 0) {
+         const newContent = chunk.choices[0].delta.content;
+         out += newContent;
+         console.log(newContent);
+     }
}
```
