Skip to content

Commit 11a32c8

Browse files
authored
Merge branch 'main' into cleanup-lora-test
2 parents dc26b73 + 577d498 commit 11a32c8

File tree

26 files changed

+1794
-113
lines changed

26 files changed

+1794
-113
lines changed

.buildkite/test-amd.yaml

Lines changed: 1265 additions & 0 deletions
Large diffs are not rendered by default.

.github/workflows/issue_autolabel.yml

Lines changed: 95 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ jobs:
1313
runs-on: ubuntu-latest
1414
steps:
1515
- name: Label issues based on keywords
16+
id: label-step
1617
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
1718
with:
1819
script: |
@@ -42,7 +43,6 @@ jobs:
4243
searchIn: "body"
4344
},
4445
],
45-
4646
// Substring search - matches anywhere in text (partial matches)
4747
substrings: [
4848
{
@@ -89,14 +89,12 @@ jobs:
8989
term: "hip_",
9090
searchIn: "both"
9191
},
92-
9392
// ROCm tools and libraries
9493
{
9594
term: "hipify",
9695
searchIn: "both"
9796
},
9897
],
99-
10098
// Regex patterns - for complex pattern matching
10199
regexPatterns: [
102100
{
@@ -107,13 +105,17 @@ jobs:
107105
}
108106
],
109107
},
108+
// Add more label configurations here as needed
109+
// example: {
110+
// keywords: [...],
111+
// substrings: [...],
112+
// regexPatterns: [...]
113+
// },
110114
};
111-
112115
// Helper function to create regex based on search type
113116
function createSearchRegex(term, type) {
114117
// Escape special regex characters in the term
115118
const escapedTerm = term.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
116-
117119
switch (type) {
118120
case 'keyword':
119121
// Word boundary search - matches whole words only
@@ -125,16 +127,13 @@ jobs:
125127
throw new Error(`Unknown search type: ${type}`);
126128
}
127129
}
128-
129130
// Helper function to find matching terms in text with line information
130131
function findMatchingTermsWithLines(text, searchTerms = [], searchType = 'keyword', searchLocation = '') {
131132
const matches = [];
132133
const lines = text.split('\n');
133-
134134
for (const termConfig of searchTerms) {
135135
let regex;
136136
let term, searchIn, pattern, description, flags;
137-
138137
// Handle different input formats (string or object)
139138
if (typeof termConfig === 'string') {
140139
term = termConfig;
@@ -146,21 +145,17 @@ jobs:
146145
description = termConfig.description;
147146
flags = termConfig.flags;
148147
}
149-
150148
// Skip if this term shouldn't be searched in the current location
151149
if (searchIn !== 'both' && searchIn !== searchLocation) {
152150
continue;
153151
}
154-
155152
// Create appropriate regex
156153
if (searchType === 'regex') {
157154
regex = new RegExp(pattern, flags || "gi");
158155
} else {
159156
regex = createSearchRegex(term, searchType);
160157
}
161-
162158
const termMatches = [];
163-
164159
// Check each line for matches
165160
lines.forEach((line, lineIndex) => {
166161
const lineMatches = line.match(regex);
@@ -175,15 +170,14 @@ jobs:
175170
originalTerm: term || pattern,
176171
description: description,
177172
// Show context around the match in the line
178-
context: line.length > 100 ?
179-
line.substring(Math.max(0, line.toLowerCase().indexOf(match.toLowerCase()) - 30),
180-
line.toLowerCase().indexOf(match.toLowerCase()) + match.length + 30) + '...'
173+
context: line.length > 100 ?
174+
line.substring(Math.max(0, line.toLowerCase().indexOf(match.toLowerCase()) - 30),
175+
line.toLowerCase().indexOf(match.toLowerCase()) + match.length + 30) + '...'
181176
: line.trim()
182177
});
183178
});
184179
}
185180
});
186-
187181
if (termMatches.length > 0) {
188182
matches.push({
189183
term: term || (description || pattern),
@@ -196,64 +190,48 @@ jobs:
196190
});
197191
}
198192
}
199-
200193
return matches;
201194
}
202-
203195
// Helper function to check if label should be added
204196
async function processLabel(labelName, config) {
205197
const body = context.payload.issue.body || "";
206198
const title = context.payload.issue.title || "";
207-
208199
core.notice(`Processing label: ${labelName}`);
209200
core.notice(`Issue Title: "${title}"`);
210201
core.notice(`Issue Body length: ${body.length} characters`);
211-
212202
let shouldAddLabel = false;
213203
let allMatches = [];
214204
let reason = '';
215-
216205
const keywords = config.keywords || [];
217206
const substrings = config.substrings || [];
218207
const regexPatterns = config.regexPatterns || [];
219-
220208
core.notice(`Searching with ${keywords.length} keywords, ${substrings.length} substrings, and ${regexPatterns.length} regex patterns`);
221-
222209
// Search in title
223210
if (title.trim()) {
224211
core.notice(`Searching in title: "${title}"`);
225-
226212
const titleKeywordMatches = findMatchingTermsWithLines(title, keywords, 'keyword', 'title');
227213
const titleSubstringMatches = findMatchingTermsWithLines(title, substrings, 'substring', 'title');
228214
const titleRegexMatches = findMatchingTermsWithLines(title, regexPatterns, 'regex', 'title');
229-
230215
allMatches.push(...titleKeywordMatches, ...titleSubstringMatches, ...titleRegexMatches);
231216
}
232-
233217
// Search in body
234218
if (body.trim()) {
235219
core.notice(`Searching in body (${body.length} characters)`);
236-
237220
const bodyKeywordMatches = findMatchingTermsWithLines(body, keywords, 'keyword', 'body');
238221
const bodySubstringMatches = findMatchingTermsWithLines(body, substrings, 'substring', 'body');
239222
const bodyRegexMatches = findMatchingTermsWithLines(body, regexPatterns, 'regex', 'body');
240-
241223
allMatches.push(...bodyKeywordMatches, ...bodySubstringMatches, ...bodyRegexMatches);
242224
}
243-
244225
if (allMatches.length > 0) {
245226
core.notice(`Found ${allMatches.length} matching term(s):`);
246-
247227
for (const termMatch of allMatches) {
248228
const locationText = termMatch.searchLocation === 'title' ? 'title' : 'body';
249229
const searchInText = termMatch.searchIn === 'both' ? 'both' : termMatch.searchIn;
250-
251230
if (termMatch.searchType === 'regex') {
252231
core.notice(` 📍 Regex: "${termMatch.term}" (pattern: ${termMatch.pattern}) found ${termMatch.count} time(s) in ${locationText} (configured to search in: ${searchInText}):`);
253232
} else {
254233
core.notice(` 📍 Term: "${termMatch.term}" (${termMatch.searchType} search) found ${termMatch.count} time(s) in ${locationText} (configured to search in: ${searchInText}):`);
255234
}
256-
257235
// Show details for each match
258236
termMatch.matches.forEach((match, index) => {
259237
core.notice(` ${index + 1}. Line ${match.lineNumber} in ${match.searchLocation}: "${match.match}" [${match.searchType}]`);
@@ -266,21 +244,17 @@ jobs:
266244
}
267245
});
268246
}
269-
270247
shouldAddLabel = true;
271248
const totalMatches = allMatches.reduce((sum, t) => sum + t.count, 0);
272249
const titleMatches = allMatches.filter(t => t.searchLocation === 'title').reduce((sum, t) => sum + t.count, 0);
273250
const bodyMatches = allMatches.filter(t => t.searchLocation === 'body').reduce((sum, t) => sum + t.count, 0);
274251
const keywordMatches = allMatches.filter(t => t.searchType === 'keyword').reduce((sum, t) => sum + t.count, 0);
275252
const substringMatches = allMatches.filter(t => t.searchType === 'substring').reduce((sum, t) => sum + t.count, 0);
276253
const regexMatches = allMatches.filter(t => t.searchType === 'regex').reduce((sum, t) => sum + t.count, 0);
277-
278254
reason = `Found ${totalMatches} total matches (${titleMatches} in title, ${bodyMatches} in body) - ${keywordMatches} keyword matches, ${substringMatches} substring matches, ${regexMatches} regex matches`;
279255
}
280-
281256
core.notice(`Final decision: ${shouldAddLabel ? 'ADD LABEL' : 'DO NOT ADD LABEL'}`);
282257
core.notice(`Reason: ${reason || 'No matching terms found'}`);
283-
284258
if (shouldAddLabel) {
285259
const existingLabels = context.payload.issue.labels.map(l => l.name);
286260
if (!existingLabels.includes(labelName)) {
@@ -296,14 +270,92 @@ jobs:
296270
core.notice(`Label "${labelName}" already present.`);
297271
return false;
298272
}
299-
300273
core.notice(`No matching terms found for label "${labelName}".`);
301274
return false;
302275
}
303-
304276
// Process all configured labels
305-
const processLabels = Object.entries(labelConfig)
306-
.map(([labelName, config]) => processLabel(labelName, config));
307-
const labelsAdded = await Promise.all(processLabels);
308-
const numLabelsAdded = labelsAdded.reduce((x, y) => x + y, 0);
309-
core.notice(`Processing complete. ${numLabelsAdded} label(s) added.`);
277+
const labelsAddedResults = await Promise.all(
278+
Object.entries(labelConfig).map(([labelName, config]) =>
279+
processLabel(labelName, config).then(added => ({ labelName, added }))
280+
)
281+
);
282+
283+
const numLabelsAdded = labelsAddedResults.filter(r => r.added).length;
284+
core.notice(`Processing complete. ${numLabelsAdded} label(s) added.`);
285+
286+
// Return which labels were added for the next step
287+
const addedLabels = labelsAddedResults.filter(r => r.added).map(r => r.labelName);
288+
core.setOutput('labels_added', JSON.stringify(addedLabels));
289+
return addedLabels;
290+
291+
- name: CC users for labeled issues
292+
if: steps.label-step.outputs.labels_added != '[]'
293+
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
294+
with:
295+
script: |
296+
// Configuration: Map labels to GitHub users to CC
297+
// You can add multiple users per label, and multiple label configurations
298+
const ccConfig = {
299+
rocm: {
300+
users: ['hongxiayang', 'tjtanaa', 'vllmellm'], // Add more users as needed: ['user1', 'user2', 'user3']
301+
message: 'CC {users} for ROCm-related issue' // {users} will be replaced with @mentions
302+
},
303+
// Add more label -> user mappings here
304+
// Example:
305+
// cuda: {
306+
// users: ['user1', 'user2'],
307+
// message: 'CC {users} for CUDA-related issue'
308+
// },
309+
// performance: {
310+
// users: ['perfexpert'],
311+
// message: 'CC {users} for performance issue'
312+
// },
313+
};
314+
315+
const labelsAdded = JSON.parse('${{ steps.label-step.outputs.labels_added }}');
316+
core.notice(`Labels added: ${labelsAdded.join(', ')}`);
317+
318+
// Get existing comments to check for already mentioned users
319+
const comments = await github.rest.issues.listComments({
320+
owner: context.repo.owner,
321+
repo: context.repo.repo,
322+
issue_number: context.issue.number,
323+
});
324+
325+
const issueBody = context.payload.issue.body || '';
326+
const allExistingText = issueBody + '\n' + comments.data.map(c => c.body).join('\n');
327+
328+
// Process each label that was added
329+
for (const label of labelsAdded) {
330+
if (ccConfig[label]) {
331+
const config = ccConfig[label];
332+
const usersToMention = [];
333+
334+
// Check which users haven't been mentioned yet
335+
for (const user of config.users) {
336+
const mentionPattern = new RegExp(`@${user}\\b`, 'i');
337+
if (!mentionPattern.test(allExistingText)) {
338+
usersToMention.push(user);
339+
} else {
340+
core.notice(`@${user} already mentioned for label "${label}", skipping`);
341+
}
342+
}
343+
344+
// Post comment if there are users to mention
345+
if (usersToMention.length > 0) {
346+
const mentions = usersToMention.map(u => `@${u}`).join(' ');
347+
const message = config.message.replace('{users}', mentions);
348+
349+
await github.rest.issues.createComment({
350+
owner: context.repo.owner,
351+
repo: context.repo.repo,
352+
issue_number: context.issue.number,
353+
body: message
354+
});
355+
356+
core.notice(`CC comment added for label "${label}": ${mentions}`);
357+
} else {
358+
core.notice(`All users for label "${label}" already mentioned, skipping comment`);
359+
}
360+
}
361+
}

docs/contributing/benchmarks.md

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ th {
3535
| Sonnet (deprecated) ||| Local file: `benchmarks/sonnet.txt` |
3636
| Random ||| `synthetic` |
3737
| RandomMultiModal (Image/Video) | 🟡 | 🚧 | `synthetic` |
38+
| RandomForReranking ||| `synthetic` |
3839
| Prefix Repetition ||| `synthetic` |
3940
| HuggingFace-VisionArena ||| `lmarena-ai/VisionArena-Chat` |
4041
| HuggingFace-MMVU ||| `yale-nlp/MMVU` |
@@ -878,6 +879,51 @@ vllm bench serve \
878879

879880
</details>
880881

882+
#### Reranker Benchmark
883+
884+
Benchmark the performance of rerank requests in vLLM.
885+
886+
<details class="admonition abstract" markdown="1">
887+
<summary>Show more</summary>
888+
889+
Unlike generative models, which use the Completions API or Chat Completions API,
890+
you should set `--backend vllm-rerank` and `--endpoint /v1/rerank` to use the Reranker API.
891+
892+
For reranking, the only supported dataset is `--dataset-name random-rerank`.
893+
894+
Start the server:
895+
896+
```bash
897+
vllm serve BAAI/bge-reranker-v2-m3
898+
```
899+
900+
Run the benchmark:
901+
902+
```bash
903+
vllm bench serve \
904+
--model BAAI/bge-reranker-v2-m3 \
905+
--backend vllm-rerank \
906+
--endpoint /v1/rerank \
907+
--dataset-name random-rerank \
908+
--tokenizer BAAI/bge-reranker-v2-m3 \
909+
--random-input-len 512 \
910+
--num-prompts 10 \
911+
--random-batch-size 5
912+
```
913+
914+
For reranker models, this will create `num_prompts / random_batch_size` requests with
915+
`random_batch_size` "documents" where each one has close to `random_input_len` tokens.
916+
In the example above, this results in 2 rerank requests with 5 "documents" each where
917+
each document has close to 512 tokens.
918+
919+
Please note that the `/v1/rerank` endpoint is also supported by embedding models. So if you're running
920+
with an embedding model, also set `--no_reranker`. Because in this case the query is
921+
treated as an individual prompt by the server, here we send `random_batch_size - 1` documents
922+
to account for the extra prompt which is the query. The token accounting to report the
923+
throughput numbers correctly is also adjusted.
924+
925+
</details>
926+
881927
[](){ #performance-benchmarks }
882928

883929
## Performance Benchmarks

docs/getting_started/installation/cpu/arm.inc.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ ARM CPU backend currently supports Float32, FP16 and BFloat16 datatypes.
2323
# --8<-- [end:pre-built-wheels]
2424
# --8<-- [start:build-wheel-from-source]
2525

26-
--8<-- "docs/getting_started/installation/cpu/build.inc.md"
26+
--8<-- "docs/getting_started/installation/cpu/build.inc.md:extra-information"
2727

2828
Testing has been conducted on AWS Graviton3 instances for compatibility.
2929

docs/getting_started/installation/cpu/build.inc.md

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
# --8<-- [start:extra-information]
2+
13
First, install the recommended compiler. We recommend using `gcc/g++ >= 12.3.0` as the default compiler to avoid potential problems. For example, on Ubuntu 22.04, you can run:
24

35
```bash
@@ -39,7 +41,4 @@ If you want to develop vLLM, install it in editable mode instead.
3941
VLLM_TARGET_DEVICE=cpu python setup.py develop
4042
```
4143

42-
!!! note
43-
If you are building vLLM from source and not using the pre-built images, remember to set `LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:$LD_PRELOAD"` on x86 machines before running vLLM.
44-
4544
# --8<-- [end:extra-information]

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ markers = [
107107
"distributed: run this test only in distributed GPU tests",
108108
"skip_v1: do not run this test with v1",
109109
"optional: optional tests that are automatically skipped, include --optional to run them",
110+
"extra_server_args: extra arguments to pass to the server fixture",
110111
]
111112

112113
[tool.ty.src]

0 commit comments

Comments
 (0)