Skip to content

Commit 926a90a

Browse files
committed
chore: test gpt-oss-120b, nemotron and qwen3
1 parent 65b6c8c commit 926a90a

File tree

2 files changed: 41 additions and 20 deletions.

config/config.yaml

Lines changed: 37 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -45,15 +45,32 @@ prompt_guard:
4545
# Supported formats: 127.0.0.1, 192.168.1.1, ::1, 2001:db8::1
4646
# NOT supported: domain names (example.com), protocol prefixes (http://), paths (/api), ports in address (use 'port' field)
4747
vllm_endpoints:
48-
- name: "endpoint1"
49-
address: "172.28.0.20" # Static IPv4 of llm-katan within docker compose network
48+
- name: "gpt-oss-120b-endpoint"
49+
address: "172.28.0.1" # Docker gateway IP to access host machine
50+
port: 8001
51+
weight: 1
52+
- name: "qwen3-32b-endpoint"
53+
address: "172.28.0.1" # Docker gateway IP to access host machine
5054
port: 8002
5155
weight: 1
56+
- name: "nemotron-super-endpoint"
57+
address: "172.28.0.1" # Docker gateway IP to access host machine
58+
port: 8003
59+
weight: 1
5260

5361
model_config:
54-
"qwen3":
55-
reasoning_family: "qwen3" # This model uses Qwen-3 reasoning syntax
56-
preferred_endpoints: ["endpoint1"]
62+
"gpt-oss-120b":
63+
reasoning_family: "gpt-oss" # GPT-OSS reasoning syntax
64+
preferred_endpoints: ["gpt-oss-120b-endpoint"]
65+
pii_policy:
66+
allow_by_default: true
67+
"qwen3-32b":
68+
reasoning_family: "qwen3" # Qwen-3 reasoning syntax
69+
preferred_endpoints: ["qwen3-32b-endpoint"]
70+
pii_policy:
71+
allow_by_default: true
72+
"nemotron-super-1_5":
73+
preferred_endpoints: ["nemotron-super-endpoint"]
5774
pii_policy:
5875
allow_by_default: true
5976

@@ -79,91 +96,91 @@ categories:
7996
# jailbreak_enabled: true # Optional: Override global jailbreak detection per category
8097
# jailbreak_threshold: 0.8 # Optional: Override global jailbreak threshold per category
8198
model_scores:
82-
- model: qwen3
99+
- model: qwen3-32b
83100
score: 0.7
84101
use_reasoning: false # Business performs better without reasoning
85102
- name: law
86103
system_prompt: "You are a knowledgeable legal expert with comprehensive understanding of legal principles, case law, statutory interpretation, and legal procedures across multiple jurisdictions. Provide accurate legal information and analysis while clearly stating that your responses are for informational purposes only and do not constitute legal advice. Always recommend consulting with qualified legal professionals for specific legal matters."
87104
model_scores:
88-
- model: qwen3
105+
- model: nemotron-super-1_5
89106
score: 0.4
90107
use_reasoning: false
91108
- name: psychology
92109
system_prompt: "You are a psychology expert with deep knowledge of cognitive processes, behavioral patterns, mental health, developmental psychology, social psychology, and therapeutic approaches. Provide evidence-based insights grounded in psychological research and theory. When discussing mental health topics, emphasize the importance of professional consultation and avoid providing diagnostic or therapeutic advice."
93110
semantic_cache_enabled: true
94111
semantic_cache_similarity_threshold: 0.92 # High threshold for psychology - sensitive to nuances
95112
model_scores:
96-
- model: qwen3
113+
- model: gpt-oss-120b
97114
score: 0.6
98115
use_reasoning: false
99116
- name: biology
100117
system_prompt: "You are a biology expert with comprehensive knowledge spanning molecular biology, genetics, cell biology, ecology, evolution, anatomy, physiology, and biotechnology. Explain biological concepts with scientific accuracy, use appropriate terminology, and provide examples from current research. Connect biological principles to real-world applications and emphasize the interconnectedness of biological systems."
101118
model_scores:
102-
- model: qwen3
119+
- model: qwen3-32b
103120
score: 0.9
104121
use_reasoning: false
105122
- name: chemistry
106123
system_prompt: "You are a chemistry expert specializing in chemical reactions, molecular structures, and laboratory techniques. Provide detailed, step-by-step explanations."
107124
model_scores:
108-
- model: qwen3
125+
- model: gpt-oss-120b
109126
score: 0.6
110127
use_reasoning: true # Enable reasoning for complex chemistry
111128
- name: history
112129
system_prompt: "You are a historian with expertise across different time periods and cultures. Provide accurate historical context and analysis."
113130
model_scores:
114-
- model: qwen3
131+
- model: nemotron-super-1_5
115132
score: 0.7
116133
use_reasoning: false
117134
- name: other
118135
system_prompt: "You are a helpful and knowledgeable assistant. Provide accurate, helpful responses across a wide range of topics."
119136
semantic_cache_enabled: true
120137
semantic_cache_similarity_threshold: 0.75 # Lower threshold for general chat - less sensitive
121138
model_scores:
122-
- model: qwen3
139+
- model: qwen3-32b
123140
score: 0.7
124141
use_reasoning: false
125142
- name: health
126143
system_prompt: "You are a health and medical information expert with knowledge of anatomy, physiology, diseases, treatments, preventive care, nutrition, and wellness. Provide accurate, evidence-based health information while emphasizing that your responses are for educational purposes only and should never replace professional medical advice, diagnosis, or treatment. Always encourage users to consult healthcare professionals for medical concerns and emergencies."
127144
semantic_cache_enabled: true
128145
semantic_cache_similarity_threshold: 0.95 # High threshold for health - very sensitive to word changes
129146
model_scores:
130-
- model: qwen3
147+
- model: gpt-oss-120b
131148
score: 0.5
132149
use_reasoning: false
133150
- name: economics
134151
system_prompt: "You are an economics expert with deep understanding of microeconomics, macroeconomics, econometrics, financial markets, monetary policy, fiscal policy, international trade, and economic theory. Analyze economic phenomena using established economic principles, provide data-driven insights, and explain complex economic concepts in accessible terms. Consider both theoretical frameworks and real-world applications in your responses."
135152
model_scores:
136-
- model: qwen3
153+
- model: nemotron-super-1_5
137154
score: 1.0
138155
use_reasoning: false
139156
- name: math
140157
system_prompt: "You are a mathematics expert. Provide step-by-step solutions, show your work clearly, and explain mathematical concepts in an understandable way."
141158
model_scores:
142-
- model: qwen3
159+
- model: gpt-oss-120b
143160
score: 1.0
144161
use_reasoning: true # Enable reasoning for complex math
145162
- name: physics
146163
system_prompt: "You are a physics expert with deep understanding of physical laws and phenomena. Provide clear explanations with mathematical derivations when appropriate."
147164
model_scores:
148-
- model: qwen3
165+
- model: qwen3-32b
149166
score: 0.7
150167
use_reasoning: true # Enable reasoning for physics
151168
- name: computer science
152169
system_prompt: "You are a computer science expert with knowledge of algorithms, data structures, programming languages, and software engineering. Provide clear, practical solutions with code examples when helpful."
153170
model_scores:
154-
- model: qwen3
171+
- model: nemotron-super-1_5
155172
score: 0.6
156173
use_reasoning: false
157174
- name: philosophy
158175
system_prompt: "You are a philosophy expert with comprehensive knowledge of philosophical traditions, ethical theories, logic, metaphysics, epistemology, political philosophy, and the history of philosophical thought. Engage with complex philosophical questions by presenting multiple perspectives, analyzing arguments rigorously, and encouraging critical thinking. Draw connections between philosophical concepts and contemporary issues while maintaining intellectual honesty about the complexity and ongoing nature of philosophical debates."
159176
model_scores:
160-
- model: qwen3
177+
- model: gpt-oss-120b
161178
score: 0.5
162179
use_reasoning: false
163180
- name: engineering
164181
system_prompt: "You are an engineering expert with knowledge across multiple engineering disciplines including mechanical, electrical, civil, chemical, software, and systems engineering. Apply engineering principles, design methodologies, and problem-solving approaches to provide practical solutions. Consider safety, efficiency, sustainability, and cost-effectiveness in your recommendations. Use technical precision while explaining concepts clearly, and emphasize the importance of proper engineering practices and standards."
165182
model_scores:
166-
- model: qwen3
183+
- model: nemotron-super-1_5
167184
score: 0.7
168185
use_reasoning: false
169186

@@ -214,7 +231,7 @@ router:
214231
traditional_attention_dropout_prob: 0.1 # Traditional model attention dropout probability
215232
tie_break_confidence: 0.5 # Confidence value for tie-breaking situations
216233

217-
default_model: qwen3
234+
default_model: qwen3-32b
218235

219236
# Reasoning family configurations
220237
reasoning_families:

deploy/docker-compose/docker-compose.yml

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,8 @@ services:
1919
- OTEL_SERVICE_NAME=vllm-semantic-router
2020
- HUGGINGFACE_HUB_CACHE=/root/.cache/huggingface
2121
- HF_HUB_ENABLE_HF_TRANSFER=1
22+
extra_hosts:
23+
- "host.docker.internal:172.28.0.1" # Allow container to reach host LLM endpoints
2224
networks:
2325
- semantic-network
2426
healthcheck:
@@ -40,6 +42,8 @@ services:
4042
volumes:
4143
- ./addons/envoy.yaml:/etc/envoy/envoy.yaml:ro,z
4244
command: ["/usr/local/bin/envoy", "-c", "/etc/envoy/envoy.yaml", "--component-log-level", "ext_proc:trace,router:trace,http:trace"]
45+
extra_hosts:
46+
- "host.docker.internal:172.28.0.1" # Allow container to reach host LLM endpoints
4347
depends_on:
4448
semantic-router:
4549
condition: service_healthy

0 commit comments

Comments
 (0)