diff --git a/docs/en/DEPLOY_OPTION.md b/docs/en/DEPLOY_OPTION.md index a45f5e7a6..8f8fc5463 100644 --- a/docs/en/DEPLOY_OPTION.md +++ b/docs/en/DEPLOY_OPTION.md @@ -980,6 +980,10 @@ This solution supports the following text generation models: "apac.anthropic.claude-3-5-sonnet-20240620-v1:0", "apac.anthropic.claude-3-5-sonnet-20241022-v2:0", "us.deepseek.r1-v1:0", +"qwen.qwen3-235b-a22b-2507-v1:0", +"qwen.qwen3-32b-v1:0", +"qwen.qwen3-coder-480b-a35b-v1:0", +"qwen.qwen3-coder-30b-a3b-v1:0", "us.writer.palmyra-x5-v1:0", "us.writer.palmyra-x4-v1:0", "amazon.titan-text-premier-v1:0", diff --git a/docs/ja/DEPLOY_OPTION.md b/docs/ja/DEPLOY_OPTION.md index 870044bfd..71f29faf2 100644 --- a/docs/ja/DEPLOY_OPTION.md +++ b/docs/ja/DEPLOY_OPTION.md @@ -995,6 +995,10 @@ const envs: Record> = { "apac.anthropic.claude-3-5-sonnet-20240620-v1:0", "apac.anthropic.claude-3-5-sonnet-20241022-v2:0", "us.deepseek.r1-v1:0", +"qwen.qwen3-235b-a22b-2507-v1:0", +"qwen.qwen3-32b-v1:0", +"qwen.qwen3-coder-480b-a35b-v1:0", +"qwen.qwen3-coder-30b-a3b-v1:0", "us.writer.palmyra-x5-v1:0", "us.writer.palmyra-x4-v1:0", "amazon.titan-text-premier-v1:0", diff --git a/docs/ko/DEPLOY_OPTION.md b/docs/ko/DEPLOY_OPTION.md index caf3742a2..22ce12ccf 100644 --- a/docs/ko/DEPLOY_OPTION.md +++ b/docs/ko/DEPLOY_OPTION.md @@ -976,6 +976,10 @@ const envs: Record> = { "apac.anthropic.claude-3-5-sonnet-20240620-v1:0", "apac.anthropic.claude-3-5-sonnet-20241022-v2:0", "us.deepseek.r1-v1:0", +"qwen.qwen3-235b-a22b-2507-v1:0", +"qwen.qwen3-32b-v1:0", +"qwen.qwen3-coder-480b-a35b-v1:0", +"qwen.qwen3-coder-30b-a3b-v1:0", "us.writer.palmyra-x5-v1:0", "us.writer.palmyra-x4-v1:0", "amazon.titan-text-premier-v1:0", diff --git a/packages/cdk/lambda/utils/models.ts b/packages/cdk/lambda/utils/models.ts index 2d7dda05c..1194dbf7c 100644 --- a/packages/cdk/lambda/utils/models.ts +++ b/packages/cdk/lambda/utils/models.ts @@ -206,6 +206,31 @@ const DEEPSEEK_DEFAULT_PARAMS: ConverseInferenceParams = { }, }; +// Qwen3 model parameters based on actual AWS Bedrock limits +const QWEN_16K_DEFAULT_PARAMS: ConverseInferenceParams = { + inferenceConfig: { + maxTokens: 16384, + temperature: 0.7, + topP: 0.9, + }, +}; + +const QWEN_64K_DEFAULT_PARAMS: ConverseInferenceParams = { + inferenceConfig: { + maxTokens: 65536, + temperature: 0.7, + topP: 0.9, + }, +}; + +const QWEN_192K_DEFAULT_PARAMS: ConverseInferenceParams = { + inferenceConfig: { + maxTokens: 196608, + temperature: 0.7, + topP: 0.9, + }, +}; + const PALMYRA_DEFAULT_PARAMS: ConverseInferenceParams = { inferenceConfig: { maxTokens: 8192, @@ -1491,6 +1516,38 @@ export const BEDROCK_TEXT_GEN_MODELS: { extractConverseOutput: extractConverseOutput, extractConverseStreamOutput: extractConverseStreamOutput, }, + 'qwen.qwen3-235b-a22b-2507-v1:0': { + defaultParams: QWEN_192K_DEFAULT_PARAMS, + usecaseParams: USECASE_DEFAULT_PARAMS, + createConverseCommandInput: createConverseCommandInput, + createConverseStreamCommandInput: createConverseStreamCommandInput, + extractConverseOutput: extractConverseOutput, + extractConverseStreamOutput: extractConverseStreamOutput, + }, + 'qwen.qwen3-32b-v1:0': { + defaultParams: QWEN_16K_DEFAULT_PARAMS, + usecaseParams: USECASE_DEFAULT_PARAMS, + createConverseCommandInput: createConverseCommandInput, + createConverseStreamCommandInput: createConverseStreamCommandInput, + extractConverseOutput: extractConverseOutput, + extractConverseStreamOutput: extractConverseStreamOutput, + }, + 'qwen.qwen3-coder-480b-a35b-v1:0': { + defaultParams: QWEN_64K_DEFAULT_PARAMS, + usecaseParams: USECASE_DEFAULT_PARAMS, + createConverseCommandInput: createConverseCommandInput, + createConverseStreamCommandInput: createConverseStreamCommandInput, + extractConverseOutput: extractConverseOutput, + extractConverseStreamOutput: extractConverseStreamOutput, + }, + 'qwen.qwen3-coder-30b-a3b-v1:0': { + defaultParams: QWEN_192K_DEFAULT_PARAMS, + usecaseParams: USECASE_DEFAULT_PARAMS, + createConverseCommandInput: createConverseCommandInput, + createConverseStreamCommandInput: createConverseStreamCommandInput, + extractConverseOutput: extractConverseOutput, + extractConverseStreamOutput: extractConverseStreamOutput, + }, // Although Palmyra supports system context, the model seems work best without it. 'us.writer.palmyra-x4-v1:0': { defaultParams: PALMYRA_DEFAULT_PARAMS, diff --git a/packages/common/src/application/model.ts b/packages/common/src/application/model.ts index 3728f757a..34238ab50 100644 --- a/packages/common/src/application/model.ts +++ b/packages/common/src/application/model.ts @@ -384,6 +384,23 @@ export const modelMetadata: Record = { flags: MODEL_FEATURE.TEXT_DOC_REASONING, displayName: 'DeepSeek-R1', }, + // Qwen + 'qwen.qwen3-235b-a22b-2507-v1:0': { + flags: MODEL_FEATURE.TEXT_ONLY, + displayName: 'Qwen3 235B A22B 2507', + }, + 'qwen.qwen3-32b-v1:0': { + flags: MODEL_FEATURE.TEXT_ONLY, + displayName: 'Qwen3 32B', + }, + 'qwen.qwen3-coder-480b-a35b-v1:0': { + flags: MODEL_FEATURE.TEXT_ONLY, + displayName: 'Qwen3-Coder 480B A35B Instruct', + }, + 'qwen.qwen3-coder-30b-a3b-v1:0': { + flags: MODEL_FEATURE.TEXT_ONLY, + displayName: 'Qwen3-Coder 30B A3B Instruct', + }, // Writer 'us.writer.palmyra-x4-v1:0': { flags: MODEL_FEATURE.TEXT_DOC,