diff --git a/README.md b/README.md index 84e97df..8ecfc6c 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ No description provided (generated by Openapi Generator https://github.com/opena This Python package is automatically generated by the [OpenAPI Generator](https://openapi-generator.tech) project: - API version: 0.1.0 -- Package version: 0.3.1 +- Package version: 3.1.6 - Generator version: 7.9.0 - Build package: org.openapitools.codegen.languages.PythonClientCodegen @@ -99,14 +99,19 @@ Class | Method | HTTP request | Description *EXTERNALApi* | [**create_checkout_payments_checkout_post**](docs/EXTERNALApi.md#create_checkout_payments_checkout_post) | **POST** /payments/checkout | Create Checkout *EXTERNALApi* | [**create_compute_deployment_deployments_compute_post**](docs/EXTERNALApi.md#create_compute_deployment_deployments_compute_post) | **POST** /deployments/compute | Create Compute Deployment *EXTERNALApi* | [**create_cserve_deployment_deployments_cserve_post**](docs/EXTERNALApi.md#create_cserve_deployment_deployments_cserve_post) | **POST** /deployments/cserve | Create Cserve Deployment +*EXTERNALApi* | [**create_cserve_v2_deployment_deployments_cserve_v2_post**](docs/EXTERNALApi.md#create_cserve_v2_deployment_deployments_cserve_v2_post) | **POST** /deployments/cserve_v2 | Create Cserve V2 Deployment *EXTERNALApi* | [**create_inference_deployment_deployments_inference_post**](docs/EXTERNALApi.md#create_inference_deployment_deployments_inference_post) | **POST** /deployments/inference | Create Inference Deployment +*EXTERNALApi* | [**create_rag_deployment_deployments_rag_post**](docs/EXTERNALApi.md#create_rag_deployment_deployments_rag_post) | **POST** /deployments/rag | Create Rag Deployment *EXTERNALApi* | [**delete_api_key_credentials_api_key_id_delete**](docs/EXTERNALApi.md#delete_api_key_credentials_api_key_id_delete) | **DELETE** /credentials/api-key/{id} | Delete Api Key +*EXTERNALApi* | 
[**delete_user_vault_item_endpoint_user_vault_delete**](docs/EXTERNALApi.md#delete_user_vault_item_endpoint_user_vault_delete) | **DELETE** /user_vault | Delete User Vault Item Endpoint +*EXTERNALApi* | [**get_all_user_vault_items_endpoint_user_vault_get**](docs/EXTERNALApi.md#get_all_user_vault_items_endpoint_user_vault_get) | **GET** /user_vault | Get All User Vault Items Endpoint *EXTERNALApi* | [**get_api_keys_credentials_api_key_get**](docs/EXTERNALApi.md#get_api_keys_credentials_api_key_get) | **GET** /credentials/api-key | Get Api Keys *EXTERNALApi* | [**get_clusters_clusters_get**](docs/EXTERNALApi.md#get_clusters_clusters_get) | **GET** /clusters | Get Clusters *EXTERNALApi* | [**get_compute_deployment_deployments_compute_deployment_id_get**](docs/EXTERNALApi.md#get_compute_deployment_deployments_compute_deployment_id_get) | **GET** /deployments/compute/{deployment_id} | Get Compute Deployment *EXTERNALApi* | [**get_credits_credits_get**](docs/EXTERNALApi.md#get_credits_credits_get) | **GET** /credits | Get Credits *EXTERNALApi* | [**get_cserve_deployment_deployments_cserve_deployment_id_get**](docs/EXTERNALApi.md#get_cserve_deployment_deployments_cserve_deployment_id_get) | **GET** /deployments/cserve/{deployment_id} | Get Cserve Deployment *EXTERNALApi* | [**get_cserve_recipe_deployments_cserve_recipes_get**](docs/EXTERNALApi.md#get_cserve_recipe_deployments_cserve_recipes_get) | **GET** /deployments/cserve/recipes | Get Cserve Recipe +*EXTERNALApi* | [**get_cserve_v2_deployment_deployments_cserve_v2_deployment_id_get**](docs/EXTERNALApi.md#get_cserve_v2_deployment_deployments_cserve_v2_deployment_id_get) | **GET** /deployments/cserve_v2/{deployment_id} | Get Cserve V2 Deployment *EXTERNALApi* | [**get_deployment_logs_deployments_logs_deployment_id_get**](docs/EXTERNALApi.md#get_deployment_logs_deployments_logs_deployment_id_get) | **GET** /deployments/logs/{deployment_id} | Get Deployment Logs *EXTERNALApi* | 
[**get_deployment_status_deployments_status_deployment_id_get**](docs/EXTERNALApi.md#get_deployment_status_deployments_status_deployment_id_get) | **GET** /deployments/status/{deployment_id} | Get Deployment Status *EXTERNALApi* | [**get_deployments_deployments_get**](docs/EXTERNALApi.md#get_deployments_deployments_get) | **GET** /deployments | Get Deployments @@ -114,28 +119,35 @@ Class | Method | HTTP request | Description *EXTERNALApi* | [**get_inference_deployment_deployments_inference_deployment_id_get**](docs/EXTERNALApi.md#get_inference_deployment_deployments_inference_deployment_id_get) | **GET** /deployments/inference/{deployment_id} | Get Inference Deployment *EXTERNALApi* | [**get_payments_payments_get**](docs/EXTERNALApi.md#get_payments_payments_get) | **GET** /payments | Get Payments *EXTERNALApi* | [**get_prebuilt_images_prebuilt_images_get**](docs/EXTERNALApi.md#get_prebuilt_images_prebuilt_images_get) | **GET** /prebuilt-images | Get Prebuilt Images +*EXTERNALApi* | [**get_rag_deployment_deployments_rag_deployment_id_get**](docs/EXTERNALApi.md#get_rag_deployment_deployments_rag_deployment_id_get) | **GET** /deployments/rag/{deployment_id} | Get Rag Deployment *EXTERNALApi* | [**get_usage_daily_bills_get**](docs/EXTERNALApi.md#get_usage_daily_bills_get) | **GET** /daily_bills | Get Usage *EXTERNALApi* | [**get_usage_deployments_usage_deployment_id_get**](docs/EXTERNALApi.md#get_usage_deployments_usage_deployment_id_get) | **GET** /deployments/usage/{deployment_id} | Get Usage *EXTERNALApi* | [**setup_stripe_customer_payments_setup_post**](docs/EXTERNALApi.md#setup_stripe_customer_payments_setup_post) | **POST** /payments/setup | Setup Stripe Customer *EXTERNALApi* | [**update_deployment_status_deployments_status_deployment_id_put**](docs/EXTERNALApi.md#update_deployment_status_deployments_status_deployment_id_put) | **PUT** /deployments/status/{deployment_id} | Update Deployment Status +*EXTERNALApi* | 
[**update_user_vault_item_endpoint_user_vault_put**](docs/EXTERNALApi.md#update_user_vault_item_endpoint_user_vault_put) | **PUT** /user_vault | Update User Vault Item Endpoint ## Documentation For Models - [APIKeyRequest](docs/APIKeyRequest.md) - [APIKeyResponse](docs/APIKeyResponse.md) - - [CServeRecipeInput](docs/CServeRecipeInput.md) - - [CServeRecipeOutput](docs/CServeRecipeOutput.md) + - [CServeRecipe](docs/CServeRecipe.md) - [CServeRecipePerf](docs/CServeRecipePerf.md) - [CServeRecipeResponse](docs/CServeRecipeResponse.md) + - [CServeV2RecipeInput](docs/CServeV2RecipeInput.md) + - [CServeV2RecipeOutput](docs/CServeV2RecipeOutput.md) - [CreateCServeDeploymentRequest](docs/CreateCServeDeploymentRequest.md) - [CreateCServeDeploymentResponse](docs/CreateCServeDeploymentResponse.md) + - [CreateCServeV2DeploymentRequest](docs/CreateCServeV2DeploymentRequest.md) + - [CreateCServeV2DeploymentResponse](docs/CreateCServeV2DeploymentResponse.md) - [CreateCheckoutRequest](docs/CreateCheckoutRequest.md) - [CreateCheckoutResponse](docs/CreateCheckoutResponse.md) - [CreateComputeDeploymentRequest](docs/CreateComputeDeploymentRequest.md) - [CreateComputeDeploymentResponse](docs/CreateComputeDeploymentResponse.md) - [CreateInferenceDeploymentRequest](docs/CreateInferenceDeploymentRequest.md) - [CreateInferenceDeploymentResponse](docs/CreateInferenceDeploymentResponse.md) + - [CreateRagDeploymentRequest](docs/CreateRagDeploymentRequest.md) + - [CreateRagDeploymentResponse](docs/CreateRagDeploymentResponse.md) - [CreditsResponse](docs/CreditsResponse.md) - [DailyBillResponse](docs/DailyBillResponse.md) - [DeploymentStatus](docs/DeploymentStatus.md) @@ -144,6 +156,7 @@ Class | Method | HTTP request | Description - [DeploymentType](docs/DeploymentType.md) - [DeploymentUsageValue](docs/DeploymentUsageValue.md) - [GetCServeDeploymentResponse](docs/GetCServeDeploymentResponse.md) + - [GetCServeV2DeploymentResponse](docs/GetCServeV2DeploymentResponse.md) - 
[GetClusterResponse](docs/GetClusterResponse.md) - [GetComputeDeploymentResponse](docs/GetComputeDeploymentResponse.md) - [GetDeploymentLogResponse](docs/GetDeploymentLogResponse.md) @@ -151,6 +164,7 @@ Class | Method | HTTP request | Description - [GetDeploymentUsageResponse](docs/GetDeploymentUsageResponse.md) - [GetInferenceDeploymentResponse](docs/GetInferenceDeploymentResponse.md) - [GetPaymentsResponse](docs/GetPaymentsResponse.md) + - [GetRagDeploymentResponse](docs/GetRagDeploymentResponse.md) - [HTTPValidationError](docs/HTTPValidationError.md) - [HardwareInstanceResponse](docs/HardwareInstanceResponse.md) - [ListAPIKeyResponse](docs/ListAPIKeyResponse.md) @@ -160,10 +174,14 @@ Class | Method | HTTP request | Description - [ListGetDeploymentResponse](docs/ListGetDeploymentResponse.md) - [ListHardwareInstanceResponse](docs/ListHardwareInstanceResponse.md) - [ListPrebuiltImageResponse](docs/ListPrebuiltImageResponse.md) + - [ListUserVaultItemsResponse](docs/ListUserVaultItemsResponse.md) - [Metric](docs/Metric.md) - [PrebuiltImageResponse](docs/PrebuiltImageResponse.md) - [ServiceStatus](docs/ServiceStatus.md) - [UserSupportEmailRequest](docs/UserSupportEmailRequest.md) + - [UserVaultItemInput](docs/UserVaultItemInput.md) + - [UserVaultItemOutput](docs/UserVaultItemOutput.md) + - [UserVaultType](docs/UserVaultType.md) - [ValidationError](docs/ValidationError.md) - [ValidationErrorLocInner](docs/ValidationErrorLocInner.md) diff --git a/docs/CServeRecipeInput.md b/docs/CServeRecipe.md similarity index 71% rename from docs/CServeRecipeInput.md rename to docs/CServeRecipe.md index e886926..9678dcc 100644 --- a/docs/CServeRecipeInput.md +++ b/docs/CServeRecipe.md @@ -1,4 +1,4 @@ -# CServeRecipeInput +# CServeRecipe Base class for deployment planner @@ -14,8 +14,12 @@ Name | Type | Description | Notes **swap_space** | **int** | | [optional] [default to 0] **gpu_mem_util** | **float** | | [optional] [default to 0.95] **max_num_seqs** | **int** | | [optional] 
[default to 256] -**use_prefix_caching** | **bool** | | [optional] **offloading_num** | **int** | | [optional] [default to 0] +**use_prefix_caching** | **bool** | | [optional] +**use_chunked_prefill** | **bool** | | [optional] +**chunked_prefill_size** | **int** | | [optional] +**eager_execution** | **bool** | | [optional] +**num_scheduler_steps** | **int** | | [optional] **use_flashinfer** | **bool** | | [optional] [default to False] **max_model_len** | **int** | | [optional] **dtype** | **str** | | [optional] [default to 'auto'] @@ -30,19 +34,19 @@ Name | Type | Description | Notes ## Example ```python -from platform_api_python_client.models.c_serve_recipe_input import CServeRecipeInput +from platform_api_python_client.models.c_serve_recipe import CServeRecipe # TODO update the JSON string below json = "{}" -# create an instance of CServeRecipeInput from a JSON string -c_serve_recipe_input_instance = CServeRecipeInput.from_json(json) +# create an instance of CServeRecipe from a JSON string +c_serve_recipe_instance = CServeRecipe.from_json(json) # print the JSON string representation of the object -print(CServeRecipeInput.to_json()) +print(CServeRecipe.to_json()) # convert the object into a dict -c_serve_recipe_input_dict = c_serve_recipe_input_instance.to_dict() -# create an instance of CServeRecipeInput from a dict -c_serve_recipe_input_from_dict = CServeRecipeInput.from_dict(c_serve_recipe_input_dict) +c_serve_recipe_dict = c_serve_recipe_instance.to_dict() +# create an instance of CServeRecipe from a dict +c_serve_recipe_from_dict = CServeRecipe.from_dict(c_serve_recipe_dict) ``` [[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) diff --git a/docs/CServeRecipePerf.md b/docs/CServeRecipePerf.md index dcf88b3..09a0ab7 100644 --- a/docs/CServeRecipePerf.md +++ b/docs/CServeRecipePerf.md @@ -5,7 +5,7 @@ Name | Type | Description | Notes ------------ | 
------------- | ------------- | ------------- -**recipe** | [**CServeRecipeOutput**](CServeRecipeOutput.md) | | +**recipe** | [**CServeV2RecipeOutput**](CServeV2RecipeOutput.md) | | **hardware_instance_id** | **int** | | **output_tp** | **List[List[object]]** | | **mean_ttft** | **List[List[object]]** | | diff --git a/docs/CServeRecipeOutput.md b/docs/CServeV2Recipe.md similarity index 51% rename from docs/CServeRecipeOutput.md rename to docs/CServeV2Recipe.md index d0aa70e..4d8ba68 100644 --- a/docs/CServeRecipeOutput.md +++ b/docs/CServeV2Recipe.md @@ -1,48 +1,56 @@ -# CServeRecipeOutput +# CServeV2Recipe -Base class for deployment planner +Inputs to start deployment ## Properties Name | Type | Description | Notes ------------ | ------------- | ------------- | ------------- **model** | **str** | | +**max_model_len** | **int** | | **is_embedding_model** | **bool** | | [default to False] +**tokenizer** | **str** | | **tensor_parallel_size** | **int** | | **pipeline_parallel_size** | **int** | | +**gpu_mem_util** | **float** | | [default to 0.95] **block_size** | **int** | | [default to 32] **swap_space** | **int** | | [default to 0] -**gpu_mem_util** | **float** | | [default to 0.95] +**quantization** | **str** | | +**dtype** | **str** | | [default to 'auto'] +**cache_dtype** | **str** | | [default to 'auto'] **max_num_seqs** | **int** | | [default to 256] -**use_prefix_caching** | **bool** | | -**offloading_num** | **int** | | [default to 0] +**eager_execution** | **bool** | | [default to True] **use_flashinfer** | **bool** | | [default to False] -**max_model_len** | **int** | | -**dtype** | **str** | | [default to 'auto'] -**tokenizer** | **str** | | -**spec_proposer** | **str** | | +**offloading_num** | **float** | | [default to 0] **spec_draft_model** | **str** | | **spec_tokens** | **int** | | -**spec_prompt_lookup_min** | **int** | | **spec_prompt_lookup_max** | **int** | | -**seed** | **int** | | [default to 0] +**spec_prompt_lookup_min** | **int** | | 
+**use_prefix_caching** | **bool** | | [default to False] +**use_chunked_prefill** | **bool** | | [default to False] +**chunked_prefill_size** | **int** | | +**max_seq_len_to_capture** | **int** | | [default to 1024] +**distributed_executor_backend** | **str** | | [default to 'ray'] +**spec_max_batch_size** | **int** | | +**spec_max_seq_len** | **int** | | +**num_scheduler_steps** | **int** | | [default to 1] ## Example ```python -from platform_api_python_client.models.c_serve_recipe_output import CServeRecipeOutput +from platform_api_python_client.models.c_serve_v2_recipe import CServeV2Recipe # TODO update the JSON string below json = "{}" -# create an instance of CServeRecipeOutput from a JSON string -c_serve_recipe_output_instance = CServeRecipeOutput.from_json(json) +# create an instance of CServeV2Recipe from a JSON string +c_serve_v2_recipe_instance = CServeV2Recipe.from_json(json) # print the JSON string representation of the object -print(CServeRecipeOutput.to_json()) +print(CServeV2Recipe.to_json()) # convert the object into a dict -c_serve_recipe_output_dict = c_serve_recipe_output_instance.to_dict() -# create an instance of CServeRecipeOutput from a dict -c_serve_recipe_output_from_dict = CServeRecipeOutput.from_dict(c_serve_recipe_output_dict) +c_serve_v2_recipe_dict = c_serve_v2_recipe_instance.to_dict() +# create an instance of CServeV2Recipe from a dict +c_serve_v2_recipe_from_dict = CServeV2Recipe.from_dict(c_serve_v2_recipe_dict) ``` [[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) diff --git a/docs/CServeV2RecipeInput.md b/docs/CServeV2RecipeInput.md new file mode 100644 index 0000000..1d9a27f --- /dev/null +++ b/docs/CServeV2RecipeInput.md @@ -0,0 +1,57 @@ +# CServeV2RecipeInput + +Inputs to start deployment + +## Properties + +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**model** | 
**str** | | +**max_model_len** | **int** | | [optional] +**is_embedding_model** | **bool** | | [optional] [default to False] +**tokenizer** | **str** | | +**tensor_parallel_size** | **int** | | +**pipeline_parallel_size** | **int** | | +**gpu_mem_util** | **float** | | [optional] [default to 0.95] +**block_size** | **int** | | [optional] [default to 16] +**swap_space** | **int** | | [optional] [default to 0] +**quantization** | **str** | | [optional] +**dtype** | **str** | | [optional] [default to 'auto'] +**cache_dtype** | **str** | | [optional] [default to 'auto'] +**max_num_seqs** | **int** | | [optional] [default to 256] +**eager_execution** | **bool** | | [optional] [default to True] +**use_flashinfer** | **bool** | | [optional] [default to False] +**offloading_num** | **float** | | [optional] [default to 0] +**spec_draft_model** | **str** | | [optional] +**spec_tokens** | **int** | | [optional] +**spec_prompt_lookup_max** | **int** | | [optional] +**spec_prompt_lookup_min** | **int** | | [optional] +**use_prefix_caching** | **bool** | | [optional] [default to False] +**use_chunked_prefill** | **bool** | | [optional] [default to False] +**chunked_prefill_size** | **int** | | [optional] +**max_seq_len_to_capture** | **int** | | [optional] [default to 8192] +**distributed_executor_backend** | **str** | | [optional] [default to 'mp'] +**spec_max_batch_size** | **int** | | [optional] +**spec_max_seq_len** | **int** | | [optional] +**num_scheduler_steps** | **int** | | [optional] [default to 1] + +## Example + +```python +from platform_api_python_client.models.c_serve_v2_recipe_input import CServeV2RecipeInput + +# TODO update the JSON string below +json = "{}" +# create an instance of CServeV2RecipeInput from a JSON string +c_serve_v2_recipe_input_instance = CServeV2RecipeInput.from_json(json) +# print the JSON string representation of the object +print(CServeV2RecipeInput.to_json()) + +# convert the object into a dict +c_serve_v2_recipe_input_dict = 
c_serve_v2_recipe_input_instance.to_dict() +# create an instance of CServeV2RecipeInput from a dict +c_serve_v2_recipe_input_from_dict = CServeV2RecipeInput.from_dict(c_serve_v2_recipe_input_dict) +``` +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/docs/CServeV2RecipeOutput.md b/docs/CServeV2RecipeOutput.md new file mode 100644 index 0000000..437fbfc --- /dev/null +++ b/docs/CServeV2RecipeOutput.md @@ -0,0 +1,57 @@ +# CServeV2RecipeOutput + +Inputs to start deployment + +## Properties + +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**model** | **str** | | +**max_model_len** | **int** | | +**is_embedding_model** | **bool** | | [default to False] +**tokenizer** | **str** | | +**tensor_parallel_size** | **int** | | +**pipeline_parallel_size** | **int** | | +**gpu_mem_util** | **float** | | [default to 0.95] +**block_size** | **int** | | [default to 16] +**swap_space** | **int** | | [default to 0] +**quantization** | **str** | | +**dtype** | **str** | | [default to 'auto'] +**cache_dtype** | **str** | | [default to 'auto'] +**max_num_seqs** | **int** | | [default to 256] +**eager_execution** | **bool** | | [default to True] +**use_flashinfer** | **bool** | | [default to False] +**offloading_num** | **float** | | [default to 0] +**spec_draft_model** | **str** | | +**spec_tokens** | **int** | | +**spec_prompt_lookup_max** | **int** | | +**spec_prompt_lookup_min** | **int** | | +**use_prefix_caching** | **bool** | | [default to False] +**use_chunked_prefill** | **bool** | | [default to False] +**chunked_prefill_size** | **int** | | +**max_seq_len_to_capture** | **int** | | [default to 8192] +**distributed_executor_backend** | **str** | | [default to 'mp'] +**spec_max_batch_size** | **int** | | +**spec_max_seq_len** | **int** | | +**num_scheduler_steps** | **int** | | [default to 1] + +## 
Example + +```python +from platform_api_python_client.models.c_serve_v2_recipe_output import CServeV2RecipeOutput + +# TODO update the JSON string below +json = "{}" +# create an instance of CServeV2RecipeOutput from a JSON string +c_serve_v2_recipe_output_instance = CServeV2RecipeOutput.from_json(json) +# print the JSON string representation of the object +print(CServeV2RecipeOutput.to_json()) + +# convert the object into a dict +c_serve_v2_recipe_output_dict = c_serve_v2_recipe_output_instance.to_dict() +# create an instance of CServeV2RecipeOutput from a dict +c_serve_v2_recipe_output_from_dict = CServeV2RecipeOutput.from_dict(c_serve_v2_recipe_output_dict) +``` +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/docs/CreateCServeDeploymentRequest.md b/docs/CreateCServeDeploymentRequest.md index fa8f19a..ae0f73e 100644 --- a/docs/CreateCServeDeploymentRequest.md +++ b/docs/CreateCServeDeploymentRequest.md @@ -8,8 +8,8 @@ Name | Type | Description | Notes **name** | **str** | | **cluster_id** | **int** | | **hardware_instance_id** | **int** | | -**recipe** | [**CServeRecipeInput**](CServeRecipeInput.md) | | -**hf_token** | **str** | | +**recipe** | [**CServeRecipe**](CServeRecipe.md) | | +**hf_token** | **str** | | [optional] **endpoint_certificate_authority** | **str** | | [optional] **min_scale** | **int** | | **max_scale** | **int** | | diff --git a/docs/CreateCServeV2DeploymentRequest.md b/docs/CreateCServeV2DeploymentRequest.md new file mode 100644 index 0000000..f2b8766 --- /dev/null +++ b/docs/CreateCServeV2DeploymentRequest.md @@ -0,0 +1,38 @@ +# CreateCServeV2DeploymentRequest + + +## Properties + +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**name** | **str** | | +**cluster_id** | **int** | | +**hardware_instance_id** | **int** | | +**recipe** | 
[**CServeV2RecipeInput**](CServeV2RecipeInput.md) | | +**hf_token** | **str** | | [optional] +**endpoint_certificate_authority** | **str** | | [optional] +**min_scale** | **int** | | +**max_scale** | **int** | | +**concurrency** | **int** | | [optional] +**env_vars** | **Dict[str, str]** | | [optional] + +## Example + +```python +from platform_api_python_client.models.create_c_serve_v2_deployment_request import CreateCServeV2DeploymentRequest + +# TODO update the JSON string below +json = "{}" +# create an instance of CreateCServeV2DeploymentRequest from a JSON string +create_c_serve_v2_deployment_request_instance = CreateCServeV2DeploymentRequest.from_json(json) +# print the JSON string representation of the object +print(CreateCServeV2DeploymentRequest.to_json()) + +# convert the object into a dict +create_c_serve_v2_deployment_request_dict = create_c_serve_v2_deployment_request_instance.to_dict() +# create an instance of CreateCServeV2DeploymentRequest from a dict +create_c_serve_v2_deployment_request_from_dict = CreateCServeV2DeploymentRequest.from_dict(create_c_serve_v2_deployment_request_dict) +``` +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/docs/CreateCServeV2DeploymentResponse.md b/docs/CreateCServeV2DeploymentResponse.md new file mode 100644 index 0000000..86facfc --- /dev/null +++ b/docs/CreateCServeV2DeploymentResponse.md @@ -0,0 +1,31 @@ +# CreateCServeV2DeploymentResponse + + +## Properties + +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**id** | **int** | | +**created_at** | **datetime** | | +**endpoint_url** | **str** | | + +## Example + +```python +from platform_api_python_client.models.create_c_serve_v2_deployment_response import CreateCServeV2DeploymentResponse + +# TODO update the JSON string below +json = "{}" +# create an instance of 
CreateCServeV2DeploymentResponse from a JSON string +create_c_serve_v2_deployment_response_instance = CreateCServeV2DeploymentResponse.from_json(json) +# print the JSON string representation of the object +print(CreateCServeV2DeploymentResponse.to_json()) + +# convert the object into a dict +create_c_serve_v2_deployment_response_dict = create_c_serve_v2_deployment_response_instance.to_dict() +# create an instance of CreateCServeV2DeploymentResponse from a dict +create_c_serve_v2_deployment_response_from_dict = CreateCServeV2DeploymentResponse.from_dict(create_c_serve_v2_deployment_response_dict) +``` +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/docs/CreateComputeDeploymentRequest.md b/docs/CreateComputeDeploymentRequest.md index b7fd54c..7abef60 100644 --- a/docs/CreateComputeDeploymentRequest.md +++ b/docs/CreateComputeDeploymentRequest.md @@ -9,6 +9,7 @@ Name | Type | Description | Notes **cluster_id** | **int** | | **hardware_instance_id** | **int** | | **image_url** | **str** | | +**enable_jupyter** | **bool** | | [optional] [default to False] **ssh_public_key** | **str** | | [optional] **ssh_password** | **str** | | [optional] diff --git a/docs/CreateComputeDeploymentResponse.md b/docs/CreateComputeDeploymentResponse.md index 298b492..e1ea3b5 100644 --- a/docs/CreateComputeDeploymentResponse.md +++ b/docs/CreateComputeDeploymentResponse.md @@ -9,6 +9,7 @@ Name | Type | Description | Notes **created_at** | **datetime** | | **endpoint_url** | **str** | | **port** | **int** | | +**jupyter_token** | **str** | | ## Example diff --git a/docs/CreateRagDeploymentRequest.md b/docs/CreateRagDeploymentRequest.md new file mode 100644 index 0000000..b08202b --- /dev/null +++ b/docs/CreateRagDeploymentRequest.md @@ -0,0 +1,40 @@ +# CreateRagDeploymentRequest + + +## Properties + +Name | Type | Description | Notes +------------ | 
------------- | ------------- | ------------- +**name** | **str** | | +**cluster_id** | **int** | | +**hardware_instance_id** | **int** | | +**recipe** | [**CServeV2RecipeInput**](CServeV2RecipeInput.md) | | +**hf_token** | **str** | | [optional] +**llm_model** | **str** | | +**centml_api_key** | **str** | | +**min_scale** | **int** | | [optional] [default to 1] +**max_scale** | **int** | | [optional] [default to 1] +**endpoint_certificate_authority** | **str** | | [optional] +**concurrency** | **int** | | [optional] +**env_vars** | **Dict[str, str]** | | [optional] + +## Example + +```python +from platform_api_python_client.models.create_rag_deployment_request import CreateRagDeploymentRequest + +# TODO update the JSON string below +json = "{}" +# create an instance of CreateRagDeploymentRequest from a JSON string +create_rag_deployment_request_instance = CreateRagDeploymentRequest.from_json(json) +# print the JSON string representation of the object +print(CreateRagDeploymentRequest.to_json()) + +# convert the object into a dict +create_rag_deployment_request_dict = create_rag_deployment_request_instance.to_dict() +# create an instance of CreateRagDeploymentRequest from a dict +create_rag_deployment_request_from_dict = CreateRagDeploymentRequest.from_dict(create_rag_deployment_request_dict) +``` +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/docs/CreateRagDeploymentResponse.md b/docs/CreateRagDeploymentResponse.md new file mode 100644 index 0000000..33e88fd --- /dev/null +++ b/docs/CreateRagDeploymentResponse.md @@ -0,0 +1,31 @@ +# CreateRagDeploymentResponse + + +## Properties + +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**id** | **int** | | +**created_at** | **datetime** | | +**endpoint_url** | **str** | | + +## Example + +```python +from 
platform_api_python_client.models.create_rag_deployment_response import CreateRagDeploymentResponse + +# TODO update the JSON string below +json = "{}" +# create an instance of CreateRagDeploymentResponse from a JSON string +create_rag_deployment_response_instance = CreateRagDeploymentResponse.from_json(json) +# print the JSON string representation of the object +print(CreateRagDeploymentResponse.to_json()) + +# convert the object into a dict +create_rag_deployment_response_dict = create_rag_deployment_response_instance.to_dict() +# create an instance of CreateRagDeploymentResponse from a dict +create_rag_deployment_response_from_dict = CreateRagDeploymentResponse.from_dict(create_rag_deployment_response_dict) +``` +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/docs/DeploymentType.md b/docs/DeploymentType.md index ae6e6f8..b988793 100644 --- a/docs/DeploymentType.md +++ b/docs/DeploymentType.md @@ -17,8 +17,12 @@ * `CSERVE` (value: `'cserve'`) +* `CSERVE_V2` (value: `'cserve_v2'`) + * `DEPLOYMENT` (value: `'deployment'`) +* `RAG` (value: `'rag'`) + [[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) diff --git a/docs/EXTERNALApi.md b/docs/EXTERNALApi.md index c968860..e1a6dc9 100644 --- a/docs/EXTERNALApi.md +++ b/docs/EXTERNALApi.md @@ -9,14 +9,19 @@ Method | HTTP request | Description [**create_checkout_payments_checkout_post**](EXTERNALApi.md#create_checkout_payments_checkout_post) | **POST** /payments/checkout | Create Checkout [**create_compute_deployment_deployments_compute_post**](EXTERNALApi.md#create_compute_deployment_deployments_compute_post) | **POST** /deployments/compute | Create Compute Deployment 
[**create_cserve_deployment_deployments_cserve_post**](EXTERNALApi.md#create_cserve_deployment_deployments_cserve_post) | **POST** /deployments/cserve | Create Cserve Deployment +[**create_cserve_v2_deployment_deployments_cserve_v2_post**](EXTERNALApi.md#create_cserve_v2_deployment_deployments_cserve_v2_post) | **POST** /deployments/cserve_v2 | Create Cserve V2 Deployment [**create_inference_deployment_deployments_inference_post**](EXTERNALApi.md#create_inference_deployment_deployments_inference_post) | **POST** /deployments/inference | Create Inference Deployment +[**create_rag_deployment_deployments_rag_post**](EXTERNALApi.md#create_rag_deployment_deployments_rag_post) | **POST** /deployments/rag | Create Rag Deployment [**delete_api_key_credentials_api_key_id_delete**](EXTERNALApi.md#delete_api_key_credentials_api_key_id_delete) | **DELETE** /credentials/api-key/{id} | Delete Api Key +[**delete_user_vault_item_endpoint_user_vault_delete**](EXTERNALApi.md#delete_user_vault_item_endpoint_user_vault_delete) | **DELETE** /user_vault | Delete User Vault Item Endpoint +[**get_all_user_vault_items_endpoint_user_vault_get**](EXTERNALApi.md#get_all_user_vault_items_endpoint_user_vault_get) | **GET** /user_vault | Get All User Vault Items Endpoint [**get_api_keys_credentials_api_key_get**](EXTERNALApi.md#get_api_keys_credentials_api_key_get) | **GET** /credentials/api-key | Get Api Keys [**get_clusters_clusters_get**](EXTERNALApi.md#get_clusters_clusters_get) | **GET** /clusters | Get Clusters [**get_compute_deployment_deployments_compute_deployment_id_get**](EXTERNALApi.md#get_compute_deployment_deployments_compute_deployment_id_get) | **GET** /deployments/compute/{deployment_id} | Get Compute Deployment [**get_credits_credits_get**](EXTERNALApi.md#get_credits_credits_get) | **GET** /credits | Get Credits [**get_cserve_deployment_deployments_cserve_deployment_id_get**](EXTERNALApi.md#get_cserve_deployment_deployments_cserve_deployment_id_get) | **GET** 
/deployments/cserve/{deployment_id} | Get Cserve Deployment [**get_cserve_recipe_deployments_cserve_recipes_get**](EXTERNALApi.md#get_cserve_recipe_deployments_cserve_recipes_get) | **GET** /deployments/cserve/recipes | Get Cserve Recipe +[**get_cserve_v2_deployment_deployments_cserve_v2_deployment_id_get**](EXTERNALApi.md#get_cserve_v2_deployment_deployments_cserve_v2_deployment_id_get) | **GET** /deployments/cserve_v2/{deployment_id} | Get Cserve V2 Deployment [**get_deployment_logs_deployments_logs_deployment_id_get**](EXTERNALApi.md#get_deployment_logs_deployments_logs_deployment_id_get) | **GET** /deployments/logs/{deployment_id} | Get Deployment Logs [**get_deployment_status_deployments_status_deployment_id_get**](EXTERNALApi.md#get_deployment_status_deployments_status_deployment_id_get) | **GET** /deployments/status/{deployment_id} | Get Deployment Status [**get_deployments_deployments_get**](EXTERNALApi.md#get_deployments_deployments_get) | **GET** /deployments | Get Deployments @@ -24,10 +29,12 @@ Method | HTTP request | Description [**get_inference_deployment_deployments_inference_deployment_id_get**](EXTERNALApi.md#get_inference_deployment_deployments_inference_deployment_id_get) | **GET** /deployments/inference/{deployment_id} | Get Inference Deployment [**get_payments_payments_get**](EXTERNALApi.md#get_payments_payments_get) | **GET** /payments | Get Payments [**get_prebuilt_images_prebuilt_images_get**](EXTERNALApi.md#get_prebuilt_images_prebuilt_images_get) | **GET** /prebuilt-images | Get Prebuilt Images +[**get_rag_deployment_deployments_rag_deployment_id_get**](EXTERNALApi.md#get_rag_deployment_deployments_rag_deployment_id_get) | **GET** /deployments/rag/{deployment_id} | Get Rag Deployment [**get_usage_daily_bills_get**](EXTERNALApi.md#get_usage_daily_bills_get) | **GET** /daily_bills | Get Usage [**get_usage_deployments_usage_deployment_id_get**](EXTERNALApi.md#get_usage_deployments_usage_deployment_id_get) | **GET** 
/deployments/usage/{deployment_id} | Get Usage [**setup_stripe_customer_payments_setup_post**](EXTERNALApi.md#setup_stripe_customer_payments_setup_post) | **POST** /payments/setup | Setup Stripe Customer [**update_deployment_status_deployments_status_deployment_id_put**](EXTERNALApi.md#update_deployment_status_deployments_status_deployment_id_put) | **PUT** /deployments/status/{deployment_id} | Update Deployment Status +[**update_user_vault_item_endpoint_user_vault_put**](EXTERNALApi.md#update_user_vault_item_endpoint_user_vault_put) | **PUT** /user_vault | Update User Vault Item Endpoint # **add_user_request_support_user_requests_post** @@ -419,6 +426,84 @@ Name | Type | Description | Notes [[Back to top]](#) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to Model list]](../README.md#documentation-for-models) [[Back to README]](../README.md) +# **create_cserve_v2_deployment_deployments_cserve_v2_post** +> CreateCServeV2DeploymentResponse create_cserve_v2_deployment_deployments_cserve_v2_post(create_c_serve_v2_deployment_request) + +Create Cserve V2 Deployment + +### Example + +* Bearer Authentication (HTTPBearer): + +```python +import os +import platform_api_python_client +from platform_api_python_client.models.create_c_serve_v2_deployment_request import CreateCServeV2DeploymentRequest +from platform_api_python_client.models.create_c_serve_v2_deployment_response import CreateCServeV2DeploymentResponse +from platform_api_python_client.rest import ApiException +from pprint import pprint + +# Defining the host is optional and defaults to http://localhost +# See configuration.py for a list of all supported configuration parameters. +configuration = platform_api_python_client.Configuration( + host = "http://localhost" +) + +# The client must configure the authentication and authorization parameters +# in accordance with the API server security policy. 
+# Examples for each auth method are provided below, use the example that +# satisfies your auth use case. + +# Configure Bearer authorization: HTTPBearer +configuration = platform_api_python_client.Configuration( + access_token = os.environ["BEARER_TOKEN"] +) + +# Enter a context with an instance of the API client +with platform_api_python_client.ApiClient(configuration) as api_client: + # Create an instance of the API class + api_instance = platform_api_python_client.EXTERNALApi(api_client) + create_c_serve_v2_deployment_request = platform_api_python_client.CreateCServeV2DeploymentRequest() # CreateCServeV2DeploymentRequest | + + try: + # Create Cserve V2 Deployment + api_response = api_instance.create_cserve_v2_deployment_deployments_cserve_v2_post(create_c_serve_v2_deployment_request) + print("The response of EXTERNALApi->create_cserve_v2_deployment_deployments_cserve_v2_post:\n") + pprint(api_response) + except Exception as e: + print("Exception when calling EXTERNALApi->create_cserve_v2_deployment_deployments_cserve_v2_post: %s\n" % e) +``` + + + +### Parameters + + +Name | Type | Description | Notes +------------- | ------------- | ------------- | ------------- + **create_c_serve_v2_deployment_request** | [**CreateCServeV2DeploymentRequest**](CreateCServeV2DeploymentRequest.md)| | + +### Return type + +[**CreateCServeV2DeploymentResponse**](CreateCServeV2DeploymentResponse.md) + +### Authorization + +[HTTPBearer](../README.md#HTTPBearer) + +### HTTP request headers + + - **Content-Type**: application/json + - **Accept**: application/json + +### HTTP response details + +| Status code | Description | Response headers | +|-------------|-------------|------------------| +**200** | Successful Response | - | +**422** | Validation Error | - | + +[[Back to top]](#) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to Model list]](../README.md#documentation-for-models) [[Back to README]](../README.md) + # 
**create_inference_deployment_deployments_inference_post** > CreateInferenceDeploymentResponse create_inference_deployment_deployments_inference_post(create_inference_deployment_request) @@ -497,6 +582,84 @@ Name | Type | Description | Notes [[Back to top]](#) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to Model list]](../README.md#documentation-for-models) [[Back to README]](../README.md) +# **create_rag_deployment_deployments_rag_post** +> CreateRagDeploymentResponse create_rag_deployment_deployments_rag_post(create_rag_deployment_request) + +Create Rag Deployment + +### Example + +* Bearer Authentication (HTTPBearer): + +```python +import os +import platform_api_python_client +from platform_api_python_client.models.create_rag_deployment_request import CreateRagDeploymentRequest +from platform_api_python_client.models.create_rag_deployment_response import CreateRagDeploymentResponse +from platform_api_python_client.rest import ApiException +from pprint import pprint + +# Defining the host is optional and defaults to http://localhost +# See configuration.py for a list of all supported configuration parameters. +configuration = platform_api_python_client.Configuration( + host = "http://localhost" +) + +# The client must configure the authentication and authorization parameters +# in accordance with the API server security policy. +# Examples for each auth method are provided below, use the example that +# satisfies your auth use case. 
+ +# Configure Bearer authorization: HTTPBearer +configuration = platform_api_python_client.Configuration( + access_token = os.environ["BEARER_TOKEN"] +) + +# Enter a context with an instance of the API client +with platform_api_python_client.ApiClient(configuration) as api_client: + # Create an instance of the API class + api_instance = platform_api_python_client.EXTERNALApi(api_client) + create_rag_deployment_request = platform_api_python_client.CreateRagDeploymentRequest() # CreateRagDeploymentRequest | + + try: + # Create Rag Deployment + api_response = api_instance.create_rag_deployment_deployments_rag_post(create_rag_deployment_request) + print("The response of EXTERNALApi->create_rag_deployment_deployments_rag_post:\n") + pprint(api_response) + except Exception as e: + print("Exception when calling EXTERNALApi->create_rag_deployment_deployments_rag_post: %s\n" % e) +``` + + + +### Parameters + + +Name | Type | Description | Notes +------------- | ------------- | ------------- | ------------- + **create_rag_deployment_request** | [**CreateRagDeploymentRequest**](CreateRagDeploymentRequest.md)| | + +### Return type + +[**CreateRagDeploymentResponse**](CreateRagDeploymentResponse.md) + +### Authorization + +[HTTPBearer](../README.md#HTTPBearer) + +### HTTP request headers + + - **Content-Type**: application/json + - **Accept**: application/json + +### HTTP response details + +| Status code | Description | Response headers | +|-------------|-------------|------------------| +**200** | Successful Response | - | +**422** | Validation Error | - | + +[[Back to top]](#) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to Model list]](../README.md#documentation-for-models) [[Back to README]](../README.md) + # **delete_api_key_credentials_api_key_id_delete** > object delete_api_key_credentials_api_key_id_delete(id) @@ -573,6 +736,167 @@ Name | Type | Description | Notes [[Back to top]](#) [[Back to API 
list]](../README.md#documentation-for-api-endpoints) [[Back to Model list]](../README.md#documentation-for-models) [[Back to README]](../README.md) +# **delete_user_vault_item_endpoint_user_vault_delete** +> object delete_user_vault_item_endpoint_user_vault_delete(user_vault_item_input) + +Delete User Vault Item Endpoint + +Delete an item of a specific type for the user. + +### Example + +* Bearer Authentication (HTTPBearer): + +```python +import os +import platform_api_python_client +from platform_api_python_client.models.user_vault_item_input import UserVaultItemInput +from platform_api_python_client.rest import ApiException +from pprint import pprint + +# Defining the host is optional and defaults to http://localhost +# See configuration.py for a list of all supported configuration parameters. +configuration = platform_api_python_client.Configuration( + host = "http://localhost" +) + +# The client must configure the authentication and authorization parameters +# in accordance with the API server security policy. +# Examples for each auth method are provided below, use the example that +# satisfies your auth use case. 
+ +# Configure Bearer authorization: HTTPBearer +configuration = platform_api_python_client.Configuration( + access_token = os.environ["BEARER_TOKEN"] +) + +# Enter a context with an instance of the API client +with platform_api_python_client.ApiClient(configuration) as api_client: + # Create an instance of the API class + api_instance = platform_api_python_client.EXTERNALApi(api_client) + user_vault_item_input = platform_api_python_client.UserVaultItemInput() # UserVaultItemInput | + + try: + # Delete User Vault Item Endpoint + api_response = api_instance.delete_user_vault_item_endpoint_user_vault_delete(user_vault_item_input) + print("The response of EXTERNALApi->delete_user_vault_item_endpoint_user_vault_delete:\n") + pprint(api_response) + except Exception as e: + print("Exception when calling EXTERNALApi->delete_user_vault_item_endpoint_user_vault_delete: %s\n" % e) +``` + + + +### Parameters + + +Name | Type | Description | Notes +------------- | ------------- | ------------- | ------------- + **user_vault_item_input** | [**UserVaultItemInput**](UserVaultItemInput.md)| | + +### Return type + +**object** + +### Authorization + +[HTTPBearer](../README.md#HTTPBearer) + +### HTTP request headers + + - **Content-Type**: application/json + - **Accept**: application/json + +### HTTP response details + +| Status code | Description | Response headers | +|-------------|-------------|------------------| +**200** | Successful Response | - | +**422** | Validation Error | - | + +[[Back to top]](#) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to Model list]](../README.md#documentation-for-models) [[Back to README]](../README.md) + +# **get_all_user_vault_items_endpoint_user_vault_get** +> ListUserVaultItemsResponse get_all_user_vault_items_endpoint_user_vault_get(type=type, search_query=search_query) + +Get All User Vault Items Endpoint + +Retrieve all items of a specific type for the user. 
+ +### Example + +* Bearer Authentication (HTTPBearer): + +```python +import os +import platform_api_python_client +from platform_api_python_client.models.list_user_vault_items_response import ListUserVaultItemsResponse +from platform_api_python_client.models.user_vault_type import UserVaultType +from platform_api_python_client.rest import ApiException +from pprint import pprint + +# Defining the host is optional and defaults to http://localhost +# See configuration.py for a list of all supported configuration parameters. +configuration = platform_api_python_client.Configuration( + host = "http://localhost" +) + +# The client must configure the authentication and authorization parameters +# in accordance with the API server security policy. +# Examples for each auth method are provided below, use the example that +# satisfies your auth use case. + +# Configure Bearer authorization: HTTPBearer +configuration = platform_api_python_client.Configuration( + access_token = os.environ["BEARER_TOKEN"] +) + +# Enter a context with an instance of the API client +with platform_api_python_client.ApiClient(configuration) as api_client: + # Create an instance of the API class + api_instance = platform_api_python_client.EXTERNALApi(api_client) + type = platform_api_python_client.UserVaultType() # UserVaultType | (optional) + search_query = 'search_query_example' # str | (optional) + + try: + # Get All User Vault Items Endpoint + api_response = api_instance.get_all_user_vault_items_endpoint_user_vault_get(type=type, search_query=search_query) + print("The response of EXTERNALApi->get_all_user_vault_items_endpoint_user_vault_get:\n") + pprint(api_response) + except Exception as e: + print("Exception when calling EXTERNALApi->get_all_user_vault_items_endpoint_user_vault_get: %s\n" % e) +``` + + + +### Parameters + + +Name | Type | Description | Notes +------------- | ------------- | ------------- | ------------- + **type** | [**UserVaultType**](UserVaultType.md)| | [optional] + **search_query** | **str**|
| [optional] + +### Return type + +[**ListUserVaultItemsResponse**](ListUserVaultItemsResponse.md) + +### Authorization + +[HTTPBearer](../README.md#HTTPBearer) + +### HTTP request headers + + - **Content-Type**: Not defined + - **Accept**: application/json + +### HTTP response details + +| Status code | Description | Response headers | +|-------------|-------------|------------------| +**200** | Successful Response | - | +**422** | Validation Error | - | + +[[Back to top]](#) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to Model list]](../README.md#documentation-for-models) [[Back to README]](../README.md) + # **get_api_keys_credentials_api_key_get** > ListAPIKeyResponse get_api_keys_credentials_api_key_get() @@ -1022,6 +1346,83 @@ Name | Type | Description | Notes [[Back to top]](#) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to Model list]](../README.md#documentation-for-models) [[Back to README]](../README.md) +# **get_cserve_v2_deployment_deployments_cserve_v2_deployment_id_get** +> GetCServeV2DeploymentResponse get_cserve_v2_deployment_deployments_cserve_v2_deployment_id_get(deployment_id) + +Get Cserve V2 Deployment + +### Example + +* Bearer Authentication (HTTPBearer): + +```python +import os +import platform_api_python_client +from platform_api_python_client.models.get_c_serve_v2_deployment_response import GetCServeV2DeploymentResponse +from platform_api_python_client.rest import ApiException +from pprint import pprint + +# Defining the host is optional and defaults to http://localhost +# See configuration.py for a list of all supported configuration parameters. +configuration = platform_api_python_client.Configuration( + host = "http://localhost" +) + +# The client must configure the authentication and authorization parameters +# in accordance with the API server security policy. +# Examples for each auth method are provided below, use the example that +# satisfies your auth use case. 
+ +# Configure Bearer authorization: HTTPBearer +configuration = platform_api_python_client.Configuration( + access_token = os.environ["BEARER_TOKEN"] +) + +# Enter a context with an instance of the API client +with platform_api_python_client.ApiClient(configuration) as api_client: + # Create an instance of the API class + api_instance = platform_api_python_client.EXTERNALApi(api_client) + deployment_id = 56 # int | + + try: + # Get Cserve V2 Deployment + api_response = api_instance.get_cserve_v2_deployment_deployments_cserve_v2_deployment_id_get(deployment_id) + print("The response of EXTERNALApi->get_cserve_v2_deployment_deployments_cserve_v2_deployment_id_get:\n") + pprint(api_response) + except Exception as e: + print("Exception when calling EXTERNALApi->get_cserve_v2_deployment_deployments_cserve_v2_deployment_id_get: %s\n" % e) +``` + + + +### Parameters + + +Name | Type | Description | Notes +------------- | ------------- | ------------- | ------------- + **deployment_id** | **int**| | + +### Return type + +[**GetCServeV2DeploymentResponse**](GetCServeV2DeploymentResponse.md) + +### Authorization + +[HTTPBearer](../README.md#HTTPBearer) + +### HTTP request headers + + - **Content-Type**: Not defined + - **Accept**: application/json + +### HTTP response details + +| Status code | Description | Response headers | +|-------------|-------------|------------------| +**200** | Successful Response | - | +**422** | Validation Error | - | + +[[Back to top]](#) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to Model list]](../README.md#documentation-for-models) [[Back to README]](../README.md) + # **get_deployment_logs_deployments_logs_deployment_id_get** > GetDeploymentLogResponse get_deployment_logs_deployments_logs_deployment_id_get(deployment_id, start_time, end_time, next_page_token=next_page_token) @@ -1267,7 +1668,7 @@ Name | Type | Description | Notes [[Back to top]](#) [[Back to API 
list]](../README.md#documentation-for-api-endpoints) [[Back to Model list]](../README.md#documentation-for-models) [[Back to README]](../README.md) # **get_hardware_instances_hardware_instances_get** -> ListHardwareInstanceResponse get_hardware_instances_hardware_instances_get(cluster_id) +> ListHardwareInstanceResponse get_hardware_instances_hardware_instances_get(cluster_id=cluster_id) Get Hardware Instances @@ -1301,11 +1702,11 @@ configuration = platform_api_python_client.Configuration( with platform_api_python_client.ApiClient(configuration) as api_client: # Create an instance of the API class api_instance = platform_api_python_client.EXTERNALApi(api_client) - cluster_id = 56 # int | + cluster_id = 56 # int | (optional) try: # Get Hardware Instances - api_response = api_instance.get_hardware_instances_hardware_instances_get(cluster_id) + api_response = api_instance.get_hardware_instances_hardware_instances_get(cluster_id=cluster_id) print("The response of EXTERNALApi->get_hardware_instances_hardware_instances_get:\n") pprint(api_response) except Exception as e: @@ -1319,7 +1720,7 @@ with platform_api_python_client.ApiClient(configuration) as api_client: Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- - **cluster_id** | **int**| | + **cluster_id** | **int**| | [optional] ### Return type @@ -1577,6 +1978,83 @@ Name | Type | Description | Notes [[Back to top]](#) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to Model list]](../README.md#documentation-for-models) [[Back to README]](../README.md) +# **get_rag_deployment_deployments_rag_deployment_id_get** +> GetRagDeploymentResponse get_rag_deployment_deployments_rag_deployment_id_get(deployment_id) + +Get Rag Deployment + +### Example + +* Bearer Authentication (HTTPBearer): + +```python +import os +import platform_api_python_client +from platform_api_python_client.models.get_rag_deployment_response import GetRagDeploymentResponse +from
platform_api_python_client.rest import ApiException +from pprint import pprint + +# Defining the host is optional and defaults to http://localhost +# See configuration.py for a list of all supported configuration parameters. +configuration = platform_api_python_client.Configuration( + host = "http://localhost" +) + +# The client must configure the authentication and authorization parameters +# in accordance with the API server security policy. +# Examples for each auth method are provided below, use the example that +# satisfies your auth use case. + +# Configure Bearer authorization: HTTPBearer +configuration = platform_api_python_client.Configuration( + access_token = os.environ["BEARER_TOKEN"] +) + +# Enter a context with an instance of the API client +with platform_api_python_client.ApiClient(configuration) as api_client: + # Create an instance of the API class + api_instance = platform_api_python_client.EXTERNALApi(api_client) + deployment_id = 56 # int | + + try: + # Get Rag Deployment + api_response = api_instance.get_rag_deployment_deployments_rag_deployment_id_get(deployment_id) + print("The response of EXTERNALApi->get_rag_deployment_deployments_rag_deployment_id_get:\n") + pprint(api_response) + except Exception as e: + print("Exception when calling EXTERNALApi->get_rag_deployment_deployments_rag_deployment_id_get: %s\n" % e) +``` + + + +### Parameters + + +Name | Type | Description | Notes +------------- | ------------- | ------------- | ------------- + **deployment_id** | **int**| | + +### Return type + +[**GetRagDeploymentResponse**](GetRagDeploymentResponse.md) + +### Authorization + +[HTTPBearer](../README.md#HTTPBearer) + +### HTTP request headers + + - **Content-Type**: Not defined + - **Accept**: application/json + +### HTTP response details + +| Status code | Description | Response headers | +|-------------|-------------|------------------| +**200** | Successful Response | - | +**422** | Validation Error | - | + +[[Back to top]](#) [[Back to API 
list]](../README.md#documentation-for-api-endpoints) [[Back to Model list]](../README.md#documentation-for-models) [[Back to README]](../README.md) + # **get_usage_daily_bills_get** > ListDailyBillResponse get_usage_daily_bills_get(start_date, end_date) @@ -1891,3 +2369,82 @@ Name | Type | Description | Notes [[Back to top]](#) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to Model list]](../README.md#documentation-for-models) [[Back to README]](../README.md) +# **update_user_vault_item_endpoint_user_vault_put** +> object update_user_vault_item_endpoint_user_vault_put(user_vault_item_input) + +Update User Vault Item Endpoint + +Update or add multiple items of a specific type for the user. + +### Example + +* Bearer Authentication (HTTPBearer): + +```python +import os +import platform_api_python_client +from platform_api_python_client.models.user_vault_item_input import UserVaultItemInput +from platform_api_python_client.rest import ApiException +from pprint import pprint + +# Defining the host is optional and defaults to http://localhost +# See configuration.py for a list of all supported configuration parameters. +configuration = platform_api_python_client.Configuration( + host = "http://localhost" +) + +# The client must configure the authentication and authorization parameters +# in accordance with the API server security policy. +# Examples for each auth method are provided below, use the example that +# satisfies your auth use case. 
+ +# Configure Bearer authorization: HTTPBearer +configuration = platform_api_python_client.Configuration( + access_token = os.environ["BEARER_TOKEN"] +) + +# Enter a context with an instance of the API client +with platform_api_python_client.ApiClient(configuration) as api_client: + # Create an instance of the API class + api_instance = platform_api_python_client.EXTERNALApi(api_client) + user_vault_item_input = platform_api_python_client.UserVaultItemInput() # UserVaultItemInput | + + try: + # Update User Vault Item Endpoint + api_response = api_instance.update_user_vault_item_endpoint_user_vault_put(user_vault_item_input) + print("The response of EXTERNALApi->update_user_vault_item_endpoint_user_vault_put:\n") + pprint(api_response) + except Exception as e: + print("Exception when calling EXTERNALApi->update_user_vault_item_endpoint_user_vault_put: %s\n" % e) +``` + + + +### Parameters + + +Name | Type | Description | Notes +------------- | ------------- | ------------- | ------------- + **user_vault_item_input** | [**UserVaultItemInput**](UserVaultItemInput.md)| | + +### Return type + +**object** + +### Authorization + +[HTTPBearer](../README.md#HTTPBearer) + +### HTTP request headers + + - **Content-Type**: application/json + - **Accept**: application/json + +### HTTP response details + +| Status code | Description | Response headers | +|-------------|-------------|------------------| +**200** | Successful Response | - | +**422** | Validation Error | - | + +[[Back to top]](#) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to Model list]](../README.md#documentation-for-models) [[Back to README]](../README.md) + diff --git a/docs/GetCServeDeploymentResponse.md b/docs/GetCServeDeploymentResponse.md index 06d62b3..f6ca201 100644 --- a/docs/GetCServeDeploymentResponse.md +++ b/docs/GetCServeDeploymentResponse.md @@ -13,8 +13,12 @@ Name | Type | Description | Notes **swap_space** | **int** | | [default to 0] **gpu_mem_util** | **float** | 
| [default to 0.95] **max_num_seqs** | **int** | | [default to 256] -**use_prefix_caching** | **bool** | | **offloading_num** | **int** | | [default to 0] +**use_prefix_caching** | **bool** | | +**use_chunked_prefill** | **bool** | | +**chunked_prefill_size** | **int** | | +**eager_execution** | **bool** | | +**num_scheduler_steps** | **int** | | **use_flashinfer** | **bool** | | [default to False] **max_model_len** | **int** | | **dtype** | **str** | | [default to 'auto'] diff --git a/docs/GetCServeV2DeploymentResponse.md b/docs/GetCServeV2DeploymentResponse.md new file mode 100644 index 0000000..660be09 --- /dev/null +++ b/docs/GetCServeV2DeploymentResponse.md @@ -0,0 +1,43 @@ +# GetCServeV2DeploymentResponse + + +## Properties + +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**cluster_id** | **int** | | +**id** | **int** | | +**name** | **str** | | +**endpoint_url** | **str** | | +**image_url** | **str** | | +**type** | [**DeploymentType**](DeploymentType.md) | | +**status** | [**DeploymentStatus**](DeploymentStatus.md) | | +**created_at** | **datetime** | | +**hardware_instance_id** | **int** | | +**recipe** | [**CServeV2RecipeOutput**](CServeV2RecipeOutput.md) | | +**min_scale** | **int** | | +**max_scale** | **int** | | +**endpoint_certificate_authority** | **str** | | +**concurrency** | **int** | | +**env_vars** | **Dict[str, str]** | | + +## Example + +```python +from platform_api_python_client.models.get_c_serve_v2_deployment_response import GetCServeV2DeploymentResponse + +# TODO update the JSON string below +json = "{}" +# create an instance of GetCServeV2DeploymentResponse from a JSON string +get_c_serve_v2_deployment_response_instance = GetCServeV2DeploymentResponse.from_json(json) +# print the JSON string representation of the object +print(get_c_serve_v2_deployment_response_instance.to_json()) + +# convert the object into a dict +get_c_serve_v2_deployment_response_dict =
get_c_serve_v2_deployment_response_instance.to_dict() +# create an instance of GetCServeV2DeploymentResponse from a dict +get_c_serve_v2_deployment_response_from_dict = GetCServeV2DeploymentResponse.from_dict(get_c_serve_v2_deployment_response_dict) +``` +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/docs/GetClusterResponse.md b/docs/GetClusterResponse.md index dfe90e9..a18bbc3 100644 --- a/docs/GetClusterResponse.md +++ b/docs/GetClusterResponse.md @@ -7,6 +7,7 @@ Name | Type | Description | Notes ------------ | ------------- | ------------- | ------------- **id** | **int** | | **display_name** | **str** | | +**region** | **str** | | ## Example diff --git a/docs/GetRagDeploymentResponse.md b/docs/GetRagDeploymentResponse.md new file mode 100644 index 0000000..2a9f99b --- /dev/null +++ b/docs/GetRagDeploymentResponse.md @@ -0,0 +1,45 @@ +# GetRagDeploymentResponse + + +## Properties + +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**cluster_id** | **int** | | +**id** | **int** | | +**name** | **str** | | +**endpoint_url** | **str** | | +**image_url** | **str** | | +**type** | [**DeploymentType**](DeploymentType.md) | | +**status** | [**DeploymentStatus**](DeploymentStatus.md) | | +**created_at** | **datetime** | | +**hardware_instance_id** | **int** | | +**recipe** | [**CServeV2RecipeOutput**](CServeV2RecipeOutput.md) | | +**llm_model** | **str** | | +**centml_api_key** | **str** | | +**min_scale** | **int** | | [default to 1] +**max_scale** | **int** | | [default to 1] +**endpoint_certificate_authority** | **str** | | +**concurrency** | **int** | | +**env_vars** | **Dict[str, str]** | | + +## Example + +```python +from platform_api_python_client.models.get_rag_deployment_response import GetRagDeploymentResponse + +# TODO update the JSON string below +json = "{}" +# create an instance 
of GetRagDeploymentResponse from a JSON string +get_rag_deployment_response_instance = GetRagDeploymentResponse.from_json(json) +# print the JSON string representation of the object +print(get_rag_deployment_response_instance.to_json()) + +# convert the object into a dict +get_rag_deployment_response_dict = get_rag_deployment_response_instance.to_dict() +# create an instance of GetRagDeploymentResponse from a dict +get_rag_deployment_response_from_dict = GetRagDeploymentResponse.from_dict(get_rag_deployment_response_dict) +``` +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/docs/HardwareInstanceResponse.md b/docs/HardwareInstanceResponse.md index 8da7205..137c575 100644 --- a/docs/HardwareInstanceResponse.md +++ b/docs/HardwareInstanceResponse.md @@ -12,6 +12,9 @@ Name | Type | Description | Notes **cpu** | **int** | | **memory** | **int** | | **cost_per_hr** | **int** | | +**cluster_id** | **int** | | +**provider** | **str** | | +**num_accelerators** | **int** | | ## Example diff --git a/docs/ListUserVaultItemsResponse.md b/docs/ListUserVaultItemsResponse.md new file mode 100644 index 0000000..f817e0e --- /dev/null +++ b/docs/ListUserVaultItemsResponse.md @@ -0,0 +1,29 @@ +# ListUserVaultItemsResponse + + +## Properties + +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**results** | [**List[UserVaultItemOutput]**](UserVaultItemOutput.md) | | + +## Example + +```python +from platform_api_python_client.models.list_user_vault_items_response import ListUserVaultItemsResponse + +# TODO update the JSON string below +json = "{}" +# create an instance of ListUserVaultItemsResponse from a JSON string +list_user_vault_items_response_instance = ListUserVaultItemsResponse.from_json(json) +# print the JSON string representation of the object +print(list_user_vault_items_response_instance.to_json()) + +# convert the object
into a dict +list_user_vault_items_response_dict = list_user_vault_items_response_instance.to_dict() +# create an instance of ListUserVaultItemsResponse from a dict +list_user_vault_items_response_from_dict = ListUserVaultItemsResponse.from_dict(list_user_vault_items_response_dict) +``` +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/docs/UserSupportEmailRequest.md b/docs/UserSupportEmailRequest.md index e3a48d7..bb87b94 100644 --- a/docs/UserSupportEmailRequest.md +++ b/docs/UserSupportEmailRequest.md @@ -7,6 +7,7 @@ Name | Type | Description | Notes ------------ | ------------- | ------------- | ------------- **message** | **str** | | **subject** | **str** | | +**send_to_sales** | **bool** | | ## Example diff --git a/docs/UserVaultItemInput.md b/docs/UserVaultItemInput.md new file mode 100644 index 0000000..6733c27 --- /dev/null +++ b/docs/UserVaultItemInput.md @@ -0,0 +1,31 @@ +# UserVaultItemInput + + +## Properties + +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**type** | [**UserVaultType**](UserVaultType.md) | | +**key** | **str** | | +**value** | **str** | | [optional] + +## Example + +```python +from platform_api_python_client.models.user_vault_item_input import UserVaultItemInput + +# TODO update the JSON string below +json = "{}" +# create an instance of UserVaultItemInput from a JSON string +user_vault_item_input_instance = UserVaultItemInput.from_json(json) +# print the JSON string representation of the object +print(user_vault_item_input_instance.to_json()) + +# convert the object into a dict +user_vault_item_input_dict = user_vault_item_input_instance.to_dict() +# create an instance of UserVaultItemInput from a dict +user_vault_item_input_from_dict = UserVaultItemInput.from_dict(user_vault_item_input_dict) +``` +[[Back to Model list]](../README.md#documentation-for-models) [[Back
to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/docs/UserVaultItemOutput.md b/docs/UserVaultItemOutput.md new file mode 100644 index 0000000..9ce5e0b --- /dev/null +++ b/docs/UserVaultItemOutput.md @@ -0,0 +1,31 @@ +# UserVaultItemOutput + + +## Properties + +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**type** | [**UserVaultType**](UserVaultType.md) | | +**key** | **str** | | +**value** | **str** | | + +## Example + +```python +from platform_api_python_client.models.user_vault_item_output import UserVaultItemOutput + +# TODO update the JSON string below +json = "{}" +# create an instance of UserVaultItemOutput from a JSON string +user_vault_item_output_instance = UserVaultItemOutput.from_json(json) +# print the JSON string representation of the object +print(user_vault_item_output_instance.to_json()) + +# convert the object into a dict +user_vault_item_output_dict = user_vault_item_output_instance.to_dict() +# create an instance of UserVaultItemOutput from a dict +user_vault_item_output_from_dict = UserVaultItemOutput.from_dict(user_vault_item_output_dict) +``` +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/docs/UserVaultType.md b/docs/UserVaultType.md new file mode 100644 index 0000000..2c74e78 --- /dev/null +++ b/docs/UserVaultType.md @@ -0,0 +1,16 @@ +# UserVaultType + + +## Enum + +* `ENV_VARS` (value: `'env_vars'`) + +* `SSH_KEYS` (value: `'ssh_keys'`) + +* `ACCESS_TOKENS` (value: `'access_tokens'`) + +* `CERTIFICATES` (value: `'certificates'`) + +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/platform_api_python_client/__init__.py b/platform_api_python_client/__init__.py index 69e913e..db4014f
100644 --- a/platform_api_python_client/__init__.py +++ b/platform_api_python_client/__init__.py @@ -14,7 +14,7 @@ """ # noqa: E501 -__version__ = "0.3.1" +__version__ = "3.1.6" # import apis into sdk package from platform_api_python_client.api.external_api import EXTERNALApi @@ -33,18 +33,23 @@ # import models into sdk package from platform_api_python_client.models.api_key_request import APIKeyRequest from platform_api_python_client.models.api_key_response import APIKeyResponse -from platform_api_python_client.models.c_serve_recipe_input import CServeRecipeInput -from platform_api_python_client.models.c_serve_recipe_output import CServeRecipeOutput +from platform_api_python_client.models.c_serve_recipe import CServeRecipe from platform_api_python_client.models.c_serve_recipe_perf import CServeRecipePerf from platform_api_python_client.models.c_serve_recipe_response import CServeRecipeResponse +from platform_api_python_client.models.c_serve_v2_recipe_input import CServeV2RecipeInput +from platform_api_python_client.models.c_serve_v2_recipe_output import CServeV2RecipeOutput from platform_api_python_client.models.create_c_serve_deployment_request import CreateCServeDeploymentRequest from platform_api_python_client.models.create_c_serve_deployment_response import CreateCServeDeploymentResponse +from platform_api_python_client.models.create_c_serve_v2_deployment_request import CreateCServeV2DeploymentRequest +from platform_api_python_client.models.create_c_serve_v2_deployment_response import CreateCServeV2DeploymentResponse from platform_api_python_client.models.create_checkout_request import CreateCheckoutRequest from platform_api_python_client.models.create_checkout_response import CreateCheckoutResponse from platform_api_python_client.models.create_compute_deployment_request import CreateComputeDeploymentRequest from platform_api_python_client.models.create_compute_deployment_response import CreateComputeDeploymentResponse from 
platform_api_python_client.models.create_inference_deployment_request import CreateInferenceDeploymentRequest from platform_api_python_client.models.create_inference_deployment_response import CreateInferenceDeploymentResponse +from platform_api_python_client.models.create_rag_deployment_request import CreateRagDeploymentRequest +from platform_api_python_client.models.create_rag_deployment_response import CreateRagDeploymentResponse from platform_api_python_client.models.credits_response import CreditsResponse from platform_api_python_client.models.daily_bill_response import DailyBillResponse from platform_api_python_client.models.deployment_status import DeploymentStatus @@ -53,6 +58,7 @@ from platform_api_python_client.models.deployment_type import DeploymentType from platform_api_python_client.models.deployment_usage_value import DeploymentUsageValue from platform_api_python_client.models.get_c_serve_deployment_response import GetCServeDeploymentResponse +from platform_api_python_client.models.get_c_serve_v2_deployment_response import GetCServeV2DeploymentResponse from platform_api_python_client.models.get_cluster_response import GetClusterResponse from platform_api_python_client.models.get_compute_deployment_response import GetComputeDeploymentResponse from platform_api_python_client.models.get_deployment_log_response import GetDeploymentLogResponse @@ -60,6 +66,7 @@ from platform_api_python_client.models.get_deployment_usage_response import GetDeploymentUsageResponse from platform_api_python_client.models.get_inference_deployment_response import GetInferenceDeploymentResponse from platform_api_python_client.models.get_payments_response import GetPaymentsResponse +from platform_api_python_client.models.get_rag_deployment_response import GetRagDeploymentResponse from platform_api_python_client.models.http_validation_error import HTTPValidationError from platform_api_python_client.models.hardware_instance_response import HardwareInstanceResponse from 
platform_api_python_client.models.list_api_key_response import ListAPIKeyResponse @@ -69,9 +76,13 @@ from platform_api_python_client.models.list_get_deployment_response import ListGetDeploymentResponse from platform_api_python_client.models.list_hardware_instance_response import ListHardwareInstanceResponse from platform_api_python_client.models.list_prebuilt_image_response import ListPrebuiltImageResponse +from platform_api_python_client.models.list_user_vault_items_response import ListUserVaultItemsResponse from platform_api_python_client.models.metric import Metric from platform_api_python_client.models.prebuilt_image_response import PrebuiltImageResponse from platform_api_python_client.models.service_status import ServiceStatus from platform_api_python_client.models.user_support_email_request import UserSupportEmailRequest +from platform_api_python_client.models.user_vault_item_input import UserVaultItemInput +from platform_api_python_client.models.user_vault_item_output import UserVaultItemOutput +from platform_api_python_client.models.user_vault_type import UserVaultType from platform_api_python_client.models.validation_error import ValidationError from platform_api_python_client.models.validation_error_loc_inner import ValidationErrorLocInner diff --git a/platform_api_python_client/api/external_api.py b/platform_api_python_client/api/external_api.py index ec8c35e..b4e6f6c 100644 --- a/platform_api_python_client/api/external_api.py +++ b/platform_api_python_client/api/external_api.py @@ -23,22 +23,28 @@ from platform_api_python_client.models.api_key_response import APIKeyResponse from platform_api_python_client.models.create_c_serve_deployment_request import CreateCServeDeploymentRequest from platform_api_python_client.models.create_c_serve_deployment_response import CreateCServeDeploymentResponse +from platform_api_python_client.models.create_c_serve_v2_deployment_request import CreateCServeV2DeploymentRequest +from 
platform_api_python_client.models.create_c_serve_v2_deployment_response import CreateCServeV2DeploymentResponse from platform_api_python_client.models.create_checkout_request import CreateCheckoutRequest from platform_api_python_client.models.create_checkout_response import CreateCheckoutResponse from platform_api_python_client.models.create_compute_deployment_request import CreateComputeDeploymentRequest from platform_api_python_client.models.create_compute_deployment_response import CreateComputeDeploymentResponse from platform_api_python_client.models.create_inference_deployment_request import CreateInferenceDeploymentRequest from platform_api_python_client.models.create_inference_deployment_response import CreateInferenceDeploymentResponse +from platform_api_python_client.models.create_rag_deployment_request import CreateRagDeploymentRequest +from platform_api_python_client.models.create_rag_deployment_response import CreateRagDeploymentResponse from platform_api_python_client.models.credits_response import CreditsResponse from platform_api_python_client.models.deployment_status_request import DeploymentStatusRequest from platform_api_python_client.models.deployment_status_response import DeploymentStatusResponse from platform_api_python_client.models.deployment_type import DeploymentType from platform_api_python_client.models.get_c_serve_deployment_response import GetCServeDeploymentResponse +from platform_api_python_client.models.get_c_serve_v2_deployment_response import GetCServeV2DeploymentResponse from platform_api_python_client.models.get_compute_deployment_response import GetComputeDeploymentResponse from platform_api_python_client.models.get_deployment_log_response import GetDeploymentLogResponse from platform_api_python_client.models.get_deployment_usage_response import GetDeploymentUsageResponse from platform_api_python_client.models.get_inference_deployment_response import GetInferenceDeploymentResponse from 
platform_api_python_client.models.get_payments_response import GetPaymentsResponse +from platform_api_python_client.models.get_rag_deployment_response import GetRagDeploymentResponse from platform_api_python_client.models.list_api_key_response import ListAPIKeyResponse from platform_api_python_client.models.list_c_serve_recipe_response import ListCServeRecipeResponse from platform_api_python_client.models.list_daily_bill_response import ListDailyBillResponse @@ -46,8 +52,11 @@ from platform_api_python_client.models.list_get_deployment_response import ListGetDeploymentResponse from platform_api_python_client.models.list_hardware_instance_response import ListHardwareInstanceResponse from platform_api_python_client.models.list_prebuilt_image_response import ListPrebuiltImageResponse +from platform_api_python_client.models.list_user_vault_items_response import ListUserVaultItemsResponse from platform_api_python_client.models.metric import Metric from platform_api_python_client.models.user_support_email_request import UserSupportEmailRequest +from platform_api_python_client.models.user_vault_item_input import UserVaultItemInput +from platform_api_python_client.models.user_vault_type import UserVaultType from platform_api_python_client.api_client import ApiClient, RequestSerialized from platform_api_python_client.api_response import ApiResponse @@ -1438,9 +1447,9 @@ def _create_cserve_deployment_deployments_cserve_post_serialize( @validate_call - def create_inference_deployment_deployments_inference_post( + def create_cserve_v2_deployment_deployments_cserve_v2_post( self, - create_inference_deployment_request: CreateInferenceDeploymentRequest, + create_c_serve_v2_deployment_request: CreateCServeV2DeploymentRequest, _request_timeout: Union[ None, Annotated[StrictFloat, Field(gt=0)], @@ -1453,12 +1462,12 @@ def create_inference_deployment_deployments_inference_post( _content_type: Optional[StrictStr] = None, _headers: Optional[Dict[StrictStr, Any]] = None, _host_index: 
Annotated[StrictInt, Field(ge=0, le=0)] = 0, - ) -> CreateInferenceDeploymentResponse: - """Create Inference Deployment + ) -> CreateCServeV2DeploymentResponse: + """Create Cserve V2 Deployment - :param create_inference_deployment_request: (required) - :type create_inference_deployment_request: CreateInferenceDeploymentRequest + :param create_c_serve_v2_deployment_request: (required) + :type create_c_serve_v2_deployment_request: CreateCServeV2DeploymentRequest :param _request_timeout: timeout setting for this request. If one number provided, it will be total request timeout. It can also be a pair (tuple) of @@ -1481,8 +1490,8 @@ def create_inference_deployment_deployments_inference_post( :return: Returns the result object. """ # noqa: E501 - _param = self._create_inference_deployment_deployments_inference_post_serialize( - create_inference_deployment_request=create_inference_deployment_request, + _param = self._create_cserve_v2_deployment_deployments_cserve_v2_post_serialize( + create_c_serve_v2_deployment_request=create_c_serve_v2_deployment_request, _request_auth=_request_auth, _content_type=_content_type, _headers=_headers, @@ -1490,7 +1499,7 @@ def create_inference_deployment_deployments_inference_post( ) _response_types_map: Dict[str, Optional[str]] = { - '200': "CreateInferenceDeploymentResponse", + '200': "CreateCServeV2DeploymentResponse", '422': "HTTPValidationError", } response_data = self.api_client.call_api( @@ -1505,9 +1514,9 @@ def create_inference_deployment_deployments_inference_post( @validate_call - def create_inference_deployment_deployments_inference_post_with_http_info( + def create_cserve_v2_deployment_deployments_cserve_v2_post_with_http_info( self, - create_inference_deployment_request: CreateInferenceDeploymentRequest, + create_c_serve_v2_deployment_request: CreateCServeV2DeploymentRequest, _request_timeout: Union[ None, Annotated[StrictFloat, Field(gt=0)], @@ -1520,12 +1529,12 @@ def 
create_inference_deployment_deployments_inference_post_with_http_info( _content_type: Optional[StrictStr] = None, _headers: Optional[Dict[StrictStr, Any]] = None, _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0, - ) -> ApiResponse[CreateInferenceDeploymentResponse]: - """Create Inference Deployment + ) -> ApiResponse[CreateCServeV2DeploymentResponse]: + """Create Cserve V2 Deployment - :param create_inference_deployment_request: (required) - :type create_inference_deployment_request: CreateInferenceDeploymentRequest + :param create_c_serve_v2_deployment_request: (required) + :type create_c_serve_v2_deployment_request: CreateCServeV2DeploymentRequest :param _request_timeout: timeout setting for this request. If one number provided, it will be total request timeout. It can also be a pair (tuple) of @@ -1548,8 +1557,8 @@ def create_inference_deployment_deployments_inference_post_with_http_info( :return: Returns the result object. """ # noqa: E501 - _param = self._create_inference_deployment_deployments_inference_post_serialize( - create_inference_deployment_request=create_inference_deployment_request, + _param = self._create_cserve_v2_deployment_deployments_cserve_v2_post_serialize( + create_c_serve_v2_deployment_request=create_c_serve_v2_deployment_request, _request_auth=_request_auth, _content_type=_content_type, _headers=_headers, @@ -1557,7 +1566,7 @@ def create_inference_deployment_deployments_inference_post_with_http_info( ) _response_types_map: Dict[str, Optional[str]] = { - '200': "CreateInferenceDeploymentResponse", + '200': "CreateCServeV2DeploymentResponse", '422': "HTTPValidationError", } response_data = self.api_client.call_api( @@ -1572,9 +1581,9 @@ def create_inference_deployment_deployments_inference_post_with_http_info( @validate_call - def create_inference_deployment_deployments_inference_post_without_preload_content( + def create_cserve_v2_deployment_deployments_cserve_v2_post_without_preload_content( self, - 
create_inference_deployment_request: CreateInferenceDeploymentRequest, + create_c_serve_v2_deployment_request: CreateCServeV2DeploymentRequest, _request_timeout: Union[ None, Annotated[StrictFloat, Field(gt=0)], @@ -1588,11 +1597,11 @@ def create_inference_deployment_deployments_inference_post_without_preload_conte _headers: Optional[Dict[StrictStr, Any]] = None, _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0, ) -> RESTResponseType: - """Create Inference Deployment + """Create Cserve V2 Deployment - :param create_inference_deployment_request: (required) - :type create_inference_deployment_request: CreateInferenceDeploymentRequest + :param create_c_serve_v2_deployment_request: (required) + :type create_c_serve_v2_deployment_request: CreateCServeV2DeploymentRequest :param _request_timeout: timeout setting for this request. If one number provided, it will be total request timeout. It can also be a pair (tuple) of @@ -1615,8 +1624,8 @@ def create_inference_deployment_deployments_inference_post_without_preload_conte :return: Returns the result object. 
""" # noqa: E501 - _param = self._create_inference_deployment_deployments_inference_post_serialize( - create_inference_deployment_request=create_inference_deployment_request, + _param = self._create_cserve_v2_deployment_deployments_cserve_v2_post_serialize( + create_c_serve_v2_deployment_request=create_c_serve_v2_deployment_request, _request_auth=_request_auth, _content_type=_content_type, _headers=_headers, @@ -1624,7 +1633,7 @@ def create_inference_deployment_deployments_inference_post_without_preload_conte ) _response_types_map: Dict[str, Optional[str]] = { - '200': "CreateInferenceDeploymentResponse", + '200': "CreateCServeV2DeploymentResponse", '422': "HTTPValidationError", } response_data = self.api_client.call_api( @@ -1634,9 +1643,9 @@ def create_inference_deployment_deployments_inference_post_without_preload_conte return response_data.response - def _create_inference_deployment_deployments_inference_post_serialize( + def _create_cserve_v2_deployment_deployments_cserve_v2_post_serialize( self, - create_inference_deployment_request, + create_c_serve_v2_deployment_request, _request_auth, _content_type, _headers, @@ -1662,8 +1671,8 @@ def _create_inference_deployment_deployments_inference_post_serialize( # process the header parameters # process the form parameters # process the body parameter - if create_inference_deployment_request is not None: - _body_params = create_inference_deployment_request + if create_c_serve_v2_deployment_request is not None: + _body_params = create_c_serve_v2_deployment_request # set the HTTP header `Accept` @@ -1695,7 +1704,7 @@ def _create_inference_deployment_deployments_inference_post_serialize( return self.api_client.param_serialize( method='POST', - resource_path='/deployments/inference', + resource_path='/deployments/cserve_v2', path_params=_path_params, query_params=_query_params, header_params=_header_params, @@ -1712,9 +1721,9 @@ def _create_inference_deployment_deployments_inference_post_serialize( @validate_call - def 
delete_api_key_credentials_api_key_id_delete( + def create_inference_deployment_deployments_inference_post( self, - id: StrictStr, + create_inference_deployment_request: CreateInferenceDeploymentRequest, _request_timeout: Union[ None, Annotated[StrictFloat, Field(gt=0)], @@ -1727,12 +1736,12 @@ def delete_api_key_credentials_api_key_id_delete( _content_type: Optional[StrictStr] = None, _headers: Optional[Dict[StrictStr, Any]] = None, _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0, - ) -> object: - """Delete Api Key + ) -> CreateInferenceDeploymentResponse: + """Create Inference Deployment - :param id: (required) - :type id: str + :param create_inference_deployment_request: (required) + :type create_inference_deployment_request: CreateInferenceDeploymentRequest :param _request_timeout: timeout setting for this request. If one number provided, it will be total request timeout. It can also be a pair (tuple) of @@ -1755,8 +1764,8 @@ def delete_api_key_credentials_api_key_id_delete( :return: Returns the result object. 
""" # noqa: E501 - _param = self._delete_api_key_credentials_api_key_id_delete_serialize( - id=id, + _param = self._create_inference_deployment_deployments_inference_post_serialize( + create_inference_deployment_request=create_inference_deployment_request, _request_auth=_request_auth, _content_type=_content_type, _headers=_headers, @@ -1764,7 +1773,7 @@ def delete_api_key_credentials_api_key_id_delete( ) _response_types_map: Dict[str, Optional[str]] = { - '200': "object", + '200': "CreateInferenceDeploymentResponse", '422': "HTTPValidationError", } response_data = self.api_client.call_api( @@ -1779,9 +1788,9 @@ def delete_api_key_credentials_api_key_id_delete( @validate_call - def delete_api_key_credentials_api_key_id_delete_with_http_info( + def create_inference_deployment_deployments_inference_post_with_http_info( self, - id: StrictStr, + create_inference_deployment_request: CreateInferenceDeploymentRequest, _request_timeout: Union[ None, Annotated[StrictFloat, Field(gt=0)], @@ -1794,12 +1803,12 @@ def delete_api_key_credentials_api_key_id_delete_with_http_info( _content_type: Optional[StrictStr] = None, _headers: Optional[Dict[StrictStr, Any]] = None, _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0, - ) -> ApiResponse[object]: - """Delete Api Key + ) -> ApiResponse[CreateInferenceDeploymentResponse]: + """Create Inference Deployment - :param id: (required) - :type id: str + :param create_inference_deployment_request: (required) + :type create_inference_deployment_request: CreateInferenceDeploymentRequest :param _request_timeout: timeout setting for this request. If one number provided, it will be total request timeout. It can also be a pair (tuple) of @@ -1822,8 +1831,8 @@ def delete_api_key_credentials_api_key_id_delete_with_http_info( :return: Returns the result object. 
""" # noqa: E501 - _param = self._delete_api_key_credentials_api_key_id_delete_serialize( - id=id, + _param = self._create_inference_deployment_deployments_inference_post_serialize( + create_inference_deployment_request=create_inference_deployment_request, _request_auth=_request_auth, _content_type=_content_type, _headers=_headers, @@ -1831,7 +1840,7 @@ def delete_api_key_credentials_api_key_id_delete_with_http_info( ) _response_types_map: Dict[str, Optional[str]] = { - '200': "object", + '200': "CreateInferenceDeploymentResponse", '422': "HTTPValidationError", } response_data = self.api_client.call_api( @@ -1846,9 +1855,9 @@ def delete_api_key_credentials_api_key_id_delete_with_http_info( @validate_call - def delete_api_key_credentials_api_key_id_delete_without_preload_content( + def create_inference_deployment_deployments_inference_post_without_preload_content( self, - id: StrictStr, + create_inference_deployment_request: CreateInferenceDeploymentRequest, _request_timeout: Union[ None, Annotated[StrictFloat, Field(gt=0)], @@ -1862,11 +1871,11 @@ def delete_api_key_credentials_api_key_id_delete_without_preload_content( _headers: Optional[Dict[StrictStr, Any]] = None, _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0, ) -> RESTResponseType: - """Delete Api Key + """Create Inference Deployment - :param id: (required) - :type id: str + :param create_inference_deployment_request: (required) + :type create_inference_deployment_request: CreateInferenceDeploymentRequest :param _request_timeout: timeout setting for this request. If one number provided, it will be total request timeout. It can also be a pair (tuple) of @@ -1889,8 +1898,8 @@ def delete_api_key_credentials_api_key_id_delete_without_preload_content( :return: Returns the result object. 
""" # noqa: E501 - _param = self._delete_api_key_credentials_api_key_id_delete_serialize( - id=id, + _param = self._create_inference_deployment_deployments_inference_post_serialize( + create_inference_deployment_request=create_inference_deployment_request, _request_auth=_request_auth, _content_type=_content_type, _headers=_headers, @@ -1898,7 +1907,7 @@ def delete_api_key_credentials_api_key_id_delete_without_preload_content( ) _response_types_map: Dict[str, Optional[str]] = { - '200': "object", + '200': "CreateInferenceDeploymentResponse", '422': "HTTPValidationError", } response_data = self.api_client.call_api( @@ -1908,9 +1917,9 @@ def delete_api_key_credentials_api_key_id_delete_without_preload_content( return response_data.response - def _delete_api_key_credentials_api_key_id_delete_serialize( + def _create_inference_deployment_deployments_inference_post_serialize( self, - id, + create_inference_deployment_request, _request_auth, _content_type, _headers, @@ -1932,12 +1941,12 @@ def _delete_api_key_credentials_api_key_id_delete_serialize( _body_params: Optional[bytes] = None # process the path parameters - if id is not None: - _path_params['id'] = id # process the query parameters # process the header parameters # process the form parameters # process the body parameter + if create_inference_deployment_request is not None: + _body_params = create_inference_deployment_request # set the HTTP header `Accept` @@ -1948,6 +1957,19 @@ def _delete_api_key_credentials_api_key_id_delete_serialize( ] ) + # set the HTTP header `Content-Type` + if _content_type: + _header_params['Content-Type'] = _content_type + else: + _default_content_type = ( + self.api_client.select_header_content_type( + [ + 'application/json' + ] + ) + ) + if _default_content_type is not None: + _header_params['Content-Type'] = _default_content_type # authentication setting _auth_settings: List[str] = [ @@ -1955,8 +1977,8 @@ def _delete_api_key_credentials_api_key_id_delete_serialize( ] return 
self.api_client.param_serialize( - method='DELETE', - resource_path='/credentials/api-key/{id}', + method='POST', + resource_path='/deployments/inference', path_params=_path_params, query_params=_query_params, header_params=_header_params, @@ -1973,8 +1995,9 @@ def _delete_api_key_credentials_api_key_id_delete_serialize( @validate_call - def get_api_keys_credentials_api_key_get( + def create_rag_deployment_deployments_rag_post( self, + create_rag_deployment_request: CreateRagDeploymentRequest, _request_timeout: Union[ None, Annotated[StrictFloat, Field(gt=0)], @@ -1987,10 +2010,12 @@ def get_api_keys_credentials_api_key_get( _content_type: Optional[StrictStr] = None, _headers: Optional[Dict[StrictStr, Any]] = None, _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0, - ) -> ListAPIKeyResponse: - """Get Api Keys + ) -> CreateRagDeploymentResponse: + """Create Rag Deployment + :param create_rag_deployment_request: (required) + :type create_rag_deployment_request: CreateRagDeploymentRequest :param _request_timeout: timeout setting for this request. If one number provided, it will be total request timeout. It can also be a pair (tuple) of @@ -2013,7 +2038,8 @@ def get_api_keys_credentials_api_key_get( :return: Returns the result object. 
""" # noqa: E501 - _param = self._get_api_keys_credentials_api_key_get_serialize( + _param = self._create_rag_deployment_deployments_rag_post_serialize( + create_rag_deployment_request=create_rag_deployment_request, _request_auth=_request_auth, _content_type=_content_type, _headers=_headers, @@ -2021,7 +2047,8 @@ def get_api_keys_credentials_api_key_get( ) _response_types_map: Dict[str, Optional[str]] = { - '200': "ListAPIKeyResponse", + '200': "CreateRagDeploymentResponse", + '422': "HTTPValidationError", } response_data = self.api_client.call_api( *_param, @@ -2035,8 +2062,9 @@ def get_api_keys_credentials_api_key_get( @validate_call - def get_api_keys_credentials_api_key_get_with_http_info( + def create_rag_deployment_deployments_rag_post_with_http_info( self, + create_rag_deployment_request: CreateRagDeploymentRequest, _request_timeout: Union[ None, Annotated[StrictFloat, Field(gt=0)], @@ -2049,10 +2077,12 @@ def get_api_keys_credentials_api_key_get_with_http_info( _content_type: Optional[StrictStr] = None, _headers: Optional[Dict[StrictStr, Any]] = None, _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0, - ) -> ApiResponse[ListAPIKeyResponse]: - """Get Api Keys + ) -> ApiResponse[CreateRagDeploymentResponse]: + """Create Rag Deployment + :param create_rag_deployment_request: (required) + :type create_rag_deployment_request: CreateRagDeploymentRequest :param _request_timeout: timeout setting for this request. If one number provided, it will be total request timeout. It can also be a pair (tuple) of @@ -2075,7 +2105,8 @@ def get_api_keys_credentials_api_key_get_with_http_info( :return: Returns the result object. 
""" # noqa: E501 - _param = self._get_api_keys_credentials_api_key_get_serialize( + _param = self._create_rag_deployment_deployments_rag_post_serialize( + create_rag_deployment_request=create_rag_deployment_request, _request_auth=_request_auth, _content_type=_content_type, _headers=_headers, @@ -2083,7 +2114,8 @@ def get_api_keys_credentials_api_key_get_with_http_info( ) _response_types_map: Dict[str, Optional[str]] = { - '200': "ListAPIKeyResponse", + '200': "CreateRagDeploymentResponse", + '422': "HTTPValidationError", } response_data = self.api_client.call_api( *_param, @@ -2097,8 +2129,9 @@ def get_api_keys_credentials_api_key_get_with_http_info( @validate_call - def get_api_keys_credentials_api_key_get_without_preload_content( + def create_rag_deployment_deployments_rag_post_without_preload_content( self, + create_rag_deployment_request: CreateRagDeploymentRequest, _request_timeout: Union[ None, Annotated[StrictFloat, Field(gt=0)], @@ -2112,9 +2145,11 @@ def get_api_keys_credentials_api_key_get_without_preload_content( _headers: Optional[Dict[StrictStr, Any]] = None, _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0, ) -> RESTResponseType: - """Get Api Keys + """Create Rag Deployment + :param create_rag_deployment_request: (required) + :type create_rag_deployment_request: CreateRagDeploymentRequest :param _request_timeout: timeout setting for this request. If one number provided, it will be total request timeout. It can also be a pair (tuple) of @@ -2137,7 +2172,8 @@ def get_api_keys_credentials_api_key_get_without_preload_content( :return: Returns the result object. 
""" # noqa: E501 - _param = self._get_api_keys_credentials_api_key_get_serialize( + _param = self._create_rag_deployment_deployments_rag_post_serialize( + create_rag_deployment_request=create_rag_deployment_request, _request_auth=_request_auth, _content_type=_content_type, _headers=_headers, @@ -2145,7 +2181,8 @@ def get_api_keys_credentials_api_key_get_without_preload_content( ) _response_types_map: Dict[str, Optional[str]] = { - '200': "ListAPIKeyResponse", + '200': "CreateRagDeploymentResponse", + '422': "HTTPValidationError", } response_data = self.api_client.call_api( *_param, @@ -2154,8 +2191,9 @@ def get_api_keys_credentials_api_key_get_without_preload_content( return response_data.response - def _get_api_keys_credentials_api_key_get_serialize( + def _create_rag_deployment_deployments_rag_post_serialize( self, + create_rag_deployment_request, _request_auth, _content_type, _headers, @@ -2181,6 +2219,8 @@ def _get_api_keys_credentials_api_key_get_serialize( # process the header parameters # process the form parameters # process the body parameter + if create_rag_deployment_request is not None: + _body_params = create_rag_deployment_request # set the HTTP header `Accept` @@ -2191,6 +2231,19 @@ def _get_api_keys_credentials_api_key_get_serialize( ] ) + # set the HTTP header `Content-Type` + if _content_type: + _header_params['Content-Type'] = _content_type + else: + _default_content_type = ( + self.api_client.select_header_content_type( + [ + 'application/json' + ] + ) + ) + if _default_content_type is not None: + _header_params['Content-Type'] = _default_content_type # authentication setting _auth_settings: List[str] = [ @@ -2198,8 +2251,8 @@ def _get_api_keys_credentials_api_key_get_serialize( ] return self.api_client.param_serialize( - method='GET', - resource_path='/credentials/api-key', + method='POST', + resource_path='/deployments/rag', path_params=_path_params, query_params=_query_params, header_params=_header_params, @@ -2216,8 +2269,9 @@ def 
_get_api_keys_credentials_api_key_get_serialize( @validate_call - def get_clusters_clusters_get( + def delete_api_key_credentials_api_key_id_delete( self, + id: StrictStr, _request_timeout: Union[ None, Annotated[StrictFloat, Field(gt=0)], @@ -2230,10 +2284,12 @@ def get_clusters_clusters_get( _content_type: Optional[StrictStr] = None, _headers: Optional[Dict[StrictStr, Any]] = None, _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0, - ) -> ListGetClusterResponse: - """Get Clusters + ) -> object: + """Delete Api Key + :param id: (required) + :type id: str :param _request_timeout: timeout setting for this request. If one number provided, it will be total request timeout. It can also be a pair (tuple) of @@ -2256,7 +2312,8 @@ def get_clusters_clusters_get( :return: Returns the result object. """ # noqa: E501 - _param = self._get_clusters_clusters_get_serialize( + _param = self._delete_api_key_credentials_api_key_id_delete_serialize( + id=id, _request_auth=_request_auth, _content_type=_content_type, _headers=_headers, @@ -2264,7 +2321,8 @@ def get_clusters_clusters_get( ) _response_types_map: Dict[str, Optional[str]] = { - '200': "ListGetClusterResponse", + '200': "object", + '422': "HTTPValidationError", } response_data = self.api_client.call_api( *_param, @@ -2278,8 +2336,9 @@ def get_clusters_clusters_get( @validate_call - def get_clusters_clusters_get_with_http_info( + def delete_api_key_credentials_api_key_id_delete_with_http_info( self, + id: StrictStr, _request_timeout: Union[ None, Annotated[StrictFloat, Field(gt=0)], @@ -2292,10 +2351,12 @@ def get_clusters_clusters_get_with_http_info( _content_type: Optional[StrictStr] = None, _headers: Optional[Dict[StrictStr, Any]] = None, _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0, - ) -> ApiResponse[ListGetClusterResponse]: - """Get Clusters + ) -> ApiResponse[object]: + """Delete Api Key + :param id: (required) + :type id: str :param _request_timeout: timeout setting for this request. 
If one number provided, it will be total request timeout. It can also be a pair (tuple) of @@ -2318,7 +2379,8 @@ def get_clusters_clusters_get_with_http_info( :return: Returns the result object. """ # noqa: E501 - _param = self._get_clusters_clusters_get_serialize( + _param = self._delete_api_key_credentials_api_key_id_delete_serialize( + id=id, _request_auth=_request_auth, _content_type=_content_type, _headers=_headers, @@ -2326,7 +2388,8 @@ def get_clusters_clusters_get_with_http_info( ) _response_types_map: Dict[str, Optional[str]] = { - '200': "ListGetClusterResponse", + '200': "object", + '422': "HTTPValidationError", } response_data = self.api_client.call_api( *_param, @@ -2340,8 +2403,9 @@ def get_clusters_clusters_get_with_http_info( @validate_call - def get_clusters_clusters_get_without_preload_content( + def delete_api_key_credentials_api_key_id_delete_without_preload_content( self, + id: StrictStr, _request_timeout: Union[ None, Annotated[StrictFloat, Field(gt=0)], @@ -2355,9 +2419,11 @@ def get_clusters_clusters_get_without_preload_content( _headers: Optional[Dict[StrictStr, Any]] = None, _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0, ) -> RESTResponseType: - """Get Clusters + """Delete Api Key + :param id: (required) + :type id: str :param _request_timeout: timeout setting for this request. If one number provided, it will be total request timeout. It can also be a pair (tuple) of @@ -2380,7 +2446,8 @@ def get_clusters_clusters_get_without_preload_content( :return: Returns the result object. 
""" # noqa: E501 - _param = self._get_clusters_clusters_get_serialize( + _param = self._delete_api_key_credentials_api_key_id_delete_serialize( + id=id, _request_auth=_request_auth, _content_type=_content_type, _headers=_headers, @@ -2388,7 +2455,8 @@ def get_clusters_clusters_get_without_preload_content( ) _response_types_map: Dict[str, Optional[str]] = { - '200': "ListGetClusterResponse", + '200': "object", + '422': "HTTPValidationError", } response_data = self.api_client.call_api( *_param, @@ -2397,8 +2465,9 @@ def get_clusters_clusters_get_without_preload_content( return response_data.response - def _get_clusters_clusters_get_serialize( + def _delete_api_key_credentials_api_key_id_delete_serialize( self, + id, _request_auth, _content_type, _headers, @@ -2420,6 +2489,8 @@ def _get_clusters_clusters_get_serialize( _body_params: Optional[bytes] = None # process the path parameters + if id is not None: + _path_params['id'] = id # process the query parameters # process the header parameters # process the form parameters @@ -2441,8 +2512,8 @@ def _get_clusters_clusters_get_serialize( ] return self.api_client.param_serialize( - method='GET', - resource_path='/clusters', + method='DELETE', + resource_path='/credentials/api-key/{id}', path_params=_path_params, query_params=_query_params, header_params=_header_params, @@ -2459,9 +2530,9 @@ def _get_clusters_clusters_get_serialize( @validate_call - def get_compute_deployment_deployments_compute_deployment_id_get( + def delete_user_vault_item_endpoint_user_vault_delete( self, - deployment_id: StrictInt, + user_vault_item_input: UserVaultItemInput, _request_timeout: Union[ None, Annotated[StrictFloat, Field(gt=0)], @@ -2474,12 +2545,13 @@ def get_compute_deployment_deployments_compute_deployment_id_get( _content_type: Optional[StrictStr] = None, _headers: Optional[Dict[StrictStr, Any]] = None, _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0, - ) -> GetComputeDeploymentResponse: - """Get Compute Deployment + ) -> 
object: + """Delete User Vault Item Endpoint + Delete an item of a specific type for the user. - :param deployment_id: (required) - :type deployment_id: int + :param user_vault_item_input: (required) + :type user_vault_item_input: UserVaultItemInput :param _request_timeout: timeout setting for this request. If one number provided, it will be total request timeout. It can also be a pair (tuple) of @@ -2502,8 +2574,8 @@ def get_compute_deployment_deployments_compute_deployment_id_get( :return: Returns the result object. """ # noqa: E501 - _param = self._get_compute_deployment_deployments_compute_deployment_id_get_serialize( - deployment_id=deployment_id, + _param = self._delete_user_vault_item_endpoint_user_vault_delete_serialize( + user_vault_item_input=user_vault_item_input, _request_auth=_request_auth, _content_type=_content_type, _headers=_headers, @@ -2511,7 +2583,7 @@ def get_compute_deployment_deployments_compute_deployment_id_get( ) _response_types_map: Dict[str, Optional[str]] = { - '200': "GetComputeDeploymentResponse", + '200': "object", '422': "HTTPValidationError", } response_data = self.api_client.call_api( @@ -2526,9 +2598,9 @@ def get_compute_deployment_deployments_compute_deployment_id_get( @validate_call - def get_compute_deployment_deployments_compute_deployment_id_get_with_http_info( + def delete_user_vault_item_endpoint_user_vault_delete_with_http_info( self, - deployment_id: StrictInt, + user_vault_item_input: UserVaultItemInput, _request_timeout: Union[ None, Annotated[StrictFloat, Field(gt=0)], @@ -2541,12 +2613,13 @@ def get_compute_deployment_deployments_compute_deployment_id_get_with_http_info( _content_type: Optional[StrictStr] = None, _headers: Optional[Dict[StrictStr, Any]] = None, _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0, - ) -> ApiResponse[GetComputeDeploymentResponse]: - """Get Compute Deployment + ) -> ApiResponse[object]: + """Delete User Vault Item Endpoint + Delete an item of a specific type for the user. 
- :param deployment_id: (required) - :type deployment_id: int + :param user_vault_item_input: (required) + :type user_vault_item_input: UserVaultItemInput :param _request_timeout: timeout setting for this request. If one number provided, it will be total request timeout. It can also be a pair (tuple) of @@ -2569,8 +2642,8 @@ def get_compute_deployment_deployments_compute_deployment_id_get_with_http_info( :return: Returns the result object. """ # noqa: E501 - _param = self._get_compute_deployment_deployments_compute_deployment_id_get_serialize( - deployment_id=deployment_id, + _param = self._delete_user_vault_item_endpoint_user_vault_delete_serialize( + user_vault_item_input=user_vault_item_input, _request_auth=_request_auth, _content_type=_content_type, _headers=_headers, @@ -2578,7 +2651,7 @@ def get_compute_deployment_deployments_compute_deployment_id_get_with_http_info( ) _response_types_map: Dict[str, Optional[str]] = { - '200': "GetComputeDeploymentResponse", + '200': "object", '422': "HTTPValidationError", } response_data = self.api_client.call_api( @@ -2593,9 +2666,9 @@ def get_compute_deployment_deployments_compute_deployment_id_get_with_http_info( @validate_call - def get_compute_deployment_deployments_compute_deployment_id_get_without_preload_content( + def delete_user_vault_item_endpoint_user_vault_delete_without_preload_content( self, - deployment_id: StrictInt, + user_vault_item_input: UserVaultItemInput, _request_timeout: Union[ None, Annotated[StrictFloat, Field(gt=0)], @@ -2609,11 +2682,12 @@ def get_compute_deployment_deployments_compute_deployment_id_get_without_preload _headers: Optional[Dict[StrictStr, Any]] = None, _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0, ) -> RESTResponseType: - """Get Compute Deployment + """Delete User Vault Item Endpoint + Delete an item of a specific type for the user. 
- :param deployment_id: (required) - :type deployment_id: int + :param user_vault_item_input: (required) + :type user_vault_item_input: UserVaultItemInput :param _request_timeout: timeout setting for this request. If one number provided, it will be total request timeout. It can also be a pair (tuple) of @@ -2636,8 +2710,8 @@ def get_compute_deployment_deployments_compute_deployment_id_get_without_preload :return: Returns the result object. """ # noqa: E501 - _param = self._get_compute_deployment_deployments_compute_deployment_id_get_serialize( - deployment_id=deployment_id, + _param = self._delete_user_vault_item_endpoint_user_vault_delete_serialize( + user_vault_item_input=user_vault_item_input, _request_auth=_request_auth, _content_type=_content_type, _headers=_headers, @@ -2645,7 +2719,7 @@ def get_compute_deployment_deployments_compute_deployment_id_get_without_preload ) _response_types_map: Dict[str, Optional[str]] = { - '200': "GetComputeDeploymentResponse", + '200': "object", '422': "HTTPValidationError", } response_data = self.api_client.call_api( @@ -2655,9 +2729,9 @@ def get_compute_deployment_deployments_compute_deployment_id_get_without_preload return response_data.response - def _get_compute_deployment_deployments_compute_deployment_id_get_serialize( + def _delete_user_vault_item_endpoint_user_vault_delete_serialize( self, - deployment_id, + user_vault_item_input, _request_auth, _content_type, _headers, @@ -2679,12 +2753,12 @@ def _get_compute_deployment_deployments_compute_deployment_id_get_serialize( _body_params: Optional[bytes] = None # process the path parameters - if deployment_id is not None: - _path_params['deployment_id'] = deployment_id # process the query parameters # process the header parameters # process the form parameters # process the body parameter + if user_vault_item_input is not None: + _body_params = user_vault_item_input # set the HTTP header `Accept` @@ -2695,6 +2769,19 @@ def 
_get_compute_deployment_deployments_compute_deployment_id_get_serialize( ] ) + # set the HTTP header `Content-Type` + if _content_type: + _header_params['Content-Type'] = _content_type + else: + _default_content_type = ( + self.api_client.select_header_content_type( + [ + 'application/json' + ] + ) + ) + if _default_content_type is not None: + _header_params['Content-Type'] = _default_content_type # authentication setting _auth_settings: List[str] = [ @@ -2702,8 +2789,8 @@ def _get_compute_deployment_deployments_compute_deployment_id_get_serialize( ] return self.api_client.param_serialize( - method='GET', - resource_path='/deployments/compute/{deployment_id}', + method='DELETE', + resource_path='/user_vault', path_params=_path_params, query_params=_query_params, header_params=_header_params, @@ -2720,8 +2807,10 @@ def _get_compute_deployment_deployments_compute_deployment_id_get_serialize( @validate_call - def get_credits_credits_get( + def get_all_user_vault_items_endpoint_user_vault_get( self, + type: Optional[UserVaultType] = None, + search_query: Optional[StrictStr] = None, _request_timeout: Union[ None, Annotated[StrictFloat, Field(gt=0)], @@ -2734,10 +2823,15 @@ def get_credits_credits_get( _content_type: Optional[StrictStr] = None, _headers: Optional[Dict[StrictStr, Any]] = None, _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0, - ) -> CreditsResponse: - """Get Credits + ) -> ListUserVaultItemsResponse: + """Get All User Vault Items Endpoint + Retrieve all items of a specific type for the user. + :param type: + :type type: UserVaultType + :param search_query: + :type search_query: str :param _request_timeout: timeout setting for this request. If one number provided, it will be total request timeout. It can also be a pair (tuple) of @@ -2760,7 +2854,9 @@ def get_credits_credits_get( :return: Returns the result object. 
""" # noqa: E501 - _param = self._get_credits_credits_get_serialize( + _param = self._get_all_user_vault_items_endpoint_user_vault_get_serialize( + type=type, + search_query=search_query, _request_auth=_request_auth, _content_type=_content_type, _headers=_headers, @@ -2768,7 +2864,1028 @@ def get_credits_credits_get( ) _response_types_map: Dict[str, Optional[str]] = { - '200': "CreditsResponse", + '200': "ListUserVaultItemsResponse", + '422': "HTTPValidationError", + } + response_data = self.api_client.call_api( + *_param, + _request_timeout=_request_timeout + ) + response_data.read() + return self.api_client.response_deserialize( + response_data=response_data, + response_types_map=_response_types_map, + ).data + + + @validate_call + def get_all_user_vault_items_endpoint_user_vault_get_with_http_info( + self, + type: Optional[UserVaultType] = None, + search_query: Optional[StrictStr] = None, + _request_timeout: Union[ + None, + Annotated[StrictFloat, Field(gt=0)], + Tuple[ + Annotated[StrictFloat, Field(gt=0)], + Annotated[StrictFloat, Field(gt=0)] + ] + ] = None, + _request_auth: Optional[Dict[StrictStr, Any]] = None, + _content_type: Optional[StrictStr] = None, + _headers: Optional[Dict[StrictStr, Any]] = None, + _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0, + ) -> ApiResponse[ListUserVaultItemsResponse]: + """Get All User Vault Items Endpoint + + Retrieve all items of a specific type for the user. + + :param type: + :type type: UserVaultType + :param search_query: + :type search_query: str + :param _request_timeout: timeout setting for this request. If one + number provided, it will be total request + timeout. It can also be a pair (tuple) of + (connection, read) timeouts. + :type _request_timeout: int, tuple(int, int), optional + :param _request_auth: set to override the auth_settings for an a single + request; this effectively ignores the + authentication in the spec for a single request. 
+ :type _request_auth: dict, optional + :param _content_type: force content-type for the request. + :type _content_type: str, Optional + :param _headers: set to override the headers for a single + request; this effectively ignores the headers + in the spec for a single request. + :type _headers: dict, optional + :param _host_index: set to override the host_index for a single + request; this effectively ignores the host_index + in the spec for a single request. + :type _host_index: int, optional + :return: Returns the result object. + """ # noqa: E501 + + _param = self._get_all_user_vault_items_endpoint_user_vault_get_serialize( + type=type, + search_query=search_query, + _request_auth=_request_auth, + _content_type=_content_type, + _headers=_headers, + _host_index=_host_index + ) + + _response_types_map: Dict[str, Optional[str]] = { + '200': "ListUserVaultItemsResponse", + '422': "HTTPValidationError", + } + response_data = self.api_client.call_api( + *_param, + _request_timeout=_request_timeout + ) + response_data.read() + return self.api_client.response_deserialize( + response_data=response_data, + response_types_map=_response_types_map, + ) + + + @validate_call + def get_all_user_vault_items_endpoint_user_vault_get_without_preload_content( + self, + type: Optional[UserVaultType] = None, + search_query: Optional[StrictStr] = None, + _request_timeout: Union[ + None, + Annotated[StrictFloat, Field(gt=0)], + Tuple[ + Annotated[StrictFloat, Field(gt=0)], + Annotated[StrictFloat, Field(gt=0)] + ] + ] = None, + _request_auth: Optional[Dict[StrictStr, Any]] = None, + _content_type: Optional[StrictStr] = None, + _headers: Optional[Dict[StrictStr, Any]] = None, + _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0, + ) -> RESTResponseType: + """Get All User Vault Items Endpoint + + Retrieve all items of a specific type for the user. 
+ + :param type: + :type type: UserVaultType + :param search_query: + :type search_query: str + :param _request_timeout: timeout setting for this request. If one + number provided, it will be total request + timeout. It can also be a pair (tuple) of + (connection, read) timeouts. + :type _request_timeout: int, tuple(int, int), optional + :param _request_auth: set to override the auth_settings for a single + request; this effectively ignores the + authentication in the spec for a single request. + :type _request_auth: dict, optional + :param _content_type: force content-type for the request. + :type _content_type: str, Optional + :param _headers: set to override the headers for a single + request; this effectively ignores the headers + in the spec for a single request. + :type _headers: dict, optional + :param _host_index: set to override the host_index for a single + request; this effectively ignores the host_index + in the spec for a single request. + :type _host_index: int, optional + :return: Returns the result object. 
+ """ # noqa: E501 + + _param = self._get_all_user_vault_items_endpoint_user_vault_get_serialize( + type=type, + search_query=search_query, + _request_auth=_request_auth, + _content_type=_content_type, + _headers=_headers, + _host_index=_host_index + ) + + _response_types_map: Dict[str, Optional[str]] = { + '200': "ListUserVaultItemsResponse", + '422': "HTTPValidationError", + } + response_data = self.api_client.call_api( + *_param, + _request_timeout=_request_timeout + ) + return response_data.response + + + def _get_all_user_vault_items_endpoint_user_vault_get_serialize( + self, + type, + search_query, + _request_auth, + _content_type, + _headers, + _host_index, + ) -> RequestSerialized: + + _host = None + + _collection_formats: Dict[str, str] = { + } + + _path_params: Dict[str, str] = {} + _query_params: List[Tuple[str, str]] = [] + _header_params: Dict[str, Optional[str]] = _headers or {} + _form_params: List[Tuple[str, str]] = [] + _files: Dict[ + str, Union[str, bytes, List[str], List[bytes], List[Tuple[str, bytes]]] + ] = {} + _body_params: Optional[bytes] = None + + # process the path parameters + # process the query parameters + if type is not None: + + _query_params.append(('type', type.value)) + + if search_query is not None: + + _query_params.append(('search_query', search_query)) + + # process the header parameters + # process the form parameters + # process the body parameter + + + # set the HTTP header `Accept` + if 'Accept' not in _header_params: + _header_params['Accept'] = self.api_client.select_header_accept( + [ + 'application/json' + ] + ) + + + # authentication setting + _auth_settings: List[str] = [ + 'HTTPBearer' + ] + + return self.api_client.param_serialize( + method='GET', + resource_path='/user_vault', + path_params=_path_params, + query_params=_query_params, + header_params=_header_params, + body=_body_params, + post_params=_form_params, + files=_files, + auth_settings=_auth_settings, + collection_formats=_collection_formats, + 
_host=_host, + _request_auth=_request_auth + ) + + + + + @validate_call + def get_api_keys_credentials_api_key_get( + self, + _request_timeout: Union[ + None, + Annotated[StrictFloat, Field(gt=0)], + Tuple[ + Annotated[StrictFloat, Field(gt=0)], + Annotated[StrictFloat, Field(gt=0)] + ] + ] = None, + _request_auth: Optional[Dict[StrictStr, Any]] = None, + _content_type: Optional[StrictStr] = None, + _headers: Optional[Dict[StrictStr, Any]] = None, + _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0, + ) -> ListAPIKeyResponse: + """Get Api Keys + + + :param _request_timeout: timeout setting for this request. If one + number provided, it will be total request + timeout. It can also be a pair (tuple) of + (connection, read) timeouts. + :type _request_timeout: int, tuple(int, int), optional + :param _request_auth: set to override the auth_settings for a single + request; this effectively ignores the + authentication in the spec for a single request. + :type _request_auth: dict, optional + :param _content_type: force content-type for the request. + :type _content_type: str, Optional + :param _headers: set to override the headers for a single + request; this effectively ignores the headers + in the spec for a single request. + :type _headers: dict, optional + :param _host_index: set to override the host_index for a single + request; this effectively ignores the host_index + in the spec for a single request. + :type _host_index: int, optional + :return: Returns the result object. 
+ """ # noqa: E501 + + _param = self._get_api_keys_credentials_api_key_get_serialize( + _request_auth=_request_auth, + _content_type=_content_type, + _headers=_headers, + _host_index=_host_index + ) + + _response_types_map: Dict[str, Optional[str]] = { + '200': "ListAPIKeyResponse", + } + response_data = self.api_client.call_api( + *_param, + _request_timeout=_request_timeout + ) + response_data.read() + return self.api_client.response_deserialize( + response_data=response_data, + response_types_map=_response_types_map, + ).data + + + @validate_call + def get_api_keys_credentials_api_key_get_with_http_info( + self, + _request_timeout: Union[ + None, + Annotated[StrictFloat, Field(gt=0)], + Tuple[ + Annotated[StrictFloat, Field(gt=0)], + Annotated[StrictFloat, Field(gt=0)] + ] + ] = None, + _request_auth: Optional[Dict[StrictStr, Any]] = None, + _content_type: Optional[StrictStr] = None, + _headers: Optional[Dict[StrictStr, Any]] = None, + _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0, + ) -> ApiResponse[ListAPIKeyResponse]: + """Get Api Keys + + + :param _request_timeout: timeout setting for this request. If one + number provided, it will be total request + timeout. It can also be a pair (tuple) of + (connection, read) timeouts. + :type _request_timeout: int, tuple(int, int), optional + :param _request_auth: set to override the auth_settings for a single + request; this effectively ignores the + authentication in the spec for a single request. + :type _request_auth: dict, optional + :param _content_type: force content-type for the request. + :type _content_type: str, Optional + :param _headers: set to override the headers for a single + request; this effectively ignores the headers + in the spec for a single request. + :type _headers: dict, optional + :param _host_index: set to override the host_index for a single + request; this effectively ignores the host_index + in the spec for a single request. 
+ :type _host_index: int, optional + :return: Returns the result object. + """ # noqa: E501 + + _param = self._get_api_keys_credentials_api_key_get_serialize( + _request_auth=_request_auth, + _content_type=_content_type, + _headers=_headers, + _host_index=_host_index + ) + + _response_types_map: Dict[str, Optional[str]] = { + '200': "ListAPIKeyResponse", + } + response_data = self.api_client.call_api( + *_param, + _request_timeout=_request_timeout + ) + response_data.read() + return self.api_client.response_deserialize( + response_data=response_data, + response_types_map=_response_types_map, + ) + + + @validate_call + def get_api_keys_credentials_api_key_get_without_preload_content( + self, + _request_timeout: Union[ + None, + Annotated[StrictFloat, Field(gt=0)], + Tuple[ + Annotated[StrictFloat, Field(gt=0)], + Annotated[StrictFloat, Field(gt=0)] + ] + ] = None, + _request_auth: Optional[Dict[StrictStr, Any]] = None, + _content_type: Optional[StrictStr] = None, + _headers: Optional[Dict[StrictStr, Any]] = None, + _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0, + ) -> RESTResponseType: + """Get Api Keys + + + :param _request_timeout: timeout setting for this request. If one + number provided, it will be total request + timeout. It can also be a pair (tuple) of + (connection, read) timeouts. + :type _request_timeout: int, tuple(int, int), optional + :param _request_auth: set to override the auth_settings for a single + request; this effectively ignores the + authentication in the spec for a single request. + :type _request_auth: dict, optional + :param _content_type: force content-type for the request. + :type _content_type: str, Optional + :param _headers: set to override the headers for a single + request; this effectively ignores the headers + in the spec for a single request. 
+ :type _headers: dict, optional + :param _host_index: set to override the host_index for a single + request; this effectively ignores the host_index + in the spec for a single request. + :type _host_index: int, optional + :return: Returns the result object. + """ # noqa: E501 + + _param = self._get_api_keys_credentials_api_key_get_serialize( + _request_auth=_request_auth, + _content_type=_content_type, + _headers=_headers, + _host_index=_host_index + ) + + _response_types_map: Dict[str, Optional[str]] = { + '200': "ListAPIKeyResponse", + } + response_data = self.api_client.call_api( + *_param, + _request_timeout=_request_timeout + ) + return response_data.response + + + def _get_api_keys_credentials_api_key_get_serialize( + self, + _request_auth, + _content_type, + _headers, + _host_index, + ) -> RequestSerialized: + + _host = None + + _collection_formats: Dict[str, str] = { + } + + _path_params: Dict[str, str] = {} + _query_params: List[Tuple[str, str]] = [] + _header_params: Dict[str, Optional[str]] = _headers or {} + _form_params: List[Tuple[str, str]] = [] + _files: Dict[ + str, Union[str, bytes, List[str], List[bytes], List[Tuple[str, bytes]]] + ] = {} + _body_params: Optional[bytes] = None + + # process the path parameters + # process the query parameters + # process the header parameters + # process the form parameters + # process the body parameter + + + # set the HTTP header `Accept` + if 'Accept' not in _header_params: + _header_params['Accept'] = self.api_client.select_header_accept( + [ + 'application/json' + ] + ) + + + # authentication setting + _auth_settings: List[str] = [ + 'HTTPBearer' + ] + + return self.api_client.param_serialize( + method='GET', + resource_path='/credentials/api-key', + path_params=_path_params, + query_params=_query_params, + header_params=_header_params, + body=_body_params, + post_params=_form_params, + files=_files, + auth_settings=_auth_settings, + collection_formats=_collection_formats, + _host=_host, + 
_request_auth=_request_auth + ) + + + + + @validate_call + def get_clusters_clusters_get( + self, + _request_timeout: Union[ + None, + Annotated[StrictFloat, Field(gt=0)], + Tuple[ + Annotated[StrictFloat, Field(gt=0)], + Annotated[StrictFloat, Field(gt=0)] + ] + ] = None, + _request_auth: Optional[Dict[StrictStr, Any]] = None, + _content_type: Optional[StrictStr] = None, + _headers: Optional[Dict[StrictStr, Any]] = None, + _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0, + ) -> ListGetClusterResponse: + """Get Clusters + + + :param _request_timeout: timeout setting for this request. If one + number provided, it will be total request + timeout. It can also be a pair (tuple) of + (connection, read) timeouts. + :type _request_timeout: int, tuple(int, int), optional + :param _request_auth: set to override the auth_settings for a single + request; this effectively ignores the + authentication in the spec for a single request. + :type _request_auth: dict, optional + :param _content_type: force content-type for the request. + :type _content_type: str, Optional + :param _headers: set to override the headers for a single + request; this effectively ignores the headers + in the spec for a single request. + :type _headers: dict, optional + :param _host_index: set to override the host_index for a single + request; this effectively ignores the host_index + in the spec for a single request. + :type _host_index: int, optional + :return: Returns the result object. 
+ """ # noqa: E501 + + _param = self._get_clusters_clusters_get_serialize( + _request_auth=_request_auth, + _content_type=_content_type, + _headers=_headers, + _host_index=_host_index + ) + + _response_types_map: Dict[str, Optional[str]] = { + '200': "ListGetClusterResponse", + } + response_data = self.api_client.call_api( + *_param, + _request_timeout=_request_timeout + ) + response_data.read() + return self.api_client.response_deserialize( + response_data=response_data, + response_types_map=_response_types_map, + ).data + + + @validate_call + def get_clusters_clusters_get_with_http_info( + self, + _request_timeout: Union[ + None, + Annotated[StrictFloat, Field(gt=0)], + Tuple[ + Annotated[StrictFloat, Field(gt=0)], + Annotated[StrictFloat, Field(gt=0)] + ] + ] = None, + _request_auth: Optional[Dict[StrictStr, Any]] = None, + _content_type: Optional[StrictStr] = None, + _headers: Optional[Dict[StrictStr, Any]] = None, + _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0, + ) -> ApiResponse[ListGetClusterResponse]: + """Get Clusters + + + :param _request_timeout: timeout setting for this request. If one + number provided, it will be total request + timeout. It can also be a pair (tuple) of + (connection, read) timeouts. + :type _request_timeout: int, tuple(int, int), optional + :param _request_auth: set to override the auth_settings for a single + request; this effectively ignores the + authentication in the spec for a single request. + :type _request_auth: dict, optional + :param _content_type: force content-type for the request. + :type _content_type: str, Optional + :param _headers: set to override the headers for a single + request; this effectively ignores the headers + in the spec for a single request. + :type _headers: dict, optional + :param _host_index: set to override the host_index for a single + request; this effectively ignores the host_index + in the spec for a single request. 
+ :type _host_index: int, optional + :return: Returns the result object. + """ # noqa: E501 + + _param = self._get_clusters_clusters_get_serialize( + _request_auth=_request_auth, + _content_type=_content_type, + _headers=_headers, + _host_index=_host_index + ) + + _response_types_map: Dict[str, Optional[str]] = { + '200': "ListGetClusterResponse", + } + response_data = self.api_client.call_api( + *_param, + _request_timeout=_request_timeout + ) + response_data.read() + return self.api_client.response_deserialize( + response_data=response_data, + response_types_map=_response_types_map, + ) + + + @validate_call + def get_clusters_clusters_get_without_preload_content( + self, + _request_timeout: Union[ + None, + Annotated[StrictFloat, Field(gt=0)], + Tuple[ + Annotated[StrictFloat, Field(gt=0)], + Annotated[StrictFloat, Field(gt=0)] + ] + ] = None, + _request_auth: Optional[Dict[StrictStr, Any]] = None, + _content_type: Optional[StrictStr] = None, + _headers: Optional[Dict[StrictStr, Any]] = None, + _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0, + ) -> RESTResponseType: + """Get Clusters + + + :param _request_timeout: timeout setting for this request. If one + number provided, it will be total request + timeout. It can also be a pair (tuple) of + (connection, read) timeouts. + :type _request_timeout: int, tuple(int, int), optional + :param _request_auth: set to override the auth_settings for a single + request; this effectively ignores the + authentication in the spec for a single request. + :type _request_auth: dict, optional + :param _content_type: force content-type for the request. + :type _content_type: str, Optional + :param _headers: set to override the headers for a single + request; this effectively ignores the headers + in the spec for a single request. + :type _headers: dict, optional + :param _host_index: set to override the host_index for a single + request; this effectively ignores the host_index + in the spec for a single request. 
+ :type _host_index: int, optional + :return: Returns the result object. + """ # noqa: E501 + + _param = self._get_clusters_clusters_get_serialize( + _request_auth=_request_auth, + _content_type=_content_type, + _headers=_headers, + _host_index=_host_index + ) + + _response_types_map: Dict[str, Optional[str]] = { + '200': "ListGetClusterResponse", + } + response_data = self.api_client.call_api( + *_param, + _request_timeout=_request_timeout + ) + return response_data.response + + + def _get_clusters_clusters_get_serialize( + self, + _request_auth, + _content_type, + _headers, + _host_index, + ) -> RequestSerialized: + + _host = None + + _collection_formats: Dict[str, str] = { + } + + _path_params: Dict[str, str] = {} + _query_params: List[Tuple[str, str]] = [] + _header_params: Dict[str, Optional[str]] = _headers or {} + _form_params: List[Tuple[str, str]] = [] + _files: Dict[ + str, Union[str, bytes, List[str], List[bytes], List[Tuple[str, bytes]]] + ] = {} + _body_params: Optional[bytes] = None + + # process the path parameters + # process the query parameters + # process the header parameters + # process the form parameters + # process the body parameter + + + # set the HTTP header `Accept` + if 'Accept' not in _header_params: + _header_params['Accept'] = self.api_client.select_header_accept( + [ + 'application/json' + ] + ) + + + # authentication setting + _auth_settings: List[str] = [ + 'HTTPBearer' + ] + + return self.api_client.param_serialize( + method='GET', + resource_path='/clusters', + path_params=_path_params, + query_params=_query_params, + header_params=_header_params, + body=_body_params, + post_params=_form_params, + files=_files, + auth_settings=_auth_settings, + collection_formats=_collection_formats, + _host=_host, + _request_auth=_request_auth + ) + + + + + @validate_call + def get_compute_deployment_deployments_compute_deployment_id_get( + self, + deployment_id: StrictInt, + _request_timeout: Union[ + None, + Annotated[StrictFloat, 
Field(gt=0)], + Tuple[ + Annotated[StrictFloat, Field(gt=0)], + Annotated[StrictFloat, Field(gt=0)] + ] + ] = None, + _request_auth: Optional[Dict[StrictStr, Any]] = None, + _content_type: Optional[StrictStr] = None, + _headers: Optional[Dict[StrictStr, Any]] = None, + _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0, + ) -> GetComputeDeploymentResponse: + """Get Compute Deployment + + + :param deployment_id: (required) + :type deployment_id: int + :param _request_timeout: timeout setting for this request. If one + number provided, it will be total request + timeout. It can also be a pair (tuple) of + (connection, read) timeouts. + :type _request_timeout: int, tuple(int, int), optional + :param _request_auth: set to override the auth_settings for a single + request; this effectively ignores the + authentication in the spec for a single request. + :type _request_auth: dict, optional + :param _content_type: force content-type for the request. + :type _content_type: str, Optional + :param _headers: set to override the headers for a single + request; this effectively ignores the headers + in the spec for a single request. + :type _headers: dict, optional + :param _host_index: set to override the host_index for a single + request; this effectively ignores the host_index + in the spec for a single request. + :type _host_index: int, optional + :return: Returns the result object. 
+ """ # noqa: E501 + + _param = self._get_compute_deployment_deployments_compute_deployment_id_get_serialize( + deployment_id=deployment_id, + _request_auth=_request_auth, + _content_type=_content_type, + _headers=_headers, + _host_index=_host_index + ) + + _response_types_map: Dict[str, Optional[str]] = { + '200': "GetComputeDeploymentResponse", + '422': "HTTPValidationError", + } + response_data = self.api_client.call_api( + *_param, + _request_timeout=_request_timeout + ) + response_data.read() + return self.api_client.response_deserialize( + response_data=response_data, + response_types_map=_response_types_map, + ).data + + + @validate_call + def get_compute_deployment_deployments_compute_deployment_id_get_with_http_info( + self, + deployment_id: StrictInt, + _request_timeout: Union[ + None, + Annotated[StrictFloat, Field(gt=0)], + Tuple[ + Annotated[StrictFloat, Field(gt=0)], + Annotated[StrictFloat, Field(gt=0)] + ] + ] = None, + _request_auth: Optional[Dict[StrictStr, Any]] = None, + _content_type: Optional[StrictStr] = None, + _headers: Optional[Dict[StrictStr, Any]] = None, + _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0, + ) -> ApiResponse[GetComputeDeploymentResponse]: + """Get Compute Deployment + + + :param deployment_id: (required) + :type deployment_id: int + :param _request_timeout: timeout setting for this request. If one + number provided, it will be total request + timeout. It can also be a pair (tuple) of + (connection, read) timeouts. + :type _request_timeout: int, tuple(int, int), optional + :param _request_auth: set to override the auth_settings for a single + request; this effectively ignores the + authentication in the spec for a single request. + :type _request_auth: dict, optional + :param _content_type: force content-type for the request. + :type _content_type: str, Optional + :param _headers: set to override the headers for a single + request; this effectively ignores the headers + in the spec for a single request. 
+ :type _headers: dict, optional + :param _host_index: set to override the host_index for a single + request; this effectively ignores the host_index + in the spec for a single request. + :type _host_index: int, optional + :return: Returns the result object. + """ # noqa: E501 + + _param = self._get_compute_deployment_deployments_compute_deployment_id_get_serialize( + deployment_id=deployment_id, + _request_auth=_request_auth, + _content_type=_content_type, + _headers=_headers, + _host_index=_host_index + ) + + _response_types_map: Dict[str, Optional[str]] = { + '200': "GetComputeDeploymentResponse", + '422': "HTTPValidationError", + } + response_data = self.api_client.call_api( + *_param, + _request_timeout=_request_timeout + ) + response_data.read() + return self.api_client.response_deserialize( + response_data=response_data, + response_types_map=_response_types_map, + ) + + + @validate_call + def get_compute_deployment_deployments_compute_deployment_id_get_without_preload_content( + self, + deployment_id: StrictInt, + _request_timeout: Union[ + None, + Annotated[StrictFloat, Field(gt=0)], + Tuple[ + Annotated[StrictFloat, Field(gt=0)], + Annotated[StrictFloat, Field(gt=0)] + ] + ] = None, + _request_auth: Optional[Dict[StrictStr, Any]] = None, + _content_type: Optional[StrictStr] = None, + _headers: Optional[Dict[StrictStr, Any]] = None, + _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0, + ) -> RESTResponseType: + """Get Compute Deployment + + + :param deployment_id: (required) + :type deployment_id: int + :param _request_timeout: timeout setting for this request. If one + number provided, it will be total request + timeout. It can also be a pair (tuple) of + (connection, read) timeouts. + :type _request_timeout: int, tuple(int, int), optional + :param _request_auth: set to override the auth_settings for a single + request; this effectively ignores the + authentication in the spec for a single request. 
+ :type _request_auth: dict, optional + :param _content_type: force content-type for the request. + :type _content_type: str, Optional + :param _headers: set to override the headers for a single + request; this effectively ignores the headers + in the spec for a single request. + :type _headers: dict, optional + :param _host_index: set to override the host_index for a single + request; this effectively ignores the host_index + in the spec for a single request. + :type _host_index: int, optional + :return: Returns the result object. + """ # noqa: E501 + + _param = self._get_compute_deployment_deployments_compute_deployment_id_get_serialize( + deployment_id=deployment_id, + _request_auth=_request_auth, + _content_type=_content_type, + _headers=_headers, + _host_index=_host_index + ) + + _response_types_map: Dict[str, Optional[str]] = { + '200': "GetComputeDeploymentResponse", + '422': "HTTPValidationError", + } + response_data = self.api_client.call_api( + *_param, + _request_timeout=_request_timeout + ) + return response_data.response + + + def _get_compute_deployment_deployments_compute_deployment_id_get_serialize( + self, + deployment_id, + _request_auth, + _content_type, + _headers, + _host_index, + ) -> RequestSerialized: + + _host = None + + _collection_formats: Dict[str, str] = { + } + + _path_params: Dict[str, str] = {} + _query_params: List[Tuple[str, str]] = [] + _header_params: Dict[str, Optional[str]] = _headers or {} + _form_params: List[Tuple[str, str]] = [] + _files: Dict[ + str, Union[str, bytes, List[str], List[bytes], List[Tuple[str, bytes]]] + ] = {} + _body_params: Optional[bytes] = None + + # process the path parameters + if deployment_id is not None: + _path_params['deployment_id'] = deployment_id + # process the query parameters + # process the header parameters + # process the form parameters + # process the body parameter + + + # set the HTTP header `Accept` + if 'Accept' not in _header_params: + _header_params['Accept'] = 
self.api_client.select_header_accept( + [ + 'application/json' + ] + ) + + + # authentication setting + _auth_settings: List[str] = [ + 'HTTPBearer' + ] + + return self.api_client.param_serialize( + method='GET', + resource_path='/deployments/compute/{deployment_id}', + path_params=_path_params, + query_params=_query_params, + header_params=_header_params, + body=_body_params, + post_params=_form_params, + files=_files, + auth_settings=_auth_settings, + collection_formats=_collection_formats, + _host=_host, + _request_auth=_request_auth + ) + + + + + @validate_call + def get_credits_credits_get( + self, + _request_timeout: Union[ + None, + Annotated[StrictFloat, Field(gt=0)], + Tuple[ + Annotated[StrictFloat, Field(gt=0)], + Annotated[StrictFloat, Field(gt=0)] + ] + ] = None, + _request_auth: Optional[Dict[StrictStr, Any]] = None, + _content_type: Optional[StrictStr] = None, + _headers: Optional[Dict[StrictStr, Any]] = None, + _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0, + ) -> CreditsResponse: + """Get Credits + + + :param _request_timeout: timeout setting for this request. If one + number provided, it will be total request + timeout. It can also be a pair (tuple) of + (connection, read) timeouts. + :type _request_timeout: int, tuple(int, int), optional + :param _request_auth: set to override the auth_settings for an a single + request; this effectively ignores the + authentication in the spec for a single request. + :type _request_auth: dict, optional + :param _content_type: force content-type for the request. + :type _content_type: str, Optional + :param _headers: set to override the headers for a single + request; this effectively ignores the headers + in the spec for a single request. + :type _headers: dict, optional + :param _host_index: set to override the host_index for a single + request; this effectively ignores the host_index + in the spec for a single request. + :type _host_index: int, optional + :return: Returns the result object. 
+ """ # noqa: E501 + + _param = self._get_credits_credits_get_serialize( + _request_auth=_request_auth, + _content_type=_content_type, + _headers=_headers, + _host_index=_host_index + ) + + _response_types_map: Dict[str, Optional[str]] = { + '200': "CreditsResponse", } response_data = self.api_client.call_api( *_param, @@ -3487,7 +4604,268 @@ def _get_cserve_recipe_deployments_cserve_recipes_get_serialize( return self.api_client.param_serialize( method='GET', - resource_path='/deployments/cserve/recipes', + resource_path='/deployments/cserve/recipes', + path_params=_path_params, + query_params=_query_params, + header_params=_header_params, + body=_body_params, + post_params=_form_params, + files=_files, + auth_settings=_auth_settings, + collection_formats=_collection_formats, + _host=_host, + _request_auth=_request_auth + ) + + + + + @validate_call + def get_cserve_v2_deployment_deployments_cserve_v2_deployment_id_get( + self, + deployment_id: StrictInt, + _request_timeout: Union[ + None, + Annotated[StrictFloat, Field(gt=0)], + Tuple[ + Annotated[StrictFloat, Field(gt=0)], + Annotated[StrictFloat, Field(gt=0)] + ] + ] = None, + _request_auth: Optional[Dict[StrictStr, Any]] = None, + _content_type: Optional[StrictStr] = None, + _headers: Optional[Dict[StrictStr, Any]] = None, + _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0, + ) -> GetCServeV2DeploymentResponse: + """Get Cserve V2 Deployment + + + :param deployment_id: (required) + :type deployment_id: int + :param _request_timeout: timeout setting for this request. If one + number provided, it will be total request + timeout. It can also be a pair (tuple) of + (connection, read) timeouts. + :type _request_timeout: int, tuple(int, int), optional + :param _request_auth: set to override the auth_settings for an a single + request; this effectively ignores the + authentication in the spec for a single request. + :type _request_auth: dict, optional + :param _content_type: force content-type for the request. 
+ :type _content_type: str, Optional + :param _headers: set to override the headers for a single + request; this effectively ignores the headers + in the spec for a single request. + :type _headers: dict, optional + :param _host_index: set to override the host_index for a single + request; this effectively ignores the host_index + in the spec for a single request. + :type _host_index: int, optional + :return: Returns the result object. + """ # noqa: E501 + + _param = self._get_cserve_v2_deployment_deployments_cserve_v2_deployment_id_get_serialize( + deployment_id=deployment_id, + _request_auth=_request_auth, + _content_type=_content_type, + _headers=_headers, + _host_index=_host_index + ) + + _response_types_map: Dict[str, Optional[str]] = { + '200': "GetCServeV2DeploymentResponse", + '422': "HTTPValidationError", + } + response_data = self.api_client.call_api( + *_param, + _request_timeout=_request_timeout + ) + response_data.read() + return self.api_client.response_deserialize( + response_data=response_data, + response_types_map=_response_types_map, + ).data + + + @validate_call + def get_cserve_v2_deployment_deployments_cserve_v2_deployment_id_get_with_http_info( + self, + deployment_id: StrictInt, + _request_timeout: Union[ + None, + Annotated[StrictFloat, Field(gt=0)], + Tuple[ + Annotated[StrictFloat, Field(gt=0)], + Annotated[StrictFloat, Field(gt=0)] + ] + ] = None, + _request_auth: Optional[Dict[StrictStr, Any]] = None, + _content_type: Optional[StrictStr] = None, + _headers: Optional[Dict[StrictStr, Any]] = None, + _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0, + ) -> ApiResponse[GetCServeV2DeploymentResponse]: + """Get Cserve V2 Deployment + + + :param deployment_id: (required) + :type deployment_id: int + :param _request_timeout: timeout setting for this request. If one + number provided, it will be total request + timeout. It can also be a pair (tuple) of + (connection, read) timeouts. 
+ :type _request_timeout: int, tuple(int, int), optional + :param _request_auth: set to override the auth_settings for an a single + request; this effectively ignores the + authentication in the spec for a single request. + :type _request_auth: dict, optional + :param _content_type: force content-type for the request. + :type _content_type: str, Optional + :param _headers: set to override the headers for a single + request; this effectively ignores the headers + in the spec for a single request. + :type _headers: dict, optional + :param _host_index: set to override the host_index for a single + request; this effectively ignores the host_index + in the spec for a single request. + :type _host_index: int, optional + :return: Returns the result object. + """ # noqa: E501 + + _param = self._get_cserve_v2_deployment_deployments_cserve_v2_deployment_id_get_serialize( + deployment_id=deployment_id, + _request_auth=_request_auth, + _content_type=_content_type, + _headers=_headers, + _host_index=_host_index + ) + + _response_types_map: Dict[str, Optional[str]] = { + '200': "GetCServeV2DeploymentResponse", + '422': "HTTPValidationError", + } + response_data = self.api_client.call_api( + *_param, + _request_timeout=_request_timeout + ) + response_data.read() + return self.api_client.response_deserialize( + response_data=response_data, + response_types_map=_response_types_map, + ) + + + @validate_call + def get_cserve_v2_deployment_deployments_cserve_v2_deployment_id_get_without_preload_content( + self, + deployment_id: StrictInt, + _request_timeout: Union[ + None, + Annotated[StrictFloat, Field(gt=0)], + Tuple[ + Annotated[StrictFloat, Field(gt=0)], + Annotated[StrictFloat, Field(gt=0)] + ] + ] = None, + _request_auth: Optional[Dict[StrictStr, Any]] = None, + _content_type: Optional[StrictStr] = None, + _headers: Optional[Dict[StrictStr, Any]] = None, + _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0, + ) -> RESTResponseType: + """Get Cserve V2 Deployment + + + 
:param deployment_id: (required)
+        :type deployment_id: int
+        :param _request_timeout: timeout setting for this request. If one
+                                 number is provided, it will be the total request
+                                 timeout. It can also be a pair (tuple) of
+                                 (connection, read) timeouts.
+        :type _request_timeout: int, tuple(int, int), optional
+        :param _request_auth: set to override the auth_settings for a single
+                              request; this effectively ignores the
+                              authentication in the spec for a single request.
+        :type _request_auth: dict, optional
+        :param _content_type: force content-type for the request.
+        :type _content_type: str, optional
+        :param _headers: set to override the headers for a single
+                         request; this effectively ignores the headers
+                         in the spec for a single request.
+        :type _headers: dict, optional
+        :param _host_index: set to override the host_index for a single
+                            request; this effectively ignores the host_index
+                            in the spec for a single request.
+        :type _host_index: int, optional
+        :return: Returns the result object.
+ """ # noqa: E501 + + _param = self._get_cserve_v2_deployment_deployments_cserve_v2_deployment_id_get_serialize( + deployment_id=deployment_id, + _request_auth=_request_auth, + _content_type=_content_type, + _headers=_headers, + _host_index=_host_index + ) + + _response_types_map: Dict[str, Optional[str]] = { + '200': "GetCServeV2DeploymentResponse", + '422': "HTTPValidationError", + } + response_data = self.api_client.call_api( + *_param, + _request_timeout=_request_timeout + ) + return response_data.response + + + def _get_cserve_v2_deployment_deployments_cserve_v2_deployment_id_get_serialize( + self, + deployment_id, + _request_auth, + _content_type, + _headers, + _host_index, + ) -> RequestSerialized: + + _host = None + + _collection_formats: Dict[str, str] = { + } + + _path_params: Dict[str, str] = {} + _query_params: List[Tuple[str, str]] = [] + _header_params: Dict[str, Optional[str]] = _headers or {} + _form_params: List[Tuple[str, str]] = [] + _files: Dict[ + str, Union[str, bytes, List[str], List[bytes], List[Tuple[str, bytes]]] + ] = {} + _body_params: Optional[bytes] = None + + # process the path parameters + if deployment_id is not None: + _path_params['deployment_id'] = deployment_id + # process the query parameters + # process the header parameters + # process the form parameters + # process the body parameter + + + # set the HTTP header `Accept` + if 'Accept' not in _header_params: + _header_params['Accept'] = self.api_client.select_header_accept( + [ + 'application/json' + ] + ) + + + # authentication setting + _auth_settings: List[str] = [ + 'HTTPBearer' + ] + + return self.api_client.param_serialize( + method='GET', + resource_path='/deployments/cserve_v2/{deployment_id}', path_params=_path_params, query_params=_query_params, header_params=_header_params, @@ -4393,7 +5771,7 @@ def _get_deployments_deployments_get_serialize( @validate_call def get_hardware_instances_hardware_instances_get( self, - cluster_id: StrictInt, + cluster_id: 
Optional[StrictInt] = None, _request_timeout: Union[ None, Annotated[StrictFloat, Field(gt=0)], @@ -4410,7 +5788,7 @@ def get_hardware_instances_hardware_instances_get( """Get Hardware Instances - :param cluster_id: (required) + :param cluster_id: :type cluster_id: int :param _request_timeout: timeout setting for this request. If one number provided, it will be total request @@ -4460,7 +5838,7 @@ def get_hardware_instances_hardware_instances_get( @validate_call def get_hardware_instances_hardware_instances_get_with_http_info( self, - cluster_id: StrictInt, + cluster_id: Optional[StrictInt] = None, _request_timeout: Union[ None, Annotated[StrictFloat, Field(gt=0)], @@ -4477,7 +5855,7 @@ def get_hardware_instances_hardware_instances_get_with_http_info( """Get Hardware Instances - :param cluster_id: (required) + :param cluster_id: :type cluster_id: int :param _request_timeout: timeout setting for this request. If one number provided, it will be total request @@ -4527,7 +5905,7 @@ def get_hardware_instances_hardware_instances_get_with_http_info( @validate_call def get_hardware_instances_hardware_instances_get_without_preload_content( self, - cluster_id: StrictInt, + cluster_id: Optional[StrictInt] = None, _request_timeout: Union[ None, Annotated[StrictFloat, Field(gt=0)], @@ -4544,7 +5922,7 @@ def get_hardware_instances_hardware_instances_get_without_preload_content( """Get Hardware Instances - :param cluster_id: (required) + :param cluster_id: :type cluster_id: int :param _request_timeout: timeout setting for this request. 
If one number provided, it will be total request @@ -5416,10 +6794,271 @@ def _get_prebuilt_images_prebuilt_images_get_serialize( # process the path parameters # process the query parameters - if type is not None: - - _query_params.append(('type', type.value)) - + if type is not None: + + _query_params.append(('type', type.value)) + + # process the header parameters + # process the form parameters + # process the body parameter + + + # set the HTTP header `Accept` + if 'Accept' not in _header_params: + _header_params['Accept'] = self.api_client.select_header_accept( + [ + 'application/json' + ] + ) + + + # authentication setting + _auth_settings: List[str] = [ + 'HTTPBearer' + ] + + return self.api_client.param_serialize( + method='GET', + resource_path='/prebuilt-images', + path_params=_path_params, + query_params=_query_params, + header_params=_header_params, + body=_body_params, + post_params=_form_params, + files=_files, + auth_settings=_auth_settings, + collection_formats=_collection_formats, + _host=_host, + _request_auth=_request_auth + ) + + + + + @validate_call + def get_rag_deployment_deployments_rag_deployment_id_get( + self, + deployment_id: StrictInt, + _request_timeout: Union[ + None, + Annotated[StrictFloat, Field(gt=0)], + Tuple[ + Annotated[StrictFloat, Field(gt=0)], + Annotated[StrictFloat, Field(gt=0)] + ] + ] = None, + _request_auth: Optional[Dict[StrictStr, Any]] = None, + _content_type: Optional[StrictStr] = None, + _headers: Optional[Dict[StrictStr, Any]] = None, + _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0, + ) -> GetRagDeploymentResponse: + """Get Rag Deployment + + + :param deployment_id: (required) + :type deployment_id: int + :param _request_timeout: timeout setting for this request. If one + number provided, it will be total request + timeout. It can also be a pair (tuple) of + (connection, read) timeouts. 
+ :type _request_timeout: int, tuple(int, int), optional + :param _request_auth: set to override the auth_settings for an a single + request; this effectively ignores the + authentication in the spec for a single request. + :type _request_auth: dict, optional + :param _content_type: force content-type for the request. + :type _content_type: str, Optional + :param _headers: set to override the headers for a single + request; this effectively ignores the headers + in the spec for a single request. + :type _headers: dict, optional + :param _host_index: set to override the host_index for a single + request; this effectively ignores the host_index + in the spec for a single request. + :type _host_index: int, optional + :return: Returns the result object. + """ # noqa: E501 + + _param = self._get_rag_deployment_deployments_rag_deployment_id_get_serialize( + deployment_id=deployment_id, + _request_auth=_request_auth, + _content_type=_content_type, + _headers=_headers, + _host_index=_host_index + ) + + _response_types_map: Dict[str, Optional[str]] = { + '200': "GetRagDeploymentResponse", + '422': "HTTPValidationError", + } + response_data = self.api_client.call_api( + *_param, + _request_timeout=_request_timeout + ) + response_data.read() + return self.api_client.response_deserialize( + response_data=response_data, + response_types_map=_response_types_map, + ).data + + + @validate_call + def get_rag_deployment_deployments_rag_deployment_id_get_with_http_info( + self, + deployment_id: StrictInt, + _request_timeout: Union[ + None, + Annotated[StrictFloat, Field(gt=0)], + Tuple[ + Annotated[StrictFloat, Field(gt=0)], + Annotated[StrictFloat, Field(gt=0)] + ] + ] = None, + _request_auth: Optional[Dict[StrictStr, Any]] = None, + _content_type: Optional[StrictStr] = None, + _headers: Optional[Dict[StrictStr, Any]] = None, + _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0, + ) -> ApiResponse[GetRagDeploymentResponse]: + """Get Rag Deployment + + + :param deployment_id: 
(required) + :type deployment_id: int + :param _request_timeout: timeout setting for this request. If one + number provided, it will be total request + timeout. It can also be a pair (tuple) of + (connection, read) timeouts. + :type _request_timeout: int, tuple(int, int), optional + :param _request_auth: set to override the auth_settings for an a single + request; this effectively ignores the + authentication in the spec for a single request. + :type _request_auth: dict, optional + :param _content_type: force content-type for the request. + :type _content_type: str, Optional + :param _headers: set to override the headers for a single + request; this effectively ignores the headers + in the spec for a single request. + :type _headers: dict, optional + :param _host_index: set to override the host_index for a single + request; this effectively ignores the host_index + in the spec for a single request. + :type _host_index: int, optional + :return: Returns the result object. + """ # noqa: E501 + + _param = self._get_rag_deployment_deployments_rag_deployment_id_get_serialize( + deployment_id=deployment_id, + _request_auth=_request_auth, + _content_type=_content_type, + _headers=_headers, + _host_index=_host_index + ) + + _response_types_map: Dict[str, Optional[str]] = { + '200': "GetRagDeploymentResponse", + '422': "HTTPValidationError", + } + response_data = self.api_client.call_api( + *_param, + _request_timeout=_request_timeout + ) + response_data.read() + return self.api_client.response_deserialize( + response_data=response_data, + response_types_map=_response_types_map, + ) + + + @validate_call + def get_rag_deployment_deployments_rag_deployment_id_get_without_preload_content( + self, + deployment_id: StrictInt, + _request_timeout: Union[ + None, + Annotated[StrictFloat, Field(gt=0)], + Tuple[ + Annotated[StrictFloat, Field(gt=0)], + Annotated[StrictFloat, Field(gt=0)] + ] + ] = None, + _request_auth: Optional[Dict[StrictStr, Any]] = None, + _content_type: 
Optional[StrictStr] = None,
+        _headers: Optional[Dict[StrictStr, Any]] = None,
+        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
+    ) -> RESTResponseType:
+        """Get Rag Deployment
+
+
+        :param deployment_id: (required)
+        :type deployment_id: int
+        :param _request_timeout: timeout setting for this request. If one
+                                 number is provided, it will be the total request
+                                 timeout. It can also be a pair (tuple) of
+                                 (connection, read) timeouts.
+        :type _request_timeout: int, tuple(int, int), optional
+        :param _request_auth: set to override the auth_settings for a single
+                              request; this effectively ignores the
+                              authentication in the spec for a single request.
+        :type _request_auth: dict, optional
+        :param _content_type: force content-type for the request.
+        :type _content_type: str, optional
+        :param _headers: set to override the headers for a single
+                         request; this effectively ignores the headers
+                         in the spec for a single request.
+        :type _headers: dict, optional
+        :param _host_index: set to override the host_index for a single
+                            request; this effectively ignores the host_index
+                            in the spec for a single request.
+        :type _host_index: int, optional
+        :return: Returns the result object.
+ """ # noqa: E501 + + _param = self._get_rag_deployment_deployments_rag_deployment_id_get_serialize( + deployment_id=deployment_id, + _request_auth=_request_auth, + _content_type=_content_type, + _headers=_headers, + _host_index=_host_index + ) + + _response_types_map: Dict[str, Optional[str]] = { + '200': "GetRagDeploymentResponse", + '422': "HTTPValidationError", + } + response_data = self.api_client.call_api( + *_param, + _request_timeout=_request_timeout + ) + return response_data.response + + + def _get_rag_deployment_deployments_rag_deployment_id_get_serialize( + self, + deployment_id, + _request_auth, + _content_type, + _headers, + _host_index, + ) -> RequestSerialized: + + _host = None + + _collection_formats: Dict[str, str] = { + } + + _path_params: Dict[str, str] = {} + _query_params: List[Tuple[str, str]] = [] + _header_params: Dict[str, Optional[str]] = _headers or {} + _form_params: List[Tuple[str, str]] = [] + _files: Dict[ + str, Union[str, bytes, List[str], List[bytes], List[Tuple[str, bytes]]] + ] = {} + _body_params: Optional[bytes] = None + + # process the path parameters + if deployment_id is not None: + _path_params['deployment_id'] = deployment_id + # process the query parameters # process the header parameters # process the form parameters # process the body parameter @@ -5441,7 +7080,7 @@ def _get_prebuilt_images_prebuilt_images_get_serialize( return self.api_client.param_serialize( method='GET', - resource_path='/prebuilt-images', + resource_path='/deployments/rag/{deployment_id}', path_params=_path_params, query_params=_query_params, header_params=_header_params, @@ -6597,3 +8236,280 @@ def _update_deployment_status_deployments_status_deployment_id_put_serialize( ) + + + @validate_call + def update_user_vault_item_endpoint_user_vault_put( + self, + user_vault_item_input: UserVaultItemInput, + _request_timeout: Union[ + None, + Annotated[StrictFloat, Field(gt=0)], + Tuple[ + Annotated[StrictFloat, Field(gt=0)], + Annotated[StrictFloat, 
Field(gt=0)] + ] + ] = None, + _request_auth: Optional[Dict[StrictStr, Any]] = None, + _content_type: Optional[StrictStr] = None, + _headers: Optional[Dict[StrictStr, Any]] = None, + _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0, + ) -> object: + """Update User Vault Item Endpoint + + Update or add multiple items of a specific type for the user. + + :param user_vault_item_input: (required) + :type user_vault_item_input: UserVaultItemInput + :param _request_timeout: timeout setting for this request. If one + number provided, it will be total request + timeout. It can also be a pair (tuple) of + (connection, read) timeouts. + :type _request_timeout: int, tuple(int, int), optional + :param _request_auth: set to override the auth_settings for an a single + request; this effectively ignores the + authentication in the spec for a single request. + :type _request_auth: dict, optional + :param _content_type: force content-type for the request. + :type _content_type: str, Optional + :param _headers: set to override the headers for a single + request; this effectively ignores the headers + in the spec for a single request. + :type _headers: dict, optional + :param _host_index: set to override the host_index for a single + request; this effectively ignores the host_index + in the spec for a single request. + :type _host_index: int, optional + :return: Returns the result object. 
+ """ # noqa: E501 + + _param = self._update_user_vault_item_endpoint_user_vault_put_serialize( + user_vault_item_input=user_vault_item_input, + _request_auth=_request_auth, + _content_type=_content_type, + _headers=_headers, + _host_index=_host_index + ) + + _response_types_map: Dict[str, Optional[str]] = { + '200': "object", + '422': "HTTPValidationError", + } + response_data = self.api_client.call_api( + *_param, + _request_timeout=_request_timeout + ) + response_data.read() + return self.api_client.response_deserialize( + response_data=response_data, + response_types_map=_response_types_map, + ).data + + + @validate_call + def update_user_vault_item_endpoint_user_vault_put_with_http_info( + self, + user_vault_item_input: UserVaultItemInput, + _request_timeout: Union[ + None, + Annotated[StrictFloat, Field(gt=0)], + Tuple[ + Annotated[StrictFloat, Field(gt=0)], + Annotated[StrictFloat, Field(gt=0)] + ] + ] = None, + _request_auth: Optional[Dict[StrictStr, Any]] = None, + _content_type: Optional[StrictStr] = None, + _headers: Optional[Dict[StrictStr, Any]] = None, + _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0, + ) -> ApiResponse[object]: + """Update User Vault Item Endpoint + + Update or add multiple items of a specific type for the user. + + :param user_vault_item_input: (required) + :type user_vault_item_input: UserVaultItemInput + :param _request_timeout: timeout setting for this request. If one + number provided, it will be total request + timeout. It can also be a pair (tuple) of + (connection, read) timeouts. + :type _request_timeout: int, tuple(int, int), optional + :param _request_auth: set to override the auth_settings for an a single + request; this effectively ignores the + authentication in the spec for a single request. + :type _request_auth: dict, optional + :param _content_type: force content-type for the request. 
+ :type _content_type: str, Optional + :param _headers: set to override the headers for a single + request; this effectively ignores the headers + in the spec for a single request. + :type _headers: dict, optional + :param _host_index: set to override the host_index for a single + request; this effectively ignores the host_index + in the spec for a single request. + :type _host_index: int, optional + :return: Returns the result object. + """ # noqa: E501 + + _param = self._update_user_vault_item_endpoint_user_vault_put_serialize( + user_vault_item_input=user_vault_item_input, + _request_auth=_request_auth, + _content_type=_content_type, + _headers=_headers, + _host_index=_host_index + ) + + _response_types_map: Dict[str, Optional[str]] = { + '200': "object", + '422': "HTTPValidationError", + } + response_data = self.api_client.call_api( + *_param, + _request_timeout=_request_timeout + ) + response_data.read() + return self.api_client.response_deserialize( + response_data=response_data, + response_types_map=_response_types_map, + ) + + + @validate_call + def update_user_vault_item_endpoint_user_vault_put_without_preload_content( + self, + user_vault_item_input: UserVaultItemInput, + _request_timeout: Union[ + None, + Annotated[StrictFloat, Field(gt=0)], + Tuple[ + Annotated[StrictFloat, Field(gt=0)], + Annotated[StrictFloat, Field(gt=0)] + ] + ] = None, + _request_auth: Optional[Dict[StrictStr, Any]] = None, + _content_type: Optional[StrictStr] = None, + _headers: Optional[Dict[StrictStr, Any]] = None, + _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0, + ) -> RESTResponseType: + """Update User Vault Item Endpoint + + Update or add multiple items of a specific type for the user. + + :param user_vault_item_input: (required) + :type user_vault_item_input: UserVaultItemInput + :param _request_timeout: timeout setting for this request. If one + number provided, it will be total request + timeout. It can also be a pair (tuple) of + (connection, read) timeouts. 
+ :type _request_timeout: int, tuple(int, int), optional + :param _request_auth: set to override the auth_settings for a single + request; this effectively ignores the + authentication in the spec for a single request. + :type _request_auth: dict, optional + :param _content_type: force content-type for the request. + :type _content_type: str, optional + :param _headers: set to override the headers for a single + request; this effectively ignores the headers + in the spec for a single request. + :type _headers: dict, optional + :param _host_index: set to override the host_index for a single + request; this effectively ignores the host_index + in the spec for a single request. + :type _host_index: int, optional + :return: Returns the result object. + """ # noqa: E501 + + _param = self._update_user_vault_item_endpoint_user_vault_put_serialize( + user_vault_item_input=user_vault_item_input, + _request_auth=_request_auth, + _content_type=_content_type, + _headers=_headers, + _host_index=_host_index + ) + + _response_types_map: Dict[str, Optional[str]] = { + '200': "object", + '422': "HTTPValidationError", + } + response_data = self.api_client.call_api( + *_param, + _request_timeout=_request_timeout + ) + return response_data.response + + + def _update_user_vault_item_endpoint_user_vault_put_serialize( + self, + user_vault_item_input, + _request_auth, + _content_type, + _headers, + _host_index, + ) -> RequestSerialized: + + _host = None + + _collection_formats: Dict[str, str] = { + } + + _path_params: Dict[str, str] = {} + _query_params: List[Tuple[str, str]] = [] + _header_params: Dict[str, Optional[str]] = _headers or {} + _form_params: List[Tuple[str, str]] = [] + _files: Dict[ + str, Union[str, bytes, List[str], List[bytes], List[Tuple[str, bytes]]] + ] = {} + _body_params: Optional[bytes] = None + + # process the path parameters + # process the query parameters + # process the header parameters + # process the form parameters + # process the body parameter + if 
user_vault_item_input is not None: + _body_params = user_vault_item_input + + + # set the HTTP header `Accept` + if 'Accept' not in _header_params: + _header_params['Accept'] = self.api_client.select_header_accept( + [ + 'application/json' + ] + ) + + # set the HTTP header `Content-Type` + if _content_type: + _header_params['Content-Type'] = _content_type + else: + _default_content_type = ( + self.api_client.select_header_content_type( + [ + 'application/json' + ] + ) + ) + if _default_content_type is not None: + _header_params['Content-Type'] = _default_content_type + + # authentication setting + _auth_settings: List[str] = [ + 'HTTPBearer' + ] + + return self.api_client.param_serialize( + method='PUT', + resource_path='/user_vault', + path_params=_path_params, + query_params=_query_params, + header_params=_header_params, + body=_body_params, + post_params=_form_params, + files=_files, + auth_settings=_auth_settings, + collection_formats=_collection_formats, + _host=_host, + _request_auth=_request_auth + ) + + diff --git a/platform_api_python_client/api_client.py b/platform_api_python_client/api_client.py index 3aff335..abff685 100644 --- a/platform_api_python_client/api_client.py +++ b/platform_api_python_client/api_client.py @@ -90,7 +90,7 @@ def __init__( self.default_headers[header_name] = header_value self.cookie = cookie # Set default User-Agent. 
- self.user_agent = 'OpenAPI-Generator/1.0.0/python' + self.user_agent = 'OpenAPI-Generator/3.1.6/python' self.client_side_validation = configuration.client_side_validation def __enter__(self): diff --git a/platform_api_python_client/configuration.py b/platform_api_python_client/configuration.py index 0e96322..729ec95 100644 --- a/platform_api_python_client/configuration.py +++ b/platform_api_python_client/configuration.py @@ -392,7 +392,7 @@ def to_debug_report(self): "OS: {env}\n"\ "Python Version: {pyversion}\n"\ "Version of the API: 0.1.0\n"\ - "SDK Package Version: 0.3.0".\ + "SDK Package Version: 3.1.6".\ format(env=sys.platform, pyversion=sys.version) def get_host_settings(self): diff --git a/platform_api_python_client/models/__init__.py b/platform_api_python_client/models/__init__.py index 8e2d02c..1345003 100644 --- a/platform_api_python_client/models/__init__.py +++ b/platform_api_python_client/models/__init__.py @@ -16,18 +16,23 @@ # import models into model package from platform_api_python_client.models.api_key_request import APIKeyRequest from platform_api_python_client.models.api_key_response import APIKeyResponse -from platform_api_python_client.models.c_serve_recipe_input import CServeRecipeInput -from platform_api_python_client.models.c_serve_recipe_output import CServeRecipeOutput +from platform_api_python_client.models.c_serve_recipe import CServeRecipe from platform_api_python_client.models.c_serve_recipe_perf import CServeRecipePerf from platform_api_python_client.models.c_serve_recipe_response import CServeRecipeResponse +from platform_api_python_client.models.c_serve_v2_recipe_input import CServeV2RecipeInput +from platform_api_python_client.models.c_serve_v2_recipe_output import CServeV2RecipeOutput from platform_api_python_client.models.create_c_serve_deployment_request import CreateCServeDeploymentRequest from platform_api_python_client.models.create_c_serve_deployment_response import CreateCServeDeploymentResponse +from 
platform_api_python_client.models.create_c_serve_v2_deployment_request import CreateCServeV2DeploymentRequest +from platform_api_python_client.models.create_c_serve_v2_deployment_response import CreateCServeV2DeploymentResponse from platform_api_python_client.models.create_checkout_request import CreateCheckoutRequest from platform_api_python_client.models.create_checkout_response import CreateCheckoutResponse from platform_api_python_client.models.create_compute_deployment_request import CreateComputeDeploymentRequest from platform_api_python_client.models.create_compute_deployment_response import CreateComputeDeploymentResponse from platform_api_python_client.models.create_inference_deployment_request import CreateInferenceDeploymentRequest from platform_api_python_client.models.create_inference_deployment_response import CreateInferenceDeploymentResponse +from platform_api_python_client.models.create_rag_deployment_request import CreateRagDeploymentRequest +from platform_api_python_client.models.create_rag_deployment_response import CreateRagDeploymentResponse from platform_api_python_client.models.credits_response import CreditsResponse from platform_api_python_client.models.daily_bill_response import DailyBillResponse from platform_api_python_client.models.deployment_status import DeploymentStatus @@ -36,6 +41,7 @@ from platform_api_python_client.models.deployment_type import DeploymentType from platform_api_python_client.models.deployment_usage_value import DeploymentUsageValue from platform_api_python_client.models.get_c_serve_deployment_response import GetCServeDeploymentResponse +from platform_api_python_client.models.get_c_serve_v2_deployment_response import GetCServeV2DeploymentResponse from platform_api_python_client.models.get_cluster_response import GetClusterResponse from platform_api_python_client.models.get_compute_deployment_response import GetComputeDeploymentResponse from platform_api_python_client.models.get_deployment_log_response import 
GetDeploymentLogResponse @@ -43,6 +49,7 @@ from platform_api_python_client.models.get_deployment_usage_response import GetDeploymentUsageResponse from platform_api_python_client.models.get_inference_deployment_response import GetInferenceDeploymentResponse from platform_api_python_client.models.get_payments_response import GetPaymentsResponse +from platform_api_python_client.models.get_rag_deployment_response import GetRagDeploymentResponse from platform_api_python_client.models.http_validation_error import HTTPValidationError from platform_api_python_client.models.hardware_instance_response import HardwareInstanceResponse from platform_api_python_client.models.list_api_key_response import ListAPIKeyResponse @@ -52,9 +59,13 @@ from platform_api_python_client.models.list_get_deployment_response import ListGetDeploymentResponse from platform_api_python_client.models.list_hardware_instance_response import ListHardwareInstanceResponse from platform_api_python_client.models.list_prebuilt_image_response import ListPrebuiltImageResponse +from platform_api_python_client.models.list_user_vault_items_response import ListUserVaultItemsResponse from platform_api_python_client.models.metric import Metric from platform_api_python_client.models.prebuilt_image_response import PrebuiltImageResponse from platform_api_python_client.models.service_status import ServiceStatus from platform_api_python_client.models.user_support_email_request import UserSupportEmailRequest +from platform_api_python_client.models.user_vault_item_input import UserVaultItemInput +from platform_api_python_client.models.user_vault_item_output import UserVaultItemOutput +from platform_api_python_client.models.user_vault_type import UserVaultType from platform_api_python_client.models.validation_error import ValidationError from platform_api_python_client.models.validation_error_loc_inner import ValidationErrorLocInner diff --git a/platform_api_python_client/models/c_serve_recipe_input.py 
b/platform_api_python_client/models/c_serve_recipe.py similarity index 69% rename from platform_api_python_client/models/c_serve_recipe_input.py rename to platform_api_python_client/models/c_serve_recipe.py index c6bc41d..2c9a019 100644 --- a/platform_api_python_client/models/c_serve_recipe_input.py +++ b/platform_api_python_client/models/c_serve_recipe.py @@ -17,13 +17,13 @@ import re # noqa: F401 import json -from pydantic import BaseModel, ConfigDict, Field, StrictBool, StrictInt, StrictStr +from pydantic import BaseModel, ConfigDict, Field, StrictBool, StrictInt, StrictStr, field_validator from typing import Any, ClassVar, Dict, List, Optional, Union from typing_extensions import Annotated from typing import Optional, Set from typing_extensions import Self -class CServeRecipeInput(BaseModel): +class CServeRecipe(BaseModel): """ Base class for deployment planner """ # noqa: E501 @@ -35,8 +35,12 @@ class CServeRecipeInput(BaseModel): swap_space: Optional[Annotated[int, Field(strict=True, ge=0)]] = 0 gpu_mem_util: Optional[Union[Annotated[float, Field(le=1.0, strict=True, ge=0.0)], Annotated[int, Field(le=1, strict=True, ge=0)]]] = 0.95 max_num_seqs: Optional[StrictInt] = 256 - use_prefix_caching: Optional[StrictBool] = None offloading_num: Optional[StrictInt] = 0 + use_prefix_caching: Optional[StrictBool] = None + use_chunked_prefill: Optional[StrictBool] = None + chunked_prefill_size: Optional[StrictInt] = None + eager_execution: Optional[StrictBool] = None + num_scheduler_steps: Optional[StrictInt] = None use_flashinfer: Optional[StrictBool] = False max_model_len: Optional[Annotated[int, Field(strict=True, ge=128)]] = None dtype: Optional[StrictStr] = 'auto' @@ -47,7 +51,37 @@ class CServeRecipeInput(BaseModel): spec_prompt_lookup_min: Optional[Annotated[int, Field(strict=True, ge=1)]] = None spec_prompt_lookup_max: Optional[Annotated[int, Field(strict=True, ge=1)]] = None seed: Optional[StrictInt] = 0 - __properties: ClassVar[List[str]] = ["model", 
"is_embedding_model", "tensor_parallel_size", "pipeline_parallel_size", "block_size", "swap_space", "gpu_mem_util", "max_num_seqs", "use_prefix_caching", "offloading_num", "use_flashinfer", "max_model_len", "dtype", "tokenizer", "spec_proposer", "spec_draft_model", "spec_tokens", "spec_prompt_lookup_min", "spec_prompt_lookup_max", "seed"] + __properties: ClassVar[List[str]] = ["model", "is_embedding_model", "tensor_parallel_size", "pipeline_parallel_size", "block_size", "swap_space", "gpu_mem_util", "max_num_seqs", "offloading_num", "use_prefix_caching", "use_chunked_prefill", "chunked_prefill_size", "eager_execution", "num_scheduler_steps", "use_flashinfer", "max_model_len", "dtype", "tokenizer", "spec_proposer", "spec_draft_model", "spec_tokens", "spec_prompt_lookup_min", "spec_prompt_lookup_max", "seed"] + + @field_validator('block_size') + def block_size_validate_enum(cls, value): + """Validates the enum""" + if value is None: + return value + + if value not in set([16, 32]): + raise ValueError("must be one of enum values (16, 32)") + return value + + @field_validator('dtype') + def dtype_validate_enum(cls, value): + """Validates the enum""" + if value is None: + return value + + if value not in set(['auto', 'float16', 'float32', 'bfloat16']): + raise ValueError("must be one of enum values ('auto', 'float16', 'float32', 'bfloat16')") + return value + + @field_validator('spec_proposer') + def spec_proposer_validate_enum(cls, value): + """Validates the enum""" + if value is None: + return value + + if value not in set(['draft', 'prompt_lookup']): + raise ValueError("must be one of enum values ('draft', 'prompt_lookup')") + return value model_config = ConfigDict( populate_by_name=True, @@ -67,7 +101,7 @@ def to_json(self) -> str: @classmethod def from_json(cls, json_str: str) -> Optional[Self]: - """Create an instance of CServeRecipeInput from a JSON string""" + """Create an instance of CServeRecipe from a JSON string""" return cls.from_dict(json.loads(json_str)) 
def to_dict(self) -> Dict[str, Any]: @@ -93,6 +127,26 @@ def to_dict(self) -> Dict[str, Any]: if self.use_prefix_caching is None and "use_prefix_caching" in self.model_fields_set: _dict['use_prefix_caching'] = None + # set to None if use_chunked_prefill (nullable) is None + # and model_fields_set contains the field + if self.use_chunked_prefill is None and "use_chunked_prefill" in self.model_fields_set: + _dict['use_chunked_prefill'] = None + + # set to None if chunked_prefill_size (nullable) is None + # and model_fields_set contains the field + if self.chunked_prefill_size is None and "chunked_prefill_size" in self.model_fields_set: + _dict['chunked_prefill_size'] = None + + # set to None if eager_execution (nullable) is None + # and model_fields_set contains the field + if self.eager_execution is None and "eager_execution" in self.model_fields_set: + _dict['eager_execution'] = None + + # set to None if num_scheduler_steps (nullable) is None + # and model_fields_set contains the field + if self.num_scheduler_steps is None and "num_scheduler_steps" in self.model_fields_set: + _dict['num_scheduler_steps'] = None + # set to None if max_model_len (nullable) is None # and model_fields_set contains the field if self.max_model_len is None and "max_model_len" in self.model_fields_set: @@ -132,7 +186,7 @@ def to_dict(self) -> Dict[str, Any]: @classmethod def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]: - """Create an instance of CServeRecipeInput from a dict""" + """Create an instance of CServeRecipe from a dict""" if obj is None: return None @@ -148,8 +202,12 @@ def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]: "swap_space": obj.get("swap_space") if obj.get("swap_space") is not None else 0, "gpu_mem_util": obj.get("gpu_mem_util") if obj.get("gpu_mem_util") is not None else 0.95, "max_num_seqs": obj.get("max_num_seqs") if obj.get("max_num_seqs") is not None else 256, - "use_prefix_caching": obj.get("use_prefix_caching"), 
"offloading_num": obj.get("offloading_num") if obj.get("offloading_num") is not None else 0, + "use_prefix_caching": obj.get("use_prefix_caching"), + "use_chunked_prefill": obj.get("use_chunked_prefill"), + "chunked_prefill_size": obj.get("chunked_prefill_size"), + "eager_execution": obj.get("eager_execution"), + "num_scheduler_steps": obj.get("num_scheduler_steps"), "use_flashinfer": obj.get("use_flashinfer") if obj.get("use_flashinfer") is not None else False, "max_model_len": obj.get("max_model_len"), "dtype": obj.get("dtype") if obj.get("dtype") is not None else 'auto', diff --git a/platform_api_python_client/models/c_serve_recipe_perf.py b/platform_api_python_client/models/c_serve_recipe_perf.py index d2fb693..af21d46 100644 --- a/platform_api_python_client/models/c_serve_recipe_perf.py +++ b/platform_api_python_client/models/c_serve_recipe_perf.py @@ -20,7 +20,7 @@ from pydantic import BaseModel, ConfigDict, Field, StrictInt from typing import Any, ClassVar, Dict, List from typing_extensions import Annotated -from platform_api_python_client.models.c_serve_recipe_output import CServeRecipeOutput +from platform_api_python_client.models.c_serve_v2_recipe_output import CServeV2RecipeOutput from typing import Optional, Set from typing_extensions import Self @@ -28,7 +28,7 @@ class CServeRecipePerf(BaseModel): """ CServeRecipePerf """ # noqa: E501 - recipe: CServeRecipeOutput + recipe: CServeV2RecipeOutput hardware_instance_id: StrictInt output_tp: List[Annotated[List[Any], Field(min_length=2, max_length=2)]] mean_ttft: List[Annotated[List[Any], Field(min_length=2, max_length=2)]] @@ -88,7 +88,7 @@ def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]: return cls.model_validate(obj) _obj = cls.model_validate({ - "recipe": CServeRecipeOutput.from_dict(obj["recipe"]) if obj.get("recipe") is not None else None, + "recipe": CServeV2RecipeOutput.from_dict(obj["recipe"]) if obj.get("recipe") is not None else None, "hardware_instance_id": 
obj.get("hardware_instance_id"), "output_tp": obj.get("output_tp"), "mean_ttft": obj.get("mean_ttft") diff --git a/platform_api_python_client/models/c_serve_recipe_output.py b/platform_api_python_client/models/c_serve_v2_recipe.py similarity index 59% rename from platform_api_python_client/models/c_serve_recipe_output.py rename to platform_api_python_client/models/c_serve_v2_recipe.py index 16cedc2..cc1cc87 100644 --- a/platform_api_python_client/models/c_serve_recipe_output.py +++ b/platform_api_python_client/models/c_serve_v2_recipe.py @@ -17,37 +17,44 @@ import re # noqa: F401 import json -from pydantic import BaseModel, ConfigDict, Field, StrictBool, StrictInt, StrictStr +from pydantic import BaseModel, ConfigDict, StrictBool, StrictFloat, StrictInt, StrictStr from typing import Any, ClassVar, Dict, List, Optional, Union -from typing_extensions import Annotated from typing import Optional, Set from typing_extensions import Self -class CServeRecipeOutput(BaseModel): +class CServeV2Recipe(BaseModel): """ - Base class for deployment planner + Inputs to start deployment """ # noqa: E501 model: StrictStr + max_model_len: Optional[StrictInt] is_embedding_model: StrictBool + tokenizer: StrictStr tensor_parallel_size: StrictInt pipeline_parallel_size: StrictInt + gpu_mem_util: Union[StrictFloat, StrictInt] block_size: StrictInt - swap_space: Annotated[int, Field(strict=True, ge=0)] - gpu_mem_util: Union[Annotated[float, Field(le=1.0, strict=True, ge=0.0)], Annotated[int, Field(le=1, strict=True, ge=0)]] + swap_space: StrictInt + quantization: Optional[StrictStr] + dtype: StrictStr + cache_dtype: StrictStr max_num_seqs: StrictInt - use_prefix_caching: Optional[StrictBool] - offloading_num: StrictInt + eager_execution: StrictBool use_flashinfer: StrictBool - max_model_len: Optional[Annotated[int, Field(strict=True, ge=128)]] - dtype: StrictStr - tokenizer: Optional[StrictStr] - spec_proposer: Optional[StrictStr] + offloading_num: Union[StrictFloat, StrictInt] 
spec_draft_model: Optional[StrictStr] spec_tokens: Optional[StrictInt] - spec_prompt_lookup_min: Optional[Annotated[int, Field(strict=True, ge=1)]] - spec_prompt_lookup_max: Optional[Annotated[int, Field(strict=True, ge=1)]] - seed: StrictInt - __properties: ClassVar[List[str]] = ["model", "is_embedding_model", "tensor_parallel_size", "pipeline_parallel_size", "block_size", "swap_space", "gpu_mem_util", "max_num_seqs", "use_prefix_caching", "offloading_num", "use_flashinfer", "max_model_len", "dtype", "tokenizer", "spec_proposer", "spec_draft_model", "spec_tokens", "spec_prompt_lookup_min", "spec_prompt_lookup_max", "seed"] + spec_prompt_lookup_max: Optional[StrictInt] + spec_prompt_lookup_min: Optional[StrictInt] + use_prefix_caching: StrictBool + use_chunked_prefill: StrictBool + chunked_prefill_size: Optional[StrictInt] + max_seq_len_to_capture: StrictInt + distributed_executor_backend: StrictStr + spec_max_batch_size: Optional[StrictInt] + spec_max_seq_len: Optional[StrictInt] + num_scheduler_steps: StrictInt + __properties: ClassVar[List[str]] = ["model", "max_model_len", "is_embedding_model", "tokenizer", "tensor_parallel_size", "pipeline_parallel_size", "gpu_mem_util", "block_size", "swap_space", "quantization", "dtype", "cache_dtype", "max_num_seqs", "eager_execution", "use_flashinfer", "offloading_num", "spec_draft_model", "spec_tokens", "spec_prompt_lookup_max", "spec_prompt_lookup_min", "use_prefix_caching", "use_chunked_prefill", "chunked_prefill_size", "max_seq_len_to_capture", "distributed_executor_backend", "spec_max_batch_size", "spec_max_seq_len", "num_scheduler_steps"] model_config = ConfigDict( populate_by_name=True, @@ -67,7 +74,7 @@ def to_json(self) -> str: @classmethod def from_json(cls, json_str: str) -> Optional[Self]: - """Create an instance of CServeRecipeOutput from a JSON string""" + """Create an instance of CServeV2Recipe from a JSON string""" return cls.from_dict(json.loads(json_str)) def to_dict(self) -> Dict[str, Any]: @@ -88,25 
+95,15 @@ def to_dict(self) -> Dict[str, Any]: exclude=excluded_fields, exclude_none=True, ) - # set to None if use_prefix_caching (nullable) is None - # and model_fields_set contains the field - if self.use_prefix_caching is None and "use_prefix_caching" in self.model_fields_set: - _dict['use_prefix_caching'] = None - # set to None if max_model_len (nullable) is None # and model_fields_set contains the field if self.max_model_len is None and "max_model_len" in self.model_fields_set: _dict['max_model_len'] = None - # set to None if tokenizer (nullable) is None + # set to None if quantization (nullable) is None # and model_fields_set contains the field - if self.tokenizer is None and "tokenizer" in self.model_fields_set: - _dict['tokenizer'] = None - - # set to None if spec_proposer (nullable) is None - # and model_fields_set contains the field - if self.spec_proposer is None and "spec_proposer" in self.model_fields_set: - _dict['spec_proposer'] = None + if self.quantization is None and "quantization" in self.model_fields_set: + _dict['quantization'] = None # set to None if spec_draft_model (nullable) is None # and model_fields_set contains the field @@ -118,21 +115,36 @@ def to_dict(self) -> Dict[str, Any]: if self.spec_tokens is None and "spec_tokens" in self.model_fields_set: _dict['spec_tokens'] = None + # set to None if spec_prompt_lookup_max (nullable) is None + # and model_fields_set contains the field + if self.spec_prompt_lookup_max is None and "spec_prompt_lookup_max" in self.model_fields_set: + _dict['spec_prompt_lookup_max'] = None + # set to None if spec_prompt_lookup_min (nullable) is None # and model_fields_set contains the field if self.spec_prompt_lookup_min is None and "spec_prompt_lookup_min" in self.model_fields_set: _dict['spec_prompt_lookup_min'] = None - # set to None if spec_prompt_lookup_max (nullable) is None + # set to None if chunked_prefill_size (nullable) is None # and model_fields_set contains the field - if self.spec_prompt_lookup_max 
is None and "spec_prompt_lookup_max" in self.model_fields_set: - _dict['spec_prompt_lookup_max'] = None + if self.chunked_prefill_size is None and "chunked_prefill_size" in self.model_fields_set: + _dict['chunked_prefill_size'] = None + + # set to None if spec_max_batch_size (nullable) is None + # and model_fields_set contains the field + if self.spec_max_batch_size is None and "spec_max_batch_size" in self.model_fields_set: + _dict['spec_max_batch_size'] = None + + # set to None if spec_max_seq_len (nullable) is None + # and model_fields_set contains the field + if self.spec_max_seq_len is None and "spec_max_seq_len" in self.model_fields_set: + _dict['spec_max_seq_len'] = None return _dict @classmethod def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]: - """Create an instance of CServeRecipeOutput from a dict""" + """Create an instance of CServeV2Recipe from a dict""" if obj is None: return None @@ -141,25 +153,33 @@ def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]: _obj = cls.model_validate({ "model": obj.get("model"), + "max_model_len": obj.get("max_model_len"), "is_embedding_model": obj.get("is_embedding_model") if obj.get("is_embedding_model") is not None else False, + "tokenizer": obj.get("tokenizer"), "tensor_parallel_size": obj.get("tensor_parallel_size"), "pipeline_parallel_size": obj.get("pipeline_parallel_size"), + "gpu_mem_util": obj.get("gpu_mem_util") if obj.get("gpu_mem_util") is not None else 0.95, "block_size": obj.get("block_size") if obj.get("block_size") is not None else 32, "swap_space": obj.get("swap_space") if obj.get("swap_space") is not None else 0, - "gpu_mem_util": obj.get("gpu_mem_util") if obj.get("gpu_mem_util") is not None else 0.95, + "quantization": obj.get("quantization"), + "dtype": obj.get("dtype") if obj.get("dtype") is not None else 'auto', + "cache_dtype": obj.get("cache_dtype") if obj.get("cache_dtype") is not None else 'auto', "max_num_seqs": obj.get("max_num_seqs") if 
obj.get("max_num_seqs") is not None else 256, - "use_prefix_caching": obj.get("use_prefix_caching"), - "offloading_num": obj.get("offloading_num") if obj.get("offloading_num") is not None else 0, + "eager_execution": obj.get("eager_execution") if obj.get("eager_execution") is not None else True, "use_flashinfer": obj.get("use_flashinfer") if obj.get("use_flashinfer") is not None else False, - "max_model_len": obj.get("max_model_len"), - "dtype": obj.get("dtype") if obj.get("dtype") is not None else 'auto', - "tokenizer": obj.get("tokenizer"), - "spec_proposer": obj.get("spec_proposer"), + "offloading_num": obj.get("offloading_num") if obj.get("offloading_num") is not None else 0, "spec_draft_model": obj.get("spec_draft_model"), "spec_tokens": obj.get("spec_tokens"), - "spec_prompt_lookup_min": obj.get("spec_prompt_lookup_min"), "spec_prompt_lookup_max": obj.get("spec_prompt_lookup_max"), - "seed": obj.get("seed") if obj.get("seed") is not None else 0 + "spec_prompt_lookup_min": obj.get("spec_prompt_lookup_min"), + "use_prefix_caching": obj.get("use_prefix_caching") if obj.get("use_prefix_caching") is not None else False, + "use_chunked_prefill": obj.get("use_chunked_prefill") if obj.get("use_chunked_prefill") is not None else False, + "chunked_prefill_size": obj.get("chunked_prefill_size"), + "max_seq_len_to_capture": obj.get("max_seq_len_to_capture") if obj.get("max_seq_len_to_capture") is not None else 1024, + "distributed_executor_backend": obj.get("distributed_executor_backend") if obj.get("distributed_executor_backend") is not None else 'ray', + "spec_max_batch_size": obj.get("spec_max_batch_size"), + "spec_max_seq_len": obj.get("spec_max_seq_len"), + "num_scheduler_steps": obj.get("num_scheduler_steps") if obj.get("num_scheduler_steps") is not None else 1 }) return _obj diff --git a/platform_api_python_client/models/c_serve_v2_recipe_input.py b/platform_api_python_client/models/c_serve_v2_recipe_input.py new file mode 100644 index 0000000..0c3a2b5 --- 
/dev/null +++ b/platform_api_python_client/models/c_serve_v2_recipe_input.py @@ -0,0 +1,186 @@ +# coding: utf-8 + +""" + Platform External API + + No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator) + + The version of the OpenAPI document: 0.1.0 + Generated by OpenAPI Generator (https://openapi-generator.tech) + + Do not edit the class manually. +""" # noqa: E501 + + +from __future__ import annotations +import pprint +import re # noqa: F401 +import json + +from pydantic import BaseModel, ConfigDict, StrictBool, StrictFloat, StrictInt, StrictStr +from typing import Any, ClassVar, Dict, List, Optional, Union +from typing import Optional, Set +from typing_extensions import Self + +class CServeV2RecipeInput(BaseModel): + """ + Inputs to start deployment + """ # noqa: E501 + model: StrictStr + max_model_len: Optional[StrictInt] = None + is_embedding_model: Optional[StrictBool] = False + tokenizer: StrictStr + tensor_parallel_size: StrictInt + pipeline_parallel_size: StrictInt + gpu_mem_util: Optional[Union[StrictFloat, StrictInt]] = 0.95 + block_size: Optional[StrictInt] = 16 + swap_space: Optional[StrictInt] = 0 + quantization: Optional[StrictStr] = None + dtype: Optional[StrictStr] = 'auto' + cache_dtype: Optional[StrictStr] = 'auto' + max_num_seqs: Optional[StrictInt] = 256 + eager_execution: Optional[StrictBool] = True + use_flashinfer: Optional[StrictBool] = False + offloading_num: Optional[Union[StrictFloat, StrictInt]] = 0 + spec_draft_model: Optional[StrictStr] = None + spec_tokens: Optional[StrictInt] = None + spec_prompt_lookup_max: Optional[StrictInt] = None + spec_prompt_lookup_min: Optional[StrictInt] = None + use_prefix_caching: Optional[StrictBool] = False + use_chunked_prefill: Optional[StrictBool] = False + chunked_prefill_size: Optional[StrictInt] = None + max_seq_len_to_capture: Optional[StrictInt] = 8192 + distributed_executor_backend: Optional[StrictStr] = 'mp' + spec_max_batch_size: 
Optional[StrictInt] = None
+    spec_max_seq_len: Optional[StrictInt] = None
+    num_scheduler_steps: Optional[StrictInt] = 1
+    __properties: ClassVar[List[str]] = ["model", "max_model_len", "is_embedding_model", "tokenizer", "tensor_parallel_size", "pipeline_parallel_size", "gpu_mem_util", "block_size", "swap_space", "quantization", "dtype", "cache_dtype", "max_num_seqs", "eager_execution", "use_flashinfer", "offloading_num", "spec_draft_model", "spec_tokens", "spec_prompt_lookup_max", "spec_prompt_lookup_min", "use_prefix_caching", "use_chunked_prefill", "chunked_prefill_size", "max_seq_len_to_capture", "distributed_executor_backend", "spec_max_batch_size", "spec_max_seq_len", "num_scheduler_steps"]
+
+    model_config = ConfigDict(
+        populate_by_name=True,
+        validate_assignment=True,
+        protected_namespaces=(),
+    )
+
+
+    def to_str(self) -> str:
+        """Returns the string representation of the model using alias"""
+        return pprint.pformat(self.model_dump(by_alias=True))
+
+    def to_json(self) -> str:
+        """Returns the JSON representation of the model using alias"""
+        # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
+        return json.dumps(self.to_dict())
+
+    @classmethod
+    def from_json(cls, json_str: str) -> Optional[Self]:
+        """Create an instance of CServeV2RecipeInput from a JSON string"""
+        return cls.from_dict(json.loads(json_str))
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the dictionary representation of the model using alias.
+
+        This has the following differences from calling pydantic's
+        `self.model_dump(by_alias=True)`:
+
+        * `None` is only added to the output dict for nullable fields that
+          were set at model initialization. Other fields with value `None`
+          are ignored.
+        """
+        excluded_fields: Set[str] = set([
+        ])
+
+        _dict = self.model_dump(
+            by_alias=True,
+            exclude=excluded_fields,
+            exclude_none=True,
+        )
+        # set to None if max_model_len (nullable) is None
+        # and model_fields_set contains the field
+        if self.max_model_len is None and "max_model_len" in self.model_fields_set:
+            _dict['max_model_len'] = None
+
+        # set to None if quantization (nullable) is None
+        # and model_fields_set contains the field
+        if self.quantization is None and "quantization" in self.model_fields_set:
+            _dict['quantization'] = None
+
+        # set to None if spec_draft_model (nullable) is None
+        # and model_fields_set contains the field
+        if self.spec_draft_model is None and "spec_draft_model" in self.model_fields_set:
+            _dict['spec_draft_model'] = None
+
+        # set to None if spec_tokens (nullable) is None
+        # and model_fields_set contains the field
+        if self.spec_tokens is None and "spec_tokens" in self.model_fields_set:
+            _dict['spec_tokens'] = None
+
+        # set to None if spec_prompt_lookup_max (nullable) is None
+        # and model_fields_set contains the field
+        if self.spec_prompt_lookup_max is None and "spec_prompt_lookup_max" in self.model_fields_set:
+            _dict['spec_prompt_lookup_max'] = None
+
+        # set to None if spec_prompt_lookup_min (nullable) is None
+        # and model_fields_set contains the field
+        if self.spec_prompt_lookup_min is None and "spec_prompt_lookup_min" in self.model_fields_set:
+            _dict['spec_prompt_lookup_min'] = None
+
+        # set to None if chunked_prefill_size (nullable) is None
+        # and model_fields_set contains the field
+        if self.chunked_prefill_size is None and "chunked_prefill_size" in self.model_fields_set:
+            _dict['chunked_prefill_size'] = None
+
+        # set to None if spec_max_batch_size (nullable) is None
+        # and model_fields_set contains the field
+        if self.spec_max_batch_size is None and "spec_max_batch_size" in self.model_fields_set:
+            _dict['spec_max_batch_size'] = None
+
+        # set to None if spec_max_seq_len (nullable) is None
+        # and model_fields_set contains the field
+        if self.spec_max_seq_len is None and "spec_max_seq_len" in self.model_fields_set:
+            _dict['spec_max_seq_len'] = None
+
+        return _dict
+
+    @classmethod
+    def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
+        """Create an instance of CServeV2RecipeInput from a dict"""
+        if obj is None:
+            return None
+
+        if not isinstance(obj, dict):
+            return cls.model_validate(obj)
+
+        _obj = cls.model_validate({
+            "model": obj.get("model"),
+            "max_model_len": obj.get("max_model_len"),
+            "is_embedding_model": obj.get("is_embedding_model") if obj.get("is_embedding_model") is not None else False,
+            "tokenizer": obj.get("tokenizer"),
+            "tensor_parallel_size": obj.get("tensor_parallel_size"),
+            "pipeline_parallel_size": obj.get("pipeline_parallel_size"),
+            "gpu_mem_util": obj.get("gpu_mem_util") if obj.get("gpu_mem_util") is not None else 0.95,
+            "block_size": obj.get("block_size") if obj.get("block_size") is not None else 16,
+            "swap_space": obj.get("swap_space") if obj.get("swap_space") is not None else 0,
+            "quantization": obj.get("quantization"),
+            "dtype": obj.get("dtype") if obj.get("dtype") is not None else 'auto',
+            "cache_dtype": obj.get("cache_dtype") if obj.get("cache_dtype") is not None else 'auto',
+            "max_num_seqs": obj.get("max_num_seqs") if obj.get("max_num_seqs") is not None else 256,
+            "eager_execution": obj.get("eager_execution") if obj.get("eager_execution") is not None else True,
+            "use_flashinfer": obj.get("use_flashinfer") if obj.get("use_flashinfer") is not None else False,
+            "offloading_num": obj.get("offloading_num") if obj.get("offloading_num") is not None else 0,
+            "spec_draft_model": obj.get("spec_draft_model"),
+            "spec_tokens": obj.get("spec_tokens"),
+            "spec_prompt_lookup_max": obj.get("spec_prompt_lookup_max"),
+            "spec_prompt_lookup_min": obj.get("spec_prompt_lookup_min"),
+            "use_prefix_caching": obj.get("use_prefix_caching") if obj.get("use_prefix_caching") is not None else False,
+            "use_chunked_prefill": obj.get("use_chunked_prefill") if obj.get("use_chunked_prefill") is not None else False,
+            "chunked_prefill_size": obj.get("chunked_prefill_size"),
+            "max_seq_len_to_capture": obj.get("max_seq_len_to_capture") if obj.get("max_seq_len_to_capture") is not None else 8192,
+            "distributed_executor_backend": obj.get("distributed_executor_backend") if obj.get("distributed_executor_backend") is not None else 'mp',
+            "spec_max_batch_size": obj.get("spec_max_batch_size"),
+            "spec_max_seq_len": obj.get("spec_max_seq_len"),
+            "num_scheduler_steps": obj.get("num_scheduler_steps") if obj.get("num_scheduler_steps") is not None else 1
+        })
+        return _obj
+
+
diff --git a/platform_api_python_client/models/c_serve_v2_recipe_output.py b/platform_api_python_client/models/c_serve_v2_recipe_output.py
new file mode 100644
index 0000000..1b001f1
--- /dev/null
+++ b/platform_api_python_client/models/c_serve_v2_recipe_output.py
@@ -0,0 +1,186 @@
+# coding: utf-8
+
+"""
+    Platform External API
+
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+    The version of the OpenAPI document: 0.1.0
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+""" # noqa: E501 + + +from __future__ import annotations +import pprint +import re # noqa: F401 +import json + +from pydantic import BaseModel, ConfigDict, StrictBool, StrictFloat, StrictInt, StrictStr +from typing import Any, ClassVar, Dict, List, Optional, Union +from typing import Optional, Set +from typing_extensions import Self + +class CServeV2RecipeOutput(BaseModel): + """ + Inputs to start deployment + """ # noqa: E501 + model: StrictStr + max_model_len: Optional[StrictInt] + is_embedding_model: StrictBool + tokenizer: StrictStr + tensor_parallel_size: StrictInt + pipeline_parallel_size: StrictInt + gpu_mem_util: Union[StrictFloat, StrictInt] + block_size: StrictInt + swap_space: StrictInt + quantization: Optional[StrictStr] + dtype: StrictStr + cache_dtype: StrictStr + max_num_seqs: StrictInt + eager_execution: StrictBool + use_flashinfer: StrictBool + offloading_num: Union[StrictFloat, StrictInt] + spec_draft_model: Optional[StrictStr] + spec_tokens: Optional[StrictInt] + spec_prompt_lookup_max: Optional[StrictInt] + spec_prompt_lookup_min: Optional[StrictInt] + use_prefix_caching: StrictBool + use_chunked_prefill: StrictBool + chunked_prefill_size: Optional[StrictInt] + max_seq_len_to_capture: StrictInt + distributed_executor_backend: StrictStr + spec_max_batch_size: Optional[StrictInt] + spec_max_seq_len: Optional[StrictInt] + num_scheduler_steps: StrictInt + __properties: ClassVar[List[str]] = ["model", "max_model_len", "is_embedding_model", "tokenizer", "tensor_parallel_size", "pipeline_parallel_size", "gpu_mem_util", "block_size", "swap_space", "quantization", "dtype", "cache_dtype", "max_num_seqs", "eager_execution", "use_flashinfer", "offloading_num", "spec_draft_model", "spec_tokens", "spec_prompt_lookup_max", "spec_prompt_lookup_min", "use_prefix_caching", "use_chunked_prefill", "chunked_prefill_size", "max_seq_len_to_capture", "distributed_executor_backend", "spec_max_batch_size", "spec_max_seq_len", "num_scheduler_steps"] + + model_config = 
ConfigDict( + populate_by_name=True, + validate_assignment=True, + protected_namespaces=(), + ) + + + def to_str(self) -> str: + """Returns the string representation of the model using alias""" + return pprint.pformat(self.model_dump(by_alias=True)) + + def to_json(self) -> str: + """Returns the JSON representation of the model using alias""" + # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead + return json.dumps(self.to_dict()) + + @classmethod + def from_json(cls, json_str: str) -> Optional[Self]: + """Create an instance of CServeV2RecipeOutput from a JSON string""" + return cls.from_dict(json.loads(json_str)) + + def to_dict(self) -> Dict[str, Any]: + """Return the dictionary representation of the model using alias. + + This has the following differences from calling pydantic's + `self.model_dump(by_alias=True)`: + + * `None` is only added to the output dict for nullable fields that + were set at model initialization. Other fields with value `None` + are ignored. 
+ """ + excluded_fields: Set[str] = set([ + ]) + + _dict = self.model_dump( + by_alias=True, + exclude=excluded_fields, + exclude_none=True, + ) + # set to None if max_model_len (nullable) is None + # and model_fields_set contains the field + if self.max_model_len is None and "max_model_len" in self.model_fields_set: + _dict['max_model_len'] = None + + # set to None if quantization (nullable) is None + # and model_fields_set contains the field + if self.quantization is None and "quantization" in self.model_fields_set: + _dict['quantization'] = None + + # set to None if spec_draft_model (nullable) is None + # and model_fields_set contains the field + if self.spec_draft_model is None and "spec_draft_model" in self.model_fields_set: + _dict['spec_draft_model'] = None + + # set to None if spec_tokens (nullable) is None + # and model_fields_set contains the field + if self.spec_tokens is None and "spec_tokens" in self.model_fields_set: + _dict['spec_tokens'] = None + + # set to None if spec_prompt_lookup_max (nullable) is None + # and model_fields_set contains the field + if self.spec_prompt_lookup_max is None and "spec_prompt_lookup_max" in self.model_fields_set: + _dict['spec_prompt_lookup_max'] = None + + # set to None if spec_prompt_lookup_min (nullable) is None + # and model_fields_set contains the field + if self.spec_prompt_lookup_min is None and "spec_prompt_lookup_min" in self.model_fields_set: + _dict['spec_prompt_lookup_min'] = None + + # set to None if chunked_prefill_size (nullable) is None + # and model_fields_set contains the field + if self.chunked_prefill_size is None and "chunked_prefill_size" in self.model_fields_set: + _dict['chunked_prefill_size'] = None + + # set to None if spec_max_batch_size (nullable) is None + # and model_fields_set contains the field + if self.spec_max_batch_size is None and "spec_max_batch_size" in self.model_fields_set: + _dict['spec_max_batch_size'] = None + + # set to None if spec_max_seq_len (nullable) is None + # and 
model_fields_set contains the field + if self.spec_max_seq_len is None and "spec_max_seq_len" in self.model_fields_set: + _dict['spec_max_seq_len'] = None + + return _dict + + @classmethod + def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]: + """Create an instance of CServeV2RecipeOutput from a dict""" + if obj is None: + return None + + if not isinstance(obj, dict): + return cls.model_validate(obj) + + _obj = cls.model_validate({ + "model": obj.get("model"), + "max_model_len": obj.get("max_model_len"), + "is_embedding_model": obj.get("is_embedding_model") if obj.get("is_embedding_model") is not None else False, + "tokenizer": obj.get("tokenizer"), + "tensor_parallel_size": obj.get("tensor_parallel_size"), + "pipeline_parallel_size": obj.get("pipeline_parallel_size"), + "gpu_mem_util": obj.get("gpu_mem_util") if obj.get("gpu_mem_util") is not None else 0.95, + "block_size": obj.get("block_size") if obj.get("block_size") is not None else 16, + "swap_space": obj.get("swap_space") if obj.get("swap_space") is not None else 0, + "quantization": obj.get("quantization"), + "dtype": obj.get("dtype") if obj.get("dtype") is not None else 'auto', + "cache_dtype": obj.get("cache_dtype") if obj.get("cache_dtype") is not None else 'auto', + "max_num_seqs": obj.get("max_num_seqs") if obj.get("max_num_seqs") is not None else 256, + "eager_execution": obj.get("eager_execution") if obj.get("eager_execution") is not None else True, + "use_flashinfer": obj.get("use_flashinfer") if obj.get("use_flashinfer") is not None else False, + "offloading_num": obj.get("offloading_num") if obj.get("offloading_num") is not None else 0, + "spec_draft_model": obj.get("spec_draft_model"), + "spec_tokens": obj.get("spec_tokens"), + "spec_prompt_lookup_max": obj.get("spec_prompt_lookup_max"), + "spec_prompt_lookup_min": obj.get("spec_prompt_lookup_min"), + "use_prefix_caching": obj.get("use_prefix_caching") if obj.get("use_prefix_caching") is not None else False, + 
"use_chunked_prefill": obj.get("use_chunked_prefill") if obj.get("use_chunked_prefill") is not None else False, + "chunked_prefill_size": obj.get("chunked_prefill_size"), + "max_seq_len_to_capture": obj.get("max_seq_len_to_capture") if obj.get("max_seq_len_to_capture") is not None else 8192, + "distributed_executor_backend": obj.get("distributed_executor_backend") if obj.get("distributed_executor_backend") is not None else 'mp', + "spec_max_batch_size": obj.get("spec_max_batch_size"), + "spec_max_seq_len": obj.get("spec_max_seq_len"), + "num_scheduler_steps": obj.get("num_scheduler_steps") if obj.get("num_scheduler_steps") is not None else 1 + }) + return _obj + + diff --git a/platform_api_python_client/models/create_c_serve_deployment_request.py b/platform_api_python_client/models/create_c_serve_deployment_request.py index 7668bf9..597e41c 100644 --- a/platform_api_python_client/models/create_c_serve_deployment_request.py +++ b/platform_api_python_client/models/create_c_serve_deployment_request.py @@ -20,7 +20,7 @@ from pydantic import BaseModel, ConfigDict, Field, StrictInt, StrictStr from typing import Any, ClassVar, Dict, List, Optional from typing_extensions import Annotated -from platform_api_python_client.models.c_serve_recipe_input import CServeRecipeInput +from platform_api_python_client.models.c_serve_recipe import CServeRecipe from typing import Optional, Set from typing_extensions import Self @@ -31,8 +31,8 @@ class CreateCServeDeploymentRequest(BaseModel): name: Annotated[str, Field(strict=True, max_length=12)] cluster_id: StrictInt hardware_instance_id: StrictInt - recipe: CServeRecipeInput - hf_token: StrictStr + recipe: CServeRecipe + hf_token: Optional[StrictStr] = None endpoint_certificate_authority: Optional[StrictStr] = None min_scale: StrictInt max_scale: StrictInt @@ -82,6 +82,11 @@ def to_dict(self) -> Dict[str, Any]: # override the default output from pydantic by calling `to_dict()` of recipe if self.recipe: _dict['recipe'] = 
self.recipe.to_dict() + # set to None if hf_token (nullable) is None + # and model_fields_set contains the field + if self.hf_token is None and "hf_token" in self.model_fields_set: + _dict['hf_token'] = None + # set to None if endpoint_certificate_authority (nullable) is None # and model_fields_set contains the field if self.endpoint_certificate_authority is None and "endpoint_certificate_authority" in self.model_fields_set: @@ -107,7 +112,7 @@ def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]: "name": obj.get("name"), "cluster_id": obj.get("cluster_id"), "hardware_instance_id": obj.get("hardware_instance_id"), - "recipe": CServeRecipeInput.from_dict(obj["recipe"]) if obj.get("recipe") is not None else None, + "recipe": CServeRecipe.from_dict(obj["recipe"]) if obj.get("recipe") is not None else None, "hf_token": obj.get("hf_token"), "endpoint_certificate_authority": obj.get("endpoint_certificate_authority"), "min_scale": obj.get("min_scale"), diff --git a/platform_api_python_client/models/create_c_serve_v2_deployment_request.py b/platform_api_python_client/models/create_c_serve_v2_deployment_request.py new file mode 100644 index 0000000..11a914a --- /dev/null +++ b/platform_api_python_client/models/create_c_serve_v2_deployment_request.py @@ -0,0 +1,125 @@ +# coding: utf-8 + +""" + Platform External API + + No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator) + + The version of the OpenAPI document: 0.1.0 + Generated by OpenAPI Generator (https://openapi-generator.tech) + + Do not edit the class manually. 
+""" # noqa: E501 + + +from __future__ import annotations +import pprint +import re # noqa: F401 +import json + +from pydantic import BaseModel, ConfigDict, Field, StrictInt, StrictStr +from typing import Any, ClassVar, Dict, List, Optional +from typing_extensions import Annotated +from platform_api_python_client.models.c_serve_v2_recipe_input import CServeV2RecipeInput +from typing import Optional, Set +from typing_extensions import Self + +class CreateCServeV2DeploymentRequest(BaseModel): + """ + CreateCServeV2DeploymentRequest + """ # noqa: E501 + name: Annotated[str, Field(strict=True, max_length=12)] + cluster_id: StrictInt + hardware_instance_id: StrictInt + recipe: CServeV2RecipeInput + hf_token: Optional[StrictStr] = None + endpoint_certificate_authority: Optional[StrictStr] = None + min_scale: StrictInt + max_scale: StrictInt + concurrency: Optional[StrictInt] = None + env_vars: Optional[Dict[str, StrictStr]] = None + __properties: ClassVar[List[str]] = ["name", "cluster_id", "hardware_instance_id", "recipe", "hf_token", "endpoint_certificate_authority", "min_scale", "max_scale", "concurrency", "env_vars"] + + model_config = ConfigDict( + populate_by_name=True, + validate_assignment=True, + protected_namespaces=(), + ) + + + def to_str(self) -> str: + """Returns the string representation of the model using alias""" + return pprint.pformat(self.model_dump(by_alias=True)) + + def to_json(self) -> str: + """Returns the JSON representation of the model using alias""" + # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead + return json.dumps(self.to_dict()) + + @classmethod + def from_json(cls, json_str: str) -> Optional[Self]: + """Create an instance of CreateCServeV2DeploymentRequest from a JSON string""" + return cls.from_dict(json.loads(json_str)) + + def to_dict(self) -> Dict[str, Any]: + """Return the dictionary representation of the model using alias. 
+ + This has the following differences from calling pydantic's + `self.model_dump(by_alias=True)`: + + * `None` is only added to the output dict for nullable fields that + were set at model initialization. Other fields with value `None` + are ignored. + """ + excluded_fields: Set[str] = set([ + ]) + + _dict = self.model_dump( + by_alias=True, + exclude=excluded_fields, + exclude_none=True, + ) + # override the default output from pydantic by calling `to_dict()` of recipe + if self.recipe: + _dict['recipe'] = self.recipe.to_dict() + # set to None if hf_token (nullable) is None + # and model_fields_set contains the field + if self.hf_token is None and "hf_token" in self.model_fields_set: + _dict['hf_token'] = None + + # set to None if endpoint_certificate_authority (nullable) is None + # and model_fields_set contains the field + if self.endpoint_certificate_authority is None and "endpoint_certificate_authority" in self.model_fields_set: + _dict['endpoint_certificate_authority'] = None + + # set to None if concurrency (nullable) is None + # and model_fields_set contains the field + if self.concurrency is None and "concurrency" in self.model_fields_set: + _dict['concurrency'] = None + + return _dict + + @classmethod + def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]: + """Create an instance of CreateCServeV2DeploymentRequest from a dict""" + if obj is None: + return None + + if not isinstance(obj, dict): + return cls.model_validate(obj) + + _obj = cls.model_validate({ + "name": obj.get("name"), + "cluster_id": obj.get("cluster_id"), + "hardware_instance_id": obj.get("hardware_instance_id"), + "recipe": CServeV2RecipeInput.from_dict(obj["recipe"]) if obj.get("recipe") is not None else None, + "hf_token": obj.get("hf_token"), + "endpoint_certificate_authority": obj.get("endpoint_certificate_authority"), + "min_scale": obj.get("min_scale"), + "max_scale": obj.get("max_scale"), + "concurrency": obj.get("concurrency"), + "env_vars": obj.get("env_vars") + 
+        })
+        return _obj
+
+
diff --git a/platform_api_python_client/models/create_c_serve_v2_deployment_response.py b/platform_api_python_client/models/create_c_serve_v2_deployment_response.py
new file mode 100644
index 0000000..46b471b
--- /dev/null
+++ b/platform_api_python_client/models/create_c_serve_v2_deployment_response.py
@@ -0,0 +1,92 @@
+# coding: utf-8
+
+"""
+    Platform External API
+
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+    The version of the OpenAPI document: 0.1.0
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+""" # noqa: E501
+
+
+from __future__ import annotations
+import pprint
+import re  # noqa: F401
+import json
+
+from datetime import datetime
+from pydantic import BaseModel, ConfigDict, StrictInt, StrictStr
+from typing import Any, ClassVar, Dict, List
+from typing import Optional, Set
+from typing_extensions import Self
+
+class CreateCServeV2DeploymentResponse(BaseModel):
+    """
+    CreateCServeV2DeploymentResponse
+    """ # noqa: E501
+    id: StrictInt
+    created_at: datetime
+    endpoint_url: StrictStr
+    __properties: ClassVar[List[str]] = ["id", "created_at", "endpoint_url"]
+
+    model_config = ConfigDict(
+        populate_by_name=True,
+        validate_assignment=True,
+        protected_namespaces=(),
+    )
+
+
+    def to_str(self) -> str:
+        """Returns the string representation of the model using alias"""
+        return pprint.pformat(self.model_dump(by_alias=True))
+
+    def to_json(self) -> str:
+        """Returns the JSON representation of the model using alias"""
+        # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
+        return json.dumps(self.to_dict())
+
+    @classmethod
+    def from_json(cls, json_str: str) -> Optional[Self]:
+        """Create an instance of CreateCServeV2DeploymentResponse from a JSON string"""
+        return cls.from_dict(json.loads(json_str))
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the dictionary representation of the model using alias.
+
+        This has the following differences from calling pydantic's
+        `self.model_dump(by_alias=True)`:
+
+        * `None` is only added to the output dict for nullable fields that
+          were set at model initialization. Other fields with value `None`
+          are ignored.
+        """
+        excluded_fields: Set[str] = set([
+        ])
+
+        _dict = self.model_dump(
+            by_alias=True,
+            exclude=excluded_fields,
+            exclude_none=True,
+        )
+        return _dict
+
+    @classmethod
+    def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
+        """Create an instance of CreateCServeV2DeploymentResponse from a dict"""
+        if obj is None:
+            return None
+
+        if not isinstance(obj, dict):
+            return cls.model_validate(obj)
+
+        _obj = cls.model_validate({
+            "id": obj.get("id"),
+            "created_at": obj.get("created_at"),
+            "endpoint_url": obj.get("endpoint_url")
+        })
+        return _obj
+
+
diff --git a/platform_api_python_client/models/create_compute_deployment_request.py b/platform_api_python_client/models/create_compute_deployment_request.py
index f177d01..8216f04 100644
--- a/platform_api_python_client/models/create_compute_deployment_request.py
+++ b/platform_api_python_client/models/create_compute_deployment_request.py
@@ -17,7 +17,7 @@
 import re  # noqa: F401
 import json
 
-from pydantic import BaseModel, ConfigDict, Field, StrictInt, StrictStr
+from pydantic import BaseModel, ConfigDict, Field, StrictBool, StrictInt, StrictStr
 from typing import Any, ClassVar, Dict, List, Optional
 from typing_extensions import Annotated
 from typing import Optional, Set
@@ -31,9 +31,10 @@ class CreateComputeDeploymentRequest(BaseModel):
     cluster_id: StrictInt
     hardware_instance_id: StrictInt
     image_url: StrictStr
+    enable_jupyter: Optional[StrictBool] = False
     ssh_public_key: Optional[StrictStr] = None
     ssh_password: Optional[StrictStr] = None
-    __properties: ClassVar[List[str]] = ["name", "cluster_id", "hardware_instance_id", "image_url", "ssh_public_key", "ssh_password"]
+    __properties: ClassVar[List[str]] = ["name", "cluster_id", "hardware_instance_id", "image_url", "enable_jupyter", "ssh_public_key", "ssh_password"]
 
     model_config = ConfigDict(
         populate_by_name=True,
@@ -100,6 +101,7 @@ def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
             "cluster_id": obj.get("cluster_id"),
             "hardware_instance_id": obj.get("hardware_instance_id"),
             "image_url": obj.get("image_url"),
+            "enable_jupyter": obj.get("enable_jupyter") if obj.get("enable_jupyter") is not None else False,
             "ssh_public_key": obj.get("ssh_public_key"),
             "ssh_password": obj.get("ssh_password")
         })
diff --git a/platform_api_python_client/models/create_compute_deployment_response.py b/platform_api_python_client/models/create_compute_deployment_response.py
index 2174943..9609798 100644
--- a/platform_api_python_client/models/create_compute_deployment_response.py
+++ b/platform_api_python_client/models/create_compute_deployment_response.py
@@ -19,7 +19,7 @@
 from datetime import datetime
 from pydantic import BaseModel, ConfigDict, StrictInt, StrictStr
-from typing import Any, ClassVar, Dict, List
+from typing import Any, ClassVar, Dict, List, Optional
 from typing import Optional, Set
 from typing_extensions import Self
@@ -31,7 +31,8 @@ class CreateComputeDeploymentResponse(BaseModel):
     created_at: datetime
     endpoint_url: StrictStr
     port: StrictInt
-    __properties: ClassVar[List[str]] = ["id", "created_at", "endpoint_url", "port"]
+    jupyter_token: Optional[StrictStr]
+    __properties: ClassVar[List[str]] = ["id", "created_at", "endpoint_url", "port", "jupyter_token"]
 
     model_config = ConfigDict(
         populate_by_name=True,
@@ -72,6 +73,11 @@ def to_dict(self) -> Dict[str, Any]:
             exclude=excluded_fields,
             exclude_none=True,
         )
+        # set to None if jupyter_token (nullable) is None
+        # and model_fields_set contains the field
+        if self.jupyter_token is None and "jupyter_token" in self.model_fields_set:
+            _dict['jupyter_token'] = None
+
         return _dict
 
     @classmethod
@@ -87,7 +93,8 @@ def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
             "id": obj.get("id"),
             "created_at": obj.get("created_at"),
             "endpoint_url": obj.get("endpoint_url"),
-            "port": obj.get("port")
+            "port": obj.get("port"),
+            "jupyter_token": obj.get("jupyter_token")
         })
         return _obj
diff --git a/platform_api_python_client/models/create_rag_deployment_request.py b/platform_api_python_client/models/create_rag_deployment_request.py
new file mode 100644
index 0000000..112c4e2
--- /dev/null
+++ b/platform_api_python_client/models/create_rag_deployment_request.py
@@ -0,0 +1,129 @@
+# coding: utf-8
+
+"""
+    Platform External API
+
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+    The version of the OpenAPI document: 0.1.0
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+""" # noqa: E501
+
+
+from __future__ import annotations
+import pprint
+import re  # noqa: F401
+import json
+
+from pydantic import BaseModel, ConfigDict, Field, StrictInt, StrictStr
+from typing import Any, ClassVar, Dict, List, Optional
+from typing_extensions import Annotated
+from platform_api_python_client.models.c_serve_v2_recipe_input import CServeV2RecipeInput
+from typing import Optional, Set
+from typing_extensions import Self
+
+class CreateRagDeploymentRequest(BaseModel):
+    """
+    CreateRagDeploymentRequest
+    """ # noqa: E501
+    name: Annotated[str, Field(strict=True, max_length=12)]
+    cluster_id: StrictInt
+    hardware_instance_id: StrictInt
+    recipe: CServeV2RecipeInput
+    hf_token: Optional[StrictStr] = None
+    llm_model: StrictStr
+    centml_api_key: StrictStr
+    min_scale: Optional[StrictInt] = 1
+    max_scale: Optional[StrictInt] = 1
+    endpoint_certificate_authority: Optional[StrictStr] = None
+    concurrency: Optional[StrictInt] = None
+    env_vars: Optional[Dict[str, StrictStr]] = None
+    __properties: ClassVar[List[str]] = ["name", "cluster_id", "hardware_instance_id", "recipe", "hf_token", "llm_model", "centml_api_key", "min_scale", "max_scale", "endpoint_certificate_authority", "concurrency", "env_vars"]
+
+    model_config = ConfigDict(
+        populate_by_name=True,
+        validate_assignment=True,
+        protected_namespaces=(),
+    )
+
+
+    def to_str(self) -> str:
+        """Returns the string representation of the model using alias"""
+        return pprint.pformat(self.model_dump(by_alias=True))
+
+    def to_json(self) -> str:
+        """Returns the JSON representation of the model using alias"""
+        # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
+        return json.dumps(self.to_dict())
+
+    @classmethod
+    def from_json(cls, json_str: str) -> Optional[Self]:
+        """Create an instance of CreateRagDeploymentRequest from a JSON string"""
+        return cls.from_dict(json.loads(json_str))
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the dictionary representation of the model using alias.
+
+        This has the following differences from calling pydantic's
+        `self.model_dump(by_alias=True)`:
+
+        * `None` is only added to the output dict for nullable fields that
+          were set at model initialization. Other fields with value `None`
+          are ignored.
+        """
+        excluded_fields: Set[str] = set([
+        ])
+
+        _dict = self.model_dump(
+            by_alias=True,
+            exclude=excluded_fields,
+            exclude_none=True,
+        )
+        # override the default output from pydantic by calling `to_dict()` of recipe
+        if self.recipe:
+            _dict['recipe'] = self.recipe.to_dict()
+        # set to None if hf_token (nullable) is None
+        # and model_fields_set contains the field
+        if self.hf_token is None and "hf_token" in self.model_fields_set:
+            _dict['hf_token'] = None
+
+        # set to None if endpoint_certificate_authority (nullable) is None
+        # and model_fields_set contains the field
+        if self.endpoint_certificate_authority is None and "endpoint_certificate_authority" in self.model_fields_set:
+            _dict['endpoint_certificate_authority'] = None
+
+        # set to None if concurrency (nullable) is None
+        # and model_fields_set contains the field
+        if self.concurrency is None and "concurrency" in self.model_fields_set:
+            _dict['concurrency'] = None
+
+        return _dict
+
+    @classmethod
+    def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
+        """Create an instance of CreateRagDeploymentRequest from a dict"""
+        if obj is None:
+            return None
+
+        if not isinstance(obj, dict):
+            return cls.model_validate(obj)
+
+        _obj = cls.model_validate({
+            "name": obj.get("name"),
+            "cluster_id": obj.get("cluster_id"),
+            "hardware_instance_id": obj.get("hardware_instance_id"),
+            "recipe": CServeV2RecipeInput.from_dict(obj["recipe"]) if obj.get("recipe") is not None else None,
+            "hf_token": obj.get("hf_token"),
+            "llm_model": obj.get("llm_model"),
+            "centml_api_key": obj.get("centml_api_key"),
+            "min_scale": obj.get("min_scale") if obj.get("min_scale") is not None else 1,
+            "max_scale": obj.get("max_scale") if obj.get("max_scale") is not None else 1,
+            "endpoint_certificate_authority": obj.get("endpoint_certificate_authority"),
+            "concurrency": obj.get("concurrency"),
+            "env_vars": obj.get("env_vars")
+        })
+        return _obj
+
+
diff --git a/platform_api_python_client/models/create_rag_deployment_response.py b/platform_api_python_client/models/create_rag_deployment_response.py
new file mode 100644
index 0000000..6272944
--- /dev/null
+++ b/platform_api_python_client/models/create_rag_deployment_response.py
@@ -0,0 +1,92 @@
+# coding: utf-8
+
+"""
+    Platform External API
+
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+    The version of the OpenAPI document: 0.1.0
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+""" # noqa: E501
+
+
+from __future__ import annotations
+import pprint
+import re  # noqa: F401
+import json
+
+from datetime import datetime
+from pydantic import BaseModel, ConfigDict, StrictInt, StrictStr
+from typing import Any, ClassVar, Dict, List
+from typing import Optional, Set
+from typing_extensions import Self
+
+class CreateRagDeploymentResponse(BaseModel):
+    """
+    CreateRagDeploymentResponse
+    """ # noqa: E501
+    id: StrictInt
+    created_at: datetime
+    endpoint_url: StrictStr
+    __properties: ClassVar[List[str]] = ["id", "created_at", "endpoint_url"]
+
+    model_config = ConfigDict(
+        populate_by_name=True,
+        validate_assignment=True,
+        protected_namespaces=(),
+    )
+
+
+    def to_str(self) -> str:
+        """Returns the string representation of the model using alias"""
+        return pprint.pformat(self.model_dump(by_alias=True))
+
+    def to_json(self) -> str:
+        """Returns the JSON representation of the model using alias"""
+        # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
+        return json.dumps(self.to_dict())
+
+    @classmethod
+    def from_json(cls, json_str: str) -> Optional[Self]:
+        """Create an instance of CreateRagDeploymentResponse from a JSON string"""
+        return cls.from_dict(json.loads(json_str))
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the dictionary representation of the model using alias.
+
+        This has the following differences from calling pydantic's
+        `self.model_dump(by_alias=True)`:
+
+        * `None` is only added to the output dict for nullable fields that
+          were set at model initialization. Other fields with value `None`
+          are ignored.
+        """
+        excluded_fields: Set[str] = set([
+        ])
+
+        _dict = self.model_dump(
+            by_alias=True,
+            exclude=excluded_fields,
+            exclude_none=True,
+        )
+        return _dict
+
+    @classmethod
+    def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
+        """Create an instance of CreateRagDeploymentResponse from a dict"""
+        if obj is None:
+            return None
+
+        if not isinstance(obj, dict):
+            return cls.model_validate(obj)
+
+        _obj = cls.model_validate({
+            "id": obj.get("id"),
+            "created_at": obj.get("created_at"),
+            "endpoint_url": obj.get("endpoint_url")
+        })
+        return _obj
+
+
diff --git a/platform_api_python_client/models/deployment_type.py b/platform_api_python_client/models/deployment_type.py
index 4753e95..40ba35f 100644
--- a/platform_api_python_client/models/deployment_type.py
+++ b/platform_api_python_client/models/deployment_type.py
@@ -33,7 +33,9 @@ class DeploymentType(str, Enum):
     INFERENCE_V2 = 'inference_v2'
     COMPUTE_V2 = 'compute_v2'
     CSERVE = 'cserve'
+    CSERVE_V2 = 'cserve_v2'
     DEPLOYMENT = 'deployment'
+    RAG = 'rag'
 
     @classmethod
     def from_json(cls, json_str: str) -> Self:
diff --git a/platform_api_python_client/models/get_c_serve_deployment_response.py b/platform_api_python_client/models/get_c_serve_deployment_response.py
index 4a451e3..37549de 100644
--- a/platform_api_python_client/models/get_c_serve_deployment_response.py
+++ b/platform_api_python_client/models/get_c_serve_deployment_response.py
@@ -18,7 +18,7 @@
 import json
 
 from datetime import datetime
-from pydantic import BaseModel, ConfigDict, Field, StrictBool, StrictInt, StrictStr
+from pydantic import BaseModel, ConfigDict, Field, StrictBool, StrictInt, StrictStr, field_validator
 from typing import Any, ClassVar, Dict, List, Optional, Union
 from typing_extensions import Annotated
 from platform_api_python_client.models.deployment_status import DeploymentStatus
@@ -38,8 +38,12 @@ class GetCServeDeploymentResponse(BaseModel):
     swap_space: Annotated[int, Field(strict=True, ge=0)]
     gpu_mem_util: Union[Annotated[float, Field(le=1.0, strict=True, ge=0.0)], Annotated[int, Field(le=1, strict=True, ge=0)]]
     max_num_seqs: StrictInt
-    use_prefix_caching: Optional[StrictBool]
     offloading_num: StrictInt
+    use_prefix_caching: Optional[StrictBool]
+    use_chunked_prefill: Optional[StrictBool]
+    chunked_prefill_size: Optional[StrictInt]
+    eager_execution: Optional[StrictBool]
+    num_scheduler_steps: Optional[StrictInt]
     use_flashinfer: StrictBool
     max_model_len: Optional[Annotated[int, Field(strict=True, ge=128)]]
     dtype: StrictStr
@@ -64,7 +68,31 @@ class GetCServeDeploymentResponse(BaseModel):
     endpoint_certificate_authority: Optional[StrictStr]
     concurrency: Optional[StrictInt]
     env_vars: Dict[str, StrictStr]
-    __properties: ClassVar[List[str]] = ["model", "is_embedding_model", "tensor_parallel_size", "pipeline_parallel_size", "block_size", "swap_space", "gpu_mem_util", "max_num_seqs", "use_prefix_caching", "offloading_num", "use_flashinfer", "max_model_len", "dtype", "tokenizer", "spec_proposer", "spec_draft_model", "spec_tokens", "spec_prompt_lookup_min", "spec_prompt_lookup_max", "seed", "cluster_id", "id", "name", "endpoint_url", "image_url", "type", "status", "created_at", "hardware_instance_id", "min_scale", "max_scale", "endpoint_certificate_authority", "concurrency", "env_vars"]
+    __properties: ClassVar[List[str]] = ["model", "is_embedding_model", "tensor_parallel_size", "pipeline_parallel_size", "block_size", "swap_space", "gpu_mem_util", "max_num_seqs", "offloading_num", "use_prefix_caching", "use_chunked_prefill", "chunked_prefill_size", "eager_execution", "num_scheduler_steps", "use_flashinfer", "max_model_len", "dtype", "tokenizer", "spec_proposer", "spec_draft_model", "spec_tokens", "spec_prompt_lookup_min", "spec_prompt_lookup_max", "seed", "cluster_id", "id", "name", "endpoint_url", "image_url", "type", "status", "created_at", "hardware_instance_id", "min_scale", "max_scale", "endpoint_certificate_authority", "concurrency", "env_vars"]
+
+    @field_validator('block_size')
+    def block_size_validate_enum(cls, value):
+        """Validates the enum"""
+        if value not in set([16, 32]):
+            raise ValueError("must be one of enum values (16, 32)")
+        return value
+
+    @field_validator('dtype')
+    def dtype_validate_enum(cls, value):
+        """Validates the enum"""
+        if value not in set(['auto', 'float16', 'float32', 'bfloat16']):
+            raise ValueError("must be one of enum values ('auto', 'float16', 'float32', 'bfloat16')")
+        return value
+
+    @field_validator('spec_proposer')
+    def spec_proposer_validate_enum(cls, value):
+        """Validates the enum"""
+        if value is None:
+            return value
+
+        if value not in set(['draft', 'prompt_lookup']):
+            raise ValueError("must be one of enum values ('draft', 'prompt_lookup')")
+        return value
 
     model_config = ConfigDict(
         populate_by_name=True,
@@ -110,6 +138,26 @@ def to_dict(self) -> Dict[str, Any]:
         if self.use_prefix_caching is None and "use_prefix_caching" in self.model_fields_set:
             _dict['use_prefix_caching'] = None
 
+        # set to None if use_chunked_prefill (nullable) is None
+        # and model_fields_set contains the field
+        if self.use_chunked_prefill is None and "use_chunked_prefill" in self.model_fields_set:
+            _dict['use_chunked_prefill'] = None
+
+        # set to None if chunked_prefill_size (nullable) is None
+        # and model_fields_set contains the field
+        if self.chunked_prefill_size is None and "chunked_prefill_size" in self.model_fields_set:
+            _dict['chunked_prefill_size'] = None
+
+        # set to None if eager_execution (nullable) is None
+        # and model_fields_set contains the field
+        if self.eager_execution is None and "eager_execution" in self.model_fields_set:
+            _dict['eager_execution'] = None
+
+        # set to None if num_scheduler_steps (nullable) is None
+        # and
model_fields_set contains the field + if self.num_scheduler_steps is None and "num_scheduler_steps" in self.model_fields_set: + _dict['num_scheduler_steps'] = None + # set to None if max_model_len (nullable) is None # and model_fields_set contains the field if self.max_model_len is None and "max_model_len" in self.model_fields_set: @@ -180,8 +228,12 @@ def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]: "swap_space": obj.get("swap_space") if obj.get("swap_space") is not None else 0, "gpu_mem_util": obj.get("gpu_mem_util") if obj.get("gpu_mem_util") is not None else 0.95, "max_num_seqs": obj.get("max_num_seqs") if obj.get("max_num_seqs") is not None else 256, - "use_prefix_caching": obj.get("use_prefix_caching"), "offloading_num": obj.get("offloading_num") if obj.get("offloading_num") is not None else 0, + "use_prefix_caching": obj.get("use_prefix_caching"), + "use_chunked_prefill": obj.get("use_chunked_prefill"), + "chunked_prefill_size": obj.get("chunked_prefill_size"), + "eager_execution": obj.get("eager_execution"), + "num_scheduler_steps": obj.get("num_scheduler_steps"), "use_flashinfer": obj.get("use_flashinfer") if obj.get("use_flashinfer") is not None else False, "max_model_len": obj.get("max_model_len"), "dtype": obj.get("dtype") if obj.get("dtype") is not None else 'auto', diff --git a/platform_api_python_client/models/get_c_serve_v2_deployment_response.py b/platform_api_python_client/models/get_c_serve_v2_deployment_response.py new file mode 100644 index 0000000..7219d2b --- /dev/null +++ b/platform_api_python_client/models/get_c_serve_v2_deployment_response.py @@ -0,0 +1,137 @@ +# coding: utf-8 + +""" + Platform External API + + No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator) + + The version of the OpenAPI document: 0.1.0 + Generated by OpenAPI Generator (https://openapi-generator.tech) + + Do not edit the class manually. 
+""" # noqa: E501 + + +from __future__ import annotations +import pprint +import re # noqa: F401 +import json + +from datetime import datetime +from pydantic import BaseModel, ConfigDict, StrictInt, StrictStr +from typing import Any, ClassVar, Dict, List, Optional +from platform_api_python_client.models.c_serve_v2_recipe_output import CServeV2RecipeOutput +from platform_api_python_client.models.deployment_status import DeploymentStatus +from platform_api_python_client.models.deployment_type import DeploymentType +from typing import Optional, Set +from typing_extensions import Self + +class GetCServeV2DeploymentResponse(BaseModel): + """ + GetCServeV2DeploymentResponse + """ # noqa: E501 + cluster_id: StrictInt + id: StrictInt + name: StrictStr + endpoint_url: StrictStr + image_url: Optional[StrictStr] + type: DeploymentType + status: DeploymentStatus + created_at: datetime + hardware_instance_id: StrictInt + recipe: CServeV2RecipeOutput + min_scale: StrictInt + max_scale: StrictInt + endpoint_certificate_authority: Optional[StrictStr] + concurrency: Optional[StrictInt] + env_vars: Dict[str, StrictStr] + __properties: ClassVar[List[str]] = ["cluster_id", "id", "name", "endpoint_url", "image_url", "type", "status", "created_at", "hardware_instance_id", "recipe", "min_scale", "max_scale", "endpoint_certificate_authority", "concurrency", "env_vars"] + + model_config = ConfigDict( + populate_by_name=True, + validate_assignment=True, + protected_namespaces=(), + ) + + + def to_str(self) -> str: + """Returns the string representation of the model using alias""" + return pprint.pformat(self.model_dump(by_alias=True)) + + def to_json(self) -> str: + """Returns the JSON representation of the model using alias""" + # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead + return json.dumps(self.to_dict()) + + @classmethod + def from_json(cls, json_str: str) -> Optional[Self]: + """Create an instance of GetCServeV2DeploymentResponse from a JSON 
string""" + return cls.from_dict(json.loads(json_str)) + + def to_dict(self) -> Dict[str, Any]: + """Return the dictionary representation of the model using alias. + + This has the following differences from calling pydantic's + `self.model_dump(by_alias=True)`: + + * `None` is only added to the output dict for nullable fields that + were set at model initialization. Other fields with value `None` + are ignored. + """ + excluded_fields: Set[str] = set([ + ]) + + _dict = self.model_dump( + by_alias=True, + exclude=excluded_fields, + exclude_none=True, + ) + # override the default output from pydantic by calling `to_dict()` of recipe + if self.recipe: + _dict['recipe'] = self.recipe.to_dict() + # set to None if image_url (nullable) is None + # and model_fields_set contains the field + if self.image_url is None and "image_url" in self.model_fields_set: + _dict['image_url'] = None + + # set to None if endpoint_certificate_authority (nullable) is None + # and model_fields_set contains the field + if self.endpoint_certificate_authority is None and "endpoint_certificate_authority" in self.model_fields_set: + _dict['endpoint_certificate_authority'] = None + + # set to None if concurrency (nullable) is None + # and model_fields_set contains the field + if self.concurrency is None and "concurrency" in self.model_fields_set: + _dict['concurrency'] = None + + return _dict + + @classmethod + def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]: + """Create an instance of GetCServeV2DeploymentResponse from a dict""" + if obj is None: + return None + + if not isinstance(obj, dict): + return cls.model_validate(obj) + + _obj = cls.model_validate({ + "cluster_id": obj.get("cluster_id"), + "id": obj.get("id"), + "name": obj.get("name"), + "endpoint_url": obj.get("endpoint_url"), + "image_url": obj.get("image_url"), + "type": obj.get("type"), + "status": obj.get("status"), + "created_at": obj.get("created_at"), + "hardware_instance_id": obj.get("hardware_instance_id"), 
+ "recipe": CServeV2RecipeOutput.from_dict(obj["recipe"]) if obj.get("recipe") is not None else None, + "min_scale": obj.get("min_scale"), + "max_scale": obj.get("max_scale"), + "endpoint_certificate_authority": obj.get("endpoint_certificate_authority"), + "concurrency": obj.get("concurrency"), + "env_vars": obj.get("env_vars") + }) + return _obj + + diff --git a/platform_api_python_client/models/get_cluster_response.py b/platform_api_python_client/models/get_cluster_response.py index 778c6f2..cea676e 100644 --- a/platform_api_python_client/models/get_cluster_response.py +++ b/platform_api_python_client/models/get_cluster_response.py @@ -18,7 +18,7 @@ import json from pydantic import BaseModel, ConfigDict, StrictInt, StrictStr -from typing import Any, ClassVar, Dict, List +from typing import Any, ClassVar, Dict, List, Optional from typing import Optional, Set from typing_extensions import Self @@ -28,7 +28,8 @@ class GetClusterResponse(BaseModel): """ # noqa: E501 id: StrictInt display_name: StrictStr - __properties: ClassVar[List[str]] = ["id", "display_name"] + region: Optional[StrictStr] + __properties: ClassVar[List[str]] = ["id", "display_name", "region"] model_config = ConfigDict( populate_by_name=True, @@ -69,6 +70,11 @@ def to_dict(self) -> Dict[str, Any]: exclude=excluded_fields, exclude_none=True, ) + # set to None if region (nullable) is None + # and model_fields_set contains the field + if self.region is None and "region" in self.model_fields_set: + _dict['region'] = None + return _dict @classmethod @@ -82,7 +88,8 @@ def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]: _obj = cls.model_validate({ "id": obj.get("id"), - "display_name": obj.get("display_name") + "display_name": obj.get("display_name"), + "region": obj.get("region") }) return _obj diff --git a/platform_api_python_client/models/get_rag_deployment_response.py b/platform_api_python_client/models/get_rag_deployment_response.py new file mode 100644 index 0000000..63573de --- 
/dev/null +++ b/platform_api_python_client/models/get_rag_deployment_response.py @@ -0,0 +1,141 @@ +# coding: utf-8 + +""" + Platform External API + + No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator) + + The version of the OpenAPI document: 0.1.0 + Generated by OpenAPI Generator (https://openapi-generator.tech) + + Do not edit the class manually. +""" # noqa: E501 + + +from __future__ import annotations +import pprint +import re # noqa: F401 +import json + +from datetime import datetime +from pydantic import BaseModel, ConfigDict, StrictInt, StrictStr +from typing import Any, ClassVar, Dict, List, Optional +from platform_api_python_client.models.c_serve_v2_recipe_output import CServeV2RecipeOutput +from platform_api_python_client.models.deployment_status import DeploymentStatus +from platform_api_python_client.models.deployment_type import DeploymentType +from typing import Optional, Set +from typing_extensions import Self + +class GetRagDeploymentResponse(BaseModel): + """ + GetRagDeploymentResponse + """ # noqa: E501 + cluster_id: StrictInt + id: StrictInt + name: StrictStr + endpoint_url: StrictStr + image_url: Optional[StrictStr] + type: DeploymentType + status: DeploymentStatus + created_at: datetime + hardware_instance_id: StrictInt + recipe: CServeV2RecipeOutput + llm_model: StrictStr + centml_api_key: StrictStr + min_scale: StrictInt + max_scale: StrictInt + endpoint_certificate_authority: Optional[StrictStr] + concurrency: Optional[StrictInt] + env_vars: Dict[str, StrictStr] + __properties: ClassVar[List[str]] = ["cluster_id", "id", "name", "endpoint_url", "image_url", "type", "status", "created_at", "hardware_instance_id", "recipe", "llm_model", "centml_api_key", "min_scale", "max_scale", "endpoint_certificate_authority", "concurrency", "env_vars"] + + model_config = ConfigDict( + populate_by_name=True, + validate_assignment=True, + protected_namespaces=(), + ) + + + def to_str(self) -> str: + 
"""Returns the string representation of the model using alias""" + return pprint.pformat(self.model_dump(by_alias=True)) + + def to_json(self) -> str: + """Returns the JSON representation of the model using alias""" + # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead + return json.dumps(self.to_dict()) + + @classmethod + def from_json(cls, json_str: str) -> Optional[Self]: + """Create an instance of GetRagDeploymentResponse from a JSON string""" + return cls.from_dict(json.loads(json_str)) + + def to_dict(self) -> Dict[str, Any]: + """Return the dictionary representation of the model using alias. + + This has the following differences from calling pydantic's + `self.model_dump(by_alias=True)`: + + * `None` is only added to the output dict for nullable fields that + were set at model initialization. Other fields with value `None` + are ignored. + """ + excluded_fields: Set[str] = set([ + ]) + + _dict = self.model_dump( + by_alias=True, + exclude=excluded_fields, + exclude_none=True, + ) + # override the default output from pydantic by calling `to_dict()` of recipe + if self.recipe: + _dict['recipe'] = self.recipe.to_dict() + # set to None if image_url (nullable) is None + # and model_fields_set contains the field + if self.image_url is None and "image_url" in self.model_fields_set: + _dict['image_url'] = None + + # set to None if endpoint_certificate_authority (nullable) is None + # and model_fields_set contains the field + if self.endpoint_certificate_authority is None and "endpoint_certificate_authority" in self.model_fields_set: + _dict['endpoint_certificate_authority'] = None + + # set to None if concurrency (nullable) is None + # and model_fields_set contains the field + if self.concurrency is None and "concurrency" in self.model_fields_set: + _dict['concurrency'] = None + + return _dict + + @classmethod + def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]: + """Create an instance of GetRagDeploymentResponse 
from a dict""" + if obj is None: + return None + + if not isinstance(obj, dict): + return cls.model_validate(obj) + + _obj = cls.model_validate({ + "cluster_id": obj.get("cluster_id"), + "id": obj.get("id"), + "name": obj.get("name"), + "endpoint_url": obj.get("endpoint_url"), + "image_url": obj.get("image_url"), + "type": obj.get("type"), + "status": obj.get("status"), + "created_at": obj.get("created_at"), + "hardware_instance_id": obj.get("hardware_instance_id"), + "recipe": CServeV2RecipeOutput.from_dict(obj["recipe"]) if obj.get("recipe") is not None else None, + "llm_model": obj.get("llm_model"), + "centml_api_key": obj.get("centml_api_key"), + "min_scale": obj.get("min_scale") if obj.get("min_scale") is not None else 1, + "max_scale": obj.get("max_scale") if obj.get("max_scale") is not None else 1, + "endpoint_certificate_authority": obj.get("endpoint_certificate_authority"), + "concurrency": obj.get("concurrency"), + "env_vars": obj.get("env_vars") + }) + return _obj + + diff --git a/platform_api_python_client/models/hardware_instance_response.py b/platform_api_python_client/models/hardware_instance_response.py index c94465e..5a05658 100644 --- a/platform_api_python_client/models/hardware_instance_response.py +++ b/platform_api_python_client/models/hardware_instance_response.py @@ -18,7 +18,7 @@ import json from pydantic import BaseModel, ConfigDict, StrictInt, StrictStr -from typing import Any, ClassVar, Dict, List +from typing import Any, ClassVar, Dict, List, Optional from typing import Optional, Set from typing_extensions import Self @@ -33,7 +33,10 @@ class HardwareInstanceResponse(BaseModel): cpu: StrictInt memory: StrictInt cost_per_hr: StrictInt - __properties: ClassVar[List[str]] = ["id", "name", "gpu_type", "num_gpu", "cpu", "memory", "cost_per_hr"] + cluster_id: StrictInt + provider: Optional[StrictStr] + num_accelerators: Optional[StrictInt] + __properties: ClassVar[List[str]] = ["id", "name", "gpu_type", "num_gpu", "cpu", "memory", 
"cost_per_hr", "cluster_id", "provider", "num_accelerators"] model_config = ConfigDict( populate_by_name=True, @@ -74,6 +77,16 @@ def to_dict(self) -> Dict[str, Any]: exclude=excluded_fields, exclude_none=True, ) + # set to None if provider (nullable) is None + # and model_fields_set contains the field + if self.provider is None and "provider" in self.model_fields_set: + _dict['provider'] = None + + # set to None if num_accelerators (nullable) is None + # and model_fields_set contains the field + if self.num_accelerators is None and "num_accelerators" in self.model_fields_set: + _dict['num_accelerators'] = None + return _dict @classmethod @@ -92,7 +105,10 @@ def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]: "num_gpu": obj.get("num_gpu"), "cpu": obj.get("cpu"), "memory": obj.get("memory"), - "cost_per_hr": obj.get("cost_per_hr") + "cost_per_hr": obj.get("cost_per_hr"), + "cluster_id": obj.get("cluster_id"), + "provider": obj.get("provider"), + "num_accelerators": obj.get("num_accelerators") }) return _obj diff --git a/platform_api_python_client/models/list_user_vault_items_response.py b/platform_api_python_client/models/list_user_vault_items_response.py new file mode 100644 index 0000000..ca4dbcc --- /dev/null +++ b/platform_api_python_client/models/list_user_vault_items_response.py @@ -0,0 +1,95 @@ +# coding: utf-8 + +""" + Platform External API + + No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator) + + The version of the OpenAPI document: 0.1.0 + Generated by OpenAPI Generator (https://openapi-generator.tech) + + Do not edit the class manually. 
+""" # noqa: E501 + + +from __future__ import annotations +import pprint +import re # noqa: F401 +import json + +from pydantic import BaseModel, ConfigDict +from typing import Any, ClassVar, Dict, List +from platform_api_python_client.models.user_vault_item_output import UserVaultItemOutput +from typing import Optional, Set +from typing_extensions import Self + +class ListUserVaultItemsResponse(BaseModel): + """ + ListUserVaultItemsResponse + """ # noqa: E501 + results: List[UserVaultItemOutput] + __properties: ClassVar[List[str]] = ["results"] + + model_config = ConfigDict( + populate_by_name=True, + validate_assignment=True, + protected_namespaces=(), + ) + + + def to_str(self) -> str: + """Returns the string representation of the model using alias""" + return pprint.pformat(self.model_dump(by_alias=True)) + + def to_json(self) -> str: + """Returns the JSON representation of the model using alias""" + # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead + return json.dumps(self.to_dict()) + + @classmethod + def from_json(cls, json_str: str) -> Optional[Self]: + """Create an instance of ListUserVaultItemsResponse from a JSON string""" + return cls.from_dict(json.loads(json_str)) + + def to_dict(self) -> Dict[str, Any]: + """Return the dictionary representation of the model using alias. + + This has the following differences from calling pydantic's + `self.model_dump(by_alias=True)`: + + * `None` is only added to the output dict for nullable fields that + were set at model initialization. Other fields with value `None` + are ignored. 
+ """ + excluded_fields: Set[str] = set([ + ]) + + _dict = self.model_dump( + by_alias=True, + exclude=excluded_fields, + exclude_none=True, + ) + # override the default output from pydantic by calling `to_dict()` of each item in results (list) + _items = [] + if self.results: + for _item_results in self.results: + if _item_results: + _items.append(_item_results.to_dict()) + _dict['results'] = _items + return _dict + + @classmethod + def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]: + """Create an instance of ListUserVaultItemsResponse from a dict""" + if obj is None: + return None + + if not isinstance(obj, dict): + return cls.model_validate(obj) + + _obj = cls.model_validate({ + "results": [UserVaultItemOutput.from_dict(_item) for _item in obj["results"]] if obj.get("results") is not None else None + }) + return _obj + + diff --git a/platform_api_python_client/models/user_support_email_request.py b/platform_api_python_client/models/user_support_email_request.py index 3cefa80..bf34253 100644 --- a/platform_api_python_client/models/user_support_email_request.py +++ b/platform_api_python_client/models/user_support_email_request.py @@ -17,7 +17,7 @@ import re # noqa: F401 import json -from pydantic import BaseModel, ConfigDict, StrictStr +from pydantic import BaseModel, ConfigDict, StrictBool, StrictStr from typing import Any, ClassVar, Dict, List from typing import Optional, Set from typing_extensions import Self @@ -28,7 +28,8 @@ class UserSupportEmailRequest(BaseModel): """ # noqa: E501 message: StrictStr subject: StrictStr - __properties: ClassVar[List[str]] = ["message", "subject"] + send_to_sales: StrictBool + __properties: ClassVar[List[str]] = ["message", "subject", "send_to_sales"] model_config = ConfigDict( populate_by_name=True, @@ -82,7 +83,8 @@ def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]: _obj = cls.model_validate({ "message": obj.get("message"), - "subject": obj.get("subject") + "subject": obj.get("subject"), + 
"send_to_sales": obj.get("send_to_sales") }) return _obj diff --git a/platform_api_python_client/models/user_vault_item_input.py b/platform_api_python_client/models/user_vault_item_input.py new file mode 100644 index 0000000..a986736 --- /dev/null +++ b/platform_api_python_client/models/user_vault_item_input.py @@ -0,0 +1,97 @@ +# coding: utf-8 + +""" + Platform External API + + No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator) + + The version of the OpenAPI document: 0.1.0 + Generated by OpenAPI Generator (https://openapi-generator.tech) + + Do not edit the class manually. +""" # noqa: E501 + + +from __future__ import annotations +import pprint +import re # noqa: F401 +import json + +from pydantic import BaseModel, ConfigDict, StrictStr +from typing import Any, ClassVar, Dict, List, Optional +from platform_api_python_client.models.user_vault_type import UserVaultType +from typing import Optional, Set +from typing_extensions import Self + +class UserVaultItemInput(BaseModel): + """ + UserVaultItemInput + """ # noqa: E501 + type: UserVaultType + key: StrictStr + value: Optional[StrictStr] = None + __properties: ClassVar[List[str]] = ["type", "key", "value"] + + model_config = ConfigDict( + populate_by_name=True, + validate_assignment=True, + protected_namespaces=(), + ) + + + def to_str(self) -> str: + """Returns the string representation of the model using alias""" + return pprint.pformat(self.model_dump(by_alias=True)) + + def to_json(self) -> str: + """Returns the JSON representation of the model using alias""" + # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead + return json.dumps(self.to_dict()) + + @classmethod + def from_json(cls, json_str: str) -> Optional[Self]: + """Create an instance of UserVaultItemInput from a JSON string""" + return cls.from_dict(json.loads(json_str)) + + def to_dict(self) -> Dict[str, Any]: + """Return the dictionary representation of the model 
using alias. + + This has the following differences from calling pydantic's + `self.model_dump(by_alias=True)`: + + * `None` is only added to the output dict for nullable fields that + were set at model initialization. Other fields with value `None` + are ignored. + """ + excluded_fields: Set[str] = set([ + ]) + + _dict = self.model_dump( + by_alias=True, + exclude=excluded_fields, + exclude_none=True, + ) + # set to None if value (nullable) is None + # and model_fields_set contains the field + if self.value is None and "value" in self.model_fields_set: + _dict['value'] = None + + return _dict + + @classmethod + def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]: + """Create an instance of UserVaultItemInput from a dict""" + if obj is None: + return None + + if not isinstance(obj, dict): + return cls.model_validate(obj) + + _obj = cls.model_validate({ + "type": obj.get("type"), + "key": obj.get("key"), + "value": obj.get("value") + }) + return _obj + + diff --git a/platform_api_python_client/models/user_vault_item_output.py b/platform_api_python_client/models/user_vault_item_output.py new file mode 100644 index 0000000..262f82c --- /dev/null +++ b/platform_api_python_client/models/user_vault_item_output.py @@ -0,0 +1,97 @@ +# coding: utf-8 + +""" + Platform External API + + No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator) + + The version of the OpenAPI document: 0.1.0 + Generated by OpenAPI Generator (https://openapi-generator.tech) + + Do not edit the class manually. 
+""" # noqa: E501 + + +from __future__ import annotations +import pprint +import re # noqa: F401 +import json + +from pydantic import BaseModel, ConfigDict, StrictStr +from typing import Any, ClassVar, Dict, List, Optional +from platform_api_python_client.models.user_vault_type import UserVaultType +from typing import Optional, Set +from typing_extensions import Self + +class UserVaultItemOutput(BaseModel): + """ + UserVaultItemOutput + """ # noqa: E501 + type: UserVaultType + key: StrictStr + value: Optional[StrictStr] + __properties: ClassVar[List[str]] = ["type", "key", "value"] + + model_config = ConfigDict( + populate_by_name=True, + validate_assignment=True, + protected_namespaces=(), + ) + + + def to_str(self) -> str: + """Returns the string representation of the model using alias""" + return pprint.pformat(self.model_dump(by_alias=True)) + + def to_json(self) -> str: + """Returns the JSON representation of the model using alias""" + # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead + return json.dumps(self.to_dict()) + + @classmethod + def from_json(cls, json_str: str) -> Optional[Self]: + """Create an instance of UserVaultItemOutput from a JSON string""" + return cls.from_dict(json.loads(json_str)) + + def to_dict(self) -> Dict[str, Any]: + """Return the dictionary representation of the model using alias. + + This has the following differences from calling pydantic's + `self.model_dump(by_alias=True)`: + + * `None` is only added to the output dict for nullable fields that + were set at model initialization. Other fields with value `None` + are ignored. 
+ """ + excluded_fields: Set[str] = set([ + ]) + + _dict = self.model_dump( + by_alias=True, + exclude=excluded_fields, + exclude_none=True, + ) + # set to None if value (nullable) is None + # and model_fields_set contains the field + if self.value is None and "value" in self.model_fields_set: + _dict['value'] = None + + return _dict + + @classmethod + def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]: + """Create an instance of UserVaultItemOutput from a dict""" + if obj is None: + return None + + if not isinstance(obj, dict): + return cls.model_validate(obj) + + _obj = cls.model_validate({ + "type": obj.get("type"), + "key": obj.get("key"), + "value": obj.get("value") + }) + return _obj + + diff --git a/platform_api_python_client/models/user_vault_type.py b/platform_api_python_client/models/user_vault_type.py new file mode 100644 index 0000000..65a131a --- /dev/null +++ b/platform_api_python_client/models/user_vault_type.py @@ -0,0 +1,39 @@ +# coding: utf-8 + +""" + Platform External API + + No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator) + + The version of the OpenAPI document: 0.1.0 + Generated by OpenAPI Generator (https://openapi-generator.tech) + + Do not edit the class manually. 
+""" # noqa: E501 + + +from __future__ import annotations +import json +from enum import Enum +from typing_extensions import Self + + +class UserVaultType(str, Enum): + """ + UserVaultType + """ + + """ + allowed enum values + """ + ENV_VARS = 'env_vars' + SSH_KEYS = 'ssh_keys' + ACCESS_TOKENS = 'access_tokens' + CERTIFICATES = 'certificates' + + @classmethod + def from_json(cls, json_str: str) -> Self: + """Create an instance of UserVaultType from a JSON string""" + return cls(json.loads(json_str)) + + diff --git a/pyproject.toml b/pyproject.toml index 631f74c..d84126a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "platform_api_python_client" -version = "0.3.1" +version = "3.1.6" description = "Platform External API" authors = ["OpenAPI Generator Community "] license = "NoLicense" diff --git a/setup.py b/setup.py index e3b3cf2..ea1af9d 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ # prerequisite: setuptools # http://pypi.python.org/pypi/setuptools NAME = "platform-api-python-client" -VERSION = "0.3.1" +VERSION = "3.1.6" PYTHON_REQUIRES = ">= 3.8" REQUIRES = [ "urllib3 >= 1.25.3, < 3.0.0", diff --git a/test/test_c_serve_recipe_input.py b/test/test_c_serve_recipe.py similarity index 73% rename from test/test_c_serve_recipe_input.py rename to test/test_c_serve_recipe.py index f817c2b..2928556 100644 --- a/test/test_c_serve_recipe_input.py +++ b/test/test_c_serve_recipe.py @@ -14,10 +14,10 @@ import unittest -from platform_api_python_client.models.c_serve_recipe_input import CServeRecipeInput +from platform_api_python_client.models.c_serve_recipe import CServeRecipe -class TestCServeRecipeInput(unittest.TestCase): - """CServeRecipeInput unit test stubs""" +class TestCServeRecipe(unittest.TestCase): + """CServeRecipe unit test stubs""" def setUp(self): pass @@ -25,31 +25,35 @@ def setUp(self): def tearDown(self): pass - def make_instance(self, include_optional) -> CServeRecipeInput: - """Test CServeRecipeInput + def 
make_instance(self, include_optional) -> CServeRecipe: + """Test CServeRecipe include_optional is a boolean, when False only required params are included, when True both required and optional params are included """ - # uncomment below to create an instance of `CServeRecipeInput` + # uncomment below to create an instance of `CServeRecipe` """ - model = CServeRecipeInput() + model = CServeRecipe() if include_optional: - return CServeRecipeInput( + return CServeRecipe( model = '', is_embedding_model = True, tensor_parallel_size = 56, pipeline_parallel_size = 56, - block_size = 56, + block_size = 16, swap_space = 0.0, gpu_mem_util = 0.0, max_num_seqs = 56, - use_prefix_caching = True, offloading_num = 56, + use_prefix_caching = True, + use_chunked_prefill = True, + chunked_prefill_size = 56, + eager_execution = True, + num_scheduler_steps = 56, use_flashinfer = True, max_model_len = 128.0, dtype = 'auto', tokenizer = '', - spec_proposer = '', + spec_proposer = 'draft', spec_draft_model = '', spec_tokens = 56, spec_prompt_lookup_min = 1.0, @@ -57,15 +61,15 @@ def make_instance(self, include_optional) -> CServeRecipeInput: seed = 56 ) else: - return CServeRecipeInput( + return CServeRecipe( model = '', tensor_parallel_size = 56, pipeline_parallel_size = 56, ) """ - def testCServeRecipeInput(self): - """Test CServeRecipeInput""" + def testCServeRecipe(self): + """Test CServeRecipe""" # inst_req_only = self.make_instance(include_optional=False) # inst_req_and_optional = self.make_instance(include_optional=True) diff --git a/test/test_c_serve_recipe_perf.py b/test/test_c_serve_recipe_perf.py index 18672ac..474fda7 100644 --- a/test/test_c_serve_recipe_perf.py +++ b/test/test_c_serve_recipe_perf.py @@ -35,27 +35,35 @@ def make_instance(self, include_optional) -> CServeRecipePerf: model = CServeRecipePerf() if include_optional: return CServeRecipePerf( - recipe = platform_api_python_client.models.c_serve_recipe.CServeRecipe( + recipe = 
platform_api_python_client.models.c_serve_v2_recipe.CServeV2Recipe( model = '', + max_model_len = 56, is_embedding_model = True, + tokenizer = '', tensor_parallel_size = 56, pipeline_parallel_size = 56, + gpu_mem_util = 1.337, block_size = 56, - swap_space = 0.0, - gpu_mem_util = 0.0, + swap_space = 56, + quantization = '', + dtype = 'auto', + cache_dtype = 'auto', max_num_seqs = 56, - use_prefix_caching = True, - offloading_num = 56, + eager_execution = True, use_flashinfer = True, - max_model_len = 128.0, - dtype = 'auto', - tokenizer = '', - spec_proposer = '', + offloading_num = 1.337, spec_draft_model = '', spec_tokens = 56, - spec_prompt_lookup_min = 1.0, - spec_prompt_lookup_max = 1.0, - seed = 56, ), + spec_prompt_lookup_max = 56, + spec_prompt_lookup_min = 56, + use_prefix_caching = True, + use_chunked_prefill = True, + chunked_prefill_size = 56, + max_seq_len_to_capture = 56, + distributed_executor_backend = 'ray', + spec_max_batch_size = 56, + spec_max_seq_len = 56, + num_scheduler_steps = 56, ), hardware_instance_id = 56, output_tp = [ [ @@ -70,27 +78,35 @@ def make_instance(self, include_optional) -> CServeRecipePerf: ) else: return CServeRecipePerf( - recipe = platform_api_python_client.models.c_serve_recipe.CServeRecipe( + recipe = platform_api_python_client.models.c_serve_v2_recipe.CServeV2Recipe( model = '', + max_model_len = 56, is_embedding_model = True, + tokenizer = '', tensor_parallel_size = 56, pipeline_parallel_size = 56, + gpu_mem_util = 1.337, block_size = 56, - swap_space = 0.0, - gpu_mem_util = 0.0, + swap_space = 56, + quantization = '', + dtype = 'auto', + cache_dtype = 'auto', max_num_seqs = 56, - use_prefix_caching = True, - offloading_num = 56, + eager_execution = True, use_flashinfer = True, - max_model_len = 128.0, - dtype = 'auto', - tokenizer = '', - spec_proposer = '', + offloading_num = 1.337, spec_draft_model = '', spec_tokens = 56, - spec_prompt_lookup_min = 1.0, - spec_prompt_lookup_max = 1.0, - seed = 56, ), + 
spec_prompt_lookup_max = 56, + spec_prompt_lookup_min = 56, + use_prefix_caching = True, + use_chunked_prefill = True, + chunked_prefill_size = 56, + max_seq_len_to_capture = 56, + distributed_executor_backend = 'ray', + spec_max_batch_size = 56, + spec_max_seq_len = 56, + num_scheduler_steps = 56, ), hardware_instance_id = 56, output_tp = [ [ diff --git a/test/test_c_serve_recipe_response.py b/test/test_c_serve_recipe_response.py index 9d6fd50..b59d4c0 100644 --- a/test/test_c_serve_recipe_response.py +++ b/test/test_c_serve_recipe_response.py @@ -38,27 +38,35 @@ def make_instance(self, include_optional) -> CServeRecipeResponse: model = '', cluster_id = 56, fastest = platform_api_python_client.models.c_serve_recipe_perf.CServeRecipePerf( - recipe = platform_api_python_client.models.c_serve_recipe.CServeRecipe( + recipe = platform_api_python_client.models.c_serve_v2_recipe.CServeV2Recipe( model = '', + max_model_len = 56, is_embedding_model = True, + tokenizer = '', tensor_parallel_size = 56, pipeline_parallel_size = 56, + gpu_mem_util = 1.337, block_size = 56, - swap_space = 0.0, - gpu_mem_util = 0.0, + swap_space = 56, + quantization = '', + dtype = 'auto', + cache_dtype = 'auto', max_num_seqs = 56, - use_prefix_caching = True, - offloading_num = 56, + eager_execution = True, use_flashinfer = True, - max_model_len = 128.0, - dtype = 'auto', - tokenizer = '', - spec_proposer = '', + offloading_num = 1.337, spec_draft_model = '', spec_tokens = 56, - spec_prompt_lookup_min = 1.0, - spec_prompt_lookup_max = 1.0, - seed = 56, ), + spec_prompt_lookup_max = 56, + spec_prompt_lookup_min = 56, + use_prefix_caching = True, + use_chunked_prefill = True, + chunked_prefill_size = 56, + max_seq_len_to_capture = 56, + distributed_executor_backend = 'ray', + spec_max_batch_size = 56, + spec_max_seq_len = 56, + num_scheduler_steps = 56, ), hardware_instance_id = 56, output_tp = [ [ @@ -71,27 +79,35 @@ def make_instance(self, include_optional) -> CServeRecipeResponse: ] ], ), 
cheapest = platform_api_python_client.models.c_serve_recipe_perf.CServeRecipePerf( - recipe = platform_api_python_client.models.c_serve_recipe.CServeRecipe( + recipe = platform_api_python_client.models.c_serve_v2_recipe.CServeV2Recipe( model = '', + max_model_len = 56, is_embedding_model = True, + tokenizer = '', tensor_parallel_size = 56, pipeline_parallel_size = 56, + gpu_mem_util = 1.337, block_size = 56, - swap_space = 0.0, - gpu_mem_util = 0.0, + swap_space = 56, + quantization = '', + dtype = 'auto', + cache_dtype = 'auto', max_num_seqs = 56, - use_prefix_caching = True, - offloading_num = 56, + eager_execution = True, use_flashinfer = True, - max_model_len = 128.0, - dtype = 'auto', - tokenizer = '', - spec_proposer = '', + offloading_num = 1.337, spec_draft_model = '', spec_tokens = 56, - spec_prompt_lookup_min = 1.0, - spec_prompt_lookup_max = 1.0, - seed = 56, ), + spec_prompt_lookup_max = 56, + spec_prompt_lookup_min = 56, + use_prefix_caching = True, + use_chunked_prefill = True, + chunked_prefill_size = 56, + max_seq_len_to_capture = 56, + distributed_executor_backend = 'ray', + spec_max_batch_size = 56, + spec_max_seq_len = 56, + num_scheduler_steps = 56, ), hardware_instance_id = 56, output_tp = [ [ @@ -104,27 +120,35 @@ def make_instance(self, include_optional) -> CServeRecipeResponse: ] ], ), best_value = platform_api_python_client.models.c_serve_recipe_perf.CServeRecipePerf( - recipe = platform_api_python_client.models.c_serve_recipe.CServeRecipe( + recipe = platform_api_python_client.models.c_serve_v2_recipe.CServeV2Recipe( model = '', + max_model_len = 56, is_embedding_model = True, + tokenizer = '', tensor_parallel_size = 56, pipeline_parallel_size = 56, + gpu_mem_util = 1.337, block_size = 56, - swap_space = 0.0, - gpu_mem_util = 0.0, + swap_space = 56, + quantization = '', + dtype = 'auto', + cache_dtype = 'auto', max_num_seqs = 56, - use_prefix_caching = True, - offloading_num = 56, + eager_execution = True, use_flashinfer = True, - 
max_model_len = 128.0, - dtype = 'auto', - tokenizer = '', - spec_proposer = '', + offloading_num = 1.337, spec_draft_model = '', spec_tokens = 56, - spec_prompt_lookup_min = 1.0, - spec_prompt_lookup_max = 1.0, - seed = 56, ), + spec_prompt_lookup_max = 56, + spec_prompt_lookup_min = 56, + use_prefix_caching = True, + use_chunked_prefill = True, + chunked_prefill_size = 56, + max_seq_len_to_capture = 56, + distributed_executor_backend = 'ray', + spec_max_batch_size = 56, + spec_max_seq_len = 56, + num_scheduler_steps = 56, ), hardware_instance_id = 56, output_tp = [ [ @@ -142,27 +166,35 @@ def make_instance(self, include_optional) -> CServeRecipeResponse: model = '', cluster_id = 56, fastest = platform_api_python_client.models.c_serve_recipe_perf.CServeRecipePerf( - recipe = platform_api_python_client.models.c_serve_recipe.CServeRecipe( + recipe = platform_api_python_client.models.c_serve_v2_recipe.CServeV2Recipe( model = '', + max_model_len = 56, is_embedding_model = True, + tokenizer = '', tensor_parallel_size = 56, pipeline_parallel_size = 56, + gpu_mem_util = 1.337, block_size = 56, - swap_space = 0.0, - gpu_mem_util = 0.0, + swap_space = 56, + quantization = '', + dtype = 'auto', + cache_dtype = 'auto', max_num_seqs = 56, - use_prefix_caching = True, - offloading_num = 56, + eager_execution = True, use_flashinfer = True, - max_model_len = 128.0, - dtype = 'auto', - tokenizer = '', - spec_proposer = '', + offloading_num = 1.337, spec_draft_model = '', spec_tokens = 56, - spec_prompt_lookup_min = 1.0, - spec_prompt_lookup_max = 1.0, - seed = 56, ), + spec_prompt_lookup_max = 56, + spec_prompt_lookup_min = 56, + use_prefix_caching = True, + use_chunked_prefill = True, + chunked_prefill_size = 56, + max_seq_len_to_capture = 56, + distributed_executor_backend = 'ray', + spec_max_batch_size = 56, + spec_max_seq_len = 56, + num_scheduler_steps = 56, ), hardware_instance_id = 56, output_tp = [ [ @@ -175,27 +207,35 @@ def make_instance(self, include_optional) -> 
CServeRecipeResponse: ] ], ), cheapest = platform_api_python_client.models.c_serve_recipe_perf.CServeRecipePerf( - recipe = platform_api_python_client.models.c_serve_recipe.CServeRecipe( + recipe = platform_api_python_client.models.c_serve_v2_recipe.CServeV2Recipe( model = '', + max_model_len = 56, is_embedding_model = True, + tokenizer = '', tensor_parallel_size = 56, pipeline_parallel_size = 56, + gpu_mem_util = 1.337, block_size = 56, - swap_space = 0.0, - gpu_mem_util = 0.0, + swap_space = 56, + quantization = '', + dtype = 'auto', + cache_dtype = 'auto', max_num_seqs = 56, - use_prefix_caching = True, - offloading_num = 56, + eager_execution = True, use_flashinfer = True, - max_model_len = 128.0, - dtype = 'auto', - tokenizer = '', - spec_proposer = '', + offloading_num = 1.337, spec_draft_model = '', spec_tokens = 56, - spec_prompt_lookup_min = 1.0, - spec_prompt_lookup_max = 1.0, - seed = 56, ), + spec_prompt_lookup_max = 56, + spec_prompt_lookup_min = 56, + use_prefix_caching = True, + use_chunked_prefill = True, + chunked_prefill_size = 56, + max_seq_len_to_capture = 56, + distributed_executor_backend = 'ray', + spec_max_batch_size = 56, + spec_max_seq_len = 56, + num_scheduler_steps = 56, ), hardware_instance_id = 56, output_tp = [ [ @@ -208,27 +248,35 @@ def make_instance(self, include_optional) -> CServeRecipeResponse: ] ], ), best_value = platform_api_python_client.models.c_serve_recipe_perf.CServeRecipePerf( - recipe = platform_api_python_client.models.c_serve_recipe.CServeRecipe( + recipe = platform_api_python_client.models.c_serve_v2_recipe.CServeV2Recipe( model = '', + max_model_len = 56, is_embedding_model = True, + tokenizer = '', tensor_parallel_size = 56, pipeline_parallel_size = 56, + gpu_mem_util = 1.337, block_size = 56, - swap_space = 0.0, - gpu_mem_util = 0.0, + swap_space = 56, + quantization = '', + dtype = 'auto', + cache_dtype = 'auto', max_num_seqs = 56, - use_prefix_caching = True, - offloading_num = 56, + eager_execution = True, 
use_flashinfer = True, - max_model_len = 128.0, - dtype = 'auto', - tokenizer = '', - spec_proposer = '', + offloading_num = 1.337, spec_draft_model = '', spec_tokens = 56, - spec_prompt_lookup_min = 1.0, - spec_prompt_lookup_max = 1.0, - seed = 56, ), + spec_prompt_lookup_max = 56, + spec_prompt_lookup_min = 56, + use_prefix_caching = True, + use_chunked_prefill = True, + chunked_prefill_size = 56, + max_seq_len_to_capture = 56, + distributed_executor_backend = 'ray', + spec_max_batch_size = 56, + spec_max_seq_len = 56, + num_scheduler_steps = 56, ), hardware_instance_id = 56, output_tp = [ [ diff --git a/test/test_c_serve_v2_recipe.py b/test/test_c_serve_v2_recipe.py new file mode 100644 index 0000000..462e1d4 --- /dev/null +++ b/test/test_c_serve_v2_recipe.py @@ -0,0 +1,106 @@ +# coding: utf-8 + +""" + Platform External API + + No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator) + + The version of the OpenAPI document: 0.1.0 + Generated by OpenAPI Generator (https://openapi-generator.tech) + + Do not edit the class manually. 
+""" # noqa: E501 + + +import unittest + +from platform_api_python_client.models.c_serve_v2_recipe import CServeV2Recipe + +class TestCServeV2Recipe(unittest.TestCase): + """CServeV2Recipe unit test stubs""" + + def setUp(self): + pass + + def tearDown(self): + pass + + def make_instance(self, include_optional) -> CServeV2Recipe: + """Test CServeV2Recipe + include_optional is a boolean, when False only required + params are included, when True both required and + optional params are included """ + # uncomment below to create an instance of `CServeV2Recipe` + """ + model = CServeV2Recipe() + if include_optional: + return CServeV2Recipe( + model = '', + max_model_len = 56, + is_embedding_model = True, + tokenizer = '', + tensor_parallel_size = 56, + pipeline_parallel_size = 56, + gpu_mem_util = 1.337, + block_size = 56, + swap_space = 56, + quantization = '', + dtype = 'auto', + cache_dtype = 'auto', + max_num_seqs = 56, + eager_execution = True, + use_flashinfer = True, + offloading_num = 1.337, + spec_draft_model = '', + spec_tokens = 56, + spec_prompt_lookup_max = 56, + spec_prompt_lookup_min = 56, + use_prefix_caching = True, + use_chunked_prefill = True, + chunked_prefill_size = 56, + max_seq_len_to_capture = 56, + distributed_executor_backend = 'ray', + spec_max_batch_size = 56, + spec_max_seq_len = 56, + num_scheduler_steps = 56 + ) + else: + return CServeV2Recipe( + model = '', + max_model_len = 56, + is_embedding_model = True, + tokenizer = '', + tensor_parallel_size = 56, + pipeline_parallel_size = 56, + gpu_mem_util = 1.337, + block_size = 56, + swap_space = 56, + quantization = '', + dtype = 'auto', + cache_dtype = 'auto', + max_num_seqs = 56, + eager_execution = True, + use_flashinfer = True, + offloading_num = 1.337, + spec_draft_model = '', + spec_tokens = 56, + spec_prompt_lookup_max = 56, + spec_prompt_lookup_min = 56, + use_prefix_caching = True, + use_chunked_prefill = True, + chunked_prefill_size = 56, + max_seq_len_to_capture = 56, + 
distributed_executor_backend = 'ray', + spec_max_batch_size = 56, + spec_max_seq_len = 56, + num_scheduler_steps = 56, + ) + """ + + def testCServeV2Recipe(self): + """Test CServeV2Recipe""" + # inst_req_only = self.make_instance(include_optional=False) + # inst_req_and_optional = self.make_instance(include_optional=True) + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_c_serve_recipe_output.py b/test/test_c_serve_v2_recipe_input.py similarity index 50% rename from test/test_c_serve_recipe_output.py rename to test/test_c_serve_v2_recipe_input.py index 712f6b2..72bedeb 100644 --- a/test/test_c_serve_recipe_output.py +++ b/test/test_c_serve_v2_recipe_input.py @@ -14,10 +14,10 @@ import unittest -from platform_api_python_client.models.c_serve_recipe_output import CServeRecipeOutput +from platform_api_python_client.models.c_serve_v2_recipe_input import CServeV2RecipeInput -class TestCServeRecipeOutput(unittest.TestCase): - """CServeRecipeOutput unit test stubs""" +class TestCServeV2RecipeInput(unittest.TestCase): + """CServeV2RecipeInput unit test stubs""" def setUp(self): pass @@ -25,64 +25,56 @@ def setUp(self): def tearDown(self): pass - def make_instance(self, include_optional) -> CServeRecipeOutput: - """Test CServeRecipeOutput + def make_instance(self, include_optional) -> CServeV2RecipeInput: + """Test CServeV2RecipeInput include_optional is a boolean, when False only required params are included, when True both required and optional params are included """ - # uncomment below to create an instance of `CServeRecipeOutput` + # uncomment below to create an instance of `CServeV2RecipeInput` """ - model = CServeRecipeOutput() + model = CServeV2RecipeInput() if include_optional: - return CServeRecipeOutput( + return CServeV2RecipeInput( model = '', + max_model_len = 56, is_embedding_model = True, + tokenizer = '', tensor_parallel_size = 56, pipeline_parallel_size = 56, + gpu_mem_util = 1.337, block_size = 56, - swap_space = 0.0, - gpu_mem_util = 
0.0, + swap_space = 56, + quantization = '', + dtype = 'auto', + cache_dtype = 'auto', max_num_seqs = 56, - use_prefix_caching = True, - offloading_num = 56, + eager_execution = True, use_flashinfer = True, - max_model_len = 128.0, - dtype = 'auto', - tokenizer = '', - spec_proposer = '', + offloading_num = 1.337, spec_draft_model = '', spec_tokens = 56, - spec_prompt_lookup_min = 1.0, - spec_prompt_lookup_max = 1.0, - seed = 56 + spec_prompt_lookup_max = 56, + spec_prompt_lookup_min = 56, + use_prefix_caching = True, + use_chunked_prefill = True, + chunked_prefill_size = 56, + max_seq_len_to_capture = 56, + distributed_executor_backend = 'ray', + spec_max_batch_size = 56, + spec_max_seq_len = 56, + num_scheduler_steps = 56 ) else: - return CServeRecipeOutput( + return CServeV2RecipeInput( model = '', - is_embedding_model = True, + tokenizer = '', tensor_parallel_size = 56, pipeline_parallel_size = 56, - block_size = 56, - swap_space = 0.0, - gpu_mem_util = 0.0, - max_num_seqs = 56, - use_prefix_caching = True, - offloading_num = 56, - use_flashinfer = True, - max_model_len = 128.0, - dtype = 'auto', - tokenizer = '', - spec_proposer = '', - spec_draft_model = '', - spec_tokens = 56, - spec_prompt_lookup_min = 1.0, - spec_prompt_lookup_max = 1.0, - seed = 56, ) """ - def testCServeRecipeOutput(self): - """Test CServeRecipeOutput""" + def testCServeV2RecipeInput(self): + """Test CServeV2RecipeInput""" # inst_req_only = self.make_instance(include_optional=False) # inst_req_and_optional = self.make_instance(include_optional=True) diff --git a/test/test_c_serve_v2_recipe_output.py b/test/test_c_serve_v2_recipe_output.py new file mode 100644 index 0000000..6505865 --- /dev/null +++ b/test/test_c_serve_v2_recipe_output.py @@ -0,0 +1,106 @@ +# coding: utf-8 + +""" + Platform External API + + No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator) + + The version of the OpenAPI document: 0.1.0 + Generated by OpenAPI 
Generator (https://openapi-generator.tech) + + Do not edit the class manually. +""" # noqa: E501 + + +import unittest + +from platform_api_python_client.models.c_serve_v2_recipe_output import CServeV2RecipeOutput + +class TestCServeV2RecipeOutput(unittest.TestCase): + """CServeV2RecipeOutput unit test stubs""" + + def setUp(self): + pass + + def tearDown(self): + pass + + def make_instance(self, include_optional) -> CServeV2RecipeOutput: + """Test CServeV2RecipeOutput + include_optional is a boolean, when False only required + params are included, when True both required and + optional params are included """ + # uncomment below to create an instance of `CServeV2RecipeOutput` + """ + model = CServeV2RecipeOutput() + if include_optional: + return CServeV2RecipeOutput( + model = '', + max_model_len = 56, + is_embedding_model = True, + tokenizer = '', + tensor_parallel_size = 56, + pipeline_parallel_size = 56, + gpu_mem_util = 1.337, + block_size = 56, + swap_space = 56, + quantization = '', + dtype = 'auto', + cache_dtype = 'auto', + max_num_seqs = 56, + eager_execution = True, + use_flashinfer = True, + offloading_num = 1.337, + spec_draft_model = '', + spec_tokens = 56, + spec_prompt_lookup_max = 56, + spec_prompt_lookup_min = 56, + use_prefix_caching = True, + use_chunked_prefill = True, + chunked_prefill_size = 56, + max_seq_len_to_capture = 56, + distributed_executor_backend = 'ray', + spec_max_batch_size = 56, + spec_max_seq_len = 56, + num_scheduler_steps = 56 + ) + else: + return CServeV2RecipeOutput( + model = '', + max_model_len = 56, + is_embedding_model = True, + tokenizer = '', + tensor_parallel_size = 56, + pipeline_parallel_size = 56, + gpu_mem_util = 1.337, + block_size = 56, + swap_space = 56, + quantization = '', + dtype = 'auto', + cache_dtype = 'auto', + max_num_seqs = 56, + eager_execution = True, + use_flashinfer = True, + offloading_num = 1.337, + spec_draft_model = '', + spec_tokens = 56, + spec_prompt_lookup_max = 56, + spec_prompt_lookup_min 
= 56, + use_prefix_caching = True, + use_chunked_prefill = True, + chunked_prefill_size = 56, + max_seq_len_to_capture = 56, + distributed_executor_backend = 'ray', + spec_max_batch_size = 56, + spec_max_seq_len = 56, + num_scheduler_steps = 56, + ) + """ + + def testCServeV2RecipeOutput(self): + """Test CServeV2RecipeOutput""" + # inst_req_only = self.make_instance(include_optional=False) + # inst_req_and_optional = self.make_instance(include_optional=True) + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_create_c_serve_deployment_request.py b/test/test_create_c_serve_deployment_request.py index 931847d..b336efd 100644 --- a/test/test_create_c_serve_deployment_request.py +++ b/test/test_create_c_serve_deployment_request.py @@ -43,17 +43,21 @@ def make_instance(self, include_optional) -> CreateCServeDeploymentRequest: is_embedding_model = True, tensor_parallel_size = 56, pipeline_parallel_size = 56, - block_size = 56, + block_size = 16, swap_space = 0.0, gpu_mem_util = 0.0, max_num_seqs = 56, - use_prefix_caching = True, offloading_num = 56, + use_prefix_caching = True, + use_chunked_prefill = True, + chunked_prefill_size = 56, + eager_execution = True, + num_scheduler_steps = 56, use_flashinfer = True, max_model_len = 128.0, dtype = 'auto', tokenizer = '', - spec_proposer = '', + spec_proposer = 'draft', spec_draft_model = '', spec_tokens = 56, spec_prompt_lookup_min = 1.0, @@ -78,23 +82,26 @@ def make_instance(self, include_optional) -> CreateCServeDeploymentRequest: is_embedding_model = True, tensor_parallel_size = 56, pipeline_parallel_size = 56, - block_size = 56, + block_size = 16, swap_space = 0.0, gpu_mem_util = 0.0, max_num_seqs = 56, - use_prefix_caching = True, offloading_num = 56, + use_prefix_caching = True, + use_chunked_prefill = True, + chunked_prefill_size = 56, + eager_execution = True, + num_scheduler_steps = 56, use_flashinfer = True, max_model_len = 128.0, dtype = 'auto', tokenizer = '', - spec_proposer = '', + 
spec_proposer = 'draft', spec_draft_model = '', spec_tokens = 56, spec_prompt_lookup_min = 1.0, spec_prompt_lookup_max = 1.0, seed = 56, ), - hf_token = '', min_scale = 56, max_scale = 56, ) diff --git a/test/test_create_c_serve_v2_deployment_request.py b/test/test_create_c_serve_v2_deployment_request.py new file mode 100644 index 0000000..af466b0 --- /dev/null +++ b/test/test_create_c_serve_v2_deployment_request.py @@ -0,0 +1,124 @@ +# coding: utf-8 + +""" + Platform External API + + No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator) + + The version of the OpenAPI document: 0.1.0 + Generated by OpenAPI Generator (https://openapi-generator.tech) + + Do not edit the class manually. +""" # noqa: E501 + + +import unittest + +from platform_api_python_client.models.create_c_serve_v2_deployment_request import CreateCServeV2DeploymentRequest + +class TestCreateCServeV2DeploymentRequest(unittest.TestCase): + """CreateCServeV2DeploymentRequest unit test stubs""" + + def setUp(self): + pass + + def tearDown(self): + pass + + def make_instance(self, include_optional) -> CreateCServeV2DeploymentRequest: + """Test CreateCServeV2DeploymentRequest + include_optional is a boolean, when False only required + params are included, when True both required and + optional params are included """ + # uncomment below to create an instance of `CreateCServeV2DeploymentRequest` + """ + model = CreateCServeV2DeploymentRequest() + if include_optional: + return CreateCServeV2DeploymentRequest( + name = '', + cluster_id = 56, + hardware_instance_id = 56, + recipe = platform_api_python_client.models.c_serve_v2_recipe.CServeV2Recipe( + model = '', + max_model_len = 56, + is_embedding_model = True, + tokenizer = '', + tensor_parallel_size = 56, + pipeline_parallel_size = 56, + gpu_mem_util = 1.337, + block_size = 56, + swap_space = 56, + quantization = '', + dtype = 'auto', + cache_dtype = 'auto', + max_num_seqs = 56, + eager_execution = True, 
+ use_flashinfer = True, + offloading_num = 1.337, + spec_draft_model = '', + spec_tokens = 56, + spec_prompt_lookup_max = 56, + spec_prompt_lookup_min = 56, + use_prefix_caching = True, + use_chunked_prefill = True, + chunked_prefill_size = 56, + max_seq_len_to_capture = 56, + distributed_executor_backend = 'ray', + spec_max_batch_size = 56, + spec_max_seq_len = 56, + num_scheduler_steps = 56, ), + hf_token = '', + endpoint_certificate_authority = '', + min_scale = 56, + max_scale = 56, + concurrency = 56, + env_vars = { + 'key' : '' + } + ) + else: + return CreateCServeV2DeploymentRequest( + name = '', + cluster_id = 56, + hardware_instance_id = 56, + recipe = platform_api_python_client.models.c_serve_v2_recipe.CServeV2Recipe( + model = '', + max_model_len = 56, + is_embedding_model = True, + tokenizer = '', + tensor_parallel_size = 56, + pipeline_parallel_size = 56, + gpu_mem_util = 1.337, + block_size = 56, + swap_space = 56, + quantization = '', + dtype = 'auto', + cache_dtype = 'auto', + max_num_seqs = 56, + eager_execution = True, + use_flashinfer = True, + offloading_num = 1.337, + spec_draft_model = '', + spec_tokens = 56, + spec_prompt_lookup_max = 56, + spec_prompt_lookup_min = 56, + use_prefix_caching = True, + use_chunked_prefill = True, + chunked_prefill_size = 56, + max_seq_len_to_capture = 56, + distributed_executor_backend = 'ray', + spec_max_batch_size = 56, + spec_max_seq_len = 56, + num_scheduler_steps = 56, ), + min_scale = 56, + max_scale = 56, + ) + """ + + def testCreateCServeV2DeploymentRequest(self): + """Test CreateCServeV2DeploymentRequest""" + # inst_req_only = self.make_instance(include_optional=False) + # inst_req_and_optional = self.make_instance(include_optional=True) + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_create_c_serve_v2_deployment_response.py b/test/test_create_c_serve_v2_deployment_response.py new file mode 100644 index 0000000..2e3ff94 --- /dev/null +++ 
b/test/test_create_c_serve_v2_deployment_response.py @@ -0,0 +1,56 @@ +# coding: utf-8 + +""" + Platform External API + + No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator) + + The version of the OpenAPI document: 0.1.0 + Generated by OpenAPI Generator (https://openapi-generator.tech) + + Do not edit the class manually. +""" # noqa: E501 + + +import unittest + +from platform_api_python_client.models.create_c_serve_v2_deployment_response import CreateCServeV2DeploymentResponse + +class TestCreateCServeV2DeploymentResponse(unittest.TestCase): + """CreateCServeV2DeploymentResponse unit test stubs""" + + def setUp(self): + pass + + def tearDown(self): + pass + + def make_instance(self, include_optional) -> CreateCServeV2DeploymentResponse: + """Test CreateCServeV2DeploymentResponse + include_optional is a boolean, when False only required + params are included, when True both required and + optional params are included """ + # uncomment below to create an instance of `CreateCServeV2DeploymentResponse` + """ + model = CreateCServeV2DeploymentResponse() + if include_optional: + return CreateCServeV2DeploymentResponse( + id = 56, + created_at = datetime.datetime.strptime('2013-10-20 19:20:30.00', '%Y-%m-%d %H:%M:%S.%f'), + endpoint_url = '' + ) + else: + return CreateCServeV2DeploymentResponse( + id = 56, + created_at = datetime.datetime.strptime('2013-10-20 19:20:30.00', '%Y-%m-%d %H:%M:%S.%f'), + endpoint_url = '', + ) + """ + + def testCreateCServeV2DeploymentResponse(self): + """Test CreateCServeV2DeploymentResponse""" + # inst_req_only = self.make_instance(include_optional=False) + # inst_req_and_optional = self.make_instance(include_optional=True) + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_create_rag_deployment_request.py b/test/test_create_rag_deployment_request.py new file mode 100644 index 0000000..1d4c957 --- /dev/null +++ b/test/test_create_rag_deployment_request.py @@ -0,0 
+1,116 @@ +# coding: utf-8 + +""" + Platform External API + + No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator) + + The version of the OpenAPI document: 0.1.0 + Generated by OpenAPI Generator (https://openapi-generator.tech) + + Do not edit the class manually. +""" # noqa: E501 + + +import unittest + +from platform_api_python_client.models.create_rag_deployment_request import CreateRagDeploymentRequest + +class TestCreateRagDeploymentRequest(unittest.TestCase): + """CreateRagDeploymentRequest unit test stubs""" + + def setUp(self): + pass + + def tearDown(self): + pass + + def make_instance(self, include_optional) -> CreateRagDeploymentRequest: + """Test CreateRagDeploymentRequest + include_optional is a boolean, when False only required + params are included, when True both required and + optional params are included """ + # uncomment below to create an instance of `CreateRagDeploymentRequest` + """ + model = CreateRagDeploymentRequest() + if include_optional: + return CreateRagDeploymentRequest( + name = '', + cluster_id = 56, + hardware_instance_id = 56, + recipe = platform_api_python_client.models.c_serve_recipe.CServeRecipe( + model = '', + is_embedding_model = True, + tensor_parallel_size = 56, + pipeline_parallel_size = 56, + block_size = 16, + swap_space = 0.0, + gpu_mem_util = 0.0, + max_num_seqs = 56, + offloading_num = 56, + use_prefix_caching = True, + use_chunked_prefill = True, + chunked_prefill_size = 56, + eager_execution = True, + num_scheduler_steps = 56, + use_flashinfer = True, + max_model_len = 128.0, + dtype = 'auto', + tokenizer = '', + spec_proposer = 'draft', + spec_draft_model = '', + spec_tokens = 56, + spec_prompt_lookup_min = 1.0, + spec_prompt_lookup_max = 1.0, + seed = 56, ), + hf_token = '', + centml_api_key = '', + min_scale = 56, + max_scale = 56, + endpoint_certificate_authority = '', + concurrency = 56, + env_vars = { + 'key' : '' + } + ) + else: + return 
CreateRagDeploymentRequest( + name = '', + cluster_id = 56, + hardware_instance_id = 56, + recipe = platform_api_python_client.models.c_serve_recipe.CServeRecipe( + model = '', + is_embedding_model = True, + tensor_parallel_size = 56, + pipeline_parallel_size = 56, + block_size = 16, + swap_space = 0.0, + gpu_mem_util = 0.0, + max_num_seqs = 56, + offloading_num = 56, + use_prefix_caching = True, + use_chunked_prefill = True, + chunked_prefill_size = 56, + eager_execution = True, + num_scheduler_steps = 56, + use_flashinfer = True, + max_model_len = 128.0, + dtype = 'auto', + tokenizer = '', + spec_proposer = 'draft', + spec_draft_model = '', + spec_tokens = 56, + spec_prompt_lookup_min = 1.0, + spec_prompt_lookup_max = 1.0, + seed = 56, ), + centml_api_key = '', + ) + """ + + def testCreateRagDeploymentRequest(self): + """Test CreateRagDeploymentRequest""" + # inst_req_only = self.make_instance(include_optional=False) + # inst_req_and_optional = self.make_instance(include_optional=True) + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_create_rag_deployment_response.py b/test/test_create_rag_deployment_response.py new file mode 100644 index 0000000..1eb92c2 --- /dev/null +++ b/test/test_create_rag_deployment_response.py @@ -0,0 +1,56 @@ +# coding: utf-8 + +""" + Platform External API + + No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator) + + The version of the OpenAPI document: 0.1.0 + Generated by OpenAPI Generator (https://openapi-generator.tech) + + Do not edit the class manually. 
+""" # noqa: E501
+
+
+import unittest
+
+from platform_api_python_client.models.create_rag_deployment_response import CreateRagDeploymentResponse
+
+class TestCreateRagDeploymentResponse(unittest.TestCase):
+    """CreateRagDeploymentResponse unit test stubs"""
+
+    def setUp(self):
+        pass
+
+    def tearDown(self):
+        pass
+
+    def make_instance(self, include_optional) -> CreateRagDeploymentResponse:
+        """Test CreateRagDeploymentResponse
+            include_optional is a boolean, when False only required
+            params are included, when True both required and
+            optional params are included """
+        # uncomment below to create an instance of `CreateRagDeploymentResponse`
+        """
+        model = CreateRagDeploymentResponse()
+        if include_optional:
+            return CreateRagDeploymentResponse(
+                id = 56,
+                created_at = datetime.datetime.strptime('2013-10-20 19:20:30.00', '%Y-%m-%d %H:%M:%S.%f'),
+                endpoint_url = ''
+            )
+        else:
+            return CreateRagDeploymentResponse(
+                id = 56,
+                created_at = datetime.datetime.strptime('2013-10-20 19:20:30.00', '%Y-%m-%d %H:%M:%S.%f'),
+                endpoint_url = '',
+        )
+        """
+
+    def testCreateRagDeploymentResponse(self):
+        """Test CreateRagDeploymentResponse"""
+        # inst_req_only = self.make_instance(include_optional=False)
+        # inst_req_and_optional = self.make_instance(include_optional=True)
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/test_external_api.py b/test/test_external_api.py
index 03d8466..d0d2e29 100644
--- a/test/test_external_api.py
+++ b/test/test_external_api.py
@@ -68,6 +68,13 @@ def test_create_inference_deployment_deployments_inference_post(self) -> None:
         """
         pass
 
+    def test_create_rag_deployment_deployments_rag_post(self) -> None:
+        """Test case for create_rag_deployment_deployments_rag_post
+
+        Create Rag Deployment
+        """
+        pass
+
     def test_delete_api_key_credentials_api_key_id_delete(self) -> None:
         """Test case for delete_api_key_credentials_api_key_id_delete
 
@@ -166,6 +173,13 @@ def test_get_prebuilt_images_prebuilt_images_get(self) -> None:
         """
         pass
 
+    def test_get_rag_deployment_deployments_rag_deployment_id_get(self) -> None:
+        """Test case for get_rag_deployment_deployments_rag_deployment_id_get
+
+        Get Rag Deployment
+        """
+        pass
+
     def test_get_usage_daily_bills_get(self) -> None:
         """Test case for get_usage_daily_bills_get
 
diff --git a/test/test_get_c_serve_deployment_response.py b/test/test_get_c_serve_deployment_response.py
index 4dc2b07..03f66ab 100644
--- a/test/test_get_c_serve_deployment_response.py
+++ b/test/test_get_c_serve_deployment_response.py
@@ -39,17 +39,21 @@ def make_instance(self, include_optional) -> GetCServeDeploymentResponse:
                 is_embedding_model = True,
                 tensor_parallel_size = 56,
                 pipeline_parallel_size = 56,
-                block_size = 56,
+                block_size = 16,
                 swap_space = 0.0,
                 gpu_mem_util = 0.0,
                 max_num_seqs = 56,
-                use_prefix_caching = True,
                 offloading_num = 56,
+                use_prefix_caching = True,
+                use_chunked_prefill = True,
+                chunked_prefill_size = 56,
+                eager_execution = True,
+                num_scheduler_steps = 56,
                 use_flashinfer = True,
                 max_model_len = 128.0,
                 dtype = 'auto',
                 tokenizer = '',
-                spec_proposer = '',
+                spec_proposer = 'draft',
                 spec_draft_model = '',
                 spec_tokens = 56,
                 spec_prompt_lookup_min = 1.0,
@@ -78,17 +82,21 @@ def make_instance(self, include_optional) -> GetCServeDeploymentResponse:
                 is_embedding_model = True,
                 tensor_parallel_size = 56,
                 pipeline_parallel_size = 56,
-                block_size = 56,
+                block_size = 16,
                 swap_space = 0.0,
                 gpu_mem_util = 0.0,
                 max_num_seqs = 56,
-                use_prefix_caching = True,
                 offloading_num = 56,
+                use_prefix_caching = True,
+                use_chunked_prefill = True,
+                chunked_prefill_size = 56,
+                eager_execution = True,
+                num_scheduler_steps = 56,
                 use_flashinfer = True,
                 max_model_len = 128.0,
                 dtype = 'auto',
                 tokenizer = '',
-                spec_proposer = '',
+                spec_proposer = 'draft',
                 spec_draft_model = '',
                 spec_tokens = 56,
                 spec_prompt_lookup_min = 1.0,
diff --git a/test/test_get_c_serve_v2_deployment_response.py b/test/test_get_c_serve_v2_deployment_response.py
new file mode 100644
index 0000000..dbb5e8f
--- /dev/null
+++ b/test/test_get_c_serve_v2_deployment_response.py
@@ -0,0 +1,138 @@
+# coding: utf-8
+
+"""
+    Platform External API
+
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+    The version of the OpenAPI document: 0.1.0
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+""" # noqa: E501
+
+
+import unittest
+
+from platform_api_python_client.models.get_c_serve_v2_deployment_response import GetCServeV2DeploymentResponse
+
+class TestGetCServeV2DeploymentResponse(unittest.TestCase):
+    """GetCServeV2DeploymentResponse unit test stubs"""
+
+    def setUp(self):
+        pass
+
+    def tearDown(self):
+        pass
+
+    def make_instance(self, include_optional) -> GetCServeV2DeploymentResponse:
+        """Test GetCServeV2DeploymentResponse
+            include_optional is a boolean, when False only required
+            params are included, when True both required and
+            optional params are included """
+        # uncomment below to create an instance of `GetCServeV2DeploymentResponse`
+        """
+        model = GetCServeV2DeploymentResponse()
+        if include_optional:
+            return GetCServeV2DeploymentResponse(
+                model = '',
+                max_model_len = 56,
+                is_embedding_model = True,
+                tokenizer = '',
+                tensor_parallel_size = 56,
+                pipeline_parallel_size = 56,
+                gpu_mem_util = 1.337,
+                block_size = 56,
+                swap_space = 56,
+                quantization = '',
+                dtype = 'auto',
+                cache_dtype = 'auto',
+                max_num_seqs = 56,
+                eager_execution = True,
+                use_flashinfer = True,
+                offloading_num = 1.337,
+                spec_draft_model = '',
+                spec_tokens = 56,
+                spec_prompt_lookup_max = 56,
+                spec_prompt_lookup_min = 56,
+                use_prefix_caching = True,
+                use_chunked_prefill = True,
+                chunked_prefill_size = 56,
+                max_seq_len_to_capture = 56,
+                distributed_executor_backend = 'ray',
+                spec_max_batch_size = 56,
+                spec_max_seq_len = 56,
+                num_scheduler_steps = 56,
+                cluster_id = 56,
+                id = 56,
+                name = '',
+                endpoint_url = '',
+                image_url = '',
+                type = 'inference',
+                status = 'active',
+                created_at = datetime.datetime.strptime('2013-10-20 19:20:30.00', '%Y-%m-%d %H:%M:%S.%f'),
+                hardware_instance_id = 56,
+                min_scale = 56,
+                max_scale = 56,
+                endpoint_certificate_authority = '',
+                concurrency = 56,
+                env_vars = {
+                    'key' : ''
+                    }
+            )
+        else:
+            return GetCServeV2DeploymentResponse(
+                model = '',
+                max_model_len = 56,
+                is_embedding_model = True,
+                tokenizer = '',
+                tensor_parallel_size = 56,
+                pipeline_parallel_size = 56,
+                gpu_mem_util = 1.337,
+                block_size = 56,
+                swap_space = 56,
+                quantization = '',
+                dtype = 'auto',
+                cache_dtype = 'auto',
+                max_num_seqs = 56,
+                eager_execution = True,
+                use_flashinfer = True,
+                offloading_num = 1.337,
+                spec_draft_model = '',
+                spec_tokens = 56,
+                spec_prompt_lookup_max = 56,
+                spec_prompt_lookup_min = 56,
+                use_prefix_caching = True,
+                use_chunked_prefill = True,
+                chunked_prefill_size = 56,
+                max_seq_len_to_capture = 56,
+                distributed_executor_backend = 'ray',
+                spec_max_batch_size = 56,
+                spec_max_seq_len = 56,
+                num_scheduler_steps = 56,
+                cluster_id = 56,
+                id = 56,
+                name = '',
+                endpoint_url = '',
+                image_url = '',
+                type = 'inference',
+                status = 'active',
+                created_at = datetime.datetime.strptime('2013-10-20 19:20:30.00', '%Y-%m-%d %H:%M:%S.%f'),
+                hardware_instance_id = 56,
+                min_scale = 56,
+                max_scale = 56,
+                endpoint_certificate_authority = '',
+                concurrency = 56,
+                env_vars = {
+                    'key' : ''
+                    },
+        )
+        """
+
+    def testGetCServeV2DeploymentResponse(self):
+        """Test GetCServeV2DeploymentResponse"""
+        # inst_req_only = self.make_instance(include_optional=False)
+        # inst_req_and_optional = self.make_instance(include_optional=True)
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/test_get_rag_deployment_response.py b/test/test_get_rag_deployment_response.py
new file mode 100644
index 0000000..b812e91
--- /dev/null
+++ b/test/test_get_rag_deployment_response.py
@@ -0,0 +1,132 @@
+# coding: utf-8
+
+"""
+    Platform External API
+
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+    The version of the OpenAPI document: 0.1.0
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+""" # noqa: E501
+
+
+import unittest
+
+from platform_api_python_client.models.get_rag_deployment_response import GetRagDeploymentResponse
+
+class TestGetRagDeploymentResponse(unittest.TestCase):
+    """GetRagDeploymentResponse unit test stubs"""
+
+    def setUp(self):
+        pass
+
+    def tearDown(self):
+        pass
+
+    def make_instance(self, include_optional) -> GetRagDeploymentResponse:
+        """Test GetRagDeploymentResponse
+            include_optional is a boolean, when False only required
+            params are included, when True both required and
+            optional params are included """
+        # uncomment below to create an instance of `GetRagDeploymentResponse`
+        """
+        model = GetRagDeploymentResponse()
+        if include_optional:
+            return GetRagDeploymentResponse(
+                model = '',
+                is_embedding_model = True,
+                tensor_parallel_size = 56,
+                pipeline_parallel_size = 56,
+                block_size = 16,
+                swap_space = 0.0,
+                gpu_mem_util = 0.0,
+                max_num_seqs = 56,
+                offloading_num = 56,
+                use_prefix_caching = True,
+                use_chunked_prefill = True,
+                chunked_prefill_size = 56,
+                eager_execution = True,
+                num_scheduler_steps = 56,
+                use_flashinfer = True,
+                max_model_len = 128.0,
+                dtype = 'auto',
+                tokenizer = '',
+                spec_proposer = 'draft',
+                spec_draft_model = '',
+                spec_tokens = 56,
+                spec_prompt_lookup_min = 1.0,
+                spec_prompt_lookup_max = 1.0,
+                seed = 56,
+                cluster_id = 56,
+                id = 56,
+                name = '',
+                endpoint_url = '',
+                image_url = '',
+                type = 'inference',
+                status = 'active',
+                created_at = datetime.datetime.strptime('2013-10-20 19:20:30.00', '%Y-%m-%d %H:%M:%S.%f'),
+                hardware_instance_id = 56,
+                centml_api_key = '',
+                min_scale = 56,
+                max_scale = 56,
+                endpoint_certificate_authority = '',
+                concurrency = 56,
+                env_vars = {
+                    'key' : ''
+                    }
+            )
+        else:
+            return GetRagDeploymentResponse(
+                model = '',
+                is_embedding_model = True,
+                tensor_parallel_size = 56,
+                pipeline_parallel_size = 56,
+                block_size = 16,
+                swap_space = 0.0,
+                gpu_mem_util = 0.0,
+                max_num_seqs = 56,
+                offloading_num = 56,
+                use_prefix_caching = True,
+                use_chunked_prefill = True,
+                chunked_prefill_size = 56,
+                eager_execution = True,
+                num_scheduler_steps = 56,
+                use_flashinfer = True,
+                max_model_len = 128.0,
+                dtype = 'auto',
+                tokenizer = '',
+                spec_proposer = 'draft',
+                spec_draft_model = '',
+                spec_tokens = 56,
+                spec_prompt_lookup_min = 1.0,
+                spec_prompt_lookup_max = 1.0,
+                seed = 56,
+                cluster_id = 56,
+                id = 56,
+                name = '',
+                endpoint_url = '',
+                image_url = '',
+                type = 'inference',
+                status = 'active',
+                created_at = datetime.datetime.strptime('2013-10-20 19:20:30.00', '%Y-%m-%d %H:%M:%S.%f'),
+                hardware_instance_id = 56,
+                centml_api_key = '',
+                min_scale = 56,
+                max_scale = 56,
+                endpoint_certificate_authority = '',
+                concurrency = 56,
+                env_vars = {
+                    'key' : ''
+                    },
+        )
+        """
+
+    def testGetRagDeploymentResponse(self):
+        """Test GetRagDeploymentResponse"""
+        # inst_req_only = self.make_instance(include_optional=False)
+        # inst_req_and_optional = self.make_instance(include_optional=True)
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/test_list_c_serve_recipe_response.py b/test/test_list_c_serve_recipe_response.py
index 7aed9ea..5408b10 100644
--- a/test/test_list_c_serve_recipe_response.py
+++ b/test/test_list_c_serve_recipe_response.py
@@ -40,27 +40,35 @@ def make_instance(self, include_optional) -> ListCServeRecipeResponse:
                     model = '',
                     cluster_id = 56,
                     fastest = platform_api_python_client.models.c_serve_recipe_perf.CServeRecipePerf(
-                        recipe = platform_api_python_client.models.c_serve_recipe.CServeRecipe(
+                        recipe = platform_api_python_client.models.c_serve_v2_recipe.CServeV2Recipe(
                             model = '',
+                            max_model_len = 56,
                             is_embedding_model = True,
+                            tokenizer = '',
                             tensor_parallel_size = 56,
                             pipeline_parallel_size = 56,
+                            gpu_mem_util = 1.337,
                             block_size = 56,
-                            swap_space = 0.0,
-                            gpu_mem_util = 0.0,
+                            swap_space = 56,
+                            quantization = '',
+                            dtype = 'auto',
+                            cache_dtype = 'auto',
                             max_num_seqs = 56,
-                            use_prefix_caching = True,
-                            offloading_num = 56,
+                            eager_execution = True,
                             use_flashinfer = True,
-                            max_model_len = 128.0,
-                            dtype = 'auto',
-                            tokenizer = '',
-                            spec_proposer = '',
+                            offloading_num = 1.337,
                             spec_draft_model = '',
                             spec_tokens = 56,
-                            spec_prompt_lookup_min = 1.0,
-                            spec_prompt_lookup_max = 1.0,
-                            seed = 56, ),
+                            spec_prompt_lookup_max = 56,
+                            spec_prompt_lookup_min = 56,
+                            use_prefix_caching = True,
+                            use_chunked_prefill = True,
+                            chunked_prefill_size = 56,
+                            max_seq_len_to_capture = 56,
+                            distributed_executor_backend = 'ray',
+                            spec_max_batch_size = 56,
+                            spec_max_seq_len = 56,
+                            num_scheduler_steps = 56, ),
                         hardware_instance_id = 56,
                         output_tp = [
                             [
@@ -73,27 +81,35 @@
                             ]
                         ],
                     ),
                     cheapest = platform_api_python_client.models.c_serve_recipe_perf.CServeRecipePerf(
-                        recipe = platform_api_python_client.models.c_serve_recipe.CServeRecipe(
+                        recipe = platform_api_python_client.models.c_serve_v2_recipe.CServeV2Recipe(
                             model = '',
+                            max_model_len = 56,
                             is_embedding_model = True,
+                            tokenizer = '',
                             tensor_parallel_size = 56,
                             pipeline_parallel_size = 56,
+                            gpu_mem_util = 1.337,
                             block_size = 56,
-                            swap_space = 0.0,
-                            gpu_mem_util = 0.0,
+                            swap_space = 56,
+                            quantization = '',
+                            dtype = 'auto',
+                            cache_dtype = 'auto',
                             max_num_seqs = 56,
-                            use_prefix_caching = True,
-                            offloading_num = 56,
+                            eager_execution = True,
                             use_flashinfer = True,
-                            max_model_len = 128.0,
-                            dtype = 'auto',
-                            tokenizer = '',
-                            spec_proposer = '',
+                            offloading_num = 1.337,
                             spec_draft_model = '',
                             spec_tokens = 56,
-                            spec_prompt_lookup_min = 1.0,
-                            spec_prompt_lookup_max = 1.0,
-                            seed = 56, ),
+                            spec_prompt_lookup_max = 56,
+                            spec_prompt_lookup_min = 56,
+                            use_prefix_caching = True,
+                            use_chunked_prefill = True,
+                            chunked_prefill_size = 56,
+                            max_seq_len_to_capture = 56,
+                            distributed_executor_backend = 'ray',
+                            spec_max_batch_size = 56,
+                            spec_max_seq_len = 56,
+                            num_scheduler_steps = 56, ),
                         hardware_instance_id = 56,
                         output_tp = [
                             [
@@ -115,27 +131,35 @@
                     model = '',
                     cluster_id = 56,
                     fastest = platform_api_python_client.models.c_serve_recipe_perf.CServeRecipePerf(
-                        recipe = platform_api_python_client.models.c_serve_recipe.CServeRecipe(
+                        recipe = platform_api_python_client.models.c_serve_v2_recipe.CServeV2Recipe(
                             model = '',
+                            max_model_len = 56,
                             is_embedding_model = True,
+                            tokenizer = '',
                             tensor_parallel_size = 56,
                             pipeline_parallel_size = 56,
+                            gpu_mem_util = 1.337,
                             block_size = 56,
-                            swap_space = 0.0,
-                            gpu_mem_util = 0.0,
+                            swap_space = 56,
+                            quantization = '',
+                            dtype = 'auto',
+                            cache_dtype = 'auto',
                             max_num_seqs = 56,
-                            use_prefix_caching = True,
-                            offloading_num = 56,
+                            eager_execution = True,
                             use_flashinfer = True,
-                            max_model_len = 128.0,
-                            dtype = 'auto',
-                            tokenizer = '',
-                            spec_proposer = '',
+                            offloading_num = 1.337,
                             spec_draft_model = '',
                             spec_tokens = 56,
-                            spec_prompt_lookup_min = 1.0,
-                            spec_prompt_lookup_max = 1.0,
-                            seed = 56, ),
+                            spec_prompt_lookup_max = 56,
+                            spec_prompt_lookup_min = 56,
+                            use_prefix_caching = True,
+                            use_chunked_prefill = True,
+                            chunked_prefill_size = 56,
+                            max_seq_len_to_capture = 56,
+                            distributed_executor_backend = 'ray',
+                            spec_max_batch_size = 56,
+                            spec_max_seq_len = 56,
+                            num_scheduler_steps = 56, ),
                         hardware_instance_id = 56,
                         output_tp = [
                             [
@@ -148,27 +172,35 @@
                             ]
                         ],
                     ),
                     cheapest = platform_api_python_client.models.c_serve_recipe_perf.CServeRecipePerf(
-                        recipe = platform_api_python_client.models.c_serve_recipe.CServeRecipe(
+                        recipe = platform_api_python_client.models.c_serve_v2_recipe.CServeV2Recipe(
                             model = '',
+                            max_model_len = 56,
                             is_embedding_model = True,
+                            tokenizer = '',
                             tensor_parallel_size = 56,
                             pipeline_parallel_size = 56,
+                            gpu_mem_util = 1.337,
                             block_size = 56,
-                            swap_space = 0.0,
-                            gpu_mem_util = 0.0,
+                            swap_space = 56,
+                            quantization = '',
+                            dtype = 'auto',
+                            cache_dtype = 'auto',
                             max_num_seqs = 56,
-                            use_prefix_caching = True,
-                            offloading_num = 56,
+                            eager_execution = True,
                             use_flashinfer = True,
-                            max_model_len = 128.0,
-                            dtype = 'auto',
-                            tokenizer = '',
-                            spec_proposer = '',
+                            offloading_num = 1.337,
                             spec_draft_model = '',
                             spec_tokens = 56,
-                            spec_prompt_lookup_min = 1.0,
-                            spec_prompt_lookup_max = 1.0,
-                            seed = 56, ),
+                            spec_prompt_lookup_max = 56,
+                            spec_prompt_lookup_min = 56,
+                            use_prefix_caching = True,
+                            use_chunked_prefill = True,
+                            chunked_prefill_size = 56,
+                            max_seq_len_to_capture = 56,
+                            distributed_executor_backend = 'ray',
+                            spec_max_batch_size = 56,
+                            spec_max_seq_len = 56,
+                            num_scheduler_steps = 56, ),
                         hardware_instance_id = 56,
                         output_tp = [
                             [
diff --git a/test/test_list_user_vault_items_response.py b/test/test_list_user_vault_items_response.py
new file mode 100644
index 0000000..171b5a0
--- /dev/null
+++ b/test/test_list_user_vault_items_response.py
@@ -0,0 +1,62 @@
+# coding: utf-8
+
+"""
+    Platform External API
+
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+    The version of the OpenAPI document: 0.1.0
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+""" # noqa: E501
+
+
+import unittest
+
+from platform_api_python_client.models.list_user_vault_items_response import ListUserVaultItemsResponse
+
+class TestListUserVaultItemsResponse(unittest.TestCase):
+    """ListUserVaultItemsResponse unit test stubs"""
+
+    def setUp(self):
+        pass
+
+    def tearDown(self):
+        pass
+
+    def make_instance(self, include_optional) -> ListUserVaultItemsResponse:
+        """Test ListUserVaultItemsResponse
+            include_optional is a boolean, when False only required
+            params are included, when True both required and
+            optional params are included """
+        # uncomment below to create an instance of `ListUserVaultItemsResponse`
+        """
+        model = ListUserVaultItemsResponse()
+        if include_optional:
+            return ListUserVaultItemsResponse(
+                results = [
+                    platform_api_python_client.models.user_vault_item.UserVaultItem(
+                        type = 'env_vars',
+                        key = '',
+                        value = '', )
+                    ]
+            )
+        else:
+            return ListUserVaultItemsResponse(
+                results = [
+                    platform_api_python_client.models.user_vault_item.UserVaultItem(
+                        type = 'env_vars',
+                        key = '',
+                        value = '', )
+                    ],
+        )
+        """
+
+    def testListUserVaultItemsResponse(self):
+        """Test ListUserVaultItemsResponse"""
+        # inst_req_only = self.make_instance(include_optional=False)
+        # inst_req_and_optional = self.make_instance(include_optional=True)
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/test_user_support_email_request.py b/test/test_user_support_email_request.py
index fa1a16c..de221ee 100644
--- a/test/test_user_support_email_request.py
+++ b/test/test_user_support_email_request.py
@@ -36,12 +36,14 @@ def make_instance(self, include_optional) -> UserSupportEmailRequest:
         if include_optional:
             return UserSupportEmailRequest(
                 message = '',
-                subject = ''
+                subject = '',
+                send_to_sales = True
             )
         else:
             return UserSupportEmailRequest(
                 message = '',
                 subject = '',
+                send_to_sales = True,
         )
         """
 
diff --git a/test/test_user_vault_item_input.py b/test/test_user_vault_item_input.py
new file mode 100644
index 0000000..ec65d24
--- /dev/null
+++ b/test/test_user_vault_item_input.py
@@ -0,0 +1,55 @@
+# coding: utf-8
+
+"""
+    Platform External API
+
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+    The version of the OpenAPI document: 0.1.0
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+""" # noqa: E501
+
+
+import unittest
+
+from platform_api_python_client.models.user_vault_item_input import UserVaultItemInput
+
+class TestUserVaultItemInput(unittest.TestCase):
+    """UserVaultItemInput unit test stubs"""
+
+    def setUp(self):
+        pass
+
+    def tearDown(self):
+        pass
+
+    def make_instance(self, include_optional) -> UserVaultItemInput:
+        """Test UserVaultItemInput
+            include_optional is a boolean, when False only required
+            params are included, when True both required and
+            optional params are included """
+        # uncomment below to create an instance of `UserVaultItemInput`
+        """
+        model = UserVaultItemInput()
+        if include_optional:
+            return UserVaultItemInput(
+                type = 'env_vars',
+                key = '',
+                value = ''
+            )
+        else:
+            return UserVaultItemInput(
+                type = 'env_vars',
+                key = '',
+        )
+        """
+
+    def testUserVaultItemInput(self):
+        """Test UserVaultItemInput"""
+        # inst_req_only = self.make_instance(include_optional=False)
+        # inst_req_and_optional = self.make_instance(include_optional=True)
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/test_user_vault_item_output.py b/test/test_user_vault_item_output.py
new file mode 100644
index 0000000..3653a72
--- /dev/null
+++ b/test/test_user_vault_item_output.py
@@ -0,0 +1,56 @@
+# coding: utf-8
+
+"""
+    Platform External API
+
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+    The version of the OpenAPI document: 0.1.0
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+""" # noqa: E501
+
+
+import unittest
+
+from platform_api_python_client.models.user_vault_item_output import UserVaultItemOutput
+
+class TestUserVaultItemOutput(unittest.TestCase):
+    """UserVaultItemOutput unit test stubs"""
+
+    def setUp(self):
+        pass
+
+    def tearDown(self):
+        pass
+
+    def make_instance(self, include_optional) -> UserVaultItemOutput:
+        """Test UserVaultItemOutput
+            include_optional is a boolean, when False only required
+            params are included, when True both required and
+            optional params are included """
+        # uncomment below to create an instance of `UserVaultItemOutput`
+        """
+        model = UserVaultItemOutput()
+        if include_optional:
+            return UserVaultItemOutput(
+                type = 'env_vars',
+                key = '',
+                value = ''
+            )
+        else:
+            return UserVaultItemOutput(
+                type = 'env_vars',
+                key = '',
+                value = '',
+        )
+        """
+
+    def testUserVaultItemOutput(self):
+        """Test UserVaultItemOutput"""
+        # inst_req_only = self.make_instance(include_optional=False)
+        # inst_req_and_optional = self.make_instance(include_optional=True)
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/test_user_vault_type.py b/test/test_user_vault_type.py
new file mode 100644
index 0000000..108bf37
--- /dev/null
+++ b/test/test_user_vault_type.py
@@ -0,0 +1,33 @@
+# coding: utf-8
+
+"""
+    Platform External API
+
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+    The version of the OpenAPI document: 0.1.0
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+""" # noqa: E501
+
+
+import unittest
+
+from platform_api_python_client.models.user_vault_type import UserVaultType
+
+class TestUserVaultType(unittest.TestCase):
+    """UserVaultType unit test stubs"""
+
+    def setUp(self):
+        pass
+
+    def tearDown(self):
+        pass
+
+    def testUserVaultType(self):
+        """Test UserVaultType"""
+        # inst = UserVaultType()
+
+if __name__ == '__main__':
+    unittest.main()