From dbf3df02e1e7c55309722edaec19da367572d3c7 Mon Sep 17 00:00:00 2001
From: Tharsanan1 <tharsanan.15@cse.mrt.ac.lk>
Date: Thu, 19 Sep 2024 18:13:24 +0530
Subject: [PATCH 1/2] Add apk conf for airl

---
 .../dp/airatelimitpolicy_controller.go        |  3 +
 .../ballerina/APIClient.bal                   | 37 ++++++++
 .../ballerina/ConfigGenreatorClient.bal       |  4 +
 .../ballerina/DeployerClient.bal              | 49 ++++++++++
 .../ballerina/K8sClient.bal                   | 25 +++++
 .../modules/model/AIRatelimitPolicy.bal       | 47 ++++++++++
 .../ballerina/modules/model/APIArtifact.bal   |  1 +
 .../ballerina/modules/model/RateLimit.bal     |  7 ++
 .../ballerina/resources/apk-conf-schema.yaml  | 56 ++++++++++++
 .../ballerina/types.bal                       | 34 +++++++
 .../config-deployer/conf/apk-schema.json      | 91 +++++++++++++++++++
 11 files changed, 354 insertions(+)
 create mode 100644 runtime/config-deployer-service/ballerina/modules/model/AIRatelimitPolicy.bal

diff --git a/common-controller/internal/operator/controllers/dp/airatelimitpolicy_controller.go b/common-controller/internal/operator/controllers/dp/airatelimitpolicy_controller.go
index 4791376a3..98789f89a 100644
--- a/common-controller/internal/operator/controllers/dp/airatelimitpolicy_controller.go
+++ b/common-controller/internal/operator/controllers/dp/airatelimitpolicy_controller.go
@@ -105,6 +105,9 @@ func (r *AIRateLimitPolicyReconciler) Reconcile(ctx context.Context, req ctrl.Re
 		xds.UpdateRateLimiterPolicies(conf.CommonController.Server.Label)
 	} else {
 		loggers.LoggerAPKOperator.Infof("ratelimits found")
+		if ratelimitPolicy.Spec.Override == nil {
+			ratelimitPolicy.Spec.Override = ratelimitPolicy.Spec.Default
+		}
 		if ratelimitPolicy.Spec.TargetRef.Name != "" {
 			r.ods.AddorUpdateAIRatelimitToStore(ratelimitKey, ratelimitPolicy.Spec)
 			xds.UpdateRateLimitXDSCacheForAIRatelimitPolicies(r.ods.GetAIRatelimitPolicySpecs())
diff --git a/runtime/config-deployer-service/ballerina/APIClient.bal b/runtime/config-deployer-service/ballerina/APIClient.bal
index ec7ea4012..a5ecaa447 100644
--- a/runtime/config-deployer-service/ballerina/APIClient.bal
+++ b/runtime/config-deployer-service/ballerina/APIClient.bal
@@ -232,6 +232,11 @@ public class APIClient {
                     serviceEntry: false,
                     url: self.constructURlFromService(sandboxEndpointConfig.endpoint)
                 };
+                AIRatelimit? aiRatelimit = sandboxEndpointConfig.aiRatelimit;
+                if aiRatelimit is AIRatelimit && aiRatelimit.enabled {
+                    model:AIRateLimitPolicy airl = self.generateAIRateLimitPolicyCR(apkConf, aiRatelimit.token, aiRatelimit.request, backendService.metadata.name, organization);
+                    apiArtifact.aiRatelimitPolicies[airl.metadata.name] = airl;
+                }
             }
         }
         if (endpointType == () || endpointType == PRODUCTION_TYPE) {
@@ -246,6 +251,11 @@ public class APIClient {
                     serviceEntry: false,
                     url: self.constructURlFromService(productionEndpointConfig.endpoint)
                 };
+                AIRatelimit? aiRatelimit = productionEndpointConfig.aiRatelimit;
+                if aiRatelimit is AIRatelimit && aiRatelimit.enabled {
+                    model:AIRateLimitPolicy airl = self.generateAIRateLimitPolicyCR(apkConf, aiRatelimit.token, aiRatelimit.request, backendService.metadata.name, organization);
+                    apiArtifact.aiRatelimitPolicies[airl.metadata.name] = airl;
+                }
             }
         }
         return endpointIdMap;
@@ -1506,6 +1516,29 @@ public class APIClient {
         return rateLimitPolicyCR;
     }
 
+    public isolated function generateAIRateLimitPolicyCR(APKConf apkConf, TokenAIRL tokenAIRL, RequestAIRL requestAIRL, string targetRefName, commons:Organization organization) returns model:AIRateLimitPolicy {
+        string apiIdentifierHash = crypto:hashSha1((apkConf.name + apkConf.version).toBytes()).toBase16();
+        model:AIRateLimitPolicy aiRateLimitPolicyCR = {
+            metadata: {
+                name: self.retrieveAIRateLimitPolicyName(apiIdentifierHash, targetRefName),
+                labels: self.getLabels(apkConf, organization)
+            },
+            spec: {
+                default: {
+                    organization: organization.name,
+                    requestCount: {unit: requestAIRL.unit, requestsPerUnit: requestAIRL.requestLimit},
+                    tokenCount: {unit: tokenAIRL.unit, requestTokenCount: tokenAIRL.promptLimit, responseTokenCount: tokenAIRL.completionLimit, totalTokenCount: tokenAIRL.totalLimit}
+                },
+                targetRef: {
+                    group: "dp.wso2.com",
+                    kind: "Backend",
+                    name: targetRefName
+                }
+            }
+        };
+        return aiRateLimitPolicyCR;
+    }
+
     isolated function retrieveRateLimitData(RateLimit rateLimit, commons:Organization organization) returns model:RateLimitData {
         model:RateLimitData rateLimitData = {
             api: {
@@ -1933,6 +1966,10 @@ public class APIClient {
         }
     }
 
+    public isolated function retrieveAIRateLimitPolicyName(string apiID, string targetRef) returns string {
+        return "airl-" + apiID + "-" + targetRef;        
+    }
+
     private isolated function validateAndRetrieveAPKConfiguration(json apkconfJson) returns APKConf|commons:APKError? {
         do {
             runtimeapi:APKConfValidationResponse validationResponse = check apkConfValidator.validate(apkconfJson.toJsonString());
diff --git a/runtime/config-deployer-service/ballerina/ConfigGenreatorClient.bal b/runtime/config-deployer-service/ballerina/ConfigGenreatorClient.bal
index 108e05829..8df53c519 100644
--- a/runtime/config-deployer-service/ballerina/ConfigGenreatorClient.bal
+++ b/runtime/config-deployer-service/ballerina/ConfigGenreatorClient.bal
@@ -234,6 +234,10 @@ on fail var e {
             string yamlString = check self.convertJsonToYaml(rateLimitPolicy.toJsonString());
             _ = check self.storeFile(yamlString, rateLimitPolicy.metadata.name, zipDir);
         }
+        foreach model:AIRateLimitPolicy airateLimitPolicy in apiArtifact.aiRatelimitPolicies {
+            string yamlString = check self.convertJsonToYaml(airateLimitPolicy.toJsonString());
+            _ = check self.storeFile(yamlString, airateLimitPolicy.metadata.name, zipDir);
+        }
         foreach model:APIPolicy apiPolicy in apiArtifact.apiPolicies {
             string yamlString = check self.convertJsonToYaml(apiPolicy.toJsonString());
             _ = check self.storeFile(yamlString, apiPolicy.metadata.name, zipDir);
diff --git a/runtime/config-deployer-service/ballerina/DeployerClient.bal b/runtime/config-deployer-service/ballerina/DeployerClient.bal
index 802083886..27fb38cd2 100644
--- a/runtime/config-deployer-service/ballerina/DeployerClient.bal
+++ b/runtime/config-deployer-service/ballerina/DeployerClient.bal
@@ -102,6 +102,7 @@ public class DeployerClient {
                 _ = check self.deleteScopeCrsForAPI(existingAPI, <string>apiArtifact?.organization);
                 check self.deleteBackends(existingAPI, <string>apiArtifact?.organization);
                 check self.deleteRateLimitPolicyCRs(existingAPI, <string>apiArtifact?.organization);
+                check self.deleteAIRateLimitPolicyCRs(existingAPI, <string>apiArtifact?.organization);
                 check self.deleteAPIPolicyCRs(existingAPI, <string>apiArtifact?.organization);
                 check self.deleteInterceptorServiceCRs(existingAPI, <string>apiArtifact?.organization);
                 check self.deleteBackendJWTConfig(existingAPI, <string>apiArtifact?.organization);
@@ -121,6 +122,7 @@ public class DeployerClient {
                     check self.deployBackendServices(apiArtifact, ownerReference);
                     check self.deployAuthenticationCRs(apiArtifact, ownerReference);
                     check self.deployRateLimitPolicyCRs(apiArtifact, ownerReference);
+                    check self.deployAIRateLimitPolicyCRs(apiArtifact, ownerReference);
                     check self.deployInterceptorServiceCRs(apiArtifact, ownerReference);
                     check self.deployBackendJWTConfigs(apiArtifact, ownerReference);
                     check self.deployAPIPolicyCRs(apiArtifact, ownerReference);
@@ -660,6 +662,30 @@ public class DeployerClient {
         }
     }
 
+    private isolated function deployAIRateLimitPolicyCRs(model:APIArtifact apiArtifact, model:OwnerReference ownerReference) returns error? {
+        foreach model:AIRateLimitPolicy rateLimitPolicy in apiArtifact.aiRatelimitPolicies {
+            rateLimitPolicy.metadata.ownerReferences = [ownerReference];
+            http:Response deployRateLimitPolicyResult = check deployAIRateLimitPolicyCR(rateLimitPolicy, <string>apiArtifact?.namespace);
+            if deployRateLimitPolicyResult.statusCode == http:STATUS_CREATED {
+                log:printDebug("Deployed AIRateLimitPolicy Successfully" + rateLimitPolicy.toString());
+            } else if deployRateLimitPolicyResult.statusCode == http:STATUS_CONFLICT {
+                log:printDebug("AIRateLimitPolicy already exists" + rateLimitPolicy.toString());
+                model:AIRateLimitPolicy rateLimitPolicyFromK8s = check getAIRateLimitPolicyCR(rateLimitPolicy.metadata.name, <string>apiArtifact?.namespace);
+                rateLimitPolicy.metadata.resourceVersion = rateLimitPolicyFromK8s.metadata.resourceVersion;
+                http:Response rateLimitPolicyCR = check updateAIRateLimitPolicyCR(rateLimitPolicy, <string>apiArtifact?.namespace);
+                if rateLimitPolicyCR.statusCode != http:STATUS_OK {
+                    json responsePayLoad = check rateLimitPolicyCR.getJsonPayload();
+                    model:Status statusResponse = check responsePayLoad.cloneWithType(model:Status);
+                    check self.handleK8sTimeout(statusResponse);
+                }
+            } else {
+                json responsePayLoad = check deployRateLimitPolicyResult.getJsonPayload();
+                model:Status statusResponse = check responsePayLoad.cloneWithType(model:Status);
+                check self.handleK8sTimeout(statusResponse);
+            }
+        }
+    }
+
     private isolated function deleteRateLimitPolicyCRs(model:API api, string organization) returns commons:APKError? {
         do {
             model:RateLimitPolicyList|http:ClientError rateLimitPolicyCrListResponse = check getRateLimitPolicyCRsForAPI(api.spec.apiName, api.spec.apiVersion, <string>api.metadata?.namespace, organization);
@@ -684,6 +710,29 @@ public class DeployerClient {
         }
     }
 
+    private isolated function deleteAIRateLimitPolicyCRs(model:API api, string organization) returns commons:APKError? {
+        do {
+            model:AIRateLimitPolicyList|http:ClientError aiRateLimitPolicyCrListResponse = check getAIRateLimitPolicyCRsForAPI(api.spec.apiName, api.spec.apiVersion, <string>api.metadata?.namespace, organization);
+            if aiRateLimitPolicyCrListResponse is model:AIRateLimitPolicyList {
+                foreach model:AIRateLimitPolicy item in aiRateLimitPolicyCrListResponse.items {
+                    http:Response|http:ClientError rateLimitPolicyCRDeletionResponse = deleteAIRateLimitPolicyCR(item.metadata.name, <string>item.metadata?.namespace);
+                    if rateLimitPolicyCRDeletionResponse is http:Response {
+                        if rateLimitPolicyCRDeletionResponse.statusCode != http:STATUS_OK {
+                            json responsePayLoad = check rateLimitPolicyCRDeletionResponse.getJsonPayload();
+                            model:Status statusResponse = check responsePayLoad.cloneWithType(model:Status);
+                            check self.handleK8sTimeout(statusResponse);
+                        }
+                    } else {
+                        log:printError("Error occured while deleting AI rate limit policy");
+                    }
+                }
+                return;
+            }
+        } on fail var e {
+            log:printError("Error occured deleting AI rate limit policy", e);
+            return e909022("Error occured deleting AI rate limit policy", e);
+        }
+    }
     private isolated function deleteAPIPolicyCRs(model:API api, string organization) returns commons:APKError? {
         do {
             model:APIPolicyList|http:ClientError apiPolicyCrListResponse = check getAPIPolicyCRsForAPI(api.spec.apiName, api.spec.apiVersion, <string>api.metadata?.namespace, organization);
diff --git a/runtime/config-deployer-service/ballerina/K8sClient.bal b/runtime/config-deployer-service/ballerina/K8sClient.bal
index ed1219c0d..e97309129 100644
--- a/runtime/config-deployer-service/ballerina/K8sClient.bal
+++ b/runtime/config-deployer-service/ballerina/K8sClient.bal
@@ -251,26 +251,51 @@ isolated function deployRateLimitPolicyCR(model:RateLimitPolicy rateLimitPolicy,
     return k8sApiServerEp->post(endpoint, rateLimitPolicy, targetType = http:Response);
 }
 
+isolated function deployAIRateLimitPolicyCR(model:AIRateLimitPolicy rateLimitPolicy, string namespace) returns http:Response|http:ClientError {
+    string endpoint = "/apis/dp.wso2.com/v1alpha3/namespaces/" + namespace + "/airatelimitpolicies";
+    return k8sApiServerEp->post(endpoint, rateLimitPolicy, targetType = http:Response);
+}
+
 isolated function updateRateLimitPolicyCR(model:RateLimitPolicy rateLimitPolicy, string namespace) returns http:Response|http:ClientError {
     string endpoint = "/apis/dp.wso2.com/v1alpha1/namespaces/" + namespace + "/ratelimitpolicies/" + rateLimitPolicy.metadata.name;
     return k8sApiServerEp->put(endpoint, rateLimitPolicy, targetType = http:Response);
 }
 
+isolated function updateAIRateLimitPolicyCR(model:AIRateLimitPolicy rateLimitPolicy, string namespace) returns http:Response|http:ClientError {
+    string endpoint = "/apis/dp.wso2.com/v1alpha3/namespaces/" + namespace + "/airatelimitpolicies/" + rateLimitPolicy.metadata.name;
+    return k8sApiServerEp->put(endpoint, rateLimitPolicy, targetType = http:Response);
+}
+
 isolated function getRateLimitPolicyCR(string name, string namespace) returns model:RateLimitPolicy|http:ClientError {
     string endpoint = "/apis/dp.wso2.com/v1alpha1/namespaces/" + namespace + "/ratelimitpolicies/" + name;
     return k8sApiServerEp->get(endpoint, targetType = model:RateLimitPolicy);
 }
 
+isolated function getAIRateLimitPolicyCR(string name, string namespace) returns model:AIRateLimitPolicy|http:ClientError {
+    string endpoint = "/apis/dp.wso2.com/v1alpha3/namespaces/" + namespace + "/airatelimitpolicies/" + name;
+    return k8sApiServerEp->get(endpoint, targetType = model:AIRateLimitPolicy);
+}
+
 isolated function deleteRateLimitPolicyCR(string name, string namespace) returns http:Response|http:ClientError {
     string endpoint = "/apis/dp.wso2.com/v1alpha1/namespaces/" + namespace + "/ratelimitpolicies/" + name;
     return k8sApiServerEp->delete(endpoint, targetType = http:Response);
 }
 
+isolated function deleteAIRateLimitPolicyCR(string name, string namespace) returns http:Response|http:ClientError {
+    string endpoint = "/apis/dp.wso2.com/v1alpha3/namespaces/" + namespace + "/airatelimitpolicies/" + name;
+    return k8sApiServerEp->delete(endpoint, targetType = http:Response);
+}
+
 isolated function getRateLimitPolicyCRsForAPI(string apiName, string apiVersion, string namespace, string organization) returns model:RateLimitPolicyList|http:ClientError|error {
     string endpoint = "/apis/dp.wso2.com/v1alpha1/namespaces/" + namespace + "/ratelimitpolicies?labelSelector=" + check generateUrlEncodedLabelSelector(apiName, apiVersion, organization);
     return k8sApiServerEp->get(endpoint, targetType = model:RateLimitPolicyList);
 }
 
+isolated function getAIRateLimitPolicyCRsForAPI(string apiName, string apiVersion, string namespace, string organization) returns model:AIRateLimitPolicyList|http:ClientError|error {
+    string endpoint = "/apis/dp.wso2.com/v1alpha3/namespaces/" + namespace + "/airatelimitpolicies?labelSelector=" + check generateUrlEncodedLabelSelector(apiName, apiVersion, organization);
+    return k8sApiServerEp->get(endpoint, targetType = model:AIRateLimitPolicyList);
+}
+
 isolated function deployAPIPolicyCR(model:APIPolicy apiPolicy, string namespace) returns http:Response|http:ClientError {
     string endpoint = "/apis/dp.wso2.com/v1alpha3/namespaces/" + namespace + "/apipolicies";
     return k8sApiServerEp->post(endpoint, apiPolicy, targetType = http:Response);
diff --git a/runtime/config-deployer-service/ballerina/modules/model/AIRatelimitPolicy.bal b/runtime/config-deployer-service/ballerina/modules/model/AIRatelimitPolicy.bal
new file mode 100644
index 000000000..6c7a0f8f4
--- /dev/null
+++ b/runtime/config-deployer-service/ballerina/modules/model/AIRatelimitPolicy.bal
@@ -0,0 +1,47 @@
+//
+// Copyright (c) 2024, WSO2 LLC. (http://www.wso2.com).
+//
+// WSO2 LLC. licenses this file to you under the Apache License,
+// Version 2.0 (the "License"); you may not use this file except
+// in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+//
+public type AIRateLimitPolicy record {
+    string apiVersion = "dp.wso2.com/v1alpha3";
+    string kind = "AIRateLimitPolicy";
+    Metadata metadata;
+    AIRateLimitPolicySpec spec;
+};
+
+public type AIRateLimitPolicySpec record {|
+    AIRateLimitPolicyData override?;
+    AIRateLimitPolicyData default?;
+    TargetRef targetRef;
+|};
+
+public type AIRateLimitPolicyData record {
+    string organization;
+    TokenAIRL tokenCount;
+    RequestAIRL requestCount;
+};
+
+public type TokenAIRL record {
+    string unit;
+    int requestTokenCount;
+    int responseTokenCount;
+    int totalTokenCount;
+};
+
+public type RequestAIRL record {
+    string unit;
+    int requestsPerUnit;
+};
diff --git a/runtime/config-deployer-service/ballerina/modules/model/APIArtifact.bal b/runtime/config-deployer-service/ballerina/modules/model/APIArtifact.bal
index b40f95448..2e18b2add 100644
--- a/runtime/config-deployer-service/ballerina/modules/model/APIArtifact.bal
+++ b/runtime/config-deployer-service/ballerina/modules/model/APIArtifact.bal
@@ -15,6 +15,7 @@ public type APIArtifact record {|
     map<Authentication> authenticationMap = {};
     map<Scope> scopes = {};
     map<RateLimitPolicy> rateLimitPolicies = {};
+    map<AIRateLimitPolicy> aiRatelimitPolicies = {};
     map<APIPolicy> apiPolicies = {};
     map<InterceptorService> interceptorServices = {};
     boolean sandboxEndpointAvailable = false;
diff --git a/runtime/config-deployer-service/ballerina/modules/model/RateLimit.bal b/runtime/config-deployer-service/ballerina/modules/model/RateLimit.bal
index a24cffbf2..8cf969c21 100644
--- a/runtime/config-deployer-service/ballerina/modules/model/RateLimit.bal
+++ b/runtime/config-deployer-service/ballerina/modules/model/RateLimit.bal
@@ -43,3 +43,10 @@ public type RateLimitPolicyList record {
     ListMeta metadata;
     RateLimitPolicy[] items;
 };
+
+public type AIRateLimitPolicyList record {
+    string apiVersion = "dp.wso2.com/v1alpha3";
+    string kind = "AIRateLimitPolicyList";
+    ListMeta metadata;
+    AIRateLimitPolicy[] items;
+};
diff --git a/runtime/config-deployer-service/ballerina/resources/apk-conf-schema.yaml b/runtime/config-deployer-service/ballerina/resources/apk-conf-schema.yaml
index afe8fc75d..248af0f7c 100644
--- a/runtime/config-deployer-service/ballerina/resources/apk-conf-schema.yaml
+++ b/runtime/config-deployer-service/ballerina/resources/apk-conf-schema.yaml
@@ -360,6 +360,8 @@ components:
           $ref: "#/components/schemas/Certificate"
         resiliency:
           $ref: "#/components/schemas/Resiliency"
+        aiRatelimit:
+          $ref: "#/components/schemas/AIRatelimit"
       additionalProperties: false
     Certificate:
       type: object
@@ -599,3 +601,57 @@ components:
           type: string
           default: string
       additionalProperties: false
+    AIRatelimit:
+      type: object
+      required:
+        - enabled
+        - token
+        - request
+      properties:
+        enabled:
+          type: boolean
+          default: true
+        token:
+          $ref: "#/components/schemas/TokenAIRL"
+        request:
+          $ref: "#/components/schemas/RequestAIRL"
+    TokenAIRL:
+      type: object
+      required:
+        - promptLimit
+        - completionLimit
+        - totalLimit
+        - unit
+      properties:
+        promptLimit:
+          type: integer
+          default: 0
+        completionLimit:
+          type: integer
+          default: 0
+        totalLimit:
+          type: integer
+          default: 0
+        unit:
+          type: string
+          default: Minute
+          enum:
+            - Minute
+            - Hour
+            - Day
+    RequestAIRL:
+      type: object
+      required:
+        - requestLimit
+        - unit
+      properties:
+        requestLimit:
+          type: integer
+          default: 0
+        unit:
+          type: string
+          default: Minute
+          enum:
+            - Minute
+            - Hour
+            - Day
diff --git a/runtime/config-deployer-service/ballerina/types.bal b/runtime/config-deployer-service/ballerina/types.bal
index dee63aa89..ed2abbccd 100644
--- a/runtime/config-deployer-service/ballerina/types.bal
+++ b/runtime/config-deployer-service/ballerina/types.bal
@@ -234,6 +234,38 @@ public type Resiliency record {
     RetryPolicy retryPolicy?;
 };
 
+# Configuration of AIRatelimit settings.
+#
+# + token - Token-based rate limit configuration (prompt, completion and total limits).
+# + request - Request-count-based rate limit configuration.
+public type AIRatelimit record {
+    boolean enabled;
+    TokenAIRL token;
+    RequestAIRL request;
+};
+
+# Configuration for Token AI rate limit settings.
+#
+# + promptLimit - Limit for prompts within the specified unit.
+# + completionLimit - Limit for completions within the specified unit.
+# + totalLimit - Total limit combining prompt and completion counts.
+# + unit - The time unit for the rate limits (Minute, Hour, Day).
+public type TokenAIRL record {
+    int promptLimit;
+    int completionLimit;
+    int totalLimit;
+    string unit;
+};
+
+# Configuration for Request AI rate limit settings.
+#
+# + requestLimit - Limit for requests within the specified unit.
+# + unit - The time unit for the request limits (Minute, Hour, Day).
+public type RequestAIRL record {
+    int requestLimit;
+    string unit;
+};
+
 # Configuration of CircuitBreaker settings.
 #
 # + maxConnectionPools - The maximum number of connection pools allowed.
@@ -267,11 +299,13 @@ public type EndpointConfigurations record {
 # + endpointSecurity - The security configuration for the endpoint.
 # + certificate - The certificate configuration for the endpoint.
 # + resiliency - The resiliency configuration for the endpoint.
+# + aiRatelimit - The AI rate limit configuration for the endpoint.
 public type EndpointConfiguration record {
     string|K8sService endpoint;
     EndpointSecurity endpointSecurity?;
     Certificate certificate?;
     Resiliency resiliency?;
+    AIRatelimit aiRatelimit?;
 };
 
 # Configuration of OAuth2 Authentication type.
diff --git a/runtime/config-deployer-service/docker/config-deployer/conf/apk-schema.json b/runtime/config-deployer-service/docker/config-deployer/conf/apk-schema.json
index 19f723ee2..9ab31107d 100644
--- a/runtime/config-deployer-service/docker/config-deployer/conf/apk-schema.json
+++ b/runtime/config-deployer-service/docker/config-deployer/conf/apk-schema.json
@@ -532,6 +532,10 @@
         "resiliency": {
           "$ref": "#/schemas/Resiliency",
           "description": "Resiliency configuration for the API endpoint."
+        },
+        "aiRatelimit": {
+          "$ref": "#/schemas/AIRatelimit",
+          "description": "AI ratelimit configuration for the API endpoint."
         }
       },
       "additionalProperties": false
@@ -632,6 +636,93 @@
       },
       "additionalProperties": false
     },
+    "AIRatelimit": {
+      "type": "object",
+      "required": [
+        "enabled",
+        "token",
+        "request"
+      ],
+      "description": "Endpoint AI ratelimit related configurations of the API",
+      "properties": {
+        "enabled" : {
+          "type" : "boolean",
+          "default": true,
+          "description": "States whether the AI ratelimit is turned on or not"
+        },
+        "token": {
+          "$ref": "#/schemas/TokenAIRL"
+        },
+        "request": {
+          "$ref": "#/schemas/RequestAIRL"
+        }
+      },
+      "additionalProperties": false
+    },
+    "TokenAIRL": {
+      "type": "object",
+      "required": [
+        "promptLimit",
+        "completionLimit",
+        "totalLimit",
+        "unit"
+      ],
+      "description": "Token limits configuration for AI rate limiting",
+      "properties": {
+        "promptLimit": {
+          "type": "integer",
+          "default": 0,
+          "description": "Limit for prompts within the specified unit"
+        },
+        "completionLimit": {
+          "type": "integer",
+          "default": 0,
+          "description": "Limit for completions within the specified unit"
+        },
+        "totalLimit": {
+          "type": "integer",
+          "default": 0,
+          "description": "Total limit combining prompt and completion counts"
+        },
+        "unit": {
+          "type": "string",
+          "default": "Minute",
+          "enum": [
+            "Minute",
+            "Hour",
+            "Day"
+          ],
+          "description": "The time unit for the rate limits"
+        }
+      },
+      "additionalProperties": false
+    },
+    "RequestAIRL": {
+      "type": "object",
+      "required": [
+        "requestLimit",
+        "unit"
+      ],
+      "description": "Request limits configuration for AI rate limiting",
+      "properties": {
+        "requestLimit": {
+          "type": "integer",
+          "default": 0,
+          "description": "Limit for requests within the specified unit"
+        },
+        "unit": {
+          "type": "string",
+          "default": "Minute",
+          "enum": [
+            "Minute",
+            "Hour",
+            "Day"
+          ],
+          "description": "The time unit for the request limits"
+        }
+      },
+      "additionalProperties": false
+    },
     "CircuitBreaker": {
       "type": "object",
       "properties": {

From bcacdcd019dbfa7981d3cd905fe4127f298762ec Mon Sep 17 00:00:00 2001
From: Tharsanan1 <tharsanan.15@cse.mrt.ac.lk>
Date: Fri, 20 Sep 2024 08:28:53 +0530
Subject: [PATCH 2/2] Add integration test

---
 test/cucumber-tests/CRs/artifacts.yaml        | 22 ++++++
 .../apk-confs/backend_based_airl_conf.yaml    | 41 ++++++++++
 .../api/APIBackendBasedAIRatelimit.feature    | 75 ++++++++++++++++---
 3 files changed, 127 insertions(+), 11 deletions(-)
 create mode 100644 test/cucumber-tests/src/test/resources/artifacts/apk-confs/backend_based_airl_conf.yaml

diff --git a/test/cucumber-tests/CRs/artifacts.yaml b/test/cucumber-tests/CRs/artifacts.yaml
index 56e41d317..2bbeb801a 100644
--- a/test/cucumber-tests/CRs/artifacts.yaml
+++ b/test/cucumber-tests/CRs/artifacts.yaml
@@ -1777,3 +1777,25 @@ spec:
               cpu: 10m
 ---
 
+apiVersion: dp.wso2.com/v1alpha3
+kind: AIProvider
+metadata:
+  name: llm-provider-1
+  namespace: apk-integration-test
+spec:
+  providerName : "AzureAI"
+  providerAPIVersion : "2024-06-01"
+  organization : "default"
+  model:
+    in: "Body"
+    value: "model"
+  rateLimitFields:
+    promptTokens:
+      in: "Body"
+      value: "usage.prompt_tokens"
+    completionToken:
+      in: "Body"
+      value: "usage.completion_tokens"
+    totalToken:
+      in: "Body"
+      value: "usage.total_tokens"
diff --git a/test/cucumber-tests/src/test/resources/artifacts/apk-confs/backend_based_airl_conf.yaml b/test/cucumber-tests/src/test/resources/artifacts/apk-confs/backend_based_airl_conf.yaml
new file mode 100644
index 000000000..ffa4a2078
--- /dev/null
+++ b/test/cucumber-tests/src/test/resources/artifacts/apk-confs/backend_based_airl_conf.yaml
@@ -0,0 +1,41 @@
+---
+name: "BackendBasedAIRL"
+basePath: "/backend-based-airl"
+id: "backend-based-airl"
+version: "1.0.0"
+type: "REST"
+defaultVersion: false
+subscriptionValidation: false
+aiProvider:
+  name: llm-provider-1
+  apiVersion: "2024.06.01"
+endpointConfigurations:
+  production:
+    endpoint: "http://llm-service:80"
+    aiRatelimit: 
+      enabled: true
+      token:
+        promptLimit: 5000
+        completionLimit: 10000
+        totalLimit: 15000
+        unit: Minute
+      request:
+        requestLimit: 6000
+        unit: Minute
+operations:
+- target: "/employee"
+  verb: "GET"
+  secured: true
+  scopes: []
+- target: "/employee"
+  verb: "POST"
+  secured: true
+  scopes: []
+- target: "/employee/{employeeId}"
+  verb: "PUT"
+  secured: true
+  scopes: []
+- target: "/employee/{employeeId}"
+  verb: "DELETE"
+  secured: true
+  scopes: []
diff --git a/test/cucumber-tests/src/test/resources/tests/api/APIBackendBasedAIRatelimit.feature b/test/cucumber-tests/src/test/resources/tests/api/APIBackendBasedAIRatelimit.feature
index 2ad7bebe3..13fb3c024 100644
--- a/test/cucumber-tests/src/test/resources/tests/api/APIBackendBasedAIRatelimit.feature
+++ b/test/cucumber-tests/src/test/resources/tests/api/APIBackendBasedAIRatelimit.feature
@@ -1,24 +1,25 @@
 Feature: API backend based AI ratelimit Feature
+
   Scenario: backend based AI ratelimit token detail comes in the body.
     Given The system is ready
     And I have a valid subscription
     Then I set headers
-      |Authorization|bearer ${accessToken}|
+      | Authorization | bearer ${accessToken} |
     And I wait for next minute strictly
     And I send "GET" request to "https://default.gw.wso2.com:9095/llm-api/v1.0.0/3.14/employee?send=body" with body ""
     Then the response status code should be 200
     And the response headers should contain
-      | x-ratelimit-remaining      | 4999 |
+      | x-ratelimit-remaining | 4999 |
     And I wait for 3 seconds
     And I send "GET" request to "https://default.gw.wso2.com:9095/llm-api/v1.0.0/3.14/employee?send=body" with body ""
     Then the response status code should be 200
     And the response headers should contain
-      | x-ratelimit-remaining      | 4699 |
+      | x-ratelimit-remaining | 4699 |
     And I wait for 3 seconds
     And I send "GET" request to "https://default.gw.wso2.com:9095/llm-api/v1.0.0/3.14/employee?send=body&prompt_tokens=40000" with body ""
     Then the response status code should be 200
     And the response headers should contain
-      | x-ratelimit-remaining      | 4399 |
+      | x-ratelimit-remaining | 4399 |
     And I wait for 3 seconds
     And I send "GET" request to "https://default.gw.wso2.com:9095/llm-api/v1.0.0/3.14/employee?send=body&prompt_tokens=40000" with body ""
     Then the response status code should be 429
@@ -34,26 +35,27 @@ Feature: API backend based AI ratelimit Feature
     And I wait for 3 seconds
     And I send "GET" request to "https://default.gw.wso2.com:9095/llm-api/v1.0.0/3.14/employee?send=body" with body ""
     Then the response status code should be 429
+
   Scenario: backend based AI ratelimit token detail comes in the header.
     Given The system is ready
     And I have a valid subscription
     Then I set headers
-      |Authorization|bearer ${accessToken}|
+      | Authorization | bearer ${accessToken} |
     And I wait for next minute strictly
     And I send "GET" request to "https://default.gw.wso2.com:9095/llm-api-header/v1.0.0/3.14/employee?send=header" with body ""
     Then the response status code should be 200
     And the response headers should contain
-      | x-ratelimit-remaining      | 4999 |
+      | x-ratelimit-remaining | 4999 |
     And I wait for 3 seconds
     And I send "GET" request to "https://default.gw.wso2.com:9095/llm-api-header/v1.0.0/3.14/employee?send=header" with body ""
     Then the response status code should be 200
     And the response headers should contain
-      | x-ratelimit-remaining      | 4699 |
+      | x-ratelimit-remaining | 4699 |
     And I wait for 3 seconds
     And I send "GET" request to "https://default.gw.wso2.com:9095/llm-api-header/v1.0.0/3.14/employee?send=header&prompt_tokens=40000" with body ""
     Then the response status code should be 200
     And the response headers should contain
-      | x-ratelimit-remaining      | 4399 |
+      | x-ratelimit-remaining | 4399 |
     And I wait for 3 seconds
     And I send "GET" request to "https://default.gw.wso2.com:9095/llm-api-header/v1.0.0/3.14/employee?send=header&prompt_tokens=40000" with body ""
     Then the response status code should be 429
@@ -69,18 +71,69 @@ Feature: API backend based AI ratelimit Feature
     And I wait for 3 seconds
     And I send "GET" request to "https://default.gw.wso2.com:9095/llm-api-header/v1.0.0/3.14/employee?send=header" with body ""
     Then the response status code should be 429
+
   Scenario: backend based AI ratelimit token detail comes in the header but a body configured api checked.
     Given The system is ready
     And I have a valid subscription
     Then I set headers
-      |Authorization|bearer ${accessToken}|
+      | Authorization | bearer ${accessToken} |
     And I wait for next minute strictly
     And I send "GET" request to "https://default.gw.wso2.com:9095/llm-api/v1.0.0/3.14/employee?send=header" with body ""
     Then the response status code should be 200
     And the response headers should contain
-      | x-ratelimit-remaining      | 4999 |
+      | x-ratelimit-remaining | 4999 |
     And I wait for 3 seconds
     And I send "GET" request to "https://default.gw.wso2.com:9095/llm-api/v1.0.0/3.14/employee?send=header" with body ""
     Then the response status code should be 200
     And the response headers should contain
-      | x-ratelimit-remaining      | 4998 |
\ No newline at end of file
+      | x-ratelimit-remaining | 4998 |
+
+  Scenario: APK Conf backend based AI ratelimit token detail comes in the body.
+    Given The system is ready
+    And I have a valid subscription
+    When I use the APK Conf file "artifacts/apk-confs/backend_based_airl_conf.yaml"
+    And the definition file "artifacts/definitions/employees_api.json"
+    And make the API deployment request
+    Then the response status code should be 200
+    Then I set headers
+      | Authorization | bearer ${accessToken} |
+    And I wait for next minute strictly
+    And I send "GET" request to "https://default.gw.wso2.com:9095/backend-based-airl/1.0.0/employee?send=body" with body ""
+    Then the response status code should be 200
+    And the response headers should contain
+      | x-ratelimit-remaining | 4999 |
+    And I wait for 3 seconds
+    And I send "GET" request to "https://default.gw.wso2.com:9095/backend-based-airl/1.0.0/employee?send=body" with body ""
+    Then the response status code should be 200
+    And the response headers should contain
+      | x-ratelimit-remaining | 4699 |
+    And I wait for 3 seconds
+    And I send "GET" request to "https://default.gw.wso2.com:9095/backend-based-airl/1.0.0/employee?send=body&prompt_tokens=40000" with body ""
+    Then the response status code should be 200
+    And the response headers should contain
+      | x-ratelimit-remaining | 4399 |
+    And I wait for 3 seconds
+    And I send "GET" request to "https://default.gw.wso2.com:9095/backend-based-airl/1.0.0/employee?send=body&prompt_tokens=40000" with body ""
+    Then the response status code should be 429
+    And I wait for next minute strictly
+    And I send "GET" request to "https://default.gw.wso2.com:9095/backend-based-airl/1.0.0/employee?send=body&completion_tokens=40000" with body ""
+    Then the response status code should be 200
+    And I wait for 3 seconds
+    And I send "GET" request to "https://default.gw.wso2.com:9095/backend-based-airl/1.0.0/employee?send=body" with body ""
+    Then the response status code should be 429
+    And I wait for next minute strictly
+    And I send "GET" request to "https://default.gw.wso2.com:9095/backend-based-airl/1.0.0/employee?send=body&total_tokens=40000" with body ""
+    Then the response status code should be 200
+    And I wait for 3 seconds
+    And I send "GET" request to "https://default.gw.wso2.com:9095/backend-based-airl/1.0.0/employee?send=body" with body ""
+    Then the response status code should be 429
+
+  Scenario Outline: Undeploy API
+    Given The system is ready
+    And I have a valid subscription
+    When I undeploy the API whose ID is "<apiID>"
+    Then the response status code should be <expectedStatusCode>
+
+    Examples:
+      | apiID              | expectedStatusCode |
+      | backend-based-airl |                202 |