Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add apk conf for airl #2478

Merged
merged 2 commits into from
Sep 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,9 @@ func (r *AIRateLimitPolicyReconciler) Reconcile(ctx context.Context, req ctrl.Re
xds.UpdateRateLimiterPolicies(conf.CommonController.Server.Label)
} else {
loggers.LoggerAPKOperator.Infof("ratelimits found")
if ratelimitPolicy.Spec.Override == nil {
ratelimitPolicy.Spec.Override = ratelimitPolicy.Spec.Default
}
if ratelimitPolicy.Spec.TargetRef.Name != "" {
r.ods.AddorUpdateAIRatelimitToStore(ratelimitKey, ratelimitPolicy.Spec)
xds.UpdateRateLimitXDSCacheForAIRatelimitPolicies(r.ods.GetAIRatelimitPolicySpecs())
Expand Down
37 changes: 37 additions & 0 deletions runtime/config-deployer-service/ballerina/APIClient.bal
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,11 @@ public class APIClient {
serviceEntry: false,
url: self.constructURlFromService(sandboxEndpointConfig.endpoint)
};
AIRatelimit? aiRatelimit = sandboxEndpointConfig.aiRatelimit;
if aiRatelimit is AIRatelimit && aiRatelimit.enabled {
model:AIRateLimitPolicy airl = self.generateAIRateLimitPolicyCR(apkConf, aiRatelimit.token, aiRatelimit.request, backendService.metadata.name, organization);
apiArtifact.aiRatelimitPolicies[airl.metadata.name] = airl;
}
}
}
if (endpointType == () || endpointType == PRODUCTION_TYPE) {
Expand All @@ -246,6 +251,11 @@ public class APIClient {
serviceEntry: false,
url: self.constructURlFromService(productionEndpointConfig.endpoint)
};
AIRatelimit? aiRatelimit = productionEndpointConfig.aiRatelimit;
if aiRatelimit is AIRatelimit && aiRatelimit.enabled {
model:AIRateLimitPolicy airl = self.generateAIRateLimitPolicyCR(apkConf, aiRatelimit.token, aiRatelimit.request, backendService.metadata.name, organization);
apiArtifact.aiRatelimitPolicies[airl.metadata.name] = airl;
}
}
}
return endpointIdMap;
Expand Down Expand Up @@ -1506,6 +1516,29 @@ public class APIClient {
return rateLimitPolicyCR;
}

// Builds the AIRateLimitPolicy CR for one backend of the API described by apkConf.
//
// apkConf       - API configuration; its name and version are hashed (SHA-1, hex) into the CR name.
// tokenAIRL     - token limits from the apk-conf (promptLimit / completionLimit / totalLimit / unit).
// requestAIRL   - request-count limit from the apk-conf (requestLimit / unit).
// targetRefName - name of the Backend resource the policy targets; also the suffix of the CR name.
// organization  - organization whose name is written into the policy data and used for the labels.
//
// Only the `default` section of the spec is populated; `override` is left unset.
public isolated function generateAIRateLimitPolicyCR(APKConf apkConf, TokenAIRL tokenAIRL, RequestAIRL requestAIRL, string targetRefName, commons:Organization organization) returns model:AIRateLimitPolicy {
    byte[] apiIdentifierBytes = (apkConf.name + apkConf.version).toBytes();
    string apiIdentifierHash = crypto:hashSha1(apiIdentifierBytes).toBase16();
    string policyName = self.retrieveAIRateLimitPolicyName(apiIdentifierHash, targetRefName);

    // Translate the apk-conf field names into the field names used by the CR.
    model:RequestAIRL requestCount = {
        unit: requestAIRL.unit,
        requestsPerUnit: requestAIRL.requestLimit
    };
    model:TokenAIRL tokenCount = {
        unit: tokenAIRL.unit,
        requestTokenCount: tokenAIRL.promptLimit,
        responseTokenCount: tokenAIRL.completionLimit,
        totalTokenCount: tokenAIRL.totalLimit
    };

    return {
        metadata: {
            name: policyName,
            labels: self.getLabels(apkConf, organization)
        },
        spec: {
            default: {
                organization: organization.name,
                requestCount: requestCount,
                tokenCount: tokenCount
            },
            targetRef: {
                group: "dp.wso2.com",
                kind: "Backend",
                name: targetRefName
            }
        }
    };
}

isolated function retrieveRateLimitData(RateLimit rateLimit, commons:Organization organization) returns model:RateLimitData {
model:RateLimitData rateLimitData = {
api: {
Expand Down Expand Up @@ -1933,6 +1966,10 @@ public class APIClient {
}
}

// Returns the AIRateLimitPolicy CR name in the form "airl-<apiID>-<targetRef>".
public isolated function retrieveAIRateLimitPolicyName(string apiID, string targetRef) returns string {
    return string `airl-${apiID}-${targetRef}`;
}

private isolated function validateAndRetrieveAPKConfiguration(json apkconfJson) returns APKConf|commons:APKError? {
do {
runtimeapi:APKConfValidationResponse validationResponse = check apkConfValidator.validate(apkconfJson.toJsonString());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,10 @@ on fail var e {
string yamlString = check self.convertJsonToYaml(rateLimitPolicy.toJsonString());
_ = check self.storeFile(yamlString, rateLimitPolicy.metadata.name, zipDir);
}
foreach model:AIRateLimitPolicy airateLimitPolicy in apiArtifact.aiRatelimitPolicies {
string yamlString = check self.convertJsonToYaml(airateLimitPolicy.toJsonString());
_ = check self.storeFile(yamlString, airateLimitPolicy.metadata.name, zipDir);
}
foreach model:APIPolicy apiPolicy in apiArtifact.apiPolicies {
string yamlString = check self.convertJsonToYaml(apiPolicy.toJsonString());
_ = check self.storeFile(yamlString, apiPolicy.metadata.name, zipDir);
Expand Down
49 changes: 49 additions & 0 deletions runtime/config-deployer-service/ballerina/DeployerClient.bal
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ public class DeployerClient {
_ = check self.deleteScopeCrsForAPI(existingAPI, <string>apiArtifact?.organization);
check self.deleteBackends(existingAPI, <string>apiArtifact?.organization);
check self.deleteRateLimitPolicyCRs(existingAPI, <string>apiArtifact?.organization);
check self.deleteAIRateLimitPolicyCRs(existingAPI, <string>apiArtifact?.organization);
check self.deleteAPIPolicyCRs(existingAPI, <string>apiArtifact?.organization);
check self.deleteInterceptorServiceCRs(existingAPI, <string>apiArtifact?.organization);
check self.deleteBackendJWTConfig(existingAPI, <string>apiArtifact?.organization);
Expand All @@ -121,6 +122,7 @@ public class DeployerClient {
check self.deployBackendServices(apiArtifact, ownerReference);
check self.deployAuthenticationCRs(apiArtifact, ownerReference);
check self.deployRateLimitPolicyCRs(apiArtifact, ownerReference);
check self.deployAIRateLimitPolicyCRs(apiArtifact, ownerReference);
check self.deployInterceptorServiceCRs(apiArtifact, ownerReference);
check self.deployBackendJWTConfigs(apiArtifact, ownerReference);
check self.deployAPIPolicyCRs(apiArtifact, ownerReference);
Expand Down Expand Up @@ -660,6 +662,30 @@ public class DeployerClient {
}
}

// Creates every AIRateLimitPolicy held in apiArtifact.aiRatelimitPolicies, or updates it when it already exists.
//
// For each policy:
//   * the owner reference is set to `ownerReference`;
//   * a create is attempted; 201 means done;
//   * on 409 the existing CR is fetched, its resourceVersion is copied over and an update is issued;
//   * any other create status, or a non-200 update status, is parsed as model:Status and
//     passed to handleK8sTimeout.
// Any error from these calls is propagated to the caller through `check`.
private isolated function deployAIRateLimitPolicyCRs(model:APIArtifact apiArtifact, model:OwnerReference ownerReference) returns error? {
    foreach model:AIRateLimitPolicy aiPolicy in apiArtifact.aiRatelimitPolicies {
        aiPolicy.metadata.ownerReferences = [ownerReference];
        http:Response createResponse = check deployAIRateLimitPolicyCR(aiPolicy, <string>apiArtifact?.namespace);

        if createResponse.statusCode == http:STATUS_CREATED {
            log:printDebug("Deployed AIRateLimitPolicy Successfully" + aiPolicy.toString());
            continue;
        }

        if createResponse.statusCode != http:STATUS_CONFLICT {
            // Neither created nor already present: hand the K8s status to the common handler.
            json createFailurePayload = check createResponse.getJsonPayload();
            model:Status createFailureStatus = check createFailurePayload.cloneWithType(model:Status);
            check self.handleK8sTimeout(createFailureStatus);
            continue;
        }

        // The CR already exists: reuse its resourceVersion for the update request.
        log:printDebug("AIRateLimitPolicy already exists" + aiPolicy.toString());
        model:AIRateLimitPolicy existingPolicy = check getAIRateLimitPolicyCR(aiPolicy.metadata.name, <string>apiArtifact?.namespace);
        aiPolicy.metadata.resourceVersion = existingPolicy.metadata.resourceVersion;
        http:Response updateResponse = check updateAIRateLimitPolicyCR(aiPolicy, <string>apiArtifact?.namespace);
        if updateResponse.statusCode != http:STATUS_OK {
            json updateFailurePayload = check updateResponse.getJsonPayload();
            model:Status updateFailureStatus = check updateFailurePayload.cloneWithType(model:Status);
            check self.handleK8sTimeout(updateFailureStatus);
        }
    }
}

private isolated function deleteRateLimitPolicyCRs(model:API api, string organization) returns commons:APKError? {
do {
model:RateLimitPolicyList|http:ClientError rateLimitPolicyCrListResponse = check getRateLimitPolicyCRsForAPI(api.spec.apiName, api.spec.apiVersion, <string>api.metadata?.namespace, organization);
Expand All @@ -684,6 +710,29 @@ public class DeployerClient {
}
}

// Deletes every AIRateLimitPolicy CR returned by getAIRateLimitPolicyCRsForAPI for the given API.
//
// api          - API whose name, version and namespace are used to look up the policies.
// organization - organization passed to the lookup.
//
// A delete that answers with a status other than 200 has its payload parsed as model:Status
// and passed to handleK8sTimeout. A delete that fails at the HTTP client level is logged
// (including the underlying error) and the loop moves on to the next policy.
// Any error raised through `check` is logged and wrapped by e909022.
private isolated function deleteAIRateLimitPolicyCRs(model:API api, string organization) returns commons:APKError? {
    do {
        model:AIRateLimitPolicyList|http:ClientError aiRateLimitPolicyCrListResponse = check getAIRateLimitPolicyCRsForAPI(api.spec.apiName, api.spec.apiVersion, <string>api.metadata?.namespace, organization);
        if aiRateLimitPolicyCrListResponse is model:AIRateLimitPolicyList {
            foreach model:AIRateLimitPolicy item in aiRateLimitPolicyCrListResponse.items {
                http:Response|http:ClientError rateLimitPolicyCRDeletionResponse = deleteAIRateLimitPolicyCR(item.metadata.name, <string>item.metadata?.namespace);
                if rateLimitPolicyCRDeletionResponse is http:Response {
                    if rateLimitPolicyCRDeletionResponse.statusCode != http:STATUS_OK {
                        json responsePayLoad = check rateLimitPolicyCRDeletionResponse.getJsonPayload();
                        model:Status statusResponse = check responsePayLoad.cloneWithType(model:Status);
                        check self.handleK8sTimeout(statusResponse);
                    }
                } else {
                    // Keep going with the remaining policies, but record why this delete failed.
                    log:printError("Error occured while deleting AI rate limit policy", rateLimitPolicyCRDeletionResponse);
                }
            }
            return;
        }
    } on fail var e {
        log:printError("Error occured deleting AI rate limit policy", e);
        return e909022("Error occured deleting AI rate limit policy", e);
    }
}
private isolated function deleteAPIPolicyCRs(model:API api, string organization) returns commons:APKError? {
do {
model:APIPolicyList|http:ClientError apiPolicyCrListResponse = check getAPIPolicyCRsForAPI(api.spec.apiName, api.spec.apiVersion, <string>api.metadata?.namespace, organization);
Expand Down
25 changes: 25 additions & 0 deletions runtime/config-deployer-service/ballerina/K8sClient.bal
Original file line number Diff line number Diff line change
Expand Up @@ -251,26 +251,51 @@ isolated function deployRateLimitPolicyCR(model:RateLimitPolicy rateLimitPolicy,
return k8sApiServerEp->post(endpoint, rateLimitPolicy, targetType = http:Response);
}

// POSTs the given AIRateLimitPolicy to the dp.wso2.com/v1alpha3 airatelimitpolicies
// collection of `namespace` through k8sApiServerEp and returns the raw HTTP response.
isolated function deployAIRateLimitPolicyCR(model:AIRateLimitPolicy rateLimitPolicy, string namespace) returns http:Response|http:ClientError {
    string collectionPath = string `/apis/dp.wso2.com/v1alpha3/namespaces/${namespace}/airatelimitpolicies`;
    return k8sApiServerEp->post(collectionPath, rateLimitPolicy, targetType = http:Response);
}

// PUTs the given RateLimitPolicy to its dp.wso2.com/v1alpha1 resource path
// (addressed by metadata.name inside `namespace`) and returns the raw HTTP response.
isolated function updateRateLimitPolicyCR(model:RateLimitPolicy rateLimitPolicy, string namespace) returns http:Response|http:ClientError {
    string resourcePath = string `/apis/dp.wso2.com/v1alpha1/namespaces/${namespace}/ratelimitpolicies/${rateLimitPolicy.metadata.name}`;
    return k8sApiServerEp->put(resourcePath, rateLimitPolicy, targetType = http:Response);
}

// PUTs the given AIRateLimitPolicy to its dp.wso2.com/v1alpha3 resource path
// (addressed by metadata.name inside `namespace`) and returns the raw HTTP response.
isolated function updateAIRateLimitPolicyCR(model:AIRateLimitPolicy rateLimitPolicy, string namespace) returns http:Response|http:ClientError {
    string resourcePath = string `/apis/dp.wso2.com/v1alpha3/namespaces/${namespace}/airatelimitpolicies/${rateLimitPolicy.metadata.name}`;
    return k8sApiServerEp->put(resourcePath, rateLimitPolicy, targetType = http:Response);
}

// GETs the dp.wso2.com/v1alpha1 RateLimitPolicy called `name` in `namespace`
// and binds the response payload to model:RateLimitPolicy.
isolated function getRateLimitPolicyCR(string name, string namespace) returns model:RateLimitPolicy|http:ClientError {
    string resourcePath = string `/apis/dp.wso2.com/v1alpha1/namespaces/${namespace}/ratelimitpolicies/${name}`;
    return k8sApiServerEp->get(resourcePath, targetType = model:RateLimitPolicy);
}

// GETs the dp.wso2.com/v1alpha3 AIRateLimitPolicy called `name` in `namespace`
// and binds the response payload to model:AIRateLimitPolicy.
isolated function getAIRateLimitPolicyCR(string name, string namespace) returns model:AIRateLimitPolicy|http:ClientError {
    string resourcePath = string `/apis/dp.wso2.com/v1alpha3/namespaces/${namespace}/airatelimitpolicies/${name}`;
    return k8sApiServerEp->get(resourcePath, targetType = model:AIRateLimitPolicy);
}

// Sends a DELETE for the dp.wso2.com/v1alpha1 RateLimitPolicy called `name`
// in `namespace` and returns the raw HTTP response.
isolated function deleteRateLimitPolicyCR(string name, string namespace) returns http:Response|http:ClientError {
    string resourcePath = string `/apis/dp.wso2.com/v1alpha1/namespaces/${namespace}/ratelimitpolicies/${name}`;
    return k8sApiServerEp->delete(resourcePath, targetType = http:Response);
}

// Sends a DELETE for the dp.wso2.com/v1alpha3 AIRateLimitPolicy called `name`
// in `namespace` and returns the raw HTTP response.
isolated function deleteAIRateLimitPolicyCR(string name, string namespace) returns http:Response|http:ClientError {
    string resourcePath = string `/apis/dp.wso2.com/v1alpha3/namespaces/${namespace}/airatelimitpolicies/${name}`;
    return k8sApiServerEp->delete(resourcePath, targetType = http:Response);
}

// Lists the dp.wso2.com/v1alpha1 RateLimitPolicy resources in `namespace` that match the
// label selector produced by generateUrlEncodedLabelSelector(apiName, apiVersion, organization).
// An error from building the selector is returned to the caller.
isolated function getRateLimitPolicyCRsForAPI(string apiName, string apiVersion, string namespace, string organization) returns model:RateLimitPolicyList|http:ClientError|error {
    string labelSelector = check generateUrlEncodedLabelSelector(apiName, apiVersion, organization);
    string listPath = string `/apis/dp.wso2.com/v1alpha1/namespaces/${namespace}/ratelimitpolicies?labelSelector=${labelSelector}`;
    return k8sApiServerEp->get(listPath, targetType = model:RateLimitPolicyList);
}

// Lists the dp.wso2.com/v1alpha3 AIRateLimitPolicy resources in `namespace` that match the
// label selector produced by generateUrlEncodedLabelSelector(apiName, apiVersion, organization).
// An error from building the selector is returned to the caller.
isolated function getAIRateLimitPolicyCRsForAPI(string apiName, string apiVersion, string namespace, string organization) returns model:AIRateLimitPolicyList|http:ClientError|error {
    string labelSelector = check generateUrlEncodedLabelSelector(apiName, apiVersion, organization);
    string listPath = string `/apis/dp.wso2.com/v1alpha3/namespaces/${namespace}/airatelimitpolicies?labelSelector=${labelSelector}`;
    return k8sApiServerEp->get(listPath, targetType = model:AIRateLimitPolicyList);
}

isolated function deployAPIPolicyCR(model:APIPolicy apiPolicy, string namespace) returns http:Response|http:ClientError {
string endpoint = "/apis/dp.wso2.com/v1alpha3/namespaces/" + namespace + "/apipolicies";
return k8sApiServerEp->post(endpoint, apiPolicy, targetType = http:Response);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
//
// Copyright (c) 2024, WSO2 LLC. (http://www.wso2.com).
//
// WSO2 LLC. licenses this file to you under the Apache License,
// Version 2.0 (the "License"); you may not use this file except
// in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//
// Record for an AIRateLimitPolicy resource. `apiVersion` and `kind` carry fixed defaults
// ("dp.wso2.com/v1alpha3" / "AIRateLimitPolicy"), so callers only supply `metadata` and `spec`.
public type AIRateLimitPolicy record {
string apiVersion = "dp.wso2.com/v1alpha3";
string kind = "AIRateLimitPolicy";
// Resource metadata; the Metadata type is declared outside this file.
Metadata metadata;
// Rate limit values and the resource they apply to.
AIRateLimitPolicySpec spec;
};

// Spec of an AIRateLimitPolicy. This is a closed record: only the three fields below are allowed.
public type AIRateLimitPolicySpec record {|
// Optional override limits.
AIRateLimitPolicyData override?;
// Optional default limits.
AIRateLimitPolicyData default?;
// Resource the policy is attached to; the TargetRef type is declared outside this file.
TargetRef targetRef;
|};

// One set of AI rate limit values: the owning organization plus token-based and request-based limits.
public type AIRateLimitPolicyData record {
// Organization name.
string organization;
// Token-based limits.
TokenAIRL tokenCount;
// Request-count limit.
RequestAIRL requestCount;
};

// Token-based limits of an AIRateLimitPolicy.
public type TokenAIRL record {
// Time unit the counts apply to.
string unit;
// Limit on request-side tokens.
int requestTokenCount;
// Limit on response-side tokens.
int responseTokenCount;
// Limit on total tokens.
int totalTokenCount;
};

// Request-count limit of an AIRateLimitPolicy.
public type RequestAIRL record {
// Time unit the count applies to.
string unit;
// Number of requests allowed per `unit`.
int requestsPerUnit;
};
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ public type APIArtifact record {|
map<Authentication> authenticationMap = {};
map<Scope> scopes = {};
map<RateLimitPolicy> rateLimitPolicies = {};
map<AIRateLimitPolicy> aiRatelimitPolicies = {};
map<APIPolicy> apiPolicies = {};
map<InterceptorService> interceptorServices = {};
boolean sandboxEndpointAvailable = false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,10 @@ public type RateLimitPolicyList record {
ListMeta metadata;
RateLimitPolicy[] items;
};

// List wrapper for AIRateLimitPolicy resources, mirroring RateLimitPolicyList.
// `apiVersion` and `kind` default to "dp.wso2.com/v1alpha3" / "AIRateLimitPolicyList".
public type AIRateLimitPolicyList record {
string apiVersion = "dp.wso2.com/v1alpha3";
string kind = "AIRateLimitPolicyList";
// List-level metadata; the ListMeta type is declared outside this view.
ListMeta metadata;
// The policies contained in the list.
AIRateLimitPolicy[] items;
};
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,8 @@ components:
$ref: "#/components/schemas/Certificate"
resiliency:
$ref: "#/components/schemas/Resiliency"
aiRatelimit:
$ref: "#/components/schemas/AIRatelimit"
additionalProperties: false
Certificate:
type: object
Expand Down Expand Up @@ -599,3 +601,57 @@ components:
type: string
default: string
additionalProperties: false
# AI rate limit settings, referenced elsewhere in this schema through the `aiRatelimit` property.
# All three properties are required.
AIRatelimit:
type: object
required:
- enabled
- token
- request
properties:
# Switch for the AI rate limit settings.
enabled:
type: boolean
default: true
# Token-based limits.
token:
$ref: "#/components/schemas/TokenAIRL"
# Request-count limit.
request:
$ref: "#/components/schemas/RequestAIRL"
# Token-based AI rate limits. All four properties are required.
TokenAIRL:
type: object
required:
- promptLimit
- completionLimit
- totalLimit
- unit
properties:
# Limit on prompt (request-side) tokens.
promptLimit:
type: integer
default: 0
# Limit on completion (response-side) tokens.
completionLimit:
type: integer
default: 0
# Limit on total tokens.
totalLimit:
type: integer
# Fixed: the key was written as `default"` (stray quote), so no `default` keyword was set.
default: 0
# Time unit the limits apply to.
unit:
type: string
default: Minute
enum:
- Minute
- Hour
- Day
# Request-count AI rate limit. Both properties are required.
RequestAIRL:
type: object
required:
- requestLimit
- unit
properties:
# Number of requests allowed per `unit`.
requestLimit:
type: integer
default: 0
# Time unit the limit applies to.
unit:
type: string
default: Minute
enum:
- Minute
- Hour
- Day
Loading
Loading