From 8191035571d58713f0655b2baee6105a53cbcad8 Mon Sep 17 00:00:00 2001
From: Alexey Volkov
Date: Wed, 14 Jun 2023 18:08:13 -0700
Subject: [PATCH] chore: [LLM] Added system tests for tuning

The tests cover tuning as well as listing and loading the tuned models.

PiperOrigin-RevId: 540433943
---
 .../system/aiplatform/test_language_models.py | 63 +++++++++++++++++++
 1 file changed, 63 insertions(+)

diff --git a/tests/system/aiplatform/test_language_models.py b/tests/system/aiplatform/test_language_models.py
index f91ac2efad..b50403bc7f 100644
--- a/tests/system/aiplatform/test_language_models.py
+++ b/tests/system/aiplatform/test_language_models.py
@@ -81,3 +81,66 @@ def test_text_embedding(self):
         for embedding in embeddings:
             vector = embedding.values
             assert len(vector) == 768
+
+    def test_tuning(self, shared_state):
+        """Tests tuning a model, then listing and loading tuned models."""
+        aiplatform.init(project=e2e_base._PROJECT, location=e2e_base._LOCATION)
+
+        model = TextGenerationModel.from_pretrained("google/text-bison@001")
+
+        import pandas
+
+        training_data = pandas.DataFrame(
+            data=[
+                {"input_text": "Input 0", "output_text": "Output 0"},
+                {"input_text": "Input 1", "output_text": "Output 1"},
+                {"input_text": "Input 2", "output_text": "Output 2"},
+                {"input_text": "Input 3", "output_text": "Output 3"},
+                {"input_text": "Input 4", "output_text": "Output 4"},
+                {"input_text": "Input 5", "output_text": "Output 5"},
+                {"input_text": "Input 6", "output_text": "Output 6"},
+                {"input_text": "Input 7", "output_text": "Output 7"},
+                {"input_text": "Input 8", "output_text": "Output 8"},
+                {"input_text": "Input 9", "output_text": "Output 9"},
+            ]
+        )
+
+        model.tune_model(
+            training_data=training_data,
+            train_steps=1,
+            tuning_job_location="europe-west4",
+            tuned_model_location="us-central1",
+        )
+        # According to the Pipelines design, external resources created by a pipeline
+        # must not be modified or deleted; otherwise caching breaks subsequent runs.
+        shared_state.setdefault("resources", [])
+        shared_state["resources"].append(model._endpoint)
+        shared_state["resources"].extend(
+            aiplatform.Model(model_name=deployed_model.model)
+            for deployed_model in model._endpoint.list_models()
+        )
+        # Deleting the Endpoint is less harmful since the LLM SDK recreates it, but it is not advised for the same reason.
+
+        response = model.predict(
+            "What is the best recipe for banana bread? Recipe:",
+            max_output_tokens=128,
+            temperature=0,
+            top_p=1,
+            top_k=5,
+        )
+        assert response.text
+
+        tuned_model_names = model.list_tuned_model_names()
+        assert tuned_model_names
+        tuned_model_name = tuned_model_names[0]
+
+        tuned_model = TextGenerationModel.get_tuned_model(tuned_model_name)
+
+        tuned_model_response = tuned_model.predict(
+            "What is the best recipe for banana bread? Recipe:",
+            max_output_tokens=128,
+            temperature=0,
+            top_p=1,
+            top_k=5,
+        )
+        assert tuned_model_response.text
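
For reviewers who want to exercise this flow outside the pytest harness, below is a minimal sketch of the tuning path the new test covers. It is derived directly from the calls in the test itself; the vertexai.preview.language_models import path, the placeholder project ID, and reliance on Application Default Credentials are assumptions rather than part of this patch.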
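
# Minimal sketch of the tuning flow exercised by test_tuning above.
# Assumptions: the vertexai.preview import path, a placeholder project ID,
# and Application Default Credentials already configured.
import pandas

from google.cloud import aiplatform
from vertexai.preview.language_models import TextGenerationModel

aiplatform.init(project="my-project", location="us-central1")

model = TextGenerationModel.from_pretrained("google/text-bison@001")

# Ten tiny input/output pairs, mirroring the fixture data in the test.
training_data = pandas.DataFrame(
    data=[
        {"input_text": f"Input {i}", "output_text": f"Output {i}"}
        for i in range(10)
    ]
)

# Launches a tuning pipeline. As in the test, the tuning job runs in
# europe-west4 while the tuned model is deployed in us-central1.
model.tune_model(
    training_data=training_data,
    train_steps=1,
    tuning_job_location="europe-west4",
    tuned_model_location="us-central1",
)

# After tuning, the same object serves predictions from the tuned deployment.
print(model.predict("What is the best recipe for banana bread? Recipe:").text)

# Tuned models can later be rediscovered by name and reloaded.
tuned_model = TextGenerationModel.get_tuned_model(model.list_tuned_model_names()[0])
print(tuned_model.predict("What is the best recipe for banana bread? Recipe:").text)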
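
One design note on the test itself: the endpoint and tuned models are appended to shared_state["resources"] instead of being deleted inline, presumably so the shared e2e fixtures can clean them up after the test session. As the inline comments explain, deleting pipeline-created resources mid-run would break pipeline caching for subsequent runs.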