
Commit

add suffix to artifact_location
AnandInguva committed Dec 8, 2023
1 parent 816174a commit cfb1883
Showing 2 changed files with 33 additions and 37 deletions.
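
The change itself is mechanical: instead of writing MLTransform artifacts directly into the shared self.artifact_location (a temporary directory for local tests, or a fixed GCS prefix), each test now appends a uuid.uuid4().hex suffix so repeated or concurrent runs never collide on the same artifact directory. A minimal sketch of the pattern, reusing the 'sentence-transformers/all-MiniLM-L6-v2' model that appears below; the import paths for MLTransform and SentenceTransformerEmbeddings are assumptions, since the test file's own import block is not shown in these hunks:

import os
import tempfile
import uuid

import apache_beam as beam
# Assumed import paths; this file's import block is not shown in the diff.
from apache_beam.ml.transforms.base import MLTransform
from apache_beam.ml.transforms.embeddings.huggingface import SentenceTransformerEmbeddings

# Base location shared by the test class; could equally be a GCS prefix.
base_location = tempfile.mkdtemp()
# Unique per-run suffix: parallel or repeated test runs get their own artifact dir.
artifact_location = os.path.join(base_location, uuid.uuid4().hex)

embedding_config = SentenceTransformerEmbeddings(
    model_name='sentence-transformers/all-MiniLM-L6-v2', columns=['x'])

# One pipeline writes the embedding artifacts under the unique location ...
with beam.Pipeline() as p:
    _ = (
        p
        | beam.Create([{'x': 'This is an example sentence'}])
        | MLTransform(write_artifact_location=artifact_location).with_transform(
            embedding_config))

# ... and a later pipeline can replay exactly those artifacts by reading them back.
with beam.Pipeline() as p:
    _ = (
        p
        | beam.Create([{'x': 'Each sentence is converted'}])
        | MLTransform(read_artifact_location=artifact_location))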
@@ -14,9 +14,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.

+import os
import shutil
import tempfile
import unittest
+import uuid

import numpy as np
from parameterized import parameterized
@@ -55,24 +57,12 @@
([{
test_query_column: test_query,
}], DEFAULT_MODEL_NAME, [0.13]),
-(
-[{
-test_query_column: 'query: how much protein should a female eat',
-},
-{
-test_query_column: (
-"passage: As a general guideline, the CDC's "
-"average requirement of protein for women "
-"ages 19 to 70 is 46 grams per day. But, "
-"as you can see from this chart, you'll need "
-"to increase that if you're expecting or training"
-" for a marathon. Check out the chart below "
-"to see how much protein "
-"you should be eating each day.")
-}],
-'intfloat/e5-base-v2',
-# this model requires inputs to be specified as query: and passage:
-[0.1, 0.1]),
+([{
+test_query_column: 'This is an example sentence',
+}, {
+test_query_column: ("Each sentence is converted")
+}],
+'sentence-transformers/all-MiniLM-L6-v2', [0.15, 0.14]),
]


@@ -88,6 +78,7 @@ def tearDown(self) -> None:

def test_sentence_transformer_embeddings(self):
model_name = DEFAULT_MODEL_NAME
+artifact_location = os.path.join(self.artifact_location, uuid.uuid4().hex)
embedding_config = SentenceTransformerEmbeddings(
model_name=model_name, columns=[test_query_column])
with beam.Pipeline() as pipeline:
@@ -96,9 +87,9 @@ def test_sentence_transformer_embeddings(self):
| "CreateData" >> beam.Create([{
test_query_column: test_query
}])
| "MLTransform" >> MLTransform(
write_artifact_location=self.artifact_location).with_transform(
embedding_config))
| "MLTransform" >>
MLTransform(write_artifact_location=artifact_location).with_transform(
embedding_config))

def assert_element(element):
assert len(element[test_query_column]) == 768
@@ -107,6 +98,7 @@ def assert_element(element):

@unittest.skipIf(tft is None, 'Tensorflow Transform is not installed.')
def test_embeddings_with_scale_to_0_1(self):
+artifact_location = os.path.join(self.artifact_location, uuid.uuid4().hex)
model_name = DEFAULT_MODEL_NAME
embedding_config = SentenceTransformerEmbeddings(
model_name=model_name,
@@ -118,10 +110,10 @@ def test_embeddings_with_scale_to_0_1(self):
| "CreateData" >> beam.Create([{
test_query_column: test_query
}])
| "MLTransform" >> MLTransform(
write_artifact_location=self.artifact_location).with_transform(
embedding_config).with_transform(
ScaleTo01(columns=[test_query_column])))
| "MLTransform" >>
MLTransform(write_artifact_location=artifact_location).with_transform(
embedding_config).with_transform(
ScaleTo01(columns=[test_query_column])))

def assert_element(element):
assert max(element.feature_1) == 1
@@ -134,13 +126,14 @@ def test_embeddings_with_read_artifact_location(
embedding_config = SentenceTransformerEmbeddings(
model_name=model_name, columns=[test_query_column])

+artifact_location = os.path.join(self.artifact_location, uuid.uuid4().hex)
with beam.Pipeline() as p:
result_pcoll = (
p
| "CreateData" >> beam.Create(inputs)
| "MLTransform" >> MLTransform(
write_artifact_location=self.artifact_location).with_transform(
embedding_config))
| "MLTransform" >>
MLTransform(write_artifact_location=artifact_location).with_transform(
embedding_config))
max_ele_pcoll = (
result_pcoll
| beam.Map(lambda x: round(max(x[test_query_column]), 2)))
@@ -152,7 +145,7 @@ def test_embeddings_with_read_artifact_location(
p
| "CreateData" >> beam.Create(inputs)
| "MLTransform" >>
-MLTransform(read_artifact_location=self.artifact_location))
+MLTransform(read_artifact_location=artifact_location))
max_ele_pcoll = (
result_pcoll
| beam.Map(lambda x: round(max(x[test_query_column]), 2)))
@@ -161,6 +154,7 @@ def test_embeddings_with_read_artifact_location(

def test_sentence_transformer_with_int_data_types(self):
model_name = DEFAULT_MODEL_NAME
+artifact_location = os.path.join(self.artifact_location, uuid.uuid4().hex)
embedding_config = SentenceTransformerEmbeddings(
model_name=model_name, columns=[test_query_column])
with self.assertRaises(TypeError):
@@ -171,12 +165,13 @@ def test_sentence_transformer_with_int_data_types(self):
test_query_column: 1
}])
| "MLTransform" >> MLTransform(
-write_artifact_location=self.artifact_location).with_transform(
+write_artifact_location=artifact_location).with_transform(
embedding_config))

@parameterized.expand(_parameterized_inputs)
def test_with_gcs_artifact_location(self, inputs, model_name, output):
-artifact_location = ('gs://apache-beam-ml/testing/sentence_transformers')
+artifact_location = os.path.join(
+'gs://apache-beam-ml/testing/sentence_transformers', uuid.uuid4().hex)
embedding_config = SentenceTransformerEmbeddings(
model_name=model_name, columns=[test_query_column])

@@ -207,7 +202,7 @@ def test_with_gcs_artifact_location(self, inputs, model_name, output):

def test_embeddings_with_inference_args(self):
model_name = DEFAULT_MODEL_NAME
-
+artifact_location = os.path.join(self.artifact_location, uuid.uuid4().hex)
inference_args = {'convert_to_numpy': False}
embedding_config = SentenceTransformerEmbeddings(
model_name=model_name,
@@ -219,9 +214,9 @@ def test_embeddings_with_inference_args(self):
| "CreateData" >> beam.Create([{
test_query_column: test_query
}])
| "MLTransform" >> MLTransform(
write_artifact_location=self.artifact_location).with_transform(
embedding_config))
| "MLTransform" >>
MLTransform(write_artifact_location=artifact_location).with_transform(
embedding_config))

def assert_element(element):
assert type(element) == torch.Tensor
@@ -233,14 +228,15 @@ def assert_element(element):

def test_mltransform_to_ptransform_with_vertex(self):
model_name = ''
+artifact_location = os.path.join(self.artifact_location, uuid.uuid4().hex)
transforms = [
SentenceTransformerEmbeddings(columns=['x'], model_name=model_name),
SentenceTransformerEmbeddings(
columns=['y', 'z'], model_name=model_name)
]
ptransform_mapper = base._MLTransformToPTransformMapper(
transforms=transforms,
-artifact_location=self.artifact_location,
+artifact_location=artifact_location,
artifact_mode=None)

ptransform_list = ptransform_mapper.create_and_save_ptransform_list()
@@ -119,7 +119,7 @@ def __init__(
model_name: The name of the Vertex AI Text Embedding model.
columns: The columns containing the text to be embedded.
task_type: The downstream task for the embeddings.
-Valid values: RETRIEVAL_QUERY, RETRIEVAL_DOCUMENT,
+Valid values are RETRIEVAL_QUERY, RETRIEVAL_DOCUMENT,
SEMANTIC_SIMILARITY, CLASSIFICATION, CLUSTERING.
title: Identifier of the text content.
project: The default GCP project for API calls.
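
For orientation, this docstring belongs to Beam's Vertex AI text-embedding transform, and task_type is forwarded to the Vertex AI model along with the text in the configured columns. A hedged usage sketch follows; the VertexAITextEmbeddings class name, its module path, the 'textembedding-gecko@003' model id, and the bucket/project placeholders are assumptions rather than something shown in this hunk:

import os
import uuid

import apache_beam as beam
from apache_beam.ml.transforms.base import MLTransform
# Assumed module/class for the transform whose docstring is edited above.
from apache_beam.ml.transforms.embeddings.vertex_ai import VertexAITextEmbeddings

# Same uuid-suffix pattern as the test changes: a unique artifact prefix per run.
artifact_location = os.path.join(
    'gs://<your-bucket>/mltransform-artifacts', uuid.uuid4().hex)

embedding_config = VertexAITextEmbeddings(
    model_name='textembedding-gecko@003',  # assumed Vertex AI embedding model id
    columns=['text'],
    task_type='RETRIEVAL_DOCUMENT',  # one of the valid values listed above
    project='<your-gcp-project>')

with beam.Pipeline() as p:
    _ = (
        p
        | beam.Create([{'text': 'Beam MLTransform can call Vertex AI for embeddings.'}])
        | MLTransform(write_artifact_location=artifact_location).with_transform(
            embedding_config))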
