Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,10 @@ sdk/storage/azure-storage-queue/tests/settings/settings_real.py
sdk/storage/azure-storage-file-share/tests/settings/settings_real.py
sdk/storage/azure-storage-file-datalake/tests/settings/settings_real.py

# azure-ai-ml test configs
!sdk/ml/azure-ai-ml/tests/test_configs/environment/*
!sdk/ml/azure-ai-ml/tests/test_configs/*/*/environment*


# The locations below are deprecated - keep to prevent any accidental secrets leakage ==========
sdk/storage/azure-storage-blob/tests/_shared/settings_real.py
Expand Down
5 changes: 5 additions & 0 deletions .vscode/cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@
"sdk/storage/azure-storage-blob/**",
"sdk/ml/azure-ai-ml/azure/ai/ml/_restclient/**",
"sdk/ml/azure-ai-ml/azure/ai/ml/entities/_job/job_name_generator.py",
"sdk/ml/azure-ai-ml/tests/**",
"eng/**/*.json",
"eng/*.txt",
"eng/tox/tox.ini",
Expand All @@ -111,6 +112,10 @@
".gitignore"
],
"words": [
"pyyaml",
"CONLL",
"pyjwt",
"conll",
"aad",
"aadclient",
"AADSTS",
Expand Down
1 change: 1 addition & 0 deletions eng/.docsettings.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ omitted_paths:
- sdk/**/tests/perfstress_tests/*
- sdk/nspkg/*
- sdk/**/swagger/*
- sdk/ml/azure-ai-ml/tests/*

language: python
root_check_enabled: True
Expand Down
1 change: 1 addition & 0 deletions sdk/ml/azure-ai-ml/dev_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ pytest
pydash
pytest-vcr
azure-mgmt-storage~=19.1.0
pywin32==227 ; sys_platform == 'win32'
2 changes: 2 additions & 0 deletions sdk/ml/azure-ai-ml/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@
"docker>=2.0.0",
"typing-extensions>=4.0.1",
"applicationinsights<=0.11.10",
# temp fix for mltable validation
"knack"
],
extras_require={
# user can run `pip install azure-ai-ml[designer]` to install mldesigner alone with this package
Expand Down
Empty file.
55 changes: 55 additions & 0 deletions sdk/ml/azure-ai-ml/tests/automl_job/_utiil.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import os
import time
import urllib.request as urllib
from zipfile import ZipFile

from azure.ai.ml.entities._job.automl.automl_job import AutoMLJob
from azure.ai.ml._operations.job_ops_helper import _wait_before_polling
from azure.ai.ml._operations.run_history_constants import RunHistoryConstants
from azure.ai.ml import MLClient


def assert_final_job_status(job, client: MLClient, job_type: "type[AutoMLJob]", expected_terminal_status: str):
    """Poll *job* until it reaches a terminal state, then assert the outcome.

    :param job: Freshly submitted job object returned by the service.
    :param client: MLClient used to refresh the job's status.
    :param job_type: Expected concrete job class (passed to ``isinstance``).
        NOTE: annotation fixed — a class is passed here, not an instance.
    :param expected_terminal_status: Terminal status the job must end in.
    """
    assert isinstance(job, job_type)
    # A just-created job must not have started running yet.
    assert job.status == "NotStarted"

    poll_start_time = time.time()
    while job.status not in RunHistoryConstants.TERMINAL_STATUSES:
        # Back off between polls based on how long we have been waiting.
        time.sleep(_wait_before_polling(time.time() - poll_start_time))
        job = client.jobs.get(job.name)

    assert job.status == expected_terminal_status, f"Job status mismatch. Job created: {job}"


def assert_created_job(job, client: MLClient, job_type: "type[AutoMLJob]"):
    """Assert *job* was created successfully, then best-effort cancel it.

    :param job: Freshly submitted job object returned by the service.
    :param client: MLClient used to cancel the job.
    :param job_type: Expected concrete job class (passed to ``isinstance``).
        NOTE: annotation fixed — a class is passed here, not an instance.
    """
    assert isinstance(job, job_type)
    assert job.status == "NotStarted"
    # After checking the job is created successfully, we don't need job running anymore;
    # try canceling the job. Cancellation is best-effort: a failure to cancel
    # must not fail the test itself.
    try:
        client.jobs.cancel(job.name)
    except Exception:
        print(f"Canceling {job.name} failed")


def get_properties():
    """Return the AutoML-internal property bag shared by the test jobs."""
    return {
        "_automl_internal_enable_mltable_quick_profile": True,
        "_automl_internal_label": "latest",
        "_automl_internal_save_mlflow": True,
    }


def download_dataset(download_url: str, data_file: str):
    """Download a zip archive, extract it into the CWD, and delete the archive.

    :param download_url: URL of the zip archive (any scheme urlretrieve accepts).
    :param data_file: Local path the archive is saved to before extraction.
    """
    # download data
    urllib.urlretrieve(download_url, filename=data_file)

    # extract files into the current working directory
    with ZipFile(data_file, "r") as archive:  # renamed: `zip` shadowed the builtin
        print("extracting files...")
        archive.extractall()
        print("done")
    # delete zip file — after the `with` so the handle is closed first
    # (removing an open file fails on Windows)
    os.remove(data_file)
180 changes: 180 additions & 0 deletions sdk/ml/azure-ai-ml/tests/automl_job/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
import pytest

from azure.ai.ml.constants import AssetTypes
from azure.ai.ml.entities._inputs_outputs import Input
from azure.ai.ml.entities._job.automl.tabular.forecasting_settings import ForecastingSettings
from ._utiil import download_dataset

from typing import Tuple
import os

# Repo-relative locations of the MLTable test datasets. Paths are joined with
# "/" (not os.path.join) because they are repo-relative POSIX-style fragments;
# fixtures below anchor them at ROOT_DIR before use.
TEST_CONFIG_PATH = "tests/test_configs/automl_job"
TEST_DATASETS_PATH = TEST_CONFIG_PATH + "/" + "test_datasets"

# Tabular classification (bank marketing). NOTE(review): the "TEST" constant
# points at the valid/ split — presumably intentional; confirm with the data layout.
BANK_MARKETING_TRAIN_DATASET_PATH = TEST_DATASETS_PATH + "/" + "bank_marketing/train"
BANK_MARKETING_TEST_DATASET_PATH = TEST_DATASETS_PATH + "/" + "bank_marketing/valid"

# Forecasting
BEER_FORECASTING_TRAIN_DATASET_PATH = TEST_DATASETS_PATH + "/" + "beer_forecasting/train"

# Text NER (CoNLL-2003)
CONLL_TRAIN_DATASET_PATH = TEST_DATASETS_PATH + "/" + "conll2003/train"
CONLL_VALID_DATASET_PATH = TEST_DATASETS_PATH + "/" + "conll2003/valid"

# Tabular classification (credit-card fraud)
CREDIT_CARD_FRAUD_TRAIN_DATASET_PATH = TEST_DATASETS_PATH + "/" + "credit_card_fraud/train"
CREDIT_CARD_FRAUD_TEST_DATASET_PATH = TEST_DATASETS_PATH + "/" + "credit_card_fraud/test"
CREDIT_CARD_FRAUD_VALID_DATASET_PATH = TEST_DATASETS_PATH + "/" + "credit_card_fraud/valid"

# Tabular regression
MACHINE_DATA_TRAIN_DATASET_PATH = TEST_DATASETS_PATH + "/" + "machine_data/train"

# Text classification
NEWSGROUP_TRAIN_DATASET_PATH = TEST_DATASETS_PATH + "/" + "newsgroup/train"
NEWSGROUP_VALID_DATASET_PATH = TEST_DATASETS_PATH + "/" + "newsgroup/valid"

# Text classification multilabel
PAPER_CATEGORIZATION_TRAIN_DATASET_PATH = TEST_DATASETS_PATH + "/" + "paper_categorization/train"
PAPER_CATEGORIZATION_VALID_DATASET_PATH = TEST_DATASETS_PATH + "/" + "paper_categorization/valid"

# Image tasks (data downloaded at fixture time; MLTables checked in)
IMAGE_CLASSIFICATION_TRAIN_DATASET_PATH = TEST_DATASETS_PATH + "/" + "image_classification/train"
IMAGE_CLASSIFICATION_VALID_DATASET_PATH = TEST_DATASETS_PATH + "/" + "image_classification/valid"

IMAGE_CLASSIFICATION_MULTILABEL_TRAIN_DATASET_PATH = TEST_DATASETS_PATH + "/" + "image_classification_multilabel/train"
IMAGE_CLASSIFICATION_MULTILABEL_VALID_DATASET_PATH = TEST_DATASETS_PATH + "/" + "image_classification_multilabel/valid"

IMAGE_OBJECT_DETECTION_TRAIN_DATASET_PATH = TEST_DATASETS_PATH + "/" + "image_object_detection/train"
IMAGE_OBJECT_DETECTION_VALID_DATASET_PATH = TEST_DATASETS_PATH + "/" + "image_object_detection/valid"

IMAGE_SEGMENTATION_TRAIN_DATASET_PATH = TEST_DATASETS_PATH + "/" + "image_instance_segmentation/train"
IMAGE_SEGMENTATION_VALID_DATASET_PATH = TEST_DATASETS_PATH + "/" + "image_instance_segmentation/valid"

# Repository root: three levels up from this file (tests/automl_job/conftest.py
# lives under sdk/ml/azure-ai-ml).
ROOT_DIR = os.path.realpath(os.path.join(os.path.dirname(__file__), "../../.."))


@pytest.fixture
def bankmarketing_dataset() -> Tuple[Input, Input, str]:
    """Bank-marketing classification dataset: (train, validation, label column)."""
    train = Input(type=AssetTypes.MLTABLE, path=os.path.join(ROOT_DIR, BANK_MARKETING_TRAIN_DATASET_PATH))
    valid = Input(type=AssetTypes.MLTABLE, path=os.path.join(ROOT_DIR, BANK_MARKETING_TEST_DATASET_PATH))
    return train, valid, "y"


@pytest.fixture
def credit_card_fraud_dataset() -> Tuple[Input, Input, str]:
    """Credit-card-fraud classification dataset: (train, validation, label column).

    Paths are anchored at ROOT_DIR, consistent with every other dataset fixture
    in this file, so the tests do not depend on the current working directory.
    """
    training_data = Input(
        type=AssetTypes.MLTABLE, path=os.path.join(ROOT_DIR, CREDIT_CARD_FRAUD_TRAIN_DATASET_PATH)
    )
    validation_data = Input(
        type=AssetTypes.MLTABLE, path=os.path.join(ROOT_DIR, CREDIT_CARD_FRAUD_VALID_DATASET_PATH)
    )
    label_column_name = "Class"
    return training_data, validation_data, label_column_name


@pytest.fixture
def machinedata_dataset() -> Tuple[Input, str]:
    """Machine-data regression dataset: (train, label column)."""
    train_path = os.path.join(ROOT_DIR, MACHINE_DATA_TRAIN_DATASET_PATH)
    return Input(type=AssetTypes.MLTABLE, path=train_path), "ERP"


@pytest.fixture
def beer_forecasting_dataset() -> Tuple[Input, ForecastingSettings, str]:
    """Beer-production forecasting dataset: (train, forecasting settings, label column)."""
    training_data = Input(type=AssetTypes.MLTABLE, path=os.path.join(ROOT_DIR, BEER_FORECASTING_TRAIN_DATASET_PATH))
    forecasting_settings = ForecastingSettings(
        time_column_name="DATE",
        forecast_horizon=12,
        frequency="MS",  # presumably month-start periods — confirm against the dataset
    )

    return training_data, forecasting_settings, "BeerProduction"


@pytest.fixture
def image_classification_dataset() -> Tuple[str, str]:
    """Fetch the fridge-objects images and return (train, validation) MLTable paths."""
    download_dataset(
        download_url="https://cvbp-secondary.z19.web.core.windows.net/datasets/image_classification/fridgeObjects.zip",
        data_file="./fridgeObjects.zip",
    )

    # Classification dataset MLTable paths
    return (
        os.path.join(ROOT_DIR, IMAGE_CLASSIFICATION_TRAIN_DATASET_PATH),
        os.path.join(ROOT_DIR, IMAGE_CLASSIFICATION_VALID_DATASET_PATH),
    )


@pytest.fixture
def image_classification_multilabel_dataset() -> Tuple[str, str]:
    """Fetch the multilabel fridge-objects images and return (train, validation) MLTable paths."""
    download_dataset(
        download_url="https://cvbp-secondary.z19.web.core.windows.net/datasets/image_classification/multilabelFridgeObjects.zip",
        data_file="./multilabelFridgeObjects.zip",
    )

    # Multilabel classification dataset MLTable paths
    return (
        os.path.join(ROOT_DIR, IMAGE_CLASSIFICATION_MULTILABEL_TRAIN_DATASET_PATH),
        os.path.join(ROOT_DIR, IMAGE_CLASSIFICATION_MULTILABEL_VALID_DATASET_PATH),
    )


@pytest.fixture
def image_object_detection_dataset() -> Tuple[str, str]:
    """Fetch the object-detection fridge images and return (train, validation) MLTable paths."""
    download_dataset(
        download_url="https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjects.zip",
        data_file="./odFridgeObjects.zip",
    )

    # Object detection dataset MLTable paths
    return (
        os.path.join(ROOT_DIR, IMAGE_OBJECT_DETECTION_TRAIN_DATASET_PATH),
        os.path.join(ROOT_DIR, IMAGE_OBJECT_DETECTION_VALID_DATASET_PATH),
    )


@pytest.fixture
def image_segmentation_dataset() -> Tuple[str, str]:
    """Fetch the instance-segmentation fridge images and return (train, validation) MLTable paths."""
    download_dataset(
        download_url="https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjectsMask.zip",
        data_file="./odFridgeObjectsMask.zip",
    )

    # Instance segmentation dataset MLTable paths
    return (
        os.path.join(ROOT_DIR, IMAGE_SEGMENTATION_TRAIN_DATASET_PATH),
        os.path.join(ROOT_DIR, IMAGE_SEGMENTATION_VALID_DATASET_PATH),
    )


# Text Classification Dataset
@pytest.fixture
def newsgroup() -> Tuple[Input, Input, str]:
    """Newsgroup text-classification dataset: (train, validation, target column)."""
    training_data = Input(type=AssetTypes.MLTABLE, path=os.path.join(ROOT_DIR, NEWSGROUP_TRAIN_DATASET_PATH))
    # Fixed: validation previously reused the train split; NEWSGROUP_VALID_DATASET_PATH
    # exists for exactly this purpose and was otherwise unused.
    validation_data = Input(type=AssetTypes.MLTABLE, path=os.path.join(ROOT_DIR, NEWSGROUP_VALID_DATASET_PATH))
    target_column_name = "y"

    return training_data, validation_data, target_column_name


# Text Classification Multilabel Dataset
@pytest.fixture
def paper_categorization() -> Tuple[Input, Input, str]:
    """Paper-categorization multilabel text dataset: (train, validation, target column)."""

    def _mltable(rel_path: str) -> Input:
        # Anchor the repo-relative MLTable folder at the repository root.
        return Input(type=AssetTypes.MLTABLE, path=os.path.join(ROOT_DIR, rel_path))

    return (
        _mltable(PAPER_CATEGORIZATION_TRAIN_DATASET_PATH),
        _mltable(PAPER_CATEGORIZATION_VALID_DATASET_PATH),
        "terms",
    )


# Text NER Dataset
@pytest.fixture
def conll() -> Tuple[Input, Input]:
    """CoNLL-2003 text-NER dataset: (train, validation) — NER takes no target column."""
    train, valid = (
        Input(type=AssetTypes.MLTABLE, path=os.path.join(ROOT_DIR, rel))
        for rel in (CONLL_TRAIN_DATASET_PATH, CONLL_VALID_DATASET_PATH)
    )
    return train, valid


@pytest.fixture(scope="session")
def check_completed_run(pytestconfig):
    """Whether tests should wait for job completion.

    For pytests marked with pytest.mark.e2etest, only test up to creating
    the job (returns False); otherwise poll to completion (returns True).
    """
    selected_markers = pytestconfig.getoption("-m")
    return "e2etest" not in selected_markers
Empty file.
Loading