diff --git a/ludwig/benchmarking/profiler.py b/ludwig/benchmarking/profiler.py index 10332173c58..f0712314a67 100644 --- a/ludwig/benchmarking/profiler.py +++ b/ludwig/benchmarking/profiler.py @@ -3,15 +3,17 @@ import logging import os import shutil -import sys import threading import time from queue import Empty as EmptyQueueException from queue import Queue +from subprocess import PIPE, Popen from typing import Any, Dict, List +from xml.etree.ElementTree import fromstring import psutil import torch +from cpuinfo import get_cpu_info from gpustat.core import GPUStatCollection from ludwig.benchmarking.profiler_dataclasses import profiler_dataclass_to_flat_dict, TorchProfilerMetrics @@ -20,19 +22,38 @@ from ludwig.globals import LUDWIG_VERSION from ludwig.utils.data_utils import save_json -# disabling print because the following imports are verbose -f = open(os.devnull, "w") -sys.stdout = f -from experiment_impact_tracker.cpu.common import get_my_cpu_info # noqa E402 -from experiment_impact_tracker.gpu.nvidia import get_gpu_info # noqa E402 - -f.close() -sys.stdout = sys.__stdout__ - STOP_MESSAGE = "stop" logger = logging.getLogger() +def get_gpu_info(): + """Gathers general hardware information about each NVIDIA GPU reported by `nvidia-smi`. + + This function was copied from `experiment_impact_tracker` to get around a Pandas 2.0 breaking change impacting the + package.
+ https://github.com/Breakend/experiment-impact-tracker/blob/master/experiment_impact_tracker/gpu/nvidia.py#L48-L73 + """ + p = Popen(["nvidia-smi", "-q", "-x"], stdout=PIPE) + outs, _ = p.communicate() + xml = fromstring(outs) + data = [] + driver_version = xml.findall("driver_version")[0].text + cuda_version = xml.findall("cuda_version")[0].text + + for gpu in xml.iter("gpu"): + gpu_data = {} + name = list(gpu.iter("product_name"))[0].text + memory_usage = gpu.findall("fb_memory_usage")[0] + total_memory = memory_usage.findall("total")[0].text + + gpu_data["name"] = name + gpu_data["total_memory"] = total_memory + gpu_data["driver_version"] = driver_version + gpu_data["cuda_version"] = cuda_version + data.append(gpu_data) + return data + + def monitor(queue: Queue, info: Dict[str, Any], logging_interval: int, cuda_is_available: bool) -> None: """Monitors hardware resource use. @@ -132,7 +153,7 @@ def _populate_static_information(self) -> None: self.info["start_disk_usage"] = shutil.disk_usage(os.path.expanduser("~")).used # CPU information - cpu_info = get_my_cpu_info() + cpu_info = get_cpu_info() self.info["cpu_architecture"] = cpu_info["arch"] self.info["num_cpu"] = psutil.cpu_count() self.info["cpu_name"] = cpu_info.get("brand_raw", "unknown") diff --git a/requirements.txt b/requirements.txt index 80566e52adc..8ca89a34d2a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ Cython>=0.25 h5py>=2.6,!=3.0.0 numpy>=1.15 -pandas>=0.25,!=1.1.5,<2.0 +pandas>=1.0,!=1.1.5 scipy>=0.18 tabulate>=0.7 scikit-learn @@ -29,7 +29,7 @@ torchinfo filelock psutil==5.9.4 protobuf -experiment_impact_tracker +py-cpuinfo==9.0.0 gpustat rich~=12.4.4 packaging @@ -39,10 +39,12 @@ sacremoses sentencepiece # new data format support -xlwt # excel -xlrd # excel -openpyxl # excel -pyarrow # parquet -lxml # html +xlwt # excel +xlrd>=2.0.1 # excel +xlsxwriter>=1.4.3 # excel +openpyxl>=3.0.7 # excel +pyxlsb>=1.0.8 # excel +pyarrow 
# parquet +lxml # html whylogs>=1.1.9 diff --git a/tests/integration_tests/test_experiment.py b/tests/integration_tests/test_experiment.py index c9cb71e12b3..78812ca4031 100644 --- a/tests/integration_tests/test_experiment.py +++ b/tests/integration_tests/test_experiment.py @@ -428,7 +428,6 @@ def test_experiment_image_dataset(train_format, train_in_memory, test_format, te "df", "dict", "excel", - "excel_xls", "feather", "fwf", "hdf5", diff --git a/tests/integration_tests/utils.py b/tests/integration_tests/utils.py index f12910603c1..eae330cde3c 100644 --- a/tests/integration_tests/utils.py +++ b/tests/integration_tests/utils.py @@ -814,7 +814,7 @@ def to_fwf(df, fname): processed_df_rows = [] for _, row in df.iterrows(): processed_df_row = {} - for feature_name, raw_feature in row.iteritems(): + for feature_name, raw_feature in row.items(): if "image" in feature_name and not (type(raw_feature) == float and np.isnan(raw_feature)): feature = np.array(Image.open(raw_feature)) else: diff --git a/tests/ludwig/data/test_split.py b/tests/ludwig/data/test_split.py index e4cd77281bd..a5d1cccf860 100644 --- a/tests/ludwig/data/test_split.py +++ b/tests/ludwig/data/test_split.py @@ -370,6 +370,7 @@ def test_hash_split(df_engine, ray_cluster_2cpu): df2["id"] = np.arange(100, 200) nrows *= 2 + df = df_engine.df_lib.concat([df, df2]) splits2 = splitter.split(df, backend)