Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions openai/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,26 @@
# Originally forked from the MIT-licensed Stripe Python bindings.

import os
import sys
from typing import TYPE_CHECKING, Optional

from contextvars import ContextVar
from typing import Optional, TYPE_CHECKING

if "pkg_resources" not in sys.modules:
    # Workaround for the following:
    # https://github.com/benoitc/gunicorn/pull/2539
    #
    # A placeholder object is registered under the name "pkg_resources" only
    # for the duration of the aiohttp import, so any `import pkg_resources`
    # triggered by that import resolves to the placeholder instead of loading
    # the real module (presumably to avoid its import cost -- see the linked
    # PR for the rationale).
    sys.modules["pkg_resources"] = object()  # type: ignore[assignment]
    try:
        import aiohttp
    finally:
        # Always remove the placeholder, even when the aiohttp import fails,
        # so a later `import pkg_resources` never receives the dummy object.
        sys.modules.pop("pkg_resources", None)

from openai.api_resources import (
Audio,
ChatCompletion,
Completion,
Customer,
Edit,
Deployment,
Edit,
Embedding,
Engine,
ErrorObject,
Expand Down
4 changes: 2 additions & 2 deletions openai/api_resources/embedding.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import base64
import time


from openai import util
from openai.api_resources.abstract.engine_api_resource import EngineAPIResource
from openai.datalib import numpy as np, assert_has_numpy
from openai.datalib.numpy_helper import assert_has_numpy
from openai.datalib.numpy_helper import numpy as np
from openai.error import TryAgain


Expand Down
24 changes: 0 additions & 24 deletions openai/datalib.py → openai/datalib/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,6 @@
See also `setup.py`.

"""
# numpy and pandas are optional dependencies: when one of them cannot be
# imported, its module name is bound to None so that importing this module
# still succeeds and callers can test the HAS_* flags below.
try:
    import numpy
except ImportError:
    numpy = None

try:
    import pandas
except ImportError:
    pandas = None

# True when the corresponding optional library was imported successfully.
HAS_NUMPY = numpy is not None
HAS_PANDAS = pandas is not None

INSTRUCTIONS = """

OpenAI error:
Expand All @@ -39,18 +26,7 @@
"""

NUMPY_INSTRUCTIONS = INSTRUCTIONS.format(library="numpy")
PANDAS_INSTRUCTIONS = INSTRUCTIONS.format(library="pandas")


class MissingDependencyError(Exception):
    """Raised when an optional dependency (numpy or pandas) is not installed.

    The exception message carries the installation instructions for the
    missing library.
    """


def assert_has_numpy() -> None:
    """Fail fast when numpy is unavailable.

    Raises:
        MissingDependencyError: if HAS_NUMPY is false; the message is
            NUMPY_INSTRUCTIONS.
    """
    if not HAS_NUMPY:
        raise MissingDependencyError(NUMPY_INSTRUCTIONS)


def assert_has_pandas() -> None:
    """Fail fast when pandas is unavailable.

    Raises:
        MissingDependencyError: if HAS_PANDAS is false; the message is
            PANDAS_INSTRUCTIONS.
    """
    if not HAS_PANDAS:
        raise MissingDependencyError(PANDAS_INSTRUCTIONS)
15 changes: 15 additions & 0 deletions openai/datalib/numpy_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from openai.datalib.common import INSTRUCTIONS, MissingDependencyError

# numpy is an optional dependency: when it cannot be imported the name is
# bound to None so that importing this helper module still succeeds.
try:
    import numpy
except ImportError:
    numpy = None

# True when numpy was imported successfully.
HAS_NUMPY = numpy is not None

# Message used by assert_has_numpy() when numpy is missing.
NUMPY_INSTRUCTIONS = INSTRUCTIONS.format(library="numpy")


def assert_has_numpy() -> None:
    """Fail fast when numpy is unavailable.

    Raises:
        MissingDependencyError: if HAS_NUMPY is false; the message is
            NUMPY_INSTRUCTIONS.
    """
    if not HAS_NUMPY:
        raise MissingDependencyError(NUMPY_INSTRUCTIONS)
15 changes: 15 additions & 0 deletions openai/datalib/pandas_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from openai.datalib.common import INSTRUCTIONS, MissingDependencyError

# pandas is an optional dependency: when it cannot be imported the name is
# bound to None so that importing this helper module still succeeds.
try:
    import pandas
except ImportError:
    pandas = None

# True when pandas was imported successfully.
HAS_PANDAS = pandas is not None

# Message used by assert_has_pandas() when pandas is missing.
PANDAS_INSTRUCTIONS = INSTRUCTIONS.format(library="pandas")


def assert_has_pandas() -> None:
    """Fail fast when pandas is unavailable.

    Raises:
        MissingDependencyError: if HAS_PANDAS is false; the message is
            PANDAS_INSTRUCTIONS.
    """
    if not HAS_PANDAS:
        raise MissingDependencyError(PANDAS_INSTRUCTIONS)
4 changes: 2 additions & 2 deletions openai/embeddings_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
from tenacity import retry, stop_after_attempt, wait_random_exponential

import openai
from openai.datalib import numpy as np
from openai.datalib import pandas as pd
from openai.datalib.numpy_helper import numpy as np
from openai.datalib.pandas_helper import pandas as pd


@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
Expand Down
10 changes: 3 additions & 7 deletions openai/tests/test_long_examples_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,8 @@

import pytest

from openai.datalib import (
HAS_NUMPY,
HAS_PANDAS,
NUMPY_INSTRUCTIONS,
PANDAS_INSTRUCTIONS,
)
from openai.datalib.numpy_helper import HAS_NUMPY, NUMPY_INSTRUCTIONS
from openai.datalib.pandas_helper import HAS_PANDAS, PANDAS_INSTRUCTIONS


@pytest.mark.skipif(not HAS_PANDAS, reason=PANDAS_INSTRUCTIONS)
Expand Down Expand Up @@ -54,5 +50,5 @@ def test_long_examples_validator() -> None:
assert prepared_data_cmd_output.stderr == ""
# validate get_long_indexes() applied during optional_fn() call in long_examples_validator()
assert "indices of the long examples has changed" in prepared_data_cmd_output.stdout

return prepared_data_cmd_output.stdout
10 changes: 7 additions & 3 deletions openai/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
import sys
from typing import Any, Callable, NamedTuple, Optional

from openai.datalib import pandas as pd, assert_has_pandas
from openai.datalib.pandas_helper import assert_has_pandas
from openai.datalib.pandas_helper import pandas as pd


class Remediation(NamedTuple):
Expand Down Expand Up @@ -158,6 +159,7 @@ def long_examples_validator(df):

ft_type = infer_task_type(df)
if ft_type != "open-ended generation":

def get_long_indexes(d):
long_examples = d.apply(
lambda x: len(x.prompt) + len(x.completion) > 10000, axis=1
Expand All @@ -171,10 +173,12 @@ def get_long_indexes(d):
optional_msg = f"Remove {len(long_indexes)} long examples"

def optional_fn(x):

long_indexes_to_drop = get_long_indexes(x)
if long_indexes != long_indexes_to_drop:
sys.stdout.write(f"The indices of the long examples has changed as a result of a previously applied recommendation.\nThe {len(long_indexes_to_drop)} long examples to be dropped are now at the following indices: {long_indexes_to_drop}\n")
sys.stdout.write(
f"The indices of the long examples has changed as a result of a previously applied recommendation.\nThe {len(long_indexes_to_drop)} long examples to be dropped are now at the following indices: {long_indexes_to_drop}\n"
)
return x.drop(long_indexes_to_drop)

return Remediation(
Expand Down
4 changes: 2 additions & 2 deletions openai/wandb_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
from pathlib import Path

from openai import File, FineTune
from openai.datalib import numpy as np
from openai.datalib import pandas as pd
from openai.datalib.numpy_helper import numpy as np
from openai.datalib.pandas_helper import pandas as pd


class WandbLogger:
Expand Down