diff --git a/src/transformers/generation/utils.py b/src/transformers/generation/utils.py
index 700ea0443f4d..3938457155d8 100644
--- a/src/transformers/generation/utils.py
+++ b/src/transformers/generation/utils.py
@@ -1440,6 +1440,8 @@ def _prepare_generated_length(
             and not self.config.is_encoder_decoder
         ):
             generation_config.max_length -= inputs_tensor.shape[1]
+        else:  # by default let's always generate 20 new tokens
+            generation_config.max_length = generation_config.max_length + input_ids_length

         # same for min length
         if generation_config.min_new_tokens is not None:
diff --git a/src/transformers/pipelines/base.py b/src/transformers/pipelines/base.py
index 042958cbb0c6..25c2a11564c3 100644
--- a/src/transformers/pipelines/base.py
+++ b/src/transformers/pipelines/base.py
@@ -881,18 +881,7 @@ def __init__(
             # Take the first device used by `accelerate`.
             device = next(iter(hf_device_map.values()))
         else:
-            device = -1
-            if (
-                is_torch_mlu_available()
-                or is_torch_cuda_available()
-                or is_torch_npu_available()
-                or is_torch_xpu_available(check_device=True)
-                or is_torch_mps_available()
-            ):
-                logger.warning(
-                    "Hardware accelerator e.g. GPU is available in the environment, but no `device` argument"
-                    " is passed to the `Pipeline` object. Model will be on CPU."
-                )
+            device = 0

         if is_torch_available() and self.framework == "pt":
             if device == -1 and self.model.device is not None:
@@ -920,10 +909,12 @@ def __init__(
             elif is_torch_mps_available():
                 self.device = torch.device(f"mps:{device}")
             else:
-                raise ValueError(f"{device} unrecognized or not available.")
+                self.device = torch.device("cpu")
         else:
             self.device = device if device is not None else -1

+        logger.warning(f"Device set to use {self.device}")
+
         self.binary_output = binary_output

         # We shouldn't call `model.to()` for models loaded with accelerate as well as the case that model is already on device
         if (
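
Taken together, the hunks make a default `max_length` act as a budget of new tokens relative to the prompt, and make `Pipeline` target the first accelerator (`device = 0`) by default instead of CPU, falling back to `torch.device("cpu")` rather than raising when no backend matches, and logging the resolved device at construction time. A minimal sketch of the user-visible pipeline behavior, assuming a PyTorch install (the checkpoint name and the exact log output are illustrative; the selected device depends on the hardware present):

```python
from transformers import pipeline

# No `device` argument: with this patch the pipeline targets device 0 by
# default, resolving it against the available backends (mlu/cuda/npu/xpu/mps)
# and falling back to torch.device("cpu") when none of them is available.
pipe = pipeline(
    "text-classification",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)
# Construction now logs the resolved placement, e.g. "Device set to use cuda:0"
# on a GPU machine or "Device set to use cpu" otherwise.
print(pipe.device)
```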