11 changes: 6 additions & 5 deletions src/deepsparse/transformers/utils/token_generator.py
@@ -77,16 +77,17 @@ def generate(self, logits: numpy.ndarray) -> numpy.ndarray:
         :param logits: the logits from the model with shape (vocab_size,)
         :return: the sampled token
         """
-        if self.top_k:
-            logits = self.apply_top_k(logits)
-        if self.top_p:
-            logits = self.apply_top_p(logits)
-
         if self.deterministic:
             token = numpy.argmax(logits)
             self.tokens.append(token)
             return token
 
+        if self.top_k:
+            logits = self.apply_top_k(logits)
+
+        if self.top_p:
+            logits = self.apply_top_p(logits)
+
         if self.sampling_temperature != 1.0:
             logits /= self.sampling_temperature
 
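The reorder above is a small optimization: under greedy (`deterministic`) decoding, `argmax` of the raw logits already picks the token that any top-k/top-p filter would keep, so filtering first is wasted work. A minimal standalone sketch of the resulting control flow, assuming the filter masks rejected logits to `-inf` (top-p omitted for brevity; `apply_top_k` here is an illustrative stand-in for the class method):

```python
import numpy


def apply_top_k(logits: numpy.ndarray, k: int) -> numpy.ndarray:
    # keep the k largest logits, mask the rest to -inf
    out = logits.copy()
    out[numpy.argpartition(logits, -k)[:-k]] = -numpy.inf
    return out


def sample_token(
    logits: numpy.ndarray,
    deterministic: bool = False,
    top_k: int = 0,
    temperature: float = 1.0,
) -> int:
    if deterministic:
        # greedy path: the max logit survives any top-k/top-p filter,
        # so argmax the raw logits and skip the filtering entirely
        return int(numpy.argmax(logits))

    if top_k:
        logits = apply_top_k(logits, top_k)
    if temperature != 1.0:
        logits = logits / temperature

    # softmax over the filtered, scaled logits, then sample
    probs = numpy.exp(logits - logits.max())
    probs /= probs.sum()
    return int(numpy.random.choice(len(probs), p=probs))
```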
19 changes: 5 additions & 14 deletions src/deepsparse/v2/text_generation/process_inputs.py
@@ -17,7 +17,10 @@

 import transformers
 
-from deepsparse.transformers.pipelines.text_generation import TextGenerationInput
+from deepsparse.transformers.pipelines.text_generation import (
+    GenerationDefaults,
+    TextGenerationInput,
+)
 from deepsparse.transformers.utils.helpers import (
     check_and_return_generation_config,
     override_config,
@@ -26,19 +29,7 @@
 from deepsparse.v2.operators import Operator
 
 
-__all__ = ["ProcessInputsTextGeneration", "GenerationDefaults"]
-
-
-class GenerationDefaults:
-    num_return_sequences = 1
-    max_length = 100
-    max_new_tokens = None
-    output_scores = False
-    top_k = 0
-    top_p = 0.0
-    repetition_penalty = 0.0
-    do_sample = False
-    temperature = 1.0
+__all__ = ["ProcessInputsTextGeneration"]


class ProcessInputsTextGeneration(Operator):
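Note that `GenerationDefaults` is not deleted here, only relocated: it now ships from `deepsparse.transformers.pipelines.text_generation` (see the updated import above), so the v1 and v2 pipelines share a single definition. A short sketch of reading the shared defaults; the values mirror the class body removed above, and the `resolve` helper is hypothetical, not library API:

```python
from deepsparse.transformers.pipelines.text_generation import GenerationDefaults

# the class attributes carry the default generation parameters
assert GenerationDefaults.max_length == 100
assert GenerationDefaults.top_k == 0
assert GenerationDefaults.do_sample is False


def resolve(param: str, user_config: dict):
    """Hypothetical helper: prefer the caller's value, else the default."""
    return user_config.get(param, getattr(GenerationDefaults, param))


print(resolve("top_k", {}))             # 0, from GenerationDefaults
print(resolve("top_k", {"top_k": 40}))  # 40, caller override
```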
13 changes: 13 additions & 0 deletions tests/deepsparse/v2/integration_tests/__init__.py
@@ -0,0 +1,13 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
6 changes: 6 additions & 0 deletions tests/deepsparse/v2/integration_tests/configs/codegen.yaml
@@ -0,0 +1,6 @@
cadence: "nightly"
model_path: "zoo:nlg/text_generation/codegen_mono-350m/pytorch/huggingface/bigpython_bigquery_thepile/base-none"
torch_model_name: "salesforce/codegen-350m-mono"
prompt: "\ndef Fibonacci(n):\n # Check if input is 0 then it will\n # print incorrect input"
precision: 0.0001
internal_kv_cache: [True, False]
6 changes: 6 additions & 0 deletions tests/deepsparse/v2/integration_tests/configs/gpt_neo.yaml
@@ -0,0 +1,6 @@
cadence: "commit"
model_path: "hf:mgoin/TinyStories-1M-ds"
torch_model_name: "roneneldan/TinyStories-1M"
prompt: "Didn't know what time it was, the lights were low\n I leaned back on my radio"
precision: 0.001
internal_kv_cache: [True, False]
6 changes: 6 additions & 0 deletions tests/deepsparse/v2/integration_tests/configs/opt.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
cadence: "nightly"
model_path: "zoo:nlg/text_generation/opt-1.3b/pytorch/huggingface/opt_pretrain/base-none"
torch_model_name: "facebook/opt-1.3b"
prompt: "Didn't know what time it was, the lights were low\n I leaned back on my radio"
precision: 0.0001
internal_kv_cache: [True, False]
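All three configs share the same schema: `cadence` gates when the test runs (every commit vs. nightly), `model_path` is the DeepSparse model stub, `torch_model_name` is the Hugging Face id used to build ground truth, `precision` is the numeric tolerance for logit comparison, and `internal_kv_cache` lists the cache-management modes to exercise. A sketch of that schema as a dataclass, for illustration only (the suite itself loads plain dicts via `parse_params` in helpers.py below):

```python
from dataclasses import dataclass
from typing import List

import yaml


@dataclass
class IntegrationConfig:
    cadence: str                   # "commit" or "nightly"
    model_path: str                # "zoo:..." or "hf:..." model stub
    torch_model_name: str          # Hugging Face model id for ground truth
    prompt: str                    # prompt fed to both pipelines
    precision: float               # tolerance when comparing logits
    internal_kv_cache: List[bool]  # kv-cache modes to test


def load_config(path: str) -> IntegrationConfig:
    with open(path) as f:
        return IntegrationConfig(**yaml.safe_load(f))
```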
137 changes: 137 additions & 0 deletions tests/deepsparse/v2/integration_tests/helpers.py
@@ -0,0 +1,137 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import os
from typing import Any, Dict, List, Tuple, Union

import numpy
import yaml
from transformers import AutoModelForCausalLM, AutoTokenizer

import pytest


class TorchGroundTruthSource:
"""
An object that generates ground truth logits and
cache states from a prompt. This object can
generate tokens in an autoregressive manner, and thus
will output:
- prompt logits,
- generated logits,
- prompt cache state,
- generated sequence
"""

    def __init__(self, num_tokens_to_generate: int, model_name: str):
        self.model = AutoModelForCausalLM.from_pretrained(model_name)
self.tokenizer = self._create_tokenizer(model_name)

self.num_tokens_to_generate = num_tokens_to_generate

def tokenize(self, prompt: str):
return self.tokenizer(prompt, return_tensors="pt")

def __call__(
self, prompt: str
) -> Tuple[numpy.ndarray, numpy.ndarray, List[numpy.ndarray], str]:
        # generate() does not readily expose 'past_key_values', so we run
        # the model twice: once to generate, once to capture the prompt cache
out = self.model.generate(
self.tokenize(prompt).input_ids,
max_new_tokens=self.num_tokens_to_generate,
output_scores=True,
return_dict_in_generate=True,
use_cache=True,
)
generated_text = self.tokenizer.decode(
out.sequences[0], skip_special_tokens=True
)
        generated_logits = numpy.stack(
            [score.numpy() for score in out.scores], axis=1
        )  # (1, num_tokens_to_generate, vocab_size)

out = self.model(**self.tokenize(prompt))
prompt_logits = out.logits.detach().numpy()[
:, :-1, :
] # (1, prompt_length, vocab_size)
prompt_cache = [
entry.detach().numpy()
for key_value_tuple in out.past_key_values
for entry in key_value_tuple
] # List[(1, num_heads, past_length, head_dim)]

return generated_logits, prompt_logits, prompt_cache, generated_text

@staticmethod
def _create_tokenizer(model_name):
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.padding_side = "left"
if tokenizer.pad_token is None:
tokenizer.pad_token = tokenizer.eos_token

return tokenizer


def parse_params(configs_directory: str) -> List[Dict[str, Any]]:
    # parse every yaml config in the directory, keeping only those whose
    # cadence matches the CADENCE environment variable (default: "commit")
    assert os.path.isdir(
        configs_directory
    ), f"Config directory {configs_directory} is not a directory"

config_dicts = []
for file in os.listdir(configs_directory):
if file.endswith(".yaml"):
config_path = os.path.join(configs_directory, file)
# reads the yaml file
with open(config_path, "r") as f:
config = yaml.safe_load(f)

cadence = os.environ.get("CADENCE", "commit")
expected_cadence = config["cadence"]

if not isinstance(expected_cadence, list):
expected_cadence = [expected_cadence]
if cadence in expected_cadence:
config_dicts.append(config)
else:
logging.info(
f"Skipping testing model: {config['model_path']} "
f"for cadence: {config['cadence']}"
)
    if not config_dicts:
        raise FileNotFoundError(
            f"Could not find a yaml file in {configs_directory}"
        )
    return config_dicts


def validate_internal_kv_cache(
internal_kv_cache, available_kv_cache_types: Union[str, List[str]]
) -> bool:
if internal_kv_cache and True not in available_kv_cache_types:
pytest.skip(
"The tests for running the pipeline with "
"internal kv cache management are disabled."
)
if not internal_kv_cache and False not in available_kv_cache_types:
pytest.skip(
"The tests for running the pipeline with "
"external kv cache management are disabled."
)
return internal_kv_cache
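For reference, `TorchGroundTruthSource` runs the torch model twice: `generate()` yields the generated logits and text, while a plain forward pass over the prompt yields the prompt logits and kv-cache. A usage sketch against the smallest model configured above:

```python
source = TorchGroundTruthSource(
    num_tokens_to_generate=16,
    model_name="roneneldan/TinyStories-1M",  # from gpt_neo.yaml
)
generated_logits, prompt_logits, prompt_cache, text = source(
    "Didn't know what time it was, the lights were low"
)
print(generated_logits.shape)  # (1, 16, vocab_size)
print(prompt_logits.shape)     # (1, prompt_length, vocab_size)
print(len(prompt_cache))       # 2 * num_layers key/value arrays
```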
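And a hypothetical pytest skeleton showing how `parse_params` and `validate_internal_kv_cache` would typically be wired together; the fixture and test names are illustrative, not the suite's actual ones:

```python
import os

import pytest

from tests.deepsparse.v2.integration_tests.helpers import (
    TorchGroundTruthSource,
    parse_params,
    validate_internal_kv_cache,
)

CONFIGS_DIRECTORY = os.path.join(os.path.dirname(__file__), "configs")


@pytest.fixture(params=parse_params(CONFIGS_DIRECTORY))
def config(request):
    return request.param


@pytest.mark.parametrize("internal_kv_cache", [True, False])
def test_against_torch_ground_truth(config, internal_kv_cache):
    # skip kv-cache combinations the yaml config disables
    internal_kv_cache = validate_internal_kv_cache(
        internal_kv_cache, config["internal_kv_cache"]
    )
    source = TorchGroundTruthSource(
        num_tokens_to_generate=16, model_name=config["torch_model_name"]
    )
    generated_logits, prompt_logits, prompt_cache, text = source(config["prompt"])
    # ...run the DeepSparse pipeline with config["model_path"] and compare
    # logits elementwise within config["precision"] (omitted here)
```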