diff --git a/haystack/core/pipeline/base.py b/haystack/core/pipeline/base.py index 8511a9ca38..d1eb983957 100644 --- a/haystack/core/pipeline/base.py +++ b/haystack/core/pipeline/base.py @@ -6,8 +6,9 @@ from collections import defaultdict from copy import deepcopy from datetime import datetime +from enum import IntEnum from pathlib import Path -from typing import Any, Dict, Iterator, List, Optional, Set, TextIO, Tuple, Type, TypeVar, Union +from typing import Any, Dict, Iterator, List, Optional, TextIO, Tuple, Type, TypeVar, Union import networkx # type:ignore @@ -18,10 +19,21 @@ PipelineConnectError, PipelineDrawingError, PipelineError, + PipelineMaxComponentRuns, PipelineRuntimeError, PipelineUnmarshalError, PipelineValidationError, ) +from haystack.core.pipeline.component_checks import ( + _NO_OUTPUT_PRODUCED, + all_predecessors_executed, + are_all_lazy_variadic_sockets_resolved, + are_all_sockets_ready, + can_component_run, + is_any_greedy_socket_ready, + is_socket_lazy_variadic, +) +from haystack.core.pipeline.utils import FIFOPriorityQueue, parse_connect_string from haystack.core.serialization import DeserializationCallbacks, component_from_dict, component_to_dict from haystack.core.type_utils import _type_name, _types_are_compatible from haystack.marshal import Marshaller, YamlMarshaller @@ -30,7 +42,6 @@ from .descriptions import find_pipeline_inputs, find_pipeline_outputs from .draw import _to_mermaid_image from .template import PipelineTemplate, PredefinedPipeline -from .utils import parse_connect_string DEFAULT_MARSHALLER = YamlMarshaller() @@ -42,6 +53,14 @@ logger = logging.getLogger(__name__) +class ComponentPriority(IntEnum): + HIGHEST = 1 + READY = 2 + DEFER = 3 + DEFER_LAST = 4 + BLOCKED = 5 + + class PipelineBase: """ Components orchestration engine. @@ -369,7 +388,7 @@ def remove_component(self, name: str) -> Component: return instance - def connect(self, sender: str, receiver: str) -> "PipelineBase": # noqa: PLR0915 + def connect(self, sender: str, receiver: str) -> "PipelineBase": # noqa: PLR0915 PLR0912 """ Connects two components together. @@ -812,26 +831,6 @@ def _prepare_component_input_data(self, data: Dict[str, Any]) -> Dict[str, Dict[ return data - def _normalize_varidiac_input_data(self, data: Dict[str, Dict[str, Any]]) -> Dict[str, Dict[str, Any]]: - """ - Variadic inputs expect their value to be a list, this utility method creates that list from the user's input. - """ - for component_name, component_inputs in data.items(): - if component_name not in self.graph.nodes: - # This is not a component name, it must be the name of one or more input sockets. - # Those are handled in a different way, so we skip them here. - continue - instance = self.graph.nodes[component_name]["instance"] - for component_input, input_value in component_inputs.items(): - if instance.__haystack_input__._sockets_dict[component_input].is_variadic: - # Components that have variadic inputs need to receive lists as input. - # We don't want to force the user to always pass lists, so we convert single values to lists here. - # If it's already a list we assume the component takes a variadic input of lists, so we - # convert it in any case. 
- data[component_name][component_input] = [input_value] - - return {**data} - @classmethod def from_template( cls, predefined_pipeline: PredefinedPipeline, template_params: Optional[Dict[str, Any]] = None @@ -859,11 +858,6 @@ def from_template( msg += f"Source:\n{rendered}" raise PipelineUnmarshalError(msg) - def _init_graph(self): - """Resets the visits count for each component""" - for node in self.graph.nodes: - self.graph.nodes[node]["visits"] = 0 - def _find_receivers_from(self, component_name: str) -> List[Tuple[str, OutputSocket, InputSocket]]: """ Utility function to find all Components that receive input form `component_name`. @@ -882,540 +876,268 @@ def _find_receivers_from(self, component_name: str) -> List[Tuple[str, OutputSoc res.append((receiver_name, sender_socket, receiver_socket)) return res - def _distribute_output( # pylint: disable=too-many-positional-arguments - self, - receiver_components: List[Tuple[str, OutputSocket, InputSocket]], - component_result: Dict[str, Any], - components_inputs: Dict[str, Dict[str, Any]], - run_queue: List[Tuple[str, Component]], - waiting_queue: List[Tuple[str, Component]], - ) -> Dict[str, Any]: + @staticmethod + def _convert_to_internal_format(pipeline_inputs: Dict[str, Any]) -> Dict[str, Dict[str, List]]: """ - Distributes the output of a Component to the next Components that need it. + Converts the inputs to the pipeline to the format that is needed for the internal `Pipeline.run` logic. - This also updates the queues that keep track of which Components are ready to run and which are waiting for - input. + Example Input: + {'prompt_builder': {'question': 'Who lives in Paris?'}, 'retriever': {'query': 'Who lives in Paris?'}} + Example Output: + {'prompt_builder': {'question': [{'sender': None, 'value': 'Who lives in Paris?'}]}, + 'retriever': {'query': [{'sender': None, 'value': 'Who lives in Paris?'}]}} - :param receiver_components: - List of tuples containing name of receiver Components and relative sender OutputSocket - and receiver InputSocket instances - :param component_result: - The output of the Component - :param components_inputs: - The current state of the inputs divided by Component name - :param run_queue: - Queue of Components to run - :param waiting_queue: - Queue of Components waiting for input + :param pipeline_inputs: Inputs to the pipeline. + :returns: Converted inputs that can be used by the internal `Pipeline.run` logic. + """ + inputs: Dict[str, Dict[str, List[Dict[str, Any]]]] = {} + for component_name, socket_dict in pipeline_inputs.items(): + inputs[component_name] = {} + for socket_name, value in socket_dict.items(): + inputs[component_name][socket_name] = [{"sender": None, "value": value}] - :returns: - The updated output of the Component without the keys that were distributed to other Components - """ - # We keep track of which keys to remove from component_result at the end of the loop. - # This is done after the output has been distributed to the next components, so that - # we're sure all components that need this output have received it. - to_remove_from_component_result = set() - - for receiver_name, sender_socket, receiver_socket in receiver_components: - if sender_socket.name not in component_result: - # This output wasn't created by the sender, nothing we can do. - # - # Some Components might have conditional outputs, so we need to check if they actually returned - # some output while iterating over their output sockets. 
- # - # A perfect example of this would be the ConditionalRouter, which will have an output for each - # condition it has been initialized with. - # Though it will return only one output at a time. - continue - - if receiver_name not in components_inputs: - components_inputs[receiver_name] = {} - - # We keep track of the keys that were distributed to other Components. - # This key will be removed from component_result at the end of the loop. - to_remove_from_component_result.add(sender_socket.name) - - value = component_result[sender_socket.name] - - if receiver_socket.is_variadic: - # Usually Component inputs can only be received from one sender, the Variadic type allows - # instead to receive inputs from multiple senders. - # - # To keep track of all the inputs received internally we always store them in a list. - if receiver_socket.name not in components_inputs[receiver_name]: - # Create the list if it doesn't exist - components_inputs[receiver_name][receiver_socket.name] = [] - else: - # Check if the value is actually a list - assert isinstance(components_inputs[receiver_name][receiver_socket.name], list) - components_inputs[receiver_name][receiver_socket.name].append(value) - else: - components_inputs[receiver_name][receiver_socket.name] = value - - receiver = self.graph.nodes[receiver_name]["instance"] - pair = (receiver_name, receiver) - - if receiver_socket.is_variadic: - if receiver_socket.is_greedy: - # If the receiver is greedy, we can run it as soon as possible. - # First we remove it from the status lists it's in if it's there or - # we risk running it multiple times. - if pair in run_queue: - run_queue.remove(pair) - if pair in waiting_queue: - waiting_queue.remove(pair) - run_queue.insert(0, pair) - else: - # If the receiver Component has a variadic input that is not greedy - # we put it in the waiting queue. - # This make sure that we don't run it earlier than necessary and we can collect - # as many inputs as we can before running it. - if pair not in waiting_queue: - waiting_queue.append(pair) - - if pair not in waiting_queue and pair not in run_queue: - # Queue up the Component that received this input to run, only if it's not already waiting - # for input or already ready to run. - run_queue.append(pair) - - # Returns the output without the keys that were distributed to other Components - return {k: v for k, v in component_result.items() if k not in to_remove_from_component_result} - - def _find_next_runnable_component( - self, components_inputs: Dict[str, Dict[str, Any]], waiting_queue: List[Tuple[str, Component]] - ) -> Tuple[str, Component]: - """ - Finds the next Component that can be run and returns it. - - :param components_inputs: The current state of the inputs divided by Component name - :param waiting_queue: Queue of Components waiting for input - - :returns: The name and the instance of the next Component that can be run - """ - all_lazy_variadic = True - all_with_default_inputs = True - - filtered_waiting_queue = [] - - for name, comp in waiting_queue: - if not _is_lazy_variadic(comp): - # Components with variadic inputs that are not greedy must be removed only if there's nothing else to - # run at this stage. - # We need to wait as long as possible to run them, so we can collect as most inputs as we can. - all_lazy_variadic = False - - if not _has_all_inputs_with_defaults(comp): - # Components that have defaults for all their inputs must be treated the same identical way as we treat - # lazy variadic components. 
If there are only components with defaults we can run them. - # If we don't do this the order of execution of the Pipeline's Components will be affected cause we - # enqueue the Components in `run_queue` at the start using the order they are added in the Pipeline. - # If a Component A with defaults is added before a Component B that has no defaults, but in the Pipeline - # logic A must be executed after B. However, B could run before A if we don't do this check. - all_with_default_inputs = False - - if not _is_lazy_variadic(comp) and not _has_all_inputs_with_defaults(comp): - # Keep track of the Components that are not lazy variadic and don't have all inputs with defaults. - # We'll handle these later if necessary. - filtered_waiting_queue.append((name, comp)) - - # If all Components are lazy variadic or all Components have all inputs with defaults we can get one to run - if all_lazy_variadic or all_with_default_inputs: - return waiting_queue[0] - - for name, comp in filtered_waiting_queue: - # Find the first component that has all the inputs it needs to run - has_enough_inputs = True - for input_socket in comp.__haystack_input__._sockets_dict.values(): # type: ignore - if input_socket.name not in components_inputs.get(name, {}) and input_socket.is_mandatory: - has_enough_inputs = False - break - - if has_enough_inputs: - return name, comp - - # If we reach this point it means that we found no Component that has enough inputs to run. - # Ideally we should never reach this point, though we can't raise an exception either as - # existing use cases rely on this behavior. - # So we return the last Component, that could be the last from waiting_queue or filtered_waiting_queue. - return name, comp - - def _find_next_runnable_lazy_variadic_or_default_component( - self, waiting_queue: List[Tuple[str, Component]] - ) -> Tuple[str, Component]: - """ - Finds the next Component that can be run and has a lazy variadic input or all inputs with default values. - - :param waiting_queue: Queue of Components waiting for input - - :returns: The name and the instance of the next Component that can be run - """ - for name, comp in waiting_queue: - is_lazy_variadic = _is_lazy_variadic(comp) - has_only_defaults = _has_all_inputs_with_defaults(comp) - if is_lazy_variadic or has_only_defaults: - return name, comp - - # If we reach this point it means that we found no Component that has a lazy variadic input or all inputs with - # default values to run. - # Similar to `_find_next_runnable_component` we might not find the Component we want, so we optimistically - # return the last Component in the list. - # We're probably stuck in a loop in this case, but we can't raise an exception as existing use cases might - # rely on this behaviour. - # The loop detection will be handled later on. - return name, comp - - def _find_components_that_will_receive_no_input( - self, component_name: str, component_result: Dict[str, Any], components_inputs: Dict[str, Dict[str, Any]] - ) -> Set[Tuple[str, Component]]: - """ - Find all the Components that are connected to component_name and didn't receive any input from it. - - Components that have a Variadic input and received already some input from other Components - but not from component_name won't be returned as they have enough inputs to run. - - This includes the descendants of the Components that didn't receive any input from component_name. - That is necessary to avoid getting stuck into infinite loops waiting for inputs that will never arrive. 
- - :param component_name: Name of the Component that created the output - :param component_result: Output of the Component - :param components_inputs: The current state of the inputs divided by Component name - :return: A set of Components that didn't receive any input from component_name - """ - - # Simplifies the check if a Component is Variadic and received some input from other Components. - def has_variadic_socket_with_existing_inputs( - component: Component, component_name: str, sender_name: str, components_inputs: Dict[str, Dict[str, Any]] - ) -> bool: - for socket in component.__haystack_input__._sockets_dict.values(): # type: ignore - if sender_name not in socket.senders: - continue - if socket.is_variadic and len(components_inputs.get(component_name, {}).get(socket.name, [])) > 0: - return True - return False + return inputs - # Makes it easier to verify if all connections between two Components are optional - def all_connections_are_optional(sender_name: str, receiver: Component) -> bool: - for socket in receiver.__haystack_input__._sockets_dict.values(): # type: ignore - if sender_name not in socket.senders: - continue - if socket.is_mandatory: - return False - return True - - # Eases checking if other connections that are not between sender_name and receiver_name - # already received inputs - def other_connections_received_input(sender_name: str, receiver_name: str) -> bool: - receiver: Component = self.graph.nodes[receiver_name]["instance"] - for receiver_socket in receiver.__haystack_input__._sockets_dict.values(): # type: ignore - if sender_name in receiver_socket.senders: - continue - if components_inputs.get(receiver_name, {}).get(receiver_socket.name) is not None: - return True - return False + @staticmethod + def _consume_component_inputs(component_name: str, component: Dict, inputs: Dict) -> Dict[str, Any]: + """ + Extracts the inputs needed to run for the component and removes them from the global inputs state. + + :param component_name: The name of a component. + :param component: Component with component metadata. + :param inputs: Global inputs state. + :returns: The inputs for the component. + """ + component_inputs = inputs.get(component_name, {}) + consumed_inputs = {} + greedy_inputs_to_remove = set() + for socket_name, socket in component["input_sockets"].items(): + socket_inputs = component_inputs.get(socket_name, []) + socket_inputs = [sock["value"] for sock in socket_inputs if sock["value"] != _NO_OUTPUT_PRODUCED] + if socket_inputs: + if not socket.is_variadic: + # We only care about the first input provided to the socket. + consumed_inputs[socket_name] = socket_inputs[0] + elif socket.is_greedy: + # We need to keep track of greedy inputs because we always remove them, even if they come from + # outside the pipeline. Otherwise, a greedy input from the user would trigger a pipeline to run + # indefinitely. + greedy_inputs_to_remove.add(socket_name) + consumed_inputs[socket_name] = [socket_inputs[0]] + elif is_socket_lazy_variadic(socket): + # We use all inputs provided to the socket on a lazy variadic socket. + consumed_inputs[socket_name] = socket_inputs + + # We prune all inputs except for those that were provided from outside the pipeline (e.g. user inputs). 
+ pruned_inputs = { + socket_name: [ + sock for sock in socket if sock["sender"] is None and not socket_name in greedy_inputs_to_remove + ] + for socket_name, socket in component_inputs.items() + } + pruned_inputs = {socket_name: socket for socket_name, socket in pruned_inputs.items() if len(socket) > 0} - components = set() - instance: Component = self.graph.nodes[component_name]["instance"] - for socket_name, socket in instance.__haystack_output__._sockets_dict.items(): # type: ignore - if socket_name in component_result: - continue - for receiver in socket.receivers: - receiver_instance: Component = self.graph.nodes[receiver]["instance"] - - if has_variadic_socket_with_existing_inputs( - receiver_instance, receiver, component_name, components_inputs - ): - # Components with Variadic input that already received some input - # can still run, even if branch is skipped. - # If we remove them they won't run. - continue - - if all_connections_are_optional(component_name, receiver_instance) and other_connections_received_input( - component_name, receiver - ): - # If all the connections between component_name and receiver are optional - # and receiver received other inputs already it still has enough inputs to run. - # Even if it didn't receive input from component_name, so we can't remove it or its - # descendants. - continue - - components.add((receiver, receiver_instance)) - # Get the descendants too. When we remove a Component that received no input - # it's extremely likely that its descendants will receive no input as well. - # This is fine even if the Pipeline will merge back into a single Component - # at a certain point. The merging Component will be put back into the run - # queue at a later stage. - for descendant_name in networkx.descendants(self.graph, receiver): - descendant = self.graph.nodes[descendant_name]["instance"] - - # Components with Variadic input that already received some input - # can still run, even if branch is skipped. - # If we remove them they won't run. - if has_variadic_socket_with_existing_inputs( - descendant, descendant_name, receiver, components_inputs - ): - continue - - components.add((descendant_name, descendant)) - - return components - - def _is_stuck_in_a_loop(self, waiting_queue: List[Tuple[str, Component]]) -> bool: - """ - Checks if the Pipeline is stuck in a loop. - - :param waiting_queue: Queue of Components waiting for input - - :returns: True if the Pipeline is stuck in a loop, False otherwise - """ - # Are we actually stuck or there's a lazy variadic or a component with has only default inputs - # waiting for input? - # This is our last resort, if there's no lazy variadic or component with only default inputs - # waiting for input we're stuck for real and we can't make any progress. - component_found = False - for _, comp in waiting_queue: - if _is_lazy_variadic(comp) or _has_all_inputs_with_defaults(comp): - component_found = True - break - - if not component_found: - # We're stuck in a loop for real, we can't make any progress. - # BAIL! - return True - - # If we have a single component with no variadic input or only default inputs waiting for input - # it means it has been waiting for input for at least 2 iterations. - # This will never run. - # BAIL! - return len(waiting_queue) == 1 - - def _component_has_enough_inputs_to_run(self, name: str, inputs: Dict[str, Dict[str, Any]]) -> bool: - """ - Returns True if the Component has all the inputs it needs to run. - - :param name: Name of the Component as defined in the Pipeline. 
- :param inputs: The current state of the inputs divided by Component name. - - :return: Whether the Component can run or not. - """ - instance: Component = self.graph.nodes[name]["instance"] - if name not in inputs: - return False - expected_inputs = instance.__haystack_input__._sockets_dict.keys() # type: ignore - current_inputs = inputs[name].keys() - return expected_inputs == current_inputs + inputs[component_name] = pruned_inputs + + return consumed_inputs - def _break_supported_cycles_in_graph(self) -> Tuple[networkx.MultiDiGraph, Dict[str, List[List[str]]]]: + def _fill_queue( + self, component_names: List[str], inputs: Dict[str, Any], component_visits: Dict[str, int] + ) -> FIFOPriorityQueue: """ - Utility function to remove supported cycles in the Pipeline's graph. + Calculates the execution priority for each component and inserts it into the priority queue. - Given that the Pipeline execution would wait to run a Component until it has received - all its mandatory inputs, it doesn't make sense for us to try and break cycles by - removing a connection to a mandatory input. The Pipeline would just get stuck at a later time. + :param component_names: Names of the components to put into the queue. + :param inputs: Inputs to the components. + :param component_visits: Current state of component visits. + :returns: A prioritized queue of component names. + """ + priority_queue = FIFOPriorityQueue() + for component_name in component_names: + component = self._get_component_with_graph_metadata_and_visits( + component_name, component_visits[component_name] + ) + priority = self._calculate_priority(component, inputs.get(component_name, {})) + priority_queue.push(component_name, priority) - So we can only break connections in cycles that have a Variadic or GreedyVariadic type or a default value. + return priority_queue - This will raise a PipelineRuntimeError if we there are cycles that can't be broken. - That is bound to happen when at least one of the inputs in a cycle is mandatory. + @staticmethod + def _calculate_priority(component: Dict, inputs: Dict) -> ComponentPriority: + """ + Calculates the execution priority for a component depending on the component's inputs. - If the Pipeline's graph doesn't have any cycle it will just return that graph and an empty dictionary. + :param component: Component metadata and component instance. + :param inputs: Inputs to the component. + :returns: Priority value for the component. + """ + if not can_component_run(component, inputs): + return ComponentPriority.BLOCKED + elif is_any_greedy_socket_ready(component, inputs) and are_all_sockets_ready(component, inputs): + return ComponentPriority.HIGHEST + elif all_predecessors_executed(component, inputs): + return ComponentPriority.READY + elif are_all_lazy_variadic_sockets_resolved(component, inputs): + return ComponentPriority.DEFER + else: + return ComponentPriority.DEFER_LAST - :returns: - A tuple containing: - * A copy of the Pipeline's graph without cycles - * A dictionary of Component's names and a list of all the cycles they were part of. - The cycles are a list of Component's names that create that cycle. - """ - if networkx.is_directed_acyclic_graph(self.graph): - return self.graph, {} - - temp_graph: networkx.MultiDiGraph = self.graph.copy() - # A list of all the cycles that are found in the graph, each inner list contains - # the Component names that create that cycle. 
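# Illustrative sketch of how the new scheduling primitives above fit together:
# _calculate_priority ranks a component, _fill_queue pushes it, and
# FIFOPriorityQueue orders by priority value first, breaking ties in insertion
# order, which keeps execution deterministic. The component names below are
# hypothetical, and only the queue methods used in this diff (push/get) are assumed.
from haystack.core.pipeline.base import ComponentPriority
from haystack.core.pipeline.utils import FIFOPriorityQueue

queue = FIFOPriorityQueue()
queue.push("retriever", ComponentPriority.READY)       # pushed first
queue.push("prompt_builder", ComponentPriority.READY)  # same priority, pushed second
queue.push("joiner", ComponentPriority.DEFER)          # higher value = runs later

while (item := queue.get()) is not None:
    priority, name = item
    print(name)  # retriever, then prompt_builder (FIFO within READY), then joiner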
- cycles: List[List[str]] = list(networkx.simple_cycles(self.graph)) - # Maps a Component name to a list of its output socket names that have been broken - edges_removed: Dict[str, List[str]] = defaultdict(list) - # This keeps track of all the cycles that a component is part of. - # Maps a Component name to a list of cycles, each inner list contains - # the Component names that create that cycle (the key will also be - # an element in each list). The last Component in each list is implicitly - # connected to the first. - components_in_cycles: Dict[str, List[List[str]]] = defaultdict(list) - - # Used to minimize the number of time we check whether the graph has any more - # cycles left to break or not. - graph_has_cycles = True - - # Iterate all the cycles to find the least amount of connections that we can remove - # to make the Pipeline graph acyclic. - # As soon as the graph is acyclic we stop breaking connections and return. - for cycle in cycles: - for comp in cycle: - components_in_cycles[comp].append(cycle) - - # Iterate this cycle, we zip the cycle with itself so that at the last iteration - # sender_comp will be the last element of cycle and receiver_comp will be the first. - # So if cycle is [1, 2, 3, 4] we would call zip([1, 2, 3, 4], [2, 3, 4, 1]). - for sender_comp, receiver_comp in zip(cycle, cycle[1:] + cycle[:1]): - # We get the key and iterate those as we want to edit the graph data while - # iterating the edges and that would raise. - # Even though the connection key set in Pipeline.connect() uses only the - # sockets name we don't have clashes since it's only used to differentiate - # multiple edges between two nodes. - edge_keys = list(temp_graph.get_edge_data(sender_comp, receiver_comp).keys()) - for edge_key in edge_keys: - edge_data = temp_graph.get_edge_data(sender_comp, receiver_comp)[edge_key] - receiver_socket = edge_data["to_socket"] - if not receiver_socket.is_variadic and receiver_socket.is_mandatory: - continue - - # We found a breakable edge - sender_socket = edge_data["from_socket"] - edges_removed[sender_comp].append(sender_socket.name) - temp_graph.remove_edge(sender_comp, receiver_comp, edge_key) - - graph_has_cycles = not networkx.is_directed_acyclic_graph(temp_graph) - if not graph_has_cycles: - # We removed all the cycles, we can stop - break - - if not graph_has_cycles: - # We removed all the cycles, nice - break - - if graph_has_cycles: - msg = "Pipeline contains a cycle that we can't execute" - raise PipelineRuntimeError(msg) - - return temp_graph, components_in_cycles + def _get_component_with_graph_metadata_and_visits(self, component_name: str, visits: int) -> Dict[str, Any]: + """ + Returns the component instance alongside input/output-socket metadata from the graph and adds current visits. + We can't store visits in the pipeline graph because this would prevent reentrance / thread-safe execution. -def _connections_status( - sender_node: str, receiver_node: str, sender_sockets: List[OutputSocket], receiver_sockets: List[InputSocket] -): - """ - Lists the status of the sockets, for error messages. - """ - sender_sockets_entries = [] - for sender_socket in sender_sockets: - sender_sockets_entries.append(f" - {sender_socket.name}: {_type_name(sender_socket.type)}") - sender_sockets_list = "\n".join(sender_sockets_entries) + :param component_name: The name of the component. + :param visits: Number of visits for the component. + :returns: Dict including component instance, input/output-sockets and visits. 
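+
+        The returned dict has this shape (values are illustrative):
+
+            {"instance": <Component>, "input_sockets": {...}, "output_sockets": {...}, "visits": 2}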
+ """ + comp_dict = self.graph.nodes[component_name] + comp_dict = {**comp_dict, "visits": visits} + return comp_dict - receiver_sockets_entries = [] - for receiver_socket in receiver_sockets: - if receiver_socket.senders: - sender_status = f"sent by {','.join(receiver_socket.senders)}" - else: - sender_status = "available" - receiver_sockets_entries.append( - f" - {receiver_socket.name}: {_type_name(receiver_socket.type)} ({sender_status})" - ) - receiver_sockets_list = "\n".join(receiver_sockets_entries) + def _get_next_runnable_component( + self, priority_queue: FIFOPriorityQueue, component_visits: Dict[str, int] + ) -> Union[Tuple[ComponentPriority, str, Dict[str, Any]], None]: + """ + Returns the next runnable component alongside its metadata from the priority queue. - return f"'{sender_node}':\n{sender_sockets_list}\n'{receiver_node}':\n{receiver_sockets_list}" + :param priority_queue: Priority queue of component names. + :param component_visits: Current state of component visits. + :returns: The next runnable component, the component name, and its priority + or None if no component in the queue can run. + :raises: PipelineMaxComponentRuns if the next runnable component has exceeded the maximum number of runs. + """ + priority_and_component_name: Union[Tuple[ComponentPriority, str], None] = ( + None if (item := priority_queue.get()) is None else (ComponentPriority(item[0]), str(item[1])) + ) + if priority_and_component_name is not None and priority_and_component_name[0] != ComponentPriority.BLOCKED: + priority, component_name = priority_and_component_name + component = self._get_component_with_graph_metadata_and_visits( + component_name, component_visits[component_name] + ) + if component["visits"] > self._max_runs_per_component: + msg = f"Maximum run count {self._max_runs_per_component} reached for component '{component_name}'" + raise PipelineMaxComponentRuns(msg) -def _is_lazy_variadic(c: Component) -> bool: - """ - Small utility function to check if a Component has at least a Variadic input and no GreedyVariadic input. - """ - is_variadic = any( - socket.is_variadic - for socket in c.__haystack_input__._sockets_dict.values() # type: ignore - ) - if not is_variadic: - return False - return not any( - socket.is_greedy - for socket in c.__haystack_input__._sockets_dict.values() # type: ignore - ) - - -def _has_all_inputs_with_defaults(c: Component) -> bool: - """ - Small utility function to check if a Component has all inputs with defaults. - """ - return all( - not socket.is_mandatory - for socket in c.__haystack_input__._sockets_dict.values() # type: ignore - ) + return priority, component_name, component + return None -def _add_missing_input_defaults(name: str, comp: Component, components_inputs: Dict[str, Dict[str, Any]]): - """ - Updates the inputs with the default values for the inputs that are missing + @staticmethod + def _add_missing_input_defaults(component_inputs: Dict[str, Any], component_input_sockets: Dict[str, InputSocket]): + """ + Updates the inputs with the default values for the inputs that are missing - :param name: Name of the Component - :param comp: Instance of the Component - :param components_inputs: The current state of the inputs divided by Component name - """ - if name not in components_inputs: - components_inputs[name] = {} + :param component_inputs: Inputs for the component. + :param component_input_sockets: Input sockets of the component. 
+ """ + for name, socket in component_input_sockets.items(): + if not socket.is_mandatory and name not in component_inputs: + if socket.is_variadic: + component_inputs[name] = [socket.default_value] + else: + component_inputs[name] = socket.default_value - for input_socket in comp.__haystack_input__._sockets_dict.values(): # type: ignore - if input_socket.is_mandatory: - continue + return component_inputs - if input_socket.name not in components_inputs[name]: - components_inputs[name][input_socket.name] = input_socket.default_value + @staticmethod + def _write_component_outputs( + component_name, component_outputs, inputs, receivers, include_outputs_from + ) -> Dict[str, Any]: + """ + Distributes the outputs of a component to the input sockets that it is connected to. + + :param component_name: The name of the component. + :param component_outputs: The outputs of the component. + :param inputs: The current global input state. + :param receivers: List of receiver_name, sender_socket, receiver_socket for connected components. + :param include_outputs_from: List of component names that should always return an output from the pipeline. + """ + for receiver_name, sender_socket, receiver_socket in receivers: + # We either get the value that was produced by the actor or we use the _NO_OUTPUT_PRODUCED class to indicate + # that the sender did not produce an output for this socket. + # This allows us to track if a pre-decessor already ran but did not produce an output. + value = component_outputs.get(sender_socket.name, _NO_OUTPUT_PRODUCED) + if receiver_name not in inputs: + inputs[receiver_name] = {} + + # If we have a non-variadic or a greedy variadic receiver socket, we can just overwrite any inputs + # that might already exist (to be reconsidered but mirrors current behavior). + if not is_socket_lazy_variadic(receiver_socket): + inputs[receiver_name][receiver_socket.name] = [{"sender": component_name, "value": value}] + + # If the receiver socket is lazy variadic, and it already has an input, we need to append the new input. + # Lazy variadic sockets can collect multiple inputs. + else: + if not inputs[receiver_name].get(receiver_socket.name): + inputs[receiver_name][receiver_socket.name] = [] + inputs[receiver_name][receiver_socket.name].append({"sender": component_name, "value": value}) -def _enqueue_component( - component_pair: Tuple[str, Component], - run_queue: List[Tuple[str, Component]], - waiting_queue: List[Tuple[str, Component]], -): - """ - Append a Component in the queue of Components to run if not already in it. + # If we want to include all outputs from this actor in the final outputs, we don't need to prune any consumed + # outputs + if component_name in include_outputs_from: + return component_outputs - Remove it from the waiting list if it's there. + # We prune outputs that were consumed by any receiving sockets. + # All remaining outputs will be added to the final outputs of the pipeline. 
+ consumed_outputs = {sender_socket.name for _, sender_socket, __ in receivers} + pruned_outputs = {key: value for key, value in component_outputs.items() if key not in consumed_outputs} - :param component_pair: Tuple of Component name and instance - :param run_queue: Queue of Components to run - :param waiting_queue: Queue of Components waiting for input - """ - if component_pair in waiting_queue: - waiting_queue.remove(component_pair) + return pruned_outputs - if component_pair not in run_queue: - run_queue.append(component_pair) + @staticmethod + def _is_queue_stale(priority_queue: FIFOPriorityQueue) -> bool: + """ + Checks if the priority queue needs to be recomputed because the priorities might have changed. + :param priority_queue: Priority queue of component names. + """ + return len(priority_queue) == 0 or priority_queue.peek()[0] > ComponentPriority.READY -def _dequeue_component( - component_pair: Tuple[str, Component], - run_queue: List[Tuple[str, Component]], - waiting_queue: List[Tuple[str, Component]], -): - """ - Removes a Component both from the queue of Components to run and the waiting list. + @staticmethod + def validate_pipeline(priority_queue: FIFOPriorityQueue) -> None: + """ + Validate the pipeline to check if it is blocked or has no valid entry point. - :param component_pair: Tuple of Component name and instance - :param run_queue: Queue of Components to run - :param waiting_queue: Queue of Components waiting for input - """ - if component_pair in waiting_queue: - waiting_queue.remove(component_pair) + :param priority_queue: Priority queue of component names. + """ + if len(priority_queue) == 0: + return - if component_pair in run_queue: - run_queue.remove(component_pair) + candidate = priority_queue.peek() + if candidate is not None and candidate[0] == ComponentPriority.BLOCKED: + raise PipelineRuntimeError( + "Cannot run pipeline - all components are blocked. " + "This typically happens when:\n" + "1. There is no valid entry point for the pipeline\n" + "2. There is a circular dependency preventing the pipeline from running\n" + "Check the connections between these components and ensure all required inputs are provided." + ) -def _enqueue_waiting_component(component_pair: Tuple[str, Component], waiting_queue: List[Tuple[str, Component]]): +def _connections_status( + sender_node: str, receiver_node: str, sender_sockets: List[OutputSocket], receiver_sockets: List[InputSocket] +): """ - Append a Component in the queue of Components that are waiting for inputs if not already in it. - - :param component_pair: Tuple of Component name and instance - :param waiting_queue: Queue of Components waiting for input + Lists the status of the sockets, for error messages. """ - if component_pair not in waiting_queue: - waiting_queue.append(component_pair) - + sender_sockets_entries = [] + for sender_socket in sender_sockets: + sender_sockets_entries.append(f" - {sender_socket.name}: {_type_name(sender_socket.type)}") + sender_sockets_list = "\n".join(sender_sockets_entries) -def _dequeue_waiting_component(component_pair: Tuple[str, Component], waiting_queue: List[Tuple[str, Component]]): - """ - Removes a Component from the queue of Components that are waiting for inputs. 
+ receiver_sockets_entries = [] + for receiver_socket in receiver_sockets: + if receiver_socket.senders: + sender_status = f"sent by {','.join(receiver_socket.senders)}" + else: + sender_status = "available" + receiver_sockets_entries.append( + f" - {receiver_socket.name}: {_type_name(receiver_socket.type)} ({sender_status})" + ) + receiver_sockets_list = "\n".join(receiver_sockets_entries) - :param component_pair: Tuple of Component name and instance - :param waiting_queue: Queue of Components waiting for input - """ - if component_pair in waiting_queue: - waiting_queue.remove(component_pair) + return f"'{sender_node}':\n{sender_sockets_list}\n'{receiver_node}':\n{receiver_sockets_list}" diff --git a/haystack/core/pipeline/component_checks.py b/haystack/core/pipeline/component_checks.py new file mode 100644 index 0000000000..b4d42a8ac0 --- /dev/null +++ b/haystack/core/pipeline/component_checks.py @@ -0,0 +1,245 @@ +# SPDX-FileCopyrightText: 2022-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +from typing import Any, Dict, List + +from haystack.core.component.types import InputSocket, _empty + +_NO_OUTPUT_PRODUCED = _empty + + +def can_component_run(component: Dict, inputs: Dict) -> bool: + """ + Checks if the component can run, given the current state of its inputs. + + A component needs to pass two gates so that it is ready to run: + 1. It has received all mandatory inputs. + 2. It has received a trigger. + :param component: Component metadata and the component instance. + :param inputs: Inputs for the component. + """ + received_all_mandatory_inputs = are_all_sockets_ready(component, inputs, only_check_mandatory=True) + received_trigger = has_any_trigger(component, inputs) + + return received_all_mandatory_inputs and received_trigger + + +def has_any_trigger(component: Dict, inputs: Dict) -> bool: + """ + Checks if a component was triggered to execute. + + There are 3 triggers: + 1. A predecessor provided input to the component. + 2. Input to the component was provided from outside the pipeline (e.g. user input). + 3. The component does not receive input from any other components in the pipeline and `Pipeline.run` was called. + + A trigger can only cause a component to execute ONCE because: + 1. Components consume inputs from predecessors before execution (they are deleted). + 2. Inputs from outside the pipeline can only trigger a component when it is executed for the first time. + 3. `Pipeline.run` can only trigger a component when it is executed for the first time. + + :param component: Component metadata and the component instance. + :param inputs: Inputs for the component. + """ + trigger_from_predecessor = any_predecessors_provided_input(component, inputs) + trigger_from_user = has_user_input(inputs) and component["visits"] == 0 + trigger_without_inputs = can_not_receive_inputs_from_pipeline(component) and component["visits"] == 0 + + return trigger_from_predecessor or trigger_from_user or trigger_without_inputs + + +def are_all_sockets_ready(component: Dict, inputs: Dict, only_check_mandatory: bool = False) -> bool: + """ + Checks if all sockets of a component have enough inputs for the component to execute. + + :param component: Component metadata and the component instance. + :param inputs: Inputs for the component. + :param only_check_mandatory: If only mandatory sockets should be checked. 
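+
+    Example (illustrative; a single mandatory, non-variadic socket):
+
+        >>> socket = InputSocket("query", str)
+        >>> component = {"input_sockets": {"query": socket}}
+        >>> are_all_sockets_ready(component, {"query": [{"sender": None, "value": "hi"}]})
+        True
+        >>> are_all_sockets_ready(component, {})
+        False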
+ """ + filled_sockets = set() + expected_sockets = set() + if only_check_mandatory: + sockets_to_check = { + socket_name: socket for socket_name, socket in component["input_sockets"].items() if socket.is_mandatory + } + else: + sockets_to_check = { + socket_name: socket + for socket_name, socket in component["input_sockets"].items() + if socket.is_mandatory or len(socket.senders) + } + + for socket_name, socket in sockets_to_check.items(): + socket_inputs = inputs.get(socket_name, []) + expected_sockets.add(socket_name) + + # Check if socket has all required inputs or is a lazy variadic socket with any input + if has_socket_received_all_inputs(socket, socket_inputs) or ( + is_socket_lazy_variadic(socket) and any_socket_input_received(socket_inputs) + ): + filled_sockets.add(socket_name) + + return filled_sockets == expected_sockets + + +def any_predecessors_provided_input(component: Dict, inputs: Dict) -> bool: + """ + Checks if a component received inputs from any predecessors. + + :param component: Component metadata and the component instance. + :param inputs: Inputs for the component. + """ + return any( + any_socket_value_from_predecessor_received(inputs.get(socket_name, [])) + for socket_name in component["input_sockets"].keys() + ) + + +def any_socket_value_from_predecessor_received(socket_inputs: List[Dict[str, Any]]) -> bool: + """ + Checks if a component socket received input from any predecessors. + + :param socket_inputs: Inputs for the component's socket. + """ + # When sender is None, the input was provided from outside the pipeline. + return any(inp["value"] != _NO_OUTPUT_PRODUCED and inp["sender"] is not None for inp in socket_inputs) + + +def has_user_input(inputs: Dict) -> bool: + """ + Checks if a component has received input from outside the pipeline (e.g. user input). + + :param inputs: Inputs for the component. + """ + return any(inp for socket in inputs.values() for inp in socket if inp["sender"] is None) + + +def can_not_receive_inputs_from_pipeline(component: Dict) -> bool: + """ + Checks if a component can not receive inputs from any other components in the pipeline. + + :param: Component metadata and the component instance. + """ + return all(len(sock.senders) == 0 for sock in component["input_sockets"].values()) + + +def all_socket_predecessors_executed(socket: InputSocket, socket_inputs: List[Dict]) -> bool: + """ + Checks if all components connecting to an InputSocket have executed. + + :param: The InputSocket of a component. + :param: socket_inputs: Inputs for the socket. + """ + expected_senders = set(socket.senders) + executed_senders = {inp["sender"] for inp in socket_inputs if inp["sender"] is not None} + + return expected_senders == executed_senders + + +def any_socket_input_received(socket_inputs: List[Dict]) -> bool: + """ + Checks if a socket has received any input from any other components in the pipeline or from outside the pipeline. + + :param socket_inputs: Inputs for the socket. + """ + return any(inp["value"] != _NO_OUTPUT_PRODUCED for inp in socket_inputs) + + +def has_lazy_variadic_socket_received_all_inputs(socket: InputSocket, socket_inputs: List[Dict]) -> bool: + """ + Checks if a lazy variadic socket has received all expected inputs from other components in the pipeline. + + :param socket: The InputSocket of a component. + :param socket_inputs: Inputs for the socket. 
+ """ + expected_senders = set(socket.senders) + actual_senders = { + sock["sender"] for sock in socket_inputs if sock["value"] != _NO_OUTPUT_PRODUCED and sock["sender"] is not None + } + + return expected_senders == actual_senders + + +def is_socket_lazy_variadic(socket: InputSocket) -> bool: + """ + Checks if an InputSocket is a lazy variadic socket. + + :param socket: The InputSocket of a component. + """ + return socket.is_variadic and not socket.is_greedy + + +def has_socket_received_all_inputs(socket: InputSocket, socket_inputs: List[Dict]) -> bool: + """ + Checks if a socket has received all expected inputs. + + :param socket: The InputSocket of a component. + :param socket_inputs: Inputs for the socket. + """ + # No inputs received for the socket, it is not filled. + if len(socket_inputs) == 0: + return False + + # The socket is greedy variadic and at least one input was produced, it is complete. + if socket.is_variadic and socket.is_greedy and any(sock["value"] != _NO_OUTPUT_PRODUCED for sock in socket_inputs): + return True + + # The socket is lazy variadic and all expected inputs were produced. + if is_socket_lazy_variadic(socket) and has_lazy_variadic_socket_received_all_inputs(socket, socket_inputs): + return True + + # The socket is not variadic and the only expected input is complete. + return not socket.is_variadic and socket_inputs[0]["value"] != _NO_OUTPUT_PRODUCED + + +def all_predecessors_executed(component: Dict, inputs: Dict) -> bool: + """ + Checks if all predecessors of a component have executed. + + :param component: Component metadata and the component instance. + :param inputs: Inputs for the component. + """ + return all( + all_socket_predecessors_executed(socket, inputs.get(socket_name, [])) + for socket_name, socket in component["input_sockets"].items() + ) + + +def are_all_lazy_variadic_sockets_resolved(component: Dict, inputs: Dict) -> bool: + """ + Checks if the final state for all lazy variadic sockets of a component is resolved. + + :param component: Component metadata and the component instance. + :param inputs: Inputs for the component. + """ + for socket_name, socket in component["input_sockets"].items(): + if is_socket_lazy_variadic(socket): + socket_inputs = inputs.get(socket_name, []) + + # Checks if a lazy variadic socket is ready to run. + # A socket is ready if either: + # - it has received all expected inputs, or + # - all its predecessors have executed + # If none of the conditions are met, the socket is not ready to run and we defer the component. + if not ( + has_lazy_variadic_socket_received_all_inputs(socket, socket_inputs) + or all_socket_predecessors_executed(socket, socket_inputs) + ): + return False + + return True + + +def is_any_greedy_socket_ready(component: Dict, inputs: Dict) -> bool: + """ + Checks if the component has any greedy socket that is ready to run. + + :param component: Component metadata and the component instance. + :param inputs: Inputs for the component. 
+ """ + for socket_name, socket in component["input_sockets"].items(): + if socket.is_greedy and has_socket_received_all_inputs(socket, inputs.get(socket_name, [])): + return True + + return False diff --git a/haystack/core/pipeline/pipeline.py b/haystack/core/pipeline/pipeline.py index 622c4ef6d7..e85daaf36c 100644 --- a/haystack/core/pipeline/pipeline.py +++ b/haystack/core/pipeline/pipeline.py @@ -2,25 +2,16 @@ # # SPDX-License-Identifier: Apache-2.0 +import warnings from copy import deepcopy -from typing import Any, Dict, List, Mapping, Optional, Set, Tuple -from warnings import warn - -import networkx as nx +from typing import Any, Dict, Mapping, Optional, Set, cast from haystack import logging, tracing from haystack.core.component import Component -from haystack.core.errors import PipelineMaxComponentRuns, PipelineRuntimeError -from haystack.core.pipeline.base import ( - _dequeue_component, - _dequeue_waiting_component, - _enqueue_component, - _enqueue_waiting_component, -) +from haystack.core.errors import PipelineRuntimeError +from haystack.core.pipeline.base import ComponentPriority, PipelineBase from haystack.telemetry import pipeline_running -from .base import PipelineBase, _add_missing_input_defaults, _is_lazy_variadic - logger = logging.getLogger(__name__) @@ -32,26 +23,39 @@ class Pipeline(PipelineBase): """ def _run_component( - self, name: str, inputs: Dict[str, Any], parent_span: Optional[tracing.Span] = None + self, + component: Dict[str, Any], + inputs: Dict[str, Any], + component_visits: Dict[str, int], + parent_span: Optional[tracing.Span] = None, ) -> Dict[str, Any]: """ Runs a Component with the given inputs. - :param name: Name of the Component as defined in the Pipeline. + :param component: Component with component metadata. :param inputs: Inputs for the Component. + :param component_visits: Current state of component visits. :param parent_span: The parent span to use for the newly created span. This is to allow tracing to be correctly linked to the pipeline run. :raises PipelineRuntimeError: If Component doesn't return a dictionary. :return: The output of the Component. 
""" - instance: Component = self.graph.nodes[name]["instance"] + instance: Component = component["instance"] + component_name = self.get_component_name(instance) + component_inputs = self._consume_component_inputs( + component_name=component_name, component=component, inputs=inputs + ) + + # We need to add missing defaults using default values from input sockets because the run signature + # might not provide these defaults for components with inputs defined dynamically upon component initialization + component_inputs = self._add_missing_input_defaults(component_inputs, component["input_sockets"]) with tracing.tracer.trace( "haystack.component.run", tags={ - "haystack.component.name": name, + "haystack.component.name": component_name, "haystack.component.type": instance.__class__.__name__, - "haystack.component.input_types": {k: type(v).__name__ for k, v in inputs.items()}, + "haystack.component.input_types": {k: type(v).__name__ for k, v in component_inputs.items()}, "haystack.component.input_spec": { key: { "type": (value.type.__name__ if isinstance(value.type, type) else str(value.type)), @@ -71,191 +75,21 @@ def _run_component( ) as span: # We deepcopy the inputs otherwise we might lose that information # when we delete them in case they're sent to other Components - span.set_content_tag("haystack.component.input", deepcopy(inputs)) - logger.info("Running component {component_name}", component_name=name) - res: Dict[str, Any] = instance.run(**inputs) - self.graph.nodes[name]["visits"] += 1 - - # After a Component that has variadic inputs is run, we need to reset the variadic inputs that were consumed - for socket in instance.__haystack_input__._sockets_dict.values(): # type: ignore - if socket.name not in inputs: - continue - if socket.is_variadic: - inputs[socket.name] = [] - - if not isinstance(res, Mapping): + span.set_content_tag("haystack.component.input", deepcopy(component_inputs)) + logger.info("Running component {component_name}", component_name=component_name) + component_output = instance.run(**component_inputs) + component_visits[component_name] += 1 + + if not isinstance(component_output, Mapping): raise PipelineRuntimeError( - f"Component '{name}' didn't return a dictionary. " + f"Component '{component_name}' didn't return a dictionary. " "Components must always return dictionaries: check the documentation." ) - span.set_tag("haystack.component.visits", self.graph.nodes[name]["visits"]) - span.set_content_tag("haystack.component.output", res) - return res + span.set_tag("haystack.component.visits", component_visits[component_name]) + span.set_content_tag("haystack.component.output", component_output) - def _run_subgraph( # noqa: PLR0915 - self, - cycle: List[str], - component_name: str, - components_inputs: Dict[str, Dict[str, Any]], - *, - include_outputs_from: Optional[Set[str]] = None, - parent_span: Optional[tracing.Span] = None, - ) -> Tuple[Dict[str, Any], Dict[str, Any]]: - """ - Runs a `cycle` in the Pipeline starting from `component_name`. - - This will return once there are no inputs for the Components in `cycle`. - - This is an internal method meant to be used in `Pipeline.run()` only. 
- - :param cycle: - List of Components that are part of the cycle being run - :param component_name: - Name of the Component that will start execution of the cycle - :param components_inputs: - Components inputs, this might include inputs for Components that are not part - of the cycle but part of the wider Pipeline's graph - :param include_outputs_from: - Set of component names whose individual outputs are to be - included in the cycle's output. In case a Component is executed multiple times - only the last-produced output is included. - :returns: - Outputs of all the Components that are not connected to other Components in `cycle`. - If `include_outputs_from` is set those Components' outputs will be included. - :raises PipelineMaxComponentRuns: - If a Component reaches the maximum number of times it can be run in this Pipeline - """ - waiting_queue: List[Tuple[str, Component]] = [] - run_queue: List[Tuple[str, Component]] = [] - - # Create the run queue starting with the component that needs to run first - start_index = cycle.index(component_name) - for node in cycle[start_index:]: - run_queue.append((node, self.graph.nodes[node]["instance"])) - - include_outputs_from = set() if include_outputs_from is None else include_outputs_from - - before_last_waiting_queue: Optional[Set[str]] = None - last_waiting_queue: Optional[Set[str]] = None - - subgraph_outputs = {} - # These are outputs that are sent to other Components but the user explicitly - # asked to include them in the final output. - extra_outputs = {} - - # This variable is used to keep track if we still need to run the cycle or not. - # When a Component doesn't send outputs to another Component - # that's inside the subgraph, we stop running this subgraph. - cycle_received_inputs = False - - while not cycle_received_inputs: - # Here we run the Components - name, comp = run_queue.pop(0) - if _is_lazy_variadic(comp) and not all(_is_lazy_variadic(comp) for _, comp in run_queue): - # We run Components with lazy variadic inputs only if there only Components with - # lazy variadic inputs left to run - _enqueue_waiting_component((name, comp), waiting_queue) - continue - - # As soon as a Component returns only output that is not part of the cycle, we can stop - if self._component_has_enough_inputs_to_run(name, components_inputs): - if self.graph.nodes[name]["visits"] > self._max_runs_per_component: - msg = f"Maximum run count {self._max_runs_per_component} reached for component '{name}'" - raise PipelineMaxComponentRuns(msg) - - res: Dict[str, Any] = self._run_component(name, components_inputs[name], parent_span=parent_span) - - # Delete the inputs that were consumed by the Component and are not received from - # the user or from Components that are part of this cycle - sockets = list(components_inputs[name].keys()) - for socket_name in sockets: - senders = comp.__haystack_input__._sockets_dict[socket_name].senders # type: ignore - if not senders: - # We keep inputs that came from the user - continue - all_senders_in_cycle = all(sender in cycle for sender in senders) - if all_senders_in_cycle: - # All senders are in the cycle, we can remove the input. - # We'll receive it later at a certain point. - del components_inputs[name][socket_name] - - if name in include_outputs_from: - # Deepcopy the outputs to prevent downstream nodes from modifying them - # We don't care about loops - Always store the last output. 
- extra_outputs[name] = deepcopy(res) - - # Reset the waiting for input previous states, we managed to run a component - before_last_waiting_queue = None - last_waiting_queue = None - - # Check if a component doesn't send any output to components that are part of the cycle - final_output_reached = False - for output_socket in res.keys(): - for receiver in comp.__haystack_output__._sockets_dict[output_socket].receivers: # type: ignore - if receiver in cycle: - final_output_reached = True - break - if final_output_reached: - break - - if not final_output_reached: - # We stop only if the Component we just ran doesn't send any output to sockets that - # are part of the cycle - cycle_received_inputs = True - - # We manage to run this component that was in the waiting list, we can remove it. - # This happens when a component was put in the waiting list but we reached it from another edge. - _dequeue_waiting_component((name, comp), waiting_queue) - for pair in self._find_components_that_will_receive_no_input(name, res, components_inputs): - _dequeue_component(pair, run_queue, waiting_queue) - - receivers = [item for item in self._find_receivers_from(name) if item[0] in cycle] - - res = self._distribute_output(receivers, res, components_inputs, run_queue, waiting_queue) - - # We treat a cycle as a completely independent graph, so we keep track of output - # that is not sent inside the cycle. - # This output is going to get distributed to the wider graph after we finish running - # a cycle. - # All values that are left at this point go outside the cycle. - if len(res) > 0: - subgraph_outputs[name] = res - else: - # This component doesn't have enough inputs so we can't run it yet - _enqueue_waiting_component((name, comp), waiting_queue) - - if len(run_queue) == 0 and len(waiting_queue) > 0: - # Check if we're stuck in a loop. - # It's important to check whether previous waitings are None as it could be that no - # Component has actually been run yet. - if ( - before_last_waiting_queue is not None - and last_waiting_queue is not None - and before_last_waiting_queue == last_waiting_queue - ): - if self._is_stuck_in_a_loop(waiting_queue): - # We're stuck! We can't make any progress. - msg = ( - "Pipeline is stuck running in a loop. Partial outputs will be returned. " - "Check the Pipeline graph for possible issues." - ) - warn(RuntimeWarning(msg)) - break - - (name, comp) = self._find_next_runnable_lazy_variadic_or_default_component(waiting_queue) - _add_missing_input_defaults(name, comp, components_inputs) - _enqueue_component((name, comp), run_queue, waiting_queue) - continue - - before_last_waiting_queue = last_waiting_queue.copy() if last_waiting_queue is not None else None - last_waiting_queue = {item[0] for item in waiting_queue} - - (name, comp) = self._find_next_runnable_component(components_inputs, waiting_queue) - _add_missing_input_defaults(name, comp, components_inputs) - _enqueue_component((name, comp), run_queue, waiting_queue) - - return subgraph_outputs, extra_outputs + return cast(Dict[Any, Any], component_output) def run( # noqa: PLR0915, PLR0912 self, data: Dict[str, Any], include_outputs_from: Optional[Set[str]] = None @@ -340,6 +174,8 @@ def run( # noqa: PLR0915, PLR0912 will only contain the outputs of leaf components, i.e., components without outgoing connections. + :raises ValueError: + If invalid inputs are provided to the pipeline. :raises PipelineRuntimeError: If the Pipeline contains cycles with unsupported connections that would cause it to get stuck and fail running. 
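# A minimal usage sketch of the documented `run` contract (the pipeline content
# and component names are hypothetical):
from haystack import Pipeline

pipe = Pipeline(max_runs_per_component=100)
# ... add_component() and connect() calls would go here ...
results = pipe.run(
    data={"retriever": {"query": "Who lives in Paris?"}},
    include_outputs_from={"retriever"},  # also surface this non-leaf component's output
)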
@@ -349,9 +185,6 @@ def run( # noqa: PLR0915, PLR0912 """ pipeline_running(self) - # Reset the visits count for each component - self._init_graph() - # TODO: Remove this warmup once we can check reliably whether a component has been warmed up or not # As of now it's here to make sure we don't have failing tests that assume warm_up() is called in run() self.warm_up() @@ -359,194 +192,74 @@ def run( # noqa: PLR0915, PLR0912 # normalize `data` data = self._prepare_component_input_data(data) - # Raise if input is malformed in some way + # Raise ValueError if input is malformed in some way self._validate_input(data) - # Normalize the input data - components_inputs: Dict[str, Dict[str, Any]] = self._normalize_varidiac_input_data(data) - - # These variables are used to detect when we're stuck in a loop. - # Stuck loops can happen when one or more components are waiting for input but - # no other component is going to run. - # This can happen when a whole branch of the graph is skipped for example. - # When we find that two consecutive iterations of the loop where the waiting_queue is the same, - # we know we're stuck in a loop and we can't make any progress. - # - # They track the previous two states of the waiting_queue. So if waiting_queue would n, - # before_last_waiting_queue would be n-2 and last_waiting_queue would be n-1. - # When we run a component, we reset both. - before_last_waiting_queue: Optional[Set[str]] = None - last_waiting_queue: Optional[Set[str]] = None - - # The waiting_for_input list is used to keep track of components that are waiting for input. - waiting_queue: List[Tuple[str, Component]] = [] - - include_outputs_from = set() if include_outputs_from is None else include_outputs_from - - # This is what we'll return at the end - final_outputs: Dict[Any, Any] = {} - - # Break cycles in case there are, this is a noop if no cycle is found. - # This will raise if a cycle can't be broken. - graph_without_cycles, components_in_cycles = self._break_supported_cycles_in_graph() - - run_queue: List[Tuple[str, Component]] = [] - for node in nx.topological_sort(graph_without_cycles): - run_queue.append((node, self.graph.nodes[node]["instance"])) - - # Set defaults inputs for those sockets that don't receive input neither from the user - # nor from other Components. - # If they have no default nothing is done. - # This is important to ensure correct order execution, otherwise some variadic - # Components that receive input from the user might be run before than they should. - for name, comp in self.graph.nodes(data="instance"): - if name not in components_inputs: - components_inputs[name] = {} - for socket_name, socket in comp.__haystack_input__._sockets_dict.items(): - if socket_name in components_inputs[name]: - continue - if not socket.senders: - value = socket.default_value - if socket.is_variadic: - value = [value] - components_inputs[name][socket_name] = value + if include_outputs_from is None: + include_outputs_from = set() + + # We create a list of components in the pipeline sorted by name, so that the algorithm runs deterministically + # and independent of insertion order into the pipeline. + ordered_component_names = sorted(self.graph.nodes.keys()) + + # We track component visits to decide if a component can run. + component_visits = {component_name: 0 for component_name in ordered_component_names} + + # We need to access a component's receivers multiple times during a pipeline run. + # We store them here for easy access. 
+ cached_receivers = {name: self._find_receivers_from(name) for name in ordered_component_names} + pipeline_outputs: Dict[str, Any] = {} with tracing.tracer.trace( "haystack.pipeline.run", tags={ "haystack.pipeline.input_data": data, - "haystack.pipeline.output_data": final_outputs, + "haystack.pipeline.output_data": pipeline_outputs, "haystack.pipeline.metadata": self.metadata, "haystack.pipeline.max_runs_per_component": self._max_runs_per_component, }, ) as span: - # Cache for extra outputs, if enabled. - extra_outputs: Dict[Any, Any] = {} - - while len(run_queue) > 0: - name, comp = run_queue.pop(0) - - if _is_lazy_variadic(comp) and not all(_is_lazy_variadic(comp) for _, comp in run_queue): - # We run Components with lazy variadic inputs only if there only Components with - # lazy variadic inputs left to run - _enqueue_waiting_component((name, comp), waiting_queue) - continue - if self._component_has_enough_inputs_to_run(name, components_inputs) and components_in_cycles.get( - name, [] - ): - cycles = components_in_cycles.get(name, []) - - # This component is part of one or more cycles, let's get the first one and run it. - # We can reliably pick any of the cycles if there are multiple ones, the way cycles - # are run doesn't make a different whether we pick the first or any of the others a - # Component is part of. - subgraph_output, subgraph_extra_output = self._run_subgraph( - cycles[0], name, components_inputs, include_outputs_from=include_outputs_from, parent_span=span - ) - - # After a cycle is run the previous run_queue can't be correct anymore cause it's - # not modified when running the subgraph. - # So we reset it given the output returned by the subgraph. - run_queue = [] - - # Reset the waiting for input previous states, we managed to run at least one component - before_last_waiting_queue = None - last_waiting_queue = None - - # Merge the extra outputs - extra_outputs.update(subgraph_extra_output) - - for component_name, component_output in subgraph_output.items(): - receivers = self._find_receivers_from(component_name) - component_output = self._distribute_output( - receivers, component_output, components_inputs, run_queue, waiting_queue - ) + inputs = self._convert_to_internal_format(pipeline_inputs=data) + priority_queue = self._fill_queue(ordered_component_names, inputs, component_visits) + + # check if pipeline is blocked before execution + self.validate_pipeline(priority_queue) + + while True: + candidate = self._get_next_runnable_component(priority_queue, component_visits) + if candidate is None: + break + + priority, component_name, component = candidate + if len(priority_queue) > 0: + next_priority, next_name = priority_queue.peek() - if len(component_output) > 0: - final_outputs[component_name] = component_output - - elif self._component_has_enough_inputs_to_run(name, components_inputs): - if self.graph.nodes[name]["visits"] > self._max_runs_per_component: - msg = f"Maximum run count {self._max_runs_per_component} reached for component '{name}'" - raise PipelineMaxComponentRuns(msg) - - res: Dict[str, Any] = self._run_component(name, components_inputs[name], parent_span=span) - - # Delete the inputs that were consumed by the Component and are not received from the user - sockets = list(components_inputs[name].keys()) - for socket_name in sockets: - senders = comp.__haystack_input__._sockets_dict[socket_name].senders - if senders: - # Delete all inputs that are received from other Components - del components_inputs[name][socket_name] - # We keep inputs that came 
from the user - - if name in include_outputs_from: - # Deepcopy the outputs to prevent downstream nodes from modifying them - # We don't care about loops - Always store the last output. - extra_outputs[name] = deepcopy(res) - - # Reset the waiting for input previous states, we managed to run a component - before_last_waiting_queue = None - last_waiting_queue = None - - # We manage to run this component that was in the waiting list, we can remove it. - # This happens when a component was put in the waiting list but we reached it from another edge. - _dequeue_waiting_component((name, comp), waiting_queue) - - for pair in self._find_components_that_will_receive_no_input(name, res, components_inputs): - _dequeue_component(pair, run_queue, waiting_queue) - receivers = self._find_receivers_from(name) - res = self._distribute_output(receivers, res, components_inputs, run_queue, waiting_queue) - - if len(res) > 0: - final_outputs[name] = res - else: - # This component doesn't have enough inputs so we can't run it yet - _enqueue_waiting_component((name, comp), waiting_queue) - - if len(run_queue) == 0 and len(waiting_queue) > 0: - # Check if we're stuck in a loop. - # It's important to check whether previous waitings are None as it could be that no - # Component has actually been run yet. if ( - before_last_waiting_queue is not None - and last_waiting_queue is not None - and before_last_waiting_queue == last_waiting_queue + priority in [ComponentPriority.DEFER, ComponentPriority.DEFER_LAST] + and next_priority == priority ): - if self._is_stuck_in_a_loop(waiting_queue): - # We're stuck! We can't make any progress. - msg = ( - "Pipeline is stuck running in a loop. Partial outputs will be returned. " - "Check the Pipeline graph for possible issues." - ) - warn(RuntimeWarning(msg)) - break - - (name, comp) = self._find_next_runnable_lazy_variadic_or_default_component(waiting_queue) - _add_missing_input_defaults(name, comp, components_inputs) - _enqueue_component((name, comp), run_queue, waiting_queue) - continue - - before_last_waiting_queue = last_waiting_queue.copy() if last_waiting_queue is not None else None - last_waiting_queue = {item[0] for item in waiting_queue} - - (name, comp) = self._find_next_runnable_component(components_inputs, waiting_queue) - _add_missing_input_defaults(name, comp, components_inputs) - _enqueue_component((name, comp), run_queue, waiting_queue) - - if len(include_outputs_from) > 0: - for name, output in extra_outputs.items(): - inner = final_outputs.get(name) - if inner is None: - final_outputs[name] = output - else: - # Let's not override any keys that are already - # in the final_outputs as they might be different - # from what we cached in extra_outputs, e.g. when loops - # are involved. - for k, v in output.items(): - if k not in inner: - inner[k] = v - - return final_outputs + msg = ( + f"Components '{component_name}' and '{next_name}' are waiting for " + f"optional inputs at the same time. The pipeline will execute '{component_name}' " + f"first based on lexicographical ordering." + ) + warnings.warn(msg) + + component_outputs = self._run_component(component, inputs, component_visits, parent_span=span) + + # Updates global input state with component outputs and returns outputs that should go to + # pipeline outputs. 
+ component_pipeline_outputs = self._write_component_outputs( + component_name=component_name, + component_outputs=component_outputs, + inputs=inputs, + receivers=cached_receivers[component_name], + include_outputs_from=include_outputs_from, + ) + + if component_pipeline_outputs: + pipeline_outputs[component_name] = deepcopy(component_pipeline_outputs) + if self._is_queue_stale(priority_queue): + priority_queue = self._fill_queue(ordered_component_names, inputs, component_visits) + + return pipeline_outputs diff --git a/haystack/core/pipeline/utils.py b/haystack/core/pipeline/utils.py index f9f858a39d..0746dc6746 100644 --- a/haystack/core/pipeline/utils.py +++ b/haystack/core/pipeline/utils.py @@ -2,7 +2,9 @@ # # SPDX-License-Identifier: Apache-2.0 -from typing import Optional, Tuple +import heapq +from itertools import count +from typing import Any, List, Optional, Tuple def parse_connect_string(connection: str) -> Tuple[str, Optional[str]]: @@ -18,3 +20,103 @@ def parse_connect_string(connection: str) -> Tuple[str, Optional[str]]: split_str = connection.split(".", maxsplit=1) return (split_str[0], split_str[1]) return connection, None + + +class FIFOPriorityQueue: + """ + A priority queue that maintains FIFO order for items of equal priority. + + Items with the same priority are processed in the order they were added. + This queue ensures that when multiple items share the same priority level, + they are dequeued in the same order they were enqueued (First-In-First-Out). + """ + + def __init__(self) -> None: + """ + Initialize a new FIFO priority queue. + """ + # List of tuples (priority, count, item) where count ensures FIFO order + self._queue: List[Tuple[int, int, Any]] = [] + # Counter to maintain insertion order for equal priorities + self._counter = count() + + def push(self, item: Any, priority: int) -> None: + """ + Push an item into the queue with a given priority. + + Items with equal priority maintain FIFO ordering based on insertion time. + Lower priority numbers are dequeued first. + + :param item: + The item to insert into the queue. + :param priority: + Priority level for the item. Lower numbers indicate higher priority. + """ + next_count = next(self._counter) + entry = (priority, next_count, item) + heapq.heappush(self._queue, entry) + + def pop(self) -> Tuple[int, Any]: + """ + Remove and return the highest priority item from the queue. + + For items with equal priority, returns the one that was inserted first. + + :returns: + A tuple containing (priority, item) with the lowest priority number. + :raises IndexError: + If the queue is empty. + """ + if not self._queue: + raise IndexError("pop from empty queue") + priority, _, item = heapq.heappop(self._queue) + return priority, item + + def peek(self) -> Tuple[int, Any]: + """ + Return but don't remove the highest priority item from the queue. + + For items with equal priority, returns the one that was inserted first. + + :returns: + A tuple containing (priority, item) with the lowest priority number. + :raises IndexError: + If the queue is empty. + """ + if not self._queue: + raise IndexError("peek at empty queue") + priority, _, item = self._queue[0] + return priority, item + + def get(self) -> Optional[Tuple[int, Any]]: + """ + Remove and return the highest priority item from the queue. + + For items with equal priority, returns the one that was inserted first. + Unlike pop(), returns None if the queue is empty instead of raising an exception. 
+
+        :returns:
+            A tuple containing (priority, item), or None if the queue is empty.
+        """
+        if not self._queue:
+            return None
+        priority, _, item = heapq.heappop(self._queue)
+        return priority, item
+
+    def __len__(self) -> int:
+        """
+        Return the number of items in the queue.
+
+        :returns:
+            The number of items currently in the queue.
+        """
+        return len(self._queue)
+
+    def __bool__(self) -> bool:
+        """
+        Return True if the queue has items, False if empty.
+
+        :returns:
+            True if the queue contains items, False otherwise.
+        """
+        return bool(self._queue)
diff --git a/pyproject.toml b/pyproject.toml
index eda943c19a..32ae00497c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -185,7 +185,7 @@ include = ["/haystack", "/VERSION.txt"]
 packages = ["haystack"]
 
 [tool.codespell]
-ignore-words-list = "ans,astroid,nd,ned,nin,ue,rouge,ist"
+ignore-words-list = "ans,astroid,nd,ned,nin,ue,rouge,ist,Claus"
 quiet-level = 3
 skip = "./test,./e2e"
diff --git a/releasenotes/notes/fix-pipeline-run-2fefeafc705a6d91.yaml b/releasenotes/notes/fix-pipeline-run-2fefeafc705a6d91.yaml
new file mode 100644
index 0000000000..96381b9770
--- /dev/null
+++ b/releasenotes/notes/fix-pipeline-run-2fefeafc705a6d91.yaml
@@ -0,0 +1,12 @@
+---
+highlights: >
+  Major refactoring of `Pipeline.run()` to fix multiple bugs.
+  We moved from a mostly graph-based execution logic to a dynamic, dataflow-driven one.
+  While most pipelines should remain unaffected, we recommend carefully checking your pipeline executions
+  to ensure their output hasn't changed.
+fixes:
+  - |
+    - acyclic pipelines with multiple lazy variadic components not running all components
+    - cyclic pipelines not passing intermediate outputs to components outside the cycle
+    - cyclic pipelines with two or more optional or greedy variadic edges showing unexpected execution behavior
+    - cyclic pipelines with two cycles sharing an edge raising errors
diff --git a/test/conftest.py b/test/conftest.py
index 513009d234..2929e0ca7d 100644
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -1,17 +1,18 @@
 # SPDX-FileCopyrightText: 2022-present deepset GmbH
 #
 # SPDX-License-Identifier: Apache-2.0
-from datetime import datetime
+
 from pathlib import Path
 from test.tracing.utils import SpyingTracer
-from typing import Generator
-from unittest.mock import Mock, patch
+from typing import Generator, Dict
+from unittest.mock import Mock
 
 import pytest
-from openai.types.chat import ChatCompletion, ChatCompletionMessage
-from openai.types.chat.chat_completion import Choice
+import time
+import asyncio
+
 
-from haystack import tracing
+from haystack import tracing, component
 from haystack.testing.test_utils import set_all_seeds
 
 set_all_seeds(0)
@@ -20,6 +21,23 @@
 tracing.disable_tracing()
 
 
+@pytest.fixture()
+def waiting_component():
+    @component
+    class Waiter:
+        @component.output_types(waited_for=int)
+        def run(self, wait_for: int) -> Dict[str, int]:
+            time.sleep(wait_for)
+            return {"waited_for": wait_for}
+
+        @component.output_types(waited_for=int)
+        async def run_async(self, wait_for: int) -> Dict[str, int]:
+            await asyncio.sleep(wait_for)
+            return {"waited_for": wait_for}
+
+    return Waiter
+
+
 @pytest.fixture()
 def mock_tokenizer():
     """
@@ -57,6 +75,7 @@ def urlopen_mock(self, method, url, *args, **kwargs):
 def spying_tracer() -> Generator[SpyingTracer, None, None]:
     tracer = SpyingTracer()
     tracing.enable_tracing(tracer)
+    tracer.is_content_tracing_enabled = True
     yield tracer
diff --git a/test/core/pipeline/features/conftest.py b/test/core/pipeline/features/conftest.py
index 
16c7be35ea..e1e411df1e 100644 --- a/test/core/pipeline/features/conftest.py +++ b/test/core/pipeline/features/conftest.py @@ -19,7 +19,7 @@ class PipelineRunData: inputs: Dict[str, Any] include_outputs_from: Set[str] = field(default_factory=set) expected_outputs: Dict[str, Any] = field(default_factory=dict) - expected_run_order: List[str] = field(default_factory=list) + expected_component_calls: Dict[Tuple[str, int], Dict[str, Any]] = field(default_factory=dict) @dataclass @@ -29,7 +29,7 @@ class _PipelineResult: """ outputs: Dict[str, Any] - run_order: List[str] + component_calls: Dict[Tuple[str, int], Dict[str, Any]] = field(default_factory=dict) @when("I run the Pipeline", target_fixture="pipeline_result") @@ -52,12 +52,16 @@ def run_pipeline( for data in pipeline_run_data: try: outputs = pipeline.run(data=data.inputs, include_outputs_from=data.include_outputs_from) - run_order = [ - span.tags["haystack.component.name"] + + component_calls = { + (span.tags["haystack.component.name"], span.tags["haystack.component.visits"]): span.tags[ + "haystack.component.input" + ] for span in spying_tracer.spans - if "haystack.component.name" in span.tags - ] - results.append(_PipelineResult(outputs=outputs, run_order=run_order)) + if "haystack.component.name" in span.tags and "haystack.component.visits" in span.tags + } + results.append(_PipelineResult(outputs=outputs, component_calls=component_calls)) + spying_tracer.spans.clear() except Exception as e: return e @@ -83,10 +87,10 @@ def check_pipeline_result(pipeline_result: List[Tuple[_PipelineResult, PipelineR assert res.outputs == data.expected_outputs -@then("components ran in the expected order") -def check_pipeline_run_order(pipeline_result: List[Tuple[_PipelineResult, PipelineRunData]]): +@then("components are called with the expected inputs") +def check_component_calls(pipeline_result: List[Tuple[_PipelineResult, PipelineRunData]]): for res, data in pipeline_result: - assert res.run_order == data.expected_run_order + assert res.component_calls == data.expected_component_calls @then(parsers.parse("it must have raised {exception_class_name}")) diff --git a/test/core/pipeline/features/pipeline_run.feature b/test/core/pipeline/features/pipeline_run.feature index db064ea2b1..8724dd3e1e 100644 --- a/test/core/pipeline/features/pipeline_run.feature +++ b/test/core/pipeline/features/pipeline_run.feature @@ -4,7 +4,7 @@ Feature: Pipeline running Given a pipeline When I run the Pipeline Then it should return the expected result - And components ran in the expected order + And components are called with the expected inputs Examples: | kind | @@ -24,7 +24,6 @@ Feature: Pipeline running | that has a component with mutable input | | that has a component with mutable output sent to multiple inputs | | that has a greedy and variadic component after a component with default input | - | that has components added in a different order from the order of execution | | that has a component with only default inputs | | that has a component with only default inputs as first to run and receives inputs from a loop | | that has multiple branches that merge into a component with a single variadic input | @@ -39,11 +38,20 @@ Feature: Pipeline running | that is linear with conditional branching and multiple joins | | that is a simple agent | | that has a variadic component that receives partial inputs | + | that has a variadic component that receives partial inputs in a different order | | that has an answer joiner variadic component | | that is linear and a component in 
the middle receives optional input from other components and input from the user | | that has a loop in the middle | | that has variadic component that receives a conditional input | | that has a string variadic component | + | that is an agent that can use RAG | + | that has a feedback loop | + | created in a non-standard order that has a loop | + | that has an agent with a feedback cycle | + | that passes outputs that are consumed in cycle to outside the cycle | + | with a component that has dynamic default inputs | + | with a component that has variadic dynamic default inputs | + | that is a file conversion pipeline with two joiners | Scenario Outline: Running a bad Pipeline Given a pipeline diff --git a/test/core/pipeline/features/test_run.py b/test/core/pipeline/features/test_run.py index 652fea7c30..30c415f68d 100644 --- a/test/core/pipeline/features/test_run.py +++ b/test/core/pipeline/features/test_run.py @@ -7,13 +7,15 @@ from haystack import Pipeline, Document, component from haystack.document_stores.types import DuplicatePolicy -from haystack.dataclasses import ChatMessage, GeneratedAnswer -from haystack.components.routers import ConditionalRouter +from haystack.dataclasses import ChatMessage, GeneratedAnswer, TextContent, ByteStream +from haystack.components.routers import ConditionalRouter, FileTypeRouter from haystack.components.builders import PromptBuilder, AnswerBuilder, ChatPromptBuilder +from haystack.components.converters import OutputAdapter, JSONConverter, TextFileToDocument, CSVToDocument from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter from haystack.components.retrievers.in_memory import InMemoryBM25Retriever from haystack.document_stores.in_memory import InMemoryDocumentStore from haystack.components.joiners import BranchJoiner, DocumentJoiner, AnswerJoiner, StringJoiner +from haystack.core.component.types import Variadic from haystack.testing.sample_components import ( Accumulate, AddFixedValue, @@ -62,7 +64,11 @@ def pipeline_that_is_linear(): PipelineRunData( inputs={"first_addition": {"value": 1}}, expected_outputs={"second_addition": {"result": 7}}, - expected_run_order=["first_addition", "double", "second_addition"], + expected_component_calls={ + ("first_addition", 1): {"value": 1, "add": None}, + ("double", 1): {"value": 3}, + ("second_addition", 1): {"value": 6, "add": None}, + }, ) ], ) @@ -154,32 +160,32 @@ def pipeline_complex(): PipelineRunData( inputs={"greet_first": {"value": 1}, "greet_enumerator": {"value": 1}}, expected_outputs={"accumulate_3": {"value": -7}, "add_five": {"result": -6}}, - expected_run_order=[ - "greet_first", - "greet_enumerator", - "accumulate_1", - "enumerate", - "add_two", - "add_three", - "parity", - "add_one", - "branch_joiner", - "below_10", - "double", - "branch_joiner", - "below_10", - "double", - "branch_joiner", - "below_10", - "accumulate_2", - "sum", - "diff", - "greet_one_last_time", - "replicate", - "add_five", - "add_four", - "accumulate_3", - ], + expected_component_calls={ + ("greet_first", 1): {"value": 1, "log_level": None, "message": None}, + ("greet_enumerator", 1): {"value": 1, "log_level": None, "message": None}, + ("accumulate_1", 1): {"value": 1}, + ("add_two", 1): {"value": 1, "add": None}, + ("parity", 1): {"value": 3}, + ("add_one", 1): {"value": 3, "add": None}, + ("branch_joiner", 1): {"value": [4]}, + ("below_10", 1): {"value": 4, "threshold": None}, + ("double", 1): {"value": 4}, + ("branch_joiner", 2): {"value": [8]}, + ("below_10", 2): {"value": 8, "threshold": 
None}, + ("double", 2): {"value": 8}, + ("branch_joiner", 3): {"value": [16]}, + ("below_10", 3): {"value": 16, "threshold": None}, + ("accumulate_2", 1): {"value": 16}, + ("enumerate", 1): {"value": 1}, + ("add_three", 1): {"value": 1, "add": None}, + ("sum", 1): {"values": [1, 4]}, + ("diff", 1): {"first_value": 5, "second_value": 16}, + ("greet_one_last_time", 1): {"value": -11, "log_level": None, "message": None}, + ("replicate", 1): {"value": -11}, + ("add_five", 1): {"value": -11, "add": None}, + ("add_four", 1): {"value": -11, "add": None}, + ("accumulate_3", 1): {"value": -7}, + }, ) ], ) @@ -202,12 +208,12 @@ def run(self, a: int, b: int = 2): PipelineRunData( inputs={"with_defaults": {"a": 40, "b": 30}}, expected_outputs={"with_defaults": {"c": 70}}, - expected_run_order=["with_defaults"], + expected_component_calls={("with_defaults", 1): {"a": 40, "b": 30}}, ), PipelineRunData( inputs={"with_defaults": {"a": 40}}, expected_outputs={"with_defaults": {"c": 42}}, - expected_run_order=["with_defaults"], + expected_component_calls={("with_defaults", 1): {"a": 40, "b": 2}}, ), ], ) @@ -232,27 +238,39 @@ def pipeline_that_has_two_loops_of_identical_lengths(): PipelineRunData( inputs={"branch_joiner": {"value": 0}}, expected_outputs={"remainder": {"remainder_is_0": 0}}, - expected_run_order=["branch_joiner", "remainder"], + expected_component_calls={("branch_joiner", 1): {"value": [0]}, ("remainder", 1): {"value": 0}}, ), PipelineRunData( inputs={"branch_joiner": {"value": 3}}, expected_outputs={"remainder": {"remainder_is_0": 3}}, - expected_run_order=["branch_joiner", "remainder"], + expected_component_calls={("branch_joiner", 1): {"value": [3]}, ("remainder", 1): {"value": 3}}, ), PipelineRunData( inputs={"branch_joiner": {"value": 4}}, expected_outputs={"remainder": {"remainder_is_0": 6}}, - expected_run_order=["branch_joiner", "remainder", "add_two", "branch_joiner", "remainder"], + expected_component_calls={ + ("branch_joiner", 1): {"value": [4]}, + ("remainder", 1): {"value": 4}, + ("add_two", 1): {"value": 4, "add": None}, + ("branch_joiner", 2): {"value": [6]}, + ("remainder", 2): {"value": 6}, + }, ), PipelineRunData( inputs={"branch_joiner": {"value": 5}}, expected_outputs={"remainder": {"remainder_is_0": 6}}, - expected_run_order=["branch_joiner", "remainder", "add_one", "branch_joiner", "remainder"], + expected_component_calls={ + ("branch_joiner", 1): {"value": [5]}, + ("remainder", 1): {"value": 5}, + ("add_one", 1): {"value": 5, "add": None}, + ("branch_joiner", 2): {"value": [6]}, + ("remainder", 2): {"value": 6}, + }, ), PipelineRunData( inputs={"branch_joiner": {"value": 6}}, expected_outputs={"remainder": {"remainder_is_0": 6}}, - expected_run_order=["branch_joiner", "remainder"], + expected_component_calls={("branch_joiner", 1): {"value": [6]}, ("remainder", 1): {"value": 6}}, ), ], ) @@ -280,34 +298,40 @@ def pipeline_that_has_two_loops_of_different_lengths(): PipelineRunData( inputs={"branch_joiner": {"value": 0}}, expected_outputs={"remainder": {"remainder_is_0": 0}}, - expected_run_order=["branch_joiner", "remainder"], + expected_component_calls={("branch_joiner", 1): {"value": [0]}, ("remainder", 1): {"value": 0}}, ), PipelineRunData( inputs={"branch_joiner": {"value": 3}}, expected_outputs={"remainder": {"remainder_is_0": 3}}, - expected_run_order=["branch_joiner", "remainder"], + expected_component_calls={("branch_joiner", 1): {"value": [3]}, ("remainder", 1): {"value": 3}}, ), PipelineRunData( inputs={"branch_joiner": {"value": 4}}, 
expected_outputs={"remainder": {"remainder_is_0": 6}}, - expected_run_order=[ - "branch_joiner", - "remainder", - "add_two_1", - "add_two_2", - "branch_joiner", - "remainder", - ], + expected_component_calls={ + ("branch_joiner", 1): {"value": [4]}, + ("remainder", 1): {"value": 4}, + ("add_two_1", 1): {"value": 4, "add": None}, + ("add_two_2", 1): {"value": 5, "add": None}, + ("branch_joiner", 2): {"value": [6]}, + ("remainder", 2): {"value": 6}, + }, ), PipelineRunData( inputs={"branch_joiner": {"value": 5}}, expected_outputs={"remainder": {"remainder_is_0": 6}}, - expected_run_order=["branch_joiner", "remainder", "add_one", "branch_joiner", "remainder"], + expected_component_calls={ + ("branch_joiner", 1): {"value": [5]}, + ("remainder", 1): {"value": 5}, + ("add_one", 1): {"value": 5, "add": None}, + ("branch_joiner", 2): {"value": [6]}, + ("remainder", 2): {"value": 6}, + }, ), PipelineRunData( inputs={"branch_joiner": {"value": 6}}, expected_outputs={"remainder": {"remainder_is_0": 6}}, - expected_run_order=["branch_joiner", "remainder"], + expected_component_calls={("branch_joiner", 1): {"value": [6]}, ("remainder", 1): {"value": 6}}, ), ], ) @@ -341,21 +365,21 @@ def pipeline_that_has_a_single_loop_with_two_conditional_branches(): PipelineRunData( inputs={"add_one": {"value": 3}}, expected_outputs={"add_two": {"result": 13}}, - expected_run_order=[ - "add_one", - "branch_joiner", - "below_10", - "accumulator", - "below_5", - "branch_joiner", - "below_10", - "accumulator", - "below_5", - "add_three", - "branch_joiner", - "below_10", - "add_two", - ], + expected_component_calls={ + ("accumulator", 1): {"value": 4}, + ("accumulator", 2): {"value": 4}, + ("add_one", 1): {"add": None, "value": 3}, + ("add_three", 1): {"add": None, "value": 8}, + ("add_two", 1): {"add": None, "value": 11}, + ("below_10", 1): {"threshold": None, "value": 4}, + ("below_10", 2): {"threshold": None, "value": 4}, + ("below_10", 3): {"threshold": None, "value": 11}, + ("below_5", 1): {"threshold": None, "value": 4}, + ("below_5", 2): {"threshold": None, "value": 8}, + ("branch_joiner", 1): {"value": [4]}, + ("branch_joiner", 2): {"value": [4]}, + ("branch_joiner", 3): {"value": [11]}, + }, ) ], ) @@ -376,12 +400,20 @@ def pipeline_that_has_a_component_with_dynamic_inputs_defined_in_init(): PipelineRunData( inputs={"hello": {"word": "Alice"}}, expected_outputs={"splitter": {"output": ["This", "is", "the", "greeting:", "Hello,", "Alice!!"]}}, - expected_run_order=["hello", "fstring", "splitter"], + expected_component_calls={ + ("fstring", 1): {"greeting": "Hello, Alice!", "template": None}, + ("hello", 1): {"word": "Alice"}, + ("splitter", 1): {"sentence": "This is the greeting: Hello, Alice!!"}, + }, ), PipelineRunData( inputs={"hello": {"word": "Alice"}, "fstring": {"template": "Received: {greeting}"}}, expected_outputs={"splitter": {"output": ["Received:", "Hello,", "Alice!"]}}, - expected_run_order=["hello", "fstring", "splitter"], + expected_component_calls={ + ("fstring", 1): {"greeting": "Hello, Alice!", "template": "Received: {greeting}"}, + ("hello", 1): {"word": "Alice"}, + ("splitter", 1): {"sentence": "Received: Hello, Alice!"}, + }, ), ], ) @@ -407,12 +439,21 @@ def pipeline_that_has_two_branches_that_dont_merge(): PipelineRunData( inputs={"add_one": {"value": 1}}, expected_outputs={"add_three": {"result": 15}}, - expected_run_order=["add_one", "parity", "add_ten", "add_three"], + expected_component_calls={ + ("add_one", 1): {"add": None, "value": 1}, + ("add_ten", 1): {"add": None, "value": 2}, + 
("add_three", 1): {"add": None, "value": 12}, + ("parity", 1): {"value": 2}, + }, ), PipelineRunData( inputs={"add_one": {"value": 2}}, expected_outputs={"double": {"value": 6}}, - expected_run_order=["add_one", "parity", "double"], + expected_component_calls={ + ("add_one", 1): {"add": None, "value": 2}, + ("double", 1): {"value": 3}, + ("parity", 1): {"value": 3}, + }, ), ], ) @@ -440,7 +481,14 @@ def pipeline_that_has_three_branches_that_dont_merge(): PipelineRunData( inputs={"add_one": {"value": 1}}, expected_outputs={"add_one_again": {"result": 6}, "add_ten": {"result": 12}, "double": {"value": 4}}, - expected_run_order=["add_one", "repeat", "add_ten", "double", "add_three", "add_one_again"], + expected_component_calls={ + ("add_one", 1): {"add": None, "value": 1}, + ("add_one_again", 1): {"add": None, "value": 5}, + ("add_ten", 1): {"add": None, "value": 2}, + ("add_three", 1): {"add": None, "value": 2}, + ("double", 1): {"value": 2}, + ("repeat", 1): {"value": 2}, + }, ) ], ) @@ -465,7 +513,13 @@ def pipeline_that_has_two_branches_that_merge(): PipelineRunData( inputs={"first_addition": {"value": 1}, "third_addition": {"value": 1}}, expected_outputs={"fourth_addition": {"result": 3}}, - expected_run_order=["first_addition", "third_addition", "second_addition", "diff", "fourth_addition"], + expected_component_calls={ + ("diff", 1): {"first_value": 5, "second_value": 3}, + ("first_addition", 1): {"add": None, "value": 1}, + ("fourth_addition", 1): {"add": None, "value": 2}, + ("second_addition", 1): {"add": None, "value": 3}, + ("third_addition", 1): {"add": None, "value": 1}, + }, ) ], ) @@ -500,12 +554,24 @@ def pipeline_that_has_different_combinations_of_branches_that_merge_and_do_not_m PipelineRunData( inputs={"add_one": {"value": 1}, "add_two": {"add": 2}, "add_two_as_well": {"add": 2}}, expected_outputs={"add_two": {"result": 8}, "add_two_as_well": {"result": 8}}, - expected_run_order=["add_one", "parity", "add_four", "add_two", "add_two_as_well"], + expected_component_calls={ + ("add_four", 1): {"add": None, "value": 2}, + ("add_one", 1): {"add": None, "value": 1}, + ("add_two", 1): {"add": 2, "value": 6}, + ("add_two_as_well", 1): {"add": 2, "value": 6}, + ("parity", 1): {"value": 2}, + }, ), PipelineRunData( inputs={"add_one": {"value": 2}, "add_two": {"add": 2}, "add_two_as_well": {"add": 2}}, expected_outputs={"diff": {"difference": 7}}, - expected_run_order=["add_one", "parity", "double", "add_ten", "diff"], + expected_component_calls={ + ("add_one", 1): {"add": None, "value": 2}, + ("add_ten", 1): {"add": None, "value": 3}, + ("diff", 1): {"first_value": 13, "second_value": 6}, + ("double", 1): {"value": 3}, + ("parity", 1): {"value": 3}, + }, ), ], ) @@ -536,21 +602,21 @@ def pipeline_that_has_two_branches_one_of_which_loops_back(): PipelineRunData( inputs={"add_zero": {"value": 8}, "sum": {"values": 2}}, expected_outputs={"sum": {"total": 23}}, - expected_run_order=[ - "add_zero", - "branch_joiner", - "below_10", - "add_one", - "counter", - "branch_joiner", - "below_10", - "add_one", - "counter", - "branch_joiner", - "below_10", - "add_two", - "sum", - ], + expected_component_calls={ + ("add_one", 1): {"add": None, "value": 8}, + ("add_one", 2): {"add": None, "value": 9}, + ("add_two", 1): {"add": None, "value": 19}, + ("add_zero", 1): {"add": None, "value": 8}, + ("below_10", 1): {"threshold": None, "value": 8}, + ("below_10", 2): {"threshold": None, "value": 9}, + ("below_10", 3): {"threshold": None, "value": 19}, + ("branch_joiner", 1): {"value": [8]}, + 
("branch_joiner", 2): {"value": [9]}, + ("branch_joiner", 3): {"value": [19]}, + ("counter", 1): {"value": 9}, + ("counter", 2): {"value": 10}, + ("sum", 1): {"values": [2, 21]}, + }, ) ], ) @@ -584,7 +650,12 @@ def run(self, input_list: List[str]): "concat1": {"output": ["foo", "bar", "extra_item"]}, "concat2": {"output": ["foo", "bar", "extra_item"]}, }, - expected_run_order=["mangler1", "mangler2", "concat1", "concat2"], + expected_component_calls={ + ("concat1", 1): {"inputs": [["foo", "bar", "extra_item"]]}, + ("concat2", 1): {"inputs": [["foo", "bar", "extra_item"]]}, + ("mangler1", 1): {"input_list": ["foo", "bar"]}, + ("mangler2", 1): {"input_list": ["foo", "bar"]}, + }, ) ], ) @@ -650,7 +721,68 @@ def run(self, messages: List[ChatMessage]): }, "mm2": {"merged_message": "Fake message"}, }, - expected_run_order=["prompt_builder", "llm", "mm1", "mm2"], + expected_component_calls={ + ("llm", 1): { + "messages": [ + ChatMessage( + _role="system", + _content=[ + TextContent( + text="Always respond in English even if some input data is in other languages." + ) + ], + _name=None, + _meta={}, + ), + ChatMessage( + _role="user", _content=[TextContent(text="Tell me about Berlin")], _name=None, _meta={} + ), + ] + }, + ("mm1", 1): { + "messages": [ + ChatMessage( + _role="system", + _content=[ + TextContent( + text="Always respond in English even if some input data is in other languages." + ) + ], + _name=None, + _meta={}, + ), + ChatMessage( + _role="user", _content=[TextContent(text="Tell me about Berlin")], _name=None, _meta={} + ), + ], + "metadata": {"meta2": "value2", "metadata_key": "metadata_value"}, + }, + ("mm2", 1): { + "messages": [ + ChatMessage( + _role="assistant", _content=[TextContent(text="Fake message")], _name=None, _meta={} + ) + ], + "metadata": {"meta2": "value2", "metadata_key": "metadata_value"}, + }, + ("prompt_builder", 1): { + "prompt_source": [ + ChatMessage( + _role="system", + _content=[ + TextContent( + text="Always respond in English even if some input data is in other languages." 
+ ) + ], + _name=None, + _meta={}, + ), + ChatMessage( + _role="user", _content=[TextContent(text="Tell me about Berlin")], _name=None, _meta={} + ), + ] + }, + }, ) ], ) @@ -699,7 +831,37 @@ def pipeline_that_has_a_greedy_and_variadic_component_after_a_component_with_def "question" } }, - expected_run_order=["retriever", "branch_joiner", "prompt_builder"], + expected_component_calls={ + ("branch_joiner", 1): { + "value": [ + [ + Document( + id="328f0cbb6722c5cfa290aa2b78bcda8dc5afa09f0e2c23092afc502ba89c85e7", + content="This is a simple document", + score=0.5993376509412102, + ) + ] + ] + }, + ("prompt_builder", 1): { + "documents": [ + Document( + id="328f0cbb6722c5cfa290aa2b78bcda8dc5afa09f0e2c23092afc502ba89c85e7", + content="This is a simple document", + score=0.5993376509412102, + ) + ], + "query": "This is my question", + "template": None, + "template_variables": None, + }, + ("retriever", 1): { + "filters": None, + "query": "This is my question", + "scale_score": None, + "top_k": None, + }, + }, ) ], ) @@ -719,71 +881,6 @@ def pipeline_that_has_a_component_that_doesnt_return_a_dictionary(): return pipe, [PipelineRunData({"comp": {"a": 1}})] -@given( - "a pipeline that has components added in a different order from the order of execution", - target_fixture="pipeline_data", -) -def pipeline_that_has_components_added_in_a_different_order_from_the_order_of_execution(): - """ - We enqueue the Components in internal `to_run` data structure at the start of `Pipeline.run()` using the order - they are added in the Pipeline with `Pipeline.add_component()`. - If a Component A with defaults is added before a Component B that has no defaults, but in the Pipeline - logic A must be executed after B it could run instead before. - - This test verifies that the order of execution is correct. - """ - docs = [Document(content="Rome is the capital of Italy"), Document(content="Paris is the capital of France")] - doc_store = InMemoryDocumentStore() - doc_store.write_documents(docs) - template = ( - "Given the following information, answer the question.\n" - "Context:\n" - "{% for document in documents %}" - " {{ document.content }}\n" - "{% endfor %}" - "Question: {{ query }}" - ) - - pipe = Pipeline(max_runs_per_component=1) - - # The order of this addition is important for the test - # Do not edit them. - pipe.add_component("prompt_builder", PromptBuilder(template=template)) - pipe.add_component("retriever", InMemoryBM25Retriever(document_store=doc_store)) - pipe.connect("retriever", "prompt_builder.documents") - - query = "What is the capital of France?" - return ( - pipe, - [ - PipelineRunData( - inputs={"prompt_builder": {"query": query}, "retriever": {"query": query}}, - expected_outputs={ - "prompt_builder": { - "prompt": "Given the " - "following " - "information, " - "answer the " - "question.\n" - "Context:\n" - " Paris is " - "the capital " - "of France\n" - " Rome is " - "the capital " - "of Italy\n" - "Question: " - "What is the " - "capital of " - "France?" 
- } - }, - expected_run_order=["retriever", "prompt_builder"], - ) - ], - ) - - @given("a pipeline that has a component with only default inputs", target_fixture="pipeline_data") def pipeline_that_has_a_component_with_only_default_inputs(): FakeGenerator = component_class( @@ -841,7 +938,58 @@ def pipeline_that_has_a_component_with_only_default_inputs(): ] } }, - expected_run_order=["retriever", "prompt_builder", "generator", "answer_builder"], + expected_component_calls={ + ("answer_builder", 1): { + "documents": [ + Document( + id="413dccdf51a54cca75b7ed2eddac04e6e58560bd2f0caf4106a3efc023fe3651", + content="Paris is the capital of France", + score=1.600237583702734, + ), + Document( + id="a4a874fc2ef75015da7924d709fbdd2430e46a8e94add6e0f26cd32c1c03435d", + content="Rome is the capital of Italy", + score=1.2536639934227616, + ), + ], + "meta": None, + "pattern": None, + "query": "What is the capital of France?", + "reference_pattern": None, + "replies": ["Paris"], + }, + ("generator", 1): { + "prompt": "Given the following information, answer the " + "question.\n" + "Context:\n" + " Paris is the capital of France\n" + " Rome is the capital of Italy\n" + "Question: What is the capital of France?" + }, + ("prompt_builder", 1): { + "documents": [ + Document( + id="413dccdf51a54cca75b7ed2eddac04e6e58560bd2f0caf4106a3efc023fe3651", + content="Paris is the capital of France", + score=1.600237583702734, + ), + Document( + id="a4a874fc2ef75015da7924d709fbdd2430e46a8e94add6e0f26cd32c1c03435d", + content="Rome is the capital of Italy", + score=1.2536639934227616, + ), + ], + "query": "What is the capital of France?", + "template": None, + "template_variables": None, + }, + ("retriever", 1): { + "filters": None, + "query": "What is the capital of France?", + "scale_score": None, + "top_k": None, + }, + }, ) ], ) @@ -918,7 +1066,37 @@ def fake_generator_run(self, generation_kwargs: Optional[Dict[str, Any]] = None, PipelineRunData( inputs={"prompt_builder": {"query": "What is the capital of Italy?"}}, expected_outputs={"router": {"correct_replies": ["Rome"]}}, - expected_run_order=["prompt_builder", "generator", "router", "prompt_builder", "generator", "router"], + expected_component_calls={ + ("generator", 1): { + "generation_kwargs": None, + "prompt": "Answer the following question.\n\nQuestion: What is the capital of Italy?", + }, + ("generator", 2): { + "generation_kwargs": None, + "prompt": "Answer the following question.\n" + "\n" + "Previously you replied incorrectly this:\n" + "\n" + " - Paris\n" + "\n" + "\n" + "Question: What is the capital of Italy?", + }, + ("prompt_builder", 1): { + "previous_replies": "", + "query": "What is the capital of Italy?", + "template": None, + "template_variables": None, + }, + ("prompt_builder", 2): { + "previous_replies": ["Paris"], + "query": "What is the capital of Italy?", + "template": None, + "template_variables": None, + }, + ("router", 1): {"replies": ["Paris"]}, + ("router", 2): {"replies": ["Rome"]}, + }, ) ], ) @@ -954,12 +1132,24 @@ def pipeline_that_has_multiple_branches_that_merge_into_a_component_with_a_singl PipelineRunData( inputs={"add_one": {"value": 1}}, expected_outputs={"sum": {"total": 14}}, - expected_run_order=["add_one", "parity", "add_ten", "sum"], + expected_component_calls={ + ("add_one", 1): {"add": None, "value": 1}, + ("add_ten", 1): {"add": None, "value": 2}, + ("parity", 1): {"value": 2}, + ("sum", 1): {"values": [2, 12]}, + }, ), PipelineRunData( inputs={"add_one": {"value": 2}}, expected_outputs={"sum": {"total": 17}}, - 
expected_run_order=["add_one", "parity", "double", "add_four", "add_one_again", "sum"], + expected_component_calls={ + ("add_four", 1): {"add": None, "value": 3}, + ("add_one", 1): {"add": None, "value": 2}, + ("add_one_again", 1): {"add": None, "value": 7}, + ("double", 1): {"value": 3}, + ("parity", 1): {"value": 3}, + ("sum", 1): {"values": [3, 6, 8]}, + }, ), ], ) @@ -989,7 +1179,13 @@ def pipeline_that_has_multiple_branches_of_different_lengths_that_merge_into_a_c PipelineRunData( inputs={"first_addition": {"value": 1}, "third_addition": {"value": 1}}, expected_outputs={"fourth_addition": {"result": 12}}, - expected_run_order=["first_addition", "third_addition", "second_addition", "sum", "fourth_addition"], + expected_component_calls={ + ("first_addition", 1): {"add": None, "value": 1}, + ("fourth_addition", 1): {"add": None, "value": 11}, + ("second_addition", 1): {"add": None, "value": 3}, + ("sum", 1): {"values": [3, 3, 5]}, + ("third_addition", 1): {"add": None, "value": 1}, + }, ) ], ) @@ -1015,13 +1211,21 @@ def pipeline_that_is_linear_and_returns_intermediate_outputs(): "first_addition": {"result": 3}, "second_addition": {"result": 7}, }, - expected_run_order=["first_addition", "double", "second_addition"], + expected_component_calls={ + ("double", 1): {"value": 3}, + ("first_addition", 1): {"add": None, "value": 1}, + ("second_addition", 1): {"add": None, "value": 6}, + }, ), PipelineRunData( inputs={"first_addition": {"value": 1}}, include_outputs_from={"double"}, expected_outputs={"double": {"value": 6}, "second_addition": {"result": 7}}, - expected_run_order=["first_addition", "double", "second_addition"], + expected_component_calls={ + ("double", 1): {"value": 3}, + ("first_addition", 1): {"add": None, "value": 1}, + ("second_addition", 1): {"add": None, "value": 6}, + }, ), ], ) @@ -1070,21 +1274,21 @@ def pipeline_that_has_a_loop_and_returns_intermediate_outputs_from_it(): "below_5": {"above": 8}, "add_three": {"result": 11}, }, - expected_run_order=[ - "add_one", - "branch_joiner", - "below_10", - "accumulator", - "below_5", - "branch_joiner", - "below_10", - "accumulator", - "below_5", - "add_three", - "branch_joiner", - "below_10", - "add_two", - ], + expected_component_calls={ + ("accumulator", 1): {"value": 4}, + ("accumulator", 2): {"value": 4}, + ("add_one", 1): {"add": None, "value": 3}, + ("add_three", 1): {"add": None, "value": 8}, + ("add_two", 1): {"add": None, "value": 11}, + ("below_10", 1): {"threshold": None, "value": 4}, + ("below_10", 2): {"threshold": None, "value": 4}, + ("below_10", 3): {"threshold": None, "value": 11}, + ("below_5", 1): {"threshold": None, "value": 4}, + ("below_5", 2): {"threshold": None, "value": 8}, + ("branch_joiner", 1): {"value": [4]}, + ("branch_joiner", 2): {"value": [4]}, + ("branch_joiner", 3): {"value": [11]}, + }, ) ], ) @@ -1122,13 +1326,21 @@ def run(self, value: int): "first_addition": {"result": 3}, "second_addition": {"result": 7}, }, - expected_run_order=["first_addition", "double", "second_addition"], + expected_component_calls={ + ("double", 1): {"value": 3}, + ("first_addition", 1): {"add": None, "value": 1}, + ("second_addition", 1): {"add": None, "value": 6}, + }, ), PipelineRunData( inputs={"first_addition": {"value": 1}}, include_outputs_from={"double"}, expected_outputs={"double": {"original": 3, "value": 6}, "second_addition": {"result": 7}}, - expected_run_order=["first_addition", "double", "second_addition"], + expected_component_calls={ + ("double", 1): {"value": 3}, + ("first_addition", 1): {"add": 
None, "value": 1}, + ("second_addition", 1): {"add": None, "value": 6}, + }, ), ], ) @@ -1156,12 +1368,15 @@ def pipeline_that_has_a_component_with_default_inputs_that_doesnt_receive_anythi PipelineRunData( inputs={"router": {"sentence": "Wir mussen reisen"}}, expected_outputs={"router": {"language_1": "German"}}, - expected_run_order=["router"], + expected_component_calls={("router", 1): {"sentence": "Wir mussen reisen"}}, ), PipelineRunData( inputs={"router": {"sentence": "Yo tengo que viajar"}}, expected_outputs={"pb": {"prompt": "Ok, I know, that's Spanish"}}, - expected_run_order=["router", "pb"], + expected_component_calls={ + ("pb", 1): {"language": "Spanish", "template": None, "template_variables": None}, + ("router", 1): {"sentence": "Yo tengo que viajar"}, + }, ), ], ) @@ -1245,7 +1460,45 @@ def run(self, query: str): "router": {"question": "This is a question with no_answer"}, }, expected_outputs={"fallback_llm": {"replies": ["There's simply no_answer to this question"]}}, - expected_run_order=["prompt", "llm", "router", "fallback_prompt", "fallback_llm"], + expected_component_calls={ + ("fallback_llm", 1): { + "prompt": "User entered a query that cannot be answered " + "with the given table.\n" + " The query was: This is a " + "question with no_answer and the table had " + "columns: .\n" + " Let the user know why " + "the question cannot be answered" + }, + ("fallback_prompt", 1): { + "columns": "", + "question": "This is a question with no_answer", + "template": None, + "template_variables": None, + }, + ("llm", 1): { + "prompt": "Please generate an SQL query. The query should answer " + "the following Question: This is a question with " + "no_answer;\n" + " If the question cannot be answered given " + "the provided table and columns, return 'no_answer'\n" + " The query is to be answered for the table " + "is called 'absenteeism' with the following\n" + " Columns: Age, Absenteeism_time_in_hours, " + "Days, Disciplinary_failure;\n" + " Answer:" + }, + ("prompt", 1): { + "columns": "Age, Absenteeism_time_in_hours, Days, Disciplinary_failure", + "question": "This is a question with no_answer", + "template": None, + "template_variables": None, + }, + ("router", 1): { + "question": "This is a question with no_answer", + "replies": ["There's simply no_answer to this question"], + }, + }, ) ], [ @@ -1255,7 +1508,104 @@ def run(self, query: str): "router": {"question": "This is a question that has an answer"}, }, expected_outputs={"sql_querier": {"results": "This is the query result", "query": "Some SQL query"}}, - expected_run_order=["prompt", "llm", "router", "sql_querier"], + expected_component_calls={ + ("llm", 1): { + "prompt": "\n" + " You are an experienced and accurate Turkish CX " + "speacialist that classifies customer comments into " + "pre-defined categories below:\n" + "\n" + " Negative experience labels:\n" + " - Late delivery\n" + " - Rotten/spoilt item\n" + " - Bad Courier behavior\n" + "\n" + " Positive experience labels:\n" + " - Good courier behavior\n" + " - Thanks & appreciation\n" + " - Love message to courier\n" + " - Fast delivery\n" + " - Quality of products\n" + "\n" + " Create a JSON object as a response. 
The fields " + "are: 'positive_experience', 'negative_experience'.\n" + " Assign at least one of the pre-defined labels to " + "the given customer comment under positive and " + "negative experience fields.\n" + " If the comment has a positive experience, list " + "the label under 'positive_experience' field.\n" + " If the comments has a negative_experience, list " + "it under the 'negative_experience' field.\n" + " Here is the comment:\n" + "I loved the quality of the meal but the courier was " + "rude\n" + ". Just return the category names in the list. If " + "there aren't any, return an empty list.\n" + "\n" + " \n" + " " + }, + ("llm", 2): { + "prompt": "\n" + " You are an experienced and accurate Turkish CX " + "speacialist that classifies customer comments into " + "pre-defined categories below:\n" + "\n" + " Negative experience labels:\n" + " - Late delivery\n" + " - Rotten/spoilt item\n" + " - Bad Courier behavior\n" + "\n" + " Positive experience labels:\n" + " - Good courier behavior\n" + " - Thanks & appreciation\n" + " - Love message to courier\n" + " - Fast delivery\n" + " - Quality of products\n" + "\n" + " Create a JSON object as a response. The fields " + "are: 'positive_experience', 'negative_experience'.\n" + " Assign at least one of the pre-defined labels to " + "the given customer comment under positive and " + "negative experience fields.\n" + " If the comment has a positive experience, list " + "the label under 'positive_experience' field.\n" + " If the comments has a negative_experience, list " + "it under the 'negative_experience' field.\n" + " Here is the comment:\n" + "I loved the quality of the meal but the courier was " + "rude\n" + ". Just return the category names in the list. If " + "there aren't any, return an empty list.\n" + "\n" + " \n" + " You already created the following output in a " + "previous attempt: ['This is an invalid reply']\n" + " However, this doesn't comply with the format " + "requirements from above and triggered this Python " + "exception: this is an error message\n" + " Correct the output and try again. 
Just return the " + "corrected output without any extra explanations.\n" + " \n" + " " + }, + ("output_validator", 1): {"replies": ["This is a valid reply"]}, + ("output_validator", 2): {"replies": ["This is a valid reply"]}, + ("prompt_builder", 1): { + "comment": "", + "error_message": "", + "invalid_replies": "", + "template": None, + "template_variables": {"comment": "I loved the quality of the meal but the courier was rude"}, + }, + ("prompt_builder", 2): { + "comment": "", + "error_message": "this is an error message", + "invalid_replies": ["This is an invalid reply"], + "template": None, + "template_variables": {"comment": "I loved the quality of the meal but the courier was rude"}, + }, + }, ) ], ) @@ -1333,14 +1683,104 @@ def run(self, prompt: str): PipelineRunData( inputs={"prompt_builder": {"template_variables": {"comment": comment}}}, expected_outputs={"output_validator": {"valid_replies": ["This is a valid reply"]}}, - expected_run_order=[ - "prompt_builder", - "llm", - "output_validator", - "prompt_builder", - "llm", - "output_validator", - ], + expected_component_calls={ + ("llm", 1): { + "prompt": "\n" + " You are an experienced and accurate Turkish CX " + "speacialist that classifies customer comments into " + "pre-defined categories below:\n" + "\n" + " Negative experience labels:\n" + " - Late delivery\n" + " - Rotten/spoilt item\n" + " - Bad Courier behavior\n" + "\n" + " Positive experience labels:\n" + " - Good courier behavior\n" + " - Thanks & appreciation\n" + " - Love message to courier\n" + " - Fast delivery\n" + " - Quality of products\n" + "\n" + " Create a JSON object as a response. The fields " + "are: 'positive_experience', 'negative_experience'.\n" + " Assign at least one of the pre-defined labels to " + "the given customer comment under positive and " + "negative experience fields.\n" + " If the comment has a positive experience, list " + "the label under 'positive_experience' field.\n" + " If the comments has a negative_experience, list " + "it under the 'negative_experience' field.\n" + " Here is the comment:\n" + "I loved the quality of the meal but the courier was " + "rude\n" + ". Just return the category names in the list. If " + "there aren't any, return an empty list.\n" + "\n" + " \n" + " " + }, + ("llm", 2): { + "prompt": "\n" + " You are an experienced and accurate Turkish CX " + "speacialist that classifies customer comments into " + "pre-defined categories below:\n" + "\n" + " Negative experience labels:\n" + " - Late delivery\n" + " - Rotten/spoilt item\n" + " - Bad Courier behavior\n" + "\n" + " Positive experience labels:\n" + " - Good courier behavior\n" + " - Thanks & appreciation\n" + " - Love message to courier\n" + " - Fast delivery\n" + " - Quality of products\n" + "\n" + " Create a JSON object as a response. The fields " + "are: 'positive_experience', 'negative_experience'.\n" + " Assign at least one of the pre-defined labels to " + "the given customer comment under positive and " + "negative experience fields.\n" + " If the comment has a positive experience, list " + "the label under 'positive_experience' field.\n" + " If the comments has a negative_experience, list " + "it under the 'negative_experience' field.\n" + " Here is the comment:\n" + "I loved the quality of the meal but the courier was " + "rude\n" + ". Just return the category names in the list. 
If " + "there aren't any, return an empty list.\n" + "\n" + " \n" + " You already created the following output in a " + "previous attempt: ['This is an invalid reply']\n" + " However, this doesn't comply with the format " + "requirements from above and triggered this Python " + "exception: this is an error message\n" + " Correct the output and try again. Just return the " + "corrected output without any extra explanations.\n" + " \n" + " " + }, + ("output_validator", 1): {"replies": ["This is a valid reply"]}, + ("output_validator", 2): {"replies": ["This is a valid reply"]}, + ("prompt_builder", 1): { + "comment": "", + "error_message": "", + "invalid_replies": "", + "template": None, + "template_variables": {"comment": "I loved the quality of the meal but the courier was rude"}, + }, + ("prompt_builder", 2): { + "comment": "", + "error_message": "this is an error message", + "invalid_replies": ["This is an invalid reply"], + "template": None, + "template_variables": {"comment": "I loved the quality of the meal but the courier was rude"}, + }, + }, ) ], ) @@ -1438,15 +1878,76 @@ def run(self, prompt: str, generation_kwargs: Optional[Dict[str, Any]] = None): "llm": {"replies": ["This is a reply"], "meta": {"meta_key": "meta_value"}}, "spellchecker": {"meta": {"meta_key": "meta_value"}}, }, - expected_run_order=[ - "prompt_builder1", - "spellchecker", - "prompt_builder3", - "retriever", - "ranker", - "prompt_builder2", - "llm", - ], + expected_component_calls={ + ("llm", 1): { + "generation_kwargs": None, + "prompt": "\n" + " According to these documents:\n" + "\n" + " \n" + " This is a document\n" + " \n" + "\n" + " Answer the given question: \n" + " \n" + " This is a reply\n" + " \n" + " \n" + " Answer:\n" + " ", + }, + ("prompt_builder1", 1): { + "question": "Wha i Acromegaly?", + "template": None, + "template_variables": None, + }, + ("prompt_builder2", 1): { + "documents": [ + Document( + id="9d51914541072d3d822910785727db8a3838dba5ca6ebb0a543969260ecdeda6", + content="This is a document", + ) + ], + "question": "\n \n This is a reply\n \n ", + "template": None, + "template_variables": None, + }, + ("prompt_builder3", 1): { + "replies": ["This is a reply"], + "template": None, + "template_variables": None, + }, + ("ranker", 1): { + "calibration_factor": None, + "documents": [ + Document( + id="9d51914541072d3d822910785727db8a3838dba5ca6ebb0a543969260ecdeda6", + content="This is a document", + ) + ], + "query": "\n \n This is a reply\n \n ", + "scale_score": None, + "score_threshold": None, + "top_k": None, + }, + ("retriever", 1): { + "filters": None, + "query": "\n \n This is a reply\n \n ", + "scale_score": None, + "top_k": None, + }, + ("spellchecker", 1): { + "generation_kwargs": None, + "prompt": "\n" + " You are a spellchecking system. 
Check " + "the given query and fill in the corrected " + "query.\n" + "\n" + " Question: Wha i Acromegaly?\n" + " Corrected question:\n" + " ", + }, + }, ) ], ) @@ -1523,20 +2024,41 @@ def run(self, query_embedding: List[float]): ] } }, - expected_run_order=[ - "router", - "text_embedder", - "bm25retriever", - "retriever", - "joinerhybrid", - "ranker", - "joinerfinal", - ], + expected_component_calls={ + ("router", 1): {"query": "I'm a legit question"}, + ("text_embedder", 1): {"text": "I'm a legit question"}, + ("bm25retriever", 1): {"query": "I'm a legit question"}, + ("retriever", 1): {"query_embedding": [1.0, 2.0, 3.0]}, + ("joinerhybrid", 1): { + "documents": [ + [Document(content="This is a document")], + [Document(content="This is another document")], + ], + "top_k": None, + }, + ("ranker", 1): { + "query": "I'm a legit question", + "documents": [ + Document(content="This is a document"), + Document(content="This is another document"), + ], + }, + ("joinerfinal", 1): { + "documents": [ + [Document(content="This is a document"), Document(content="This is another document")] + ], + "top_k": None, + }, + }, ), PipelineRunData( inputs={"router": {"query": "I'm a nasty prompt injection"}}, expected_outputs={"joinerfinal": {"documents": []}}, - expected_run_order=["router", "emptyretriever", "joinerfinal"], + expected_component_calls={ + ("router", 1): {"query": "I'm a nasty prompt injection"}, + ("emptyretriever", 1): {"query": "I'm a nasty prompt injection"}, + ("joinerfinal", 1): {"documents": [[]], "top_k": None}, + }, ), ], ) @@ -1730,25 +2252,256 @@ def run(self, replies: List[ChatMessage]): "search_prompt_builder": {"template": search_message}, }, expected_outputs={"router": {"finish": "Eiffel Tower"}}, - expected_run_order=[ - "main_input", - "prompt_builder", - "llm", - "prompt_concatenator_after_action", - "tool_extractor", - "router", - "router_search", - "search_prompt_builder", - "search_llm", - "search_output_adapter", - "prompt_concatenator_after_observation", - "main_input", - "prompt_builder", - "llm", - "prompt_concatenator_after_action", - "tool_extractor", - "router", - ], + expected_component_calls={ + ("llm", 1): { + "generation_kwargs": None, + "messages": [ + ChatMessage( + _role="user", + _content=[ + TextContent( + text="\n Solve a question answering task with interleaving Thought, Action, Observation steps.\n\n Thought reasons about the current situation\n\n Action can be:\n google_search - Searches Google for the exact concept/entity (given in square brackets) and returns the results for you to use\n finish - Returns the final answer (given in square brackets) and finishes the task\n\n Observation summarizes the Action outcome and helps in formulating the next\n Thought in Thought, Action, Observation interleaving triplet of steps.\n\n After each Observation, provide the next Thought and next Action.\n Don't execute multiple steps even though you know the answer.\n Only generate Thought and Action, never Observation, you'll get Observation from Action.\n Follow the pattern in the example below.\n\n Example:\n ###########################\n Question: Which magazine was started first Arthur’s Magazine or First for Women?\n Thought: I need to search Arthur’s Magazine and First for Women, and find which was started\n first.\n Action: google_search[When was 'Arthur’s Magazine' started?]\n Observation: Arthur’s Magazine was an American literary periodical ˘\n published in Philadelphia and founded in 1844. Edited by Timothy Shay Arthur, it featured work by\n Edgar A. 
Poe, J.H. Ingraham, Sarah Josepha Hale, Thomas G. Spear, and others. In May 1846\n it was merged into Godey’s Lady’s Book.\n Thought: Arthur’s Magazine was started in 1844. I need to search First for Women founding date next\n Action: google_search[When was 'First for Women' magazine started?]\n Observation: First for Women is a woman’s magazine published by Bauer Media Group in the\n USA. The magazine was started in 1989. It is based in Englewood Cliffs, New Jersey. In 2011\n the circulation of the magazine was 1,310,696 copies.\n Thought: First for Women was started in 1989. 1844 (Arthur’s Magazine) ¡ 1989 (First for\n Women), so Arthur’s Magazine was started first.\n Action: finish[Arthur’s Magazine]\n ############################\n\n Let's start, the question is: which tower is taller: eiffel tower or tower of pisa?\n\n Thought:\n " + ) + ], + _name=None, + _meta={}, + ) + ], + }, + ("llm", 2): { + "generation_kwargs": None, + "messages": [ + ChatMessage( + _role="user", + _content=[ + TextContent( + text="\n Solve a question answering task with interleaving Thought, Action, Observation steps.\n\n Thought reasons about the current situation\n\n Action can be:\n google_search - Searches Google for the exact concept/entity (given in square brackets) and returns the results for you to use\n finish - Returns the final answer (given in square brackets) and finishes the task\n\n Observation summarizes the Action outcome and helps in formulating the next\n Thought in Thought, Action, Observation interleaving triplet of steps.\n\n After each Observation, provide the next Thought and next Action.\n Don't execute multiple steps even though you know the answer.\n Only generate Thought and Action, never Observation, you'll get Observation from Action.\n Follow the pattern in the example below.\n\n Example:\n ###########################\n Question: Which magazine was started first Arthur’s Magazine or First for Women?\n Thought: I need to search Arthur’s Magazine and First for Women, and find which was started\n first.\n Action: google_search[When was 'Arthur’s Magazine' started?]\n Observation: Arthur’s Magazine was an American literary periodical ˘\n published in Philadelphia and founded in 1844. Edited by Timothy Shay Arthur, it featured work by\n Edgar A. Poe, J.H. Ingraham, Sarah Josepha Hale, Thomas G. Spear, and others. In May 1846\n it was merged into Godey’s Lady’s Book.\n Thought: Arthur’s Magazine was started in 1844. I need to search First for Women founding date next\n Action: google_search[When was 'First for Women' magazine started?]\n Observation: First for Women is a woman’s magazine published by Bauer Media Group in the\n USA. The magazine was started in 1989. It is based in Englewood Cliffs, New Jersey. In 2011\n the circulation of the magazine was 1,310,696 copies.\n Thought: First for Women was started in 1989. 
1844 (Arthur’s Magazine) ¡ 1989 (First for\n Women), so Arthur’s Magazine was started first.\n Action: finish[Arthur’s Magazine]\n ############################\n\n Let's start, the question is: which tower is taller: eiffel tower or tower of pisa?\n\n Thought:\n thinking\n Action: google_search[What is taller, Eiffel Tower or Leaning Tower of Pisa]\nObservation: Tower of Pisa is 55 meters tall\n\n\nThought: " + ) + ], + _name=None, + _meta={}, + ) + ], + }, + ("main_input", 1): { + "value": [ + [ + ChatMessage( + _role="user", + _content=[ + TextContent( + text="\n Solve a question answering task with interleaving Thought, Action, Observation steps.\n\n Thought reasons about the current situation\n\n Action can be:\n google_search - Searches Google for the exact concept/entity (given in square brackets) and returns the results for you to use\n finish - Returns the final answer (given in square brackets) and finishes the task\n\n Observation summarizes the Action outcome and helps in formulating the next\n Thought in Thought, Action, Observation interleaving triplet of steps.\n\n After each Observation, provide the next Thought and next Action.\n Don't execute multiple steps even though you know the answer.\n Only generate Thought and Action, never Observation, you'll get Observation from Action.\n Follow the pattern in the example below.\n\n Example:\n ###########################\n Question: Which magazine was started first Arthur’s Magazine or First for Women?\n Thought: I need to search Arthur’s Magazine and First for Women, and find which was started\n first.\n Action: google_search[When was 'Arthur’s Magazine' started?]\n Observation: Arthur’s Magazine was an American literary periodical ˘\n published in Philadelphia and founded in 1844. Edited by Timothy Shay Arthur, it featured work by\n Edgar A. Poe, J.H. Ingraham, Sarah Josepha Hale, Thomas G. Spear, and others. In May 1846\n it was merged into Godey’s Lady’s Book.\n Thought: Arthur’s Magazine was started in 1844. I need to search First for Women founding date next\n Action: google_search[When was 'First for Women' magazine started?]\n Observation: First for Women is a woman’s magazine published by Bauer Media Group in the\n USA. The magazine was started in 1989. It is based in Englewood Cliffs, New Jersey. In 2011\n the circulation of the magazine was 1,310,696 copies.\n Thought: First for Women was started in 1989. 
1844 (Arthur’s Magazine) ¡ 1989 (First for\n Women), so Arthur’s Magazine was started first.\n Action: finish[Arthur’s Magazine]\n ############################\n\n Let's start, the question is: {{query}}\n\n Thought:\n " + ) + ], + _name=None, + _meta={}, + ) + ] + ] + }, + ("main_input", 2): { + "value": [ + [ + ChatMessage( + _role="user", + _content=[ + TextContent( + text="\n Solve a question answering task with interleaving Thought, Action, Observation steps.\n\n Thought reasons about the current situation\n\n Action can be:\n google_search - Searches Google for the exact concept/entity (given in square brackets) and returns the results for you to use\n finish - Returns the final answer (given in square brackets) and finishes the task\n\n Observation summarizes the Action outcome and helps in formulating the next\n Thought in Thought, Action, Observation interleaving triplet of steps.\n\n After each Observation, provide the next Thought and next Action.\n Don't execute multiple steps even though you know the answer.\n Only generate Thought and Action, never Observation, you'll get Observation from Action.\n Follow the pattern in the example below.\n\n Example:\n ###########################\n Question: Which magazine was started first Arthur’s Magazine or First for Women?\n Thought: I need to search Arthur’s Magazine and First for Women, and find which was started\n first.\n Action: google_search[When was 'Arthur’s Magazine' started?]\n Observation: Arthur’s Magazine was an American literary periodical ˘\n published in Philadelphia and founded in 1844. Edited by Timothy Shay Arthur, it featured work by\n Edgar A. Poe, J.H. Ingraham, Sarah Josepha Hale, Thomas G. Spear, and others. In May 1846\n it was merged into Godey’s Lady’s Book.\n Thought: Arthur’s Magazine was started in 1844. I need to search First for Women founding date next\n Action: google_search[When was 'First for Women' magazine started?]\n Observation: First for Women is a woman’s magazine published by Bauer Media Group in the\n USA. The magazine was started in 1989. It is based in Englewood Cliffs, New Jersey. In 2011\n the circulation of the magazine was 1,310,696 copies.\n Thought: First for Women was started in 1989. 
1844 (Arthur’s Magazine) ¡ 1989 (First for\n Women), so Arthur’s Magazine was started first.\n Action: finish[Arthur’s Magazine]\n ############################\n\n Let's start, the question is: which tower is taller: eiffel tower or tower of pisa?\n\n Thought:\n thinking\n Action: google_search[What is taller, Eiffel Tower or Leaning Tower of Pisa]\nObservation: Tower of Pisa is 55 meters tall\n\n\nThought: " + ) + ], + _name=None, + _meta={}, + ) + ] + ] + }, + ("prompt_builder", 1): { + "query": "which tower is taller: eiffel tower or tower of pisa?", + "template": [ + ChatMessage( + _role="user", + _content=[ + TextContent( + text="\n Solve a question answering task with interleaving Thought, Action, Observation steps.\n\n Thought reasons about the current situation\n\n Action can be:\n google_search - Searches Google for the exact concept/entity (given in square brackets) and returns the results for you to use\n finish - Returns the final answer (given in square brackets) and finishes the task\n\n Observation summarizes the Action outcome and helps in formulating the next\n Thought in Thought, Action, Observation interleaving triplet of steps.\n\n After each Observation, provide the next Thought and next Action.\n Don't execute multiple steps even though you know the answer.\n Only generate Thought and Action, never Observation, you'll get Observation from Action.\n Follow the pattern in the example below.\n\n Example:\n ###########################\n Question: Which magazine was started first Arthur’s Magazine or First for Women?\n Thought: I need to search Arthur’s Magazine and First for Women, and find which was started\n first.\n Action: google_search[When was 'Arthur’s Magazine' started?]\n Observation: Arthur’s Magazine was an American literary periodical ˘\n published in Philadelphia and founded in 1844. Edited by Timothy Shay Arthur, it featured work by\n Edgar A. Poe, J.H. Ingraham, Sarah Josepha Hale, Thomas G. Spear, and others. In May 1846\n it was merged into Godey’s Lady’s Book.\n Thought: Arthur’s Magazine was started in 1844. I need to search First for Women founding date next\n Action: google_search[When was 'First for Women' magazine started?]\n Observation: First for Women is a woman’s magazine published by Bauer Media Group in the\n USA. The magazine was started in 1989. It is based in Englewood Cliffs, New Jersey. In 2011\n the circulation of the magazine was 1,310,696 copies.\n Thought: First for Women was started in 1989. 
1844 (Arthur’s Magazine) ¡ 1989 (First for\n Women), so Arthur’s Magazine was started first.\n Action: finish[Arthur’s Magazine]\n ############################\n\n Let's start, the question is: {{query}}\n\n Thought:\n " + ) + ], + _name=None, + _meta={}, + ) + ], + "template_variables": None, + }, + ("prompt_builder", 2): { + "query": "which tower is taller: eiffel tower or tower of pisa?", + "template": [ + ChatMessage( + _role="user", + _content=[ + TextContent( + text="\n Solve a question answering task with interleaving Thought, Action, Observation steps.\n\n Thought reasons about the current situation\n\n Action can be:\n google_search - Searches Google for the exact concept/entity (given in square brackets) and returns the results for you to use\n finish - Returns the final answer (given in square brackets) and finishes the task\n\n Observation summarizes the Action outcome and helps in formulating the next\n Thought in Thought, Action, Observation interleaving triplet of steps.\n\n After each Observation, provide the next Thought and next Action.\n Don't execute multiple steps even though you know the answer.\n Only generate Thought and Action, never Observation, you'll get Observation from Action.\n Follow the pattern in the example below.\n\n Example:\n ###########################\n Question: Which magazine was started first Arthur’s Magazine or First for Women?\n Thought: I need to search Arthur’s Magazine and First for Women, and find which was started\n first.\n Action: google_search[When was 'Arthur’s Magazine' started?]\n Observation: Arthur’s Magazine was an American literary periodical ˘\n published in Philadelphia and founded in 1844. Edited by Timothy Shay Arthur, it featured work by\n Edgar A. Poe, J.H. Ingraham, Sarah Josepha Hale, Thomas G. Spear, and others. In May 1846\n it was merged into Godey’s Lady’s Book.\n Thought: Arthur’s Magazine was started in 1844. I need to search First for Women founding date next\n Action: google_search[When was 'First for Women' magazine started?]\n Observation: First for Women is a woman’s magazine published by Bauer Media Group in the\n USA. The magazine was started in 1989. It is based in Englewood Cliffs, New Jersey. In 2011\n the circulation of the magazine was 1,310,696 copies.\n Thought: First for Women was started in 1989. 
1844 (Arthur’s Magazine) ¡ 1989 (First for\n Women), so Arthur’s Magazine was started first.\n Action: finish[Arthur’s Magazine]\n ############################\n\n Let's start, the question is: which tower is taller: eiffel tower or tower of pisa?\n\n Thought:\n thinking\n Action: google_search[What is taller, Eiffel Tower or Leaning Tower of Pisa]\nObservation: Tower of Pisa is 55 meters tall\n\n\nThought: " + ) + ], + _name=None, + _meta={}, + ) + ], + "template_variables": None, + }, + ("prompt_concatenator_after_action", 1): { + "current_prompt": [ + ChatMessage( + _role="user", + _content=[ + TextContent( + text="\n Solve a question answering task with interleaving Thought, Action, Observation steps.\n\n Thought reasons about the current situation\n\n Action can be:\n google_search - Searches Google for the exact concept/entity (given in square brackets) and returns the results for you to use\n finish - Returns the final answer (given in square brackets) and finishes the task\n\n Observation summarizes the Action outcome and helps in formulating the next\n Thought in Thought, Action, Observation interleaving triplet of steps.\n\n After each Observation, provide the next Thought and next Action.\n Don't execute multiple steps even though you know the answer.\n Only generate Thought and Action, never Observation, you'll get Observation from Action.\n Follow the pattern in the example below.\n\n Example:\n ###########################\n Question: Which magazine was started first Arthur’s Magazine or First for Women?\n Thought: I need to search Arthur’s Magazine and First for Women, and find which was started\n first.\n Action: google_search[When was 'Arthur’s Magazine' started?]\n Observation: Arthur’s Magazine was an American literary periodical ˘\n published in Philadelphia and founded in 1844. Edited by Timothy Shay Arthur, it featured work by\n Edgar A. Poe, J.H. Ingraham, Sarah Josepha Hale, Thomas G. Spear, and others. In May 1846\n it was merged into Godey’s Lady’s Book.\n Thought: Arthur’s Magazine was started in 1844. I need to search First for Women founding date next\n Action: google_search[When was 'First for Women' magazine started?]\n Observation: First for Women is a woman’s magazine published by Bauer Media Group in the\n USA. The magazine was started in 1989. It is based in Englewood Cliffs, New Jersey. In 2011\n the circulation of the magazine was 1,310,696 copies.\n Thought: First for Women was started in 1989. 
1844 (Arthur’s Magazine) ¡ 1989 (First for\n Women), so Arthur’s Magazine was started first.\n Action: finish[Arthur’s Magazine]\n ############################\n\n Let's start, the question is: which tower is taller: eiffel tower or tower of pisa?\n\n Thought:\n " + ) + ], + _name=None, + _meta={}, + ) + ], + "replies": [ + ChatMessage( + _role="assistant", + _content=[ + TextContent( + text="thinking\n Action: google_search[What is taller, Eiffel Tower or Leaning Tower of Pisa]\n" + ) + ], + _name=None, + _meta={}, + ) + ], + }, + ("prompt_concatenator_after_action", 2): { + "current_prompt": [ + ChatMessage( + _role="user", + _content=[ + TextContent( + text="\n Solve a question answering task with interleaving Thought, Action, Observation steps.\n\n Thought reasons about the current situation\n\n Action can be:\n google_search - Searches Google for the exact concept/entity (given in square brackets) and returns the results for you to use\n finish - Returns the final answer (given in square brackets) and finishes the task\n\n Observation summarizes the Action outcome and helps in formulating the next\n Thought in Thought, Action, Observation interleaving triplet of steps.\n\n After each Observation, provide the next Thought and next Action.\n Don't execute multiple steps even though you know the answer.\n Only generate Thought and Action, never Observation, you'll get Observation from Action.\n Follow the pattern in the example below.\n\n Example:\n ###########################\n Question: Which magazine was started first Arthur’s Magazine or First for Women?\n Thought: I need to search Arthur’s Magazine and First for Women, and find which was started\n first.\n Action: google_search[When was 'Arthur’s Magazine' started?]\n Observation: Arthur’s Magazine was an American literary periodical ˘\n published in Philadelphia and founded in 1844. Edited by Timothy Shay Arthur, it featured work by\n Edgar A. Poe, J.H. Ingraham, Sarah Josepha Hale, Thomas G. Spear, and others. In May 1846\n it was merged into Godey’s Lady’s Book.\n Thought: Arthur’s Magazine was started in 1844. I need to search First for Women founding date next\n Action: google_search[When was 'First for Women' magazine started?]\n Observation: First for Women is a woman’s magazine published by Bauer Media Group in the\n USA. The magazine was started in 1989. It is based in Englewood Cliffs, New Jersey. In 2011\n the circulation of the magazine was 1,310,696 copies.\n Thought: First for Women was started in 1989. 
1844 (Arthur’s Magazine) ¡ 1989 (First for\n Women), so Arthur’s Magazine was started first.\n Action: finish[Arthur’s Magazine]\n ############################\n\n Let's start, the question is: which tower is taller: eiffel tower or tower of pisa?\n\n Thought:\n thinking\n Action: google_search[What is taller, Eiffel Tower or Leaning Tower of Pisa]\nObservation: Tower of Pisa is 55 meters tall\n\n\nThought: " + ) + ], + _name=None, + _meta={}, + ) + ], + "replies": [ + ChatMessage( + _role="assistant", + _content=[TextContent(text="thinking\n Action: finish[Eiffel Tower]\n")], + _name=None, + _meta={}, + ) + ], + }, + ("prompt_concatenator_after_observation", 1): { + "current_prompt": [ + ChatMessage( + _role="user", + _content=[ + TextContent( + text="\n Solve a question answering task with interleaving Thought, Action, Observation steps.\n\n Thought reasons about the current situation\n\n Action can be:\n google_search - Searches Google for the exact concept/entity (given in square brackets) and returns the results for you to use\n finish - Returns the final answer (given in square brackets) and finishes the task\n\n Observation summarizes the Action outcome and helps in formulating the next\n Thought in Thought, Action, Observation interleaving triplet of steps.\n\n After each Observation, provide the next Thought and next Action.\n Don't execute multiple steps even though you know the answer.\n Only generate Thought and Action, never Observation, you'll get Observation from Action.\n Follow the pattern in the example below.\n\n Example:\n ###########################\n Question: Which magazine was started first Arthur’s Magazine or First for Women?\n Thought: I need to search Arthur’s Magazine and First for Women, and find which was started\n first.\n Action: google_search[When was 'Arthur’s Magazine' started?]\n Observation: Arthur’s Magazine was an American literary periodical ˘\n published in Philadelphia and founded in 1844. Edited by Timothy Shay Arthur, it featured work by\n Edgar A. Poe, J.H. Ingraham, Sarah Josepha Hale, Thomas G. Spear, and others. In May 1846\n it was merged into Godey’s Lady’s Book.\n Thought: Arthur’s Magazine was started in 1844. I need to search First for Women founding date next\n Action: google_search[When was 'First for Women' magazine started?]\n Observation: First for Women is a woman’s magazine published by Bauer Media Group in the\n USA. The magazine was started in 1989. It is based in Englewood Cliffs, New Jersey. In 2011\n the circulation of the magazine was 1,310,696 copies.\n Thought: First for Women was started in 1989. 
1844 (Arthur’s Magazine) ¡ 1989 (First for\n Women), so Arthur’s Magazine was started first.\n Action: finish[Arthur’s Magazine]\n ############################\n\n Let's start, the question is: which tower is taller: eiffel tower or tower of pisa?\n\n Thought:\n thinking\n Action: google_search[What is taller, Eiffel Tower or Leaning Tower of Pisa]\n" + ) + ], + _name=None, + _meta={}, + ) + ], + "replies": [ + ChatMessage( + _role="assistant", + _content=[TextContent(text="Observation: Tower of Pisa is 55 meters tall\n\n")], + _name=None, + _meta={}, + ) + ], + }, + ("router", 1): { + "tool_id_and_param": ["google_search", "What is taller, Eiffel Tower or Leaning Tower of Pisa"] + }, + ("router", 2): {"tool_id_and_param": ["finish", "Eiffel Tower"]}, + ("router_search", 1): {"query": "What is taller, Eiffel Tower or Leaning Tower of Pisa"}, + ("search_llm", 1): { + "generation_kwargs": None, + "messages": [ + ChatMessage( + _role="user", + _content=[ + TextContent( + text="\n Given these web search results:\n\n \n Eiffel Tower is 300 meters tall\n \n Tower of Pisa is 55 meters tall\n \n\n Be as brief as possible, max one sentence.\n Answer the question: What is taller, Eiffel Tower or Leaning Tower of Pisa\n " + ) + ], + _name=None, + _meta={}, + ) + ], + }, + ("search_output_adapter", 1): { + "replies": [ + ChatMessage( + _role="assistant", + _content=[TextContent(text="Tower of Pisa is 55 meters tall\n")], + _name=None, + _meta={}, + ) + ] + }, + ("search_prompt_builder", 1): { + "documents": [ + Document( + id="c37eb19352b261b17314cac9e1539921b5996f88c99ad0b134f12effb38ed467", + content="Eiffel Tower is 300 meters tall", + ), + Document( + id="c5281056a220c32e6fa1c4ae7d3f263c0f25fd620592c5e45049a9dcb778f129", + content="Tower of Pisa is 55 meters tall", + ), + ], + "search_query": "What is taller, Eiffel Tower or Leaning Tower of Pisa", + "template": [ + ChatMessage( + _role="user", + _content=[ + TextContent( + text="\n Given these web search results:\n\n {% for doc in documents %}\n {{ doc.content }}\n {% endfor %}\n\n Be as brief as possible, max one sentence.\n Answer the question: {{search_query}}\n " + ) + ], + _name=None, + _meta={}, + ) + ], + "template_variables": None, + }, + ("tool_extractor", 1): { + "messages": [ + ChatMessage( + _role="user", + _content=[ + TextContent( + text="\n Solve a question answering task with interleaving Thought, Action, Observation steps.\n\n Thought reasons about the current situation\n\n Action can be:\n google_search - Searches Google for the exact concept/entity (given in square brackets) and returns the results for you to use\n finish - Returns the final answer (given in square brackets) and finishes the task\n\n Observation summarizes the Action outcome and helps in formulating the next\n Thought in Thought, Action, Observation interleaving triplet of steps.\n\n After each Observation, provide the next Thought and next Action.\n Don't execute multiple steps even though you know the answer.\n Only generate Thought and Action, never Observation, you'll get Observation from Action.\n Follow the pattern in the example below.\n\n Example:\n ###########################\n Question: Which magazine was started first Arthur’s Magazine or First for Women?\n Thought: I need to search Arthur’s Magazine and First for Women, and find which was started\n first.\n Action: google_search[When was 'Arthur’s Magazine' started?]\n Observation: Arthur’s Magazine was an American literary periodical ˘\n published in Philadelphia and founded in 1844. 
Edited by Timothy Shay Arthur, it featured work by\n Edgar A. Poe, J.H. Ingraham, Sarah Josepha Hale, Thomas G. Spear, and others. In May 1846\n it was merged into Godey’s Lady’s Book.\n Thought: Arthur’s Magazine was started in 1844. I need to search First for Women founding date next\n Action: google_search[When was 'First for Women' magazine started?]\n Observation: First for Women is a woman’s magazine published by Bauer Media Group in the\n USA. The magazine was started in 1989. It is based in Englewood Cliffs, New Jersey. In 2011\n the circulation of the magazine was 1,310,696 copies.\n Thought: First for Women was started in 1989. 1844 (Arthur’s Magazine) ¡ 1989 (First for\n Women), so Arthur’s Magazine was started first.\n Action: finish[Arthur’s Magazine]\n ############################\n\n Let's start, the question is: which tower is taller: eiffel tower or tower of pisa?\n\n Thought:\n thinking\n Action: google_search[What is taller, Eiffel Tower or Leaning Tower of Pisa]\n" + ) + ], + _name=None, + _meta={}, + ) + ] + }, + ("tool_extractor", 2): { + "messages": [ + ChatMessage( + _role="user", + _content=[ + TextContent( + text="\n Solve a question answering task with interleaving Thought, Action, Observation steps.\n\n Thought reasons about the current situation\n\n Action can be:\n google_search - Searches Google for the exact concept/entity (given in square brackets) and returns the results for you to use\n finish - Returns the final answer (given in square brackets) and finishes the task\n\n Observation summarizes the Action outcome and helps in formulating the next\n Thought in Thought, Action, Observation interleaving triplet of steps.\n\n After each Observation, provide the next Thought and next Action.\n Don't execute multiple steps even though you know the answer.\n Only generate Thought and Action, never Observation, you'll get Observation from Action.\n Follow the pattern in the example below.\n\n Example:\n ###########################\n Question: Which magazine was started first Arthur’s Magazine or First for Women?\n Thought: I need to search Arthur’s Magazine and First for Women, and find which was started\n first.\n Action: google_search[When was 'Arthur’s Magazine' started?]\n Observation: Arthur’s Magazine was an American literary periodical ˘\n published in Philadelphia and founded in 1844. Edited by Timothy Shay Arthur, it featured work by\n Edgar A. Poe, J.H. Ingraham, Sarah Josepha Hale, Thomas G. Spear, and others. In May 1846\n it was merged into Godey’s Lady’s Book.\n Thought: Arthur’s Magazine was started in 1844. I need to search First for Women founding date next\n Action: google_search[When was 'First for Women' magazine started?]\n Observation: First for Women is a woman’s magazine published by Bauer Media Group in the\n USA. The magazine was started in 1989. It is based in Englewood Cliffs, New Jersey. In 2011\n the circulation of the magazine was 1,310,696 copies.\n Thought: First for Women was started in 1989. 
1844 (Arthur’s Magazine) ¡ 1989 (First for\n Women), so Arthur’s Magazine was started first.\n Action: finish[Arthur’s Magazine]\n ############################\n\n Let's start, the question is: which tower is taller: eiffel tower or tower of pisa?\n\n Thought:\n thinking\n Action: google_search[What is taller, Eiffel Tower or Leaning Tower of Pisa]\nObservation: Tower of Pisa is 55 meters tall\n\n\nThought: thinking\n Action: finish[Eiffel Tower]\n" + ) + ], + _name=None, + _meta={}, + ) + ] + }, + }, ) ] @@ -1790,7 +2543,18 @@ def run(self, create_document: bool = False): ] }, }, - expected_run_order=["first_creator", "second_creator", "third_creator", "documents_joiner"], + expected_component_calls={ + ("documents_joiner", 1): { + "documents": [ + [Document(id="First document", content="First document")], + [Document(id="Third document", content="Third document")], + ], + "top_k": None, + }, + ("first_creator", 1): {"create_document": True}, + ("second_creator", 1): {"create_document": False}, + ("third_creator", 1): {"create_document": True}, + }, ), PipelineRunData( inputs={"first_creator": {"create_document": True}, "second_creator": {"create_document": True}}, @@ -1803,13 +2567,105 @@ def run(self, create_document: bool = False): ] }, }, - expected_run_order=["first_creator", "second_creator", "third_creator", "documents_joiner"], + expected_component_calls={ + ("documents_joiner", 1): { + "documents": [ + [Document(id="First document", content="First document")], + [Document(id="Second document", content="Second document")], + ], + "top_k": None, + }, + ("first_creator", 1): {"create_document": True}, + ("second_creator", 1): {"create_document": True}, + ("third_creator", 1): {"create_document": False}, + }, ), ], ) -@given("a pipeline that has an answer joiner variadic component", target_fixture="pipeline_data") +@given( + "a pipeline that has a variadic component that receives partial inputs in a different order", + target_fixture="pipeline_data", +) +def that_has_a_variadic_component_that_receives_partial_inputs_different_order(): + @component + class ConditionalDocumentCreator: + def __init__(self, content: str): + self._content = content + + @component.output_types(documents=List[Document], noop=None) + def run(self, create_document: bool = False): + if create_document: + return {"documents": [Document(id=self._content, content=self._content)]} + return {"noop": None} + + pipeline = Pipeline(max_runs_per_component=1) + pipeline.add_component("third_creator", ConditionalDocumentCreator(content="Third document")) + pipeline.add_component("first_creator", ConditionalDocumentCreator(content="First document")) + pipeline.add_component("second_creator", ConditionalDocumentCreator(content="Second document")) + pipeline.add_component("documents_joiner", DocumentJoiner()) + + pipeline.connect("first_creator.documents", "documents_joiner.documents") + pipeline.connect("second_creator.documents", "documents_joiner.documents") + pipeline.connect("third_creator.documents", "documents_joiner.documents") + + return ( + pipeline, + [ + PipelineRunData( + inputs={"first_creator": {"create_document": True}, "third_creator": {"create_document": True}}, + expected_outputs={ + "second_creator": {"noop": None}, + "documents_joiner": { + "documents": [ + Document(id="First document", content="First document"), + Document(id="Third document", content="Third document"), + ] + }, + }, + expected_component_calls={ + ("documents_joiner", 1): { + "documents": [ + [Document(id="First document", 
content="First document")], + [Document(id="Third document", content="Third document")], + ], + "top_k": None, + }, + ("first_creator", 1): {"create_document": True}, + ("second_creator", 1): {"create_document": False}, + ("third_creator", 1): {"create_document": True}, + }, + ), + PipelineRunData( + inputs={"first_creator": {"create_document": True}, "second_creator": {"create_document": True}}, + expected_outputs={ + "third_creator": {"noop": None}, + "documents_joiner": { + "documents": [ + Document(id="First document", content="First document"), + Document(id="Second document", content="Second document"), + ] + }, + }, + expected_component_calls={ + ("documents_joiner", 1): { + "documents": [ + [Document(id="First document", content="First document")], + [Document(id="Second document", content="Second document")], + ], + "top_k": None, + }, + ("first_creator", 1): {"create_document": True}, + ("second_creator", 1): {"create_document": True}, + ("third_creator", 1): {"create_document": False}, + }, + ), + ], + ) + + +@given("a pipeline that has an answer joiner variadic component", target_fixture="pipeline_data") def that_has_an_answer_joiner_variadic_component(): query = "What's Natural Language Processing?" @@ -1847,7 +2703,45 @@ def that_has_an_answer_joiner_variadic_component(): ] } }, - expected_run_order=["answer_builder_1", "answer_builder_2", "answer_joiner"], + expected_component_calls={ + ("answer_builder_1", 1): { + "documents": None, + "meta": None, + "pattern": None, + "query": "What's Natural Language Processing?", + "reference_pattern": None, + "replies": ["This is a test answer"], + }, + ("answer_builder_2", 1): { + "documents": None, + "meta": None, + "pattern": None, + "query": "What's Natural Language Processing?", + "reference_pattern": None, + "replies": ["This is a second test answer"], + }, + ("answer_joiner", 1): { + "answers": [ + [ + GeneratedAnswer( + data="This is a test answer", + query="What's Natural Language Processing?", + documents=[], + meta={}, + ) + ], + [ + GeneratedAnswer( + data="This is a second test answer", + query="What's Natural Language Processing?", + documents=[], + meta={}, + ) + ], + ], + "top_k": None, + }, + }, ) ], ) @@ -1923,7 +2817,35 @@ def run(self, prompt: str): ] } }, - expected_run_order=["builder", "metadata_extractor", "retriever", "document_joiner"], + expected_component_calls={ + ("builder", 1): {"template": None, "template_variables": None}, + ("document_joiner", 1): { + "documents": [ + [ + Document( + id="doc2", + content="some text about investigation and treatment of Alzheimer disease", + meta={"year": 2023, "disease": "Alzheimer", "author": "John Bread"}, + score=3.324112496100923, + ) + ] + ], + "top_k": None, + }, + ("metadata_extractor", 1): {"prompt": '{"disease": "Alzheimer", "year": 2023}'}, + ("retriever", 1): { + "filters": { + "conditions": [ + {"field": "meta.disease", "operator": "==", "value": "Alzheimer"}, + {"field": "meta.year", "operator": "==", "value": 2023}, + ], + "operator": "AND", + }, + "query": "publications 2023 Alzheimer's disease", + "scale_score": None, + "top_k": None, + }, + }, ) ], ) @@ -2058,16 +2980,33 @@ def run(self, prompt: str): expected_outputs={ "answer_builder": {"answers": [GeneratedAnswer(data="42", query=question, documents=[])]} }, - expected_run_order=[ - "prompt_cleaner", - "prompt_builder", - "llm", - "answer_validator", - "prompt_builder", - "llm", - "answer_validator", - "answer_builder", - ], + expected_component_calls={ + ("answer_builder", 1): { + "documents": None, 
+ "meta": None, + "pattern": None, + "query": "What is the answer?", + "reference_pattern": None, + "replies": ["42"], + }, + ("answer_validator", 1): {"replies": ["No answer"]}, + ("answer_validator", 2): {"replies": ["42"]}, + ("llm", 1): {"prompt": "Random template"}, + ("llm", 2): {"prompt": ""}, + ("prompt_builder", 1): { + "invalid_replies": "", + "question": "What is the answer?", + "template": "Random template", + "template_variables": None, + }, + ("prompt_builder", 2): { + "invalid_replies": ["No answer"], + "question": "What is the answer?", + "template": None, + "template_variables": None, + }, + ("prompt_cleaner", 1): {"prompt": "Random template"}, + }, ) ], ) @@ -2154,14 +3093,62 @@ def run(self, documents: List[Document]): ] }, }, - expected_run_order=[ - "comma_splitter", - "noop2", - "document_cleaner", - "noop3", - "conditional_router", - "document_joiner", - ], + expected_component_calls={ + ("comma_splitter", 1): { + "documents": [ + Document( + id="1000", + content="This document has so many, sentences. Like this one, or this one. Or even this other one.", + ) + ] + }, + ("conditional_router", 1): { + "documents": [ + Document( + id="1000", + content="This document has so many, sentences. Like this one, or this one. Or even this other one.", + ) + ] + }, + ("document_cleaner", 1): { + "documents": [ + Document(id="0", content="This document has so many"), + Document(id="1", content=" sentences. Like this one"), + Document(id="2", content=" or this one. Or even this other one."), + ] + }, + ("document_joiner", 1): { + "documents": [ + [ + Document(id="0", content="This document has so many"), + Document(id="1", content=" sentences. Like this one"), + Document(id="2", content=" or this one. Or even this other one."), + ], + [ + Document(id="0", content="This document has so many"), + Document(id="1", content="sentences. Like this one"), + Document(id="2", content="or this one. Or even this other one."), + ], + ], + "top_k": None, + }, + ("noop2", 1): { + "documents": [ + Document( + id="1000", + content="This document has so many, sentences. Like this one, or this one. Or even this other one.", + ) + ] + }, + ("noop3", 1): { + "documents": [ + Document( + id="1000", + content="This document has so many, sentences. Like this one, or this one. Or even this other one.", + ) + ] + }, + }, ), PipelineRunData( inputs={ @@ -2184,15 +3171,109 @@ def run(self, documents: List[Document]): ] } }, - expected_run_order=[ - "comma_splitter", - "noop2", - "document_cleaner", - "noop3", - "conditional_router", - "empty_lines_cleaner", - "document_joiner", - ], + expected_component_calls={ + ("comma_splitter", 1): { + "documents": [ + Document( + id="1000", + content="This document has so many, sentences. Like this one, or this one. Or even this other one.", + ), + Document( + id="1000", + content="This document has so many, sentences. Like this one, or this one. Or even this other one.", + ), + ] + }, + ("conditional_router", 1): { + "documents": [ + Document( + id="1000", + content="This document has so many, sentences. Like this one, or this one. Or even this other one.", + ), + Document( + id="1000", + content="This document has so many, sentences. Like this one, or this one. Or even this other one.", + ), + ] + }, + ("document_cleaner", 1): { + "documents": [ + Document(id="0", content="This document has so many"), + Document(id="1", content=" sentences. Like this one"), + Document(id="2", content=" or this one. 
Or even this other one."), + Document(id="3", content="This document has so many"), + Document(id="4", content=" sentences. Like this one"), + Document(id="5", content=" or this one. Or even this other one."), + ] + }, + ("document_joiner", 1): { + "documents": [ + [ + Document(id="0", content="This document has so many"), + Document(id="1", content=" sentences. Like this one"), + Document(id="2", content=" or this one. Or even this other one."), + Document(id="3", content="This document has so many"), + Document(id="4", content=" sentences. Like this one"), + Document(id="5", content=" or this one. Or even this other one."), + ], + [ + Document(id="0", content="This document has so many"), + Document(id="1", content="sentences. Like this one"), + Document(id="2", content="or this one. Or even this other one."), + Document(id="3", content="This document has so many"), + Document(id="4", content="sentences. Like this one"), + Document(id="5", content="or this one. Or even this other one."), + ], + [ + Document( + id="1000", + content="This document has so many, sentences. Like this one, or this one. Or even this other one.", + ), + Document( + id="1000", + content="This document has so many, sentences. Like this one, or this one. Or even this other one.", + ), + ], + ], + "top_k": None, + }, + ("empty_lines_cleaner", 1): { + "documents": [ + Document( + id="1000", + content="This document has so many, sentences. Like this one, or this one. Or even this other one.", + ), + Document( + id="1000", + content="This document has so many, sentences. Like this one, or this one. Or even this other one.", + ), + ] + }, + ("noop2", 1): { + "documents": [ + Document( + id="1000", + content="This document has so many, sentences. Like this one, or this one. Or even this other one.", + ), + Document( + id="1000", + content="This document has so many, sentences. Like this one, or this one. Or even this other one.", + ), + ] + }, + ("noop3", 1): { + "documents": [ + Document( + id="1000", + content="This document has so many, sentences. Like this one, or this one. Or even this other one.", + ), + Document( + id="1000", + content="This document has so many, sentences. Like this one, or this one. 
Or even this other one.", + ), + ] + }, + }, ), ] @@ -2220,7 +3301,1769 @@ def that_has_a_string_variadic_component(): "strings": ["Builder 1: What's Natural Language Processing?", "Builder 2: What's is life?"] } }, - expected_run_order=["prompt_builder_1", "prompt_builder_2", "string_joiner"], + expected_component_calls={ + ("prompt_builder_1", 1): { + "query": "What's Natural Language Processing?", + "template": None, + "template_variables": None, + }, + ("prompt_builder_2", 1): {"query": "What's is life?", "template": None, "template_variables": None}, + ("string_joiner", 1): { + "strings": ["Builder 1: What's Natural Language Processing?", "Builder 2: What's is life?"] + }, + }, + ) + ], + ) + + +@given("a pipeline that is an agent that can use RAG", target_fixture="pipeline_data") +def an_agent_that_can_use_RAG(): + @component + class FixedGenerator: + def __init__(self, replies): + self.replies = replies + self.idx = 0 + + @component.output_types(replies=List[str]) + def run(self, prompt: str): + if self.idx < len(self.replies): + replies = [self.replies[self.idx]] + self.idx += 1 + else: + self.idx = 0 + replies = [self.replies[self.idx]] + self.idx += 1 + + return {"replies": replies} + + @component + class FakeRetriever: + @component.output_types(documents=List[Document]) + def run(self, query: str): + return { + "documents": [ + Document(content="This is a document potentially answering the question.", meta={"access_group": 1}) + ] + } + + agent_prompt_template = """ +Your task is to answer the user's question. +You can use a RAG system to find information. +Use the RAG system until you have sufficient information to answer the question. +To use the RAG system, output "search:" followed by your question. +Once you have an answer, output "answer:" followed by your answer. + +Here is the question: {{query}} + """ + + rag_prompt_template = """ +Answer the question based on the provided documents. 
+Question: {{ query }} +Documents: +{% for document in documents %} +{{ document.content }} +{% endfor %} + """ + + joiner = BranchJoiner(type_=str) + + agent_llm = FixedGenerator(replies=["search: Can you help me?", "answer: here is my answer"]) + agent_prompt = PromptBuilder(template=agent_prompt_template) + + rag_llm = FixedGenerator(replies=["This is all the information I found!"]) + rag_prompt = PromptBuilder(template=rag_prompt_template) + + retriever = FakeRetriever() + + routes = [ + { + "condition": "{{ 'search:' in replies[0] }}", + "output": "{{ replies[0] }}", + "output_name": "search", + "output_type": str, + }, + { + "condition": "{{ 'answer:' in replies[0] }}", + "output": "{{ replies }}", + "output_name": "answer", + "output_type": List[str], + }, + ] + + router = ConditionalRouter(routes=routes) + + concatenator = OutputAdapter(template="{{current_prompt + '\n' + rag_answer[0]}}", output_type=str) + + answer_builder = AnswerBuilder() + + pp = Pipeline(max_runs_per_component=2) + + pp.add_component("joiner", joiner) + pp.add_component("rag_llm", rag_llm) + pp.add_component("rag_prompt", rag_prompt) + pp.add_component("agent_prompt", agent_prompt) + pp.add_component("agent_llm", agent_llm) + pp.add_component("router", router) + pp.add_component("concatenator", concatenator) + pp.add_component("retriever", retriever) + pp.add_component("answer_builder", answer_builder) + + pp.connect("agent_prompt.prompt", "joiner.value") + pp.connect("joiner.value", "agent_llm.prompt") + pp.connect("agent_llm.replies", "router.replies") + pp.connect("router.search", "retriever.query") + pp.connect("router.answer", "answer_builder.replies") + pp.connect("retriever.documents", "rag_prompt.documents") + pp.connect("rag_prompt.prompt", "rag_llm.prompt") + pp.connect("rag_llm.replies", "concatenator.rag_answer") + pp.connect("joiner.value", "concatenator.current_prompt") + pp.connect("concatenator.output", "joiner.value") + + query = "Does this run reliably?" + + return ( + pp, + [ + PipelineRunData( + inputs={ + "agent_prompt": {"query": query}, + "rag_prompt": {"query": query}, + "answer_builder": {"query": query}, + }, + expected_outputs={ + "answer_builder": { + "answers": [GeneratedAnswer(data="answer: here is my answer", query=query, documents=[])] + } + }, + expected_component_calls={ + ("agent_llm", 1): { + "prompt": "\n" + "Your task is to answer the user's question.\n" + "You can use a RAG system to find information.\n" + "Use the RAG system until you have sufficient " + "information to answer the question.\n" + 'To use the RAG system, output "search:" ' + "followed by your question.\n" + 'Once you have an answer, output "answer:" ' + "followed by your answer.\n" + "\n" + "Here is the question: Does this run reliably?\n" + " " + }, + ("agent_llm", 2): { + "prompt": "\n" + "Your task is to answer the user's question.\n" + "You can use a RAG system to find information.\n" + "Use the RAG system until you have sufficient " + "information to answer the question.\n" + 'To use the RAG system, output "search:" ' + "followed by your question.\n" + 'Once you have an answer, output "answer:" ' + "followed by your answer.\n" + "\n" + "Here is the question: Does this run reliably?\n" + " \n" + "This is all the information I found!" 
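+ # Second agent turn: the joiner has fed the original prompt back with the RAG answer appended by the concatenator.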
+ }, + ("agent_prompt", 1): { + "query": "Does this run reliably?", + "template": None, + "template_variables": None, + }, + ("answer_builder", 1): { + "documents": None, + "meta": None, + "pattern": None, + "query": "Does this run reliably?", + "reference_pattern": None, + "replies": ["answer: here is my answer"], + }, + ("concatenator", 1): { + "current_prompt": "\n" + "Your task is to answer the user's " + "question.\n" + "You can use a RAG system to find " + "information.\n" + "Use the RAG system until you have " + "sufficient information to answer the " + "question.\n" + "To use the RAG system, output " + '"search:" followed by your ' + "question.\n" + "Once you have an answer, output " + '"answer:" followed by your answer.\n' + "\n" + "Here is the question: Does this run " + "reliably?\n" + " ", + "rag_answer": ["This is all the information I found!"], + }, + ("joiner", 1): { + "value": [ + "\n" + "Your task is to answer the user's question.\n" + "You can use a RAG system to find information.\n" + "Use the RAG system until you have sufficient " + "information to answer the question.\n" + 'To use the RAG system, output "search:" followed ' + "by your question.\n" + 'Once you have an answer, output "answer:" followed ' + "by your answer.\n" + "\n" + "Here is the question: Does this run reliably?\n" + " " + ] + }, + ("joiner", 2): { + "value": [ + "\n" + "Your task is to answer the user's question.\n" + "You can use a RAG system to find information.\n" + "Use the RAG system until you have sufficient " + "information to answer the question.\n" + 'To use the RAG system, output "search:" followed ' + "by your question.\n" + 'Once you have an answer, output "answer:" followed ' + "by your answer.\n" + "\n" + "Here is the question: Does this run reliably?\n" + " \n" + "This is all the information I found!" + ] + }, + ("rag_llm", 1): { + "prompt": "\n" + "Answer the question based on the provided " + "documents.\n" + "Question: Does this run reliably?\n" + "Documents:\n" + "\n" + "This is a document potentially answering the " + "question.\n" + "\n" + " " + }, + ("rag_prompt", 1): { + "documents": [ + Document( + id="969664d0cf76e52b0ffb719d00d3e5a6b1c90bb29e56f6107dfd87bf2f5388ed", + content="This is a document potentially answering the question.", + meta={"access_group": 1}, + ) + ], + "query": "Does this run reliably?", + "template": None, + "template_variables": None, + }, + ("retriever", 1): {"query": "search: Can you help me?"}, + ("router", 1): {"replies": ["search: Can you help me?"]}, + ("router", 2): {"replies": ["answer: here is my answer"]}, + }, + ) + ], + ) + + +@given("a pipeline that has a feedback loop", target_fixture="pipeline_data") +def has_feedback_loop(): + @component + class FixedGenerator: + def __init__(self, replies): + self.replies = replies + self.idx = 0 + + @component.output_types(replies=List[str]) + def run(self, prompt: str): + if self.idx < len(self.replies): + replies = [self.replies[self.idx]] + self.idx += 1 + else: + self.idx = 0 + replies = [self.replies[self.idx]] + self.idx += 1 + + return {"replies": replies} + + code_prompt_template = """ +Generate code to solve the task: {{ task }} + +{% if feedback %} +Here is your initial attempt and some feedback: +{{ feedback }} +{% endif %} + """ + + feedback_prompt_template = """ +Check if this code is valid and can run: {{ code[0] }} +Return "PASS" if it passes and "FAIL" if it fails. +Provide additional feedback on why it fails. 
+ """ + + code_llm = FixedGenerator(replies=["invalid code", "valid code"]) + code_prompt = PromptBuilder(template=code_prompt_template) + + feedback_llm = FixedGenerator(replies=["FAIL", "PASS"]) + feedback_prompt = PromptBuilder(template=feedback_prompt_template) + + routes = [ + { + "condition": "{{ 'FAIL' in replies[0] }}", + "output": "{{ replies[0] }}", + "output_name": "fail", + "output_type": str, + }, + { + "condition": "{{ 'PASS' in replies[0] }}", + "output": "{{ code }}", + "output_name": "pass", + "output_type": List[str], + }, + ] + + router = ConditionalRouter(routes=routes) + + concatenator = OutputAdapter(template="{{current_prompt[0] + '\n' + feedback[0]}}", output_type=str) + + answer_builder = AnswerBuilder() + + pp = Pipeline(max_runs_per_component=100) + + pp.add_component("code_llm", code_llm) + pp.add_component("code_prompt", code_prompt) + pp.add_component("feedback_prompt", feedback_prompt) + pp.add_component("feedback_llm", feedback_llm) + pp.add_component("router", router) + pp.add_component("concatenator", concatenator) + pp.add_component("answer_builder", answer_builder) + + pp.connect("code_prompt.prompt", "code_llm.prompt") + pp.connect("code_llm.replies", "feedback_prompt.code") + pp.connect("feedback_llm.replies", "router.replies") + pp.connect("router.fail", "concatenator.feedback") + pp.connect("router.pass", "answer_builder.replies") + pp.connect("code_llm.replies", "router.code") + pp.connect("feedback_prompt.prompt", "feedback_llm.prompt") + pp.connect("code_llm.replies", "concatenator.current_prompt") + pp.connect("concatenator.output", "code_prompt.feedback") + + task = "Generate code to generate christmas ascii-art" + + return ( + pp, + [ + PipelineRunData( + inputs={"code_prompt": {"task": task}, "answer_builder": {"query": task}}, + expected_outputs={ + "answer_builder": {"answers": [GeneratedAnswer(data="valid code", query=task, documents=[])]} + }, + expected_component_calls={ + ("answer_builder", 1): { + "documents": None, + "meta": None, + "pattern": None, + "query": "Generate code to generate christmas ascii-art", + "reference_pattern": None, + "replies": ["valid code"], + }, + ("code_llm", 1): { + "prompt": "\n" + "Generate code to solve the task: Generate code " + "to generate christmas ascii-art\n" + "\n" + "\n" + " " + }, + ("code_llm", 2): { + "prompt": "\n" + "Generate code to solve the task: Generate code " + "to generate christmas ascii-art\n" + "\n" + "\n" + "Here is your initial attempt and some feedback:\n" + "invalid code\n" + "F\n" + "\n" + " " + }, + ("code_prompt", 1): { + "feedback": "", + "task": "Generate code to generate christmas ascii-art", + "template": None, + "template_variables": None, + }, + ("code_prompt", 2): { + "feedback": "invalid code\nF", + "task": "Generate code to generate christmas ascii-art", + "template": None, + "template_variables": None, + }, + ("concatenator", 1): {"current_prompt": ["invalid code"], "feedback": "FAIL"}, + ("feedback_llm", 1): { + "prompt": "\n" + "Check if this code is valid and can run: " + "invalid code\n" + 'Return "PASS" if it passes and "FAIL" if it ' + "fails.\n" + "Provide additional feedback on why it " + "fails.\n" + " " + }, + ("feedback_llm", 2): { + "prompt": "\n" + "Check if this code is valid and can run: " + "valid code\n" + 'Return "PASS" if it passes and "FAIL" if it ' + "fails.\n" + "Provide additional feedback on why it " + "fails.\n" + " " + }, + ("feedback_prompt", 1): {"code": ["invalid code"], "template": None, "template_variables": None}, + 
("feedback_prompt", 2): {"code": ["valid code"], "template": None, "template_variables": None}, + ("router", 1): {"code": ["invalid code"], "replies": ["FAIL"]}, + ("router", 2): {"code": ["valid code"], "replies": ["PASS"]}, + }, + ) + ], + ) + + +@given("a pipeline created in a non-standard order that has a loop", target_fixture="pipeline_data") +def has_non_standard_order_loop(): + @component + class FixedGenerator: + def __init__(self, replies): + self.replies = replies + self.idx = 0 + + @component.output_types(replies=List[str]) + def run(self, prompt: str): + if self.idx < len(self.replies): + replies = [self.replies[self.idx]] + self.idx += 1 + else: + self.idx = 0 + replies = [self.replies[self.idx]] + self.idx += 1 + + return {"replies": replies} + + code_prompt_template = """ +Generate code to solve the task: {{ task }} + +{% if feedback %} +Here is your initial attempt and some feedback: +{{ feedback }} +{% endif %} + """ + + feedback_prompt_template = """ +Check if this code is valid and can run: {{ code[0] }} +Return "PASS" if it passes and "FAIL" if it fails. +Provide additional feedback on why it fails. + """ + + code_llm = FixedGenerator(replies=["invalid code", "valid code"]) + code_prompt = PromptBuilder(template=code_prompt_template) + + feedback_llm = FixedGenerator(replies=["FAIL", "PASS"]) + feedback_prompt = PromptBuilder(template=feedback_prompt_template) + + routes = [ + { + "condition": "{{ 'FAIL' in replies[0] }}", + "output": "{{ replies[0] }}", + "output_name": "fail", + "output_type": str, + }, + { + "condition": "{{ 'PASS' in replies[0] }}", + "output": "{{ code }}", + "output_name": "pass", + "output_type": List[str], + }, + ] + + router = ConditionalRouter(routes=routes) + + concatenator = OutputAdapter(template="{{current_prompt[0] + '\n' + feedback[0]}}", output_type=str) + + answer_builder = AnswerBuilder() + + pp = Pipeline(max_runs_per_component=100) + + pp.add_component("concatenator", concatenator) + pp.add_component("code_llm", code_llm) + pp.add_component("code_prompt", code_prompt) + pp.add_component("feedback_prompt", feedback_prompt) + pp.add_component("feedback_llm", feedback_llm) + pp.add_component("router", router) + + pp.add_component("answer_builder", answer_builder) + + pp.connect("concatenator.output", "code_prompt.feedback") + pp.connect("code_prompt.prompt", "code_llm.prompt") + pp.connect("code_llm.replies", "feedback_prompt.code") + pp.connect("feedback_llm.replies", "router.replies") + pp.connect("router.fail", "concatenator.feedback") + pp.connect("feedback_prompt.prompt", "feedback_llm.prompt") + pp.connect("router.pass", "answer_builder.replies") + pp.connect("code_llm.replies", "router.code") + pp.connect("code_llm.replies", "concatenator.current_prompt") + + task = "Generate code to generate christmas ascii-art" + + return ( + pp, + [ + PipelineRunData( + inputs={"code_prompt": {"task": task}, "answer_builder": {"query": task}}, + expected_outputs={ + "answer_builder": {"answers": [GeneratedAnswer(data="valid code", query=task, documents=[])]} + }, + expected_component_calls={ + ("answer_builder", 1): { + "documents": None, + "meta": None, + "pattern": None, + "query": "Generate code to generate christmas ascii-art", + "reference_pattern": None, + "replies": ["valid code"], + }, + ("code_llm", 1): { + "prompt": "\n" + "Generate code to solve the task: Generate code " + "to generate christmas ascii-art\n" + "\n" + "\n" + " " + }, + ("code_llm", 2): { + "prompt": "\n" + "Generate code to solve the task: Generate code " + "to 
generate christmas ascii-art\n" + "\n" + "\n" + "Here is your initial attempt and some feedback:\n" + "invalid code\n" + "F\n" + "\n" + " " + }, + ("code_prompt", 1): { + "feedback": "", + "task": "Generate code to generate christmas ascii-art", + "template": None, + "template_variables": None, + }, + ("code_prompt", 2): { + "feedback": "invalid code\nF", + "task": "Generate code to generate christmas ascii-art", + "template": None, + "template_variables": None, + }, + ("concatenator", 1): {"current_prompt": ["invalid code"], "feedback": "FAIL"}, + ("feedback_llm", 1): { + "prompt": "\n" + "Check if this code is valid and can run: " + "invalid code\n" + 'Return "PASS" if it passes and "FAIL" if it ' + "fails.\n" + "Provide additional feedback on why it " + "fails.\n" + " " + }, + ("feedback_llm", 2): { + "prompt": "\n" + "Check if this code is valid and can run: " + "valid code\n" + 'Return "PASS" if it passes and "FAIL" if it ' + "fails.\n" + "Provide additional feedback on why it " + "fails.\n" + " " + }, + ("feedback_prompt", 1): {"code": ["invalid code"], "template": None, "template_variables": None}, + ("feedback_prompt", 2): {"code": ["valid code"], "template": None, "template_variables": None}, + ("router", 1): {"code": ["invalid code"], "replies": ["FAIL"]}, + ("router", 2): {"code": ["valid code"], "replies": ["PASS"]}, + }, + ) + ], + ) + + +@given("a pipeline that has an agent with a feedback cycle", target_fixture="pipeline_data") +def agent_with_feedback_cycle(): + @component + class FixedGenerator: + def __init__(self, replies): + self.replies = replies + self.idx = 0 + + @component.output_types(replies=List[str]) + def run(self, prompt: str): + if self.idx < len(self.replies): + replies = [self.replies[self.idx]] + self.idx += 1 + else: + self.idx = 0 + replies = [self.replies[self.idx]] + self.idx += 1 + + return {"replies": replies} + + @component + class FakeFileEditor: + @component.output_types(files=str) + def run(self, replies: List[str]): + return {"files": "This is the edited file content."} + + code_prompt_template = """ +Generate code to solve the task: {{ task }} + +You can edit files by returning: +Edit: file_name + +Once you solved the task, respond with: +Task finished! + +{% if feedback %} +Here is your initial attempt and some feedback: +{{ feedback }} +{% endif %} + """ + + feedback_prompt_template = """ +{% if task_finished %} +Check if this code is valid and can run: {{ code }} +Return "PASS" if it passes and "FAIL" if it fails. +Provide additional feedback on why it fails. +{% endif %} + """ + + code_llm = FixedGenerator(replies=["Edit: file_1.py", "Edit: file_2.py", "Edit: file_3.py", "Task finished!"]) + code_prompt = PromptBuilder(template=code_prompt_template) + file_editor = FakeFileEditor() + + feedback_llm = FixedGenerator(replies=["FAIL", "PASS"]) + feedback_prompt = PromptBuilder(template=feedback_prompt_template, required_variables=["task_finished"]) + + routes = [ + { + "condition": "{{ 'FAIL' in replies[0] }}", + "output": "{{ current_prompt + '\n' + replies[0] }}", + "output_name": "fail", + "output_type": str, + }, + { + "condition": "{{ 'PASS' in replies[0] }}", + "output": "{{ replies }}", + "output_name": "pass", + "output_type": List[str], + }, + ] + feedback_router = ConditionalRouter(routes=routes) + + tool_use_routes = [ + { + "condition": "{{ 'Edit:' in replies[0] }}", + "output": "{{ replies }}", + "output_name": "edit", + "output_type": List[str], + }, + { + "condition": "{{ 'Task finished!' 
in replies[0] }}", + "output": "{{ replies }}", + "output_name": "done", + "output_type": List[str], + }, + ] + tool_use_router = ConditionalRouter(routes=tool_use_routes) + + joiner = BranchJoiner(type_=str) + agent_concatenator = OutputAdapter(template="{{current_prompt + '\n' + files}}", output_type=str) + + pp = Pipeline(max_runs_per_component=100) + + pp.add_component("code_prompt", code_prompt) + pp.add_component("joiner", joiner) + pp.add_component("code_llm", code_llm) + pp.add_component("tool_use_router", tool_use_router) + pp.add_component("file_editor", file_editor) + pp.add_component("agent_concatenator", agent_concatenator) + pp.add_component("feedback_prompt", feedback_prompt) + pp.add_component("feedback_llm", feedback_llm) + pp.add_component("feedback_router", feedback_router) + + # Main Agent + pp.connect("code_prompt.prompt", "joiner.value") + pp.connect("joiner.value", "code_llm.prompt") + pp.connect("code_llm.replies", "tool_use_router.replies") + pp.connect("tool_use_router.edit", "file_editor.replies") + pp.connect("file_editor.files", "agent_concatenator.files") + pp.connect("joiner.value", "agent_concatenator.current_prompt") + pp.connect("agent_concatenator.output", "joiner.value") + + # Feedback Cycle + pp.connect("tool_use_router.done", "feedback_prompt.task_finished") + pp.connect("agent_concatenator.output", "feedback_prompt.code") + pp.connect("feedback_prompt.prompt", "feedback_llm.prompt") + pp.connect("feedback_llm.replies", "feedback_router.replies") + pp.connect("agent_concatenator.output", "feedback_router.current_prompt") + pp.connect("feedback_router.fail", "joiner.value") + + task = "Generate code to generate christmas ascii-art" + + return ( + pp, + [ + PipelineRunData( + inputs={"code_prompt": {"task": task}}, + expected_outputs={"feedback_router": {"pass": ["PASS"]}}, + expected_component_calls={ + ("agent_concatenator", 1): { + "current_prompt": "\n" + "Generate code to solve the " + "task: Generate code to " + "generate christmas ascii-art\n" + "\n" + "You can edit files by " + "returning:\n" + "Edit: file_name\n" + "\n" + "Once you solved the task, " + "respond with:\n" + "Task finished!\n" + "\n" + "\n" + " ", + "files": "This is the edited file content.", + }, + ("agent_concatenator", 2): { + "current_prompt": "\n" + "Generate code to solve the " + "task: Generate code to " + "generate christmas ascii-art\n" + "\n" + "You can edit files by " + "returning:\n" + "Edit: file_name\n" + "\n" + "Once you solved the task, " + "respond with:\n" + "Task finished!\n" + "\n" + "\n" + " \n" + "This is the edited file " + "content.", + "files": "This is the edited file content.", + }, + ("agent_concatenator", 3): { + "current_prompt": "\n" + "Generate code to solve the " + "task: Generate code to " + "generate christmas ascii-art\n" + "\n" + "You can edit files by " + "returning:\n" + "Edit: file_name\n" + "\n" + "Once you solved the task, " + "respond with:\n" + "Task finished!\n" + "\n" + "\n" + " \n" + "This is the edited file " + "content.\n" + "This is the edited file " + "content.", + "files": "This is the edited file content.", + }, + ("agent_concatenator", 4): { + "current_prompt": "\n" + "Generate code to solve the " + "task: Generate code to " + "generate christmas ascii-art\n" + "\n" + "You can edit files by " + "returning:\n" + "Edit: file_name\n" + "\n" + "Once you solved the task, " + "respond with:\n" + "Task finished!\n" + "\n" + "\n" + " \n" + "This is the edited file " + "content.\n" + "This is the edited file " + "content.\n" + "This is 
the edited file " + "content.\n" + "FAIL", + "files": "This is the edited file content.", + }, + ("agent_concatenator", 5): { + "current_prompt": "\n" + "Generate code to solve the " + "task: Generate code to " + "generate christmas ascii-art\n" + "\n" + "You can edit files by " + "returning:\n" + "Edit: file_name\n" + "\n" + "Once you solved the task, " + "respond with:\n" + "Task finished!\n" + "\n" + "\n" + " \n" + "This is the edited file " + "content.\n" + "This is the edited file " + "content.\n" + "This is the edited file " + "content.\n" + "FAIL\n" + "This is the edited file " + "content.", + "files": "This is the edited file content.", + }, + ("agent_concatenator", 6): { + "current_prompt": "\n" + "Generate code to solve the " + "task: Generate code to " + "generate christmas ascii-art\n" + "\n" + "You can edit files by " + "returning:\n" + "Edit: file_name\n" + "\n" + "Once you solved the task, " + "respond with:\n" + "Task finished!\n" + "\n" + "\n" + " \n" + "This is the edited file " + "content.\n" + "This is the edited file " + "content.\n" + "This is the edited file " + "content.\n" + "FAIL\n" + "This is the edited file " + "content.\n" + "This is the edited file " + "content.", + "files": "This is the edited file content.", + }, + ("code_llm", 1): { + "prompt": "\n" + "Generate code to solve the task: Generate code " + "to generate christmas ascii-art\n" + "\n" + "You can edit files by returning:\n" + "Edit: file_name\n" + "\n" + "Once you solved the task, respond with:\n" + "Task finished!\n" + "\n" + "\n" + " " + }, + ("code_llm", 2): { + "prompt": "\n" + "Generate code to solve the task: Generate code " + "to generate christmas ascii-art\n" + "\n" + "You can edit files by returning:\n" + "Edit: file_name\n" + "\n" + "Once you solved the task, respond with:\n" + "Task finished!\n" + "\n" + "\n" + " \n" + "This is the edited file content." + }, + ("code_llm", 3): { + "prompt": "\n" + "Generate code to solve the task: Generate code " + "to generate christmas ascii-art\n" + "\n" + "You can edit files by returning:\n" + "Edit: file_name\n" + "\n" + "Once you solved the task, respond with:\n" + "Task finished!\n" + "\n" + "\n" + " \n" + "This is the edited file content.\n" + "This is the edited file content." + }, + ("code_llm", 4): { + "prompt": "\n" + "Generate code to solve the task: Generate code " + "to generate christmas ascii-art\n" + "\n" + "You can edit files by returning:\n" + "Edit: file_name\n" + "\n" + "Once you solved the task, respond with:\n" + "Task finished!\n" + "\n" + "\n" + " \n" + "This is the edited file content.\n" + "This is the edited file content.\n" + "This is the edited file content." 
+ }, + ("code_llm", 5): { + "prompt": "\n" + "Generate code to solve the task: Generate code " + "to generate christmas ascii-art\n" + "\n" + "You can edit files by returning:\n" + "Edit: file_name\n" + "\n" + "Once you solved the task, respond with:\n" + "Task finished!\n" + "\n" + "\n" + " \n" + "This is the edited file content.\n" + "This is the edited file content.\n" + "This is the edited file content.\n" + "FAIL" + }, + ("code_llm", 6): { + "prompt": "\n" + "Generate code to solve the task: Generate code " + "to generate christmas ascii-art\n" + "\n" + "You can edit files by returning:\n" + "Edit: file_name\n" + "\n" + "Once you solved the task, respond with:\n" + "Task finished!\n" + "\n" + "\n" + " \n" + "This is the edited file content.\n" + "This is the edited file content.\n" + "This is the edited file content.\n" + "FAIL\n" + "This is the edited file content." + }, + ("code_llm", 7): { + "prompt": "\n" + "Generate code to solve the task: Generate code " + "to generate christmas ascii-art\n" + "\n" + "You can edit files by returning:\n" + "Edit: file_name\n" + "\n" + "Once you solved the task, respond with:\n" + "Task finished!\n" + "\n" + "\n" + " \n" + "This is the edited file content.\n" + "This is the edited file content.\n" + "This is the edited file content.\n" + "FAIL\n" + "This is the edited file content.\n" + "This is the edited file content." + }, + ("code_llm", 8): { + "prompt": "\n" + "Generate code to solve the task: Generate code " + "to generate christmas ascii-art\n" + "\n" + "You can edit files by returning:\n" + "Edit: file_name\n" + "\n" + "Once you solved the task, respond with:\n" + "Task finished!\n" + "\n" + "\n" + " \n" + "This is the edited file content.\n" + "This is the edited file content.\n" + "This is the edited file content.\n" + "FAIL\n" + "This is the edited file content.\n" + "This is the edited file content.\n" + "This is the edited file content." 
+ }, + ("code_prompt", 1): { + "feedback": "", + "task": "Generate code to generate christmas ascii-art", + "template": None, + "template_variables": None, + }, + ("feedback_llm", 1): { + "prompt": "\n" + "\n" + "Check if this code is valid and can run: \n" + "Generate code to solve the task: Generate " + "code to generate christmas ascii-art\n" + "\n" + "You can edit files by returning:\n" + "Edit: file_name\n" + "\n" + "Once you solved the task, respond with:\n" + "Task finished!\n" + "\n" + "\n" + " \n" + "This is the edited file content.\n" + "This is the edited file content.\n" + "This is the edited file content.\n" + 'Return "PASS" if it passes and "FAIL" if it ' + "fails.\n" + "Provide additional feedback on why it " + "fails.\n" + "\n" + " " + }, + ("feedback_llm", 2): { + "prompt": "\n" + "\n" + "Check if this code is valid and can run: \n" + "Generate code to solve the task: Generate " + "code to generate christmas ascii-art\n" + "\n" + "You can edit files by returning:\n" + "Edit: file_name\n" + "\n" + "Once you solved the task, respond with:\n" + "Task finished!\n" + "\n" + "\n" + " \n" + "This is the edited file content.\n" + "This is the edited file content.\n" + "This is the edited file content.\n" + "FAIL\n" + "This is the edited file content.\n" + "This is the edited file content.\n" + "This is the edited file content.\n" + 'Return "PASS" if it passes and "FAIL" if it ' + "fails.\n" + "Provide additional feedback on why it " + "fails.\n" + "\n" + " " + }, + ("feedback_prompt", 1): { + "code": "\n" + "Generate code to solve the task: Generate " + "code to generate christmas ascii-art\n" + "\n" + "You can edit files by returning:\n" + "Edit: file_name\n" + "\n" + "Once you solved the task, respond with:\n" + "Task finished!\n" + "\n" + "\n" + " \n" + "This is the edited file content.\n" + "This is the edited file content.\n" + "This is the edited file content.", + "task_finished": ["Task finished!"], + "template": None, + "template_variables": None, + }, + ("feedback_prompt", 2): { + "code": "\n" + "Generate code to solve the task: Generate " + "code to generate christmas ascii-art\n" + "\n" + "You can edit files by returning:\n" + "Edit: file_name\n" + "\n" + "Once you solved the task, respond with:\n" + "Task finished!\n" + "\n" + "\n" + " \n" + "This is the edited file content.\n" + "This is the edited file content.\n" + "This is the edited file content.\n" + "FAIL\n" + "This is the edited file content.\n" + "This is the edited file content.\n" + "This is the edited file content.", + "task_finished": ["Task finished!"], + "template": None, + "template_variables": None, + }, + ("feedback_router", 1): { + "current_prompt": "\n" + "Generate code to solve the task: " + "Generate code to generate " + "christmas ascii-art\n" + "\n" + "You can edit files by returning:\n" + "Edit: file_name\n" + "\n" + "Once you solved the task, respond " + "with:\n" + "Task finished!\n" + "\n" + "\n" + " \n" + "This is the edited file content.\n" + "This is the edited file content.\n" + "This is the edited file content.", + "replies": ["FAIL"], + }, + ("feedback_router", 2): { + "current_prompt": "\n" + "Generate code to solve the task: " + "Generate code to generate " + "christmas ascii-art\n" + "\n" + "You can edit files by returning:\n" + "Edit: file_name\n" + "\n" + "Once you solved the task, respond " + "with:\n" + "Task finished!\n" + "\n" + "\n" + " \n" + "This is the edited file content.\n" + "This is the edited file content.\n" + "This is the edited file content.\n" + "FAIL\n" + "This 
is the edited file content.\n" + "This is the edited file content.\n" + "This is the edited file content.", + "replies": ["PASS"], + }, + ("file_editor", 1): {"replies": ["Edit: file_1.py"]}, + ("file_editor", 2): {"replies": ["Edit: file_2.py"]}, + ("file_editor", 3): {"replies": ["Edit: file_3.py"]}, + ("file_editor", 4): {"replies": ["Edit: file_1.py"]}, + ("file_editor", 5): {"replies": ["Edit: file_2.py"]}, + ("file_editor", 6): {"replies": ["Edit: file_3.py"]}, + ("joiner", 1): { + "value": [ + "\n" + "Generate code to solve the task: Generate code to " + "generate christmas ascii-art\n" + "\n" + "You can edit files by returning:\n" + "Edit: file_name\n" + "\n" + "Once you solved the task, respond with:\n" + "Task finished!\n" + "\n" + "\n" + " " + ] + }, + ("joiner", 2): { + "value": [ + "\n" + "Generate code to solve the task: Generate code to " + "generate christmas ascii-art\n" + "\n" + "You can edit files by returning:\n" + "Edit: file_name\n" + "\n" + "Once you solved the task, respond with:\n" + "Task finished!\n" + "\n" + "\n" + " \n" + "This is the edited file content." + ] + }, + ("joiner", 3): { + "value": [ + "\n" + "Generate code to solve the task: Generate code to " + "generate christmas ascii-art\n" + "\n" + "You can edit files by returning:\n" + "Edit: file_name\n" + "\n" + "Once you solved the task, respond with:\n" + "Task finished!\n" + "\n" + "\n" + " \n" + "This is the edited file content.\n" + "This is the edited file content." + ] + }, + ("joiner", 4): { + "value": [ + "\n" + "Generate code to solve the task: Generate code to " + "generate christmas ascii-art\n" + "\n" + "You can edit files by returning:\n" + "Edit: file_name\n" + "\n" + "Once you solved the task, respond with:\n" + "Task finished!\n" + "\n" + "\n" + " \n" + "This is the edited file content.\n" + "This is the edited file content.\n" + "This is the edited file content." + ] + }, + ("joiner", 5): { + "value": [ + "\n" + "Generate code to solve the task: Generate code to " + "generate christmas ascii-art\n" + "\n" + "You can edit files by returning:\n" + "Edit: file_name\n" + "\n" + "Once you solved the task, respond with:\n" + "Task finished!\n" + "\n" + "\n" + " \n" + "This is the edited file content.\n" + "This is the edited file content.\n" + "This is the edited file content.\n" + "FAIL" + ] + }, + ("joiner", 6): { + "value": [ + "\n" + "Generate code to solve the task: Generate code to " + "generate christmas ascii-art\n" + "\n" + "You can edit files by returning:\n" + "Edit: file_name\n" + "\n" + "Once you solved the task, respond with:\n" + "Task finished!\n" + "\n" + "\n" + " \n" + "This is the edited file content.\n" + "This is the edited file content.\n" + "This is the edited file content.\n" + "FAIL\n" + "This is the edited file content." + ] + }, + ("joiner", 7): { + "value": [ + "\n" + "Generate code to solve the task: Generate code to " + "generate christmas ascii-art\n" + "\n" + "You can edit files by returning:\n" + "Edit: file_name\n" + "\n" + "Once you solved the task, respond with:\n" + "Task finished!\n" + "\n" + "\n" + " \n" + "This is the edited file content.\n" + "This is the edited file content.\n" + "This is the edited file content.\n" + "FAIL\n" + "This is the edited file content.\n" + "This is the edited file content." 
+ ] + }, + ("joiner", 8): { + "value": [ + "\n" + "Generate code to solve the task: Generate code to " + "generate christmas ascii-art\n" + "\n" + "You can edit files by returning:\n" + "Edit: file_name\n" + "\n" + "Once you solved the task, respond with:\n" + "Task finished!\n" + "\n" + "\n" + " \n" + "This is the edited file content.\n" + "This is the edited file content.\n" + "This is the edited file content.\n" + "FAIL\n" + "This is the edited file content.\n" + "This is the edited file content.\n" + "This is the edited file content." + ] + }, + ("tool_use_router", 1): {"replies": ["Edit: file_1.py"]}, + ("tool_use_router", 2): {"replies": ["Edit: file_2.py"]}, + ("tool_use_router", 3): {"replies": ["Edit: file_3.py"]}, + ("tool_use_router", 4): {"replies": ["Task finished!"]}, + ("tool_use_router", 5): {"replies": ["Edit: file_1.py"]}, + ("tool_use_router", 6): {"replies": ["Edit: file_2.py"]}, + ("tool_use_router", 7): {"replies": ["Edit: file_3.py"]}, + ("tool_use_router", 8): {"replies": ["Task finished!"]}, + }, + ) + ], + ) + + +@given("a pipeline that passes outputs that are consumed in cycle to outside the cycle", target_fixture="pipeline_data") +def passes_outputs_outside_cycle(): + @component + class FixedGenerator: + def __init__(self, replies): + self.replies = replies + self.idx = 0 + + @component.output_types(replies=List[str]) + def run(self, prompt: str): + if self.idx < len(self.replies): + replies = [self.replies[self.idx]] + self.idx += 1 + else: + self.idx = 0 + replies = [self.replies[self.idx]] + self.idx += 1 + + return {"replies": replies} + + @component + class AnswerBuilderWithPrompt: + @component.output_types(answers=List[GeneratedAnswer]) + def run(self, replies: List[str], query: str, prompt: Optional[str] = None) -> Dict[str, Any]: + answer = GeneratedAnswer(data=replies[0], query=query, documents=[]) + + if prompt is not None: + answer.meta["prompt"] = prompt + + return {"answers": [answer]} + + code_prompt_template = "{{task}}" + + feedback_prompt_template = """ +Check if this code is valid and can run: {{ code[0] }} +Return "PASS" if it passes and "FAIL" if it fails. +Provide additional feedback on why it fails. + """ + + valid_response = """ +def generate_santa_sleigh(): + ''' + Returns ASCII art of Santa Claus on his sleigh with Rudolph leading the way. + ''' + # implementation goes here. 
+ return art + """ + + code_llm = FixedGenerator(replies=["invalid code", "invalid code", valid_response]) + code_prompt = PromptBuilder(template=code_prompt_template) + + feedback_llm = FixedGenerator(replies=["FAIL", "FAIL, come on, try again.", "PASS"]) + feedback_prompt = PromptBuilder(template=feedback_prompt_template) + + routes = [ + { + "condition": "{{ 'FAIL' in replies[0] }}", + "output": "{{ replies[0] }}", + "output_name": "fail", + "output_type": str, + }, + { + "condition": "{{ 'PASS' in replies[0] }}", + "output": "{{ code }}", + "output_name": "pass", + "output_type": List[str], + }, + ] + + router = ConditionalRouter(routes=routes) + joiner = BranchJoiner(type_=str) + concatenator = OutputAdapter( + template="{{code_prompt + '\n' + generated_code[0] + '\n' + feedback}}", output_type=str + ) + + answer_builder = AnswerBuilderWithPrompt() + + pp = Pipeline(max_runs_per_component=100) + + pp.add_component("concatenator", concatenator) + pp.add_component("code_llm", code_llm) + pp.add_component("code_prompt", code_prompt) + pp.add_component("feedback_prompt", feedback_prompt) + pp.add_component("feedback_llm", feedback_llm) + pp.add_component("router", router) + pp.add_component("joiner", joiner) + + pp.add_component("answer_builder", answer_builder) + + pp.connect("concatenator.output", "joiner.value") + pp.connect("joiner.value", "code_prompt.task") + pp.connect("code_prompt.prompt", "code_llm.prompt") + pp.connect("code_prompt.prompt", "concatenator.code_prompt") + pp.connect("code_llm.replies", "feedback_prompt.code") + pp.connect("feedback_llm.replies", "router.replies") + pp.connect("router.fail", "concatenator.feedback") + pp.connect("feedback_prompt.prompt", "feedback_llm.prompt") + pp.connect("router.pass", "answer_builder.replies") + pp.connect("code_llm.replies", "router.code") + pp.connect("code_llm.replies", "concatenator.generated_code") + pp.connect("concatenator.output", "answer_builder.prompt") + + task = "Generate code to generate christmas ascii-art" + + expected_prompt = """Generate code to generate christmas ascii-art +invalid code +FAIL +invalid code +FAIL, come on, try again.""" + return ( + pp, + [ + PipelineRunData( + inputs={"joiner": {"value": task}, "answer_builder": {"query": task}}, + expected_outputs={ + "answer_builder": { + "answers": [ + GeneratedAnswer( + data=valid_response, query=task, documents=[], meta={"prompt": expected_prompt} + ) + ] + } + }, + expected_component_calls={ + ("answer_builder", 1): { + "prompt": "Generate code to generate christmas " + "ascii-art\n" + "invalid code\n" + "FAIL\n" + "invalid code\n" + "FAIL, come on, try again.", + "query": "Generate code to generate christmas ascii-art", + "replies": [ + "\n" + "def generate_santa_sleigh():\n" + " '''\n" + " Returns ASCII art of Santa Claus on " + "his sleigh with Rudolph leading the " + "way.\n" + " '''\n" + " # implementation goes here.\n" + " return art\n" + " " + ], + }, + ("code_llm", 1): {"prompt": "Generate code to generate christmas ascii-art"}, + ("code_llm", 2): {"prompt": "Generate code to generate christmas ascii-art\ninvalid code\nFAIL"}, + ("code_llm", 3): { + "prompt": "Generate code to generate christmas ascii-art\n" + "invalid code\n" + "FAIL\n" + "invalid code\n" + "FAIL, come on, try again." 
+ }, + ("code_prompt", 1): { + "task": "Generate code to generate christmas ascii-art", + "template": None, + "template_variables": None, + }, + ("code_prompt", 2): { + "task": "Generate code to generate christmas ascii-art\ninvalid code\nFAIL", + "template": None, + "template_variables": None, + }, + ("code_prompt", 3): { + "task": "Generate code to generate christmas ascii-art\n" + "invalid code\n" + "FAIL\n" + "invalid code\n" + "FAIL, come on, try again.", + "template": None, + "template_variables": None, + }, + ("concatenator", 1): { + "code_prompt": "Generate code to generate christmas ascii-art", + "feedback": "FAIL", + "generated_code": ["invalid code"], + }, + ("concatenator", 2): { + "code_prompt": "Generate code to generate christmas ascii-art\ninvalid code\nFAIL", + "feedback": "FAIL, come on, try again.", + "generated_code": ["invalid code"], + }, + ("feedback_llm", 1): { + "prompt": "\n" + "Check if this code is valid and can run: " + "invalid code\n" + 'Return "PASS" if it passes and "FAIL" if it ' + "fails.\n" + "Provide additional feedback on why it " + "fails.\n" + " " + }, + ("feedback_llm", 2): { + "prompt": "\n" + "Check if this code is valid and can run: " + "invalid code\n" + 'Return "PASS" if it passes and "FAIL" if it ' + "fails.\n" + "Provide additional feedback on why it " + "fails.\n" + " " + }, + ("feedback_llm", 3): { + "prompt": "\n" + "Check if this code is valid and can run: \n" + "def generate_santa_sleigh():\n" + " '''\n" + " Returns ASCII art of Santa Claus on his " + "sleigh with Rudolph leading the way.\n" + " '''\n" + " # implementation goes here.\n" + " return art\n" + " \n" + 'Return "PASS" if it passes and "FAIL" if it ' + "fails.\n" + "Provide additional feedback on why it " + "fails.\n" + " " + }, + ("feedback_prompt", 1): {"code": ["invalid code"], "template": None, "template_variables": None}, + ("feedback_prompt", 2): {"code": ["invalid code"], "template": None, "template_variables": None}, + ("feedback_prompt", 3): { + "code": [ + "\n" + "def generate_santa_sleigh():\n" + " '''\n" + " Returns ASCII art of Santa Claus on " + "his sleigh with Rudolph leading the way.\n" + " '''\n" + " # implementation goes here.\n" + " return art\n" + " " + ], + "template": None, + "template_variables": None, + }, + ("joiner", 1): {"value": ["Generate code to generate christmas ascii-art"]}, + ("joiner", 2): {"value": ["Generate code to generate christmas ascii-art\ninvalid code\nFAIL"]}, + ("joiner", 3): { + "value": [ + "Generate code to generate christmas ascii-art\n" + "invalid code\n" + "FAIL\n" + "invalid code\n" + "FAIL, come on, try again." 
+ ] + }, + ("router", 1): {"code": ["invalid code"], "replies": ["FAIL"]}, + ("router", 2): {"code": ["invalid code"], "replies": ["FAIL, come on, try again."]}, + ("router", 3): { + "code": [ + "\n" + "def generate_santa_sleigh():\n" + " '''\n" + " Returns ASCII art of Santa Claus on his sleigh " + "with Rudolph leading the way.\n" + " '''\n" + " # implementation goes here.\n" + " return art\n" + " " + ], + "replies": ["PASS"], + }, + }, + ) + ], + ) + + +@given("a pipeline with a component that has dynamic default inputs", target_fixture="pipeline_data") +def pipeline_with_dynamic_defaults(): + @component + class ParrotWithDynamicDefaultInputs: + def __init__(self, input_variable: str): + self.input_variable = input_variable + component.set_input_type(self, input_variable, str, default="Parrot doesn't only parrot!") + + @component.output_types(response=str) + def run(self, **kwargs): + return {"response": kwargs[self.input_variable]} + + parrot = ParrotWithDynamicDefaultInputs("parrot") + pipeline = Pipeline() + pipeline.add_component("parrot", parrot) + return ( + pipeline, + [ + PipelineRunData( + inputs={"parrot": {"parrot": "Are you a parrot?"}}, + expected_outputs={"parrot": {"response": "Are you a parrot?"}}, + expected_component_calls={("parrot", 1): {"parrot": "Are you a parrot?"}}, + ), + PipelineRunData( + inputs={}, + expected_outputs={"parrot": {"response": "Parrot doesn't only parrot!"}}, + expected_component_calls={("parrot", 1): {"parrot": "Parrot doesn't only parrot!"}}, + ), + ], + ) + + +@given("a pipeline with a component that has variadic dynamic default inputs", target_fixture="pipeline_data") +def pipeline_with_variadic_dynamic_defaults(): + @component + class ParrotWithVariadicDynamicDefaultInputs: + def __init__(self, input_variable: str): + self.input_variable = input_variable + component.set_input_type(self, input_variable, Variadic[str], default="Parrot doesn't only parrot!") + + @component.output_types(response=List[str]) + def run(self, **kwargs): + return {"response": kwargs[self.input_variable]} + + parrot = ParrotWithVariadicDynamicDefaultInputs("parrot") + pipeline = Pipeline() + pipeline.add_component("parrot", parrot) + return ( + pipeline, + [ + PipelineRunData( + inputs={"parrot": {"parrot": "Are you a parrot?"}}, + expected_outputs={"parrot": {"response": ["Are you a parrot?"]}}, + expected_component_calls={("parrot", 1): {"parrot": ["Are you a parrot?"]}}, + ), + PipelineRunData( + inputs={}, + expected_outputs={"parrot": {"response": ["Parrot doesn't only parrot!"]}}, + expected_component_calls={("parrot", 1): {"parrot": ["Parrot doesn't only parrot!"]}}, + ), + ], + ) + + +@given("a pipeline that is a file conversion pipeline with two joiners", target_fixture="pipeline_data") +def pipeline_that_converts_files(): + csv_data = """ +some,header,row +0,1,0 + """ + + txt_data = "Text file content for testing this." 
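+    # The three sources in this fixture each carry a distinct MIME type, so
+    # FileTypeRouter dispatches every ByteStream to exactly one converter branch
+    # (csv, txt, or json) before the two joiners merge the converted Documents.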
+ + json_data = '{"content": "Some test content"}' + + sources = [ + ByteStream.from_string(text=csv_data, mime_type="text/csv", meta={"file_type": "csv"}), + ByteStream.from_string(text=txt_data, mime_type="text/plain", meta={"file_type": "txt"}), + ByteStream.from_string(text=json_data, mime_type="application/json", meta={"file_type": "json"}), + ] + + router = FileTypeRouter(mime_types=["text/csv", "text/plain", "application/json"]) + splitter = DocumentSplitter(split_by="word", split_length=3, split_overlap=0) + txt_converter = TextFileToDocument() + csv_converter = CSVToDocument() + json_converter = JSONConverter(content_key="content") + + b_joiner = DocumentJoiner() + a_joiner = DocumentJoiner() + + pp = Pipeline(max_runs_per_component=1) + + pp.add_component("router", router) + pp.add_component("splitter", splitter) + pp.add_component("txt_converter", txt_converter) + pp.add_component("csv_converter", csv_converter) + pp.add_component("json_converter", json_converter) + pp.add_component("b_joiner", b_joiner) + pp.add_component("a_joiner", a_joiner) + + pp.connect("router.text/plain", "txt_converter.sources") + pp.connect("router.application/json", "json_converter.sources") + pp.connect("router.text/csv", "csv_converter.sources") + pp.connect("txt_converter.documents", "b_joiner.documents") + pp.connect("json_converter.documents", "b_joiner.documents") + pp.connect("csv_converter.documents", "a_joiner.documents") + pp.connect("b_joiner.documents", "splitter.documents") + pp.connect("splitter.documents", "a_joiner.documents") + + expected_pre_split_docs = [ + Document(content="Some test content", meta={"file_type": "json"}), + Document(content=txt_data, meta={"file_type": "txt"}), + ] + expected_splits_docs = [ + Document( + content="Some test content", + meta={ + "file_type": "json", + "source_id": "0c6c5951d18da2935c7af3e24d417a9f94ca85403866dcfee1de93922504e1e5", + "page_number": 1, + "split_id": 0, + "split_idx_start": 0, + }, + ), + Document( + content="Text file content ", + meta={ + "file_type": "txt", + "source_id": "41cb91740f6e64ab542122936ea746c238ae0a92fd29b698efabbe23d0ba4c42", + "page_number": 1, + "split_id": 0, + "split_idx_start": 0, + }, + ), + Document( + content="for testing this.", + meta={ + "file_type": "txt", + "source_id": "41cb91740f6e64ab542122936ea746c238ae0a92fd29b698efabbe23d0ba4c42", + "page_number": 1, + "split_id": 1, + "split_idx_start": 18, + }, + ), + ] + + expected_csv_docs = [Document(content=csv_data, meta={"file_type": "csv"})] + + return ( + pp, + [ + PipelineRunData( + inputs={"router": {"sources": sources}}, + expected_outputs={"a_joiner": {"documents": expected_csv_docs + expected_splits_docs}}, + expected_component_calls={ + ("router", 1): {"sources": sources, "meta": None}, + ("csv_converter", 1): {"sources": [sources[0]], "meta": None}, + ("txt_converter", 1): {"sources": [sources[1]], "meta": None}, + ("json_converter", 1): {"sources": [sources[2]], "meta": None}, + ("b_joiner", 1): { + "documents": [[expected_pre_split_docs[0]], [expected_pre_split_docs[1]]], + "top_k": None, + }, + ("splitter", 1): {"documents": expected_pre_split_docs}, + ("a_joiner", 1): {"documents": [expected_csv_docs, expected_splits_docs], "top_k": None}, + }, ) ], ) diff --git a/test/core/pipeline/test_component_checks.py b/test/core/pipeline/test_component_checks.py new file mode 100644 index 0000000000..f536f69e94 --- /dev/null +++ b/test/core/pipeline/test_component_checks.py @@ -0,0 +1,648 @@ +# SPDX-FileCopyrightText: 2022-present deepset GmbH +# +# 
SPDX-License-Identifier: Apache-2.0 + +import pytest + +from haystack.core.pipeline.component_checks import * +from haystack.core.pipeline.component_checks import _NO_OUTPUT_PRODUCED +from haystack.core.component.types import InputSocket, OutputSocket, Variadic, GreedyVariadic + + +@pytest.fixture +def basic_component(): + """Basic component with one mandatory and one optional input.""" + return { + "instance": "mock_instance", + "visits": 0, + "input_sockets": { + "mandatory_input": InputSocket("mandatory_input", int, senders=["previous_component"]), + "optional_input": InputSocket("optional_input", str, default_value="default"), + }, + "output_sockets": {"output": OutputSocket("output", int)}, + } + + +@pytest.fixture +def variadic_component(): + """Component with variadic input.""" + return { + "instance": "mock_instance", + "visits": 0, + "input_sockets": { + "variadic_input": InputSocket("variadic_input", Variadic[int], senders=["previous_component"]), + "normal_input": InputSocket("normal_input", str, senders=["another_component"]), + }, + "output_sockets": {"output": OutputSocket("output", int)}, + } + + +@pytest.fixture +def greedy_variadic_component(): + """Component with greedy variadic input.""" + return { + "instance": "mock_instance", + "visits": 0, + "input_sockets": { + "greedy_input": InputSocket( + "greedy_input", GreedyVariadic[int], senders=["previous_component", "other_component"] + ), + "normal_input": InputSocket("normal_input", str), + }, + "output_sockets": {"output": OutputSocket("output", int)}, + } + + +@pytest.fixture +def input_socket_with_sender(): + """Regular input socket with a single sender.""" + socket = InputSocket("test_input", int) + socket.senders = ["component1"] + return socket + + +@pytest.fixture +def variadic_socket_with_senders(): + """Variadic input socket with multiple senders.""" + socket = InputSocket("test_variadic", Variadic[int]) + socket.senders = ["component1", "component2"] + return socket + + +@pytest.fixture +def component_with_multiple_sockets(input_socket_with_sender, variadic_socket_with_senders): + """Component with multiple input sockets including both regular and variadic.""" + return { + "instance": "mock_instance", + "input_sockets": { + "socket1": input_socket_with_sender, + "socket2": variadic_socket_with_senders, + "socket3": InputSocket("socket3", str), # No senders + }, + } + + +@pytest.fixture +def regular_socket(): + """Regular input socket with one sender.""" + socket = InputSocket("regular", int) + socket.senders = ["component1"] + return socket + + +@pytest.fixture +def lazy_variadic_socket(): + """Lazy variadic input socket with multiple senders.""" + socket = InputSocket("lazy_variadic", Variadic[int]) + socket.senders = ["component1", "component2"] + return socket + + +@pytest.fixture +def greedy_variadic_socket(): + """Greedy variadic input socket with multiple senders.""" + socket = InputSocket("greedy_variadic", GreedyVariadic[int]) + socket.senders = ["component1", "component2", "component3"] + return socket + + +@pytest.fixture +def complex_component(regular_socket, lazy_variadic_socket, greedy_variadic_socket): + """Component with all types of sockets.""" + return { + "instance": "mock_instance", + "input_sockets": { + "regular": regular_socket, + "lazy_var": lazy_variadic_socket, + "greedy_var": greedy_variadic_socket, + }, + } + + +class TestCanComponentRun: + def test_component_with_all_mandatory_inputs_and_trigger(self, basic_component): + """Checks that the component runs if all mandatory inputs are 
received and a trigger is present.""" + inputs = {"mandatory_input": [{"sender": "previous_component", "value": 42}]} + assert can_component_run(basic_component, inputs) is True + + def test_component_missing_mandatory_input(self, basic_component): + """Checks that the component won't run if mandatory inputs are missing.""" + inputs = {"optional_input": [{"sender": "previous_component", "value": "test"}]} + assert can_component_run(basic_component, inputs) is False + + def test_component_with_no_trigger_but_all_inputs(self, basic_component): + """ + Test case where all mandatory inputs are present with valid values, + but there is no trigger (no new input from a predecessor, and it is not the first visit). + """ + basic_component["visits"] = 1 + inputs = {"mandatory_input": [{"sender": None, "value": 42}]} + assert can_component_run(basic_component, inputs) is False + + def test_component_with_multiple_visits(self, basic_component): + """Checks that a component can still be triggered on subsequent visits by a predecessor.""" + basic_component["visits"] = 2 + inputs = {"mandatory_input": [{"sender": "previous_component", "value": 42}]} + assert can_component_run(basic_component, inputs) is True + + def test_component_with_no_inputs_first_visit(self, basic_component): + """Checks that a component with no input sockets can be triggered on its first visit.""" + basic_component["input_sockets"] = {} + inputs = {} + assert can_component_run(basic_component, inputs) is True + + def test_component_triggered_on_second_visit_with_new_input(self, basic_component): + """ + Tests that a second visit is triggered if new predecessor input arrives + (i.e. visits > 0, but a valid new input from a predecessor is provided). + """ + # First, simulate that the component has already run once. + basic_component["visits"] = 1 + + # Now a predecessor provides a new input; this should re-trigger execution.
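+        # The trigger comes from the non-None sender: after the first visit, only values
+        # sent by predecessor components re-trigger execution, while user-provided
+        # inputs (sender=None) no longer count (see TestHasAnyTrigger below).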
+ inputs = {"mandatory_input": [{"sender": "previous_component", "value": 99}]} + assert can_component_run(basic_component, inputs) is True + + +class TestHasAnyTrigger: + def test_trigger_from_predecessor(self, basic_component): + """Ensures that new data from a predecessor can trigger a component.""" + inputs = {"mandatory_input": [{"sender": "previous_component", "value": 42}]} + assert has_any_trigger(basic_component, inputs) is True + + def test_trigger_from_user_first_visit(self, basic_component): + """Checks that user input (sender=None) triggers the component on the first visit.""" + inputs = {"mandatory_input": [{"sender": None, "value": 42}]} + assert has_any_trigger(basic_component, inputs) is True + + def test_no_trigger_from_user_after_first_visit(self, basic_component): + """Checks that user input no longer triggers the component after the first visit.""" + basic_component["visits"] = 1 + inputs = {"mandatory_input": [{"sender": None, "value": 42}]} + assert has_any_trigger(basic_component, inputs) is False + + def test_trigger_without_inputs_first_visit(self, basic_component): + """Checks that a component with no inputs is triggered on the first visit.""" + basic_component["input_sockets"] = {} + inputs = {} + assert has_any_trigger(basic_component, inputs) is True + + def test_no_trigger_without_inputs_after_first_visit(self, basic_component): + """Checks that on subsequent visits, no inputs means no trigger.""" + basic_component["input_sockets"] = {} + basic_component["visits"] = 1 + inputs = {} + assert has_any_trigger(basic_component, inputs) is False + + +class TestAllMandatorySocketsReady: + def test_all_mandatory_sockets_filled(self, basic_component): + """Checks that all mandatory sockets are ready when they have valid input.""" + inputs = {"mandatory_input": [{"sender": "previous_component", "value": 42}]} + assert are_all_sockets_ready(basic_component, inputs) is True + + def test_missing_mandatory_socket(self, basic_component): + """Ensures that if a mandatory socket is missing, the component is not ready.""" + inputs = {"optional_input": [{"sender": "previous_component", "value": "test"}]} + assert are_all_sockets_ready(basic_component, inputs) is False + + def test_variadic_socket_with_input(self, variadic_component): + """Verifies that a variadic socket is considered filled if it has at least one input.""" + inputs = { + "variadic_input": [{"sender": "previous_component", "value": 42}], + "normal_input": [{"sender": "previous_component", "value": "test"}], + } + assert are_all_sockets_ready(variadic_component, inputs) is True + + def test_greedy_variadic_socket(self, greedy_variadic_component): + """Greedy variadic sockets are ready with at least one valid input.""" + inputs = { + "greedy_input": [{"sender": "previous_component", "value": 42}], + "normal_input": [{"sender": "previous_component", "value": "test"}], + } + assert are_all_sockets_ready(greedy_variadic_component, inputs) is True + + def test_greedy_variadic_socket_and_missing_mandatory(self, greedy_variadic_component): + """All mandatory sockets need to be filled even with GreedyVariadic sockets.""" + inputs = {"greedy_input": [{"sender": "previous_component", "value": 42}]} + assert are_all_sockets_ready(greedy_variadic_component, inputs, only_check_mandatory=True) is False + + def test_variadic_socket_no_input(self, variadic_component): + """A variadic socket is not filled if it has zero valid inputs.""" + inputs = {"normal_input": [{"sender": "previous_component", "value": "test"}]} + assert 
are_all_sockets_ready(variadic_component, inputs) is False + + def test_mandatory_and_optional_sockets(self): + """An unfilled optional socket blocks full readiness, but not the mandatory-only check.""" + input_sockets = { + "mandatory": InputSocket("mandatory", str, senders=["previous_component"]), + "optional": InputSocket("optional", str, senders=["previous_component"], default_value="test"), + } + + component = {"input_sockets": input_sockets} + inputs = {"mandatory": [{"sender": "previous_component", "value": "hello"}]} + assert are_all_sockets_ready(component, inputs) is False + assert are_all_sockets_ready(component, inputs, only_check_mandatory=True) is True + + def test_empty_inputs(self, basic_component): + """Checks that if there are no inputs at all, mandatory sockets are not ready.""" + inputs = {} + assert are_all_sockets_ready(basic_component, inputs) is False + + def test_no_mandatory_sockets(self, basic_component): + """Ensures that if there are no mandatory sockets, the component is considered ready.""" + basic_component["input_sockets"] = { + "optional_1": InputSocket("optional_1", str, default_value="default1"), + "optional_2": InputSocket("optional_2", str, default_value="default2"), + } + inputs = {} + assert are_all_sockets_ready(basic_component, inputs) is True + + def test_multiple_mandatory_sockets(self, basic_component): + """Checks readiness when multiple mandatory sockets are defined.""" + basic_component["input_sockets"] = { + "mandatory_1": InputSocket("mandatory_1", int, senders=["previous_component"]), + "mandatory_2": InputSocket("mandatory_2", str, senders=["some other component"]), + "optional": InputSocket("optional", bool, default_value=False), + } + inputs = { + "mandatory_1": [{"sender": "comp1", "value": 42}], + "mandatory_2": [{"sender": "comp2", "value": "test"}], + } + assert are_all_sockets_ready(basic_component, inputs) is True + + # Missing one mandatory input + inputs = {"mandatory_1": [{"sender": "comp1", "value": 42}], "optional": [{"sender": "comp3", "value": True}]} + assert are_all_sockets_ready(basic_component, inputs) is False + + +class TestPredecessorInputDetection: + def test_any_predecessors_provided_input_with_predecessor(self, component_with_multiple_sockets): + """ + Tests detection of predecessor input when a valid predecessor sends data. + """ + inputs = {"socket1": [{"sender": "component1", "value": 42}], "socket2": [{"sender": None, "value": "test"}]} + assert any_predecessors_provided_input(component_with_multiple_sockets, inputs) is True + + def test_any_predecessors_provided_input_no_predecessor(self, component_with_multiple_sockets): + """ + Checks that no predecessor inputs are detected if all senders are None (user inputs). + """ + inputs = {"socket1": [{"sender": None, "value": 42}], "socket2": [{"sender": None, "value": "test"}]} + assert any_predecessors_provided_input(component_with_multiple_sockets, inputs) is False + + def test_any_predecessors_provided_input_with_no_output(self, component_with_multiple_sockets): + """ + Ensures that _NO_OUTPUT_PRODUCED from a predecessor is ignored in the predecessor detection.
+ """ + inputs = { + "socket1": [{"sender": "component1", "value": _NO_OUTPUT_PRODUCED}], + "socket2": [{"sender": None, "value": "test"}], + } + assert any_predecessors_provided_input(component_with_multiple_sockets, inputs) is False + + def test_any_predecessors_provided_input_empty_inputs(self, component_with_multiple_sockets): + """Ensures that empty inputs dictionary returns False.""" + inputs = {} + assert any_predecessors_provided_input(component_with_multiple_sockets, inputs) is False + + +class TestSocketValueFromPredecessor: + """ + Tests for `any_socket_value_from_predecessor_received`, verifying whether + any predecessor component provided valid output to a socket. + """ + + @pytest.mark.parametrize( + "socket_inputs, expected_result", + [ + pytest.param([{"sender": "component1", "value": 42}], True, id="valid_input"), + pytest.param([{"sender": "component1", "value": _NO_OUTPUT_PRODUCED}], False, id="no_output"), + pytest.param([{"sender": None, "value": 42}], False, id="user_input"), + pytest.param( + [ + {"sender": None, "value": 42}, + {"sender": "component1", "value": _NO_OUTPUT_PRODUCED}, + {"sender": "component2", "value": 100}, + ], + True, + id="mixed_inputs", + ), + pytest.param([], False, id="empty_list"), + ], + ) + def test_any_socket_value_from_predecessor_received(self, socket_inputs, expected_result): + """ + Parametrized test to check whether any valid predecessor input + exists in a list of socket inputs. + """ + assert any_socket_value_from_predecessor_received(socket_inputs) == expected_result + + +class TestUserInputDetection: + def test_has_user_input_with_user_input(self): + """Checks that having a sender=None input means user input is present.""" + inputs = {"socket1": [{"sender": None, "value": 42}], "socket2": [{"sender": "component1", "value": "test"}]} + assert has_user_input(inputs) is True + + def test_has_user_input_without_user_input(self): + """Ensures that if all senders are component-based, there's no user input.""" + inputs = { + "socket1": [{"sender": "component1", "value": 42}], + "socket2": [{"sender": "component2", "value": "test"}], + } + assert has_user_input(inputs) is False + + def test_has_user_input_empty_inputs(self): + """Checks that an empty inputs dict has no user input.""" + inputs = {} + assert has_user_input(inputs) is False + + def test_has_user_input_with_no_output(self): + """ + Even if the input value is _NO_OUTPUT_PRODUCED, if sender=None + it still counts as user input being provided. 
+ """ + inputs = {"socket1": [{"sender": None, "value": _NO_OUTPUT_PRODUCED}]} + assert has_user_input(inputs) is True + + +class TestPipelineInputCapability: + def test_cannot_receive_inputs_no_senders(self): + """Checks that a component with zero senders for each socket cannot receive pipeline inputs.""" + component = {"input_sockets": {"socket1": InputSocket("socket1", int), "socket2": InputSocket("socket2", str)}} + assert can_not_receive_inputs_from_pipeline(component) is True + + def test_cannot_receive_inputs_with_senders(self, component_with_multiple_sockets): + """If at least one socket has a sender, the component can receive pipeline inputs.""" + assert can_not_receive_inputs_from_pipeline(component_with_multiple_sockets) is False + + def test_cannot_receive_inputs_mixed_senders(self, input_socket_with_sender): + """A single socket with a sender means the component can receive pipeline inputs.""" + component = { + "input_sockets": { + "socket1": input_socket_with_sender, + "socket2": InputSocket("socket2", str), # No senders + } + } + assert can_not_receive_inputs_from_pipeline(component) is False + + +class TestSocketExecutionStatus: + def test_regular_socket_predecessor_executed(self, input_socket_with_sender): + """Verifies that if the correct sender provides a value, the socket is marked as executed.""" + socket_inputs = [{"sender": "component1", "value": 42}] + assert all_socket_predecessors_executed(input_socket_with_sender, socket_inputs) is True + + def test_regular_socket_predecessor_not_executed(self, input_socket_with_sender): + """If there are no inputs, the predecessor is not considered executed.""" + socket_inputs = [] + assert all_socket_predecessors_executed(input_socket_with_sender, socket_inputs) is False + + def test_regular_socket_with_wrong_predecessor(self, input_socket_with_sender): + """Checks that a mismatch in sender means the socket is not yet executed.""" + socket_inputs = [{"sender": "component2", "value": 42}] + assert all_socket_predecessors_executed(input_socket_with_sender, socket_inputs) is False + + def test_variadic_socket_all_predecessors_executed(self, variadic_socket_with_senders): + """Variadic socket is executed only if all senders have produced at least one valid result.""" + socket_inputs = [{"sender": "component1", "value": 42}, {"sender": "component2", "value": 43}] + assert all_socket_predecessors_executed(variadic_socket_with_senders, socket_inputs) is True + + def test_variadic_socket_partial_execution(self, variadic_socket_with_senders): + """If only one of multiple senders produced an output, not all predecessors are executed.""" + socket_inputs = [{"sender": "component1", "value": 42}] + assert all_socket_predecessors_executed(variadic_socket_with_senders, socket_inputs) is False + + def test_variadic_socket_with_user_input(self, variadic_socket_with_senders): + """ + User input (sender=None) doesn't block the socket from being 'executed' if + all named predecessors have also produced outputs. 
+ """ + socket_inputs = [ + {"sender": "component1", "value": 42}, + {"sender": None, "value": 43}, + {"sender": "component2", "value": 44}, + ] + assert all_socket_predecessors_executed(variadic_socket_with_senders, socket_inputs) is True + + def test_variadic_socket_no_execution(self, variadic_socket_with_senders): + """Empty inputs means no predecessor has executed.""" + socket_inputs = [] + assert all_socket_predecessors_executed(variadic_socket_with_senders, socket_inputs) is False + + +class TestSocketInputReceived: + def test_any_socket_input_received_with_value(self): + """Checks that if there's a non-_NO_OUTPUT_PRODUCED value, the socket is marked as having input.""" + socket_inputs = [{"sender": "component1", "value": 42}] + assert any_socket_input_received(socket_inputs) is True + + def test_any_socket_input_received_with_no_output(self): + """If all inputs are _NO_OUTPUT_PRODUCED, the socket has no effective input.""" + socket_inputs = [{"sender": "component1", "value": _NO_OUTPUT_PRODUCED}] + assert any_socket_input_received(socket_inputs) is False + + def test_any_socket_input_received_mixed_inputs(self): + """A single valid input among many is enough to consider the socket as having input.""" + socket_inputs = [{"sender": "component1", "value": _NO_OUTPUT_PRODUCED}, {"sender": "component2", "value": 42}] + assert any_socket_input_received(socket_inputs) is True + + def test_any_socket_input_received_empty_list(self): + """Empty list: no input received.""" + assert any_socket_input_received([]) is False + + +class TestLazyVariadicSocket: + def test_lazy_variadic_all_inputs_received(self, variadic_socket_with_senders): + """Lazy variadic socket is ready only if all named senders provided outputs.""" + socket_inputs = [{"sender": "component1", "value": 42}, {"sender": "component2", "value": 43}] + assert has_lazy_variadic_socket_received_all_inputs(variadic_socket_with_senders, socket_inputs) is True + + def test_lazy_variadic_partial_inputs(self, variadic_socket_with_senders): + """Partial inputs from only some senders is insufficient for a lazy variadic socket.""" + socket_inputs = [{"sender": "component1", "value": 42}] + assert has_lazy_variadic_socket_received_all_inputs(variadic_socket_with_senders, socket_inputs) is False + + def test_lazy_variadic_with_no_output(self, variadic_socket_with_senders): + """_NO_OUTPUT_PRODUCED from a sender doesn't count as valid input, so it's not fully received.""" + socket_inputs = [{"sender": "component1", "value": _NO_OUTPUT_PRODUCED}, {"sender": "component2", "value": 42}] + assert has_lazy_variadic_socket_received_all_inputs(variadic_socket_with_senders, socket_inputs) is False + + def test_lazy_variadic_with_user_input(self, variadic_socket_with_senders): + """ + User input doesn't block a lazy variadic socket, as long as all named senders + also provided outputs. 
+ """ + socket_inputs = [ + {"sender": "component1", "value": 42}, + {"sender": None, "value": 43}, + {"sender": "component2", "value": 44}, + ] + assert has_lazy_variadic_socket_received_all_inputs(variadic_socket_with_senders, socket_inputs) is True + + def test_lazy_variadic_empty_inputs(self, variadic_socket_with_senders): + """No inputs at all means the lazy variadic socket hasn't received everything yet.""" + assert has_lazy_variadic_socket_received_all_inputs(variadic_socket_with_senders, []) is False + + +class TestSocketTypeDetection: + def test_is_socket_lazy_variadic_with_lazy_socket(self, lazy_variadic_socket): + """Ensures that a non-greedy variadic socket is detected as lazy.""" + assert is_socket_lazy_variadic(lazy_variadic_socket) is True + + def test_is_socket_lazy_variadic_with_greedy_socket(self, greedy_variadic_socket): + """Greedy variadic sockets should not be marked as lazy.""" + assert is_socket_lazy_variadic(greedy_variadic_socket) is False + + def test_is_socket_lazy_variadic_with_regular_socket(self, regular_socket): + """Regular sockets are not variadic at all.""" + assert is_socket_lazy_variadic(regular_socket) is False + + +class TestSocketInputCompletion: + def test_regular_socket_complete(self, regular_socket): + """A single valid input marks a regular socket as complete.""" + inputs = [{"sender": "component1", "value": 42}] + assert has_socket_received_all_inputs(regular_socket, inputs) is True + + def test_regular_socket_incomplete(self, regular_socket): + """_NO_OUTPUT_PRODUCED means the socket is not complete.""" + inputs = [{"sender": "component1", "value": _NO_OUTPUT_PRODUCED}] + assert has_socket_received_all_inputs(regular_socket, inputs) is False + + def test_regular_socket_no_inputs(self, regular_socket): + """No inputs at all means the socket is incomplete.""" + inputs = [] + assert has_socket_received_all_inputs(regular_socket, inputs) is False + + def test_lazy_variadic_socket_all_inputs(self, lazy_variadic_socket): + """Lazy variadic socket is complete only if all senders have produced valid outputs.""" + inputs = [{"sender": "component1", "value": 42}, {"sender": "component2", "value": 43}] + assert has_socket_received_all_inputs(lazy_variadic_socket, inputs) is True + + def test_lazy_variadic_socket_partial_inputs(self, lazy_variadic_socket): + """Partial coverage of senders is insufficient for lazy variadic sockets.""" + inputs = [{"sender": "component1", "value": 42}] + assert has_socket_received_all_inputs(lazy_variadic_socket, inputs) is False + + def test_lazy_variadic_socket_with_no_output(self, lazy_variadic_socket): + """A sender that produces _NO_OUTPUT_PRODUCED does not fulfill the lazy socket requirement.""" + inputs = [{"sender": "component1", "value": 42}, {"sender": "component2", "value": _NO_OUTPUT_PRODUCED}] + assert has_socket_received_all_inputs(lazy_variadic_socket, inputs) is False + + def test_greedy_variadic_socket_one_input(self, greedy_variadic_socket): + """A greedy variadic socket is complete if it has at least one valid input.""" + inputs = [{"sender": "component1", "value": 42}] + assert has_socket_received_all_inputs(greedy_variadic_socket, inputs) is True + + def test_greedy_variadic_socket_multiple_inputs(self, greedy_variadic_socket): + """A greedy variadic socket with multiple inputs remains complete as soon as one is valid.""" + inputs = [{"sender": "component1", "value": 42}, {"sender": "component2", "value": 43}] + assert has_socket_received_all_inputs(greedy_variadic_socket, inputs) is True + + def 
+
+
+class TestSocketInputCompletion:
+    def test_regular_socket_complete(self, regular_socket):
+        """A single valid input marks a regular socket as complete."""
+        inputs = [{"sender": "component1", "value": 42}]
+        assert has_socket_received_all_inputs(regular_socket, inputs) is True
+
+    def test_regular_socket_incomplete(self, regular_socket):
+        """_NO_OUTPUT_PRODUCED means the socket is not complete."""
+        inputs = [{"sender": "component1", "value": _NO_OUTPUT_PRODUCED}]
+        assert has_socket_received_all_inputs(regular_socket, inputs) is False
+
+    def test_regular_socket_no_inputs(self, regular_socket):
+        """No inputs at all means the socket is incomplete."""
+        inputs = []
+        assert has_socket_received_all_inputs(regular_socket, inputs) is False
+
+    def test_lazy_variadic_socket_all_inputs(self, lazy_variadic_socket):
+        """A lazy variadic socket is complete only if all senders have produced valid outputs."""
+        inputs = [{"sender": "component1", "value": 42}, {"sender": "component2", "value": 43}]
+        assert has_socket_received_all_inputs(lazy_variadic_socket, inputs) is True
+
+    def test_lazy_variadic_socket_partial_inputs(self, lazy_variadic_socket):
+        """Partial coverage of senders is insufficient for lazy variadic sockets."""
+        inputs = [{"sender": "component1", "value": 42}]
+        assert has_socket_received_all_inputs(lazy_variadic_socket, inputs) is False
+
+    def test_lazy_variadic_socket_with_no_output(self, lazy_variadic_socket):
+        """A sender that produces _NO_OUTPUT_PRODUCED does not fulfill the lazy socket's requirement."""
+        inputs = [{"sender": "component1", "value": 42}, {"sender": "component2", "value": _NO_OUTPUT_PRODUCED}]
+        assert has_socket_received_all_inputs(lazy_variadic_socket, inputs) is False
+
+    def test_greedy_variadic_socket_one_input(self, greedy_variadic_socket):
+        """A greedy variadic socket is complete if it has at least one valid input."""
+        inputs = [{"sender": "component1", "value": 42}]
+        assert has_socket_received_all_inputs(greedy_variadic_socket, inputs) is True
+
+    def test_greedy_variadic_socket_multiple_inputs(self, greedy_variadic_socket):
+        """A greedy variadic socket with multiple inputs is complete as soon as one of them is valid."""
+        inputs = [{"sender": "component1", "value": 42}, {"sender": "component2", "value": 43}]
+        assert has_socket_received_all_inputs(greedy_variadic_socket, inputs) is True
+
+    def test_greedy_variadic_socket_no_valid_inputs(self, greedy_variadic_socket):
+        """If every input is _NO_OUTPUT_PRODUCED, the greedy socket is not complete."""
+        inputs = [{"sender": "component1", "value": _NO_OUTPUT_PRODUCED}]
+        assert has_socket_received_all_inputs(greedy_variadic_socket, inputs) is False
+
+
+class TestPredecessorExecution:
+    def test_all_predecessors_executed_complete(self, complex_component):
+        """
+        Checks that if all named senders produce valid outputs for each socket,
+        then all predecessors are considered executed.
+        """
+        inputs = {
+            "regular": [{"sender": "component1", "value": 42}],
+            "lazy_var": [{"sender": "component1", "value": 42}, {"sender": "component2", "value": 43}],
+            "greedy_var": [
+                {"sender": "component1", "value": 42},
+                {"sender": "component2", "value": 43},
+                {"sender": "component3", "value": 44},
+            ],
+        }
+        assert all_predecessors_executed(complex_component, inputs) is True
+
+    def test_all_predecessors_executed_partial(self, complex_component):
+        """If a lazy socket is missing one predecessor, not all predecessors are executed."""
+        inputs = {
+            "regular": [{"sender": "component1", "value": 42}],
+            "lazy_var": [{"sender": "component1", "value": 42}],  # Missing component2
+            "greedy_var": [{"sender": "component1", "value": 42}, {"sender": "component2", "value": 43}],
+        }
+        assert all_predecessors_executed(complex_component, inputs) is False
+
+    def test_all_predecessors_executed_with_user_input(self, complex_component):
+        """
+        User input shouldn't affect predecessor execution for the lazy socket:
+        we still need all named senders to produce output.
+        """
+        inputs = {
+            "regular": [{"sender": "component1", "value": 42}],
+            "lazy_var": [{"sender": "component1", "value": 42}, {"sender": None, "value": 43}],
+            "greedy_var": [
+                {"sender": "component1", "value": 42},
+                {"sender": "component2", "value": 43},
+                {"sender": "component3", "value": 44},
+            ],
+        }
+        assert all_predecessors_executed(complex_component, inputs) is False
+
+
+class TestLazyVariadicResolution:
+    def test_lazy_variadic_sockets_all_resolved(self, complex_component):
+        """Checks that lazy variadic sockets are resolved when all inputs have arrived."""
+        inputs = {"lazy_var": [{"sender": "component1", "value": 42}, {"sender": "component2", "value": 43}]}
+        assert are_all_lazy_variadic_sockets_resolved(complex_component, inputs) is True
+
+    def test_lazy_variadic_sockets_partially_resolved(self, complex_component):
+        """Missing some sender outputs means lazy variadic sockets are not resolved."""
+        inputs = {
+            "lazy_var": [{"sender": "component1", "value": 42}]  # Missing component2
+        }
+        assert are_all_lazy_variadic_sockets_resolved(complex_component, inputs) is False
+
+    def test_lazy_variadic_sockets_with_no_inputs(self, complex_component):
+        """With no inputs, the lazy variadic socket is not resolved."""
+        inputs = {}
+        assert are_all_lazy_variadic_sockets_resolved(complex_component, inputs) is False
+
+    def test_lazy_variadic_sockets_with_predecessors_executed(self, complex_component):
+        """
+        Ensures that if all predecessors have executed (but produced no output),
+        the lazy variadic socket is still considered resolved.
+        """
+        inputs = {
+            "lazy_var": [
+                {"sender": "component1", "value": _NO_OUTPUT_PRODUCED},
+                {"sender": "component2", "value": _NO_OUTPUT_PRODUCED},
+            ]
+        }
+        assert are_all_lazy_variadic_sockets_resolved(complex_component, inputs) is True
+
+
+class TestGreedySocketReadiness:
+    def test_greedy_socket_ready(self, complex_component):
+        """A single valid input is enough for a greedy variadic socket to be considered ready."""
+        inputs = {"greedy_var": [{"sender": "component1", "value": 42}]}
+        assert is_any_greedy_socket_ready(complex_component, inputs) is True
+
+    def test_greedy_socket_multiple_inputs_ready(self, complex_component):
+        """Multiple valid inputs on a greedy socket are also fine; it is still ready."""
+        inputs = {"greedy_var": [{"sender": "component1", "value": 42}, {"sender": "component2", "value": 43}]}
+        assert is_any_greedy_socket_ready(complex_component, inputs) is True
+
+    def test_greedy_socket_not_ready(self, complex_component):
+        """If the only input is _NO_OUTPUT_PRODUCED, the greedy socket isn't ready."""
+        inputs = {"greedy_var": [{"sender": "component1", "value": _NO_OUTPUT_PRODUCED}]}
+        assert is_any_greedy_socket_ready(complex_component, inputs) is False
+
+    def test_greedy_socket_no_inputs(self, complex_component):
+        """No inputs at all: the greedy socket is not ready."""
+        inputs = {}
+        assert is_any_greedy_socket_ready(complex_component, inputs) is False
+
+    def test_greedy_socket_with_user_input(self, complex_component):
+        """User input can also trigger readiness for a greedy variadic socket."""
+        inputs = {"greedy_var": [{"sender": None, "value": 42}]}
+        assert is_any_greedy_socket_ready(complex_component, inputs) is True
diff --git a/test/core/pipeline/test_pipeline.py b/test/core/pipeline/test_pipeline.py
index 1f0284301e..33690fe35f 100644
--- a/test/core/pipeline/test_pipeline.py
+++ b/test/core/pipeline/test_pipeline.py
@@ -1,53 +1,15 @@
 # SPDX-FileCopyrightText: 2022-present deepset GmbH
 #
 # SPDX-License-Identifier: Apache-2.0
-import logging
-from typing import List, Optional
-from unittest.mock import patch
+
+from concurrent.futures import ThreadPoolExecutor
 
 import pytest
 
-from haystack import Document
-from haystack.components.builders import PromptBuilder, AnswerBuilder
 from haystack.components.joiners import BranchJoiner
 from haystack.core.component import component
-from haystack.core.component.types import InputSocket, OutputSocket, Variadic, GreedyVariadic, _empty
-from haystack.core.errors import DeserializationError, PipelineConnectError, PipelineDrawingError, PipelineError
-from haystack.core.pipeline import Pipeline, PredefinedPipeline
-from haystack.core.pipeline.base import (
-    _add_missing_input_defaults,
-    _enqueue_component,
-    _dequeue_component,
-    _enqueue_waiting_component,
-    _dequeue_waiting_component,
-    _is_lazy_variadic,
-)
-from haystack.core.serialization import DeserializationCallbacks
-from haystack.testing.factory import component_class
-from haystack.testing.sample_components import AddFixedValue, Double, Greet
-
-logging.basicConfig(level=logging.DEBUG)
-
-
-@component
-class FakeComponent:
-    def __init__(self, an_init_param: Optional[str] = None):
-        pass
-
-    @component.output_types(value=str)
-    def run(self, input_: str):
-        return {"value": input_}
-
-
-@component
-class FakeComponentSquared:
-    def __init__(self, an_init_param: Optional[str] = None):
-        self.an_init_param = an_init_param
-        self.inner = FakeComponent()
-
-    @component.output_types(value=str)
-    def run(self, input_: str):
-        return {"value": input_}
+from 
haystack.core.errors import PipelineRuntimeError +from haystack.core.pipeline import Pipeline class TestPipeline: @@ -56,1539 +18,68 @@ class TestPipeline: It doesn't test Pipeline.run(), that is done separately in a different way. """ - def test_pipeline_dumps(self, test_files_path): - pipeline = Pipeline(max_runs_per_component=99) - pipeline.add_component("Comp1", FakeComponent("Foo")) - pipeline.add_component("Comp2", FakeComponent()) - pipeline.connect("Comp1.value", "Comp2.input_") - result = pipeline.dumps() - with open(f"{test_files_path}/yaml/test_pipeline.yaml", "r") as f: - assert f.read() == result - - def test_pipeline_loads_invalid_data(self): - invalid_yaml = """components: - Comp1: - init_parameters: - an_init_param: null - type: test.core.pipeline.test_pipeline.FakeComponent - Comp2* - init_parameters: - an_init_param: null - type: test.core.pipeline.test_pipeline.FakeComponent - connections: - * receiver: Comp2.input_ - sender: Comp1.value - metadata: - """ - - with pytest.raises(DeserializationError, match="unmarshalling serialized"): - pipeline = Pipeline.loads(invalid_yaml) - - invalid_init_parameter_yaml = """components: - Comp1: - init_parameters: - unknown: null - type: test.core.pipeline.test_pipeline.FakeComponent - Comp2: - init_parameters: - an_init_param: null - type: test.core.pipeline.test_pipeline.FakeComponent - connections: - - receiver: Comp2.input_ - sender: Comp1.value - metadata: {} - """ - - with pytest.raises(DeserializationError, match=".*Comp1.*unknown.*"): - pipeline = Pipeline.loads(invalid_init_parameter_yaml) - - def test_pipeline_dump(self, test_files_path, tmp_path): - pipeline = Pipeline(max_runs_per_component=99) - pipeline.add_component("Comp1", FakeComponent("Foo")) - pipeline.add_component("Comp2", FakeComponent()) - pipeline.connect("Comp1.value", "Comp2.input_") - with open(tmp_path / "out.yaml", "w") as f: - pipeline.dump(f) - # re-open and ensure it's the same data as the test file - with open(f"{test_files_path}/yaml/test_pipeline.yaml", "r") as test_f, open(tmp_path / "out.yaml", "r") as f: - assert f.read() == test_f.read() - - def test_pipeline_load(self, test_files_path): - with open(f"{test_files_path}/yaml/test_pipeline.yaml", "r") as f: - pipeline = Pipeline.load(f) - assert pipeline._max_runs_per_component == 99 - assert isinstance(pipeline.get_component("Comp1"), FakeComponent) - assert isinstance(pipeline.get_component("Comp2"), FakeComponent) - - @patch("haystack.core.pipeline.base._to_mermaid_image") - @patch("haystack.core.pipeline.base.is_in_jupyter") - @patch("IPython.display.Image") - @patch("IPython.display.display") - def test_show_in_notebook( - self, mock_ipython_display, mock_ipython_image, mock_is_in_jupyter, mock_to_mermaid_image - ): - pipe = Pipeline() - - mock_to_mermaid_image.return_value = b"some_image_data" - mock_is_in_jupyter.return_value = True - - pipe.show() - mock_ipython_image.assert_called_once_with(b"some_image_data") - mock_ipython_display.assert_called_once() - - @patch("haystack.core.pipeline.base.is_in_jupyter") - def test_show_not_in_notebook(self, mock_is_in_jupyter): - pipe = Pipeline() - - mock_is_in_jupyter.return_value = False - - with pytest.raises(PipelineDrawingError): - pipe.show() - - @patch("haystack.core.pipeline.base._to_mermaid_image") - def test_draw(self, mock_to_mermaid_image, tmp_path): - pipe = Pipeline() - mock_to_mermaid_image.return_value = b"some_image_data" - - image_path = tmp_path / "test.png" - pipe.draw(path=image_path) - assert image_path.read_bytes() == 
mock_to_mermaid_image.return_value - - # UNIT - def test_add_component_to_different_pipelines(self): - first_pipe = Pipeline() - second_pipe = Pipeline() - some_component = component_class("Some")() - - assert some_component.__haystack_added_to_pipeline__ is None - first_pipe.add_component("some", some_component) - assert some_component.__haystack_added_to_pipeline__ is first_pipe - - with pytest.raises(PipelineError): - second_pipe.add_component("some", some_component) - - def test_remove_component_raises_if_invalid_component_name(self): - pipe = Pipeline() - component = component_class("Some")() - - pipe.add_component("1", component) - - with pytest.raises(ValueError): - pipe.remove_component("2") - - def test_remove_component_removes_component_and_its_edges(self): - pipe = Pipeline() - component_1 = component_class("Type1")() - component_2 = component_class("Type2")() - component_3 = component_class("Type3")() - component_4 = component_class("Type4")() - - pipe.add_component("1", component_1) - pipe.add_component("2", component_2) - pipe.add_component("3", component_3) - pipe.add_component("4", component_4) + def test_pipeline_thread_safety(self, waiting_component, spying_tracer): + # Initialize pipeline with synchronous components + pp = Pipeline() + pp.add_component("wait", waiting_component()) - pipe.connect("1", "2") - pipe.connect("2", "3") - pipe.connect("3", "4") + run_data = [{"wait_for": 1}, {"wait_for": 2}] - pipe.remove_component("2") + # Use ThreadPoolExecutor to run pipeline calls in parallel + with ThreadPoolExecutor(max_workers=len(run_data)) as executor: + # Submit pipeline runs to the executor + futures = [executor.submit(pp.run, data) for data in run_data] - assert ["1", "3", "4"] == sorted(pipe.graph.nodes) - assert [("3", "4")] == sorted([(u, v) for (u, v) in pipe.graph.edges()]) + # Wait for all futures to complete + for future in futures: + future.result() - def test_remove_component_allows_you_to_reuse_the_component(self): - pipe = Pipeline() - Some = component_class("Some", input_types={"in": int}, output_types={"out": int}) + # Verify component visits using tracer + component_spans = [sp for sp in spying_tracer.spans if sp.operation_name == "haystack.component.run"] - pipe.add_component("component_1", Some()) - pipe.add_component("component_2", Some()) - pipe.add_component("component_3", Some()) - pipe.connect("component_1", "component_2") - pipe.connect("component_2", "component_3") - component_2 = pipe.remove_component("component_2") + for span in component_spans: + assert span.tags["haystack.component.visits"] == 1 - assert component_2.__haystack_added_to_pipeline__ is None - assert component_2.__haystack_input__._sockets_dict == {"in": InputSocket(name="in", type=int, senders=[])} - assert component_2.__haystack_output__._sockets_dict == { - "out": OutputSocket(name="out", type=int, receivers=[]) - } + def test__run_component_success(self): + """Test successful component execution""" + joiner_1 = BranchJoiner(type_=str) + joiner_2 = BranchJoiner(type_=str) + pp = Pipeline() + pp.add_component("joiner_1", joiner_1) + pp.add_component("joiner_2", joiner_2) + pp.connect("joiner_1", "joiner_2") + inputs = {"joiner_1": {"value": [{"sender": None, "value": "test_value"}]}} - pipe2 = Pipeline() - pipe2.add_component("component_4", Some()) - pipe2.add_component("component_2", component_2) - pipe2.add_component("component_5", Some()) - - pipe2.connect("component_4", "component_2") - pipe2.connect("component_2", "component_5") - assert 
component_2.__haystack_added_to_pipeline__ is pipe2 - assert component_2.__haystack_input__._sockets_dict == { - "in": InputSocket(name="in", type=int, senders=["component_4"]) - } - assert component_2.__haystack_output__._sockets_dict == { - "out": OutputSocket(name="out", type=int, receivers=["component_5"]) - } - - # instance = pipe2.get_component("some") - # assert instance == component - - # UNIT - def test_get_component_name(self): - pipe = Pipeline() - some_component = component_class("Some")() - pipe.add_component("some", some_component) - - assert pipe.get_component_name(some_component) == "some" - - # UNIT - def test_get_component_name_not_added_to_pipeline(self): - pipe = Pipeline() - some_component = component_class("Some")() - - assert pipe.get_component_name(some_component) == "" - - # UNIT - def test_repr(self): - pipe = Pipeline(metadata={"test": "test"}) - pipe.add_component("add_two", AddFixedValue(add=2)) - pipe.add_component("add_default", AddFixedValue()) - pipe.add_component("double", Double()) - pipe.connect("add_two", "double") - pipe.connect("double", "add_default") - - expected_repr = ( - f"{object.__repr__(pipe)}\n" - "🧱 Metadata\n" - " - test: test\n" - "🚅 Components\n" - " - add_two: AddFixedValue\n" - " - add_default: AddFixedValue\n" - " - double: Double\n" - "🛤️ Connections\n" - " - add_two.result -> double.value (int)\n" - " - double.value -> add_default.value (int)\n" + outputs = pp._run_component( + component=pp._get_component_with_graph_metadata_and_visits("joiner_1", 0), + inputs=inputs, + component_visits={"joiner_1": 0, "joiner_2": 0}, ) - assert repr(pipe) == expected_repr - - # UNIT - def test_to_dict(self): - add_two = AddFixedValue(add=2) - add_default = AddFixedValue() - double = Double() - pipe = Pipeline(metadata={"test": "test"}, max_runs_per_component=42) - pipe.add_component("add_two", add_two) - pipe.add_component("add_default", add_default) - pipe.add_component("double", double) - pipe.connect("add_two", "double") - pipe.connect("double", "add_default") - - res = pipe.to_dict() - expected = { - "metadata": {"test": "test"}, - "max_runs_per_component": 42, - "components": { - "add_two": { - "type": "haystack.testing.sample_components.add_value.AddFixedValue", - "init_parameters": {"add": 2}, - }, - "add_default": { - "type": "haystack.testing.sample_components.add_value.AddFixedValue", - "init_parameters": {"add": 1}, - }, - "double": {"type": "haystack.testing.sample_components.double.Double", "init_parameters": {}}, - }, - "connections": [ - {"sender": "add_two.result", "receiver": "double.value"}, - {"sender": "double.value", "receiver": "add_default.value"}, - ], - } - assert res == expected - - def test_from_dict(self): - data = { - "metadata": {"test": "test"}, - "max_runs_per_component": 101, - "components": { - "add_two": { - "type": "haystack.testing.sample_components.add_value.AddFixedValue", - "init_parameters": {"add": 2}, - }, - "add_default": { - "type": "haystack.testing.sample_components.add_value.AddFixedValue", - "init_parameters": {"add": 1}, - }, - "double": {"type": "haystack.testing.sample_components.double.Double", "init_parameters": {}}, - }, - "connections": [ - {"sender": "add_two.result", "receiver": "double.value"}, - {"sender": "double.value", "receiver": "add_default.value"}, - ], - } - pipe = Pipeline.from_dict(data) - - assert pipe.metadata == {"test": "test"} - assert pipe._max_runs_per_component == 101 - - # Components - assert len(pipe.graph.nodes) == 3 - ## add_two - add_two = pipe.graph.nodes["add_two"] 
- assert add_two["instance"].add == 2 - assert add_two["input_sockets"] == { - "value": InputSocket(name="value", type=int), - "add": InputSocket(name="add", type=Optional[int], default_value=None), - } - assert add_two["output_sockets"] == {"result": OutputSocket(name="result", type=int, receivers=["double"])} - assert add_two["visits"] == 0 - - ## add_default - add_default = pipe.graph.nodes["add_default"] - assert add_default["instance"].add == 1 - assert add_default["input_sockets"] == { - "value": InputSocket(name="value", type=int, senders=["double"]), - "add": InputSocket(name="add", type=Optional[int], default_value=None), - } - assert add_default["output_sockets"] == {"result": OutputSocket(name="result", type=int)} - assert add_default["visits"] == 0 - - ## double - double = pipe.graph.nodes["double"] - assert double["instance"] - assert double["input_sockets"] == {"value": InputSocket(name="value", type=int, senders=["add_two"])} - assert double["output_sockets"] == {"value": OutputSocket(name="value", type=int, receivers=["add_default"])} - assert double["visits"] == 0 - - # Connections - connections = list(pipe.graph.edges(data=True)) - assert len(connections) == 2 - assert connections[0] == ( - "add_two", - "double", - { - "conn_type": "int", - "from_socket": OutputSocket(name="result", type=int, receivers=["double"]), - "to_socket": InputSocket(name="value", type=int, senders=["add_two"]), - "mandatory": True, - }, - ) - assert connections[1] == ( - "double", - "add_default", - { - "conn_type": "int", - "from_socket": OutputSocket(name="value", type=int, receivers=["add_default"]), - "to_socket": InputSocket(name="value", type=int, senders=["double"]), - "mandatory": True, - }, - ) - - # TODO: Remove this, this should be a component test. - # The pipeline can't handle this in any case nor way. 
- def test_from_dict_with_callbacks(self): - data = { - "metadata": {"test": "test"}, - "components": { - "add_two": { - "type": "haystack.testing.sample_components.add_value.AddFixedValue", - "init_parameters": {"add": 2}, - }, - "add_default": { - "type": "haystack.testing.sample_components.add_value.AddFixedValue", - "init_parameters": {"add": 1}, - }, - "double": {"type": "haystack.testing.sample_components.double.Double", "init_parameters": {}}, - "greet": { - "type": "haystack.testing.sample_components.greet.Greet", - "init_parameters": {"message": "test"}, - }, - }, - "connections": [ - {"sender": "add_two.result", "receiver": "double.value"}, - {"sender": "double.value", "receiver": "add_default.value"}, - ], - } - - components_seen_in_callback = [] - - def component_pre_init_callback(name, component_cls, init_params): - assert name in ["add_two", "add_default", "double", "greet"] - assert component_cls in [AddFixedValue, Double, Greet] - - if name == "add_two": - assert init_params == {"add": 2} - elif name == "add_default": - assert init_params == {"add": 1} - elif name == "greet": - assert init_params == {"message": "test"} - - components_seen_in_callback.append(name) - - pipe = Pipeline.from_dict( - data, callbacks=DeserializationCallbacks(component_pre_init=component_pre_init_callback) - ) - assert components_seen_in_callback == ["add_two", "add_default", "double", "greet"] - add_two = pipe.graph.nodes["add_two"]["instance"] - assert add_two.add == 2 - add_default = pipe.graph.nodes["add_default"]["instance"] - assert add_default.add == 1 - greet = pipe.graph.nodes["greet"]["instance"] - assert greet.message == "test" - assert greet.log_level == "INFO" - - def component_pre_init_callback_modify(name, component_cls, init_params): - assert name in ["add_two", "add_default", "double", "greet"] - assert component_cls in [AddFixedValue, Double, Greet] - - if name == "add_two": - init_params["add"] = 3 - elif name == "add_default": - init_params["add"] = 0 - elif name == "greet": - init_params["message"] = "modified test" - init_params["log_level"] = "DEBUG" - - pipe = Pipeline.from_dict( - data, callbacks=DeserializationCallbacks(component_pre_init=component_pre_init_callback_modify) - ) - add_two = pipe.graph.nodes["add_two"]["instance"] - assert add_two.add == 3 - add_default = pipe.graph.nodes["add_default"]["instance"] - assert add_default.add == 0 - greet = pipe.graph.nodes["greet"]["instance"] - assert greet.message == "modified test" - assert greet.log_level == "DEBUG" + assert outputs == {"value": "test_value"} + # We remove input in greedy variadic sockets, even if they are from the user + assert "value" not in inputs["joiner_1"] - # Test with a component that internally instantiates another component - def component_pre_init_callback_check_class(name, component_cls, init_params): - assert name == "fake_component_squared" - assert component_cls == FakeComponentSquared - - pipe = Pipeline() - pipe.add_component("fake_component_squared", FakeComponentSquared()) - pipe = Pipeline.from_dict( - pipe.to_dict(), - callbacks=DeserializationCallbacks(component_pre_init=component_pre_init_callback_check_class), - ) - assert type(pipe.graph.nodes["fake_component_squared"]["instance"].inner) == FakeComponent - - # UNIT - def test_from_dict_with_empty_dict(self): - assert Pipeline() == Pipeline.from_dict({}) - - # TODO: UNIT, consider deprecating this argument - def test_from_dict_with_components_instances(self): - add_two = AddFixedValue(add=2) - add_default = AddFixedValue() - 
components = {"add_two": add_two, "add_default": add_default} - data = { - "metadata": {"test": "test"}, - "components": { - "add_two": {}, - "add_default": {}, - "double": {"type": "haystack.testing.sample_components.double.Double", "init_parameters": {}}, - }, - "connections": [ - {"sender": "add_two.result", "receiver": "double.value"}, - {"sender": "double.value", "receiver": "add_default.value"}, - ], - } - pipe = Pipeline.from_dict(data, components=components) - assert pipe.metadata == {"test": "test"} - - # Components - assert len(pipe.graph.nodes) == 3 - ## add_two - add_two_data = pipe.graph.nodes["add_two"] - assert add_two_data["instance"] is add_two - assert add_two_data["instance"].add == 2 - assert add_two_data["input_sockets"] == { - "value": InputSocket(name="value", type=int), - "add": InputSocket(name="add", type=Optional[int], default_value=None), - } - assert add_two_data["output_sockets"] == {"result": OutputSocket(name="result", type=int, receivers=["double"])} - assert add_two_data["visits"] == 0 - - ## add_default - add_default_data = pipe.graph.nodes["add_default"] - assert add_default_data["instance"] is add_default - assert add_default_data["instance"].add == 1 - assert add_default_data["input_sockets"] == { - "value": InputSocket(name="value", type=int, senders=["double"]), - "add": InputSocket(name="add", type=Optional[int], default_value=None), - } - assert add_default_data["output_sockets"] == {"result": OutputSocket(name="result", type=int, receivers=[])} - assert add_default_data["visits"] == 0 - - ## double - double = pipe.graph.nodes["double"] - assert double["instance"] - assert double["input_sockets"] == {"value": InputSocket(name="value", type=int, senders=["add_two"])} - assert double["output_sockets"] == {"value": OutputSocket(name="value", type=int, receivers=["add_default"])} - assert double["visits"] == 0 - - # Connections - connections = list(pipe.graph.edges(data=True)) - assert len(connections) == 2 - assert connections[0] == ( - "add_two", - "double", - { - "conn_type": "int", - "from_socket": OutputSocket(name="result", type=int, receivers=["double"]), - "to_socket": InputSocket(name="value", type=int, senders=["add_two"]), - "mandatory": True, - }, - ) - assert connections[1] == ( - "double", - "add_default", - { - "conn_type": "int", - "from_socket": OutputSocket(name="value", type=int, receivers=["add_default"]), - "to_socket": InputSocket(name="value", type=int, senders=["double"]), - "mandatory": True, - }, - ) - - # UNIT - def test_from_dict_without_component_type(self): - data = { - "metadata": {"test": "test"}, - "components": {"add_two": {"init_parameters": {"add": 2}}}, - "connections": [], - } - with pytest.raises(PipelineError) as err: - Pipeline.from_dict(data) - - err.match("Missing 'type' in component 'add_two'") - - # UNIT - def test_from_dict_without_registered_component_type(self): - data = { - "metadata": {"test": "test"}, - "components": {"add_two": {"type": "foo.bar.baz", "init_parameters": {"add": 2}}}, - "connections": [], - } - with pytest.raises(PipelineError) as err: - Pipeline.from_dict(data) - - err.match(r"Component .+ not imported.") - - def test_from_dict_with_invalid_type(self): - data = { - "metadata": {"test": "test"}, - "components": {"add_two": {"type": "", "init_parameters": {"add": 2}}}, - "connections": [], - } - with pytest.raises(PipelineError) as err: - Pipeline.from_dict(data) - - err.match(r"Component '' \(name: 'add_two'\) not imported.") - - # UNIT - def 
test_from_dict_without_connection_sender(self): - data = {"metadata": {"test": "test"}, "components": {}, "connections": [{"receiver": "some.receiver"}]} - with pytest.raises(PipelineError) as err: - Pipeline.from_dict(data) - - err.match("Missing sender in connection: {'receiver': 'some.receiver'}") - - # UNIT - def test_from_dict_without_connection_receiver(self): - data = {"metadata": {"test": "test"}, "components": {}, "connections": [{"sender": "some.sender"}]} - with pytest.raises(PipelineError) as err: - Pipeline.from_dict(data) - - err.match("Missing receiver in connection: {'sender': 'some.sender'}") - - def test_describe_input_only_no_inputs_components(self): - A = component_class("A", input_types={}, output={"x": 0}) - B = component_class("B", input_types={}, output={"y": 0}) - C = component_class("C", input_types={"x": int, "y": int}, output={"z": 0}) - p = Pipeline() - p.add_component("a", A()) - p.add_component("b", B()) - p.add_component("c", C()) - p.connect("a.x", "c.x") - p.connect("b.y", "c.y") - assert p.inputs() == {} - assert p.inputs(include_components_with_connected_inputs=True) == { - "c": {"x": {"type": int, "is_mandatory": True}, "y": {"type": int, "is_mandatory": True}} - } - - def test_describe_input_some_components_with_no_inputs(self): - A = component_class("A", input_types={}, output={"x": 0}) - B = component_class("B", input_types={"y": int}, output={"y": 0}) - C = component_class("C", input_types={"x": int, "y": int}, output={"z": 0}) - p = Pipeline() - p.add_component("a", A()) - p.add_component("b", B()) - p.add_component("c", C()) - p.connect("a.x", "c.x") - p.connect("b.y", "c.y") - assert p.inputs() == {"b": {"y": {"type": int, "is_mandatory": True}}} - assert p.inputs(include_components_with_connected_inputs=True) == { - "b": {"y": {"type": int, "is_mandatory": True}}, - "c": {"x": {"type": int, "is_mandatory": True}, "y": {"type": int, "is_mandatory": True}}, - } - - def test_describe_input_all_components_have_inputs(self): - A = component_class("A", input_types={"x": Optional[int]}, output={"x": 0}) - B = component_class("B", input_types={"y": int}, output={"y": 0}) - C = component_class("C", input_types={"x": int, "y": int}, output={"z": 0}) - p = Pipeline() - p.add_component("a", A()) - p.add_component("b", B()) - p.add_component("c", C()) - p.connect("a.x", "c.x") - p.connect("b.y", "c.y") - assert p.inputs() == { - "a": {"x": {"type": Optional[int], "is_mandatory": True}}, - "b": {"y": {"type": int, "is_mandatory": True}}, - } - assert p.inputs(include_components_with_connected_inputs=True) == { - "a": {"x": {"type": Optional[int], "is_mandatory": True}}, - "b": {"y": {"type": int, "is_mandatory": True}}, - "c": {"x": {"type": int, "is_mandatory": True}, "y": {"type": int, "is_mandatory": True}}, - } - - def test_describe_output_multiple_possible(self): - """ - This pipeline has two outputs: - {"b": {"output_b": {"type": str}}, "a": {"output_a": {"type": str}}} - """ - A = component_class("A", input_types={"input_a": str}, output={"output_a": "str", "output_b": "str"}) - B = component_class("B", input_types={"input_b": str}, output={"output_b": "str"}) - - pipe = Pipeline() - pipe.add_component("a", A()) - pipe.add_component("b", B()) - pipe.connect("a.output_b", "b.input_b") - - assert pipe.outputs() == {"b": {"output_b": {"type": str}}, "a": {"output_a": {"type": str}}} - assert pipe.outputs(include_components_with_connected_outputs=True) == { - "a": {"output_a": {"type": str}, "output_b": {"type": str}}, - "b": {"output_b": {"type": str}}, - 
} - - def test_describe_output_single(self): - """ - This pipeline has one output: - {"c": {"z": {"type": int}}} - """ - A = component_class("A", input_types={"x": Optional[int]}, output={"x": 0}) - B = component_class("B", input_types={"y": int}, output={"y": 0}) - C = component_class("C", input_types={"x": int, "y": int}, output={"z": 0}) - p = Pipeline() - p.add_component("a", A()) - p.add_component("b", B()) - p.add_component("c", C()) - p.connect("a.x", "c.x") - p.connect("b.y", "c.y") - - assert p.outputs() == {"c": {"z": {"type": int}}} - assert p.outputs(include_components_with_connected_outputs=True) == { - "a": {"x": {"type": int}}, - "b": {"y": {"type": int}}, - "c": {"z": {"type": int}}, - } - - def test_describe_no_outputs(self): - """ - This pipeline sets up elaborate connections between three components but in fact it has no outputs: - Check that p.outputs() == {} - """ - A = component_class("A", input_types={"x": Optional[int]}, output={"x": 0}) - B = component_class("B", input_types={"y": int}, output={"y": 0}) - C = component_class("C", input_types={"x": int, "y": int}, output={}) - p = Pipeline() - p.add_component("a", A()) - p.add_component("b", B()) - p.add_component("c", C()) - p.connect("a.x", "c.x") - p.connect("b.y", "c.y") - assert p.outputs() == {} - assert p.outputs(include_components_with_connected_outputs=True) == { - "a": {"x": {"type": int}}, - "b": {"y": {"type": int}}, - } - - def test_from_template(self, monkeypatch): - monkeypatch.setenv("OPENAI_API_KEY", "fake_key") - pipe = Pipeline.from_template(PredefinedPipeline.INDEXING) - assert pipe.get_component("cleaner") - - def test_walk_pipeline_with_no_cycles(self): - """ - This pipeline has two source nodes, source1 and source2, one hello3 node in between, and one sink node, joiner. - pipeline.walk() should return each component exactly once. The order is not guaranteed. - """ - - @component - class Hello: - @component.output_types(output=str) - def run(self, word: str): - """ - Takes a string in input and returns "Hello, !" in output. - """ - return {"output": f"Hello, {word}!"} + def test__run_component_fail(self): + """Test error when component doesn't return a dictionary""" @component - class Joiner: + class WrongOutput: @component.output_types(output=str) - def run(self, word1: str, word2: str): - """ - Takes two strings in input and returns "Hello, and !" in output. - """ - return {"output": f"Hello, {word1} and {word2}!"} - - pipeline = Pipeline() - source1 = Hello() - source2 = Hello() - hello3 = Hello() - joiner = Joiner() - pipeline.add_component("source1", source1) - pipeline.add_component("source2", source2) - pipeline.add_component("hello3", hello3) - pipeline.add_component("joiner", joiner) - - pipeline.connect("source1", "joiner.word1") - pipeline.connect("source2", "hello3") - pipeline.connect("hello3", "joiner.word2") - - expected_components = [("source1", source1), ("source2", source2), ("joiner", joiner), ("hello3", hello3)] - assert sorted(expected_components) == sorted(pipeline.walk()) - - def test_walk_pipeline_with_cycles(self): - """ - This pipeline consists of two components, which would run three times in a loop. - pipeline.walk() should return these components exactly once. The order is not guaranteed. - """ - - @component - class Hello: - def __init__(self): - self.iteration_counter = 0 - - @component.output_types(intermediate=str, final=str) - def run(self, word: str, intermediate: Optional[str] = None): - """ - Takes a string in input and returns "Hello, !" in output. 
- """ - if self.iteration_counter < 3: - self.iteration_counter += 1 - return {"intermediate": f"Hello, {intermediate or word}!"} - return {"final": f"Hello, {intermediate or word}!"} - - pipeline = Pipeline() - hello = Hello() - hello_again = Hello() - pipeline.add_component("hello", hello) - pipeline.add_component("hello_again", hello_again) - pipeline.connect("hello.intermediate", "hello_again.intermediate") - pipeline.connect("hello_again.intermediate", "hello.intermediate") - assert {("hello", hello), ("hello_again", hello_again)} == set(pipeline.walk()) - - def test__init_graph(self): - pipe = Pipeline() - pipe.add_component("greet", Greet()) - pipe.add_component("adder", AddFixedValue()) - pipe.connect("greet", "adder") - pipe._init_graph() - for node in pipe.graph.nodes: - assert pipe.graph.nodes[node]["visits"] == 0 - - def test__normalize_varidiac_input_data(self): - pipe = Pipeline() - template = """ - Answer the following questions: - {{ questions | join("\n") }} - """ - pipe.add_component("prompt_builder", PromptBuilder(template=template)) - pipe.add_component("branch_joiner", BranchJoiner(type_=int)) - questions = ["What is the capital of Italy?", "What is the capital of France?"] - data = { - "prompt_builder": {"questions": questions}, - "branch_joiner": {"value": 1}, - "not_a_component": "some input data", - } - res = pipe._normalize_varidiac_input_data(data) - assert res == { - "prompt_builder": {"questions": ["What is the capital of Italy?", "What is the capital of France?"]}, - "branch_joiner": {"value": [1]}, - "not_a_component": "some input data", - } - - def test__prepare_component_input_data(self): - MockComponent = component_class("MockComponent", input_types={"x": List[str], "y": str}) - pipe = Pipeline() - pipe.add_component("first_mock", MockComponent()) - pipe.add_component("second_mock", MockComponent()) - - res = pipe._prepare_component_input_data({"x": ["some data"], "y": "some other data"}) - assert res == { - "first_mock": {"x": ["some data"], "y": "some other data"}, - "second_mock": {"x": ["some data"], "y": "some other data"}, - } - assert id(res["first_mock"]["x"]) != id(res["second_mock"]["x"]) - - def test__prepare_component_input_data_with_connected_inputs(self): - MockComponent = component_class( - "MockComponent", input_types={"x": List[str], "y": str}, output_types={"z": str} - ) - pipe = Pipeline() - pipe.add_component("first_mock", MockComponent()) - pipe.add_component("second_mock", MockComponent()) - pipe.connect("first_mock.z", "second_mock.y") - - res = pipe._prepare_component_input_data({"x": ["some data"], "y": "some other data"}) - assert res == {"first_mock": {"x": ["some data"], "y": "some other data"}, "second_mock": {"x": ["some data"]}} - assert id(res["first_mock"]["x"]) != id(res["second_mock"]["x"]) - - def test__prepare_component_input_data_with_non_existing_input(self, caplog): - pipe = Pipeline() - res = pipe._prepare_component_input_data({"input_name": 1}) - assert res == {} - assert ( - "Inputs ['input_name'] were not matched to any component inputs, " - "please check your run parameters." 
in caplog.text - ) - - def test_connect(self): - comp1 = component_class("Comp1", output_types={"value": int})() - comp2 = component_class("Comp2", input_types={"value": int})() - pipe = Pipeline() - pipe.add_component("comp1", comp1) - pipe.add_component("comp2", comp2) - assert pipe.connect("comp1.value", "comp2.value") is pipe - - assert comp1.__haystack_output__.value.receivers == ["comp2"] - assert comp2.__haystack_input__.value.senders == ["comp1"] - assert list(pipe.graph.edges) == [("comp1", "comp2", "value/value")] - - def test_connect_already_connected(self): - comp1 = component_class("Comp1", output_types={"value": int})() - comp2 = component_class("Comp2", input_types={"value": int})() - pipe = Pipeline() - pipe.add_component("comp1", comp1) - pipe.add_component("comp2", comp2) - pipe.connect("comp1.value", "comp2.value") - pipe.connect("comp1.value", "comp2.value") - - assert comp1.__haystack_output__.value.receivers == ["comp2"] - assert comp2.__haystack_input__.value.senders == ["comp1"] - assert list(pipe.graph.edges) == [("comp1", "comp2", "value/value")] - - def test_connect_with_sender_component_name(self): - comp1 = component_class("Comp1", output_types={"value": int})() - comp2 = component_class("Comp2", input_types={"value": int})() - pipe = Pipeline() - pipe.add_component("comp1", comp1) - pipe.add_component("comp2", comp2) - pipe.connect("comp1", "comp2.value") - - assert comp1.__haystack_output__.value.receivers == ["comp2"] - assert comp2.__haystack_input__.value.senders == ["comp1"] - assert list(pipe.graph.edges) == [("comp1", "comp2", "value/value")] - - def test_connect_with_receiver_component_name(self): - comp1 = component_class("Comp1", output_types={"value": int})() - comp2 = component_class("Comp2", input_types={"value": int})() - pipe = Pipeline() - pipe.add_component("comp1", comp1) - pipe.add_component("comp2", comp2) - pipe.connect("comp1.value", "comp2") + def run(self, value: str): + return "not_a_dict" - assert comp1.__haystack_output__.value.receivers == ["comp2"] - assert comp2.__haystack_input__.value.senders == ["comp1"] - assert list(pipe.graph.edges) == [("comp1", "comp2", "value/value")] + wrong = WrongOutput() + pp = Pipeline() + pp.add_component("wrong", wrong) - def test_connect_with_sender_and_receiver_component_name(self): - comp1 = component_class("Comp1", output_types={"value": int})() - comp2 = component_class("Comp2", input_types={"value": int})() - pipe = Pipeline() - pipe.add_component("comp1", comp1) - pipe.add_component("comp2", comp2) - pipe.connect("comp1", "comp2") + inputs = {"wrong": {"value": [{"sender": None, "value": "test_value"}]}} - assert comp1.__haystack_output__.value.receivers == ["comp2"] - assert comp2.__haystack_input__.value.senders == ["comp1"] - assert list(pipe.graph.edges) == [("comp1", "comp2", "value/value")] - - def test_connect_with_sender_not_in_pipeline(self): - comp2 = component_class("Comp2", input_types={"value": int})() - pipe = Pipeline() - pipe.add_component("comp2", comp2) - with pytest.raises(ValueError): - pipe.connect("comp1.value", "comp2.value") - - def test_connect_with_receiver_not_in_pipeline(self): - comp1 = component_class("Comp1", output_types={"value": int})() - pipe = Pipeline() - pipe.add_component("comp1", comp1) - with pytest.raises(ValueError): - pipe.connect("comp1.value", "comp2.value") - - def test_connect_with_sender_socket_name_not_in_pipeline(self): - comp1 = component_class("Comp1", output_types={"value": int})() - comp2 = component_class("Comp2", input_types={"value": 
int})() - pipe = Pipeline() - pipe.add_component("comp1", comp1) - pipe.add_component("comp2", comp2) - with pytest.raises(PipelineConnectError): - pipe.connect("comp1.non_existing", "comp2.value") - - def test_connect_with_receiver_socket_name_not_in_pipeline(self): - comp1 = component_class("Comp1", output_types={"value": int})() - comp2 = component_class("Comp2", input_types={"value": int})() - pipe = Pipeline() - pipe.add_component("comp1", comp1) - pipe.add_component("comp2", comp2) - with pytest.raises(PipelineConnectError): - pipe.connect("comp1.value", "comp2.non_existing") - - def test_connect_with_no_matching_types_and_same_names(self): - comp1 = component_class("Comp1", output_types={"value": int})() - comp2 = component_class("Comp2", input_types={"value": str})() - pipe = Pipeline() - pipe.add_component("comp1", comp1) - pipe.add_component("comp2", comp2) - with pytest.raises(PipelineConnectError): - pipe.connect("comp1", "comp2") - - def test_connect_with_multiple_sender_connections_with_same_type_and_differing_name(self): - comp1 = component_class("Comp1", output_types={"val1": int, "val2": int})() - comp2 = component_class("Comp2", input_types={"value": int})() - pipe = Pipeline() - pipe.add_component("comp1", comp1) - pipe.add_component("comp2", comp2) - with pytest.raises(PipelineConnectError): - pipe.connect("comp1", "comp2") - - def test_connect_with_multiple_receiver_connections_with_same_type_and_differing_name(self): - comp1 = component_class("Comp1", output_types={"value": int})() - comp2 = component_class("Comp2", input_types={"val1": int, "val2": int})() - pipe = Pipeline() - pipe.add_component("comp1", comp1) - pipe.add_component("comp2", comp2) - with pytest.raises(PipelineConnectError): - pipe.connect("comp1", "comp2") - - def test_connect_with_multiple_sender_connections_with_same_type_and_same_name(self): - comp1 = component_class("Comp1", output_types={"value": int, "other": int})() - comp2 = component_class("Comp2", input_types={"value": int})() - pipe = Pipeline() - pipe.add_component("comp1", comp1) - pipe.add_component("comp2", comp2) - pipe.connect("comp1", "comp2") - - assert comp1.__haystack_output__.value.receivers == ["comp2"] - assert comp2.__haystack_input__.value.senders == ["comp1"] - assert list(pipe.graph.edges) == [("comp1", "comp2", "value/value")] - - def test_connect_with_multiple_receiver_connections_with_same_type_and_same_name(self): - comp1 = component_class("Comp1", output_types={"value": int})() - comp2 = component_class("Comp2", input_types={"value": int, "other": int})() - pipe = Pipeline() - pipe.add_component("comp1", comp1) - pipe.add_component("comp2", comp2) - pipe.connect("comp1", "comp2") - - assert comp1.__haystack_output__.value.receivers == ["comp2"] - assert comp2.__haystack_input__.value.senders == ["comp1"] - assert list(pipe.graph.edges) == [("comp1", "comp2", "value/value")] - - def test_connect_multiple_outputs_to_non_variadic_input(self): - comp1 = component_class("Comp1", output_types={"value": int})() - comp2 = component_class("Comp2", output_types={"value": int})() - comp3 = component_class("Comp3", input_types={"value": int})() - pipe = Pipeline() - pipe.add_component("comp1", comp1) - pipe.add_component("comp2", comp2) - pipe.add_component("comp3", comp3) - pipe.connect("comp1.value", "comp3.value") - with pytest.raises(PipelineConnectError): - pipe.connect("comp2.value", "comp3.value") - - def test_connect_multiple_outputs_to_variadic_input(self): - comp1 = component_class("Comp1", output_types={"value": 
int})() - comp2 = component_class("Comp2", output_types={"value": int})() - comp3 = component_class("Comp3", input_types={"value": Variadic[int]})() - pipe = Pipeline() - pipe.add_component("comp1", comp1) - pipe.add_component("comp2", comp2) - pipe.add_component("comp3", comp3) - pipe.connect("comp1.value", "comp3.value") - pipe.connect("comp2.value", "comp3.value") - - assert comp1.__haystack_output__.value.receivers == ["comp3"] - assert comp2.__haystack_output__.value.receivers == ["comp3"] - assert comp3.__haystack_input__.value.senders == ["comp1", "comp2"] - assert list(pipe.graph.edges) == [("comp1", "comp3", "value/value"), ("comp2", "comp3", "value/value")] - - def test_connect_same_component_as_sender_and_receiver(self): - """ - This pipeline consists of one component, which would be connected to itself. - Connecting a component to itself is raises PipelineConnectError. - """ - pipe = Pipeline() - single_component = FakeComponent() - pipe.add_component("single_component", single_component) - with pytest.raises(PipelineConnectError): - pipe.connect("single_component.out", "single_component.in") - - def test__run_component(self, spying_tracer, caplog): - caplog.set_level(logging.INFO) - sentence_builder = component_class( - "SentenceBuilder", input_types={"words": List[str]}, output={"text": "some words"} - )() - document_builder = component_class( - "DocumentBuilder", input_types={"text": str}, output={"doc": Document(content="some words")} - )() - document_cleaner = component_class( - "DocumentCleaner", - input_types={"doc": Document}, - output={"cleaned_doc": Document(content="some cleaner words")}, - )() - - pipe = Pipeline() - pipe.add_component("sentence_builder", sentence_builder) - pipe.add_component("document_builder", document_builder) - pipe.add_component("document_cleaner", document_cleaner) - pipe.connect("sentence_builder.text", "document_builder.text") - pipe.connect("document_builder.doc", "document_cleaner.doc") - assert spying_tracer.spans == [] - res = pipe._run_component("document_builder", {"text": "whatever"}) - assert res == {"doc": Document(content="some words")} - - assert len(spying_tracer.spans) == 1 - span = spying_tracer.spans[0] - assert span.operation_name == "haystack.component.run" - assert span.tags == { - "haystack.component.name": "document_builder", - "haystack.component.type": "DocumentBuilder", - "haystack.component.input_types": {"text": "str"}, - "haystack.component.input_spec": {"text": {"type": "str", "senders": ["sentence_builder"]}}, - "haystack.component.output_spec": {"doc": {"type": "Document", "receivers": ["document_cleaner"]}}, - "haystack.component.visits": 1, - } - - assert caplog.messages == ["Running component document_builder"] - - def test__run_component_with_variadic_input(self): - document_joiner = component_class("DocumentJoiner", input_types={"docs": Variadic[Document]})() - - pipe = Pipeline() - pipe.add_component("document_joiner", document_joiner) - inputs = {"docs": [Document(content="doc1"), Document(content="doc2")]} - pipe._run_component("document_joiner", inputs) - assert inputs == {"docs": []} - - def test__component_has_enough_inputs_to_run(self): - sentence_builder = component_class("SentenceBuilder", input_types={"words": List[str]})() - pipe = Pipeline() - pipe.add_component("sentence_builder", sentence_builder) - - assert not pipe._component_has_enough_inputs_to_run("sentence_builder", {}) - assert not pipe._component_has_enough_inputs_to_run( - "sentence_builder", {"sentence_builder": {"wrong_input_name": 
"blah blah"}} - ) - assert pipe._component_has_enough_inputs_to_run( - "sentence_builder", {"sentence_builder": {"words": ["blah blah"]}} - ) - - def test__find_components_that_will_receive_no_input(self): - sentence_builder = component_class( - "SentenceBuilder", input_types={"words": List[str]}, output_types={"text": str} - )() - document_builder = component_class( - "DocumentBuilder", input_types={"text": str}, output_types={"doc": Document} - )() - conditional_document_builder = component_class( - "ConditionalDocumentBuilder", output_types={"doc": Document, "noop": None} - )() - - document_joiner = component_class("DocumentJoiner", input_types={"docs": Variadic[Document]})() - - pipe = Pipeline() - pipe.add_component("sentence_builder", sentence_builder) - pipe.add_component("document_builder", document_builder) - pipe.add_component("document_joiner", document_joiner) - pipe.add_component("conditional_document_builder", conditional_document_builder) - pipe.connect("sentence_builder.text", "document_builder.text") - pipe.connect("document_builder.doc", "document_joiner.docs") - pipe.connect("conditional_document_builder.doc", "document_joiner.docs") - - res = pipe._find_components_that_will_receive_no_input("sentence_builder", {}, {}) - assert res == {("document_builder", document_builder), ("document_joiner", document_joiner)} - - res = pipe._find_components_that_will_receive_no_input("sentence_builder", {"text": "some text"}, {}) - assert res == set() - - res = pipe._find_components_that_will_receive_no_input("conditional_document_builder", {"noop": None}, {}) - assert res == {("document_joiner", document_joiner)} - - res = pipe._find_components_that_will_receive_no_input( - "conditional_document_builder", {"noop": None}, {"document_joiner": {"docs": []}} - ) - assert res == {("document_joiner", document_joiner)} - - res = pipe._find_components_that_will_receive_no_input( - "conditional_document_builder", {"noop": None}, {"document_joiner": {"docs": [Document("some text")]}} - ) - assert res == set() - - multiple_outputs = component_class("MultipleOutputs", output_types={"first": int, "second": int})() - - def custom_init(self): - component.set_input_type(self, "first", Optional[int], 1) - component.set_input_type(self, "second", Optional[int], 2) - - multiple_optional_inputs = component_class("MultipleOptionalInputs", extra_fields={"__init__": custom_init})() - - pipe = Pipeline() - pipe.add_component("multiple_outputs", multiple_outputs) - pipe.add_component("multiple_optional_inputs", multiple_optional_inputs) - pipe.connect("multiple_outputs.second", "multiple_optional_inputs.first") - - res = pipe._find_components_that_will_receive_no_input("multiple_outputs", {"first": 1}, {}) - assert res == {("multiple_optional_inputs", multiple_optional_inputs)} - - res = pipe._find_components_that_will_receive_no_input( - "multiple_outputs", {"first": 1}, {"multiple_optional_inputs": {"second": 200}} - ) - assert res == set() - - res = pipe._find_components_that_will_receive_no_input("multiple_outputs", {"second": 1}, {}) - assert res == set() - - def test__distribute_output(self): - document_builder = component_class( - "DocumentBuilder", input_types={"text": str}, output_types={"doc": Document, "another_doc": Document} - )() - document_cleaner = component_class( - "DocumentCleaner", input_types={"doc": Document}, output_types={"cleaned_doc": Document} - )() - document_joiner = component_class("DocumentJoiner", input_types={"docs": Variadic[Document]})() - - pipe = Pipeline() - 
pipe.add_component("document_builder", document_builder) - pipe.add_component("document_cleaner", document_cleaner) - pipe.add_component("document_joiner", document_joiner) - pipe.connect("document_builder.doc", "document_cleaner.doc") - pipe.connect("document_builder.another_doc", "document_joiner.docs") - - inputs = {"document_builder": {"text": "some text"}} - run_queue = [] - waiting_queue = [("document_joiner", document_joiner)] - receivers = [ - ( - "document_cleaner", - OutputSocket("doc", Document, ["document_cleaner"]), - InputSocket("doc", Document, _empty, ["document_builder"]), - ), - ( - "document_joiner", - OutputSocket("another_doc", Document, ["document_joiner"]), - InputSocket("docs", Variadic[Document], _empty, ["document_builder"]), - ), - ] - res = pipe._distribute_output( - receivers, {"doc": Document("some text"), "another_doc": Document()}, inputs, run_queue, waiting_queue - ) - - assert res == {} - assert inputs == { - "document_builder": {"text": "some text"}, - "document_cleaner": {"doc": Document("some text")}, - "document_joiner": {"docs": [Document()]}, - } - assert run_queue == [("document_cleaner", document_cleaner)] - assert waiting_queue == [("document_joiner", document_joiner)] - - def test__find_next_runnable_component(self): - document_builder = component_class( - "DocumentBuilder", input_types={"text": str}, output_types={"doc": Document} - )() - pipe = Pipeline() - components_inputs = {"document_builder": {"text": "some text"}} - waiting_queue = [("document_builder", document_builder)] - pair = pipe._find_next_runnable_component(components_inputs, waiting_queue) - assert pair == ("document_builder", document_builder) - - def test__find_next_runnable_component_without_component_inputs(self): - document_builder = component_class( - "DocumentBuilder", input_types={"text": str}, output_types={"doc": Document} - )() - pipe = Pipeline() - components_inputs = {} - waiting_queue = [("document_builder", document_builder)] - pair = pipe._find_next_runnable_component(components_inputs, waiting_queue) - assert pair == ("document_builder", document_builder) - - def test__find_next_runnable_component_with_component_with_only_variadic_non_greedy_input(self): - document_joiner = component_class("DocumentJoiner", input_types={"docs": Variadic[Document]})() - - pipe = Pipeline() - components_inputs = {} - waiting_queue = [("document_joiner", document_joiner)] - pair = pipe._find_next_runnable_component(components_inputs, waiting_queue) - assert pair == ("document_joiner", document_joiner) - - def test__find_next_runnable_component_with_component_with_only_default_input(self): - prompt_builder = PromptBuilder(template="{{ questions | join('\n') }}") - - pipe = Pipeline() - components_inputs = {} - waiting_queue = [("prompt_builder", prompt_builder)] - pair = pipe._find_next_runnable_component(components_inputs, waiting_queue) - - assert pair == ("prompt_builder", prompt_builder) - - def test__find_next_runnable_component_with_component_with_variadic_non_greedy_and_default_input(self): - document_joiner = component_class("DocumentJoiner", input_types={"docs": Variadic[Document]})() - prompt_builder = PromptBuilder(template="{{ questions | join('\n') }}") - - pipe = Pipeline() - components_inputs = {} - waiting_queue = [("prompt_builder", prompt_builder), ("document_joiner", document_joiner)] - pair = pipe._find_next_runnable_component(components_inputs, waiting_queue) - - assert pair == ("document_joiner", document_joiner) - - def 
test__find_next_runnable_component_with_different_components_inputs(self): - document_builder = component_class( - "DocumentBuilder", input_types={"text": str}, output_types={"doc": Document} - )() - document_joiner = component_class("DocumentJoiner", input_types={"docs": Variadic[Document]})() - prompt_builder = PromptBuilder(template="{{ questions | join('\n') }}") - - pipe = Pipeline() - components_inputs = {"document_builder": {"text": "some text"}} - waiting_queue = [ - ("prompt_builder", prompt_builder), - ("document_builder", document_builder), - ("document_joiner", document_joiner), - ] - pair = pipe._find_next_runnable_component(components_inputs, waiting_queue) - - assert pair == ("document_builder", document_builder) - - def test__find_next_runnable_component_with_different_components_without_any_input(self): - document_builder = component_class( - "DocumentBuilder", input_types={"text": str}, output_types={"doc": Document} - )() - document_joiner = component_class("DocumentJoiner", input_types={"docs": Variadic[Document]})() - prompt_builder = PromptBuilder(template="{{ questions | join('\n') }}") - - pipe = Pipeline() - components_inputs = {} - waiting_queue = [ - ("prompt_builder", prompt_builder), - ("document_builder", document_builder), - ("document_joiner", document_joiner), - ] - pair = pipe._find_next_runnable_component(components_inputs, waiting_queue) - - assert pair == ("document_builder", document_builder) - - def test__is_stuck_in_a_loop(self): - document_builder = component_class( - "DocumentBuilder", input_types={"text": str}, output_types={"doc": Document} - )() - document_joiner = component_class("DocumentJoiner", input_types={"docs": Variadic[Document]})() - prompt_builder = PromptBuilder(template="{{ questions | join('\n') }}") - - pipe = Pipeline() - - waiting_queue = [("document_builder", document_builder)] - assert pipe._is_stuck_in_a_loop(waiting_queue) - - waiting_queue = [("document_joiner", document_joiner)] - assert pipe._is_stuck_in_a_loop(waiting_queue) - - waiting_queue = [("prompt_builder", prompt_builder)] - assert pipe._is_stuck_in_a_loop(waiting_queue) - - waiting_queue = [("document_joiner", document_joiner), ("prompt_builder", prompt_builder)] - assert not pipe._is_stuck_in_a_loop(waiting_queue) - - waiting_queue = [("document_builder", document_joiner), ("prompt_builder", prompt_builder)] - assert not pipe._is_stuck_in_a_loop(waiting_queue) - - waiting_queue = [("document_builder", document_joiner), ("document_joiner", document_joiner)] - assert not pipe._is_stuck_in_a_loop(waiting_queue) - - def test__enqueue_component(self): - document_builder = component_class( - "DocumentBuilder", input_types={"text": str}, output_types={"doc": Document} - )() - document_joiner = component_class("DocumentJoiner", input_types={"docs": Variadic[Document]})() - - run_queue = [] - waiting_queue = [] - _enqueue_component(("document_builder", document_builder), run_queue, waiting_queue) - assert run_queue == [("document_builder", document_builder)] - assert waiting_queue == [] - - run_queue = [("document_builder", document_builder)] - waiting_queue = [] - _enqueue_component(("document_builder", document_builder), run_queue, waiting_queue) - assert run_queue == [("document_builder", document_builder)] - assert waiting_queue == [] - - run_queue = [] - waiting_queue = [("document_builder", document_builder)] - _enqueue_component(("document_builder", document_builder), run_queue, waiting_queue) - assert run_queue == [("document_builder", document_builder)] - 
assert waiting_queue == [] - - run_queue = [] - waiting_queue = [("document_joiner", document_joiner)] - _enqueue_component(("document_builder", document_builder), run_queue, waiting_queue) - assert run_queue == [("document_builder", document_builder)] - assert waiting_queue == [("document_joiner", document_joiner)] - - run_queue = [("document_joiner", document_joiner)] - waiting_queue = [] - _enqueue_component(("document_builder", document_builder), run_queue, waiting_queue) - assert run_queue == [("document_joiner", document_joiner), ("document_builder", document_builder)] - assert waiting_queue == [] - - def test__dequeue_component(self): - document_builder = component_class( - "DocumentBuilder", input_types={"text": str}, output_types={"doc": Document} - )() - document_joiner = component_class("DocumentJoiner", input_types={"docs": Variadic[Document]})() - - run_queue = [] - waiting_queue = [] - _dequeue_component(("document_builder", document_builder), run_queue, waiting_queue) - assert run_queue == [] - assert waiting_queue == [] - - run_queue = [("document_builder", document_builder)] - waiting_queue = [] - _dequeue_component(("document_builder", document_builder), run_queue, waiting_queue) - assert run_queue == [] - assert waiting_queue == [] - - run_queue = [] - waiting_queue = [("document_builder", document_builder)] - _dequeue_component(("document_builder", document_builder), run_queue, waiting_queue) - assert run_queue == [] - assert waiting_queue == [] - - run_queue = [("document_builder", document_builder)] - waiting_queue = [("document_builder", document_builder)] - _dequeue_component(("document_builder", document_builder), run_queue, waiting_queue) - assert run_queue == [] - assert waiting_queue == [] - - run_queue = [("document_builder", document_builder)] - waiting_queue = [("document_builder", document_builder)] - _dequeue_component(("document_joiner", document_joiner), run_queue, waiting_queue) - assert run_queue == [("document_builder", document_builder)] - assert waiting_queue == [("document_builder", document_builder)] - - def test__add_missing_input_defaults(self): - name = "prompt_builder" - prompt_builder = PromptBuilder(template="{{ questions | join('\n') }}") - components_inputs = {} - _add_missing_input_defaults(name, prompt_builder, components_inputs) - assert components_inputs == {"prompt_builder": {"questions": "", "template": None, "template_variables": None}} - - name = "answer_builder" - answer_builder = AnswerBuilder() - components_inputs = {"answer_builder": {"query": "What is the answer?"}} - _add_missing_input_defaults(name, answer_builder, components_inputs) - assert components_inputs == { - "answer_builder": { - "query": "What is the answer?", - "meta": None, - "documents": None, - "pattern": None, - "reference_pattern": None, - } - } - - name = "branch_joiner" - branch_joiner = BranchJoiner(int) - components_inputs = {} - _add_missing_input_defaults(name, branch_joiner, components_inputs) - assert components_inputs == {"branch_joiner": {}} - - def test__find_next_runnable_lazy_variadic_or_default_component(self): - document_builder = component_class( - "DocumentBuilder", input_types={"text": str}, output_types={"doc": Document} - )() - document_joiner = component_class("DocumentJoiner", input_types={"docs": Variadic[Document]})() - prompt_builder = PromptBuilder(template="{{ questions | join('\n') }}") - pipe = Pipeline() - - waiting_queue = [("document_builder", document_builder)] - pair = 
pipe._find_next_runnable_lazy_variadic_or_default_component(waiting_queue) - assert pair == ("document_builder", document_builder) - - waiting_queue = [("document_joiner", document_joiner)] - pair = pipe._find_next_runnable_lazy_variadic_or_default_component(waiting_queue) - assert pair == ("document_joiner", document_joiner) - - waiting_queue = [("prompt_builder", prompt_builder)] - pair = pipe._find_next_runnable_lazy_variadic_or_default_component(waiting_queue) - assert pair == ("prompt_builder", prompt_builder) - - waiting_queue = [ - ("document_builder", document_builder), - ("document_joiner", document_joiner), - ("prompt_builder", prompt_builder), - ] - pair = pipe._find_next_runnable_lazy_variadic_or_default_component(waiting_queue) - assert pair == ("document_joiner", document_joiner) - - waiting_queue = [ - ("prompt_builder", prompt_builder), - ("document_builder", document_builder), - ("document_joiner", document_joiner), - ] - pair = pipe._find_next_runnable_lazy_variadic_or_default_component(waiting_queue) - assert pair == ("prompt_builder", prompt_builder) - - waiting_queue = [ - ("document_builder", document_builder), - ("document_joiner", document_joiner), - ("prompt_builder", prompt_builder), - ] - pair = pipe._find_next_runnable_lazy_variadic_or_default_component(waiting_queue) - assert pair == ("document_joiner", document_joiner) - - waiting_queue = [ - ("document_builder", document_builder), - ("prompt_builder", prompt_builder), - ("document_joiner", document_joiner), - ] - pair = pipe._find_next_runnable_lazy_variadic_or_default_component(waiting_queue) - assert pair == ("prompt_builder", prompt_builder) - - def test__enqueue_waiting_component(self): - document_builder = component_class( - "DocumentBuilder", input_types={"text": str}, output_types={"doc": Document} - )() - document_joiner = component_class("DocumentJoiner", input_types={"docs": Variadic[Document]})() - - waiting_queue = [] - _enqueue_waiting_component(("document_builder", document_builder), waiting_queue) - assert waiting_queue == [("document_builder", document_builder)] - - waiting_queue = [("document_builder", document_builder)] - _enqueue_waiting_component(("document_builder", document_builder), waiting_queue) - assert waiting_queue == [("document_builder", document_builder)] - - waiting_queue = [("document_joiner", document_joiner)] - _enqueue_waiting_component(("document_builder", document_builder), waiting_queue) - assert waiting_queue == [("document_joiner", document_joiner), ("document_builder", document_builder)] - - waiting_queue = [("document_builder", document_builder), ("document_joiner", document_joiner)] - _enqueue_waiting_component(("document_builder", document_builder), waiting_queue) - assert waiting_queue == [("document_builder", document_builder), ("document_joiner", document_joiner)] - - def test__dequeue_waiting_component(self): - document_builder = component_class( - "DocumentBuilder", input_types={"text": str}, output_types={"doc": Document} - )() - document_joiner = component_class("DocumentJoiner", input_types={"docs": Variadic[Document]})() - - waiting_queue = [] - _dequeue_waiting_component(("document_builder", document_builder), waiting_queue) - assert waiting_queue == [] - - waiting_queue = [("document_builder", document_builder)] - _dequeue_waiting_component(("document_builder", document_builder), waiting_queue) - assert waiting_queue == [] - - waiting_queue = [("document_joiner", document_joiner)] - _dequeue_waiting_component(("document_builder", document_builder), 
waiting_queue) - assert waiting_queue == [("document_joiner", document_joiner)] - - waiting_queue = [("document_builder", document_builder), ("document_joiner", document_joiner)] - _dequeue_waiting_component(("document_builder", document_builder), waiting_queue) - assert waiting_queue == [("document_joiner", document_joiner)] - - def test__is_lazy_variadic(self): - VariadicAndGreedyVariadic = component_class( - "VariadicAndGreedyVariadic", input_types={"variadic": Variadic[int], "greedy_variadic": GreedyVariadic[int]} - ) - NonVariadic = component_class("NonVariadic", input_types={"value": int}) - VariadicNonGreedyVariadic = component_class( - "VariadicNonGreedyVariadic", input_types={"variadic": Variadic[int]} - ) - NonVariadicAndGreedyVariadic = component_class( - "NonVariadicAndGreedyVariadic", input_types={"greedy_variadic": GreedyVariadic[int]} - ) - assert not _is_lazy_variadic(VariadicAndGreedyVariadic()) - assert not _is_lazy_variadic(NonVariadic()) - assert _is_lazy_variadic(VariadicNonGreedyVariadic()) - assert not _is_lazy_variadic(NonVariadicAndGreedyVariadic()) - - def test__find_receivers_from(self): - sentence_builder = component_class( - "SentenceBuilder", input_types={"words": List[str]}, output_types={"text": str} - )() - document_builder = component_class( - "DocumentBuilder", input_types={"text": str}, output_types={"doc": Document} - )() - conditional_document_builder = component_class( - "ConditionalDocumentBuilder", output_types={"doc": Document, "noop": None} - )() - - document_joiner = component_class("DocumentJoiner", input_types={"docs": Variadic[Document]})() - - pipe = Pipeline() - pipe.add_component("sentence_builder", sentence_builder) - pipe.add_component("document_builder", document_builder) - pipe.add_component("document_joiner", document_joiner) - pipe.add_component("conditional_document_builder", conditional_document_builder) - pipe.connect("sentence_builder.text", "document_builder.text") - pipe.connect("document_builder.doc", "document_joiner.docs") - pipe.connect("conditional_document_builder.doc", "document_joiner.docs") - - res = pipe._find_receivers_from("sentence_builder") - assert res == [ - ( - "document_builder", - OutputSocket(name="text", type=str, receivers=["document_builder"]), - InputSocket(name="text", type=str, default_value=_empty, senders=["sentence_builder"]), + with pytest.raises(PipelineRuntimeError) as exc_info: + pp._run_component( + component=pp._get_component_with_graph_metadata_and_visits("wrong", 0), + inputs=inputs, + component_visits={"wrong": 0}, ) - ] - res = pipe._find_receivers_from("document_builder") - assert res == [ - ( - "document_joiner", - OutputSocket(name="doc", type=Document, receivers=["document_joiner"]), - InputSocket( - name="docs", - type=Variadic[Document], - default_value=_empty, - senders=["document_builder", "conditional_document_builder"], - ), - ) - ] - - res = pipe._find_receivers_from("document_joiner") - assert res == [] - - res = pipe._find_receivers_from("conditional_document_builder") - assert res == [ - ( - "document_joiner", - OutputSocket(name="doc", type=Document, receivers=["document_joiner"]), - InputSocket( - name="docs", - type=Variadic[Document], - default_value=_empty, - senders=["document_builder", "conditional_document_builder"], - ), - ) - ] + assert "didn't return a dictionary" in str(exc_info.value) diff --git a/test/core/pipeline/test_pipeline_base.py b/test/core/pipeline/test_pipeline_base.py new file mode 100644 index 0000000000..279ed2fa9b --- /dev/null +++ 
b/test/core/pipeline/test_pipeline_base.py @@ -0,0 +1,1627 @@ +# SPDX-FileCopyrightText: 2022-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +import logging +from typing import List, Optional +from unittest.mock import patch + +import pytest + +from haystack import Document +from haystack.core.component import component +from haystack.core.component.types import InputSocket, OutputSocket, Variadic, GreedyVariadic, _empty +from haystack.core.errors import ( + DeserializationError, + PipelineConnectError, + PipelineDrawingError, + PipelineError, + PipelineMaxComponentRuns, +) +from haystack.core.pipeline import PredefinedPipeline +from haystack.core.pipeline.base import PipelineBase +from haystack.core.pipeline.base import ComponentPriority, _NO_OUTPUT_PRODUCED +from haystack.core.pipeline.utils import FIFOPriorityQueue + +from haystack.core.serialization import DeserializationCallbacks +from haystack.testing.factory import component_class +from haystack.testing.sample_components import AddFixedValue, Double, Greet + +logging.basicConfig(level=logging.DEBUG) + + +@component +class FakeComponent: + def __init__(self, an_init_param: Optional[str] = None): + pass + + @component.output_types(value=str) + def run(self, input_: str): + return {"value": input_} + + +@component +class FakeComponentSquared: + def __init__(self, an_init_param: Optional[str] = None): + self.an_init_param = an_init_param + self.inner = FakeComponent() + + @component.output_types(value=str) + def run(self, input_: str): + return {"value": input_} + + +@pytest.fixture +def regular_output_socket(): + """Output socket for a regular (non-variadic) connection with receivers""" + return OutputSocket("output1", int, receivers=["receiver1", "receiver2"]) + + +@pytest.fixture +def regular_input_socket(): + """Regular (non-variadic) input socket with a single sender""" + return InputSocket("input1", int, senders=["sender1"]) + + +@pytest.fixture +def lazy_variadic_input_socket(): + """Lazy variadic input socket with multiple senders""" + return InputSocket("variadic_input", Variadic[int], senders=["sender1", "sender2"]) + + +class TestPipelineBase: + """ + This class contains only unit tests for the PipelineBase class. + It doesn't test Pipeline.run(), that is done separately in a different way. 
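+    Most tests below cover graph construction, (de)serialization, and the scheduling
+    helpers (priority calculation, queue handling, and input formatting).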
+ """ + + def test_pipeline_dumps(self, test_files_path): + pipeline = PipelineBase(max_runs_per_component=99) + pipeline.add_component("Comp1", FakeComponent("Foo")) + pipeline.add_component("Comp2", FakeComponent()) + pipeline.connect("Comp1.value", "Comp2.input_") + result = pipeline.dumps() + with open(f"{test_files_path}/yaml/test_pipeline.yaml", "r") as f: + assert f.read() == result + + def test_pipeline_loads_invalid_data(self): + invalid_yaml = """components: + Comp1: + init_parameters: + an_init_param: null + type: test.core.pipeline.test_pipeline_base.FakeComponent + Comp2* + init_parameters: + an_init_param: null + type: test.core.pipeline.test_pipeline_base.FakeComponent + connections: + * receiver: Comp2.input_ + sender: Comp1.value + metadata: + """ + + with pytest.raises(DeserializationError, match="unmarshalling serialized"): + pipeline = PipelineBase.loads(invalid_yaml) + + invalid_init_parameter_yaml = """components: + Comp1: + init_parameters: + unknown: null + type: test.core.pipeline.test_pipeline_base.FakeComponent + Comp2: + init_parameters: + an_init_param: null + type: test.core.pipeline.test_pipeline_base.FakeComponent + connections: + - receiver: Comp2.input_ + sender: Comp1.value + metadata: {} + """ + + with pytest.raises(DeserializationError, match=".*Comp1.*unknown.*"): + pipeline = PipelineBase.loads(invalid_init_parameter_yaml) + + def test_pipeline_dump(self, test_files_path, tmp_path): + pipeline = PipelineBase(max_runs_per_component=99) + pipeline.add_component("Comp1", FakeComponent("Foo")) + pipeline.add_component("Comp2", FakeComponent()) + pipeline.connect("Comp1.value", "Comp2.input_") + with open(tmp_path / "out.yaml", "w") as f: + pipeline.dump(f) + # re-open and ensure it's the same data as the test file + with open(f"{test_files_path}/yaml/test_pipeline.yaml", "r") as test_f, open(tmp_path / "out.yaml", "r") as f: + assert f.read() == test_f.read() + + def test_pipeline_load(self, test_files_path): + with open(f"{test_files_path}/yaml/test_pipeline.yaml", "r") as f: + pipeline = PipelineBase.load(f) + assert pipeline._max_runs_per_component == 99 + assert isinstance(pipeline.get_component("Comp1"), FakeComponent) + assert isinstance(pipeline.get_component("Comp2"), FakeComponent) + + @patch("haystack.core.pipeline.base._to_mermaid_image") + @patch("haystack.core.pipeline.base.is_in_jupyter") + @patch("IPython.display.Image") + @patch("IPython.display.display") + def test_show_in_notebook( + self, mock_ipython_display, mock_ipython_image, mock_is_in_jupyter, mock_to_mermaid_image + ): + pipe = PipelineBase() + + mock_to_mermaid_image.return_value = b"some_image_data" + mock_is_in_jupyter.return_value = True + + pipe.show() + mock_ipython_image.assert_called_once_with(b"some_image_data") + mock_ipython_display.assert_called_once() + + @patch("haystack.core.pipeline.base.is_in_jupyter") + def test_show_not_in_notebook(self, mock_is_in_jupyter): + pipe = PipelineBase() + + mock_is_in_jupyter.return_value = False + + with pytest.raises(PipelineDrawingError): + pipe.show() + + @patch("haystack.core.pipeline.base._to_mermaid_image") + def test_draw(self, mock_to_mermaid_image, tmp_path): + pipe = PipelineBase() + mock_to_mermaid_image.return_value = b"some_image_data" + + image_path = tmp_path / "test.png" + pipe.draw(path=image_path) + assert image_path.read_bytes() == mock_to_mermaid_image.return_value + + # UNIT + def test_add_component_to_different_pipelines(self): + first_pipe = PipelineBase() + second_pipe = PipelineBase() + some_component = 
component_class("Some")() + + assert some_component.__haystack_added_to_pipeline__ is None + first_pipe.add_component("some", some_component) + assert some_component.__haystack_added_to_pipeline__ is first_pipe + + with pytest.raises(PipelineError): + second_pipe.add_component("some", some_component) + + def test_remove_component_raises_if_invalid_component_name(self): + pipe = PipelineBase() + component = component_class("Some")() + + pipe.add_component("1", component) + + with pytest.raises(ValueError): + pipe.remove_component("2") + + def test_remove_component_removes_component_and_its_edges(self): + pipe = PipelineBase() + component_1 = component_class("Type1")() + component_2 = component_class("Type2")() + component_3 = component_class("Type3")() + component_4 = component_class("Type4")() + + pipe.add_component("1", component_1) + pipe.add_component("2", component_2) + pipe.add_component("3", component_3) + pipe.add_component("4", component_4) + + pipe.connect("1", "2") + pipe.connect("2", "3") + pipe.connect("3", "4") + + pipe.remove_component("2") + + assert ["1", "3", "4"] == sorted(pipe.graph.nodes) + assert [("3", "4")] == sorted([(u, v) for (u, v) in pipe.graph.edges()]) + + def test_remove_component_allows_you_to_reuse_the_component(self): + pipe = PipelineBase() + Some = component_class("Some", input_types={"in": int}, output_types={"out": int}) + + pipe.add_component("component_1", Some()) + pipe.add_component("component_2", Some()) + pipe.add_component("component_3", Some()) + pipe.connect("component_1", "component_2") + pipe.connect("component_2", "component_3") + component_2 = pipe.remove_component("component_2") + + assert component_2.__haystack_added_to_pipeline__ is None + assert component_2.__haystack_input__._sockets_dict == {"in": InputSocket(name="in", type=int, senders=[])} + assert component_2.__haystack_output__._sockets_dict == { + "out": OutputSocket(name="out", type=int, receivers=[]) + } + + pipe2 = PipelineBase() + pipe2.add_component("component_4", Some()) + pipe2.add_component("component_2", component_2) + pipe2.add_component("component_5", Some()) + + pipe2.connect("component_4", "component_2") + pipe2.connect("component_2", "component_5") + assert component_2.__haystack_added_to_pipeline__ is pipe2 + assert component_2.__haystack_input__._sockets_dict == { + "in": InputSocket(name="in", type=int, senders=["component_4"]) + } + assert component_2.__haystack_output__._sockets_dict == { + "out": OutputSocket(name="out", type=int, receivers=["component_5"]) + } + + # instance = pipe2.get_component("some") + # assert instance == component + + # UNIT + def test_get_component_name(self): + pipe = PipelineBase() + some_component = component_class("Some")() + pipe.add_component("some", some_component) + + assert pipe.get_component_name(some_component) == "some" + + # UNIT + def test_get_component_name_not_added_to_pipeline(self): + pipe = PipelineBase() + some_component = component_class("Some")() + + assert pipe.get_component_name(some_component) == "" + + # UNIT + def test_repr(self): + pipe = PipelineBase(metadata={"test": "test"}) + pipe.add_component("add_two", AddFixedValue(add=2)) + pipe.add_component("add_default", AddFixedValue()) + pipe.add_component("double", Double()) + pipe.connect("add_two", "double") + pipe.connect("double", "add_default") + + expected_repr = ( + f"{object.__repr__(pipe)}\n" + "🧱 Metadata\n" + " - test: test\n" + "🚅 Components\n" + " - add_two: AddFixedValue\n" + " - add_default: AddFixedValue\n" + " - double: Double\n" + "🛤️ 
Connections\n" + " - add_two.result -> double.value (int)\n" + " - double.value -> add_default.value (int)\n" + ) + + assert repr(pipe) == expected_repr + + # UNIT + def test_to_dict(self): + add_two = AddFixedValue(add=2) + add_default = AddFixedValue() + double = Double() + pipe = PipelineBase(metadata={"test": "test"}, max_runs_per_component=42) + pipe.add_component("add_two", add_two) + pipe.add_component("add_default", add_default) + pipe.add_component("double", double) + pipe.connect("add_two", "double") + pipe.connect("double", "add_default") + + res = pipe.to_dict() + expected = { + "metadata": {"test": "test"}, + "max_runs_per_component": 42, + "components": { + "add_two": { + "type": "haystack.testing.sample_components.add_value.AddFixedValue", + "init_parameters": {"add": 2}, + }, + "add_default": { + "type": "haystack.testing.sample_components.add_value.AddFixedValue", + "init_parameters": {"add": 1}, + }, + "double": {"type": "haystack.testing.sample_components.double.Double", "init_parameters": {}}, + }, + "connections": [ + {"sender": "add_two.result", "receiver": "double.value"}, + {"sender": "double.value", "receiver": "add_default.value"}, + ], + } + assert res == expected + + def test_from_dict(self): + data = { + "metadata": {"test": "test"}, + "max_runs_per_component": 101, + "components": { + "add_two": { + "type": "haystack.testing.sample_components.add_value.AddFixedValue", + "init_parameters": {"add": 2}, + }, + "add_default": { + "type": "haystack.testing.sample_components.add_value.AddFixedValue", + "init_parameters": {"add": 1}, + }, + "double": {"type": "haystack.testing.sample_components.double.Double", "init_parameters": {}}, + }, + "connections": [ + {"sender": "add_two.result", "receiver": "double.value"}, + {"sender": "double.value", "receiver": "add_default.value"}, + ], + } + pipe = PipelineBase.from_dict(data) + + assert pipe.metadata == {"test": "test"} + assert pipe._max_runs_per_component == 101 + + # Components + assert len(pipe.graph.nodes) == 3 + ## add_two + add_two = pipe.graph.nodes["add_two"] + assert add_two["instance"].add == 2 + assert add_two["input_sockets"] == { + "value": InputSocket(name="value", type=int), + "add": InputSocket(name="add", type=Optional[int], default_value=None), + } + assert add_two["output_sockets"] == {"result": OutputSocket(name="result", type=int, receivers=["double"])} + assert add_two["visits"] == 0 + + ## add_default + add_default = pipe.graph.nodes["add_default"] + assert add_default["instance"].add == 1 + assert add_default["input_sockets"] == { + "value": InputSocket(name="value", type=int, senders=["double"]), + "add": InputSocket(name="add", type=Optional[int], default_value=None), + } + assert add_default["output_sockets"] == {"result": OutputSocket(name="result", type=int)} + assert add_default["visits"] == 0 + + ## double + double = pipe.graph.nodes["double"] + assert double["instance"] + assert double["input_sockets"] == {"value": InputSocket(name="value", type=int, senders=["add_two"])} + assert double["output_sockets"] == {"value": OutputSocket(name="value", type=int, receivers=["add_default"])} + assert double["visits"] == 0 + + # Connections + connections = list(pipe.graph.edges(data=True)) + assert len(connections) == 2 + assert connections[0] == ( + "add_two", + "double", + { + "conn_type": "int", + "from_socket": OutputSocket(name="result", type=int, receivers=["double"]), + "to_socket": InputSocket(name="value", type=int, senders=["add_two"]), + "mandatory": True, + }, + ) + assert 
connections[1] == (
+            "double",
+            "add_default",
+            {
+                "conn_type": "int",
+                "from_socket": OutputSocket(name="value", type=int, receivers=["add_default"]),
+                "to_socket": InputSocket(name="value", type=int, senders=["double"]),
+                "mandatory": True,
+            },
+        )
+
+    # TODO: Remove this, this should be a component test.
+    # The pipeline can't handle this in any case.
+    def test_from_dict_with_callbacks(self):
+        data = {
+            "metadata": {"test": "test"},
+            "components": {
+                "add_two": {
+                    "type": "haystack.testing.sample_components.add_value.AddFixedValue",
+                    "init_parameters": {"add": 2},
+                },
+                "add_default": {
+                    "type": "haystack.testing.sample_components.add_value.AddFixedValue",
+                    "init_parameters": {"add": 1},
+                },
+                "double": {"type": "haystack.testing.sample_components.double.Double", "init_parameters": {}},
+                "greet": {
+                    "type": "haystack.testing.sample_components.greet.Greet",
+                    "init_parameters": {"message": "test"},
+                },
+            },
+            "connections": [
+                {"sender": "add_two.result", "receiver": "double.value"},
+                {"sender": "double.value", "receiver": "add_default.value"},
+            ],
+        }
+
+        components_seen_in_callback = []
+
+        def component_pre_init_callback(name, component_cls, init_params):
+            assert name in ["add_two", "add_default", "double", "greet"]
+            assert component_cls in [AddFixedValue, Double, Greet]
+
+            if name == "add_two":
+                assert init_params == {"add": 2}
+            elif name == "add_default":
+                assert init_params == {"add": 1}
+            elif name == "greet":
+                assert init_params == {"message": "test"}
+
+            components_seen_in_callback.append(name)
+
+        pipe = PipelineBase.from_dict(
+            data, callbacks=DeserializationCallbacks(component_pre_init=component_pre_init_callback)
+        )
+        assert components_seen_in_callback == ["add_two", "add_default", "double", "greet"]
+        add_two = pipe.graph.nodes["add_two"]["instance"]
+        assert add_two.add == 2
+        add_default = pipe.graph.nodes["add_default"]["instance"]
+        assert add_default.add == 1
+        greet = pipe.graph.nodes["greet"]["instance"]
+        assert greet.message == "test"
+        assert greet.log_level == "INFO"
+
+        def component_pre_init_callback_modify(name, component_cls, init_params):
+            assert name in ["add_two", "add_default", "double", "greet"]
+            assert component_cls in [AddFixedValue, Double, Greet]
+
+            if name == "add_two":
+                init_params["add"] = 3
+            elif name == "add_default":
+                init_params["add"] = 0
+            elif name == "greet":
+                init_params["message"] = "modified test"
+                init_params["log_level"] = "DEBUG"
+
+        pipe = PipelineBase.from_dict(
+            data, callbacks=DeserializationCallbacks(component_pre_init=component_pre_init_callback_modify)
+        )
+        add_two = pipe.graph.nodes["add_two"]["instance"]
+        assert add_two.add == 3
+        add_default = pipe.graph.nodes["add_default"]["instance"]
+        assert add_default.add == 0
+        greet = pipe.graph.nodes["greet"]["instance"]
+        assert greet.message == "modified test"
+        assert greet.log_level == "DEBUG"
+
+        # Test with a component that internally instantiates another component
+        def component_pre_init_callback_check_class(name, component_cls, init_params):
+            assert name == "fake_component_squared"
+            assert component_cls == FakeComponentSquared
+
+        pipe = PipelineBase()
+        pipe.add_component("fake_component_squared", FakeComponentSquared())
+        pipe = PipelineBase.from_dict(
+            pipe.to_dict(),
+            callbacks=DeserializationCallbacks(component_pre_init=component_pre_init_callback_check_class),
+        )
+        assert type(pipe.graph.nodes["fake_component_squared"]["instance"].inner) == FakeComponent
+
+    # UNIT
+    def test_from_dict_with_empty_dict(self):
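+        # Deserializing an empty dict should yield a pipeline equal to a freshly constructed PipelineBase.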
assert PipelineBase() == PipelineBase.from_dict({}) + + # TODO: UNIT, consider deprecating this argument + def test_from_dict_with_components_instances(self): + add_two = AddFixedValue(add=2) + add_default = AddFixedValue() + components = {"add_two": add_two, "add_default": add_default} + data = { + "metadata": {"test": "test"}, + "components": { + "add_two": {}, + "add_default": {}, + "double": {"type": "haystack.testing.sample_components.double.Double", "init_parameters": {}}, + }, + "connections": [ + {"sender": "add_two.result", "receiver": "double.value"}, + {"sender": "double.value", "receiver": "add_default.value"}, + ], + } + pipe = PipelineBase.from_dict(data, components=components) + assert pipe.metadata == {"test": "test"} + + # Components + assert len(pipe.graph.nodes) == 3 + ## add_two + add_two_data = pipe.graph.nodes["add_two"] + assert add_two_data["instance"] is add_two + assert add_two_data["instance"].add == 2 + assert add_two_data["input_sockets"] == { + "value": InputSocket(name="value", type=int), + "add": InputSocket(name="add", type=Optional[int], default_value=None), + } + assert add_two_data["output_sockets"] == {"result": OutputSocket(name="result", type=int, receivers=["double"])} + assert add_two_data["visits"] == 0 + + ## add_default + add_default_data = pipe.graph.nodes["add_default"] + assert add_default_data["instance"] is add_default + assert add_default_data["instance"].add == 1 + assert add_default_data["input_sockets"] == { + "value": InputSocket(name="value", type=int, senders=["double"]), + "add": InputSocket(name="add", type=Optional[int], default_value=None), + } + assert add_default_data["output_sockets"] == {"result": OutputSocket(name="result", type=int, receivers=[])} + assert add_default_data["visits"] == 0 + + ## double + double = pipe.graph.nodes["double"] + assert double["instance"] + assert double["input_sockets"] == {"value": InputSocket(name="value", type=int, senders=["add_two"])} + assert double["output_sockets"] == {"value": OutputSocket(name="value", type=int, receivers=["add_default"])} + assert double["visits"] == 0 + + # Connections + connections = list(pipe.graph.edges(data=True)) + assert len(connections) == 2 + assert connections[0] == ( + "add_two", + "double", + { + "conn_type": "int", + "from_socket": OutputSocket(name="result", type=int, receivers=["double"]), + "to_socket": InputSocket(name="value", type=int, senders=["add_two"]), + "mandatory": True, + }, + ) + assert connections[1] == ( + "double", + "add_default", + { + "conn_type": "int", + "from_socket": OutputSocket(name="value", type=int, receivers=["add_default"]), + "to_socket": InputSocket(name="value", type=int, senders=["double"]), + "mandatory": True, + }, + ) + + # UNIT + def test_from_dict_without_component_type(self): + data = { + "metadata": {"test": "test"}, + "components": {"add_two": {"init_parameters": {"add": 2}}}, + "connections": [], + } + with pytest.raises(PipelineError) as err: + PipelineBase.from_dict(data) + + err.match("Missing 'type' in component 'add_two'") + + # UNIT + def test_from_dict_without_registered_component_type(self): + data = { + "metadata": {"test": "test"}, + "components": {"add_two": {"type": "foo.bar.baz", "init_parameters": {"add": 2}}}, + "connections": [], + } + with pytest.raises(PipelineError) as err: + PipelineBase.from_dict(data) + + err.match(r"Component .+ not imported.") + + def test_from_dict_with_invalid_type(self): + data = { + "metadata": {"test": "test"}, + "components": {"add_two": {"type": "", 
"init_parameters": {"add": 2}}}, + "connections": [], + } + with pytest.raises(PipelineError) as err: + PipelineBase.from_dict(data) + + err.match(r"Component '' \(name: 'add_two'\) not imported.") + + # UNIT + def test_from_dict_without_connection_sender(self): + data = {"metadata": {"test": "test"}, "components": {}, "connections": [{"receiver": "some.receiver"}]} + with pytest.raises(PipelineError) as err: + PipelineBase.from_dict(data) + + err.match("Missing sender in connection: {'receiver': 'some.receiver'}") + + # UNIT + def test_from_dict_without_connection_receiver(self): + data = {"metadata": {"test": "test"}, "components": {}, "connections": [{"sender": "some.sender"}]} + with pytest.raises(PipelineError) as err: + PipelineBase.from_dict(data) + + err.match("Missing receiver in connection: {'sender': 'some.sender'}") + + def test_describe_input_only_no_inputs_components(self): + A = component_class("A", input_types={}, output={"x": 0}) + B = component_class("B", input_types={}, output={"y": 0}) + C = component_class("C", input_types={"x": int, "y": int}, output={"z": 0}) + p = PipelineBase() + p.add_component("a", A()) + p.add_component("b", B()) + p.add_component("c", C()) + p.connect("a.x", "c.x") + p.connect("b.y", "c.y") + assert p.inputs() == {} + assert p.inputs(include_components_with_connected_inputs=True) == { + "c": {"x": {"type": int, "is_mandatory": True}, "y": {"type": int, "is_mandatory": True}} + } + + def test_describe_input_some_components_with_no_inputs(self): + A = component_class("A", input_types={}, output={"x": 0}) + B = component_class("B", input_types={"y": int}, output={"y": 0}) + C = component_class("C", input_types={"x": int, "y": int}, output={"z": 0}) + p = PipelineBase() + p.add_component("a", A()) + p.add_component("b", B()) + p.add_component("c", C()) + p.connect("a.x", "c.x") + p.connect("b.y", "c.y") + assert p.inputs() == {"b": {"y": {"type": int, "is_mandatory": True}}} + assert p.inputs(include_components_with_connected_inputs=True) == { + "b": {"y": {"type": int, "is_mandatory": True}}, + "c": {"x": {"type": int, "is_mandatory": True}, "y": {"type": int, "is_mandatory": True}}, + } + + def test_describe_input_all_components_have_inputs(self): + A = component_class("A", input_types={"x": Optional[int]}, output={"x": 0}) + B = component_class("B", input_types={"y": int}, output={"y": 0}) + C = component_class("C", input_types={"x": int, "y": int}, output={"z": 0}) + p = PipelineBase() + p.add_component("a", A()) + p.add_component("b", B()) + p.add_component("c", C()) + p.connect("a.x", "c.x") + p.connect("b.y", "c.y") + assert p.inputs() == { + "a": {"x": {"type": Optional[int], "is_mandatory": True}}, + "b": {"y": {"type": int, "is_mandatory": True}}, + } + assert p.inputs(include_components_with_connected_inputs=True) == { + "a": {"x": {"type": Optional[int], "is_mandatory": True}}, + "b": {"y": {"type": int, "is_mandatory": True}}, + "c": {"x": {"type": int, "is_mandatory": True}, "y": {"type": int, "is_mandatory": True}}, + } + + def test_describe_output_multiple_possible(self): + """ + This pipeline has two outputs: + {"b": {"output_b": {"type": str}}, "a": {"output_a": {"type": str}}} + """ + A = component_class("A", input_types={"input_a": str}, output={"output_a": "str", "output_b": "str"}) + B = component_class("B", input_types={"input_b": str}, output={"output_b": "str"}) + + pipe = PipelineBase() + pipe.add_component("a", A()) + pipe.add_component("b", B()) + pipe.connect("a.output_b", "b.input_b") + + assert pipe.outputs() == {"b": 
{"output_b": {"type": str}}, "a": {"output_a": {"type": str}}} + assert pipe.outputs(include_components_with_connected_outputs=True) == { + "a": {"output_a": {"type": str}, "output_b": {"type": str}}, + "b": {"output_b": {"type": str}}, + } + + def test_describe_output_single(self): + """ + This pipeline has one output: + {"c": {"z": {"type": int}}} + """ + A = component_class("A", input_types={"x": Optional[int]}, output={"x": 0}) + B = component_class("B", input_types={"y": int}, output={"y": 0}) + C = component_class("C", input_types={"x": int, "y": int}, output={"z": 0}) + p = PipelineBase() + p.add_component("a", A()) + p.add_component("b", B()) + p.add_component("c", C()) + p.connect("a.x", "c.x") + p.connect("b.y", "c.y") + + assert p.outputs() == {"c": {"z": {"type": int}}} + assert p.outputs(include_components_with_connected_outputs=True) == { + "a": {"x": {"type": int}}, + "b": {"y": {"type": int}}, + "c": {"z": {"type": int}}, + } + + def test_describe_no_outputs(self): + """ + This pipeline sets up elaborate connections between three components but in fact it has no outputs: + Check that p.outputs() == {} + """ + A = component_class("A", input_types={"x": Optional[int]}, output={"x": 0}) + B = component_class("B", input_types={"y": int}, output={"y": 0}) + C = component_class("C", input_types={"x": int, "y": int}, output={}) + p = PipelineBase() + p.add_component("a", A()) + p.add_component("b", B()) + p.add_component("c", C()) + p.connect("a.x", "c.x") + p.connect("b.y", "c.y") + assert p.outputs() == {} + assert p.outputs(include_components_with_connected_outputs=True) == { + "a": {"x": {"type": int}}, + "b": {"y": {"type": int}}, + } + + def test_from_template(self, monkeypatch): + monkeypatch.setenv("OPENAI_API_KEY", "fake_key") + pipe = PipelineBase.from_template(PredefinedPipeline.INDEXING) + assert pipe.get_component("cleaner") + + def test_walk_pipeline_with_no_cycles(self): + """ + This pipeline has two source nodes, source1 and source2, one hello3 node in between, and one sink node, joiner. + pipeline.walk() should return each component exactly once. The order is not guaranteed. + """ + + @component + class Hello: + @component.output_types(output=str) + def run(self, word: str): + """ + Takes a string in input and returns "Hello, !" in output. + """ + return {"output": f"Hello, {word}!"} + + @component + class Joiner: + @component.output_types(output=str) + def run(self, word1: str, word2: str): + """ + Takes two strings in input and returns "Hello, and !" in output. + """ + return {"output": f"Hello, {word1} and {word2}!"} + + pipeline = PipelineBase() + source1 = Hello() + source2 = Hello() + hello3 = Hello() + joiner = Joiner() + pipeline.add_component("source1", source1) + pipeline.add_component("source2", source2) + pipeline.add_component("hello3", hello3) + pipeline.add_component("joiner", joiner) + + pipeline.connect("source1", "joiner.word1") + pipeline.connect("source2", "hello3") + pipeline.connect("hello3", "joiner.word2") + + expected_components = [("source1", source1), ("source2", source2), ("joiner", joiner), ("hello3", hello3)] + assert sorted(expected_components) == sorted(pipeline.walk()) + + def test_walk_pipeline_with_cycles(self): + """ + This pipeline consists of two components, which would run three times in a loop. + pipeline.walk() should return these components exactly once. The order is not guaranteed. 
+ """ + + @component + class Hello: + def __init__(self): + self.iteration_counter = 0 + + @component.output_types(intermediate=str, final=str) + def run(self, word: str, intermediate: Optional[str] = None): + """ + Takes a string in input and returns "Hello, !" in output. + """ + if self.iteration_counter < 3: + self.iteration_counter += 1 + return {"intermediate": f"Hello, {intermediate or word}!"} + return {"final": f"Hello, {intermediate or word}!"} + + pipeline = PipelineBase() + hello = Hello() + hello_again = Hello() + pipeline.add_component("hello", hello) + pipeline.add_component("hello_again", hello_again) + pipeline.connect("hello.intermediate", "hello_again.intermediate") + pipeline.connect("hello_again.intermediate", "hello.intermediate") + assert {("hello", hello), ("hello_again", hello_again)} == set(pipeline.walk()) + + def test__prepare_component_input_data(self): + MockComponent = component_class("MockComponent", input_types={"x": List[str], "y": str}) + pipe = PipelineBase() + pipe.add_component("first_mock", MockComponent()) + pipe.add_component("second_mock", MockComponent()) + + res = pipe._prepare_component_input_data({"x": ["some data"], "y": "some other data"}) + assert res == { + "first_mock": {"x": ["some data"], "y": "some other data"}, + "second_mock": {"x": ["some data"], "y": "some other data"}, + } + assert id(res["first_mock"]["x"]) != id(res["second_mock"]["x"]) + + def test__prepare_component_input_data_with_connected_inputs(self): + MockComponent = component_class( + "MockComponent", input_types={"x": List[str], "y": str}, output_types={"z": str} + ) + pipe = PipelineBase() + pipe.add_component("first_mock", MockComponent()) + pipe.add_component("second_mock", MockComponent()) + pipe.connect("first_mock.z", "second_mock.y") + + res = pipe._prepare_component_input_data({"x": ["some data"], "y": "some other data"}) + assert res == {"first_mock": {"x": ["some data"], "y": "some other data"}, "second_mock": {"x": ["some data"]}} + assert id(res["first_mock"]["x"]) != id(res["second_mock"]["x"]) + + def test__prepare_component_input_data_with_non_existing_input(self, caplog): + pipe = PipelineBase() + res = pipe._prepare_component_input_data({"input_name": 1}) + assert res == {} + assert ( + "Inputs ['input_name'] were not matched to any component inputs, " + "please check your run parameters." 
in caplog.text + ) + + def test_connect(self): + comp1 = component_class("Comp1", output_types={"value": int})() + comp2 = component_class("Comp2", input_types={"value": int})() + pipe = PipelineBase() + pipe.add_component("comp1", comp1) + pipe.add_component("comp2", comp2) + assert pipe.connect("comp1.value", "comp2.value") is pipe + + assert comp1.__haystack_output__.value.receivers == ["comp2"] + assert comp2.__haystack_input__.value.senders == ["comp1"] + assert list(pipe.graph.edges) == [("comp1", "comp2", "value/value")] + + def test_connect_already_connected(self): + comp1 = component_class("Comp1", output_types={"value": int})() + comp2 = component_class("Comp2", input_types={"value": int})() + pipe = PipelineBase() + pipe.add_component("comp1", comp1) + pipe.add_component("comp2", comp2) + pipe.connect("comp1.value", "comp2.value") + pipe.connect("comp1.value", "comp2.value") + + assert comp1.__haystack_output__.value.receivers == ["comp2"] + assert comp2.__haystack_input__.value.senders == ["comp1"] + assert list(pipe.graph.edges) == [("comp1", "comp2", "value/value")] + + def test_connect_with_sender_component_name(self): + comp1 = component_class("Comp1", output_types={"value": int})() + comp2 = component_class("Comp2", input_types={"value": int})() + pipe = PipelineBase() + pipe.add_component("comp1", comp1) + pipe.add_component("comp2", comp2) + pipe.connect("comp1", "comp2.value") + + assert comp1.__haystack_output__.value.receivers == ["comp2"] + assert comp2.__haystack_input__.value.senders == ["comp1"] + assert list(pipe.graph.edges) == [("comp1", "comp2", "value/value")] + + def test_connect_with_receiver_component_name(self): + comp1 = component_class("Comp1", output_types={"value": int})() + comp2 = component_class("Comp2", input_types={"value": int})() + pipe = PipelineBase() + pipe.add_component("comp1", comp1) + pipe.add_component("comp2", comp2) + pipe.connect("comp1.value", "comp2") + + assert comp1.__haystack_output__.value.receivers == ["comp2"] + assert comp2.__haystack_input__.value.senders == ["comp1"] + assert list(pipe.graph.edges) == [("comp1", "comp2", "value/value")] + + def test_connect_with_sender_and_receiver_component_name(self): + comp1 = component_class("Comp1", output_types={"value": int})() + comp2 = component_class("Comp2", input_types={"value": int})() + pipe = PipelineBase() + pipe.add_component("comp1", comp1) + pipe.add_component("comp2", comp2) + pipe.connect("comp1", "comp2") + + assert comp1.__haystack_output__.value.receivers == ["comp2"] + assert comp2.__haystack_input__.value.senders == ["comp1"] + assert list(pipe.graph.edges) == [("comp1", "comp2", "value/value")] + + def test_connect_with_sender_not_in_pipeline(self): + comp2 = component_class("Comp2", input_types={"value": int})() + pipe = PipelineBase() + pipe.add_component("comp2", comp2) + with pytest.raises(ValueError): + pipe.connect("comp1.value", "comp2.value") + + def test_connect_with_receiver_not_in_pipeline(self): + comp1 = component_class("Comp1", output_types={"value": int})() + pipe = PipelineBase() + pipe.add_component("comp1", comp1) + with pytest.raises(ValueError): + pipe.connect("comp1.value", "comp2.value") + + def test_connect_with_sender_socket_name_not_in_pipeline(self): + comp1 = component_class("Comp1", output_types={"value": int})() + comp2 = component_class("Comp2", input_types={"value": int})() + pipe = PipelineBase() + pipe.add_component("comp1", comp1) + pipe.add_component("comp2", comp2) + with pytest.raises(PipelineConnectError): + 
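+            # "non_existing" is not a declared output socket of comp1, so connect() must raise.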
pipe.connect("comp1.non_existing", "comp2.value") + + def test_connect_with_receiver_socket_name_not_in_pipeline(self): + comp1 = component_class("Comp1", output_types={"value": int})() + comp2 = component_class("Comp2", input_types={"value": int})() + pipe = PipelineBase() + pipe.add_component("comp1", comp1) + pipe.add_component("comp2", comp2) + with pytest.raises(PipelineConnectError): + pipe.connect("comp1.value", "comp2.non_existing") + + def test_connect_with_no_matching_types_and_same_names(self): + comp1 = component_class("Comp1", output_types={"value": int})() + comp2 = component_class("Comp2", input_types={"value": str})() + pipe = PipelineBase() + pipe.add_component("comp1", comp1) + pipe.add_component("comp2", comp2) + with pytest.raises(PipelineConnectError): + pipe.connect("comp1", "comp2") + + def test_connect_with_multiple_sender_connections_with_same_type_and_differing_name(self): + comp1 = component_class("Comp1", output_types={"val1": int, "val2": int})() + comp2 = component_class("Comp2", input_types={"value": int})() + pipe = PipelineBase() + pipe.add_component("comp1", comp1) + pipe.add_component("comp2", comp2) + with pytest.raises(PipelineConnectError): + pipe.connect("comp1", "comp2") + + def test_connect_with_multiple_receiver_connections_with_same_type_and_differing_name(self): + comp1 = component_class("Comp1", output_types={"value": int})() + comp2 = component_class("Comp2", input_types={"val1": int, "val2": int})() + pipe = PipelineBase() + pipe.add_component("comp1", comp1) + pipe.add_component("comp2", comp2) + with pytest.raises(PipelineConnectError): + pipe.connect("comp1", "comp2") + + def test_connect_with_multiple_sender_connections_with_same_type_and_same_name(self): + comp1 = component_class("Comp1", output_types={"value": int, "other": int})() + comp2 = component_class("Comp2", input_types={"value": int})() + pipe = PipelineBase() + pipe.add_component("comp1", comp1) + pipe.add_component("comp2", comp2) + pipe.connect("comp1", "comp2") + + assert comp1.__haystack_output__.value.receivers == ["comp2"] + assert comp2.__haystack_input__.value.senders == ["comp1"] + assert list(pipe.graph.edges) == [("comp1", "comp2", "value/value")] + + def test_connect_with_multiple_receiver_connections_with_same_type_and_same_name(self): + comp1 = component_class("Comp1", output_types={"value": int})() + comp2 = component_class("Comp2", input_types={"value": int, "other": int})() + pipe = PipelineBase() + pipe.add_component("comp1", comp1) + pipe.add_component("comp2", comp2) + pipe.connect("comp1", "comp2") + + assert comp1.__haystack_output__.value.receivers == ["comp2"] + assert comp2.__haystack_input__.value.senders == ["comp1"] + assert list(pipe.graph.edges) == [("comp1", "comp2", "value/value")] + + def test_connect_multiple_outputs_to_non_variadic_input(self): + comp1 = component_class("Comp1", output_types={"value": int})() + comp2 = component_class("Comp2", output_types={"value": int})() + comp3 = component_class("Comp3", input_types={"value": int})() + pipe = PipelineBase() + pipe.add_component("comp1", comp1) + pipe.add_component("comp2", comp2) + pipe.add_component("comp3", comp3) + pipe.connect("comp1.value", "comp3.value") + with pytest.raises(PipelineConnectError): + pipe.connect("comp2.value", "comp3.value") + + def test_connect_multiple_outputs_to_variadic_input(self): + comp1 = component_class("Comp1", output_types={"value": int})() + comp2 = component_class("Comp2", output_types={"value": int})() + comp3 = component_class("Comp3", 
input_types={"value": Variadic[int]})() + pipe = PipelineBase() + pipe.add_component("comp1", comp1) + pipe.add_component("comp2", comp2) + pipe.add_component("comp3", comp3) + pipe.connect("comp1.value", "comp3.value") + pipe.connect("comp2.value", "comp3.value") + + assert comp1.__haystack_output__.value.receivers == ["comp3"] + assert comp2.__haystack_output__.value.receivers == ["comp3"] + assert comp3.__haystack_input__.value.senders == ["comp1", "comp2"] + assert list(pipe.graph.edges) == [("comp1", "comp3", "value/value"), ("comp2", "comp3", "value/value")] + + def test_connect_same_component_as_sender_and_receiver(self): + """ + This pipeline consists of one component, which would be connected to itself. + Connecting a component to itself is raises PipelineConnectError. + """ + pipe = PipelineBase() + single_component = FakeComponent() + pipe.add_component("single_component", single_component) + with pytest.raises(PipelineConnectError): + pipe.connect("single_component.out", "single_component.in") + + @pytest.mark.parametrize( + "component_inputs,sockets,expected_inputs", + [ + ({"mandatory": 1}, {"mandatory": InputSocket("mandatory", int)}, {"mandatory": 1}), + ({}, {"optional": InputSocket("optional", str, default_value="test")}, {"optional": "test"}), + ( + {"mandatory": 1}, + { + "mandatory": InputSocket("mandatory", int), + "optional": InputSocket("optional", str, default_value="test"), + }, + {"mandatory": 1, "optional": "test"}, + ), + ( + {}, + {"optional_variadic": InputSocket("optional_variadic", Variadic[str], default_value="test")}, + {"optional_variadic": ["test"]}, + ), + ( + {}, + { + "optional_1": InputSocket("optional_1", int, default_value=1), + "optional_2": InputSocket("optional_2", int, default_value=2), + }, + {"optional_1": 1, "optional_2": 2}, + ), + ], + ids=["no-defaults", "only-default", "mixed-default", "variadic-default", "multiple_defaults"], + ) + def test__add_missing_defaults(self, component_inputs, sockets, expected_inputs): + filled_inputs = PipelineBase._add_missing_input_defaults(component_inputs, sockets) + + assert filled_inputs == expected_inputs + + def test__find_receivers_from(self): + sentence_builder = component_class( + "SentenceBuilder", input_types={"words": List[str]}, output_types={"text": str} + )() + document_builder = component_class( + "DocumentBuilder", input_types={"text": str}, output_types={"doc": Document} + )() + conditional_document_builder = component_class( + "ConditionalDocumentBuilder", output_types={"doc": Document, "noop": None} + )() + + document_joiner = component_class("DocumentJoiner", input_types={"docs": Variadic[Document]})() + + pipe = PipelineBase() + pipe.add_component("sentence_builder", sentence_builder) + pipe.add_component("document_builder", document_builder) + pipe.add_component("document_joiner", document_joiner) + pipe.add_component("conditional_document_builder", conditional_document_builder) + pipe.connect("sentence_builder.text", "document_builder.text") + pipe.connect("document_builder.doc", "document_joiner.docs") + pipe.connect("conditional_document_builder.doc", "document_joiner.docs") + + res = pipe._find_receivers_from("sentence_builder") + assert res == [ + ( + "document_builder", + OutputSocket(name="text", type=str, receivers=["document_builder"]), + InputSocket(name="text", type=str, default_value=_empty, senders=["sentence_builder"]), + ) + ] + + res = pipe._find_receivers_from("document_builder") + assert res == [ + ( + "document_joiner", + OutputSocket(name="doc", type=Document, 
receivers=["document_joiner"]), + InputSocket( + name="docs", + type=Variadic[Document], + default_value=_empty, + senders=["document_builder", "conditional_document_builder"], + ), + ) + ] + + res = pipe._find_receivers_from("document_joiner") + assert res == [] + + res = pipe._find_receivers_from("conditional_document_builder") + assert res == [ + ( + "document_joiner", + OutputSocket(name="doc", type=Document, receivers=["document_joiner"]), + InputSocket( + name="docs", + type=Variadic[Document], + default_value=_empty, + senders=["document_builder", "conditional_document_builder"], + ), + ) + ] + + @pytest.mark.parametrize( + "component, inputs, expected_priority, test_description", + [ + # Test case 1: BLOCKED - Missing mandatory input + ( + { + "instance": "mock_instance", + "visits": 0, + "input_sockets": { + "mandatory_input": InputSocket("mandatory_input", int), + "optional_input": InputSocket( + "optional_input", str, default_value="default", senders=["previous_component"] + ), + }, + }, + {"optional_input": [{"sender": "previous_component", "value": "test"}]}, + ComponentPriority.BLOCKED, + "Component should be BLOCKED when mandatory input is missing", + ), + # Test case 2: BLOCKED - No trigger after first visit + ( + { + "instance": "mock_instance", + "visits": 1, # Already visited + "input_sockets": { + "mandatory_input": InputSocket("mandatory_input", int), + "optional_input": InputSocket("optional_input", str, default_value="default"), + }, + }, + {"mandatory_input": [{"sender": None, "value": 42}]}, + ComponentPriority.BLOCKED, + "Component should be BLOCKED when there's no new trigger after first visit", + ), + # Test case 3: HIGHEST - Greedy socket ready + ( + { + "instance": "mock_instance", + "visits": 0, + "input_sockets": { + "greedy_input": InputSocket("greedy_input", GreedyVariadic[int], senders=["component1"]), + "normal_input": InputSocket("normal_input", str, senders=["component2"]), + }, + }, + { + "greedy_input": [{"sender": "component1", "value": 42}], + "normal_input": [{"sender": "component2", "value": "test"}], + }, + ComponentPriority.HIGHEST, + "Component should have HIGHEST priority when greedy socket has valid input", + ), + # Test case 4: DEFER - Greedy socket ready but optional missing + ( + { + "instance": "mock_instance", + "visits": 0, + "input_sockets": { + "greedy_input": InputSocket("greedy_input", GreedyVariadic[int], senders=["component1"]), + "optional_input": InputSocket( + "optional_input", str, senders=["component2"], default_value="test" + ), + }, + }, + {"greedy_input": [{"sender": "component1", "value": 42}]}, + ComponentPriority.DEFER, + "Component should DEFER when greedy socket has valid input but expected optional input is missing", + ), + # Test case 4: READY - All predecessors executed + ( + { + "instance": "mock_instance", + "visits": 0, + "input_sockets": { + "mandatory_input": InputSocket("mandatory_input", int, senders=["previous_component"]), + "optional_input": InputSocket( + "optional_input", str, senders=["another_component"], default_value="default" + ), + }, + }, + { + "mandatory_input": [{"sender": "previous_component", "value": 42}], + "optional_input": [{"sender": "another_component", "value": "test"}], + }, + ComponentPriority.READY, + "Component should be READY when all predecessors have executed", + ), + # Test case 5: DEFER - Lazy variadic sockets resolved and optional missing. 
+            (
+                {
+                    "instance": "mock_instance",
+                    "visits": 0,
+                    "input_sockets": {
+                        "variadic_input": InputSocket(
+                            "variadic_input", Variadic[int], senders=["component1", "component2"]
+                        ),
+                        "normal_input": InputSocket("normal_input", str, senders=["component3"]),
+                        "optional_input": InputSocket(
+                            "optional_input", str, default_value="default", senders=["component4"]
+                        ),
+                    },
+                },
+                {
+                    "variadic_input": [
+                        {"sender": "component1", "value": "test"},
+                        {"sender": "component2", "value": _NO_OUTPUT_PRODUCED},
+                    ],
+                    "normal_input": [{"sender": "component3", "value": "test"}],
+                },
+                ComponentPriority.DEFER,
+                "Component should DEFER when all lazy variadic sockets are resolved",
+            ),
+            # Test case 7: DEFER_LAST - Incomplete variadic inputs
+            (
+                {
+                    "instance": "mock_instance",
+                    "visits": 0,
+                    "input_sockets": {
+                        "variadic_input": InputSocket(
+                            "variadic_input", Variadic[int], senders=["component1", "component2"]
+                        ),
+                        "normal_input": InputSocket("normal_input", str),
+                    },
+                },
+                {
+                    "variadic_input": [{"sender": "component1", "value": 42}],  # Missing component2
+                    "normal_input": [{"sender": "component3", "value": "test"}],
+                },
+                ComponentPriority.DEFER_LAST,
+                "Component should be DEFER_LAST when not all variadic senders have produced output",
+            ),
+            # Test case 8: READY - No input sockets, first visit
+            (
+                {
+                    "instance": "mock_instance",
+                    "visits": 0,
+                    "input_sockets": {"optional_input": InputSocket("optional_input", str, default_value="default")},
+                },
+                {},  # no inputs
+                ComponentPriority.READY,
+                "Component should be READY on first visit when it has no input sockets",
+            ),
+            # Test case 9: BLOCKED - No connected input sockets, subsequent visit
+            (
+                {
+                    "instance": "mock_instance",
+                    "visits": 1,
+                    "input_sockets": {"optional_input": InputSocket("optional_input", str, default_value="default")},
+                },
+                {},  # no inputs
+                ComponentPriority.BLOCKED,
+                "Component should be BLOCKED on subsequent visits when it has no input sockets",
+            ),
+        ],
+        ids=lambda p: p.name if isinstance(p, ComponentPriority) else str(p),
+    )
+    def test__calculate_priority(self, component, inputs, expected_priority, test_description):
+        """Test priority calculation for various component and input combinations."""
+        # For variadic inputs, set up senders if needed
+        for socket in component["input_sockets"].values():
+            if socket.is_variadic and not hasattr(socket, "senders"):
+                socket.senders = ["component1", "component2"]
+
+        assert PipelineBase._calculate_priority(component, inputs) == expected_priority
+
+    @pytest.mark.parametrize(
+        "pipeline_inputs,expected_output",
+        [
+            # Test case 1: Empty input
+            ({}, {}),
+            # Test case 2: Single component, multiple inputs
+            (
+                {"component1": {"input1": 42, "input2": "test", "input3": True}},
+                {
+                    "component1": {
+                        "input1": [{"sender": None, "value": 42}],
+                        "input2": [{"sender": None, "value": "test"}],
+                        "input3": [{"sender": None, "value": True}],
+                    }
+                },
+            ),
+            # Test case 3: Multiple components
+            (
+                {
+                    "component1": {"input1": 42, "input2": "test"},
+                    "component2": {"input3": [1, 2, 3], "input4": {"key": "value"}},
+                },
+                {
+                    "component1": {
+                        "input1": [{"sender": None, "value": 42}],
+                        "input2": [{"sender": None, "value": "test"}],
+                    },
+                    "component2": {
+                        "input3": [{"sender": None, "value": [1, 2, 3]}],
+                        "input4": [{"sender": None, "value": {"key": "value"}}],
+                    },
+                },
+            ),
+        ],
+        ids=["empty_input", "single_component_multiple_inputs", "multiple_components"],
+    )
+    def test__convert_to_internal_format(self, pipeline_inputs, expected_output):
+        """Test
conversion of legacy pipeline inputs to internal format.""" + result = PipelineBase._convert_to_internal_format(pipeline_inputs) + assert result == expected_output + + @pytest.mark.parametrize( + "socket_type,existing_inputs,expected_count", + [ + ("regular", None, 1), # Regular socket should overwrite + ("regular", [{"sender": "other", "value": 24}], 1), # Should still overwrite + ("lazy_variadic", None, 1), # First input to lazy variadic + ("lazy_variadic", [{"sender": "other", "value": 24}], 2), # Should append + ], + ids=["regular-new", "regular-existing", "variadic-new", "variadic-existing"], + ) + def test__write_component_outputs_different_sockets( + self, + socket_type, + existing_inputs, + expected_count, + regular_output_socket, + regular_input_socket, + lazy_variadic_input_socket, + ): + """Test writing to different socket types with various existing input states""" + receiver_socket = lazy_variadic_input_socket if socket_type == "lazy_variadic" else regular_input_socket + socket_name = receiver_socket.name + receivers = [("receiver1", regular_output_socket, receiver_socket)] + + inputs = {} + if existing_inputs: + inputs = {"receiver1": {socket_name: existing_inputs}} + + component_outputs = {"output1": 42} + + PipelineBase._write_component_outputs( + component_name="sender1", + component_outputs=component_outputs, + inputs=inputs, + receivers=receivers, + include_outputs_from=[], + ) + + assert len(inputs["receiver1"][socket_name]) == expected_count + assert {"sender": "sender1", "value": 42} in inputs["receiver1"][socket_name] + + @pytest.mark.parametrize( + "component_outputs,include_outputs,expected_pruned", + [ + ({"output1": 42, "output2": 24}, [], {"output2": 24}), # Prune consumed outputs only + ({"output1": 42, "output2": 24}, ["sender1"], {"output1": 42, "output2": 24}), # Keep all outputs + ({}, [], {}), # No outputs case + ], + ids=["prune-consumed", "keep-all", "no-outputs"], + ) + def test__write_component_outputs_output_pruning( + self, component_outputs, include_outputs, expected_pruned, regular_output_socket, regular_input_socket + ): + """Test output pruning behavior under different scenarios""" + receivers = [("receiver1", regular_output_socket, regular_input_socket)] + + pruned_outputs = PipelineBase._write_component_outputs( + component_name="sender1", + component_outputs=component_outputs, + inputs={}, + receivers=receivers, + include_outputs_from=include_outputs, + ) + + assert pruned_outputs == expected_pruned + + @pytest.mark.parametrize( + "output_value", + [42, None, _NO_OUTPUT_PRODUCED, "string_value", 3.14], + ids=["int", "none", "no-output", "string", "float"], + ) + def test__write_component_outputs_different_output_values( + self, output_value, regular_output_socket, regular_input_socket + ): + """Test handling of different output values""" + receivers = [("receiver1", regular_output_socket, regular_input_socket)] + component_outputs = {"output1": output_value} + inputs = {} + PipelineBase._write_component_outputs( + component_name="sender1", + component_outputs=component_outputs, + inputs=inputs, + receivers=receivers, + include_outputs_from=[], + ) + + assert inputs["receiver1"]["input1"] == [{"sender": "sender1", "value": output_value}] + + @pytest.mark.parametrize("receivers_count", [1, 2, 3], ids=["single-receiver", "two-receivers", "three-receivers"]) + def test__write_component_outputs_multiple_receivers( + self, receivers_count, regular_output_socket, regular_input_socket + ): + """Test writing to multiple receivers""" + receivers = 
[(f"receiver{i}", regular_output_socket, regular_input_socket) for i in range(receivers_count)] + component_outputs = {"output1": 42} + + inputs = {} + PipelineBase._write_component_outputs( + component_name="sender1", + component_outputs=component_outputs, + inputs=inputs, + receivers=receivers, + include_outputs_from=[], + ) + + for i in range(receivers_count): + receiver_name = f"receiver{i}" + assert receiver_name in inputs + assert inputs[receiver_name]["input1"] == [{"sender": "sender1", "value": 42}] + + def test__get_next_runnable_component_empty(self): + """Test with empty queue returns None""" + queue = FIFOPriorityQueue() + pipeline = PipelineBase() + result = pipeline._get_next_runnable_component(queue, component_visits={}) + assert result is None + + def test__get_next_runnable_component_blocked(self): + """Test component with BLOCKED priority returns None""" + pipeline = PipelineBase() + queue = FIFOPriorityQueue() + queue.push("blocked_component", ComponentPriority.BLOCKED) + result = pipeline._get_next_runnable_component(queue, component_visits={"blocked_component": 0}) + assert result is None + + @patch("haystack.core.pipeline.base.PipelineBase._get_component_with_graph_metadata_and_visits") + def test__get_next_runnable_component_max_visits(self, mock_get_component_with_graph_metadata_and_visits): + """Test component exceeding max visits raises exception""" + pipeline = PipelineBase(max_runs_per_component=2) + queue = FIFOPriorityQueue() + queue.push("ready_component", ComponentPriority.READY) + mock_get_component_with_graph_metadata_and_visits.return_value = {"instance": "test", "visits": 3} + + with pytest.raises(PipelineMaxComponentRuns) as exc_info: + pipeline._get_next_runnable_component(queue, component_visits={"ready_component": 3}) + + assert "Maximum run count 2 reached for component 'ready_component'" in str(exc_info.value) + + @patch("haystack.core.pipeline.base.PipelineBase._get_component_with_graph_metadata_and_visits") + def test__get_next_runnable_component_ready(self, mock_get_component_with_graph_metadata_and_visits): + """Test component that is READY""" + pipeline = PipelineBase() + queue = FIFOPriorityQueue() + queue.push("ready_component", ComponentPriority.READY) + mock_get_component_with_graph_metadata_and_visits.return_value = {"instance": "test", "visits": 1} + + priority, component_name, component = pipeline._get_next_runnable_component( + queue, component_visits={"ready_component": 1} + ) + + assert priority == ComponentPriority.READY + assert component_name == "ready_component" + assert component == {"instance": "test", "visits": 1} + + @pytest.mark.parametrize( + "queue_setup,expected_stale", + [ + # Empty queue case + (None, True), + # READY priority case + ((ComponentPriority.READY, "component1"), False), + # DEFER priority case + ((ComponentPriority.DEFER, "component1"), True), + ], + ids=["empty-queue", "ready-component", "deferred-component"], + ) + def test__is_queue_stale(self, queue_setup, expected_stale): + queue = FIFOPriorityQueue() + if queue_setup: + priority, component_name = queue_setup + queue.push(component_name, priority) + + result = PipelineBase._is_queue_stale(queue) + assert result == expected_stale + + @patch("haystack.core.pipeline.base.PipelineBase._calculate_priority") + @patch("haystack.core.pipeline.base.PipelineBase._get_component_with_graph_metadata_and_visits") + def test_fill_queue(self, mock_get_metadata, mock_calc_priority): + pipeline = PipelineBase() + component_names = ["comp1", "comp2"] + inputs = {"comp1": 
{"input1": "value1"}, "comp2": {"input2": "value2"}} + + mock_get_metadata.side_effect = lambda name, _: {"component": f"mock_{name}"} + mock_calc_priority.side_effect = [1, 2] # Different priorities for testing + + queue = pipeline._fill_queue(component_names, inputs, component_visits={"comp1": 1, "comp2": 1}) + + assert mock_get_metadata.call_count == 2 + assert mock_calc_priority.call_count == 2 + + # Verify correct calls for first component + mock_get_metadata.assert_any_call("comp1", 1) + mock_calc_priority.assert_any_call({"component": "mock_comp1"}, {"input1": "value1"}) + + # Verify correct calls for second component + mock_get_metadata.assert_any_call("comp2", 1) + mock_calc_priority.assert_any_call({"component": "mock_comp2"}, {"input2": "value2"}) + + assert queue.pop() == (1, "comp1") + assert queue.pop() == (2, "comp2") + + @pytest.mark.parametrize( + "input_sockets,component_inputs,expected_consumed,expected_remaining", + [ + # Regular socket test + ( + {"input1": InputSocket("input1", int)}, + {"input1": [{"sender": "comp1", "value": 42}, {"sender": "comp2", "value": 24}]}, + {"input1": 42}, # Should take first valid input + {}, # All pipeline inputs should be removed + ), + # Regular socket with user input + ( + {"input1": InputSocket("input1", int)}, + { + "input1": [ + {"sender": "comp1", "value": 42}, + {"sender": None, "value": 24}, # User input + ] + }, + {"input1": 42}, + {"input1": [{"sender": None, "value": 24}]}, # User input should remain + ), + # Greedy variadic socket + ( + {"greedy": InputSocket("greedy", GreedyVariadic[int])}, + { + "greedy": [ + {"sender": "comp1", "value": 42}, + {"sender": None, "value": 24}, # User input + {"sender": "comp2", "value": 33}, + ] + }, + {"greedy": [42]}, # Takes first valid input + {}, # All inputs removed for greedy sockets + ), + # Lazy variadic socket + ( + {"lazy": InputSocket("lazy", Variadic[int])}, + { + "lazy": [ + {"sender": "comp1", "value": 42}, + {"sender": "comp2", "value": 24}, + {"sender": None, "value": 33}, # User input + ] + }, + {"lazy": [42, 24, 33]}, # Takes all valid inputs + {"lazy": [{"sender": None, "value": 33}]}, # User input remains + ), + # Mixed socket types + ( + { + "regular": InputSocket("regular", int), + "greedy": InputSocket("greedy", GreedyVariadic[int]), + "lazy": InputSocket("lazy", Variadic[int]), + }, + { + "regular": [{"sender": "comp1", "value": 42}, {"sender": None, "value": 24}], + "greedy": [{"sender": "comp2", "value": 33}, {"sender": None, "value": 15}], + "lazy": [{"sender": "comp3", "value": 55}, {"sender": "comp4", "value": 66}], + }, + {"regular": 42, "greedy": [33], "lazy": [55, 66]}, + {"regular": [{"sender": None, "value": 24}]}, # Only non-greedy user input remains + ), + # Filtering _NO_OUTPUT_PRODUCED + ( + {"input1": InputSocket("input1", int)}, + { + "input1": [ + {"sender": "comp1", "value": _NO_OUTPUT_PRODUCED}, + {"sender": "comp2", "value": 42}, + {"sender": "comp2", "value": _NO_OUTPUT_PRODUCED}, + ] + }, + {"input1": 42}, # Should skip _NO_OUTPUT_PRODUCED values + {}, # All inputs consumed + ), + ], + ids=[ + "regular-socket", + "regular-with-user-input", + "greedy-variadic", + "lazy-variadic", + "mixed-sockets", + "no-output-filtering", + ], + ) + def test__consume_component_inputs(self, input_sockets, component_inputs, expected_consumed, expected_remaining): + # Setup + component = {"input_sockets": input_sockets} + inputs = {"test_component": component_inputs} + + # Run + consumed = PipelineBase._consume_component_inputs("test_component", component, inputs) 
+ + # Verify + assert consumed == expected_consumed + assert inputs["test_component"] == expected_remaining diff --git a/test/core/pipeline/test_tracing.py b/test/core/pipeline/test_tracing.py index 6b5493383d..a643d11f41 100644 --- a/test/core/pipeline/test_tracing.py +++ b/test/core/pipeline/test_tracing.py @@ -58,7 +58,9 @@ def test_with_enabled_tracing(self, pipeline: Pipeline, spying_tracer: SpyingTra "haystack.component.type": "Hello", "haystack.component.input_types": {"word": "str"}, "haystack.component.input_spec": {"word": {"type": ANY, "senders": []}}, + "haystack.component.input": {"word": "world"}, "haystack.component.output_spec": {"output": {"type": "str", "receivers": ["hello2"]}}, + "haystack.component.output": {"output": "Hello, world!"}, "haystack.component.visits": 1, }, parent_span=pipeline_span, @@ -72,7 +74,9 @@ def test_with_enabled_tracing(self, pipeline: Pipeline, spying_tracer: SpyingTra "haystack.component.type": "Hello", "haystack.component.input_types": {"word": "str"}, "haystack.component.input_spec": {"word": {"type": ANY, "senders": ["hello"]}}, + "haystack.component.input": {"word": "Hello, world!"}, "haystack.component.output_spec": {"output": {"type": "str", "receivers": []}}, + "haystack.component.output": {"output": "Hello, Hello, world!!"}, "haystack.component.visits": 1, }, parent_span=pipeline_span, diff --git a/test/core/pipeline/test_utils.py b/test/core/pipeline/test_utils.py index fcb5734f50..e2cc26cac3 100644 --- a/test/core/pipeline/test_utils.py +++ b/test/core/pipeline/test_utils.py @@ -1,9 +1,177 @@ # SPDX-FileCopyrightText: 2022-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from haystack.core.pipeline.utils import parse_connect_string + +import pytest + +from haystack.core.pipeline.utils import parse_connect_string, FIFOPriorityQueue def test_parse_connection(): assert parse_connect_string("foobar") == ("foobar", None) assert parse_connect_string("foo.bar") == ("foo", "bar") + + +@pytest.fixture +def empty_queue(): + """Fixture providing a fresh empty queue for each test.""" + return FIFOPriorityQueue() + + +def test_empty_queue_initialization(empty_queue): + """Test that a new queue is empty.""" + assert len(empty_queue) == 0 + assert not bool(empty_queue) + + +def test_push_single_item(empty_queue): + """Test pushing a single item.""" + empty_queue.push("item1", 1) + assert len(empty_queue) == 1 + assert bool(empty_queue) + assert empty_queue.peek() == (1, "item1") + + +def test_push_multiple_items_different_priorities(empty_queue): + """Test pushing multiple items with different priorities.""" + items = [("item3", 3), ("item1", 1), ("item2", 2)] + for item, priority in items: + empty_queue.push(item, priority) + + # Items should come out in priority order + assert empty_queue.pop() == (1, "item1") + assert empty_queue.pop() == (2, "item2") + assert empty_queue.pop() == (3, "item3") + + +def test_push_multiple_items_same_priority(empty_queue): + """Test FIFO behavior for items with equal priority.""" + items = [("first", 1), ("second", 1), ("third", 1)] + for item, priority in items: + empty_queue.push(item, priority) + + # Items should come out in insertion order + assert empty_queue.pop() == (1, "first") + assert empty_queue.pop() == (1, "second") + assert empty_queue.pop() == (1, "third") + + +def test_mixed_priority_and_fifo(empty_queue): + """Test mixed priority levels with some equal priorities.""" + empty_queue.push("medium1", 2) + empty_queue.push("high", 1) + empty_queue.push("medium2", 2) + 
empty_queue.push("low", 3) + + # Check extraction order + assert empty_queue.pop() == (1, "high") + assert empty_queue.pop() == (2, "medium1") + assert empty_queue.pop() == (2, "medium2") + assert empty_queue.pop() == (3, "low") + + +def test_peek_behavior(empty_queue): + """Test that peek returns items without removing them.""" + empty_queue.push("item1", 1) + empty_queue.push("item2", 2) + + # Peek multiple times + for _ in range(3): + assert empty_queue.peek() == (1, "item1") + assert len(empty_queue) == 2 + + +def test_get_behavior(empty_queue): + """Test the get method with both empty and non-empty queues.""" + # Test on empty queue + assert empty_queue.get() is None + + # Test with items + empty_queue.push("item1", 1) + assert empty_queue.get() == (1, "item1") + assert empty_queue.get() is None # Queue should be empty again + + +def test_pop_empty_queue(empty_queue): + """Test that pop raises IndexError on empty queue.""" + with pytest.raises(IndexError, match="pop from empty queue"): + empty_queue.pop() + + +def test_peek_empty_queue(empty_queue): + """Test that peek raises IndexError on empty queue.""" + with pytest.raises(IndexError, match="peek at empty queue"): + empty_queue.peek() + + +def test_length_updates(empty_queue): + """Test that length updates correctly with pushes and pops.""" + initial_len = len(empty_queue) + assert initial_len == 0 + + # Test length increases + empty_queue.push("item1", 1) + assert len(empty_queue) == 1 + empty_queue.push("item2", 2) + assert len(empty_queue) == 2 + + # Test length decreases + empty_queue.pop() + assert len(empty_queue) == 1 + empty_queue.pop() + assert len(empty_queue) == 0 + + +def test_bool_conversion(empty_queue): + """Test boolean conversion in various states.""" + # Empty queue should be False + assert not bool(empty_queue) + + # Queue with items should be True + empty_queue.push("item", 1) + assert bool(empty_queue) + + # Queue should be False again after removing item + empty_queue.pop() + assert not bool(empty_queue) + + +def test_large_number_of_items(empty_queue): + """Test handling of a large number of items with mixed priorities.""" + # Add 1000 items with 10 different priority levels + for i in range(1000): + priority = i % 10 + empty_queue.push(f"item{i}", priority) + + # Verify FIFO order within same priority + last_priority = -1 + last_index = -1 + for _ in range(1000): + priority, item = empty_queue.pop() + current_index = int(item[4:]) # Extract index from "itemX" + + if priority == last_priority: + assert current_index > last_index, "FIFO order violated within same priority" + else: + assert priority > last_priority, "Priority order violated" + + last_priority = priority + last_index = current_index + + +@pytest.mark.parametrize( + "items", + [ + [(1, "A"), (1, "B"), (1, "C")], # Same priority + [(3, "A"), (2, "B"), (1, "C")], # Different priorities + [(2, "A"), (1, "B"), (2, "C")], # Mixed priorities + ], +) +def test_queue_ordering_parametrized(empty_queue, items): + """Parametrized test for different ordering scenarios.""" + for priority, item in items: + empty_queue.push(item, priority) + + sorted_items = sorted(items, key=lambda x: (x[0], items.index(x))) + for priority, item in sorted_items: + assert empty_queue.pop() == (priority, item) diff --git a/test/test_files/yaml/test_pipeline.yaml b/test/test_files/yaml/test_pipeline.yaml index 945ef018ae..53c281d30c 100644 --- a/test/test_files/yaml/test_pipeline.yaml +++ b/test/test_files/yaml/test_pipeline.yaml @@ -2,11 +2,11 @@ components: Comp1: 
init_parameters:
       an_init_param: null
-    type: test.core.pipeline.test_pipeline.FakeComponent
+    type: test.core.pipeline.test_pipeline_base.FakeComponent
   Comp2:
     init_parameters:
       an_init_param: null
-    type: test.core.pipeline.test_pipeline.FakeComponent
+    type: test.core.pipeline.test_pipeline_base.FakeComponent
 connections:
 - receiver: Comp2.input_
   sender: Comp1.value
diff --git a/test/tracing/test_tracer.py b/test/tracing/test_tracer.py
index 3ef63c7b26..2589bd65ef 100644
--- a/test/tracing/test_tracer.py
+++ b/test/tracing/test_tracer.py
@@ -153,21 +153,10 @@ def test__auto_configured_datadog_tracer_with_failing_import(self, monkeypatch):


 class TestTracingContent:
-    def test_set_content_tag_with_default_settings(self, spying_tracer: SpyingTracer) -> None:
-        with tracer.trace("test") as span:
-            span.set_content_tag("my_content", "my_content")
+    def test_set_content_tag_with_enabled_content_tracing(self, spying_tracer: SpyingTracer) -> None:
+        # SpyingTracer supports content tracing by default, so no monkeypatching is needed

-        assert len(spying_tracer.spans) == 1
-        span = spying_tracer.spans[0]
-        assert span.tags == {}
-
-    def test_set_content_tag_with_enabled_content_tracing(
-        self, monkeypatch: MonkeyPatch, spying_tracer: SpyingTracer
-    ) -> None:
         enable_tracing(spying_tracer)
-        # monkeypatch to avoid impact on other tests
-        monkeypatch.setattr(tracer, "is_content_tracing_enabled", True)
-
         with tracer.trace("test") as span:
             span.set_content_tag("my_content", "my_content")

@@ -175,9 +164,10 @@ def test_set_content_tag_with_enabled_content_tracing(
         span = spying_tracer.spans[0]
         assert span.tags == {"my_content": "my_content"}

-    def test_set_content_tag_when_enabled_via_env_variable(self, monkeypatch: MonkeyPatch) -> None:
-        monkeypatch.setenv(HAYSTACK_CONTENT_TRACING_ENABLED_ENV_VAR, "true")
+    def test_set_content_tag_when_disabled_via_env_variable(self, monkeypatch: MonkeyPatch) -> None:
+        # Content tracing should be disabled when the env variable is set to "false"
+        monkeypatch.setenv(HAYSTACK_CONTENT_TRACING_ENABLED_ENV_VAR, "false")
         proxy_tracer = ProxyTracer(provided_tracer=SpyingTracer())

-        assert proxy_tracer.is_content_tracing_enabled is True
+        assert proxy_tracer.is_content_tracing_enabled is False
diff --git a/test/tracing/utils.py b/test/tracing/utils.py
index c2261baacf..198fd7089e 100644
--- a/test/tracing/utils.py
+++ b/test/tracing/utils.py
@@ -13,6 +13,7 @@ class SpyingSpan(Span):
     operation_name: str
     parent_span: Optional[Span] = None

+    tags: Dict[str, Any] = dataclasses.field(default_factory=dict)
     trace_id: Optional[str] = dataclasses.field(default_factory=lambda: str(uuid.uuid4()))

@@ -24,6 +25,12 @@ def set_tag(self, key: str, value: Any) -> None:
     def get_correlation_data_for_logs(self) -> Dict[str, Any]:
         return {"trace_id": self.trace_id, "span_id": self.span_id}

+    def set_content_tag(self, key: str, value: Any) -> None:
+        """
+        Record the content tag unconditionally: the spying tracer supports content tracing by default.
+        """
+        self.set_tag(key, value)
+

 class SpyingTracer(Tracer):
     def current_span(self) -> Optional[Span]:
@@ -37,7 +44,6 @@ def trace(
         self, operation_name: str, tags: Optional[Dict[str, Any]] = None, parent_span: Optional[Span] = None
     ) -> Iterator[Span]:
         new_span = SpyingSpan(operation_name, parent_span)
-
        for key, value in (tags or {}).items():
            new_span.set_tag(key, value)
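
Note on the new FIFOPriorityQueue: the tests in test/core/pipeline/test_utils.py pin down its observable behavior completely: min-priority ordering, FIFO tie-breaking among equal priorities, (priority, item) tuples from pop() and peek(), a non-raising get(), and IndexError with the messages "pop from empty queue" / "peek at empty queue". Below is a minimal sketch consistent with those assertions; it is an illustration only, and the shipped implementation in haystack/core/pipeline/utils.py may be organized differently internally.

import heapq
from itertools import count
from typing import Any, List, Optional, Tuple


class FIFOPriorityQueue:
    """Sketch: a min-heap keyed on (priority, insertion_index), so items with
    equal priority are served in insertion (FIFO) order."""

    def __init__(self) -> None:
        self._heap: List[Tuple[int, int, Any]] = []
        self._counter = count()  # monotonically increasing tie-breaker

    def push(self, item: Any, priority: int) -> None:
        # The insertion index makes ordering stable for equal priorities.
        heapq.heappush(self._heap, (priority, next(self._counter), item))

    def pop(self) -> Tuple[int, Any]:
        if not self._heap:
            raise IndexError("pop from empty queue")
        priority, _, item = heapq.heappop(self._heap)
        return priority, item

    def peek(self) -> Tuple[int, Any]:
        if not self._heap:
            raise IndexError("peek at empty queue")
        priority, _, item = self._heap[0]
        return priority, item

    def get(self) -> Optional[Tuple[int, Any]]:
        # Non-raising variant of pop(), as exercised by test_get_behavior.
        return self.pop() if self._heap else None

    def __len__(self) -> int:
        return len(self._heap)

    def __bool__(self) -> bool:
        return bool(self._heap)

Because ComponentPriority is an IntEnum (HIGHEST = 1 through BLOCKED = 5), pushing component names with their priority values makes the scheduler serve HIGHEST before READY and so on; the _is_queue_stale tests show the pipeline treats the queue as stale when it is empty or when its best item is worse than READY.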