From f7ffbc8fd5dfd67136e4a926e40a5f2172690fad Mon Sep 17 00:00:00 2001 From: brimoor Date: Tue, 15 Jun 2021 16:08:30 -0400 Subject: [PATCH] docs updates --- fiftyone/core/collections.py | 40 +++++++++++++++++++++++------------ fiftyone/core/models.py | 25 ++++++++++++++-------- fiftyone/utils/flash.py | 41 ++++++++++++++++++++++++------------ 3 files changed, 70 insertions(+), 36 deletions(-) diff --git a/fiftyone/core/collections.py b/fiftyone/core/collections.py index f2756d11d50..67dd7e19fd2 100644 --- a/fiftyone/core/collections.py +++ b/fiftyone/core/collections.py @@ -1222,7 +1222,7 @@ def apply_model( batch_size=None, num_workers=None, skip_failures=True, - **kwargs, + **trainer_kwargs, ): """Applies the :class:`FiftyOne model ` or :class:`Lightning Flash model ` to the @@ -1259,9 +1259,10 @@ def apply_model( raising an error if predictions cannot be generated for a sample. Only applicable to :class:`fiftyone.core.models.Model` instances - kwargs: additional kwargs used to construct a - :class:`flash:flash.core.trainer.Trainer` for Flash - models + **trainer_kwargs: optional keyword arguments used to initialize the + :class:`flash:flash.core.trainer.Trainer` when using Flash + models. These can be used to, for example, configure the number + of GPUs to use and other distributed inference parameters """ fomo.apply_model( self, @@ -1272,7 +1273,7 @@ def apply_model( batch_size=batch_size, num_workers=num_workers, skip_failures=skip_failures, - **kwargs, + **trainer_kwargs, ) def compute_embeddings( @@ -1282,25 +1283,33 @@ def compute_embeddings( batch_size=None, num_workers=None, skip_failures=True, + **trainer_kwargs, ): """Computes embeddings for the samples in the collection using the - given :class:`fiftyone.core.models.Model`. + given :class:`FiftyOne model ` or + :class:`Lightning Flash model `. 
This method supports all the following cases: - - Using an image model to compute embeddings for an image collection - - Using an image model to compute frame embeddings for a video - collection - - Using a video model to compute embeddings for a video collection - - The ``model`` must expose embeddings, i.e., + - Using an image :class:`fiftyone.core.models.Model` to compute + embeddings for an image collection + - Using an image :class:`fiftyone.core.models.Model` to compute frame + embeddings for a video collection + - Using a video :class:`fiftyone.core.models.Model` to compute + embeddings for a video collection + - Using an :class:`flash:flash.image.ImageEmbedder` to compute + embeddings for an image collection + + When using a :class:`FiftyOne model `, the + model must expose embeddings, i.e., :meth:`fiftyone.core.models.Model.has_embeddings` must return ``True``. If an ``embeddings_field`` is provided, the embeddings are saved to the samples; otherwise, the embeddings are returned in-memory. Args: - model: a :class:`fiftyone.core.models.Model` + model: a :class:`fiftyone.core.models.Model` or + :class:`flash:flash.core.model.Task` embeddings_field (None): the name of a field in which to store the embeddings. When computing video frame embeddings, the "frames." prefix is optional @@ -1313,6 +1322,10 @@ def compute_embeddings( raising an error if embeddings cannot be generated for a sample. Only applicable to :class:`fiftyone.core.models.Model` instances + **trainer_kwargs: optional keyword arguments used to initialize the + :class:`flash:flash.core.trainer.Trainer` when using Flash + models. 
These can be used to, for example, configure the number + of GPUs to use and other distributed inference parameters Returns: one of the following: @@ -1340,6 +1353,7 @@ def compute_embeddings( batch_size=batch_size, num_workers=num_workers, skip_failures=skip_failures, + **trainer_kwargs, ) def compute_patch_embeddings( diff --git a/fiftyone/core/models.py b/fiftyone/core/models.py index 59ac73a6efd..3d068f40e27 100644 --- a/fiftyone/core/models.py +++ b/fiftyone/core/models.py @@ -55,7 +55,7 @@ def apply_model( batch_size=None, num_workers=None, skip_failures=True, - **kwargs, + **trainer_kwargs, ): """Applies the :class:`FiftyOne model ` or :class:`Lightning Flash model ` to the samples @@ -87,9 +87,10 @@ def apply_model( skip_failures (True): whether to gracefully continue without raising an error if predictions cannot be generated for a sample. Only applicable to :class:`Model` instances - kwargs: additional kwargs used to construct a - :class:`flash:flash.core.trainer.Trainer` for Flash - models + **trainer_kwargs: optional keyword arguments used to initialize the + :class:`flash:flash.core.trainer.Trainer` when using Flash models. 
+ These can be used to, for example, configure the number of GPUs to + use and other distributed inference parameters """ if _is_flash_model(model): return fouf.apply_flash_model( @@ -100,7 +101,7 @@ def apply_model( store_logits=store_logits, batch_size=batch_size, num_workers=num_workers, - **kwargs, + **trainer_kwargs, ) if not isinstance(model, Model): @@ -619,6 +620,7 @@ def compute_embeddings( batch_size=None, num_workers=None, skip_failures=True, + **trainer_kwargs, ): """Computes embeddings for the samples in the collection using the given :class:`FiftyOne model ` or @@ -635,15 +637,15 @@ def compute_embeddings( - Using an :class:`flash:flash.image.ImageEmbeder` to compute embeddings for an image collection - When ``model`` is a FiftyOne model, it must expose embeddings, i.e., - :meth:`Model.has_embeddings` must return ``True``. + When using a :class:`FiftyOne model `, the model must expose + embeddings, i.e., :meth:`Model.has_embeddings` must return ``True``. If an ``embeddings_field`` is provided, the embeddings are saved to the samples; otherwise, the embeddings are returned in-memory. Args: samples: a :class:`fiftyone.core.collections.SampleCollection` - model: a :class:`Model` or :class:`flash:flash.image.ImageEmbeder` + model: a :class:`Model` or :class:`flash:flash.core.model.Task` embeddings_field (None): the name of a field in which to store the embeddings. When computing video frame embeddings, the "frames." prefix is optional @@ -653,7 +655,11 @@ def compute_embeddings( Only applicable for Torch-based models skip_failures (True): whether to gracefully continue without raising an error if embeddings cannot be generated for a sample. Only - applicable to :class:`fiftyone.core.models.Model` instances + applicable to :class:`Model` instances + **trainer_kwargs: optional keyword arguments used to initialize the + :class:`flash:flash.core.trainer.Trainer` when using Flash models. 
+ These can be used to, for example, configure the number of GPUs to + use and other distributed inference parameters Returns: one of the following: @@ -680,6 +686,7 @@ def compute_embeddings( embeddings_field=embeddings_field, batch_size=batch_size, num_workers=num_workers, + **trainer_kwargs, ) if not isinstance(model, Model): diff --git a/fiftyone/utils/flash.py b/fiftyone/utils/flash.py index ed27bfa2a03..d27edebd30e 100644 --- a/fiftyone/utils/flash.py +++ b/fiftyone/utils/flash.py @@ -59,8 +59,10 @@ def apply_flash_model( batch_size (None): an optional batch size to use. If not provided, a default batch size is used num_workers (None): the number of workers for the data loader to use - trainer_kwargs: additional kwargs are passed into the Trainer() - constructor + **trainer_kwargs: optional keyword arguments used to initialize the + :class:`flash:flash.core.trainer.Trainer`. These can be used to, + for example, configure the number of GPUs to use and other + distributed inference parameters """ serializer = _get_serializer(model, confidence_thresh, store_logits) @@ -71,12 +73,14 @@ def apply_flash_model( with fou.SetAttributes( model, data_pipeline=data_pipeline, serializer=serializer ): - kwargs = dict(preprocess=model.preprocess, num_workers=num_workers) + data_kwargs = dict( + preprocess=model.preprocess, num_workers=num_workers, + ) if batch_size is not None: - kwargs["batch_size"] = batch_size + data_kwargs["batch_size"] = batch_size datamodule = fi.ImageClassificationData.from_fiftyone( - predict_dataset=samples, **kwargs + predict_dataset=samples, **data_kwargs ) predictions = flash.Trainer(**trainer_kwargs).predict( model, datamodule=datamodule @@ -87,7 +91,12 @@ def apply_flash_model( def compute_flash_embeddings( - samples, model, embeddings_field=None, batch_size=None, num_workers=None + samples, + model, + embeddings_field=None, + batch_size=None, + num_workers=None, + **trainer_kwargs, ): """Computes embeddings for the samples in the collection 
using the given :class:`Lightning Flash model `. @@ -106,6 +115,10 @@ def compute_flash_embeddings( batch_size (None): an optional batch size to use. If not provided, a default batch size is used num_workers (None): the number of workers for the data loader to use + **trainer_kwargs: optional keyword arguments used to initialize the + :class:`flash:flash.core.trainer.Trainer`. These can be used to, + for example, configure the number of GPUs to use and other + distributed inference parameters Returns: one of the following: @@ -125,18 +138,18 @@ def compute_flash_embeddings( data_pipeline = model.data_pipeline with fou.SetAttributes(model, data_pipeline=data_pipeline): - # equivalent(?) but no progress bar... - # filepaths = samples.values("filepath") - # embeddings = model.predict(filepaths) - - kwargs = dict(preprocess=model.preprocess, num_workers=num_workers) + data_kwargs = dict( + preprocess=model.preprocess, num_workers=num_workers, + ) if batch_size is not None: - kwargs["batch_size"] = batch_size + data_kwargs["batch_size"] = batch_size datamodule = fi.ImageClassificationData.from_fiftyone( - predict_dataset=samples, **kwargs + predict_dataset=samples, **data_kwargs + ) + embeddings = flash.Trainer(**trainer_kwargs).predict( + model, datamodule=datamodule ) - embeddings = flash.Trainer().predict(model, datamodule=datamodule) embeddings = list(itertools.chain.from_iterable(embeddings)) if embeddings_field is not None: