pydata · TomNicholas · Aug 7, 2019 · Jul 12, 2019 · Jul 12, 2019 · Jul 15, 2019
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -40,6 +40,8 @@ New functions/methods
 Enhancements
 ~~~~~~~~~~~~
 
+- :py:func:`~xarray.concat` and :py:func:`~xarray.open_mfdataset` now support the ``join`` kwarg.
+  It is passed down to :py:func:`~xarray.align`. By `Deepak Cherian <https://github.com/dcherian>`_.
 - In :py:meth:`~xarray.Dataset.to_zarr`, passing ``mode`` is not mandatory if
   ``append_dim`` is set, as it will automatically be set to ``'a'`` internally.
   By `David Brochart <https://github.com/davidbrochart>`_.

diff --git a/xarray/backends/api.py b/xarray/backends/api.py
@@ -609,7 +609,7 @@ def open_mfdataset(paths, chunks=None, concat_dim='_not_supplied',
                    compat='no_conflicts', preprocess=None, engine=None,
                    lock=None, data_vars='all', coords='different',
                    combine='_old_auto', autoclose=None, parallel=False,
-                   **kwargs):
+                   join='outer', **kwargs):
     """Open multiple files as a single dataset.
 
     If combine='by_coords' then the function ``combine_by_coords`` is used to 
@@ -704,6 +704,16 @@ def open_mfdataset(paths, chunks=None, concat_dim='_not_supplied',
     parallel : bool, optional
         If True, the open and preprocess steps of this function will be
         performed in parallel using ``dask.delayed``. Default is False.
+    join : {'outer', 'inner', 'left', 'right', 'exact'}, optional
+        String indicating how to combine differing indexes
+        (excluding concat_dim) in objects.
+
+        - 'outer': use the union of object indexes
+        - 'inner': use the intersection of object indexes
+        - 'left': use indexes from the first object with each dimension
+        - 'right': use indexes from the last object with each dimension
+        - 'exact': instead of aligning, raise `ValueError` when indexes to be
+          aligned are not equal
     **kwargs : optional
         Additional arguments passed on to :py:func:`xarray.open_dataset`.
 
@@ -798,18 +808,20 @@ def open_mfdataset(paths, chunks=None, concat_dim='_not_supplied',
 
             combined = auto_combine(datasets, concat_dim=concat_dim,
                                     compat=compat, data_vars=data_vars,
-                                    coords=coords, from_openmfds=True)
+                                    coords=coords, join=join,
+                                    from_openmfds=True)
         elif combine == 'nested':
             # Combined nested list by successive concat and merge operations
             # along each dimension, using structure given by "ids"
             combined = _nested_combine(datasets, concat_dims=concat_dim,
                                        compat=compat, data_vars=data_vars,
-                                       coords=coords, ids=ids)
+                                       coords=coords, ids=ids, join=join)
         elif combine == 'by_coords':
             # Redo ordering from coordinates, ignoring how they were ordered
             # previously
             combined = combine_by_coords(datasets, compat=compat,
-                                         data_vars=data_vars, coords=coords)
+                                         data_vars=data_vars, coords=coords,
+                                         join=join)
         else:
             raise ValueError("{} is an invalid option for the keyword argument"
                              " ``combine``".format(combine))

diff --git a/xarray/core/combine.py b/xarray/core/combine.py
@@ -136,7 +136,7 @@ def _check_shape_tile_ids(combined_tile_ids):
 
 def _combine_nd(combined_ids, concat_dims, data_vars='all',
                 coords='different', compat='no_conflicts',
-                fill_value=dtypes.NA):
+                fill_value=dtypes.NA, join='outer'):
     """
     Combines an N-dimensional structure of datasets into one by applying a
     series of either concat and merge operations along each dimension.
@@ -177,13 +177,14 @@ def _combine_nd(combined_ids, concat_dims, data_vars='all',
                                                     data_vars=data_vars,
                                                     coords=coords,
                                                     compat=compat,
-                                                    fill_value=fill_value)
+                                                    fill_value=fill_value,
+                                                    join=join)
     (combined_ds,) = combined_ids.values()
     return combined_ds
 
 
 def _combine_all_along_first_dim(combined_ids, dim, data_vars, coords, compat,
-                                 fill_value=dtypes.NA):
+                                 fill_value=dtypes.NA, join='outer'):
 
     # Group into lines of datasets which must be combined along dim
     # need to sort by _new_tile_id first for groupby to work
@@ -197,12 +198,13 @@ def _combine_all_along_first_dim(combined_ids, dim, data_vars, coords, compat,
         combined_ids = OrderedDict(sorted(group))
         datasets = combined_ids.values()
         new_combined_ids[new_id] = _combine_1d(datasets, dim, compat,
-                                               data_vars, coords, fill_value)
+                                               data_vars, coords, fill_value,
+                                               join)
     return new_combined_ids
 
 
 def _combine_1d(datasets, concat_dim, compat='no_conflicts', data_vars='all',
-                coords='different', fill_value=dtypes.NA):
+                coords='different', fill_value=dtypes.NA, join='outer'):
     """
     Applies either concat or merge to 1D list of datasets depending on value
     of concat_dim
@@ -211,7 +213,7 @@ def _combine_1d(datasets, concat_dim, compat='no_conflicts', data_vars='all',
     if concat_dim is not None:
         try:
             combined = concat(datasets, dim=concat_dim, data_vars=data_vars,
-                              coords=coords, fill_value=fill_value)
+                              coords=coords, fill_value=fill_value, join=join)
         except ValueError as err:
             if "encountered unexpected variable" in str(err):
                 raise ValueError("These objects cannot be combined using only "
@@ -222,7 +224,8 @@ def _combine_1d(datasets, concat_dim, compat='no_conflicts', data_vars='all',
             else:
                 raise
     else:
-        combined = merge(datasets, compat=compat, fill_value=fill_value)
+        combined = merge(datasets, compat=compat, fill_value=fill_value,
+                         join=join)
 
     return combined
 
@@ -233,7 +236,7 @@ def _new_tile_id(single_id_ds_pair):
 
 
 def _nested_combine(datasets, concat_dims, compat, data_vars, coords, ids,
-                    fill_value=dtypes.NA):
+                    fill_value=dtypes.NA, join='outer'):
 
     if len(datasets) == 0:
         return Dataset()
@@ -254,12 +257,13 @@ def _nested_combine(datasets, concat_dims, compat, data_vars, coords, ids,
     # Apply series of concatenate or merge operations along each dimension
     combined = _combine_nd(combined_ids, concat_dims, compat=compat,
                            data_vars=data_vars, coords=coords,
-                           fill_value=fill_value)
+                           fill_value=fill_value, join=join)
     return combined
 
 
 def combine_nested(datasets, concat_dim, compat='no_conflicts',
-                   data_vars='all', coords='different', fill_value=dtypes.NA):
+                   data_vars='all', coords='different', fill_value=dtypes.NA,
+                   join='outer'):
     """
     Explicitly combine an N-dimensional grid of datasets into one by using a
     succession of concat and merge operations along each dimension of the grid.
@@ -312,6 +316,16 @@ def combine_nested(datasets, concat_dim, compat='no_conflicts',
         Details are in the documentation of concat
     fill_value : scalar, optional
         Value to use for newly missing values
+    join : {'outer', 'inner', 'left', 'right', 'exact'}, optional
+        String indicating how to combine differing indexes
+        (excluding concat_dim) in objects.
+
+        - 'outer': use the union of object indexes
+        - 'inner': use the intersection of object indexes
+        - 'left': use indexes from the first object with each dimension
+        - 'right': use indexes from the last object with each dimension
+        - 'exact': instead of aligning, raise `ValueError` when indexes to be
+          aligned are not equal
 
     Returns
     -------
@@ -383,15 +397,15 @@ def combine_nested(datasets, concat_dim, compat='no_conflicts',
     # The IDs argument tells _manual_combine that datasets aren't yet sorted
     return _nested_combine(datasets, concat_dims=concat_dim, compat=compat,
                            data_vars=data_vars, coords=coords, ids=False,
-                           fill_value=fill_value)
+                           fill_value=fill_value, join=join)
 
 
 def vars_as_keys(ds):
     return tuple(sorted(ds))
 
 
 def combine_by_coords(datasets, compat='no_conflicts', data_vars='all',
-                      coords='different', fill_value=dtypes.NA):
+                      coords='different', fill_value=dtypes.NA, join='outer'):
     """
     Attempt to auto-magically combine the given datasets into one by using
     dimension coordinates.
@@ -439,6 +453,16 @@ def combine_by_coords(datasets, compat='no_conflicts', data_vars='all',
         Details are in the documentation of concat
     fill_value : scalar, optional
         Value to use for newly missing values
+    join : {'outer', 'inner', 'left', 'right', 'exact'}, optional
+        String indicating how to combine differing indexes
+        (excluding the inferred concatenation dimensions) in objects.
+
+        - 'outer': use the union of object indexes
+        - 'inner': use the intersection of object indexes
+        - 'left': use indexes from the first object with each dimension
+        - 'right': use indexes from the last object with each dimension
+        - 'exact': instead of aligning, raise `ValueError` when indexes to be
+          aligned are not equal
 
     Returns
     -------
@@ -498,7 +522,7 @@ def combine_by_coords(datasets, compat='no_conflicts', data_vars='all',
         # Concatenate along all of concat_dims one by one to create single ds
         concatenated = _combine_nd(combined_ids, concat_dims=concat_dims,
                                    data_vars=data_vars, coords=coords,
-                                   fill_value=fill_value)
+                                   fill_value=fill_value, join=join)
 
         # Check the overall coordinates are monotonically increasing
         for dim in concat_dims:
@@ -511,7 +535,7 @@ def combine_by_coords(datasets, compat='no_conflicts', data_vars='all',
         concatenated_grouped_by_data_vars.append(concatenated)
 
     return merge(concatenated_grouped_by_data_vars, compat=compat,
-                 fill_value=fill_value)
+                 fill_value=fill_value, join=join)
 
 
 # Everything beyond here is only needed until the deprecation cycle in #2616
@@ -523,7 +547,7 @@ def combine_by_coords(datasets, compat='no_conflicts', data_vars='all',
 
 def auto_combine(datasets, concat_dim='_not_supplied', compat='no_conflicts',
                  data_vars='all', coords='different', fill_value=dtypes.NA,
-                 from_openmfds=False):
+                 join='outer', from_openmfds=False):
     """
     Attempt to auto-magically combine the given datasets into one.
 
@@ -571,6 +595,16 @@ def auto_combine(datasets, concat_dim='_not_supplied', compat='no_conflicts',
         Details are in the documentation of concat
     fill_value : scalar, optional
         Value to use for newly missing values
+    join : {'outer', 'inner', 'left', 'right', 'exact'}, optional
+        String indicating how to combine differing indexes
+        (excluding concat_dim) in objects.
+
+        - 'outer': use the union of object indexes
+        - 'inner': use the intersection of object indexes
+        - 'left': use indexes from the first object with each dimension
+        - 'right': use indexes from the last object with each dimension
+        - 'exact': instead of aligning, raise `ValueError` when indexes to be
+          aligned are not equal
 
     Returns
     -------
@@ -629,7 +663,8 @@ def auto_combine(datasets, concat_dim='_not_supplied', compat='no_conflicts',
 
     return _old_auto_combine(datasets, concat_dim=concat_dim,
                              compat=compat, data_vars=data_vars,
-                             coords=coords, fill_value=fill_value)
+                             coords=coords, fill_value=fill_value,
+                             join=join)
 
 
 def _dimension_coords_exist(datasets):
@@ -670,7 +705,7 @@ def _requires_concat_and_merge(datasets):
 def _old_auto_combine(datasets, concat_dim=_CONCAT_DIM_DEFAULT,
                       compat='no_conflicts',
                       data_vars='all', coords='different',
-                      fill_value=dtypes.NA):
+                      fill_value=dtypes.NA, join='outer'):
     if concat_dim is not None:
         dim = None if concat_dim is _CONCAT_DIM_DEFAULT else concat_dim
 
@@ -679,16 +714,17 @@ def _old_auto_combine(datasets, concat_dim=_CONCAT_DIM_DEFAULT,
 
         concatenated = [_auto_concat(list(datasets), dim=dim,
                                      data_vars=data_vars, coords=coords,
-                                     fill_value=fill_value)
+                                     fill_value=fill_value, join=join)
                         for vars, datasets in grouped]
     else:
         concatenated = datasets
-    merged = merge(concatenated, compat=compat, fill_value=fill_value)
+    merged = merge(concatenated, compat=compat, fill_value=fill_value,
+                   join=join)
     return merged
 
 
 def _auto_concat(datasets, dim=None, data_vars='all', coords='different',
-                 fill_value=dtypes.NA):
+                 fill_value=dtypes.NA, join='outer'):
     if len(datasets) == 1 and dim is None:
         # There is nothing more to combine, so kick out early.
         return datasets[0]

diff --git a/xarray/core/concat.py b/xarray/core/concat.py
@@ -11,7 +11,7 @@
 
 def concat(objs, dim=None, data_vars='all', coords='different',
            compat='equals', positions=None, indexers=None, mode=None,
-           concat_over=None, fill_value=dtypes.NA):
+           concat_over=None, fill_value=dtypes.NA, join='outer'):
     """Concatenate xarray objects along a new or existing dimension.
 
     Parameters
@@ -52,7 +52,7 @@ def concat(objs, dim=None, data_vars='all', coords='different',
           * 'all': All coordinate variables will be concatenated, except
             those corresponding to other dimensions.
           * list of str: The listed coordinate variables will be concatenated,
-            in addition the 'minimal' coordinates.
+            in addition to the 'minimal' coordinates.
     compat : {'equals', 'identical'}, optional
         String indicating how to compare non-concatenated variables and
         dataset global attributes for potential conflicts. 'equals' means
@@ -65,6 +65,17 @@ def concat(objs, dim=None, data_vars='all', coords='different',
         supplied, objects are concatenated in the provided order.
     fill_value : scalar, optional
         Value to use for newly missing values
+    join : {'outer', 'inner', 'left', 'right', 'exact'}, optional
+        String indicating how to combine differing indexes
+        (excluding dim) in objects.
+
+        - 'outer': use the union of object indexes
+        - 'inner': use the intersection of object indexes
+        - 'left': use indexes from the first object with each dimension
+        - 'right': use indexes from the last object with each dimension
+        - 'exact': instead of aligning, raise `ValueError` when indexes to be
+          aligned are not equal
+
     indexers, mode, concat_over : deprecated
 
     Returns
@@ -76,7 +87,7 @@ def concat(objs, dim=None, data_vars='all', coords='different',
     merge
     auto_combine
     """
-    # TODO: add join and ignore_index arguments copied from pandas.concat
+    # TODO: add ignore_index argument copied from pandas.concat
     # TODO: support concatenating scalar coordinates even if the concatenated
     # dimension already exists
     from .dataset import Dataset
@@ -116,7 +127,7 @@ def concat(objs, dim=None, data_vars='all', coords='different',
     else:
         raise TypeError('can only concatenate xarray Dataset and DataArray '
                         'objects, got %s' % type(first_obj))
-    return f(objs, dim, data_vars, coords, compat, positions, fill_value)
+    return f(objs, dim, data_vars, coords, compat, positions, fill_value, join)
 
 
 def _calc_concat_dim_coord(dim):
@@ -212,7 +223,7 @@ def process_subset_opt(opt, subset):
 
 
 def _dataset_concat(datasets, dim, data_vars, coords, compat, positions,
-                    fill_value=dtypes.NA):
+                    fill_value=dtypes.NA, join='outer'):
     """
     Concatenate a sequence of datasets along a new or existing dimension
     """
@@ -225,7 +236,7 @@ def _dataset_concat(datasets, dim, data_vars, coords, compat, positions,
     dim, coord = _calc_concat_dim_coord(dim)
     # Make sure we're working on a copy (we'll be loading variables)
     datasets = [ds.copy() for ds in datasets]
-    datasets = align(*datasets, join='outer', copy=False, exclude=[dim],
+    datasets = align(*datasets, join=join, copy=False, exclude=[dim],
                      fill_value=fill_value)
 
     concat_over, equals = _calc_concat_over(datasets, dim, data_vars, coords)
@@ -318,7 +329,7 @@ def ensure_common_dims(vars):
 
 
 def _dataarray_concat(arrays, dim, data_vars, coords, compat,
-                      positions, fill_value=dtypes.NA):
+                      positions, fill_value=dtypes.NA, join='outer'):
     arrays = list(arrays)
 
     if data_vars != 'all':
@@ -337,5 +348,5 @@ def _dataarray_concat(arrays, dim, data_vars, coords, compat,
         datasets.append(arr._to_temp_dataset())
 
     ds = _dataset_concat(datasets, dim, data_vars, coords, compat,
-                         positions, fill_value=fill_value)
+                         positions, fill_value=fill_value, join=join)
     return arrays[0]._from_temp_dataset(ds, name)
diff --git a/xarray/core/merge.py b/xarray/core/merge.py
@@ -530,7 +530,14 @@ def merge(objects, compat='no_conflicts', join='outer', fill_value=dtypes.NA):
           must be equal. The returned dataset then contains the combination
           of all non-null values.
     join : {'outer', 'inner', 'left', 'right', 'exact'}, optional
-        How to combine objects with different indexes.
+        String indicating how to combine differing indexes in objects.
+
+        - 'outer': use the union of object indexes
+        - 'inner': use the intersection of object indexes
+        - 'left': use indexes from the first object with each dimension
+        - 'right': use indexes from the last object with each dimension
+        - 'exact': instead of aligning, raise `ValueError` when indexes to be
+          aligned are not equal
     fill_value : scalar, optional
         Value to use for newly missing values