Skip to content

Commit 132a40d

Browse files
stevejgordon authored and github-actions[bot] committed
Add hard_bounds for histograms (#5098)
* Add hard_bounds for histograms
* Apply date_optional_time format for hard_bounds
* Add new hard_bounds tests

As with extended_bounds, we append the date_optional_time format to the format specified on the aggregation. This avoids parsing errors on the server. Includes an update to the documentation which will be generated and added in a subsequent PR.
1 parent 1d1d314 commit 132a40d

File tree

5 files changed

+185
-5
lines changed

5 files changed

+185
-5
lines changed

src/Nest/Aggregations/Bucket/DateHistogram/DateHistogramAggregation.cs

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,13 @@ public interface IDateHistogramAggregation : IBucketAggregation
2424
[DataMember(Name ="extended_bounds")]
2525
ExtendedBounds<DateMath> ExtendedBounds { get; set; }
2626

27+
/// <summary>
28+
/// The hard_bounds is a counterpart of extended_bounds and can limit the range of buckets in the histogram.
29+
/// It is particularly useful in the case of open data ranges that can result in a very large number of buckets.
30+
/// </summary>
31+
[DataMember(Name = "hard_bounds")]
32+
HardBounds<DateMath> HardBounds { get; set; }
33+
2734
/// <summary>
2835
/// The field to target
2936
/// </summary>
@@ -104,20 +111,21 @@ public DateHistogramAggregation(string name) : base(name) { }
104111
/// <inheritdoc />
105112
public ExtendedBounds<DateMath> ExtendedBounds { get; set; }
106113
/// <inheritdoc />
114+
public HardBounds<DateMath> HardBounds { get; set; }
115+
/// <inheritdoc />
107116
public Field Field { get; set; }
108117

109118
/// <inheritdoc />
110119
// The getter returns the stored format with "||date_optional_time" appended when the stored
// format is non-empty, does not already contain "date_optional_time", and at least one of
// ExtendedBounds, HardBounds or Missing is set; otherwise it returns the stored format as-is.
// Appending the extra format avoids date parsing errors on the server for those values.
// The setter stores the supplied value unmodified.
public string Format
111120
{
112121
get => !string.IsNullOrEmpty(_format) &&
113122
!_format.Contains("date_optional_time") &&
114-
(ExtendedBounds != null || Missing.HasValue)
123+
(ExtendedBounds != null || HardBounds != null || Missing.HasValue)
115124
? _format + "||date_optional_time"
116125
: _format;
117126
set => _format = value;
118127
}
119128

120-
121129
[Obsolete("Deprecated in version 7.2.0, use CalendarInterval or FixedInterval instead")]
122130
public Union<DateInterval, Time> Interval { get; set; }
123131
/// <inheritdoc />
@@ -148,14 +156,15 @@ public class DateHistogramAggregationDescriptor<T>
148156
private string _format;
149157

150158
ExtendedBounds<DateMath> IDateHistogramAggregation.ExtendedBounds { get; set; }
159+
HardBounds<DateMath> IDateHistogramAggregation.HardBounds { get; set; }
151160
Field IDateHistogramAggregation.Field { get; set; }
152161

153162
//see: https://github.com/elastic/elasticsearch/issues/9725
154163
string IDateHistogramAggregation.Format
155164
{
156165
get => !string.IsNullOrEmpty(_format) &&
157166
!_format.Contains("date_optional_time") &&
158-
(Self.ExtendedBounds != null || Self.Missing.HasValue)
167+
(Self.ExtendedBounds != null || Self.HardBounds != null || Self.Missing.HasValue)
159168
? _format + "||date_optional_time"
160169
: _format;
161170
set => _format = value;
@@ -229,6 +238,10 @@ public DateHistogramAggregationDescriptor<T> OrderDescending(string key) =>
229238
public DateHistogramAggregationDescriptor<T> ExtendedBounds(DateMath min, DateMath max) =>
230239
Assign(new ExtendedBounds<DateMath> { Minimum = min, Maximum = max }, (a, v) => a.ExtendedBounds = v);
231240

241+
/// <inheritdoc cref="IDateHistogramAggregation.HardBounds" />
242+
public DateHistogramAggregationDescriptor<T> HardBounds(DateMath min, DateMath max) =>
243+
Assign(new HardBounds<DateMath> { Minimum = min, Maximum = max }, (a, v) => a.HardBounds = v);
244+
232245
/// <inheritdoc cref="IDateHistogramAggregation.Missing" />
233246
public DateHistogramAggregationDescriptor<T> Missing(DateTime? missing) => Assign(missing, (a, v) => a.Missing = v);
234247
}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
// Licensed to Elasticsearch B.V under one or more agreements.
2+
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
3+
// See the LICENSE file in the project root for more information
4+
5+
using System.Runtime.Serialization;
6+
7+
namespace Nest
8+
{
9+
/// <summary>
/// A minimum/maximum pair used as the <c>hard_bounds</c> of a histogram aggregation, which can
/// limit the range of buckets in the histogram. The values are serialized under the names
/// <c>min</c> and <c>max</c>.
/// </summary>
/// <typeparam name="T">
/// The type of the bound values; the aggregations in this change use <c>double</c> for histograms
/// and <c>DateMath</c> for date histograms.
/// </typeparam>
public class HardBounds<T>
10+
{
11+
/// <summary>
/// The upper bound, serialized as <c>max</c>.
/// </summary>
[DataMember(Name = "max")]
12+
public T Maximum { get; set; }
13+
14+
/// <summary>
/// The lower bound, serialized as <c>min</c>.
/// </summary>
[DataMember(Name = "min")]
15+
public T Minimum { get; set; }
16+
}
17+
}

src/Nest/Aggregations/Bucket/Histogram/HistogramAggregation.cs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@ public interface IHistogramAggregation : IBucketAggregation
1616
[DataMember(Name ="extended_bounds")]
1717
ExtendedBounds<double> ExtendedBounds { get; set; }
1818

19+
/// <summary>
/// The hard_bounds is a counterpart of extended_bounds and can limit the range of buckets in the histogram.
/// It is particularly useful in the case of open data ranges that can result in a very large number of buckets.
/// </summary>
[DataMember(Name = "hard_bounds")]
20+
HardBounds<double> HardBounds { get; set; }
21+
1922
[DataMember(Name ="field")]
2023
Field Field { get; set; }
2124

@@ -45,6 +48,7 @@ internal HistogramAggregation() { }
4548
public HistogramAggregation(string name) : base(name) { }
4649

4750
public ExtendedBounds<double> ExtendedBounds { get; set; }
51+
public HardBounds<double> HardBounds { get; set; }
4852
public Field Field { get; set; }
4953
public double? Interval { get; set; }
5054
public int? MinimumDocumentCount { get; set; }
@@ -61,6 +65,7 @@ public class HistogramAggregationDescriptor<T>
6165
where T : class
6266
{
6367
ExtendedBounds<double> IHistogramAggregation.ExtendedBounds { get; set; }
68+
HardBounds<double> IHistogramAggregation.HardBounds { get; set; }
6469
Field IHistogramAggregation.Field { get; set; }
6570

6671
double? IHistogramAggregation.Interval { get; set; }
@@ -100,6 +105,9 @@ public HistogramAggregationDescriptor<T> OrderDescending(string key) =>
100105
public HistogramAggregationDescriptor<T> ExtendedBounds(double min, double max) =>
101106
Assign(new ExtendedBounds<double> { Minimum = min, Maximum = max }, (a, v) => a.ExtendedBounds = v);
102107

108+
/// <summary>
/// Sets the <c>hard_bounds</c> of the histogram, which can limit the range of buckets returned.
/// </summary>
/// <param name="min">The value assigned to the bounds' <c>Minimum</c>.</param>
/// <param name="max">The value assigned to the bounds' <c>Maximum</c>.</param>
/// <returns>The descriptor, so that further fluent calls can be chained.</returns>
public HistogramAggregationDescriptor<T> HardBounds(double min, double max) =>
109+
Assign(new HardBounds<double> { Minimum = min, Maximum = max }, (a, v) => a.HardBounds = v);
110+
103111
public HistogramAggregationDescriptor<T> Offset(double? offset) => Assign(offset, (a, v) => a.Offset = v);
104112

105113
public HistogramAggregationDescriptor<T> Missing(double? missing) => Assign(missing, (a, v) => a.Missing = v);

tests/Tests/Aggregations/Bucket/DateHistogram/DateHistogramAggregationUsageTests.cs

Lines changed: 80 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ namespace Tests.Aggregations.Bucket.DateHistogram
2121
* From a functionality perspective, this histogram supports the same features as the normal histogram.
2222
* The main difference is that the interval can be specified by date/time expressions.
2323
*
24-
* NOTE: When specifying a `format` **and** `extended_bounds` or `missing`, in order for Elasticsearch to be able to parse
24+
* NOTE: When specifying a `format` **and** `extended_bounds`, `hard_bounds` or `missing`, in order for Elasticsearch to be able to parse
2525
* the serialized `DateTime` of `extended_bounds`, `hard_bounds` or `missing` correctly, the `date_optional_time` format is included
2626
* as part of the `format` value.
2727
*
@@ -41,7 +41,7 @@ public DateHistogramAggregationUsageTests(ReadOnlyCluster i, EndpointUsage usage
4141
field = "startedOn",
4242
calendar_interval = "month",
4343
min_doc_count = 2,
44-
format = "yyyy-MM-dd'T'HH:mm:ss||date_optional_time", //<1> Note the inclusion of `date_optional_time` to `format`
44+
format = "yyyy-MM-dd'T'HH:mm:ss||date_optional_time", // <1> Note the inclusion of `date_optional_time` to `format`
4545
order = new { _count = "asc" },
4646
extended_bounds = new
4747
{
@@ -208,4 +208,82 @@ protected override void ExpectResponse(ISearchResponse<Project> response)
208208
}
209209
}
210210
}
211+
212+
// hide
213+
[SkipVersion("<7.10.0", "hard_bounds introduced in 7.10.0")]
214+
// Usage test for a date_histogram aggregation that specifies hard_bounds. It declares the
// expected request JSON, the equivalent fluent and object-initializer requests, and checks
// that every bucket key in the response falls within the configured bounds.
public class DateHistogramAggregationWithHardBoundsUsageTests : ProjectsOnlyAggregationUsageTestBase
215+
{
216+
// Lower and upper hard bounds, computed once in the constructor from the seeded projects.
private readonly DateTime _hardBoundsMinimum;
217+
private readonly DateTime _hardBoundsMaximum;
218+
219+
public DateHistogramAggregationWithHardBoundsUsageTests(ReadOnlyCluster i, EndpointUsage usage) : base(i, usage)
220+
{
221+
// Note: If these tests are run against an existing node, and seeding is not forced, it's possible the
222+
// dates used will not appear in the index and result in no buckets being returned. The test will still
223+
// pass if this is the case. For best results locally, force a reseed. This is not an issue in CI.
224+
225+
// Order the projects by start time, skip the first two and take the next five.
var projects = Project.Projects.OrderBy(p => p.StartedOn).Skip(2).Take(5).ToArray();
226+
227+
// The bounds are the earliest and latest start dates of that subset; .Date drops the time of day.
_hardBoundsMinimum = projects.Min(p => p.StartedOn.Date);
228+
_hardBoundsMaximum = projects.Max(p => p.StartedOn.Date);
229+
}
230+
231+
// Expected request JSON. Note that the format includes "||date_optional_time" although the
// fluent and initializer requests below only supply "yyyy-MM-dd'T'HH:mm:ss".
protected override object AggregationJson => new
232+
{
233+
projects_started_per_day = new
234+
{
235+
date_histogram = new
236+
{
237+
field = "startedOn",
238+
calendar_interval = "day",
239+
format = "yyyy-MM-dd'T'HH:mm:ss||date_optional_time",
240+
min_doc_count = 1,
241+
hard_bounds = new
242+
{
243+
min = _hardBoundsMinimum,
244+
max = _hardBoundsMaximum
245+
},
246+
order = new { _key = "asc" },
247+
}
248+
}
249+
};
250+
251+
// Suppress the compiler's obsolete-member warnings (CS0618 / CS0612) for the requests below.
#pragma warning disable 618, 612
252+
// Fluent form of the request.
protected override Func<AggregationContainerDescriptor<Project>, IAggregationContainer> FluentAggs => a => a
253+
.DateHistogram("projects_started_per_day", date => date
254+
.Field(p => p.StartedOn)
255+
.Format("yyyy-MM-dd'T'HH:mm:ss")
256+
.CalendarInterval(DateInterval.Day)
257+
.HardBounds(_hardBoundsMinimum, _hardBoundsMaximum)
258+
.MinimumDocumentCount(1)
259+
.Order(HistogramOrder.KeyAscending)
260+
);
261+
262+
// Object-initializer form of the same request.
protected override AggregationDictionary InitializerAggs =>
263+
new DateHistogramAggregation("projects_started_per_day")
264+
{
265+
Field = Field<Project>(p => p.StartedOn),
266+
Format = "yyyy-MM-dd'T'HH:mm:ss",
267+
CalendarInterval = DateInterval.Day,
268+
HardBounds = new HardBounds<DateMath>
269+
{
270+
Minimum = _hardBoundsMinimum,
271+
Maximum = _hardBoundsMaximum
272+
},
273+
MinimumDocumentCount = 1,
274+
Order = HistogramOrder.KeyAscending
275+
};
276+
#pragma warning restore 618, 612
277+
278+
// Asserts the response is valid, the aggregation and its buckets are present, and each
// bucket's key (parsed from KeyAsString) lies within the hard bounds, inclusive.
protected override void ExpectResponse(ISearchResponse<Project> response)
279+
{
280+
response.ShouldBeValid();
281+
var dateHistogram = response.Aggregations.DateHistogram("projects_started_per_day");
282+
dateHistogram.Should().NotBeNull();
283+
dateHistogram.Buckets.Should().NotBeNull();
284+
285+
// An empty bucket collection makes this loop a no-op, so the test still passes in that case.
// NOTE(review): DateTime.Parse(string) parses using the current culture; consider an explicit
// culture or format if this should be independent of the machine running the test - TODO confirm.
foreach (var date in dateHistogram.Buckets.Select(b => DateTime.Parse(b.KeyAsString)))
286+
date.Should().BeOnOrAfter(_hardBoundsMinimum).And.BeOnOrBefore(_hardBoundsMaximum);
287+
}
288+
}
211289
}

tests/Tests/Aggregations/Bucket/Histogram/HistogramAggregationUsageTests.cs

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
// See the LICENSE file in the project root for more information
44

55
using System;
6+
using Elastic.Elasticsearch.Xunit.XunitPlumbing;
67
using FluentAssertions;
78
using Nest;
89
using Tests.Core.Extensions;
@@ -31,6 +32,7 @@ public HistogramAggregationUsageTests(ReadOnlyCluster i, EndpointUsage usage) :
3132
_key = "desc"
3233
},
3334
offset = 1.1
35+
3436
}
3537
}
3638
};
@@ -65,4 +67,66 @@ protected override void ExpectResponse(ISearchResponse<Project> response)
6567
item.DocCount.Should().BeGreaterThan(0);
6668
}
6769
}
70+
71+
// hide
72+
[SkipVersion("<7.10.0", "hard_bounds introduced in 7.10.0")]
73+
// Usage test for a histogram aggregation that specifies hard_bounds. It declares the expected
// request JSON, the equivalent fluent and object-initializer requests, and checks that every
// bucket key in the response falls within the configured bounds.
public class HistogramAggregationWithHardBoundsUsageTests : AggregationUsageTestBase
74+
{
75+
// Lower and upper hard bounds applied to the numberOfCommits histogram.
private const double HardBoundsMinimum = 100;
76+
private const double HardBoundsMaximum = 300;
77+
78+
public HistogramAggregationWithHardBoundsUsageTests(ReadOnlyCluster i, EndpointUsage usage) : base(i, usage) { }
79+
80+
// Expected request JSON.
protected override object AggregationJson => new
81+
{
82+
commits = new
83+
{
84+
histogram = new
85+
{
86+
field = "numberOfCommits",
87+
hard_bounds = new { min = HardBoundsMinimum, max = HardBoundsMaximum },
88+
interval = 100.0,
89+
min_doc_count = 1,
90+
order = new
91+
{
92+
_key = "desc"
93+
}
94+
}
95+
}
96+
};
97+
98+
// Fluent form of the request.
protected override Func<AggregationContainerDescriptor<Project>, IAggregationContainer> FluentAggs => a => a
99+
.Histogram("commits", h => h
100+
.Field(p => p.NumberOfCommits)
101+
.Interval(100)
102+
.MinimumDocumentCount(1)
103+
.Order(HistogramOrder.KeyDescending)
104+
.HardBounds(HardBoundsMinimum, HardBoundsMaximum)
105+
);
106+
107+
// Object-initializer form of the same request.
protected override AggregationDictionary InitializerAggs =>
108+
new HistogramAggregation("commits")
109+
{
110+
Field = Field<Project>(p => p.NumberOfCommits),
111+
Interval = 100,
112+
MinimumDocumentCount = 1,
113+
Order = HistogramOrder.KeyDescending,
114+
HardBounds = new HardBounds<double>
115+
{
116+
Minimum = HardBoundsMinimum,
117+
Maximum = HardBoundsMaximum
118+
}
119+
};
120+
121+
// Asserts the response is valid, the aggregation and its buckets are present, and each
// bucket key lies within the hard bounds, inclusive.
protected override void ExpectResponse(ISearchResponse<Project> response)
122+
{
123+
response.ShouldBeValid();
124+
var commits = response.Aggregations.Histogram("commits");
125+
commits.Should().NotBeNull();
126+
commits.Buckets.Should().NotBeNull();
127+
128+
// An empty bucket collection makes this loop a no-op; only returned buckets are checked.
foreach (var bucket in commits.Buckets)
129+
bucket.Key.Should().BeGreaterOrEqualTo(HardBoundsMinimum).And.BeLessOrEqualTo(HardBoundsMaximum);
130+
}
131+
}
68132
}

0 commit comments

Comments
 (0)