Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add basic implementation of ddsketch #1

Merged
merged 11 commits into from
Mar 7, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .formatter.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Used by "mix format"
# `inputs` lists the glob patterns for the files the formatter should process:
# the mix/.formatter scripts at the project root and every .ex/.exs file under
# config/, lib/ and test/.
[
inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"]
]
26 changes: 26 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# The directory Mix will write compiled artifacts to.
/_build/

# If you run "mix test --cover", coverage assets end up here.
/cover/

# The directory Mix downloads your dependencies sources to.
/deps/

# Where third-party dependencies like ExDoc output generated docs.
/doc/

# Ignore .fetch files in case you like to edit your project deps locally.
/.fetch

# If the VM crashes, it generates a dump, let's ignore it too.
erl_crash.dump

# Also ignore archive artifacts (built via "mix archive.build").
*.ez

# Ignore package tarball (built via "mix hex.build").
dd_data_streams_ex-*.tar

# Temporary files, for example, from tests.
/tmp/
23 changes: 21 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,21 @@
# data-streams-ex
DataDog data streams library for Elixir
# DdDataStreamsEx

Datadog data streams library for Elixir.

## Installation

If [available in Hex](https://hex.pm/docs/publish), the package can be installed
by adding `dd_data_streams_ex` to your list of dependencies in `mix.exs`:

```elixir
def deps do
[
{:dd_data_streams_ex, "~> 0.1.0"}
]
end
```

Documentation can be generated with [ExDoc](https://github.com/elixir-lang/ex_doc)
and published on [HexDocs](https://hexdocs.pm). Once published, the docs can
be found at <https://hexdocs.pm/dd_data_streams_ex>.

16 changes: 16 additions & 0 deletions benchmarks/datadog/sketch/store.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Benchmarks inserting shuffled integer sequences of increasing size into a
# dense sketch store, one value at a time.
alias Datadog.Sketch.Store.Dense

# Builds the list 1..upper and returns it in random order.
shuffled_up_to = fn upper ->
  1..upper |> Enum.to_list() |> Enum.shuffle()
end

inputs = %{
  "small" => shuffled_up_to.(100),
  "medium" => shuffled_up_to.(10_000),
  "large" => shuffled_up_to.(1_000_000)
}

# Folds every value of the input list into a fresh dense store.
fill_dense = fn values ->
  Enum.reduce(values, Dense.new(), fn value, store ->
    Dense.add(store, value)
  end)
end

Benchee.run(%{"dense" => fill_dense}, inputs: inputs)
Empty file.
105 changes: 105 additions & 0 deletions lib/datadog/sketch/index_mapping.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
defmodule Datadog.Sketch.IndexMapping do
  @moduledoc """
  Behaviour and dispatch helpers for index mapping algorithms.

  Every index mapping is a struct carrying the fields described by `t:t/0`.
  The functions in this module read the struct's `__struct__` key and forward
  the call to the implementing module, so callers do not need to know which
  mapping they were handed.
  """

  @type t :: %{
          __struct__: module(),
          gamma: float(),
          index_offset: float(),
          multiplier: float(),
          min_indexable_value: float(),
          max_indexable_value: float()
        }

  @doc """
  Checks if an index mapping matches another index mapping.
  """
  @callback equals(t(), t()) :: boolean()

  @doc """
  Returns the integer index the given value is mapped to.
  """
  @callback index(t(), float()) :: integer()

  @doc """
  Returns the value associated with the given index.
  """
  @callback value(t(), integer()) :: float()

  @doc """
  Returns the lower bound of the values mapped to the given index.
  """
  @callback lower_bound(t(), integer()) :: float()

  @doc """
  Returns the relative accuracy of the index mapping.
  """
  @callback relative_accuracy(t()) :: float()

  @doc """
  Returns the minimum positive value that can be mapped to an index.
  """
  @callback min_indexable_value(t()) :: float()

  @doc """
  Returns the maximum positive value that can be mapped to an index.
  """
  @callback max_indexable_value(t()) :: float()

  @doc """
  Returns a `Datadog.Sketch.Protobuf.IndexMapping` Protobuf-able
  struct for the index mapping. Used for sending data to Datadog.
  """
  @callback to_proto(t()) :: Datadog.Sketch.Protobuf.IndexMapping.t()

  @doc """
  The value at which golang math.Exp overflows. This is golang
  specific, but we want to match implementation details.
  """
  @spec exp_overflow() :: number()
  def exp_overflow, do: 7.094361393031e+02

  @doc """
  The smallest positive normal value of a golang float64, 2^(-1022).
  """
  @spec min_normal_float_64() :: number()
  def min_normal_float_64, do: 2.2250738585072014e-308

  @doc """
  Checks if an index mapping matches another index mapping.

  Dispatches to the `equals/2` callback of the first mapping's module.
  """
  @spec equals(t(), t()) :: boolean()
  def equals(%{__struct__: module} = self, other), do: module.equals(self, other)

  @doc """
  Returns the integer index the given value is mapped to.

  Dispatches to the `index/2` callback of the mapping's module.
  """
  @spec index(t(), float()) :: integer()
  def index(%{__struct__: module} = self, value), do: module.index(self, value)

  @doc """
  Returns the value associated with the given index.

  Dispatches to the `value/2` callback of the mapping's module.
  """
  @spec value(t(), integer()) :: float()
  def value(%{__struct__: module} = self, index), do: module.value(self, index)

  @doc """
  Returns the lower bound of the values mapped to the given index.

  Dispatches to the `lower_bound/2` callback of the mapping's module.
  """
  @spec lower_bound(t(), integer()) :: float()
  def lower_bound(%{__struct__: module} = self, index), do: module.lower_bound(self, index)

  @doc """
  Returns the relative accuracy of the index mapping.

  Dispatches to the `relative_accuracy/1` callback of the mapping's module.
  """
  @spec relative_accuracy(t()) :: float()
  def relative_accuracy(%{__struct__: module} = self), do: module.relative_accuracy(self)

  @doc """
  Returns the minimum positive value that can be mapped to an index.
  """
  @spec min_indexable_value(t()) :: float()
  def min_indexable_value(%{__struct__: module} = self), do: module.min_indexable_value(self)

  @doc """
  Returns the maximum positive value that can be mapped to an index.
  """
  @spec max_indexable_value(t()) :: float()
  def max_indexable_value(%{__struct__: module} = self), do: module.max_indexable_value(self)

  @doc """
  Returns a `Datadog.Sketch.Protobuf.IndexMapping` Protobuf-able
  struct for the index mapping. Used for sending data to Datadog.
  """
  @spec to_proto(t()) :: Datadog.Sketch.Protobuf.IndexMapping.t()
  def to_proto(%{__struct__: module} = self), do: module.to_proto(self)

  @doc """
  Checks whether `x` and `y` are equal within the given `tolerance`.

  When either number is zero, both absolute values must be at most
  `tolerance`. Otherwise the absolute difference must be at most `tolerance`
  times the larger of the two magnitudes.

  ## Examples

      iex> Datadog.Sketch.IndexMapping.within_tolerance(1.0, 1.1, 0.01)
      false

      iex> Datadog.Sketch.IndexMapping.within_tolerance(0, 0.0, 1.0e-12)
      true

  """
  @spec within_tolerance(float(), float(), float()) :: boolean()
  def within_tolerance(x, y, tolerance) do
    if x == 0 or y == 0 do
      # A relative comparison is meaningless against zero, so compare both
      # magnitudes to the tolerance directly.
      abs(x) <= tolerance and abs(y) <= tolerance
    else
      abs(x - y) <= tolerance * max(abs(x), abs(y))
    end
  end
end
120 changes: 120 additions & 0 deletions lib/datadog/sketch/index_mapping/logarithmic.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
defmodule Datadog.Sketch.IndexMapping.Logarithmic do
  @moduledoc """
  LogarithmicMapping is an IndexMapping that is memory-optimal, that is to say
  that given a targeted relative accuracy, it requires the least number of
  indices to cover a given range of values. This is done by logarithmically
  mapping floating-point values to integers.
  """

  defstruct gamma: 0.0,
            index_offset: 0.0,
            multiplier: 0.0,
            min_indexable_value: 0.0,
            max_indexable_value: 0.0

  @type t :: Datadog.Sketch.IndexMapping.t()

  @behaviour Datadog.Sketch.IndexMapping

  alias Datadog.Sketch.{IndexMapping, Utils}

  @doc """
  Creates a new Logarithmic index mapping with the given accuracy.

  The relative accuracy must be strictly between 0 and 1: a value of 1 would
  divide by zero when computing gamma, and a value of 0 would produce a gamma
  of 1, which `new/2` rejects. Raises `ArgumentError` otherwise.
  """
  @spec new(float()) :: t() | no_return()
  def new(relative_accuracy) when relative_accuracy <= 0 or relative_accuracy >= 1,
    do: raise(ArgumentError, message: "The relative accuracy must be between 0 and 1.")

  def new(relative_accuracy) do
    gamma = (1 + relative_accuracy) / (1 - relative_accuracy)
    new(gamma, 0.0)
  end

  @doc """
  Creates a new Logarithmic index mapping with the given gamma
  and index offset.

  Raises `ArgumentError` when `gamma` is not greater than 1.
  """
  @spec new(float(), float()) :: t() | no_return()
  def new(gamma, _index_offset) when gamma <= 1,
    do: raise(ArgumentError, message: "Gamma must be greater than 1.")

  def new(gamma, index_offset) do
    multiplier = 1 / :math.log(gamma)

    %__MODULE__{
      gamma: gamma,
      index_offset: index_offset,
      multiplier: multiplier,
      min_indexable_value:
        max(
          capped_exp((Utils.min_int_32() - index_offset) / multiplier + 1),
          IndexMapping.min_normal_float_64() * gamma
        ),
      max_indexable_value:
        min(
          capped_exp((Utils.max_int_32() - index_offset) / multiplier - 1),
          :math.exp(IndexMapping.exp_overflow()) / (2 * gamma) * (gamma + 1)
        )
    }
  end

  # `:math.exp/1` raises an ArithmeticError when the result overflows a float
  # instead of returning infinity. Capping the exponent at the overflow
  # threshold keeps the call safe; for the maximum indexable value the capped
  # result is still larger than the other `min/2` operand (because
  # (gamma + 1) / (2 * gamma) < 1 for gamma > 1), so the outcome is unchanged.
  @spec capped_exp(number()) :: float()
  defp capped_exp(exponent), do: :math.exp(min(exponent, IndexMapping.exp_overflow()))

  @doc """
  Checks whether two mappings have the same gamma and index offset, each
  compared with a tolerance of `1.0e-12`.
  """
  @impl true
  @spec equals(t(), t()) :: boolean()
  def equals(%{gamma: sgamma, index_offset: sindex_offset}, %{
        gamma: ogamma,
        index_offset: oindex_offset
      }) do
    tol = 1.0e-12

    IndexMapping.within_tolerance(sgamma, ogamma, tol) and
      IndexMapping.within_tolerance(sindex_offset, oindex_offset, tol)
  end

  @doc """
  Returns the integer index for `value`, computed as
  `log(value) * multiplier + index_offset` rounded down.
  """
  @impl true
  @spec index(t(), float()) :: integer()
  def index(%{index_offset: index_offset, multiplier: multiplier}, value) do
    index = :math.log(value) * multiplier + index_offset

    # `trunc/1` rounds toward zero, so negative results are shifted down by
    # one to round toward negative infinity. Note that an exactly integral
    # negative result is also shifted (e.g. -2.0 becomes -3).
    if index >= 0 do
      trunc(index)
    else
      trunc(index) - 1
    end
  end

  @doc """
  Returns the value associated with `index`: the lower bound of the index
  scaled by `1 + relative_accuracy`.
  """
  @impl true
  @spec value(t(), integer()) :: float()
  def value(self, index) do
    lower_bound(self, index) * (1 + relative_accuracy(self))
  end

  @doc """
  Returns the lower bound of `index`, computed as
  `exp((index - index_offset) / multiplier)`.
  """
  @impl true
  @spec lower_bound(t(), integer()) :: float()
  def lower_bound(%{index_offset: index_offset, multiplier: multiplier}, index) do
    :math.exp((index - index_offset) / multiplier)
  end

  @doc """
  Returns the minimum positive value that can be mapped to an index.
  """
  @impl true
  @spec min_indexable_value(t()) :: float()
  def min_indexable_value(%{min_indexable_value: value}), do: value

  @doc """
  Returns the maximum positive value that can be mapped to an index.
  """
  @impl true
  @spec max_indexable_value(t()) :: float()
  def max_indexable_value(%{max_indexable_value: value}), do: value

  @doc """
  Returns the relative accuracy derived from gamma, `1 - 2 / (1 + gamma)`.
  This is the inverse of the gamma computation in `new/1`.
  """
  @impl true
  @spec relative_accuracy(t()) :: float()
  def relative_accuracy(%{gamma: gamma}) do
    1 - 2 / (1 + gamma)
  end

  @doc """
  Returns a `Datadog.Sketch.Protobuf.IndexMapping` struct carrying this
  mapping's gamma and index offset, with interpolation set to `:NONE`.
  """
  @impl true
  @spec to_proto(t()) :: Datadog.Sketch.Protobuf.IndexMapping.t()
  def to_proto(self) do
    %Datadog.Sketch.Protobuf.IndexMapping{
      gamma: self.gamma,
      indexOffset: self.index_offset,
      interpolation: :NONE
    }
  end
end
48 changes: 48 additions & 0 deletions lib/datadog/sketch/protobuf.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# File generated from the `sketches-go` library.
# https://github.com/DataDog/sketches-go/blob/master/ddsketch/pb/ddsketch.proto

# Generated proto3 message for a complete sketch. It carries the index mapping
# (field 1), one store each for positive and negative values (fields 2 and 3),
# and a zero count encoded as a double (field 4).
defmodule Datadog.Sketch.Protobuf.DDSketch do
@moduledoc false
use Protobuf, protoc_gen_elixir_version: "0.11.0", syntax: :proto3

field(:mapping, 1, type: Datadog.Sketch.Protobuf.IndexMapping)
field(:positiveValues, 2, type: Datadog.Sketch.Protobuf.Store)
field(:negativeValues, 3, type: Datadog.Sketch.Protobuf.Store)
field(:zeroCount, 4, type: :double)
end

# Generated proto3 message describing an index mapping: gamma and index offset
# as doubles, plus an interpolation enum value.
defmodule Datadog.Sketch.Protobuf.IndexMapping do
@moduledoc false
use Protobuf, protoc_gen_elixir_version: "0.11.0", syntax: :proto3

field(:gamma, 1, type: :double)
field(:indexOffset, 2, type: :double)
field(:interpolation, 3, type: Datadog.Sketch.Protobuf.IndexMapping.Interpolation, enum: true)
end

# Generated proto3 enum of interpolation kinds referenced by the IndexMapping
# message; the integers are the wire values of each variant.
defmodule Datadog.Sketch.Protobuf.IndexMapping.Interpolation do
@moduledoc false
use Protobuf, enum: true, protoc_gen_elixir_version: "0.11.0", syntax: :proto3

field(:NONE, 0)
field(:LINEAR, 1)
field(:QUADRATIC, 2)
field(:CUBIC, 3)
end

# Generated proto3 message for a store of bin counts. Counts can be carried as
# repeated BinCountsEntry key/value entries (field 1) and/or as a packed list
# of doubles (field 2) accompanied by a sint32 index offset (field 3).
defmodule Datadog.Sketch.Protobuf.Store do
@moduledoc false
use Protobuf, protoc_gen_elixir_version: "0.11.0", syntax: :proto3

field(:binCounts, 1, repeated: true, type: Datadog.Sketch.Protobuf.Store.BinCountsEntry)
field(:contiguousBinCounts, 2, repeated: true, type: :double, packed: true, deprecated: false)
field(:contiguousBinIndexOffset, 3, type: :sint32)
end

# Generated proto3 map-entry message (declared with `map: true`) used by the
# Store message's binCounts field: a sint32 key paired with a double value.
defmodule Datadog.Sketch.Protobuf.Store.BinCountsEntry do
@moduledoc false
use Protobuf, map: true, protoc_gen_elixir_version: "0.11.0", syntax: :proto3

field(:key, 1, type: :sint32)
field(:value, 2, type: :double)
end
Loading