Skip to content

Commit

Permalink
jlmelville#46: address some floating point determinism issues
Browse files Browse the repository at this point in the history
  • Loading branch information
jlmelville committed Feb 17, 2020
1 parent 3e5fc80 commit 4256507
Show file tree
Hide file tree
Showing 8 changed files with 250 additions and 200 deletions.
1 change: 1 addition & 0 deletions R/util.R
Original file line number Diff line number Diff line change
Expand Up @@ -104,3 +104,4 @@ lsplit_unnamed <- function(l) {
unnamed = l[uids]
)
}

30 changes: 30 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,36 @@ from [How to Use t-SNE Effectively](https://distill.pub/2016/misread-tsne/).
* A list of examples of using the LargeVis-like
[`lvish`](https://jlmelville.github.io/uwot/lvish.html) method.

## A Note on Reproducibility

`uwot` relies on the underlying compiler and C++ standard library on your
machine and this can result in differences in output even with the same input
data, arguments, packages and R version. If you require reproducibility between
machines, it is strongly suggested that you stick with the same OS and compiler
version on all of them (e.g. a fixed LTS of a Linux distro and gcc version).
Otherwise, the following can help:

* Use the `tumap` method instead of `umap`. This avoid the use of `std::pow` in
gradient calculations. This also has the advantage of being faster to optimize.
However, this gives larger clusters in the output, and you don't have the
ability to control that with `a` and `b` (or `spread` and `min_dist`)
parameters.
* For `umap`, it's better to provide `a` and `b` directly with a fixed precision
rather than allowing them to be calculated via the `spread` and `min_dist`
parameters. For default UMAP, use `a = 1.8956, b = 0.8006`.
* Use `approx_pow = TRUE`, which avoids the use of the `std::pow` function.
* Use `init = "spca"` rather than `init = "spectral"` (although the latter is
the default and preferred method for UMAP initialization).

In summary, your chances of reproducibility are increased by using:

```R
mnist_umap <- umap(mnist, a = 1.8956, b = 0.8006, approx_pow = TRUE, init = "spca")
# or
mnist_tumap <- tumap(mnist, init = "spca")
```


## Implementation Details

For small (N < 4096) and Euclidean distance, exact nearest neighbors are found
Expand Down
32 changes: 16 additions & 16 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ BEGIN_RCPP
END_RCPP
}
// optimize_layout_umap
Rcpp::NumericMatrix optimize_layout_umap(Rcpp::NumericMatrix head_embedding, Rcpp::Nullable<Rcpp::NumericMatrix> tail_embedding, const std::vector<unsigned int> positive_head, const std::vector<unsigned int> positive_tail, unsigned int n_epochs, unsigned int n_vertices, const std::vector<double> epochs_per_sample, double a, double b, double gamma, double initial_alpha, double negative_sample_rate, bool approx_pow, bool pcg_rand, bool parallelize, std::size_t grain_size, bool move_other, bool verbose);
Rcpp::NumericMatrix optimize_layout_umap(Rcpp::NumericMatrix head_embedding, Rcpp::Nullable<Rcpp::NumericMatrix> tail_embedding, const std::vector<unsigned int> positive_head, const std::vector<unsigned int> positive_tail, unsigned int n_epochs, unsigned int n_vertices, const std::vector<float> epochs_per_sample, float a, float b, float gamma, float initial_alpha, float negative_sample_rate, bool approx_pow, bool pcg_rand, bool parallelize, std::size_t grain_size, bool move_other, bool verbose);
RcppExport SEXP _uwot_optimize_layout_umap(SEXP head_embeddingSEXP, SEXP tail_embeddingSEXP, SEXP positive_headSEXP, SEXP positive_tailSEXP, SEXP n_epochsSEXP, SEXP n_verticesSEXP, SEXP epochs_per_sampleSEXP, SEXP aSEXP, SEXP bSEXP, SEXP gammaSEXP, SEXP initial_alphaSEXP, SEXP negative_sample_rateSEXP, SEXP approx_powSEXP, SEXP pcg_randSEXP, SEXP parallelizeSEXP, SEXP grain_sizeSEXP, SEXP move_otherSEXP, SEXP verboseSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Expand All @@ -96,12 +96,12 @@ BEGIN_RCPP
Rcpp::traits::input_parameter< const std::vector<unsigned int> >::type positive_tail(positive_tailSEXP);
Rcpp::traits::input_parameter< unsigned int >::type n_epochs(n_epochsSEXP);
Rcpp::traits::input_parameter< unsigned int >::type n_vertices(n_verticesSEXP);
Rcpp::traits::input_parameter< const std::vector<double> >::type epochs_per_sample(epochs_per_sampleSEXP);
Rcpp::traits::input_parameter< double >::type a(aSEXP);
Rcpp::traits::input_parameter< double >::type b(bSEXP);
Rcpp::traits::input_parameter< double >::type gamma(gammaSEXP);
Rcpp::traits::input_parameter< double >::type initial_alpha(initial_alphaSEXP);
Rcpp::traits::input_parameter< double >::type negative_sample_rate(negative_sample_rateSEXP);
Rcpp::traits::input_parameter< const std::vector<float> >::type epochs_per_sample(epochs_per_sampleSEXP);
Rcpp::traits::input_parameter< float >::type a(aSEXP);
Rcpp::traits::input_parameter< float >::type b(bSEXP);
Rcpp::traits::input_parameter< float >::type gamma(gammaSEXP);
Rcpp::traits::input_parameter< float >::type initial_alpha(initial_alphaSEXP);
Rcpp::traits::input_parameter< float >::type negative_sample_rate(negative_sample_rateSEXP);
Rcpp::traits::input_parameter< bool >::type approx_pow(approx_powSEXP);
Rcpp::traits::input_parameter< bool >::type pcg_rand(pcg_randSEXP);
Rcpp::traits::input_parameter< bool >::type parallelize(parallelizeSEXP);
Expand All @@ -113,7 +113,7 @@ BEGIN_RCPP
END_RCPP
}
// optimize_layout_tumap
Rcpp::NumericMatrix optimize_layout_tumap(Rcpp::NumericMatrix head_embedding, Rcpp::Nullable<Rcpp::NumericMatrix> tail_embedding, const std::vector<unsigned int> positive_head, const std::vector<unsigned int> positive_tail, unsigned int n_epochs, unsigned int n_vertices, const std::vector<double> epochs_per_sample, double initial_alpha, double negative_sample_rate, bool pcg_rand, bool parallelize, std::size_t grain_size, bool move_other, bool verbose);
Rcpp::NumericMatrix optimize_layout_tumap(Rcpp::NumericMatrix head_embedding, Rcpp::Nullable<Rcpp::NumericMatrix> tail_embedding, const std::vector<unsigned int> positive_head, const std::vector<unsigned int> positive_tail, unsigned int n_epochs, unsigned int n_vertices, const std::vector<float> epochs_per_sample, float initial_alpha, float negative_sample_rate, bool pcg_rand, bool parallelize, std::size_t grain_size, bool move_other, bool verbose);
RcppExport SEXP _uwot_optimize_layout_tumap(SEXP head_embeddingSEXP, SEXP tail_embeddingSEXP, SEXP positive_headSEXP, SEXP positive_tailSEXP, SEXP n_epochsSEXP, SEXP n_verticesSEXP, SEXP epochs_per_sampleSEXP, SEXP initial_alphaSEXP, SEXP negative_sample_rateSEXP, SEXP pcg_randSEXP, SEXP parallelizeSEXP, SEXP grain_sizeSEXP, SEXP move_otherSEXP, SEXP verboseSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Expand All @@ -124,9 +124,9 @@ BEGIN_RCPP
Rcpp::traits::input_parameter< const std::vector<unsigned int> >::type positive_tail(positive_tailSEXP);
Rcpp::traits::input_parameter< unsigned int >::type n_epochs(n_epochsSEXP);
Rcpp::traits::input_parameter< unsigned int >::type n_vertices(n_verticesSEXP);
Rcpp::traits::input_parameter< const std::vector<double> >::type epochs_per_sample(epochs_per_sampleSEXP);
Rcpp::traits::input_parameter< double >::type initial_alpha(initial_alphaSEXP);
Rcpp::traits::input_parameter< double >::type negative_sample_rate(negative_sample_rateSEXP);
Rcpp::traits::input_parameter< const std::vector<float> >::type epochs_per_sample(epochs_per_sampleSEXP);
Rcpp::traits::input_parameter< float >::type initial_alpha(initial_alphaSEXP);
Rcpp::traits::input_parameter< float >::type negative_sample_rate(negative_sample_rateSEXP);
Rcpp::traits::input_parameter< bool >::type pcg_rand(pcg_randSEXP);
Rcpp::traits::input_parameter< bool >::type parallelize(parallelizeSEXP);
Rcpp::traits::input_parameter< std::size_t >::type grain_size(grain_sizeSEXP);
Expand All @@ -137,7 +137,7 @@ BEGIN_RCPP
END_RCPP
}
// optimize_layout_largevis
Rcpp::NumericMatrix optimize_layout_largevis(Rcpp::NumericMatrix head_embedding, const std::vector<unsigned int> positive_head, const std::vector<unsigned int> positive_tail, unsigned int n_epochs, unsigned int n_vertices, const std::vector<double> epochs_per_sample, double gamma, double initial_alpha, double negative_sample_rate, bool pcg_rand, bool parallelize, std::size_t grain_size, bool verbose);
Rcpp::NumericMatrix optimize_layout_largevis(Rcpp::NumericMatrix head_embedding, const std::vector<unsigned int> positive_head, const std::vector<unsigned int> positive_tail, unsigned int n_epochs, unsigned int n_vertices, const std::vector<float> epochs_per_sample, float gamma, float initial_alpha, float negative_sample_rate, bool pcg_rand, bool parallelize, std::size_t grain_size, bool verbose);
RcppExport SEXP _uwot_optimize_layout_largevis(SEXP head_embeddingSEXP, SEXP positive_headSEXP, SEXP positive_tailSEXP, SEXP n_epochsSEXP, SEXP n_verticesSEXP, SEXP epochs_per_sampleSEXP, SEXP gammaSEXP, SEXP initial_alphaSEXP, SEXP negative_sample_rateSEXP, SEXP pcg_randSEXP, SEXP parallelizeSEXP, SEXP grain_sizeSEXP, SEXP verboseSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Expand All @@ -147,10 +147,10 @@ BEGIN_RCPP
Rcpp::traits::input_parameter< const std::vector<unsigned int> >::type positive_tail(positive_tailSEXP);
Rcpp::traits::input_parameter< unsigned int >::type n_epochs(n_epochsSEXP);
Rcpp::traits::input_parameter< unsigned int >::type n_vertices(n_verticesSEXP);
Rcpp::traits::input_parameter< const std::vector<double> >::type epochs_per_sample(epochs_per_sampleSEXP);
Rcpp::traits::input_parameter< double >::type gamma(gammaSEXP);
Rcpp::traits::input_parameter< double >::type initial_alpha(initial_alphaSEXP);
Rcpp::traits::input_parameter< double >::type negative_sample_rate(negative_sample_rateSEXP);
Rcpp::traits::input_parameter< const std::vector<float> >::type epochs_per_sample(epochs_per_sampleSEXP);
Rcpp::traits::input_parameter< float >::type gamma(gammaSEXP);
Rcpp::traits::input_parameter< float >::type initial_alpha(initial_alphaSEXP);
Rcpp::traits::input_parameter< float >::type negative_sample_rate(negative_sample_rateSEXP);
Rcpp::traits::input_parameter< bool >::type pcg_rand(pcg_randSEXP);
Rcpp::traits::input_parameter< bool >::type parallelize(parallelizeSEXP);
Rcpp::traits::input_parameter< std::size_t >::type grain_size(grain_sizeSEXP);
Expand Down
44 changes: 22 additions & 22 deletions src/gradient.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,16 @@

// https://martin.ankerl.com/2012/01/25/optimized-approximative-pow-in-c-and-cpp/
// an approximation to pow
double fastPrecisePow(double a, double b) {
float fastPrecisePow(float a, float b) {
// calculate approximation with fraction of the exponent
int e = (int)b;
int e = static_cast<int>(b);
union {
double d;
int x[2];
} u = {a};
u.x[1] = (int)((b - e) * (u.x[1] - 1072632447) + 1072632447);
} u = { a };
u.x[1] = static_cast<int>((b - e) * (u.x[1] - 1072632447) + 1072632447);
u.x[0] = 0;

// exponentiation by squaring with the exponent's integer part
// double r = u.d makes everything much slower, not sure why
double r = 1.0;
Expand All @@ -41,26 +41,26 @@ double fastPrecisePow(double a, double b) {
a *= a;
e >>= 1;
}

return r * u.d;
return static_cast<float>(r * u.d);
}

// UMAP implementation code
template <double (*powfun)(double, double)>
base_umap_gradient<powfun>::base_umap_gradient(const double a, const double b,
const double gamma)
template <float (*powfun)(float, float)>
base_umap_gradient<powfun>::base_umap_gradient(const float a, const float b,
const float gamma)
: a(a), b(b), a_b_m2(-2.0 * a * b), gamma_b_2(2.0 * gamma * b) {}

template <double (*powfun)(double, double)>
const double
base_umap_gradient<powfun>::grad_attr(const double dist_squared) const {
const double pd2b = powfun(dist_squared, b);
template <float (*powfun)(float, float)>
const float
base_umap_gradient<powfun>::grad_attr(const float dist_squared) const {
const float pd2b = powfun(dist_squared, b);
return (a_b_m2 * pd2b) / (dist_squared * (a * pd2b + 1.0));
}

template <double (*powfun)(double, double)>
const double
base_umap_gradient<powfun>::grad_rep(const double dist_squared) const {
template <float (*powfun)(float, float)>
const float
base_umap_gradient<powfun>::grad_rep(const float dist_squared) const {
return gamma_b_2 /
((0.001 + dist_squared) * (a * powfun(dist_squared, b) + 1.0));
}
Expand All @@ -74,23 +74,23 @@ template class base_umap_gradient<fastPrecisePow>;

tumap_gradient::tumap_gradient() {}

const double tumap_gradient::grad_attr(const double dist_squared) const {
const float tumap_gradient::grad_attr(const float dist_squared) const {
return -2.0 / (dist_squared + 1.0);
}

const double tumap_gradient::grad_rep(const double dist_squared) const {
const float tumap_gradient::grad_rep(const float dist_squared) const {
return 2.0 / ((0.001 + dist_squared) * (dist_squared + 1.0));
}

// LargeVis

largevis_gradient::largevis_gradient(const double gamma)
largevis_gradient::largevis_gradient(const float gamma)
: gamma_2(gamma * 2.0) {}

const double largevis_gradient::grad_attr(const double dist_squared) const {
const float largevis_gradient::grad_attr(const float dist_squared) const {
return -2.0 / (dist_squared + 1.0);
}

const double largevis_gradient::grad_rep(const double dist_squared) const {
const float largevis_gradient::grad_rep(const float dist_squared) const {
return gamma_2 / ((0.1 + dist_squared) * (dist_squared + 1.0));
}
42 changes: 21 additions & 21 deletions src/gradient.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,26 +23,26 @@
#include <cmath>

// Class templated on the powfun function as suggested by Aaron Lun
template <double (*powfun)(double, double)> class base_umap_gradient {
template <float (*powfun)(float, float)> class base_umap_gradient {
public:
base_umap_gradient(const double a, const double b, const double gamma);
const double grad_attr(const double dist_squared) const;
const double grad_rep(const double dist_squared) const;
static const constexpr double clamp_hi = 4.0;
static const constexpr double clamp_lo = -4.0;
base_umap_gradient(const float a, const float b, const float gamma);
const float grad_attr(const float dist_squared) const;
const float grad_rep(const float dist_squared) const;
static const constexpr float clamp_hi = 4.0;
static const constexpr float clamp_lo = -4.0;

private:
const double a;
const double b;
const double a_b_m2;
const double gamma_b_2;
const float a;
const float b;
const float a_b_m2;
const float gamma_b_2;
};

// UMAP
typedef base_umap_gradient<std::pow> umap_gradient;

// apUMAP: UMAP with an approximate power calculation
double fastPrecisePow(double, double);
float fastPrecisePow(float, float);
typedef base_umap_gradient<fastPrecisePow> apumap_gradient;

// t-UMAP: the UMAP function with a = 1, and b = 1, which results in the Cauchy
Expand All @@ -54,22 +54,22 @@ typedef base_umap_gradient<fastPrecisePow> apumap_gradient;
class tumap_gradient {
public:
tumap_gradient();
const double grad_attr(const double dist_squared) const;
const double grad_rep(const double dist_squared) const;
static const constexpr double clamp_hi = 4.0;
static const constexpr double clamp_lo = -4.0;
const float grad_attr(const float dist_squared) const;
const float grad_rep(const float dist_squared) const;
static const constexpr float clamp_hi = 4.0;
static const constexpr float clamp_lo = -4.0;
};

class largevis_gradient {
public:
largevis_gradient(const double gamma);
const double grad_attr(const double dist_squared) const;
const double grad_rep(const double dist_squared) const;
static const constexpr double clamp_hi = 5.0;
static const constexpr double clamp_lo = -5.0;
largevis_gradient(const float gamma);
const float grad_attr(const float dist_squared) const;
const float grad_rep(const float dist_squared) const;
static const constexpr float clamp_hi = 5.0;
static const constexpr float clamp_lo = -5.0;

private:
const double gamma_2;
const float gamma_2;
};

#endif // UWOT_GRADIENT_H
Loading

0 comments on commit 4256507

Please sign in to comment.