From 06bb21be99227bc1e312bd0b9b7c3c2b378ff3e8 Mon Sep 17 00:00:00 2001
From: "E. G. Patrick Bos" <egpbos@gmail.com>
Date: Sun, 14 Oct 2018 23:26:45 +0200
Subject: [PATCH] First public commit

---
 BarcodeException.h                   |   36 +
 CMakeLists.txt                       |  344 ++++++
 EqSolvers.cc                         |  422 +++++++
 EqSolvers.h                          |   40 +
 HMC.cc                               |  819 +++++++++++++
 HMC.h                                |   15 +
 HMC_help.cc                          |   63 +
 HMC_help.h                           |   13 +
 HMC_mass.cc                          |  446 +++++++
 HMC_mass.h                           |   14 +
 HMC_models.cc                        | 1379 +++++++++++++++++++++
 HMC_models.h                         |  103 ++
 HMC_momenta.cc                       |  151 +++
 HMC_momenta.h                        |   13 +
 IOfunctions.cc                       |   81 ++
 IOfunctions.h                        |   15 +
 IOfunctionsGen.cc                    |  269 +++++
 IOfunctionsGen.h                     |   40 +
 Lag2Eul.cc                           |  426 +++++++
 Lag2Eul.h                            |   34 +
 PLANCK_CAMB.dat                      |  788 ++++++++++++
 README.md                            |   52 +-
 WMAP7_CAMB.dat                       |  781 ++++++++++++
 barcoderunner.cc                     |  541 +++++++++
 barcoderunner.h                      |   20 +
 calc_power.cc                        |  107 ++
 calc_power.h                         |   12 +
 call_hamil.cc                        |   64 +
 call_hamil.h                         |   11 +
 cmake/Modules/AddDefaultBinary.cmake |   39 +
 cmake/downloadFindFFTW.cmake.in      |   16 +
 cmake/hg_rev_filename.sh             |   34 +
 convenience.cc                       |  269 +++++
 convenience.h                        |   49 +
 cosmo.cc                             |  236 ++++
 cosmo.h                              |   21 +
 curses_funcs.cc                      |   88 ++
 curses_funcs.h                       |   29 +
 debug.cc                             |  310 +++++
 debug.h                              |   19 +
 define_opt.h                         |   86 ++
 disp_part.cc                         |  165 +++
 disp_part.h                          |   14 +
 fftw_array.h                         |  127 ++
 fftwrapper.cc                        |  276 +++++
 fftwrapper.h                         |  100 ++
 field_statistics.cpp                 |   89 ++
 field_statistics.h                   |   18 +
 init_par.cc                          |  585 +++++++++
 init_par.h                           |   23 +
 input.par                            |  168 +++
 main.cc                              |  223 ++++
 massFunctions.cc                     |  654 ++++++++++
 massFunctions.h                      |   33 +
 math_funcs.cc                        | 1651 ++++++++++++++++++++++++++
 math_funcs.h                         |  105 ++
 planck/LICENSE                       |  339 ++++++
 planck/bstream.h                     |  103 ++
 planck/constants.h                   |   86 ++
 planck/cxxutils.cc                   |  260 ++++
 planck/cxxutils.h                    |  242 ++++
 planck/datatypes.h                   |  198 +++
 planck/message_error.h               |   70 ++
 planck/openmp_support.h              |   60 +
 planck/paramfile.h                   |   85 ++
 planck/simparams.h                   |   58 +
 protocol.cc                          |  115 ++
 protocol.h                           |   26 +
 random.hpp                           |  115 ++
 rankorder.cc                         |   57 +
 rankorder.h                          |   16 +
 rsd.cc                               |   68 ++
 rsd.h                                |   15 +
 sample_maker.cc                      |   35 +
 sample_maker.h                       |   11 +
 struct_hamil.h                       |  403 +++++++
 struct_main.h                        |  258 ++++
 tools/2D_corr_fct.cc                 |  312 +++++
 tools/2D_corr_fct_interp.cc          |  433 +++++++
 tools/2D_powspec.cc                  |  165 +++
 tools/LAG2EULer.cc                   |   91 ++
 tools/corr_fct.cc                    |  134 +++
 tools/density.cc                     |   87 ++
 tools/interp_upres.cc                |  112 ++
 tools/poisson_upres.cc               |  156 +++
 tools/powspec.cc                     |   72 ++
 transf.cpp                           |  182 +++
 transf.h                             |   19 +
 88 files changed, 16908 insertions(+), 1 deletion(-)
 create mode 100644 BarcodeException.h
 create mode 100644 CMakeLists.txt
 create mode 100644 EqSolvers.cc
 create mode 100644 EqSolvers.h
 create mode 100644 HMC.cc
 create mode 100644 HMC.h
 create mode 100644 HMC_help.cc
 create mode 100644 HMC_help.h
 create mode 100644 HMC_mass.cc
 create mode 100644 HMC_mass.h
 create mode 100644 HMC_models.cc
 create mode 100644 HMC_models.h
 create mode 100644 HMC_momenta.cc
 create mode 100644 HMC_momenta.h
 create mode 100644 IOfunctions.cc
 create mode 100644 IOfunctions.h
 create mode 100644 IOfunctionsGen.cc
 create mode 100644 IOfunctionsGen.h
 create mode 100644 Lag2Eul.cc
 create mode 100644 Lag2Eul.h
 create mode 100644 PLANCK_CAMB.dat
 create mode 100644 WMAP7_CAMB.dat
 create mode 100644 barcoderunner.cc
 create mode 100644 barcoderunner.h
 create mode 100644 calc_power.cc
 create mode 100644 calc_power.h
 create mode 100644 call_hamil.cc
 create mode 100644 call_hamil.h
 create mode 100644 cmake/Modules/AddDefaultBinary.cmake
 create mode 100644 cmake/downloadFindFFTW.cmake.in
 create mode 100644 cmake/hg_rev_filename.sh
 create mode 100644 convenience.cc
 create mode 100644 convenience.h
 create mode 100644 cosmo.cc
 create mode 100644 cosmo.h
 create mode 100644 curses_funcs.cc
 create mode 100644 curses_funcs.h
 create mode 100644 debug.cc
 create mode 100644 debug.h
 create mode 100644 define_opt.h
 create mode 100644 disp_part.cc
 create mode 100644 disp_part.h
 create mode 100644 fftw_array.h
 create mode 100644 fftwrapper.cc
 create mode 100644 fftwrapper.h
 create mode 100644 field_statistics.cpp
 create mode 100644 field_statistics.h
 create mode 100644 init_par.cc
 create mode 100644 init_par.h
 create mode 100644 input.par
 create mode 100644 main.cc
 create mode 100644 massFunctions.cc
 create mode 100644 massFunctions.h
 create mode 100644 math_funcs.cc
 create mode 100644 math_funcs.h
 create mode 100644 planck/LICENSE
 create mode 100644 planck/bstream.h
 create mode 100644 planck/constants.h
 create mode 100644 planck/cxxutils.cc
 create mode 100644 planck/cxxutils.h
 create mode 100644 planck/datatypes.h
 create mode 100644 planck/message_error.h
 create mode 100644 planck/openmp_support.h
 create mode 100644 planck/paramfile.h
 create mode 100644 planck/simparams.h
 create mode 100644 protocol.cc
 create mode 100644 protocol.h
 create mode 100644 random.hpp
 create mode 100644 rankorder.cc
 create mode 100644 rankorder.h
 create mode 100644 rsd.cc
 create mode 100644 rsd.h
 create mode 100644 sample_maker.cc
 create mode 100644 sample_maker.h
 create mode 100644 struct_hamil.h
 create mode 100644 struct_main.h
 create mode 100644 tools/2D_corr_fct.cc
 create mode 100644 tools/2D_corr_fct_interp.cc
 create mode 100644 tools/2D_powspec.cc
 create mode 100644 tools/LAG2EULer.cc
 create mode 100644 tools/corr_fct.cc
 create mode 100644 tools/density.cc
 create mode 100644 tools/interp_upres.cc
 create mode 100644 tools/poisson_upres.cc
 create mode 100644 tools/powspec.cc
 create mode 100644 transf.cpp
 create mode 100644 transf.h

diff --git a/BarcodeException.h b/BarcodeException.h
new file mode 100644
index 0000000..3e3a308
--- /dev/null
+++ b/BarcodeException.h
@@ -0,0 +1,36 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+#pragma once
+#include <exception>
+#include <string>
+
+class BarcodeException : public std::exception {
+  private:
+    std::string s;
+  public:
+    BarcodeException(std::string ss) : s(ss) {}
+    ~BarcodeException() throw () {}
+    const char* what() const throw() { return this->s.c_str(); }
+};
+
+/* Usage example:
+void Foo::Bar(){
+  if(!QueryPerformanceTimer(&m_baz)){
+    throw BarcodeException("it's the end of the world!");
+  }
+}
+
+void Foo::Caller(){
+  try{
+    this->Bar();// should throw
+  }catch(BarcodeException& caught){
+    std::cout<<"Got "<<caught.what()<<std::endl;
+  }
+}
+*/
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..c51de28
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,344 @@
+#
+# Barcode
+# Copyright E.G.P. Bos and F.S. Kitaura
+#
+# Distributed under the terms of the MIT License.
+# The full license is in the file LICENSE, distributed with this software.
+#
+
+cmake_minimum_required(VERSION 3.1.3)
+# 3.1.3 for set(CMAKE_CXX_STANDARD 11), all other features are in 3.0 at least (didn't check 2.x)
+
+
+#--------------------------------------- download and setup FindFFTW
+configure_file(cmake/downloadFindFFTW.cmake.in findFFTW-download/CMakeLists.txt)
+execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" .
+        RESULT_VARIABLE result
+        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/findFFTW-download )
+if(result)
+    message(FATAL_ERROR "CMake step for findFFTW failed: ${result}")
+    else()
+    message("CMake step for findFFTW completed (${result}).")
+endif()
+execute_process(COMMAND ${CMAKE_COMMAND} --build .
+        RESULT_VARIABLE result
+        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/findFFTW-download )
+if(result)
+    message(FATAL_ERROR "Build step for findFFTW failed: ${result}")
+endif()
+
+set(findFFTW_DIR ${CMAKE_CURRENT_BINARY_DIR}/findFFTW-src)
+
+#--------------------------------------- CMake modules
+
+set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules/" "${findFFTW_DIR}")
+
+project(barcode)
+
+#--------------------------------------- warnings (for all binaries)
+add_compile_options(-Wundef -Wshadow -Wwrite-strings -Wredundant-decls -Woverloaded-virtual -Wcast-qual -Wcast-align -Wpointer-arith -Wconversion -Wold-style-cast -Wall -Wextra)
+
+
+#--------------------------------------- OPTIONS
+
+option(INTEL "Use intel library and compiler" OFF)
+
+#--------------------------------------- Basic operation mode of code
+option(DEBUG "Debugging mode" OFF)
+option(MULTITHREAD "Enables OpenMP directives on most for-loops, except once with RNGs." ON)
+option(MULTITHREAD_FFTW "Use multi-threaded version of the FFTW3 library." ON)
+option(MULTITHREAD_RNG "Enable OpenMP directives on for-loops containing random number generators. This causes an extra degree of randomness, due to the order in which cores access the RNG, which will differ every run, because the scheduler controls this and other running processes will have an (unpredictable) influence on this." OFF)
+option(NAN_DETECTION "Turns on not-a-number detection. This causes floating point exceptions if a NaN is created somewhere in the code. If you don't do this, the NaNs may freely propagate and your code will keep running, producing nonsense results." ON)
+option(RESTART_FILE "Using this option, if you want to restart your simulation, you have to create an empty file called \"restart\" in the output directory. This will then read in the restart.prt file that is created every step. If you don't use this option, you can restart at a step of your choice by simply calling the code with an extra command line option specifying the number you want to start from, e.g. \"barcode.x 203\". The disadvantage of using RESTART_FILE is that it can cause big I/O lags on computers with network storage if you're running multiple instances on the same computer at once." OFF)
+
+#--------------------------------------- Hamiltonian stuff
+option(MASKING "[NOT TESTED!] Turns handling of masking on." OFF)
+
+#--------------------------------------- Modelled physics
+# Note that these options should be replaced by a sfmodel parameter in
+# the input.par file, like in the patchy code! This is work in progress.
+option(TRANSF "[NOT TESTED!] Activate transfer function convolution for Zel'dovich and 2LPT models." OFF)
+option(TRANSFSC "[NOT TESTED!] Convolve the SC part of Psi with a transfer function as well. This seems to be the same transfer function as the one used for the Zel'dovich+TF model. This option gives a \"ALPT+TF\" model." OFF)
+option(BARYONS "Power spectrum with baryons (Eisenstein & Hu 1998). When set to OFF, the power spectrum will be a power law, incl. apodisation at Nyquist scale. Both options only apply when not reading the power spectrum from a file, which can be set in the input file." ON)
+
+#--------------------------------------- Rank ordering
+option(RANKORD "NOT IMPLEMENTED. Switch on rank ordering." OFF)
+option(RANDORDTYPE "NOT IMPLEMENTED. Do rank ordering split up according to t-web type. If not, just order by density, regardless of type." OFF)
+option(LOGRO "NOT USED. Do a logarithmic rank ordering, i.e. transform density to log-density before ordering." OFF)
+
+#--------------------------------------- Fourier transform convention
+option(FOURIER_DEF_1 "Defines the type of (discrete) Fourier transforms. With DEF_1, the FT itself has the 1/N factor in front. With DEF_2, the inverse FT has the 1/N factor. Default: FOURIER_DEF_2. Note: code was only tested with FOURIER_DEF_2!" OFF)
+option(FOURIER_DEF_2 "Defines the type of (discrete) Fourier transforms. With DEF_1, the FT itself has the 1/N factor in front. With DEF_2, the inverse FT has the 1/N factor. Default: FOURIER_DEF_2. Note: code was only tested with FOURIER_DEF_2!" ON)
+option(FOURIER_DEF_2_20151021 "[NOT TESTED!] Experimental update to DEF_2. Can only be used simultaneously with FOURIER_DEF_2, not with DEF_1 and not by itself." OFF)
+
+if ( FOURIER_DEF_1 AND FOURIER_DEF_2 )
+    message(FATAL_ERROR "Cannot use FOURIER_DEF_1 and FOURIER_DEF_2 at same time!")
+endif()
+
+if ( ( FOURIER_DEF_1 AND FOURIER_DEF_2_20151021 ) OR (FOURIER_DEF_2_20151021 AND NOT FOURIER_DEF_2))
+    message(FATAL_ERROR "Cannot use FOURIER_DEF_2_20151021 with FOURIER_DEF_1 or by itself! Only together with DEF_2.")
+endif()
+
+#--------------------------------------- Single/Double Precision
+option(SINGLE_PREC "Choose single or double precision calculations and input/output." OFF)
+option(DOUBLE_PREC "Choose single or double precision calculations and input/output." ON)
+
+if ( SINGLE_PREC AND DOUBLE_PREC )
+    message(FATAL_ERROR "Cannot use SINGLE_PREC and DOUBLE_PREC at same time!")
+endif()
+
+#--------------------------------------- Testing and Debugging options
+option(PLANCK_SILENT "Makes the Planck LevelS package functions silent (used for parsing configuration input file). Turn off to make them more verbose." ON)
+
+#--------------------------------------- Things for special behaviour
+option(SCSMOO "[NOT TESTED!] Smooth the spherical collapse part of the displacement field with a smoothing length kthsc2. This variable is currently not implemented in the code. kthsc is implemented and is equal to kth, which equals ksmooth from input.par. Something similar should probably be defined for this." OFF)
+
+#--------------------------------------- NEEDS ATTENTION
+
+# CONSIDER EITHER TAKING THESE OPTIONS OUT OR ACTUALLY GIVING THEM CODE-WIDE IMPACT:
+option(GFFT "Use FFT based derivative (NOT REALLY USED, currently only used in non-Zel'dovich Lag2Eul via calc_m2v_mem and in EigenValuesTweb)." OFF)
+option(GFINDIFF "Use finite difference derivative (NOT REALLY USED, currently only used in non-Zel'dovich Lag2Eul via calc_m2v_mem and in EigenValuesTweb)." ON)
+
+if ( GFFT AND GFINDIFF )
+    message(FATAL_ERROR "Cannot use GFFT and GFINDIFF at same time!")
+endif()
+
+if ( NOT (GFINDIFF OR GFFT) )
+    message(FATAL_ERROR "Must choose one of GFFT and GFINDIFF!")
+endif()
+
+set(CMAKE_CXX_STANDARD 11)
+
+set(BARCODE_SOURCE_FILES
+# CORE
+        barcoderunner.cc
+        protocol.cc         # initialization of ... not sure what exactly. TODO: REMOVE, ONLY ONE THAT'S USED IS RESTART_PROTOCOL
+        sample_maker.cc
+# CONFIG FILE
+        planck/cxxutils.cc  # PARSING
+        init_par.cc
+# HMC
+        call_hamil.cc
+        HMC.cc
+        HMC_help.cc
+        HMC_mass.cc
+        HMC_models.cc
+        HMC_momenta.cc
+# ARRAY, MATH AND STRING MANIPULATIONS:
+        convenience.cc
+        math_funcs.cc
+# COSMOLOGY
+        cosmo.cc
+        calc_power.cc       # calculate theoretical cosmological power spectra
+# INTERFACE
+        curses_funcs.cc
+# DEBUGGING
+        debug.cc
+# LAG2EUL
+        Lag2Eul.cc
+        disp_part.cc
+        EqSolvers.cc
+# FFT
+        fftwrapper.cc
+# IO
+        IOfunctions.cc
+        IOfunctionsGen.cc
+# DENSITY
+        massFunctions.cc
+        transf.cpp
+        rankorder.cc         # aren't these also just transfer functions? TODO: maybe merge with transf.cpp
+        field_statistics.cpp # field statistics; power spectrum, etc
+# REDSHIFT SPACE
+        rsd.cc)
+
+add_library(barlib ${BARCODE_SOURCE_FILES})
+
+add_executable(barcode main.cc)
+target_link_libraries(barcode barlib)
+
+#--------------------------------------- dependencies
+find_package(GSL REQUIRED)
+include_directories(${GSL_INCLUDE_DIRS})
+target_link_libraries(barcode ${GSL_LIBRARIES})
+#target_link_libraries(barlib ${GSL_LIBRARIES})
+
+find_package(FFTW)
+
+include_directories(${FFTW_INCLUDE_DIRS})
+if (DOUBLE_PREC)
+    target_link_libraries(barcode ${FFTW_DOUBLE_LIB})
+#    target_link_libraries(barlib ${FFTW_DOUBLE_LIB})
+    if (MULTITHREAD_FFTW)
+        target_link_libraries(barcode ${FFTW_DOUBLE_OPENMP_LIB})
+#        target_link_libraries(barlib ${FFTW_DOUBLE_OPENMP_LIB})
+    endif()
+elseif(SINGLE_PREC)
+    target_link_libraries(barcode ${FFTW_FLOAT_LIB})
+#    target_link_libraries(barlib ${FFTW_FLOAT_LIB})
+    if (MULTITHREAD_FFTW)
+        target_link_libraries(barcode ${FFTW_FLOAT_OPENMP_LIB})
+#        target_link_libraries(barlib ${FFTW_FLOAT_OPENMP_LIB})
+    endif()
+endif()
+
+
+# convert all options to preprocessor flag compilation options
+if (INTEL)
+    add_definitions(-DINTEL)
+endif()
+
+if (DEBUG OR (CMAKE_BUILD_TYPE MATCHES Debug) OR (CMAKE_BUILD_TYPE MATCHES RelWithDebInfo))
+    add_definitions(-DDEBUG)
+endif()
+
+if (MULTITHREAD)
+    add_definitions(-DMULTITHREAD)
+endif()
+
+if (MULTITHREAD_FFTW)
+    add_definitions(-DMULTITHREAD_FFTW)
+endif()
+
+if (MULTITHREAD_RNG)
+    add_definitions(-DMULTITHREAD_RNG)
+endif()
+
+if (NAN_DETECTION)
+    add_definitions(-DNAN_DETECTION)
+endif()
+
+if (RESTART_FILE)
+    add_definitions(-DRESTART_FILE)
+endif()
+
+if (MASKING)
+    add_definitions(-DMASKING)
+endif()
+
+if (ZELD)
+    add_definitions(-DZELD)
+endif()
+
+if (TRANSF)
+    add_definitions(-DTRANSF)
+endif()
+
+if (TRANSFSC)
+    add_definitions(-DTRANSFSC)
+endif()
+
+if (BARYONS)
+    add_definitions(-DBARYONS)
+endif()
+
+if (RANKORD)
+    add_definitions(-DRANKORD)
+endif()
+
+if (RANKORDTYPE)
+    add_definitions(-DRANKORDTYPE)
+endif()
+
+if (LOGRO)
+    add_definitions(-DLOGRO)
+endif()
+
+if (FOURIER_DEF_1)
+    add_definitions(-DFOURIER_DEF_1)
+endif()
+
+if (FOURIER_DEF_2)
+    add_definitions(-DFOURIER_DEF_2)
+endif()
+
+if (FOURIER_DEF_2_20151021)
+    add_definitions(-DFOURIER_DEF_2_20151021)
+endif()
+
+if (SINGLE_PREC)
+    add_definitions(-DSINGLE_PREC)
+endif()
+
+if (DOUBLE_PREC)
+    add_definitions(-DDOUBLE_PREC)
+endif()
+
+if (PLANCK_SILENT)
+    add_definitions(-DPLANCK_SILENT)
+endif()
+
+if (SCSMOO)
+    add_definitions(-DSCSMOO)
+endif()
+
+if (GFFT)
+    add_definitions(-DGFFT)
+endif()
+
+if (GFINDIFF)
+    add_definitions(-DGFINDIFF)
+endif()
+
+# some additional things that must be added based on the options
+if (NOT INTEL)
+    if (MULTITHREAD)
+        target_link_libraries(barcode gomp)
+#        target_link_libraries(barlib gomp)
+        target_compile_options(barcode PUBLIC -fopenmp)
+        target_compile_options(barlib PUBLIC -fopenmp)
+    endif()
+endif()
+
+# final things that are always used
+target_link_libraries(barcode m ncurses)
+#target_link_libraries(barlib m ncurses)
+
+# make a link for the most recent build
+# if Mercurial is present, use it to add the revision number to the output executable
+# N.B.: this causes the barcode binary output file to not be present, so every build will always do a new linking step!
+find_package(Hg)
+if(HG_FOUND)
+    get_filename_component(OUTPUT_FN $<TARGET_FILE_NAME:barcode> NAME)
+    add_custom_command(TARGET barcode
+            POST_BUILD
+            COMMAND bash ${CMAKE_SOURCE_DIR}/cmake/hg_rev_filename.sh ${PROJECT_BINARY_DIR} ${OUTPUT_FN}
+            WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
+            COMMENT "Renamed output binary to include Mercurial revision information and created symbolic link barcode_latest_build to new path.")
+endif(HG_FOUND)
+
+
+##################
+# OTHER BINARIES #
+##################
+include(AddDefaultBinary)
+
+#############
+### TOOLS ###
+#############
+
+## density
+ADD_DEFAULT_BINARY(density tools/density.cc)
+
+## convert primordial density fluctuation field ("lagrangian") to z=0 density field ("eulerian")
+ADD_DEFAULT_BINARY(LAG2EULer tools/LAG2EULer.cc)
+
+## correlation function calculator
+ADD_DEFAULT_BINARY(corr_fct tools/corr_fct.cc)
+
+## powspec
+ADD_DEFAULT_BINARY(powspec tools/powspec.cc)
+
+## 2D correlation function generators (with or without interpolating resolution upscaling)
+ADD_DEFAULT_BINARY(2D_corr_fct tools/2D_corr_fct.cc)
+ADD_DEFAULT_BINARY(2D_corr_fct_interp tools/2D_corr_fct_interp.cc)
+target_link_libraries(2D_corr_fct_interp ${FFTW_DOUBLE_LIB})
+if (MULTITHREAD_FFTW)
+    target_link_libraries(2D_corr_fct_interp ${FFTW_DOUBLE_OPENMP_LIB})
+endif()
+
+## 2D power spectrum calculator
+ADD_DEFAULT_BINARY(2D_powspec tools/2D_powspec.cc)
+
+## tools for upscaling resolution (with poisson sampling or interpolation)
+ADD_DEFAULT_BINARY(interp_upres tools/interp_upres.cc)
+ADD_DEFAULT_BINARY(poisson_upres tools/poisson_upres.cc)
diff --git a/EqSolvers.cc b/EqSolvers.cc
new file mode 100644
index 0000000..e0208f1
--- /dev/null
+++ b/EqSolvers.cc
@@ -0,0 +1,422 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#include "struct_main.h"
+#include "fftw_array.h"
+
+#include <math.h>
+#include <iomanip>
+
+#include "cosmo.h"
+#include "IOfunctionsGen.h"
+#include "math_funcs.h"
+
+#include "convenience.h"
+#include "BarcodeException.h"
+
+// Everything in here is only used in Lag2Eul
+
+using namespace std;
+
+void PoissonSolver(unsigned int N1, unsigned int N2, unsigned int N3, real_prec L1, real_prec L2, real_prec L3,
+                   real_prec *delta, real_prec *Pot)
+{
+  // ULONG N=N1*N2*N3;
+  unsigned int N3half = (N3/2+1);
+  ULONG Nhalf = N1*N2*N3half;
+
+  // fftw_array<complex_prec> deltaC(N), PotC(N), AUX(N);
+  fftw_array<complex_prec> deltaC(Nhalf);
+
+  // complexify_array(delta, deltaC, N);
+  // FFT3d(N1,N2,N3, true, deltaC, AUX);
+  fftR2C(N1, N2, N3, delta, deltaC);
+
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+  for (unsigned i=0; i<N1;i++)
+    for (unsigned j=0; j<N2;j++)
+      for (unsigned k = 0; k < N3half; ++k) {
+        ULONG index=k + N3half * (j + N2*i);
+
+        real_prec kmod2=k_squared(i,j,k,L1,L2,L3,N1,N2,N3);
+
+        real_prec fackern=0.;
+        if (kmod2>0.)
+          fackern=static_cast<real_prec>(-1./kmod2);
+
+        re(deltaC[index]) = re(deltaC[index]) * fackern;  // deltaC -> "PotC"
+        im(deltaC[index]) = im(deltaC[index]) * fackern;
+      }
+
+  // FFT3d(N1,N2,N3, false, AUX, PotC);
+  // real_part_array(PotC, Pot, N);
+  fftC2R(N1, N2, N3, deltaC, Pot);
+}
+
+
+// Also used in webclass, but effectively Lag2Eul only.
+void calc_LapPhiv(unsigned int N1, unsigned int N2, unsigned int N3, real_prec L1, complex_prec *philv, real_prec *LapPhiv,
+                  int index1, int index2)
+{
+  ULONG N=N1*N2*N3;
+  // int N3half = (N3/2 + 1);
+  // ULONG Nhalf = N1*N2*N3half;
+  fftw_array<complex_prec> LapPhivl(N), LapPhivC(N);
+  // fftw_array<complex_prec> LapPhivl(Nhalf);
+
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD 
+  for (unsigned i=0;i<N1;i++)
+    for (unsigned j=0;j<N2;j++)
+      for (unsigned k = 0; k < N3; ++k) {
+        ULONG ii = k + N3 * (j + N2*i);
+        real_prec k1 = 0., k2 = 0.;
+
+        real_prec kx=calc_kx(i,L1,N1);
+        real_prec ky=calc_ky(j,L1,N2);
+        real_prec kz=calc_kz(k,L1,N3);
+
+        switch (index1)
+        {
+          case 1:
+            k1=kx;
+            break;
+          case 2:
+            k1=ky;
+            break;
+          case 3:
+            k1=kz;
+            break;
+          default:
+            throw BarcodeException("In calc_LapPhiv : index1 must be either 1, 2 or 3!");
+        }
+
+        switch (index2)
+        {
+          case 1:
+            k2=kx;
+            break;
+          case 2:
+            k2=ky;
+            break;
+          case 3:
+            k2=kz;
+            break;
+          default:
+            throw BarcodeException("In calc_LapPhiv : index2 must be either 1, 2 or 3!");
+        }
+
+        re(LapPhivl[ii])=-k1*k2*re(philv[ii]); // EGP (20131213): note that this was wrong! philv[ihalf] was called,
+        im(LapPhivl[ii])=-k1*k2*im(philv[ii]); // but should be ii, because philv was already a full N size array!
+      }
+
+  FFT3d(N1,N2,N3, false, LapPhivl, LapPhivC);
+  real_part_array(LapPhivC, LapPhiv, N);
+  // fftC2R(N1, N2, N3, LapPhivl, LapPhiv);
+}
+
+
+real_prec linearvel3d(int index, real_prec kx, real_prec ky, real_prec kz, real_prec phi)
+{
+  real_prec out, kl;
+
+  switch(index)
+  {
+    case 1:
+      {
+        kl=kx;
+        break;
+      }
+    case 2:
+      {
+        kl=ky;
+        break;
+      }
+    case 3:
+      {
+        kl=kz;
+        break;
+      }
+    default:
+      {
+        throw BarcodeException("In linearvel3d : index must be either 1, 2 or 3!");
+      }
+  }
+  real_prec kmod2=kx*kx+ky*ky+kz*kz;
+
+  real_prec fackern=0.0;
+  if (kmod2>eps)
+    fackern = kl/kmod2;
+
+  out=fackern*phi;
+
+  return out;
+}
+
+
+void theta2vel(unsigned int N1, unsigned int N2, unsigned int N3, real_prec L1, real_prec L2, real_prec L3,
+               real_prec scale, real_prec Omega_M,
+               real_prec Omega_L, real_prec *delta, real_prec *vex, real_prec *vey, real_prec *vez, bool zeropad,
+               bool norm, plan_pkg *R2Cplan, plan_pkg *C2Rplan)
+{
+  // The fastest way to call this function is to use:
+  // 1. R2Cplan->R for delta
+  // 2. C2Rplan->R for vez
+  real_prec Lzp1=L1, Lzp2=L2, Lzp3=L3;
+  unsigned Nzp1=N1, Nzp2=N2, Nzp3=N3;
+
+  if (zeropad==true)
+  {
+    Lzp1=L1*static_cast<real_prec>(2.0);
+    Lzp2=L2*static_cast<real_prec>(2.0);
+    Lzp3=L3*static_cast<real_prec>(2.0);
+
+    Nzp1=N1*2;
+    Nzp2=N2*2;
+    Nzp3=N3*2;
+  }
+
+  //real_prec H0=static_cast<real_prec>(100.*hconst *cgs_km/cgs_Mpc/cgs_sec);
+  real_prec cpecvel=1.;
+
+  if (norm==true)
+    //cpecvel=c_pecvel(scale,Omega_M,Omega_L,H0);
+    cpecvel = c_pecvel(scale, Omega_M, Omega_L, 1); // Note: only first order (Zel'dovich) f!
+
+  unsigned N3half = (N3/2 + 1);
+  ULONG Nhalf = N1 * N2 * N3half;
+  // fftw_array<complex_prec> deltaC(Nhalf), velx(Nhalf), vely(Nhalf);
+  fftw_array<complex_prec> velx(Nhalf), vely(Nhalf);
+
+  // fftR2C(N1, N2, N3, delta, deltaC);
+  fftR2Cplanned(delta, R2Cplan->C, R2Cplan);  // using R2Cplan->C as deltaC
+
+  #ifdef MULTITHREAD
+  #pragma omp parallel for
+  #endif // MULTITHREAD
+  for (unsigned i=0;i<N1;i++) {
+    real_prec kx=calc_kx(i,Lzp1,Nzp1);
+    for (unsigned j=0;j<N2;j++) {
+      real_prec ky=calc_ky(j,Lzp2,Nzp2);
+      for (unsigned k = 0; k < N3half; ++k) {
+        real_prec kz=calc_kz(k,Lzp3,Nzp3);
+        real_prec ksq = kx*kx+ky*ky+kz*kz;
+
+        if (ksq > eps) {  // eps is defined in define_opt.h as 1.e-14
+          real_prec fac_kern = cpecvel / ksq;
+          ULONG ix = k + N3half * (j + N2*i);
+          // real_prec deltaC_ix_r = re(deltaC[ix]);
+          // real_prec deltaC_ix_i = im(deltaC[ix]);
+          real_prec deltaC_ix_r = re(R2Cplan->C[ix]);
+          real_prec deltaC_ix_i = im(R2Cplan->C[ix]);
+
+          real_prec fac_kern_x = fac_kern * kx;
+          re(velx[ix]) = fac_kern_x * deltaC_ix_i;
+          im(velx[ix]) = fac_kern_x * -deltaC_ix_r;
+
+          real_prec fac_kern_y = fac_kern * ky;
+          re(vely[ix]) = fac_kern_y * deltaC_ix_i;
+          im(vely[ix]) = fac_kern_y * -deltaC_ix_r;
+
+          // using deltaC as dummy instead of velz
+          real_prec fac_kern_z = fac_kern * kz;
+          // re(deltaC[ix]) = fac_kern_z * deltaC_ix_i;
+          // im(deltaC[ix]) = fac_kern_z * -deltaC_ix_r;
+          re(R2Cplan->C[ix]) = fac_kern_z * deltaC_ix_i;
+          im(R2Cplan->C[ix]) = fac_kern_z * -deltaC_ix_r;
+        } else {
+          ULONG ix = k + N3half * (j + N2*i);
+          re(velx[ix]) = 0;
+          im(velx[ix]) = 0;
+          re(vely[ix]) = 0;
+          im(vely[ix]) = 0;
+          // re(deltaC[ix]) = 0;
+          // im(deltaC[ix]) = 0;
+          re(R2Cplan->C[ix]) = 0;
+          im(R2Cplan->C[ix]) = 0;
+        }
+
+        // From http://math.mit.edu/~stevenj/fft-deriv.pdf: Nyquist components
+        // should be zero, in every dimension, otherwise the result is complex.
+        // This function is not actually treated in that document, but I would
+        // say it also counts as an odd-ordered derivative.
+        if ((i==N1/2) || (j==N2/2) || (k==N3/2))
+        {
+          ULONG ix = k + N3half * (j + N2*i);
+          re(velx[ix]) = 0.;
+          im(velx[ix]) = 0.;
+          re(vely[ix]) = 0.;
+          im(vely[ix]) = 0.;
+          // re(deltaC[ix]) = 0;
+          // im(deltaC[ix]) = 0;
+          re(R2Cplan->C[ix]) = 0;
+          im(R2Cplan->C[ix]) = 0;
+        }
+      }
+    }
+  }
+
+  // fftC2R(N1, N2, N3, velx, vex);
+  // fftC2R(N1, N2, N3, vely, vey);
+  // fftC2R(N1, N2, N3, deltaC, vez);
+  // fftC2R(N1, N2, N3, R2Cplan->C, vez);
+  fftC2Rplanned(R2Cplan->C, vez, C2Rplan);  // must go first, C2R destroys input!
+  fftC2Rplanned(velx, vex, C2Rplan);
+  fftC2Rplanned(vely, vey, C2Rplan);
+}
+
+
+void theta2velcomp(unsigned int N1, unsigned int N2, unsigned int N3, real_prec L1, real_prec L2, real_prec L3,
+                   real_prec scale, real_prec Omega_M, real_prec Omega_L, real_prec *delta, real_prec *vei,
+                   bool zeropad, bool norm, int comp)
+{
+  real_prec Lzp1=L1, Lzp2=L2, Lzp3=L3;
+  unsigned Nzp1=N1, Nzp2=N2, Nzp3=N3;
+
+  if (zeropad==true)
+  {
+    Lzp1=L1*static_cast<real_prec>(2.0);
+    Lzp2=L2*static_cast<real_prec>(2.0);
+    Lzp3=L3*static_cast<real_prec>(2.0);
+
+    Nzp1=N1*2;
+    Nzp2=N2*2;
+    Nzp3=N3*2;
+  }
+
+  //real_prec H0=static_cast<real_prec>(100.*hconst *cgs_km/cgs_Mpc/cgs_sec);
+  real_prec cpecvel=1.;
+
+  if (norm==true)
+    //cpecvel=c_pecvel(scale,Omega_M,Omega_L,H0);
+    cpecvel = c_pecvel(scale, Omega_M, Omega_L, 1); // Note: only first order (Zel'dovich) f!
+
+  // ULONG N=N1*N2*N3;
+  unsigned N3half = (N3/2 + 1);
+  ULONG Nhalf = N1 * N2 * N3half;
+  // fftw_array<complex_prec> dummyC(N), phi(N), veli(N);
+  fftw_array<complex_prec> deltaC(Nhalf);
+
+  // complexify_array(delta, dummyC, N);
+  // FFT3d(N1,N2,N3, true, dummyC, phi);
+  fftR2C(N1, N2, N3, delta, deltaC);
+
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+  for (unsigned i=0;i<N1;i++)
+    for (unsigned j=0;j<N2;j++)
+      for (unsigned k = 0; k < N3half; ++k) {
+        ULONG index = k + N3half * (j + N2*i);
+
+        real_prec kx=calc_kx(i,Lzp1,Nzp1);
+        real_prec ky=calc_ky(j,Lzp2,Nzp2);
+        real_prec kz=calc_kz(k,Lzp3,Nzp3);
+
+        real_prec deltaC_ix_r = re(deltaC[index]);
+        real_prec deltaC_ix_i = im(deltaC[index]);
+
+        // deltaC is now used as dummy instead of veli
+        switch (comp)
+        {
+          case 1:
+            {
+              re(deltaC[index])=cpecvel*linearvel3d(1, kx, ky, kz, deltaC_ix_i);
+              im(deltaC[index])=cpecvel*linearvel3d(1, kx, ky, kz, -deltaC_ix_r);
+            }
+            break;
+          case 2:
+            {
+              re(deltaC[index])=cpecvel*linearvel3d(2, kx, ky, kz, deltaC_ix_i);
+              im(deltaC[index])=cpecvel*linearvel3d(2, kx, ky, kz, -deltaC_ix_r);
+
+            }
+            break;
+          case 3:
+            {
+              re(deltaC[index])=cpecvel*linearvel3d(3, kx, ky, kz, deltaC_ix_i);
+              im(deltaC[index])=cpecvel*linearvel3d(3, kx, ky, kz, -deltaC_ix_r);
+            }
+            break;
+        }
+
+        // From http://math.mit.edu/~stevenj/fft-deriv.pdf: Nyquist components
+        // should be zero, in every dimension, otherwise the result is complex.
+        // This function is not actually treated in that document, but I would
+        // say it also counts as an odd-ordered derivative.
+        if ((i==N1/2) || (j==N2/2) || (k==N3/2))
+        {
+          re(deltaC[index]) = 0.;
+          im(deltaC[index]) = 0.;
+        }
+      }
+
+  // FFT3d(N1,N2,N3, false, veli, dummyC);
+  // real_part_array(dummyC, vei, N);
+  fftC2R(N1, N2, N3, deltaC, vei);
+}
+
+
+/* start LPT packages*/
+
+void calc_m2v_mem(unsigned int N1, unsigned int N2, unsigned int N3, real_prec L1, real_prec *phiv, real_prec *m2v)
+{
+  ULONG N=N1*N2*N3;
+
+  fftw_array<real_prec> LapPhivx(N), LapPhivy(N), LapPhivz(N);
+  fftw_array<real_prec> LapPhivxy(N), LapPhivxz(N), LapPhivyz(N);
+
+#ifdef GFFT
+  fftw_array<complex_prec> philv(N);
+  for(ULONG i=0;i<N;i++)
+  {
+    re(philv[i])=0.0;
+    im(philv[i])=0.0;
+  }
+
+  FFT3dR2C (N1,N2,N3,phiv,philv);
+
+  calc_LapPhiv(N1,N2,N3,L1,philv,LapPhivx,1,1);
+  calc_LapPhiv(N1,N2,N3,L1,philv,LapPhivy,2,2);
+  calc_LapPhiv(N1,N2,N3,L1,philv,LapPhivz,3,3);
+
+  calc_LapPhiv(N1,N2,N3,L1,philv,LapPhivxy,1,2);
+  calc_LapPhiv(N1,N2,N3,L1,philv,LapPhivxz,1,3);
+  calc_LapPhiv(N1,N2,N3,L1,philv,LapPhivyz,2,3);
+#endif  // GFFT
+
+#ifdef GFINDIFF
+  fftw_array<real_prec> dummy(N);
+  gradfindif(N1, L1, phiv, dummy, 1);
+  gradfindif(N1, L1, dummy, LapPhivx, 1);
+  gradfindif(N1, L1, dummy, LapPhivxy, 2);
+  gradfindif(N1, L1, dummy, LapPhivxz, 3);
+
+  gradfindif(N1, L1, phiv, dummy, 2);
+  gradfindif(N1, L1, dummy, LapPhivy, 2);
+  gradfindif(N1, L1, dummy, LapPhivyz, 3);
+
+  gradfindif(N1, L1, phiv, dummy, 3);
+  gradfindif(N1, L1, dummy, LapPhivz, 3);
+#endif  // GFINDIFF
+
+
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+  for(ULONG i=0;i<N;i++)
+  {
+    m2v[i]=LapPhivx[i]*LapPhivy[i]-LapPhivxy[i]*LapPhivxy[i]+LapPhivx[i]*LapPhivz[i]-LapPhivxz[i]*LapPhivxz[i]+LapPhivy[i]*LapPhivz[i]-LapPhivyz[i]*LapPhivyz[i];
+  }
+}
+
+
diff --git a/EqSolvers.h b/EqSolvers.h
new file mode 100644
index 0000000..e275cee
--- /dev/null
+++ b/EqSolvers.h
@@ -0,0 +1,40 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#pragma once
+#include <gsl/gsl_errno.h>
+#include <gsl/gsl_spline.h>
+#include <gsl/gsl_randist.h>
+#include <gsl/gsl_integration.h>
+#include <gsl/gsl_sf.h>
+#include "fftwrapper.h"
+#include <string>
+
+real_prec linearvel3d(int index, real_prec kx, real_prec ky, real_prec kz, real_prec phi);
+
+void PoissonSolver(unsigned int N1, unsigned int N2, unsigned int N3, real_prec L1, real_prec L2, real_prec L3,
+                   real_prec *delta, real_prec *Pot);
+
+void calc_LapPhiv(unsigned int N1, unsigned int N2, unsigned int N3, real_prec L1, complex_prec *philv, real_prec *LapPhiv,
+                  int index1, int index2);
+
+void theta2vel(unsigned int N1, unsigned int N2, unsigned int N3, real_prec L1, real_prec L2, real_prec L3,
+               real_prec scale, real_prec Omega_M,
+               real_prec Omega_L, real_prec *delta, real_prec *vex, real_prec *vey, real_prec *vez, bool zeropad,
+               bool norm, plan_pkg *R2Cplan, plan_pkg *C2Rplan);
+
+void theta2velcomp(unsigned int N1, unsigned int N2, unsigned int N3, real_prec L1, real_prec L2, real_prec L3,
+                   real_prec scale, real_prec Omega_M,
+                   real_prec Omega_L, real_prec *delta, real_prec *vei, bool zeropad, bool norm, int comp);
+
+void calc_m2v_mem(unsigned int N1, unsigned int N2, unsigned int N3, real_prec L1, real_prec *phiv, real_prec *m2v);
+
+void create_real_garfield (int N1,int N2,int N3,real_prec *Power,real_prec *out,gsl_rng * gBaseRand);
+
+
diff --git a/HMC.cc b/HMC.cc
new file mode 100644
index 0000000..e133a10
--- /dev/null
+++ b/HMC.cc
@@ -0,0 +1,819 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+#include "struct_main.h"
+#include "struct_hamil.h"
+
+#include <math.h>
+#include <iomanip>
+#include <cassert>
+#include <numeric>  // accumulate, partial_sum
+#include <algorithm>  // find, find_if, copy
+
+#include "fftw_array.h"
+#include "planck/paramfile.h"
+
+#include "math_funcs.h"
+#include "IOfunctionsGen.h"
+
+#include "HMC_help.h"
+#include "HMC_momenta.h"
+#include "HMC_mass.h"
+
+#include "convenience.h"
+
+#include "debug.h"
+
+using namespace std;
+
+#ifdef MASKING
+bool masking = true;
+#else
+bool masking = false;
+#endif  // ifdef MASKING
+
+
+void PROTOCOL_HMC(real_prec dH, real_prec Pa, struct DATA *data) {
+  string outputFileName= data->numerical->dir + string("HMC.prt");
+
+  ofstream outStream(outputFileName.data());
+  assert(outStream.is_open());
+
+  outStream << dH  <<endl;
+  outStream << Pa  <<endl;
+  outStream.close();
+}
+
+
+void PROTOCOL_NREJ(ULONG nrej, struct DATA *data) {
+  string outputFileName= data->numerical->dir + string("NREJ.prt");
+
+  ofstream outStream(outputFileName.data());
+  assert(outStream.is_open());
+
+  outStream << nrej-1  <<endl;
+  outStream.close();
+}
+
+
+
+void write_to_performance_log(struct DATA *data, struct HAMIL_DATA *hd) {
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+  ofstream &plog = data->numerical->performance_log;
+
+  assert(plog.is_open());
+
+  plog << n->accepted << "\t";
+  plog << n->epsilon << "\t";
+  plog << n->Neps << "\t";
+  plog << n->dH << "\t";
+  plog << n->dK << "\t";
+  plog << n->dE << "\t";
+  plog << n->dprior << "\t";
+  plog << n->dlikeli << "\t";
+  plog << n->psi_prior_i << "\t";
+  plog << n->psi_prior_f << "\t";
+  plog << n->psi_likeli_i << "\t";
+  plog << n->psi_likeli_f << "\t";
+  plog << n->H_kin_i << "\t";
+  plog << n->H_kin_f << endl;
+}
+
+
+// for Gaussian momentum distribution //
+real_prec kinetic_term(struct HAMIL_DATA *hd, real_prec *momenta, struct DATA *data) {
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+  // H_kin = 1/2 * p * M^-1 * p
+  fftw_array<real_prec> dummy(n->N);
+
+  if (n->mass_fs) {
+    if (masking == true) {
+      // X^{-1}S^(1/2)p
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif  // MULTITHREAD
+      for (ULONG i = 0; i < n->N; i++) {
+        real_prec corr = hd->corrf[i];
+        real_prec winprime = num_1 + corr;
+        dummy[i] = momenta[i] / winprime;
+      }
+      convolveInvCorrFuncWithSignal(hd, dummy, dummy, hd->mass_f);
+    } else {
+      convolveInvCorrFuncWithSignal(hd, momenta, dummy, hd->mass_f);
+    }
+  } else {
+    fillZero(dummy, n->N);  // prepare for addition to real part
+  }
+
+  if (n->mass_rs) {
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif  // MULTITHREAD
+    for (ULONG i = 0; i < n->N; i++) {
+      real_prec invM = 0.;
+      if (hd->mass_r[i] > 0.0)
+        invM = static_cast<real_prec>(1. / hd->mass_r[i]);
+
+      dummy[i] += invM * momenta[i];
+    }
+  }
+
+  // 5) 1/2*p*IFT[ 1/M*FT[p] ]
+
+  bool flag_write = false;
+  real_prec value = 0.;
+#ifdef MULTITHREAD
+#pragma omp parallel for reduction(+:value)
+#endif  // MULTITHREAD
+  for (ULONG i = 0; i < n->N; i++) {
+    value += num_0_5*momenta[i]*dummy[i];
+
+    if (momenta[i] > 1e100 && flag_write == false) {
+      cout << "aaargggh! too much kinetic energy!" << endl;
+      flag_write = true;
+    }
+  }
+  // cout << endl;
+  // cout << "... kinetic energy: " << value << endl;
+  wprintw(data->curses->table, "%5.0e ", value);
+
+  return(value);
+}
+
+
+real_prec psi(struct HAMIL_DATA *hd, real_prec *signal, struct DATA *data) {
+  // Psi = 1/2* s_k * S^-1 * s_k -log L
+  // The "potential energy" analogue of the Hamiltonian MC algorithm.
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+
+  // real_prec psi_prior = prior_gaussian_log_prior(hd, signal);
+  real_prec psi_prior = hd->log_prior(hd, signal);
+  real_prec psi_likelihood = hd->log_like(hd, signal);  // care
+  real_prec psi_total = psi_prior + psi_likelihood;
+
+  // cout << endl << "... energy of the prior: " << psi_prior << " energy of the likelihood: " << psi_likelihood << endl;
+  wprintw(data->curses->table, "%5.0e ", psi_prior);
+  wprintw(data->curses->table, "%5.0e ", psi_likelihood);
+
+  // for performance log:
+  n->psi_prior = psi_prior;
+  n->psi_likeli = psi_likelihood;
+
+  return(psi_total);
+}
+
+
+void gradient_psi(struct HAMIL_DATA *hd, real_prec *signal, struct DATA *data) {
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+  fftw_array<real_prec> grad_psi_prior(n->N), grad_psi_likeli(n->N);
+
+  // prior_gaussian_grad_log_prior(hd, signal, grad_psi_prior);
+  hd->grad_log_prior(hd, signal, grad_psi_prior);
+
+#ifdef DEBUG
+  debug_array_statistics(grad_psi_prior, n->N, "gradPsi prior");
+#endif // DEBUG
+
+  // IFT[ 1/P*FT[s] ] + dPsiL/ds //
+
+  if (data->numerical->likelihood == 3)  // Gaussian random field
+    grf_likelihood_grad_log_like(hd, signal, grad_psi_likeli);
+  else
+    likelihood_grad_log_like(hd, signal, grad_psi_likeli);
+
+#ifdef DEBUG
+  debug_array_statistics(grad_psi_likeli, n->N, "gradPsi likelihood");
+#endif // DEBUG
+
+
+  //// TESTING ////
+  multiply_factor_array(n->grad_psi_prior_factor, grad_psi_prior,
+                        grad_psi_prior, n->N);
+  multiply_factor_array(n->grad_psi_likeli_factor, grad_psi_likeli,
+                        grad_psi_likeli, n->N);
+  if (n->grad_psi_prior_conjugate || n->grad_psi_prior_times_i) {
+    // fftw_array<complex_prec> AUX(n->N), dummyC(n->N);
+    // complexify_array(grad_psi_prior, dummyC, n->N);
+    // FFT3d(n->N1, n->N2, n->N3, true, dummyC, AUX);
+    fftw_array<complex_prec> grad_psi_prior_C(n->Nhalf);
+    fftR2C(n->N1, n->N2, n->N3, grad_psi_prior, grad_psi_prior_C);
+    if (n->grad_psi_prior_conjugate)
+      conjugate_array(grad_psi_prior_C, grad_psi_prior_C, n->Nhalf);
+    if (n->grad_psi_prior_times_i)
+      times_i_array(grad_psi_prior_C, grad_psi_prior_C, n->Nhalf);
+    // FFT3d(n->N1, n->N2, n->N3, false, AUX, dummyC);
+    // real_part_array(dummyC, grad_psi_prior, n->N);
+    fftC2R(n->N1, n->N2, n->N3, grad_psi_prior_C, grad_psi_prior);
+  }
+  if (n->grad_psi_likeli_conjugate || n->grad_psi_likeli_times_i) {
+    // fftw_array<complex_prec> AUX(n->N), dummyC(n->N);
+    // complexify_array(grad_psi_likeli, dummyC, n->N);
+    // FFT3d(n->N1, n->N2, n->N3, true, dummyC, AUX);
+    fftw_array<complex_prec> grad_psi_likeli_C(n->Nhalf);
+    fftR2C(n->N1, n->N2, n->N3, grad_psi_likeli, grad_psi_likeli_C);
+    if (n->grad_psi_likeli_conjugate)
+      conjugate_array(grad_psi_likeli_C, grad_psi_likeli_C, n->N);
+    if (n->grad_psi_likeli_times_i)
+      times_i_array(grad_psi_likeli_C, grad_psi_likeli_C, n->N);
+    // FFT3d(n->N1, n->N2, n->N3, false, AUX, dummyC);
+    // real_part_array(dummyC, grad_psi_likeli, n->N);
+    fftC2R(n->N1, n->N2, n->N3, grad_psi_likeli_C, grad_psi_likeli);
+  }
+  //// END TESTING ////
+
+
+  sum_arrays(grad_psi_prior, grad_psi_likeli, hd->gradpsi, n->N);
+}
+
+
+real_prec delta_Hamiltonian(struct HAMIL_DATA *hd, real_prec *signali, real_prec *momentai, real_prec *signalf,
+                            real_prec *momentaf, struct DATA *data) {
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+  // dH = H(sf,pf) - H(si,pi)
+
+  real_prec Hkini = kinetic_term(hd, momentai, data);
+  real_prec Hpsii = psi(hd, signali, data);
+
+  // save initial psi's for performance log
+  n->psi_prior_i = n->psi_prior;
+  n->psi_likeli_i = n->psi_likeli;
+  n->H_kin_i = Hkini;
+
+  real_prec Hami = Hkini + Hpsii;
+
+  real_prec Hkinf = kinetic_term(hd, momentaf, data);
+  real_prec Hpsif = psi(hd, signalf, data);
+
+  // more performance log stuff:
+  n->dprior = n->psi_prior - n->psi_prior_i;
+  n->dlikeli = n->psi_likeli - n->psi_likeli_i;
+
+  real_prec Hamf = Hkinf + Hpsif;
+
+  real_prec dHam = Hamf - Hami;
+  if (n->div_dH_by_N) {
+    // test: divide dH by N to scale for cell number & make the code accept more
+    dHam /= static_cast<real_prec>(n->N);
+  }
+
+  // save final stuff in hd for output to performance log:
+  n->dH = dHam;
+  n->dK = Hkinf - Hkini;
+  n->dE = Hpsif - Hpsii;
+  n->psi_prior_f = n->psi_prior;
+  n->psi_likeli_f = n->psi_likeli;
+  n->H_kin_f = Hkinf;
+
+  return(dHam);
+}
+
+
+void Hamiltonian_EoM(struct HAMIL_DATA *hd, real_prec *signali,
+                     real_prec *momentai, real_prec *signalf,
+                     real_prec *momentaf, gsl_rng * seed, struct DATA *data) {
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+  // tau=n*e
+  // 1) pi(t+e/2)=pi(t)-e/2*gradPsi(si(t))
+  // 2) si(t+e)=si(t)+e/Mi*pi(t+e/2)
+  // 3) pi(t+e)=pi(t+e/2)-e/2*gradPsi(si(t+e))
+
+  n->Neps = static_cast<ULONG>(n->N_eps_fac*(gsl_rng_uniform(seed))) + 1;
+  n->epsilon = static_cast<real_prec>(n->eps_fac*gsl_rng_uniform(seed));
+
+  if (n->epsilon > 2.)
+    n->epsilon = 2.;
+
+  // cout << endl;
+  // cout << "----> Number of leapfrog iterations: " << n->Neps <<endl;
+  // cout << "----> Step size of the leapfrog scheme: " << n->epsilon <<endl;
+  wprintw(data->curses->table, "%5.0e ", n->epsilon);
+  wprintw(data->curses->table, "%4lu ", n->Neps);
+  wrefresh(data->curses->table);
+
+  fftw_array<real_prec> dummy(n->N);
+
+  copyArray(signali, signalf, n->N);
+  copyArray(momentai, momentaf, n->N);
+
+  // 0) determine gradient_psi at t = 0
+  fillZero(hd->gradpsi, n->N);
+  gradient_psi(hd, signalf, data);
+
+  // cout << "... Leaping " << flush;
+
+  for (ULONG jj = 0; jj < n->Neps; jj++) {
+    wprintw(data->curses->status, "\nLeap-frogging ... %lu/%lu", jj+1, n->Neps);
+    wrefresh(data->curses->status);
+    // cout << "." << flush;
+
+    // 1) pi(t+e/2)=pi(t)-e/2*gradPsi(si(t))
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif  // MULTITHREAD
+    for (ULONG i = 0; i < n->N; i++)
+      momentaf[i] -= num_0_5 * n->epsilon * hd->gradpsi[i];
+
+
+    // 2a) M^-1 * p(t+e/2)
+    if (n->mass_fs) {
+#ifdef MASKING
+      {
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif  // MULTITHREAD
+        for (ULONG i = 0; i < n->N; i++)
+          dummy[i]=momentaf[i]/(num_1 + hd->corrf[i]);
+        convolveInvCorrFuncWithSignal(hd, dummy, dummy, hd->mass_f);
+      }
+#else  // MASKING
+      {
+        convolveInvCorrFuncWithSignal(hd, momentaf, dummy, hd->mass_f);
+      }
+#endif  // MASKING
+    } else {  // mass_fs == false
+      fillZero(dummy, n->N);
+    }
+
+    if (n->mass_rs) {
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif  // MULTITHREAD
+      for (ULONG i = 0; i < n->N; i++) {
+        real_prec invM = 0.;
+        if (hd->mass_r[i] > 0.0)
+          invM = num_1/hd->mass_r[i];
+        dummy[i] += momentaf[i]*invM;
+      }
+    }
+
+#ifdef DEBUG
+    debug_array_statistics(dummy, n->N, "shift / epsilon");
+#endif // DEBUG
+
+
+    // 2b) si(t+e)=si(t)+e/Mi*pi(t+e/2)
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif  // MULTITHREAD
+    for (ULONG i = 0; i < n->N; i++)
+      signalf[i] += n->epsilon*dummy[i];
+
+
+    // 3a) determine gradient_psi at t+e
+    fillZero(hd->gradpsi, n->N);
+    gradient_psi(hd, signalf, data);
+
+
+    // 3b) pi(t+e)=pi(t+e/2)-e/2*gradPsi(si(t+e))
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif  // MULTITHREAD
+    for (ULONG i = 0; i < n->N; i++)
+      momentaf[i] -= num_0_5 * n->epsilon * hd->gradpsi[i];
+
+#ifdef DEBUG
+    debug_array_statistics(momentaf, n->N, "momf");
+#endif // DEBUG
+
+    // check for too high values of momentum; it sometimes goes crazy out of
+    // control and at ~1e308 a SIGFPE stops the program
+    if (abs(momentaf[0]) > 1e50) {
+      wprintw(data->curses->message, "\nLeap-frogging ... stopped at %lu/%lu, momentum too high (momenta[0] = %e)", jj+1, n->Neps, momentaf[0]);
+      wrefresh(data->curses->message);
+      jj = n->Neps;  // stop the loop, this EoM will lead to nothing
+    }
+  }
+  // cout<<endl;
+  // cout <<"----> maximum value in the reconstruction: "<<max_arr(n->N,signalf)<<endl;
+  data->numerical->count_attempts++;
+}
+
+// TODO: MOVE THIS TO TEMPLATE/HEADER FILE
+// From stackoverflow.com/a/12399290
+template <typename T>
+vector<size_t> sort_indexes(const vector<T> &v) {
+  // initialize original index locations
+  vector<size_t> idx(v.size());
+  for (size_t i = 0; i != idx.size(); ++i) idx[i] = i;
+
+  // sort indexes based on comparing values in v
+  sort(idx.begin(), idx.end(),
+       [&v](size_t i1, size_t i2) {return v[i1] < v[i2];});
+
+  return idx;
+}
+
+template <typename T, typename T_other>
+vector<T> sort_vector_by_other(const vector<T> &sortee,
+                               const vector<T_other> &other) {
+  if (sortee.size() != other.size()) {
+    throw BarcodeException("sort_vector_by_other: vector sizes must be equal!");
+  }
+  auto ix_sort = sort_indexes(other);
+  vector<T> sorted;
+  for (auto i: ix_sort) {
+    sorted.push_back(sortee[i]);
+  }
+  return (sorted);
+}
+
+template <typename InputIterator, typename OutputIterator>
+OutputIterator cumulative_moving_average(InputIterator begin, InputIterator end,
+                                         OutputIterator result) {
+  partial_sum(begin, end, result);
+  int i = 1;  // start at one, not zero (number, not index)
+  while (++begin != end) {
+    ++result;
+    *result = *result / static_cast<real_prec>(++i);  // average up to that ix
+  }
+  return ++result;
+}
+
+template <typename InputIterator, typename OutputIterator>
+OutputIterator stl_smooth(InputIterator begin, InputIterator end,
+                          OutputIterator result, int smooth_size) {
+  for (auto ix = begin; ix != end; ++ix) {
+    auto ix_min = ix - smooth_size;
+    auto ix_max = ix + smooth_size + 1;  // +1: accumulate is not inclusive last
+    if (ix_min < begin) ix_min = begin;
+    if (ix_max > end)   ix_max = end;
+    *result++ = accumulate(ix_min, ix_max, 0.0)
+                / static_cast<real_prec>(ix_max - ix_min);
+  }
+  return result;
+}
+
+real_prec bool_mean(vector<bool> input) {
+  real_prec result = 0;
+  for (auto i = input.begin(); i != input.end(); ++i) {
+    if (*i) {
+      result += 1.;
+    }
+  }
+  result /= static_cast<real_prec>(input.size());
+  return result;
+}
+
+vector<real_prec> real_from_bool(vector<bool> input) {
+  vector<real_prec> output;
+  for (auto i = input.begin(); i != input.end(); ++i) {
+    output.push_back(static_cast<real_prec>(*i));
+  }
+  return output;
+}
+
+string update_eps_fac_acceptance_rate_downwards(struct HAMIL_DATA *hd,
+                                                struct DATA *data) {
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+  struct NUMERICAL *dn = data->numerical;
+
+  real_prec alpha_N_a = bool_mean(dn->acc_flag_N_a);
+  real_prec acc_target = (dn->acc_max + dn->acc_min)/2.;
+
+  string message = "\nadjusted eps_fac downwards to %f";
+  // make vector of acceptance flags sorted by epsilon:
+  auto a_sort_bool = sort_vector_by_other(dn->acc_flag_N_a, dn->epsilon_N_a);
+  auto a_sort = real_from_bool(a_sort_bool);
+
+  // turn it into cumulative moving average of acceptance rate (sort by eps)
+  cumulative_moving_average(a_sort.begin(), a_sort.end(), a_sort.begin());
+
+  // smooth it a bit, due to the discrete acc_flags it will be too bumpy
+  vector<real_prec> a_sm(dn->N_a_eps_update);
+  stl_smooth(a_sort.begin(), a_sort.end(), a_sm.begin(), dn->eps_down_smooth);
+
+  // loop until an element larger than acc_target is found, just to be sure we
+  // start at a peak; in some cases there might be zeroes at the beginning of
+  // the acceptance_rate array, which will cause an initial dip, which must not
+  // be mistaken for the true location where the line goes below the target
+  // acceptance rate
+  auto ix_max_a_sm = max_element(a_sm.begin(), a_sm.end());
+  // check if indeed is larger than acc_target (if not, we can't continue):
+  if (*ix_max_a_sm > acc_target) {
+    // then find where the line goes below the target acceptance rate.
+    auto ix_target = find_if(ix_max_a_sm, a_sm.end(), [&] (real_prec acc_sm) {
+                              return (acc_sm < acc_target);
+                            });
+    // if acc_target not crossed, we have some strange special case:
+    if (ix_target == a_sm.end()) {
+      stringstream ss, debugss;
+      ss << "\neps_fac stays at %f (special: alpha_N_a=" << alpha_N_a <<
+            ", a_t=" << acc_target << ", a_sm_max=" << *ix_max_a_sm << ")";
+      message = ss.str();
+    } else {
+      // adjust downward the way we wanted
+      vector<real_prec> eps_sort(dn->N_a_eps_update);
+      copy(dn->epsilon_N_a.begin(), dn->epsilon_N_a.end(), eps_sort.begin());
+      sort(eps_sort.begin(), eps_sort.end());
+      unsigned ix_eps = static_cast<unsigned>(ix_target - a_sm.begin());
+      n->eps_fac = eps_sort[ix_eps];
+    }
+  } else {
+    // Can't continue, so we just take a guess for the next epsilon.
+    if (alpha_N_a == 0.) {
+      // If there was no accepted step, set to the lowest epsilon we tried:
+      auto ix_min_eps = min_element(dn->epsilon_N_a.begin(),
+                                    dn->epsilon_N_a.end());
+      n->eps_fac = *ix_min_eps;
+    } else {
+      // Otherwise, just divide by three (not two, that's not enough most
+      // of the time that you can continue...)
+      n->eps_fac /= 3.;
+    }
+  }
+  if (n->eps_fac == 0.) {
+    throw BarcodeException("In update_eps_fac_acceptance_rate_downwards: "
+                           "epsilon became zero, shouldn't happen!");
+  }
+  return (message);
+}
+
+void update_eps_fac_acceptance_rate(struct HAMIL_DATA *hd, struct DATA *data) {
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+  struct NUMERICAL *dn = data->numerical;
+  // ULONG total_steps = n->iGibbs + n->rejections + data->numerical->rejections;
+  // modulo (total - 1), because this is the step we haven't finished yet
+  // if (((total_steps - 1)% dn->N_a_eps_update == 0) && (total_steps > 1)) {
+
+  // N.B.: the above (with -1) was only for total_steps, not for count_attempts!
+  if ((dn->count_attempts % dn->N_a_eps_update == 0)
+      && (dn->count_attempts > 0)) {
+    string message;  // to send to message window at the end
+    // recent (N_a tries) acceptance rate
+    real_prec alpha_N_a = bool_mean(dn->acc_flag_N_a);
+
+    if (alpha_N_a < dn->acc_min) {
+      // adjust downwards
+      message = update_eps_fac_acceptance_rate_downwards(hd, data);
+    } else if (alpha_N_a > dn->acc_max) {
+      // adjust upwards
+      real_prec acc_target = (dn->acc_max + dn->acc_min)/2.;
+      n->eps_fac *= dn->eps_up_fac * (alpha_N_a / acc_target);
+      message = "\nadjusted eps_fac upwards to %f";
+    } else {
+      // don't adjust
+      message = "\nnot adjusting eps_fac, stays at %f";
+    }
+    wprintw(data->curses->message, message.c_str(), n->eps_fac);
+    wrefresh(data->curses->message);
+  }
+}
+
+void update_eps_fac_acceptance_rate_fast_initial(struct HAMIL_DATA *hd,
+                                                 struct DATA *data) {
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+
+  if ((n->iGibbs == 1) && (n->rejections > 0)) {
+    n->eps_fac /= 2.;
+    string message = "\nadjusted eps_fac downwards to %f";
+    wprintw(data->curses->message, message.c_str(), n->eps_fac);
+    wrefresh(data->curses->message);
+  } else {
+    update_eps_fac_acceptance_rate(hd, data);
+  }
+}
+
+void update_eps_fac(struct HAMIL_DATA *hd, struct DATA *data) {
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+  struct NUMERICAL *dn = data->numerical;
+  switch (data->numerical->eps_fac_update_type) {
+    case 0:
+      // don't update
+      break;
+    case 1:
+    {
+      // update eps_fac every s_eps_total steps
+      // ULONG total_steps = n->iGibbs + n->rejections
+                          // + data->numerical->rejections;
+      // modulo (total - 1), because this is the step we haven't finished yet
+      // if (((total_steps-1) % n->s_eps_total == 0) && (total_steps > 1)) {
+
+      // N.B.: the above (-1) was only for total_steps, not for count_attempts!
+      if ((dn->count_attempts % n->s_eps_total == 0)
+          && (dn->count_attempts > 0)) {
+        n->eps_fac = power_mean(n->eps_fac, n->eps_fac_target,
+                                n->eps_fac_power);
+        cout << "  updating eps_fac to " << n->eps_fac << endl;
+      }
+      break;
+    }
+    case 2:
+      // adjust eps_fac based on recent acceptance rate
+      update_eps_fac_acceptance_rate(hd, data);
+      break;
+    case 3:
+      // adjust eps_fac based on recent acceptance rate
+      // version 2: with extra fast initial phase until first accepted step
+      update_eps_fac_acceptance_rate_fast_initial(hd, data);
+      break;
+  }
+}
+
+void update_epsilon_acc_rate_tables(struct DATA *data, struct HAMIL_DATA *hd) {
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+  struct NUMERICAL *dn = data->numerical;
+  // ULONG total_steps = n->iGibbs + n->rejections + data->numerical->rejections;
+  // ULONG ix_table = (total_steps-1)  // -1: index, not number
+  ULONG ix_table = (dn->count_attempts-1)  // -1: index, not number
+                   % data->numerical->N_a_eps_update;
+  data->numerical->acc_flag_N_a[ix_table] = n->accepted;
+  data->numerical->epsilon_N_a[ix_table] = n->epsilon;
+  stringstream s;
+  s << setw(9) << "\n" << data->numerical->epsilon_N_a[0];
+  for (unsigned i = 1; (i < 8) && (i < data->numerical->N_a_eps_update); ++i) {
+    s << " " << data->numerical->epsilon_N_a[i];
+  }
+  wprintw(data->curses->debug, s.str().c_str());
+  wrefresh(data->curses->debug);
+}
+
+
+void HamiltonianMC(struct HAMIL_DATA *hd, gsl_rng * seed, struct DATA *data) {
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+  struct NUMERICAL *dn = data->numerical;
+  fftw_array<real_prec> momentai(n->N), momentaf(n->N), signali(n->N),
+                        signalf(n->N);
+
+  bool reach_acceptance = false;
+
+  // define the starting signal estimate //
+  ULONG iter = 0;
+
+  // real_prec maxs = max_arr(n->N, hd->x);
+  // cout << "maximum of the guess signal: "<<maxs<<endl;
+
+  // calculate Hamiltonian masses //
+  ULONG massnum;
+  if (n->iGibbs > n->massnum_burn)
+    massnum = n->massnum_burn;
+  else
+    massnum = n->massnum_init;
+
+  // if (massnum == 1)
+  // {
+  //   cout<<"---->compute mass for the momenta ... "<<endl;
+  //   Hamiltonian_mass(hd,hd->x,seed, data);
+  // }
+  // else
+  // {
+    if (0 == n->iGibbs % massnum || n->iGibbs == 1) {
+      // cout << "---->compute mass for the momenta ... " << endl;
+      Hamiltonian_mass(hd, hd->x, data);
+      string fname = dn->dir + string("auxmass_r");
+      if (n->mass_rs) {
+        if (contains_nan(hd->mass_r, n->N)) {
+          throw BarcodeException("auxmass_r contains a NaN! aborting.");
+        }
+        write_array(fname, hd->mass_r, n->N1, n->N2, n->N3);
+      }
+      fname = dn->dir + string("auxmass_f");
+      if (n->mass_fs) {
+        write_array(fname, hd->mass_f, n->N1, n->N2, n->N3);
+      }
+    } else {
+      string fname = dn->dir + string("auxmass_r");
+      if (n->mass_rs) {
+        read_array(fname, hd->mass_r, n->N1, n->N2, n->N3);
+      }
+      fname = dn->dir + string("auxmass_f");
+      if (n->mass_fs) {
+        read_array(fname, hd->mass_f, n->N1, n->N2, n->N3);
+      }
+    }
+  // }
+
+  // cout << "---->start Hamiltonian sampling ... " << endl;
+  wprintw(data->curses->status, "starting Hamiltonian sampling");
+  wrefresh(data->curses->status);
+
+  int naccepted = 0;
+  while (iter < n->itmax && reach_acceptance == false) {
+    // print sample number
+    wprintw(data->curses->table, "%6lu ", n->iGibbs);
+    wrefresh(data->curses->table);
+
+    iter++;
+    // cout<<endl
+    // cout << "----> Number of Metropolis-Hastings sample candidate: " << iter <<endl;
+    // print sample-candidate number
+    wprintw(data->curses->table, "%4lu ", iter);
+    wrefresh(data->curses->table);
+
+    // 0) initiliaze
+    // define the starting position
+    copyArray(hd->x, signali, n->N);
+
+    // 1) step 1: new momenta
+    // draw momenta
+    draw_momenta(hd, seed, momentai, data);
+    // quick_dump_scalar(momentai, n->N1, "initial_momentum", iter, true);
+
+    // 2) step 2: Eq of motion
+    update_eps_fac(hd, data);
+    // calculate next positions with the Equation of motions
+    Hamiltonian_EoM(hd, signali, momentai, signalf, momentaf, seed, data);
+
+    // 3) step 3: acceptance of the new position
+    // calculate the Hamiltonian difference
+    real_prec dH = delta_Hamiltonian(hd, signali, momentai, signalf, momentaf, data);
+
+    // calculate the acceptance probability
+    real_prec p_acceptance = 1.;
+
+    if (dH < 0.0)
+      p_acceptance = 1.0;
+    else if (exp(-dH) < 1.0)
+      p_acceptance = exp(-dH);
+
+    // cout<<endl;
+    // cout<<"----> ACHTUNG dH: "<< dH <<endl;
+    // cout<<"----> ACHTUNG acceptance probability: "<< p_acceptance <<endl;
+    wprintw(data->curses->table, "%6.0e ", dH);
+    wprintw(data->curses->table, "%5.0e ", p_acceptance);
+    wrefresh(data->curses->table);
+
+#ifdef DEBUG
+    PROTOCOL_HMC(dH, p_acceptance, data);
+#endif  // DEBUG
+
+    // draw acceptance
+    if (p_acceptance >= 1.0) {
+      reach_acceptance = true;
+    } else {
+      real_prec u = static_cast<real_prec>(gsl_rng_uniform(seed));
+
+      if (u < p_acceptance)
+        reach_acceptance = true;
+      else
+        reach_acceptance = false;
+    }
+
+    if (reach_acceptance == true) {
+      naccepted++;
+      // cout<<"----> accepted! ;-) # "<<naccepted<<endl;
+      wprintw(data->curses->table, "y ");
+    } else {
+      wprintw(data->curses->table, "n ");
+    }
+    wrefresh(data->curses->table);
+
+    if (reach_acceptance)
+      copyArray(signalf, hd->x, n->N);
+
+    if (!reach_acceptance)
+      n->rejections++;
+
+    // EGP: save in hd for performance log:
+    n->accepted = reach_acceptance;
+
+    write_to_performance_log(data, hd);
+    update_epsilon_acc_rate_tables(data, hd);
+
+    // ULONG total_steps = n->iGibbs + n->rejections + dn->rejections;
+    ULONG total_steps;
+    if (reach_acceptance) {
+      total_steps = n->iGibbs + n->rejections + dn->rejections + 1;
+    } else {
+      total_steps = n->iGibbs + n->rejections + dn->rejections;
+    }
+
+    // compute recent acceptance rate
+    // -1: index, not number
+    ULONG ix_acc = (dn->count_attempts-1) % dn->N_a_eps_update;
+    if (reach_acceptance) {
+      dn->acc_recent[ix_acc] = 1;
+    } else {
+      dn->acc_recent[ix_acc] = 0;
+    }
+    real_prec acc_N_a = 0;
+    for (unsigned i = 0; i < dn->N_a_eps_update; ++i) {
+      acc_N_a += static_cast<real_prec>(dn->acc_recent[i]);
+    }
+    acc_N_a /= static_cast<real_prec>(dn->N_a_eps_update);
+    wprintw(data->curses->table, "%4.2f", acc_N_a);
+    wrefresh(data->curses->table);
+
+    if (total_steps >= n->total_steps_lim) {
+      throw BarcodeException("ABORTING: total steps exceeds total_steps_lim");
+    }
+
+    wprintw(data->curses->table, "\n");
+    wrefresh(data->curses->table);
+  }
+#ifdef DEBUG
+  PROTOCOL_NREJ(iter, data);
+#endif  // DEBUG
+
+  // set inversion success
+  if (reach_acceptance == true)
+    n->INV_SUCCESS = 1;
+  else
+    n->INV_SUCCESS = 0;
+
+  // cout<<endl;
+}
diff --git a/HMC.h b/HMC.h
new file mode 100644
index 0000000..5e7e62e
--- /dev/null
+++ b/HMC.h
@@ -0,0 +1,15 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#pragma once
+#include "struct_hamil.h"
+#include <gsl/gsl_rng.h>
+#include "struct_main.h"
+
+void HamiltonianMC(struct HAMIL_DATA *hamil_data, gsl_rng * seed, struct DATA *data);
diff --git a/HMC_help.cc b/HMC_help.cc
new file mode 100644
index 0000000..eab32ca
--- /dev/null
+++ b/HMC_help.cc
@@ -0,0 +1,63 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#include "define_opt.h"
+#include "struct_hamil.h"
+
+using namespace std;
+
+void convolveInvCorrFuncWithSignal(struct HAMIL_DATA *hd, real_prec *signal,
+                                   real_prec *out, real_prec *corrFunc) {
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+  unsigned N3half = (n->N3/2 + 1);
+  // fftw_array<complex_prec> signal_C(n->Nhalf);
+
+#ifdef FOURIER_DEF_1
+  real_prec normFS = n->vol;
+#endif // FOURIER_DEF_1
+#ifdef FOURIER_DEF_2
+  real_prec normFS = n->vol/static_cast<real_prec>(n->N);
+#endif // FOURIER_DEF_2
+#ifdef FOURIER_DEF_2_20151021
+  real_prec normFS = static_cast<real_prec>(n->N)/n->vol;
+#endif  // FOURIER_DEF_2_20151021
+    
+  // 1) FT[signal] 
+  // fftR2C(n->N1, n->N2, n->N3, signal, signal_C);
+  fftR2Cplanned(signal, n->R2Cplan->C, n->R2Cplan);
+  // fftR2Cplanned(signal, signal_C, n->R2Cplan);
+
+  // 2) 1/C*FT[signal]
+#ifdef MULTITHREAD
+#pragma omp parallel for 
+#endif // MULTITHREAD
+  for (unsigned i=0 ; i<n->N1;i++)
+    for (unsigned j=0 ; j<n->N2;j++)
+      for (unsigned k = 0 ; k < N3half; ++k) {
+        ULONG ix = k + n->N3 * (j + n->N2*i);
+        ULONG ix_C = k + N3half * (j + n->N2*i);
+
+        // merged invC and normFS into one variable (saves a multiplication)
+        real_prec invC_normFS;
+        if (corrFunc[ix]>0.0)
+          invC_normFS = normFS / corrFunc[ix];
+        else
+          invC_normFS = 0.;
+
+        // re(signal_C[ix_C]) *= invC_normFS;
+        // im(signal_C[ix_C]) *= invC_normFS;
+        re(n->R2Cplan->C[ix_C]) *= invC_normFS;
+        im(n->R2Cplan->C[ix_C]) *= invC_normFS;
+      }
+
+  // 3) IFT[ 1/C*FT[signal] ]
+  // fftC2R(n->N1, n->N2, n->N3, signal_C, out);
+  fftC2Rplanned(n->R2Cplan->C, out, n->C2Rplan);
+  // fftC2Rplanned(signal_C, out, n->C2Rplan);
+}
diff --git a/HMC_help.h b/HMC_help.h
new file mode 100644
index 0000000..13c7b2a
--- /dev/null
+++ b/HMC_help.h
@@ -0,0 +1,13 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#pragma once
+#include "define_opt.h"
+
+void convolveInvCorrFuncWithSignal(struct HAMIL_DATA *hamil_data, real_prec *signal, real_prec *out, real_prec *corrFunc);
diff --git a/HMC_mass.cc b/HMC_mass.cc
new file mode 100644
index 0000000..6d9d77c
--- /dev/null
+++ b/HMC_mass.cc
@@ -0,0 +1,446 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#include <fftw3.h>
+#include <gsl/gsl_spline.h>
+#include <gsl/gsl_randist.h>
+#include <gsl/gsl_integration.h>
+
+#include <fstream>  //NOLINT
+#include <iomanip>
+#include <cassert>
+#include <ncurses.h>
+
+#include "planck/paramfile.h"
+
+#include "struct_main.h"
+#include "struct_hamil.h"
+#include "fftw_array.h"
+#include "math_funcs.h"
+#include "IOfunctions.h"
+#include "Lag2Eul.h"
+#include "field_statistics.h"
+#include "convenience.h"
+
+#include "HMC_mass.h"
+
+using namespace std;
+
+
+void likeli_force_power(struct HAMIL_DATA *hd, real_prec *signal,
+  real_prec *likeli_power, real_prec *kmode, struct DATA * data) {
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+  fftw_array<real_prec> likeli_force(n->N);
+
+  likelihood_grad_log_like(hd, signal, likeli_force);
+
+  measure_spectrum(n->N1, n->N2, n->N3, n->L1, n->L2, n->L3, likeli_force,
+    kmode, likeli_power, n->N_bin);
+  dump_measured_spec(kmode, likeli_power,
+    data->numerical->dir + string("forcespec.dat"), n->N_bin);
+}
+
+
+void Hamiltonian_mass_likeli_force(struct HAMIL_DATA *hd, real_prec *signal,
+  struct DATA *data, real_prec *out) {
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+
+  real_prec likeli_power[n->N_bin];
+  real_prec kmode[n->N_bin];
+  likeli_force_power(hd, signal, likeli_power, kmode, data);
+
+  real_prec kmax = sqrt(k_squared(n->N1/2, n->N2/2, n->N3/2, n->L1, n->L2,
+    n->L3, n->N1, n->N2, n->N3));
+  real_prec dk = kmax/static_cast<real_prec>(n->N_bin);
+
+  real_prec NORM = num_1;  // [>static_cast<real_prec>(N);// care
+  #ifdef MULTITHREAD
+  #pragma omp parallel for
+  #endif  // MULTITHREAD
+  for (unsigned i = 0; i < n->N1; i++)
+    for (unsigned j = 0; j < n->N2; j++)
+      for (unsigned k = 0; k < n->N3; k++) {
+        ULONG l = k+n->N3*(j+n->N2*i);
+
+        real_prec kr = sqrt(k_squared(i, j, k, n->L1, n->L2, n->L3, n->N1,
+          n->N2, n->N3));
+        ULONG nbin = static_cast<ULONG>(kr/dk);
+
+        if (kr>0.)
+          out[l] = likeli_power[nbin]*NORM;
+        else
+          out[l] = 0.0;
+      }
+}
+
+
+real_prec Hamiltonian_mass_mean_likeli_force(struct HAMIL_DATA *hd,
+  real_prec *signal, struct DATA *data) {
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+
+  real_prec likeli_power[n->N_bin];
+  real_prec kmode[n->N_bin];
+  likeli_force_power(hd, signal, likeli_power, kmode, data);
+
+  real_prec kmax = sqrt(k_squared(n->N1/2, n->N2/2, n->N3/2, n->L1, n->L2,
+    n->L3, n->N1, n->N2, n->N3));
+  real_prec dk = kmax/static_cast<real_prec>(n->N_bin);
+
+  real_prec force_mean_1D = 0.0;
+  real_prec k_volume_1D = 0.0;
+  #ifdef MULTITHREAD
+  #pragma omp parallel for reduction(+:force_mean_1D)
+  #endif  // MULTITHREAD
+  for (ULONG i = 0; i < n->N_bin; i++)
+    force_mean_1D += 4.*M_PI * kmode[i] * kmode[i] * dk * likeli_power[i];
+  #ifdef MULTITHREAD
+  #pragma omp parallel for reduction(+:k_volume_1D)
+  #endif  // MULTITHREAD
+  for (ULONG i = 0; i < n->N_bin; i++)
+    k_volume_1D += 4.*M_PI * kmode[i] * kmode[i] * dk;
+
+  force_mean_1D /= k_volume_1D;
+
+  return(force_mean_1D);
+}
+
+
+real_prec inv_ps(struct HAMIL_DATA *hd, ULONG ix) {
+  real_prec invP;
+  if (hd->signal_PS[ix] > 0.0)
+    invP = 1./hd->signal_PS[ix];
+  else
+    invP = 0.;
+  return (invP);
+}
+
+
+void likeli_force_mass(struct HAMIL_DATA *hd, real_prec *signal,
+  struct DATA *data, real_prec fac_conv) {
+  // invP + (mean) likelihood-force mass
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+
+  Hamiltonian_mass_likeli_force(hd, signal, data, hd->mass_f);
+
+  #ifdef MULTITHREAD
+  #pragma omp parallel for
+  #endif  // MULTITHREAD
+  for (ULONG i = 0; i < n->N; ++i) {
+    real_prec invP = inv_ps(hd, i);
+
+    hd->mass_f[i] = fac_conv * ( 2*invP + sqrt(invP * hd->mass_f[i]) );
+  }
+}
+
+
+void mean_likeli_force_mass(struct HAMIL_DATA *hd, real_prec *signal,
+  struct DATA *data, real_prec fac_conv) {
+  // invP + (mean) likelihood-force mass
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+
+  real_prec force_mean_1D = Hamiltonian_mass_mean_likeli_force(hd, signal, data);
+
+  #ifdef MULTITHREAD
+  #pragma omp parallel for
+  #endif  // MULTITHREAD
+  for (ULONG i = 0; i < n->N; ++i) {
+    real_prec invP = inv_ps(hd, i);
+
+    hd->mass_f[i] = fac_conv * ( 2*invP + sqrt(invP * force_mean_1D) );
+  }
+}
+
+
+void inverse_power_spectrum_mass(struct HAMIL_DATA *hd, real_prec fac_conv) {
+  // invP + (mean) likelihood-force mass
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+
+  #ifdef MULTITHREAD
+  #pragma omp parallel for
+  #endif  // MULTITHREAD
+  for (ULONG i = 0; i < n->N; ++i)
+    hd->mass_f[i] = fac_conv * inv_ps(hd, i);
+}
+
+
+
+
+
+// 1st order likelihood force expansion (like Jasche+13) //
+void Wprime_il(struct HAMIL_DATA *hd, ULONG l, real_prec *out_x,
+  real_prec *out_y, real_prec *out_z) {
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+
+  // cell center for cell l
+  ULONG N3 = static_cast<ULONG>(n->N3);
+  ULONG N2 = static_cast<ULONG>(n->N2);
+  int ix_x = static_cast<int>((l / N3) / N2);
+  int ix_y = static_cast<int>((l / N3) % N2);
+  int ix_z = static_cast<int>(l % N3);
+  real_prec xl = (static_cast<real_prec>(ix_x) + 0.5) * n->d1;
+  real_prec yl = (static_cast<real_prec>(ix_y) + 0.5) * n->d2;
+  real_prec zl = (static_cast<real_prec>(ix_z) + 0.5) * n->d3;
+
+  real_prec h = n->particle_kernel_h;  // SPH scale length
+
+  real_prec norm = 1. / (M_PI * gsl_pow_5(h));
+
+  #ifdef MULTITHREAD
+  #pragma omp parallel for
+  #endif  // MULTITHREAD
+  for (ULONG i = 0; i < n->N; ++i) {
+    real_prec dx = hd->posx[i] - xl;
+    real_prec dy = hd->posy[i] - yl;
+    real_prec dz = hd->posz[i] - zl;
+    pacman_difference(&dx, n->L1);
+    pacman_difference(&dy, n->L2);
+    pacman_difference(&dz, n->L3);
+
+    real_prec r = sqrt(dx * dx + dy * dy + dz * dz);
+    real_prec q = r / h;
+
+    if (q >= 2) {  // check q >= 2 first, because it will be true most cases
+      out_x[i] = 0;
+      out_y[i] = 0;
+      out_z[i] = 0;
+    } else if (q >= 1) {  // after that q >= 1 will be most prevalent
+      real_prec common = norm * (3 - 0.75 * q - 3./q);
+      out_x[i] = dx * common;
+      out_y[i] = dy * common;
+      out_z[i] = dz * common;
+    } else {  // don't need to check for q > 0, sqrt(sq) in r keeps it positive
+      real_prec common = norm * (2.25 * q - 3);
+      out_x[i] = dx * common;
+      out_y[i] = dy * common;
+      out_z[i] = dz * common;
+    }
+  }
+}
+
+
+void likeli_force_1st_order_diagonal_mass(struct HAMIL_DATA *hd, real_prec *signal, struct DATA *data) {
+  // C_ih term, diagonal only (C_ii) //
+  // In this version, C_ii is the whole mass, so don't need extra C_ii array.
+
+  // 0. prepare data structures
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+  fftw_array<real_prec> Wprime_il_x(n->N), Wprime_il_y(n->N), Wprime_il_z(n->N);
+  fftw_array<complex_prec> Wprime_il_A_f(n->Nhalf), C_ii_l_f(n->Nhalf);
+
+  fillZero(hd->mass_r, n->N);
+
+  // also have to run Lag2Eul, because pos arrays are needed in Wprime_il
+  {
+    unsigned facL=1;
+    bool reggrid=true;
+    gsl_rng *seed = nullptr; // empty: reggrid is true anyway
+    real_prec kernel_scale = SPH_kernel_scale(hd);
+    if (hd->rsd_model)
+      Lag2Eul_rsd_zeldovich(signal, hd->deltaX, hd->posx, hd->posy, hd->posz, n->N1, n->N2, n->N3, n->L1, n->L2, n->L3,
+                            n->d1, n->d2, n->d3, n->min1, n->min2, n->min3, hd->D1, hd->ascale, hd->OM, hd->OL, n->mk,
+                            facL,
+                            reggrid, seed, kernel_scale, n->xobs, n->yobs, n->zobs, n->planepar, n->periodic,
+                            n->R2Cplan, n->C2Rplan);
+    else
+      Lag2Eul(signal, hd->deltaX, hd->posx, hd->posy, hd->posz, n->N1, n->N2, n->N3, n->L1, n->L2, n->L3, n->d1, n->d2,
+              n->d3, n->min1, n->min2, n->min3, hd->D1, hd->D2, hd->ascale, hd->OM, hd->OL, data->numerical->sfmodel, n->mk, n->kth, facL,
+              reggrid, seed, "", kernel_scale, n->R2Cplan, n->C2Rplan);
+  }
+
+  bool rfft = true;  // use real FFT's to cut calculation time in half
+  // N.B.: the normalisation is (-1)(-1)m^2. -m^2 comes from C_ih itself,
+  // the other minus comes from the fact that M_ih = S^-1_ih - C_ih, so there's
+  // an extra minus there!
+  real_prec minus_minus_SPH_mass_sq = gsl_pow_2(hd->rho_c * n->vol /
+    static_cast<real_prec>(n->N));
+
+  // boost::progress_display show_progress(n->N);
+
+  for (ULONG l = 0; l < n->N; ++l) {  // 1.
+    if (hd->window[l] > 0) {  // 2.
+      Wprime_il(hd, l, Wprime_il_x, Wprime_il_y, Wprime_il_z);  // 4.
+
+      fftR2C(n->N1, n->N2, n->N3, Wprime_il_x, Wprime_il_A_f);  // 5. x
+      grad_inv_lap_FS(n->N1, n->N2, n->N3, n->L1, n->L2, n->L3, Wprime_il_A_f,
+        C_ii_l_f, 1, rfft);  // 6. calculate -ik_x/k^2 ^W'_il_x, store in C_ii_l
+
+      fftR2C(n->N1, n->N2, n->N3, Wprime_il_y, Wprime_il_A_f);  // 5. y
+      grad_inv_lap_FS(n->N1, n->N2, n->N3, n->L1, n->L2, n->L3, Wprime_il_A_f,
+        Wprime_il_A_f, 2, rfft);  // 6. calculate -ik_y/k^2 ^W'_il_y, in-place
+      add_to_array(Wprime_il_A_f, C_ii_l_f, n->Nhalf);  // 7. sum to temp array
+
+      fftR2C(n->N1, n->N2, n->N3, Wprime_il_z, Wprime_il_A_f);  // 5. z
+      grad_inv_lap_FS(n->N1, n->N2, n->N3, n->L1, n->L2, n->L3, Wprime_il_A_f,
+        Wprime_il_A_f, 3, rfft);  // 6. calculate -ik_z/k^2 ^W'_il_z, in-place
+      add_to_array(Wprime_il_A_f, C_ii_l_f, n->Nhalf);  // 7. sum to temp array
+
+      // 8. IFFT temporary array from 7, reusing Wprime_il_x for the result
+      // ("div_inv_lap_Wprime_il" if we had to name it):
+      fftC2R(n->N1, n->N2, n->N3, C_ii_l_f, Wprime_il_x);
+
+      // 9. take the square of all elements, 10. multiply by w_l/sigma_l^2,
+      // 11. sum elements to the appropriate C_ii, all in one loop-step:
+      #ifdef MULTITHREAD
+      #pragma omp parallel for
+      #endif  // MULTITHREAD
+      for (ULONG i = 0; i < n->N; ++i)
+        // C_ii[i] += hd->window[i] * gsl_pow_2(Wprime_il_A[i] / hd->noise[i]);
+        hd->mass_r[i] += hd->window[i] * gsl_pow_2(Wprime_il_x[i] / hd->noise[i]);
+    }
+    // ++show_progress;
+    double progress_percent = 100. * static_cast<double>(l) / static_cast<double>(n->N);
+    wprintw(data->curses->status, "computing Hamiltonian mass... %5.1d\%", progress_percent);
+    wrefresh(data->curses->status);
+  }
+  // 12.
+  multiply_factor_array(minus_minus_SPH_mass_sq, hd->mass_r, hd->mass_r, n->N);
+}
+
+
+
+
+
+
+
+// Momenta: mass for Gaussian likelihood! //
+void Hamiltonian_mass(struct HAMIL_DATA *hd, real_prec *signal, struct DATA *data) {
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+  // FT[M] = 1/P+re(FT[w*Nmean])
+
+  wprintw(data->curses->status, "computing Hamiltonian mass...");
+  wrefresh(data->curses->status);
+
+  real_prec fac_conv = 1;
+
+  switch (n->mass_type) {
+    case 0:  // all ones (R)
+      fill_one(hd->mass_r, n->N);
+      break;
+    case 1:  // inverse power spectrum (FS)
+      inverse_power_spectrum_mass(hd, fac_conv);
+      break;
+    case 2:  // inverse power spectrum + likelihood force (FS)
+      likeli_force_mass(hd, signal, data, fac_conv);
+      break;
+    case 3:  // inverse power spectrum + *mean* likelihood force (Wang+12) (FS)
+      mean_likeli_force_mass(hd, signal, data, fac_conv);
+      break;
+    case 4:  // mass == P(k) (FS)
+      copyArray(hd->signal_PS, hd->mass_f, n->N);
+      break;
+    case 5:  // inverse power spectrum (FS) + 1st order likelihood force
+             // expansion (Jasche+13) (R)
+      inverse_power_spectrum_mass(hd, fac_conv);
+      likeli_force_1st_order_diagonal_mass(hd, signal, data);
+      break;
+    case 6:  // 1st order likelihood force expansion (Jasche+13) (R)
+      likeli_force_1st_order_diagonal_mass(hd, signal, data);
+      break;
+    case 60:  // type 0 until burn-in, type 6 afterwards
+      // N.B.: n->s_eps_total is quite random, but it should not be a lot more.
+      // The amount of steps s is also rejected steps for the epsilon stepping,
+      // so probably if you count by n->iGibbs, you'll be a long way into the
+      // burn-in if you take e.g. a multiple of s_eps_total.
+      if (n->iGibbs < n->s_eps_total) {
+        fill_one(hd->mass_r, n->N);
+      } else {
+        likeli_force_1st_order_diagonal_mass(hd, signal, data);
+      }
+      break;
+  }
+
+  // Testing:
+  if (n->mass_fs)
+    multiply_factor_array(n->mass_factor, hd->mass_f, hd->mass_f, n->N);
+
+#ifdef DEBUG
+  dump_mass_spec(hd, data);
+#endif  // DEBUG
+}
+
+
+
+
+
+
+
+
+
+// helper function //
+void dump_mass_spec(struct HAMIL_DATA *hd, struct DATA *data) {
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+
+  int bmax = 100;
+  char buffer1[bmax];
+  sprintf(buffer1, "mass_spec.dat");
+  string outputFileName = data->numerical->dir + string(buffer1);
+  ofstream outStream(outputFileName.data());
+  assert(outStream.is_open());
+  real_prec NORM = 1.0;
+
+  real_prec kr_old = 0.;
+  for (unsigned i = 0; i < n->N1; i++)
+    for (unsigned j = 0; j < n->N2; j++)
+      for (unsigned k = 0; k < n->N3; k++) {
+        real_prec k2 = k_squared(i, j, k, n->L1, n->L2, n->L3, n->N1, n->N2,
+          n->N3);
+        real_prec kr = sqrt(k2);
+
+        if (i == 0 && j == 0 && k == 0)
+          hd->mass_f[k+n->N3*(j+n->N2*i)]=1.;
+
+        if (kr > kr_old) {
+          kr_old = kr;
+          outStream << kr << "   " << NORM*hd->mass_f[k+n->N3*(j+n->N2*i)]
+            << endl;
+        }
+      }
+  outStream.close();
+}
+
+
+// ABOUT fac_conv IN Hamiltonian_mass:
+//   I was experimenting with this factor, but didn't really find a suitable
+//   conclusion. Below is what I previously wrote down there. In the function
+//   let's stick with fac_conv = 1 for now.
+// EGP: a convolution factor is needed here, because in reality what we have
+// is that the "operators" M and P must be each others' inverses. This means
+// that their (continuous) convolution must give a Dirac delta. However,
+// since we want a discrete version of this mass, we need to use the
+// convolution factor V or V/N (see Martel 2005).
+// N.B.: not sure if it must also be in front of the Wang/force-masses...
+// real_prec fac_conv;
+// #ifdef FOURIER_DEF_1
+// fac_conv = 1./n->vol;
+// #endif
+// #ifdef FOURIER_DEF_2
+// fac_conv = static_cast<real_prec>(n->N)/n->vol;
+// #endif
+// EGP:
+// Possibly, another factor is needed though. And maybe the above isn't
+// necessary at all... However, one can imagine that the V and N factors
+// from the power spectrum need to be canceled first before adding new
+// ones... I've got it on paper.
+//#ifdef FOURIER_DEF_2
+//fac_conv = n->vol/static_cast<real_prec>(n->N*n->N*n->N);
+//#endif
+// EGP: definitely, this should not be in front the Wang/force-mass terms...
+// anyway... this term includes the N/V factor. Another possibility would be:
+//#ifdef FOURIER_DEF_2
+//fac_conv = gsl_pow_2(n->vol) / static_cast<real_prec>(n->N*n->N*n->N*n->N);
+//#endif
+// EGP: doesn't seem like this is going to work. Let's try the old one Paco used
+// (which was just N, but now the GRF function is changed, so we compensate for
+// that with V/N^2):
+//#ifdef FOURIER_DEF_2
+//fac_conv = static_cast<real_prec>(n->N)*n->vol/static_cast<real_prec>(n->N*n->N); // yeah, that's really just V/N, but for clarity of the derivation, let's keep it like this.
+//#endif
+// EGP: okay, let's just test how the mass behaves when we vary V and N in input.par.
+// fac_conv = 1.;
+
diff --git a/HMC_mass.h b/HMC_mass.h
new file mode 100644
index 0000000..d81afab
--- /dev/null
+++ b/HMC_mass.h
@@ -0,0 +1,14 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#pragma once
+#include "struct_hamil.h"
+
+void Hamiltonian_mass(struct HAMIL_DATA *hamil_data, real_prec *signal, struct DATA *data);
+void dump_mass_spec(struct HAMIL_DATA *hd, struct DATA *data);
diff --git a/HMC_models.cc b/HMC_models.cc
new file mode 100644
index 0000000..835448b
--- /dev/null
+++ b/HMC_models.cc
@@ -0,0 +1,1379 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+#include "struct_main.h"
+#include "struct_hamil.h"
+
+#include <math.h>
+#include <iomanip>
+#include <algorithm>  // std::max_element, std::find
+#include <iterator>  // std::distance
+
+#include <gsl/gsl_integration.h>
+
+#include "fftw_array.h"
+#include "planck/paramfile.h"
+
+#include "math_funcs.h"
+#include "Lag2Eul.h"
+#include "cosmo.h"
+
+#include "HMC_help.h"
+
+#include "convenience.h"
+
+using namespace std;
+
+// TODO: split this file into different files for each model:
+//       HMC/model/gaussian.cc, HMC/model/poisson.cc, etc.
+
+// Note: in HMC_{momenta,mass}.cc the mass is tuned (if mass_fs)
+// to the Gaussian likelihood model!
+
+
+// Model: Poissonian likelihood //
+void poissonian_likelihood_partial_f_delta_x_log_like(struct HAMIL_DATA *hd, real_prec *deltaX, real_prec *dummy)
+{
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+
+  for(ULONG i = 0; i < n->N; i++)
+  {
+    real_prec dens = static_cast<real_prec>(1. + hd->biasP * deltaX[i]);// must be positive!
+    real_prec Lambda = hd->window[i] * hd->rho_c * pow(dens, hd->biasE);
+
+    if ((hd->window[i] > 0.0) && (dens > 0.0))
+      //dummy[i] = (Lambda - hd->nobs[i]) * (hd->biasE * hd->biasP / dens);
+      dummy[i] = (1 - hd->nobs[i]/Lambda) * hd->rho_c * hd->biasE * hd->biasP * pow(dens, hd->biasE - 1);
+    else
+      dummy[i] = 0.0;
+  }
+}
+
+// N.B.: grad_f_delta_x_comp should no longer be used. Only necessary for calc_h != 2, which is bogus!
+void poissonian_likelihood_grad_f_delta_x_comp(struct HAMIL_DATA *hd, real_prec *deltaX, real_prec *out,
+                                               unsigned int component)
+{
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+  gradfindif(n->N1, n->L1, deltaX, out, component);
+}
+
+real_prec poissonian_likelihood_log_like(struct HAMIL_DATA *hd, real_prec *delta)
+{
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+
+  // calculate deltaX and pos
+  {
+    unsigned facL=1;
+    bool reggrid=true;
+    gsl_rng *seed = nullptr; // empty: reggrid is true anyway
+    real_prec kernel_scale = SPH_kernel_scale(hd);
+    Lag2Eul(delta, hd->deltaX, hd->posx, hd->posy, hd->posz, n->N1, n->N2, n->N3, n->L1, n->L2, n->L3, n->d1, n->d2,
+            n->d3, n->min1, n->min2, n->min3, hd->D1, hd->D2, hd->ascale, hd->OM, hd->OL, hd->sfmodel, n->mk, n->kth, facL,
+            reggrid, seed, "", kernel_scale, n->R2Cplan, n->C2Rplan);
+  }
+
+  // actual likelihood calculation
+  real_prec out = 0.;
+#ifdef MULTITHREAD
+#pragma omp parallel for reduction(+:out)
+#endif // MULTITHREAD 
+  for(ULONG i = 0; i < n->N; i++)
+  {
+    real_prec dens = static_cast<real_prec>(1. + hd->biasP * hd->deltaX[i]);// must be positive!
+    real_prec Lambda = hd->window[i] * hd->rho_c * pow(dens, hd->biasE);
+
+    if ((hd->window[i] > 0.) && (Lambda > 0.0))
+      out += Lambda - hd->nobs[i] * log(Lambda);
+  }
+
+  return (out);
+}
+
+// END model Poissonian likelihood //
+
+
+// Model: Gaussian random field (no evolution, just lagrangian field) //
+// N.B.: grad_f_delta_x_comp should no longer be used. Only necessary for calc_h != 2, which is bogus!
+void grf_likelihood_grad_f_delta_x_comp(struct HAMIL_DATA *, real_prec *, real_prec *, unsigned int)
+{
+}
+
+void grf_likelihood_partial_f_delta_x_log_like(struct HAMIL_DATA *, real_prec *, real_prec *)
+{
+}
+
+void grf_likelihood_grad_log_like(struct HAMIL_DATA *hd, real_prec *delta, real_prec *out)
+{
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+
+#ifdef MULTITHREAD
+#pragma omp parallel for 
+#endif // MULTITHREAD 
+  for(ULONG i=0;i<n->N;i++)
+    if (hd->window[i]>0.)
+      out[i] = (delta[i] - hd->nobs[i]) / (hd->noise[i] * hd->noise[i]);
+    else
+      out[i] = 0;
+}
+
+real_prec grf_likelihood_log_like(struct HAMIL_DATA *hd, real_prec *delta)
+{
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+
+  real_prec out=0.;
+#ifdef MULTITHREAD
+#pragma omp parallel for reduction(+:out)
+#endif // MULTITHREAD 
+  for(ULONG i=0;i<n->N;i++)
+    if (hd->window[i]>0.)
+      out += num_0_5 * gsl_pow_2((delta[i] - hd->nobs[i])/hd->noise[i]);
+
+  return (out);
+}
+
+// END model Gaussian random field //
+
+  
+
+// Model: Gaussian likelihood //
+void gaussian_likelihood_partial_f_delta_x_log_like_interpolate(struct HAMIL_DATA *hd, real_prec *deltaX, real_prec *out)
+{
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+  // EGP: partial -log L/partial delta_x = (Lambda-nobs)/sigma**2
+  
+  fftw_array<real_prec> deltaXQ(n->N), nobsXQ(n->N), noiseXQ(n->N), windowXQ(n->N);
+
+  interpolate_CIC(n->N1,n->N2,n->N3,n->L1,n->L2,n->L3,n->d1,n->d2,n->d3, hd->posx, hd->posy, hd->posz, deltaX, n->N, deltaXQ);
+  interpolate_CIC(n->N1,n->N2,n->N3,n->L1,n->L2,n->L3,n->d1,n->d2,n->d3, hd->posx, hd->posy, hd->posz, hd->nobs, n->N, nobsXQ);
+  interpolate_CIC(n->N1,n->N2,n->N3,n->L1,n->L2,n->L3,n->d1,n->d2,n->d3, hd->posx, hd->posy, hd->posz, hd->noise, n->N, noiseXQ);
+  interpolate_CIC(n->N1,n->N2,n->N3,n->L1,n->L2,n->L3,n->d1,n->d2,n->d3, hd->posx, hd->posy, hd->posz, hd->window, n->N, windowXQ);
+
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD 
+  for(ULONG i=0;i<n->N;i++)
+  {
+    real_prec Lambda = windowXQ[i] * hd->rho_c * pow(num_1 + hd->biasP * deltaXQ[i], hd->biasE);
+    if ((windowXQ[i]>0.) && (Lambda > 0.0))
+    {
+      real_prec resid = nobsXQ[i]-Lambda;
+      out[i] = resid/(noiseXQ[i]*noiseXQ[i]);
+    }
+    else
+      out[i] = 0.0;
+  }
+}
+// N.B.: grad_f_delta_x_comp should no longer be used. Only necessary for calc_h != 2, which is bogus!
+void gaussian_likelihood_grad_f_delta_x_comp_interpolate(struct HAMIL_DATA *hd, real_prec *deltaX, real_prec *out,
+                                                         unsigned int component)
+{
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+  fftw_array<real_prec> deltaXQ(n->N);
+  interpolate_CIC(n->N1,n->N2,n->N3,n->L1,n->L2,n->L3,n->d1,n->d2,n->d3, hd->posx, hd->posy, hd->posz, deltaX, n->N, deltaXQ);
+
+  gradfft(n->N1,n->N2,n->N3, n->L1,n->L2,n->L3, deltaXQ, out, component);
+  //gradfindif(n->N1,n->N2,n->N3, n->L1,n->L2,n->L3, deltaX, out, component);
+}
+
+// NOTE:
+// partial_f_delta_x_log_like is also used in Gaussian RSD likelihood!
+// Take care when introducing rank ordering, then it might become different.
+void gaussian_likelihood_partial_f_delta_x_log_like(struct HAMIL_DATA *hd, real_prec *deltaX, real_prec *out)
+{
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+  // EGP: partial -log L/partial delta_x = (Lambda-nobs)/sigma**2
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD 
+  for(ULONG i = 0; i < n->N; i++)
+  {
+    real_prec Lambda = hd->window[i] * hd->rho_c * pow(num_1 + hd->biasP * deltaX[i], hd->biasE);
+    if ((hd->window[i] > 0.) && (Lambda > 0.0))
+    {
+      real_prec resid = hd->nobs[i] - Lambda;
+      out[i] = resid / (hd->noise[i] * hd->noise[i]);
+    }
+    else
+      out[i]=0.0;
+  }
+}
+// N.B.: grad_f_delta_x_comp should no longer be used. Only necessary for calc_h != 2, which is bogus!
+void gaussian_likelihood_grad_f_delta_x_comp(struct HAMIL_DATA *hd, real_prec *deltaX, real_prec *out,
+                                             unsigned int component)
+{
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+  //gradfindif(n->N1,n->N2,n->N3, n->L1,n->L2,n->L3, deltaX, out, component);
+  gradfft(n->N1,n->N2,n->N3, n->L1,n->L2,n->L3, deltaX, out, component);
+}
+real_prec gaussian_likelihood_log_like(struct HAMIL_DATA *hd, real_prec *deltaQ)
+{
+  // -log L = 0.5*(Lambda-nobs)^2/sigma^2
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+  fftw_array<real_prec> delta_growing(n->N);
+
+  // single out growing mode (Peebles) -> delta_growing
+  multiply_factor_array(n->deltaQ_factor, deltaQ, delta_growing, n->N);
+
+  // calculate deltaX and pos
+  {
+    unsigned facL = 1;
+    bool reggrid = true;
+    gsl_rng *seed = nullptr; // empty: reggrid is true anyway
+    real_prec kernel_scale = SPH_kernel_scale(hd);
+    if (hd->rsd_model)
+      Lag2Eul_rsd_zeldovich(delta_growing, hd->deltaX, hd->posx, hd->posy, hd->posz, n->N1, n->N2, n->N3, n->L1, n->L2,
+                            n->L3, n->d1, n->d2, n->d3, n->min1, n->min2, n->min3, hd->D1, hd->ascale, hd->OM, hd->OL,
+                            n->mk, facL,
+                            reggrid, seed, kernel_scale, n->xobs, n->yobs, n->zobs, n->planepar, n->periodic,
+                            n->R2Cplan, n->C2Rplan);
+    else
+      Lag2Eul(delta_growing, hd->deltaX, hd->posx, hd->posy, hd->posz, n->N1, n->N2, n->N3, n->L1, n->L2, n->L3, n->d1,
+              n->d2, n->d3, n->min1, n->min2, n->min3, hd->D1, hd->D2, hd->ascale, hd->OM, hd->OL, hd->sfmodel, n->mk, n->kth,
+              facL,
+              reggrid, seed, "", kernel_scale, n->R2Cplan, n->C2Rplan);
+  }
+
+  // actual likelihood calculation
+  real_prec out=0.;
+#ifdef MULTITHREAD
+#pragma omp parallel for reduction(+:out)
+#endif // MULTITHREAD
+  for(ULONG i=0;i<n->N;i++)
+  {
+    real_prec Lambda = hd->window[i] * hd->rho_c * pow(num_1+hd->biasP*hd->deltaX[i],hd->biasE);
+    if ((hd->window[i]>0.) && (Lambda > 0.0))
+      out+=num_0_5 * gsl_pow_2( (Lambda - hd->nobs[i]) / hd->noise[i] );
+  }
+
+  return (out);
+}
+// End model: Gaussian likelihood  //
+
+
+
+// Model: log-normal likelihood //
+
+// TEST: with lambda function (c++11) for Lambda 
+//       This is also a useful structure for when/if we put these models in
+//       full-fledged classes; one could then define the function for Lambda
+//       as a class method and reuse it in the different functions.
+//void lognormal_likelihood_partial_f_delta_x_log_like(struct HAMIL_DATA *hd, real_prec *deltaX, real_prec *dummy)
+//{
+  //struct HAMIL_NUMERICAL *n = hd->numerical;
+  //// EGP: partial -log L/partial log(1+delta_x) = (Lambda-nobs)/sigma**2
+  //auto Lambda = [&] (ULONG index) -> real_prec {log(hd->rho_c * pow(num_1 + hd->biasP * deltaX[index], hd->biasE));};
+//#ifdef MULTITHREAD
+//#pragma omp parallel for
+//#endif // MULTITHREAD
+  //for(ULONG i=0; i < n->N;i++)
+    //if (hd->window[i]>0.)
+      //dummy[i] = (hd->nobs[i] - Lambda(i))/(hd->noise[i] * hd->noise[i]);
+    //else
+      //dummy[i] = 0.0;
+//}
+
+void lognormal_likelihood_partial_f_delta_x_log_like(struct HAMIL_DATA *hd, real_prec *deltaX, real_prec *dummy)
+{
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+  // EGP: partial -log L/partial log(1+delta_x) = (Lambda-nobs)/sigma**2
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+  for(ULONG i=0; i < n->N;i++)
+  {
+    real_prec Lambda = log(hd->rho_c * pow(num_1 + hd->biasP * deltaX[i], hd->biasE));
+    if (hd->window[i]>0.)
+      dummy[i]=(hd->nobs[i] - Lambda)/(hd->noise[i] * hd->noise[i]);
+    else
+      dummy[i]=0.0;
+  }
+}
+
+real_prec lognormal_likelihood_f_delta_x_i_calc(real_prec rho_c, real_prec delta_min, real_prec deltaX_i)
+{
+  // keep above zero density
+  if (deltaX_i < delta_min)
+    deltaX_i = delta_min;
+
+  return log(rho_c * (num_1 + deltaX_i));
+}
+
+real_prec lognormal_likelihood_f_delta_x_i(struct HAMIL_DATA *hd, real_prec deltaX_i)
+{
+  return lognormal_likelihood_f_delta_x_i_calc(hd->rho_c, hd->delta_min, deltaX_i);
+}
+
+void lognormal_likelihood_f_delta_x(struct HAMIL_DATA *hd, real_prec *deltaX, real_prec *out)
+{
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+  for (ULONG i = 0; i < n->N; i++)
+    out[i] = lognormal_likelihood_f_delta_x_i(hd, deltaX[i]);
+}
+
+// N.B.: grad_f_delta_x_comp should no longer be used. Only necessary for calc_h != 2, which is bogus!
+void lognormal_likelihood_grad_f_delta_x_comp(struct HAMIL_DATA *hd, real_prec *deltaX, real_prec *out,
+                                              unsigned int component)
+{
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+  fftw_array<real_prec> f_delta_x(n->N);
+
+  lognormal_likelihood_f_delta_x(hd, deltaX, f_delta_x);
+
+  gradfindif(n->N1, n->L1, f_delta_x, out, component);
+}
+
+real_prec lognormal_likelihood_log_like(struct HAMIL_DATA *hd, real_prec *delta)
+{
+  // -log L = 0.5*(Lambda-nobs)^2/sigma^2
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+
+  // calculate deltaX and pos
+  {
+    unsigned facL=1;
+    bool reggrid=true;
+    gsl_rng *seed = nullptr; // empty: reggrid is true anyway
+    real_prec kernel_scale = SPH_kernel_scale(hd);
+    Lag2Eul(delta, hd->deltaX, hd->posx, hd->posy, hd->posz, n->N1, n->N2, n->N3, n->L1, n->L2, n->L3, n->d1, n->d2,
+            n->d3, n->min1, n->min2, n->min3, hd->D1, hd->D2, hd->ascale, hd->OM, hd->OL, hd->sfmodel, n->mk, n->kth, facL,
+            reggrid, seed, "", kernel_scale, n->R2Cplan, n->C2Rplan);
+  }
+
+  // actual likelihood calculation
+  real_prec out=0.;
+#ifdef MULTITHREAD
+#pragma omp parallel for reduction(+:out)
+#endif // MULTITHREAD 
+  for(ULONG i=0;i<n->N;i++)
+  {
+    real_prec Lambda = lognormal_likelihood_f_delta_x_i(hd, hd->deltaX[i]);
+    if (hd->window[i]>0.)
+    {
+      real_prec resid=Lambda - hd->nobs[i];
+      out+=num_0_5*resid*resid/(hd->noise[i]*hd->noise[i]);
+    }
+  }
+
+  return (out);
+}
+// End model: log-normal likelihood //
+
+
+// Model: Gaussian likelihood (or any other that is dependent on delta_q only through delta_x) //
+void likelihood_calc_h(struct HAMIL_DATA *hd, real_prec *deltaX, real_prec *out)
+{
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+  fftw_array<real_prec> partLike(n->N), dummy(n->N);
+  fftw_array<complex_prec> outC(n->Nhalf), dummyC(n->Nhalf);
+
+  fillZero(outC, n->Nhalf);
+
+  hd->partial_f_delta_x_log_like(hd, deltaX, partLike);
+
+  for (unsigned i = 1; i <= 3; i++){
+    // compute i-th component of x-gradient of f(deltaX) -> dummy
+    hd->grad_f_delta_x_comp(hd, deltaX, dummy, i);
+    // multiply with partLike -> dummy
+    multiplyArrays(partLike, dummy, dummy, n->N);
+    // transform to FS => g_i(k) -> dummyC
+    fftR2C(n->N1, n->N2, n->N3, dummy, dummyC);
+    // multiply by -ik_i/k^2
+    bool rfft = true;
+    grad_inv_lap_FS(n->N1,n->N2,n->N3, n->L1,n->L2,n->L3, dummyC, dummyC, i, rfft); 
+    // sum the resulting h(k) term to outC
+    add_to_array(dummyC, outC, n->Nhalf);
+  }
+  // transform h(k) to real space -> h(q)
+  fftC2R(n->N1, n->N2, n->N3, outC, out);
+}
+void likelihood_calc_h_old(struct HAMIL_DATA *hd, real_prec *deltaX, real_prec *out)
+{
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+  fftw_array<real_prec> partLike(n->N), dummy(n->N), dummy2(n->N);
+  fftw_array<complex_prec> outC(n->Nhalf), dummyC(n->Nhalf);
+
+  fillZero(outC, n->Nhalf);
+
+  hd->partial_f_delta_x_log_like(hd, deltaX, partLike);
+
+  for (unsigned i = 1; i <= 3; i++){
+    // compute i-th component of x-gradient of f(deltaX) -> dummy
+    hd->grad_f_delta_x_comp(hd, deltaX, dummy, i);
+    //if (n->iGibbs == 1)
+      //fill_one(dummy, n->N); // testing: when delta_1 == 0 (which it is with zero initial guess), then deltaX and its derivative will be zero, breaking the whole algorithm for this first step
+
+    // multiply with partLike -> dummy
+    multiplyArrays(partLike, dummy, dummy, n->N);
+    // interpolate to x(q_i) => g_i(q)
+    //{
+     //switch (n->mk)
+      //{
+        //case 0:
+          //throw BarcodeException("likelihood_calc_h: NGP interpolation not yet implemented!");
+          //break;
+        //case 1:
+          interpolate_CIC(n->N1,n->N2,n->N3,n->L1,n->L2,n->L3,n->d1,n->d2,n->d3, hd->posx, hd->posy, hd->posz,dummy,n->N,dummy2);
+          //break;
+        //case 2:
+          //throw BarcodeException("likelihood_calc_h: TSC interpolation not yet implemented!");
+          //break;
+      //}
+    //}
+    // transform to FS => g_i(k) -> AUX
+    fftR2C(n->N1, n->N2, n->N3, dummy2, dummyC);
+    // multiply by -ik_i/k^2
+    bool rfft = true;
+    grad_inv_lap_FS(n->N1,n->N2,n->N3, n->L1,n->L2,n->L3, dummyC, dummyC, i, rfft);
+    // sum the resulting h(k) term to outC
+    add_to_array(dummyC, outC, n->Nhalf);
+  }
+  // transform h(k) to real space -> h(q)
+  fftC2R(n->N1, n->N2, n->N3, outC, out);
+}
+
+
+void likelihood_calc_h_simple(struct HAMIL_DATA *hd, real_prec *deltaX, real_prec *out)
+{
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+  fftw_array<real_prec> partLike(n->N);
+
+  hd->partial_f_delta_x_log_like(hd, deltaX, partLike);
+
+  // interpolate to x(q_i) => g_i(q)
+  //switch (n->mk)
+  //{
+    //case 0:
+      //throw BarcodeException("likelihood_calc_h: NGP interpolation not yet implemented!");
+      //break;
+    //case 1:
+      interpolate_CIC(n->N1,n->N2,n->N3,n->L1,n->L2,n->L3,n->d1,n->d2,n->d3, hd->posx, hd->posy, hd->posz, partLike, n->N, out);
+      //break;
+    //case 2:
+      //throw BarcodeException("likelihood_calc_h: TSC interpolation not yet implemented!");
+      //break;
+  //}
+}
+
+
+// new SPH method stuff //
+real_prec SPH_kernel_radius(struct HAMIL_DATA *hd) {
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+  return(SPH_kernel_radius(n->particle_kernel, n->particle_kernel_h));
+}
+
+real_prec SPH_kernel_radius(int particle_kernel_type,
+                            real_prec particle_kernel_h) {
+  real_prec radius;
+  switch (particle_kernel_type)
+  {
+    case 0: // SPH kernel
+      radius = particle_kernel_h * 2;
+      break;
+    default:
+      throw BarcodeException("\"Eat my shorts!\" -- B. J. Simpson");
+  }
+  return(radius);
+}
+
+// double inline __attribute__((fastcall)) sqrt14(double n) {
+//   __asm__ ("fld qword ptr [esp+4];"
+//            "fsqrt;"
+//            "ret 8");
+//         // _asm fld qword ptr [esp+4]
+//         // _asm fsqrt
+//         // _asm ret 8
+// }
+
+void grad_SPH_kernel_3D(real_prec x, real_prec y, real_prec z, real_prec h_inv, real_prec h_sq_inv, real_prec norm,
+                        real_prec &out_x, real_prec &out_y, real_prec &out_z) {
+  // N.B.: when using this for density estimation, you need to normalize the
+  // result afterwards with V/N! See e.g. getDensity_SPH. Same goes for
+  // SPH_kernel_3D.
+
+  // norm must be 1/(PI * h^4)
+  
+  // derivative of Monaghan kernel W_4
+  real_prec r_sq = x*x + y*y + z*z;
+  real_prec partial;
+  // if (r_sq > 4*h_sq)
+  //   partial = 0.;
+  // else if (r_sq > h_sq) {
+  //   real_prec r = sqrt(r_sq);
+  //   real_prec q = r * h_inv;
+  //   real_prec qmin2 = q - 2;
+  //   partial = -0.75*qmin2*qmin2 * norm / r;
+  // } else {
+  //   real_prec r = sqrt(r_sq);
+  //   partial = (2.25*r*h_inv*h_inv - 3*h_inv) * norm;
+  // }
+  int q_sq_i = static_cast<int>(r_sq * h_sq_inv);
+  switch (q_sq_i) {
+    case 0:
+      {
+        real_prec r = sqrt(r_sq);
+        partial = (2.25*r*h_inv*h_inv - 3*h_inv) * norm;
+      }
+      break;
+    case 1:
+    case 2:
+    case 3:
+      {
+        real_prec r = sqrt(r_sq);
+        real_prec q = r * h_inv;
+        real_prec qmin2 = q - 2;
+        partial = -0.75*qmin2*qmin2 * norm / r;
+      }
+      break;
+    default:
+      partial = 0.;
+      break;
+  }
+
+  out_x = partial * x;
+  out_y = partial * y;
+  out_z = partial * z;
+}
+
+
+// #include <ia32intrin.h>
+
+
+void grad_SPH_kernel_3D_h_units(real_prec x_h, real_prec y_h, real_prec z_h,
+                                real_prec norm,
+                        real_prec &out_x, real_prec &out_y, real_prec &out_z) {
+  // N.B.: when using this for density estimation, you need to normalize the
+  // result afterwards with V/N! See e.g. getDensity_SPH. Same goes for
+  // SPH_kernel_3D.
+
+  // norm must be 1/(PI * h^4)
+  
+  // derivative of Monaghan kernel W_4
+  // real_prec r_sq = x*x + y*y + z*z;
+  real_prec q_sq = x_h*x_h + y_h*y_h + z_h*z_h;
+  // __m128 d_h = _mm_setr_ps(static_cast<float>(x_h), static_cast<float>(y_h),
+                          // static_cast<float>(z_h), 0.0);
+  // real_prec q_sq = static_cast<real_prec>(_mm_cvtss_f32(_mm_dp_ps(d_h, d_h, 0x71)));
+
+  real_prec partial;
+  if (q_sq > 4)
+    partial = 0.;
+  else if (q_sq > 1) {
+    // real_prec r = sqrt(r_sq);
+    // real_prec q = r * h_inv;
+    // real_prec qmin2 = q - 2;
+    // partial = -0.75*qmin2*qmin2 * norm / r;
+    real_prec q = sqrt(q_sq);
+    real_prec qmin2 = q - 2;
+    partial = -0.75*qmin2*qmin2 * norm / q;
+  } else {
+    // real_prec r = sqrt(r_sq);
+    // partial = (2.25*r*h_inv*h_inv - 3*h_inv) * norm;
+    real_prec q = sqrt(q_sq);
+    partial = (2.25*q - 3) * norm;
+  }
+  // int q_sq_i = static_cast<int>(q_sq);
+  // switch (q_sq_i) {
+  //   case 0:
+  //     {
+  //       real_prec q = sqrt(q_sq);
+  //       partial = (2.25*q - 3) * norm;
+  //     }
+  //     break;
+  //   case 1:
+  //   case 2:
+  //   case 3:
+  //     {
+  //       real_prec q = sqrt(q_sq);
+  //       // real_prec q = r * h_inv;
+  //       real_prec qmin2 = q - 2;
+  //       // partial = -0.75*qmin2*qmin2 * norm / r;
+  //       partial = -0.75*qmin2*qmin2 * norm / q;
+  //     }
+  //     break;
+  //   default:
+  //     partial = 0.;
+  //     break;
+  // }
+
+  out_x = partial * x_h;
+  out_y = partial * y_h;
+  out_z = partial * z_h;
+}
+
+
+int SPH_kernel_3D_cells_count(struct HAMIL_DATA *hd) {
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+  return(SPH_kernel_3D_cells_count(n->particle_kernel, n->particle_kernel_h,
+                                   n->d1, n->d2, n->d3));
+}
+
+int SPH_kernel_3D_cells_count(int particle_kernel_type,
+                              real_prec particle_kernel_h,
+                              real_prec d1, real_prec d2, real_prec d3) {
+
+  // real_prec kernel_reach = SPH_kernel_radius(hd);
+  real_prec kernel_reach = SPH_kernel_radius(particle_kernel_type,
+                                             particle_kernel_h);
+
+  int reach1 = static_cast<int>(kernel_reach/d1) + 1;
+  int reach2 = static_cast<int>(kernel_reach/d2) + 1;
+  int reach3 = static_cast<int>(kernel_reach/d3) + 1;
+  real_prec kernel_reach_sq = kernel_reach * kernel_reach;
+
+  int out = 0;
+
+  for (int i1 = -reach1; i1 <= reach1; ++i1)
+    for (int i2 = -reach2; i2 <= reach2; ++i2)
+      for (int i3 = -reach3; i3 <= reach3; ++i3) {
+        real_prec dx = (abs(static_cast<real_prec>(i1)) - 0.5)*d1;
+        real_prec dy = (abs(static_cast<real_prec>(i2)) - 0.5)*d2;
+        real_prec dz = (abs(static_cast<real_prec>(i3)) - 0.5)*d3;
+        real_prec r_sq = dx*dx + dy*dy + dz*dz;  // squared for efficiency
+
+        if (r_sq <= kernel_reach_sq) {
+          ++out;
+        }
+      }
+
+  return(out);
+}
+
+
+// std::vector<int> SPH_kernel_3D_cells(struct HAMIL_DATA *hd)
+void SPH_kernel_3D_cells(struct HAMIL_DATA *hd, vector<int> &out_i,
+                         vector<int> &out_j, vector<int> &out_k) {
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+  SPH_kernel_3D_cells(n->particle_kernel, n->particle_kernel_h,
+                      n->d1, n->d2, n->d3, out_i, out_j, out_k);
+}
+
+void SPH_kernel_3D_cells(int particle_kernel_type, real_prec particle_kernel_h,
+                         real_prec d1, real_prec d2, real_prec d3,
+                         vector<int> &out_i,
+                         vector<int> &out_j, vector<int> &out_k) {
+  // First determine the reach of the kernel, which determines how many cells
+  // every particle needs to loop over to check for contributions. This is
+  // based on the kernel radius (e.g. 2*kernel_h for SPH with splines).
+  real_prec kernel_reach = SPH_kernel_radius(particle_kernel_type,
+                                             particle_kernel_h);
+  int reach1 = static_cast<int>(kernel_reach/d1) + 1;
+  int reach2 = static_cast<int>(kernel_reach/d2) + 1;
+  int reach3 = static_cast<int>(kernel_reach/d3) + 1;
+  // Determine sphere to loop over (+ 0.5dx, because we need to accomodate all
+  // possible positions in a cell; note that this is implemented as -0.5dx
+  // instead of just increasing the kernel reach by 0.5dx, because in principle
+  // dx can be different in each direction, so that can only be incorporated if
+  // done in each direction independently).
+  real_prec kernel_reach_sq = kernel_reach * kernel_reach;
+  // std::vector<int> kernel_cells;
+  // int ix_cell = 0;
+
+  for(int i1 = -reach1; i1 <= reach1; ++i1)
+    for(int i2 = -reach2; i2 <= reach2; ++i2)
+      for(int i3 = -reach3; i3 <= reach3; ++i3)
+      {
+        real_prec dx = (abs(static_cast<real_prec>(i1)) - 0.5)*d1; // abs, otherwise the "- 0.5" doesn't work for negative indices
+        real_prec dy = (abs(static_cast<real_prec>(i2)) - 0.5)*d2;
+        real_prec dz = (abs(static_cast<real_prec>(i3)) - 0.5)*d3;
+        real_prec r_sq = dx*dx + dy*dy + dz*dz; // squared for efficiency
+
+        if (r_sq <= kernel_reach_sq)
+        {
+          out_i.push_back(i1);
+          out_j.push_back(i2);
+          out_k.push_back(i3);
+          // ++ix_cell;
+        }
+      }
+
+  // return(kernel_cells);
+}
+
+
+
+// twee mogelijkheden:
+// 1. 3 vectors met i&j, k_begin en k_last (niet k_end, inclusive!)
+// 2. 2 2D vectors met k_begin en k_end (vierkant) waar indices i en j zijn
+// optie 1:
+void SPH_kernel_3D_cells_hull_1(const vector<int> &i, const vector<int> &j, const vector<int> &k,
+                                vector< pair<int, int> > &ij_out,
+                                vector<int> &k_begin, vector<int> &k_last) {
+  auto N_cells = i.size();
+
+  pair<int, int> ij0(i[0], j[0]);
+  ij_out.push_back(ij0);
+  k_begin.push_back(k[0]);
+  k_last.push_back(k[0]);
+
+  for (unsigned int ix = 1; ix < N_cells; ++ix) {
+    // cout << ix << endl;
+    // cout << i[ix] << " " << j[ix] << endl;
+    pair<int, int> ij(i[ix], j[ix]);
+    auto ij_duplicate = find(ij_out.begin(), ij_out.end(), ij);
+    if (ij_duplicate == ij_out.end()) {
+      // cout << "nieuw" << endl << endl;
+      ij_out.push_back(ij);
+      k_begin.push_back(k[ix]);
+      k_last.push_back(k[ix]);
+    } else {
+      // cout << "duplicaat" << endl << endl;
+      ULONG ix_duplicate = static_cast<ULONG>(distance(ij_out.begin(), ij_duplicate));
+      if (k_begin[ix_duplicate] > k[ix])
+        k_begin[ix_duplicate] = k[ix];
+      if (k_last[ix_duplicate] < k[ix])
+        k_last[ix_duplicate] = k[ix];
+    }
+  }
+}
+
+// optie 2:
+// void SPH_kernel_3D_cells_hull_2(vector<int> &i, vector<int> &j, vector<int> &k,
+//                                 vector<int> &k_begin, vector<int> &k_last) {
+  // auto N_cells = i.size();
+  // for (int ix = 0; ix < N_cells; ++ix) {}
+// }
+
+
+// TODO: check changed signedness warnings in below code, but make sure the changes don't make it slower, since this is an arduously optimized piece of code! Original version of the function put in comments below.
+
+// TODO: make sure somehow (using new type?) that ix/y/z are never larger than INT_MAX! Otherwise casts below can go wrong.
+// TODO: same goes for index_xy_part below, which should not be larger than LONG_MAX.
+/*
+ * N2pad and N3pad are ULONG because we need that to promote to ULONG in the calculation of index_xy_part, but in fact
+ * they are not expected to be higher than uint_max! I.e. conceptually they are unsigned int, not ulong.
+ */
+void _likelihood_calc_V_SPH_kernel_loop_h_units(ULONG N2pad, ULONG N3pad, real_prec d1_h, real_prec d2_h, real_prec d3_h,
+                                                vector<pair<int, int> > &ij, vector<int> &k_begin, vector<int> &k_last,
+                                                unsigned ix, unsigned iy, unsigned iz, real_prec dpcx_h, real_prec dpcy_h,
+                                                real_prec dpcz_h, real_prec *part_like_padded, unsigned padding,
+                                                real_prec grad_SPH_kernel_norm, real_prec &_out_x_j, real_prec &_out_y_j,
+                                                real_prec &_out_z_j) {
+  real_prec out_x_j = 0., out_y_j = 0., out_z_j = 0.; // to avoid having to add to out_#[j]'s inside the loop, which is expensive, because these arrays are not at all contiguous => factor 0.82 timesaving
+  unsigned ix_pad = ix + padding;
+  unsigned iy_pad = iy + padding;
+  unsigned iz_pad = iz + padding;
+  // Note: don't use unsigned int as index below! Slower than signed. See:
+  // http://stackoverflow.com/a/2044021/1199693.
+  // Can't seem to be able to remove warning (unsigned vs signed comparison).
+  // Tried converting the ij.size() to long and int, but is again slower...
+  for (vector< pair<int, int> >::size_type ij_ix = 0;
+       ij_ix < ij.size(); ++ij_ix) {
+    int i1 = ij[ij_ix].first;
+    int i2 = ij[ij_ix].second;
+    unsigned kx = static_cast<unsigned>(static_cast<int>(ix_pad) + i1);
+    unsigned ky = static_cast<unsigned>(static_cast<int>(iy_pad) + i2);
+    // Cell position (relative to the central cell):
+    real_prec diff_x_h = dpcx_h - static_cast<real_prec>(i1)*d1_h;
+    real_prec diff_y_h = dpcy_h - static_cast<real_prec>(i2)*d2_h;
+    // magic
+    ULONG index_xy_part = N3pad*(ky + N2pad*kx);  // N2/3pad will promote kx/y to ULONG
+    int kz_begin = k_begin[ij_ix];
+    int kz_last = k_last[ij_ix];
+    ULONG index_begin = static_cast<ULONG>(kz_begin + static_cast<long>(index_xy_part + iz_pad));
+    ULONG index_end = index_begin + static_cast<ULONG>(kz_last - kz_begin);
+    real_prec diff_z_h = dpcz_h - static_cast<real_prec>(kz_begin) * d3_h;
+    // the actual loop
+//    for (int i3 = kz_begin; i3 <= kz_last; ++i3) {
+    for (ULONG index = index_begin; index <= index_end; ++index) {
+      real_prec common_part = part_like_padded[index];
+
+      real_prec grad_kernel_x, grad_kernel_y, grad_kernel_z;
+      grad_SPH_kernel_3D_h_units(diff_x_h, diff_y_h, diff_z_h, grad_SPH_kernel_norm,
+                                 grad_kernel_x, grad_kernel_y, grad_kernel_z);
+
+      out_x_j += common_part * grad_kernel_x;
+      out_y_j += common_part * grad_kernel_y;
+      out_z_j += common_part * grad_kernel_z;
+
+      // for next iteration:
+//      ++index;
+      diff_z_h -= d3_h;
+    }
+  }
+  _out_x_j = out_x_j;
+  _out_y_j = out_y_j;
+  _out_z_j = out_z_j;
+}
+
+
+// NOTE:
+// Below the original version of the above function. The original version was optimized by hand.
+// However, the compiler complained about implicit conversions, which are now removed from the above
+// version. TODO: check at some point which version is faster!
+//void _likelihood_calc_V_SPH_kernel_loop_h_units(int N2pad, int N3pad, real_prec d1_h, real_prec d2_h, real_prec d3_h,
+//                                               vector<pair<int, int> > &ij, vector<int> &k_begin, vector<int> &k_last,
+//                                               int ix, int iy, int iz, real_prec dpcx_h, real_prec dpcy_h,
+//                                               real_prec dpcz_h, real_prec *part_like_padded, int padding,
+//                                               real_prec grad_SPH_kernel_norm, real_prec &_out_x_j, real_prec &_out_y_j,
+//                                               real_prec &_out_z_j) {
+//  real_prec out_x_j = 0., out_y_j = 0., out_z_j = 0.; // to avoid having to add to out_#[j]'s inside the loop, which is expensive, because these arrays are not at all contiguous => factor 0.82 timesaving
+//  int ix_pad = ix + padding;
+//  int iy_pad = iy + padding;
+//  int iz_pad = iz + padding;
+//  // Note: don't use unsigned int as index below! Slower than signed. See:
+//  // http://stackoverflow.com/a/2044021/1199693.
+//  // Can't seem to be able to remove warning (unsigned vs signed comparison).
+//  // Tried converting the ij.size() to long and int, but is again slower...
+//  for (vector< pair<int, int> >::size_type ij_ix = 0;
+//       ij_ix < ij.size(); ++ij_ix) {
+//    int i1 = ij[ij_ix].first;
+//    int i2 = ij[ij_ix].second;
+//    int kx = ix_pad + i1;
+//    int ky = iy_pad + i2;
+//    // Cell position (relative to the central cell):
+//    real_prec diff_x_h = dpcx_h - static_cast<real_prec>(i1)*d1_h;
+//    real_prec diff_y_h = dpcy_h - static_cast<real_prec>(i2)*d2_h;
+//    // magic
+//    ULONG index_xy_part = N3pad*(ky + N2pad*kx);
+//    int kz_begin = k_begin[ij_ix];
+//    int kz_last = k_last[ij_ix];
+//    ULONG index = kz_begin + index_xy_part + iz_pad;
+//    real_prec diff_z_h = dpcz_h - static_cast<real_prec>(kz_begin) * d3_h;
+//    // the actual loop
+//    for (int i3 = kz_begin; i3 <= kz_last; ++i3) {
+//      real_prec common_part = part_like_padded[index];
+//
+//      real_prec grad_kernel_x, grad_kernel_y, grad_kernel_z;
+//      // grad_SPH_kernel_3D(diff_x_h, diff_y_h, diff_z_h, particle_kernel_h,
+//      //                    h_sq, h_inv, h_sq_inv, grad_SPH_kernel_norm,
+//      //                    grad_kernel_x, grad_kernel_y, grad_kernel_z);
+//      grad_SPH_kernel_3D_h_units(diff_x_h, diff_y_h, diff_z_h, grad_SPH_kernel_norm,
+//                                 grad_kernel_x, grad_kernel_y, grad_kernel_z);
+//
+//      out_x_j += common_part * grad_kernel_x;
+//      out_y_j += common_part * grad_kernel_y;
+//      out_z_j += common_part * grad_kernel_z;
+//
+//      // for next iteration:
+//      ++index;
+//      diff_z_h -= d3_h;
+//    }
+//  }
+//  _out_x_j = out_x_j;
+//  _out_y_j = out_y_j;
+//  _out_z_j = out_z_j;
+//}
+
+void pad_array_pacman(real_prec *input, unsigned int N1_in, real_prec *out,
+                      unsigned int padding) {
+  unsigned N1_out = N1_in + 2*padding;
+  for (unsigned io = 0; io < N1_out; ++io)
+    for (unsigned jo = 0; jo < N1_out; ++jo)
+      for (unsigned ko = 0; ko < N1_out; ++ko) {
+        ULONG ix_out = ko + N1_out*(jo + static_cast<ULONG>(N1_out)*io);
+        unsigned ii = static_cast<unsigned>(static_cast<int>(io + N1_in) - static_cast<int>(padding)) % N1_in;
+        unsigned ji = static_cast<unsigned>(static_cast<int>(jo + N1_in) - static_cast<int>(padding)) % N1_in;
+        unsigned ki = static_cast<unsigned>(static_cast<int>(ko + N1_in) - static_cast<int>(padding)) % N1_in;
+        ULONG ix_in = ki + N1_in*(ji + static_cast<ULONG>(N1_in)*ii);
+        out[ix_out] = input[ix_in];
+      }
+}
+
+
+
+// TODO: COMPARE THE OUTPUT OF THIS FUNCTION FROM AFTER 6 JUNE 2017 TO VERSION FROM BEFORE!!!
+// TODO: COMPARE THE OUTPUT OF THIS FUNCTION FROM AFTER 6 JUNE 2017 TO VERSION FROM BEFORE!!!
+// TODO: COMPARE THE OUTPUT OF THIS FUNCTION FROM AFTER 6 JUNE 2017 TO VERSION FROM BEFORE!!!
+// TODO: COMPARE THE OUTPUT OF THIS FUNCTION FROM AFTER 6 JUNE 2017 TO VERSION FROM BEFORE!!!
+// TODO: COMPARE THE OUTPUT OF THIS FUNCTION FROM AFTER 6 JUNE 2017 TO VERSION FROM BEFORE!!!
+// TODO: COMPARE THE OUTPUT OF THIS FUNCTION FROM AFTER 6 JUNE 2017 TO VERSION FROM BEFORE!!!
+// TODO: COMPARE THE OUTPUT OF THIS FUNCTION FROM AFTER 6 JUNE 2017 TO VERSION FROM BEFORE!!!
+// TODO: also pad_array_pacman
+
+void likelihood_calc_V_SPH(struct HAMIL_DATA *hd, real_prec *part_like, real_prec *posx, real_prec *posy, real_prec *posz, real_prec *out_x, real_prec *out_y, real_prec *out_z)
+{
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+
+  // int N_cells = SPH_kernel_3D_cells_count(hd);
+  // // const int kernel_cells_i[N_cells], kernel_cells_j[N_cells], kernel_cells_k[N_cells];
+  // vector<int> kernel_cells_i, kernel_cells_j, kernel_cells_k;
+  // SPH_kernel_3D_cells(hd, kernel_cells_i, kernel_cells_j, kernel_cells_k);
+  const vector<int> kernel_cells_i = hd->kernel_cells_i;
+  const vector<int> kernel_cells_j = hd->kernel_cells_j;
+  const vector<int> kernel_cells_k = hd->kernel_cells_k;
+  // const int N_cells = hd->N_cells;
+
+  std::vector< std::pair<int, int> > ij;
+  std::vector<int> k_begin, k_last;
+  SPH_kernel_3D_cells_hull_1(kernel_cells_i, kernel_cells_j, kernel_cells_k,
+                             ij, k_begin, k_last);
+
+  unsigned padding = static_cast<unsigned>(*max_element(kernel_cells_i.begin(), kernel_cells_i.end()));
+  fftw_array<real_prec> part_like_padded((n->N1+2*padding)*(n->N2+2*padding)*
+                                         (n->N3+2*padding));
+  pad_array_pacman(part_like, n->N1, part_like_padded, padding);
+
+  // Normalization given that we want the integral over density to be V (left-
+  // hand side) and the integral over all particle kernels to be N (right-hand
+  // side).
+  // Basically, this is the mass of all particles given that we want the
+  // density to be rho_c.
+  real_prec normalize = hd->rho_c * n->L1*n->L2*n->L3/static_cast<real_prec>(n->N1*n->N2*n->N3);
+
+  real_prec f1;
+  bool rsd_model = hd->rsd_model;
+  bool planepar = n->planepar;
+  if (rsd_model)
+    f1 = fgrow(hd->ascale, hd->OM, hd->OL, 1); // first order growth factor (Zel'dovich only!)
+
+  real_prec h = n->particle_kernel_h;
+  real_prec h_sq = h*h;
+  real_prec h_inv = 1. / h;
+  // real_prec h_sq_inv = 1. / h_sq;
+  real_prec grad_SPH_kernel_norm = 1. / (M_PI*h_sq*h_sq);
+  unsigned N2 = n->N2, N3 = n->N3;
+  unsigned N3pad = N3 + 2*padding;
+  unsigned N2pad = N2 + 2*padding;
+  real_prec d1 = n->d1, d2 = n->d2, d3 = n->d3;
+  real_prec d1_h = d1 * h_inv, d2_h = d2 * h_inv, d3_h = d3 * h_inv;
+
+  // Initialize this thing outside the for-loop for performance
+  // std::vector<real_prec> grad_kernel(3);
+  // Firstprivate below initializes grad_kernel in each thread to a copy
+  // of the value outside the loop. Private only initializes an empty
+  // vector, which then does not have the correct length.
+  #ifdef MULTITHREAD
+  // #pragma omp parallel for firstprivate(grad_kernel)
+  #pragma omp parallel for
+  #endif // MULTITHREAD 
+  for (ULONG j = 0; j < n->N; j++)
+  {
+    // Load particle position
+    real_prec px = posx[j], py = posy[j], pz=posz[j];
+    // Determine central cell index where particle resides
+    // TODO: this calculation will go wrong when posx,y,z are not already put
+    // in periodic box coordinates; might be negative then.
+    // Addition 6 Jun 2017: TODO: guarantee that positions are in periodic box coords by making special type for that.
+    int ix = static_cast<int>(px/d1);
+    int iy = static_cast<int>(py/d2);
+    int iz = static_cast<int>(pz/d3);
+    // Central cell position:
+    // real_prec ccx = (static_cast<real_prec>(ix) + 0.5)*d1;
+    // real_prec ccy = (static_cast<real_prec>(iy) + 0.5)*d2;
+    // real_prec ccz = (static_cast<real_prec>(iz) + 0.5)*d3;
+    real_prec ccx_h = (static_cast<real_prec>(ix) + 0.5)*d1_h;
+    real_prec ccy_h = (static_cast<real_prec>(iy) + 0.5)*d2_h;
+    real_prec ccz_h = (static_cast<real_prec>(iz) + 0.5)*d3_h;
+    // Optimization: precalculate diff. pos and cell
+    // real_prec dpcx = px - ccx;
+    // real_prec dpcy = py - ccy;
+    // real_prec dpcz = pz - ccz;
+    real_prec dpcx_h = px * h_inv - ccx_h;
+    real_prec dpcy_h = py * h_inv - ccy_h;
+    real_prec dpcz_h = pz * h_inv - ccz_h;
+
+    real_prec out_x_j, out_y_j, out_z_j;
+
+    // 6 Jun 2017: don't convert ix/y/z to unsigned above, since conversion between uint and double (for ccx_h) is
+    //             slower than between int and double!
+    _likelihood_calc_V_SPH_kernel_loop_h_units(N2pad, N3pad, d1_h, d2_h, d3_h, ij, k_begin, k_last,
+                                               static_cast<unsigned>(ix), static_cast<unsigned>(iy),
+                                               static_cast<unsigned>(iz), dpcx_h, dpcy_h, dpcz_h, part_like_padded,
+                                               padding, grad_SPH_kernel_norm, out_x_j, out_y_j, out_z_j);
+
+    out_x[j] = normalize * out_x_j;
+    out_y[j] = normalize * out_y_j;
+    out_z[j] = normalize * out_z_j;
+
+    if (rsd_model) {
+      if (!planepar) {
+        throw BarcodeException("Non-plane-parallel RSD model is not yet implemented in calc_V! Use planepar = true.");
+      } else {
+        out_z[j] += f1 * out_z[j];
+      }
+    }
+  }
+}
+
+
+
+
+// new Fourier based version with TSC interpolation
+void likelihood_calc_V_SPH_fourier_TSC(struct HAMIL_DATA *hd, real_prec *part_like, real_prec *out_x, real_prec *out_y,
+                                       real_prec *out_z) {
+  // Works fastest if part_like == n->R2Cplan->R.
+  // NOTE: n->R2Cplan->R is used in calc_h_SPH for posx as well, so part_like
+  //       will be destroyed if it is also n->R2Cplan->R!
+
+  // 0. initialization
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+  real_prec h = n->particle_kernel_h;
+
+  // kernel normalization
+  real_prec norm_kernel = 24./(h*h*h);
+
+  // Normalization given that we want the integral over density to be V (left-
+  // hand side) and the integral over all particle kernels to be N (right-hand
+  // side).
+  // Basically, this is the mass of all particles given that we want the
+  // density to be rho_c.
+  real_prec norm_density = hd->rho_c * n->L1*n->L2*n->L3/static_cast<real_prec>(n->N1*n->N2*n->N3);
+
+  real_prec norm = norm_kernel * norm_density;
+
+  // 1. calculate convolution of part_like and kernel
+  // 1.a. calculate fourier transform of part_like
+  // fftw_array<complex_prec> part_like_F(n->Nhalf), conv_x_F(n->Nhalf),
+  //                          conv_y_F(n->Nhalf);
+  // fftw_array<complex_prec> conv_x_F(n->Nhalf), conv_y_F(n->Nhalf);
+  fftw_array<complex_prec> conv_y_F(n->Nhalf);  // use n->C2Rplan->C as conv_x_F
+
+  // fftR2C(n->N1, n->N2, n->N3, part_like, part_like_F);
+  fftR2Cplanned(part_like, n->R2Cplan->C, n->R2Cplan);  // use n->R2Cplan->C as part_like_F
+
+  real_prec L1 = n->L1, L2 = n->L2, L3 = n->L3;
+  unsigned N1 = n->N1, N2 = n->N2, N3 = n->N3;
+  unsigned N3half = N3/2 + 1;
+
+  #ifdef MULTITHREAD
+  #pragma omp parallel for //schedule(static,chunk) //collapse(3)
+  #endif // MULTITHREAD
+  for (unsigned i = 0; i < N1; ++i) {
+    real_prec kx = calc_kx(i, L1, N1);
+    for (unsigned j = 0; j < N2; ++j) {
+      real_prec ky = calc_ky(j, L2, N2);
+      for (unsigned k = 0; k < N3half; ++k) {
+        real_prec kz = calc_kz(k, L3, N3);
+        real_prec k_sq = kx*kx + ky*ky + kz*kz;
+        // 1.b. calculate fourier transform of SPH kernel (it's real, so don't
+        //      need complex number)
+        real_prec SPH_kernel_F;
+        if (k_sq == 0.) {
+          SPH_kernel_F = 1./(h*h*h);
+        } else {
+          real_prec kk = sqrt(k_sq);
+          real_prec ksink = kk * sin(kk);
+
+          SPH_kernel_F = norm * (3 + cos(2*kk) - ksink + cos(kk) * (ksink - 4))
+                         / (k_sq*k_sq*k_sq);
+        }
+
+        // 1.c. multiply fourier transforms of part_like and SPH kernel and take
+        //      derivative (multiply by ik)
+        // Also multiply with h to correct for wrong coordinate in derivative
+        // (x(q_i) instead of q=(x(q_i)-x)/h which is the Fourier dual of k)
+        ULONG ix = k + N3half*(j + static_cast<ULONG>(N2)*i);
+        // x
+        // re(conv_x_F[ix]) = h * kx * -im(part_like_F[ix]) * SPH_kernel_F;
+        // im(conv_x_F[ix]) = h * kx *  re(part_like_F[ix]) * SPH_kernel_F;
+        re(n->C2Rplan->C[ix]) = h * kx * -im(n->R2Cplan->C[ix]) * SPH_kernel_F;
+        im(n->C2Rplan->C[ix]) = h * kx *  re(n->R2Cplan->C[ix]) * SPH_kernel_F;
+        // y
+        // re(conv_y_F[ix]) = h * ky * -im(part_like_F[ix]) * SPH_kernel_F;
+        // im(conv_y_F[ix]) = h * ky *  re(part_like_F[ix]) * SPH_kernel_F;
+        re(conv_y_F[ix]) = h * ky * -im(n->R2Cplan->C[ix]) * SPH_kernel_F;
+        im(conv_y_F[ix]) = h * ky *  re(n->R2Cplan->C[ix]) * SPH_kernel_F;
+        // z -> part_like_C itself, to save memory
+        // real_prec dummy = re(part_like_F[ix]);
+        // re(part_like_F[ix]) = h * kz * -im(part_like_F[ix]) * SPH_kernel_F;
+        real_prec dummy = re(n->R2Cplan->C[ix]);
+        re(n->R2Cplan->C[ix]) = h * kz * -im(n->R2Cplan->C[ix]) * SPH_kernel_F;
+        im(n->R2Cplan->C[ix]) = h * kz * dummy                  * SPH_kernel_F;
+      }
+    }
+  }
+
+  ULONG N_part = n->N;
+
+  // 1.d. transform back to real space => convolution done
+  // fftC2R(n->N1, n->N2, n->N3, conv_x_F, out_z);  // out_z is dummy
+  fftC2Rplanned(n->C2Rplan->C, n->C2Rplan->R, n->C2Rplan);  // use n->C2Rplan->R as dummy
+  // 2. interpolate convolution of part_like and kernel to particle positions
+  // interpolate_TSC(n->N1, n->N2, n->N3, n->L1, n->L2, n->L3, n->d1, n->d2, n->d3,
+  //                 hd->posx, hd->posy, hd->posz, out_z, N_part, out_x);
+  interpolate_TSC(n->N1, n->N2, n->N3, n->d1, n->d2, n->d3, hd->posx, hd->posy, hd->posz, n->C2Rplan->R, N_part, out_x);
+
+  // fftC2R(n->N1, n->N2, n->N3, conv_y_F, out_z);  // again out_z dummy
+  fftC2Rplanned(conv_y_F, n->C2Rplan->R, n->C2Rplan);  // again n->C2Rplan->R as dummy
+  // interpolate_TSC(n->N1, n->N2, n->N3, n->L1, n->L2, n->L3, n->d1, n->d2, n->d3,
+  //                 hd->posx, hd->posy, hd->posz, out_z, N_part, out_y);
+  interpolate_TSC(n->N1, n->N2, n->N3, n->d1, n->d2, n->d3, hd->posx, hd->posy, hd->posz, n->C2Rplan->R, N_part, out_y);
+
+  // fftw_array<real_prec> conv_x(n->N), conv_y(n->N);  // multi version
+  // fftC2R(n->N1, n->N2, n->N3, conv_x_F, conv_x);     // multi version
+  // fftC2R(n->N1, n->N2, n->N3, conv_y_F, conv_y);     // multi version
+  // here part_like_F is still dummy and we also use part_like as dummy
+  // fftC2R(n->N1, n->N2, n->N3, part_like_F, part_like);  // ALSO for multi!
+  // fftC2R(n->N1, n->N2, n->N3, n->R2Cplan->C, part_like);  // ALSO for multi!
+  fftC2Rplanned(n->R2Cplan->C, n->C2Rplan->R, n->C2Rplan);  // again n->C2Rplan->R as dummy  // ALSO for multi!
+  // interpolate_TSC(n->N1, n->N2, n->N3, n->L1, n->L2, n->L3, n->d1, n->d2, n->d3,
+  //                 hd->posx, hd->posy, hd->posz, part_like, N_part, out_z);
+  interpolate_TSC(n->N1, n->N2, n->N3, n->d1, n->d2, n->d3, hd->posx, hd->posy, hd->posz, n->C2Rplan->R, N_part, out_z);
+
+  // real_prec *input_fields[3]  = {conv_x, conv_y, part_like}; // multi version
+  // real_prec *output_fields[3] = {out_x, out_y, out_z};       // multi version
+  // interpolate_TSC_multi(n->N1, n->N2, n->N3, n->L1, n->L2, n->L3, n->d1,
+  //                       n->d2, n->d3, hd->posx, hd->posy, hd->posz, N_part,
+  //                       input_fields, output_fields, 3);     // multi version
+
+  // RSD stuff
+  if (hd->rsd_model) {
+    if (!n->planepar) {
+      throw BarcodeException("Non-plane-parallel RSD model is not yet "
+                             "implemented in calc_V! Use planepar = true.");
+    } else {
+      // first order growth factor (Zel'dovich only!)
+      real_prec f1 = fgrow(hd->ascale, hd->OM, hd->OL, 1);
+      #ifdef MULTITHREAD
+      #pragma omp parallel for
+      #endif  // MULTITHREAD
+      for (unsigned int ix = 0; ix < n->N; ++ix) {
+        out_z[ix] += f1 * out_z[ix];
+      }
+    }
+  }
+
+}
+
+
+
+
+void likelihood_calc_h_SPH(struct HAMIL_DATA *hd, real_prec *deltaX, real_prec *out) {
+  // fastest if out == n->C2Rplan->R
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+
+  if (!(n->mk == 3)) {
+    throw BarcodeException("Must use SPH mass kernel (masskernel = 3) when "
+                           "using likelihood_calc_h_SPH (calc_h = 2 or 3)!");
+  }
+
+  bool rfft = true;
+
+  // fftw_array<real_prec> part_like(n->N), V_x(n->N), V_y(n->N), V_z(n->N);
+  // fftw_array<real_prec> V_x(n->N), V_y(n->N), V_z(n->N);
+  fftw_array<real_prec> V_y(n->N), V_z(n->N);
+  // hd->partial_f_delta_x_log_like(hd, deltaX, part_like);
+  hd->partial_f_delta_x_log_like(hd, deltaX, n->R2Cplan->R);
+
+  // The first fft below this switch works fastest if V_x == n->R2Cplan->R.
+  // This is allowed in likelihood_calc_V_SPH_fourier_TSC, but it overwrites
+  // part_like, because we also use n->R2Cplan->R for that!
+  switch (n->calc_h) {
+    case 2:
+      // likelihood_calc_V_SPH(hd, part_like, hd->posx, hd->posy, hd->posz,
+      //                       V_x, V_y, V_z);
+      likelihood_calc_V_SPH(hd, n->R2Cplan->R, hd->posx, hd->posy, hd->posz,
+                            n->R2Cplan->R, V_y, V_z);
+      break;
+    case 3:
+      // likelihood_calc_V_SPH_fourier_TSC(hd, part_like, hd->posx, hd->posy,
+      //                                   hd->posz, V_x, V_y, V_z);
+      likelihood_calc_V_SPH_fourier_TSC(hd, n->R2Cplan->R, n->R2Cplan->R, V_y, V_z);
+      break;
+  }
+
+  // fftw_array<complex_prec> outC(n->Nhalf), dummyC(n->Nhalf);
+
+  // use outC as dummy in first run, saves one addition and one fillZero
+  // fftR2C(n->N1, n->N2, n->N3, V_x, outC);
+  fftR2Cplanned(n->R2Cplan->R, n->C2Rplan->C, n->R2Cplan);  // use n->C2Rplan->C as outC
+
+  // grad_inv_lap_FS(n->N1,n->N2,n->N3, n->L1,n->L2,n->L3, outC, outC, 1, rfft); // multiply by -ik_i/k^2
+  grad_inv_lap_FS(n->N1,n->N2,n->N3, n->L1,n->L2,n->L3, n->C2Rplan->C, n->C2Rplan->C, 1, rfft); // multiply by -ik_i/k^2
+
+  // fftR2C(n->N1, n->N2, n->N3, V_y, dummyC);
+  fftR2Cplanned(V_y, n->R2Cplan->C, n->R2Cplan);  // use n->R2Cplan->C as dummyC
+  // grad_inv_lap_FS(n->N1,n->N2,n->N3, n->L1,n->L2,n->L3, dummyC, dummyC, 2, rfft); // multiply by -ik_i/k^2
+  grad_inv_lap_FS(n->N1,n->N2,n->N3, n->L1,n->L2,n->L3, n->R2Cplan->C, n->R2Cplan->C, 2, rfft); // multiply by -ik_i/k^2
+  // add_to_array(dummyC, outC, n->Nhalf); // sum the resulting h(k) term to outC
+  add_to_array(n->R2Cplan->C, n->C2Rplan->C, n->Nhalf); // sum the resulting h(k) term to outC
+
+  // fftR2C(n->N1, n->N2, n->N3, V_z, dummyC);
+  fftR2Cplanned(V_z, n->R2Cplan->C, n->R2Cplan);  // use n->R2Cplan->C as dummyC
+  // grad_inv_lap_FS(n->N1,n->N2,n->N3, n->L1,n->L2,n->L3, dummyC, dummyC, 3, rfft); // multiply by -ik_i/k^2
+  grad_inv_lap_FS(n->N1,n->N2,n->N3, n->L1,n->L2,n->L3, n->R2Cplan->C, n->R2Cplan->C, 3, rfft); // multiply by -ik_i/k^2
+  // add_to_array(dummyC, outC, n->Nhalf); // sum the resulting h(k) term to outC
+  add_to_array(n->R2Cplan->C, n->C2Rplan->C, n->Nhalf); // sum the resulting h(k) term to outC
+
+  // transform h(k) to real space -> h(q)
+  // fftC2R(n->N1, n->N2, n->N3, outC, out);
+  fftC2Rplanned(n->C2Rplan->C, out, n->C2Rplan);  // use n->R2Cplan->C as dummyC
+}
+// end SPH method stuff //
+
+
+// Model: Gaussian likelihood (or any other that is dependent on delta_q only through delta_x) //
+void likelihood_grad_log_like(struct HAMIL_DATA *hd, real_prec *delta, real_prec *out)
+{
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+  // fftw_array<real_prec> dummy(n->N);  // use n->C2Rplan->R as dummy
+  
+  // single out growing mode (Peebles) -> n->C2Rplan->R (dummy)
+  if (n->deltaQ_factor != 1.) {
+    multiply_factor_array(n->deltaQ_factor, delta, n->C2Rplan->R, n->N);
+  } else {
+    copyArray(delta, n->C2Rplan->R, n->N);
+  }
+
+  // calculate deltaX and pos
+  {
+    unsigned facL=1;
+    bool reggrid=true;
+    gsl_rng *seed = nullptr; // empty: reggrid is true anyway
+    real_prec kernel_scale = SPH_kernel_scale(hd);
+    if (hd->rsd_model)
+      Lag2Eul_rsd_zeldovich(n->C2Rplan->R, hd->deltaX, hd->posx, hd->posy, hd->posz, n->N1, n->N2, n->N3, n->L1, n->L2,
+                            n->L3, n->d1, n->d2, n->d3, n->min1, n->min2, n->min3, hd->D1, hd->ascale, hd->OM, hd->OL,
+                            n->mk, facL,
+                            reggrid, seed, kernel_scale, n->xobs, n->yobs, n->zobs, n->planepar, n->periodic,
+                            n->R2Cplan, n->C2Rplan);
+    else
+      Lag2Eul(n->C2Rplan->R, hd->deltaX, hd->posx, hd->posy, hd->posz, n->N1, n->N2, n->N3, n->L1, n->L2, n->L3, n->d1,
+              n->d2, n->d3, n->min1, n->min2, n->min3, hd->D1, hd->D2, hd->ascale, hd->OM, hd->OL, hd->sfmodel, n->mk, n->kth,
+              facL,
+              reggrid, seed, "", kernel_scale, n->R2Cplan, n->C2Rplan);
+  }
+
+  // h -> n->C2Rplan->R (dummy)
+  switch (n->calc_h)
+  {
+    case 0:
+      likelihood_calc_h(hd, hd->deltaX, n->C2Rplan->R);
+      break;
+    case 1:
+      hd->partial_f_delta_x_log_like(hd, hd->deltaX, n->C2Rplan->R);
+      break;
+    case 2:
+    case 3:
+      likelihood_calc_h_SPH(hd, hd->deltaX, n->C2Rplan->R);
+      break;
+  }
+
+  // now for some normalization terms
+  real_prec norm = 1.;
+
+  // ************************* WARNING **************************
+  // No longer using heuristic factor, it was caused by a mismatch between mass
+  // kernel used for density estimation and the one used for calc_h! As long as
+  // the two match, the correspondence between fin. diff. and calculated h is
+  // nearly perfect!
+  // ************************* WARNING **************************
+
+  // heuristically determined correction factor
+  // EGP: I have no idea where this came from, but it seems necessary. See tests
+  //      in ipynb "unittest - calc_V_fourier"
+  //      NOTE: I'm using n->d1 now; this is of course only correct if all d's
+  //            are the same!
+  // real_prec heuristic_correction = 1.;
+  // switch (n->calc_h)
+  // {
+  //   case 2:
+  //     heuristic_correction = 0.52/0.33;
+  //     break;
+  //   case 3:
+  //     if (n->d1 != n->d2 || n->d2 != n->d3) {
+  //       throw BarcodeException("likelihood_grad_log_like: d1, d2 and d3 are not"
+  //                              " equal, so heuristic_correction is unknown! "
+  //                              "Aborting.");
+  //     } else {
+  //       heuristic_correction = 0.52/0.31/pow(n->d1, 1.04);
+  //     }
+  //     break;
+  // }
+  // norm *= heuristic_correction;
+
+  // ************************* WARNING ************************** (see above)
+
+  // EGP: for Zel'dovich, -h is the entire term. For other models more has to be done.
+  // FIXME: hier moet nog een factor D1 bij als het signaal alleen delta^(1) is
+  real_prec zeldovich_norm = -1.;  // EGP: Zel'dovich: -gradLogLike = -h
+  norm *= zeldovich_norm;
+
+  // TODO: check dit theoretisch!
+  norm *= n->deltaQ_factor;
+
+  if (n->correct_delta) {
+    norm *= hd->D1;
+  }
+
+  multiply_factor_array(norm, n->C2Rplan->R, out, n->N);
+}
+void likelihood_grad_log_like_old(struct HAMIL_DATA *hd, real_prec *delta, real_prec *out)
+{
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+  fftw_array<real_prec> dummy_h(n->N);
+  
+  // calculate deltaX and pos
+  {
+    unsigned facL=1;
+    bool reggrid=true;
+    gsl_rng *seed = nullptr; // empty: reggrid is true anyway
+    real_prec kernel_scale = SPH_kernel_scale(hd);
+    Lag2Eul(delta, hd->deltaX, hd->posx, hd->posy, hd->posz, n->N1, n->N2, n->N3, n->L1, n->L2, n->L3, n->d1, n->d2,
+            n->d3, n->min1, n->min2, n->min3, hd->D1, hd->D2, hd->ascale, hd->OM, hd->OL, hd->sfmodel, n->mk, n->kth, facL,
+            reggrid, seed, "", kernel_scale, n->R2Cplan, n->C2Rplan);
+  }
+
+  likelihood_calc_h_old(hd, hd->deltaX, dummy_h);
+
+  // EGP: for Zel'dovich, -h is the entire term. For other models more has to be done.
+  multiply_factor_array(-1., dummy_h, out, n->N); // EGP: Zel'dovich: -gradLogLike = -h
+}
+
+
+//void prior_gaussian_grad_log_prior(struct HAMIL_DATA *hd, real_prec *signal, real_prec *out)
+//{
+  //// dPsiP/ds = S^-1 * s
+  //convolveInvCorrFuncWithSignal(hd, signal, out, hd->signal_PS);
+//}
+void prior_gaussian_grad_log_prior(struct HAMIL_DATA *hd, real_prec *signal, real_prec *out)
+{
+  //struct HAMIL_NUMERICAL *n = hd->numerical;
+  //fftw_array<real_prec> delta_growing(n->N);
+
+  //// single out growing mode (Peebles) -> delta_growing
+  //multiply_factor_array(n->deltaQ_factor, signal, delta_growing, n->N);
+  // dPsiP/ds = S^-1 * s
+  //convolveInvCorrFuncWithSignal(hd, delta_growing, out, hd->signal_PS);
+  convolveInvCorrFuncWithSignal(hd, signal, out, hd->signal_PS);
+}
+
+//real_prec prior_gaussian_log_prior(struct HAMIL_DATA *hd, real_prec *signal)
+//{
+  //struct HAMIL_NUMERICAL *n = hd->numerical;
+  //fftw_array<real_prec> dummy(n->N);
+
+  //convolveInvCorrFuncWithSignal(hd, signal, dummy, hd->signal_PS);
+
+  //// Psi_prior = 1/2 [s_k] * IFT[ 1/P*FT[s_k] ]
+
+  //real_prec psi_prior=0.;
+//#ifdef MULTITHREAD
+//#pragma omp parallel for reduction(+:psi_prior)
+//#endif // MULTITHREAD 
+  //for(ULONG i=0;i<n->N;i++)
+    //psi_prior += num_0_5 * signal[i] * dummy[i];
+
+  //return(psi_prior);
+//}
+real_prec prior_gaussian_log_prior(struct HAMIL_DATA *hd, real_prec *signal)
+{
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+  fftw_array<real_prec> dummy(n->N);
+  //fftw_array<real_prec> dummy(n->N), delta_growing(n->N);
+
+  //// single out growing mode (Peebles) -> delta_growing
+  //multiply_factor_array(n->deltaQ_factor, signal, delta_growing, n->N);
+
+  //convolveInvCorrFuncWithSignal(hd, delta_growing, dummy, hd->signal_PS);
+  convolveInvCorrFuncWithSignal(hd, signal, dummy, hd->signal_PS);
+
+  // Psi_prior = 1/2 [s_k] * IFT[ 1/P*FT[s_k] ]
+
+  real_prec psi_prior=0.;
+#ifdef MULTITHREAD
+#pragma omp parallel for reduction(+:psi_prior)
+#endif // MULTITHREAD 
+  for(ULONG i=0;i<n->N;i++)
+    psi_prior += num_0_5 * signal[i] * dummy[i];
+    //psi_prior += num_0_5 * delta_growing[i] * dummy[i];
+
+  return(psi_prior);
+}
diff --git a/HMC_models.h b/HMC_models.h
new file mode 100644
index 0000000..38ec585
--- /dev/null
+++ b/HMC_models.h
@@ -0,0 +1,103 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#pragma once
+#include "define_opt.h"
+#include "struct_hamil.h"
+#include <utility>  // std::pair
+#include <vector>
+
+// Prior //
+void prior_gaussian_grad_log_prior(struct HAMIL_DATA *hd, real_prec *signal, real_prec *out);
+real_prec prior_gaussian_log_prior(struct HAMIL_DATA *hd, real_prec *signal);
+
+
+// Likelihood //
+void grf_likelihood_grad_f_delta_x_comp(struct HAMIL_DATA *, real_prec *, real_prec *, unsigned int);
+void grf_likelihood_partial_f_delta_x_log_like(struct HAMIL_DATA *, real_prec *, real_prec *);
+real_prec grf_likelihood_log_like(struct HAMIL_DATA *hamil_data, real_prec *delta);
+
+void poissonian_likelihood_partial_f_delta_x_log_like(struct HAMIL_DATA *hamil_data, real_prec *deltaX, real_prec *dummy);
+real_prec poissonian_likelihood_log_like(struct HAMIL_DATA *hamil_data, real_prec *delta);
+void poissonian_likelihood_grad_f_delta_x_comp(struct HAMIL_DATA *hamil_data, real_prec *deltaX, real_prec *out,
+                                               unsigned int component);
+
+// old testing stuff (interpolate):
+void gaussian_likelihood_partial_f_delta_x_log_like_interpolate(struct HAMIL_DATA *hamil_data, real_prec *deltaX, real_prec *dummy);
+void gaussian_likelihood_grad_f_delta_x_comp_interpolate(struct HAMIL_DATA *hamil_data, real_prec *deltaX,
+                                                         real_prec *out, unsigned int component);
+
+void gaussian_likelihood_partial_f_delta_x_log_like(struct HAMIL_DATA *hd, real_prec *deltaX, real_prec *dummy);
+real_prec gaussian_likelihood_log_like(struct HAMIL_DATA *hamil_data, real_prec *delta);
+void gaussian_likelihood_grad_f_delta_x_comp(struct HAMIL_DATA *hd, real_prec *deltaX, real_prec *out,
+                                             unsigned int component);
+
+real_prec lognormal_likelihood_f_delta_x_i_calc(real_prec rho_c, real_prec delta_min, real_prec deltaX_i);
+void lognormal_likelihood_partial_f_delta_x_log_like(struct HAMIL_DATA *hamil_data, real_prec *deltaX, real_prec *dummy);
+real_prec lognormal_likelihood_log_like(struct HAMIL_DATA *hamil_data, real_prec *delta);
+void lognormal_likelihood_grad_f_delta_x_comp(struct HAMIL_DATA *hamil_data, real_prec *deltaX, real_prec *out,
+                                              unsigned int component);
+
+void likelihood_grad_log_like(struct HAMIL_DATA *hamil_data, real_prec *delta, real_prec *dummy);
+void likelihood_grad_log_like_old(struct HAMIL_DATA *hamil_data, real_prec *delta, real_prec *dummy);
+
+void likelihood_calc_h(struct HAMIL_DATA *hamil_data, real_prec *deltaX, real_prec *out);
+void likelihood_calc_h_old(struct HAMIL_DATA *hamil_data, real_prec *deltaX, real_prec *out);
+void likelihood_calc_h_simple(struct HAMIL_DATA *hamil_data, real_prec *deltaX, real_prec *out);
+
+// special for grf:
+void grf_likelihood_grad_log_like(struct HAMIL_DATA *hd, real_prec *delta, real_prec *out);
+
+real_prec gaussian_exponent_convolution(struct HAMIL_DATA *hamil_data, real_prec *signal, real_prec *var);//EGP,real_prec normFS);
+
+// SPH stuff
+//real_prec SPH_kernel_scale(struct HAMIL_DATA *hd);
+real_prec SPH_kernel_radius(struct HAMIL_DATA *hd);
+void grad_SPH_kernel_3D(real_prec x, real_prec y, real_prec z, real_prec h, std::vector<real_prec> &out);
+std::vector<int> SPH_kernel_3D_cells(struct HAMIL_DATA *hd);
+void likelihood_calc_V_SPH(struct HAMIL_DATA *hd, real_prec *part_like, real_prec *posx, real_prec *posy, real_prec *posz, real_prec *out_x, real_prec *out_y, real_prec *out_z);
+void likelihood_calc_h_SPH(struct HAMIL_DATA *hd, real_prec *deltaX, real_prec *out);
+
+// testing
+void SPH_kernel_3D_cells(struct HAMIL_DATA *hd, std::vector<int> &out_i,
+                         std::vector<int> &out_j, std::vector<int> &out_k);
+int SPH_kernel_3D_cells_count(struct HAMIL_DATA *hd);
+real_prec SPH_kernel_radius(int particle_kernel_type,
+                            real_prec particle_kernel_h);
+int SPH_kernel_3D_cells_count(int particle_kernel_type,
+                              real_prec particle_kernel_h,
+                              real_prec d1, real_prec d2, real_prec d3);
+void SPH_kernel_3D_cells(int particle_kernel_type, real_prec particle_kernel_h,
+                         real_prec d1, real_prec d2, real_prec d3,
+                         std::vector<int> &out_i,
+                         std::vector<int> &out_j, std::vector<int> &out_k);
+void SPH_kernel_3D_cells_hull_1(const std::vector<int> &i, const std::vector<int> &j, const std::vector<int> &k,
+                                std::vector< std::pair<int, int> > &ij_out,
+                                std::vector<int> &k_begin, std::vector<int> &k_last);
+void grad_SPH_kernel_3D(real_prec x, real_prec y, real_prec z, real_prec h_inv, real_prec h_sq_inv, real_prec norm,
+                        real_prec &out_x, real_prec &out_y, real_prec &out_z);
+
+
+void pad_array_pacman(real_prec *input, unsigned int N1_in, real_prec *out, unsigned int padding);
+
+
+template <class T = struct HAMIL_DATA>
+real_prec SPH_kernel_scale(T *d)
+{
+  real_prec scale;
+  switch (d->numerical->particle_kernel)
+  {
+    case 0: // SPH spline kernel
+      scale = d->numerical->particle_kernel_h;
+      break;
+    default:
+      scale = 0;
+  }
+  return(scale);
+}
diff --git a/HMC_momenta.cc b/HMC_momenta.cc
new file mode 100644
index 0000000..3769be4
--- /dev/null
+++ b/HMC_momenta.cc
@@ -0,0 +1,151 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#include <fftw3.h>
+#include <gsl/gsl_spline.h>
+#include <gsl/gsl_randist.h>
+#include <gsl/gsl_integration.h>
+
+#include <fstream>  //NOLINT
+
+#include "struct_main.h"
+#include "struct_hamil.h"
+#include "fftw_array.h"
+#include "math_funcs.h"
+#include "convenience.h"
+
+#include "debug.h"
+
+using namespace std;
+
+#ifdef MASKING
+bool masking2 = true;
+#else
+bool masking2 = false;
+#endif  // ifdef MASKING
+
+
+
+
+// Momenta: Gaussian momentum distribution //
+void draw_masked_momenta(struct HAMIL_DATA *hd, gsl_rng *seed,
+  real_prec *momenta);  // helper function defined below
+void draw_real_space_momenta(struct HAMIL_DATA *hd, gsl_rng *seed,
+  real_prec *momenta);  // helper function defined below
+void draw_momenta(struct HAMIL_DATA *hd, gsl_rng *seed, real_prec *momenta,
+  struct DATA *data) {
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+  // p ~ exp[-p^2/(2*M)]
+
+  if (n->mass_fs) {
+    create_GARFIELD(n->N1, n->N2, n->N3, n->L1, n->L2, n->L3, momenta,
+      hd->mass_f, seed);
+
+    // N.B.: als we masking weer mee willen nemen moeten we onderstaande functie
+    //       hebben
+    // draw_masked_momenta(hd, seed, momenta);
+  } else {
+    fillZero(momenta, n->N);  // prepare for addition to real part
+  }
+
+  if (n->mass_rs) {
+    fftw_array<real_prec> dummy(n->N);
+    draw_real_space_momenta(hd, seed, dummy);
+    add_to_array(dummy, momenta, n->N);
+  }
+
+#ifdef DEBUG
+  debug_array_statistics(momenta, n->N, "momenta");
+  debug_scalar_dump(momenta, n->N1, n->N2, n->N3, n->L1, n->L2, n->L3, n->N_bin,
+    data->numerical->dir + string("momenta"));
+#endif // DEBUG
+}
+
+void draw_real_space_momenta(struct HAMIL_DATA *hd, gsl_rng *seed,
+  real_prec *momenta) {
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+
+#ifdef MULTITHREAD_RNG
+#pragma omp parallel for
+#endif  // MULTITHREAD_RNG
+  for (unsigned i = 0 ; i < n->N1; i++)
+    for (unsigned j = 0 ; j < n->N2; j++)
+      for (unsigned k = 0 ; k < n->N3; k++) {
+        ULONG iind = k + n->N3 * (j + n->N2 * i);
+        real_prec sigma = sqrt(hd->mass_r[iind]);  // real_prec(N));  // care
+
+        momenta[iind] = static_cast<real_prec>(sigma *
+          static_cast<real_prec>(GR_NUM(seed, 1., 0)));
+      }
+}
+
+
+void draw_masked_momenta(struct HAMIL_DATA *hd, gsl_rng *seed,
+  real_prec *momenta) {
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+
+#ifdef MULTITHREAD_RNG
+#pragma omp parallel for
+#endif  // MULTITHREAD_RNG
+  for (ULONG i = 0; i < n->N; i++)
+    momenta[i] = static_cast<real_prec>(GR_NUM(seed, 1., 0));
+
+  // fftw_array<complex_prec> AUX(n->N), dummyC(n->N);
+  fftw_array<complex_prec> momentaC(n->Nhalf);
+
+  if (masking2) {
+    fftw_array<real_prec> dummy(n->N);
+    // X^{1/2}p
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif  // MULTITHREAD
+    for (ULONG i = 0; i < n->N; i++) {
+      real_prec corr = hd->corrf[i];
+      real_prec winprime = sqrt(num_1+corr);
+      dummy[i] = momenta[i]*winprime;
+    }
+    // complexify_array(dummy, dummyC, n->N);
+    // FFT3d(n->N1, n->N2, n->N3, true, dummyC, AUX);
+    fftR2C(n->N1, n->N2, n->N3, dummy, momentaC);
+  } else {
+    // complexify_array(momenta, dummyC, n->N);
+    // FFT3d(n->N1, n->N2, n->N3, true, dummyC, AUX);
+    fftR2C(n->N1, n->N2, n->N3, momenta, momentaC);
+  }
+
+  unsigned N3half = n->N3/2 + 1;
+
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif  // MULTITHREAD
+  for (unsigned i = 0; i < n->N1; i++)
+    for (unsigned j = 0; j < n->N2; j++)
+      for (unsigned k = 0; k < N3half; k++) {
+        ULONG ix = k + n->N3 * (j + n->N2 * i);
+        ULONG ix_C = k + N3half * (j + n->N2 * i);
+
+        real_prec mass = 0.0;
+#ifdef FOURIER_DEF_1
+        mass = hd->mass_f[ix];  // testing
+#endif
+#ifdef FOURIER_DEF_2
+        mass = hd->mass_f[ix] * static_cast<real_prec>(n->N);  // care
+        throw BarcodeException("Look into normalization of draw_masked_momenta! Seems wrong now.");
+#endif
+
+        real_prec sigma = sqrt(mass);
+
+        re(momentaC[ix_C]) *= sigma;
+        im(momentaC[ix_C]) *= sigma;
+      }
+
+  // FFT3d(n->N1, n->N2, n->N3, false, AUX, dummyC);
+  // real_part_array(dummyC, momenta, n->N);
+  fftC2R(n->N1, n->N2, n->N3, momentaC, momenta);
+}
diff --git a/HMC_momenta.h b/HMC_momenta.h
new file mode 100644
index 0000000..1bb9381
--- /dev/null
+++ b/HMC_momenta.h
@@ -0,0 +1,13 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#pragma once
+#include "struct_hamil.h"
+
+void draw_momenta(struct HAMIL_DATA *hamil_data, gsl_rng *seed, real_prec *momenta, struct DATA *data);
diff --git a/IOfunctions.cc b/IOfunctions.cc
new file mode 100644
index 0000000..68d8ae3
--- /dev/null
+++ b/IOfunctions.cc
@@ -0,0 +1,81 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#include "struct_main.h"
+
+#include <iomanip>
+#include <cassert>
+
+using namespace std;
+
+void dump_measured_spec(real_prec *x, real_prec *y, string outputFileName,
+  ULONG N_bin) {
+  ofstream outStream(outputFileName.data());
+  assert(outStream.is_open());
+
+#ifdef DEBUG
+  cout << " >>> dump power-spectrum in : " << outputFileName << " ...."
+    << endl << endl;
+#endif  // DEBUG
+
+  for (ULONG i = 0; i < N_bin; i++)
+    if (y[i] > 0. && x[i] > 0.)
+     outStream << x[i] << "   " << y[i] << endl;
+
+  outStream.close();
+}
+
+
+void dump_ps_it(struct DATA *data,real_prec *x,real_prec *y)
+{
+  //int N1=data->numerical->N1;
+  //int N2=data->numerical->N2;
+  //int N3=data->numerical->N3;
+
+  //ULONG N=N1*N2*N3;
+
+  //real_prec L1=data->numerical->L1;
+  //real_prec L2=data->numerical->L2;
+  //real_prec L3=data->numerical->L3;
+  
+  ULONG N_bin=data->numerical->N_bin;
+  
+  int bmax=100;
+  char buffer1[bmax];  	    
+  sprintf(buffer1,"it%lu.dat",data->numerical->iGibbs); // EGP: %d -> %lu
+	    
+  string outputFileName= data->numerical->dir + string("powSpec")+buffer1;	
+  ofstream outStream(outputFileName.data());
+  
+  assert(outStream.is_open());
+    
+  real_prec NORM=static_cast<real_prec>(1.0); // output normalisation
+  // EGP: this is now done in measure_spec already (EqSolvers.cc)
+
+//#ifdef	  FOURIER_DEF_1 
+  //NORM=static_cast<real_prec>(L1*L2*L3/4./M_PI); // output normalisation
+//#endif  
+//#ifdef	  FOURIER_DEF_2 
+  //NORM=static_cast<real_prec>(L1*L2*L3/real_prec(N)/real_prec(N)/4./M_PI); // output normalisation
+//#endif
+
+#ifdef DEBUG
+  cout << " >>> dump power-spectrum in : "<<outputFileName <<"...."<<endl<<endl;
+#endif // DEBUG
+  
+  for(ULONG i=0;i<N_bin;i++)
+    {
+      if(y[i]>0. && x[i]>0.)
+	{
+	  outStream << x[i] <<"   " << NORM*y[i]<<endl;
+	  
+	}
+    }
+  outStream.close();
+}
diff --git a/IOfunctions.h b/IOfunctions.h
new file mode 100644
index 0000000..9507e18
--- /dev/null
+++ b/IOfunctions.h
@@ -0,0 +1,15 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#pragma once
+
+void dump_ps_it(struct DATA *data,real_prec *x,real_prec *y);
+
+void dump_measured_spec(real_prec *x,real_prec *y,string outputFileName, ULONG N_bin);
+
diff --git a/IOfunctionsGen.cc b/IOfunctionsGen.cc
new file mode 100644
index 0000000..57159a0
--- /dev/null
+++ b/IOfunctionsGen.cc
@@ -0,0 +1,269 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#include "define_opt.h"
+#include "struct_main.h"
+
+#include "fftw_array.h"
+
+#include <iomanip>
+#include <cassert>
+#include <sstream>
+#include <netinet/in.h>
+
+#include <gsl/gsl_randist.h>
+
+#include "planck/bstream.h"
+#include "planck/paramfile.h"
+
+// for dump_deltas:
+#include "Lag2Eul.h"
+#include "HMC_models.h"
+
+#include <chrono> // for timestamp in quick_dump_scalar
+
+using namespace std;
+
+ULONG count_lines(string fname)
+{
+  ifstream inStream;
+  inStream.open(fname.data());
+  assert(inStream.is_open());
+  string line;
+  ULONG linenum = 0;
+  while (getline (inStream, line))
+  {
+    linenum++;     
+  }
+  inStream.close();
+
+  return(linenum);
+}
+
+
+void change_rm2cm (real_prec *A,float *B,int N1,int N2,int N3)
+{
+  /*
+     Change Row major to column major
+     */
+  for(int i=0;i<N1;i++)
+    for(int j=0;j<N2;j++)
+      for(int k=0;k<N3;k++)
+      {		
+        B[i+N1*(j+N2*k)]=float(A[k+N3*(j+N2*i)]);
+        //B[k+N3*(j+N2*i)]=A[k+N3*(j+N2*i)];
+      }
+}
+
+
+void calc_BoundingBox(real_prec *BBox, real_prec L1, real_prec L2, real_prec L3)
+{
+  // the length is calculated from Voxel center to Voxel center
+  BBox[0]=0.;
+  BBox[1]=L1;
+
+  BBox[2]=0.;
+  BBox[3]=L2;
+
+  BBox[4]=0.;
+  BBox[5]=L3;
+}
+
+
+
+void change_cm2rm (float *A,real_prec *B,int N1,int N2,int N3)
+{
+  /*
+     Change Row major to column major
+     */
+  int i_eff_cm=0;
+  int j_eff_cm=0;
+  int k_eff_cm=0;
+
+  int i_eff_rm=0;
+  int j_eff_rm=0;
+  int k_eff_rm=0;
+
+  for(int i=0;i<N1;i++)
+    for(int j=0;j<N2;j++)
+      for(int k=0;k<N3;k++)
+      {
+        i_eff_rm=N3*N2*i;
+        j_eff_rm=N3*j;
+        k_eff_rm=k;
+
+        i_eff_cm=i;
+        j_eff_cm=N1*j;
+        k_eff_cm=N2*N1*k;
+
+        B[i_eff_rm+j_eff_rm+k_eff_rm]=float (A[i_eff_cm+j_eff_cm+k_eff_cm]);
+      }
+}
+
+int get_scalar(string FNAME, real_prec *OUT, unsigned int N1, unsigned int N2, unsigned int N3) {
+  ULONG N=N1*N2*N3;	
+  fftw_array<real_prec> dummy(N);
+
+  string fname=FNAME+string(".dat");
+  // cout<<"... reading file "<<FNAME<<endl;
+  bifstream inStream(fname.data(),file_is_natural);
+  assert(inStream.is_open());
+  inStream.get(dummy.data,N);
+  inStream.close();
+
+  for(ULONG i=0;i<N;i++)
+    OUT[i]=dummy[i];
+
+  return 0;
+}
+
+
+void dump_scalar(real_prec *A_rm, unsigned int N1, unsigned int N2, unsigned int N3, string fname) {
+  ULONG N=N1*N2*N3;
+
+  fftw_array<real_prec> dummy(N);
+
+  for(ULONG i=0;i<N;i++)
+    dummy[i]=A_rm[i];
+
+  string FNAME=fname+string(".dat");
+#ifdef DEBUG
+  cout<<"... writing file "<<FNAME<<endl;
+#endif // DEBUG
+  bofstream outStream(FNAME.data(),file_is_natural);
+  assert(outStream.is_open());
+  outStream.put(dummy.data,N);
+  outStream.close();
+}
+
+// EGP: handy function for debugging, only for cubical boxes and only when not using amira mesh!
+// Note: N1 is the linear size.
+void quick_dump_scalar(real_prec *A_rm, unsigned int N1, string fname, unsigned int sample_number = 0,
+                       bool prepend_timestamp = true)
+{
+  stringstream filename;
+  if (prepend_timestamp)
+  {
+    chrono::milliseconds ms = chrono::duration_cast< chrono::milliseconds >(
+      chrono::high_resolution_clock::now().time_since_epoch()
+      );
+    filename << ms.count() << "_";
+  }
+  filename << fname;
+  if (sample_number)
+    filename << "_" << sample_number;
+
+  dump_scalar(A_rm, N1, N1, N1, filename.str());
+}
+
+// Dump all delta's
+void dump_deltas(struct DATA *data, real_prec *deltaLAG, real_prec *deltaS, string fname_append)
+{
+  // TODO: add appendage to signify exact Eulerian model (Zel'dovich, 2LPT, ALPT)
+
+  struct NUMERICAL *n = data->numerical;
+  struct COSMOLOGY *c = data->cosmology;
+
+  string fname = n->dir + string("deltaLAG") + fname_append;
+  dump_scalar(deltaLAG, n->N1, n->N2, n->N3, fname);
+
+  if (!n->rsd_model)
+  {
+    // without rsd_model, deltaS (S = q + Psi) is simply in Eulerian space
+    fname= n->dir + string("deltaEUL") + fname_append;
+    dump_scalar(deltaS, n->N1, n->N2, n->N3, fname);
+  } 
+  else
+  {
+    // The deltaS is now the redshift space density field!
+    fname = n->dir + string("deltaRSS") + fname_append;
+    dump_scalar(deltaS, n->N1, n->N2, n->N3, fname);
+
+    // Also output the real Eulerian density field
+    unsigned facL = 1;
+    bool reggrid = true;
+    gsl_rng *seed = nullptr; // empty: reggrid is true anyway
+    real_prec kernel_scale = SPH_kernel_scale<struct DATA>(data);
+    fftw_array<real_prec> deltaEUL(n->N), posx(n->N), posy(n->N), posz(n->N);
+    Lag2Eul(deltaLAG, deltaEUL, posx, posy, posz, n->N1, n->N2, n->N3, n->L1, n->L2, n->L3, n->d1, n->d2, n->d3,
+            n->xllc, n->yllc, n->zllc, c->D1, c->D2, c->ascale, c->omega_m, c->omega_q, n->sfmodel, n->mk, n->slength, facL,
+            reggrid, seed, "", kernel_scale, n->R2Cplan, n->C2Rplan);
+
+    fname = n->dir + string("deltaEUL") + fname_append;
+    dump_scalar(deltaEUL, n->N1, n->N2, n->N3, fname);
+  }
+}
+
+// EGP: another version, which only takes one length argument
+void read_array(string FNAME, real_prec *out, ULONG N)
+{
+  fftw_array<real_prec> dummy(N);
+
+  string fname=FNAME+string(".dat");
+  // cout<<"... reading file "<<FNAME<<endl;
+  
+  bifstream inStream(fname.data(),file_is_natural);
+  assert(inStream.is_open());
+  inStream.get(dummy.data,N);
+  inStream.close();
+
+  for(ULONG i=0;i<N;i++)
+    out[i]=dummy[i];
+}
+
+void read_array(string fname, real_prec *out, unsigned int N1, unsigned int N2, unsigned int N3)
+{
+  ULONG N=N1*N2*N3;
+  read_array(fname, out, N);
+}
+
+void write_array(string fname, real_prec *A_rm, ULONG N)
+{
+  fftw_array<real_prec> dummy(N);
+
+  for(ULONG i=0;i<N;i++)
+    dummy[i]=A_rm[i];
+
+  string FNAME=fname+string(".dat");
+#ifdef DEBUG
+  cout<<"... writing file "<<FNAME<<endl;
+#endif // DEBUG
+  bofstream outStream(FNAME.data(),file_is_natural);
+  assert(outStream.is_open());
+  outStream.put(dummy.data,N);
+  outStream.close();
+}
+
+void write_array(string fname, real_prec *A_rm, unsigned int N1, unsigned int N2, unsigned int N3)
+{
+  ULONG N=N1*N2*N3;
+  write_array(fname, A_rm, N);
+}
+
+
+void dump_signal_it(ULONG iGibbs, unsigned int N1, unsigned int N2, unsigned int N3, real_prec *signal, string filnam) {
+  ULONG N=N1*N2*N3;
+  fftw_array<real_prec> dummy(N);
+
+  for(ULONG i=0;i<N;i++)
+    dummy[i]=signal[i];
+
+  int bmax=100;
+  char buffer1[bmax];  	    
+  sprintf(buffer1,"_%lu.dat",iGibbs); // EGP: %d -> %lu
+
+  string FileName=filnam+buffer1;
+#ifdef DEBUG
+  cout<<"... writing file "<<FileName<<endl;
+#endif // DEBUG
+  bofstream outStream(FileName.data(),file_is_natural);
+  assert(outStream.is_open());
+  outStream.put(dummy.data,N);
+  outStream.close();
+}
diff --git a/IOfunctionsGen.h b/IOfunctionsGen.h
new file mode 100644
index 0000000..6ea7677
--- /dev/null
+++ b/IOfunctionsGen.h
@@ -0,0 +1,40 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#pragma once
+#include <string>
+#include "define_opt.h"
+#include "struct_main.h"
+
+using namespace std;
+
+ULONG count_lines(string fname);
+
+void change_rm2cm (real_prec *A,float *B,int N1,int N2,int N3);
+
+void change_cm2rm (float *A,real_prec *B,int N1,int N2,int N3);
+
+void calc_BoundingBox(real_prec *BBox, real_prec L1, real_prec L2, real_prec L3);
+
+void dump_scalar(real_prec *A_rm, unsigned int N1, unsigned int N2, unsigned int N3, string fname);
+void quick_dump_scalar(real_prec *A_rm, unsigned int N1, string fname, unsigned int sample_number = 0,
+                       bool prepend_timestamp = true);
+void dump_deltas(struct DATA *data, real_prec *deltaLAG, real_prec *deltaS, string fname_append);
+
+int get_scalar(string FNAME, real_prec *OUT, unsigned int N1, unsigned int N2, unsigned int N3);
+
+int read_amiramesh(char* filename, float* data, unsigned int* dims, float* bnds, float& time, bool readdata);
+
+void read_array(string FNAME, real_prec *out, ULONG N);
+void read_array(string fname, real_prec *out, unsigned int N1, unsigned int N2, unsigned int N3);
+
+void write_array(string fname, real_prec *A_rm, ULONG N);
+void write_array(string fname, real_prec *A_rm, unsigned int N1, unsigned int N2, unsigned int N3);
+
+void dump_signal_it(ULONG iGibbs, unsigned int N1, unsigned int N2, unsigned int N3, real_prec *signal, string filnam);
diff --git a/Lag2Eul.cc b/Lag2Eul.cc
new file mode 100644
index 0000000..e3b9ffa
--- /dev/null
+++ b/Lag2Eul.cc
@@ -0,0 +1,426 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#include "define_opt.h"
+#include "struct_main.h"
+#include "fftw_array.h"
+
+#include "planck/paramfile.h"
+
+#include "cosmo.h"
+#include "EqSolvers.h"
+#include "massFunctions.h"
+#include "math_funcs.h"
+#include "disp_part.h"
+
+#include "convenience.h"
+
+#include "rsd.h"
+
+using namespace std;
+
+// A few ideas for ordering input parameters of functions in such a way that
+// they aren't totally hidden away (like now with the data/hamil_data structs),
+// but aren't overly explicit (and long) either.
+
+struct GridSize {
+  ULONG N;
+  ULONG Nhalf;
+  int x;
+  int y;
+  int z;
+};
+
+// for total simulation volume, cell sizes, origin
+struct RealVec3 {
+  real_prec x;
+  real_prec y;
+  real_prec z;
+};
+
+struct Cosmology {
+  real_prec Omega_M;
+  real_prec Omega_L;
+  real_prec hconst;
+  real_prec D1;
+  real_prec D2;
+  real_prec scale;
+};
+
+// Really though, the in and outs should be DensityField objects that have
+// gridsize, boxsize, cellsize and origin builtin. That would really clean up
+// the mess of parameters in functions. Oh well.
+void Lag2Eul_zeldovich(const real_prec *in, real_prec *out, real_prec *posx,
+                       real_prec *posy, real_prec *posz,
+                       const GridSize &gridsize, const RealVec3 &boxsize,
+                       const RealVec3 &cellsize, const RealVec3 &origin,
+                       const Cosmology &cosmo, const int masskernel,
+                       const real_prec kth, const int facL, const bool reggrid,
+                       gsl_rng * gBaseRand, const real_prec kernel_scale_factor,
+                       plan_pkg *R2Cplan, plan_pkg *C2Rplan);
+
+
+void Lag2Eul_zeldovich(real_prec *in, real_prec *out, real_prec *posx, real_prec *posy, real_prec *posz,
+                       unsigned int N1, unsigned int N2,
+                       unsigned int N3, real_prec L1, real_prec L2, real_prec L3, real_prec d1, real_prec d2,
+                       real_prec d3,
+                       real_prec min1, real_prec min2, real_prec min3, real_prec D1, real_prec scale, real_prec Omega_M,
+                       real_prec Omega_L, int masskernel, unsigned int facL, bool reggrid, gsl_rng *gBaseRand,
+                       real_prec kernel_scale_factor, plan_pkg *R2Cplan, plan_pkg *C2Rplan)
+{
+  ULONG N=N1*N2*N3;
+
+  ULONG NL1=N1*facL, NL2=N2*facL, NL3=N3*facL;
+  ULONG NLL=NL1*NL2*NL3;
+
+  fftw_array<real_prec>  psix(N), psiy(N), psiz(N);
+  fftw_array<real_prec>  vex(N), vey(N), vez(N);
+
+  // compute components of Psi
+  // delta^(1) == input field (in)
+  // phi = -D1*delta^(1) -> out (used as dummy array)
+  multiply_factor_array(-D1, in, out, N);
+
+  // Psi components
+  bool zeropad = false;
+  bool norm = false;  // true -> velocities, false -> displacements
+  theta2vel(N1, N2, N3, L1, L2, L3, scale, Omega_M, Omega_L, out, psix, psiy, psiz, zeropad, norm, R2Cplan, C2Rplan);
+  // EGP: cellbound interpolates to cell boundaries (corners), but we want
+  // the values in the cell centers!
+  //cellboundcomp(N1,N2,N3,L1,L2,L3,psix);
+  //cellboundcomp(N1,N2,N3,L1,L2,L3,psiy);
+  //cellboundcomp(N1,N2,N3,L1,L2,L3,psiz);
+
+  // compute the resulting particle positions & velocities and density.
+  {
+    fftw_array<real_prec> dummyL(NLL);
+    fill_one(dummyL, N);
+    // FIXME
+    // Eigenlijk moet deze alleen 1 zijn als er een deeltje in zit,
+    // wat in disp_part wordt bepaald, maar alleen als de boel niet
+    // periodic is, dus for now (nu alles periodic is) kunnen we dit
+    // wel ff zo laten.
+    // Het stelt namelijk de particle mass voor en die is nul als het
+    // deeltje er niet meer bij zit.
+
+    disp_part(N1,N2,N3,L1,L2,L3,d1,d2,d3,posx,posy,posz,psix,psiy,psiz,dummyL,facL,reggrid,gBaseRand);
+
+    switch (masskernel)
+    {
+      case 0:
+        getDensity_NGP(N1,N2,N3,L1,L2,L3,d1,d2,d3,min1,min2,min3,posx,posy,posz,dummyL,NLL,out);
+        break;
+      case 1:
+        getDensity_CIC(N1,N2,N3,L1,L2,L3,d1,d2,d3,min1,min2,min3,posx,posy,posz,dummyL,NLL,out, true);
+        break;
+      case 2:
+        getDensity_TSC(N1,N2,N3,L1,L2,L3,d1,d2,d3,min1,min2,min3,posx,posy,posz,dummyL,NLL,out);
+        break;
+      case 3:
+        getDensity_SPH(N1, N2, N3, L1, L2, L3, d1, d2, d3, min1, min2, min3, posx, posy, posz, dummyL, NLL, out, true, kernel_scale_factor);
+        break;
+    }
+  }
+
+  overdens(N1,N2,N3,out,out);
+}
+
+
+// --------------------------------------------
+
+
+void Lag2Eul_non_zeldovich(real_prec *in, real_prec *dummy, real_prec *posx, real_prec *posy, real_prec *posz,
+                           unsigned int N1,
+                           unsigned int N2, unsigned int N3, real_prec L1, real_prec L2, real_prec L3, real_prec d1,
+                           real_prec d2,
+                           real_prec d3, real_prec min1, real_prec min2, real_prec min3, real_prec D1, real_prec D2,
+                           real_prec scale, real_prec Omega_M, real_prec Omega_L, int masskernel, real_prec kth,
+                           unsigned int facL, bool reggrid, gsl_rng *gBaseRand, string dir,
+                           real_prec kernel_scale_factor)
+{
+  real_prec kthsc=kth;
+#ifdef TRANSF
+  int ftype = 1;
+#endif // TRANSF
+#ifdef TRANSFSC
+  int ftype = 1;
+#endif // TRANSFSC
+
+  ULONG N=N1*N2*N3;
+
+  ULONG NL1=N1*facL, NL2=N2*facL, NL3=N3*facL;
+  ULONG NLL=NL1*NL2*NL3;
+
+
+  fftw_array<real_prec>  psix(N), psiy(N), psiz(N);
+  fftw_array<real_prec>  vex(N), vey(N), vez(N);
+
+  {
+    fftw_array<real_prec> dummy2(N), dummy3(N);
+
+    copyArray(in, dummy, N);
+
+    // delta(1)-> Phi(1)
+    PoissonSolver(N1,N2,N3,L1,L2,L3,dummy,dummy2);
+    // Phi(1)  -> delta(2)
+    calc_m2v_mem(N1, N2, N3, L1, dummy2, dummy3);
+
+#ifdef TRANSF
+    {
+      {
+        string fname= dir + string("auxtransfzeld");
+        get_scalar(fname,dummy2,N1,N2,N3);
+      }
+      convcompb(L1,L2,L3,d1,d2,d3,N1,N2,N3,dummy,dummy2,ftype);
+      copyArray(dummy2, dummy, N);
+    }
+    {
+      {
+        string fname= dir + string("auxtransf2lpt");
+        get_scalar(fname,dummy2,N1,N2,N3);
+      }
+      convcompb(L1,L2,L3,d1,d2,d3,N1,N2,N3,dummy3,dummy2,ftype);
+      copyArray(dummy2, dummy3, N);
+    }
+#endif /* TRANSF */
+
+    //div Psi^2LPT
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+    for(ULONG i=0;i<N;i++)
+      dummy2[i]=D1*dummy[i]-D2*dummy3[i];
+
+    //K o div Psi^2LPT
+    convcomp(N1, N2, N3, dummy2, dummy2, kth, dir);
+
+    /* Psi^tot=K o Psi^2LPT + Psi^SC - K o Psi^SC */
+
+    fftw_array<real_prec> dummy4(N);
+    //div Psi^SC
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+    for(ULONG i=0;i<N;i++)
+      {
+        real_prec psilin=-D1*dummy[i];
+        real_prec psisc=0.;
+
+        if (1.+2./3.*psilin > 0.)
+          psisc=static_cast<real_prec>(3.*(sqrt(1.+2./3.*psilin)-1.));
+        else
+          psisc=-3.;
+
+        psisc*=static_cast<real_prec>(-1.);
+
+        dummy4[i]=psisc;
+      }
+
+
+#ifdef TRANSFSC
+    fftw_array<real_prec> dummy2(N);
+    {
+      string fname= dir + string("auxtransfzeld");
+      get_scalar(fname,dummy,N1,N2,N3);
+    }
+    convcompb(L1,L2,L3,d1,d2,d3,N1,N2,N3,dummy4,dummy,ftype);
+    copyArray(dummy, dummy4, N);
+#endif
+
+    /* Psi^tot x-component */
+    //K o Psi^2LPT_x
+    theta2velcomp(N1, N2, N3, L1, L2, L3, scale, Omega_M, Omega_L, dummy2, dummy3, false, false, 1);
+   //Psi^SC_x
+    theta2velcomp(N1, N2, N3, L1, L2, L3, scale, Omega_M, Omega_L, dummy4, dummy, false, false, 1);
+    //K o Psi^2LPT_x + Psi^SC_x
+    add_to_array(dummy, dummy3, N);
+    //K o Psi^SC_x
+    convcomp(N1, N2, N3, dummy, dummy, kthsc, dir);
+    //K o Psi^2LPT_x + Psi^SC_x - K o Psi^SC_x
+    subtract_arrays(dummy3, dummy, psix, N);
+    cellboundcomp(N1, N2, N3, psix);
+
+    /* Psi^tot y-component */
+
+    //K o Psi^2LPT_y
+    theta2velcomp(N1, N2, N3, L1, L2, L3, scale, Omega_M, Omega_L, dummy2, dummy3, false, false, 2);
+    //Psi^SC_y
+    theta2velcomp(N1, N2, N3, L1, L2, L3, scale, Omega_M, Omega_L, dummy4, dummy, false, false, 2);
+    //K o Psi^2LPT_y + Psi^SC_y
+    add_to_array(dummy, dummy3, N);
+    //K o Psi^SC_y
+    convcomp(N1, N2, N3, dummy, dummy, kthsc, dir);
+    //K o Psi^2LPT_y + Psi^SC_y - K o Psi^SC_y
+    subtract_arrays(dummy3, dummy, psiy, N);
+    cellboundcomp(N1, N2, N3, psiy);
+
+    /* Psi^tot z-component */
+    //K o Psi^2LPT_z
+    theta2velcomp(N1, N2, N3, L1, L2, L3, scale, Omega_M, Omega_L, dummy2, dummy3, false, false, 3);
+    //Psi^SC_z
+    theta2velcomp(N1, N2, N3, L1, L2, L3, scale, Omega_M, Omega_L, dummy4, dummy, false, false, 3);
+    //K o Psi^2LPT_z + Psi^SC_z
+    add_to_array(dummy, dummy3, N);
+    //K o Psi^SC_z
+    convcomp(N1, N2, N3, dummy, dummy, kthsc, dir);
+    //K o Psi^2LPT_z + Psi^SC_z - K o Psi^SC_z
+    subtract_arrays(dummy3, dummy, psiz, N);
+    cellboundcomp(N1, N2, N3, psiz);
+  }
+
+  /* end Psi^tot */
+
+  fftw_array<real_prec> dummyL(NLL);
+  fill_one(dummyL, N);
+
+  disp_part(N1,N2,N3,L1,L2,L3,d1,d2,d3,posx,posy,posz,psix,psiy,psiz,dummyL,facL,reggrid,gBaseRand);
+
+  {
+  switch (masskernel)
+    {
+    case 0:
+      {
+        getDensity_NGP(N1,N2,N3,L1,L2,L3,d1,d2,d3,min1,min2,min3,posx,posy,posz,dummyL,NLL,dummy);
+      }
+      break;
+    case 1:
+      {
+        getDensity_CIC(N1,N2,N3,L1,L2,L3,d1,d2,d3,min1,min2,min3,posx,posy,posz,dummyL,NLL,dummy, true);
+      }
+      break;
+    case 2:
+      {
+        getDensity_TSC(N1,N2,N3,L1,L2,L3,d1,d2,d3,min1,min2,min3,posx,posy,posz,dummyL,NLL,dummy);
+      }
+      break;
+    case 3:
+      getDensity_SPH(N1, N2, N3, L1, L2, L3, d1, d2, d3, min1, min2, min3, posx, posy, posz, dummyL, NLL, dummy, true, kernel_scale_factor);
+      break;
+    }
+  }
+  {
+    fftw_array<real_prec> dummy2(N);
+    overdens(N1,N2,N3,dummy,dummy2);
+    copyArray(dummy2, dummy, N);
+  }
+}
+
+
+// --------------------------------------------
+
+
+void Lag2Eul(real_prec *in, real_prec *out, real_prec *posx, real_prec *posy, real_prec *posz, unsigned int N1,
+             unsigned int N2, unsigned int N3, real_prec L1, real_prec L2, real_prec L3, real_prec d1, real_prec d2,
+             real_prec d3, real_prec min1, real_prec min2, real_prec min3, real_prec D1, real_prec D2, real_prec scale,
+             real_prec Omega_M, real_prec Omega_L, int sfmodel, int masskernel, real_prec kth, unsigned int facL,
+             bool reggrid, gsl_rng *gBaseRand, string dir, real_prec kernel_scale_factor, plan_pkg *R2Cplan,
+             plan_pkg *C2Rplan)
+{
+  if (sfmodel == 1) {
+    Lag2Eul_zeldovich(in, out, posx, posy, posz, N1, N2, N3, L1, L2, L3, d1, d2, d3, min1, min2, min3, D1, scale, Omega_M,
+                      Omega_L, masskernel, facL,
+                      reggrid, gBaseRand, kernel_scale_factor, R2Cplan, C2Rplan);
+  } else {
+    Lag2Eul_non_zeldovich(in, out, posx, posy, posz, N1, N2, N3, L1, L2, L3, d1, d2, d3, min1,min2, min3, D1, D2, scale, Omega_M, Omega_L, masskernel, kth, facL, reggrid, gBaseRand, dir, kernel_scale_factor);
+  }
+}
+
+
+// --------------------------------------------
+
+
+void Lag2Eul_rsd_zeldovich(real_prec *in, real_prec *out, real_prec *posx, real_prec *posy, real_prec *posz,
+                           unsigned int N1, unsigned int N2,
+                           unsigned int N3, real_prec L1, real_prec L2, real_prec L3, real_prec d1, real_prec d2,
+                           real_prec d3,
+                           real_prec min1, real_prec min2, real_prec min3, real_prec D1, real_prec scale,
+                           real_prec Omega_M,
+                           real_prec Omega_L, int masskernel, unsigned int facL, bool reggrid, gsl_rng *gBaseRand,
+                           real_prec kernel_scale_factor, real_prec xobs, real_prec yobs, real_prec zobs, bool planepar,
+                           bool periodic, plan_pkg *R2Cplan, plan_pkg *C2Rplan)
+{
+  ULONG N=N1*N2*N3;
+
+  ULONG NL1=N1*facL, NL2=N2*facL, NL3=N3*facL;
+  ULONG NLL=NL1*NL2*NL3;
+
+  fftw_array<real_prec>  psix(N), psiy(N), psiz(N);
+  fftw_array<real_prec>  vex(N), vey(N), vez(N);
+
+  // compute components of Psi
+  // delta^(1) == input field (in)
+  // phi = -D1*delta^(1) -> out (used as dummy array)
+  // multiply_factor_array(-D1, in, out, N);
+  // R2Cplan->R as dummy helps speed up theta2vel!
+  multiply_factor_array(-D1, in, R2Cplan->R, N);
+
+  // Psi components
+  bool zeropad = false;
+  bool norm = false;  // true -> velocities, false -> displacements
+  theta2vel(N1, N2, N3, L1, L2, L3, scale, Omega_M, Omega_L, R2Cplan->R, psix, psiy, psiz, zeropad, norm, R2Cplan,
+            C2Rplan);
+
+  // EGP: cellbound interpolates to cell boundaries (corners), but we want
+  // the values in the cell centers!
+  //cellboundcomp(N1,N2,N3,L1,L2,L3,psix);
+  //cellboundcomp(N1,N2,N3,L1,L2,L3,psiy);
+  //cellboundcomp(N1,N2,N3,L1,L2,L3,psiz);
+
+  // EGP: calculate velocities (disp_part doesn't do that!)
+  // norm = true; // this makes sure it multiplies the psi's with the velocity factor
+  // theta2vel(N1,N2,N3,L1,L2,L3,scale,Omega_M,Omega_L,hconst, out, vex, vey, vez, redshift, zeropad, norm);
+  real_prec cpecvel = c_pecvel(scale, Omega_M, Omega_L, 1);
+  multiply_factor_array(cpecvel, psix, vex, N);
+  multiply_factor_array(cpecvel, psiy, vey, N);
+  multiply_factor_array(cpecvel, psiz, vez, N);
+
+  // compute the resulting particle positions and density.
+  {
+    fftw_array<real_prec> dummyL(NLL);
+    fill_one(dummyL, N);
+    // FIXME
+    // Eigenlijk moet deze alleen 1 zijn als er een deeltje in zit,
+    // wat in disp_part wordt bepaald, maar alleen als de boel niet
+    // periodic is, dus for now (nu alles periodic is) kunnen we dit
+    // wel ff zo laten.
+    // Het stelt namelijk de particle mass voor en die is nul als het
+    // deeltje er niet meer bij zit.
+
+    disp_part(N1,N2,N3,L1,L2,L3,d1,d2,d3,posx,posy,posz,psix,psiy,psiz,dummyL,facL,reggrid,gBaseRand);
+    //quick_dump_scalar(posz, N1, "posz_pre_rsd", 0, false);
+    //quick_dump_scalar(vez, N1, "vez_pre_rsd", 0, false);
+
+    // Here we do the RSD transformation
+    calc_pos_rsd(NLL, L3, xobs, yobs, zobs, posx, posy, posz, vex, vey, vez, posx, posy, posz, scale, Omega_M, Omega_L,
+                 planepar, periodic);
+
+    //quick_dump_scalar(posz, N1, "posz_post_rsd", 0, false);
+    //quick_dump_scalar(vez, N1, "vez_post_rsd", 0, false);
+
+    switch (masskernel)
+    {
+      case 0:
+        getDensity_NGP(N1,N2,N3,L1,L2,L3,d1,d2,d3,min1,min2,min3,posx,posy,posz,dummyL,NLL,out);
+        break;
+      case 1:
+        getDensity_CIC(N1,N2,N3,L1,L2,L3,d1,d2,d3,min1,min2,min3,posx,posy,posz,dummyL,NLL,out, true);
+        break;
+      case 2:
+        getDensity_TSC(N1,N2,N3,L1,L2,L3,d1,d2,d3,min1,min2,min3,posx,posy,posz,dummyL,NLL,out);
+        break;
+      case 3:
+        getDensity_SPH(N1, N2, N3, L1, L2, L3, d1, d2, d3, min1, min2, min3, posx, posy, posz, dummyL, NLL, out, true, kernel_scale_factor);
+        break;
+    }
+  }
+
+  overdens(N1,N2,N3,out,out);
+}
+
+
diff --git a/Lag2Eul.h b/Lag2Eul.h
new file mode 100644
index 0000000..ae02475
--- /dev/null
+++ b/Lag2Eul.h
@@ -0,0 +1,34 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#pragma once
+void Lag2Eul(real_prec *in, real_prec *dummy, real_prec *posx, real_prec *posy, real_prec *posz, unsigned int N1,
+             unsigned int N2, unsigned int N3, real_prec L1, real_prec L2, real_prec L3, real_prec d1, real_prec d2,
+             real_prec d3, real_prec min1, real_prec min2, real_prec min3, real_prec D1, real_prec D2, real_prec scale,
+             real_prec Omega_M, real_prec Omega_L, int sfmodel, int masskernel, real_prec kth, unsigned int facL,
+             bool reggrid, gsl_rng *gBaseRand, string dir, real_prec kernel_scale_factor, plan_pkg *R2Cplan,
+             plan_pkg *C2Rplan);
+
+void Lag2Eul_zeldovich(real_prec *in, real_prec *dummy, real_prec *posx, real_prec *posy, real_prec *posz,
+                       unsigned int N1, unsigned int N2,
+                       unsigned int N3, real_prec L1, real_prec L2, real_prec L3, real_prec d1, real_prec d2,
+                       real_prec d3,
+                       real_prec min1, real_prec min2, real_prec min3, real_prec D1, real_prec scale, real_prec Omega_M,
+                       real_prec Omega_L, int masskernel, unsigned int facL, bool reggrid, gsl_rng *gBaseRand,
+                       real_prec kernel_scale_factor, plan_pkg *R2Cplan, plan_pkg *C2Rplan);
+
+void Lag2Eul_rsd_zeldovich(real_prec *in, real_prec *out, real_prec *posx, real_prec *posy, real_prec *posz,
+                           unsigned int N1, unsigned int N2,
+                           unsigned int N3, real_prec L1, real_prec L2, real_prec L3, real_prec d1, real_prec d2,
+                           real_prec d3,
+                           real_prec min1, real_prec min2, real_prec min3, real_prec D1, real_prec scale,
+                           real_prec Omega_M,
+                           real_prec Omega_L, int masskernel, unsigned int facL, bool reggrid, gsl_rng *gBaseRand,
+                           real_prec kernel_scale_factor, real_prec xobs, real_prec yobs, real_prec zobs, bool planepar,
+                           bool periodic, plan_pkg *R2Cplan, plan_pkg *C2Rplan);
diff --git a/PLANCK_CAMB.dat b/PLANCK_CAMB.dat
new file mode 100644
index 0000000..8e18ecb
--- /dev/null
+++ b/PLANCK_CAMB.dat
@@ -0,0 +1,788 @@
+0.00     0.00
+1.00E-04 4.50E+02
+1.02E-04 4.58E+02
+1.04E-04 4.67E+02
+1.06E-04 4.76E+02
+1.08E-04 4.86E+02
+1.11E-04 4.95E+02
+1.13E-04 5.05E+02
+1.15E-04 5.14E+02
+1.17E-04 5.24E+02
+1.20E-04 5.35E+02
+1.22E-04 5.45E+02
+1.25E-04 5.56E+02
+1.27E-04 5.66E+02
+1.30E-04 5.77E+02
+1.32E-04 5.89E+02
+1.35E-04 6.00E+02
+1.38E-04 6.12E+02
+1.40E-04 6.23E+02
+1.43E-04 6.36E+02
+1.46E-04 6.48E+02
+1.49E-04 6.60E+02
+1.52E-04 6.73E+02
+1.55E-04 6.86E+02
+1.58E-04 7.00E+02
+1.62E-04 7.13E+02
+1.65E-04 7.27E+02
+1.68E-04 7.41E+02
+1.72E-04 7.55E+02
+1.75E-04 7.70E+02
+1.79E-04 7.85E+02
+1.82E-04 8.00E+02
+1.86E-04 8.16E+02
+1.90E-04 8.31E+02
+1.93E-04 8.48E+02
+1.97E-04 8.64E+02
+2.01E-04 8.81E+02
+2.05E-04 8.98E+02
+2.10E-04 9.15E+02
+2.14E-04 9.33E+02
+2.18E-04 9.51E+02
+2.23E-04 9.69E+02
+2.27E-04 9.88E+02
+2.32E-04 1.01E+03
+2.36E-04 1.03E+03
+2.41E-04 1.05E+03
+2.46E-04 1.07E+03
+2.51E-04 1.09E+03
+2.56E-04 1.11E+03
+2.61E-04 1.13E+03
+2.66E-04 1.15E+03
+2.72E-04 1.17E+03
+2.77E-04 1.20E+03
+2.83E-04 1.22E+03
+2.89E-04 1.24E+03
+2.94E-04 1.27E+03
+3.00E-04 1.29E+03
+3.06E-04 1.32E+03
+3.13E-04 1.34E+03
+3.19E-04 1.37E+03
+3.25E-04 1.39E+03
+3.32E-04 1.42E+03
+3.39E-04 1.45E+03
+3.46E-04 1.48E+03
+3.53E-04 1.51E+03
+3.60E-04 1.53E+03
+3.67E-04 1.56E+03
+3.74E-04 1.59E+03
+3.82E-04 1.62E+03
+3.90E-04 1.66E+03
+3.97E-04 1.69E+03
+4.06E-04 1.72E+03
+4.14E-04 1.75E+03
+4.22E-04 1.79E+03
+4.31E-04 1.82E+03
+4.39E-04 1.86E+03
+4.48E-04 1.89E+03
+4.57E-04 1.93E+03
+4.66E-04 1.97E+03
+4.76E-04 2.00E+03
+4.86E-04 2.04E+03
+4.95E-04 2.08E+03
+5.05E-04 2.12E+03
+5.16E-04 2.16E+03
+5.26E-04 2.20E+03
+5.37E-04 2.25E+03
+5.47E-04 2.29E+03
+5.58E-04 2.33E+03
+5.70E-04 2.38E+03
+5.81E-04 2.42E+03
+5.93E-04 2.47E+03
+6.05E-04 2.52E+03
+6.17E-04 2.56E+03
+6.30E-04 2.61E+03
+6.42E-04 2.66E+03
+6.55E-04 2.71E+03
+6.69E-04 2.77E+03
+6.82E-04 2.82E+03
+6.96E-04 2.87E+03
+7.10E-04 2.93E+03
+7.24E-04 2.98E+03
+7.39E-04 3.04E+03
+7.54E-04 3.10E+03
+7.69E-04 3.15E+03
+7.85E-04 3.21E+03
+8.00E-04 3.27E+03
+8.17E-04 3.34E+03
+8.33E-04 3.40E+03
+8.50E-04 3.46E+03
+8.67E-04 3.53E+03
+8.85E-04 3.60E+03
+9.03E-04 3.66E+03
+9.21E-04 3.73E+03
+9.39E-04 3.80E+03
+9.58E-04 3.87E+03
+9.78E-04 3.95E+03
+9.97E-04 4.02E+03
+1.02E-03 4.10E+03
+1.04E-03 4.17E+03
+1.06E-03 4.25E+03
+1.08E-03 4.33E+03
+1.10E-03 4.41E+03
+1.12E-03 4.49E+03
+1.15E-03 4.57E+03
+1.17E-03 4.66E+03
+1.19E-03 4.75E+03
+1.22E-03 4.83E+03
+1.24E-03 4.92E+03
+1.27E-03 5.01E+03
+1.29E-03 5.11E+03
+1.32E-03 5.20E+03
+1.35E-03 5.29E+03
+1.37E-03 5.39E+03
+1.40E-03 5.49E+03
+1.43E-03 5.59E+03
+1.46E-03 5.69E+03
+1.49E-03 5.80E+03
+1.52E-03 5.90E+03
+1.55E-03 6.01E+03
+1.58E-03 6.12E+03
+1.61E-03 6.23E+03
+1.64E-03 6.34E+03
+1.68E-03 6.45E+03
+1.71E-03 6.57E+03
+1.75E-03 6.69E+03
+1.78E-03 6.81E+03
+1.82E-03 6.93E+03
+1.85E-03 7.05E+03
+1.89E-03 7.17E+03
+1.93E-03 7.30E+03
+1.97E-03 7.43E+03
+2.01E-03 7.56E+03
+2.05E-03 7.69E+03
+2.09E-03 7.83E+03
+2.13E-03 7.96E+03
+2.18E-03 8.10E+03
+2.22E-03 8.24E+03
+2.26E-03 8.38E+03
+2.31E-03 8.53E+03
+2.36E-03 8.68E+03
+2.40E-03 8.82E+03
+2.45E-03 8.97E+03
+2.50E-03 9.13E+03
+2.55E-03 9.28E+03
+2.61E-03 9.44E+03
+2.66E-03 9.60E+03
+2.71E-03 9.76E+03
+2.77E-03 9.92E+03
+2.82E-03 1.01E+04
+2.88E-03 1.02E+04
+2.94E-03 1.04E+04
+3.00E-03 1.06E+04
+3.06E-03 1.08E+04
+3.12E-03 1.09E+04
+3.18E-03 1.11E+04
+3.25E-03 1.13E+04
+3.31E-03 1.15E+04
+3.38E-03 1.17E+04
+3.45E-03 1.18E+04
+3.52E-03 1.20E+04
+3.59E-03 1.22E+04
+3.66E-03 1.24E+04
+3.73E-03 1.26E+04
+3.81E-03 1.28E+04
+3.89E-03 1.30E+04
+3.96E-03 1.32E+04
+4.04E-03 1.34E+04
+4.13E-03 1.36E+04
+4.21E-03 1.38E+04
+4.29E-03 1.40E+04
+4.38E-03 1.42E+04
+4.47E-03 1.44E+04
+4.56E-03 1.46E+04
+4.65E-03 1.48E+04
+4.75E-03 1.50E+04
+4.84E-03 1.52E+04
+4.94E-03 1.54E+04
+5.04E-03 1.57E+04
+5.14E-03 1.59E+04
+5.25E-03 1.61E+04
+5.35E-03 1.63E+04
+5.46E-03 1.65E+04
+5.57E-03 1.67E+04
+5.68E-03 1.70E+04
+5.80E-03 1.72E+04
+5.91E-03 1.74E+04
+6.03E-03 1.76E+04
+6.16E-03 1.78E+04
+6.28E-03 1.81E+04
+6.41E-03 1.83E+04
+6.54E-03 1.85E+04
+6.67E-03 1.87E+04
+6.80E-03 1.89E+04
+6.94E-03 1.92E+04
+7.08E-03 1.94E+04
+7.22E-03 1.96E+04
+7.37E-03 1.98E+04
+7.52E-03 2.00E+04
+7.67E-03 2.02E+04
+7.83E-03 2.05E+04
+7.98E-03 2.07E+04
+8.15E-03 2.09E+04
+8.31E-03 2.11E+04
+8.48E-03 2.13E+04
+8.65E-03 2.15E+04
+8.82E-03 2.17E+04
+9.00E-03 2.19E+04
+9.18E-03 2.21E+04
+9.37E-03 2.23E+04
+9.56E-03 2.25E+04
+9.75E-03 2.27E+04
+9.95E-03 2.28E+04
+1.01E-02 2.30E+04
+1.04E-02 2.32E+04
+1.06E-02 2.34E+04
+1.08E-02 2.35E+04
+1.10E-02 2.37E+04
+1.12E-02 2.38E+04
+1.14E-02 2.40E+04
+1.17E-02 2.41E+04
+1.19E-02 2.43E+04
+1.22E-02 2.44E+04
+1.24E-02 2.45E+04
+1.26E-02 2.46E+04
+1.29E-02 2.47E+04
+1.32E-02 2.48E+04
+1.34E-02 2.49E+04
+1.37E-02 2.50E+04
+1.40E-02 2.51E+04
+1.43E-02 2.52E+04
+1.45E-02 2.52E+04
+1.48E-02 2.53E+04
+1.51E-02 2.53E+04
+1.54E-02 2.54E+04
+1.58E-02 2.54E+04
+1.61E-02 2.54E+04
+1.64E-02 2.54E+04
+1.67E-02 2.54E+04
+1.71E-02 2.54E+04
+1.74E-02 2.53E+04
+1.78E-02 2.53E+04
+1.81E-02 2.52E+04
+1.85E-02 2.52E+04
+1.89E-02 2.51E+04
+1.92E-02 2.50E+04
+1.96E-02 2.49E+04
+2.00E-02 2.48E+04
+2.04E-02 2.47E+04
+2.09E-02 2.45E+04
+2.13E-02 2.44E+04
+2.17E-02 2.42E+04
+2.21E-02 2.41E+04
+2.26E-02 2.39E+04
+2.30E-02 2.37E+04
+2.35E-02 2.35E+04
+2.40E-02 2.32E+04
+2.45E-02 2.30E+04
+2.50E-02 2.28E+04
+2.55E-02 2.25E+04
+2.60E-02 2.23E+04
+2.65E-02 2.20E+04
+2.70E-02 2.17E+04
+2.76E-02 2.14E+04
+2.81E-02 2.11E+04
+2.87E-02 2.08E+04
+2.93E-02 2.05E+04
+2.99E-02 2.02E+04
+3.05E-02 1.99E+04
+3.11E-02 1.95E+04
+3.17E-02 1.92E+04
+3.24E-02 1.88E+04
+3.30E-02 1.85E+04
+3.37E-02 1.82E+04
+3.44E-02 1.79E+04
+3.51E-02 1.76E+04
+3.58E-02 1.73E+04
+3.65E-02 1.70E+04
+3.72E-02 1.67E+04
+3.80E-02 1.64E+04
+3.88E-02 1.61E+04
+3.95E-02 1.58E+04
+4.03E-02 1.55E+04
+4.12E-02 1.52E+04
+4.20E-02 1.49E+04
+4.28E-02 1.46E+04
+4.37E-02 1.43E+04
+4.46E-02 1.40E+04
+4.55E-02 1.37E+04
+4.64E-02 1.34E+04
+4.73E-02 1.32E+04
+4.83E-02 1.30E+04
+4.93E-02 1.27E+04
+5.03E-02 1.25E+04
+5.13E-02 1.23E+04
+5.23E-02 1.22E+04
+5.34E-02 1.20E+04
+5.45E-02 1.18E+04
+5.56E-02 1.17E+04
+5.67E-02 1.15E+04
+5.78E-02 1.14E+04
+5.90E-02 1.13E+04
+6.02E-02 1.11E+04
+6.14E-02 1.09E+04
+6.26E-02 1.08E+04
+6.39E-02 1.06E+04
+6.52E-02 1.05E+04
+6.65E-02 1.03E+04
+6.79E-02 1.01E+04
+6.92E-02 9.91E+03
+7.06E-02 9.72E+03
+7.21E-02 9.52E+03
+7.35E-02 9.32E+03
+7.50E-02 9.11E+03
+7.65E-02 8.88E+03
+7.81E-02 8.64E+03
+7.96E-02 8.39E+03
+8.12E-02 8.13E+03
+8.29E-02 7.86E+03
+8.46E-02 7.59E+03
+8.63E-02 7.32E+03
+8.80E-02 7.05E+03
+8.98E-02 6.79E+03
+9.16E-02 6.53E+03
+9.34E-02 6.29E+03
+9.53E-02 6.06E+03
+9.73E-02 5.84E+03
+9.92E-02 5.64E+03
+1.01E-01 5.45E+03
+1.03E-01 5.29E+03
+1.05E-01 5.14E+03
+1.07E-01 5.00E+03
+1.10E-01 4.88E+03
+1.12E-01 4.78E+03
+1.14E-01 4.68E+03
+1.16E-01 4.59E+03
+1.19E-01 4.51E+03
+1.21E-01 4.43E+03
+1.24E-01 4.35E+03
+1.26E-01 4.27E+03
+1.29E-01 4.17E+03
+1.31E-01 4.07E+03
+1.34E-01 3.96E+03
+1.37E-01 3.83E+03
+1.39E-01 3.70E+03
+1.42E-01 3.56E+03
+1.45E-01 3.41E+03
+1.48E-01 3.26E+03
+1.51E-01 3.11E+03
+1.54E-01 2.97E+03
+1.57E-01 2.84E+03
+1.60E-01 2.72E+03
+1.64E-01 2.62E+03
+1.67E-01 2.53E+03
+1.70E-01 2.45E+03
+1.74E-01 2.39E+03
+1.77E-01 2.33E+03
+1.81E-01 2.28E+03
+1.84E-01 2.23E+03
+1.88E-01 2.17E+03
+1.92E-01 2.12E+03
+1.96E-01 2.05E+03
+2.00E-01 1.98E+03
+2.04E-01 1.90E+03
+2.08E-01 1.82E+03
+2.12E-01 1.74E+03
+2.16E-01 1.66E+03
+2.21E-01 1.58E+03
+2.25E-01 1.52E+03
+2.30E-01 1.46E+03
+2.34E-01 1.41E+03
+2.39E-01 1.37E+03
+2.44E-01 1.33E+03
+2.49E-01 1.29E+03
+2.54E-01 1.25E+03
+2.59E-01 1.21E+03
+2.64E-01 1.17E+03
+2.70E-01 1.12E+03
+2.75E-01 1.07E+03
+2.81E-01 1.02E+03
+2.86E-01 9.76E+02
+2.92E-01 9.36E+02
+2.98E-01 9.01E+02
+3.04E-01 8.69E+02
+3.10E-01 8.40E+02
+3.17E-01 8.12E+02
+3.23E-01 7.83E+02
+3.29E-01 7.53E+02
+3.36E-01 7.22E+02
+3.43E-01 6.91E+02
+3.50E-01 6.61E+02
+3.57E-01 6.34E+02
+3.64E-01 6.09E+02
+3.71E-01 5.86E+02
+3.79E-01 5.64E+02
+3.87E-01 5.42E+02
+3.94E-01 5.21E+02
+4.02E-01 4.99E+02
+4.11E-01 4.78E+02
+4.19E-01 4.58E+02
+4.27E-01 4.39E+02
+4.36E-01 4.22E+02
+4.45E-01 4.05E+02
+4.54E-01 3.89E+02
+4.63E-01 3.73E+02
+4.72E-01 3.58E+02
+4.82E-01 3.43E+02
+4.91E-01 3.29E+02
+5.01E-01 3.15E+02
+5.12E-01 3.02E+02
+5.22E-01 2.89E+02
+5.32E-01 2.77E+02
+5.43E-01 2.65E+02
+5.54E-01 2.54E+02
+5.65E-01 2.43E+02
+5.77E-01 2.33E+02
+5.88E-01 2.23E+02
+6.00E-01 2.13E+02
+6.12E-01 2.04E+02
+6.25E-01 1.95E+02
+6.37E-01 1.87E+02
+6.50E-01 1.79E+02
+6.63E-01 1.71E+02
+6.77E-01 1.64E+02
+6.91E-01 1.57E+02
+7.04E-01 1.50E+02
+7.19E-01 1.43E+02
+7.33E-01 1.37E+02
+7.48E-01 1.31E+02
+7.63E-01 1.25E+02
+7.79E-01 1.20E+02
+7.94E-01 1.14E+02
+8.10E-01 1.09E+02
+8.27E-01 1.05E+02
+8.43E-01 1.00E+02
+8.60E-01 9.56E+01
+8.78E-01 9.13E+01
+8.96E-01 8.73E+01
+9.14E-01 8.34E+01
+9.32E-01 7.96E+01
+9.51E-01 7.61E+01
+9.70E-01 7.27E+01
+9.90E-01 6.94E+01
+1.01E+00 6.63E+01
+1.03E+00 6.33E+01
+1.05E+00 6.04E+01
+1.07E+00 5.77E+01
+1.09E+00 5.50E+01
+1.12E+00 5.25E+01
+1.14E+00 5.01E+01
+1.16E+00 4.79E+01
+1.18E+00 4.57E+01
+1.21E+00 4.36E+01
+1.23E+00 4.16E+01
+1.26E+00 3.97E+01
+1.28E+00 3.78E+01
+1.31E+00 3.61E+01
+1.34E+00 3.44E+01
+1.36E+00 3.28E+01
+1.39E+00 3.13E+01
+1.42E+00 2.99E+01
+1.45E+00 2.85E+01
+1.48E+00 2.72E+01
+1.51E+00 2.59E+01
+1.54E+00 2.47E+01
+1.57E+00 2.35E+01
+1.60E+00 2.24E+01
+1.63E+00 2.14E+01
+1.66E+00 2.04E+01
+1.70E+00 1.94E+01
+1.73E+00 1.85E+01
+1.77E+00 1.76E+01
+1.80E+00 1.68E+01
+1.84E+00 1.60E+01
+1.88E+00 1.53E+01
+1.91E+00 1.45E+01
+1.95E+00 1.38E+01
+1.99E+00 1.32E+01
+2.03E+00 1.26E+01
+2.07E+00 1.20E+01
+2.12E+00 1.14E+01
+2.16E+00 1.08E+01
+2.20E+00 1.03E+01
+2.25E+00 9.83E+00
+2.29E+00 9.36E+00
+2.34E+00 8.91E+00
+2.39E+00 8.49E+00
+2.43E+00 8.08E+00
+2.48E+00 7.69E+00
+2.53E+00 7.32E+00
+2.58E+00 6.97E+00
+2.64E+00 6.63E+00
+2.69E+00 6.31E+00
+2.74E+00 6.00E+00
+2.80E+00 5.71E+00
+2.86E+00 5.44E+00
+2.91E+00 5.17E+00
+2.97E+00 4.92E+00
+3.03E+00 4.68E+00
+3.09E+00 4.45E+00
+3.16E+00 4.24E+00
+3.22E+00 4.03E+00
+3.29E+00 3.83E+00
+3.35E+00 3.65E+00
+3.42E+00 3.47E+00
+3.49E+00 3.30E+00
+3.56E+00 3.14E+00
+3.63E+00 2.98E+00
+3.70E+00 2.84E+00
+3.78E+00 2.70E+00
+3.86E+00 2.56E+00
+3.93E+00 2.44E+00
+4.01E+00 2.32E+00
+4.09E+00 2.20E+00
+4.18E+00 2.09E+00
+4.26E+00 1.99E+00
+4.35E+00 1.89E+00
+4.44E+00 1.80E+00
+4.53E+00 1.71E+00
+4.62E+00 1.62E+00
+4.71E+00 1.54E+00
+4.80E+00 1.47E+00
+4.90E+00 1.39E+00
+5.00E+00 1.32E+00
+5.10E+00 1.26E+00
+5.21E+00 1.20E+00
+5.31E+00 1.14E+00
+5.42E+00 1.08E+00
+5.53E+00 1.03E+00
+5.64E+00 9.74E-01
+5.75E+00 9.25E-01
+5.87E+00 8.79E-01
+5.99E+00 8.35E-01
+6.11E+00 7.93E-01
+6.23E+00 7.53E-01
+6.36E+00 7.15E-01
+6.49E+00 6.79E-01
+6.62E+00 6.45E-01
+6.75E+00 6.13E-01
+6.89E+00 5.82E-01
+7.03E+00 5.53E-01
+7.17E+00 5.25E-01
+7.31E+00 4.98E-01
+7.46E+00 4.73E-01
+7.61E+00 4.49E-01
+7.77E+00 4.26E-01
+7.92E+00 4.05E-01
+8.08E+00 3.84E-01
+8.25E+00 3.65E-01
+8.41E+00 3.46E-01
+8.58E+00 3.29E-01
+8.76E+00 3.12E-01
+8.93E+00 2.96E-01
+9.11E+00 2.81E-01
+9.30E+00 2.67E-01
+9.48E+00 2.53E-01
+9.68E+00 2.40E-01
+9.87E+00 2.28E-01
+1.01E+01 2.16E-01
+1.03E+01 2.05E-01
+1.05E+01 1.95E-01
+1.07E+01 1.85E-01
+1.09E+01 1.75E-01
+1.11E+01 1.66E-01
+1.14E+01 1.58E-01
+1.16E+01 1.50E-01
+1.18E+01 1.42E-01
+1.21E+01 1.35E-01
+1.23E+01 1.28E-01
+1.25E+01 1.21E-01
+1.28E+01 1.15E-01
+1.31E+01 1.09E-01
+1.33E+01 1.04E-01
+1.36E+01 9.82E-02
+1.39E+01 9.32E-02
+1.41E+01 8.84E-02
+1.44E+01 8.38E-02
+1.47E+01 7.95E-02
+1.50E+01 7.54E-02
+1.53E+01 7.15E-02
+1.56E+01 6.78E-02
+1.60E+01 6.43E-02
+1.63E+01 6.09E-02
+1.66E+01 5.78E-02
+1.69E+01 5.48E-02
+1.73E+01 5.20E-02
+1.76E+01 4.93E-02
+1.80E+01 4.67E-02
+1.84E+01 4.43E-02
+1.87E+01 4.20E-02
+1.91E+01 3.98E-02
+1.95E+01 3.77E-02
+1.99E+01 3.58E-02
+2.03E+01 3.39E-02
+2.07E+01 3.21E-02
+2.11E+01 3.05E-02
+2.15E+01 2.89E-02
+2.20E+01 2.74E-02
+2.24E+01 2.59E-02
+2.29E+01 2.46E-02
+2.33E+01 2.33E-02
+2.38E+01 2.21E-02
+2.43E+01 2.09E-02
+2.48E+01 1.98E-02
+2.53E+01 1.88E-02
+2.58E+01 1.78E-02
+2.63E+01 1.69E-02
+2.68E+01 1.60E-02
+2.74E+01 1.52E-02
+2.79E+01 1.44E-02
+2.85E+01 1.36E-02
+2.91E+01 1.29E-02
+2.97E+01 1.22E-02
+3.03E+01 1.16E-02
+3.09E+01 1.10E-02
+3.15E+01 1.04E-02
+3.21E+01 9.85E-03
+3.28E+01 9.33E-03
+3.34E+01 8.84E-03
+3.41E+01 8.37E-03
+3.48E+01 7.93E-03
+3.55E+01 7.52E-03
+3.62E+01 7.12E-03
+3.70E+01 6.74E-03
+3.77E+01 6.39E-03
+3.85E+01 6.05E-03
+3.92E+01 5.73E-03
+4.00E+01 5.43E-03
+4.08E+01 5.14E-03
+4.17E+01 4.87E-03
+4.25E+01 4.61E-03
+4.34E+01 4.37E-03
+4.42E+01 4.14E-03
+4.51E+01 3.92E-03
+4.60E+01 3.71E-03
+4.70E+01 3.51E-03
+4.79E+01 3.33E-03
+4.89E+01 3.15E-03
+4.99E+01 2.98E-03
+5.09E+01 2.82E-03
+5.19E+01 2.67E-03
+5.30E+01 2.53E-03
+5.40E+01 2.40E-03
+5.51E+01 2.27E-03
+5.62E+01 2.15E-03
+5.74E+01 2.03E-03
+5.85E+01 1.92E-03
+5.97E+01 1.82E-03
+6.09E+01 1.72E-03
+6.22E+01 1.63E-03
+6.34E+01 1.55E-03
+6.47E+01 1.46E-03
+6.60E+01 1.38E-03
+6.73E+01 1.31E-03
+6.87E+01 1.24E-03
+7.01E+01 1.17E-03
+7.15E+01 1.11E-03
+7.29E+01 1.05E-03
+7.44E+01 9.95E-04
+7.59E+01 9.42E-04
+7.75E+01 8.91E-04
+7.90E+01 8.43E-04
+8.06E+01 7.98E-04
+8.22E+01 7.55E-04
+8.39E+01 7.15E-04
+8.56E+01 6.76E-04
+8.73E+01 6.40E-04
+8.91E+01 6.05E-04
+9.09E+01 5.73E-04
+9.27E+01 5.42E-04
+9.46E+01 5.12E-04
+9.65E+01 4.85E-04
+9.85E+01 4.58E-04
+1.00E+02 4.34E-04
+1.02E+02 4.10E-04
+1.05E+02 3.88E-04
+1.07E+02 3.67E-04
+1.09E+02 3.47E-04
+1.11E+02 3.28E-04
+1.13E+02 3.10E-04
+1.16E+02 2.93E-04
+1.18E+02 2.77E-04
+1.20E+02 2.62E-04
+1.23E+02 2.48E-04
+1.25E+02 2.35E-04
+1.28E+02 2.22E-04
+1.30E+02 2.10E-04
+1.33E+02 1.98E-04
+1.36E+02 1.87E-04
+1.38E+02 1.77E-04
+1.41E+02 1.67E-04
+1.44E+02 1.58E-04
+1.47E+02 1.49E-04
+1.50E+02 1.41E-04
+1.53E+02 1.33E-04
+1.56E+02 1.26E-04
+1.59E+02 1.19E-04
+1.62E+02 1.13E-04
+1.66E+02 1.06E-04
+1.69E+02 1.00E-04
+1.72E+02 9.48E-05
+1.76E+02 8.96E-05
+1.79E+02 8.46E-05
+1.83E+02 7.99E-05
+1.87E+02 7.54E-05
+1.90E+02 7.12E-05
+1.94E+02 6.72E-05
+1.98E+02 6.35E-05
+2.02E+02 5.99E-05
+2.06E+02 5.65E-05
+2.11E+02 5.34E-05
+2.15E+02 5.03E-05
+2.19E+02 4.75E-05
+2.24E+02 4.48E-05
+2.28E+02 4.23E-05
+2.33E+02 3.99E-05
+2.37E+02 3.76E-05
+2.42E+02 3.55E-05
+2.47E+02 3.34E-05
+2.52E+02 3.15E-05
+2.57E+02 2.97E-05
+2.62E+02 2.80E-05
+2.68E+02 2.64E-05
+2.73E+02 2.49E-05
+2.79E+02 2.35E-05
+2.84E+02 2.21E-05
+2.90E+02 2.08E-05
+2.96E+02 1.96E-05
+3.02E+02 1.85E-05
+3.08E+02 1.74E-05
+3.14E+02 1.64E-05
+3.20E+02 1.54E-05
+3.27E+02 1.45E-05
+3.34E+02 1.36E-05
+3.40E+02 1.28E-05
+3.47E+02 1.21E-05
+3.54E+02 1.14E-05
+3.61E+02 1.07E-05
+3.69E+02 1.00E-05
+3.76E+02 9.44E-06
+3.84E+02 8.88E-06
+3.91E+02 8.34E-06
+3.99E+02 7.84E-06
+4.07E+02 7.37E-06
+4.16E+02 6.92E-06
+4.24E+02 6.50E-06
+4.33E+02 6.11E-06
+4.41E+02 5.73E-06
+4.50E+02 5.38E-06
+4.59E+02 5.05E-06
+4.69E+02 4.74E-06
+4.78E+02 4.45E-06
+4.88E+02 4.18E-06
+4.98E+02 3.92E-06
+5.08E+02 3.68E-06
+5.18E+02 3.45E-06
+5.28E+02 3.23E-06
+5.39E+02 3.03E-06
+5.50E+02 2.84E-06
+5.61E+02 2.67E-06
+5.72E+02 2.50E-06
+5.84E+02 2.34E-06
+5.96E+02 2.20E-06
+6.08E+02 2.06E-06
+6.20E+02 1.93E-06
+6.32E+02 1.81E-06
+6.45E+02 1.69E-06
+6.58E+02 1.59E-06
+6.72E+02 1.49E-06
diff --git a/README.md b/README.md
index edc80ce..cd76398 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,52 @@
 # barcode
-Bayesian reconstruction of cosmic density fields
+Bayesian Reconstruction of COsmic DEnsity fields
+
+This repository contains both Barcode and a set of supplementary analysis tools.
+
+
+# Build
+
+Clone the repository, `cd` into the cloned directory (`$BARCODE` below) and run the following to configure and build:
+
+```sh
+mkdir cmake-build
+cd cmake-build
+cmake ..
+make
+```
+
+This will create binaries for barcode and the supplementary tools in the `bin` directory.
+
+# Run
+
+Barcode must be run in the same directory as the `input.par` file.
+Edit this file to change input parameters.
+Then simply run with:
+
+```
+$BARCODE/bin/barcode [restart_iteration]
+```
+
+Optionally add the `restart_iteration` number when doing a restart run from existing output files.
+
+
+# MacOS
+The MacOS version has no OpenMP support. Configure the project with `cmake` options `-DMULTITHREAD=OFF -DMULTITHREAD_FFTW=OFF` to build on macOS.
+
+
+# Development and contributing
+This is an early release. Unit tests and other test codes will be added later (as mentioned in some of the code comments). Documentation as well.
+
+Contributions are very welcome! Please don't hesitate to propose ideas for improvements in the GitHub issues or in a PR.
+
+
+# License
+The original contributions made as part of this code are distributed under the MIT license (see `LICENSE` file).
+
+Barcode makes use of an early version of the [Planck LevelS toolkit](https://sourceforge.net/projects/planck-ls/), which is distributed under the GNU Public License v2 (see `planck/LICENSE` file).
+
+This code depends on FFTW 3, the GNU Scientific Library, both distributed under the GPL as well.
+
+These three dependencies mean that any redistribution of Barcode in binary form is subject to GPLv2 terms.
+
+Barcode also depends on ncurses, which is distributed under the X11 license, a permissive license similar to the MIT license.
diff --git a/WMAP7_CAMB.dat b/WMAP7_CAMB.dat
new file mode 100644
index 0000000..f422c0a
--- /dev/null
+++ b/WMAP7_CAMB.dat
@@ -0,0 +1,781 @@
+0.0    0.0
+1.11E-04 6.15E+02
+1.13E-04 6.27E+02
+1.15E-04 6.39E+02
+1.17E-04 6.52E+02
+1.20E-04 6.64E+02
+1.22E-04 6.77E+02
+1.25E-04 6.90E+02
+1.27E-04 7.04E+02
+1.30E-04 7.17E+02
+1.32E-04 7.31E+02
+1.35E-04 7.45E+02
+1.38E-04 7.60E+02
+1.40E-04 7.74E+02
+1.43E-04 7.89E+02
+1.46E-04 8.05E+02
+1.49E-04 8.20E+02
+1.52E-04 8.36E+02
+1.55E-04 8.52E+02
+1.58E-04 8.69E+02
+1.62E-04 8.86E+02
+1.65E-04 9.03E+02
+1.68E-04 9.20E+02
+1.72E-04 9.38E+02
+1.75E-04 9.56E+02
+1.79E-04 9.75E+02
+1.82E-04 9.94E+02
+1.86E-04 1.01E+03
+1.90E-04 1.03E+03
+1.93E-04 1.05E+03
+1.97E-04 1.07E+03
+2.01E-04 1.09E+03
+2.05E-04 1.11E+03
+2.10E-04 1.14E+03
+2.14E-04 1.16E+03
+2.18E-04 1.18E+03
+2.23E-04 1.20E+03
+2.27E-04 1.23E+03
+2.32E-04 1.25E+03
+2.36E-04 1.27E+03
+2.41E-04 1.30E+03
+2.46E-04 1.32E+03
+2.51E-04 1.35E+03
+2.56E-04 1.38E+03
+2.61E-04 1.40E+03
+2.66E-04 1.43E+03
+2.72E-04 1.46E+03
+2.77E-04 1.49E+03
+2.83E-04 1.51E+03
+2.89E-04 1.54E+03
+2.94E-04 1.57E+03
+3.00E-04 1.60E+03
+3.06E-04 1.63E+03
+3.13E-04 1.67E+03
+3.19E-04 1.70E+03
+3.25E-04 1.73E+03
+3.32E-04 1.76E+03
+3.39E-04 1.80E+03
+3.46E-04 1.83E+03
+3.53E-04 1.87E+03
+3.60E-04 1.90E+03
+3.67E-04 1.94E+03
+3.74E-04 1.98E+03
+3.82E-04 2.02E+03
+3.90E-04 2.05E+03
+3.97E-04 2.09E+03
+4.06E-04 2.13E+03
+4.14E-04 2.17E+03
+4.22E-04 2.22E+03
+4.31E-04 2.26E+03
+4.39E-04 2.30E+03
+4.48E-04 2.35E+03
+4.57E-04 2.39E+03
+4.66E-04 2.44E+03
+4.76E-04 2.48E+03
+4.86E-04 2.53E+03
+4.95E-04 2.58E+03
+5.05E-04 2.63E+03
+5.16E-04 2.68E+03
+5.26E-04 2.73E+03
+5.37E-04 2.78E+03
+5.47E-04 2.84E+03
+5.58E-04 2.89E+03
+5.70E-04 2.95E+03
+5.81E-04 3.00E+03
+5.93E-04 3.06E+03
+6.05E-04 3.12E+03
+6.17E-04 3.18E+03
+6.30E-04 3.24E+03
+6.42E-04 3.30E+03
+6.55E-04 3.36E+03
+6.69E-04 3.42E+03
+6.82E-04 3.49E+03
+6.96E-04 3.56E+03
+7.10E-04 3.62E+03
+7.24E-04 3.69E+03
+7.39E-04 3.76E+03
+7.54E-04 3.83E+03
+7.69E-04 3.90E+03
+7.85E-04 3.98E+03
+8.00E-04 4.05E+03
+8.17E-04 4.13E+03
+8.33E-04 4.21E+03
+8.50E-04 4.29E+03
+8.67E-04 4.37E+03
+8.85E-04 4.45E+03
+9.03E-04 4.53E+03
+9.21E-04 4.62E+03
+9.39E-04 4.70E+03
+9.58E-04 4.79E+03
+9.78E-04 4.88E+03
+9.97E-04 4.97E+03
+1.02E-03 5.06E+03
+1.04E-03 5.16E+03
+1.06E-03 5.25E+03
+1.08E-03 5.35E+03
+1.10E-03 5.45E+03
+1.12E-03 5.55E+03
+1.15E-03 5.65E+03
+1.17E-03 5.75E+03
+1.19E-03 5.86E+03
+1.22E-03 5.97E+03
+1.24E-03 6.08E+03
+1.27E-03 6.19E+03
+1.29E-03 6.30E+03
+1.32E-03 6.42E+03
+1.35E-03 6.53E+03
+1.37E-03 6.65E+03
+1.40E-03 6.77E+03
+1.43E-03 6.89E+03
+1.46E-03 7.02E+03
+1.49E-03 7.15E+03
+1.52E-03 7.27E+03
+1.55E-03 7.40E+03
+1.58E-03 7.54E+03
+1.61E-03 7.67E+03
+1.64E-03 7.81E+03
+1.68E-03 7.95E+03
+1.71E-03 8.09E+03
+1.75E-03 8.23E+03
+1.78E-03 8.38E+03
+1.82E-03 8.52E+03
+1.85E-03 8.67E+03
+1.89E-03 8.83E+03
+1.93E-03 8.98E+03
+1.97E-03 9.14E+03
+2.01E-03 9.29E+03
+2.05E-03 9.45E+03
+2.09E-03 9.62E+03
+2.13E-03 9.78E+03
+2.18E-03 9.95E+03
+2.22E-03 1.01E+04
+2.26E-03 1.03E+04
+2.31E-03 1.05E+04
+2.36E-03 1.06E+04
+2.40E-03 1.08E+04
+2.45E-03 1.10E+04
+2.50E-03 1.12E+04
+2.55E-03 1.14E+04
+2.61E-03 1.16E+04
+2.66E-03 1.18E+04
+2.71E-03 1.19E+04
+2.77E-03 1.21E+04
+2.82E-03 1.23E+04
+2.88E-03 1.25E+04
+2.94E-03 1.27E+04
+3.00E-03 1.29E+04
+3.06E-03 1.32E+04
+3.12E-03 1.34E+04
+3.18E-03 1.36E+04
+3.25E-03 1.38E+04
+3.31E-03 1.40E+04
+3.38E-03 1.42E+04
+3.45E-03 1.44E+04
+3.52E-03 1.47E+04
+3.59E-03 1.49E+04
+3.66E-03 1.51E+04
+3.73E-03 1.53E+04
+3.81E-03 1.56E+04
+3.89E-03 1.58E+04
+3.96E-03 1.60E+04
+4.04E-03 1.63E+04
+4.13E-03 1.65E+04
+4.21E-03 1.67E+04
+4.29E-03 1.70E+04
+4.38E-03 1.72E+04
+4.47E-03 1.75E+04
+4.56E-03 1.77E+04
+4.65E-03 1.79E+04
+4.75E-03 1.82E+04
+4.84E-03 1.84E+04
+4.94E-03 1.87E+04
+5.04E-03 1.89E+04
+5.14E-03 1.92E+04
+5.25E-03 1.94E+04
+5.35E-03 1.97E+04
+5.46E-03 1.99E+04
+5.57E-03 2.02E+04
+5.68E-03 2.04E+04
+5.80E-03 2.07E+04
+5.91E-03 2.09E+04
+6.03E-03 2.12E+04
+6.16E-03 2.14E+04
+6.28E-03 2.17E+04
+6.41E-03 2.19E+04
+6.54E-03 2.22E+04
+6.67E-03 2.24E+04
+6.80E-03 2.27E+04
+6.94E-03 2.29E+04
+7.08E-03 2.32E+04
+7.22E-03 2.34E+04
+7.37E-03 2.37E+04
+7.52E-03 2.39E+04
+7.67E-03 2.41E+04
+7.83E-03 2.44E+04
+7.98E-03 2.46E+04
+8.15E-03 2.48E+04
+8.31E-03 2.51E+04
+8.48E-03 2.53E+04
+8.65E-03 2.55E+04
+8.82E-03 2.57E+04
+9.00E-03 2.59E+04
+9.18E-03 2.61E+04
+9.37E-03 2.64E+04
+9.56E-03 2.65E+04
+9.75E-03 2.67E+04
+9.95E-03 2.69E+04
+1.01E-02 2.71E+04
+1.04E-02 2.73E+04
+1.06E-02 2.75E+04
+1.08E-02 2.76E+04
+1.10E-02 2.78E+04
+1.12E-02 2.79E+04
+1.14E-02 2.81E+04
+1.17E-02 2.82E+04
+1.19E-02 2.83E+04
+1.22E-02 2.84E+04
+1.24E-02 2.86E+04
+1.26E-02 2.87E+04
+1.29E-02 2.87E+04
+1.32E-02 2.88E+04
+1.34E-02 2.89E+04
+1.37E-02 2.90E+04
+1.40E-02 2.90E+04
+1.43E-02 2.90E+04
+1.45E-02 2.91E+04
+1.48E-02 2.91E+04
+1.51E-02 2.91E+04
+1.54E-02 2.91E+04
+1.58E-02 2.91E+04
+1.61E-02 2.90E+04
+1.64E-02 2.90E+04
+1.67E-02 2.89E+04
+1.71E-02 2.89E+04
+1.74E-02 2.88E+04
+1.78E-02 2.87E+04
+1.81E-02 2.86E+04
+1.85E-02 2.84E+04
+1.89E-02 2.83E+04
+1.92E-02 2.81E+04
+1.96E-02 2.80E+04
+2.00E-02 2.78E+04
+2.04E-02 2.76E+04
+2.09E-02 2.74E+04
+2.13E-02 2.72E+04
+2.17E-02 2.69E+04
+2.21E-02 2.67E+04
+2.26E-02 2.64E+04
+2.30E-02 2.61E+04
+2.35E-02 2.58E+04
+2.40E-02 2.55E+04
+2.45E-02 2.52E+04
+2.50E-02 2.49E+04
+2.55E-02 2.46E+04
+2.60E-02 2.42E+04
+2.65E-02 2.39E+04
+2.70E-02 2.35E+04
+2.76E-02 2.31E+04
+2.81E-02 2.27E+04
+2.87E-02 2.24E+04
+2.93E-02 2.20E+04
+2.99E-02 2.15E+04
+3.05E-02 2.11E+04
+3.11E-02 2.07E+04
+3.17E-02 2.03E+04
+3.24E-02 1.99E+04
+3.30E-02 1.96E+04
+3.37E-02 1.92E+04
+3.44E-02 1.89E+04
+3.51E-02 1.85E+04
+3.58E-02 1.82E+04
+3.65E-02 1.78E+04
+3.72E-02 1.75E+04
+3.80E-02 1.71E+04
+3.88E-02 1.68E+04
+3.95E-02 1.64E+04
+4.03E-02 1.61E+04
+4.12E-02 1.58E+04
+4.20E-02 1.54E+04
+4.28E-02 1.51E+04
+4.37E-02 1.48E+04
+4.46E-02 1.45E+04
+4.55E-02 1.42E+04
+4.64E-02 1.39E+04
+4.73E-02 1.37E+04
+4.83E-02 1.34E+04
+4.93E-02 1.32E+04
+5.03E-02 1.30E+04
+5.13E-02 1.29E+04
+5.23E-02 1.27E+04
+5.34E-02 1.25E+04
+5.45E-02 1.24E+04
+5.56E-02 1.22E+04
+5.67E-02 1.21E+04
+5.78E-02 1.19E+04
+5.90E-02 1.18E+04
+6.02E-02 1.16E+04
+6.14E-02 1.14E+04
+6.26E-02 1.12E+04
+6.39E-02 1.10E+04
+6.52E-02 1.08E+04
+6.65E-02 1.06E+04
+6.79E-02 1.04E+04
+6.92E-02 1.02E+04
+7.06E-02 9.96E+03
+7.21E-02 9.72E+03
+7.35E-02 9.47E+03
+7.50E-02 9.21E+03
+7.65E-02 8.93E+03
+7.81E-02 8.64E+03
+7.96E-02 8.34E+03
+8.12E-02 8.03E+03
+8.29E-02 7.73E+03
+8.46E-02 7.43E+03
+8.63E-02 7.14E+03
+8.80E-02 6.86E+03
+8.98E-02 6.59E+03
+9.16E-02 6.34E+03
+9.34E-02 6.10E+03
+9.53E-02 5.88E+03
+9.73E-02 5.69E+03
+9.92E-02 5.51E+03
+1.01E-01 5.35E+03
+1.03E-01 5.21E+03
+1.05E-01 5.08E+03
+1.07E-01 4.98E+03
+1.10E-01 4.88E+03
+1.12E-01 4.79E+03
+1.14E-01 4.71E+03
+1.16E-01 4.63E+03
+1.19E-01 4.55E+03
+1.21E-01 4.47E+03
+1.24E-01 4.37E+03
+1.26E-01 4.26E+03
+1.29E-01 4.14E+03
+1.31E-01 4.00E+03
+1.34E-01 3.86E+03
+1.37E-01 3.70E+03
+1.39E-01 3.54E+03
+1.42E-01 3.38E+03
+1.45E-01 3.22E+03
+1.48E-01 3.07E+03
+1.51E-01 2.93E+03
+1.54E-01 2.80E+03
+1.57E-01 2.69E+03
+1.60E-01 2.60E+03
+1.64E-01 2.52E+03
+1.67E-01 2.45E+03
+1.70E-01 2.40E+03
+1.74E-01 2.35E+03
+1.77E-01 2.30E+03
+1.81E-01 2.24E+03
+1.84E-01 2.19E+03
+1.88E-01 2.12E+03
+1.92E-01 2.04E+03
+1.96E-01 1.96E+03
+2.00E-01 1.87E+03
+2.04E-01 1.78E+03
+2.08E-01 1.70E+03
+2.12E-01 1.62E+03
+2.16E-01 1.55E+03
+2.21E-01 1.49E+03
+2.25E-01 1.44E+03
+2.30E-01 1.40E+03
+2.34E-01 1.36E+03
+2.39E-01 1.32E+03
+2.44E-01 1.28E+03
+2.49E-01 1.24E+03
+2.54E-01 1.19E+03
+2.59E-01 1.14E+03
+2.64E-01 1.09E+03
+2.70E-01 1.04E+03
+2.75E-01 9.93E+02
+2.81E-01 9.51E+02
+2.86E-01 9.15E+02
+2.92E-01 8.84E+02
+2.98E-01 8.55E+02
+3.04E-01 8.26E+02
+3.10E-01 7.96E+02
+3.17E-01 7.65E+02
+3.23E-01 7.33E+02
+3.29E-01 7.00E+02
+3.36E-01 6.69E+02
+3.43E-01 6.42E+02
+3.50E-01 6.16E+02
+3.57E-01 5.93E+02
+3.64E-01 5.71E+02
+3.71E-01 5.49E+02
+3.79E-01 5.27E+02
+3.87E-01 5.05E+02
+3.94E-01 4.83E+02
+4.02E-01 4.63E+02
+4.11E-01 4.43E+02
+4.19E-01 4.26E+02
+4.27E-01 4.09E+02
+4.36E-01 3.92E+02
+4.45E-01 3.77E+02
+4.54E-01 3.61E+02
+4.63E-01 3.46E+02
+4.72E-01 3.32E+02
+4.82E-01 3.18E+02
+4.91E-01 3.05E+02
+5.01E-01 2.92E+02
+5.12E-01 2.79E+02
+5.22E-01 2.67E+02
+5.32E-01 2.56E+02
+5.43E-01 2.45E+02
+5.54E-01 2.34E+02
+5.65E-01 2.24E+02
+5.77E-01 2.14E+02
+5.88E-01 2.05E+02
+6.00E-01 1.96E+02
+6.12E-01 1.87E+02
+6.25E-01 1.79E+02
+6.37E-01 1.71E+02
+6.50E-01 1.64E+02
+6.63E-01 1.57E+02
+6.77E-01 1.50E+02
+6.91E-01 1.43E+02
+7.04E-01 1.37E+02
+7.19E-01 1.31E+02
+7.33E-01 1.25E+02
+7.48E-01 1.20E+02
+7.63E-01 1.14E+02
+7.79E-01 1.09E+02
+7.94E-01 1.05E+02
+8.10E-01 9.99E+01
+8.27E-01 9.55E+01
+8.43E-01 9.12E+01
+8.60E-01 8.71E+01
+8.78E-01 8.32E+01
+8.96E-01 7.95E+01
+9.14E-01 7.59E+01
+9.32E-01 7.25E+01
+9.51E-01 6.92E+01
+9.70E-01 6.61E+01
+9.90E-01 6.31E+01
+1.01E+00 6.02E+01
+1.03E+00 5.74E+01
+1.05E+00 5.48E+01
+1.07E+00 5.23E+01
+1.09E+00 4.99E+01
+1.12E+00 4.76E+01
+1.14E+00 4.54E+01
+1.16E+00 4.33E+01
+1.18E+00 4.13E+01
+1.21E+00 3.94E+01
+1.23E+00 3.76E+01
+1.26E+00 3.59E+01
+1.28E+00 3.42E+01
+1.31E+00 3.26E+01
+1.34E+00 3.11E+01
+1.36E+00 2.96E+01
+1.39E+00 2.83E+01
+1.42E+00 2.69E+01
+1.45E+00 2.57E+01
+1.48E+00 2.45E+01
+1.51E+00 2.33E+01
+1.54E+00 2.22E+01
+1.57E+00 2.12E+01
+1.60E+00 2.02E+01
+1.63E+00 1.92E+01
+1.66E+00 1.83E+01
+1.70E+00 1.75E+01
+1.73E+00 1.66E+01
+1.77E+00 1.59E+01
+1.80E+00 1.51E+01
+1.84E+00 1.44E+01
+1.88E+00 1.37E+01
+1.91E+00 1.30E+01
+1.95E+00 1.24E+01
+1.99E+00 1.18E+01
+2.03E+00 1.13E+01
+2.07E+00 1.07E+01
+2.12E+00 1.02E+01
+2.16E+00 9.72E+00
+2.20E+00 9.25E+00
+2.25E+00 8.81E+00
+2.29E+00 8.38E+00
+2.34E+00 7.98E+00
+2.39E+00 7.59E+00
+2.43E+00 7.23E+00
+2.48E+00 6.88E+00
+2.53E+00 6.54E+00
+2.58E+00 6.23E+00
+2.64E+00 5.92E+00
+2.69E+00 5.64E+00
+2.74E+00 5.36E+00
+2.80E+00 5.10E+00
+2.86E+00 4.85E+00
+2.91E+00 4.62E+00
+2.97E+00 4.39E+00
+3.03E+00 4.18E+00
+3.09E+00 3.97E+00
+3.16E+00 3.78E+00
+3.22E+00 3.59E+00
+3.29E+00 3.42E+00
+3.35E+00 3.25E+00
+3.42E+00 3.09E+00
+3.49E+00 2.94E+00
+3.56E+00 2.79E+00
+3.63E+00 2.65E+00
+3.70E+00 2.52E+00
+3.78E+00 2.40E+00
+3.86E+00 2.28E+00
+3.93E+00 2.17E+00
+4.01E+00 2.06E+00
+4.09E+00 1.96E+00
+4.18E+00 1.86E+00
+4.26E+00 1.77E+00
+4.35E+00 1.68E+00
+4.44E+00 1.60E+00
+4.53E+00 1.52E+00
+4.62E+00 1.44E+00
+4.71E+00 1.37E+00
+4.80E+00 1.30E+00
+4.90E+00 1.24E+00
+5.00E+00 1.17E+00
+5.10E+00 1.12E+00
+5.21E+00 1.06E+00
+5.31E+00 1.01E+00
+5.42E+00 9.56E-01
+5.53E+00 9.08E-01
+5.64E+00 8.63E-01
+5.75E+00 8.19E-01
+5.87E+00 7.78E-01
+5.99E+00 7.39E-01
+6.11E+00 7.02E-01
+6.23E+00 6.66E-01
+6.36E+00 6.33E-01
+6.49E+00 6.01E-01
+6.62E+00 5.70E-01
+6.75E+00 5.42E-01
+6.89E+00 5.14E-01
+7.03E+00 4.88E-01
+7.17E+00 4.63E-01
+7.31E+00 4.40E-01
+7.46E+00 4.18E-01
+7.61E+00 3.96E-01
+7.77E+00 3.76E-01
+7.92E+00 3.57E-01
+8.08E+00 3.39E-01
+8.25E+00 3.22E-01
+8.41E+00 3.05E-01
+8.58E+00 2.90E-01
+8.76E+00 2.75E-01
+8.93E+00 2.61E-01
+9.11E+00 2.48E-01
+9.30E+00 2.35E-01
+9.48E+00 2.23E-01
+9.68E+00 2.12E-01
+9.87E+00 2.01E-01
+1.01E+01 1.91E-01
+1.03E+01 1.81E-01
+1.05E+01 1.71E-01
+1.07E+01 1.63E-01
+1.09E+01 1.54E-01
+1.11E+01 1.46E-01
+1.14E+01 1.39E-01
+1.16E+01 1.32E-01
+1.18E+01 1.25E-01
+1.21E+01 1.19E-01
+1.23E+01 1.12E-01
+1.25E+01 1.07E-01
+1.28E+01 1.01E-01
+1.31E+01 9.59E-02
+1.33E+01 9.10E-02
+1.36E+01 8.63E-02
+1.39E+01 8.18E-02
+1.41E+01 7.76E-02
+1.44E+01 7.36E-02
+1.47E+01 6.98E-02
+1.50E+01 6.62E-02
+1.53E+01 6.27E-02
+1.56E+01 5.95E-02
+1.60E+01 5.64E-02
+1.63E+01 5.35E-02
+1.66E+01 5.07E-02
+1.69E+01 4.81E-02
+1.73E+01 4.56E-02
+1.76E+01 4.32E-02
+1.80E+01 4.09E-02
+1.84E+01 3.88E-02
+1.87E+01 3.68E-02
+1.91E+01 3.49E-02
+1.95E+01 3.31E-02
+1.99E+01 3.13E-02
+2.03E+01 2.97E-02
+2.07E+01 2.81E-02
+2.11E+01 2.67E-02
+2.15E+01 2.53E-02
+2.20E+01 2.40E-02
+2.24E+01 2.27E-02
+2.29E+01 2.15E-02
+2.33E+01 2.04E-02
+2.38E+01 1.93E-02
+2.43E+01 1.83E-02
+2.48E+01 1.74E-02
+2.53E+01 1.64E-02
+2.58E+01 1.56E-02
+2.63E+01 1.48E-02
+2.68E+01 1.40E-02
+2.74E+01 1.33E-02
+2.79E+01 1.26E-02
+2.85E+01 1.19E-02
+2.91E+01 1.13E-02
+2.97E+01 1.07E-02
+3.03E+01 1.01E-02
+3.09E+01 9.58E-03
+3.15E+01 9.08E-03
+3.21E+01 8.60E-03
+3.28E+01 8.15E-03
+3.34E+01 7.72E-03
+3.41E+01 7.31E-03
+3.48E+01 6.92E-03
+3.55E+01 6.56E-03
+3.62E+01 6.21E-03
+3.70E+01 5.88E-03
+3.77E+01 5.57E-03
+3.85E+01 5.28E-03
+3.92E+01 5.00E-03
+4.00E+01 4.73E-03
+4.08E+01 4.48E-03
+4.17E+01 4.24E-03
+4.25E+01 4.02E-03
+4.34E+01 3.80E-03
+4.42E+01 3.60E-03
+4.51E+01 3.41E-03
+4.60E+01 3.23E-03
+4.70E+01 3.06E-03
+4.79E+01 2.90E-03
+4.89E+01 2.74E-03
+4.99E+01 2.60E-03
+5.09E+01 2.46E-03
+5.19E+01 2.33E-03
+5.30E+01 2.20E-03
+5.40E+01 2.08E-03
+5.51E+01 1.97E-03
+5.62E+01 1.87E-03
+5.74E+01 1.77E-03
+5.85E+01 1.67E-03
+5.97E+01 1.58E-03
+6.09E+01 1.50E-03
+6.22E+01 1.42E-03
+6.34E+01 1.34E-03
+6.47E+01 1.27E-03
+6.60E+01 1.20E-03
+6.73E+01 1.14E-03
+6.87E+01 1.08E-03
+7.01E+01 1.02E-03
+7.15E+01 9.65E-04
+7.29E+01 9.13E-04
+7.44E+01 8.64E-04
+7.59E+01 8.17E-04
+7.75E+01 7.73E-04
+7.90E+01 7.32E-04
+8.06E+01 6.92E-04
+8.22E+01 6.55E-04
+8.39E+01 6.19E-04
+8.56E+01 5.86E-04
+8.73E+01 5.54E-04
+8.91E+01 5.24E-04
+9.09E+01 4.96E-04
+9.27E+01 4.69E-04
+9.46E+01 4.44E-04
+9.65E+01 4.20E-04
+9.85E+01 3.97E-04
+1.00E+02 3.75E-04
+1.02E+02 3.55E-04
+1.05E+02 3.36E-04
+1.07E+02 3.17E-04
+1.09E+02 3.00E-04
+1.11E+02 2.84E-04
+1.13E+02 2.68E-04
+1.16E+02 2.54E-04
+1.18E+02 2.40E-04
+1.20E+02 2.27E-04
+1.23E+02 2.14E-04
+1.25E+02 2.02E-04
+1.28E+02 1.91E-04
+1.30E+02 1.81E-04
+1.33E+02 1.71E-04
+1.36E+02 1.62E-04
+1.38E+02 1.53E-04
+1.41E+02 1.44E-04
+1.44E+02 1.36E-04
+1.47E+02 1.29E-04
+1.50E+02 1.22E-04
+1.53E+02 1.15E-04
+1.56E+02 1.08E-04
+1.59E+02 1.02E-04
+1.62E+02 9.68E-05
+1.66E+02 9.14E-05
+1.69E+02 8.63E-05
+1.72E+02 8.15E-05
+1.76E+02 7.69E-05
+1.79E+02 7.26E-05
+1.83E+02 6.85E-05
+1.87E+02 6.47E-05
+1.90E+02 6.11E-05
+1.94E+02 5.76E-05
+1.98E+02 5.44E-05
+2.02E+02 5.13E-05
+2.06E+02 4.84E-05
+2.11E+02 4.56E-05
+2.15E+02 4.31E-05
+2.19E+02 4.06E-05
+2.24E+02 3.83E-05
+2.28E+02 3.61E-05
+2.33E+02 3.40E-05
+2.37E+02 3.21E-05
+2.42E+02 3.02E-05
+2.47E+02 2.85E-05
+2.52E+02 2.68E-05
+2.57E+02 2.53E-05
+2.62E+02 2.38E-05
+2.68E+02 2.24E-05
+2.73E+02 2.11E-05
+2.79E+02 1.99E-05
+2.84E+02 1.87E-05
+2.90E+02 1.76E-05
+2.96E+02 1.66E-05
+3.02E+02 1.56E-05
+3.08E+02 1.47E-05
+3.14E+02 1.38E-05
+3.20E+02 1.30E-05
+3.27E+02 1.22E-05
+3.34E+02 1.15E-05
+3.40E+02 1.08E-05
+3.47E+02 1.02E-05
+3.54E+02 9.55E-06
+3.61E+02 8.98E-06
+3.69E+02 8.43E-06
+3.76E+02 7.92E-06
+3.84E+02 7.44E-06
+3.91E+02 6.99E-06
+3.99E+02 6.56E-06
+4.07E+02 6.16E-06
+4.16E+02 5.78E-06
+4.24E+02 5.42E-06
+4.33E+02 5.09E-06
+4.41E+02 4.77E-06
+4.50E+02 4.48E-06
+4.59E+02 4.20E-06
+4.69E+02 3.94E-06
+4.78E+02 3.69E-06
+4.88E+02 3.46E-06
+4.98E+02 3.25E-06
+5.08E+02 3.04E-06
+5.18E+02 2.85E-06
+5.28E+02 2.67E-06
+5.39E+02 2.50E-06
+5.50E+02 2.34E-06
+5.61E+02 2.20E-06
+5.72E+02 2.06E-06
+5.84E+02 1.93E-06
+5.96E+02 1.81E-06
+6.08E+02 1.69E-06
+6.20E+02 1.58E-06
+6.32E+02 1.48E-06
+6.45E+02 1.39E-06
diff --git a/barcoderunner.cc b/barcoderunner.cc
new file mode 100644
index 0000000..624ce0b
--- /dev/null
+++ b/barcoderunner.cc
@@ -0,0 +1,541 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+#include "struct_main.h"
+#include "fftw_array.h"
+
+#include <math.h>
+#include <iomanip>
+#include <string.h>
+#include <cassert>
+#include <sstream>
+#include <algorithm>  // std::max_element
+
+#include <gsl/gsl_randist.h>
+#include <gsl/gsl_integration.h>
+
+#include "planck/paramfile.h"
+
+#include "transf.h"
+#include "field_statistics.h"
+#include "Lag2Eul.h"
+#include "IOfunctions.h"
+#include "IOfunctionsGen.h"
+#include "math_funcs.h"
+#include "protocol.h"
+#include "sample_maker.h"
+
+#include "convenience.h"
+
+#include "HMC_models.h"
+
+using namespace std;
+
+void setup_random_test(struct DATA *data, real_prec *delta_lag, real_prec *delta_eul, unsigned int facL,
+                       real_prec *posx, real_prec *posy, real_prec *posz, gsl_rng *gBaseRand)
+{
+  struct NUMERICAL *n = data->numerical;
+  struct OBSERVATIONAL *o = data->observational;
+  struct COSMOLOGY *c = data->cosmology;
+
+  fftw_array<real_prec> kmode(n->N_bin), power(n->N_bin);
+
+  // lagrangian
+  create_GARFIELD(n->N1, n->N2, n->N3, n->L1, n->L2, n->L3, delta_lag, o->Power, gBaseRand);
+
+  //dump_scalar(delta_lag, n->N1, n->N2, n->N3, n->L1, n->L2, n->L3, 0, n->dir + string("deltaLAGtest"));
+  measure_spectrum(n->N1, n->N2, n->N3, n->L1, n->L2, n->L3, delta_lag, kmode, power, n->N_bin);
+  dump_measured_spec(kmode, power, n->dir + string("specLAGtest.dat"), n->N_bin);
+
+  // eulerian
+  bool reggrid=true;
+  real_prec kernel_scale = SPH_kernel_scale<struct DATA>(data);
+  //real_prec kernel_scale;
+  //if (n->mk == 3)
+    //kernel_scale = n->particle_kernel_h;
+  //else
+    //kernel_scale = 0;
+
+  if (n->random_test_rsd)
+  {
+    // cout << " **** random RSD test! **** " << endl;
+    Lag2Eul_rsd_zeldovich(delta_lag, delta_eul, posx, posy, posz, n->N1, n->N2, n->N3, n->L1, n->L2, n->L3, n->d1,
+                          n->d2, n->d3, n->xllc, n->yllc, n->zllc, c->D1, c->ascale, c->omega_m, c->omega_q, n->mk,
+                          facL,
+                          reggrid, gBaseRand, kernel_scale, n->xobs, n->yobs, n->zobs, n->planepar, n->periodic,
+                          n->R2Cplan, n->C2Rplan);
+  }
+  else
+  {
+    // cout << " **** normal random test! **** " << endl;
+    Lag2Eul(delta_lag, delta_eul, posx, posy, posz, n->N1, n->N2, n->N3, n->L1, n->L2, n->L3, n->d1, n->d2, n->d3,
+            n->xllc, n->yllc, n->zllc, c->D1, c->D2, c->ascale, c->omega_m, c->omega_q, n->sfmodel, n->mk, n->slength, facL,
+            reggrid, gBaseRand, n->dir, kernel_scale, n->R2Cplan, n->C2Rplan);
+  }
+
+  //dump_scalar(delta_eul, n->N1, n->N2, n->N3, n->L1, n->L2, n->L3, 0, n->dir + string("deltaEULtest"));
+  dump_deltas(data, delta_lag, delta_eul, string("test"));
+  //measure_and_dump_spectrum(delta_eul, N1, N2, N3, L1, L2, L3, N_bin, n->dir + string("specEULtest.dat")); // build this function
+  measure_spectrum(n->N1, n->N2, n->N3, n->L1, n->L2, n->L3, delta_eul, kmode, power, n->N_bin);
+  dump_measured_spec(kmode, power,  n->dir + string("specEULtest.dat"), n->N_bin);
+
+  // window
+  switch (n->window_type)
+  {
+    case 1: // fill with ones
+      fill_one(o->window, n->N);
+      break;
+    case 10: // fill half with ones, half with zeros (for testing)
+      fill_one(o->window, n->N);
+      fillZero(o->window, n->N/2);
+      break;
+    case 23: // put zeros where delta_eul > 3, ones otherwise
+      for (ULONG i = 0; i < n->N; ++i)
+      {
+        if (delta_eul[i] > 3)
+          o->window[i] = 1;
+        else
+          o->window[i] = 0;
+      }
+      break;
+    default:
+      stringstream message;
+      message << "in barcoderunner: window_type = " << n->window_type << " is not a valid choice!";
+      throw BarcodeException(message.str());
+  }
+  dump_scalar(o->window, n->N1, n->N2, n->N3, n->dir + string("win"));
+
+  // nobs and noise_sf
+  switch (n->data_model)
+  {
+    case 0: // linear data model 
+#ifdef MULTITHREAD_RNG
+#pragma omp parallel for
+#endif // MULTITHREAD_RNG
+      for(ULONG i=0;i<n->N;i++)
+      {
+        real_prec Lambda = o->rho_c * (num_1+delta_eul[i]);
+        if (o->window[i]>0.)
+        { 
+          switch (n->likelihood)
+          {
+            case 0: // Poissonian likelihood
+              o->nobs[i]=static_cast<real_prec>(gsl_ran_poisson(gBaseRand,Lambda));
+              break;
+            case 1: // Gaussian likelihood
+              {
+                real_prec sigma_obs = o->sigma_min + o->sigma_fac*Lambda;
+                o->noise_sf[i] = sigma_obs;
+
+                real_prec nobs = Lambda + static_cast<real_prec>(GR_NUM(gBaseRand, sigma_obs, 0));
+                // Keep above 0 (or not):
+                if (!n->negative_obs && nobs < 0) {
+                  nobs = 0;
+                }
+                o->nobs[i] = nobs;
+              }
+              break;
+            case 3: // GRF
+              {
+                // sigma quadratic in terms of lambda (test)
+                real_prec sigma_obs = o->sigma_min + o->sigma_fac * gsl_pow_2(delta_lag[i]);
+                o->noise_sf[i] = sigma_obs;
+
+                o->nobs[i] = delta_lag[i] + static_cast<real_prec>(GR_NUM(gBaseRand, sigma_obs, 0));
+              }
+              break;
+            default:
+              throw BarcodeException("in barcoderunner: linear data model was chosen (additive error), but incompatible likelihood!");
+          }
+        }
+        else
+          o->nobs[i] = static_cast<real_prec>(0.0);
+      }
+      break;
+    case 1: // log-normal data model
+#ifdef MULTITHREAD_RNG
+#pragma omp parallel for
+#endif // MULTITHREAD_RNG 
+      for (ULONG i = 0; i < n->N; i++)
+      {
+        real_prec delta_i = delta_eul[i];
+        real_prec Lambda = lognormal_likelihood_f_delta_x_i_calc(o->rho_c, o->delta_min, delta_i);
+
+        if (o->window[i]>0.)
+        { 
+          real_prec sigma_obs = o->sigma_fac;
+          o->noise_sf[i] = sigma_obs;
+
+          real_prec nobs = Lambda + static_cast<real_prec>(GR_NUM(gBaseRand, sigma_obs, 0));
+          o->nobs[i] = nobs;
+        }
+        else
+          o->nobs[i] = static_cast<real_prec>(log(gsl_pow_2(o->rho_c * (1 + o->delta_min)))); // huh, why squared?
+      }
+      break;
+    default:
+      stringstream message;
+      message << "in barcoderunner: data_model = " << n->data_model << " is not a valid choice!";
+      throw BarcodeException(message.str());
+  }
+
+  // Gaussian (RSD) and GRF likelihood can't have zero noise terms, or you'll get NaNs.
+  if ((n->likelihood == 1) || (n->likelihood == 3))
+    for (ULONG i = 0; i < n->N; i++)
+      if (o->window[i] > 0)
+        if (o->noise_sf[i] == 0.) {
+          stringstream message;
+          message << "in barcoderunner(): noise = 0 found! Index " << i;
+          throw BarcodeException(message.str());
+        }
+
+  dump_scalar(o->nobs, n->N1, n->N2, n->N3, n->dir + string("nobs"));
+  dump_scalar(o->noise_sf, n->N1, n->N2, n->N3, n->dir + string("sigma"));
+
+  measure_spectrum(n->N1, n->N2, n->N3, n->L1, n->L2, n->L3,o->nobs,kmode,power, n->N_bin);
+  dump_measured_spec(kmode,power, n->dir + string("spec_nobs.dat"), n->N_bin);
+}
+
+void make_initial_guess(struct DATA *data, gsl_rng *gBaseRand)
+{
+  struct NUMERICAL *n = data->numerical;
+  struct OBSERVATIONAL *o = data->observational;
+  // Setting the initial guess in Lagrangian (signal) space 
+  switch (n->initial_guess)
+  {
+    case 0: // zero initial guess 
+      fillZero(o->signal, n->N);
+      break;
+    case 1: // initial guess from file 
+      get_scalar(n->dir + n->initial_guess_file, o->signal, n->N1, n->N2, n->N3);
+      break;
+    case 2: // GRF initial guess 
+      create_GARFIELD(n->N1, n->N2, n->N3, n->L1, n->L2, n->L3, o->signal, o->Power, gBaseRand);
+      break;
+    case 3: // smoothed GRF initial guess 
+      {
+        create_GARFIELD(n->N1, n->N2, n->N3, n->L1, n->L2, n->L3, o->signal, o->Power, gBaseRand);
+        int filtertype = n->initial_guess_smoothing_type;
+        real_prec smoothing_length = n->initial_guess_smoothing_scale;
+        kernelcomp(n->L1, n->L2, n->L3, n->N1, n->N2, n->N3, smoothing_length, filtertype, data);
+        convcomp(n->N1, n->N2, n->N3, o->signal, o->signal, smoothing_length, n->dir);
+      }
+      break;
+    case 4: // zero plus some random noise
+      {
+        fillZero(o->signal, n->N);
+#ifdef MULTITHREAD_RNG
+#pragma omp parallel for
+#endif // MULTITHREAD_RNG
+        for (ULONG i = 0; i < n->N; ++i)
+          o->signal[i] += static_cast<real_prec>(gsl_ran_gaussian(gBaseRand, 1.e-1));
+      }
+      break;
+    default:
+      stringstream message;
+      message << "In barcoderunner: invalid choice of initial_guess (" << n->initial_guess << ")!";
+      throw BarcodeException(message.str());
+  }
+}
+
+unsigned int initial_iteration_number(struct DATA *data)
+{
+  struct NUMERICAL *n = data->numerical;
+
+  unsigned resnum = 0;
+#ifdef RESTART_FILE
+  string fname= n->dir + string("restart");
+  ifstream inStream;
+  inStream.open(fname.data());
+
+  if (inStream.is_open() == true )
+  {
+    cout<<endl;
+    cout<<"attention: restart command!"<<endl;
+
+    string fname2= n->dir + string("restart.prt");
+    ifstream inStream2;
+    inStream2.open(fname2.data());
+    assert(inStream2.is_open());
+
+    inStream2 >> resnum;
+
+    inStream2.close();
+#else // RESTART_FILE, i.e. the following is for when RESTART_FILE is not defined:
+  if (n->start_at > 0)
+  {
+    resnum = n->start_at;
+#endif // RESTART_FILE
+    cout<<"restart file: "<<resnum<<endl;
+  }
+
+  return(resnum);
+}
+
+
+void load_initial_fields(struct DATA *data, ULONG resnum, gsl_rng *gBaseRand)
+{
+  struct NUMERICAL *n = data->numerical;
+  struct OBSERVATIONAL *o = data->observational;
+
+  if (resnum > 0) // restart from existing snapshots
+  {
+    {
+      string fname_res= n->dir + string("deltaLAG");
+      int bmax_res=100;
+      char buffer_res[bmax_res];
+      sprintf(buffer_res,"_%lu",resnum);
+      string FileName_res=string(fname_res)+buffer_res;
+      char * file_res;
+      file_res = new char[FileName_res.length() + 1];
+      strcpy(file_res, FileName_res.c_str());
+      get_scalar(file_res, o->signal, n->N1, n->N2, n->N3);
+      delete[] file_res;
+    }
+
+    get_scalar( n->dir + string("win"), o->window, n->N1, n->N2, n->N3);
+    get_scalar( n->dir + string("nobs"), o->nobs, n->N1, n->N2, n->N3);
+    get_scalar( n->dir + string("sigma"),  o->noise_sf,  n->N1,  n->N2,  n->N3);
+  }
+  else
+  {
+    if (n->random_test)
+    {
+      fftw_array<real_prec> delta_lag(n->N), delta_eul(n->N);
+      unsigned facL=1; // if you change this, you have to change the size of the pos arrays as well
+      fftw_array<real_prec> posx(n->N),posy(n->N),posz(n->N);
+      setup_random_test(data, delta_lag, delta_eul, facL, posx, posy, posz, gBaseRand);
+    }
+    else
+    {
+      get_scalar( n->dir + string("win"),  o->window, n->N1, n->N2, n->N3);
+      get_scalar( n->dir + string("nobs"),  o->nobs, n->N1, n->N2, n->N3);
+      get_scalar( n->dir + string("sigma"),  o->noise_sf, n->N1, n->N2, n->N3);
+    }
+
+    make_initial_guess(data, gBaseRand); // in lagrangian (signal) space
+    // dump initial guess:
+    dump_scalar(o->signal, n->N1, n->N2, n->N3, n->dir + string("initial_guess"));
+    fftw_array<real_prec> kmode(n->N_bin), power(n->N_bin);
+    measure_spectrum(n->N1, n->N2, n->N3, n->L1, n->L2, n->L3, o->signal,kmode,power,n->N_bin);
+    dump_measured_spec(kmode,power, n->dir + string("spec_initial_guess.dat"), n->N_bin);
+  }
+
+  // The signal in Eulerian space doesn't need to be set, the
+  // first time it will be used it will be filled with Lag2Eul first.
+  // But, let's set it to zero, just for fun.
+  fillZero( o->signalX, n->N);
+  // EGP: that was fun!
+
+  // Do remember to do the above when you need it! For instance, I forgot to do
+  // Lag2Eul inside the likeli_force_1st_order_diagonal_mass function and before
+  // that function no Lag2Eul calls have been made yet and posx,y,z were set
+  // initially to zero, so that gave crap results.
+  // I considered filling the arrays correctly here already, but actually the
+  // pos arrays are not passed along to the HAMIL_DATA struct.
+}
+
+
+void initialize_performance_log(struct DATA *data, ULONG resnum)
+{
+  if (resnum == 0)
+    data->numerical->performance_log.open("performance_log.txt");
+  else
+    data->numerical->performance_log.open("performance_log.txt", ofstream::out | ofstream::app); // "out" for writing (must be specified), "app" for appending instead of overwriting (necessary at restart)
+
+  assert(data->numerical->performance_log.is_open());
+  if (resnum == 0)
+  {
+    data->numerical->performance_log << "accepted\tepsilon\tNeps\tdH\tdK\tdE\tdprior\tdlikeli\t";
+    data->numerical->performance_log << "psi_prior_i\tpsi_prior_f\tpsi_likeli_i\tpsi_likeli_f\tH_kin_i\tH_kin_f" << endl;
+  }
+}
+
+
+
+void barcoderunner(struct DATA *data,gsl_rng * gBaseRand)
+{
+  struct NUMERICAL *n = data->numerical;
+  struct OBSERVATIONAL *o = data->observational;
+
+  // prepare smoothing kernels and transfer functions
+  {
+    real_prec kthsc=n->slength;//attention!!
+    int ftype=1;//attention!!
+
+    kernelcomp(n->L1, n->L2, n->L3, n->N1, n->N2, n->N3, n->slength, ftype, data);
+
+    if (kthsc!=n->slength)
+      kernelcomp(n->L1, n->L2, n->L3, n->N1, n->N2, n->N3, kthsc, ftype, data);
+
+#ifdef TRANSF
+    int sftype;
+    switch (n->sfmodel)
+    {
+      case 0:
+        {
+          cout<<"does not apply"<<endl;
+        }
+        break;
+      case 1:
+        {
+          sftype=1;
+          transflpt(n->N1, n->N2, n->N3, n->L1, n->L2, n->L3, n->d1, n->d2, n->d3,n->slength, sftype, n->dir);
+        }
+        break;
+      case 2: // same as case 3
+      case 3:
+        {
+          sftype=1;
+          transflpt(n->N1, n->N2, n->N3, n->L1, n->L2, n->L3, n->d1, n->d2, n->d3,n->slength, sftype, n->dir);
+          sftype=2;
+          transflpt(n->N1, n->N2, n->N3, n->L1, n->L2, n->L3, n->d1, n->d2, n->d3,n->slength, sftype, n->dir);
+        }
+        break;
+    }
+#endif
+  }
+  // END prepare smoothing kernels and transfer functions
+
+  unsigned resnum = initial_iteration_number(data); // restart number (0 if not restarting)
+  unsigned iistart = resnum + 1;
+
+  // read out possibly pre-existing log to set rejections and epsilon back to
+  // what they were before restarting
+  ifstream plog_read ("performance_log.txt");
+  auto plog_line_count = count(istreambuf_iterator<char>(plog_read),
+                               istreambuf_iterator<char>(), '\n');
+  plog_read.seekg(0, plog_read.beg);  // move "cursor" back to start of stream
+
+  if (plog_line_count > 0 && resnum > 0)
+  {
+    cout << "Performance log found with " << plog_line_count << " lines. Reading..." << endl;
+    unsigned accepted = 0; // counter for accepted steps
+    unsigned acc; // dummy
+    string line;
+    std::vector<std::string> plog_row;
+    ULONG ix_leapfrog;
+    real_prec epsilon_convert;
+    while ( std::getline(plog_read, line) && (accepted < resnum) )
+    {
+      plog_row = split(line, '\t');
+      std::istringstream acc_ss(plog_row[0]);
+      acc_ss >> acc;
+      if (acc == 0)
+        ++n->rejections;
+      else
+      {
+        ++accepted;
+      }
+
+      // update epsilon circular buffer
+      ix_leapfrog = n->rejections + accepted - 1;
+      std::istringstream eps_ss(plog_row[1]);
+      eps_ss >> epsilon_convert;
+      n->epsilon_N_a[ix_leapfrog % n->N_a_eps_update] = epsilon_convert;
+    }
+
+    std::cout << "accepted/rejected steps reset to " << accepted
+              << "/" << n->rejections << std::endl;
+
+    // reset eps_fac to what it was before restart
+    switch (n->eps_fac_update_type) {
+      case 0:
+      break;
+      case 1:
+      {
+        ULONG total_steps_before_restart = resnum + n->rejections;
+        ULONG updates = total_steps_before_restart / n->s_eps_total;
+        for (ULONG i = 0; i < updates; ++i)
+        {
+          n->eps_fac = power_mean(n->eps_fac, n->eps_fac_target, n->eps_fac_power);
+          cout << "  updating eps_fac to " << n->eps_fac << endl;
+        }
+      }
+      break;
+      case 2:
+      case 3:
+      {
+        // Not actually exact, but probably close enough. Will give a slightly
+        // smaller epsilon initially, leading to a slightly higher initial
+        // acceptance rate than before the restart. Already a lot better than
+        // wasting hours due to ~0 initial acceptance rate.
+        n->eps_fac = *std::max_element(n->epsilon_N_a.begin(),
+                                       n->epsilon_N_a.end());
+        std::cout << "Maximum epsilon (eps_fac) reset to " << n->eps_fac << std::endl;
+      }
+      break;
+    }
+
+  }
+  plog_read.close();
+
+  load_initial_fields(data, resnum, gBaseRand);
+
+  INIT_PROTOCOL_CONV(data);
+  INIT_PROTOCOL_SPEC(data);
+
+  fftw_array<real_prec> kmode(n->N_bin), power(n->N_bin);
+
+  initialize_performance_log(data, resnum);
+
+  // start MCMC iterations 
+  for (ULONG ii=iistart;ii<=n->N_Gibbs;ii++)
+  {
+    // if (n->N_Gibbs>0)
+      // cout<<"\n"<<">>> MCMC-sampling iterations: "<<ii<<endl;
+
+    // set MCMC iteration counter
+
+    n->iGibbs=ii;
+
+    string fnamefe= n->dir + string("fastexit");
+    ifstream inStreamfe;
+    inStreamfe.open(fnamefe.data());
+
+    if (inStreamfe.is_open())
+    {
+      stringstream message;
+      message << "attention: fast exit command!";
+      throw BarcodeException(message.str());
+    }
+    inStreamfe.close();
+
+    bool write_output = 0==ii%n->outnum || ii <= 10;
+    bool write_ps_output = 0==ii%n->outnum_ps || ii <= 10;
+
+#ifdef RESTART_FILE
+    if (( write_output || ii==1) && ii>1)
+      PROTOCOL_RESTART(ii-1, data);
+#endif // RESTART_FILE
+
+    sample_maker(data,gBaseRand);
+
+    if (write_output)
+    {
+      int bmax = 100;
+      char buffer[bmax];  	    
+      sprintf(buffer, "_%lu", ii);
+      dump_deltas(data, o->signal, o->signalX, buffer);
+    }
+
+    if (write_ps_output)
+    {
+      measure_spectrum(n->N1, n->N2, n->N3, n->L1, n->L2, n->L3, o->signal, kmode, power, n->N_bin);
+      dump_ps_it(data,kmode,power);
+    }
+
+    // ULONG total_steps = n->iGibbs + n->rejections;
+
+    // real_prec accepted_steps_f = static_cast<real_prec>(n->iGibbs);
+    // real_prec total_steps_f = static_cast<real_prec>(total_steps);
+    // real_prec acceptance_rate = accepted_steps_f/total_steps_f;
+
+    // cout << "***** Current acceptance rate: " << acceptance_rate << endl;
+  }
+}
diff --git a/barcoderunner.h b/barcoderunner.h
new file mode 100644
index 0000000..faeefd5
--- /dev/null
+++ b/barcoderunner.h
@@ -0,0 +1,20 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+#pragma once
+
+#include "struct_main.h"
+#include <gsl/gsl_rng.h>
+#include "define_opt.h"
+
+void barcoderunner(struct DATA *data,gsl_rng * gBaseRand);
+void load_initial_fields(struct DATA *data, int resnum, gsl_rng *gBaseRand);
+unsigned int initial_iteration_number(struct DATA *data);
+void setup_random_test(struct DATA *data, real_prec *delta_lag, real_prec *delta_eul, unsigned int facL,
+                       real_prec *posx, real_prec *posy, real_prec *posz, gsl_rng *gBaseRand);
+void make_initial_guess(struct DATA *data, gsl_rng *gBaseRand);
diff --git a/calc_power.cc b/calc_power.cc
new file mode 100644
index 0000000..06d848a
--- /dev/null
+++ b/calc_power.cc
@@ -0,0 +1,107 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#include "define_opt.h"
+#include "struct_main.h"
+#include "fftw_array.h"
+
+#include <math.h>
+#include <iomanip>
+#include <cassert>
+
+#include <gsl/gsl_spline.h>
+#include <gsl/gsl_integration.h>
+#include <gsl/gsl_sf.h>
+
+#include "cosmo.h"
+#include "IOfunctionsGen.h"
+#include "math_funcs.h"
+
+#include "convenience.h"
+
+using namespace std;
+
+void readtab(struct DATA *data)
+{
+  real_prec L1=data->numerical->L1, L2=data->numerical->L2, L3=data->numerical->L3;
+  unsigned N1=data->numerical->N1, N2=data->numerical->N2, N3=data->numerical->N3;
+  //ULONG N=N1*N2*N3;
+
+  string fnamePS=data->numerical->fnamePS;
+
+  ULONG nbinPS=count_lines(fnamePS);
+
+  fftw_array<float> kPS(nbinPS);
+  fftw_array<float> powPS(nbinPS);
+
+  cout<<"---> reading from ... "<<fnamePS<<endl;
+
+  ifstream inStreamPS;
+  inStreamPS.open(fnamePS.data());
+  assert(inStreamPS.is_open());
+  for(ULONG i=0;i<nbinPS;i++)
+  {
+    inStreamPS >>kPS[i]>>powPS[i];
+    if (i<10)
+      cout<<kPS[i]<<" "<<powPS[i]<<endl;
+  }
+  inStreamPS.close();
+
+  fftw_array<gsl_real> kPSg(nbinPS);
+  fftw_array<gsl_real> powPSg(nbinPS);
+
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+  for(ULONG i=0;i<nbinPS;i++)
+  {
+    kPSg[i]=static_cast<gsl_real>(kPS[i]);
+    powPSg[i]=static_cast<gsl_real>(powPS[i]);
+  }
+
+  //EGP  real_prec kmax=sqrt(k_squared(N1/2,N2/2,N3/2,L1,L2,L3,N1,N2,N3));
+  //EGP  real_prec dk=static_cast<real_prec>(kmax/real_prec(nbinPS));
+
+  gsl_interp *interpolation = gsl_interp_alloc (gsl_interp_linear,nbinPS);
+  gsl_interp_init(interpolation,kPSg,powPSg,nbinPS);
+  gsl_interp_accel * accelerator = gsl_interp_accel_alloc();
+
+  //EGP  real_prec Vol=L1*L2*L3;
+  //EGP  real_prec CONT_NORM=Vol;
+  //EGP  real_prec Norm=1.0;
+
+  //EGP  #ifdef          FOURIER_DEF_1
+  //EGP   Norm=static_cast<real_prec>(1./CONT_NORM);
+  //EGP #endif
+
+  //EGP #ifdef          FOURIER_DEF_2
+  //EGP   Norm=static_cast<real_prec>(real_prec(N)*real_prec(N)/CONT_NORM);
+  //EGP #endif
+
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+  for(unsigned i=0;i<N1;i++)
+    for(unsigned j=0;j<N2;j++)
+      for(unsigned k=0;k<N3;k++)
+      {
+        real_prec k2=k_squared(i,j,k,L1,L2,L3,N1,N2,N3);
+        gsl_real ktot=sqrt(k2);
+
+        ULONG iid=k+N3*(j+N2*i);
+
+        data->observational->Power[iid]=static_cast<real_prec>(gsl_interp_eval(interpolation,kPSg,powPSg,ktot,accelerator));//EGP *Norm);
+
+        if(i==0 && j==0 && k==0)
+          data->observational->Power[iid]=0.;
+      }
+  gsl_interp_accel_free (accelerator);
+  gsl_interp_free(interpolation);
+}
+
diff --git a/calc_power.h b/calc_power.h
new file mode 100644
index 0000000..713bf70
--- /dev/null
+++ b/calc_power.h
@@ -0,0 +1,12 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#pragma once
+
+void readtab(struct DATA *data);
diff --git a/call_hamil.cc b/call_hamil.cc
new file mode 100644
index 0000000..2acd36c
--- /dev/null
+++ b/call_hamil.cc
@@ -0,0 +1,64 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+#include "define_opt.h"
+#include "struct_hamil.h"
+#include "HMC.h"
+
+#include "fftw_array.h"
+
+
+void call_hamil(struct DATA *data, gsl_rng * seed) {
+  struct NUMERICAL *dn = data->numerical;
+
+  unsigned N1 = dn->N1;
+  unsigned N2 = dn->N2;
+  unsigned N3 = dn->N3;
+
+  ULONG  N = N1*N2*N3;
+
+  // // prepare SPH kernel stuff
+  // int particle_kernel_type = data->numerical->particle_kernel;
+  // real_prec particle_kernel_h = data->numerical->particle_kernel_h;
+  // real_prec d1 = data->numerical->d1;
+  // real_prec d2 = data->numerical->d2;
+  // real_prec d3 = data->numerical->d3;
+  // int N_cells = SPH_kernel_3D_cells_count(particle_kernel_type,
+  //                                         particle_kernel_h, d1, d2, d3);
+  // std::vector<int> cells_i, cells_j, cells_k;
+  // SPH_kernel_3D_cells(particle_kernel_type, particle_kernel_h, d1, d2, d3,
+  //                     cells_i, cells_j, cells_k);
+
+  // set arrays
+  fftw_array<real_prec> A(N), B_f(N), B_r(N), C(N), D(N), E(N);
+
+  struct HAMIL_DATA *hamil_data;
+  // memory allocation & initialization (== construction)
+  hamil_data = new HAMIL_DATA(data, A, B_f, B_r, C, D, E, dn->kernel_cells_i,
+                              dn->kernel_cells_j, dn->kernel_cells_k,
+                              dn->N_cells);
+
+  if (hamil_data == NULL) {
+    delete hamil_data;
+    throw BarcodeException("Hamiltonian: error allocating memory....");
+  }
+
+
+  HamiltonianMC(hamil_data, seed, data);
+
+  // hand over inversion success variable
+  dn->INV_SUCCESS = hamil_data->numerical->INV_SUCCESS;
+
+  // EGP: hand over rejections
+  dn->rejections += hamil_data->numerical->rejections;
+
+  // hand back possibly change eps_fac
+  dn->eps_fac = hamil_data->numerical->eps_fac;
+
+  delete hamil_data;
+}
diff --git a/call_hamil.h b/call_hamil.h
new file mode 100644
index 0000000..c595ccc
--- /dev/null
+++ b/call_hamil.h
@@ -0,0 +1,11 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+#pragma once
+
+void call_hamil(struct DATA *data, gsl_rng * seed);
diff --git a/cmake/Modules/AddDefaultBinary.cmake b/cmake/Modules/AddDefaultBinary.cmake
new file mode 100644
index 0000000..39ddd1b
--- /dev/null
+++ b/cmake/Modules/AddDefaultBinary.cmake
@@ -0,0 +1,39 @@
+#
+# Barcode
+# Copyright E.G.P. Bos and F.S. Kitaura
+#
+# Distributed under the terms of the MIT License.
+# The full license is in the file LICENSE, distributed with this software.
+#
+
+function(ADD_DEFAULT_BINARY binary_name source)
+    add_executable(${binary_name} ${source})
+
+    # optional argument: library dependencies, barlib by default
+    if (ARG3)
+        set(LIBS ${ARG3})
+    else()
+        set(LIBS barlib)
+    endif(ARG3)
+
+    target_link_libraries(${binary_name} ${LIBS} ${GSL_LIBRARIES} m ncurses)
+
+    if (DOUBLE_PREC)
+        target_link_libraries(${binary_name} ${FFTW_DOUBLE_LIB})
+        if (MULTITHREAD_FFTW)
+            target_link_libraries(${binary_name} ${FFTW_DOUBLE_OPENMP_LIB})
+        endif()
+    elseif(SINGLE_PREC)
+        target_link_libraries(${binary_name} ${FFTW_FLOAT_LIB})
+        if (MULTITHREAD_FFTW)
+            target_link_libraries(${binary_name} ${FFTW_FLOAT_OPENMP_LIB})
+        endif()
+    endif()
+
+    if (NOT INTEL)
+        if (MULTITHREAD)
+            target_link_libraries(${binary_name} gomp)
+            target_compile_options(${binary_name} PUBLIC -fopenmp)
+        endif()
+    endif()
+endfunction(ADD_DEFAULT_BINARY)
diff --git a/cmake/downloadFindFFTW.cmake.in b/cmake/downloadFindFFTW.cmake.in
new file mode 100644
index 0000000..ac2338a
--- /dev/null
+++ b/cmake/downloadFindFFTW.cmake.in
@@ -0,0 +1,16 @@
+cmake_minimum_required(VERSION 2.8.2)
+
+project(findFFTW-download NONE)
+
+include(ExternalProject)
+
+ExternalProject_Add(findFFTW_download
+    GIT_REPOSITORY    "https://github.com/egpbos/findfftw.git"
+    CONFIGURE_COMMAND ""
+    BUILD_COMMAND     ""
+    INSTALL_COMMAND   ""
+    TEST_COMMAND      ""
+    SOURCE_DIR        "${CMAKE_CURRENT_BINARY_DIR}/findFFTW-src"
+    BINARY_DIR        ""
+    INSTALL_DIR       ""
+)
diff --git a/cmake/hg_rev_filename.sh b/cmake/hg_rev_filename.sh
new file mode 100644
index 0000000..ab20d03
--- /dev/null
+++ b/cmake/hg_rev_filename.sh
@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+#
+# Barcode
+# Copyright E.G.P. Bos and F.S. Kitaura
+#
+# Distributed under the terms of the MIT License.
+# The full license is in the file LICENSE, distributed with this software.
+#
+
+PROJECT_BINARY_DIR=$1
+OUTNAME=$2
+
+#echo "$0"
+#echo "$1"
+#echo "$2"
+
+OLD_PATH="${PROJECT_BINARY_DIR}/${OUTNAME}"
+
+LATEST_PATH="${PROJECT_BINARY_DIR}/${OUTNAME}_latest_build"
+
+HG_REV=$(hg parents --template '{rev}')
+
+if [[ $(hg status 2>/dev/null) ]]
+then
+    NEW_PATH="${PROJECT_BINARY_DIR}/${OUTNAME}_r${HG_REV}"_uncommitted
+else
+    NEW_PATH="${PROJECT_BINARY_DIR}/${OUTNAME}_r${HG_REV}"
+fi
+
+mv "$OLD_PATH" "$NEW_PATH"
+rm "$LATEST_PATH"
+ln -s "$NEW_PATH" "$LATEST_PATH"
+
+#echo "Renamed output binary ${OLD_PATH} to ${NEW_PATH} and created symbolic link barcode_latest_build to new path."
diff --git a/convenience.cc b/convenience.cc
new file mode 100644
index 0000000..a9ad01e
--- /dev/null
+++ b/convenience.cc
@@ -0,0 +1,269 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+#include <cmath>
+#include <iostream>
+#include <sstream>
+#include <vector>
+
+#include "./define_opt.h"
+
+// using namespace std;
+
+// TODO: almost all functions are in fact array functions. Rename file to array.cc and move other stuff out (absolute_squared and split functions)
+
+void copyArray(real_prec *from, real_prec *to, ULONG size) {
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif  // MULTITHREAD
+  for (ULONG i = 0; i < size; i++)
+    to[i] = from[i];
+}
+
+void copyArray(complex_prec *from, complex_prec *to, ULONG size) {
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif  // MULTITHREAD
+  for (ULONG i = 0; i < size; i++) {
+    re(to[i]) = re(from[i]);
+    im(to[i]) = im(from[i]);
+  }
+}
+
+void sum_arrays(real_prec *array1, real_prec *array2, real_prec *out,
+                ULONG size) {
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif  // MULTITHREAD
+  for (ULONG i = 0; i < size; i++)
+    out[i] = array1[i] + array2[i];
+}
+
+void multiplyArrays(real_prec *array1, real_prec *array2, real_prec *out,
+                    ULONG size) {
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif  // MULTITHREAD
+  for (ULONG i = 0; i < size; i++)
+    out[i] = array1[i] * array2[i];
+}
+
+void multiply_factor_array(real_prec factor, real_prec *array, real_prec *out,
+                           ULONG size) {
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif  // MULTITHREAD
+  for (ULONG i = 0; i < size; i++)
+    out[i] = factor * array[i];
+}
+
+void subtract_arrays(real_prec *array1, real_prec *array2, real_prec *out,
+                     ULONG size) {
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif  // MULTITHREAD
+  for (ULONG i = 0; i < size; i++)
+    out[i] = array1[i] - array2[i];
+}
+
+void add_to_array(real_prec addition, real_prec *out, ULONG size) {
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif  // MULTITHREAD
+  for (ULONG i = 0; i < size; i++)
+    out[i] += addition;
+}
+
+void add_to_array(real_prec *addition, real_prec *out, ULONG size) {
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif  // MULTITHREAD
+  for (ULONG i = 0; i < size; i++)
+    out[i] += addition[i];
+}
+
+void add_to_array(complex_prec *addition, complex_prec *out, ULONG size) {
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif  // MULTITHREAD
+  for (ULONG j = 0; j < size; j++) {
+    re(out[j]) += re(addition[j]);
+    im(out[j]) += im(addition[j]);
+  }
+}
+
+void fill_one(real_prec *out, ULONG size) {
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif  // MULTITHREAD
+  for (ULONG i = 0; i < size; i++)
+    out[i] = 1.0;
+}
+
+void fill_one(complex_prec *out, ULONG size) {
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif  // MULTITHREAD
+  for (ULONG i = 0; i < size; i++) {
+    re(out[i]) = 1.0;
+    im(out[i]) = 1.0;
+  }
+}
+
+void fillZero(real_prec *out, ULONG size) {
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif  // MULTITHREAD
+  for (ULONG i = 0; i < size; i++)
+    out[i] = 0.0;
+}
+
+void fillZero(ULONG *out, ULONG size) {
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif  // MULTITHREAD
+  for (ULONG i = 0; i < size; i++)
+    out[i] = 0;
+}
+
+void fillZero(complex_prec *out, ULONG size) {
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif  // MULTITHREAD
+  for (ULONG i = 0; i < size; i++) {
+    re(out[i])=0.0;
+    im(out[i])=0.0;
+  }
+}
+
+void flip_sign(real_prec *in, real_prec *out, ULONG size) {
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif  // MULTITHREAD
+  for (ULONG j = 0; j < size; j++)
+    out[j] = -in[j];
+}
+
+void flip_sign(complex_prec *in, complex_prec *out, ULONG size) {
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif  // MULTITHREAD
+  for (ULONG j = 0; j < size; j++) {
+    re(out[j]) = -re(in[j]);
+    im(out[j]) = -im(in[j]);
+  }
+}
+
+void complexify_array(real_prec *in, complex_prec *out, ULONG size) {
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif  // MULTITHREAD
+  for (ULONG i = 0; i < size; i++) {
+    re(out[i]) = in[i];
+    im(out[i]) = 0.;
+  }
+}
+
+void real_part_array(complex_prec *in, real_prec *out, ULONG size) {
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif  // MULTITHREAD
+  for (ULONG i = 0; i < size; i++)
+    out[i] = re(in[i]);
+}
+
+void imaginary_part_array(complex_prec *in, real_prec *out, ULONG size) {
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif  // MULTITHREAD
+  for (ULONG i = 0; i < size; i++)
+    out[i] = im(in[i]);
+}
+
+
+bool contains_nan(real_prec *in, ULONG size) {
+  for (ULONG i = 0; i < size; ++i)
+    if (std::isnan(in[i])) {
+      std::cout << "found a NaN at i = " << i << "!" << std::endl;
+      return (true);
+    }
+  return false;
+}
+
+bool contains_nan(complex_prec *in, ULONG size) {
+  for (ULONG i = 0; i < size; ++i)
+    if (std::isnan(re(in[i])) || std::isnan(im(in[i]))) {
+      std::cout << "found a NaN at i = " << i << "!" << std::endl;
+      return (true);
+    }
+  return false;
+}
+
+
+// Operations in Fourier space //
+
+void conjugate_array(complex_prec *in, complex_prec *out, ULONG size) {
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif  // MULTITHREAD
+  for (ULONG i = 0; i < size; i++) {
+    re(out[i]) = re(in[i]);
+    im(out[i]) = -im(in[i]);
+  }
+}
+
+void times_i_array(complex_prec *in, complex_prec *out, ULONG size) {
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif  // MULTITHREAD
+  for (ULONG i = 0; i < size; i++) {
+    real_prec temp = re(in[i]);  // for in-place usage (out == in)
+    re(out[i]) = -im(in[i]);
+    im(out[i]) = temp;
+  }
+}
+
+
+// TODO: This should go to math_funcs or a new scalar_math file
+real_prec absolute_squared(complex_prec in) {
+  return re(in)*re(in) + im(in)*im(in);
+}
+
+void absolute_squared_array(complex_prec *in, real_prec *out, ULONG size) {
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif  // MULTITHREAD
+  for (ULONG i = 0; i < size; i++)
+    out[i] = absolute_squared(in[i]);
+}
+
+void absolute_squared_array(complex_prec *in, complex_prec *out, ULONG size) {
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif  // MULTITHREAD
+  for (ULONG i = 0; i < size; i++)
+    re(out[i]) = absolute_squared(in[i]);
+}
+
+
+// split string (http://stackoverflow.com/a/236803/1199693)
+std::vector<std::string> &split(const std::string &s, char delim,
+                                std::vector<std::string> &elems) {
+    std::stringstream ss(s);
+    std::string item;
+    while (std::getline(ss, item, delim)) {
+        elems.push_back(item);
+    }
+    return elems;
+}
+
+std::vector<std::string> split(const std::string &s, char delim) {
+    std::vector<std::string> elems;
+    split(s, delim, elems);
+    return elems;
+}
diff --git a/convenience.h b/convenience.h
new file mode 100644
index 0000000..8c2ade8
--- /dev/null
+++ b/convenience.h
@@ -0,0 +1,49 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+#pragma once
+#include <string>
+#include <vector>
+#include "./define_opt.h"
+
+void copyArray(real_prec *from, real_prec *to, ULONG size);
+void copyArray(complex_prec *from, complex_prec *to, ULONG size);
+void fillZero(real_prec *out, ULONG size);
+void fill_one(real_prec *out, ULONG size);
+void fill_one(complex_prec *out, ULONG size);
+void fillZero(ULONG *out, ULONG size);
+void fillZero(complex_prec *out, ULONG size);
+void sum_arrays(real_prec *array1, real_prec *array2, real_prec *out,
+                ULONG size);
+void multiplyArrays(real_prec *array1, real_prec *array2, real_prec *out,
+                    ULONG size);
+void multiply_factor_array(real_prec factor, real_prec *array, real_prec *out,
+                           ULONG size);
+void subtract_arrays(real_prec *array1, real_prec *array2, real_prec *out,
+                     ULONG size);
+void add_to_array(real_prec addition, real_prec *out, ULONG size);
+void add_to_array(real_prec *addition, real_prec *out, ULONG size);
+void add_to_array(complex_prec *addition, complex_prec *out, ULONG size);
+void flip_sign(complex_prec *in, complex_prec *out, ULONG size);
+void flip_sign(real_prec *in, real_prec *out, ULONG size);
+void complexify_array(real_prec *in, complex_prec *out, ULONG size);
+void real_part_array(complex_prec *in, real_prec *out, ULONG size);
+void imaginary_part_array(complex_prec *in, real_prec *out, ULONG size);
+
+void conjugate_array(complex_prec *in, complex_prec *out, ULONG size);
+void times_i_array(complex_prec *in, complex_prec *out, ULONG size);
+real_prec absolute_squared(complex_prec in);
+void absolute_squared_array(complex_prec *in, real_prec *out, ULONG size);
+void absolute_squared_array(complex_prec *in, complex_prec *out, ULONG size);
+
+bool contains_nan(real_prec *in, ULONG size);
+bool contains_nan(complex_prec *in, ULONG size);
+
+std::vector<std::string> &split(const std::string &s, char delim,
+                                std::vector<std::string> &elems);
+std::vector<std::string> split(const std::string &s, char delim);
diff --git a/cosmo.cc b/cosmo.cc
new file mode 100644
index 0000000..1f03a05
--- /dev/null
+++ b/cosmo.cc
@@ -0,0 +1,236 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#include "define_opt.h"
+
+#include <math.h>
+#include <fstream>
+
+#include <gsl/gsl_integration.h>
+
+
+real_prec E_Hubble_z(real_prec z, real_prec Omega_M, real_prec Omega_L)
+{
+  real_prec z_fac = 1+z;
+  real_prec Omega_K = num_1 - Omega_M - Omega_L;
+  real_prec E_Hubble = sqrt(Omega_M*z_fac*z_fac*z_fac + Omega_K*z_fac*z_fac + Omega_L);
+  return(E_Hubble);
+}
+
+real_prec E_Hubble_a(real_prec a, real_prec Omega_M, real_prec Omega_L)
+{
+  real_prec Omega_K = num_1 - Omega_M - Omega_L;
+  real_prec E_Hubble = sqrt(Omega_M/(a*a*a) + Omega_K/(a*a) + Omega_L);
+  return(E_Hubble);
+}
+
+// Integration stuff
+struct my_f_params {gsl_real OmM; gsl_real OmK; gsl_real OmL;};
+
+gsl_real
+e_of_z (gsl_real z, void * p) {
+   /*struct my_f_params * params 
+     = (struct my_f_params *)p;*/
+   struct my_f_params * params 
+     = static_cast<struct my_f_params *>( p ); // EGP: static_cast
+
+   gsl_real OmM = (params->OmM);
+   gsl_real OmK = (params->OmK);
+   gsl_real OmL = (params->OmL);
+
+   gsl_real e = sqrt(OmM*(1.+z)*(1.+z)*(1.+z)+OmK*(1.+z)*(1.+z)+OmL);
+
+   return 1./e;
+}
+
+gsl_real
+e_of_z_cube (real_prec z, void * p) {
+   /*struct my_f_params * params 
+     = (struct my_f_params *)p;*/
+   struct my_f_params * params 
+     = static_cast<struct my_f_params *>( p ); // EGP: static_cast
+
+   gsl_real OmM = (params->OmM);
+   gsl_real OmK = (params->OmK);
+   gsl_real OmL = (params->OmL);
+
+   gsl_real e = sqrt(OmM*(1.+z)*(1.+z)*(1.+z)+OmK*(1.+z)*(1.+z)+OmL);
+
+   return (1.+z)*(1.+z)*(1.+z)/(e*e*e);
+}
+
+gsl_real
+growth_var (gsl_real z, void * p) {
+   /*struct my_f_params * params 
+     = (struct my_f_params *)p;*/
+   struct my_f_params * params 
+     = static_cast<struct my_f_params *>( p ); // EGP: static_cast
+
+   gsl_real OmM = (params->OmM);
+   gsl_real OmK = (params->OmK);
+   gsl_real OmL = (params->OmL);
+
+   gsl_real e = sqrt(OmM*(1.+z)*(1.+z)*(1.+z)+OmK*(1.+z)*(1.+z)+OmL);
+
+   return (1.+z)/(e*e*e);
+} 
+// END integration stuff
+
+real_prec calc_dcom(real_prec scale_factor, real_prec Omega_M, real_prec Omega_L, real_prec hconst)
+{
+  real_prec Omega_C=static_cast<real_prec>(1.)-Omega_M-Omega_L;
+
+
+  // Getting Cosmology
+  real_prec H0=static_cast<real_prec>(100.*hconst*cgs_km/cgs_Mpc/cgs_sec);	
+  gsl_integration_workspace * w = gsl_integration_workspace_alloc (1000);
+  
+  gsl_real redshift=static_cast<gsl_real>(1./scale_factor-1.);
+
+  gsl_real result, error, epsabs, epsrel;
+  size_t neval;
+  
+  epsabs=1e-4;
+  epsrel=1e-8;
+
+  gsl_real omega_m=static_cast<gsl_real>(Omega_M);
+  gsl_real omega_k=static_cast<gsl_real>(Omega_C);
+  gsl_real omega_q=static_cast<gsl_real>(Omega_L);
+
+  gsl_function F;
+  struct my_f_params params = {omega_m,omega_k,omega_q};
+
+  F.function = &e_of_z;
+  F.params = &params;
+
+  //gsl_integration_qagiu (&F, redshift, 0, 1e-4, 1000,
+  //		 w, &result, &error);
+  
+  gsl_integration_qng (&F, 0.,redshift, epsabs, epsrel, 
+  		       &result, &error, &neval);
+  
+  gsl_integration_workspace_free (w);
+  return static_cast<real_prec>(result*cgs_clight/H0);
+}
+///____________________________________________________________________
+
+/* --- function D_growth growth factor --- */
+real_prec D_growth(real_prec scale_factor, real_prec Omega_M, real_prec Omega_L, real_prec hconst)
+{
+  // Getting Cosmology
+  real_prec Omega_C=static_cast<real_prec>(1.)-Omega_M-Omega_L;
+
+  real_prec H0=static_cast<real_prec>(hconst*(cgs_km/cgs_Mpc/cgs_sec));
+  real_prec H=static_cast<real_prec>(H0*sqrt(Omega_M/scale_factor/scale_factor/scale_factor+Omega_L+Omega_C/scale_factor/scale_factor)); // Beware only LCDM
+  
+  gsl_integration_workspace * w 
+    = gsl_integration_workspace_alloc (1000);
+  
+  gsl_real result, result2, error, epsabs, epsrel;
+  //EGP size_t neval;
+  
+  gsl_real redshift=static_cast<gsl_real>(1./scale_factor-1.);
+
+  epsabs=1e-4;
+  epsrel=1e-8;
+  
+  gsl_real omega_m=static_cast<gsl_real>(Omega_M);
+  gsl_real omega_k=static_cast<gsl_real>(Omega_C);
+  gsl_real omega_q=static_cast<gsl_real>(Omega_L);
+
+  gsl_function F;
+  struct my_f_params params = {omega_m,omega_k,omega_q};
+
+  F.function = &growth_var;
+  F.params = &params;
+
+  gsl_integration_qagiu(&F,redshift, epsabs, epsrel, 1000, w, &result, &error); // EGP: neval -> 1000
+  gsl_integration_qagiu(&F, 0., epsabs, epsrel, 1000, w, &result2, &error); // EGP: neval -> 1000
+  
+  gsl_integration_workspace_free (w);
+  
+  
+  /*
+  real_prec Om=Omega_M;
+  real_prec Ov=data->cosmology->omega_q;
+  real_prec Ot=Om+Ov;
+
+  real_prec x=a*(1.-Ot)/Ot;
+
+  //Bouchet et al 1995
+  //real_prec D=1.+3./x+3.*sqrt((1.+x)/x/x/x)*log(sqrt(1.+x)-sqrt(x));
+  
+  //Carroll,Press and Turner 1992
+  real_prec D=5./2.*a*Om/((pow(Om,4./7.)-Ov+(1.+0.5*Om)*(1.+1./70.*Ov)));
+  */
+
+      
+  //return D;
+  //printf("D: %f\n", static_cast<real_prec>(H/H0*result/result2)); // EGP: testing
+  return static_cast<real_prec>(H/H0*result/result2);
+  //return H*result/cgs_km;
+}
+/* --- end of function D_growth --- */
+
+//real_prec fgrow(real_prec a, real_prec Omega_M, real_prec Omega_L, real_prec H0, int term)
+real_prec fgrow(real_prec a, real_prec Omega_M, real_prec Omega_L, int term)
+{
+   // we use LCDM and the approximation given in Lahav 1991 MNRAS,252,128
+
+  // E = H/H0
+  real_prec E = E_Hubble_a(a, Omega_M, Omega_L); // Beware only LCDM
+
+  // Om=Om_M/(H/H0)^2/a^3
+  real_prec Omega=Omega_M/((E*E)*(a*a*a));
+
+  // f=dlnD_dlna
+  real_prec f=0.0;
+
+  switch (term)
+    {	      
+    case 1:
+      /*
+       // EGP: There is in fact a general solution in Lahav+91, but it is complicated.
+       //
+       if(num_1 - Omega_M - Omega_L == 0.) // for flat universes we can use an improved fit
+        {
+          f=pow(Omega,0.6)+1./70.*(1.-0.5*Omega*(1.+Omega));
+        }
+      */
+      f=static_cast<real_prec>(pow(Omega,5./9.));
+      break;
+    case 2:
+      f=static_cast<real_prec>(2.*pow(Omega,6./11.));
+      break;
+    case 3:
+      f=static_cast<real_prec>(3.*pow(Omega,13./24.));
+      break;
+    }
+
+  return(f);
+}
+
+//real_prec c_pecvel(real_prec scale_factor, real_prec Omega_M, real_prec Omega_L, real_prec H0)
+real_prec c_pecvel(real_prec scale_factor, real_prec Omega_M, real_prec Omega_L, int term)
+{
+  // EGP: put everything in units of Mpc/h etc., so that we don't need H0 (or h) to be given.
+  // Note: this requires distance units to be in Mpc/h!
+  real_prec H0 = static_cast<real_prec>(100.); // km/s/Mpc*h; i.e. in "Hubble-units"
+
+  real_prec f = fgrow(scale_factor, Omega_M, Omega_L, term);
+  real_prec E = E_Hubble_a(scale_factor, Omega_M, Omega_L);
+
+  //real_prec out=f*H;
+  // EGP: correction, needs to be times scale factor:
+  //real_prec out = f * H * scale_factor;
+  real_prec out = f * H0 * E * scale_factor;
+
+  return(out);
+}
+
diff --git a/cosmo.h b/cosmo.h
new file mode 100644
index 0000000..63a7cd9
--- /dev/null
+++ b/cosmo.h
@@ -0,0 +1,21 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+#pragma once
+#include "define_opt.h"
+
+real_prec calc_dcom(real_prec scale_factor, real_prec Omega_M, real_prec Omega_L, real_prec hconst);
+
+real_prec D_growth(real_prec scale_factor, real_prec Omega_M, real_prec Omega_L, real_prec hconst);
+
+//real_prec c_pecvel(real_prec a, real_prec Omega_M, real_prec Omega_L, real_prec H0);
+real_prec c_pecvel(real_prec a, real_prec Omega_M, real_prec Omega_L, int term);
+
+real_prec fgrow(real_prec a, real_prec Omega_M, real_prec Omega_L, int term);
+
+
diff --git a/curses_funcs.cc b/curses_funcs.cc
new file mode 100644
index 0000000..421ee51
--- /dev/null
+++ b/curses_funcs.cc
@@ -0,0 +1,88 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#include <cstdlib>     // atexit
+// #include <ncurses.h>
+// #include <iostream>
+// #include <signal.h>     // to handle SIGWINCH signal with handle_winch
+
+#include "curses_funcs.h"
+
+
+// from http://hughm.cs.ukzn.ac.za/~murrellh/os/notes/ncurses.html
+void end_curses(struct CURSES_STRUCT *curses) {
+    if (curses->started && !isendwin())
+        endwin();
+}
+
+void end_fast() {
+    endwin();
+}
+
+// from http://hughm.cs.ukzn.ac.za/~murrellh/os/notes/ncurses.html
+void start_curses(struct CURSES_STRUCT *curses) {
+    if (curses->started) {
+        refresh();
+    } else {
+        initscr();
+        start_color();
+        cbreak();
+        noecho();
+        std::atexit(end_fast);  // cannot use end_curses with atexit, because atexit
+                                // doesn't take arguments. If atexit is omitted, the
+                                // terminal will throw a bus error if the program exits
+                                // unexpectedly (e.g. due to a bug or error).
+        intrflush(stdscr, false);
+        keypad(stdscr, true);
+        // colors
+        init_pair(1, COLOR_BLACK, COLOR_YELLOW); // title
+        init_pair(2, COLOR_WHITE, COLOR_BLUE);  // for table
+        init_pair(3, COLOR_WHITE, COLOR_BLACK); // for status and message
+        // init_pair(4, COLOR_WHITE, COLOR_CYAN);  // for table_fixed 
+        curses->started = true;
+    }
+}
+
+void init_windows(struct CURSES_STRUCT *curses) {
+    // initialize windows
+    int ncols, nlines;
+    getmaxyx(stdscr, nlines, ncols);
+
+    curses->title = newwin(1, ncols, 0, 0);
+    curses->message = newwin(1, ncols, 1, 0);
+    curses->status = newwin(1, ncols, 2, 0);
+    curses->header = newwin(1, ncols, 3, 0);
+    // curses->table_fixed = newwin(1, ncols, 2, 0);
+    curses->debug = newwin(2, ncols, 4, 0);
+    // curses->table = newwin(nlines-4, ncols, 4, 0);  // without debug
+    curses->table = newwin(nlines-4-2, ncols, 4+2, 0);  // with debug
+    // colors
+    wattron(curses->title, COLOR_PAIR(1));
+    wbkgd(curses->title, COLOR_PAIR(1));
+    wattron(curses->table, COLOR_PAIR(2));
+    wbkgd(curses->table, COLOR_PAIR(2));
+    // wattron(curses->table_fixed, COLOR_PAIR(4));
+    // wbkgd(curses->table_fixed, COLOR_PAIR(4));
+    wattron(curses->status, COLOR_PAIR(3));
+    wbkgd(curses->status, COLOR_PAIR(3));
+    wattron(curses->message, COLOR_PAIR(3));
+    wbkgd(curses->message, COLOR_PAIR(3));
+    wattron(curses->header, COLOR_PAIR(2));
+    wbkgd(curses->header, COLOR_PAIR(2));
+    wattron(curses->header, A_BOLD);
+
+    wattron(curses->debug, COLOR_PAIR(1));
+    wbkgd(curses->debug, COLOR_PAIR(1));
+
+    // scrolling
+    scrollok(curses->table, TRUE);
+    scrollok(curses->status, TRUE);
+    scrollok(curses->message, TRUE);
+    scrollok(curses->debug, TRUE);
+}
diff --git a/curses_funcs.h b/curses_funcs.h
new file mode 100644
index 0000000..bd825d6
--- /dev/null
+++ b/curses_funcs.h
@@ -0,0 +1,29 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#pragma once
+
+#include <ncurses.h>
+
+struct CURSES_STRUCT {
+    bool started = false;
+    WINDOW *table;
+    WINDOW *status;
+    WINDOW *title;  // codename + pwd
+    WINDOW *header;
+    // WINDOW *table_fixed;  // for permanently displaying some rows, e.g. every
+                          // order of mag (1, 10, 100, 1000, ...)
+    WINDOW *message;
+    WINDOW *debug;
+};
+
+void end_fast();
+void end_curses(struct CURSES_STRUCT *curses);
+void start_curses(struct CURSES_STRUCT *curses);
+void init_windows(struct CURSES_STRUCT *curses);
diff --git a/debug.cc b/debug.cc
new file mode 100644
index 0000000..a2abc8b
--- /dev/null
+++ b/debug.cc
@@ -0,0 +1,310 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#include "define_opt.h"
+#include "struct_main.h"
+#include "struct_hamil.h"
+#include "math_funcs.h" // mean_arr, etc.
+#include "IOfunctionsGen.h" // dumps
+#include "IOfunctions.h" // dump_measured_spec
+#include "field_statistics.h" // measure_spectrum
+
+void debug_array_statistics(real_prec *array, ULONG size, std::string name) {
+  cout << endl << "DEBUG -- mean " << name << ": " << mean_arr(size, array) << endl;
+  cout <<         "DEBUG -- min "  << name << ": " << min_arr(size, array)  << " max " << name << ": " << max_arr(size, array) << endl;
+}
+
+void debug_scalar_dump(real_prec *array, unsigned int N1, unsigned int N2, unsigned int N3, real_prec L1, real_prec L2,
+                       real_prec L3, ULONG N_bin, std::string fname) {
+  dump_scalar(array, N1, N2, N3, fname);
+
+  real_prec power[N_bin];
+  real_prec kmode[N_bin];
+
+  measure_spectrum(N1, N2, N3, L1, L2, L3, array, kmode, power, N_bin);
+  dump_measured_spec(kmode, power, fname + std::string("_spec.dat"), N_bin);
+}
+
+void print_data(struct DATA *d) {
+  using namespace std;
+
+  struct NUMERICAL *n = d->numerical;
+
+  cout << "Numerical\n=========" << endl;
+
+  cout << "random_test:                       " << n->random_test << endl;
+  cout << "random_test_rsd:                   " << n->random_test_rsd << endl;
+
+  cout << "window_type:                       " << n->window_type << endl;
+
+  cout << "data_model:                        " << n->data_model << endl;
+  cout << "likelihood:                        " << n->likelihood << endl;
+  cout << "prior:                             " << n->prior << endl;
+  cout << "sfmodel:                           " << n->sfmodel << endl;
+  cout << "rsd_model:                         " << n->rsd_model << endl;
+  
+  cout << "N_eps_fac:                         " << n->N_eps_fac << endl;
+  cout << "eps_fac:                           " << n->eps_fac << endl;
+  cout << "eps_fac_target:                    " << n->eps_fac_target << endl;
+  cout << "eps_fac_initial:                   " << n->eps_fac_initial << endl;
+  cout << "eps_fac_power:                     " << n->eps_fac_power << endl;
+  cout << "s_eps_total_fac:                   " << n->s_eps_total_fac << endl;
+  cout << "s_eps_total_scaling:               " << n->s_eps_total_scaling << endl;
+  cout << "s_eps_total_Nx_norm:               " << n->s_eps_total_Nx_norm << endl;
+  cout << "s_eps_total:                       " << n->s_eps_total << endl;
+
+  // initial guess:
+  cout << "initial_guess:                     " << n->initial_guess << endl;
+  cout << "initial_guess_file:                " << n->initial_guess_file << endl;
+  cout << "initial_guess_smoothing_type:      " << n->initial_guess_smoothing_type << endl;
+  cout << "initial_guess_smoothing_scale:     " << n->initial_guess_smoothing_scale << endl;
+  // mass stuff
+  cout << "mass_type:                         " << n->mass_type << endl;
+  cout << "massnum_init:                      " << n->massnum_init << endl;
+  cout << "massnum_burn:                      " << n->massnum_burn << endl;
+  // output
+  cout << "outnum:                            " << n->outnum << endl;
+  cout << "outnum_ps:                         " << n->outnum_ps << endl;
+  // restarting
+  cout << "start_at:                          " << n->start_at << endl;
+  // testing:
+  cout << "mass_factor:                       " << n->mass_factor << endl;
+  cout << "calc_h:                            " << n->calc_h << endl;
+  // EGP: end added
+
+  cout << "seed:                              " << n->seed << endl;
+
+  cout << "N_bin:                             " << n->N_bin << endl;
+
+  cout << "inputmode:                         " << n->inputmode << endl;
+  
+  cout << "INV_SUCCESS:                       " << n->INV_SUCCESS << endl;
+
+  cout << "filter:                            " << n->filter << endl;
+
+  cout << "PRECON:                            " << n->PRECON << endl;
+
+  cout << "codename:                          " << n->codename << endl;
+  cout << "fnamePS:                           " << n->fnamePS << endl;
+  cout << "dataFileName:                      " << n->dataFileName << endl;
+  cout << "fname_ps_sample:                   " << n->fname_ps_sample << endl;
+  cout << "fname_PROT_SPEC:                   " << n->fname_PROT_SPEC << endl;
+  cout << "fname_PROT_SIGNAL_BOX:             " << n->fname_PROT_SIGNAL_BOX << endl;
+  cout << "fname_PROT_SIGNAL_SLICE:           " << n->fname_PROT_SIGNAL_SLICE << endl;
+  
+  cout << "dir:                               " << n->dir << endl;
+
+  cout << "mk:                                " << n->mk << endl;
+
+  cout << "N1:                                " << n->N1 << endl;			/* Number of cells per axis */
+  cout << "N2:                                " << n->N2 << endl;			/* Number of cells per axis */
+  cout << "N3:                                " << n->N3 << endl;			/* Number of cells per axis */
+  cout << "N:                                 " << n->N << endl; // total cell number
+  cout << "Nhalf:                             " << n->Nhalf << endl;
+
+  cout << "readPS:                            " << n->readPS << endl;
+  
+  cout << "code_norm:                         " << n->code_norm << endl;
+  
+  cout << "slength:                           " << n->slength << endl;			
+
+  cout << "iGibbs:                            " << n->iGibbs << endl;
+  cout << "N_Gibbs:                           " << n->N_Gibbs << endl;
+  cout << "rejections:                        " << n->rejections << endl;
+  cout << "total_steps_lim:                   " << n->total_steps_lim << endl;
+
+  cout << "xllc:                              " << n->xllc << endl;			
+  cout << "yllc:                              " << n->yllc << endl;			
+  cout << "zllc:                              " << n->zllc << endl;			
+  
+  // Redshift space distortions
+  cout << "xobs:                              " << n->xobs << endl;			
+  cout << "yobs:                              " << n->yobs << endl;			
+  cout << "zobs:                              " << n->zobs << endl;			
+  cout << "planepar:                          " << n->planepar << endl;
+  cout << "periodic:                          " << n->periodic << endl;
+  
+  cout << "L1:                                " << n->L1 << endl;			/* Length of axis */
+  cout << "L2:                                " << n->L2 << endl;			/* Length of axis */
+  cout << "L3:                                " << n->L3 << endl;			/* Length of axis */
+
+  cout << "vol:                               " << n->vol << endl;
+  
+  cout << "d1:                                " << n->d1 << endl;			/* grid spacing x-direction */
+  cout << "d2:                                " << n->d2 << endl;			/* grid spacing y-direction */
+  cout << "d3:                                " << n->d3 << endl;			/* grid spacing z-direction */
+
+  cout << "grad_psi_prior_factor:             " << n->grad_psi_prior_factor << endl;
+  cout << "grad_psi_likeli_factor:            " << n->grad_psi_likeli_factor << endl;
+  cout << "grad_psi_prior_conjugate:          " << n->grad_psi_prior_conjugate << endl;
+  cout << "grad_psi_likeli_conjugate:         " << n->grad_psi_likeli_conjugate << endl;
+  cout << "grad_psi_prior_times_i:            " << n->grad_psi_prior_times_i << endl;
+  cout << "grad_psi_likeli_times_i:           " << n->grad_psi_likeli_times_i << endl;
+  cout << "div_dH_by_N:                       " << n->div_dH_by_N << endl;
+  cout << "deltaQ_factor:                     " << n->deltaQ_factor << endl;
+
+  cout << "particle_kernel:                   " << n->particle_kernel << endl;
+  cout << "particle_kernel_h_rel:             " << n->particle_kernel_h_rel << endl;
+  cout << "particle_kernel_h:                 " << n->particle_kernel_h << endl;
+
+  struct COSMOLOGY *c = d->cosmology;
+
+  cout << endl << "Cosmology\n=========" << endl;
+
+  cout << "omega_r:                           " << c->omega_r << endl;			/* radiation density */
+  cout << "omega_k:                           " << c->omega_k << endl;			/* curvature density */
+  cout << "omega_m:                           " << c->omega_m << endl;			/* matter density */
+  cout << "omega_b:                           " << c->omega_b << endl;			/* baryon density */
+  cout << "omega_q:                           " << c->omega_q << endl;			/* dark energy density */
+  cout << "w:                                 " << c->w << endl;
+  cout << "wprime:                            " << c->wprime << endl;		        /* de eos parameter w and first time derivative wprime=dw/da */
+  cout << "sigma8:                            " << c->sigma8 << endl;			/* mass fluctuation on 8Mpc/h scales */
+  cout << "rsmooth:                           " << c->rsmooth << endl;			/* smoothing radius: rsmooth = 8 yields sigma8 */
+  cout << "n_s:                               " << c->n_s << endl;			/* index of primordial dm power spectrum */
+  cout << "h:                                 " << c->h << endl;			        /* Hubble parameter */
+  cout << "beta:                              " << c->beta << endl;			/* lens galaxy distribution parameter 1 */
+  cout << "A_spec:                            " << c->A_spec << endl;			/* Normalization of the power spectrum */  
+  cout << "z:                                 " << c->z << endl;  
+  cout << "ascale:                            " << c->ascale << endl;
+  cout << "mu:                                " << c->mu << endl;
+
+  cout << "D1:                                " << c->D1 << endl;
+  cout << "D2:                                " << c->D2 << endl;
+
+  struct OBSERVATIONAL *o = d->observational;
+
+  cout << endl << "Observational\n=============" << endl;
+
+  cout << "biasP:                             " << o->biasP << endl;
+  cout << "biasE:                             " << o->biasE << endl;
+  cout << "Nmean_Gal:                         " << o->Nmean_Gal << endl;
+  cout << "rho_c:                             " << o->rho_c << endl;
+  cout << "mu:                                " << o->mu << endl;
+
+  cout << "sigma_fac:                         " << o->sigma_fac << endl;
+  cout << "sigma_min:                         " << o->sigma_min << endl;
+  cout << "delta_min:                         " << o->delta_min << endl;
+}
+
+void print_hamil_data(struct HAMIL_DATA *hd) {
+  using namespace std;
+
+  cout << "Basic\n=====" << endl;
+
+  cout << "sfmodel:                           " << hd->sfmodel << endl;
+  cout << "rsd_model:                         " << hd->rsd_model << endl;
+  
+  // set scalars
+  cout << "Nmean_Gal:                         " << hd->Nmean_Gal << endl;
+  cout << "rho_c:                             " << hd->rho_c << endl;
+  cout << "code_norm:                         " << hd->code_norm << endl;
+  cout << "mu:                                " << hd->mu << endl;
+  
+  cout << "sigma_fac:                         " << hd->sigma_fac << endl;
+  cout << "sigma_min:                         " << hd->sigma_min << endl;
+  cout << "delta_min:                         " << hd->delta_min << endl;
+
+  cout << "biasP:                             " << hd->biasP << endl;
+  cout << "biasE:                             " << hd->biasE << endl;
+  
+  cout << "z:                                 " << hd->z << endl;
+  cout << "ascale:                            " << hd->ascale << endl;
+
+  cout << "D1:                                " << hd->D1 << endl;
+  cout << "D2:                                " << hd->D2 << endl;
+ 
+  cout << "OM:                                " << hd->OM << endl;
+  cout << "OL:                                " << hd->OL << endl;
+  cout << "h:                                 " << hd->h << endl;
+
+  struct HAMIL_NUMERICAL *n = hd->numerical;
+
+  cout << endl << "Numerical\n=========" << endl;
+
+  cout << "N1:                                " << n->N1 << endl;				/* Number of cells per axis */
+  cout << "N2:                                " << n->N2 << endl;				/* Number of cells per axis */
+  cout << "N3:                                " << n->N3 << endl;				/* Number of cells per axis */
+  cout << "N:                                 " << n->N << endl;
+  cout << "Nhalf:                             " << n->Nhalf << endl;
+  
+  cout << "mk:                                " << n->mk << endl;				
+
+  cout << "L1:                                " << n->L1 << endl;				
+  cout << "L2:                                " << n->L2 << endl;				
+  cout << "L3:                                " << n->L3 << endl;				
+
+  cout << "vol:                               " << n->vol << endl;
+
+  cout << "d1:                                " << n->d1 << endl;				
+  cout << "d2:                                " << n->d2 << endl;				
+  cout << "d3:                                " << n->d3 << endl;				
+
+  cout << "min1:                              " << n->min1 << endl;				
+  cout << "min2:                              " << n->min2 << endl;				
+  cout << "min3:                              " << n->min3 << endl;				
+
+  // Redshift space distortions
+  cout << "xobs:                              " << n->xobs << endl;			
+  cout << "yobs:                              " << n->yobs << endl;			
+  cout << "zobs:                              " << n->zobs << endl;			
+  cout << "planepar:                          " << n->planepar << endl;
+  cout << "periodic:                          " << n->periodic << endl;
+  
+  cout << "N_bin:                             " << n->N_bin << endl;				
+
+  cout << "iGibbs:                            " << n->iGibbs << endl;
+  cout << "rejections:                        " << n->rejections << endl;
+  cout << "total_steps_lim:                   " << n->total_steps_lim << endl;
+
+  // EGP: some extra parameters for statistics on performance:
+  cout << "accepted:                          " << n->accepted << endl;
+  cout << "epsilon:                           " << n->epsilon << endl;
+  cout << "Neps:                              " << n->Neps << endl;
+
+  cout << "method:                            " << n->method << endl;			/* which method */
+  cout << "filter:                            " << n->filter << endl;			/* which filter method */
+  cout << "itmax:                             " << n->itmax << endl;			/* maximal number of iterations */
+  cout << "i_reset:                           " << n->i_reset << endl;			/* number to reset */
+  cout << "INV_SUCCESS:                       " << n->INV_SUCCESS << endl;		/* indicate inversion success */
+
+  cout << "epsi:                              " << n->epsi << endl;			/* precision */
+ 
+  cout << "kth:                               " << n->kth << endl;
+
+  // EGP: pseudo-timestep parameters
+  cout << "N_eps_fac:                         " << n->N_eps_fac << endl;
+  cout << "eps_fac:                           " << n->eps_fac << endl;
+  cout << "eps_fac_target:                    " << n->eps_fac_target << endl;
+  cout << "eps_fac_power:                     " << n->eps_fac_power << endl;
+  cout << "s_eps_total:                       " << n->s_eps_total << endl;
+  
+  // mass type
+  cout << "mass_fs:                           " << n->mass_fs << endl;
+  cout << "mass_rs:                           " << n->mass_rs << endl;
+  cout << "mass_type:                         " << n->mass_type << endl;
+  cout << "massnum_init:                      " << n->massnum_init << endl;
+  cout << "massnum_burn:                      " << n->massnum_burn << endl;
+
+  // EGP: testing:
+  cout << "mass_factor:                       " << n->mass_factor << endl;
+
+  /// TESTING ///
+  cout << "grad_psi_prior_factor:             " << n->grad_psi_prior_factor << endl;
+  cout << "grad_psi_likeli_factor:            " << n->grad_psi_likeli_factor << endl;
+  cout << "grad_psi_prior_conjugate:          " << n->grad_psi_prior_conjugate << endl;
+  cout << "grad_psi_likeli_conjugate:         " << n->grad_psi_likeli_conjugate << endl;
+  cout << "grad_psi_prior_times_i:            " << n->grad_psi_prior_times_i << endl;
+  cout << "grad_psi_likeli_times_i:           " << n->grad_psi_likeli_times_i << endl;
+  cout << "div_dH_by_N:                       " << n->div_dH_by_N << endl;
+  cout << "calc_h:                            " << n->calc_h << endl;
+  cout << "deltaQ_factor:                     " << n->deltaQ_factor << endl;
+
+  cout << "particle_kernel:                   " << n->particle_kernel << endl;
+  cout << "particle_kernel_h:                 " << n->particle_kernel_h << endl;
+}
diff --git a/debug.h b/debug.h
new file mode 100644
index 0000000..5844371
--- /dev/null
+++ b/debug.h
@@ -0,0 +1,19 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#include "define_opt.h"
+#include "struct_hamil.h"
+#include "struct_main.h"
+#include <string>
+
+void debug_array_statistics(real_prec *array, ULONG size, std::string name);
+void debug_scalar_dump(real_prec *array, unsigned int N1, unsigned int N2, unsigned int N3, real_prec L1, real_prec L2,
+                       real_prec L3, ULONG N_bin, std::string fname);
+void print_data(struct DATA *d);
+void print_hamil_data(struct HAMIL_DATA *hd);
diff --git a/define_opt.h b/define_opt.h
new file mode 100644
index 0000000..1b3fa08
--- /dev/null
+++ b/define_opt.h
@@ -0,0 +1,86 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#pragma once
+#include <fftw3.h>
+
+// nullptr isn't implemented in GCC before version 4.6. This template is a
+// temporary solution.
+#ifndef __INTEL_COMPILER  // icc also defines GNUC, but we don't want this there
+#ifndef __clang__  // same story for clang
+#ifdef __GNUC__
+#define GCC_VERSION (__GNUC__ * 10000 \
+    + __GNUC_MINOR__ * 100 \
+    + __GNUC_PATCHLEVEL__)
+// Test for GCC < 4.6
+#if GCC_VERSION < 40600
+const                        // this is a const object...
+class {
+public:
+  template<class T>          // convertible to any type
+    operator T*() const      // of null non-member
+    { return 0; }            // pointer...
+  template<class C, class T> // or any type of null
+    operator T C::*() const  // member pointer...
+    { return 0; }
+private:
+  void operator&() const;    // whose address can't be taken
+} nullptr = {};              // and whose name is nullptr
+#endif  // GCC_VERSION
+#endif  // __GNUC__
+#endif  // __clang__
+#endif  // __INTEL_COMPILER
+
+// EGP: some often used variables and functions
+#define gsl_real double
+#define ULONG unsigned long
+#define re(c) ((c)[0])
+#define im(c) ((c)[1])
+
+// EGP: behaviour of FFTW routines
+#define FFTW_OPTION FFTW_ESTIMATE
+#define FORWARD FFTW_FORWARD 
+#define BACKWARD FFTW_BACKWARD
+#ifdef SINGLE_PREC
+#define fftwf_real float
+#define real_prec fftwf_real
+#define complex_prec fftwf_complex
+#endif
+#ifdef DOUBLE_PREC
+#define fftw_real double
+#define real_prec fftw_real
+#define complex_prec fftw_complex
+#endif
+
+// EGP: more variables (must come after definition of real_prec)
+#define num_1 static_cast<real_prec>(1.)
+#define num_2 static_cast<real_prec>(2.)
+#define num_0_1 static_cast<real_prec>(0.1)
+#define num_0_5 static_cast<real_prec>(0.5)
+
+// EGP: no idea what these things do:
+#define fwd true
+//#define inv false // does nothing in whole code AND causes problems with gsl_linalg (needed in mass_fourier_vs_matrix unittest)!
+#define fourier_space true
+#define real_space false
+#define to_Fspace true
+#define to_Rspace false
+
+// EGP: units and physical constants. Not in Makefile, calculations.
+#define cgs_Mpc 1. // h^{-1}
+#define cgs_sec 1.
+#define cgs_km (0.3240779290e-19 * cgs_Mpc)
+#define cgs_clight (0.9715611892e-14 * cgs_Mpc/cgs_sec)
+
+// EGP: numerical constants. Not in Makefile, because whatever.
+#define eps 1.e-14
+#define epsint 1.0e-6 /* numerical accuracy for integrations */
+#define NEVAL 1000    /* numerical integration a2com */
+
+
diff --git a/disp_part.cc b/disp_part.cc
new file mode 100644
index 0000000..47e3945
--- /dev/null
+++ b/disp_part.cc
@@ -0,0 +1,165 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#include "define_opt.h"
+
+#include <iomanip>
+#include <iostream>
+
+//#include <gsl/gsl_errno.h>
+#include <gsl/gsl_randist.h>
+
+#include "fftw_array.h"
+
+#include "math_funcs.h"
+
+#include "convenience.h"
+
+#include "BarcodeException.h"
+
+/*
+
+  F.S. Kitaura 2012
+  E.G.P. Bos 2013-2017
+
+*/
+
+using namespace std;
+
+bool planepar=false;// plane parallel approximation
+bool periodic=true; // periodic boundary conditions
+
+
+// ONLY USED IN Lag2Eul
+
+void disp_part(unsigned int N1, unsigned int N2, unsigned int N3, real_prec L1, real_prec L2, real_prec L3,
+               real_prec d1, real_prec d2, real_prec d3, real_prec *posx, real_prec *posy, real_prec *posz,
+               real_prec *psix, real_prec *psiy, real_prec *psiz, real_prec *dummyL, unsigned int facL, bool reggrid,
+               gsl_rng *gBaseRand)
+{
+  if ((facL > 1) && (reggrid == false))
+  {
+    throw BarcodeException("facL > 1 and reggrid == false in disp_part; doesn't make sense, because it would then just put facL^3 particles in the same cell center positions.");
+  }
+
+  real_prec rx,ry,rz;
+
+  ULONG nout=0;
+
+  unsigned NL = facL*facL*facL;
+  ULONG numpart = N1*N2*N3*NL;
+
+  // Get grid center positions, or random positions if reggrid == false.
+#ifdef MULTITHREAD_RNG
+#pragma omp parallel for
+#endif // MULTITHREAD_RNG
+  for (unsigned i=0;i<N1;i++)
+    for (unsigned j=0;j<N2;j++)
+      for (unsigned k=0;k<N3;k++)
+        for (unsigned ii=0; ii<NL; ii++)
+        {
+          ULONG jj = k + N3*(j+ N2*i);
+          ULONG jjj = ii + NL*jj;
+
+          if (reggrid==false)
+          {
+            rx=static_cast<real_prec>(gsl_rng_uniform(gBaseRand))*d1;
+            ry=static_cast<real_prec>(gsl_rng_uniform(gBaseRand))*d2;
+            rz=static_cast<real_prec>(gsl_rng_uniform(gBaseRand))*d3;
+          }
+          else
+          {
+            rx=static_cast<real_prec>(0.5)*d1;
+            ry=static_cast<real_prec>(0.5)*d2;
+            rz=static_cast<real_prec>(0.5)*d3;
+          }
+
+          real_prec posxi=d1*(real_prec(i))+rx;
+          real_prec posyi=d2*(real_prec(j))+ry;
+          real_prec poszi=d3*(real_prec(k))+rz;
+
+          posx[jjj]=posxi;
+          posy[jjj]=posyi;
+          posz[jjj]=poszi;
+        }
+
+  // Interpolate the psi values to the particle positions (if reggrid == false)
+  if (reggrid == false)
+  {
+    throw BarcodeException("This part of the code is not right, fix before use!");
+    // below I explain why it's wrong
+    fftw_array<real_prec> dri(numpart);
+    interpolate_CIC(N1, N2, N3, L1, L2, L3, d1, d2, d3, posx, posy, posz, psix, numpart, dri);
+    // WRONG: at this point we already add dri to posx:
+    add_to_array(dri, posx, numpart);
+    // WRONG: but in the next part we use this already updated posx to interpolate psiy!
+    //        This interpolation should have been done before updating the pos arrays.
+    interpolate_CIC(N1, N2, N3, L1, L2, L3, d1, d2, d3, posx, posy, posz, psiy, numpart, dri);
+    add_to_array(dri, posy, numpart);
+    interpolate_CIC(N1, N2, N3, L1, L2, L3, d1, d2, d3, posx, posy, posz, psiz, numpart, dri);
+    add_to_array(dri, posz, numpart);
+  }
+  else
+  {
+  // If reggrid, just couple the psi values directly to the positions.
+    add_to_array(psix, posx, numpart);
+    add_to_array(psiy, posy, numpart);
+    add_to_array(psiz, posz, numpart);
+  }
+
+  // Periodic boundary conditions
+  if (periodic==true)
+  {
+#ifdef MULTITHREAD_RNG
+#pragma omp parallel for
+#endif // MULTITHREAD_RNG
+    for (unsigned i=0;i<N1;i++)
+      for (unsigned j=0;j<N2;j++)
+        for (unsigned k=0;k<N3;k++)
+          for (unsigned ii=0; ii<NL; ii++)
+          {
+            ULONG jj = k + N3*(j+ N2*i);
+            ULONG jjj = ii + NL*jj;
+
+            pacman_coordinate(&posx[jjj], L1);
+            pacman_coordinate(&posy[jjj], L2);
+            pacman_coordinate(&posz[jjj], L3);
+          }
+  }
+  else
+  {
+  // Check for out of boundary particles (only if not periodic)
+    bool bout=false;
+#ifdef MULTITHREAD_RNG
+#pragma omp parallel for reduction(+:nout)
+#endif // MULTITHREAD_RNG
+    for (unsigned i=0;i<N1;i++)
+      for (unsigned j=0;j<N2;j++)
+        for (unsigned k=0;k<N3;k++)
+          for (unsigned ii=0; ii<NL; ii++)
+          {
+            ULONG jj = k + N3*(j+ N2*i);
+            ULONG jjj = ii + NL*jj;
+
+            if (posx[jjj]<0 || posy[jjj]<0 || posz[jjj]<0 || posx[jjj]>=L1 || posy[jjj]>=L1 || posz[jjj]>=L1)
+            {
+              nout++;
+              bout=true;
+            }
+
+            dummyL[jjj]=1.0;
+          }
+    if (bout)
+    {
+      cout<<endl;
+      cout<<"# of objects outside boundaries "<<nout<<endl;
+    }
+  }
+}
+
diff --git a/disp_part.h b/disp_part.h
new file mode 100644
index 0000000..a023434
--- /dev/null
+++ b/disp_part.h
@@ -0,0 +1,14 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#pragma once
+void disp_part(unsigned int N1, unsigned int N2, unsigned int N3, real_prec L1, real_prec L2, real_prec L3,
+               real_prec d1, real_prec d2, real_prec d3, real_prec *posx, real_prec *posy, real_prec *posz,
+               real_prec *psix, real_prec *psiy, real_prec *psiz, real_prec *dummyL, unsigned int facL, bool reggrid,
+               gsl_rng *gBaseRand);
diff --git a/fftw_array.h b/fftw_array.h
new file mode 100644
index 0000000..892eee3
--- /dev/null
+++ b/fftw_array.h
@@ -0,0 +1,127 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#pragma once
+
+#include "fftw3.h"
+#include <iostream>
+#include "define_opt.h"
+#include <cstddef>  // std::ptrdiff_t, std::size_t
+/* This template allows to avoid writing fftw_free */
+
+
+/*
+ * Trying to figure out a fix for the behavior of fftw_array as it is on 14 April 2017 (and before).
+ * Specifically, when compiling with clang (possibly with gcc as well), I get a warning about the implicit conversion:
+ *   /Users/pbos/code/barcode/EqSolvers.cc:582:21: warning: implicit conversion changes signedness: 'unsigned long' to 'long' [-Wsign-conversion]
+ *     m2v[i]=LapPhivx[i]*LapPhivy[i]-LapPhivxy[i]*LapPhivxy[i]+LapPhivx[i]*LapPhivz[i]-LapPhivxz[i]*LapPhivxz[i]+LapPhivy[i]*LapPhivz[i]-LapPhivyz[i]*LapPhivyz[i];
+ *            ~~~~~~~~ ^
+ * This happens with all fftw_arrays when an unsigned integer is used as an index.
+ *
+ * It is probably caused by the built-in subscript operator: http://en.cppreference.com/w/cpp/language/operator_member_access
+ * Since fftw_array has no operator[] (because it is neither a pointer nor an array, so operator[] cannot take fftw_array as operand),
+ * the implicit conversion operator `operator T*()` (see fftw_array.h) kicks in as the only available option. This returns the `data`
+ * member which is a pointer of type T*.
+ *
+ * The builtin subscript operator T& operator[](T*, std::ptrdiff_t) then converts the index to a std::ptrdiff_t, which though
+ * implementation dependent is usually a signed integer of some size, since it needs to be able to do negative differences too.
+ * This causes the signedness warning.
+ *
+ * A solution would be to overload the subscript operator for unsigned types.
+ *
+ * However, fftw_array is often passed to functions using its data member only. In this case, we cannot overload the operator from the
+ * fftw_array class.
+ *
+ * Can we have the class template additionally create an overloaded global function T& operator[](T*, std::ptrdiff_t)? I.e. one outside
+ * of the class fftw_array, but defined by the use of fftw_array<T> somewhere in the code...
+ *
+ * Do we have to overload the template function? :D
+ *
+ * 15 April:
+ * It seems like there are no changed signedness warnings for the real_prec* arrays. Perhaps the compiler only warns about this
+ * for user defined classes. Let's just overload the operator here and see what happens.
+ */
+
+
+template<typename T> class fftw_array
+{
+  // N.B.: fftw_array does not copy/assign properly! The data pointers will be copied, meaning that the new array will point to the same memory slots! This gives errors when destroying the objects when they go out of scope.
+public:
+  T *data;
+
+  fftw_array(ULONG size)
+  {
+#ifdef SINGLE_PREC
+    data = static_cast<T *>( fftwf_malloc(size*sizeof(T)) );
+#endif
+#ifdef DOUBLE_PREC
+    data = static_cast<T *>( fftw_malloc(size*sizeof(T)) );
+#endif
+  }
+  ~fftw_array()
+  {
+#ifdef SINGLE_PREC
+    fftwf_free(data);
+#endif
+#ifdef DOUBLE_PREC
+    fftw_free(data);
+#endif
+  }
+
+  // "implicit conversion operator": works for functions, but not for templates. For the latter case write: <array_name>.data
+  operator T*()
+  {
+    return data;
+  }
+
+  // overloaded subscript operators, see story at the top of the file
+  T& operator[](std::size_t N) {
+    // *(ptr+N) is equivalent to ptr[N], but since ptr[N] will again call the builtin subscript operator, we don't
+    // want to use that, since it will again need casting to signed integer (std::ptrdiff_t) and thus will limit the
+    // range of the index to half the total possible range.
+    return *(data + N);
+  }
+
+};
+
+template<typename T> class fftw_array_debug
+{
+  // N.B.: fftw_array_debug does not copy/assign properly! The data pointers will be copied, meaning that the new array will point to the same memory slots! This gives errors when destroying the objects when they go out of scope.
+public:
+  T *data;
+
+  fftw_array_debug(ULONG size)
+  {
+    std::cout << "creating fftw_array_debug with address " << this << " and data address " << data << std::endl;
+
+#ifdef SINGLE_PREC
+    data = static_cast<T *>( fftwf_malloc(size*sizeof(T)) );
+#endif
+#ifdef DOUBLE_PREC
+    data = static_cast<T *>( fftw_malloc(size*sizeof(T)) );
+#endif
+  }
+  ~fftw_array_debug()
+  {
+    std::cout << "destroying fftw_array_debug with address " << this << std::endl;
+#ifdef SINGLE_PREC
+    fftwf_free(data);
+#endif
+#ifdef DOUBLE_PREC
+    fftw_free(data);
+#endif
+  }
+
+  // implicit conversion: works for functions, but not for templates. For the latter case write: <array_name>.data
+  operator T*()
+  {
+    return data;
+  }
+
+};
diff --git a/fftwrapper.cc b/fftwrapper.cc
new file mode 100644
index 0000000..2e4eec1
--- /dev/null
+++ b/fftwrapper.cc
@@ -0,0 +1,276 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#include <climits>  // INT_MAX
+#include <exception>  // std::exception
+
+#include "define_opt.h"
+#include "fftw_array.h"
+
+#include "math_funcs.h"
+
+#include "convenience.h"
+
+#include "fftwrapper.h"
+
+
+// Take care: multidimensional C2R fftw transforms destroy the input array!
+void fftC2R(unsigned int N1, unsigned int N2, unsigned int N3, complex_prec *in, real_prec *out)
+{
+  if (N1 > INT_MAX || N2 > INT_MAX || N3 > INT_MAX) {
+    std::string msg = "FFT dimensions must not be larger than INT_MAX!";
+    throw std::logic_error(msg);
+  }
+#ifdef SINGLE_PREC
+  fftwf_plan fftp;
+  fftp = fftwf_plan_dft_c2r_3d(static_cast<int>(N1), static_cast<int>(N2), static_cast<int>(N3), in,out,FFTW_OPTION);
+  fftwf_execute(fftp);
+#endif  
+#ifdef DOUBLE_PREC
+  fftw_plan fftp;
+  fftp = fftw_plan_dft_c2r_3d(static_cast<int>(N1), static_cast<int>(N2), static_cast<int>(N3), in,out,FFTW_OPTION);
+  fftw_execute(fftp);
+#endif  
+
+  ULONG  N=N1*N2*N3;
+  real_prec fac = 1 / static_cast<real_prec>(N);
+  multiply_factor_array(fac, out, out, N);
+
+#ifdef SINGLE_PREC
+  fftwf_destroy_plan(fftp);
+#endif
+#ifdef DOUBLE_PREC
+  fftw_destroy_plan(fftp);
+#endif
+}
+
+
+void fftR2C(unsigned int N1, unsigned int N2, unsigned int N3, real_prec *in, complex_prec *out)
+{
+  if (N1 > INT_MAX || N2 > INT_MAX || N3 > INT_MAX) {
+    std::string msg = "FFT dimensions must not be larger than INT_MAX!";
+    throw std::logic_error(msg);
+  }
+#ifdef SINGLE_PREC
+  fftwf_plan fftp;
+  fftp = fftwf_plan_dft_r2c_3d(static_cast<int>(N1), static_cast<int>(N2), static_cast<int>(N3), in,out,FFTW_OPTION);
+  fftwf_execute(fftp);
+#endif
+#ifdef DOUBLE_PREC
+  fftw_plan fftp;
+  fftp = fftw_plan_dft_r2c_3d(static_cast<int>(N1), static_cast<int>(N2), static_cast<int>(N3), in,out,FFTW_OPTION);
+  fftw_execute(fftp);
+#endif
+
+#ifdef SINGLE_PREC
+  fftwf_destroy_plan(fftp);
+#endif
+#ifdef DOUBLE_PREC
+  fftw_destroy_plan(fftp);
+#endif
+}
+
+
+
+
+
+// Real FFT versions that take a plan as input. This greatly speeds up the FFT
+
+// Take care: multidimensional C2R fftw transforms destroy the input array!
+void fftC2Rplanned(complex_prec *in, real_prec *out, struct plan_pkg *plan) {
+  if (in != plan->C) {
+    copyArray(in, plan->C, plan->Nhalf);
+  }
+
+  #ifdef SINGLE_PREC
+  fftwf_execute(plan->plan);
+  #endif
+  #ifdef DOUBLE_PREC
+  fftw_execute(plan->plan);
+  #endif
+
+  real_prec fac = 1 / static_cast<real_prec>(plan->N);
+  multiply_factor_array(fac, plan->R, out, plan->N);
+}
+
+void fftR2Cplanned(real_prec *in, complex_prec *out, struct plan_pkg *plan) {
+  if (in != plan->R) {
+    copyArray(in, plan->R, plan->N);
+  }
+
+  #ifdef SINGLE_PREC
+  fftwf_execute(plan->plan);
+  #endif
+  #ifdef DOUBLE_PREC
+  fftw_execute(plan->plan);
+  #endif
+
+  if (out != plan->C) {
+    copyArray(plan->C, out, plan->Nhalf);
+  }
+}
+
+
+
+
+
+
+
+
+
+void FFT3d(unsigned int N1, unsigned int N2, unsigned int N3, bool direction, complex_prec *in, complex_prec *out)
+{
+  if (N1 > INT_MAX || N2 > INT_MAX || N3 > INT_MAX) {
+    std::string msg = "FFT dimensions must not be larger than INT_MAX!";
+    throw std::logic_error(msg);
+  }
+
+  ULONG factor=N1*N2*N3;
+
+#ifdef SINGLE_PREC
+  fftwf_plan fftp;
+  if (direction == true) // EGP: to Fourier space
+  {
+    fftp = fftwf_plan_dft_3d(N1,N2,N3,in,out,FORWARD,FFTW_OPTION);	
+    fftwf_execute(fftp);
+  }
+  else // EGP: back to real space
+  {
+    fftp = fftwf_plan_dft_3d(N1,N2,N3,in,out,BACKWARD,FFTW_OPTION);	
+    fftwf_execute(fftp);
+    complexfactor_mult(factor,static_cast<real_prec>(1./real_prec(factor)),out,out);
+  }
+#endif
+
+#ifdef DOUBLE_PREC
+  fftw_plan fftp;
+  if (direction == true) // EGP: to Fourier space
+  {
+    fftp = fftw_plan_dft_3d(static_cast<int>(N1), static_cast<int>(N2), static_cast<int>(N3),in,out,FORWARD,FFTW_OPTION);
+    fftw_execute(fftp);
+  }
+  else // EGP: back to real space
+  {
+    fftp = fftw_plan_dft_3d(static_cast<int>(N1), static_cast<int>(N2), static_cast<int>(N3),in,out,BACKWARD,FFTW_OPTION);
+    fftw_execute(fftp);
+    complexfactor_mult(factor,1./real_prec(factor),out,out);
+  }
+#endif
+#ifdef SINGLE_PREC
+  fftwf_destroy_plan(fftp);
+#endif
+#ifdef DOUBLE_PREC
+  fftw_destroy_plan(fftp);
+#endif
+}
+
+
+void FFT3dR2C(unsigned int N1, unsigned int N2, unsigned int N3, real_prec *in, complex_prec *out)
+{
+  if (N1 > INT_MAX || N2 > INT_MAX || N3 > INT_MAX) {
+    std::string msg = "FFT dimensions must not be larger than INT_MAX!";
+    throw std::logic_error(msg);
+  }
+
+  ULONG N=N1*N2*N3;
+
+  fftw_array<complex_prec> inC(N);
+
+  for(ULONG i=0;i<N;i++){
+    re(inC[i])=in[i];
+    im(inC[i])=0.;
+  }
+
+#ifdef SINGLE_PREC
+  fftwf_plan fftp;  
+  fftp = fftwf_plan_dft_3d(N1,N2,N3,inC,out,FORWARD,FFTW_OPTION);	
+  fftwf_execute(fftp);
+#endif
+#ifdef DOUBLE_PREC
+  fftw_plan fftp;  
+  fftp = fftw_plan_dft_3d(static_cast<int>(N1), static_cast<int>(N2), static_cast<int>(N3),inC,out,FORWARD,FFTW_OPTION);
+  fftw_execute(fftp);
+#endif
+
+#ifdef SINGLE_PREC
+  fftwf_destroy_plan(fftp);
+#endif
+#ifdef DOUBLE_PREC
+  fftw_destroy_plan(fftp);
+#endif
+}
+
+void FFT3dC2R(unsigned int N1, unsigned int N2, unsigned int N3, complex_prec *in, real_prec *out)
+{
+  if (N1 > INT_MAX || N2 > INT_MAX || N3 > INT_MAX) {
+    std::string msg = "FFT dimensions must not be larger than INT_MAX!";
+    throw std::logic_error(msg);
+  }
+
+  ULONG N=N1*N2*N3;
+
+  fftw_array<complex_prec> inC(N);
+
+  for(ULONG i=0;i<N;i++){
+    re(inC[i])=re(in[i]);
+    im(inC[i])=im(in[i]);
+  }  
+
+#ifdef SINGLE_PREC
+  fftwf_plan fftp;  
+  fftp = fftwf_plan_dft_3d(N1,N2,N3,inC,inC,BACKWARD,FFTW_OPTION);	
+  fftwf_execute(fftp);
+#endif
+#ifdef DOUBLE_PREC
+  fftw_plan fftp;  
+  fftp = fftw_plan_dft_3d(static_cast<int>(N1), static_cast<int>(N2), static_cast<int>(N3),inC,inC,BACKWARD,FFTW_OPTION);
+  fftw_execute(fftp);
+#endif
+
+  complexfactor_mult(N,static_cast<real_prec>(1./real_prec(N)),inC,inC);
+#ifdef SINGLE_PREC
+  fftwf_destroy_plan(fftp);
+#endif
+#ifdef DOUBLE_PREC
+  fftw_destroy_plan(fftp);
+#endif
+
+  for(ULONG i=0;i<N;i++){
+    out[i]=re(inC[i]);
+  }  
+}
+
+
+//void FFTW_accumulate_wisdom (int N1,int N2,int N3)
+//{
+  //ULONG N=N1*N2*N3;
+
+  //fftw_array<complex_prec> inC(N), out(N);
+  //fill_one(inC, N);
+
+  //// Options below: FFTW_MEASURE, _PATIENT and _EXHAUSTIVE
+
+//#ifdef SINGLE_PREC
+  //fftwf_plan fftp;  
+  //fftp = fftwf_plan_dft_3d(N1,N2,N3,inC,out,FORWARD,FFTW_MEASURE);	
+  //fftwf_execute(fftp);
+//#endif
+//#ifdef DOUBLE_PREC
+  //fftw_plan fftp;  
+  //fftp = fftw_plan_dft_3d(N1,N2,N3,inC,out,FORWARD,FFTW_MEASURE);	
+  //fftw_execute(fftp);
+//#endif
+
+//#ifdef SINGLE_PREC
+  //fftwf_destroy_plan(fftp);
+//#endif
+//#ifdef DOUBLE_PREC
+  //fftw_destroy_plan(fftp);
+//#endif
+//}
diff --git a/fftwrapper.h b/fftwrapper.h
new file mode 100644
index 0000000..c670f00
--- /dev/null
+++ b/fftwrapper.h
@@ -0,0 +1,100 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#pragma once
+#include <climits>  // INT_MAX
+#include <exception>  // std::exception
+#include <stdexcept>  // std::logic_error
+#include <string>
+
+#include <fftw3.h>
+#include "define_opt.h"
+#include <iostream>
+
+void fftC2R(unsigned int N1, unsigned int N2, unsigned int N3, complex_prec *in, real_prec *out);
+
+void fftR2C(unsigned int N1, unsigned int N2, unsigned int N3, real_prec *in, complex_prec *out);
+
+void FFT3d(unsigned int N1, unsigned int N2, unsigned int N3, bool direction, complex_prec *in, complex_prec *out);
+
+void FFT3dR2C(unsigned int N1, unsigned int N2, unsigned int N3, real_prec *in, complex_prec *out);
+
+void FFT3dC2R(unsigned int N1, unsigned int N2, unsigned int N3, complex_prec *in, real_prec *out);
+
+struct plan_pkg {
+  unsigned N1, N2, N3;
+  ULONG N;
+  ULONG Nhalf;
+  complex_prec *C;
+  real_prec *R;
+  #ifdef SINGLE_PREC
+  fftwf_plan plan;
+  #endif
+  #ifdef DOUBLE_PREC
+  fftw_plan plan;
+  #endif
+
+  // R2C
+  plan_pkg(unsigned int _N1, unsigned int _N2, unsigned int _N3, real_prec *in, complex_prec *out) {
+    std::cout << "plan_pkg R2C" << std::endl;
+
+    if (_N1 > INT_MAX || _N2 > INT_MAX || _N3 > INT_MAX) {
+      std::string msg = "FFT dimensions must not be larger than INT_MAX!";
+      throw std::logic_error(msg);
+    }
+
+    N1 = _N1, N2 = _N2, N3 = _N3;
+    N = static_cast<ULONG>(N1*N2*N3);
+    Nhalf = static_cast<ULONG>(N1 * N2 * (N3/2 + 1));
+    R = in;
+    C = out;
+
+    #ifdef SINGLE_PREC
+    plan = fftwf_plan_dft_r2c_3d(static_cast<int>(N1), static_cast<int>(N2), static_cast<int>(N3), in, out, FFTW_PATIENT);
+    #endif
+    #ifdef DOUBLE_PREC
+    plan = fftw_plan_dft_r2c_3d(static_cast<int>(N1), static_cast<int>(N2), static_cast<int>(N3), in, out, FFTW_PATIENT);
+    #endif
+  }
+
+  // C2R
+  plan_pkg(unsigned int _N1, unsigned int _N2, unsigned int _N3, complex_prec *in, real_prec *out) {
+    std::cout << "plan_pkg C2R" << std::endl;
+
+    if (_N1 > INT_MAX || _N2 > INT_MAX || _N3 > INT_MAX) {
+      std::string msg = "FFT dimensions must not be larger than INT_MAX!";
+      throw std::logic_error(msg);
+    }
+
+    N1 = _N1, N2 = _N2, N3 = _N3;
+    N = static_cast<ULONG>(N1*N2*N3);
+    Nhalf = static_cast<ULONG>(N1 * N2 * (N3/2 + 1));
+    C = in;
+    R = out;
+
+    #ifdef SINGLE_PREC
+    plan = fftwf_plan_dft_c2r_3d(static_cast<int>(N1), static_cast<int>(N2), static_cast<int>(N3), in, out, FFTW_PATIENT);
+    #endif
+    #ifdef DOUBLE_PREC
+    plan = fftw_plan_dft_c2r_3d(static_cast<int>(N1), static_cast<int>(N2), static_cast<int>(N3), in, out, FFTW_PATIENT);
+    #endif
+  }
+
+  ~plan_pkg() {
+    #ifdef SINGLE_PREC
+    fftwf_destroy_plan(plan);
+    #endif
+    #ifdef DOUBLE_PREC
+    fftw_destroy_plan(plan);
+    #endif
+  }
+};
+
+void fftC2Rplanned(complex_prec *in, real_prec *out, struct plan_pkg *plan);
+void fftR2Cplanned(real_prec *in, complex_prec *out, struct plan_pkg *plan);
diff --git a/field_statistics.cpp b/field_statistics.cpp
new file mode 100644
index 0000000..a7aa516
--- /dev/null
+++ b/field_statistics.cpp
@@ -0,0 +1,89 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#include "field_statistics.h"
+#include "fftw_array.h"
+#include "convenience.h"
+#include "fftwrapper.h"
+#include <cmath>
+#include "math_funcs.h"
+
+// Used in barcoderunner.cc and HMC_mass.cc:
+// Calculating power spectrum of the density field
+void measure_spectrum(unsigned int N1, unsigned int N2, unsigned int N3, real_prec L1, real_prec L2, real_prec L3,
+                      real_prec *signal, real_prec *kmode, real_prec *power, ULONG N_bin)
+{
+  fftw_array<ULONG> nmode(N_bin);
+
+  ULONG N = static_cast<ULONG>(N1*N2*N3);// EGP: added
+
+  /* Initialize the arrays */
+  fillZero(power, N_bin);
+  fillZero(kmode, N_bin);
+  fillZero(nmode, N_bin);
+  fftw_array<complex_prec> Signal(N);
+  fillZero(Signal, N);
+
+  FFT3dR2C (N1,N2,N3,signal,Signal);  // keep this one 3D, otherwise factor 2 in loop!
+
+  /* measure the greatest |k| in the box */
+  real_prec kmax=sqrt(k_squared(N1/2,N2/2,N3/2,L1,L2,L3,N1,N2,N3));
+  /* bin width in k-space */
+  real_prec dk=kmax/real_prec(N_bin);
+
+  /* Compute the power-spectrum : P(k) */
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+  for(unsigned i=0;i<N1;i++)
+    for(unsigned j=0;j<N2;j++)
+      for(unsigned k=0;k<N3;k++) {
+        real_prec ktot=sqrt(k_squared(i,j,k,L1,L2,L3,N1,N2,N3));
+        ULONG nbin=static_cast<ULONG>(ktot/dk);
+        // N.B.: we leave the kmax contribution out; it's only one mode, so very
+        //       noisy anyway, but also otherwise we have to sacrifice room for
+        //       the better modes, because we have to keep the arrays N_bin size
+        if (nbin < N_bin) {
+          real_prec akl=re(Signal[k+N3*(j+N2*i)]);
+          real_prec bkl=im(Signal[k+N3*(j+N2*i)]);
+#ifdef MULTITHREAD
+#pragma omp atomic
+#endif // MULTITHREAD
+          kmode[nbin]+=1*ktot;
+#ifdef MULTITHREAD
+#pragma omp atomic
+#endif // MULTITHREAD
+          power[nbin]+=(akl*akl+bkl*bkl);
+#ifdef MULTITHREAD
+#pragma omp atomic
+#endif // MULTITHREAD
+          nmode[nbin]+=1;
+        }
+      }
+
+  // Transform from discrete Fourier coefficients to actual power spectrum (see Martel 2005)
+  real_prec NORM=static_cast<real_prec>(1.0);
+#ifdef	  FOURIER_DEF_1
+  NORM=static_cast<real_prec>(L1*L2*L3);//4./M_PI);
+#endif
+#ifdef	  FOURIER_DEF_2
+  NORM=static_cast<real_prec>(L1*L2*L3/real_prec(N)/real_prec(N));///4./M_PI);
+#endif
+
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+  for (ULONG l=0;l<N_bin;l++)
+    if(nmode[l]>0)
+    {
+      kmode[l]=kmode[l]/static_cast<real_prec>(nmode[l]);
+      power[l]=power[l]/static_cast<real_prec>(nmode[l])*NORM;
+    }
+
+}
diff --git a/field_statistics.h b/field_statistics.h
new file mode 100644
index 0000000..3b3ad95
--- /dev/null
+++ b/field_statistics.h
@@ -0,0 +1,18 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#ifndef BARCODE_FIELD_STATISTICS_H
+#define BARCODE_FIELD_STATISTICS_H
+
+#include "define_opt.h"
+
+void measure_spectrum(unsigned int N1, unsigned int N2, unsigned int N3, real_prec L1, real_prec L2, real_prec L3,
+                      real_prec *signal, real_prec *kmode, real_prec *power, ULONG N_bin);
+
+#endif //BARCODE_FIELD_STATISTICS_H
diff --git a/init_par.cc b/init_par.cc
new file mode 100644
index 0000000..456db60
--- /dev/null
+++ b/init_par.cc
@@ -0,0 +1,585 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#include <cmath>
+#include <iostream>
+#include <fstream>
+#include <iomanip>
+#include <fftw3.h>
+#include <sstream>
+#include <climits>
+
+#include <gsl/gsl_integration.h>
+
+#include "define_opt.h"
+
+#include "fftwrapper.h"
+#include "planck/paramfile.h"
+
+#include "struct_main.h"
+#include "cosmo.h"
+
+#include "HMC_models.h"
+
+using namespace std;
+
+int cmbcosm=3;// 1: WMAP3, 2: WMAP7 mean, 3: WMAP7 maximum. take care!
+//power spectrum is calculated only approximately with fitting formulae!
+
+void INIT_PARAMS(struct DATA *data) {
+  data->numerical->codename=string("BARCODE");
+
+  string codename=data->numerical->codename;
+  cout << " >>> "<<codename<<": initialising settings...."<<endl<<endl;
+
+  data->numerical->rejections = 0; // EGP: start counting
+
+  char fname_PAR[100]="input.par";
+  paramfile params (fname_PAR);
+
+  int inputmode=params.find<int>("inputmode");
+
+  string dataFileName=params.find<string>("file");
+  cout << "file: " << dataFileName << endl;
+
+  // EGP: this was lost, causing fuckups:
+  data->numerical->INV_SUCCESS = 0;
+
+  // EGP: added following switches
+  bool random_test = params.find<bool>("random_test");
+  bool random_test_rsd = params.find<bool>("random_test_rsd");
+
+  int window_type = params.find<int>("window_type");
+
+  int data_model = params.find<int>("data_model");
+  int negative_obs = params.find<bool>("negative_obs");
+
+  int likelihood = params.find<int>("likelihood");
+  int prior = params.find<int>("prior");
+
+  int sfmodel = params.find<int>("sfmodel"); 
+  bool rsd_model = params.find<bool>("rsd_model");
+
+  if (((data_model == 1) && (likelihood != 2)) ||
+      ((likelihood == 2) && (data_model != 1)) ) {
+    throw BarcodeException("Error: incompatible data_model and likelihood in input.par! Logarithmic and log-normal must go together, or you must choose other models.");
+  }
+  // EGP: end added switches
+
+  // EGP: pseudo-timestep parameters
+  real_prec N_eps_fac = params.find<real_prec>("N_eps_fac");
+  real_prec eps_fac = params.find<real_prec>("eps_fac");
+  real_prec eps_fac_initial = params.find<real_prec>("eps_fac_initial");
+  real_prec eps_fac_power = params.find<real_prec>("eps_fac_power");
+  real_prec s_eps_total_fac = params.find<real_prec>("s_eps_total_fac");
+  real_prec s_eps_total_scaling = params.find<real_prec>("s_eps_total_scaling");
+  int s_eps_total_Nx_norm = params.find<int>("s_eps_total_Nx_norm");
+
+  // EGP: adjusting epsilon parameters
+  int eps_fac_update_type = params.find<int>("eps_fac_update_type");
+  unsigned N_a_eps_update = params.find<unsigned>("N_a_eps_update");
+  real_prec acc_min = params.find<real_prec>("acc_min");
+  real_prec acc_max = params.find<real_prec>("acc_max");
+  int eps_down_smooth = params.find<int>("eps_down_smooth");
+  real_prec eps_up_fac = params.find<real_prec>("eps_up_fac");
+
+  // EGP: initial guess
+  int initial_guess = params.find<int>("initial_guess");
+  string initial_guess_file = params.find<string>("initial_guess_file");
+  int initial_guess_smoothing_type = params.find<int>("initial_guess_smoothing_type");
+  real_prec initial_guess_smoothing_scale = params.find<real_prec>("initial_guess_smoothing_scale");
+
+  // mass type
+  // bool mass_fs = params.find<bool>("mass_fs");
+  int mass_type = params.find<int>("mass_type");
+  ULONG massnum_init = params.find<ULONG>("massnum_burn");
+  ULONG massnum_burn = params.find<ULONG>("massnum_post");
+
+  // output
+  unsigned outnum = params.find<unsigned>("outnum");
+  unsigned outnum_ps = params.find<unsigned>("outnum_ps");
+
+  string dir=params.find<string>("dir");
+
+  unsigned N1=params.find<unsigned>("Nx");
+  unsigned N2=N1;
+  unsigned N3=N1;
+
+  real_prec L1=params.find<real_prec>("Lx");
+  real_prec L2=L1;
+  real_prec L3=L1;
+
+  real_prec xllc=params.find<real_prec>("xllc");
+  real_prec yllc=params.find<real_prec>("yllc");
+  real_prec zllc=params.find<real_prec>("zllc");
+
+  real_prec xobs=params.find<real_prec>("xobs");
+  real_prec yobs=params.find<real_prec>("yobs");
+  real_prec zobs=params.find<real_prec>("zobs");
+  bool planepar = params.find<bool>("planepar");
+  bool periodic = params.find<bool>("periodic");
+
+  ULONG seed=params.find<ULONG>("seed");
+
+  ULONG N_bin=params.find<ULONG>("N_bin");
+  ULONG N_Gibbs=params.find<ULONG>("N_Gibbs"); // EGP: added
+  ULONG total_steps_lim = params.find<ULONG>("total_steps_lim");
+
+  int masskernel=params.find<int>("masskernel");
+
+  real_prec z=params.find<real_prec>("z");
+  real_prec ascale = static_cast<real_prec>(1./(1.+z));
+
+  real_prec slength=params.find<real_prec>("slength");
+
+  bool readPS=params.find<bool>("readPS");
+
+  real_prec sigma_fac = params.find<real_prec>("sigma_fac");
+  real_prec sigma_min = params.find<real_prec>("sigma_min");
+  real_prec delta_min = params.find<real_prec>("delta_min");
+
+  bool correct_delta = params.find<bool>("correct_delta");
+
+  string fnamePS=params.find<string>("fnamePS");
+  cout << "fnamePS: " << fnamePS << endl;
+
+  data->cosmology->z = z;
+  cout<<"---> attention: redshift= "<<data->cosmology->z<<endl;
+  data->cosmology->ascale = ascale;
+
+  data->numerical->seed = seed;
+  cout<<"---> attention: seed= "<<data->numerical->seed<<endl;
+
+  data->numerical->slength = slength;
+  cout<<"---> attention: slength= "<<data->numerical->slength<<endl;
+
+  data->numerical->inputmode = inputmode;
+
+  data->numerical->random_test = random_test;
+  data->numerical->random_test_rsd = random_test_rsd;
+
+  data->numerical->window_type = window_type;
+
+  // EGP: absolute! Not w.r.t. dir.
+  data->numerical->dataFileName = dataFileName;
+
+  data->numerical->dir = dir;
+
+  data->numerical->N_bin = N_bin;
+  data->numerical->N_Gibbs = N_Gibbs; // EGP: added
+  if (total_steps_lim > 0) {
+    data->numerical->total_steps_lim = total_steps_lim;
+  } else {
+    data->numerical->total_steps_lim = ULONG_MAX;
+  }
+  data->numerical->count_attempts = 0;  // initialize at zero
+  // N.B.: when restarting, this will also stay at zero! In barcoderunner.cc,
+  // the performance log is read and this could also be used to reset this
+  // counter to its pre-restart value. However, after a second restart, this
+  // will no longer be accurate, because the part of the log that was written
+  // after the attempt in the first run from which the restart will continue
+  // is not properly taken into account.
+
+  data->numerical->readPS = readPS;
+
+  // EGP: also absolute!
+  data->numerical->fnamePS = fnamePS;
+
+  data->numerical->mk = masskernel;	
+
+  data->numerical->data_model = data_model;
+  data->numerical->negative_obs = negative_obs;
+  data->numerical->sfmodel = sfmodel;
+  data->numerical->rsd_model = rsd_model;
+  data->numerical->likelihood = likelihood;
+  data->numerical->prior = prior;
+
+  data->numerical->initial_guess = initial_guess;
+  data->numerical->initial_guess_file = initial_guess_file;
+  data->numerical->initial_guess_smoothing_type = initial_guess_smoothing_type;
+  data->numerical->initial_guess_smoothing_scale = initial_guess_smoothing_scale;
+
+  data->numerical->N1 = N1;		/* Number of cells per axis */
+  cout<<"---> attention: N1= "<<data->numerical->N1<<endl;
+  data->numerical->N2 = N2;		/* Number of cells per axis */
+  data->numerical->N3 = N3;		/* Number of cells per axis */
+
+  ULONG N = N1 * N2 * N3;
+  data->numerical->N = N;
+  data->numerical->Nhalf = N1 * N2 * (N3/2 + 1);
+
+  data->numerical->L1 = L1;		/* Length of axis */
+  data->numerical->L2 = L2;		/* Length of axis */
+  data->numerical->L3 = L3;		/* Length of axis */
+
+  data->numerical->vol = L1 * L2 * L3;
+
+  data->numerical->xllc = xllc;
+  data->numerical->yllc = yllc;
+  data->numerical->zllc = zllc;
+
+  data->numerical->xobs = xobs;
+  data->numerical->yobs = yobs;
+  data->numerical->zobs = zobs;
+  data->numerical->planepar = planepar;
+  data->numerical->periodic = periodic;
+
+  data->numerical->d1=L1/real_prec(N1);		/* grid spacing x-direction */
+  data->numerical->d2=L2/real_prec(N2);		/* grid spacing y-direction */
+  data->numerical->d3=L3/real_prec(N3);		/* grid spacing z-direction */
+
+  // "Observational" parameters
+  data->observational->sigma_fac = sigma_fac;
+  data->observational->sigma_min = sigma_min;
+  data->observational->delta_min = delta_min;
+
+  // EGP: pseudo-timestep parameters
+  // step number
+  data->numerical->N_eps_fac = N_eps_fac;
+  // step size
+  data->numerical->eps_fac_update_type = eps_fac_update_type;
+  if (eps_fac > 0)
+  {
+    data->numerical->eps_fac_target = eps_fac;
+  }
+  else
+  {
+    // power law fit (heuristic (calibration/epsilon/fit_powerlaw.py))
+    data->numerical->eps_fac_target = 2.38902581 * pow(N, -0.57495347);
+    cout << "  eps_fac_target determined to be " << data->numerical->eps_fac_target << endl;
+  }
+  if (eps_fac_initial > 0)
+  {
+    data->numerical->eps_fac_initial = eps_fac_initial;
+  }
+  else
+  {
+    data->numerical->eps_fac_initial = data->numerical->eps_fac_target;
+  }
+  // set actual initial eps_fac for running code:
+  switch (eps_fac_update_type) {
+    case 0:
+      data->numerical->eps_fac = data->numerical->eps_fac_target;
+      break;
+    case 1:
+      data->numerical->eps_fac = data->numerical->eps_fac_initial;
+      break;
+    case 2:
+    case 3:
+      if (eps_fac > 0) {
+        data->numerical->eps_fac = data->numerical->eps_fac_target;
+      } else {
+        data->numerical->eps_fac = 2.;
+      }
+      break;
+  }
+  
+  // other eps parameters (updating scheme):
+  data->numerical->eps_fac_power = eps_fac_power;
+  data->numerical->s_eps_total_fac = s_eps_total_fac;
+  data->numerical->s_eps_total_scaling = s_eps_total_scaling;
+  data->numerical->s_eps_total_Nx_norm = s_eps_total_Nx_norm;
+  real_prec grondtal = static_cast<real_prec>(N)/static_cast<real_prec>(pow(s_eps_total_Nx_norm,3));
+  data->numerical->s_eps_total = static_cast<ULONG>(ceil( s_eps_total_fac * pow(grondtal, s_eps_total_scaling) ));
+  if (data->numerical->s_eps_total <= 0)
+    data->numerical->s_eps_total = 1;
+  cout << "  s_eps_total determined to be " << data->numerical->s_eps_total << endl;
+
+  // eps_fac_update_type 2
+  data->numerical->N_a_eps_update = N_a_eps_update;
+  data->numerical->acc_min = acc_min;
+  data->numerical->acc_max = acc_max;
+  data->numerical->eps_down_smooth = eps_down_smooth;
+  data->numerical->eps_up_fac = eps_up_fac;
+  data->numerical->acc_flag_N_a = vector<bool>(N_a_eps_update);
+  data->numerical->epsilon_N_a = vector<real_prec>(N_a_eps_update);
+  // fill initially with initial eps_fac, so that there are no zeroes on restart
+  // (which would break the update function, which searches a minimum value,
+  // which should never be zero!)
+  fill(data->numerical->epsilon_N_a.begin(), data->numerical->epsilon_N_a.end(),
+       data->numerical->eps_fac);
+
+  data->numerical->acc_recent = vector<short>(N_a_eps_update);
+
+
+  // Mass type parameters
+  // data->numerical->mass_fs = mass_fs;
+  data->numerical->mass_type = mass_type;
+
+  // How often recompute the mass (before and after "burn-in")
+  if (massnum_init > 0) {
+    data->numerical->massnum_init = massnum_init;
+  } else {
+    data->numerical->massnum_init = N_Gibbs;
+  }
+  if (massnum_burn > 0) {
+    data->numerical->massnum_burn = massnum_burn;
+  } else {
+    data->numerical->massnum_burn = N_Gibbs;
+  }
+
+  // output
+  data->numerical->outnum = outnum;
+  data->numerical->outnum_ps = outnum_ps;
+
+  // Testing:
+  real_prec mass_factor = params.find<real_prec>("mass_factor");
+  data->numerical->mass_factor = mass_factor;
+  data->numerical->grad_psi_prior_factor = params.find<real_prec>("grad_psi_prior_factor");
+  data->numerical->grad_psi_likeli_factor = params.find<real_prec>("grad_psi_likeli_factor");
+  data->numerical->grad_psi_prior_conjugate = params.find<bool>("grad_psi_prior_conjugate");
+  data->numerical->grad_psi_likeli_conjugate = params.find<bool>("grad_psi_likeli_conjugate");
+  data->numerical->grad_psi_prior_times_i = params.find<bool>("grad_psi_prior_times_i");
+  data->numerical->grad_psi_likeli_times_i = params.find<bool>("grad_psi_likeli_times_i");
+  data->numerical->div_dH_by_N = params.find<bool>("div_dH_by_N");
+  data->numerical->calc_h = params.find<int>("calc_h");
+
+  data->numerical->deltaQ_factor = params.find<real_prec>("deltaQ_factor");
+
+
+  data->numerical->particle_kernel = params.find<int>("particle_kernel");
+
+  data->numerical->particle_kernel_h_rel = params.find<real_prec>("particle_kernel_h_rel");
+  if (data->numerical->particle_kernel_h_rel != 1.)
+  {
+    cout << "!!! Warning: particle_kernel_h_rel other than 1 for the SPH cubic spline kernel will cause problems! If it is smaller, not enough contributions from surrounding cells will be included. If it is larger, it will smooth the field too much, wiping out small scale structure." << endl;
+
+#ifdef DEBUG
+    char type;
+    do
+    {
+      cout << "Do you want to continue with particle_kernel_h_rel = " << data->numerical->particle_kernel_h_rel << "? [y/n]" << endl;
+      cin >> type;
+    }
+    while( !cin.fail() && type!='y' && type!='n' );
+
+    if (type == 'n') {
+      throw BarcodeException("entered n, aborting.");
+    }
+#endif  // DEBUG
+  }
+  if (data->numerical->particle_kernel_h_rel > static_cast<real_prec>(data->numerical->N1)/4) {
+    throw BarcodeException("A particle_kernel_h_rel of more than Nx/4 cells does not make sense for SPH kernels, since the diameter of the kernel will be larger than the width of the box!\nAborting.");
+  }
+
+  real_prec d1 = data->numerical->d1, d2 = data->numerical->d2, d3 = data->numerical->d3;
+  real_prec avg_cell_size = (d1 + d2 + d3)/3.;
+  data->numerical->particle_kernel_h = data->numerical->particle_kernel_h_rel * avg_cell_size;
+
+  // prepare SPH kernel cells vectors
+  int particle_kernel_type = data->numerical->particle_kernel;
+  real_prec particle_kernel_h = data->numerical->particle_kernel_h;
+  int N_cells = SPH_kernel_3D_cells_count(particle_kernel_type,
+                                          particle_kernel_h, d1, d2, d3);
+  std::vector<int> cells_i, cells_j, cells_k;
+  SPH_kernel_3D_cells(particle_kernel_type, particle_kernel_h, d1, d2, d3,
+                      cells_i, cells_j, cells_k);
+
+  // std::cerr << "cells_ijk:" << std::endl;
+  // for (int ix = 0; ix < N_cells; ++ix)
+  //   std::cerr << cells_i[ix] << " " << cells_j[ix] << " " << cells_k[ix] << std::endl;
+  // std::cerr << endl;
+
+  // std::vector< std::pair<int, int> > ij_out;
+  // std::vector<int> k_begin, k_last;
+  // SPH_kernel_3D_cells_hull_1(cells_i, cells_j, cells_k, ij_out, k_begin, k_last);
+
+  // std::cerr << "cells_ij{k_b}{k_l}:" << std::endl;
+  // int new_N = ij_out.size();
+  // std::cout << new_N << " " << k_begin.size() << " " << k_last.size() << std::endl;
+  // for (int ix = 0; ix < new_N; ++ix)
+  //   std::cerr << ij_out[ix].first << " " << ij_out[ix].second << " "
+  //            << k_begin[ix] << " " << k_last[ix] << std::endl;
+
+  // exit(1);
+
+  data->numerical->N_cells = N_cells;
+  data->numerical->kernel_cells_i = cells_i;
+  data->numerical->kernel_cells_j = cells_j;
+  data->numerical->kernel_cells_k = cells_k;
+
+  data->numerical->correct_delta = correct_delta;
+
+  cout<<endl;
+}
+
+void INIT_FFTW(struct DATA *data, real_prec *in_r2c, real_prec *out_c2r, complex_prec *in_c2r, complex_prec *out_r2c) {
+  struct NUMERICAL *n = data->numerical;
+  n->in_r2c = in_r2c;
+  n->out_c2r = out_c2r;
+  n->in_c2r = in_c2r;
+  n->out_r2c = out_r2c;
+  n->R2Cplan = new plan_pkg(n->N1, n->N2, n->N3, in_r2c, out_r2c);
+  n->C2Rplan = new plan_pkg(n->N1, n->N2, n->N3, in_c2r, out_c2r);
+}
+
+
+/* --- function init_cosmology --- */
+int INIT_COSMOLOGY(struct COSMOLOGY *c, string codename)
+{
+  //struct COSMOLOGY *c = data->cosmology;
+
+  //string codename=data->numerical->codename;
+  cout<<"\n >>> starting "<<codename<<"  ...\n"; 
+  cout<<endl;
+  cout << " >>> "<<codename<<" initialising cosmological model...."<<endl<<endl;
+
+  //real_prec Lx=data->numerical->L1;
+  //real_prec Ly=data->numerical->L2;
+  //real_prec Lz=data->numerical->L3;
+
+  //int Nx=data->numerical->N1;
+  //int Ny=data->numerical->N2;
+  //int Nz=data->numerical->N3;
+
+  //EGP  ULONG  N=Nx*Ny*Nz;
+
+  c->omega_r = 0.0;		/* negligible radiation density */
+  c->omega_k = 0.0;		/* curvature - flat prior for everything! */
+
+  switch (cmbcosm)
+  {	
+    //WMAP3      
+    case 1:
+      {
+        c->omega_m = static_cast<real_prec>(0.25);//WMPA3
+        c->omega_b = static_cast<real_prec>(0.0456);
+        c->omega_q = static_cast<real_prec>(1.)-c->omega_m;
+        c->w = -static_cast<real_prec>(1.0);
+        c->n_s = static_cast<real_prec>(1.0);//WMPA3
+        c->wprime = static_cast<real_prec>(0.0);
+        c->sigma8 = static_cast<real_prec>(0.9);//WMPA3
+        c->rsmooth = static_cast<real_prec>(8.0);
+        c->h = static_cast<real_prec>(0.73); //WMPA3
+        c->beta = static_cast<real_prec>(3.0 / 2.0);
+      }	
+      break;
+      //WMAP7  mean 
+    case 2:
+      {
+        c->omega_m = static_cast<real_prec>(0.272);//WMPA7
+        c->omega_b = static_cast<real_prec>(0.0456);
+        c->omega_q = static_cast<real_prec>(1.-c->omega_m);
+        c->w = static_cast<real_prec>(-1.0);
+        c->n_s = static_cast<real_prec>(0.963);//WMPA7
+        c->wprime = static_cast<real_prec>(0.0);
+        c->sigma8 = static_cast<real_prec>(0.809);//WMPA7
+        c->rsmooth = static_cast<real_prec>(8.0);
+        c->h = static_cast<real_prec>(0.704);//WMPA7
+        c->beta = static_cast<real_prec>(3.0 / 2.0);
+      }
+      break;
+      //WMAP7  max
+    case 3:
+      {
+        c->omega_m = static_cast<real_prec>(0.272);//WMPA7
+        c->omega_b = static_cast<real_prec>(0.046);
+        c->omega_q = static_cast<real_prec>(1.-c->omega_m);
+        c->w = static_cast<real_prec>(-1.0);
+        c->n_s = static_cast<real_prec>(0.961);//WMPA7
+        c->wprime = static_cast<real_prec>(0.0);
+        c->sigma8 = static_cast<real_prec>(0.807);//WMPA7
+        c->rsmooth = static_cast<real_prec>(8.0);
+        c->h = static_cast<real_prec>(0.702);//WMPA7
+        c->beta = static_cast<real_prec>(3.0 / 2.0);
+      }
+      break;
+    case 4:
+      {
+        //WMAP9 +eCMB+BAO+H0 best fit
+        c->omega_m = static_cast<real_prec>(0.28645);//WMPA9
+        c->omega_b = static_cast<real_prec>(0.04628);//WMPA9
+        c->omega_q = static_cast<real_prec>(1.-c->omega_m);
+        c->w = static_cast<real_prec>(-1.0);
+        c->n_s = static_cast<real_prec>(0.972);//WMPA9
+        c->wprime = static_cast<real_prec>(0.0);
+        c->sigma8 = static_cast<real_prec>(0.82);//WMPA9
+        c->rsmooth = static_cast<real_prec>(8.0);
+        c->h = static_cast<real_prec>(0.6932);//WMPA9
+        c->beta = static_cast<real_prec>(3.0 / 2.0);
+      }
+      break;
+  }
+
+  // growth factors
+  real_prec Omega_C = num_1 - c->omega_m - c->omega_q;
+
+  real_prec H0 = static_cast<real_prec>(100. * c->h *cgs_km/cgs_Mpc/cgs_sec);
+  real_prec H = H0 * sqrt(c->omega_m/c->ascale/c->ascale/c->ascale + c->omega_q + Omega_C/c->ascale/c->ascale);
+  real_prec Omega=c->omega_m / (((H/H0)*(H/H0))*(c->ascale*c->ascale*c->ascale));
+
+  real_prec D1 = D_growth(c->ascale, c->omega_m, c->omega_q, c->h);
+  c->D1 = D1;
+  cout<<"D1 = "<<D1<<endl;
+  real_prec fD2 = static_cast<real_prec>(pow(Omega,-1./143.));
+  real_prec D2 = static_cast<real_prec>(-3./7.*D1*D1*fD2);
+  c->D2=D2;
+  cout<<"D2 = "<<D2<<endl;
+
+  return(0);
+}
+
+void set_likelihood_functions(struct DATA *data)
+{
+  switch (data->numerical->likelihood)
+  {
+    case 0: // Poissonian 
+      data->observational->log_like = poissonian_likelihood_log_like;
+      data->observational->partial_f_delta_x_log_like = poissonian_likelihood_partial_f_delta_x_log_like;
+      data->observational->grad_f_delta_x_comp = poissonian_likelihood_grad_f_delta_x_comp;
+      break;
+    case 1: // Gaussian 
+      data->observational->log_like = gaussian_likelihood_log_like;
+      data->observational->partial_f_delta_x_log_like = gaussian_likelihood_partial_f_delta_x_log_like;
+      data->observational->grad_f_delta_x_comp = gaussian_likelihood_grad_f_delta_x_comp;
+      break;
+    case 2: // log-normal 
+      data->observational->log_like = lognormal_likelihood_log_like;
+      data->observational->partial_f_delta_x_log_like = lognormal_likelihood_partial_f_delta_x_log_like;
+      data->observational->grad_f_delta_x_comp = lognormal_likelihood_grad_f_delta_x_comp;
+      break;
+    case 3: // Gaussian Random Field (only prior, effectively)
+      data->observational->log_like = grf_likelihood_log_like;
+      data->observational->partial_f_delta_x_log_like = grf_likelihood_partial_f_delta_x_log_like;
+      data->observational->grad_f_delta_x_comp = grf_likelihood_grad_f_delta_x_comp;
+      break;
+  }
+}
+
+int INIT_OBSERVATIONAL(struct DATA *data,real_prec *POWER,real_prec *SIGNAL,real_prec *SIGNALX,real_prec *window,real_prec *NOISE_SF,real_prec *NOBS,real_prec *CORRF)
+{
+  string codename=data->numerical->codename;
+  cout << " >>> "<<codename<<": initialising observational settings...."<<endl<<endl;
+
+  data->observational->Power=POWER;		
+  data->observational->signal=SIGNAL;	
+  data->observational->signalX=SIGNALX;	
+  data->observational->nobs=NOBS;	
+  data->observational->window=window;	
+  data->observational->noise_sf=NOISE_SF;	
+  data->observational->corrf=CORRF;	
+
+  data->observational->biasP=static_cast<real_prec>(1.);
+  data->observational->biasE=static_cast<real_prec>(1.);
+  data->observational->mu=static_cast<real_prec>(1.);
+  data->observational->Nmean_Gal=static_cast<real_prec>(1.);
+  data->observational->rho_c = static_cast<real_prec>(1.);
+
+  set_likelihood_functions(data);
+
+  switch (data->numerical->prior)
+  {
+    case 0: // Gaussian
+      data->observational->log_prior = prior_gaussian_log_prior;
+      data->observational->grad_log_prior = prior_gaussian_grad_log_prior;
+      break;
+  }
+
+  return (0);
+}
diff --git a/init_par.h b/init_par.h
new file mode 100644
index 0000000..6b9b112
--- /dev/null
+++ b/init_par.h
@@ -0,0 +1,23 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#pragma once
+#include <string>
+void INIT_PARAMS(struct DATA *data);
+
+int INIT_COSMOLOGY(struct COSMOLOGY *c, std::string codename);
+
+void set_likelihood_functions(struct DATA *data);
+
+int INIT_OBSERVATIONAL(struct DATA *data, real_prec *POWER, real_prec *SIGNAL,
+                       real_prec *SIGNALX, real_prec *WINDOW,
+                       real_prec *NOISE_SF, real_prec *NOBS, real_prec *CORRF);
+
+void INIT_FFTW(struct DATA *d, real_prec *in_r2c, real_prec *out_c2r,
+               complex_prec *in_c2r, complex_prec *out_r2c);
diff --git a/input.par b/input.par
new file mode 100644
index 0000000..8f597b7
--- /dev/null
+++ b/input.par
@@ -0,0 +1,168 @@
+#---------------------------------------------------------------------
+# BARCODE: parameter file
+#---------------------------------------------------------------------
+correct_delta = true
+# calc_h:
+# Note: 0 and 1 are only kept in for reasons of lazyness. Only use 2 or 3!
+# WRONG   0: normal full calc_h
+# WRONG   1: only the dlnL/ddelta(x) term
+#         2: SPH calc_h, old version. More accurate, slower than option 3.
+#         3: SPH calc_h with Fourier/TSC calc_V. Less accurate, much faster.
+calc_h = 2
+particle_kernel = 0 # 0: SPH splines
+particle_kernel_h_rel = 1. # SPH splines kernel scale (unit relative to grid cell size; 1 means one grid cell size). Note: cell size is the average size, i.e. (d1 + d2 + d3)/3.)
+#---------------------------------------------------------------------
+inputmode = 0  # 0: random
+#---------------------------------------------------------------------
+seed  = 1   # seed (>0, otherwise depends on clock-time)
+#---------------------------------------------------------------------
+random_test = true
+random_test_rsd = false
+#---------------------------------------------------------------------
+# Window type for random_test:
+# 1: all ones
+# 10: half ones, half zeros
+# 23: put zeros where delta_eul <= 3, ones otherwise
+window_type = 1
+#---------------------------------------------------------------------
+# data_model is used mainly for the random_test. It must also be
+# compatible with the chosen likelihood; e.g. 0 for Gaussian
+# likelihood and 1 for log-normal likelihood.
+data_model = 0 # 0: additive error (linear model), 1: multiplicative error (logarithmic model)
+# negative_obs is also used in setup_random_test. When true, it allows
+# negative density values, which may be used to test idealized pdfs.
+negative_obs = false
+#---------------------------------------------------------------------
+# Likelihoods:
+# 0: Poissonian (integer observation field, e.g. galaxy counts!)
+# 1: Gaussian
+# 2: log-normal
+# 3: GRF (no evolution, just lagrangian)
+likelihood = 1
+prior = 0 # 0: Gaussian 
+#---------------------------------------------------------------------
+sfmodel = 1 # 0: Lognormal; 1: ZELDOVICH; 2: 2LPT; 3: ALPT (in Patchy code)
+#---------------------------------------------------------------------
+# N.B.: rsd_model only yet implemented in Gaussian likelihood with Zel'dovich model!
+rsd_model = false # include redshift space distortions in SF-model and statistical model
+#---------------------------------------------------------------------
+sigma_min = 1.0 # gaussian likelihood
+sigma_fac = 0.0 # gaussian and log-normal likelihoods
+delta_min = -0.999 # log-normal likelihood
+#---------------------------------------------------------------------
+initial_guess = 0 # 0: zero, 1: field from file, 2: GRF (with given PS), 3: smoothed GRF (with given PS) on scale 20 Mpc/h
+initial_guess_file = deltaLAGtest # when you want to use the solution for the random_test use deltaLAGtest (without .dat!).
+initial_guess_smoothing_type = 1 # 1: Gaussian
+initial_guess_smoothing_scale = 20. # [Mpc/h]
+#---------------------------------------------------------------------
+# number of leap-frog steps per MC iteration
+N_eps_fac = 8.0
+#---------------------------------------------------------------------
+
+# Epsilon (time step per leap-frog jump) schemes:
+
+# 0: constant epsilon, given by eps_fac
+# 1: updating epsilon; starts at eps_fac_initial, slowly decreases toward eps_fac
+# 2: adjusting epsilon; starts at eps_fac and adjusts automatically to get
+#    acceptance rate within given range.
+# 3: same as 2, but with extra fast initial stage; halving every step, until
+#    the first step is accepted.
+eps_fac_update_type = 3
+
+# eps_fac:
+# all: Set to zero to automatically let the code determine heuristic optimal
+#      value for given Nx gridsize.
+# 0: constant epsilon
+# 1: target epsilon, i.e. final eps_fac that will be reached after the process
+#    carried out by the parameters below.
+# 2: starting eps_fac
+eps_fac = 0.0
+
+# 1 (more options at bottom of input.par):
+eps_fac_initial = 0.5 # starting eps_fac for faster burn-in, from which it then slowly moves toward eps_fac set above
+eps_fac_power = 2 # the higher, the slower eps_fac will move towards the preset eps_fac, and vice-versa
+
+# 2:
+N_a_eps_update = 100
+acc_min = 0.6
+acc_max = 0.7
+eps_down_smooth = 5
+eps_up_fac = 1
+
+#---------------------------------------------------------------------
+# Mass type (either in Fourier space FS, real space R or both):
+# 0: R: all ones (essentially: no mass term)
+# 1: FS: inverse power spectrum
+# 2: FS+FS: inverse power spectrum + likelihood force spectrum
+# 3: FS+FS: inverse power spectrum + mean likelihood force (Wang+13)
+# 4: FS: power spectrum
+# 5: FS+R: inverse power spectrum + 1st order likelihood force expansion (Jasche+13)
+# 6: R: 1st order likelihood force expansion (Jasche+13)
+# 60: R: type 0 until burn-in, type 6 afterwards
+mass_type = 1
+
+massnum_burn = 0   # recompute masses each #, before burn-in
+massnum_post = 0   # recompute masses each #, after burn-in
+# in the above we define burn-in loosely as the massnum'th iteration, for now
+#---------------------------------------------------------------------
+outnum = 10 # number of output samples (1-10 will always be output)
+outnum_ps = 10 # same for power spectrum
+#---------------------------------------------------------------------
+file = /path/to/input/file.dat
+#---------------------------------------------------------------------
+filec = file.dat
+#---------------------------------------------------------------------
+readPS = true
+#---------------------------------------------------------------------
+fnamePS = /path/to/power/spectrum/like/for/instance/WMAP7_CAMB.dat
+#---------------------------------------------------------------------
+dir = ./data/
+#---------------------------------------------------------------------
+slength = 4.   # 
+#---------------------------------------------------------------------
+Nx = 64   # Number of pixels x 
+#---------------------------------------------------------------------
+Lx = 200.  # Box length in x-direction [Mpc/h] 
+#---------------------------------------------------------------------
+z  = .0  # redshift 
+#---------------------------------------------------------------------
+N_bin = 200
+#---------------------------------------------------------------------
+N_Gibbs = 10000      # abort when accepted steps exceed this number
+total_steps_lim = 0  # abort when accepted + rejected steps exceed this number
+#---------------------------------------------------------------------
+masskernel = 3
+#---------------------------------------------------------------------
+xllc = 0.
+yllc = 0.
+zllc = 0.
+#---------------------------------------------------------------------
+# Redshift space distortions:
+xobs = 90.
+yobs = 90.
+zobs = 90.
+planepar = true
+periodic = true
+#---------------------------------------------------------------------
+
+#---------------------------------------------------------------------
+# Testing:
+mass_factor = 1.
+grad_psi_prior_factor = 1.
+grad_psi_likeli_factor = 1.
+grad_psi_prior_conjugate = false
+grad_psi_likeli_conjugate = false
+grad_psi_prior_times_i = false
+grad_psi_likeli_times_i = false
+div_dH_by_N = false
+deltaQ_factor = 1.0 
+#---------------------------------------------------------------------
+
+# More epsilon options:
+
+# 1:
+# The values below can be edited, but these are pretty safe choices for
+# automated resolution dependent s_eps_total values:
+s_eps_total_fac = 158.0 # the factor with which the amount of iteration steps s between eps_fac updates varies with the total number of cells n (to some power), i.e. s_eps_total = fac * (n/Nx_norm^3)^scaling, where n = nx^3. 0.0003 was the result of a heuristic fit to test-runs. a higher value means that eps_fac is updated less frequently. based on later tests, 0.0003 was changed to 0.0006 with scaling of 0.5 instead of 1 as it was before. 0.0006, in the new new way of writing (adding the /Nx_norm) is equivalent to 158.0 at Nx_norm = 64.
+s_eps_total_Nx_norm = 64 # the resolution at which we normalize the s_eps_total law
+s_eps_total_scaling = 0.5 # see above
diff --git a/main.cc b/main.cc
new file mode 100644
index 0000000..f013b30
--- /dev/null
+++ b/main.cc
@@ -0,0 +1,223 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+/*
+BAyesian Reconstruction of COsimc DEnsity fields
+
+purpose: reconstruct initial and final fields compatible with a dark matter distribution (in redshift space)
+
+input  : cosmology, random seed
+output : initial and final density and velocity field on a cartesian grid
+*/
+
+// c headers
+#if defined(MULTITHREAD) | defined(MULTITHREAD_FFTW) | defined(MULTITHREAD_RNG)
+#include <omp.h>
+#endif  // openmp headers
+#include <fftw3.h>
+#include <ncurses.h>
+#include <gsl/gsl_spline.h>
+#include <gsl/gsl_randist.h>
+
+// c++ headers
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+#include <sstream>
+#ifndef RESTART_FILE
+#endif  // RESTART_FILE
+
+// barcode headers
+#include "struct_main.h"
+#include "fftw_array.h"
+#include "init_par.h"
+#include "IOfunctionsGen.h"
+#include "calc_power.h"
+#include "barcoderunner.h"
+#include "BarcodeException.h"
+
+// EGP: NaN detection (part 1)
+#ifdef NAN_DETECTION
+#ifdef __linux__
+#ifdef __GNUC__
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <cfenv>  // FE_INVALID, FE_OVERFLOW
+#endif  // __GNUC__
+#elif defined(__APPLE__)
+#ifdef __llvm__
+#include <xmmintrin.h>  // _MM_*
+#endif  // __llvm__
+#endif  // OSes
+#endif  // NAN_DETECTION
+// EGP: end NaN detection (part 1)
+
+using namespace std;
+
+int main(int argc, char *argv[]) {
+  // EGP: NaN detection (part 2)
+#ifdef NAN_DETECTION
+#ifdef __linux__
+#ifdef __GNUC__
+  feenableexcept(FE_INVALID | FE_OVERFLOW);
+#endif  // __GNUC__
+#elif defined(__APPLE__)
+#ifdef __llvm__
+  _MM_SET_EXCEPTION_MASK(_MM_GET_EXCEPTION_MASK() & ~static_cast<unsigned int>(_MM_MASK_INVALID));
+#endif  // __llvm__
+#endif  // OSes
+#endif  // NAN_DETECTION
+  // EGP: end NaN detection (part 2)
+
+#ifdef MULTITHREAD
+  printf("\nCompiled with MULTITHREAD. Using %d threads.\n*** Densities will "
+         "usually be slightly different every multi-core run, due to floating "
+         "point addition order! ***\n", omp_get_max_threads());
+#endif  // MULTITHREAD
+
+#ifdef MULTITHREAD_FFTW
+#ifdef SINGLE_PREC
+  fftwf_init_threads();
+  fftwf_plan_with_nthreads(omp_get_max_threads());
+#endif
+#ifdef DOUBLE_PREC
+  fftw_init_threads();
+  fftw_plan_with_nthreads(omp_get_max_threads());
+#endif
+  printf("Compiled with MULTITHREAD_FFTW support, with %dthreads\n",
+         omp_get_max_threads());
+#endif
+
+  // memory allocation before try/catch
+  struct DATA *data = new DATA;
+  struct CURSES_STRUCT *curses = new CURSES_STRUCT;
+  gsl_rng *gBaseRand = gsl_rng_alloc(gsl_rng_mt19937);
+
+  try {
+    if (data == NULL) {
+      throw BarcodeException("main: error allocating memory....");
+    }
+
+  /* read parameter file */
+    INIT_PARAMS(data);
+    struct NUMERICAL *n = data->numerical;
+    fftw_array<real_prec> in_r2c(n->N), out_c2r(n->N);
+    fftw_array<complex_prec> in_c2r(n->Nhalf), out_r2c(n->Nhalf);
+    INIT_FFTW(data, in_r2c, out_c2r, in_c2r, out_r2c);
+
+#ifndef RESTART_FILE
+    if (argc > 1) {
+      stringstream str;
+      unsigned restart_step;
+      str << argv[1];
+      str >> restart_step;
+      data->numerical->start_at = restart_step;
+    } else {
+      data->numerical->start_at = 0;
+    }
+#endif  // RESTART_FILE
+
+    // generate random number
+    ULONG seed = n->seed;
+
+    // EGP  const gsl_rng_type *T;
+    /*
+    if (seed==0) {
+      srand(time(NULL)); // initialization for rand()
+      seed = time(NULL); // changes random chain based on time  
+    }
+    */
+    gsl_rng_set(gBaseRand, seed);
+    // end generate random number
+
+    unsigned N1 = n->N1, N2 = n->N2, N3 = n->N3;
+
+    INIT_COSMOLOGY(data->cosmology, n->codename);
+
+    ULONG N = N1*N2*N3;
+
+    fftw_array<real_prec> A(N), B(N), C(N), D(N), E(N), F(N), G(N);
+
+    INIT_OBSERVATIONAL(data, A, B, C, D, E, F, G);
+
+    string fname = n->dir + string("powerero");
+    string fnameps = fname+string(".dat");
+
+    ifstream inStream;
+    inStream.open(fnameps.data());
+
+//    if (n->readPS == true) {
+      cout << endl;
+      cout << "reading power-spectrum from table!" << endl;
+
+      readtab(data);
+
+      dump_scalar(data->observational->Power, N1, N2, N3, fname);
+//    } else {
+//      if ( inStream.is_open() == true ) {
+//        cout << endl;
+//        cout << "power-spectrum was computed before!" << endl;
+//
+//        get_scalar(fname, data->observational->Power, N1, N2, N3);
+//      } else {
+//        cout << endl;
+//        cout << "computing power-spectrum from fitting formulae!" << endl;
+//
+//        initialize_pow_spec(data);
+//
+//        dump_scalar(data->observational->Power, N1, N2, N3, fname);
+//      }
+//    }
+
+    // initialize curses
+    data->curses = curses;
+    start_curses(data->curses);
+    init_windows(data->curses);
+    wprintw(data->curses->title, "BARCODE");
+    wrefresh(data->curses->title);
+    wprintw(data->curses->header, "sample cand eps   Neps K_i   pr_i  li_i  "
+            "K_f   pr_f  li_f  dH     P(a)  a a_N_a");
+    wrefresh(data->curses->header);
+    wrefresh(data->curses->table);
+
+    barcoderunner(data, gBaseRand);
+
+    delete n->R2Cplan;
+    delete n->C2Rplan;
+
+    end_curses(data->curses);
+  }
+  catch(BarcodeException &caught) {
+    end_curses(data->curses);
+    cout << endl << "ERROR: " << caught.what() << endl;
+  }
+
+  // clean up manually defined and c (gsl & fftw) stuff
+  gsl_rng_free(gBaseRand);
+
+  data->numerical->performance_log.close();
+  delete curses;
+
+  // delete data->numerical->R2Cplan;
+  // delete data->numerical->C2Rplan;
+
+  string codename = data->numerical->codename;
+  cout << "\n >>> " << codename << "  finished.\n";
+
+  delete data;
+
+#ifdef MULTITHREAD_FFTW
+#ifdef SINGLE_PREC
+  fftwf_cleanup_threads();
+#endif
+#ifdef DOUBLE_PREC
+  fftw_cleanup_threads();
+#endif
+#endif
+}
diff --git a/massFunctions.cc b/massFunctions.cc
new file mode 100644
index 0000000..4190e29
--- /dev/null
+++ b/massFunctions.cc
@@ -0,0 +1,654 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#include "define_opt.h"
+#include "struct_main.h"
+
+#include <math.h>
+#include <iomanip>
+
+#include <gsl/gsl_integration.h>
+
+#include "fftw_array.h"
+
+#include "math_funcs.h"
+
+#include "convenience.h"
+#include "BarcodeException.h"
+
+// EGP: time.h for benchmark tests
+#include <sys/time.h>
+
+using namespace std;
+
+void overdens(unsigned int N1, unsigned int N2, unsigned int N3, real_prec *in, real_prec *out)
+{
+  ULONG N=N1*N2*N3;
+
+  double nmeanD=0.;
+#ifdef MULTITHREAD
+#pragma omp parallel for reduction(+:nmeanD)
+#endif // MULTITHREAD
+  for(ULONG i=0;i<N;i++)
+    nmeanD+=static_cast<double>(in[i]);
+  real_prec nmean=static_cast<real_prec>(nmeanD)/static_cast<real_prec>(N);
+
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+  for (ULONG i=0;i<N;i++)
+    out[i]=in[i]/nmean-num_1;
+}
+
+void getDensity_NGP(unsigned int N1, unsigned int N2, unsigned int N3, real_prec L1, real_prec L2, real_prec L3,
+                    real_prec d1, real_prec d2, real_prec d3, real_prec min1, real_prec min2, real_prec min3,
+                    real_prec *xp, real_prec *yp, real_prec *zp, real_prec *Par_mass, ULONG N_OBJ, real_prec *delta)
+{
+
+  //EGP    real_prec xc, yc, zc;
+  //EGP    real_prec dx, dy, dz, tx, tz, ty;
+  unsigned i, j, k;//EGP, ii, jj, kk, n;
+
+
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+  for (i=0;i<N1; i++)
+    for (j=0; j<N2; j++)
+      for (k=0; k<N3; k++){
+        delta[k+N3*(j+N2*i)] = 0.;  //-1 if we want to calculate overdensity
+      }
+
+  ULONG NLOSS=0;
+#ifdef MULTITHREAD
+#pragma omp parallel for private(i,j,k)
+#endif // MULTITHREAD
+  for (ULONG n=0; n<N_OBJ; n++){ // EGP: declared n here instead of above as int
+
+    //check if particle is in selected Domain, else discard it
+    if((xp[n]>=min1 && xp[n]<min1+L1) && (yp[n]>=min2 && yp[n]<min2+L2) && (zp[n]>=min3 && zp[n]<min3+L3))
+    {
+
+      i = static_cast<unsigned>(floor((xp[n]-min1)/d1)); // indices of the cell of the particle
+      j = static_cast<unsigned>(floor((yp[n]-min2)/d2));
+      k = static_cast<unsigned>(floor((zp[n]-min3)/d3));
+
+
+      i = static_cast<unsigned>(fmod(real_prec(i),real_prec(N1)));
+      j = static_cast<unsigned>(fmod(real_prec(j),real_prec(N2)));
+      k = static_cast<unsigned>(fmod(real_prec(k),real_prec(N3)));
+
+
+      real_prec mass=Par_mass[n];
+#ifdef MULTITHREAD
+#pragma omp atomic
+#endif // MULTITHREAD
+      delta[k+N3*(j+N2*i)]    +=mass;
+
+    }
+    //else NLOSS++;
+  }
+  if(NLOSS!=0) cout << " >>> mass assignment found "<<NLOSS<<" particles outside mesh boundary...."<<endl<<endl;
+}
+
+void getDensity_CIC(ULONG N1, ULONG N2, ULONG N3,real_prec L1, real_prec L2, real_prec L3, real_prec d1, real_prec d2, real_prec d3, real_prec min1, real_prec min2, real_prec min3,real_prec *xp,real_prec *yp,real_prec *zp, real_prec *Par_mass, ULONG N_OBJ, real_prec *delta, bool weightmass)
+{
+#define DELTA(i,j,k) delta[k[2]+N3*(j[1]+N2*i[0])]
+
+  fillZero(delta, N1*N2*N3);
+
+  ULONG NLOSS=0;
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+  for (ULONG n=0; n<N_OBJ; n++){
+    real_prec dx[3], tx[3];
+    ULONG i[3], ii[3];
+
+   //check if particle is in selected Domain, else discard it
+    if((xp[n]>=min1 && xp[n]<min1+L1) && (yp[n]>=min2 && yp[n]<min2+L2) && (zp[n]>=min3 && zp[n]<min3+L3))
+    {	
+      getCICcells(N1, N2, N3, L1, L2, L3, d1, d2, d3, xp[n], yp[n], zp[n], i, ii);
+      getCICweights(L1, L2, L3, d1, d2, d3, xp[n], yp[n], zp[n], i, dx, tx);
+
+      // Take care this assumes periodic boundary conditions. This conserves Mass when using FFTs to deconvolve with CIC kernel, as the FFT assumes periodicity
+      real_prec mass=num_1;
+      if (weightmass==true)
+        mass=Par_mass[n];
+
+#ifdef MULTITHREAD
+#pragma omp atomic
+#endif // MULTITHREAD
+      DELTA(i,i,i)    += mass*tx[0]*tx[1]*tx[2];
+#ifdef MULTITHREAD
+#pragma omp atomic
+#endif // MULTITHREAD
+      DELTA(ii,i,i)   += mass*dx[0]*tx[1]*tx[2];
+#ifdef MULTITHREAD
+#pragma omp atomic
+#endif // MULTITHREAD
+      DELTA(i,ii,i)   += mass*tx[0]*dx[1]*tx[2];
+#ifdef MULTITHREAD
+#pragma omp atomic
+#endif // MULTITHREAD
+      DELTA(i,i,ii)   += mass*tx[0]*tx[1]*dx[2];
+#ifdef MULTITHREAD
+#pragma omp atomic
+#endif // MULTITHREAD
+      DELTA(ii,ii,i)  += mass*dx[0]*dx[1]*tx[2];
+#ifdef MULTITHREAD
+#pragma omp atomic
+#endif // MULTITHREAD
+      DELTA(ii,i,ii)  += mass*dx[0]*tx[1]*dx[2];
+#ifdef MULTITHREAD
+#pragma omp atomic
+#endif // MULTITHREAD
+      DELTA(i,ii,ii)  += mass*tx[0]*dx[1]*dx[2];
+#ifdef MULTITHREAD
+#pragma omp atomic
+#endif // MULTITHREAD
+      DELTA(ii,ii,ii) += mass*dx[0]*dx[1]*dx[2];
+    }
+    //else NLOSS++;	
+  }
+  if(NLOSS!=0) cout << " >>> ARGO: Mass assignment found "<<NLOSS<<" particles outside mesh boundary...."<<endl<<endl;
+
+#undef DELTA
+}
+
+
+void getDensity_TSC(unsigned int N1, unsigned int N2, unsigned int N3, real_prec L1, real_prec L2, real_prec L3,
+                    real_prec d1, real_prec d2, real_prec d3, real_prec min1, real_prec min2, real_prec min3,
+                    real_prec *xp, real_prec *yp, real_prec *zp, real_prec *Par_mass, ULONG N_OBJ, real_prec *delta)
+{
+
+  real_prec xc, yc, zc;
+  real_prec dx, dy, dz, hx0, hz0, hy0, hxp1,hyp1,hzp1, hxm1,hym1,hzm1;
+  unsigned i, j, k, ii, jj, kk,iii,jjj,kkk;//EGP, n;
+
+#define DELTA(i,j,k) delta[k+N3*(j+N2*i)]
+
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+  for (i=0;i<N1; i++)
+    for (j=0; j<N2; j++)
+      for (k=0; k<N3; k++){
+        DELTA(i,j,k) = 0.;  //-1 if we want to calculate overdensity
+      }
+
+  ULONG NLOSS=0;
+
+#ifdef MULTITHREAD
+#pragma omp parallel for private(xc, yc, zc, dx, dy, dz, hx0, hz0, hy0, hxp1,hyp1,hzp1, hxm1,hym1,hzm1, i, j, k, ii, jj, kk,iii,jjj,kkk)
+#endif // MULTITHREAD
+  for (ULONG n=0; n<N_OBJ; n++){// EGP: declared n here instead of above as int
+
+    //check if particle is in selected Domain, else discard it
+    if((xp[n]>=min1 && xp[n]<=min1+L1) && (yp[n]>=min2 && yp[n]<=min2+L2) && (zp[n]>=min3 && zp[n]<=min3+L3))
+    {
+
+      i = static_cast<unsigned>(floor((xp[n]-min1)/d1)); // indices of the cell of the particle
+      j = static_cast<unsigned>(floor((yp[n]-min2)/d2));
+      k = static_cast<unsigned>(floor((zp[n]-min3)/d3));
+
+
+      i = static_cast<unsigned>(fmod(real_prec(i),real_prec(N1)));
+      j = static_cast<unsigned>(fmod(real_prec(j),real_prec(N2)));
+      k = static_cast<unsigned>(fmod(real_prec(k),real_prec(N3)));
+
+      ii = static_cast<unsigned>(fmod(real_prec(i+1),real_prec(N1)));
+      jj = static_cast<unsigned>(fmod(real_prec(j+1),real_prec(N2)));
+      kk = static_cast<unsigned>(fmod(real_prec(k+1),real_prec(N3)));
+
+      iii= static_cast<unsigned>(fmod(real_prec(i-1+N1),real_prec(N1)));
+      jjj= static_cast<unsigned>(fmod(real_prec(j-1+N2),real_prec(N2)));
+      kkk= static_cast<unsigned>(fmod(real_prec(k-1+N3),real_prec(N3)));
+
+
+      //printf("%f, %f, %f\n", xp[n], yp[n], zp[n]);
+
+      xc = static_cast<real_prec>(i+0.5); // centers of the cells
+      yc = static_cast<real_prec>(j+0.5);
+      zc = static_cast<real_prec>(k+0.5);
+
+      dx = (xp[n]-min1)/d1 - xc; // distance of particle to center of the cell
+      dy = (yp[n]-min2)/d2 - yc;
+      dz = (zp[n]-min3)/d3 - zc;
+
+
+
+      hx0=static_cast<real_prec>(0.75-dx*dx); // fraction of particle assigned
+      hy0=static_cast<real_prec>(0.75-dy*dy); // fraction of particle assigned
+      hz0=static_cast<real_prec>(0.75-dz*dz); // fraction of particle assigned
+
+      hxp1=static_cast<real_prec>(0.5*(0.5+dx)*(0.5+dx)); // fraction of particle assigned
+      hyp1=static_cast<real_prec>(0.5*(0.5+dy)*(0.5+dy)); // fraction of particle assigned
+      hzp1=static_cast<real_prec>(0.5*(0.5+dz)*(0.5+dz)); // fraction of particle assigned
+
+      hxm1=static_cast<real_prec>(0.5*(0.5-dx)*(0.5-dx)); // fraction of particle assigned
+      hym1=static_cast<real_prec>(0.5*(0.5-dy)*(0.5-dy)); // fraction of particle assigned
+      hzm1=static_cast<real_prec>(0.5*(0.5-dz)*(0.5-dz)); // fraction of particle assigned
+
+
+
+      real_prec mass=Par_mass[n];
+#ifdef MULTITHREAD
+#pragma omp atomic
+#endif // MULTITHREAD
+      DELTA(i,j,k)            += mass*hx0*hy0*hz0;
+#ifdef MULTITHREAD
+#pragma omp atomic
+#endif // MULTITHREAD
+      DELTA(ii,jj,kk)         += mass*hxp1*hyp1*hzp1;
+#ifdef MULTITHREAD
+#pragma omp atomic
+#endif // MULTITHREAD
+      DELTA(iii,jjj,kkk)         += mass*hxm1*hym1*hzm1;
+
+#ifdef MULTITHREAD
+#pragma omp atomic
+#endif // MULTITHREAD
+      DELTA(ii,j,k)           += mass*hxp1*hy0*hz0;
+#ifdef MULTITHREAD
+#pragma omp atomic
+#endif // MULTITHREAD
+      DELTA(i,jj,k)           += mass*hx0*hyp1*hz0;
+#ifdef MULTITHREAD
+#pragma omp atomic
+#endif // MULTITHREAD
+      DELTA(i,j,kk)           += mass*hx0*hy0*hzp1;
+
+#ifdef MULTITHREAD
+#pragma omp atomic
+#endif // MULTITHREAD
+      DELTA(ii,jj,k)          += mass*hxp1*hyp1*hz0;
+#ifdef MULTITHREAD
+#pragma omp atomic
+#endif // MULTITHREAD
+      DELTA(ii,j,kk)          += mass*hxp1*hy0*hzp1;
+#ifdef MULTITHREAD
+#pragma omp atomic
+#endif // MULTITHREAD
+      DELTA(i,jj,kk)          += mass*hx0*hyp1*hzp1;
+
+#ifdef MULTITHREAD
+#pragma omp atomic
+#endif // MULTITHREAD
+      DELTA(iii,jj,kk)           += mass*hxm1*hyp1*hzp1;
+#ifdef MULTITHREAD
+#pragma omp atomic
+#endif // MULTITHREAD
+      DELTA(ii,jjj,kk)          += mass*hxp1*hym1*hzp1;
+#ifdef MULTITHREAD
+#pragma omp atomic
+#endif // MULTITHREAD
+      DELTA(ii,jj,kkk)          += mass*hxp1*hyp1*hzm1;
+
+#ifdef MULTITHREAD
+#pragma omp atomic
+#endif // MULTITHREAD
+      DELTA(iii,jjj,kk)          += mass*hxm1*hym1*hzp1;
+#ifdef MULTITHREAD
+#pragma omp atomic
+#endif // MULTITHREAD
+      DELTA(iii,jj,kkk)          += mass*hxm1*hyp1*hzm1;
+#ifdef MULTITHREAD
+#pragma omp atomic
+#endif // MULTITHREAD
+      DELTA(ii,jjj,kkk)          += mass*hxp1*hym1*hzm1;
+
+#ifdef MULTITHREAD
+#pragma omp atomic
+#endif // MULTITHREAD
+      DELTA(i,jjj,kkk)           += mass*hx0*hym1*hzm1;
+#ifdef MULTITHREAD
+#pragma omp atomic
+#endif // MULTITHREAD
+      DELTA(iii,j,kkk)          += mass*hxm1*hy0*hzm1;
+#ifdef MULTITHREAD
+#pragma omp atomic
+#endif // MULTITHREAD
+      DELTA(iii,jjj,k)          += mass*hxm1*hym1*hz0;
+
+#ifdef MULTITHREAD
+#pragma omp atomic
+#endif // MULTITHREAD
+      DELTA(i,j,kkk)          += mass*hx0*hy0*hzm1;
+#ifdef MULTITHREAD
+#pragma omp atomic
+#endif // MULTITHREAD
+      DELTA(i,jjj,k)          += mass*hx0*hym1*hz0;
+#ifdef MULTITHREAD
+#pragma omp atomic
+#endif // MULTITHREAD
+      DELTA(iii,j,k)          += mass*hxm1*hy0*hz0;
+
+#ifdef MULTITHREAD
+#pragma omp atomic
+#endif // MULTITHREAD
+      DELTA(i,jj,kkk)           += mass*hx0*hyp1*hzm1;
+#ifdef MULTITHREAD
+#pragma omp atomic
+#endif // MULTITHREAD
+      DELTA(ii,j,kkk)          += mass*hxp1*hy0*hzm1;
+
+#ifdef MULTITHREAD
+#pragma omp atomic
+#endif // MULTITHREAD
+      DELTA(iii,jj,k)           += mass*hxm1*hyp1*hz0;
+#ifdef MULTITHREAD
+#pragma omp atomic
+#endif // MULTITHREAD
+      DELTA(ii,jjj,k)          += mass*hxp1*hym1*hz0;
+
+#ifdef MULTITHREAD
+#pragma omp atomic
+#endif // MULTITHREAD
+      DELTA(i,jjj,kk)           += mass*hx0*hym1*hzp1;
+#ifdef MULTITHREAD
+#pragma omp atomic
+#endif // MULTITHREAD
+      DELTA(iii,j,kk)          += mass*hxm1*hy0*hzp1;
+    }
+  }
+  if(NLOSS!=0) cout << " >>> mass assignment found "<<NLOSS<<" particles outside mesh boundary...."<<endl<<endl;
+  // cout <<"TOTAL NUMBER OF OBJECTS = "<< N_OBJ <<endl;
+}
+
+real_prec SPH_kernel_3D(real_prec r, real_prec h)
+{
+  // N.B.: when using this for density estimation, you need to normalize the
+  // result afterwards with V/N! See e.g. getDensity_SPH. Same goes for
+  // grad_SPH_kernel_3D.
+  
+  // Monaghan kernel W_4
+  real_prec result = 0.;
+  real_prec q = r/h;
+  if (q < 0.) {
+    throw BarcodeException("In SPH_kernel_3D: r may not be smaller than zero!");
+  }
+  else if (q <= 1.)
+    result = 1./M_PI/(h*h*h) * (1 - 3./2*q*q + 3./4*q*q*q);
+  else if (q <= 2.)
+    result = 1./M_PI/(h*h*h) * (1./4 * gsl_pow_3(2.-q));
+
+  return(result);
+}
+
+
+//void getDensity_SPH(ULONG N1, ULONG N2, ULONG N3,real_prec L1, real_prec L2, real_prec L3, real_prec d1, real_prec d2, real_prec d3, real_prec min1, real_prec min2, real_prec min3,real_prec *xp,real_prec *yp,real_prec *zp, real_prec *Par_mass, ULONG N_OBJ, real_prec *delta, bool weightmass, real_prec kernel_h, bool old_cell_index) {
+void getDensity_SPH(ULONG, ULONG, ULONG, real_prec, real_prec, real_prec, real_prec, real_prec, real_prec, real_prec, real_prec, real_prec, real_prec *, real_prec *,real_prec *, real_prec *, ULONG, real_prec *, bool, real_prec, bool) {
+  std::cout << "This signature is deprecated, the old_cell_index parameter is no longer used. Use Mercurial revision 938 if you want to use the version with the old cell index for testing." << std::endl;
+}
+
+void getDensity_SPH(ULONG N1, ULONG N2, ULONG N3,real_prec L1, real_prec L2, real_prec L3, real_prec d1, real_prec d2, real_prec d3, real_prec min1, real_prec min2, real_prec min3,real_prec *xp,real_prec *yp,real_prec *zp, real_prec *Par_mass, ULONG N_OBJ, real_prec *delta, bool weightmass, real_prec kernel_h)
+{
+  // Note: current implementation assumes periodic boundary conditions!
+  //
+  // Note: all normalization stuff removed; it is unnecessary. This function now
+  // just gives rho.
+  //
+  // Warning: don't use with kernel_h larger than N{1,2,3}/4, otherwise the kernel diameter will be larger than the box width in that direction!
+  //          When using this function inside Barcode using the supplied INIT_PARAMS initialization function, this condition will always be enforced.
+  // Warning 2: Make sure N{1,2,3} are smaller than LONG_MAX, since they need to be cast to signed integers.
+
+  fillZero(delta, N1*N2*N3);
+
+  // First determine the reach of the kernel, which determines how many cells
+  // every particle needs to loop over to check for contributions. This is
+  // based on the kernel radius ( == 2*kernel_h ).
+  int reach1 = static_cast<int>(2*kernel_h/d1) + 1, reach2 = static_cast<int>(2*kernel_h/d2) + 1, reach3 = static_cast<int>(2*kernel_h/d3) + 1;
+  // cout << "reach1: " << reach1 << endl;
+
+  // For normalization later on:
+  //real_prec mass_total = 0;
+
+  ULONG NLOSS=0;
+#ifdef MULTITHREAD
+//#pragma omp parallel for reduction(+:mass_total)
+#pragma omp parallel for
+#endif // MULTITHREAD
+  for (ULONG n=0; n<N_OBJ; n++)
+  {
+    // check if particle is in selected domain, else discard it
+    if((xp[n] >= min1 && xp[n] < min1+L1) && (yp[n] >= min2 && yp[n] < min2+L2) && (zp[n] >= min3 && zp[n] < min3+L3))
+    {
+      real_prec mass = num_1;
+      if (weightmass == true)
+        mass = Par_mass[n];
+      //mass_total += mass;
+
+      // Determine central cell index where particle resides
+      ULONG ix = static_cast<ULONG>(xp[n]/d1);
+      ULONG iy = static_cast<ULONG>(yp[n]/d2);
+      ULONG iz = static_cast<ULONG>(zp[n]/d3);
+      // Central cell position:
+      real_prec ccx = (static_cast<real_prec>(ix) + 0.5)*d1;
+      real_prec ccy = (static_cast<real_prec>(iy) + 0.5)*d2;
+      real_prec ccz = (static_cast<real_prec>(iz) + 0.5)*d3;
+
+      // Loop over surrounding gridcells (including central cell itself) within
+      // kernel radius.
+      for(int i1 = -reach1; i1 <= reach1; ++i1)
+        for(int i2 = -reach2; i2 <= reach2; ++i2)
+          for(int i3 = -reach3; i3 <= reach3; ++i3)
+          {
+            // Cell position (relative to the central cell):
+            real_prec cx = ccx + static_cast<real_prec>(i1)*d1;
+            real_prec cy = ccy + static_cast<real_prec>(i2)*d2;
+            real_prec cz = ccz + static_cast<real_prec>(i3)*d3;
+            // Cell index, taking into account periodic boundary conditions:
+            ULONG kx, ky, kz;
+            kx = (static_cast<ULONG>(static_cast<long>(N1) + i1) + ix) % N1;  // checked in testcodes/signed_unsigned_periodic.cpp that signedness casts here doesn't cause bugs
+            ky = (static_cast<ULONG>(static_cast<long>(N2) + i2) + iy) % N2;
+            kz = (static_cast<ULONG>(static_cast<long>(N3) + i3) + iz) % N3;
+            // The above casts are necessary to ensure implicit signedness casts don't cause trouble.
+            // We assume here that kernel_h (and thus reach) <= N{1,2,3}/4 and N1 < LONG_MAX (the latter is true in Barcode, since Ni are usually uint, not ulong).
+            ULONG index = kz + N3*(ky + N2*kx);
+
+            real_prec diff_x = xp[n] - cx;
+            real_prec diff_y = yp[n] - cy;
+            real_prec diff_z = zp[n] - cz;
+            real_prec r = sqrt(diff_x*diff_x + diff_y*diff_y + diff_z*diff_z);
+            
+            if (r/kernel_h <= 2.)
+#ifdef MULTITHREAD
+#pragma omp atomic
+#endif // MULTITHREAD
+              //delta[index] += normalize * SPH_kernel_3D(r, kernel_h) * mass;
+              delta[index] += SPH_kernel_3D(r, kernel_h) * mass;
+          }
+    }
+    else
+      ++NLOSS;
+  }
+  if (NLOSS>0)
+    cout << "Lost " << NLOSS << " particles in getDensity_SPH." << endl;
+
+  // Normalization given that we want the integral over density to be V (left-
+  // hand side) and the integral over all particle kernels to be N (right-hand
+  // side).
+  // FIXME: this really isn't necessary at all. It's only necessary if you
+  // expect this function to give you rho/rho_c. If you want it to give you
+  // simply rho (in units of mass/volume), then you don't need it!
+  //real_prec normalize = L1*L2*L3/static_cast<real_prec>(N1*N2*N3); // Note: actually N1*N2*N3 was also wrong, should be N_OBJ!
+  //real_prec normalize = L1*L2*L3/mass_total;
+
+//#ifdef MULTITHREAD
+//#pragma omp parallel for
+//#endif // MULTITHREAD
+  //for (ULONG n = 0; n < N_OBJ; ++n)
+    //delta[n] *= normalize;
+
+}
+
+
+void cellbound(unsigned int N1, unsigned int N2, unsigned int N3, real_prec *v1, real_prec *v2, real_prec *v3)
+{
+  ULONG  N=N1*N2*N3;
+
+  /* start: interpolate from cell center to cell boundaries */
+  fftw_array<real_prec> vx(N),vy(N),vz(N);
+
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+  for (unsigned i=0;i<N1; i++)
+    for (unsigned j=0; j<N2; j++)
+      for (unsigned k=0; k<N3; k++)
+      {
+        ULONG l=k+N3*(j+N2*i);
+        ULONG m=k-1+N3*(j-1+N2*(i-1));
+
+        if (i>0 && j>0 && k>0)
+        {
+          vx[l]=static_cast<real_prec>(0.5)*(v1[m]+v1[l]);
+          vy[l]=static_cast<real_prec>(0.5)*(v2[m]+v2[l]);
+          vz[l]=static_cast<real_prec>(0.5)*(v3[m]+v3[l]);
+        }
+
+        /* periodic boundary conditions */
+
+        long ii=k-1+N3*(j-1+N2*(N1-1));
+        long jj=k-1+N3*(N2-1+N2*(i-1));
+        long kk=N3-1+N3*(j-1+N2*(i-1));
+        long ij=k-1+N3*(N2-1+N2*(N1-1));
+        long ik=N3-1+N3*(j-1+N2*(N1-1));
+        long jk=N3-1+N3*(N2-1+N2*(i-1));
+        long ijk=N3-1+N3*(N2-1+N2*(N1-1));
+
+        if (i==0 && j>0 && k>0)
+        {
+          vx[l]=static_cast<real_prec>(0.5)*(v1[ii]+v1[l]);
+          vy[l]=static_cast<real_prec>(0.5)*(v2[ii]+v2[l]);
+          vz[l]=static_cast<real_prec>(0.5)*(v3[ii]+v3[l]);
+        }
+        if (i==0 && j==0 && k>0)
+        {
+          vx[l]=static_cast<real_prec>(0.5)*(v1[ij]+v1[l]);
+          vy[l]=static_cast<real_prec>(0.5)*(v2[ij]+v2[l]);
+          vz[l]=static_cast<real_prec>(0.5)*(v3[ij]+v3[l]);
+        }
+        if (i==0 && j>0 && k==0)
+        {
+          vx[l]=static_cast<real_prec>(0.5)*(v1[ik]+v1[l]);
+          vy[l]=static_cast<real_prec>(0.5)*(v2[ik]+v2[l]);
+          vz[l]=static_cast<real_prec>(0.5)*(v3[ik]+v3[l]);
+        }
+        if (i==0 && j==0 && k==0)
+        {
+          vx[l]=static_cast<real_prec>(0.5)*(v1[ijk]+v1[l]);
+          vy[l]=static_cast<real_prec>(0.5)*(v2[ijk]+v2[l]);
+          vz[l]=static_cast<real_prec>(0.5)*(v3[ijk]+v3[l]);
+        }
+        if (i>0 && j==0 && k==0)
+        {
+          vx[l]=static_cast<real_prec>(0.5)*(v1[jk]+v1[l]);
+          vy[l]=static_cast<real_prec>(0.5)*(v2[jk]+v2[l]);
+          vz[l]=static_cast<real_prec>(0.5)*(v3[jk]+v3[l]);
+        }
+        if (i>0 && j==0 && k>0)
+        {
+          vx[l]=static_cast<real_prec>(0.5)*(v1[jj]+v1[l]);
+          vy[l]=static_cast<real_prec>(0.5)*(v2[jj]+v2[l]);
+          vz[l]=static_cast<real_prec>(0.5)*(v3[jj]+v3[l]);
+        }
+        if (i>0 && j>0 && k==0)
+        {
+          vx[l]=static_cast<real_prec>(0.5)*(v1[kk]+v1[l]);
+          vy[l]=static_cast<real_prec>(0.5)*(v2[kk]+v2[l]);
+          vz[l]=static_cast<real_prec>(0.5)*(v3[kk]+v3[l]);
+        }
+      }
+
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+  for (ULONG l=0;l<N; l++) // EGP: long -> ULONG
+  {
+    v1[l]=vx[l];
+    v2[l]=vy[l];
+    v3[l]=vz[l];
+  }
+  /* end: interpolate from cell center to cell boundaries */
+}
+
+void cellboundcomp(unsigned int N1, unsigned int N2, unsigned int N3, real_prec *vi)
+{
+  ULONG  N=N1*N2*N3;
+
+  /* start: interpolate from cell center to cell boundaries */
+  fftw_array<real_prec> viout(N);
+
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+  for (unsigned i = 0; i < N1; i++)
+    for (unsigned j = 0; j < N2; j++)
+      for (unsigned k = 0; k < N3; k++)
+      {
+        ULONG l=k+N3*(j+N2*i);
+        ULONG m=k-1+N3*(j-1+N2*(i-1));
+
+        if (i>0 && j>0 && k>0)
+        {
+          viout[l]=static_cast<real_prec>(0.5)*(vi[m]+vi[l]);
+        }
+
+        /* periodic boundary conditions */
+
+        ULONG ii=k-1+N3*(j-1+N2*(N1-1));
+        ULONG jj=k-1+N3*(N2-1+N2*(i-1));
+        ULONG kk=N3-1+N3*(j-1+N2*(i-1));
+        ULONG ij=k-1+N3*(N2-1+N2*(N1-1));
+        ULONG ik=N3-1+N3*(j-1+N2*(N1-1));
+        ULONG jk=N3-1+N3*(N2-1+N2*(i-1));
+        ULONG ijk=N3-1+N3*(N2-1+N2*(N1-1));
+
+        if (i==0 && j>0 && k>0)
+        {
+          viout[l]=static_cast<real_prec>(0.5)*(vi[ii]+vi[l]);
+        }
+        if (i==0 && j==0 && k>0)
+        {
+          viout[l]=static_cast<real_prec>(0.5)*(vi[ij]+vi[l]);
+        }
+        if (i==0 && j>0 && k==0)
+        {
+          viout[l]=static_cast<real_prec>(0.5)*(vi[ik]+vi[l]);
+        }
+        if (i==0 && j==0 && k==0)
+        {
+          viout[l]=static_cast<real_prec>(0.5)*(vi[ijk]+vi[l]);
+        }
+        if (i>0 && j==0 && k==0)
+        {
+          viout[l]=static_cast<real_prec>(0.5)*(vi[jk]+vi[l]);
+        }
+        if (i>0 && j==0 && k>0)
+        {
+          viout[l]=static_cast<real_prec>(0.5)*(vi[jj]+vi[l]);
+        }
+        if (i>0 && j>0 && k==0)
+        {
+          viout[l]=static_cast<real_prec>(0.5)*(vi[kk]+vi[l]);
+        }
+      }
+
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+  for (ULONG l = 0; l < N; l++)
+  {
+    vi[l]=viout[l];
+  }
+  /* end: interpolate from cell center to cell boundaries */
+}
+
diff --git a/massFunctions.h b/massFunctions.h
new file mode 100644
index 0000000..d2a03cb
--- /dev/null
+++ b/massFunctions.h
@@ -0,0 +1,33 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#pragma once
+
+void cellbound(unsigned int N1, unsigned int N2, unsigned int N3, real_prec *v1, real_prec *v2, real_prec *v3);
+
+void cellboundcomp(unsigned int N1, unsigned int N2, unsigned int N3, real_prec *vi);
+
+void getDensity_NGP(unsigned int N1, unsigned int N2, unsigned int N3, real_prec L1, real_prec L2, real_prec L3,
+                    real_prec d1, real_prec d2, real_prec d3, real_prec min1, real_prec min2, real_prec min3,
+                    real_prec *xp, real_prec *yp, real_prec *zp, real_prec *Par_mass, ULONG N_OBJ, real_prec *delta);
+
+void getDensity_CIC(ULONG N1, ULONG N2, ULONG N3,real_prec L1, real_prec L2, real_prec L3, real_prec d1, real_prec d2, real_prec d3, real_prec min1, real_prec min2, real_prec min3,real_prec *xp,real_prec *yp,real_prec *zp, real_prec *Par_mass, ULONG N_OBJ, real_prec *delta, bool weightmass);
+void getDensity_CIC_old(unsigned int N1, unsigned int N2, unsigned int N3, real_prec L1, real_prec L2, real_prec L3,
+                        real_prec d1, real_prec d2, real_prec d3, real_prec min1, real_prec min2, real_prec min3,
+                        real_prec *xp, real_prec *yp, real_prec *zp, real_prec *Par_mass, ULONG N_OBJ, real_prec *delta);
+
+void getDensity_TSC(unsigned int N1, unsigned int N2, unsigned int N3, real_prec L1, real_prec L2, real_prec L3,
+                    real_prec d1, real_prec d2, real_prec d3, real_prec min1, real_prec min2, real_prec min3,
+                    real_prec *xp, real_prec *yp, real_prec *zp, real_prec *Par_mass, ULONG N_OBJ, real_prec *delta);
+
+real_prec SPH_kernel_3D(real_prec r, real_prec h);
+void getDensity_SPH(ULONG N1, ULONG N2, ULONG N3,real_prec L1, real_prec L2, real_prec L3, real_prec d1, real_prec d2, real_prec d3, real_prec min1, real_prec min2, real_prec min3,real_prec *xp,real_prec *yp,real_prec *zp, real_prec *Par_mass, ULONG N_OBJ, real_prec *delta, bool weightmass, real_prec kernel_h);
+void getDensity_SPH(ULONG N1, ULONG N2, ULONG N3,real_prec L1, real_prec L2, real_prec L3, real_prec d1, real_prec d2, real_prec d3, real_prec min1, real_prec min2, real_prec min3,real_prec *xp,real_prec *yp,real_prec *zp, real_prec *Par_mass, ULONG N_OBJ, real_prec *delta, bool weightmass, real_prec kernel_h, bool old_cell_index);
+
+void overdens(unsigned int N1, unsigned int N2, unsigned int N3, real_prec *in, real_prec *out);
diff --git a/math_funcs.cc b/math_funcs.cc
new file mode 100644
index 0000000..62d88c5
--- /dev/null
+++ b/math_funcs.cc
@@ -0,0 +1,1651 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#include "struct_main.h"
+
+#include <math.h>
+#include <stdlib.h>
+#include <stdexcept>  // std::logic_error
+
+#include <gsl/gsl_randist.h>
+#include <gsl/gsl_integration.h>
+
+#include <string>
+
+#include "fftw_array.h"
+#include "planck/paramfile.h"
+#include "IOfunctionsGen.h"
+
+#include "BarcodeException.h"
+
+#include "convenience.h"
+
+// EGP: for nth_element, max_element, min_element:
+#include <algorithm>
+
+// EGP: resolution independent random grid
+#include "random.hpp"
+
+/*
+   F.S. Kitaura 2007-2013
+   */
+
+using namespace std;
+
+////////////////////////////////////////////////////////////////
+// General scalar math functions
+////////////////////////////////////////////////////////////////
+
+// EGP: added odd to check for odd-ness (instead of even-ness).
+// Taken from http://forums.devshed.com/software-design-43/quick-algorithm-to-determine-odd-even-numbers-29843.html
+int odd(int inputval)
+{
+  return inputval & 1;
+}
+
+real_prec factorial(int term)
+{
+  real_prec out=1.0;
+
+  if (term>0)
+    for (int i=1;i<=term;i++)
+      out*=static_cast<real_prec>(i*1.0);
+  else
+    out=static_cast<real_prec>(1.);
+
+  return out;
+}
+
+real_prec power_mean(real_prec x, real_prec y, real_prec p)
+{
+  real_prec mean;
+  if (p == 0)
+  {
+    mean = sqrt(x*y);
+  }
+  else
+  {
+    mean = pow( ( pow(x, p) + pow(y, p) )/2, 1/p);
+  }
+  return mean;
+}
+
+
+
+
+////////////////////////////////////////////////////////////////
+// Array functions / operators
+// TODO: when the new array class (vector derived) is done, make
+//       versions of these functions that take such a vector as
+//       argument (by reference).
+////////////////////////////////////////////////////////////////
+
+real_prec min_arr ( ULONG factor, real_prec *in )
+{
+  real_prec *firstn=in;
+  real_prec *lastn=in+factor;
+  real_prec minn;
+  //get min
+  real_prec *min=min_element(firstn,lastn);
+  minn = *min;
+  return minn;
+}
+
+real_prec max_arr ( ULONG factor, real_prec *in )
+{
+  real_prec *firstn=in;
+  real_prec *lastn=in+factor;
+  real_prec max;
+  //get max
+  real_prec *maxn = max_element(firstn,lastn);
+  max =*maxn;
+  return max;
+}
+
+// EGP: added mean_arr
+real_prec mean_arr ( ULONG size, real_prec *in )
+{
+  real_prec mean = 0.0;
+#ifdef MULTITHREAD
+#pragma omp parallel for reduction(+:mean)
+#endif // MULTITHREAD
+  for(ULONG i=0; i<size; i++)
+    mean+=in[i];
+  return mean/static_cast<real_prec>(size);
+}
+
+// EGP: added median_arr
+real_prec median_arr (ULONG size, real_prec *in)
+{
+  //real_prec * copy = (real_prec *) malloc(size * sizeof(real_prec));
+  //real_prec * copy = reinterpret_cast<real_prec *>( malloc(size * sizeof(real_prec))) ); // EGP: fixes "old style cast" warning
+  real_prec *copy = new real_prec[size]; // EGP: suggested by Johan; C++ way of doing it
+  //fftw_array<real_prec> copy(size);
+  copyArray(in, copy, size);
+
+  real_prec median;
+
+  if (odd(static_cast<int>(size)))
+  {
+    nth_element(copy, copy + size/2, copy + size);
+    median = copy[size/2];
+  }
+  else
+  {
+    nth_element(copy, copy + size/2 - 1, copy + size);
+    median = copy[size/2-1]/2.;
+    median += *min_element(copy + size/2, copy + size)/2.;
+  }
+
+  //free( copy );
+  delete [] copy; // EGP: this should be used when using new, not free!!!
+  return median;
+}
+
+// EGP: added std_arr
+real_prec std_arr (ULONG size, real_prec *in)
+{
+  real_prec s_sq = 0.;
+  real_prec mean = mean_arr(size, in);
+#ifdef MULTITHREAD
+#pragma omp parallel for reduction(+:s_sq)
+#endif // MULTITHREAD
+  for (ULONG i = 0; i < size; i++)
+    s_sq += pow(in[i] - mean, 2);
+  s_sq /= static_cast<real_prec>(size-1); // "corrected sample standard deviation" (squared)
+
+  return sqrt(s_sq);
+}
+
+void complexfactor_mult (ULONG factor, real_prec in_a, complex_prec *in_b, complex_prec *out )
+{
+  for (ULONG i = 0; i < factor; i++)
+  {
+    re(out[i]) = in_a*re(in_b[i]);
+    im(out[i]) = in_a*im(in_b[i]);
+  }
+}
+
+
+
+
+////////////////////////////////////////////////////////////////
+// Pacman functions
+////////////////////////////////////////////////////////////////
+
+// EGP: convenience function for applying periodic boundary conditions on a
+// single coordinate.
+// Puts coordinate in [0,L) range; have to exclude L, because it is equal to 0!
+void pacman_coordinate(real_prec *x, real_prec L) {
+  if (*x < 0.) {
+    *x = fmod(*x, L);
+    *x += L;
+  }
+  if (*x >= L) {
+    *x = fmod(*x, L);
+  }
+}
+
+// Convenience function for applying periodic boundary conditions on
+// coordinate *distances*. This differs from absolute coordinates, because
+// distances can never be longer than half a boxsize; if it is longer, then
+// the distance to reach it the other way around is shorter, and we want
+// the shortest distance.
+// N.B.: only for single coordinate components, not for full distances
+// (so not for dr = sqrt(dx**2 + dy**2 + dz**2))!
+// N.B. 2: assumes the differences are already in pacman coordinates, so
+// not larger than the boxsize L. Use pacman_coordinate function first
+// otherwise.
+// So put d_x in [-L/2,L/2] range (note: unlike pacman_coordinate, this is a
+// range with two inclusive boundaries).
+void pacman_difference(real_prec *d_x, real_prec L) {
+  if (*d_x > L/2)
+    *d_x = L - *d_x;
+  if (*d_x < -(L/2))
+    *d_x = L + *d_x;
+}
+
+real_prec pacman_d_x_from_d_x(real_prec d_x, real_prec L) {
+  if (d_x > L/2)
+    d_x = L - d_x;
+  if (d_x < -(L/2))
+    d_x = L + d_x;
+  return d_x;
+}
+
+// Put d_ix in [-N/2,N/2] range (again, two inclusive boundaries)
+int pacman_d_ix_from_d_ix(int d_ix, int N) {
+  if (d_ix > N/2)
+    d_ix = N - d_ix;
+  if (d_ix < -(N/2))
+    d_ix = N + d_ix;
+  return d_ix;
+}
+
+
+
+
+////////////////////////////////////////////////////////////////
+// CIC: helper stuff and interpolation
+////////////////////////////////////////////////////////////////
+
+
+// EGP: convenience function for getting the 8 nearest cells to a coordinate.
+// Since these 8 form a cube, only two cells need to be returned, those that
+// form a diagonal of the cube. cell_index1 is the one whose cell center
+// coordinates xc_1 are lower than the coordinate x's, i.e. xc_1 < x, etc., and
+// cell_index2 has the higher cell center coordinates xc_2, xc_2 > x.
+// N.B.: cell_index arrays must have length 3!
+void getCICcells(ULONG N1, ULONG N2, ULONG N3, real_prec L1, real_prec L2, real_prec L3, real_prec d1, real_prec d2, real_prec d3, real_prec x, real_prec y, real_prec z, ULONG *cell_index1, ULONG *cell_index2)
+{
+  real_prec xpos, ypos, zpos;
+  
+  xpos = x - num_0_5*d1;
+  ypos = y - num_0_5*d2;
+  zpos = z - num_0_5*d3;
+  // periodic boundary conditions:
+  pacman_coordinate(&xpos, L1);
+  pacman_coordinate(&ypos, L2);
+  pacman_coordinate(&zpos, L3);
+
+  cell_index1[0] = static_cast<ULONG>(xpos/d1); // indices of the cell of the particle
+  cell_index1[1] = static_cast<ULONG>(ypos/d2);
+  cell_index1[2] = static_cast<ULONG>(zpos/d3);
+
+  cell_index1[0] = (cell_index1[0] + N1) % N1; // periodic boundary conditions
+  cell_index1[1] = (cell_index1[1] + N2) % N2;
+  cell_index1[2] = (cell_index1[2] + N3) % N3;
+
+  cell_index2[0] = (cell_index1[0] + 1) % N1;
+  cell_index2[1] = (cell_index1[1] + 1) % N2;
+  cell_index2[2] = (cell_index1[2] + 1) % N3;
+}
+
+// EGP: CIC weights, used in the CIC density estimator and CIC interpolator.
+// tx is just 1-dx. Note that these arrays must have length 3, and cell_index1
+// as well. The latter can be calculated using getCICcells.
+void getCICweights(real_prec L1, real_prec L2, real_prec L3, real_prec d1, real_prec d2, real_prec d3, real_prec x, real_prec y, real_prec z, ULONG *cell_index1, real_prec *dx, real_prec *tx)
+{
+  real_prec xpos, ypos, zpos, xc, yc, zc;
+  
+  xpos = x - num_0_5*d1;
+  ypos = y - num_0_5*d2;
+  zpos = z - num_0_5*d3;
+// periodic boundary conditions:
+  pacman_coordinate(&xpos, L1);
+  pacman_coordinate(&ypos, L2);
+  pacman_coordinate(&zpos, L3);
+
+  xc = static_cast<real_prec>(cell_index1[0]); 
+  yc = static_cast<real_prec>(cell_index1[1]);
+  zc = static_cast<real_prec>(cell_index1[2]);
+
+  dx[0] = xpos/d1 - xc;
+  dx[1] = ypos/d2 - yc;
+  dx[2] = zpos/d3 - zc;
+
+  tx[0] = num_1 - dx[0];
+  tx[1] = num_1 - dx[1];
+  tx[2] = num_1 - dx[2];
+}
+
+// Single coordinate interpolator without all the array mess.
+real_prec interpolate_CIC(ULONG N1, ULONG N2, ULONG N3, real_prec L1,
+                          real_prec L2, real_prec L3, real_prec d1,
+                          real_prec d2, real_prec d3, real_prec xp,
+                          real_prec yp, real_prec zp, real_prec *input) {
+  real_prec dx[3], tx[3];
+  ULONG i[3], ii[3];
+  getCICcells(N1, N2, N3, L1, L2, L3, d1, d2, d3, xp, yp, zp, i, ii);
+  getCICweights(L1, L2, L3, d1, d2, d3, xp, yp, zp, i, dx, tx);
+
+  #define FIELD(i, j, k) input[k[2]+N3*(j[1]+N2*i[0])]
+  real_prec output = FIELD(i, i, i)    * tx[0]*tx[1]*tx[2] +
+                     FIELD(ii, i, i)   * dx[0]*tx[1]*tx[2] +
+                     FIELD(i, ii, i)   * tx[0]*dx[1]*tx[2] +
+                     FIELD(i, i, ii)   * tx[0]*tx[1]*dx[2] +
+                     FIELD(ii, ii, i)  * dx[0]*dx[1]*tx[2] +
+                     FIELD(ii, i, ii)  * dx[0]*tx[1]*dx[2] +
+                     FIELD(i, ii, ii)  * tx[0]*dx[1]*dx[2] +
+                     FIELD(ii, ii, ii) * dx[0]*dx[1]*dx[2];
+  #undef FIELD
+
+  return output;
+}
+
+
+// EGP: this CIC interpolator was added because getDensity_CIC was used to do
+// this, which obviously doesn't work.
+void interpolate_CIC(ULONG N1, ULONG N2, ULONG N3, real_prec L1, real_prec L2,
+                     real_prec L3, real_prec d1, real_prec d2, real_prec d3,
+                     real_prec *xp, real_prec *yp, real_prec *zp,
+                     real_prec *field_grid, ULONG N_OBJ,
+                     real_prec *interpolation) {
+  #ifdef MULTITHREAD
+  #pragma omp parallel for
+  #endif  // MULTITHREAD
+  for (ULONG n = 0; n < N_OBJ; n++) {
+    interpolation[n] = interpolate_CIC(N1, N2, N3, L1, L2, L3, d1, d2, d3,
+                                       xp[n], yp[n], zp[n], field_grid);
+  }
+}
+
+
+////////////////////////////////////////////////////////////////
+// TSC: interpolation
+////////////////////////////////////////////////////////////////
+// Interpolating with TSC is in a sense even easier than CIC,
+// since the 27 nearest cells are trivially found; they are the
+// cells around the cell that contains the particle. 
+// Kernel values taken from p. 8 of:
+// http://www-hpcc.astro.washington.edu/simulations/DARK_MATTER/adap.ps
+////////////////////////////////////////////////////////////////
+
+// Single coordinate interpolator without all the array mess.
+real_prec interpolate_TSC(ULONG N1_ul, ULONG N2_ul, ULONG N3_ul, real_prec d1, real_prec d2, real_prec d3, real_prec xp,
+                          real_prec yp, real_prec zp, real_prec *field) {
+  unsigned N1 = static_cast<unsigned>(N1_ul);
+  unsigned N2 = static_cast<unsigned>(N2_ul);
+  unsigned N3 = static_cast<unsigned>(N3_ul);
+
+  real_prec output = 0;
+  // particle coordinates in "kernel units" (divided by d1/2/3)
+  real_prec xp_kern = xp/d1;
+  real_prec yp_kern = yp/d2;
+  real_prec zp_kern = zp/d3;
+  // indices of the cell of the particle
+  unsigned ix_x_cell = static_cast<unsigned>(xp_kern);
+  unsigned ix_y_cell = static_cast<unsigned>(yp_kern);
+  unsigned ix_z_cell = static_cast<unsigned>(zp_kern);
+  // cell_center coordinates (kernel units)
+  real_prec x_cell = static_cast<real_prec>(ix_x_cell) + 0.5;
+  real_prec y_cell = static_cast<real_prec>(ix_y_cell) + 0.5;
+  real_prec z_cell = static_cast<real_prec>(ix_z_cell) + 0.5;
+  // distance of particle from cell center
+  real_prec dx_p_cell = xp_kern - x_cell;
+  real_prec dy_p_cell = yp_kern - y_cell;
+  real_prec dz_p_cell = zp_kern - z_cell;
+
+  // weights per dimension
+  real_prec wx[3], wy[3], wz[3];
+  wx[1] = 0.75 - dx_p_cell*dx_p_cell;
+  wy[1] = 0.75 - dy_p_cell*dy_p_cell;
+  wz[1] = 0.75 - dz_p_cell*dz_p_cell;
+  wx[0] = 0.5 * gsl_pow_2(1.5 - abs(dx_p_cell+1));
+  wy[0] = 0.5 * gsl_pow_2(1.5 - abs(dy_p_cell+1));
+  wz[0] = 0.5 * gsl_pow_2(1.5 - abs(dz_p_cell+1));
+  wx[2] = 0.5 * gsl_pow_2(1.5 - abs(dz_p_cell-1));
+  wy[2] = 0.5 * gsl_pow_2(1.5 - abs(dz_p_cell-1));
+  wz[2] = 0.5 * gsl_pow_2(1.5 - abs(dz_p_cell-1));
+
+  // indices per dimension, periodic box-ized
+  unsigned ix_x[3], ix_y[3], ix_z[3];
+  ix_x[1] = ix_x_cell;
+  ix_y[1] = ix_y_cell;
+  ix_z[1] = ix_z_cell;
+  ix_x[0] = (ix_x_cell - 1 + N1) % N1;
+  ix_y[0] = (ix_y_cell - 1 + N2) % N2;
+  ix_z[0] = (ix_z_cell - 1 + N3) % N3;
+  ix_x[2] = (ix_x_cell + 1) % N1;
+  ix_y[2] = (ix_y_cell + 1) % N2;
+  ix_z[2] = (ix_z_cell + 1) % N3;
+
+  // add the 27 contributions
+  for (int i = 0; i < 3; ++i)
+    for (int j = 0; j < 3; ++j)
+      for (int k = 0; k < 3; ++k) {
+        unsigned ix_f = (ix_x[i]*N2 + ix_y[j])*N3 + ix_z[k];
+        output  +=    wx[i]    *   wy[j]     *   wz[k] * field[ix_f] ;
+      }
+
+  return output;
+}
+
+
+void interpolate_TSC(ULONG N1, ULONG N2, ULONG N3, real_prec d1, real_prec d2, real_prec d3, real_prec *xp, real_prec *yp,
+                     real_prec *zp, real_prec *field_grid, ULONG N_OBJ, real_prec *interpolation) {
+  #ifdef MULTITHREAD
+  #pragma omp parallel for
+  #endif  // MULTITHREAD
+  for (ULONG n = 0; n < N_OBJ; n++) {
+    interpolation[n] = interpolate_TSC(N1, N2, N3, d1, d2, d3, xp[n], yp[n], zp[n], field_grid);
+  }
+}
+
+
+// multi-field variant
+// Single coordinate interpolator without all the array mess.
+void interpolate_TSC_multi(ULONG N1_ul, ULONG N2_ul, ULONG N3_ul, real_prec d1, real_prec d2, real_prec d3, real_prec xp,
+                           real_prec yp, real_prec zp, real_prec **fields, int N_fields, real_prec *out) {
+  unsigned N1 = static_cast<unsigned>(N1_ul);
+  unsigned N2 = static_cast<unsigned>(N2_ul);
+  unsigned N3 = static_cast<unsigned>(N3_ul);
+
+  for (int ix_fields = 0; ix_fields < N_fields; ++ix_fields) {
+    out[ix_fields] = 0;
+  }
+  // particle coordinates in "kernel units" (divided by d1/2/3)
+  real_prec xp_kern = xp/d1;
+  real_prec yp_kern = yp/d2;
+  real_prec zp_kern = zp/d3;
+  // indices of the cell of the particle
+  unsigned ix_x_cell = static_cast<unsigned>(xp_kern);
+  unsigned ix_y_cell = static_cast<unsigned>(yp_kern);
+  unsigned ix_z_cell = static_cast<unsigned>(zp_kern);
+  // cell_center coordinates (kernel units)
+  real_prec x_cell = static_cast<real_prec>(ix_x_cell) + 0.5;
+  real_prec y_cell = static_cast<real_prec>(ix_y_cell) + 0.5;
+  real_prec z_cell = static_cast<real_prec>(ix_z_cell) + 0.5;
+  // distance of particle from cell center
+  real_prec dx_p_cell = xp_kern - x_cell;
+  real_prec dy_p_cell = yp_kern - y_cell;
+  real_prec dz_p_cell = zp_kern - z_cell;
+
+  // weights per dimension
+  real_prec wx[3], wy[3], wz[3];
+  wx[1] = 0.75 - dx_p_cell*dx_p_cell;
+  wy[1] = 0.75 - dy_p_cell*dy_p_cell;
+  wz[1] = 0.75 - dz_p_cell*dz_p_cell;
+  wx[0] = 0.5 * gsl_pow_2(1.5 - abs(dx_p_cell+1));
+  wy[0] = 0.5 * gsl_pow_2(1.5 - abs(dy_p_cell+1));
+  wz[0] = 0.5 * gsl_pow_2(1.5 - abs(dz_p_cell+1));
+  wx[2] = 0.5 * gsl_pow_2(1.5 - abs(dz_p_cell-1));
+  wy[2] = 0.5 * gsl_pow_2(1.5 - abs(dz_p_cell-1));
+  wz[2] = 0.5 * gsl_pow_2(1.5 - abs(dz_p_cell-1));
+
+  // indices per dimension, periodic box-ized
+  unsigned ix_x[3], ix_y[3], ix_z[3];
+  ix_x[1] = ix_x_cell;
+  ix_y[1] = ix_y_cell;
+  ix_z[1] = ix_z_cell;
+  ix_x[0] = (ix_x_cell - 1 + N1) % N1;
+  ix_y[0] = (ix_y_cell - 1 + N2) % N2;
+  ix_z[0] = (ix_z_cell - 1 + N3) % N3;
+  ix_x[2] = (ix_x_cell + 1) % N1;
+  ix_y[2] = (ix_y_cell + 1) % N2;
+  ix_z[2] = (ix_z_cell + 1) % N3;
+
+  // add the 27 contributions
+  for (int i = 0; i < 3; ++i)
+    for (int j = 0; j < 3; ++j)
+      for (int k = 0; k < 3; ++k) {
+        unsigned ix_f =         (ix_x[i]*N2 + ix_y[j])*N3 + ix_z[k];
+        // ...for each field
+        for (int ix_fields = 0; ix_fields < N_fields; ++ix_fields) {
+          real_prec *field = fields[ix_fields];
+          out[ix_fields]  +=    wx[i]    *   wy[j]     *   wz[k] * field[ix_f];
+        }
+      }
+}
+
+
+void interpolate_TSC_multi(ULONG N1, ULONG N2, ULONG N3, real_prec d1, real_prec d2, real_prec d3, real_prec *xp,
+                           real_prec *yp, real_prec *zp, ULONG N_OBJ, real_prec **field_grids,
+                           real_prec **interpolations, int N_fields) {
+  #ifdef MULTITHREAD
+  #pragma omp parallel for
+  #endif  // MULTITHREAD
+  for (ULONG n = 0; n < N_OBJ; n++) {
+    real_prec out[N_fields];
+    // interpolations[n] = interpolate_TSC(N1, N2, N3, L1, L2, L3, d1, d2, d3,
+    //                                    xp[n], yp[n], zp[n], field_grid);
+    interpolate_TSC_multi(N1, N2, N3, d1, d2, d3, xp[n], yp[n], zp[n], field_grids, N_fields, out);
+    for (int ix_fields = 0; ix_fields < N_fields; ++ix_fields) {
+      interpolations[ix_fields][n] = out[ix_fields];
+    }
+  }
+}
+
+
+
+
+
+
+////////////////////////////////////////////////////////////////
+// k-vector calculators
+////////////////////////////////////////////////////////////////
+
+real_prec k_squared(unsigned int i, unsigned int j, unsigned int k, real_prec L1, real_prec L2, real_prec L3,
+                    unsigned int N1, unsigned int N2, unsigned int N3)
+{
+  real_prec k2=0.;
+  real_prec  kfac=static_cast<real_prec>(2.*M_PI);
+
+  real_prec kx=0.;
+  real_prec ky=0.;
+  real_prec kz=0.;
+
+  if (i<=N1/2) kx = kfac/L1*static_cast<real_prec>(i);
+  else kx= -kfac/L1*static_cast<real_prec>(N1-i);
+
+  if (j<=N2/2) ky= kfac/L2*static_cast<real_prec>(j);
+  else ky= -kfac/L2*static_cast<real_prec>(N2-j);
+
+  if (k<=N3/2) kz = kfac/L3*static_cast<real_prec>(k);
+  else kz = -kfac/L3*static_cast<real_prec>(N3-k);
+
+  k2=kx*kx+ky*ky+kz*kz;
+
+  return(k2);
+}
+
+// EGP: Note: calc_kx, y and z were exactly the same functions! Replaced with calc_ki
+real_prec calc_ki(unsigned int i, real_prec Li, unsigned int Ni) {
+  real_prec kfac=static_cast<real_prec>(2.*M_PI/Li);
+  real_prec ki=0.;
+
+  if (i<=Ni/2)
+    ki = kfac*static_cast<real_prec>(i);
+  else
+    ki = -kfac*static_cast<real_prec>(Ni-i);
+
+  return(ki);
+}
+
+real_prec calc_kx(unsigned int i, real_prec L1, unsigned int N1) {
+  return(calc_ki(i, L1, N1));
+}
+
+real_prec calc_ky(unsigned int j, real_prec L2, unsigned int N2) {
+  return(calc_ki(j, L2, N2));
+}
+
+real_prec calc_kz(unsigned int k, real_prec L3, unsigned int N3) {
+  return(calc_ki(k, L3, N3));
+}
+
+
+
+
+
+////////////////////////////////////////////////////////////////
+// Convolution and kernel functions
+////////////////////////////////////////////////////////////////
+
+void convolve(real_prec L1, real_prec L2, real_prec L3, unsigned N1, unsigned N2, unsigned N3, real_prec *in, real_prec *out,
+              real_prec smol, bool zeropad, int filtertype)
+{
+  bool gauss=false;
+  bool tophat=false;
+  bool errfunc=false;
+
+  switch (filtertype)
+  {
+    case 1:
+      gauss=true;
+      break;
+    case 2:
+      tophat=true;
+      break;
+    case 3:
+      errfunc=true;
+      break;
+  }
+
+  //EGP  ULONG N=N1*N2*N3;
+
+  unsigned Nzp1=N1;
+  unsigned Nzp2=N2;
+  unsigned Nzp3=N3;
+
+  real_prec Lzp1=L1;
+  real_prec Lzp2=L2;
+  real_prec Lzp3=L3;
+
+  if (zeropad==true)
+  {
+    Lzp1=L1*num_2;
+    Lzp2=L2*num_2;
+    Lzp3=L3*num_2;
+
+    Nzp1=N1*2;
+    Nzp2=N2*2;
+    Nzp3=N3*2;
+  }
+
+  ULONG Nzp=Nzp1*Nzp2*Nzp3;
+
+  fftw_array<complex_prec>  AUX(Nzp), AUXb(Nzp);
+  fftw_array<real_prec> wkernel(Nzp),dummy(Nzp);
+
+  if (zeropad==true)
+  {
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+    for(ULONG i=0;i<Nzp;i++)
+      dummy[i]=0.0;
+
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+    for(unsigned i=0;i<N1;i++)
+      for(unsigned j=0;j<N2;j++)
+        for(unsigned k=0;k<N3;k++)
+        {
+          unsigned k_zp=k+N3/2;
+          unsigned j_zp=Nzp3*(j+N2/2);
+          unsigned i_zp=Nzp3*Nzp2*(i+N1/2);
+
+          unsigned k_eff=k;
+          unsigned j_eff=N3*j;
+          unsigned i_eff=N3*N2*i;
+
+          dummy[i_zp+j_zp+k_zp]=in[i_eff+j_eff+k_eff];
+        }
+  }
+  else
+  {
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+    for(ULONG i=0;i<Nzp;i++)
+      dummy[i]=in[i];
+  }
+
+  FFT3dR2C (Nzp1,Nzp2,Nzp3,dummy,AUXb);
+
+  //EGP  real_prec asmth=1.0;
+  real_prec u;
+
+#ifdef MULTITHREAD
+#pragma omp parallel for private(u)
+#endif // MULTITHREAD
+  for (unsigned i=0;i<Nzp1;i++)
+    for (unsigned j=0;j<Nzp2;j++)
+      for (unsigned k=0;k<Nzp3;k++)
+      {
+        real_prec k2=k_squared(i,j,k,Lzp1,Lzp2,Lzp3,Nzp1,Nzp2,Nzp3);
+
+        real_prec rS=smol;
+        real_prec rS2=rS*rS;
+        real_prec kcut=smol;//2.*M_PI/rS;
+        real_prec sigma=static_cast<real_prec>(.3);
+
+        if (tophat==true)
+        {
+          u = sqrt(k2);
+
+          if (u>kcut)
+            wkernel[k+(Nzp3)*(j+Nzp2*i)]=0.0;
+          else
+            wkernel[k+(Nzp3)*(j+Nzp2*i)]=1.0;
+        }
+
+        if (errfunc==true)
+        {
+          u = static_cast<real_prec>((sqrt(k2)-kcut)/(sqrt(2.)*sigma));
+          real_prec fac = static_cast<real_prec>(erfc(u));
+          wkernel[k+(Nzp3)*(j+Nzp2*i)]=fac;
+        }
+
+        if (gauss==true)
+          wkernel[k+(Nzp3)*(j+Nzp2*i)]=static_cast<real_prec>(exp(-k2*rS2/2.));
+      }
+
+
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+  for(ULONG i=0;i<Nzp;i++)
+  {
+    re(AUX[i])=wkernel[i];
+    im(AUX[i])=0.0;
+  }
+
+  {
+    FFT3d (Nzp1,Nzp2,Nzp3, to_Rspace, AUX, AUX);
+
+    real_prec wtot=0.;
+#ifdef MULTITHREAD
+#pragma omp parallel for reduction(+:wtot)
+#endif // MULTITHREAD
+    for(ULONG i=0;i<Nzp;i++)
+      wtot+=re(AUX[i]);
+
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+    for(ULONG i=0;i<Nzp;i++)
+    {
+      wkernel[i]/=wtot;
+    }
+  }
+
+
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+  for(ULONG i=0;i<Nzp;i++)
+  {
+    re(AUX[i])=wkernel[i];
+    im(AUX[i])=0.0;
+  }
+
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+  for(ULONG i=0;i<Nzp;i++)
+  {
+    re(AUXb[i])= re(AUXb[i])*re(AUX[i]);
+    im(AUXb[i])= im(AUXb[i])*re(AUX[i]);
+  }
+
+  FFT3d (Nzp1,Nzp2,Nzp3, to_Rspace, AUXb, AUXb);
+  if (zeropad==true)
+  {
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+    for(unsigned i=0;i<N1;i++)
+      for(unsigned j=0;j<N2;j++)
+        for(unsigned k=0;k<N3;k++)
+        {
+          unsigned k_zp=k+N3/2;
+          unsigned j_zp=Nzp3*(j+N2/2);
+          unsigned i_zp=Nzp3*Nzp2*(i+N1/2);
+
+          unsigned k_eff=k;
+          unsigned j_eff=N3*j;
+          unsigned i_eff=N3*N2*i;
+
+          out[i_eff+j_eff+k_eff]=re(AUXb[i_zp+j_zp+k_zp]);
+        }
+  }
+  else
+  {
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+    for(ULONG i=0;i<Nzp;i++)
+      out[i]=re(AUXb[i]);
+  }
+}
+
+
+void kernelcomp(real_prec L1, real_prec L2, real_prec L3, unsigned N1, unsigned N2, unsigned N3, real_prec smol, int filtertype,
+                struct DATA *data)
+{
+  bool gauss=false;
+  bool errfunc=false;
+  bool tophat=false;
+
+  switch (filtertype)
+  {
+    case 1:
+      gauss=true;
+      break;
+    case 2:
+      tophat=true;
+      break;
+    case 3:
+      errfunc=true;
+      break;
+  }
+
+  ULONG N=N1*N2*N3;
+
+  fftw_array<real_prec> out(N);
+  fftw_array<complex_prec> AUX(N);
+
+  //EGP  real_prec asmth=1.0;
+  real_prec u;
+
+#ifdef MULTITHREAD
+#pragma omp parallel for private(u)
+#endif // MULTITHREAD
+  for (unsigned i=0;i<N1;i++)
+    for (unsigned j=0;j<N2;j++)
+      for (unsigned k=0;k<N3;k++)
+      {
+        ULONG ii=k+N3*(j+N2*i);
+
+        real_prec k2=k_squared(i,j,k,L1,L2,L3,N1,N2,N3);
+
+        real_prec rS=smol;
+        real_prec rS2=rS*rS;
+        real_prec kcut=smol;//2.*M_PI/rS;
+        real_prec sigma=static_cast<real_prec>(.3);
+
+        if (tophat==true)
+        {
+          u = sqrt(k2);
+
+          if (u>kcut)
+            re(AUX[ii])=0.0;
+          else
+            re(AUX[ii])=1.0;
+        }
+
+        if (errfunc==true)
+        {
+          u = static_cast<real_prec>((sqrt(k2)-kcut)/(sqrt(2.)*sigma));
+          real_prec fac = static_cast<real_prec>(erfc(u));
+          re(AUX[ii])=fac;
+        }
+
+        if (gauss==true)
+          re(AUX[ii])=static_cast<real_prec>(exp(-k2*rS2/2.));
+
+
+        im(AUX[ii])=0.0;
+      }
+
+
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+  for(ULONG i=0;i<N;i++)
+  {
+    out[i]=re(AUX[i]);
+    im(AUX[i])=0.0;
+  }
+
+  FFT3d (N1,N2,N3, to_Rspace, AUX, AUX);
+
+  real_prec wtot=0.;
+#ifdef MULTITHREAD
+#pragma omp parallel for reduction(+:wtot)
+#endif // MULTITHREAD
+  for(ULONG i=0;i<N;i++)
+    wtot+=re(AUX[i]);
+
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+  for(ULONG i=0;i<N;i++)
+  {
+    out[i]/=wtot;
+  }
+
+  int bmax=100;
+  char buffsl[bmax];
+  sprintf(buffsl,"r%d",int(smol));
+  string fname= data->numerical->dir + string("auxkernel")+buffsl;
+  dump_scalar(out, N1, N2, N3, fname);
+}
+
+
+void convcomp(unsigned N1, unsigned N2, unsigned N3, real_prec *in, real_prec *out, real_prec smol, string dir)
+{
+  //bool gauss=false;
+  //bool errfunc=false;
+  //bool tophat=false;
+
+  //switch (filtertype)
+  //{
+    //case 1:
+      //gauss=true;
+      //break;
+    //case 2:
+      //tophat=true;
+      //break;
+    //case 3:
+      //errfunc=true;
+      //break;
+  //}
+
+
+  ULONG N=N1*N2*N3;
+
+  fftw_array<complex_prec> AUX(N);
+
+  FFT3dR2C (N1,N2,N3,in,AUX);
+
+  {
+    int bmax=100;
+    char buffsl[bmax];
+    sprintf(buffsl,"r%d",int(smol));
+    //EGP    char * fileN;
+    string fname= dir + string("auxkernel")+buffsl;
+    get_scalar(fname,out,N1,N2,N3);
+  }
+
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+  for (unsigned i=0;i<N1;i++)
+    for (unsigned j=0;j<N2;j++)
+      for (unsigned k=0;k<N3;k++)
+      {
+        ULONG kk=k+(N3)*(j+N2*i);
+        ULONG ii=k+(N3)*(j+N2*i);
+
+        re(AUX[kk])= re(AUX[kk])*out[ii];
+        im(AUX[kk])= im(AUX[kk])*out[ii];
+      }
+
+  FFT3dC2R (N1,N2,N3,AUX,out);
+}
+
+
+void convcompb(unsigned N1, unsigned N2, unsigned N3, real_prec *in, real_prec *out)
+{
+  ULONG N=N1*N2*N3;
+
+  fftw_array<complex_prec> AUX(N);
+
+  FFT3dR2C (N1,N2,N3,in,AUX);
+
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+  for (unsigned i=0;i<N1;i++)
+    for (unsigned j=0;j<N2;j++)
+      for (unsigned k=0;k<N3;k++)
+      {
+        ULONG kk=k+(N3)*(j+N2*i);
+        ULONG ii=k+(N3)*(j+N2*i);
+
+        re(AUX[kk])= re(AUX[kk])*out[ii];
+        im(AUX[kk])= im(AUX[kk])*out[ii];
+      }
+
+  FFT3dC2R (N1,N2,N3,AUX,out);
+}
+
+
+
+
+
+
+////////////////////////////////////////////////////////////////
+// Gradient and other 3D derivative functions
+////////////////////////////////////////////////////////////////
+
+void gradfft(unsigned N1, unsigned N2, unsigned N3, real_prec L1, real_prec L2, real_prec L3, real_prec *in,
+             real_prec *out, unsigned int dim)
+{
+  // ULONG N=N1*N2*N3;
+  unsigned N3half = N3/2 + 1;
+  ULONG Nhalf = N1 * N2 * N3half;
+  // fftw_array<complex_prec> AUX(N),AUX2(N);
+  fftw_array<complex_prec> AUX(Nhalf);
+
+  // complexify_array(in, AUX2, N);
+  // FFT3d(N1,N2,N3, true, AUX2, AUX);
+  fftR2C(N1, N2, N3, in, AUX);
+
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+  for (unsigned i=0;i<N1;i++)
+    for (unsigned j=0;j<N2;j++)
+      // for (unsigned k=0;k<N3;k++)
+      for (unsigned k = 0; k < N3half; ++k) {
+        real_prec kl;
+        switch (dim)
+        {
+          case 1:
+            kl=calc_kx(i,L1,N1);
+            break;
+          case 2:
+            kl=calc_ky(j,L2,N2);
+            break;
+          case 3:
+            kl=calc_kz(k,L3,N3);
+            break;
+          default:
+            throw BarcodeException("In gradfft: dim should be either 1, 2 or 3!");
+            break;
+        }
+
+        ULONG ll = k + N3half * (j + N2*i);
+
+        // EGP: switched minus sign between terms! Was wrong way round.
+        // EGP: ... and switched it back; Fourier def. assumption was wrong!
+        real_prec dummy = re(AUX[ll]);  // for in-place
+        re(AUX[ll]) = -kl * im(AUX[ll]);
+        im(AUX[ll]) =  kl * dummy;
+        
+        // From http://math.mit.edu/~stevenj/fft-deriv.pdf: Nyquist components
+        // should be zero, in every dimension, otherwise the result is complex.
+        if ((i==N1/2) || (j==N2/2) || (k==N3/2))
+        {
+          re(AUX[ll]) = 0.;
+          im(AUX[ll]) = 0.;
+        }
+      }
+  // FFT3d(N1,N2,N3, false, AUX2, AUX);
+  // real_part_array(AUX, out, N);
+  fftC2R(N1, N2, N3, AUX, out);
+}
+
+
+void gradfindif(unsigned N1, real_prec L1, real_prec *in, real_prec *out, unsigned int dim)
+{
+  if (N1 > INT_MAX) {
+    std::string msg = "Box dimensions must not be larger than INT_MAX in gradfindif!";
+    throw std::logic_error(msg);
+    // otherwise the signed integer indices below will overflow
+  }
+  real_prec fac=static_cast<real_prec>(N1/(2.*L1));
+
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+  for(unsigned x = 0; x < N1; x++)
+    for(unsigned y = 0; y < N1; y++)
+      for(unsigned z = 0; z < N1; z++)
+      {
+        unsigned xr, xrr;
+        int xl, xll;
+        unsigned yr, yrr;
+        int yl, yll;
+        unsigned zr, zrr;
+        int zl, zll;
+
+        xrr = xr = x;
+        yrr = yr = y;
+        zrr = zr = z;
+
+        xll = xl = static_cast<int>(x);
+        yll = yl = static_cast<int>(y);
+        zll = zl = static_cast<int>(z);
+
+        int *il, *ill;
+        unsigned *ir, *irr, *ii;
+
+        switch (dim) {
+          case 1: {
+            ii = &x; il = &xl; ill = &xll; ir = &xr; irr = &xrr;
+            break;
+          }
+          case 2: {
+            ii = &y; il = &yl; ill = &yll; ir = &yr; irr = &yrr;
+            break;
+          }
+          case 3: {
+            ii = &z; il = &zl; ill = &zll; ir = &zr; irr = &zrr;
+            break;
+          }
+          default: {
+            throw BarcodeException("dim must be 1, 2 or 3 in gradfindif!");
+          }
+        }
+
+        *ir = *ii + 1;
+        *il = static_cast<int>(*ii) - 1;
+        *irr = *ii + 2;
+        *ill = static_cast<int>(*ii) - 2;
+        if(*ir >= N1)
+          *ir -= N1;
+        if(*irr >= N1)
+          *irr -= N1;
+        if(*il < 0)
+          *il += N1;
+        if(*ill < 0)
+          *ill += N1;
+
+        ULONG ix = z+N1*(y+N1*x);
+        ULONG l = static_cast<unsigned>(zl)+N1*(static_cast<unsigned>(yl)+N1*static_cast<unsigned>(xl));
+        ULONG r = zr+N1*(yr+N1*xr);
+        ULONG ll = static_cast<unsigned>(zll)+N1*(static_cast<unsigned>(yll)+N1*static_cast<unsigned>(xll));
+        ULONG rr = zrr+N1*(yrr+N1*xrr);
+
+        out[ix] = -static_cast<real_prec>(fac*((4.0 / 3)*(in[l] - in[r]) - (1.0 / 6) * (in[ll] - in[rr])));
+      }
+}
+
+// EGP: added this simultaneous (a) one component fourier space gradient and (b) inverse laplacian calculator, in Fourier space:
+void grad_inv_lap_FS(unsigned N1, unsigned N2, unsigned N3, real_prec L1, real_prec L2, real_prec L3, complex_prec *in,
+                     complex_prec *out, unsigned int index, bool rfft)
+{
+  unsigned kz_max = N3;
+  if (rfft)
+    kz_max = N3/2 + 1;
+
+  #ifdef MULTITHREAD
+  #pragma omp parallel for
+  #endif // MULTITHREAD
+  for (unsigned i=0;i<N1;i++)
+    for (unsigned j=0;j<N2;j++)
+      for (unsigned k = 0; k < kz_max; ++k) {
+        ULONG ii=k + kz_max * (j + N2*i);
+
+        real_prec kx=calc_kx(i,L1,N1);
+        real_prec ky=calc_ky(j,L2,N2);
+        real_prec kz=calc_kz(k,L3,N3);
+
+        real_prec kmod = kx*kx + ky*ky + kz*kz;
+        real_prec fac_kmod = 0.;
+        if (kmod > 0)
+          fac_kmod = 1/kmod;
+
+        real_prec ki_over_kmod=0.;
+        switch (index)
+        {
+          case 1:
+            ki_over_kmod=kx*fac_kmod;
+            break;
+          case 2:
+            ki_over_kmod=ky*fac_kmod;
+            break;
+          case 3:
+            ki_over_kmod=kz*fac_kmod;
+            break;
+          default:
+            throw BarcodeException("In grad_inv_lap_FS: index must be either 1, 2 or 3!");
+            break;
+        }
+        real_prec dummy = re(in[ii]); // to make in-place possible (in == out)
+        re(out[ii]) =  ki_over_kmod*im(in[ii]);
+        im(out[ii]) = -ki_over_kmod*dummy;
+
+        // From http://math.mit.edu/~stevenj/fft-deriv.pdf: Nyquist components
+        // should be zero, in every dimension, otherwise the result is complex.
+        // This function is not actually treated in that document, but I would
+        // say it also counts as an odd-ordered derivative.
+        if ((i==N1/2) || (j==N2/2) || (k==N3/2))
+        {
+          re(out[ii]) = 0.;
+          im(out[ii]) = 0.;
+        }
+      }
+}
+
+
+
+
+
+////////////////////////////////////////////////////////////////
+// Random functions
+////////////////////////////////////////////////////////////////
+
+real_prec GR_NUM(gsl_rng * SEED, real_prec sigma ,int GR_METHOD)
+{
+  real_prec val=0.;
+  switch (GR_METHOD)
+  {
+    case 0:
+      {
+        val=static_cast<real_prec>(gsl_ran_gaussian(SEED, sigma));
+      }
+      break;
+
+    case 1:
+      {
+        val=static_cast<real_prec>(gsl_ran_gaussian_ziggurat(SEED, sigma));
+      }
+      break;
+
+    case 2:
+      {
+        val=static_cast<real_prec>(gsl_ran_gaussian_ratio_method(SEED, sigma));
+      }
+      break;
+
+  }
+  return(val);
+}
+
+
+void create_GARFIELD(unsigned N1, unsigned N2, unsigned N3, real_prec L1, real_prec L2, real_prec L3,real_prec *delta,real_prec * Power,gsl_rng * seed)
+{
+  /// FOLLOWING HUGO MARTEL
+  //
+  // New version, EGP: uses a "resolution independent random grid",
+  // which makes sure that, when given the same seed, the random grid
+  // that is generated for gridsize N is also generated for gridsize
+  // 2*N, plus the higher k terms (smaller scale structure).
+  // Even though there are no more random calls in the for loop, there
+  // is the possibility that one array location is accessed simultaneously
+  // by multiple threads, so no multithreading here! Unless you want to
+  // put atomics around all the calls...
+  
+
+  ULONG N=N1*N2*N3;
+  ULONG  Nhalf=N1*N2*(N3/2+1);
+
+  real_prec Vol = L1*L2*L3;
+
+  fftw_array<complex_prec> GRF_array(N),GRF_array2(Nhalf);
+
+  // Build a resolution independent random grid
+  vector< complex<real_prec> > random_grid = resolution_independent_random_grid_FS<real_prec>( N1, seed, false );
+  complex_prec *random_grid_array = reinterpret_cast<complex_prec *>(random_grid.data());
+  copyArray(random_grid_array, GRF_array, N);
+
+// Factor for conversion from power spectrum to discrete fourier transform
+// amplitude (see Martel 2005):
+  real_prec ps2dft_amp = 1.;
+#ifdef FOURIER_DEF_1
+  ps2dft_amp = 1./Vol;
+#endif // FOURIER_DEF_1
+#ifdef FOURIER_DEF_2
+  ps2dft_amp = static_cast<real_prec>(N*N)/Vol;
+#endif // FOURIER_DEF_2
+
+  // TODO:
+  // We can shorten the code considerably by just multiplying the whole array
+  // by sigma first in a separate (multithreaded) for-loop (regardless of all
+  // the special cases in the complicated loop below) and only after that 
+  // doing all the special cases below, but we can then leave out all the 
+  // *= sigma parts. Actually, I think what is done here is just two things:
+  // 1. making the array hermitian, and 2. a couple of special cases that need
+  // to be non-imaginary. The former we can do in a separate function (which is
+  // already in the code, I believe) and the latter we can just do with a few
+  // direct assignments, instead of having to go through all the if's at every
+  // step of the loop.
+  
+  //EGP  real_prec kr=0.;
+  real_prec sigma=0.;
+//#ifdef MULTITHREAD
+//#pragma omp parallel for
+//#endif // MULTITHREAD
+  for (unsigned i=0 ; i<=N1/2;i++)
+    for (unsigned j=0 ; j<=N2/2;j++)
+      for (unsigned k=0 ; k<=N3/2;k++)
+      {
+        sigma = sqrt(ps2dft_amp * Power[k+N3*(j+N2*i)]/num_2);
+
+        if( (i>0 && i<N1/2) && (j>0 && j<N2/2) && (k>0 && k<N3/2))
+        {
+          unsigned ii=N1-i; unsigned jj=N2-j; unsigned kk=N3-k;
+
+          re(GRF_array[k+N3*(j+N2*i)]) *= sigma;
+          im(GRF_array[k+N3*(j+N2*i)]) *= sigma;
+
+          re(GRF_array[kk+N3*(jj+N2*ii)])=re(GRF_array[k+N3*(j+N2*i)]);
+          im(GRF_array[kk+N3*(jj+N2*ii)])=-im(GRF_array[k+N3*(j+N2*i)]);
+
+          ///**********************************************************
+
+          re(GRF_array[k+N3*(j+N2*ii)]) *= sigma;
+          im(GRF_array[k+N3*(j+N2*ii)]) *= sigma;
+
+          re(GRF_array[kk+N3*(jj+N2*i)])=re(GRF_array[k+N3*(j+N2*ii)]);
+          im(GRF_array[kk+N3*(jj+N2*i)])=-im(GRF_array[k+N3*(j+N2*ii)]);
+
+          ///**********************************************************
+
+          re(GRF_array[k+N3*(jj+N2*i)]) *= sigma;
+          im(GRF_array[k+N3*(jj+N2*i)]) *= sigma;
+
+          re(GRF_array[kk+N3*(j+N2*ii)])=re(GRF_array[k+N3*(jj+N2*i)]);
+          im(GRF_array[kk+N3*(j+N2*ii)])=-im(GRF_array[k+N3*(jj+N2*i)]);
+
+          ///**********************************************************
+
+          re(GRF_array[kk+N3*(j+N2*i)]) *= sigma;
+          im(GRF_array[kk+N3*(j+N2*i)]) *= sigma;
+
+          re(GRF_array[k+N3*(jj+N2*ii)])=re(GRF_array[kk+N3*(j+N2*i)]);
+          im(GRF_array[k+N3*(jj+N2*ii)])=-im(GRF_array[kk+N3*(j+N2*i)]);
+
+        }
+
+        if( (i>0 && i<N1/2) && (j>0 && j<N2/2) && (k==N3/2))
+        {
+          unsigned ii=N1-i; unsigned jj=N2-j;
+
+          re(GRF_array[N3/2+N3*(j+N2*i)]) *= sigma;
+          im(GRF_array[N3/2+N3*(j+N2*i)]) *= sigma;
+
+          re(GRF_array[N3/2+N3*(jj+N2*ii)])=re(GRF_array[N3/2+N3*(j+N2*i)]);
+          im(GRF_array[N3/2+N3*(jj+N2*ii)])=-im(GRF_array[N3/2+N3*(j+N2*i)]);
+
+          ///**********************************************************
+
+          re(GRF_array[N3/2+N3*(j+N2*ii)]) *= sigma;
+          im(GRF_array[N3/2+N3*(j+N2*ii)]) *= sigma;
+
+          re(GRF_array[N3/2+N3*(jj+N2*i)])=re(GRF_array[N3/2+N3*(j+N2*ii)]);
+          im(GRF_array[N3/2+N3*(jj+N2*i)])=-im(GRF_array[N3/2+N3*(j+N2*ii)]);
+
+        }
+
+        if( (i>0 && i<N1/2) && (j==N2/2) && (k>0 && k<N3/2))
+        {
+          unsigned ii=N1-i; unsigned kk=N3-k;
+
+          re(GRF_array[k+N3*(N2/2+N2*i)]) *= sigma;
+          im(GRF_array[k+N3*(N2/2+N2*i)]) *= sigma;
+
+          re(GRF_array[kk+N3*(N2/2+N2*ii)])=re(GRF_array[k+N3*(N2/2+N2*i)]);
+          im(GRF_array[kk+N3*(N2/2+N2*ii)])=-im(GRF_array[k+N3*(N2/2+N2*i)]);
+
+          ///**********************************************************
+
+          re(GRF_array[k+N3*(N2/2+N2*ii)]) *= sigma;
+          im(GRF_array[k+N3*(N2/2+N2*ii)]) *= sigma;
+
+          re(GRF_array[kk+N3*(N2/2+N2*i)])=re(GRF_array[k+N3*(N2/2+N2*ii)]);
+          im(GRF_array[kk+N3*(N2/2+N2*i)])=-im(GRF_array[k+N3*(N2/2+N2*ii)]);
+
+        }
+
+        if( (i==N1/2) && (j>0 && j<N2/2) && (k>0 && k<N3/2))
+        {
+          unsigned jj=N2-j; unsigned kk=N3-k;
+
+          re(GRF_array[k+N3*(j+N2*N1/2)]) *= sigma;
+          im(GRF_array[k+N3*(j+N2*N1/2)]) *= sigma;
+
+          re(GRF_array[kk+N3*(jj+N2*N1/2)])=re(GRF_array[k+N3*(j+N2*N1/2)]);
+          im(GRF_array[kk+N3*(jj+N2*N1/2)])=-im(GRF_array[k+N3*(j+N2*N1/2)]);
+
+          ///**********************************************************
+
+          re(GRF_array[k+N3*(jj+N2*N1/2)]) *= sigma;
+          im(GRF_array[k+N3*(jj+N2*N1/2)]) *= sigma;
+
+          re(GRF_array[kk+N3*(j+N2*N1/2)])=re(GRF_array[k+N3*(jj+N2*N1/2)]);
+          im(GRF_array[kk+N3*(j+N2*N1/2)])=-im(GRF_array[k+N3*(jj+N2*N1/2)]);
+
+        }
+
+        if( (i==N1/2) && (j==N2/2) && (k>0 && k<N3/2))
+        {
+          unsigned kk=N3-k;
+
+          re(GRF_array[k+N3*(N2/2+N2*N1/2)]) *= sigma;
+          im(GRF_array[k+N3*(N2/2+N2*N1/2)]) *= sigma;
+
+          re(GRF_array[kk+N3*(N2/2+N2*N1/2)])=re(GRF_array[k+N3*(N2/2+N2*N1/2)]);
+          im(GRF_array[kk+N3*(N2/2+N2*N1/2)])=-im(GRF_array[k+N3*(N2/2+N2*N1/2)]);
+
+          ///**********************************************************
+        }
+
+        if( (i>0 && i<N1/2) && (j==N2/2) && (k==N3/2))
+        {
+          unsigned ii=N1-i;
+
+          re(GRF_array[N3/2+N3*(N2/2+N2*i)]) *= sigma;
+          im(GRF_array[N3/2+N3*(N2/2+N2*i)]) *= sigma;
+
+          re(GRF_array[N3/2+N3*(N2/2+N2*ii)])=re(GRF_array[N3/2+N3*(N2/2+N2*i)]);
+          im(GRF_array[N3/2+N3*(N2/2+N2*ii)])=-im(GRF_array[N3/2+N3*(N2/2+N2*i)]);
+
+          ///**********************************************************
+        }
+
+        if( (i==N1/2) && (j>0 && j<N2/2) && (k==N3/2))
+        {
+          unsigned jj=N2-j;
+
+          re(GRF_array[N3/2+N3*(j+N2*N1/2)]) *= sigma;
+          im(GRF_array[N3/2+N3*(j+N2*N1/2)]) *= sigma;
+
+          re(GRF_array[N3/2+N3*(jj+N2*N1/2)])=re(GRF_array[N3/2+N3*(j+N2*N1/2)]);
+          im(GRF_array[N3/2+N3*(jj+N2*N1/2)])=-im(GRF_array[N3/2+N3*(j+N2*N1/2)]);
+
+          ///**********************************************************
+        }
+
+        if( (i==N1/2) && (j==N2/2) && (k==N3/2))
+        {
+          re(GRF_array[N3/2+N3*(N2/2+N2*N1/2)]) *= sqrt(num_2)*sigma;
+          im(GRF_array[N3/2+N3*(N2/2+N2*N1/2)])=0.;
+
+          ///**********************************************************
+        }
+
+        if( (i>0 && i<N1/2) && (j>0 && j<N2/2) && k==0)
+        {
+          unsigned ii=N1-i; unsigned jj=N2-j;
+
+          re(GRF_array[0+N3*(j+N2*i)]) *= sigma;
+          im(GRF_array[0+N3*(j+N2*i)]) *= sigma;
+
+          re(GRF_array[0+N3*(jj+N2*ii)])=re(GRF_array[k+N3*(j+N2*i)]);
+          im(GRF_array[0+N3*(jj+N2*ii)])=-im(GRF_array[k+N3*(j+N2*i)]);
+
+          ///**********************************************************
+
+          re(GRF_array[0+N3*(j+N2*ii)]) *= sigma;
+          im(GRF_array[0+N3*(j+N2*ii)]) *= sigma;
+
+          re(GRF_array[0+N3*(jj+N2*i)])=re(GRF_array[0+N3*(j+N2*ii)]);
+          im(GRF_array[0+N3*(jj+N2*i)])=-im(GRF_array[0+N3*(j+N2*ii)]);
+        }
+
+        if( (i>0 && i<N1/2) && (j==0) && (k>0 && k<N3/2))
+        {
+          unsigned ii=N1-i; unsigned kk=N3-k;
+
+          re(GRF_array[k+N3*(0+N2*i)]) *= sigma;
+          im(GRF_array[k+N3*(0+N2*i)]) *= sigma;
+
+          re(GRF_array[kk+N3*(0+N2*ii)])=re(GRF_array[k+N3*(0+N2*i)]);
+          im(GRF_array[kk+N3*(0+N2*ii)])=-im(GRF_array[k+N3*(0+N2*i)]);
+
+          ///**********************************************************
+
+          re(GRF_array[k+N3*(0+N2*ii)]) *= sigma;
+          im(GRF_array[k+N3*(0+N2*ii)]) *= sigma;
+
+          re(GRF_array[kk+N3*(0+N2*i)])=re(GRF_array[k+N3*(0+N2*ii)]);
+          im(GRF_array[kk+N3*(0+N2*i)])=-im(GRF_array[k+N3*(0+N2*ii)]);
+        }
+
+        if( (i==0) && (j>0 && j<N2/2) && (k>0 && k<N3/2))
+        {
+          unsigned jj=N2-j; unsigned kk=N3-k;
+
+          re(GRF_array[k+N3*(j+N2*0)]) *= sigma;
+          im(GRF_array[k+N3*(j+N2*0)]) *= sigma;
+
+          re(GRF_array[kk+N3*(jj+N2*0)])=re(GRF_array[k+N3*(j+N2*0)]);
+          im(GRF_array[kk+N3*(jj+N2*0)])=-im(GRF_array[k+N3*(j+N2*0)]);
+
+          ///**********************************************************
+
+          re(GRF_array[k+N3*(jj+N2*0)]) *= sigma;
+          im(GRF_array[k+N3*(jj+N2*0)]) *= sigma;
+
+          re(GRF_array[kk+N3*(j+N2*0)])=re(GRF_array[k+N3*(jj+N2*0)]);
+          im(GRF_array[kk+N3*(j+N2*0)])=-im(GRF_array[k+N3*(jj+N2*0)]);
+        }
+
+        if( (i>0 && i<N1/2) && (j==0) && (k==0))
+        {
+          unsigned ii=N1-i;
+
+          re(GRF_array[0+N3*(0+N2*i)]) *= sigma;
+          im(GRF_array[0+N3*(0+N2*i)]) *= sigma;
+
+          re(GRF_array[0+N3*(0+N2*ii)])=re(GRF_array[0+N3*(0+N2*i)]);
+          im(GRF_array[0+N3*(0+N2*ii)])=-im(GRF_array[0+N3*(0+N2*i)]);
+          ///**********************************************************
+
+        }
+
+        if( (i==0) && (j==0) && (k>0 && k<N3/2))
+        {
+          unsigned kk=N3-k;
+
+          re(GRF_array[k+N3*(0+N2*0)]) *= sigma;
+          im(GRF_array[k+N3*(0+N2*0)]) *= sigma;
+
+          re(GRF_array[kk+N3*(0+N2*0)])=re(GRF_array[k+N3*(0+N2*0)]);
+          im(GRF_array[kk+N3*(0+N2*0)])=-im(GRF_array[k+N3*(0+N2*0)]);
+          ///**********************************************************
+
+        }
+
+        if( (i==0) && (j>0 && j<N2/2) && (k==0))
+        {
+          unsigned jj=N2-j;
+
+          re(GRF_array[0+N3*(j+N2*0)]) *= sigma;
+          im(GRF_array[0+N3*(j+N2*0)]) *= sigma;
+
+          re(GRF_array[0+N3*(jj+N2*0)])=re(GRF_array[0+N3*(j+N2*0)]);
+          im(GRF_array[0+N3*(jj+N2*0)])=-im(GRF_array[0+N3*(j+N2*0)]);
+          ///**********************************************************
+
+        }
+
+        if( (i==0) && (j==0) && (k==0))
+        {
+          re(GRF_array[0])=0.;
+          im(GRF_array[0])=0.;
+          ///**********************************************************
+
+        }
+
+        if( (i>0 && i<N1/2) && (j==N2/2) && (k==0))
+        {
+          unsigned ii=N1-i;
+
+          re(GRF_array[0+N3*(N2/2+N2*i)]) *= sigma;
+          im(GRF_array[0+N3*(N2/2+N2*i)]) *= sigma;
+
+          re(GRF_array[0+N3*(N2/2+N2*ii)])=re(GRF_array[0+N3*(N2/2+N2*i)]);
+          im(GRF_array[0+N3*(N2/2+N2*ii)])=-im(GRF_array[0+N3*(N2/2+N2*i)]);
+
+          ///**********************************************************
+
+        }
+
+        if( (i>0 && i<N1/2) && (j==0) && (k==N3/2))
+        {
+          unsigned ii=N1-i;
+
+          re(GRF_array[N3/2+N3*(0+N2*i)]) *= sigma;
+          im(GRF_array[N3/2+N3*(0+N2*i)]) *= sigma;
+
+          re(GRF_array[N3/2+N3*(0+N2*ii)])=re(GRF_array[N3/2+N3*(0+N2*i)]);
+          im(GRF_array[N3/2+N3*(0+N2*ii)])=-im(GRF_array[N3/2+N3*(0+N2*i)]);
+
+          ///**********************************************************
+
+        }
+
+        if( (i==N1/2) && (j>0 && j<N2/2) && (k==0))
+        {
+          unsigned jj=N2-j;
+
+          re(GRF_array[0+N3*(j+N2*N1/2)]) *= sigma;
+          im(GRF_array[0+N3*(j+N2*N1/2)]) *= sigma;
+
+          re(GRF_array[0+N3*(jj+N2*N1/2)])=re(GRF_array[0+N3*(j+N2*N1/2)]);
+          im(GRF_array[0+N3*(jj+N2*N1/2)])=-im(GRF_array[0+N3*(j+N2*N1/2)]);
+
+          ///**********************************************************
+
+        }
+
+
+        if( (i==0) && (j>0 && j<N2/2) && (k==N3/2))
+        {
+          unsigned jj=N2-j;
+
+          re(GRF_array[N3/2+N3*(j+N2*0)]) *= sigma;
+          im(GRF_array[N3/2+N3*(j+N2*0)]) *= sigma;
+
+          re(GRF_array[N3/2+N3*(jj+N2*0)])=re(GRF_array[N3/2+N3*(j+N2*0)]);
+          im(GRF_array[N3/2+N3*(jj+N2*0)])=-im(GRF_array[N3/2+N3*(j+N2*0)]);
+
+          ///**********************************************************
+        }
+
+        if( (i==N1/2) && (j==0) && (k>0 && k<N3/2))
+        {
+          unsigned kk=N3-k;
+
+          re(GRF_array[k+N3*(0+N2*N1/2)]) *= sigma;
+          im(GRF_array[k+N3*(0+N2*N1/2)]) *= sigma;
+
+          re(GRF_array[kk+N3*(0+N2*N1/2)])=re(GRF_array[k+N3*(0+N2*N1/2)]);
+          im(GRF_array[kk+N3*(0+N2*N1/2)])=-im(GRF_array[k+N3*(0+N2*N1/2)]);
+
+          ///**********************************************************
+
+        }
+
+        if( (i==0) && (j==N2/2) && (k>0 && k<N3/2))
+        {
+          unsigned kk=N3-k;
+
+          re(GRF_array[k+N3*(N2/2+N2*0)]) *= sigma;
+          im(GRF_array[k+N3*(N2/2+N2*0)]) *= sigma;
+
+          re(GRF_array[kk+N3*(N2/2+N2*0)])=re(GRF_array[k+N3*(N2/2+N2*0)]);
+          im(GRF_array[kk+N3*(N2/2+N2*0)])=-im(GRF_array[k+N3*(N2/2+N2*0)]);
+
+          ///**********************************************************
+
+        }
+
+        if( (i==0) && (j==0) && (k==N3/2))
+        {
+          re(GRF_array[N3/2+N3*(0+N2*0)]) *= sqrt(num_2)*sigma;
+          im(GRF_array[N3/2+N3*(0+N2*0)])=0.;
+          ///**********************************************************
+
+        }
+
+        if( (i==0) && (j==N2/2) && (k==0))
+        {
+          re(GRF_array[0+N3*(N2/2+N2*0)]) *= sqrt(num_2)*sigma;
+          im(GRF_array[0+N3*(N2/2+N2*0)])=0.;
+          ///**********************************************************
+
+        }
+
+        if( (i==N1/2) && (j==0) && (k==0))
+        {
+          re(GRF_array[0+N3*(0+N2*N1/2)]) *= sqrt(num_2)*sigma;
+          im(GRF_array[0+N3*(0+N2*N1/2)])=0.;
+          ///**********************************************************
+
+        }
+
+        if( (i==N1/2) && (j==N2/2) && (k==0))
+        {
+          re(GRF_array[0+N3*(N2/2+N2*N1/2)]) *= sqrt(num_2)*sigma;
+          im(GRF_array[0+N3*(N2/2+N2*N1/2)])=0.;
+          ///**********************************************************
+
+        }
+
+        if( (i==N1/2) && (j==0) && (k==N3/2))
+        {
+          re(GRF_array[N3/2+N3*(0+N2*N1/2)]) *= sqrt(num_2)*sigma;
+          im(GRF_array[N3/2+N3*(0+N2*N1/2)])=0.;
+          ///**********************************************************
+
+        }
+
+        if( (i==0) && (j==N2/2) && (k==N3/2))
+        {
+          re(GRF_array[N3/2+N3*(N2/2+N2*0)]) *= sqrt(num_2)*sigma;
+          im(GRF_array[N3/2+N3*(N2/2+N2*0)])=0.;
+          ///**********************************************************
+        }
+
+      }
+
+  /*
+     FFT3d (N1,N2,N3, to_Rspace, GRF_array, GRF_array);
+
+     for (ULONG i=0 ; i<N;i++)
+     delta[i] = re(GRF_array[i]);
+     */
+
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+  for (unsigned i=0 ; i<N1;i++)
+    for (unsigned j=0 ; j<N2;j++)
+      for (unsigned k=0 ; k<=N3/2;k++)
+      {
+        ULONG ihalf=k+(N3/2+1)*(j+N2*i);
+        ULONG iind=k+N3*(j+N2*i);
+
+        re(GRF_array2[ihalf])=re(GRF_array[iind]);
+        im(GRF_array2[ihalf])=im(GRF_array[iind]);
+      }
+
+  fftC2R(N1,N2,N3,GRF_array2,delta);
+
+}
diff --git a/math_funcs.h b/math_funcs.h
new file mode 100644
index 0000000..ca001bf
--- /dev/null
+++ b/math_funcs.h
@@ -0,0 +1,105 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#pragma once
+
+#include "define_opt.h"
+#include <gsl/gsl_randist.h>
+#include <string>
+
+void interpolate_CIC(ULONG N1, ULONG N2, ULONG N3, real_prec L1, real_prec L2, real_prec L3, real_prec d1, real_prec d2, real_prec d3, real_prec *xp, real_prec *yp, real_prec *zp, real_prec *field_grid, ULONG N_OBJ, real_prec *interpolation);
+real_prec interpolate_CIC(ULONG N1, ULONG N2, ULONG N3, real_prec L1,
+                          real_prec L2, real_prec L3, real_prec d1,
+                          real_prec d2, real_prec d3, real_prec xp,
+                          real_prec yp, real_prec zp, real_prec *input);
+
+real_prec
+interpolate_TSC(ULONG N1, ULONG N2, ULONG N3, real_prec d1, real_prec d2, real_prec d3, real_prec xp, real_prec yp,
+                real_prec zp, real_prec *field);
+void interpolate_TSC(ULONG N1, ULONG N2, ULONG N3, real_prec d1, real_prec d2, real_prec d3, real_prec *xp, real_prec *yp,
+                     real_prec *zp, real_prec *field_grid, ULONG N_OBJ, real_prec *interpolation);
+
+void interpolate_TSC_multi(ULONG N1, ULONG N2, ULONG N3, real_prec d1, real_prec d2, real_prec d3, real_prec *xp,
+                           real_prec *yp, real_prec *zp, ULONG N_OBJ, real_prec **field_grids,
+                           real_prec **interpolations, int N_fields);
+
+void getCICcells(ULONG N1, ULONG N2, ULONG N3, real_prec L1, real_prec L2, real_prec L3, real_prec d1, real_prec d2, real_prec d3, real_prec x, real_prec y, real_prec z, ULONG *cell_index1, ULONG *cell_index2);
+
+void getCICweights(real_prec L1, real_prec L2, real_prec L3, real_prec d1, real_prec d2, real_prec d3, real_prec x, real_prec y, real_prec z, ULONG *cell_index1, real_prec *dx, real_prec *tx);
+
+void pacman_coordinate(real_prec *x, real_prec L);
+void pacman_difference(real_prec *d_x, real_prec L);
+real_prec pacman_d_x_from_d_x(real_prec d_x, real_prec L);
+int pacman_d_ix_from_d_ix(int d_ix, int N);
+
+
+void complexfactor_mult (ULONG factor, real_prec in_a, complex_prec *in_b, complex_prec *out );
+
+real_prec factorial(int term);
+
+real_prec k_squared(unsigned int i, unsigned int j, unsigned int k, real_prec L1, real_prec L2, real_prec L3,
+                    unsigned int N1, unsigned int N2,
+                    unsigned int N3);
+
+real_prec calc_kx(unsigned int i, real_prec L1, unsigned int N1);
+
+real_prec calc_ky(unsigned int j, real_prec L2, unsigned int N2);
+
+real_prec calc_kz(unsigned int k, real_prec L3, unsigned int N3);
+
+
+
+void convolve(real_prec L1, real_prec L2, real_prec L3, unsigned N1, unsigned N2, unsigned N3, real_prec *in, real_prec *out,
+              real_prec smol, bool zeropad, int filtertype);
+
+void kernelcomp(real_prec L1, real_prec L2, real_prec L3, unsigned N1, unsigned N2, unsigned N3, real_prec smol, int filtertype,
+                struct DATA *data);
+
+void convcomp(unsigned N1, unsigned N2, unsigned N3, real_prec *in, real_prec *out, real_prec smol, std::string dir);
+
+void convcompb(unsigned N1, unsigned N2, unsigned N3, real_prec *in, real_prec *out);
+
+real_prec min_arr ( ULONG factor, real_prec *in );
+
+real_prec max_arr ( ULONG factor, real_prec *in );
+
+real_prec mean_arr ( ULONG size, real_prec *in );
+
+void gradfft(unsigned N1, unsigned N2, unsigned N3, real_prec L1, real_prec L2, real_prec L3, real_prec *in,
+             real_prec *out, unsigned int dim);
+
+void gradfindif(unsigned N1, real_prec L1, real_prec *in, real_prec *out, unsigned int dim);
+
+
+
+
+
+real_prec power_mean(real_prec x, real_prec y, real_prec p);
+
+
+
+real_prec GR_NUM(gsl_rng * SEED, real_prec sigma ,int GR_METHOD);
+
+
+void create_GARFIELDR2(int N1,int N2,int N3,real_prec *delta,real_prec * Power,gsl_rng * seed);
+
+void create_GARFIELD_old(int N1,int N2,int N3, real_prec L1, real_prec L2, real_prec L3,real_prec *delta,real_prec * Power,gsl_rng * seed);
+
+void create_GARFIELD(unsigned N1,unsigned N2,unsigned N3, real_prec L1, real_prec L2, real_prec L3,real_prec *delta,real_prec * Power,gsl_rng * seed);
+
+void grad_inv_lap_FS(unsigned N1, unsigned N2, unsigned N3, real_prec L1, real_prec L2, real_prec L3, complex_prec *in,
+                     complex_prec *out, unsigned int index, bool rfft = false);
+
+
+real_prec median_arr (ULONG size, real_prec *in);
+
+int odd(int inputval);
+
+real_prec std_arr (ULONG size, real_prec *in);
+
diff --git a/planck/LICENSE b/planck/LICENSE
new file mode 100644
index 0000000..d159169
--- /dev/null
+++ b/planck/LICENSE
@@ -0,0 +1,339 @@
+                    GNU GENERAL PUBLIC LICENSE
+                       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+                            NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
diff --git a/planck/bstream.h b/planck/bstream.h
new file mode 100644
index 0000000..112c862
--- /dev/null
+++ b/planck/bstream.h
@@ -0,0 +1,103 @@
+#pragma once
+
+#include <utility>
+
+typedef unsigned char byte;
+const bool big_endian=false;
+#define ULONG unsigned long
+
+template<ULONG size> inline void byteswap_helper (byte *)
+  {
+  bool Error_unspecialized_template_used[-(size*size)];
+  // compile time error would be nice here ...
+  }
+template<> inline void byteswap_helper<1> (byte *)
+  {}
+template<> inline void byteswap_helper<2> (byte *val)
+  {
+  using namespace std;
+  swap (val[0],val[1]);
+  }
+template<> inline void byteswap_helper<4> (byte *val)
+  {
+  using namespace std;
+  swap (val[0],val[3]); swap (val[1],val[2]);
+  }
+template<> inline void byteswap_helper<8> (byte *val)
+  {
+  using namespace std;
+  swap (val[0],val[7]); swap (val[1],val[6]);
+  swap (val[2],val[5]); swap (val[3],val[4]);
+  }
+
+template<typename T> inline void byteswap (T& val)
+  {
+  byteswap_helper<sizeof(T)> (reinterpret_cast<byte *> (&val));
+  }
+
+const bool file_is_lsb=big_endian, file_is_msb=!big_endian,
+           file_is_natural=false;
+
+class bofstream: public std::ofstream
+  {
+  private:
+    bool doswap;
+
+  public:
+    /*! */
+    bofstream (const char *fname, bool doswap_)
+      : std::ofstream(fname,std::ios::binary), doswap(doswap_) {}
+
+    template<typename T> bofstream &operator<< (const T &data)
+      {
+      if (doswap)
+        {
+        T tmp = data;
+        byteswap (tmp);
+        write (reinterpret_cast<const char *> (&tmp), sizeof(T));
+        }
+      else
+        write (reinterpret_cast<const char *> (&data), sizeof(T));
+      return *this;
+      }
+    template<typename T> bofstream &put (const T *data, ULONG num)
+      {
+      if (doswap)
+        {
+        for (ULONG m=0; m<num; ++m)
+          {
+	  T tmp=data[m];
+          byteswap (tmp);
+          write (reinterpret_cast<const char *> (&tmp), sizeof(T));
+          }
+        }
+      else
+        write (reinterpret_cast<const char *> (data), static_cast<std::streamsize>(num*sizeof(T)));
+      return *this;
+      }
+  };
+
+class bifstream: public std::ifstream
+  {
+  private:
+    bool doswap;
+
+  public:
+    /*! */
+    bifstream (const char *fname, bool doswap_)
+      : std::ifstream(fname,std::ios::binary), doswap(doswap_) {}
+
+    template<typename T> bifstream &operator>> (T &data) {
+      read (reinterpret_cast<char *> (&data), sizeof(T));
+      if (doswap)
+        byteswap (data);
+      return *this;
+    }
+    template<typename T> bifstream &get (T *data, ULONG num) {
+      read (reinterpret_cast<char *> (data), static_cast<std::streamsize>(num*sizeof(T)));
+      if (doswap)
+        for (ULONG m=0; m<num; ++m)
+          byteswap (data[m]);
+      return *this;
+    }
+  };
diff --git a/planck/constants.h b/planck/constants.h
new file mode 100644
index 0000000..41c2e23
--- /dev/null
+++ b/planck/constants.h
@@ -0,0 +1,86 @@
+/*! \file constants.h
+ *  Mathematical, physical and technical constants for LevelS.
+ */
+
+#ifndef PLANCK_CONSTANTS_H
+#define PLANCK_CONSTANTS_H
+
+#include <cmath>
+
+/*! \defgroup mathconstgroup Mathematical constants */
+/*! \{ */
+
+const double pi=3.141592653589793238462643383279502884197;
+const double twopi=6.283185307179586476925286766559005768394;
+const double inv_twopi=1.0/twopi;
+const double fourpi=12.56637061435917295385057353311801153679;
+const double halfpi=1.570796326794896619231321691639751442099;
+const double inv_halfpi=0.6366197723675813430755350534900574;
+const double inv_sqrt4pi = 0.2820947917738781434740397257803862929220;
+
+const double ln2 = 0.6931471805599453094172321214581766;
+const double inv_ln2 = 1.4426950408889634073599246810018921;
+const double ln10 = 2.3025850929940456840179914546843642;
+
+const double onethird=1.0/3.0;
+const double twothird=2.0/3.0;
+const double fourthird=4.0/3.0;
+
+const double degr2rad=pi/180.0;
+const double rad2degr=180.0/pi;
+
+//! Ratio between FWHM and sigma of a Gauss curve (\f$\sqrt{8\ln2}\f$).
+const double sigma2fwhm=2.3548200450309493; // sqrt(8*log(2.))
+const double fwhm2sigma=1/sigma2fwhm;
+
+/*! \} */
+
+/*! \defgroup physconstgroup Physical constants */
+/*! \{ */
+
+const double Jansky2SI=1.0e-26;
+const double SI2Jansky=1.0e+26;
+
+//! Light speed in m/s.
+const double speedOfLight=2.99792458e8;
+
+//! Boltzmann's constant in J/K
+const double kBoltzmann=1.380658e-23;
+
+//! Stefan-Boltzmann constant in W/m^2/K^4
+const double sigmaStefanBoltzmann=5.67051e-8;
+
+//! Planck's constant in J s
+const double hPlanck=6.6260755e-34;
+
+//! Astronomical unit in m
+const double astronomicalUnit=1.49597893e11;
+
+//! Solar constant in W/m^2
+const double solarConstant=1368.0;
+
+//! Tropical year in s
+const double tropicalYear=3.15569259747e7;
+
+//! Average CMB temperature in K
+const double tcmb = 2.726;
+
+//! Colatitude of the solar system motion relative to CMB
+//! (ecliptical coordinates).
+const double solsysdir_ecl_theta = 1.7678013480275747;
+
+//! Longitude of the solar system motion relative to CMB
+//! (ecliptical coordinates).
+const double solsysdir_ecl_phi = 3.0039153062803194;
+
+//! Speed of the solar system motion relative to CMB in m/s.
+const double solsysspeed = 371000.0;
+
+/*! \} */
+
+// technical constants
+
+//! Healpix value representing "undefined"
+const double Healpix_undef=-1.6375e30;
+
+#endif
diff --git a/planck/cxxutils.cc b/planck/cxxutils.cc
new file mode 100644
index 0000000..d4e6cc0
--- /dev/null
+++ b/planck/cxxutils.cc
@@ -0,0 +1,260 @@
+/*
+ *  This file contains the implementation of various convenience functions
+ *  used by the Planck LevelS package.
+ *
+ *  Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 Max-Planck-Society
+ *  Authors: Martin Reinecke, Reinhard Hell
+ */
+
+/*
+ * TODO: (EGP) clean up unused things
+ */
+
+// if we are using g++, check for version 3.0 or higher
+#ifdef __GNUC__
+#if (__GNUC__<3)
+#error your C++ compiler is too old. g++ version 3.0 or higher is required.
+#endif
+#endif
+
+#include <fstream>
+#include <sstream>
+#include <iostream>
+#include <iomanip>
+#include "cxxutils.h"
+#include "datatypes.h"
+#include "openmp_support.h"
+
+using namespace std;
+
+bool file_present (const string &filename)
+  {
+  ifstream dummy(filename.c_str());
+  return dummy.good();
+  }
+
+void assert_present (const string &filename)
+  {
+  if (file_present(filename)) return;
+  throw Message_error ("Error: file " + filename + " does not exist!");
+  }
+
+void assert_not_present (const string &filename)
+  {
+  if (!file_present(filename)) return;
+  throw Message_error ("Error: file " + filename + " already exists!");
+  }
+
+void remove_file (const string &filename)
+  {
+  remove (filename.c_str());
+  }
+
+string trim (const string &orig)
+  {
+  string::size_type p1=orig.find_first_not_of(" \t");
+  if (p1==string::npos) return "";
+  string::size_type p2=orig.find_last_not_of(" \t");
+  return orig.substr(p1,p2-p1+1);
+  }
+
+template<typename T> string dataToString (const T &x)
+  {
+  ostringstream strstrm;
+  strstrm << x;
+  return trim(strstrm.str());
+  }
+
+template<> string dataToString (const bool &x)
+  { return x ? "T" : "F"; }
+template<> string dataToString (const string &x)
+  { return trim(x); }
+template<> string dataToString (const float &x)
+  {
+  ostringstream strstrm;
+  strstrm << setprecision(8) << x;
+  return trim(strstrm.str());
+  }
+template<> string dataToString (const double &x)
+  {
+  ostringstream strstrm;
+  strstrm << setprecision(16) << x;
+  return trim(strstrm.str());
+  }
+
+template string dataToString (const int &x);
+template string dataToString (const unsigned int &x);
+template string dataToString (const long &x);
+template string dataToString (const unsigned long long &x);
+template string dataToString (const long long &x);
+template string dataToString (const unsigned long &x);
+
+string intToString(int x, int width)
+  {
+  ostringstream strstrm;
+  strstrm << setw(width) << setfill('0') << x;
+  return trim(strstrm.str());
+  }
+
+template<typename T> void stringToData (const string &x, T &value)
+  {
+  string error = string("conversion error in stringToData<")
+               + type2typename<T>()
+               +">(\""+x+"\")";
+  istringstream strstrm(x);
+  strstrm >> value;
+  if (!strstrm)
+    throw Message_error(error);
+
+  string rest;
+  strstrm >> rest;
+//  rest=trim(rest);
+  if (rest.length()>0) throw Message_error(error);
+  }
+
+template<> void stringToData (const string &x, string &value)
+  { value = trim(x); }
+
+template<> void stringToData (const string &x, bool &value)
+  {
+  if ( x=="F" || x=="f" || x=="n" || x=="N" || x=="false" || x==".false."
+       || x=="FALSE" || x==".FALSE.")
+    value=false;
+  else if (x=="T" || x=="t" || x=="y" || x=="Y" || x=="true" || x==".true."
+       || x=="TRUE" || x==".TRUE.")
+    value=true;
+  else
+    {
+    string error = string("conversion error in stringToData<bool>(\"")+x+"\")";
+    throw Message_error (error);
+    }
+  }
+
+template void stringToData (const string &x, int &value);
+template void stringToData (const string &x, long &value);
+template void stringToData (const string &x, float &value);
+template void stringToData (const string &x, double &value);
+template void stringToData (const string &x, unsigned long long &value);
+template void stringToData (const string &x, long long &value);
+template void stringToData (const string &x, unsigned long &value);
+template void stringToData (const string &x, unsigned int &value);
+
+bool equal_nocase (const string &a, const string &b)
+  {
+  if (a.size()!=b.size()) return false;
+  for (unsigned int m=0; m<a.size(); ++m)
+    if (tolower(a[m])!=tolower(b[m])) return false;
+  return true;
+  }
+
+string tolower(const string &input)
+  {
+  string result=input;
+  for (unsigned int m=0; m<result.size(); ++m)
+    result[m]=char(tolower(result[m]));
+  return result;
+  }
+
+#ifdef PLANCK_SILENT
+
+void announce_progress (int, int) {}
+void announce_progress (double, double, double) {}
+void end_announce_progress () {}
+
+#else
+
+void announce_progress (int now, int total)
+  {
+  if ((now%(max(total/100,1)))==0)
+    cout << "\r " << setw(3) << planck_nint ((now*100.)/total)
+         << "% done\r" << flush;
+  }
+
+void announce_progress (double now, double last, double total)
+  {
+  int lastpercent = int((last/total)*100),
+      nowpercent  = int(( now/total)*100);
+  if (nowpercent>lastpercent)
+    cout << "\r " << setw(3) << nowpercent << "% done\r" << flush;
+  }
+
+void end_announce_progress ()
+  { cout << endl; }
+
+#endif
+
+static void openmp_status()
+  {
+  if (openmp_enabled())
+    {
+    cout << "Application was compiled with OpenMP support," << endl;
+    if (openmp_max_threads() == 1)
+      cout << "but running with one process only." << endl;
+    else
+      cout << "running with up to " << openmp_max_threads()
+           << " processes." << endl;
+    }
+  else
+    cout << "Application was compiled without OpenMP support;" << endl
+         << "running in scalar mode." << endl;
+  }
+
+void announce (const string &name)
+  {
+  cout << endl << "+-";
+  for (unsigned int m=0; m<name.length(); ++m) cout << "-";
+  cout << "-+" << endl;
+  cout << "| " << name << " |" << endl;
+  cout << "+-";
+  for (unsigned int m=0; m<name.length(); ++m) cout << "-";
+  cout << "-+" << endl << endl;
+  openmp_status();
+  cout << endl;
+  }
+
+void module_startup (const string &name, int argc, const char **,
+  int argc_expected, const string &argv_expected)
+  {
+  announce (name);
+  if (argc==argc_expected) return;
+  cerr << "Usage: " << name << " " << argv_expected << endl;
+  throw Message_error();
+  }
+
+void parse_file (const string &filename, map<string,string> &dict)
+  {
+  int lineno=0;
+  dict.clear();
+  ifstream inp(filename.c_str());
+  planck_assert(inp.good(),"Could not open parameter file "+filename);
+  while (inp)
+    {
+    string line;
+    getline(inp, line);
+    ++lineno;
+    line=line.substr(0,line.find_first_of("#"));
+    line=trim(line);
+    if (line.size()>0)
+      {
+      string::size_type eqpos=line.find("=");
+      if (eqpos!=string::npos)
+        {
+        string key=trim(line.substr(0,eqpos)),
+               value=trim(line.substr(eqpos+1,string::npos));
+        if (key=="")
+          cerr << "Warning: empty key in " << filename << ", line "
+               << lineno << endl;
+        else
+          {
+          if (dict.find(key)!=dict.end())
+            cerr << "Warning: key " << key << " multiply defined in "
+                 << filename << ", line " << lineno << endl;
+          dict[key]=value;
+          }
+        }
+      else
+        cerr << "Warning: unrecognized format in " << filename << ", line "
+             << lineno << ":\n" << line << endl;
+      }
+    }
+  }
diff --git a/planck/cxxutils.h b/planck/cxxutils.h
new file mode 100644
index 0000000..034af98
--- /dev/null
+++ b/planck/cxxutils.h
@@ -0,0 +1,242 @@
+/*! \file cxxutils.h
+ *  Various convenience functions used by the Planck LevelS package.
+ *
+ *  Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 Max-Planck-Society
+ *  \author Martin Reinecke \author Reinhard Hell
+ */
+
+#ifndef PLANCK_CXXUTILS_H
+#define PLANCK_CXXUTILS_H
+
+#include <algorithm>
+#include <string>
+#include <map>
+#include <cmath>
+#include "message_error.h"
+#include "constants.h"
+
+/*! \defgroup mathutilsgroup Mathematical helper functions */
+/*! \{ */
+
+//! Returns \e true if | \a a-b | < \a epsilon * | \a b |, else \e false.
+template<typename F> inline bool approx (F a, F b, F epsilon=1e-5)
+  {
+  using namespace std;
+  return abs(a-b) < (epsilon*abs(b));
+  }
+
+//! Returns \e true if | \a a-b | < \a epsilon, else \e false.
+template<typename F> inline bool abs_approx (F a, F b, F epsilon=1e-5)
+  {
+  using namespace std;
+  return abs(a-b) < epsilon;
+  }
+
+//! Returns the largest integer which is smaller than (or equal to) \a arg.
+template<typename I, typename F> inline I ifloor (F arg)
+  {
+  return (arg>=0) ? I(arg) : I(arg)-1;
+  }
+
+//! Returns the integer which is nearest to \a arg.
+template<typename I, typename F> inline I nearest (F arg)
+  {
+  arg += 0.5;
+  return (arg>=0) ? I(arg) : I(arg)-1;
+  }
+
+//! Returns \a v1+v2 if \a v1<0, \a v1-v2 if \a v1>=v2, else \a v1.
+/*! \a v1 can be positive or negative; \a v2 must be positive. */
+template<typename T> inline T weak_modulo (T v1, T v2)
+  { return (v1>=0) ? ((v1<v2) ? v1 : (v1-v2)) : (v1+v2); }
+
+//! Returns the remainder of the division \a v1/v2.
+/*! The result is non-negative.
+    \a v1 can be positive or negative; \a v2 must be positive. */
+inline double fmodulo (double v1, double v2)
+  {
+  using namespace std;
+  return (v1>=0) ? ((v1<v2) ? v1 : fmod(v1,v2)) : (fmod(v1,v2)+v2);
+  }
+
+//! Returns the remainder of the division \a v1/v2.
+/*! The result is non-negative.
+    \a v1 can be positive or negative; \a v2 must be positive. */
+template<typename I> inline I imodulo (I v1, I v2)
+  { return (v1>=0) ? ((v1<v2) ? v1 : (v1%v2)) : ((v1%v2)+v2); }
+
+//! Returns -1 if \a signvalue is negative, else +1.
+template<typename T> inline T sign (const T& signvalue)
+  { return (signvalue>=0) ? 1 : -1; }
+
+//! Returns the integer \a n, which fulfills \a n*n<=arg<(n+1)*(n+1).
+template<typename I> inline unsigned int isqrt (I arg)
+  {
+  using namespace std;
+  if (sizeof(I)<=4)
+    return unsigned (sqrt(arg+0.5));
+  long double arg2 = arg;
+  return unsigned (sqrt(arg2+0.5));
+  }
+
+//! Returns the largest integer \a n that fulfills \a 2^n<=arg.
+template<typename I> inline unsigned int ilog2 (I arg)
+  {
+  unsigned int res=0;
+  while (arg > 0x0000FFFF) { res+=16; arg>>=16; }
+  if (arg > 0x000000FF) { res|=8; arg>>=8; }
+  if (arg > 0x0000000F) { res|=4; arg>>=4; }
+  if (arg > 0x00000003) { res|=2; arg>>=2; }
+  if (arg > 0x00000001) { res|=1; }
+  return res;
+  }
+
+//! Returns \a atan2(y,x) if \a x!=0 or \a y!=0; else returns 0.
+inline double safe_atan2 (double y, double x)
+  {
+  using namespace std;
+  return ((x==0.) && (y==0.)) ? 0.0 : atan2(y,x);
+  }
+
+//! Returns an index to the left of two interpolation values.
+/*! \a begin points to an array containing a sequence of values
+    sorted in ascending order. The length of the array is \a len.
+    If \a val is lower than the first element, 0 is returned.
+    If \a val is higher than the last element, \a len-2
+    is returned. Else, the index of the largest element smaller
+    than \a val is returned. */
+template<typename T> inline int interpol_left
+  (const T *begin, int len, const T &val)
+  {
+  const T *end = begin+len;
+  const T *iter = std::lower_bound (begin, end, val);
+  if (iter==begin) return 0;
+  if (iter==end) return len-2;
+  return (iter-begin)-1;
+  }
+
+//! Returns an index to the nearest interpolation value.
+/*! \a begin points to an array containing a sequence of values
+    sorted in ascending order. The length of the array is \a len.
+    If \a val is lower than the first element, 0 is returned.
+    If \a val is higher than the last element, \a len-1 is returned.
+    Else, the index of the nearest element within the sequence of
+    values is returned. */
+template<typename T> inline int interpol_nearest
+  (const T *begin, int len, const T &val)
+  {
+  int left = interpol_left(begin, len, val);
+  T delleft = val-(*(begin+left));
+  T delright = (*(begin+left+1))-val;
+  if (delright<0) return left+1;
+  return (delright<delleft) ? (left+1) : left;
+  }
+
+/*! \} */
+
+/*! \defgroup fileutilsgroup File-handling helper functions */
+/*! \{ */
+
+//! If the file \a filename is present, return \p true, else \p false.
+bool file_present (const std::string &filename);
+
+//! Removes the file \a filename
+void remove_file (const std::string &filename);
+
+/*! \} */
+
+/*! \defgroup assertgroup Assertions */
+/*! \{ */
+
+//! Throws a Message_error containing \a msg if \a testval is false.
+inline void planck_assert (bool testval, const std::string &msg)
+  {
+  if (testval) return;
+  throw Message_error ("Assertion failed: "+msg);
+  }
+//! Throws a Message_error containing \a msg if \a testval is false.
+inline void planck_assert (bool testval, const char *msg)
+  {
+  if (testval) return;
+  throw Message_error ("Assertion failed: "+std::string(msg));
+  }
+
+//! Checks the presence of the file \a filename.
+/*! If the file is not present, a Message_error is thrown. */
+void assert_present (const std::string &filename);
+
+//! Checks the absence of the file \a filename.
+/*! If the file is present, a Message_error is thrown. */
+void assert_not_present (const std::string &filename);
+
+/*! \} */
+
+/*! \defgroup stringutilsgroup String handling helper functions */
+/*! \{ */
+
+//! Returns the string \a orig without leading and trailing whitespace.
+std::string trim (const std::string &orig);
+
+//! Returns a string containing the text representation of \a x.
+/*! Care is taken that no information is lost in the conversion. */
+template<typename T> std::string dataToString(const T &x);
+template<> std::string dataToString (const bool &x);
+template<> std::string dataToString (const std::string &x);
+template<> std::string dataToString (const float &x);
+template<> std::string dataToString (const double &x);
+
+/*! Returns a string containing the text representation of \a x, padded
+    with leading zeroes to \a width characters. */
+std::string intToString(int x, int width);
+
+//! Reads a value of a given datatype from a string
+template<typename T> void stringToData (const std::string &x, T &value);
+template<> void stringToData (const std::string &x, std::string &value);
+template<> void stringToData (const std::string &x, bool &value);
+
+//! Reads a value of a given datatype from a string
+template<typename T> inline T stringToData (const std::string &x)
+  { T result; stringToData(x,result); return result; }
+
+//! Parses the file \a filename and returns the key/value pairs in \a dict.
+void parse_file (const std::string &filename,
+  std::map<std::string,std::string> &dict);
+
+//! Case-insensitive string comparison
+/*! Returns \a true, if \a a and \a b differ only in capitalisation,
+    else \a false. */
+bool equal_nocase (const std::string &a, const std::string &b);
+
+//! Returns lowercase version of \a input.
+std::string tolower(const std::string &input);
+
+/*! \} */
+
+//! Indicates progress by printing the percentage of \a now/total.
+/*! A message is only printed if it has changed since \a now-1/total.
+    The output is followed by a carriage return, not a newline. */
+void announce_progress (int now, int total);
+//! Indicates progress by printing the percentage of \a now/total.
+/*! A message is only printed if it has changed since \a last/total.
+    The output is followed by a carriage return, not a newline. */
+void announce_progress (double now, double last, double total);
+/*! This function should be called after a sequence of announce_progress()
+    calls has finished. */
+void end_announce_progress ();
+
+//! Prints a banner containing \a name. Useful for displaying program names.
+void announce (const std::string &name);
+
+/*! Prints a banner containing \a name and checks if \a argc==argc_expected.
+    If not, a usage description is given and the program is terminated. */
+void module_startup (const std::string &name, int argc, const char **argv,
+  int argc_expected, const std::string &argv_expected);
+
+//! Returns an appropriate FITS repetition count for a map with \a npix pixels.
+inline unsigned int healpix_repcount (int npix)
+  {
+  if (npix<1024) return 1;
+  if ((npix%1024)==0) return 1024;
+  return isqrt (npix/12);
+  }
+#endif
diff --git a/planck/datatypes.h b/planck/datatypes.h
new file mode 100644
index 0000000..9a49d06
--- /dev/null
+++ b/planck/datatypes.h
@@ -0,0 +1,198 @@
+/*
+ *  This file defines various platform-independent data types.
+ *  If any of the requested types is not available, compilation aborts
+ *  with an error (unfortunately a rather obscure one).
+ *
+ *  Copyright (C) 2004 Max-Planck-Society
+ *  Author: Martin Reinecke
+ */
+
+#ifndef PLANCK_DATATYPES_H
+#define PLANCK_DATATYPES_H
+
+#include <string>
+#include "message_error.h"
+
+// Template magic to select the proper data types. These templates
+// should not be used outside this file.
+
+template <typename T, bool equalSize> struct sizeChooserHelper
+  { typedef void TYPE; };
+
+template <typename T> struct sizeChooserHelper<T,true>
+  { typedef T TYPE; };
+
+template <typename T1, typename T2, typename T3> struct sizeChooserHelper2
+  { typedef T1 TYPE; };
+
+template <typename T2, typename T3> struct sizeChooserHelper2 <void, T2, T3>
+  { typedef T2 TYPE; };
+
+template <typename T3> struct sizeChooserHelper2 <void, void, T3>
+  { typedef T3 TYPE; };
+
+template <> struct sizeChooserHelper2 <void, void, void>
+  { };
+
+template <int sz, typename T1, typename T2=char, typename T3=char>
+  struct sizeChooser
+  {
+  typedef typename sizeChooserHelper2
+    <typename sizeChooserHelper<T1,sizeof(T1)==sz>::TYPE,
+     typename sizeChooserHelper<T2,sizeof(T2)==sz>::TYPE,
+     typename sizeChooserHelper<T3,sizeof(T3)==sz>::TYPE >::TYPE TYPE;
+  };
+
+typedef signed char int8;
+typedef unsigned char uint8;
+
+typedef sizeChooser<2, short, int>::TYPE
+  int16;
+typedef sizeChooser<2, unsigned short, unsigned int>::TYPE
+  uint16;
+
+typedef sizeChooser<4, int, long, short>::TYPE
+  int32;
+typedef sizeChooser<4, unsigned int, unsigned long, unsigned short>::TYPE
+  uint32;
+
+typedef sizeChooser<8, long, long long>::TYPE
+  int64;
+typedef sizeChooser<8, unsigned long, unsigned long long>::TYPE
+  uint64;
+
+typedef sizeChooser<4, float, double>::TYPE
+  float32;
+typedef sizeChooser<8, double, long double>::TYPE
+  float64;
+
+// mapping of types to integer constants
+enum { PLANCK_INT8    =  0,
+       PLANCK_UINT8   =  1,
+       PLANCK_INT16   =  2,
+       PLANCK_UINT16  =  3,
+       PLANCK_INT32   =  4,
+       PLANCK_UINT32  =  5,
+       PLANCK_INT64   =  6,
+       PLANCK_UINT64  =  7,
+       PLANCK_FLOAT32 =  8,
+       PLANCK_FLOAT64 =  9,
+       PLANCK_BOOL    = 10,
+       PLANCK_STRING  = 11 };
+
+template<typename T> struct typehelper {};
+
+template<> struct typehelper<int8>
+  { enum { id=PLANCK_INT8 }; };
+template<> struct typehelper<uint8>
+  { enum { id=PLANCK_UINT8 }; };
+template<> struct typehelper<int16>
+  { enum { id=PLANCK_INT16 }; };
+template<> struct typehelper<uint16>
+  { enum { id=PLANCK_UINT16 }; };
+template<> struct typehelper<int32>
+  { enum { id=PLANCK_INT32 }; };
+template<> struct typehelper<uint32>
+  { enum { id=PLANCK_UINT32 }; };
+template<> struct typehelper<int64>
+  { enum { id=PLANCK_INT64 }; };
+template<> struct typehelper<uint64>
+  { enum { id=PLANCK_UINT64 }; };
+template<> struct typehelper<float32>
+  { enum { id=PLANCK_FLOAT32 }; };
+template<> struct typehelper<float64>
+  { enum { id=PLANCK_FLOAT64 }; };
+template<> struct typehelper<bool>
+  { enum { id=PLANCK_BOOL }; };
+template<> struct typehelper<std::string>
+  { enum { id=PLANCK_STRING }; };
+
+inline int type2size (int type)
+  {
+  switch (type)
+    {
+    case PLANCK_INT8   : return 1;
+    case PLANCK_UINT8  : return 1;
+    case PLANCK_INT16  : return 2;
+    case PLANCK_UINT16 : return 2;
+    case PLANCK_INT32  : return 4;
+    case PLANCK_UINT32 : return 4;
+    case PLANCK_INT64  : return 8;
+    case PLANCK_UINT64 : return 8;
+    case PLANCK_FLOAT32: return 4;
+    case PLANCK_FLOAT64: return 8;
+    case PLANCK_BOOL   : return 1;
+    case PLANCK_STRING : return 1;
+    default: throw Message_error ("unsupported data type");
+    }
+  }
+
+inline int string2type(const std::string &type)
+  {
+  if (type=="FLOAT64") return PLANCK_FLOAT64;
+  if (type=="FLOAT32") return PLANCK_FLOAT32;
+  if (type=="INT8")    return PLANCK_INT8;
+  if (type=="UINT8")   return PLANCK_UINT8;
+  if (type=="INT16")   return PLANCK_INT16;
+  if (type=="UINT16")  return PLANCK_UINT16;
+  if (type=="INT32")   return PLANCK_INT32;
+  if (type=="UINT32")  return PLANCK_UINT32;
+  if (type=="INT64")   return PLANCK_INT64;
+  if (type=="UINT64")  return PLANCK_UINT64;
+  if (type=="BOOL")    return PLANCK_BOOL;
+  if (type=="STRING")  return PLANCK_STRING;
+  throw Message_error ("unknown data type "+type);
+  }
+
+inline const char *type2string (int type)
+  {
+  switch (type)
+    {
+    case PLANCK_INT8   : return "INT8";
+    case PLANCK_UINT8  : return "UINT8";
+    case PLANCK_INT16  : return "INT16";
+    case PLANCK_UINT16 : return "UINT16";
+    case PLANCK_INT32  : return "INT32";
+    case PLANCK_UINT32 : return "UINT32";
+    case PLANCK_INT64  : return "INT64";
+    case PLANCK_UINT64 : return "UINT64";
+    case PLANCK_FLOAT32: return "FLOAT32";
+    case PLANCK_FLOAT64: return "FLOAT64";
+    case PLANCK_BOOL   : return "BOOL";
+    case PLANCK_STRING : return "STRING";
+    default: throw Message_error ("unsupported data type");
+    }
+  }
+
+template<typename T> inline const char *type2typename ()
+  { return "unknown type"; }
+template<> inline const char *type2typename<signed char> ()
+  { return "signed char"; }
+template<> inline const char *type2typename<unsigned char> ()
+  { return "unsigned char"; }
+template<> inline const char *type2typename<short> ()
+  { return "short"; }
+template<> inline const char *type2typename<unsigned short> ()
+  { return "unsigned short"; }
+template<> inline const char *type2typename<int> ()
+  { return "int"; }
+template<> inline const char *type2typename<unsigned int> ()
+  { return "unsigned int"; }
+template<> inline const char *type2typename<long> ()
+  { return "long"; }
+template<> inline const char *type2typename<unsigned long> ()
+  { return "unsigned long"; }
+template<> inline const char *type2typename<long long> ()
+  { return "long long"; }
+template<> inline const char *type2typename<unsigned long long> ()
+  { return "unsigned long long"; }
+template<> inline const char *type2typename<float> ()
+  { return "float"; }
+template<> inline const char *type2typename<double> ()
+  { return "double"; }
+template<> inline const char *type2typename<bool> ()
+  { return "bool"; }
+template<> inline const char *type2typename<std::string> ()
+  { return "std::string"; }
+
+#endif
diff --git a/planck/message_error.h b/planck/message_error.h
new file mode 100644
index 0000000..d157004
--- /dev/null
+++ b/planck/message_error.h
@@ -0,0 +1,70 @@
+/*
+ *  Class for error reporting
+ *
+ *  Copyright (C) 2003, 2004 Max-Planck-Society
+ *  Authors: Reinhard Hell, Martin Reinecke
+ */
+
+#ifndef PLANCK_MESSAGE_ERROR_H
+#define PLANCK_MESSAGE_ERROR_H
+
+#include <exception>
+#include <iostream>
+#include <string>
+
+#if defined (PLANCK_STACKTRACE)
+#include <execinfo.h>
+#endif
+
+inline void show_stackframe()
+  {
+#if defined (PLANCK_STACKTRACE)
+  void *trace[16];
+  int trace_size = backtrace(trace, 16);
+  char **messages = backtrace_symbols(trace, trace_size);
+  std::cerr << "[bt] Execution path:" << std::endl;
+  for (int i=0; i<trace_size; ++i)
+    std::cerr << "[bt] " << messages[i] << std::endl;
+#endif
+  }
+
+
+class Message_error
+  {
+  private:
+    std::string msg;
+
+  public:
+    Message_error()
+      : msg (std::string("Unspecified error"))
+      { std::cerr<<msg<<std::endl; show_stackframe(); }
+
+    explicit Message_error(const std::string &message)
+      : msg (message) { std::cerr<<msg<<std::endl; show_stackframe(); }
+
+    virtual const char* what() const
+      { return msg.c_str(); }
+
+    virtual ~Message_error() {}
+  };
+
+#if defined (PLANCK_CHECKS)
+
+#define PLANCK_DIAGNOSIS_BEGIN try {
+#define PLANCK_DIAGNOSIS_END \
+} \
+catch (Message_error &e) \
+  { std::cerr << "Planck exception: " << e.what() << std::endl; throw; } \
+catch (std::exception &e) \
+  { std::cerr << "std::exception: " << e.what() << std::endl; throw; } \
+catch (...) \
+  { std::cerr << "Unknown exception" << std::endl; throw; }
+
+#else
+
+#define PLANCK_DIAGNOSIS_BEGIN
+#define PLANCK_DIAGNOSIS_END
+
+#endif
+
+#endif
diff --git a/planck/openmp_support.h b/planck/openmp_support.h
new file mode 100644
index 0000000..4623d45
--- /dev/null
+++ b/planck/openmp_support.h
@@ -0,0 +1,60 @@
+/*
+ *  Copyright (C) 2005, 2006, 2007 Max-Planck-Society
+ *  \author Martin Reinecke
+ */
+
+#ifndef PLANCK_OPENMP_SUPPORT_H
+#define PLANCK_OPENMP_SUPPORT_H
+
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
+inline bool openmp_enabled()
+  {
+#ifdef _OPENMP
+  return true;
+#else
+  return false;
+#endif
+  }
+
+inline int openmp_max_threads ()
+  {
+#ifdef _OPENMP
+  return omp_get_max_threads();
+#else
+  return 1;
+#endif
+  }
+
+inline int openmp_thread_num ()
+  {
+#ifdef _OPENMP
+  return omp_get_thread_num();
+#else
+  return 0;
+#endif
+  }
+
+/*! Calculates the range of indices between \a glo and \a ghi which
+    must be processed by this thread and returns it in \a lo and \a hi.
+
+    The indices \a ghi and \a hi are "one past the last real index",
+    in analogy to the STL iterators. */
+inline void openmp_calc_share (int glo, int ghi, int &lo, int &hi)
+  {
+#ifdef _OPENMP
+  int nwork = ghi-glo;
+  int nproc = omp_get_num_threads();
+  int me = omp_get_thread_num();
+  int nbase = nwork/nproc;
+  int additional = nwork%nproc;
+  lo = glo+me*nbase + ((me<additional) ? me : additional);
+  hi = lo+nbase+(me<additional);
+#else
+  lo=glo; hi=ghi;
+#endif
+  }
+
+#endif
diff --git a/planck/paramfile.h b/planck/paramfile.h
new file mode 100644
index 0000000..d99f56e
--- /dev/null
+++ b/planck/paramfile.h
@@ -0,0 +1,85 @@
+/*
+ *  Class for parsing parameter files
+ *
+ *  Copyright (C) 2003, 2004, 2005 Max-Planck-Society
+ *  Authors: Martin Reinecke, Reinhard Hell
+ */
+
+#ifndef PLANCK_PARAMFILE_H
+#define PLANCK_PARAMFILE_H
+
+#include <map>
+#include <string>
+#include <iostream>
+#include "simparams.h"
+#include "cxxutils.h"
+
+class paramfile
+  {
+  private:
+    typedef std::map<std::string,std::string> params_type;
+    params_type params;
+    bool verbose;
+
+    std::string get_valstr(const std::string &key) const
+      {
+      params_type::const_iterator loc=params.find(key);
+      if (loc!=params.end()) return loc->second;
+      throw Message_error ("Error: Cannot find the key \"" + key + "\".");
+      }
+
+  public:
+    paramfile (const std::string &filename, bool verbose_=true)
+      : verbose(verbose_)
+      { parse_file (filename, params); }
+
+    paramfile (const params_type &par)
+      : params (par), verbose(true)
+      {}
+
+    bool param_present(const std::string &key) const
+      { return (params.find(key)!=params.end()); }
+
+    template<typename T> T find (const std::string &key) const
+      {
+      T result;
+      stringToData(get_valstr(key),result);
+      //EGP if (verbose)
+      //  std::cout << "Parser: " << key << " = " << dataToString(result)
+            //      << std::endl;
+      return result;
+      }
+    template<typename T> T find
+      (const std::string &key, const T &deflt)
+      {
+      if (param_present(key)) return find<T>(key);
+      if (verbose)
+      //  std::cout << "Parser: " << key << " = " << dataToString(deflt)
+          //        << " <default>" << std::endl;
+      params[key]=dataToString(deflt);
+      return deflt;
+      }
+
+    const params_type &getParams() const
+      { return params; }
+
+    template<typename T> void findParam
+      (const std::string &key, T &value) const
+      { value = find<T>(key); }
+
+    template<typename T> void findHeaderParam(const std::string& key,
+      T& value, simparams& headerParams, const std::string& headerKey,
+      const std::string& headerComment) const
+      {
+      findParam(key, value);
+      headerParams.add(key, headerKey, dataToString(value), headerComment);
+      }
+    void findSourceParam(const std::string& key, std::string& value,
+      simparams& headerParams) const
+      {
+      findParam(key, value);
+      headerParams.add_source_file(value);
+      }
+  };
+
+#endif
diff --git a/planck/simparams.h b/planck/simparams.h
new file mode 100644
index 0000000..a942136
--- /dev/null
+++ b/planck/simparams.h
@@ -0,0 +1,58 @@
+/*
+ *  Class for storing parameter information for later use
+ *
+ *  Copyright (C) 2003 Max-Planck-Society
+ *  Authors: Reinhard Hell, Martin Reinecke
+ */
+
+#ifndef PLANCK_SIMPARAMS_H
+#define PLANCK_SIMPARAMS_H
+
+#include <string>
+#include <vector>
+#include <iostream>
+#include "cxxutils.h"
+class fitshandle;
+
+class simparams
+  {
+  private:
+    class Param
+      {
+      public:
+        std::string key, shortkey, value, comment;
+
+        Param (const std::string &Key, const std::string &Shortkey,
+               const std::string &Value, const std::string &Comment)
+          : key(Key), shortkey(Shortkey), value(Value), comment(Comment) {}
+      };
+
+    std::vector<Param> paramMap;
+    std::vector<std::string> source_files;
+    std::vector<int> hdus;
+
+  public:
+    void add_comment (const std::string &comment)
+      { paramMap.push_back(Param("","","",comment)); }
+    template<typename T> void add(const std::string &key,
+      const std::string &shortkey, const T &value, const std::string &comment)
+      {
+      paramMap.push_back(Param(key, shortkey, dataToString(value), comment));
+      }
+    template<typename T> void add(const std::string &key,
+      const std::string &shortkey, const T &value)
+      {
+      paramMap.push_back(Param(key, shortkey, dataToString(value), ""));
+      }
+
+    void add_source_file (const std::string &filename, int hdu=2)
+      {
+      source_files.push_back(filename);
+      hdus.push_back(hdu);
+      }
+
+    void add_keys (std::ostream &os) const;
+    void add_keys (fitshandle &out) const;
+  };
+
+#endif
diff --git a/protocol.cc b/protocol.cc
new file mode 100644
index 0000000..df402a9
--- /dev/null
+++ b/protocol.cc
@@ -0,0 +1,115 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#include <math.h>
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+#include <cassert>
+
+#include "planck/paramfile.h"
+
+#include "struct_main.h"
+
+using namespace std;
+
+void INIT_PROTOCOL_CONV(struct DATA *data)
+{
+  string outputFileName= data->numerical->dir + string("convergence.prt");
+  
+  ofstream outStream(outputFileName.data());
+  assert(outStream.is_open());
+  
+  outStream.close();
+}
+
+void UPDATE_PROTOCOL_CONV(ULONG it, real_prec res, struct DATA *data)
+{
+  string outputFileName= data->numerical->dir + string("convergence.prt");
+  
+  ofstream outStream(outputFileName.data(), ios::app );
+  assert(outStream.is_open());
+  
+  outStream << it <<" "<< res  <<endl;
+  outStream.close();		
+}
+
+void PROTOCOL_RESTART(ULONG it, struct DATA *data)
+{
+  string outputFileName= data->numerical->dir + string("restart.prt");
+		
+  ofstream outStream(outputFileName.data());
+  assert(outStream.is_open());
+
+  outStream << it <<endl;
+  outStream.close();	       
+}
+
+void INIT_PROTOCOL_SPEC(struct DATA *data)
+{
+  unsigned N1=data->numerical->N1;
+  unsigned N2=data->numerical->N2;
+  unsigned N3=data->numerical->N3;
+  
+  real_prec L1=data->numerical->L1;
+  real_prec L2=data->numerical->L2;
+  real_prec L3=data->numerical->L3;
+
+  ULONG N_bin=data->numerical->N_bin;
+
+  string outputFileName= data->numerical->dir + string("spec_protocol.prt");
+  
+  ofstream outStream(outputFileName.data());
+  assert(outStream.is_open());
+
+  string codename=data->numerical->codename;
+  
+  // cout << " >>> "<<codename<<": initialising power-spectrum protocol...."<<endl<<endl;
+  // cout<<"\n >>> "<<codename<<": create power-spectrum protocol file: "<<outputFileName<<" ...\n";
+  wprintw(data->curses->status, "initialising power-spectrum protocol...");
+  wrefresh(data->curses->status);
+  
+  outStream <<" "<<codename<<"_POWER_SPECTRUM_PROTOCOL "<<endl;
+  outStream << "-----------------------------"<<endl;
+  outStream << N1  <<endl;
+  outStream << N2  <<endl;
+  outStream << N3  <<endl;	
+  outStream << "-----------------------------"<<endl;
+  outStream << L1  <<endl;
+  outStream << L2  <<endl;
+  outStream << L3  <<endl;	
+  outStream << "-----------------------------"<<endl;
+  outStream << N_bin  <<endl;
+  outStream << "-----------------------------"<<endl;
+  outStream.close();  
+}
+
+void UPDATE_PROTOCOL_SPEC(struct DATA *data, string specname)
+{
+//EGP  int N1=data->numerical->N1;
+//EGP  int N2=data->numerical->N2;
+//EGP  int N3=data->numerical->N3;
+  
+//EGP  real_prec L1=data->numerical->L1;
+//EGP  real_prec L2=data->numerical->L2;
+//EGP  real_prec L3=data->numerical->L3;
+  
+//EGP  ULONG N_bin=data->numerical->N_bin;
+  
+  string outputFileName= data->numerical->dir + string("spec_protocol.prt");
+  
+  ofstream outStream(outputFileName.data(), ios::app );
+  assert(outStream.is_open());
+  
+  string codename=data->numerical->codename;
+
+  cout << " >>> "<<codename<<": update power-spectrum protocol...."<<endl<<endl;
+  outStream << specname  <<endl;
+  outStream.close();  
+}
diff --git a/protocol.h b/protocol.h
new file mode 100644
index 0000000..e5e9987
--- /dev/null
+++ b/protocol.h
@@ -0,0 +1,26 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#pragma once
+void INIT_PROTOCOL_CONV(struct DATA *data);
+
+void UPDATE_PROTOCOL_CONV(ULONG it, real_prec res, struct DATA *data);
+
+void PROTOCOL_RESTART(ULONG it, struct DATA *data);
+
+void INIT_PROTOCOL_SPEC(struct DATA *data);
+
+void UPDATE_PROTOCOL_SPEC(struct DATA *data, string specname);
+
+
+
+
+
+
+
diff --git a/random.hpp b/random.hpp
new file mode 100644
index 0000000..313eaf3
--- /dev/null
+++ b/random.hpp
@@ -0,0 +1,115 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#pragma once
+#include <gsl/gsl_randist.h> // includes gsl_rng.h
+#include <vector>
+#include <complex>
+
+template <class out_type>
+std::complex<out_type> complex_gaussian_random_number(gsl_rng *rng)
+{
+  // _ugaussian is the same as _gaussian, but with sigma = 1.0
+  out_type val_real = static_cast<out_type>(gsl_ran_ugaussian(rng)); // real part
+  out_type val_imag = static_cast<out_type>(gsl_ran_ugaussian(rng)); // imaginary part
+  std::complex<out_type> val(val_real, val_imag);
+  return(val);
+}
+
+// Can only do a "cubical" grid, i.e. there is only one linear gridsize.
+// The grid is generated for use in Fourier space. Its size is determined by the
+// half_size parameter; if false, it will give a full size cube, otherwise a
+// "half + 1" sized grid for use in a real-FFT. Note that this function returns
+// a complex vector.
+template <class out_type>
+std::vector<std::complex<out_type> > resolution_independent_random_grid_FS(unsigned gridsize, gsl_rng *rng, bool half_size = true)
+{
+  unsigned i, j, k; // just some for-loop indices
+
+  // Build output vector
+  ULONG output_size;
+  if (half_size)
+    output_size = gridsize * gridsize * (gridsize/2+1);
+  else
+    output_size = gridsize * gridsize * gridsize;
+  std::vector<std::complex<out_type> > out_grid(output_size);
+
+  // The strides are also dependent on grid size:
+  unsigned j_stride;
+  if (half_size)
+    j_stride = (gridsize/2+1);
+  else
+    j_stride = gridsize;
+  unsigned i_stride = gridsize * j_stride;
+  // Note: i_stride does not necessarily go with i in this function, but
+  // rather with the i that one would have in an outer for-loop with index
+  // i of the form:
+  // for i=[0-N) for j=[0-N) for k=[0-N) (or k=[0-(N/2+1)))
+  // Same goes for the j_stride. k_stride == 1 in this way.
+
+  // N.B.: don't make the for loops multi-threaded!
+  
+  // Fill out:
+  for (i = 0; i < gridsize/2; i++) // "layer" count; start in the corners, and move out one cubical layer at a time 
+  {
+    for (k = 0; k < i+1; k++) // first do the two "walls"
+    {
+      for (j = 0; j < i; j++) // "slim" side
+      {
+        out_grid[i * i_stride + j * j_stride + k] = complex_gaussian_random_number<out_type>(rng); // corner 1
+        out_grid[(gridsize - 1 - i) * i_stride + j * j_stride + k] = complex_gaussian_random_number<out_type>(rng); // corner 2
+        out_grid[i * i_stride + (gridsize - 1 - j) * j_stride + k] = complex_gaussian_random_number<out_type>(rng); // corner 3
+        out_grid[(gridsize - 1 - i) * i_stride + (gridsize - 1 - j) * j_stride + k] = complex_gaussian_random_number<out_type>(rng); // corner 4
+        if (not half_size)
+        {
+          out_grid[i * i_stride + j * j_stride + (gridsize - 1 - k)] = complex_gaussian_random_number<out_type>(rng); // corner 5
+          out_grid[(gridsize - 1 - i) * i_stride + j * j_stride + (gridsize - 1 - k)] = complex_gaussian_random_number<out_type>(rng); // corner 6
+          out_grid[i * i_stride + (gridsize - 1 - j) * j_stride + (gridsize - 1 - k)] = complex_gaussian_random_number<out_type>(rng); // corner 7
+          out_grid[(gridsize - 1 - i) * i_stride + (gridsize - 1 - j) * j_stride + (gridsize - 1 - k)] = complex_gaussian_random_number<out_type>(rng); // corner 8
+        }
+      }
+      for (j = 0; j < i + 1; j++) // "broad" side
+      {
+        out_grid[j * i_stride + i * j_stride + k] = complex_gaussian_random_number<out_type>(rng); // corner 1
+        out_grid[(gridsize - 1 - j) * i_stride + i * j_stride + k] = complex_gaussian_random_number<out_type>(rng); // corner 2
+        out_grid[j * i_stride + (gridsize - 1 - i) * j_stride + k] = complex_gaussian_random_number<out_type>(rng); // corner 3
+        out_grid[(gridsize - 1 - j) * i_stride + (gridsize - 1 - i) * j_stride + k] = complex_gaussian_random_number<out_type>(rng); // corner 4
+        if (not half_size)
+        {
+          out_grid[j * i_stride + i * j_stride + (gridsize - 1 - k)] = complex_gaussian_random_number<out_type>(rng); // corner 5
+          out_grid[(gridsize - 1 - j) * i_stride + i * j_stride + (gridsize - 1 - k)] = complex_gaussian_random_number<out_type>(rng); // corner 6
+          out_grid[j * i_stride + (gridsize - 1 - i) * j_stride + (gridsize - 1 - k)] = complex_gaussian_random_number<out_type>(rng); // corner 7
+          out_grid[(gridsize - 1 - j) * i_stride + (gridsize - 1 - i) * j_stride + (gridsize - 1 - k)] = complex_gaussian_random_number<out_type>(rng); // corner 8
+        }
+      }
+    }
+    for (j = 0; j < i; j++) // then the "roof" (what remains on the top)
+      for (k = 0; k < i; k++)
+      {
+        out_grid[j * i_stride + k * j_stride + i] = complex_gaussian_random_number<out_type>(rng); // corner 1
+        out_grid[(gridsize - 1 - j) * i_stride + k * j_stride + i] = complex_gaussian_random_number<out_type>(rng); // corner 2
+        out_grid[j * i_stride + (gridsize - 1 - k) * j_stride + i] = complex_gaussian_random_number<out_type>(rng); // corner 3
+        out_grid[(gridsize - 1 - j) * i_stride + (gridsize - 1 - k) * j_stride + i] = complex_gaussian_random_number<out_type>(rng); // corner 4
+        if (not half_size)
+        {
+          out_grid[j * i_stride + k * j_stride + (gridsize - 1 - i)] = complex_gaussian_random_number<out_type>(rng); // corner 5
+          out_grid[(gridsize - 1 - j) * i_stride + k * j_stride + (gridsize - 1 - i)] = complex_gaussian_random_number<out_type>(rng); // corner 6
+          out_grid[j * i_stride + (gridsize - 1 - k) * j_stride + (gridsize - 1 - i)] = complex_gaussian_random_number<out_type>(rng); // corner 7
+          out_grid[(gridsize - 1 - j) * i_stride + (gridsize - 1 - k) * j_stride + (gridsize - 1 - i)] = complex_gaussian_random_number<out_type>(rng); // corner 8
+        }
+      }
+  }
+
+  if (half_size) // Finally, fill out the nyquist plane:
+    for (i = 0; i < gridsize; i++) 
+      for (j = 0; j < gridsize; j++) 
+        out_grid[i_stride*i + j_stride*j + gridsize/2] = complex_gaussian_random_number<out_type>(rng);
+
+  return out_grid;
+}
diff --git a/rankorder.cc b/rankorder.cc
new file mode 100644
index 0000000..95c282b
--- /dev/null
+++ b/rankorder.cc
@@ -0,0 +1,57 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#include "define_opt.h"
+#include "fftw_array.h"
+
+#include <math.h>
+
+using namespace std;
+
+real_prec rankorder_leclercq_ZA(real_prec delta_ZA)
+{
+  // threshold value between two approximation ranges
+  real_prec delta_th = pow(0.610/0.371, 1./(1.752-1.424));
+  real_prec delta_Nbody;
+  if (delta_ZA < delta_th)
+    delta_Nbody = 0.610 * pow(delta_ZA + 1, 1.424) - 1;
+  else
+    delta_Nbody = 0.371 * pow(delta_ZA + 1, 1.752) - 1;
+  return delta_Nbody;
+}
+
+real_prec rankorder_leclercq_2LPT(real_prec delta_2LPT)
+{
+  // threshold value between two approximation ranges
+  real_prec delta_th = pow(0.642/0.257, 1./(1.922-1.401));
+  real_prec delta_Nbody;
+  if (delta_2LPT < delta_th)
+    delta_Nbody = 0.642 * pow(delta_2LPT + 1, 1.401) - 1;
+  else
+    delta_Nbody = 0.257 * pow(delta_2LPT + 1, 1.922) - 1;
+  return delta_Nbody;
+}
+
+void rankorder_leclercq_ZA(real_prec *delta_ZA, real_prec *delta_Nbody, ULONG size)
+{
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+  for (long i = 0; i < static_cast<long>(size); i++)
+    delta_Nbody[i] = rankorder_leclercq_ZA(delta_ZA[i]);
+}
+
+void rankorder_leclercq_2LPT(real_prec *delta_2LPT, real_prec *delta_Nbody, ULONG size)
+{
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+  for (long i = 0; i < static_cast<long>(size); i++)
+    delta_Nbody[i] = rankorder_leclercq_2LPT(delta_2LPT[i]);
+}
diff --git a/rankorder.h b/rankorder.h
new file mode 100644
index 0000000..2131e4b
--- /dev/null
+++ b/rankorder.h
@@ -0,0 +1,16 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#include "define_opt.h"
+#include "fftw_array.h"
+
+real_prec rankorder_leclercq_ZA(real_prec delta_ZA);
+real_prec rankorder_leclercq_2LPT(real_prec delta_2LPT);
+void rankorder_leclercq_ZA(real_prec *delta_ZA, real_prec *delta_Nbody, ULONG size);
+void rankorder_leclercq_2LPT(real_prec *delta_2LPT, real_prec *delta_Nbody, ULONG size);
diff --git a/rsd.cc b/rsd.cc
new file mode 100644
index 0000000..de55af0
--- /dev/null
+++ b/rsd.cc
@@ -0,0 +1,68 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#include "define_opt.h"
+#include <cmath>
+#include <iostream>
+#include "math_funcs.h" // pacman_coordinate
+#include "BarcodeException.h"
+
+using namespace std;
+
+void calc_pos_rsd(ULONG Npart, real_prec L3, real_prec xobs, real_prec yobs, real_prec zobs, real_prec *x, real_prec *y,
+                  real_prec *z, real_prec *vx, real_prec *vy, real_prec *vz, real_prec *xr, real_prec *yr,
+                  real_prec *zr, real_prec ascale, real_prec Omega_M, real_prec Omega_L, bool planepar, bool periodic)
+{
+  // Units:
+  // - xobs, yobs, zobs, x, y, z, xr, yr, zr in Mpc/h
+  // - vx, vy, vz in km/s
+  
+  real_prec Omega_C = num_1 - Omega_M - Omega_L;        
+  real_prec Hub = 100. * sqrt(Omega_M / ascale / ascale / ascale + Omega_L + Omega_C / ascale / ascale);           
+
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+  for (ULONG i = 0; i < Npart; ++i)
+  {
+    // setting observation coordinates
+    real_prec xpos = x[i] - xobs;
+    real_prec ypos = y[i] - yobs;
+    real_prec zpos = z[i] - zobs;
+
+    real_prec rpos = sqrt(xpos*xpos+ypos*ypos+zpos*zpos);
+
+    real_prec v_norm = 1. / Hub / ascale;
+
+    if (!planepar) // planepar==false
+    {
+      real_prec ruxv = (xpos * vx[i] + ypos * vy[i] + zpos * vz[i]) / rpos * v_norm; // ru*v (v component in the direction of unit-r-vector)
+      real_prec r_new = rpos + ruxv;
+      xr[i] = (xpos / rpos) * r_new + xobs; // == r_new*sin(thetapos)*cos(phipos)+xobs, where phipos = atan2(ypos,xpos) and thetapos = acos(zpos/rpos)
+      yr[i] = (ypos / rpos) * r_new + yobs; // == r_new*sin(thetapos)*sin(phipos)+yobs
+      zr[i] = (zpos / rpos) * r_new + zobs; // == r_new*cos(thetapos)+zobs
+    }
+    else // planepar==true
+    {
+      real_prec ruxv = vz[i] * v_norm; // ru*v (v component in the direction of unit-r-vector)
+      xr[i] = x[i];
+      yr[i] = y[i];
+      zr[i] = z[i] + ruxv;
+    }
+
+    if (periodic) {
+      if (!planepar) {
+        throw BarcodeException("Periodic boundary conditions not yet implemented for non-plane-parallel RSDs!");
+      } else {
+        pacman_coordinate(&(zr[i]), L3);
+      }
+    }
+  }
+}
+
diff --git a/rsd.h b/rsd.h
new file mode 100644
index 0000000..4d33226
--- /dev/null
+++ b/rsd.h
@@ -0,0 +1,15 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#include "define_opt.h"
+#include "fftw_array.h"
+
+void calc_pos_rsd(ULONG Npart, real_prec L3, real_prec xobs, real_prec yobs, real_prec zobs, real_prec *x, real_prec *y,
+                  real_prec *z, real_prec *vx, real_prec *vy, real_prec *vz, real_prec *xr, real_prec *yr,
+                  real_prec *zr, real_prec ascale, real_prec Omega_M, real_prec Omega_L, bool planepar, bool periodic);
diff --git a/sample_maker.cc b/sample_maker.cc
new file mode 100644
index 0000000..36b4475
--- /dev/null
+++ b/sample_maker.cc
@@ -0,0 +1,35 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#include "struct_main.h"
+
+#include <gsl/gsl_randist.h>
+
+#include "call_hamil.h"
+
+
+void security_recursion(struct DATA *data, gsl_rng * seed)
+{
+  if(data->numerical->INV_SUCCESS==0)
+    {
+      call_hamil(data, seed);
+      
+      security_recursion(data,seed);
+    }
+}
+
+
+void sample_maker(struct DATA *data, gsl_rng * seed)
+{    
+  if(data->numerical->INV_SUCCESS==0)    
+    security_recursion(data, seed);
+  
+  data->numerical->INV_SUCCESS=0;	     
+}
+
diff --git a/sample_maker.h b/sample_maker.h
new file mode 100644
index 0000000..170bef0
--- /dev/null
+++ b/sample_maker.h
@@ -0,0 +1,11 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#pragma once
+void sample_maker(struct DATA *data, gsl_rng * seed);
diff --git a/struct_hamil.h b/struct_hamil.h
new file mode 100644
index 0000000..cf12e77
--- /dev/null
+++ b/struct_hamil.h
@@ -0,0 +1,403 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#pragma once
+
+// EGP: cleared out most includes, just left the two here:
+#include <gsl/gsl_rng.h>  // For the gsl_rng type // EGP: added this one
+#include "define_opt.h"  // For the ULONG and real_prec types
+// #include "curses_funcs.h"
+// EGP: toch maar weer terug gezet, gingen dingen mis
+/* #include <math.h> */
+/* #include <fstream> */
+/* #include <iomanip> */
+// #include <iostream>
+/* #include <string>          */
+/* #include <string.h> */
+/* #include <cassert> */
+/* #include <cfloat> */
+/* #include <stdlib.h> */
+/* #include <stdio.h> */
+/* #include <fftw3.h> */
+/* #include <omp.h> */
+/* #include <sstream> */
+/* #include <netinet/in.h> */
+
+/* #include <gsl/gsl_errno.h> */
+/* #include <gsl/gsl_spline.h> */
+/* #include <gsl/gsl_randist.h> */
+/* #include <gsl/gsl_integration.h> */
+/* #include <gsl/gsl_sf.h> */
+
+/* #include "fftw_array.h" */
+/* #include "planck/paramfile.h" */
+
+/* #include "define_opt.h" */
+
+#include <vector>
+#include "BarcodeException.h"
+#include "HMC_models.h"
+#include "struct_main.h"
+#include <sstream>
+
+using namespace std;
+
+/* --- structures --- */
+
+struct HAMIL_NUMERICAL {
+  unsigned N1;  /* Number of cells per axis */
+  unsigned N2;  /* Number of cells per axis */
+  unsigned N3;  /* Number of cells per axis */
+  ULONG N;
+  ULONG Nhalf;
+
+  int mk;
+
+  real_prec L1;
+  real_prec L2;
+  real_prec L3;
+
+  real_prec vol;
+
+  real_prec d1;
+  real_prec d2;
+  real_prec d3;
+
+  real_prec min1;
+  real_prec min2;
+  real_prec min3;
+
+  // Redshift space distortions
+  real_prec xobs;
+  real_prec yobs;
+  real_prec zobs;
+  bool planepar;
+  bool periodic;
+
+  ULONG N_bin;
+
+  ULONG iGibbs;
+  ULONG rejections;
+  ULONG total_steps_lim;
+
+  // EGP: some extra parameters for statistics on performance:
+  bool accepted;
+  real_prec epsilon;
+  ULONG Neps;
+  real_prec dH, dK, dE, dprior, dlikeli, psi_prior, psi_likeli, psi_prior_i,
+  psi_prior_f, psi_likeli_i, psi_likeli_f, H_kin_i, H_kin_f;
+
+  int method;			/* which method */
+  int filter;			/* which filter method */
+  ULONG itmax;			/* maximal number of iterations */
+  int i_reset;			/* number to reset */
+  int INV_SUCCESS;		/* indicate inversion success */
+
+  real_prec epsi;  /* precision */
+
+  real_prec kth;
+
+  // EGP: pseudo-timestep parameters
+  real_prec N_eps_fac;
+  real_prec eps_fac;
+  real_prec eps_fac_target;
+  real_prec eps_fac_power;
+  ULONG s_eps_total;
+
+  // mass type
+  bool mass_fs;
+  bool mass_rs;
+  int mass_type;
+  ULONG massnum_init;
+  ULONG massnum_burn;
+
+  // EGP: testing:
+  real_prec mass_factor;
+
+  /// TESTING ///
+  real_prec grad_psi_prior_factor;
+  real_prec grad_psi_likeli_factor;
+  bool grad_psi_prior_conjugate;
+  bool grad_psi_likeli_conjugate;
+  bool grad_psi_prior_times_i;
+  bool grad_psi_likeli_times_i;
+  bool div_dH_by_N;
+  int calc_h;
+  real_prec deltaQ_factor;
+
+  bool correct_delta;
+
+  int particle_kernel;
+  real_prec particle_kernel_h;
+
+  // FFTW
+  real_prec *in_r2c;
+  real_prec *out_c2r;
+  complex_prec *in_c2r;
+  complex_prec *out_r2c;
+  struct plan_pkg *R2Cplan;
+  struct plan_pkg *C2Rplan;
+};
+
+struct HAMIL_DATA {
+  struct HAMIL_NUMERICAL *numerical;  // numerical parameters
+
+  // set arrays
+  real_prec *mass_f;
+  real_prec *mass_r;
+  // real_prec *massi;
+  real_prec *gradpsi;
+  real_prec *x;
+  // real_prec *growth;
+
+  real_prec *posx;
+  real_prec *posy;
+  real_prec *posz;
+
+  real_prec *signal_PS;
+  real_prec *window;
+  real_prec *func;
+  real_prec *noise;
+  real_prec *nobs;
+  real_prec *deltaX;
+  real_prec *corrf;
+
+  // function pointers
+  void (*partial_f_delta_x_log_like)(struct HAMIL_DATA*, real_prec*,
+                                     real_prec*);
+  real_prec (*log_like)(struct HAMIL_DATA*, real_prec*);
+  void (*grad_f_delta_x_comp)(struct HAMIL_DATA *, real_prec *, real_prec *, unsigned int);
+
+  real_prec (*log_prior)(struct HAMIL_DATA*, real_prec*);
+  void (*grad_log_prior)(struct HAMIL_DATA*, real_prec*, real_prec*);
+
+  // model flags
+  int sfmodel;
+  bool rsd_model;
+
+  // set scalars
+  real_prec Nmean_Gal;
+  real_prec rho_c;
+  real_prec code_norm;
+  real_prec mu;
+
+  real_prec sigma_fac;
+  real_prec sigma_min;
+  real_prec delta_min;
+
+  real_prec biasP;
+  real_prec biasE;
+
+  real_prec z;
+  real_prec ascale;
+
+  real_prec D1;
+  real_prec D2;
+
+  real_prec OM;
+  real_prec OL;
+  real_prec h;
+
+  // external inputs
+  real_prec *M;
+
+  // curses
+  // struct CURSES_STRUCT *curses;
+
+  // stuff for SPH_kernel_3D_cells
+  const int N_cells;
+  const std::vector<int> kernel_cells_i, kernel_cells_j, kernel_cells_k;
+
+
+  // default constructor; initialize everything yourself!
+  HAMIL_DATA() : N_cells(0), kernel_cells_i(0, 0), kernel_cells_j(0, 0),
+  kernel_cells_k(0, 0) {
+    numerical = new HAMIL_NUMERICAL;
+  }
+
+
+  // secondary constructor; used by call_hamil
+  HAMIL_DATA(struct DATA *data, real_prec *A, real_prec *B_f, real_prec *B_r,
+             real_prec *C, real_prec *D, real_prec *E, vector<int> cells_i,
+             std::vector<int> cells_j, std::vector<int> cells_k,
+             int N_cells_i) :
+  N_cells(N_cells_i),
+  kernel_cells_i(cells_i.begin(), cells_i.end()),
+  kernel_cells_j(cells_j.begin(), cells_j.end()),
+  kernel_cells_k(cells_k.begin(), cells_k.end()) {
+    numerical = new HAMIL_NUMERICAL;
+
+    numerical->N1 = data->numerical->N1;
+    numerical->N2 = data->numerical->N2;
+    numerical->N3 = data->numerical->N3;
+    numerical->N = data->numerical->N;
+    numerical->Nhalf = data->numerical->Nhalf;
+    numerical->L1 = data->numerical->L1;
+    numerical->L2 = data->numerical->L2;
+    numerical->L3 = data->numerical->L3;
+    numerical->vol = data->numerical->vol;
+    numerical->d1 = data->numerical->d1;
+    numerical->d2 = data->numerical->d2;
+    numerical->d3 = data->numerical->d3;
+    numerical->min1 = data->numerical->xllc;
+    numerical->min2 = data->numerical->yllc;
+    numerical->min3 = data->numerical->zllc;
+    // Redshift space distortions
+    numerical->xobs = data->numerical->xobs;
+    numerical->yobs = data->numerical->yobs;
+    numerical->zobs = data->numerical->zobs;
+    numerical->planepar = data->numerical->planepar;
+    numerical->periodic = data->numerical->periodic;
+    
+    numerical->N_bin = data->numerical->N_bin;
+    numerical->mk = data->numerical->mk;
+
+    numerical->kth = data->numerical->slength;
+
+    numerical->iGibbs = data->numerical->iGibbs;
+
+    numerical->rejections = 0; // EGP: added for rejection rate
+    numerical->total_steps_lim = data->numerical->total_steps_lim;
+
+    numerical->itmax = 2000;      
+    numerical->INV_SUCCESS = 0;/* indicate inversion success initialize with 0*/
+    
+    numerical->N_eps_fac = data->numerical->N_eps_fac;
+    numerical->eps_fac = data->numerical->eps_fac;
+    numerical->eps_fac_target = data->numerical->eps_fac_target;
+    numerical->eps_fac_power = data->numerical->eps_fac_power;
+    numerical->s_eps_total = data->numerical->s_eps_total;
+
+    numerical->mass_type = data->numerical->mass_type;
+    switch (numerical->mass_type) {
+      case 0:  // 0: R: all ones (essentially: no mass term)
+      numerical->mass_rs = true;
+      numerical->mass_fs = false;
+      break;
+      case 1:  // 1: FS: inverse power spectrum
+      numerical->mass_rs = false;
+      numerical->mass_fs = true;
+      break;
+      case 2:  // 2: FS+FS: inverse power spectrum + likelihood force spectrum
+      numerical->mass_rs = false;
+      numerical->mass_fs = true;
+      break;
+      case 3:  // 3: FS+FS: inverse power spectrum + mean likelihood force (Wang+13)
+      numerical->mass_rs = false;
+      numerical->mass_fs = true;
+      break;
+      case 4:  // 4: FS: power spectrum
+      numerical->mass_rs = false;
+      numerical->mass_fs = true;
+      break;
+      case 5:  // 5: FS+R: inverse power spectrum + 1st order likelihood force expansion (Jasche+13)
+      numerical->mass_rs = true;
+      numerical->mass_fs = true;
+      break;
+      case 6:  // 1st order likelihood force expansion (Jasche+13) (R)
+      numerical->mass_rs = true;
+      numerical->mass_fs = false;
+      break;
+      case 60:  // type 0 until burn-in, type 6 afterwards
+      numerical->mass_rs = true;
+      numerical->mass_fs = false;
+      break;
+      default:
+      stringstream message;
+      message << "mass_type " << numerical->mass_type << " is not a valid value!";
+      throw BarcodeException(message.str());
+    }
+
+    numerical->massnum_init = data->numerical->massnum_init;
+    numerical->massnum_burn = data->numerical->massnum_burn;
+
+    gradpsi = A;
+    mass_f = B_f;
+    mass_r = B_r;
+    posx = C;
+    posy = D;
+    posz = E;  
+
+    z = data->cosmology->z;
+    ascale = data->cosmology->ascale;
+
+    D1 = data->cosmology->D1;
+    D2 = data->cosmology->D2;
+
+    OM = data->cosmology->omega_m;
+    OL = data->cosmology->omega_q;
+    h = data->cosmology->h;
+
+    biasP = data->observational->biasP;
+    biasE = data->observational->biasE;
+
+    x = data->observational->signal;
+    nobs = data->observational->nobs;
+    deltaX = data->observational->signalX;
+    noise = data->observational->noise_sf;
+    corrf = data->observational->corrf;
+    
+    Nmean_Gal = data->observational->Nmean_Gal;
+    rho_c = data->observational->rho_c;
+    code_norm = data->numerical->code_norm;
+    mu = data->observational->mu;
+
+    sigma_fac = data->observational->sigma_fac; 
+    sigma_min = data->observational->sigma_min; 
+    delta_min = data->observational->delta_min; 
+
+    signal_PS = data->observational->Power;
+    
+    window = data->observational->window;
+
+    // function pointers
+    log_like = data->observational->log_like;
+    partial_f_delta_x_log_like = data->observational->partial_f_delta_x_log_like;
+    grad_f_delta_x_comp = data->observational->grad_f_delta_x_comp;
+    log_prior = data->observational->log_prior;
+    grad_log_prior = data->observational->grad_log_prior;
+
+    // model flags
+    sfmodel = data->numerical->sfmodel;
+    rsd_model = data->numerical->rsd_model;
+
+    // Testing:
+    numerical->mass_factor = data->numerical->mass_factor;
+    numerical->grad_psi_prior_factor = data->numerical->grad_psi_prior_factor;
+    numerical->grad_psi_likeli_factor = data->numerical->grad_psi_likeli_factor;
+    numerical->grad_psi_prior_conjugate = data->numerical->grad_psi_prior_conjugate;
+    numerical->grad_psi_likeli_conjugate = data->numerical->grad_psi_likeli_conjugate;
+    numerical->grad_psi_prior_times_i = data->numerical->grad_psi_prior_times_i;
+    numerical->grad_psi_likeli_times_i = data->numerical->grad_psi_likeli_times_i;
+    numerical->div_dH_by_N = data->numerical->div_dH_by_N;
+    numerical->calc_h = data->numerical->calc_h;
+    numerical->deltaQ_factor = data->numerical->deltaQ_factor;
+
+    numerical->particle_kernel = data->numerical->particle_kernel;
+    numerical->particle_kernel_h = data->numerical->particle_kernel_h;
+
+    // FFTW
+    numerical->in_r2c = data->numerical->in_r2c;
+    numerical->out_c2r = data->numerical->out_c2r;
+    numerical->in_c2r = data->numerical->in_c2r;
+    numerical->out_r2c = data->numerical->out_r2c;
+    numerical->R2Cplan = data->numerical->R2Cplan;
+    numerical->C2Rplan = data->numerical->C2Rplan;
+
+    // curses
+    // curses = data->curses;
+
+    numerical->correct_delta = data->numerical->correct_delta;
+  }    
+
+  ~HAMIL_DATA() {
+    delete numerical;
+  }
+};
+
diff --git a/struct_main.h b/struct_main.h
new file mode 100644
index 0000000..57aea0d
--- /dev/null
+++ b/struct_main.h
@@ -0,0 +1,258 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#pragma once
+
+#include <string>         
+#include <fstream>
+#include <vector>
+
+#include "define_opt.h"
+#include "curses_funcs.h"
+#include "fftwrapper.h"  // struct plan_pkg
+
+using namespace std;
+
+/* --- structures --- */
+struct COSMOLOGY
+{
+  real_prec omega_r;			/* radiation density */
+  real_prec omega_k;			/* curvature density */
+  real_prec omega_m;			/* matter density */
+  real_prec omega_b;			/* baryon density */
+  real_prec omega_q;			/* dark energy density */
+  real_prec w, wprime;		        /* de eos parameter w and first time derivative wprime=dw/da */
+  real_prec sigma8;			/* mass fluctuation on 8Mpc/h scales */
+  real_prec rsmooth;			/* smoothing radius: rsmooth = 8 yields sigma8 */
+  real_prec n_s;			/* index of primordial dm power spectrum */
+  real_prec h;			        /* Hubble parameter */
+  real_prec beta;			/* lens galaxy distribution parameter 1 */
+  real_prec A_spec;			/* Normalization of the power spectrum */  
+  real_prec z;  
+  real_prec ascale;
+  real_prec mu;
+
+  real_prec D1;
+  real_prec D2;
+};
+
+struct OBSERVATIONAL
+{	
+  real_prec *Power;
+  real_prec *signal;
+  real_prec *signalX;
+  real_prec *window;
+  real_prec *noise_sf;
+  real_prec *nobs;
+  real_prec *corrf;
+
+  real_prec biasP;
+  real_prec biasE;
+  real_prec Nmean_Gal;
+  real_prec rho_c;
+  real_prec mu;
+
+  real_prec sigma_fac;
+  real_prec sigma_min;
+  real_prec delta_min;
+
+  // EGP function pointers:
+  void (*partial_f_delta_x_log_like)(struct HAMIL_DATA*, real_prec*, real_prec*);
+  real_prec (*log_like)(struct HAMIL_DATA*, real_prec*);
+  void (*grad_f_delta_x_comp)(struct HAMIL_DATA *, real_prec *, real_prec *, unsigned int);
+
+  real_prec (*log_prior)(struct HAMIL_DATA*, real_prec*);
+  void (*grad_log_prior)(struct HAMIL_DATA*, real_prec*, real_prec*);
+  // EGP end function pointers
+};
+
+struct NUMERICAL
+{	
+  // EGP added:
+  // some flags
+  bool random_test;
+  bool random_test_rsd;
+
+  int window_type;
+
+  int data_model;
+  bool negative_obs;
+  int likelihood;
+  int prior;
+  int sfmodel;
+  bool rsd_model;
+  // log file for performance statistics from HamiltonianMC
+  ofstream performance_log;
+  // pseudo-timestep parameters
+  real_prec N_eps_fac;
+  real_prec eps_fac;
+  real_prec eps_fac_target;
+  real_prec eps_fac_initial;
+  real_prec eps_fac_power;
+  real_prec s_eps_total_fac;
+  real_prec s_eps_total_scaling;
+  int s_eps_total_Nx_norm;
+  ULONG s_eps_total;
+  // initial guess:
+  int initial_guess;
+  string initial_guess_file;
+  int initial_guess_smoothing_type;
+  real_prec initial_guess_smoothing_scale;
+  // mass stuff
+  // bool mass_fs;
+  int mass_type;
+  ULONG massnum_init;
+  ULONG massnum_burn;
+  // output
+  unsigned outnum;
+  unsigned outnum_ps;
+  // restarting
+  unsigned start_at;
+  // testing:
+  real_prec mass_factor;
+  int calc_h;
+  // EGP: end added
+
+  ULONG seed;
+
+  ULONG N_bin;
+
+  int inputmode;
+  
+  int INV_SUCCESS;
+
+  int filter;
+
+  bool PRECON;
+
+  string codename;
+  string fnamePS;
+  string dataFileName;
+  string fname_ps_sample;
+  string fname_PROT_SPEC;
+  string fname_PROT_SIGNAL_BOX;
+  string fname_PROT_SIGNAL_SLICE;
+  
+  string dir;
+
+  int mk;
+
+  unsigned N1;			/* Number of cells per axis */
+  unsigned N2;			/* Number of cells per axis */
+  unsigned N3;			/* Number of cells per axis */
+  ULONG N; // total cell number
+  ULONG Nhalf;
+
+  bool readPS;
+  
+  real_prec code_norm;
+  
+  real_prec slength;			
+
+  ULONG iGibbs;
+  ULONG N_Gibbs;
+  ULONG rejections;
+  ULONG total_steps_lim;
+  ULONG count_attempts;  // counter for total accepted + rejected steps
+                         // used for epsilon/acceptance tables
+
+  // stuff for self-adjusting epsilon
+  int eps_fac_update_type;
+  unsigned N_a_eps_update;
+  real_prec acc_min;
+  real_prec acc_max;
+  int eps_down_smooth;
+  real_prec eps_up_fac;
+  vector<bool> acc_flag_N_a;
+  vector<real_prec> epsilon_N_a;
+  // related: circular buffer for keeping track of "recent" acceptance rate
+  vector<short> acc_recent;
+
+  real_prec xllc;
+  real_prec yllc;
+  real_prec zllc;
+  
+  // Redshift space distortions
+  real_prec xobs;
+  real_prec yobs;
+  real_prec zobs;
+  bool planepar;
+  bool periodic;
+  
+  real_prec L1;			/* Length of axis */
+  real_prec L2;			/* Length of axis */
+  real_prec L3;			/* Length of axis */
+
+  real_prec vol;
+  
+  real_prec d1;			/* grid spacing x-direction */
+  real_prec d2;			/* grid spacing y-direction */
+  real_prec d3;			/* grid spacing z-direction */
+
+
+  /// TESTING ///
+  real_prec grad_psi_prior_factor;
+  real_prec grad_psi_likeli_factor;
+  bool grad_psi_prior_conjugate;
+  bool grad_psi_likeli_conjugate;
+  bool grad_psi_prior_times_i;
+  bool grad_psi_likeli_times_i;
+  bool div_dH_by_N;
+  real_prec deltaQ_factor;
+
+  int particle_kernel;
+  real_prec particle_kernel_h_rel;
+  real_prec particle_kernel_h;
+
+  int N_cells;
+  std::vector<int> kernel_cells_i, kernel_cells_j, kernel_cells_k;
+
+  // FFTW
+  real_prec *in_r2c;
+  real_prec *out_c2r;
+  complex_prec *in_c2r;
+  complex_prec *out_r2c;
+  struct plan_pkg *R2Cplan;
+  struct plan_pkg *C2Rplan;
+  // these two could be used with a common complex array to reduce copying
+  // struct plan_pkg *R2Bplan; 
+  // struct plan_pkg *B2Rplan;
+  // these two could be used with both common complex and real arrays
+  // struct plan_pkg *A2Bplan; 
+  // struct plan_pkg *B2Aplan;
+
+  bool correct_delta;
+
+  // ~NUMERICAL() {
+  // }
+};
+
+
+
+struct DATA
+{
+  struct COSMOLOGY *cosmology;		/* cosmological model */
+  struct OBSERVATIONAL *observational;	/* numerical parameters */
+  struct NUMERICAL *numerical;		/* numerical parameters */
+  struct CURSES_STRUCT *curses;
+  
+  DATA()   // Example of a constructor used in a structure.
+  {
+    cosmology=new COSMOLOGY;
+    observational=new OBSERVATIONAL;
+    numerical=new NUMERICAL;
+    // curses = new CURSES_STRUCT;
+  }
+
+  ~DATA() {
+    delete cosmology;
+    delete observational;
+    delete numerical;
+  }
+};
diff --git a/tools/2D_corr_fct.cc b/tools/2D_corr_fct.cc
new file mode 100644
index 0000000..9a4c928
--- /dev/null
+++ b/tools/2D_corr_fct.cc
@@ -0,0 +1,312 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+#include <cmath>
+#include <string>
+#include <sstream>
+#include <iostream>
+
+#include "../struct_main.h"
+#include "../fftw_array.h"
+
+#include "../IOfunctionsGen.h"
+#include "../convenience.h"
+
+using namespace std;
+
+real_prec pacman_center_on_origin(unsigned ix, unsigned Ni, real_prec di) {
+  if (ix <= Ni/2)
+    return di * static_cast<real_prec>(ix);
+  else
+    return -di * static_cast<real_prec>(Ni - ix);
+}
+
+void measure_corr2D(unsigned N1, unsigned N2, unsigned N3, real_prec L1,/* real_prec L2,
+                    real_prec L3,*/ real_prec d1, real_prec d2, real_prec d3,
+                    ULONG N_bin, real_prec *signal, real_prec *rmode,
+                    ULONG *nmode, real_prec *corr, bool planepar = true) {
+  ULONG N = N1 * N2 * N3;
+  ULONG N_bin_sq = N_bin * N_bin;
+
+  cout << "... Fourier transforming signal" << endl;
+  fftw_array<complex_prec> Signal(N);
+  FFT3dR2C(N1, N2, N3, signal, Signal);
+
+  // measure the greatest |r| in the box
+  real_prec rmax = L1/2 * sqrt(3);
+  // For periodic box, there are no distances longer than sqrt(3)*L1/2.
+
+  // bin width in r-space
+  real_prec dr = rmax / static_cast<real_prec>(N_bin);
+
+  // Compute the Power Spectrum : P(k)
+  cout << "... computing power spectrum" << endl;
+  absolute_squared_array(Signal, Signal, N);  // P(k) -> Signal (real part)
+
+  cout << "... computing correlation function" << endl;
+  fftw_array<real_prec> dummy(N);
+  FFT3dC2R(N1, N2, N3, Signal, dummy);  // 3D correlation function -> dummy
+
+  // Initialize the arrays
+  fillZero(rmode, N_bin_sq);
+  fillZero(corr, N_bin_sq);
+  fillZero(nmode, N_bin_sq);
+
+  // Linearize the 3D corr function: sum over shells
+  double progress = 0.0;
+  cout << "... linearizing the 3D correlation function; progress:" << endl;
+  for (unsigned i = 0; i < N1; i++)
+    for (unsigned j = 0; j < N2; j++) {
+      progress = static_cast<double>(N3*(j+N2*i))/static_cast<double>(N)*100.;
+      cout << progress << " %  \r" << flush;
+      for (unsigned k = 0; k < N3; k++) {
+        real_prec xpos = pacman_center_on_origin(i, N1, d1);
+        real_prec ypos = pacman_center_on_origin(j, N2, d2);
+        real_prec zpos = pacman_center_on_origin(k, N3, d3);
+
+        real_prec rtot = sqrt(xpos*xpos + ypos*ypos + zpos*zpos);
+
+        real_prec rpar, rperp;
+
+        if (planepar == true) {
+          rpar  = sqrt(zpos*zpos);
+          rperp = sqrt(xpos*xpos + ypos*ypos);
+        } else {
+          // work in progress
+          throw std::runtime_error("non-plane-parallel option not yet implemented");
+        }
+
+        ULONG nbin_perp = static_cast<ULONG>(rperp/dr);
+        ULONG nbin_par  = static_cast<ULONG>(rpar/dr);
+
+        if (nbin_perp < N_bin && nbin_par < N_bin) {
+          // par on "x axis", perp on "y axis" (reverse of regular sigma-pi
+          // plots in literature!); par is LOS (RSD direction):
+          // ULONG ii = N_bin * nbin_par + nbin_perp;
+          // perp on "x axis", par on "y axis" (regular order in sigma-pi plots
+          // in literature):
+          ULONG ii = nbin_par + N_bin * nbin_perp;
+          // EGP, 29 July 2016:
+          // Right, this was wrong! I was using matplotlib.pcolormesh for
+          // plotting the image, but was using x and y the wrong way around
+          // (see http://matplotlib.org/examples/pylab_examples/pcolor_demo.html
+          // , there you see that pcolorshow has to use y, x = np.mgrid...).
+          // Because of this, the plot seemed right, but was actually wrong.
+          // With imshow the axes were again flipped, which they shouldn't be.
+          // The expected behaviour is that when loading an array with numpy
+          // memmap, and reshaping it into a square 2D array, that the first
+          // Nx numbers in the (1D) array are plotted along the bottom (with
+          // origin='bottom'), from left to right, the next Nx above that, etc.
+          // In our case, the horizontal axis is r_perp, so r_perp should vary
+          // the fastest. In ULONG ii above this is not the case.
+          // So, long story short: this tool does not produce the file I
+          // expect.
+          // Now that I know, I changed the loading function in Python to flip
+          // the axes once again, since I already produced all the 2Dcf files
+          // and don't want to do that again :)
+
+          rmode[ii] += rtot;
+          corr[ii] += dummy[k+N3*(j+N2*i)];
+          nmode[ii] += 1;
+        }
+      }
+    }
+
+  // And finally normalize the shells by the volume of the shells
+  cout << "... finally, normalizing correlation function shells" << endl;
+#pragma omp parallel for
+  for (ULONG l = 0; l < N_bin_sq; ++l) {
+    if (nmode[l] > 0) {
+      rmode[l] /= static_cast<real_prec>(nmode[l]);
+      corr[l] /= static_cast<real_prec>(static_cast<real_prec>(nmode[l]) *
+                                        static_cast<real_prec>(N));
+    }
+  }
+}
+
+
+// This version is destructive on signal!
+void measure_corr2D_destructive(unsigned N1, unsigned N2, unsigned N3, real_prec L1,/* real_prec L2,
+                    real_prec L3,*/ real_prec d1, real_prec d2, real_prec d3,
+                    ULONG N_bin, real_prec *signal, real_prec *rmode,
+                    ULONG *nmode, real_prec *corr, bool planepar = true) {
+  cout << "N.B.: this function destroys the contents of signal! Use measure_corr2D for non-destructive." << endl;
+
+  ULONG N = N1 * N2 * N3;
+  ULONG Nhalf = (N1/2 + 1) * N2 * N3;
+  ULONG N_bin_sq = N_bin * N_bin;
+
+  cout << "... Fourier transforming signal" << endl;
+  fftw_array<complex_prec> Signal(Nhalf);
+  fftR2C(N1, N2, N3, signal, Signal);
+
+  // Compute the Power Spectrum : P(k)
+  cout << "... computing power spectrum" << endl;
+  absolute_squared_array(Signal, Signal, Nhalf);  // P(k) -> Signal (real part)
+
+  cout << "... computing correlation function" << endl;
+  fftC2R(N1, N2, N3, Signal, signal);  // 3D correlation function -> signal
+
+  // Initialize the arrays
+  fillZero(rmode, N_bin_sq);
+  fillZero(corr, N_bin_sq);
+  fillZero(nmode, N_bin_sq);
+
+  // measure the greatest |r| in the box
+  real_prec rmax = L1/2 * sqrt(3);
+  // For periodic box, there are no distances longer than sqrt(3)*L1/2.
+
+  // bin width in r-space
+  real_prec dr = rmax / static_cast<real_prec>(N_bin);
+
+  // Linearize the 3D corr function: sum over shells
+  double progress = 0.0;
+  cout << "... linearizing the 3D correlation function; progress:" << endl;
+  for (unsigned i = 0; i < N1; i++)
+    for (unsigned j = 0; j < N2; j++) {
+      progress = static_cast<double>(N3*(j+N2*i))/static_cast<double>(N)*100.;
+      cout << progress << " %  \r" << flush;
+      for (unsigned k = 0; k < N3; k++) {
+        real_prec xpos = pacman_center_on_origin(i, N1, d1);
+        real_prec ypos = pacman_center_on_origin(j, N2, d2);
+        real_prec zpos = pacman_center_on_origin(k, N3, d3);
+
+        real_prec rpar, rperp;
+
+        if (planepar == true) {
+          rpar  = sqrt(zpos*zpos);
+          rperp = sqrt(xpos*xpos + ypos*ypos);
+        } else {
+          // work in progress
+          throw std::runtime_error("non-plane-parallel option not yet implemented");
+        }
+
+        ULONG nbin_perp = static_cast<ULONG>(rperp/dr);
+        ULONG nbin_par  = static_cast<ULONG>(rpar/dr);
+
+        if (nbin_perp < N_bin && nbin_par < N_bin) {
+          // par on "x axis", perp on "y axis" (reverse of regular sigma-pi
+          // plots in literature!); par is LOS (RSD direction):
+          // ULONG ii = N_bin * nbin_par + nbin_perp;
+          // perp on "x axis", par on "y axis" (regular order in sigma-pi plots
+          // in literature):
+          ULONG ii = nbin_par + N_bin * nbin_perp;
+          // EGP, 29 July 2016:
+          // Right, this was wrong! I was using matplotlib.pcolormesh for
+          // plotting the image, but was using x and y the wrong way around
+          // (see http://matplotlib.org/examples/pylab_examples/pcolor_demo.html
+          // , there you see that pcolorshow has to use y, x = np.mgrid...).
+          // Because of this, the plot seemed right, but was actually wrong.
+          // With imshow the axes were again flipped, which they shouldn't be.
+          // The expected behaviour is that when loading an array with numpy
+          // memmap, and reshaping it into a square 2D array, that the first
+          // Nx numbers in the (1D) array are plotted along the bottom (with
+          // origin='bottom'), from left to right, the next Nx above that, etc.
+          // In our case, the horizontal axis is r_perp, so r_perp should vary
+          // the fastest. In ULONG ii above this is not the case.
+          // So, long story short: this tool does not produce the file I
+          // expect.
+          // Now that I know, I changed the loading function in Python to flip
+          // the axes once again, since I already produced all the 2Dcf files
+          // and don't want to do that again :)
+
+          real_prec rtot = sqrt(xpos*xpos + ypos*ypos + zpos*zpos);
+          rmode[ii] += rtot;
+          corr[ii] += signal[k+N3*(j+N2*i)];
+          nmode[ii] += 1;
+        }
+      }
+    }
+
+  // And finally normalize the shells by the volume of the shells
+  cout << "... finally, normalizing correlation function shells" << endl;
+#pragma omp parallel for
+  for (ULONG l = 0; l < N_bin_sq; ++l) {
+    if (nmode[l] > 0) {
+      rmode[l] /= static_cast<real_prec>(nmode[l]);
+      corr[l] /= static_cast<real_prec>(static_cast<real_prec>(nmode[l]) *
+                                        static_cast<real_prec>(N));
+    }
+  }
+}
+
+
+void load_arguments(int argc, char *argv[], string &fname_in, unsigned &N1,
+                    real_prec &L1, unsigned &N_bin, string &fname_out) {
+  stringstream N1_arg, L1_arg, N_bin_arg;
+
+  if (argc >= 5) {
+    fname_in = string(argv[1]);
+    N1_arg << argv[2];
+    N1_arg >> N1;
+    L1_arg << argv[3];
+    L1_arg >> L1;
+    N_bin_arg << argv[4];
+    N_bin_arg >> N_bin;
+
+    if (argc >= 6) {
+      fname_out = string(argv[5]);
+    } else {
+      fname_out = fname_in + string("_corr2D");
+    }
+  } else {
+    cerr << "Need 4 parameters (file in, N1, L1, N_bin)!" << endl
+         << "N.B.: filenames must be given without extension (which must be "
+            ".dat)." << endl
+         << "Set N_bin to 0 to automatically get the largest number of bins "
+            "without gaps." << endl
+         << "Optional: fname_out (default: fname_in+'_corr2D')." << endl;
+    exit(1);
+  }
+}
+
+int main(int argc, char *argv[]) {
+  cout << "Note: plane-parallel RSDs only in this version!" << endl;
+  bool planepar = true;
+
+  unsigned N1, N_bin;
+  real_prec L1;
+  string fname_in, fname_out;
+  load_arguments(argc, argv, fname_in, N1, L1, N_bin, fname_out);
+
+  if (N_bin > static_cast<unsigned int>(N1))
+    cout << "Warning: N_bin larger than N1 will cause gaps in the correlation "
+            "function!" << endl;
+
+  real_prec d1 = L1 / static_cast<real_prec>(N1);
+
+  // determine number of bins automatically
+  if (N_bin == 0) {
+    real_prec rmax = L1/2 * sqrt(3);
+    // real_prec dmin = min(d1, min(d2, d3));
+    real_prec dmin = d1;
+    N_bin = static_cast<unsigned>(ceil(rmax/dmin));
+    stringstream N_bin_ss;
+    N_bin_ss << N_bin;
+    fname_out += string("_Nbin") + N_bin_ss.str();
+  }
+
+  unsigned N_bin_sq = N_bin * N_bin;
+  ULONG N = N1*N1*N1;
+
+  fftw_array<real_prec> grid(N), rmode(N_bin_sq), corr(N_bin_sq);
+  fftw_array<ULONG> nmode(N_bin_sq);
+
+  // get data from input file
+  get_scalar(fname_in, grid, N1, N1, N1);
+
+  // do measurement
+  measure_corr2D_destructive(N1, N1, N1, L1, /*L1, L1,*/ d1, d1, d1, N_bin, grid, rmode, nmode,
+                 corr, planepar);
+
+  // output results
+  dump_scalar(rmode, N_bin_sq, 1, 1, fname_out + string("_r"));
+  dump_scalar(corr, N_bin_sq, 1, 1, fname_out + string("_eta"));
+
+  return(0);
+}
diff --git a/tools/2D_corr_fct_interp.cc b/tools/2D_corr_fct_interp.cc
new file mode 100644
index 0000000..6c91046
--- /dev/null
+++ b/tools/2D_corr_fct_interp.cc
@@ -0,0 +1,433 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#if defined(MULTITHREAD) | defined(MULTITHREAD_FFTW) | defined(MULTITHREAD_RNG)
+#include <omp.h>
+#endif  // openmp headers
+#include <cmath>
+#include <string>
+#include <sstream>
+#include <iostream>
+#include <algorithm>  // min
+
+#include "../struct_main.h"
+#include "../fftw_array.h"
+
+#include "../IOfunctionsGen.h"
+#include "../math_funcs.h"  // interpolate_CIC
+#include "../convenience.h"
+
+// using namespace std;  // forbidden by Google C++ Style Guide
+using std::cout;
+using std::cerr;
+using std::endl;
+using std::string;
+
+
+void interp_field(real_prec *input, unsigned N1, unsigned N2, unsigned N3, real_prec L1,
+                  real_prec L2, real_prec L3, unsigned N1_out, unsigned N2_out,
+                  unsigned N3_out, real_prec *output) {
+  real_prec dx = L1 / static_cast<real_prec>(N1);
+  real_prec dy = L2 / static_cast<real_prec>(N2);
+  real_prec dz = L3 / static_cast<real_prec>(N3);
+  real_prec dx_out = L1 / static_cast<real_prec>(N1_out);
+  real_prec dy_out = L2 / static_cast<real_prec>(N2_out);
+  real_prec dz_out = L3 / static_cast<real_prec>(N3_out);
+
+  #ifdef MULTITHREAD
+  #pragma omp parallel for
+  #endif  // MULTITHREAD
+  for (unsigned i = 0; i < N1_out; ++i)
+    for (unsigned j = 0; j < N2_out; ++j)
+      for (unsigned k = 0; k < N3_out; ++k) {
+        ULONG ix = k + N3_out*(j+N2_out*i);
+
+        // +0.5, assuming we associate grid values with values at center of grid
+        // cells
+        real_prec posx = dx_out * (0.5 + static_cast<real_prec>(i));
+        real_prec posy = dy_out * (0.5 + static_cast<real_prec>(j));
+        real_prec posz = dz_out * (0.5 + static_cast<real_prec>(k));
+
+        output[ix] = interpolate_CIC(N1, N2, N3, L1, L2, L3, dx, dy, dz,
+                                     posx, posy, posz, input);
+      }
+}
+
+
+real_prec pacman_center_on_origin(unsigned ix, unsigned Ni, real_prec di) {
+  if (ix <= Ni/2)
+    return di * static_cast<real_prec>(ix);
+  else
+    return -di * static_cast<real_prec>(Ni - ix);
+}
+
+
+// This version is destructive on signal!
+void measure_corr2D(unsigned N1, unsigned N2, unsigned N3, real_prec L1, /*real_prec L2,
+                    real_prec L3,*/ real_prec d1, real_prec d2, real_prec d3,
+                    ULONG N_bin, real_prec *signal, real_prec *rmode,
+                    ULONG *nmode, real_prec *corr, real_prec L_max,
+                    bool planepar = true) {
+  cout << "N.B.: function measure_corr2D in this program destroys the contents of signal! Use measure_corr2D from 2D_corr_fct.cc for non-destructive version." << endl;
+
+  ULONG N = N1 * N2 * N3;
+  ULONG Nhalf = (N1/2 + 1) * N2 * N3;
+  ULONG N_bin_sq = N_bin * N_bin;
+
+  cout << "... Fourier transforming signal" << endl;
+  fftw_array<complex_prec> Signal(Nhalf);
+  fftR2C(N1, N2, N3, signal, Signal);
+
+  // Compute the Power Spectrum : P(k)
+  cout << "... computing power spectrum" << endl;
+  absolute_squared_array(Signal, Signal, Nhalf);  // P(k) -> Signal (real part)
+
+  cout << "... computing correlation function" << endl;
+  fftC2R(N1, N2, N3, Signal, signal);  // 3D correlation function -> signal
+
+  // Initialize the arrays
+  fillZero(rmode, N_bin_sq);
+  fillZero(corr, N_bin_sq);
+  fillZero(nmode, N_bin_sq);
+
+  // measure the greatest |r| in the box
+  real_prec rmax = L1/2 * sqrt(3);
+  // For periodic box, there are no distances longer than sqrt(3)*L1/2.
+
+  // bin width in r-space
+  real_prec dr = rmax / static_cast<real_prec>(N_bin);
+
+  // Linearize the 3D corr function: sum over shells
+//  double progress = 0.0;
+  cout << "... linearizing the 3D correlation function; progress:" << endl;
+  #pragma omp parallel for
+  for (unsigned i = 0; i < N1; i++)
+    for (unsigned j = 0; j < N2; j++) {
+      // progress = static_cast<double>(N3*(j+N2*i))/static_cast<double>(N)*100.;
+      // cout << progress << " %  \r" << flush;
+      for (unsigned k = 0; k < N3; k++) {
+        real_prec xpos = pacman_center_on_origin(i, N1, d1);
+        real_prec ypos = pacman_center_on_origin(j, N2, d2);
+        real_prec zpos = pacman_center_on_origin(k, N3, d3);
+
+        real_prec rpar, rperp;
+
+        if (planepar == true) {
+          rpar  = sqrt(zpos*zpos);
+          rperp = sqrt(xpos*xpos + ypos*ypos);
+        } else {
+          // work in progress
+          throw std::runtime_error("non-plane-parallel option not yet implemented");
+        }
+
+        if (rpar < L_max && rperp < L_max) {
+          ULONG nbin_perp = static_cast<ULONG>(rperp/dr);
+          ULONG nbin_par  = static_cast<ULONG>(rpar/dr);
+
+          if (nbin_perp < N_bin && nbin_par < N_bin) {
+            // par on "x axis", perp on "y axis" (reverse of regular sigma-pi
+            // plots in literature!); par is LOS (RSD direction):
+            // ULONG ii = N_bin * nbin_par + nbin_perp;
+            // perp on "x axis", par on "y axis" (regular order in sigma-pi plots
+            // in literature):
+            ULONG ii = nbin_par + N_bin * nbin_perp;
+            // EGP, 29 July 2016:
+            // Right, this was wrong! I was using matplotlib.pcolormesh for
+            // plotting the image, but was using x and y the wrong way around
+            // (see http://matplotlib.org/examples/pylab_examples/pcolor_demo.html
+            // , there you see that pcolorshow has to use y, x = np.mgrid...).
+            // Because of this, the plot seemed right, but was actually wrong.
+            // With imshow the axes were again flipped, which they shouldn't be.
+            // The expected behaviour is that when loading an array with numpy
+            // memmap, and reshaping it into a square 2D array, that the first
+            // Nx numbers in the (1D) array are plotted along the bottom (with
+            // origin='bottom'), from left to right, the next Nx above that, etc.
+            // In our case, the horizontal axis is r_perp, so r_perp should vary
+            // the fastest. In ULONG ii above this is not the case.
+            // So, long story short: this tool does not produce the file I
+            // expect.
+            // Now that I know, I changed the loading function in Python to flip
+            // the axes once again, since I already produced all the 2Dcf files
+            // and don't want to do that again :)
+            real_prec rtot = sqrt(xpos*xpos + ypos*ypos + zpos*zpos);
+#pragma omp atomic
+            rmode[ii] += rtot;
+#pragma omp atomic
+            corr[ii] += signal[k+N3*(j+N2*i)];
+#pragma omp atomic
+            nmode[ii] += 1;
+          }
+        }
+      }
+    }
+
+  // And finally normalize the shells by the volume of the shells
+  cout << "... finally, normalizing correlation function shells" << endl;
+#pragma omp parallel for
+  for (ULONG l = 0; l < N_bin_sq; ++l) {
+    if (nmode[l] > 0) {
+      rmode[l] /= static_cast<real_prec>(nmode[l]);
+      corr[l] /= static_cast<real_prec>(static_cast<real_prec>(nmode[l]) *
+                                        static_cast<real_prec>(N));
+    }
+  }
+}
+
+
+void measure_corr2D_FFTzeropad(real_prec *signal, unsigned N1, unsigned N2, unsigned N3,
+                               unsigned N1_interp, unsigned N2_interp, unsigned N3_interp,
+                               real_prec L1, real_prec L2, real_prec L3,
+                               ULONG N_bin, real_prec *rmode, ULONG *nmode, real_prec *corr,
+                               bool planepar = true) {
+  ULONG N_interp = N1_interp * N2_interp * N3_interp;
+  unsigned N3half = N3/2 + 1;
+  ULONG Nhalf = (N1/2 + 1) * N2 * N3;
+  unsigned N3half_interp = N3_interp/2 + 1;
+  ULONG Nhalf_interp = (N1_interp/2 + 1) * N2_interp * N3_interp;
+  ULONG N_bin_sq = N_bin * N_bin;
+
+  real_prec d1_interp = L1 / static_cast<real_prec>(N1_interp);
+  real_prec d2_interp = L2 / static_cast<real_prec>(N2_interp);
+  real_prec d3_interp = L3 / static_cast<real_prec>(N3_interp);
+
+  cout << "... Fourier transforming signal" << endl;
+  fftw_array<complex_prec> Signal(Nhalf);
+  fftR2C(N1, N2, N3, signal, Signal);
+
+  // Compute the Power Spectrum : P(k)
+  cout << "... computing power spectrum" << endl;
+  absolute_squared_array(Signal, Signal, Nhalf);  // P(k) -> Signal (real part)
+
+  // transfer to larger, zeropadded array
+  cout << "... copying to larger, zero-padded Fourier space array" << endl;
+  fftw_array<complex_prec> Power_Interp(Nhalf_interp);
+  fillZero(Power_Interp, Nhalf_interp);
+  for (unsigned i = 0; i < N1; i++) {
+    unsigned I = i;
+    if (i >= N1/2) {
+      I = N1_interp - (N1 - i);
+    }
+    for (unsigned j = 0; j < N2; j++) {
+      unsigned J = j;
+      if (j >= N2/2) {
+        J = N2_interp - (N2 - j);
+      }
+      for (unsigned k = 0; k < N3half; ++k) {
+        ULONG ix_input = k + N3half * (j + N2 * i);
+        ULONG ix_interp = k + N3half_interp * (J + N2_interp * I);
+        re(Power_Interp[ix_interp]) = re(Signal[ix_input]);
+        im(Power_Interp[ix_interp]) = im(Signal[ix_input]);
+      }
+    }
+  }
+
+  cout << "... computing correlation function" << endl;
+  fftw_array<real_prec> power_interp(N_interp);
+  fftC2R(N1_interp, N3_interp, N3_interp, Power_Interp, power_interp);  // 3D correlation function
+
+  // Initialize the arrays
+  fillZero(rmode, N_bin_sq);
+  fillZero(corr, N_bin_sq);
+  fillZero(nmode, N_bin_sq);
+
+  // measure the greatest |r| in the box
+  real_prec rmax = L1/2 * sqrt(3);
+  // For periodic box, there are no distances longer than sqrt(3)*L1/2.
+
+  // bin width in r-space
+  real_prec dr = rmax / static_cast<real_prec>(N_bin);
+
+  // Linearize the 3D corr function: sum over shells
+//  double progress = 0.0;
+  cout << "... linearizing the 3D correlation function; progress:" << endl;
+#pragma omp parallel for
+  for (unsigned i = 0; i < N1_interp; i++)
+    for (unsigned j = 0; j < N2_interp; j++) {
+      // progress = static_cast<double>(N3*(j+N2*i))/static_cast<double>(N)*100.;
+      // cout << progress << " %  \r" << flush;
+      for (unsigned k = 0; k < N3_interp; k++) {
+        real_prec xpos = pacman_center_on_origin(i, N1_interp, d1_interp);
+        real_prec ypos = pacman_center_on_origin(j, N2_interp, d2_interp);
+        real_prec zpos = pacman_center_on_origin(k, N3_interp, d3_interp);
+
+        real_prec rpar, rperp;
+
+        if (planepar == true) {
+          rpar  = sqrt(zpos*zpos);
+          rperp = sqrt(xpos*xpos + ypos*ypos);
+        } else {
+          // work in progress
+          throw std::runtime_error("non-plane-parallel option not yet implemented");
+        }
+
+        ULONG nbin_perp = static_cast<ULONG>(rperp/dr);
+        ULONG nbin_par  = static_cast<ULONG>(rpar/dr);
+
+        if (nbin_perp < N_bin && nbin_par < N_bin) {
+          // par on "x axis", perp on "y axis" (reverse of regular sigma-pi
+          // plots in literature!); par is LOS (RSD direction):
+          // ULONG ii = N_bin * nbin_par + nbin_perp;
+          // perp on "x axis", par on "y axis" (regular order in sigma-pi plots
+          // in literature):
+          ULONG ii = nbin_par + N_bin * nbin_perp;
+          // EGP, 29 July 2016:
+          // Right, this was wrong! I was using matplotlib.pcolormesh for
+          // plotting the image, but was using x and y the wrong way around
+          // (see http://matplotlib.org/examples/pylab_examples/pcolor_demo.html
+          // , there you see that pcolorshow has to use y, x = np.mgrid...).
+          // Because of this, the plot seemed right, but was actually wrong.
+          // With imshow the axes were again flipped, which they shouldn't be.
+          // The expected behaviour is that when loading an array with numpy
+          // memmap, and reshaping it into a square 2D array, that the first
+          // Nx numbers in the (1D) array are plotted along the bottom (with
+          // origin='bottom'), from left to right, the next Nx above that, etc.
+          // In our case, the horizontal axis is r_perp, so r_perp should vary
+          // the fastest. In ULONG ii above this is not the case.
+          // So, long story short: this tool does not produce the file I
+          // expect.
+          // Now that I know, I changed the loading function in Python to flip
+          // the axes once again, since I already produced all the 2Dcf files
+          // and don't want to do that again :)
+          real_prec rtot = sqrt(xpos*xpos + ypos*ypos + zpos*zpos);
+#pragma omp atomic
+          rmode[ii] += rtot;
+#pragma omp atomic
+          corr[ii] += power_interp[k+N3_interp*(j+N2_interp*i)];
+#pragma omp atomic
+          nmode[ii] += 1;
+        }
+      }
+    }
+
+  // And finally normalize the shells by the volume of the shells
+  cout << "... finally, normalizing correlation function shells" << endl;
+#pragma omp parallel for
+  for (ULONG l = 0; l < N_bin_sq; ++l) {
+    if (nmode[l] > 0) {
+      rmode[l] /= static_cast<real_prec>(nmode[l]);
+      corr[l] /= static_cast<real_prec>(static_cast<real_prec>(nmode[l]) *
+                                        static_cast<real_prec>(N_interp));
+    }
+  }
+}
+
+
+void load_arguments(int argc, char *argv[], string &fname_in, unsigned &N1,
+                    real_prec &L1, unsigned &N1_interp, unsigned &N_bin,
+                    unsigned &interp_mode, real_prec &L_max, string &fname_out) {
+  stringstream N1_arg, L1_arg, N1_interp_arg, N_bin_arg, interp_mode_arg, L_max_arg;
+
+  if (argc >= 6) {
+    fname_in = string(argv[1]);
+    N1_arg << argv[2];
+    N1_arg >> N1;
+    L1_arg << argv[3];
+    L1_arg >> L1;
+    N1_interp_arg << argv[4];
+    N1_interp_arg >> N1_interp;
+    N_bin_arg << argv[5];
+    N_bin_arg >> N_bin;
+    interp_mode_arg << argv[6];
+    interp_mode_arg >> interp_mode;
+    L_max_arg << argv[7];
+    L_max_arg >> L_max;
+
+    if (argc >= 9) {
+      fname_out = string(argv[8]);
+    } else {
+      fname_out = fname_in + string("_interpCIC") + N1_interp_arg.str() + string("_corr2D");
+    }
+  } else {
+    std::cerr  << "Need 7 parameters (file in, N1, L1, N1_interp, N_bin, interp_mode, L_max)!\n"
+               << "N.B.: filenames must be given without extension (which must be .dat).\n"
+               << "- Set N_bin to 0 to automatically get the largest number of bins without gaps.\n"
+               << "- Interp_mode can be 0 (CIC) or 1 (Fourier zero padding).\n"
+               << "- Optional: fname_out (default: fname_in+'_interpCIC[N1_interp]_corr2D')."
+               << std::endl;
+    exit(1);
+  }
+}
+
+
+int main(int argc, char *argv[]) {
+#ifdef MULTITHREAD_FFTW
+  fftw_init_threads();
+  fftw_plan_with_nthreads(omp_get_max_threads());
+  printf("Compiled with MULTITHREAD_FFTW support, with %dthreads\n",
+         omp_get_max_threads());
+#endif
+
+  cout << "Note: plane-parallel RSDs only in this version!" << endl;
+  bool planepar = true;
+
+  unsigned N1, N1_interp, N_bin, interp_mode;
+  real_prec L1, L_max;
+  string fname_in, fname_out;
+  load_arguments(argc, argv, fname_in, N1, L1, N1_interp, N_bin, interp_mode, L_max, fname_out);
+
+  ULONG N = N1*N1*N1;
+  ULONG N_interp = N1_interp*N1_interp*N1_interp;
+
+  fftw_array<real_prec> input(N);
+
+  // get data from input file
+  get_scalar(fname_in, input, N1, N1, N1);
+
+  // corr2D preparations
+  if (N_bin > N1_interp)
+    cout << "Warning: N_bin larger than N1_interp will cause gaps in the correlation "
+            "function!" << endl;
+
+  real_prec d1_interp = L1 / static_cast<real_prec>(N1_interp);
+
+  // determine number of bins automatically
+  if (N_bin == 0) {
+    real_prec rmax = L1/2 * sqrt(3);
+    // real_prec dmin = min(d1, min(d2, d3));
+    real_prec dmin = d1_interp;
+    N_bin = static_cast<unsigned>(ceil(rmax/dmin));
+    stringstream N_bin_ss;
+    N_bin_ss << N_bin;
+    fname_out += string("_Nbin") + N_bin_ss.str();
+  }
+
+  unsigned N_bin_sq = N_bin * N_bin;
+
+  fftw_array<real_prec> rmode(N_bin_sq), corr(N_bin_sq);
+  fftw_array<ULONG> nmode(N_bin_sq);
+
+  // interpolate
+  switch (interp_mode) {
+    case 0: {
+      cout << "... interpolate" << endl;
+      fftw_array<real_prec> grid(N_interp);
+      interp_field(input, N1, N1, N1, L1, L1, L1, N1_interp, N1_interp, N1_interp, grid);
+      cout << "... do measurement" << endl;
+      measure_corr2D(N1_interp, N1_interp, N1_interp, L1, /*L1, L1,*/ d1_interp, d1_interp, d1_interp, N_bin, grid, rmode, nmode,
+                     corr, L_max, planepar);
+      break;
+    }
+
+    case 1: {
+      measure_corr2D_FFTzeropad(input, N1, N1, N1,
+          N1_interp, N1_interp, N1_interp,
+          L1, L1, L1,
+          N_bin, rmode, nmode, corr, planepar);
+      break;
+    }
+  }
+
+  // output results
+  dump_scalar(rmode, N_bin_sq, 1, 1, fname_out + string("_r"));
+  dump_scalar(corr, N_bin_sq, 1, 1, fname_out + string("_eta"));
+
+#ifdef MULTITHREAD_FFTW
+  fftw_cleanup_threads();
+#endif
+}
diff --git a/tools/2D_powspec.cc b/tools/2D_powspec.cc
new file mode 100644
index 0000000..0283f51
--- /dev/null
+++ b/tools/2D_powspec.cc
@@ -0,0 +1,165 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#include "../define_opt.h"
+#include "../struct_main.h"
+#include "../fftw_array.h"
+#include <cmath>
+#include <sstream>
+#include "../IOfunctionsGen.h"
+#include "../math_funcs.h"
+
+#include "../convenience.h"
+
+using namespace std;
+
+// N.B.: planepar = false is not yet properly implemented! Only use planepar = true!
+// This means that the redshift distortions must be applied in the z-direction.
+void measure_spec2D(unsigned N1, unsigned N2, unsigned N3,
+                    real_prec L1, real_prec L2, real_prec L3,
+                    real_prec *signal, real_prec *kmode, real_prec *power,
+                    ULONG N_bin, bool planepar = true) {
+  ULONG N = N1*N2*N3;
+  ULONG N_bin_sq = N_bin * N_bin;
+
+  real_prec NORM = L1*L2*L3 / static_cast<real_prec>(4.*M_PI) / static_cast<real_prec>(N*N);
+  //
+  // TODO: CHECK IF THIS NORMALIZATION IS RIGHT!!
+  //
+
+  fftw_array<complex_prec> Signal(N);
+  FFT3dR2C(N1, N2, N3, signal, Signal);
+
+  // measure the greatest |k| in the box
+  real_prec kmax = sqrt(k_squared(N1/2, N2/2, N3/2, L1, L2, L3, N1, N2, N3));
+  // bin width in k-space
+  real_prec dk = kmax / real_prec(N_bin-1);  
+
+  //// Compute the Power Spectrum : P(k)
+
+  // Initialize the arrays
+  fillZero(power, N_bin_sq);
+  fillZero(kmode, N_bin_sq);
+  fftw_array<ULONG> nmode(N_bin_sq);
+  fillZero(nmode, N_bin_sq);
+
+  for(unsigned i = 0; i < N1; i++)
+    for(unsigned j=0;j<N2;j++)
+      for(unsigned k=0;k<N3;k++)
+      {           
+        real_prec ktot = sqrt(k_squared(i, j, k, L1, L2, L3, N1, N2, N3));
+        real_prec kx = calc_kx(i, L1, N1);
+        real_prec ky = calc_kz(j, L2, N2);
+        real_prec kz = calc_kz(k, L3, N3);
+
+        real_prec kperp, kpar;
+
+        if (planepar == true)
+        {
+          kpar  = sqrt(kz*kz);
+          kperp = sqrt(kx*kx+ky*ky);
+        }
+        else
+        {
+          throw std::runtime_error("non-plane-parallel option not yet implemented");
+          //real_prec pos1[3],pos2[3];
+
+          //work in progress...!!!
+          //pos1[0]=kx;
+          //pos1[1]=ky;
+          //pos1[2]=kz;
+
+          //pos2[0]=kx;
+          //pos2[1]=ky;
+          //pos2[2]=kz;
+
+          // EGP: these two were already commented out, the others above I commented out
+          //kpar =calc_rpar (pos1,pos2);
+          //kperp=calc_rperp(pos1,pos2);
+        }         
+
+        ULONG nbin_perp = static_cast<ULONG>(kperp/dk);
+        ULONG nbin_par  = static_cast<ULONG>(kpar/dk);
+
+        if (nbin_perp < N_bin && nbin_par < N_bin )
+        {
+          //ULONG ii = N_bin * nbin_par + nbin_perp; // => par on "x axis", perp on "y axis". See 2D_corr_fct for why this is wrong (sigma-pi plots).
+          ULONG ii = nbin_par + N_bin * nbin_perp; // => perp on "x axis", par on "y axis"
+          kmode[ii] += 1 * ktot;
+          power[ii] += absolute_squared( Signal[k + N3*(j + N2*i)] );
+          nmode[ii] += 1;
+        }
+      }
+
+#pragma omp parallel for
+  for (ULONG l = 0; l < N_bin_sq; ++l)
+  {
+    if (nmode[l] > 0)
+    {     
+      kmode[l] =        kmode[l] / static_cast<real_prec>(nmode[l]);
+      power[l] = NORM * power[l] / static_cast<real_prec>(nmode[l]);
+    }
+  }
+}
+
+
+void load_arguments(int argc, char *argv[], string &fname_in, unsigned &N1, real_prec &L1, unsigned &N_bin, string &fname_out)
+{
+  stringstream N1_arg, L1_arg, N_bin_arg;
+
+  if (argc >= 5)
+  {
+    fname_in = string(argv[1]);
+    N1_arg << argv[2];
+    N1_arg >> N1;
+    L1_arg << argv[3];
+    L1_arg >> L1;
+    N_bin_arg << argv[4];
+    N_bin_arg >> N_bin;
+
+    if (argc >= 6)
+      fname_out = string(argv[5]);
+    else
+      fname_out = fname_in + string("_pow2D");
+  }
+  else
+  {
+    cerr << "Need 4 parameters (file in, N1, L1, N_bin)! N.B.: filenames must be given without extension (which must be .dat). Optional: fname_out (default: fname_in+'_pow2D')." << endl;
+    exit(1);
+  }
+}
+
+int main(int argc, char *argv[])
+{
+  cout << "Note: plane-parallel RSDs only in this version!" << endl;
+  bool planepar = true;
+
+  unsigned N1, N_bin;
+  real_prec L1;
+  string fname_in, fname_out;
+  load_arguments(argc, argv, fname_in, N1, L1, N_bin, fname_out);
+
+  ULONG N = N1*N1*N1;
+  unsigned N_bin_sq = N_bin * N_bin;
+
+  fftw_array<real_prec> power2D(N_bin_sq), kmode2D(N_bin_sq);
+  fftw_array<real_prec> grid(N);
+
+  // get data from input file
+  get_scalar(fname_in, grid, N1, N1, N1);
+
+  // do measurement
+  measure_spec2D(N1, N1, N1, L1, L1, L1, grid, kmode2D, power2D, N_bin, planepar);
+
+  // output results
+  dump_scalar(kmode2D, N_bin_sq, 1, 1, fname_out + string("_k"));
+  dump_scalar(power2D, N_bin_sq, 1, 1, fname_out + string("_P"));
+
+  return(0);
+}
diff --git a/tools/LAG2EULer.cc b/tools/LAG2EULer.cc
new file mode 100644
index 0000000..f8d7af0
--- /dev/null
+++ b/tools/LAG2EULer.cc
@@ -0,0 +1,91 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#include "../define_opt.h"
+#include "../struct_main.h"
+#include "../fftw_array.h"
+#include <sstream>
+#include "../math_funcs.h" // create_GARFIELD
+#include "../IOfunctionsGen.h"
+#include "../Lag2Eul.h"
+#include "../init_par.h" // INIT_COSMOLOGY
+
+using namespace std;
+
+void load_arguments(int argc, char *argv[], string &fname_in, unsigned &N1, real_prec &L1, string &fname_out, real_prec &ascale)
+{
+  stringstream N1_arg, L1_arg, ascale_arg;
+
+  if (argc >= 5)
+  {
+    fname_in = string(argv[1]);
+    N1_arg << argv[2];
+    N1_arg >> N1;
+    L1_arg << argv[3];
+    L1_arg >> L1;
+    fname_out = string(argv[4]);
+    if (argc >= 6)
+    {
+      ascale_arg << argv[5];
+      ascale_arg >> ascale;
+    }
+    else
+    {
+      ascale = 1.;
+    }
+  }
+  else
+  {
+    cerr << "Need 4 parameters (file in, N1, L1, file out)! N.B.: filenames must be given without extension (which must be .dat). Optional: a_scale (default 1)." << endl;
+    exit(1);
+  }
+}
+
+int main(int argc, char *argv[])
+{
+  unsigned N1;
+  real_prec L1, ascale;
+  string fname_in, fname_out;
+  load_arguments(argc, argv, fname_in, N1, L1, fname_out, ascale);
+ 
+  ULONG N = N1*N1*N1;
+
+  real_prec d1 = L1/static_cast<real_prec>(N1);
+  real_prec min1 = 0.;
+
+  fftw_array<real_prec> delta_lag(N), delta_eul(N), posx(N), posy(N), posz(N);
+
+  // load input file
+  get_scalar(fname_in, delta_lag, N1, N1, N1);
+
+  struct COSMOLOGY *c = new COSMOLOGY;
+  c->ascale = ascale;
+  INIT_COSMOLOGY(c, string("LAG2EULer"));
+
+  // other
+  int mk = 1; // CIC -> don't need kernel_scale
+  real_prec kth = 4.; // whatever
+
+  // Lag2Eul
+  unsigned facL = 1;
+  bool reggrid = true;
+  gsl_rng *gBaseRand = nullptr; // rng -> not necessary
+  real_prec kernel_scale = 0.; // CIC -> don't need kernel_scale
+
+  int sfmodel = 1;
+  std::cout << "N.B.: sfmodel is fixed to 1 (Zel'dovich)!" << std::endl;
+
+  Lag2Eul(delta_lag, delta_eul, posx, posy, posz, N1, N1, N1, L1, L1, L1, d1, d1, d1, min1, min1, min1, c->D1, c->D2,
+          c->ascale, c->omega_m, c->omega_q, sfmodel, mk, kth, facL,
+          reggrid, gBaseRand, "", kernel_scale, nullptr, nullptr);
+
+  quick_dump_scalar(delta_eul, N1, fname_out, 0, false);
+
+  return(0);
+}
diff --git a/tools/corr_fct.cc b/tools/corr_fct.cc
new file mode 100644
index 0000000..65cd4a0
--- /dev/null
+++ b/tools/corr_fct.cc
@@ -0,0 +1,134 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#include "../struct_main.h"
+#include "../fftw_array.h"
+#include <cmath>
+
+#include "../convenience.h"
+
+#include "../IOfunctionsGen.h"
+
+using namespace std;
+
+real_prec pacman_center_on_origin(unsigned ix, unsigned Ni, real_prec di)
+{
+  if (ix <= Ni/2)
+    return di * static_cast<real_prec>(ix);
+  else
+    return -di * static_cast<real_prec>(Ni - ix);
+}
+
+void measure_corr_grid(unsigned N1, unsigned N2, unsigned N3, real_prec L1, /*real_prec L2, real_prec L3,*/ real_prec d1, real_prec d2, real_prec d3, ULONG N_bin, real_prec *signal, real_prec *rmode, ULONG *nmode, real_prec *corr)
+{
+  ULONG N = N1 * N2 * N3;
+  fftw_array<complex_prec> Signal(N);
+  FFT3dR2C(N1, N2, N3, signal, Signal);
+
+  // measure the greatest |r| in the box
+  real_prec rmax = L1/2 * sqrt(3);
+  // For periodic box, there are no distances longer than sqrt(3)*L1/2.
+
+  // bin width in r-space
+  real_prec dr = rmax / static_cast<real_prec>(N_bin);
+
+  // Compute the Power Spectrum : P(k) 
+  fftw_array<real_prec> dummy(N);
+  absolute_squared_array(Signal, dummy, N); // P(k) -> dummy
+
+  fftw_array<complex_prec> AUX(N);
+  complexify_array(dummy, AUX, N); // P(k) -> AUX
+
+  FFT3dC2R (N1, N2, N3, AUX, dummy); // 3D correlation function -> dummy
+
+  // Initialize the arrays
+  fillZero(rmode, N_bin);
+  fillZero(corr, N_bin);
+  fillZero(nmode, N_bin);
+
+  // Linearize the 3D corr function: sum over shells
+  for(unsigned i=0;i<N1;i++)
+    for(unsigned j=0;j<N2;j++)
+      for(unsigned k=0;k<N3;k++)
+      { 
+        real_prec xpos = pacman_center_on_origin(i, N1, d1);
+        real_prec ypos = pacman_center_on_origin(j, N2, d2);
+        real_prec zpos = pacman_center_on_origin(k, N3, d3);
+
+        real_prec rtot = sqrt(xpos*xpos + ypos*ypos + zpos*zpos);
+
+        ULONG nbin = static_cast<ULONG>(rtot/dr);
+
+        //if (rtot <= rmax) // this condition is useless, it will never happen with the given rmax
+        //{
+          rmode[nbin] += rtot;
+          corr[nbin] += dummy[k+N3*(j+N2*i)];
+          nmode[nbin] += 1;
+        //}
+        //else
+          //cout << "rtot " << rtot << " is groter dan rmax " << rmax << "! Verhip!" << endl;
+      }
+
+  // And finally normalize the shells by the volume of the shells
+#pragma omp parallel for
+  for (ULONG l = 0; l < N_bin; ++l)
+  {
+    if(nmode[l] > 0)
+    {     
+      rmode[l] /= static_cast<real_prec>(nmode[l]);
+      corr[l] /= static_cast<real_prec>(static_cast<real_prec>(nmode[l]) * static_cast<real_prec>(N));
+    }
+  }
+}
+
+int main(/*int argc, char *argv[]*/)
+{
+  //int N1 = 64;
+  unsigned N1 = 256;
+  real_prec L1 = 1000.;
+  unsigned N_bin = 200;
+
+  ULONG N = N1*N1*N1;
+
+  real_prec d1 = L1 / static_cast<real_prec>(N1);
+
+  fftw_array<real_prec> grid(N), rmode(N_bin), corr(N_bin);
+  fftw_array<ULONG> nmode(N_bin);
+
+  // First the old regular field (or the 64 cubed one)
+  //get_scalar(string("/Users/patrick/Downloads/MD_clusters_Mvir_weighted_histogram"), grid, N1, N1, N1);
+  get_scalar(string("/Users/patrick/Downloads/MD_clusters_Mvir_weighted_delta"), grid, N1, N1, N1);
+
+  measure_corr_grid(N1, N1, N1, L1, /*L1, L1,*/ d1, d1, d1, N_bin, grid, rmode, nmode, corr);
+
+  //dump_scalar(rmode, static_cast<int>(N_bin), 1, 1, L1, 0., 0., 0, string("corr_fct_data/corr_r_64_NGP"));
+  //dump_scalar(corr, static_cast<int>(N_bin), 1, 1, L1, 0., 0., 0, string("corr_fct_data/corr_eta_64_NGP"));
+  dump_scalar(rmode, N_bin, 1, 1, string("corr_fct_data/corr_r_NGP"));
+  dump_scalar(corr, N_bin, 1, 1, string("corr_fct_data/corr_eta_NGP"));
+
+  // Then the new regular field (with CIC instead of NGP)
+  string dir("/Users/patrick/barcode/testcodes/MD_cluster_density_data/");
+
+  get_scalar(dir + string("delta_regCIC"), grid, N1, N1, N1);
+
+  measure_corr_grid(N1, N1, N1, L1,/* L1, L1,*/ d1, d1, d1, N_bin, grid, rmode, nmode, corr);
+
+  dump_scalar(rmode, N_bin, 1, 1, string("corr_fct_data/corr_r_CIC"));
+  dump_scalar(corr, N_bin, 1, 1, string("corr_fct_data/corr_eta_CIC"));
+
+  // Then the redshift-distorted field
+  get_scalar(dir + string("delta_rsdCIC"), grid, N1, N1, N1);
+
+  measure_corr_grid(N1, N1, N1, L1, /*L1, L1,*/ d1, d1, d1, N_bin, grid, rmode, nmode, corr);
+
+  dump_scalar(rmode, N_bin, 1, 1, string("corr_fct_data/corr_r_rsd_CIC"));
+  dump_scalar(corr, N_bin, 1, 1, string("corr_fct_data/corr_eta_rsd_CIC"));
+
+  return(0);
+}
diff --git a/tools/density.cc b/tools/density.cc
new file mode 100644
index 0000000..727c825
--- /dev/null
+++ b/tools/density.cc
@@ -0,0 +1,87 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#include <string>  // string
+#include <sstream>  // stringstream
+
+#include "../define_opt.h"  // real_prec, ULONG
+#include "../fftw_array.h"  // fftw_array
+#include "../IOfunctionsGen.h"  // get_scalar, dump_scalar
+#include "../massFunctions.h"  // getDensity_SPH
+#include "../convenience.h" // fill_one
+
+using std::cout;
+using std::cerr;
+using std::endl;
+using std::string;
+
+void load_arguments(int argc, char *argv[], string &fname_inx,
+                    string &fname_iny, string &fname_inz, unsigned &N1,
+                    real_prec &L1, ULONG &N_part, string &fname_out) {
+  stringstream N1_arg, L1_arg, N_part_arg;
+
+  int N_required = 6;
+
+  if (argc - 1 >= N_required) {
+    fname_inx = string(argv[1]);
+    fname_iny = string(argv[2]);
+    fname_inz = string(argv[3]);
+    N1_arg << argv[4];
+    N1_arg >> N1;
+    L1_arg << argv[5];
+    L1_arg >> L1;
+    N_part_arg << argv[6];
+    N_part_arg >> N_part;
+
+    if (argc - 1 >= N_required + 1)
+      fname_out = string(argv[7]);
+    else
+      fname_out = string("density");
+  } else {
+    cerr << "Need " << N_required << " parameters (input files x, y, z, N1, L1, N_part)!" << endl
+         << "N.B.: filenames must be given without extension (which must be"
+            " .dat)." << endl
+         << "Optional: fname_out (default: 'density')." << endl;
+    exit(1);
+  }
+}
+
+int main(int argc, char *argv[]) {
+  unsigned N1;
+  ULONG N_part;
+  real_prec L1;
+  string fn_inx, fn_iny, fn_inz, fn_out;
+  load_arguments(argc, argv, fn_inx, fn_iny, fn_inz, N1, L1, N_part, fn_out);
+
+  real_prec d1 = L1/static_cast<real_prec>(N1);
+
+  ULONG N = N1*N1*N1;
+
+  fftw_array<real_prec> density(N), x(N_part), y(N_part), z(N_part);
+
+  // get data from input file
+  read_array(fn_inx, x, N_part);
+  read_array(fn_iny, y, N_part);
+  read_array(fn_inz, z, N_part);
+
+  // do density estimation
+  real_prec min1 = 0.;
+  fftw_array<real_prec> particle_mass(N_part);
+  fill_one(particle_mass, N_part);
+  bool weightmass = true;
+  real_prec kernel_scale = d1;  // default in barcode
+
+  getDensity_SPH(N1, N1, N1, L1, L1, L1, d1, d1, d1, min1, min1, min1, x, y, z,
+                 particle_mass, N_part, density, weightmass, kernel_scale);
+
+  // output results
+  quick_dump_scalar(density, N1, fn_out, 0, false);
+
+  return(0);
+}
diff --git a/tools/interp_upres.cc b/tools/interp_upres.cc
new file mode 100644
index 0000000..54c36a2
--- /dev/null
+++ b/tools/interp_upres.cc
@@ -0,0 +1,112 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#include <cmath>
+#include <string>
+#include <sstream>
+#include <vector>
+
+#include "../define_opt.h"
+#include "../struct_main.h"
+#include "../fftw_array.h"
+#include "../IOfunctionsGen.h"
+#include "../math_funcs.h"  // interpolate_CIC
+
+// using namespace std;  // forbidden by Google C++ Style Guide
+using std::cout;
+using std::cerr;
+using std::endl;
+using std::string;
+using std::vector;
+
+void load_arguments(int argc, char *argv[], string &fname_in, unsigned &N1,
+                    real_prec &L1, unsigned &N1_out, string &fname_out) {
+  stringstream N1_arg, L1_arg, N1_out_arg, Nbar_arg, seed_arg;
+
+  if (argc >= 5) {
+    fname_in = string(argv[1]);
+    N1_arg << argv[2];
+    N1_arg >> N1;
+    L1_arg << argv[3];
+    L1_arg >> L1;
+    N1_out_arg << argv[4];
+    N1_out_arg >> N1_out;
+
+    if (argc >= 6) {
+      fname_out = string(argv[5]);
+    } else {
+      fname_out = fname_in + string("_interpCIC") + N1_out_arg.str();
+    }
+
+    cout << "will output to file: " << fname_out << endl;
+  } else {
+    cerr << "Need 4 parameters (file in, N1, L1, N1_out)! " << endl
+         << "N.B.: filenames must be given without extension (which must be "
+            ".dat)." << endl
+         << "Optional: fname_out (default: fname_in+'_interpCIC[N1_out]')."
+         << endl;
+    exit(1);
+  }
+}
+
+void interp_field(real_prec *input, unsigned N1, unsigned N2, unsigned N3, real_prec L1,
+                  real_prec L2, real_prec L3, unsigned N1_out, unsigned N2_out,
+                  unsigned N3_out, real_prec *output) {
+  real_prec dx = L1 / static_cast<real_prec>(N1);
+  real_prec dy = L2 / static_cast<real_prec>(N2);
+  real_prec dz = L3 / static_cast<real_prec>(N3);
+  real_prec dx_out = L1 / static_cast<real_prec>(N1_out);
+  real_prec dy_out = L2 / static_cast<real_prec>(N2_out);
+  real_prec dz_out = L3 / static_cast<real_prec>(N3_out);
+
+  #ifdef MULTITHREAD
+  #pragma omp parallel for
+  #endif  // MULTITHREAD
+  for (unsigned i = 0; i < N1_out; ++i)
+    for (unsigned j = 0; j < N2_out; ++j)
+      for (unsigned k = 0; k < N3_out; ++k) {
+        ULONG ix = k + N3_out*(j+N2_out*i);
+
+        // +0.5, assuming we associate grid values with values at center of grid
+        // cells
+        real_prec posx = dx_out * (0.5 + static_cast<real_prec>(i));
+        real_prec posy = dy_out * (0.5 + static_cast<real_prec>(j));
+        real_prec posz = dz_out * (0.5 + static_cast<real_prec>(k));
+
+        output[ix] = interpolate_CIC(N1, N2, N3, L1, L2, L3, dx, dy, dz,
+                                     posx, posy, posz, input);
+      }
+}
+
+
+int main(int argc, char *argv[]) {
+  unsigned N1, N1_out;
+  real_prec L1;
+  string fname_in, fname_out;
+  load_arguments(argc, argv, fname_in, N1, L1, N1_out, fname_out);
+
+  ULONG N = N1*N1*N1;
+  ULONG N_out = N1_out*N1_out*N1_out;
+
+  fftw_array<real_prec> input(N);
+
+  // get data from input file
+  get_scalar(fname_in, input, N1, N1, N1);
+
+  // interpolate
+  cout << "... interpolate" << endl;
+  fftw_array<real_prec> result(N_out);
+  interp_field(input, N1, N1, N1, L1, L1, L1, N1_out, N1_out, N1_out, result);
+
+  // output results
+  quick_dump_scalar(result, N1_out, fname_out, 0, false);
+
+  return(0);
+}
+
diff --git a/tools/poisson_upres.cc b/tools/poisson_upres.cc
new file mode 100644
index 0000000..0f1aa36
--- /dev/null
+++ b/tools/poisson_upres.cc
@@ -0,0 +1,156 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#include "../define_opt.h"
+#include "../struct_main.h"
+#include "../fftw_array.h"
+#include <cmath>
+#include <sstream>
+#include "../IOfunctionsGen.h"
+#include "../math_funcs.h"
+
+#include "../massFunctions.h" // getDensity
+
+using namespace std;
+
+vector< vector<real_prec> > discrete_poisson_sample(real_prec *lambda, unsigned N1, unsigned N2, unsigned N3, real_prec L1, real_prec L2, real_prec L3, gsl_rng *gBaseRand)
+{
+  real_prec d1 = L1/static_cast<real_prec>(N1);
+  real_prec d2 = L2/static_cast<real_prec>(N2);
+  real_prec d3 = L3/static_cast<real_prec>(N3);
+
+  ULONG N = static_cast<ULONG>(N1) * static_cast<ULONG>(N2) * static_cast<ULONG>(N3);
+
+  fftw_array<ULONG> number(N);
+  ULONG Npart = 0;
+
+  cout << "... drawing particle numbers per cell" << endl;
+  for (ULONG i = 0; i < N; ++i)
+  {
+    number[i] = static_cast<ULONG>( floor(static_cast<real_prec>( gsl_ran_poisson(gBaseRand, lambda[i]) )) );
+    Npart += number[i];
+  }
+
+  vector< vector<real_prec> > pos_pois(Npart, vector<real_prec>(3));
+  ULONG part_ix = 0;
+
+  cout << "... creating particle positions" << endl;
+  for (unsigned i=0;i<N1;i++)
+    for (unsigned j=0;j<N2;j++)
+      for (unsigned k=0;k<N3;k++)
+      {
+        ULONG lc=k+N3*(j+N2*i);                     
+        for(ULONG kk=0; kk < number[lc]; kk++)
+        {
+          real_prec rx = static_cast<real_prec>(gsl_rng_uniform(gBaseRand)) * d1;
+          real_prec ry = static_cast<real_prec>(gsl_rng_uniform(gBaseRand)) * d2;
+          real_prec rz = static_cast<real_prec>(gsl_rng_uniform(gBaseRand)) * d3;
+
+          pos_pois[part_ix][0] = d1 * (static_cast<real_prec>(i)) + rx;
+          pos_pois[part_ix][1] = d2 * (static_cast<real_prec>(j)) + ry;
+          pos_pois[part_ix][2] = d3 * (static_cast<real_prec>(k)) + rz;
+
+          ++part_ix;
+        }
+      }
+
+  return (pos_pois);
+}
+
+void load_arguments(int argc, char *argv[], string &fname_in, unsigned &N1, real_prec &L1, unsigned &N1_out, real_prec &Nbar, ULONG &seed, string &fname_out)
+{
+  stringstream N1_arg, L1_arg, N1_out_arg, Nbar_arg, seed_arg;
+
+  if (argc >= 7)
+  {
+    fname_in = string(argv[1]);
+    N1_arg << argv[2];
+    N1_arg >> N1;
+    L1_arg << argv[3];
+    L1_arg >> L1;
+    N1_out_arg << argv[4];
+    N1_out_arg >> N1_out;
+    Nbar_arg << argv[5];
+    Nbar_arg >> Nbar;
+    seed_arg << argv[6];
+    seed_arg >> seed;
+
+    if (argc >= 8)
+      fname_out = string(argv[7]);
+    else
+      fname_out = fname_in + string("_poisCIC") + N1_out_arg.str() + string("_Nbar") + Nbar_arg.str();
+
+    cout << "will output to file: " << fname_out << endl;
+  }
+  else
+  {
+    cerr << "Need 6 parameters (file in, N1, L1, N1_out, Nbar, seed)! " << endl
+         << "N.B.: filenames must be given without extension (which must be "
+            ".dat)." << endl
+         << "Optional: fname_out (default: "
+            "fname_in+'_poisCIC[N1_out]_Nbar[Nbar]')." << endl << endl
+         << "Nbar is the number of poisson particles per cell. For a decent "
+            "representation of the previous density field, an Nbar of "
+            "8^log2(N1_out/N1) seems to work well."
+         << endl;
+    exit(1);
+  }
+}
+
+int main(int argc, char *argv[])
+{
+  unsigned N1, N1_out;
+  ULONG seed;
+  real_prec L1, Nbar;
+  string fname_in, fname_out;
+  load_arguments(argc, argv, fname_in, N1, L1, N1_out, Nbar, seed, fname_out);
+
+  ULONG N = N1*N1*N1;
+  ULONG N_out = N1_out*N1_out*N1_out;
+
+  //real_prec d1 = L1 / static_cast<real_prec>(N1);
+  real_prec d1_out = L1 / static_cast<real_prec>(N1_out);
+
+  fftw_array<real_prec> input(N);
+
+  // get data from input file
+  get_scalar(fname_in, input, N1, N1, N1);
+  // convert input from delta to lambda
+  for (ULONG i = 0; i < N; ++i)
+    input[i] = Nbar * (1 + input[i]);
+
+  // initialize RNG
+  gsl_rng *gBaseRand;
+  gBaseRand = gsl_rng_alloc(gsl_rng_mt19937);
+  gsl_rng_set (gBaseRand, seed);
+
+  // do poisson sample; get positions
+  vector< vector<real_prec> > pos_pois = discrete_poisson_sample(input, N1, N1, N1, L1, L1, L1, gBaseRand);
+  // convert to usable format
+  ULONG Npart = pos_pois.size();
+  fftw_array<real_prec> posx(Npart), posy(Npart), posz(Npart), pmass(Npart);
+  for (ULONG i = 0; i < Npart; ++i)
+  {
+    posx[i] = pos_pois[i][0];
+    posy[i] = pos_pois[i][1];
+    posz[i] = pos_pois[i][2];
+    //pmass[i] = 1.; // particle mass (not really useful, but necessary for getDensity)
+  }
+
+  // calculate new density (CIC))
+  cout << "... calculate new density from particle sample" << endl;
+  fftw_array<real_prec> output(N_out);
+  bool pmass_used = false;
+  getDensity_CIC(N1_out, N1_out, N1_out, L1, L1, L1, d1_out, d1_out, d1_out, 0, 0, 0, posx, posy, posz, pmass, Npart, output, pmass_used);
+
+  // output results
+  quick_dump_scalar(output, N1_out, fname_out, 0, false);
+
+  return(0);
+}
diff --git a/tools/powspec.cc b/tools/powspec.cc
new file mode 100644
index 0000000..c3cb738
--- /dev/null
+++ b/tools/powspec.cc
@@ -0,0 +1,72 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#include <string>  // string
+#include <sstream>  // stringstream
+
+#include "../define_opt.h"  // real_prec, ULONG
+#include "../fftw_array.h"  // fftw_array
+#include "../IOfunctionsGen.h"  // get_scalar, dump_scalar
+#include "../IOfunctions.h"  // dump_measured_spec
+#include "../field_statistics.h" // measure_spectrum
+
+using std::cout;
+using std::cerr;
+using std::endl;
+using std::string;
+
+void load_arguments(int argc, char *argv[], string &fname_in, unsigned &N1,
+                    real_prec &L1, ULONG &N_bin, string &fname_out) {
+  stringstream N1_arg, L1_arg, N_bin_arg;
+
+  if (argc >= 5) {
+    fname_in = string(argv[1]);
+    N1_arg << argv[2];
+    N1_arg >> N1;
+    L1_arg << argv[3];
+    L1_arg >> L1;
+    N_bin_arg << argv[4];
+    N_bin_arg >> N_bin;
+
+    if (argc >= 6)
+      fname_out = string(argv[5]);
+    else
+      fname_out = fname_in + string("_pow");
+  } else {
+    cerr << "Need 4 parameters (file in, N1, L1, N_bin)!" << endl
+         << "N.B.: filenames must be given without extension (which must be"
+            " .dat)." << endl
+         << "Optional: fname_out (default: fname_in+'_pow')." << endl;
+    exit(1);
+  }
+}
+
+int main(int argc, char *argv[]) {
+  unsigned N1;
+  ULONG N_bin;
+  real_prec L1;
+  string fname_in, fname_out;
+  load_arguments(argc, argv, fname_in, N1, L1, N_bin, fname_out);
+
+  ULONG N = N1*N1*N1;
+
+  fftw_array<real_prec> power(N_bin), kmode(N_bin);
+  fftw_array<real_prec> input(N);
+
+  // get data from input file
+  get_scalar(fname_in, input, N1, N1, N1);
+
+  // do measurement
+  measure_spectrum(N1, N1, N1, L1, L1, L1, input, kmode, power, N_bin);
+
+  // output results
+  dump_measured_spec(kmode, power, fname_out, N_bin);
+
+  return(0);
+}
diff --git a/transf.cpp b/transf.cpp
new file mode 100644
index 0000000..e1babd9
--- /dev/null
+++ b/transf.cpp
@@ -0,0 +1,182 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#include "transf.h"
+#include "fftw_array.h"
+#include "math_funcs.h"
+#include "fftwrapper.h"
+#include <cassert>
+#include <fstream>
+#include <cmath>
+#include "IOfunctionsGen.h"
+
+// barcoderunner only:
+void transflpt(unsigned int N1, unsigned int N2, unsigned int N3, real_prec L1, real_prec L2, real_prec L3,
+               int filtertype, std::string dir)
+{
+  // EGP:
+  // Watch out, there is still a (probably wrong) FOURIER_DEF normalisation factor below here!!!
+
+  bool zeld=false;
+  bool secordlpt=false;
+  bool denslpt=false;
+
+  switch (filtertype)
+  {
+    case 1:
+      zeld=true;
+      break;
+    case 2:
+      secordlpt=true;
+      break;
+    case 3:
+      denslpt=true;
+      break;
+  }
+
+
+  ULONG N=N1*N2*N3;
+
+  fftw_array<real_prec> out(N);
+
+  int bmax=100;
+  char buffer1[bmax];
+  sprintf(buffer1,"init_spec.dat");
+  std::string inputFileName= dir + std::string(buffer1);
+  std::ifstream inStream;
+  inStream.open(inputFileName.data());
+  assert(inStream.is_open());
+  ULONG Nkr=10000;
+  real_prec dkr=static_cast<real_prec>(1.e-3);
+  fftw_array<real_prec> kr(Nkr), pkr(Nkr);
+  for(ULONG i=0;i<Nkr;i++) // EGP: int i -> ULONG i
+  {
+    inStream >> kr[i] >> pkr[i];
+  }
+  inStream.close();
+  real_prec minkr=kr[0];
+
+  real_prec kNL=0.;//0.25;//attention for z=0!! from fig 3 in http://arxiv.org/pdf/1203.5785.pdf
+
+  {
+    real_prec dvar=0.;
+    ULONG ikk=0;
+    while (dvar<1.)
+    {
+      dvar+=static_cast<real_prec>(4.*M_PI*dkr*kr[ikk]*kr[ikk]*(pkr[ikk]/(4.*M_PI)));
+      ikk++;
+    }
+    if (ikk>0) {
+      std::cout << "delta^2=1-> kNL=" << kr[ikk - 1] << std::endl;
+    }
+
+    kNL=kr[ikk-1];
+  }
+
+
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+  for (unsigned i=0;i<N1;i++)
+    for (unsigned j=0;j<N2;j++)
+      for (unsigned k=0;k<N3;k++)
+      {
+        ULONG ii=k+N3*(j+N2*i);
+
+        real_prec kmod=sqrt(k_squared(i,j,k,L1,L2,L3,N1,N2,N3));
+
+        if (zeld==true)
+        {
+          real_prec fac=static_cast<real_prec>(0.085);
+          out[ii]=exp(-fac*kmod*kmod/(kNL*kNL));
+        }
+
+        if (secordlpt==true)
+        {
+          real_prec kmodnorm1=kmod/kNL;
+          real_prec kmodnorm2=kmodnorm1*kmodnorm1;
+          real_prec kmodnorm3=kmodnorm2*kmodnorm1;
+          real_prec kmodnorm4=kmodnorm3*kmodnorm1;
+
+          out[ii]=static_cast<real_prec>(exp(0.6*kmodnorm1-1.7*kmodnorm2+0.623*kmodnorm3-0.078*kmodnorm4));
+        }
+
+        if (denslpt==true)
+        {
+          //real_prec Vol=L1*L2*L3;
+          //real_prec Norm;
+//#ifdef FOURIER_DEF_1
+          //Norm=1./Vol;
+//#endif
+//#ifdef FOURIER_DEF_2
+          //Norm=real_prec(N)*real_prec(N)/Vol;
+//#endif
+          real_prec dvar=0.;
+          ULONG ikr=0;
+          if (kmod*0.5>minkr)
+          {
+            ikr=static_cast<ULONG>(floor((kmod*0.5-minkr)/dkr));//attention for z=0!!
+#ifdef MULTITHREAD
+#pragma omp parallel for reduction(+:dvar)
+#endif // MULTITHREAD
+            for (ULONG ikk=0;ikk<ikr;ikk++)
+              dvar+=static_cast<real_prec>(dkr*kr[ikk]*kr[ikk]*(pkr[ikk]/(4.*M_PI)));//normalisation??
+          }
+
+          real_prec dmod=static_cast<real_prec>(4.*M_PI*dvar);
+
+          out[ii]=static_cast<real_prec>(exp(0.58*dmod));
+        }
+      }
+
+
+  {
+    fftw_array<complex_prec>  AUX(N);
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+    for(ULONG i=0;i<N;i++)
+    {
+      re(AUX[i])=out[i];
+      im(AUX[i])=0.;
+    }
+    FFT3d (N1,N2,N3, to_Rspace, AUX, AUX);
+
+    real_prec wtot=0.;
+#ifdef MULTITHREAD
+#pragma omp parallel for reduction(+:wtot)
+#endif // MULTITHREAD
+    for(ULONG i=0;i<N;i++)
+      wtot+=re(AUX[i]);
+
+#ifdef MULTITHREAD
+#pragma omp parallel for
+#endif // MULTITHREAD
+    for(ULONG i=0;i<N;i++)
+    {
+      out[i]/=wtot;
+    }
+  }
+
+  std::string fname;
+  switch (filtertype)
+  {
+    case 1:
+      fname= dir + std::string("auxtransfzeld");
+      break;
+    case 2:
+      fname= dir + std::string("auxtransf2lpt");
+      break;
+    case 3:
+      fname= dir + std::string("auxtransfdens");
+      break;
+  }
+
+  dump_scalar(out, N1, N2, N3, fname);
+}
diff --git a/transf.h b/transf.h
new file mode 100644
index 0000000..2b1355d
--- /dev/null
+++ b/transf.h
@@ -0,0 +1,19 @@
+/*
+ * Barcode
+ * Copyright E.G.P. Bos and F.S. Kitaura
+ *
+ * Distributed under the terms of the MIT License.
+ * The full license is in the file LICENSE, distributed with this software.
+ */
+
+
+#ifndef BARCODE_TRANSF_H
+#define BARCODE_TRANSF_H
+
+#include <string>
+#include "define_opt.h"
+
+void transflpt(unsigned int N1, unsigned int N2, unsigned int N3, real_prec L1, real_prec L2, real_prec L3,
+               int filtertype, std::string dir);
+
+#endif //BARCODE_TRANSF_H