nedmalloc.h

/* nedalloc, an alternative malloc implementation for multiple threads without
lock contention based on dlmalloc v2.8.4. (C) 2005-2010 Niall Douglas

Boost Software License - Version 1.0 - August 17th, 2003

Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:

The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/

#ifndef NEDMALLOC_H
#define NEDMALLOC_H

/*! \file nedmalloc.h
\brief Defines the functionality provided by nedalloc.
*/

/*! \mainpage

<a href="../../Readme.html">Please see the Readme.html</a>
*/

/*! \def NEDMALLOC_DEBUG
\brief Defines the assertion checking performed by nedalloc

NEDMALLOC_DEBUG can be defined to cause DEBUG to be set differently for nedmalloc
than for the rest of the build. Remember to set NDEBUG to disable all assertion
checking too.
*/

/*! \def ENABLE_LARGE_PAGES
\brief Defines whether nedalloc uses large pages (>=2Mb)

ENABLE_LARGE_PAGES enables support for requesting memory from the system in large
(typically >=2Mb) pages if the host OS supports this. These occupy just a single
TLB entry and can significantly improve performance in large working set applications.
*/

/*! \def ENABLE_FAST_HEAP_DETECTION
\brief Defines whether nedalloc takes platform specific shortcuts when detecting foreign blocks.

ENABLE_FAST_HEAP_DETECTION enables special logic to detect blocks allocated
by the system heap. This avoids 1.5%-2% overhead when checking for non-nedmalloc
blocks, but it assumes that the NT and glibc heaps function in a very specific
fashion which may not hold true across OS upgrades.
*/

/*! \def HAVE_CPP0XRVALUEREFS
\ingroup C++
\brief Enables rvalue references

Define to enable the usage of rvalue references which enables move semantics and
other things. Automatically defined if __cplusplus indicates a C++0x compiler,
otherwise you'll need to set it yourself.
*/

/*! \def HAVE_CPP0XVARIADICTEMPLATES
\ingroup C++
\brief Enables variadic templates

Define to enable the usage of variadic templates which enables the use of arbitrary
numbers of policies and other useful things. Automatically defined if __cplusplus
indicates a C++0x compiler, otherwise you'll need to set it yourself.
*/

/*! \def HAVE_CPP0XSTATICASSERT
\ingroup C++
\brief Enables static assertions

Define to enable the usage of static assertions. Automatically defined if __cplusplus
indicates a C++0x compiler, otherwise you'll need to set it yourself.
*/

/*! \def HAVE_CPP0XTYPETRAITS
\ingroup C++
\brief Enables type traits

Define to enable the usage of &lt;type_traits&gt;. Automatically defined if __cplusplus
indicates a C++0x compiler, otherwise you'll need to set it yourself.
*/

/* Do we have C++0x? Either the compiler reports a standard level newer than
   C++98, or the build has defined HAVE_CPP0X itself. When so, every feature
   macro is forced to 1, replacing any earlier definition. */
#if defined(HAVE_CPP0X) || (defined(__cplusplus) && __cplusplus > 199711L)
 #undef  HAVE_CPP0XTYPETRAITS
 #define HAVE_CPP0XTYPETRAITS 1
 #undef  HAVE_CPP0XSTATICASSERT
 #define HAVE_CPP0XSTATICASSERT 1
 #undef  HAVE_CPP0XVARIADICTEMPLATES
 #define HAVE_CPP0XVARIADICTEMPLATES 1
 #undef  HAVE_CPP0XRVALUEREFS
 #define HAVE_CPP0XRVALUEREFS 1
#endif

#include <stddef.h>   /* for size_t */

/*! \def NEDMALLOCEXTSPEC
\brief Defines how nedalloc's API is to be made visible.

NEDMALLOCEXTSPEC can be defined to be __declspec(dllexport) or
__attribute__ ((visibility("default"))) or whatever you like. It defaults
to extern unless NEDMALLOC_DLL_EXPORTS is set as it would be when building
nedmalloc.dll. When NEDMALLOC_DLL_EXPORTS is set but the toolchain is neither
WIN32 nor GNUC, plain extern is used so that the declarations still compile.

Building with NEDMALLOC_DLL_EXPORTS (and without a predefined
NEDMALLOCEXTSPEC) also turns on ENABLE_TOLERANT_NEDMALLOC unless it is
already defined.
 */
#ifndef NEDMALLOCEXTSPEC
 #ifdef NEDMALLOC_DLL_EXPORTS
  #ifdef WIN32
   #define NEDMALLOCEXTSPEC extern __declspec(dllexport)
  #elif defined(__GNUC__)
   #define NEDMALLOCEXTSPEC extern __attribute__ ((visibility("default")))
  #else
   /* No known export syntax for this compiler: never leave the macro undefined */
   #define NEDMALLOCEXTSPEC extern
  #endif
  #ifndef ENABLE_TOLERANT_NEDMALLOC
   #define ENABLE_TOLERANT_NEDMALLOC 1
  #endif
 #else
  #define NEDMALLOCEXTSPEC extern
 #endif
#endif

/*! \def NEDMALLOCDEPRECATED
\brief Marks an API as deprecated using the compiler's own syntax, or expands
to nothing when no such syntax is selected. May be predefined by the build. */
#if !defined(NEDMALLOCDEPRECATED)
 #if defined(__GCCXML__)
  /* Under GCC-XML neither vendor marker is used */
  #define NEDMALLOCDEPRECATED
 #elif defined(_MSC_VER)
  #define NEDMALLOCDEPRECATED __declspec(deprecated)
 #elif defined(__GNUC__)
  #define NEDMALLOCDEPRECATED __attribute ((deprecated))
 #else
  /* Unknown compiler: the marker is silently dropped */
  #define NEDMALLOCDEPRECATED
 #endif
#endif

/*! \def RESTRICT
\brief Defined to the restrict keyword or equivalent if available, otherwise
to nothing. May be predefined by the build. */
#if !defined(RESTRICT)
 #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
  /* C99 or better: use the keyword itself */
  #define RESTRICT restrict
 #elif (defined(_MSC_VER) && _MSC_VER >= 1400) || defined(__GNUC__)
  /* Otherwise MSVC (_MSC_VER >= 1400) and GNUC compilers get __restrict */
  #define RESTRICT __restrict
 #else
  /* No equivalent selected: the qualifier is dropped */
  #define RESTRICT
 #endif
#endif

/*! \def NEDMALLOCPTRATTR
\brief Defined to the specifier for a pointer which points to a memory block. Like NEDMALLOCNOALIASATTR, but sadly not identical.

May be predefined by the build, in which case that definition is kept (the
same convention as NEDMALLOCEXTSPEC, NEDMALLOCDEPRECATED and RESTRICT).
Otherwise GNUC compilers get __attribute__ ((malloc)), MSVC (_MSC_VER >= 1400)
gets __declspec(restrict), and anything else gets nothing. GNUC is tested first
so that a compiler announcing both keeps the GNUC spelling, as it did before.
*/
#ifndef NEDMALLOCPTRATTR
 #ifdef __GNUC__
  #define NEDMALLOCPTRATTR __attribute__ ((malloc))
 #elif defined(_MSC_VER) && _MSC_VER>=1400
  #define NEDMALLOCPTRATTR __declspec(restrict)
 #else
  #define NEDMALLOCPTRATTR
 #endif
#endif
/*! \def NEDMALLOCNOALIASATTR
\brief Defined to the specifier for a pointer which does not alias any other variable.

May be predefined by the build. Otherwise only MSVC (_MSC_VER >= 1400) gets a
non-empty definition, __declspec(noalias).
*/
#ifndef NEDMALLOCNOALIASATTR
 #if defined(_MSC_VER) && _MSC_VER>=1400
  #define NEDMALLOCNOALIASATTR __declspec(noalias)
 #else
  #define NEDMALLOCNOALIASATTR
 #endif
#endif

/*! \def USE_MAGIC_HEADERS
\brief Defines whether nedalloc should use magic headers in foreign heap block detection

USE_MAGIC_HEADERS causes nedalloc to allocate an extra three sizeof(size_t)
to each block. nedpfree() and nedprealloc() can then automagically know when
to free a system allocated block. Enabling this typically adds 20-50% to
application memory usage, and is mandatory if USE_ALLOCATOR is not 1.
*/
#if !defined(USE_MAGIC_HEADERS)
 /* Magic headers stay off unless the build defines this macro itself */
 #define USE_MAGIC_HEADERS 0
#endif

/*! \def USE_ALLOCATOR
\brief Defines the underlying allocator to use

USE_ALLOCATOR can be one of these settings (it defaults to 1):
  0: System allocator (nedmalloc now simply acts as a threadcache) which is
     very useful for testing with valgrind and Glowcode.
     WARNING: Intended for DEBUG USE ONLY - not all functions work correctly.
  1: dlmalloc
*/
#if !defined(USE_ALLOCATOR)
 /* Default backend is setting 1, i.e. dlmalloc */
 #define USE_ALLOCATOR 1
#endif

/* Reject the build when both settings are zero: the system allocator
   (USE_ALLOCATOR 0) is only accepted together with magic headers. */
#if !(USE_ALLOCATOR || USE_MAGIC_HEADERS)
#error If you are using the system allocator then you MUST use magic headers
#endif

/*! \def REPLACE_SYSTEM_ALLOCATOR
\brief Defines whether to replace the system allocator (malloc(), free() et al) with nedalloc's implementation.

REPLACE_SYSTEM_ALLOCATOR on POSIX causes nedalloc's functions to be called
malloc, free etc. instead of nedmalloc, nedfree etc. You may or may not want
this. On Windows it causes nedmalloc to patch all loaded DLLs and binaries
to replace usage of the system allocator.

Always turns on ENABLE_TOLERANT_NEDMALLOC.
*/
#if defined(REPLACE_SYSTEM_ALLOCATOR)
 /* USE_ALLOCATOR 0 selects the system allocator, which cannot also be replaced */
 #if !USE_ALLOCATOR
  #error Cannot combine using the system allocator with replacing the system allocator
 #endif
 /* Replacement always turns on tolerance of system allocator blocks */
 #if !defined(ENABLE_TOLERANT_NEDMALLOC)
  #define ENABLE_TOLERANT_NEDMALLOC 1
 #endif
 #if !defined(WIN32) /* We have a dedicated patcher for Windows */
  /* Allocation and release */
  #define nedmalloc               malloc
  #define nedcalloc               calloc
  #define nedrealloc              realloc
  #define nedfree                 free
  #define nedmemalign             memalign
  /* v2 API */
  #define nedmalloc2              malloc2
  #define nedrealloc2             realloc2
  #define nedfree2                free2
  /* Multi-chunk allocation */
  #define nedindependent_calloc   independent_calloc
  #define nedindependent_comalloc independent_comalloc
  /* Tuning and introspection */
  #define nedmallinfo             mallinfo
  #define nedmallopt              mallopt
  #define nedmalloc_trim          malloc_trim
  #define nedmalloc_stats         malloc_stats
  #define nedmalloc_footprint     malloc_footprint
  #if defined(__GNUC__)
   #define nedmemsize             malloc_usable_size
  #endif
 #endif
#endif

/*! \def ENABLE_TOLERANT_NEDMALLOC
\brief Defines whether nedalloc should check for blocks from the system allocator.

ENABLE_TOLERANT_NEDMALLOC is automatically turned on if REPLACE_SYSTEM_ALLOCATOR
is set or the Windows DLL is being built. This causes nedmalloc to detect when a
system allocator block is passed to it and to handle it appropriately. Note that
without USE_MAGIC_HEADERS there is a very tiny chance that nedmalloc will segfault
on non-Windows builds (it uses Win32 SEH to trap segfaults on Windows and there
is no comparable system on POSIX).
*/

#ifdef __cplusplus
extern "C" {
#endif
/*! \brief Returns information about a memory pool */
struct nedmallinfo {
  /*! non-mmapped space allocated from system */
  size_t arena;
  /*! number of free chunks */
  size_t ordblks;
  /*! always 0 */
  size_t smblks;
  /*! always 0 */
  size_t hblks;
  /*! space in mmapped regions */
  size_t hblkhd;
  /*! maximum total allocated space */
  size_t usmblks;
  /*! always 0 */
  size_t fsmblks;
  /*! total allocated space */
  size_t uordblks;
  /*! total free space */
  size_t fordblks;
  /*! releasable (via malloc_trim) space */
  size_t keepcost;
};
#ifdef __cplusplus
}
#endif

/*! \def NO_NED_NAMESPACE
\brief Defines the use of the nedalloc namespace for the C functions.

NO_NED_NAMESPACE prevents the functions from being defined in the nedalloc
namespace when in C++ (uses the global C namespace instead).
*/
/*! \def THROWSPEC
\brief Defined to throw() or noexcept(true) (as in, throws nothing) under C++, otherwise nothing.
*/
#ifndef __cplusplus
 /* Plain C: there is no exception specification and no enclosing scope */
 #define THROWSPEC
#else
 #if __cplusplus > 199711L
  #define THROWSPEC noexcept(true)
 #else
  #define THROWSPEC throw()
 #endif
 /* Open the scope that holds the C API; its closing brace comes after the
    last nedp* declaration. */
 #ifdef NO_NED_NAMESPACE
extern "C" {
 #else
namespace nedalloc {
 #endif
#endif

/* These are the global functions */

/*! \defgroup v2malloc The v2 malloc API

\warning This API is being completely retired in v1.10 beta 2 and replaced with the API
being developed for inclusion into the C1X programming language standard

For the v1.10 release which was generously sponsored by
<a href="http://www.ara.com/" target="_blank">Applied Research Associates (USA)</a>, 
a new general purpose allocator API was designed which is intended to remedy many 
of the long standing problems and inefficiencies introduced by the ISO C allocator 
API. Internally nedalloc's implementations of nedmalloc(), nedcalloc(), nedmemalign() 
and nedrealloc() call into this API:

<ul>
	<li><code>void* malloc2(size_t bytes, size_t alignment, unsigned flags)</code></li>
	<li><code>void* realloc2(void* mem, size_t bytes, size_t alignment, unsigned 
	flags)</code></li>
	<li><code>void free2(void* mem, unsigned flags)</code></li>
</ul>

If nedmalloc.h is being included by C++ code, the alignment and flags parameters 
default to zero which makes the new API identical to the old API (roll on the introduction 
of default parameters to C!). The ability for realloc2() to take an alignment is
<em>particularly</em> useful for extending aligned vector arrays such as SSE/AVX 
vector arrays. Hitherto SSE/AVX vector code had to jump through all sorts of unpleasant 
hoops to maintain alignment :(.

Note that using any of these flags other than M2_ZERO_MEMORY or any alignment 
other than zero inhibits the threadcache.

Currently MREMAP support is limited to Linux and Windows. Patches implementing 
support for other platforms are welcome.

On Linux the non portable mremap() kernel function is currently used, so in fact 
the M2_RESERVE_* options are currently ignored.

On Windows, there are two different MREMAP implementations which are chosen according 
to whether a 32 bit or a 64 bit build is being performed. The 32 bit implementation 
is based on Win32 file mappings where it reserves the address space within the Windows 
VM system, so you can safely specify silly reservation quantities like 2Gb per block 
and not exhaust local process address space. Note however that on x86 this costs 
2Kb (1Kb if PAE is off) of kernel memory per Mb reserved, and as kernel memory has 
a hard limit of 447Mb on x86 you will find the total address space reservable in 
the system is limited. On x64, or if you define WIN32_DIRECT_USE_FILE_MAPPINGS=0 
on x86, a much faster implementation of using VirtualAlloc(MEM_RESERVE) to directly 
reserve the address space is used.

When using M2_RESERVE_* with realloc2(), the setting only takes effect when the 
mmapped chunk has exceeded its reservation space and a new reservation space needs 
to be created.
*/

#ifndef M2_FLAGS_DEFINED
#define M2_FLAGS_DEFINED

/*! \def M2_ZERO_MEMORY
\ingroup v2malloc
\brief Sets the contents of the allocated block (or any increase in the allocated
block) to zero.

Note that this zeroes only the increase from what dlmalloc thinks
the chunk's size is, so if you realloc2() a block which wasn't allocated using
malloc2() using this flag then you may have garbage just before the newly extended
space.

\li <strong>Rationale:</strong> Memory returned by the system is guaranteed to
be zero on most platforms, and hence dlmalloc knows when it can skip zeroing
memory. This improves performance.
*/
#define M2_ZERO_MEMORY          (1<<0)

/*! \def M2_PREVENT_MOVE
\ingroup v2malloc
\brief Cause realloc2() to attempt to extend a block in place, but to never move
it.

\li <strong>Rationale:</strong> C++ makes almost no use of realloc(), even for
contiguous arrays such as std::vector<> because most C++ objects cannot be relocated
in memory without a copy or rvalue construction (though some clever STL implementations
specialise for Plain Old Data (POD) types, and use realloc() then and only then).
This flag allows C++ containers to speculatively try to extend in place, thus
improving performance <em>especially</em> for large allocations which will use
mmap().
*/
#define M2_PREVENT_MOVE         (1<<1)

/*! \def M2_ALWAYS_MMAP
\ingroup v2malloc
\brief Always allocate as though mmap_threshold were being exceeded.

In the case of realloc2(), note that setting this bit will not necessarily mmap a chunk
which isn't already mmapped, but it will force a mmapped chunk if new memory
needs allocating.

\li <strong>Rationale:</strong> If you know that an array you are allocating
is going to be repeatedly extended up into the hundreds of kilobytes range, then
you can avoid the constant memory copying into larger blocks by specifying this
flag at the beginning along with one of the M2_RESERVE_* flags below. This can
<strong>greatly</strong> improve performance for large arrays.
*/
#define M2_ALWAYS_MMAP          (1<<2)
/* Bits 3 to 7 are named as reserved; this header gives them no meaning. */
#define M2_RESERVED1            (1<<3)
#define M2_RESERVED2            (1<<4)
#define M2_RESERVED3            (1<<5)
#define M2_RESERVED4            (1<<6)
#define M2_RESERVED5            (1<<7)
/* Bit 15: when set, the reservation specifier is a multiplier, not a shift. */
#define M2_RESERVE_ISMULTIPLIER (1<<15)
/* 7 bits (bits 8 to 14) are given to the address reservation specifier.
This lets you set a multiplier (bit 15 set) or a 1<< shift value.
*/
#define M2_RESERVE_MASK         0x00007f00

/*! \def M2_RESERVE_MULT(n)
\ingroup v2malloc
\brief Reserve n times as much address space such that mmapped realloc2(size <=
n * original size) avoids memory copying and hence is much faster.

Only the low 7 bits of n survive the mask, so n must lie in 0..127.
*/
#define M2_RESERVE_MULT(n)      (M2_RESERVE_ISMULTIPLIER|(((n)<<8)&M2_RESERVE_MASK))

/*! \def M2_RESERVE_SHIFT(n)
\ingroup v2malloc
\brief Reserve (1<<n) bytes of address space such that mmapped realloc2(size <=
(1<<n)) avoids memory copying and hence is much faster.

Only the low 7 bits of n survive the mask, so n must lie in 0..127.
*/
#define M2_RESERVE_SHIFT(n)     (((n)<<8)&M2_RESERVE_MASK)
/* The low 16 bits hold the M2_* flags above; the high 16 bits, starting at
M2_CUSTOM_FLAGS_BEGIN, form the custom flag range.
*/
#define M2_FLAGS_MASK           0x0000ffff
#define M2_CUSTOM_FLAGS_BEGIN   (1<<16)
#define M2_CUSTOM_FLAGS_MASK    0xffff0000

/*! \def NM_SKIP_TOLERANCE_CHECKS
\ingroup v2malloc
\brief Causes nedmalloc to not inspect the block being passed to see if it belongs
to the system allocator. Can improve speed by up to 10%.

Written as an unsigned constant: shifting a signed 1 into bit 31 of a 32 bit
int is undefined behaviour in C, and the flags parameters are unsigned anyway.
*/
#define NM_SKIP_TOLERANCE_CHECKS (1u<<31)
#endif /* M2_FLAGS_DEFINED */


#if defined(__cplusplus)
/*! \brief Gets the usable size of an allocated block.

Note this will always be bigger than what was
asked for due to rounding etc. Optionally returns 1 in isforeign if the block came from the
system allocator - note that there is a small (>0.01%) but real chance of segfault on non-Windows
systems when passing non-nedmalloc blocks if you don't use USE_MAGIC_HEADERS.

\param isforeign Optional output: receives 1 if the block came from the system allocator.
\param mem The allocated block to inspect.
\param flags Defaults to 0 in C++. NOTE(review): the accepted values are not stated in this
header - presumably NM_SKIP_TOLERANCE_CHECKS; confirm against the implementation.
\return The usable size of the block.
*/
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR size_t nedblksize(int *RESTRICT isforeign, void *RESTRICT mem, unsigned flags=0) THROWSPEC;
#else
/* C declaration of nedblksize(): the same as the C++ one above except that,
C having no default arguments, flags must always be passed explicitly. */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR size_t nedblksize(int *RESTRICT isforeign, void *RESTRICT mem, unsigned flags) THROWSPEC;
#endif
/*! \brief Identical to nedblksize() except without the isforeign
\param mem The allocated block to inspect.
\return The usable size of the block.
*/
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR size_t nedmemsize(void *RESTRICT mem) THROWSPEC;

/* The functions below operate on the system pool: each is documented as its
nedp* counterpart called with a null (nedpool *) 0 pool argument. */

/*! \brief Equivalent to nedpsetvalue((nedpool *) 0, v) */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void nedsetvalue(void *v) THROWSPEC;

/*! \brief Equivalent to nedpmalloc2((nedpool *) 0, size, 0, 0) */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmalloc(size_t size) THROWSPEC;
/*! \brief Equivalent to nedpmalloc2((nedpool *) 0, no*size, 0, M2_ZERO_MEMORY) */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedcalloc(size_t no, size_t size) THROWSPEC;
/*! \brief Equivalent to nedprealloc2((nedpool *) 0, mem, size, 0, M2_RESERVE_MULT(8)) */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedrealloc(void *mem, size_t size) THROWSPEC;
/*! \brief Equivalent to nedpfree2((nedpool *) 0, mem, 0) */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void   nedfree(void *mem) THROWSPEC;
/*! \brief Equivalent to nedpmalloc2((nedpool *) 0, bytes, alignment, 0) */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmemalign(size_t alignment, size_t bytes) THROWSPEC;

#if defined(__cplusplus)
/* C++ declarations: alignment and flags default to zero. */
/*! \ingroup v2malloc
\brief Equivalent to nedpmalloc2((nedpool *) 0, size, alignment, flags) */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmalloc2(size_t size, size_t alignment=0, unsigned flags=0) THROWSPEC;
/*! \ingroup v2malloc
\brief Equivalent to nedprealloc2((nedpool *) 0, mem, size, alignment, flags) */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedrealloc2(void *mem, size_t size, size_t alignment=0, unsigned flags=0) THROWSPEC;
/*! \ingroup v2malloc
\brief Equivalent to nedpfree2((nedpool *) 0, mem, flags) */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void nedfree2(void *mem, unsigned flags=0) THROWSPEC;
#else
/* C declarations of the same three functions: C has no default arguments, so
alignment and flags must always be passed explicitly (pass 0 for the defaults). */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmalloc2(size_t size, size_t alignment, unsigned flags) THROWSPEC;
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedrealloc2(void *mem, size_t size, size_t alignment, unsigned flags) THROWSPEC;
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void nedfree2(void *mem, unsigned flags) THROWSPEC;
#endif

/*! \brief Equivalent to nedpmallinfo((nedpool *) 0) */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR struct nedmallinfo nedmallinfo(void) THROWSPEC;
/*! \brief Equivalent to nedpmallopt((nedpool *) 0, parno, value) */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR int    nedmallopt(int parno, int value) THROWSPEC;
/*! \brief Returns the internal allocation granularity and the magic header XOR used for internal consistency checks.

\param granularity Presumably receives the allocation granularity - confirm against the implementation.
\param magic Presumably receives the magic header XOR - confirm against the implementation.

NOTE(review): the meaning of the returned pointer is not documented in this header.
*/
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void*  nedmalloc_internals(size_t *granularity, size_t *magic) THROWSPEC;
/*! \brief Equivalent to nedpmalloc_trim((nedpool *) 0, pad) */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR int    nedmalloc_trim(size_t pad) THROWSPEC;
/*! \brief Equivalent to nedpmalloc_stats((nedpool *) 0) */
NEDMALLOCEXTSPEC void   nedmalloc_stats(void) THROWSPEC;
/*! \brief Equivalent to nedpmalloc_footprint((nedpool *) 0) */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR size_t nedmalloc_footprint(void) THROWSPEC;
/*! \brief Equivalent to nedpindependent_calloc((nedpool *) 0, elemsno, elemsize, chunks) */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedindependent_calloc(size_t elemsno, size_t elemsize, void **chunks) THROWSPEC;
/*! \brief Equivalent to nedpindependent_comalloc((nedpool *) 0, elems, sizes, chunks) */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedindependent_comalloc(size_t elems, size_t *sizes, void **chunks) THROWSPEC;

/*! \brief Destroys the system memory pool used by the functions above.

Useful for when you have nedmalloc in a DLL you're about to unload.
If you call ANY nedmalloc functions after calling this you will
get a fatal exception!

Takes no arguments. The parameter list is spelled (void) so that C callers see
a real prototype; an empty () in C leaves the arguments unchecked.
*/
NEDMALLOCEXTSPEC void neddestroysyspool(void) THROWSPEC;

/*! \brief A nedpool type

Only forward-declared here: the declarations in this header use pools purely
through nedpool pointers.
*/
struct nedpool_t;
/*! \brief A nedpool type (shorthand for struct nedpool_t) */
typedef struct nedpool_t nedpool;

/*! \brief Creates a memory pool for use with the nedp* functions below.

Capacity is how much to allocate immediately (if you know you'll be allocating a lot
of memory very soon) which you can leave at zero. Threads specifies how many threads
will *normally* be accessing the pool concurrently. Setting this to zero means it
extends on demand, but be careful of this as it can rapidly consume system resources
where bursts of concurrent threads use a pool at once.

\param capacity Amount to allocate immediately; may be zero.
\param threads Number of threads normally using the pool concurrently; zero extends on demand.
\return The new pool. NOTE(review): the failure result is not documented here -
presumably a null pointer; confirm against the implementation.
*/
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR nedpool *nedcreatepool(size_t capacity, int threads) THROWSPEC;

/*! \brief Destroys a memory pool previously created by nedcreatepool().

\param p The pool to destroy.
*/
NEDMALLOCEXTSPEC void neddestroypool(nedpool *p) THROWSPEC;

/*! \brief Returns a zero terminated snapshot of threadpools existing at the time of call.

Call nedfree() on the returned list when you are done. Returns zero if there is only the
system pool in existence.

Takes no arguments. The parameter list is spelled (void) so that C callers see
a real prototype; an empty () in C leaves the arguments unchecked.
*/
NEDMALLOCEXTSPEC nedpool **nedpoollist(void) THROWSPEC;

/*! \brief Sets a value to be associated with a pool.

You can retrieve this value by passing any memory block allocated from that pool.

\param p The pool to tag.
\param v The value to associate with the pool.
*/
NEDMALLOCEXTSPEC void nedpsetvalue(nedpool *p, void *v) THROWSPEC;

/*! \brief Gets a previously set value using nedpsetvalue() or zero if memory is unknown.

Optionally can also retrieve pool. You can detect an unknown block by the return
being zero and *p being unmodified.

\param p Optional output: receives the pool the block belongs to.
\param mem A memory block allocated from the pool of interest.
\return The value set with nedpsetvalue(), or zero if the block is unknown.
*/
NEDMALLOCEXTSPEC void *nedgetvalue(nedpool **p, void *mem) THROWSPEC;

/*! \brief Trims the thread cache for the calling thread, returning any existing cache
data to the central pool.

Remember to ALWAYS call with zero if you used the system pool. Setting disable to
non-zero replicates neddisablethreadcache().

\param p The pool whose thread cache is trimmed; zero for the system pool.
\param disable Non-zero behaves like neddisablethreadcache().
*/
NEDMALLOCEXTSPEC void nedtrimthreadcache(nedpool *p, int disable) THROWSPEC;

/*! \brief Disables the thread cache for the calling thread, returning any existing cache
data to the central pool.

Remember to ALWAYS call with zero if you used the system pool.

\param p The pool whose thread cache is disabled; zero for the system pool.
*/
NEDMALLOCEXTSPEC void neddisablethreadcache(nedpool *p) THROWSPEC;

/*! \brief Releases all memory in all threadcaches in the pool, and writes all
accumulated memory operations to the log if enabled.

You can pass zero for filepath to use the compiled default, or else a char[MAX_PATH]
containing the path you wish to use for the log file. The log file is always
appended to if it already exists. After writing the logs, the logging ability
is disabled for that pool.

\param p The pool to flush.
\param filepath Zero for the compiled default, or a char[MAX_PATH] holding the log file path.

NOTE(review): the meaning of the size_t return value is not documented in this
header - confirm against the implementation.

\warning Do NOT call this if the pool is in use - this call is NOT threadsafe.
*/
NEDMALLOCEXTSPEC size_t nedflushlogs(nedpool *p, char *filepath) THROWSPEC;


/* Pool-specific allocation API. The classic-style functions are documented as
calls into the v2 functions nedpmalloc2(), nedprealloc2() and nedpfree2(). */

/*! \brief Equivalent to nedpmalloc2(p, size, 0, 0) */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedpmalloc(nedpool *p, size_t size) THROWSPEC;
/*! \brief Equivalent to nedpmalloc2(p, no*size, 0, M2_ZERO_MEMORY) */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedpcalloc(nedpool *p, size_t no, size_t size) THROWSPEC;
/*! \brief Equivalent to nedprealloc2(p, mem, size, 0, M2_RESERVE_MULT(8)) */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedprealloc(nedpool *p, void *mem, size_t size) THROWSPEC;
/*! \brief Equivalent to nedpfree2(p, mem, 0) */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void   nedpfree(nedpool *p, void *mem) THROWSPEC;
/*! \brief Equivalent to nedpmalloc2(p, bytes, alignment, 0) */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedpmemalign(nedpool *p, size_t alignment, size_t bytes) THROWSPEC;
#if defined(__cplusplus)
/*! \ingroup v2malloc
\brief Allocates a block of memory sized \em size from pool \em p, aligned to \em alignment and according to the flags \em flags.
*/
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedpmalloc2(nedpool *p, size_t size, size_t alignment=0, unsigned flags=0) THROWSPEC;
/*! \ingroup v2malloc
\brief Resizes the block of memory at \em mem in pool \em p to size \em size, aligned to \em alignment and according to the flags \em flags.
*/
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedprealloc2(nedpool *p, void *mem, size_t size, size_t alignment=0, unsigned flags=0) THROWSPEC;
/*! \ingroup v2malloc
\brief Frees the block \em mem from the pool \em p according to flags \em flags. */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void   nedpfree2(nedpool *p, void *mem, unsigned flags=0) THROWSPEC;
#else
/* C declarations of the same three functions: C has no default arguments, so
alignment and flags must always be passed explicitly (pass 0 for the defaults). */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedpmalloc2(nedpool *p, size_t size, size_t alignment, unsigned flags) THROWSPEC;
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedprealloc2(nedpool *p, void *mem, size_t size, size_t alignment, unsigned flags) THROWSPEC;
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void   nedpfree2(nedpool *p, void *mem, unsigned flags) THROWSPEC;
#endif
/*! \brief Returns information about the memory pool
\param p The pool to query.
\return A struct nedmallinfo snapshot, returned by value.
*/
NEDMALLOCEXTSPEC struct nedmallinfo nedpmallinfo(nedpool *p) THROWSPEC;
/*! \brief Changes the operational parameters of the memory pool

NOTE(review): the valid \em parno / \em value pairs and the meaning of the int
result are not documented in this header - confirm against the implementation.
*/
NEDMALLOCEXTSPEC int    nedpmallopt(nedpool *p, int parno, int value) THROWSPEC;
/*! \brief Tries to release as much free memory back to the system as possible, leaving \em pad remaining per threadpool.

NOTE(review): the meaning of the int result is not documented in this header.
*/
NEDMALLOCEXTSPEC int    nedpmalloc_trim(nedpool *p, size_t pad) THROWSPEC;
/*! \brief Prints some operational statistics to stdout. */
NEDMALLOCEXTSPEC void   nedpmalloc_stats(nedpool *p) THROWSPEC;
/*! \brief Returns how much memory is currently in use by the memory pool */
NEDMALLOCEXTSPEC size_t nedpmalloc_footprint(nedpool *p) THROWSPEC;
/*! \brief Returns a series of guaranteed consecutive cleared memory allocations.

  The text below describes independent_calloc; this function additionally
  takes the pool \em p as its first argument. The names n_elements and
  elem_size used below presumably correspond to the \em elemsno and
  \em elemsize parameters - confirm against the implementation.

  independent_calloc is similar to calloc, but instead of returning a
  single cleared space, it returns an array of pointers to n_elements
  independent elements that can hold contents of size elem_size, each
  of which starts out cleared, and can be independently freed,
  realloc'ed etc. The elements are guaranteed to be adjacently
  allocated (this is not guaranteed to occur with multiple callocs or
  mallocs), which may also improve cache locality in some
  applications.

  The "chunks" argument is optional (i.e., may be null, which is
  probably the most typical usage). If it is null, the returned array
  is itself dynamically allocated and should also be freed when it is
  no longer needed. Otherwise, the chunks array must be of at least
  n_elements in length. It is filled in with the pointers to the
  chunks.

  In either case, independent_calloc returns this pointer array, or
  null if the allocation failed.  If n_elements is zero and "chunks"
  is null, it returns a chunk representing an array with zero elements
  (which should be freed if not wanted).

  Each element must be individually freed when it is no longer
  needed. If you'd like to instead be able to free all at once, you
  should instead use regular calloc and assign pointers into this
  space to represent elements.  (In this case though, you cannot
  independently free elements.)

  independent_calloc simplifies and speeds up implementations of many
  kinds of pools.  It may also be useful when constructing large data
  structures that initially have a fixed number of fixed-sized nodes,
  but the number is not known at compile time, and some of the nodes
  may later need to be freed. For example:

  struct Node { int item; struct Node* next; };

  struct Node* build_list() {
    struct Node** pool;
    int n = read_number_of_nodes_needed();
    if (n <= 0) return 0;
    pool = (struct Node**)independent_calloc(n, sizeof(struct Node), 0);
    if (pool == 0) die();
    // organize into a linked list...
    struct Node* first = pool[0];
    for (i = 0; i < n-1; ++i)
      pool[i]->next = pool[i+1];
    free(pool);     // Can now free the array (or not, if it is needed later)
    return first;
  }
*/
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedpindependent_calloc(nedpool *p, size_t elemsno, size_t elemsize, void **chunks) THROWSPEC;
/*! \brief Returns a series of guaranteed consecutive allocations.

  The text below describes independent_comalloc; this function additionally
  takes the pool \em p as its first argument. The name n_elements used
  below presumably corresponds to the \em elems parameter - confirm against
  the implementation.

  independent_comalloc allocates, all at once, a set of n_elements
  chunks with sizes indicated in the "sizes" array.    It returns
  an array of pointers to these elements, each of which can be
  independently freed, realloc'ed etc. The elements are guaranteed to
  be adjacently allocated (this is not guaranteed to occur with
  multiple callocs or mallocs), which may also improve cache locality
  in some applications.

  The "chunks" argument is optional (i.e., may be null). If it is null
  the returned array is itself dynamically allocated and should also
  be freed when it is no longer needed. Otherwise, the chunks array
  must be of at least n_elements in length. It is filled in with the
  pointers to the chunks.

  In either case, independent_comalloc returns this pointer array, or
  null if the allocation failed.  If n_elements is zero and chunks is
  null, it returns a chunk representing an array with zero elements
  (which should be freed if not wanted).

  Each element must be individually freed when it is no longer
  needed. If you'd like to instead be able to free all at once, you
  should instead use a single regular malloc, and assign pointers at
  particular offsets in the aggregate space. (In this case though, you
  cannot independently free elements.)

  independent_comalloc differs from independent_calloc in that each
  element may have a different size, and also that it does not
  automatically clear elements.

  independent_comalloc can be used to speed up allocation in cases
  where several structs or objects must always be allocated at the
  same time.  For example:

  struct Head { ... }
  struct Foot { ... }

  void send_message(char* msg) {
    int msglen = strlen(msg);
    size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) };
    void* chunks[3];
    if (independent_comalloc(3, sizes, chunks) == 0)
      die();
    struct Head* head = (struct Head*)(chunks[0]);
    char*        body = (char*)(chunks[1]);
    struct Foot* foot = (struct Foot*)(chunks[2]);
    // ...
  }

  In general though, independent_comalloc is worth using only for
  larger values of n_elements. For small values, you probably won't
  detect enough difference from series of malloc calls to bother.

  Overuse of independent_comalloc can increase overall memory usage,
  since it cannot reuse existing noncontiguous small chunks that
  might be available for some of the elements.
*/
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedpindependent_comalloc(nedpool *p, size_t elems, size_t *sizes, void **chunks) THROWSPEC;

#if defined(__cplusplus)
} /* namespace or extern "C" */
#include <new>
#include <memory>
#ifdef HAVE_CPP0XTYPETRAITS
#include <type_traits>
#endif

// Touch into existence for future platforms
namespace std { namespace tr1 { } }

/*! \defgroup C++ C++ language support

Thanks to the generous support of Applied Research Associates (USA), nedalloc has extensive
C++ language support which uses C++ metaprogramming techniques to provide a policy driven
STL container reimplementor. The metaprogramming silently overrides or replaces the STL implementation
on your system (MSVC and GCC are the two currently supported) to \b substantially improve
the performance of STL containers by making use of nedalloc's additional features.

Sounds difficult to use? Not really. Simply do this:
\code
using namespace nedalloc;
typedef nedallocatorise<std::vector, unsigned int, 
	nedpolicy::typeIsPOD<true>::policy,
	nedpolicy::mmap<>::policy,
	nedpolicy::reserveN<26>::policy			// 1<<26 = 64Mb. 10,000,000 * sizeof(unsigned int) = 38Mb.
>::value myvectortype;
myvectortype a;
for(int n=0; n<10000000; n++)
    a.push_back(n);
\endcode

The metaprogramming requires a new C++ compiler (> year 2008), and it will readily make use
of a C++0x compiler where it will use rvalue referencing, variadic templates, type traits and more.
Visual Studio 2008 or later is sufficient, as is GCC v4.4 or later.

nedalloc's metaprogramming is designed to be extensible, so the rest of this page is intended for those
wishing to customise the metaprogramming. If you simply wish to know how to use the
nedalloc::nedallocator STL allocator or the nedalloc::nedallocatorise STL reimplementor, please refer
to test.cpp which gives several examples of usage.

<h2>Extending the metaprogramming:</h2>
A nedallocator policy looks as follows:
\code
namespace nedpolicy {
	template<size_t size, size_t alignment> struct sizedalign
	{
		template<class Base> class policy : public Base
		{
			template<class implementation> friend class nedallocatorI::baseimplementation;
		protected:
			size_t policy_alignment(size_t bytes) const
			{
				return (bytes < size) ? alignment : 0;
			}
		};
	};
}
\endcode
The policy above implements a size based alignment, so if the block being allocated is
less than \em size then it causes \em alignment to be used, otherwise it does not align.
The sizedalign struct is merely a template parameter encapsulator used to capture
additional parameters, so the real policy is in fact the class policy held within it.
If you did not need to specify any additional parameters e.g. if you were defining
policy_nedpool(), then you would directly define a policy returning your nedpool and pass
it directly to nedallocator<>.

The primary policy functions which are intended to be overridden are listed in
nedalloc::nedallocatorI::baseimplementation in nedmalloc.h and are prefixed by "policy_".
However, there is absolutely no reason why the meatier functions such as
nedalloc::nedallocatorI::baseimplementation::allocate() cannot be overridden, and indeed
some of the policies defined in nedmalloc.h do just that.

Policy composition is handled by a dedicated recursive variadic template called
nedalloc::nedallocatorI::policycompositor. If you have \em really specialised needs, you
can partially specialise this class to make it do all sorts of interesting things - hence
its separation into its own class.
*/

/*! \brief The nedalloc namespace */
namespace nedalloc {

/*! \def NEDSTATIC_ASSERT(expr, msg)
\brief Generates a static assertion if (expr)==0 at compile time.

Make SURE your message contains no spaces or anything else which would make it an invalid
variable name.

Without HAVE_CPP0XSTATICASSERT the macro declares an object of type
nedalloc::StaticAssert<(expr)!=0> named ERROR_&lt;msg&gt;. Only the \c true specialisation
of StaticAssert is defined, so a false expression names an incomplete type and the
declaration fails to compile. With HAVE_CPP0XSTATICASSERT the macro expands to a
static_assert whose message is the stringised \em msg.
*/
#ifndef HAVE_CPP0XSTATICASSERT
// Declared only: StaticAssert<false> is never defined, which is what makes a failed assertion an error
template<bool> struct StaticAssert;
// The only complete specialisation, used when the asserted expression is non-zero
template<> struct StaticAssert<true>
{
	StaticAssert() { }
};
#define NEDSTATIC_ASSERT(expr, msg) \
	nedalloc::StaticAssert<(expr)!=0> ERROR_##msg
#else
#define NEDSTATIC_ASSERT(expr, msg) static_assert((expr)!=0, #msg )
#endif

/*! \brief The policy namespace in which all nedallocator policies live. */
namespace nedpolicy {
	/*! \class empty
	\ingroup C++
	\brief An empty policy which does nothing.

	It derives publicly from \em Base and adds no members of its own. It is the default
	value of the policy template parameters when HAVE_CPP0XVARIADICTEMPLATES is not defined.
	*/
	template<class Base> class empty : public Base
	{
	};
}

/*! \brief The implementation namespace where the internals live. */
namespace nedallocatorI
{
	using namespace std;
	using namespace tr1;

	/* Roll on variadic templates is all I can say! */
	/* policycompositor<Impl, A, B, ...>::value is the nested type A<B<...<Impl> > >:
	each policy template is instantiated with the composition of the policies that
	follow it, and Impl ends up innermost. */
#ifdef HAVE_CPP0XVARIADICTEMPLATES
	// Primary template is declared only; the partial specialisations do the work
	template<class Impl, template<class> class... policies> class policycompositor;
	// Recursive case: peel off the first policy A and wrap the composition of the rest in it
	template<class Impl, template<class> class A, template<class> class... policies> class policycompositor<Impl, A, policies...>
	{
		typedef policycompositor<Impl, policies...> temp;
	public:
		typedef A<typename temp::value> value;
	};
#else
	// Emulation accepting up to fifteen policies; unused slots default to nedpolicy::empty
	template<class Impl,
			template<class> class A=nedpolicy::empty,
			template<class> class B=nedpolicy::empty,
			template<class> class C=nedpolicy::empty,
			template<class> class D=nedpolicy::empty,
			template<class> class E=nedpolicy::empty,
			template<class> class F=nedpolicy::empty,
			template<class> class G=nedpolicy::empty,
			template<class> class H=nedpolicy::empty,
			template<class> class I=nedpolicy::empty,
			template<class> class J=nedpolicy::empty,
			template<class> class K=nedpolicy::empty,
			template<class> class L=nedpolicy::empty,
			template<class> class M=nedpolicy::empty,
			template<class> class N=nedpolicy::empty,
			template<class> class O=nedpolicy::empty
		> class policycompositor
	{
		// Drop A and shift the remaining policies up one slot; the vacated last slot takes its default
		typedef policycompositor<Impl, B, C, D, E, F, G, H, I, J, K, L, M, N, O> temp;
	public:
		typedef A<typename temp::value> value;
	};
#endif
	// Terminal case: no policies left (without variadic templates: every slot is
	// nedpolicy::empty), so the composition is simply Impl
	template<class Impl> class policycompositor<Impl>
	{
	public:
		typedef Impl value;
	};
}

// Forward declaration of nedallocator so that nedallocatorI::baseimplementation can be
// partially specialised on it before the class itself is defined. Without variadic
// templates it takes up to fifteen policies, each defaulting to nedpolicy::empty.
template<typename T,
#ifdef HAVE_CPP0XVARIADICTEMPLATES
	template<class> class... policies
#else
	template<class> class policy1=nedpolicy::empty,
	template<class> class policy2=nedpolicy::empty,
	template<class> class policy3=nedpolicy::empty,
	template<class> class policy4=nedpolicy::empty,
	template<class> class policy5=nedpolicy::empty,
	template<class> class policy6=nedpolicy::empty,
	template<class> class policy7=nedpolicy::empty,
	template<class> class policy8=nedpolicy::empty,
	template<class> class policy9=nedpolicy::empty,
	template<class> class policy10=nedpolicy::empty,
	template<class> class policy11=nedpolicy::empty,
	template<class> class policy12=nedpolicy::empty,
	template<class> class policy13=nedpolicy::empty,
	template<class> class policy14=nedpolicy::empty,
	template<class> class policy15=nedpolicy::empty
#endif
> class nedallocator;

namespace nedallocatorI
{
	/*! \brief The base implementation class

	The primary template is intentionally empty; only the partial specialisation for
	nedallocator<T, policies...> below provides any functionality.
	*/
	template<class implementation> class baseimplementation
	{
		//NEDSTATIC_ASSERT(false, Bad_policies_specified);
	};
	/*! \brief The base implementation class

	This is the innermost class of the policy chain composed by policycompositor. It
	supplies the default \c policy_ functions and the STL allocator interface. The
	allocator functions call the \c policy_ functions through _this(), i.e. on the most
	derived nedallocator type, so that a policy further out in the chain can replace them.
	*/
	template<typename T,
#ifdef HAVE_CPP0XVARIADICTEMPLATES
		template<class> class... policies
#else
		template<class> class policy1,
		template<class> class policy2,
		template<class> class policy3,
		template<class> class policy4,
		template<class> class policy5,
		template<class> class policy6,
		template<class> class policy7,
		template<class> class policy8,
		template<class> class policy9,
		template<class> class policy10,
		template<class> class policy11,
		template<class> class policy12,
		template<class> class policy13,
		template<class> class policy14,
		template<class> class policy15
#endif
	> class baseimplementation<nedallocator<T,
#ifdef HAVE_CPP0XVARIADICTEMPLATES
policies...
#else
	policy1, policy2, policy3, policy4, policy5,
	policy6, policy7, policy8, policy9, policy10,
	policy11, policy12, policy13, policy14, policy15
#endif
	> >
	{
	protected:
		//! \brief The most derived nedallocator implementation type
		typedef nedallocator<T,
#ifdef HAVE_CPP0XVARIADICTEMPLATES
			policies...
#else
			policy1, policy2, policy3, policy4, policy5,
			policy6, policy7, policy8, policy9, policy10,
			policy11, policy12, policy13, policy14, policy15
#endif
		> implementationType;
		//! \brief Returns a this for the most derived nedallocator implementation type
		implementationType *_this() { return static_cast<implementationType *>(this); }
		//! \brief Returns a this for the most derived nedallocator implementation type
		const implementationType *_this() const { return static_cast<const implementationType *>(this); }
		//! \brief Specifies the nedpool to use. Defaults to zero (the system pool).
		nedpool *policy_nedpool(size_t bytes) const
		{
			return 0;
		}
		//! \brief Specifies the granularity to use. Defaults to \em bytes (no granularity).
		size_t policy_granularity(size_t bytes) const
		{
			return bytes;
		}
		//! \brief Specifies the alignment to use. Defaults to zero (no alignment).
		size_t policy_alignment(size_t bytes) const
		{
			return 0;
		}
		//! \brief Specifies the flags to use. Defaults to zero (no flags).
		unsigned policy_flags(size_t bytes) const
		{
			return 0;
		}
		//! \brief Specifies what to do when the allocation fails. Defaults to throwing std::bad_alloc.
		void policy_throwbadalloc(size_t bytes) const
		{
			throw std::bad_alloc();
		}
		/*! \brief Specifies if the type is POD.

		With HAVE_CPP0XTYPETRAITS this is is_pod<T>::value on GCC older than 4.9 and
		is_trivially_copyable<T>::value on every other compiler; without it, false.
		*/
		static const bool policy_typeIsPOD=
#ifdef HAVE_CPP0XTYPETRAITS
#if defined(__GNUC__) && (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) < 40900
			is_pod<T>::value;
#else
			is_trivially_copyable<T>::value;
#endif
#else
			false;
#endif
	public:
		typedef T *pointer;
		typedef const T *const_pointer;
		typedef T &reference;
		typedef const T &const_reference;
		typedef T value_type;
		typedef size_t size_type;
		typedef ptrdiff_t difference_type;
		T *address(T &r) const { return &r; }
		const T *address(const T &s) const { return &s; }
		//! \brief The largest element count whose size in bytes still fits in a size_t
		size_t max_size() const { return (static_cast<size_t>(0) - static_cast<size_t>(1)) / sizeof(T); }
		bool operator!=(const baseimplementation &other) const { return !(*this == other); }
		//! \brief All instances compare equal
		bool operator==(const baseimplementation &other) const { return true; }

		//! \brief Copy constructs a T from \em t into the storage at \em p using placement new
		void construct(T *const p, const T &t) const {
			void *const _p = static_cast<void *>(p);
			new (_p) T(t);
		}
		//! \brief Runs the destructor of the object at \em p without releasing its storage
		void destroy(T *const p) const {
			p->~T();
		}
		baseimplementation() { }
		baseimplementation(const baseimplementation &) { }
#ifdef HAVE_CPP0XRVALUEREFS
		baseimplementation(baseimplementation &&) { }
#endif
		//! \brief Names the nedallocator for another value type \em U with the same policies
		template<typename U> struct rebind {
			typedef nedallocator<U,
#ifdef HAVE_CPP0XVARIADICTEMPLATES
				policies...
#else
				policy1, policy2, policy3, policy4, policy5,
				policy6, policy7, policy8, policy9, policy10,
				policy11, policy12, policy13, policy14, policy15
#endif
			> other;
		};
		//! \brief Converting constructor from an allocator for another value type; copies nothing
		template<typename U> baseimplementation(const nedallocator<U,
#ifdef HAVE_CPP0XVARIADICTEMPLATES
			policies...
#else
			policy1, policy2, policy3, policy4, policy5,
			policy6, policy7, policy8, policy9, policy10,
			policy11, policy12, policy13, policy14, policy15
#endif
		> &) { }

		/*! \brief Allocates storage for \em n objects of type T.

		The byte count n*sizeof(T) is passed through policy_granularity(), and the result
		is used to query policy_nedpool(), policy_alignment() and policy_flags() before
		calling nedpmalloc2(). If \em n exceeds max_size(), or nedpmalloc2() returns null,
		policy_throwbadalloc() is invoked; should that policy return instead of throwing,
		a null pointer is returned.
		*/
		T *allocate(const size_t n) const {
			// Leave these spelled out to aid debugging
			const size_t t_size = sizeof(T);
			// n*t_size would wrap around size_t and yield an undersized block, so fail instead
			if(n > max_size())
			{
				// The real byte count is unrepresentable, so report the largest size_t
				_this()->policy_throwbadalloc(static_cast<size_t>(0) - static_cast<size_t>(1));
				return 0;	// only reached when the policy chooses not to throw
			}
			size_t size = _this()->policy_granularity(n*t_size);
			nedpool *pool = _this()->policy_nedpool(size);
			size_t alignment = _this()->policy_alignment(size);
			unsigned flags = _this()->policy_flags(size);
			void *ptr = nedpmalloc2(pool, size, alignment, flags);
			if(!ptr)
				_this()->policy_throwbadalloc(size);
			return static_cast<T *>(ptr);
		}
		//! \brief Releases storage obtained from allocate(). \em n is ignored and a zero pool is passed to nedpfree().
		void deallocate(T *p, const size_t n) const {
			nedpfree(0/*not needed*/, p);
		}
		//! \brief Allocation with a locality hint; the hint is ignored
		template<typename U> T *allocate(const size_t n, const U * /* hint */) const {
			return allocate(n);
		}
	private:
		// Declared but not defined: assignment is disabled
		baseimplementation &operator=(const baseimplementation &);
	};

}

namespace nedpolicy
{
	/*! \class granulate
	\ingroup C++
	\brief A policy which rounds the size of each allocation up to a multiple of \em granularity.

	The rounded size is (size+granularity-1) & ~(granularity-1), which is only a
	multiple of \em granularity when \em granularity is a power of two - so it
	\b must be one.
	*/
	template<size_t granularity> struct granulate
	{
		template<class Base> class policy : public Base
		{
			template<class implementation> friend class nedallocatorI::baseimplementation;
		protected:
			size_t policy_granularity(size_t bytes) const
			{
				// Low bits which must be clear in a multiple of granularity
				const size_t mask = granularity-1;
				const size_t padded = bytes+mask;
				return padded & ~mask;
			}
		};
	};
	/*! \class align
	\ingroup C++
	\brief A policy setting the alignment of the allocated memory.

	policy_alignment() returns the \em alignment template parameter for every
	allocation, whatever its size, and does not consult \em Base.
	*/
	template<size_t alignment> struct align
	{
		template<class Base> class policy : public Base
		{
			// baseimplementation invokes the protected policy_ functions, hence the friendship
			template<class implementation> friend class nedallocatorI::baseimplementation;
		protected:
			size_t policy_alignment(size_t bytes) const
			{
				return alignment;
			}
		};
	};
	/*! \class zero
	\ingroup C++
	\brief A policy requesting that allocated memory be zeroed.

	When \em dozero is true, M2_ZERO_MEMORY is added to the flags supplied by
	\em Base; otherwise the flags of \em Base are passed through unchanged.
	*/
	template<bool dozero=true> struct zero
	{
		template<class Base> class policy : public Base
		{
			template<class implementation> friend class nedallocatorI::baseimplementation;
		protected:
			unsigned policy_flags(size_t bytes) const
			{
				unsigned flags = Base::policy_flags(bytes);
				if(dozero)
					flags |= M2_ZERO_MEMORY;
				return flags;
			}
		};
	};
	/*! \class preventmove
	\ingroup C++
	\brief A policy requesting that allocated memory is never moved.

	When \em doprevent is true, M2_PREVENT_MOVE is added to the flags supplied by
	\em Base; otherwise the flags of \em Base are passed through unchanged.
	*/
	template<bool doprevent=true> struct preventmove
	{
		template<class Base> class policy : public Base
		{
			template<class implementation> friend class nedallocatorI::baseimplementation;
		protected:
			unsigned policy_flags(size_t bytes) const
			{
				unsigned flags = Base::policy_flags(bytes);
				if(doprevent)
					flags |= M2_PREVENT_MOVE;
				return flags;
			}
		};
	};
	/*! \class mmap
	\ingroup C++
	\brief A policy requesting that allocated memory always be mmapped.

	When \em dommap is true, M2_ALWAYS_MMAP is added to the flags supplied by
	\em Base; otherwise the flags of \em Base are passed through unchanged.
	*/
	template<bool dommap=true> struct mmap
	{
		template<class Base> class policy : public Base
		{
			template<class implementation> friend class nedallocatorI::baseimplementation;
		protected:
			unsigned policy_flags(size_t bytes) const
			{
				unsigned flags = Base::policy_flags(bytes);
				if(dommap)
					flags |= M2_ALWAYS_MMAP;
				return flags;
			}
		};
	};
	/*! \class reserveX
	\ingroup C++
	\brief A policy causing the address reservation of X times the allocated memory.

	M2_RESERVE_MULT(X) is added to the flags supplied by \em Base.
	*/
	template<size_t X> struct reserveX
	{
		template<class Base> class policy : public Base
		{
			template<class implementation> friend class nedallocatorI::baseimplementation;
		protected:
			unsigned policy_flags(size_t bytes) const
			{
				const unsigned inherited = Base::policy_flags(bytes);
				return inherited|M2_RESERVE_MULT(X);
			}
		};
	};
	/*! \class reserveN
	\ingroup C++
	\brief A policy causing the address reservation of (1<<N) bytes of memory.

	M2_RESERVE_SHIFT(N) is added to the flags supplied by \em Base.
	*/
	template<size_t N> struct reserveN
	{
		template<class Base> class policy : public Base
		{
			template<class implementation> friend class nedallocatorI::baseimplementation;
		protected:
			unsigned policy_flags(size_t bytes) const
			{
				const unsigned inherited = Base::policy_flags(bytes);
				return inherited|M2_RESERVE_SHIFT(N);
			}
		};
	};
	/*! \class badalloc
	\ingroup C++
	\brief A policy specifying what to throw when an allocation failure occurs.

	On failure a default constructed instance of \em T is thrown.

	A type specialisation exists for badalloc<void> which is equivalent to new(nothrow)
	i.e. return zero and don't throw anything.
	*/
	template<typename T> struct badalloc
	{
		template<class Base> class policy : public Base
		{
			template<class implementation> friend class nedallocatorI::baseimplementation;
		protected:
			void policy_throwbadalloc(size_t bytes) const
			{
				throw T();
			}
		};
	};
	// Specialisation for void: the failure handler does nothing, so
	// baseimplementation::allocate() carries on and returns the null pointer.
	template<> struct badalloc<void>
	{
		template<class Base> class policy : public Base
		{
			template<class implementation> friend class nedallocatorI::baseimplementation;
		protected:
			void policy_throwbadalloc(size_t bytes) const
			{
				// Intentionally empty: nothing is thrown
			}
		};
	};
	/*! \class typeIsPOD
	\ingroup C++
	\brief A policy forcing the treatment of the type as Plain Old Data (POD)

	On C++0x compilers, the &lt;type_traits&gt; is_trivially_copyable&lt;type&gt;::value is used by default.
	When treated as POD, memcpy() is used instead
	of copy construction and realloc() is permitted to move the memory contents when
	resizing.

	The policy declares its own policy_typeIsPOD constant set to \em ispod, which hides
	the one declared by classes further down the policy chain.
	*/
	template<bool ispod> struct typeIsPOD
	{
		template<class Base> class policy : public Base
		{
			template<class implementation> friend class nedallocatorI::baseimplementation;
		protected:
			static const bool policy_typeIsPOD=ispod;
		};
	};
}

/*! \class nedallocator
\ingroup C++
\brief A policy driven STL allocator which uses nedmalloc

One of the lesser known features of STL container classes is their ability to take
an allocator implementation class, so where you had std::vector<Foo> you can now
have std::vector<Foo, nedalloc::nedallocator<Foo> > such that
std::vector<> will now use nedalloc as the policy specifies.

You <b>almost certainly</b> don't want to use this directly except in the naive
case. See nedalloc::nedallocatorise to see what I mean.
*/
template<typename T,
#ifdef HAVE_CPP0XVARIADICTEMPLATES
	template<class> class... policies
#else
	template<class> class policy1,
	template<class> class policy2,
	template<class> class policy3,
	template<class> class policy4,
	template<class> class policy5,
	template<class> class policy6,
	template<class> class policy7,
	template<class> class policy8,
	template<class> class policy9,
	template<class> class policy10,
	template<class> class policy11,
	template<class> class policy12,
	template<class> class policy13,
	template<class> class policy14,
	template<class> class policy15
#endif
> class nedallocator : public nedallocatorI::policycompositor<
#ifdef HAVE_CPP0XVARIADICTEMPLATES
	nedallocatorI::baseimplementation<nedallocator<T, policies...> >,
	policies...
#else
	nedallocatorI::baseimplementation<nedallocator<T,
	policy1, policy2, policy3, policy4, policy5,
	policy6, policy7, policy8, policy9, policy10,
	policy11, policy12, policy13, policy14, policy15
	> >,
	policy1, policy2, policy3, policy4, policy5,
	policy6, policy7, policy8, policy9, policy10,
	policy11, policy12, policy13, policy14, policy15
#endif
>::value
{
	// Private shorthand for the composed policy chain this class derives from: the
	// policies wrapped around nedallocatorI::baseimplementation<nedallocator<...> >
	typedef typename nedallocatorI::policycompositor<
#ifdef HAVE_CPP0XVARIADICTEMPLATES
		nedallocatorI::baseimplementation<nedallocator<T, policies...> >,
		policies...
#else
		nedallocatorI::baseimplementation<nedallocator<T,
		policy1, policy2, policy3, policy4, policy5,
		policy6, policy7, policy8, policy9, policy10,
		policy11, policy12, policy13, policy14, policy15
		> >,
		policy1, policy2, policy3, policy4, policy5,
		policy6, policy7, policy8, policy9, policy10,
		policy11, policy12, policy13, policy14, policy15
#endif
	>::value Base;
public:
	nedallocator() { }
	// Copy and move construction forward to the policy chain
	nedallocator(const nedallocator &o) : Base(o) { }
#ifdef HAVE_CPP0XRVALUEREFS
	nedallocator(nedallocator &&o) : Base(std::move(o)) { }
#endif
	/* This templated constructor and rebind() are used by MSVC's secure iterator checker.
	I think it's best to not copy state even though it may break policies which store data. */
	// Base is default constructed here: nothing is taken from o
	template<typename U> nedallocator(const nedallocator<U,
#ifdef HAVE_CPP0XVARIADICTEMPLATES
		policies...
#else
		policy1, policy2, policy3, policy4, policy5,
		policy6, policy7, policy8, policy9, policy10,
		policy11, policy12, policy13, policy14, policy15
#endif
	> &o) { }
#ifdef HAVE_CPP0XRVALUEREFS
	// As above for rvalues: Base is default constructed and o is left untouched
	template<typename U> nedallocator(nedallocator<U,
#ifdef HAVE_CPP0XVARIADICTEMPLATES
		policies...
#else
		policy1, policy2, policy3, policy4, policy5,
		policy6, policy7, policy8, policy9, policy10,
		policy11, policy12, policy13, policy14, policy15
#endif
	> &&o) { }
#endif

	// Names the nedallocator for another value type U with the same policies
	template<typename U> struct rebind {
		typedef nedallocator<U,
#ifdef HAVE_CPP0XVARIADICTEMPLATES
			policies...
#else
			policy1, policy2, policy3, policy4, policy5,
			policy6, policy7, policy8, policy9, policy10,
			policy11, policy12, policy13, policy14, policy15
#endif
		> other;
	};
};

namespace nedallocatorI {
	// Holds a static allocator instance shared by anything allocating from allocator
	template<class allocator> struct StaticAllocator
	{
		static allocator &get()
		{
			static allocator a;
			return a;
		}
	};
	// RAII holder for a Newed object: unless release() has been called, the destructor
	// returns the storage for the single T to the shared allocator instance.
	template<typename T, class allocator> struct PtrHolder
	{
		T *mem;
		PtrHolder(T *_mem) : mem(_mem) { }
		~PtrHolder()
		{
			if(mem)
			{
				allocator &a=nedallocatorI::StaticAllocator<allocator>::get();
				// The count is in elements, not bytes, and must match the allocate(1) in New()
				a.deallocate(mem, 1);
				mem=0;
			}
		}
		// Gives up ownership: the destructor will no longer free the storage
		T *release() { T *ret=mem; mem=0; return ret; }
		// Note these return the held pointer itself rather than a reference to the object
		T *operator *() { return mem; }
		const T *operator *() const { return mem; }
	};
}
/*! \brief Allocates the memory for an instance of object \em T and constructs it.

Storage for exactly one \em T is requested from the shared instance of \em allocator.
If the allocator returns a null pointer rather than throwing, no construction takes
place and null is returned.

If an exception is thrown during construction, the memory is freed before
rethrowing the exception.

Usage is very simple:
\code
	SSEVectorType *foo1=New<SSEVectorType>(4, 5, 6, 7);
\endcode
*/
#ifdef HAVE_CPP0XVARIADICTEMPLATES
template<typename T, class allocator=nedallocator<T>, typename... Parameters> inline T *New(const Parameters&... parameters)
#else
template<typename T, class allocator> inline T *New()
#endif
{
	allocator &a=nedallocatorI::StaticAllocator<allocator>::get();
	// allocate() takes an element count, so ask for one T (not sizeof(T) of them)
	nedallocatorI::PtrHolder<T, allocator> ret(a.allocate(1));
	if(*ret)
	{
#ifdef HAVE_CPP0XVARIADICTEMPLATES
		new((void *) *ret) T(parameters...);
#else
		new((void *) *ret) T;
#endif
	}
	return ret.release();
}
#ifndef HAVE_CPP0XVARIADICTEMPLATES
// Extremely annoying not to have default template arguments for functions pre-C++0x
template<typename T> inline T *New()
{
	return New<T, nedallocator<T> >();
}
// Also, it's painful to replicate function overloads :(
// NEDMALLOC_NEWIMPL stamps out one New<T, allocator>(...) and one New<T>(...) overload
// for the parameter list described by the three helper macros defined before each use.
#define NEDMALLOC_NEWIMPL \
template<typename T, class allocator, NEDMALLOC_NEWIMPLTYPES> inline T *New(NEDMALLOC_NEWIMPLPARSDEFS) \
{ \
	allocator &a=nedallocatorI::StaticAllocator<allocator>::get(); \
	nedallocatorI::PtrHolder<T, allocator> ret(a.allocate(1)); \
	if(*ret) \
	{ \
		new((void *) *ret) T(NEDMALLOC_NEWIMPLPARS); \
	} \
	return ret.release(); \
} \
template<typename T, NEDMALLOC_NEWIMPLTYPES> inline T *New(NEDMALLOC_NEWIMPLPARSDEFS)\
{ \
	return New<T, nedallocator<T> >(NEDMALLOC_NEWIMPLPARS); \
}
// One constructor parameter
#define NEDMALLOC_NEWIMPLTYPES typename P1
#define NEDMALLOC_NEWIMPLPARSDEFS const P1 &p1
#define NEDMALLOC_NEWIMPLPARS p1
NEDMALLOC_NEWIMPL
#undef NEDMALLOC_NEWIMPLTYPES
#undef NEDMALLOC_NEWIMPLPARSDEFS
#undef NEDMALLOC_NEWIMPLPARS

// Two constructor parameters
#define NEDMALLOC_NEWIMPLTYPES typename P1, typename P2
#define NEDMALLOC_NEWIMPLPARSDEFS const P1 &p1, const P2 &p2
#define NEDMALLOC_NEWIMPLPARS p1, p2
NEDMALLOC_NEWIMPL
#undef NEDMALLOC_NEWIMPLTYPES
#undef NEDMALLOC_NEWIMPLPARSDEFS
#undef NEDMALLOC_NEWIMPLPARS

// Three constructor parameters
#define NEDMALLOC_NEWIMPLTYPES typename P1, typename P2, typename P3
#define NEDMALLOC_NEWIMPLPARSDEFS const P1 &p1, const P2 &p2, const P3 &p3
#define NEDMALLOC_NEWIMPLPARS p1, p2, p3
NEDMALLOC_NEWIMPL
#undef NEDMALLOC_NEWIMPLTYPES
#undef NEDMALLOC_NEWIMPLPARSDEFS
#undef NEDMALLOC_NEWIMPLPARS

// Four constructor parameters
#define NEDMALLOC_NEWIMPLTYPES typename P1, typename P2, typename P3, typename P4
#define NEDMALLOC_NEWIMPLPARSDEFS const P1 &p1, const P2 &p2, const P3 &p3, const P4 &p4
#define NEDMALLOC_NEWIMPLPARS p1, p2, p3, p4
NEDMALLOC_NEWIMPL
#undef NEDMALLOC_NEWIMPLTYPES
#undef NEDMALLOC_NEWIMPLPARSDEFS
#undef NEDMALLOC_NEWIMPLPARS

// Five constructor parameters
#define NEDMALLOC_NEWIMPLTYPES typename P1, typename P2, typename P3, typename P4, typename P5
#define NEDMALLOC_NEWIMPLPARSDEFS const P1 &p1, const P2 &p2, const P3 &p3, const P4 &p4, const P5 &p5
#define NEDMALLOC_NEWIMPLPARS p1, p2, p3, p4, p5
NEDMALLOC_NEWIMPL
#undef NEDMALLOC_NEWIMPLTYPES
#undef NEDMALLOC_NEWIMPLPARSDEFS
#undef NEDMALLOC_NEWIMPLPARS

#undef NEDMALLOC_NEWIMPL
#endif

/*! \brief Destructs an instance of object T, and releases the memory used to store it.

The destructor is run first, then the storage for the single \em T is returned to the
shared instance of \em allocator. The overload without an allocator argument uses
nedallocator<T>.
*/
template<class allocator, typename T> inline void Delete(const T *_obj)
{
	T *obj=const_cast<T *>(_obj);
	allocator &a=nedallocatorI::StaticAllocator<allocator>::get();
	obj->~T();
	// Element count, matching the allocate(1) performed by New()
	a.deallocate(obj, 1);
}
template<typename T> inline void Delete(const T *obj) { Delete<nedallocator<T> >(obj); }

/*! \class nedallocatorise
\ingroup C++
\brief Reimplements a given STL container to make full and efficient usage of nedalloc
\param stlcontainer The STL container you wish to reimplement
\param T The type to be contained
\param policies... Any policies you want applied to the allocator


This is a clever bit of C++ metaprogramming if I do say so myself! What it does
is to specialise a STL container implementation to make full use of nedalloc's
advanced facilities, so for example if you do:
\code
using namespace nedalloc;
typedef nedallocatorise<std::vector, unsigned int, 
	nedpolicy::typeIsPOD<true>::policy,
	nedpolicy::mmap<>::policy,
	nedpolicy::reserveN<26>::policy			// 1<<26 = 64Mb. 10,000,000 * sizeof(unsigned int) = 38Mb.
>::value myvectortype;
myvectortype a;
for(int n=0; n<10000000; n++)
    a.push_back(n);
\endcode
What happens here is that nedallocatorise reimplements the parts of
std::vector which extend and shrink the actual memory allocation.
Because the typeIsPOD policy is specified, it means that realloc()
rather than realloc(M2_PREVENT_MOVE) can be used. Also, because the
mmap and the reserveN policies are specified, std::vector immediately
reserves 64Mb of address space and forces the immediate use of mmap().
This allows you to push_back() a lot of data very, very quickly indeed.
You will also find that pop_back() actually reduces the allocation now
(most implementations don't bother ever releasing memory except when
reaching empty or when resize() is called). When mmapped, reserve()
is automatically held at a minimum of &lt;page size&gt;/sizeof(type) though
larger values are respected.

test.cpp has a benchmark of the speed differences you may realise, plus
an example of usage.
*/
template<template<typename, class> class stlcontainer,
	typename T,
#ifdef HAVE_CPP0XVARIADICTEMPLATES
	template<class> class... policies
#else
	template<class> class policy1=nedpolicy::empty,
	template<class> class policy2=nedpolicy::empty,
	template<class> class policy3=nedpolicy::empty,
	template<class> class policy4=nedpolicy::empty,
	template<class> class policy5=nedpolicy::empty,
	template<class> class policy6=nedpolicy::empty,
	template<class> class policy7=nedpolicy::empty,
	template<class> class policy8=nedpolicy::empty,
	template<class> class policy9=nedpolicy::empty,
	template<class> class policy10=nedpolicy::empty,
	template<class> class policy11=nedpolicy::empty,
	template<class> class policy12=nedpolicy::empty,
	template<class> class policy13=nedpolicy::empty,
	template<class> class policy14=nedpolicy::empty,
	template<class> class policy15=nedpolicy::empty
#endif
> class nedallocatorise
{
	// stlcontainer is a template taking exactly (value type, allocator type). This
	// generic definition only substitutes nedallocator<T, policies...> as the allocator.
public:
	//! The reimplemented STL container type: stlcontainer instantiated with T and a nedallocator carrying the given policies
	typedef stlcontainer<T, nedallocator<T,
#ifdef HAVE_CPP0XVARIADICTEMPLATES
		policies...
#else
		policy1, policy2, policy3, policy4, policy5,
		policy6, policy7, policy8, policy9, policy10,
		policy11, policy12, policy13, policy14, policy15
#endif
		> > value;
};

} /* namespace */
#endif

/* Some miscellaneous dlmalloc option documentation */

#ifdef DOXYGEN_IS_PARSING_ME
/* Just some false defines to keep doxygen happy. Everything down to the matching
#endif is only seen when DOXYGEN_IS_PARSING_ME is defined, and exists so that the
\def documentation blocks have a macro to attach to. */

#define NEDMALLOC_DEBUG DEBUG
#define ENABLE_LARGE_PAGES undef
#define ENABLE_FAST_HEAP_DETECTION undef
#define REPLACE_SYSTEM_ALLOCATOR undef
#define ENABLE_TOLERANT_NEDMALLOC undef
#define NO_NED_NAMESPACE undef

/*! \def MALLOC_ALIGNMENT
\brief Defines what alignment normally returned blocks should use. Is 16 bytes on Mac OS X, otherwise 8 bytes. */
#define MALLOC_ALIGNMENT 8

/*! \def USE_LOCKS
\brief Defines the threadsafety of nedalloc

USE_LOCKS can be 2 if you want to define your own MLOCK_T, INITIAL_LOCK,
ACQUIRE_LOCK, RELEASE_LOCK, TRY_LOCK, IS_LOCKED and NULL_LOCK_INITIALIZER.
*/
#define USE_LOCKS 1

/*! \def DEFAULT_GRANULARITY
\brief Defines the granularity in which to request or free system memory.
*/
#define DEFAULT_GRANULARITY (2*1024*1024)

/*! \def DEFAULT_TRIM_THRESHOLD
\brief Defines how much memory must be free before returning it to the system.
*/
#define DEFAULT_TRIM_THRESHOLD (2*1024*1024)

/*! \def DEFAULT_MMAP_THRESHOLD
\brief Defines the threshold above which mmap() is used to perform direct allocation.
*/
#define DEFAULT_MMAP_THRESHOLD (256*1024)

/*! \def MAX_RELEASE_CHECK_RATE
\brief Defines how many free() ops should occur before checking how much free memory there is.
*/
#define MAX_RELEASE_CHECK_RATE 4095

/*! \def NEDMALLOC_FORCERESERVE
\brief Lets you force address space reservation in the \b standard malloc API

Note that by default realloc() sets M2_RESERVE_MULT(8) when thunking to realloc2(),
so you probably don't need to override this
*/
#define NEDMALLOC_FORCERESERVE(p, mem, size) 0

/*! \def NEDMALLOC_TESTLOGENTRY
\brief Used to determine whether a given memory operation should be logged.
*/
#define NEDMALLOC_TESTLOGENTRY(tc, np, type, mspace, size, mem, alignment, flags, returned) ((type)&ENABLE_LOGGING)

/*! \def NEDMALLOC_STACKBACKTRACEDEPTH
\brief Turns on stack backtracing in the logger.

You almost certainly want to constrain what gets logged using NEDMALLOC_TESTLOGENTRY
if you turn this on as the sheer volume of data output can make execution very slow.
*/
#define NEDMALLOC_STACKBACKTRACEDEPTH 0

#endif

#endif