forked from ned14/nedmalloc
-
Notifications
You must be signed in to change notification settings - Fork 0
/
nedmalloc.h
1620 lines (1450 loc) · 59.8 KB
/
nedmalloc.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/* nedalloc, an alternative malloc implementation for multiple threads without
lock contention based on dlmalloc v2.8.4. (C) 2005-2010 Niall Douglas
Boost Software License - Version 1.0 - August 17th, 2003
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
#ifndef NEDMALLOC_H
#define NEDMALLOC_H
/*! \file nedmalloc.h
\brief Defines the functionality provided by nedalloc.
*/
/*! \mainpage
<a href="../../Readme.html">Please see the Readme.html</a>
*/
/*! \def NEDMALLOC_DEBUG
\brief Defines the assertion checking performed by nedalloc
NEDMALLOC_DEBUG can be defined to cause DEBUG to be set differently for nedmalloc
than for the rest of the build. Remember to set NDEBUG to disable all assertion
checking too.
*/
/*! \def ENABLE_LARGE_PAGES
\brief Defines whether nedalloc uses large pages (>=2Mb)
ENABLE_LARGE_PAGES enables support for requesting memory from the system in large
(typically >=2Mb) pages if the host OS supports this. These occupy just a single
TLB entry and can significantly improve performance in large working set applications.
*/
/*! \def ENABLE_FAST_HEAP_DETECTION
\brief Defines whether nedalloc takes platform specific shortcuts when detecting foreign blocks.
ENABLE_FAST_HEAP_DETECTION enables special logic to detect blocks allocated
by the system heap. This avoids 1.5%-2% overhead when checking for non-nedmalloc
blocks, but it assumes that the NT and glibc heaps function in a very specific
fashion which may not hold true across OS upgrades.
*/
/*! \def HAVE_CPP0XRVALUEREFS
\ingroup C++
\brief Enables rvalue references
Define to enable the usage of rvalue references which enables move semantics and
other things. Automatically defined if __cplusplus indicates a C++0x compiler,
otherwise you'll need to set it yourself.
*/
/*! \def HAVE_CPP0XVARIADICTEMPLATES
\ingroup C++
\brief Enables variadic templates
Define to enable the usage of variadic templates which enables the use of arbitrary
numbers of policies and other useful things. Automatically defined if __cplusplus
indicates a C++0x compiler, otherwise you'll need to set it yourself.
*/
/*! \def HAVE_CPP0XSTATICASSERT
\ingroup C++
\brief Enables static assertions
Define to enable the usage of static assertions. Automatically defined if __cplusplus
indicates a C++0x compiler, otherwise you'll need to set it yourself.
*/
/*! \def HAVE_CPP0XTYPETRAITS
\ingroup C++
\brief Enables type traits
Define to enable the usage of <type_traits>. Automatically defined if __cplusplus
indicates a C++0x compiler, otherwise you'll need to set it yourself.
*/
/* A build that sets HAVE_CPP0X by hand, or any compiler advertising a
   post-C++98 standard, gets every C++0x feature switch forced to 1. */
#if defined(HAVE_CPP0X) || __cplusplus > 199711L
#undef HAVE_CPP0XTYPETRAITS
#define HAVE_CPP0XTYPETRAITS 1
#undef HAVE_CPP0XSTATICASSERT
#define HAVE_CPP0XSTATICASSERT 1
#undef HAVE_CPP0XVARIADICTEMPLATES
#define HAVE_CPP0XVARIADICTEMPLATES 1
#undef HAVE_CPP0XRVALUEREFS
#define HAVE_CPP0XRVALUEREFS 1
#endif
#include <stddef.h> /* for size_t */
/*! \def NEDMALLOCEXTSPEC
\brief Defines how nedalloc's API is to be made visible.
NEDMALLOCEXTSPEC can be defined to be __declspec(dllexport) or
__attribute__ ((visibility("default"))) or whatever you like. It defaults
to extern unless NEDMALLOC_DLL_EXPORTS is set as it would be when building
nedmalloc.dll.
*/
/* Pick the linkage/visibility decoration for the public API unless the
   including build has already supplied one. */
#ifndef NEDMALLOCEXTSPEC
#ifdef NEDMALLOC_DLL_EXPORTS
#ifdef WIN32
#define NEDMALLOCEXTSPEC extern __declspec(dllexport)
#elif defined(__GNUC__)
#define NEDMALLOCEXTSPEC extern __attribute__ ((visibility("default")))
#else
/* Unrecognised toolchain: fall back to plain extern so that the declarations
   in this header still compile instead of hitting an undefined macro. */
#define NEDMALLOCEXTSPEC extern
#endif
/* Building the shared library switches on ENABLE_TOLERANT_NEDMALLOC unless
   the build has already chosen a value for it. */
#ifndef ENABLE_TOLERANT_NEDMALLOC
#define ENABLE_TOLERANT_NEDMALLOC 1
#endif
#else
#define NEDMALLOCEXTSPEC extern
#endif
#endif
/*! \def NEDMALLOCDEPRECATED
\brief Defined to mark an API as deprecated */
/* Select the compiler specific spelling of "this API is deprecated". The
   GCC-XML parser is excluded from both attribute forms and gets the empty
   definition. */
#ifndef NEDMALLOCDEPRECATED
#if defined(_MSC_VER) && !defined(__GCCXML__)
#define NEDMALLOCDEPRECATED __declspec(deprecated)
#elif defined(__GNUC__) && !defined(__GCCXML__)
#define NEDMALLOCDEPRECATED __attribute__ ((deprecated))
#else
/*! Marks a function as being deprecated */
#define NEDMALLOCDEPRECATED
#endif
#endif
/*! \def RESTRICT
\brief Defined to the restrict keyword or equivalent if available */
/* Resolution order: the C99 keyword when compiling as C99 or later, the
   __restrict extension on MSVC (>= 1400) and GCC compatible compilers, and
   nothing at all everywhere else. __STDC_VERSION__ is tested with defined()
   first because C++ and C89 compilers do not provide it. */
#ifndef RESTRICT
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 or better */
#define RESTRICT restrict
#elif (defined(_MSC_VER) && _MSC_VER>=1400) || defined(__GNUC__)
#define RESTRICT __restrict
#else
#define RESTRICT
#endif
#endif
/* NEDMALLOCPTRATTR is defined exactly once: the GCC malloc attribute wins
   when __GNUC__ is defined, otherwise MSVC (>= 1400) supplies
   __declspec(restrict). Defining it in a single #if/#elif chain avoids a
   conflicting macro redefinition on toolchains that define both _MSC_VER and
   __GNUC__. */
#if defined(__GNUC__)
#define NEDMALLOCPTRATTR __attribute__ ((malloc))
#elif defined(_MSC_VER) && _MSC_VER>=1400
#define NEDMALLOCPTRATTR __declspec(restrict)
#endif
/* Only MSVC (>= 1400) has a noalias specifier */
#if defined(_MSC_VER) && _MSC_VER>=1400
#define NEDMALLOCNOALIASATTR __declspec(noalias)
#endif
/*! \def NEDMALLOCPTRATTR
\brief Defined to the specifier for a pointer which points to a memory block. Like NEDMALLOCNOALIASATTR, but sadly not identical. */
#ifndef NEDMALLOCPTRATTR
#define NEDMALLOCPTRATTR
#endif
/*! \def NEDMALLOCNOALIASATTR
\brief Defined to the specifier for a pointer which does not alias any other variable. */
#ifndef NEDMALLOCNOALIASATTR
#define NEDMALLOCNOALIASATTR
#endif
/*! \def USE_MAGIC_HEADERS
\brief Defines whether nedalloc should use magic headers in foreign heap block detection
USE_MAGIC_HEADERS causes nedalloc to allocate an extra three sizeof(size_t)
to each block. nedpfree() and nedprealloc() can then automagically know when
to free a system allocated block. Enabling this typically adds 20-50% to
application memory usage, and is mandatory if USE_ALLOCATOR is not 1.
*/
#if !defined(USE_MAGIC_HEADERS)
#define USE_MAGIC_HEADERS 0
#endif
/*! \def USE_ALLOCATOR
\brief Selects the allocator which nedalloc sits on top of
Recognised values for USE_ALLOCATOR (1 is the default):
0: The system allocator, which leaves nedmalloc acting simply as a threadcache.
This is very useful for testing with valgrind and Glowcode.
WARNING: Intended for DEBUG USE ONLY - not all functions work correctly.
1: dlmalloc
*/
#if !defined(USE_ALLOCATOR)
#define USE_ALLOCATOR 1 /* dlmalloc */
#endif
/* The system allocator can only be used together with magic headers */
#if !(USE_ALLOCATOR || USE_MAGIC_HEADERS)
#error If you are using the system allocator then you MUST use magic headers
#endif
/*! \def REPLACE_SYSTEM_ALLOCATOR
\brief Defines whether to replace the system allocator (malloc(), free() et al) with nedalloc's implementation.
REPLACE_SYSTEM_ALLOCATOR on POSIX causes nedalloc's functions to be called
malloc, free etc. instead of nedmalloc, nedfree etc. You may or may not want
this. On Windows it causes nedmalloc to patch all loaded DLLs and binaries
to replace usage of the system allocator.
Always turns on ENABLE_TOLERANT_NEDMALLOC.
*/
#ifdef REPLACE_SYSTEM_ALLOCATOR
/* USE_ALLOCATOR==0 selects the system allocator as the backend, which cannot
   be combined with replacing that same allocator. */
#if USE_ALLOCATOR==0
#error Cannot combine using the system allocator with replacing the system allocator
#endif
/* Replacing the system allocator turns on tolerant mode unless the build has
   already chosen a value for it. */
#ifndef ENABLE_TOLERANT_NEDMALLOC
#define ENABLE_TOLERANT_NEDMALLOC 1
#endif
#ifndef WIN32 /* We have a dedicated patcher for Windows */
/* Rename each ned* entry point to the system allocator name it replaces.
   These are plain textual macros, so they affect every later use of the
   identifiers in this header, including struct nedmallinfo. */
#define nedmalloc malloc
#define nedmalloc2 malloc2
#define nedcalloc calloc
#define nedrealloc realloc
#define nedrealloc2 realloc2
#define nedfree free
#define nedfree2 free2
#define nedmemalign memalign
#define nedmallinfo mallinfo
#define nedmallopt mallopt
#define nedmalloc_trim malloc_trim
#define nedmalloc_stats malloc_stats
#define nedmalloc_footprint malloc_footprint
#define nedindependent_calloc independent_calloc
#define nedindependent_comalloc independent_comalloc
/* nedmemsize is only renamed (to malloc_usable_size) when __GNUC__ is defined */
#ifdef __GNUC__
#define nedmemsize malloc_usable_size
#endif
#endif
#endif
/*! \def ENABLE_TOLERANT_NEDMALLOC
\brief Defines whether nedalloc should check for blocks from the system allocator.
ENABLE_TOLERANT_NEDMALLOC is automatically turned on if REPLACE_SYSTEM_ALLOCATOR
is set or the Windows DLL is being built. This causes nedmalloc to detect when a
system allocator block is passed to it and to handle it appropriately. Note that
without USE_MAGIC_HEADERS there is a very tiny chance that nedmalloc will segfault
on non-Windows builds (it uses Win32 SEH to trap segfaults on Windows and there
is no comparable system on POSIX).
*/
/* The statistics structure is declared inside extern "C" when compiled as
   C++, and before the nedalloc namespace is opened further down this header,
   so it lives at global scope in both languages. */
#if defined(__cplusplus)
extern "C" {
#endif
/*! \brief Returns information about a memory pool

Every member is a size_t. When REPLACE_SYSTEM_ALLOCATOR is defined and WIN32
is not, the identifier nedmallinfo is \#defined to mallinfo earlier in this
header, so this declaration then produces struct mallinfo.
*/
struct nedmallinfo {
size_t arena; /*!< non-mmapped space allocated from system */
size_t ordblks; /*!< number of free chunks */
size_t smblks; /*!< always 0 */
size_t hblks; /*!< always 0 */
size_t hblkhd; /*!< space in mmapped regions */
size_t usmblks; /*!< maximum total allocated space */
size_t fsmblks; /*!< always 0 */
size_t uordblks; /*!< total allocated space */
size_t fordblks; /*!< total free space */
size_t keepcost; /*!< releasable (via malloc_trim) space */
};
#if defined(__cplusplus)
}
#endif
/*! \def NO_NED_NAMESPACE
\brief Defines the use of the nedalloc namespace for the C functions.
NO_NED_NAMESPACE prevents the functions from being defined in the nedalloc
namespace when in C++ (uses the global C namespace instead).
*/
/*! \def THROWSPEC
\brief Defined to throw() or noexcept(true) (as in, throws nothing) under C++, otherwise nothing.
*/
#if defined(__cplusplus)
#if !defined(NO_NED_NAMESPACE)
namespace nedalloc {
#else
extern "C" {
#endif
/* Use noexcept(true) on any post-C++98 compiler. MSVC keeps __cplusplus at
   199711L unless /Zc:__cplusplus is given, so _MSVC_LANG is consulted as well;
   otherwise newer MSVC builds would fall back to the dynamic exception
   specification throw(), which C++20 removed. */
#if __cplusplus > 199711L || (defined(_MSVC_LANG) && _MSVC_LANG > 199711L)
#define THROWSPEC noexcept(true)
#else
#define THROWSPEC throw()
#endif
#else
#define THROWSPEC
#endif
/* These are the global functions */
/*! \defgroup v2malloc The v2 malloc API
\warning This API is being completely retired in v1.10 beta 2 and replaced with the API
being developed for inclusion into the C1X programming language standard.
For the v1.10 release which was generously sponsored by
<a href="http://www.ara.com/" target="_blank">Applied Research Associates (USA)</a>,
a new general purpose allocator API was designed which is intended to remedy many
of the long standing problems and inefficiencies introduced by the ISO C allocator
API. Internally nedalloc's implementations of nedmalloc(), nedcalloc(), nedmemalign()
and nedrealloc() call into this API:
<ul>
<li><code>void* malloc2(size_t bytes, size_t alignment, unsigned flags)</code></li>
<li><code>void* realloc2(void* mem, size_t bytes, size_t alignment, unsigned
flags)</code></li>
<li><code>void free2(void* mem, unsigned flags)</code></li>
</ul>
If nedmalloc.h is being included by C++ code, the alignment and flags parameters
default to zero which makes the new API identical to the old API (roll on the introduction
of default parameters to C!). The ability for realloc2() to take an alignment is
<em>particularly</em> useful for extending aligned vector arrays such as SSE/AVX
vector arrays. Hitherto SSE/AVX vector code had to jump through all sorts of unpleasant
hoops to maintain alignment :(.
Note that using any of these flags other than M2_ZERO_MEMORY or any alignment
other than zero inhibits the threadcache.
Currently MREMAP support is limited to Linux and Windows. Patches implementing
support for other platforms are welcome.
On Linux the non portable mremap() kernel function is currently used, so in fact
the M2_RESERVE_* options are currently ignored.
On Windows, there are two different MREMAP implementations which are chosen according
to whether a 32 bit or a 64 bit build is being performed. The 32 bit implementation
is based on Win32 file mappings where it reserves the address space within the Windows
VM system, so you can safely specify silly reservation quantities like 2Gb per block
and not exhaust local process address space. Note however that on x86 this costs
2Kb (1Kb if PAE is off) of kernel memory per Mb reserved, and as kernel memory has
a hard limit of 447Mb on x86 you will find the total address space reservable in
the system is limited. On x64, or if you define WIN32_DIRECT_USE_FILE_MAPPINGS=0
on x86, a much faster implementation of using VirtualAlloc(MEM_RESERVE) to directly
reserve the address space is used.
When using M2_RESERVE_* with realloc2(), the setting only takes effect when the
mmapped chunk has exceeded its reservation space and a new reservation space needs
to be created.
*/
#ifndef M2_FLAGS_DEFINED
#define M2_FLAGS_DEFINED
/*! \def M2_ZERO_MEMORY
\ingroup v2malloc
\brief Sets the contents of the allocated block (or any increase in the allocated
block) to zero.
Note that this zeroes only the increase from what dlmalloc thinks
the chunk's size is, so if you realloc2() a block which wasn't allocated using
malloc2() using this flag then you may have garbage just before the newly extended
space.
\li <strong>Rationale:</strong> Memory returned by the system is guaranteed to
be zero on most platforms, and hence dlmalloc knows when it can skip zeroing
memory. This improves performance.
*/
#define M2_ZERO_MEMORY (1<<0)
/*! \def M2_PREVENT_MOVE
\ingroup v2malloc
\brief Cause realloc2() to attempt to extend a block in place, but to never move
it.
\li <strong>Rationale:</strong> C++ makes almost no use of realloc(), even for
contiguous arrays such as std::vector<> because most C++ objects cannot be relocated
in memory without a copy or rvalue construction (though some clever STL implementations
specialise for Plain Old Data (POD) types, and use realloc() then and only then).
This flag allows C++ containers to speculatively try to extend in place, thus
improving performance <em>especially</em> for large allocations which will use
mmap().
*/
#define M2_PREVENT_MOVE (1<<1)
/*! \def M2_ALWAYS_MMAP
\ingroup v2malloc
\brief Always allocate as though mmap_threshold were being exceeded.
In the case of realloc2(), note that setting this bit will not necessarily mmap a chunk
which isn't already mmapped, but it will force a mmapped chunk if new memory
needs allocating.
\li <strong>Rationale:</strong> If you know that an array you are allocating
is going to be repeatedly extended up into the hundreds of kilobytes range, then
you can avoid the constant memory copying into larger blocks by specifying this
flag at the beginning along with one of the M2_RESERVE_* flags below. This can
<strong>greatly</strong> improve performance for large arrays.
*/
#define M2_ALWAYS_MMAP (1<<2)
#define M2_RESERVED1 (1<<3)
#define M2_RESERVED2 (1<<4)
#define M2_RESERVED3 (1<<5)
#define M2_RESERVED4 (1<<6)
#define M2_RESERVED5 (1<<7)
/* Bit 15: when set, the reservation specifier below is a multiplier rather
   than a 1<< shift value. */
#define M2_RESERVE_ISMULTIPLIER 0x00008000
/* Seven bits (bits 8 to 14) are given to the address reservation specifier.
   They carry either a multiplier (bit 15 set) or a 1<< shift value.
*/
#define M2_RESERVE_MASK (0x7f<<8)
/*! \def M2_RESERVE_MULT(n)
\ingroup v2malloc
\brief Reserve n times as much address space, so that a mmapped realloc2() to a
size <= n * original size avoids memory copying and hence is much faster.
*/
#define M2_RESERVE_MULT(n) (M2_RESERVE_SHIFT(n)|M2_RESERVE_ISMULTIPLIER)
/*! \def M2_RESERVE_SHIFT(n)
\ingroup v2malloc
\brief Reserve (1<<n) bytes of address space, so that a mmapped realloc2() to a
size <= (1<<n) avoids memory copying and hence is much faster.
*/
#define M2_RESERVE_SHIFT(n) (M2_RESERVE_MASK&((n)<<8))
#define M2_FLAGS_MASK 0x0000ffff
#define M2_CUSTOM_FLAGS_BEGIN (1<<16)
#define M2_CUSTOM_FLAGS_MASK 0xffff0000
/*! \def NM_SKIP_TOLERANCE_CHECKS
\ingroup v2malloc
\brief Causes nedmalloc to not inspect the block being passed to see if it belongs
to the system allocator. Can improve speed by up to 10%.
The value is an unsigned constant: shifting a signed 1 into bit 31 would overflow
a 32 bit int, and the flags parameters it is passed through are unsigned anyway.
*/
#define NM_SKIP_TOLERANCE_CHECKS (1u<<31)
#endif /* M2_FLAGS_DEFINED */
#if defined(__cplusplus)
/*! \brief Gets the usable size of an allocated block.
Note this will always be bigger than what was
asked for due to rounding etc. Optionally returns 1 in isforeign if the block came from the
system allocator - note that there is a small (>0.01%) but real chance of segfault on non-Windows
systems when passing non-nedmalloc blocks if you don't use USE_MAGIC_HEADERS.
*/
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR size_t nedblksize(int *RESTRICT isforeign, void *RESTRICT mem, unsigned flags=0) THROWSPEC;
#else
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR size_t nedblksize(int *RESTRICT isforeign, void *RESTRICT mem, unsigned flags) THROWSPEC;
#endif
/*! \brief Identical to nedblksize() except without the isforeign */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR size_t nedmemsize(void *RESTRICT mem) THROWSPEC;
/*! \brief Equivalent to nedpsetvalue((nedpool *) 0, v) */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void nedsetvalue(void *v) THROWSPEC;
/*! \brief Equivalent to nedpmalloc2((nedpool *) 0, size, 0, 0) */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmalloc(size_t size) THROWSPEC;
/*! \brief Equivalent to nedpmalloc2((nedpool *) 0, no*size, 0, M2_ZERO_MEMORY) */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedcalloc(size_t no, size_t size) THROWSPEC;
/*! \brief Equivalent to nedprealloc2((nedpool *) 0, mem, size, 0, M2_RESERVE_MULT(8)) */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedrealloc(void *mem, size_t size) THROWSPEC;
/*! \brief Equivalent to nedpfree2((nedpool *) 0, mem, 0) */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void nedfree(void *mem) THROWSPEC;
/*! \brief Equivalent to nedpmalloc2((nedpool *) 0, bytes, alignment, 0) */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmemalign(size_t alignment, size_t bytes) THROWSPEC;
#if defined(__cplusplus)
/*! \ingroup v2malloc
\brief Equivalent to nedpmalloc2((nedpool *) 0, size, alignment, flags) */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmalloc2(size_t size, size_t alignment=0, unsigned flags=0) THROWSPEC;
/*! \ingroup v2malloc
\brief Equivalent to nedprealloc2((nedpool *) 0, mem, size, alignment, flags) */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedrealloc2(void *mem, size_t size, size_t alignment=0, unsigned flags=0) THROWSPEC;
/*! \ingroup v2malloc
\brief Equivalent to nedpfree2((nedpool *) 0, mem, flags) */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void nedfree2(void *mem, unsigned flags=0) THROWSPEC;
#else
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmalloc2(size_t size, size_t alignment, unsigned flags) THROWSPEC;
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedrealloc2(void *mem, size_t size, size_t alignment, unsigned flags) THROWSPEC;
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void nedfree2(void *mem, unsigned flags) THROWSPEC;
#endif
/*! \brief Equivalent to nedpmallinfo((nedpool *) 0) */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR struct nedmallinfo nedmallinfo(void) THROWSPEC;
/*! \brief Equivalent to nedpmallopt((nedpool *) 0, parno, value) */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR int nedmallopt(int parno, int value) THROWSPEC;
/*! \brief Returns the internal allocation granularity and the magic header XOR used for internal consistency checks. */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void* nedmalloc_internals(size_t *granularity, size_t *magic) THROWSPEC;
/*! \brief Equivalent to nedpmalloc_trim((nedpool *) 0, pad) */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR int nedmalloc_trim(size_t pad) THROWSPEC;
/*! \brief Equivalent to nedpmalloc_stats((nedpool *) 0) */
NEDMALLOCEXTSPEC void nedmalloc_stats(void) THROWSPEC;
/*! \brief Equivalent to nedpmalloc_footprint((nedpool *) 0) */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR size_t nedmalloc_footprint(void) THROWSPEC;
/*! \brief Equivalent to nedpindependent_calloc((nedpool *) 0, elemsno, elemsize, chunks) */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedindependent_calloc(size_t elemsno, size_t elemsize, void **chunks) THROWSPEC;
/*! \brief Equivalent to nedpindependent_comalloc((nedpool *) 0, elems, sizes, chunks) */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedindependent_comalloc(size_t elems, size_t *sizes, void **chunks) THROWSPEC;
/*! \brief Destroys the system memory pool used by the functions above.
Useful for when you have nedmalloc in a DLL you're about to unload.
If you call ANY nedmalloc functions after calling this you will
get a fatal exception!
Takes no arguments; declared with (void) so that C callers get a real prototype.
*/
NEDMALLOCEXTSPEC void neddestroysyspool(void) THROWSPEC;
/*! \brief A nedpool type */
struct nedpool_t;
/*! \brief A nedpool type */
typedef struct nedpool_t nedpool;
/*! \brief Creates a memory pool for use with the nedp* functions below.
Capacity is how much to allocate immediately (if you know you'll be allocating a lot
of memory very soon) which you can leave at zero. Threads specifies how many threads
will *normally* be accessing the pool concurrently. Setting this to zero means it
extends on demand, but be careful of this as it can rapidly consume system resources
where bursts of concurrent threads use a pool at once.
*/
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR nedpool *nedcreatepool(size_t capacity, int threads) THROWSPEC;
/*! \brief Destroys a memory pool previously created by nedcreatepool().
*/
NEDMALLOCEXTSPEC void neddestroypool(nedpool *p) THROWSPEC;
/*! \brief Returns a zero terminated snapshot of threadpools existing at the time of call.
Call nedfree() on the returned list when you are done. Returns zero if there is only the
system pool in existence.
Takes no arguments; declared with (void) so that C callers get a real prototype.
*/
NEDMALLOCEXTSPEC nedpool **nedpoollist(void) THROWSPEC;
/*! \brief Sets a value to be associated with a pool.
You can retrieve this value by passing any memory block allocated from that pool.
*/
NEDMALLOCEXTSPEC void nedpsetvalue(nedpool *p, void *v) THROWSPEC;
/*! \brief Gets a previously set value using nedpsetvalue() or zero if memory is unknown.
Optionally can also retrieve pool. You can detect an unknown block by the return
being zero and *p being unmodified.
*/
NEDMALLOCEXTSPEC void *nedgetvalue(nedpool **p, void *mem) THROWSPEC;
/*! \brief Trims the thread cache for the calling thread, returning any existing cache
data to the central pool.
Remember to ALWAYS call with zero if you used the system pool. Setting disable to
non-zero replicates neddisablethreadcache().
*/
NEDMALLOCEXTSPEC void nedtrimthreadcache(nedpool *p, int disable) THROWSPEC;
/*! \brief Disables the thread cache for the calling thread, returning any existing cache
data to the central pool.
Remember to ALWAYS call with zero if you used the system pool.
*/
NEDMALLOCEXTSPEC void neddisablethreadcache(nedpool *p) THROWSPEC;
/*! \brief Releases all memory in all threadcaches in the pool, and writes all
accumulated memory operations to the log if enabled.
You can pass zero for filepath to use the compiled default, or else a char[MAX_PATH]
containing the path you wish to use for the log file. The log file is always
appended to if it already exists. After writing the logs, the logging ability
is disabled for that pool.
\warning Do NOT call this if the pool is in use - this call is NOT threadsafe.
*/
NEDMALLOCEXTSPEC size_t nedflushlogs(nedpool *p, char *filepath) THROWSPEC;
/*! \brief Equivalent to nedpmalloc2(p, size, 0, 0) */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedpmalloc(nedpool *p, size_t size) THROWSPEC;
/*! \brief Equivalent to nedpmalloc2(p, no*size, 0, M2_ZERO_MEMORY) */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedpcalloc(nedpool *p, size_t no, size_t size) THROWSPEC;
/*! \brief Equivalent to nedprealloc2(p, mem, size, 0, M2_RESERVE_MULT(8)) */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedprealloc(nedpool *p, void *mem, size_t size) THROWSPEC;
/*! \brief Equivalent to nedpfree2(p, mem, 0) */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void nedpfree(nedpool *p, void *mem) THROWSPEC;
/*! \brief Equivalent to nedpmalloc2(p, bytes, alignment, 0) */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedpmemalign(nedpool *p, size_t alignment, size_t bytes) THROWSPEC;
#if defined(__cplusplus)
/*! \ingroup v2malloc
\brief Allocates a block of memory sized \em size from pool \em p, aligned to \em alignment and according to the flags \em flags.
*/
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedpmalloc2(nedpool *p, size_t size, size_t alignment=0, unsigned flags=0) THROWSPEC;
/*! \ingroup v2malloc
\brief Resizes the block of memory at \em mem in pool \em p to size \em size, aligned to \em alignment and according to the flags \em flags.
*/
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedprealloc2(nedpool *p, void *mem, size_t size, size_t alignment=0, unsigned flags=0) THROWSPEC;
/*! \brief Frees the block \em mem from the pool \em p according to flags \em flags. */
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void nedpfree2(nedpool *p, void *mem, unsigned flags=0) THROWSPEC;
#else
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedpmalloc2(nedpool *p, size_t size, size_t alignment, unsigned flags) THROWSPEC;
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedprealloc2(nedpool *p, void *mem, size_t size, size_t alignment, unsigned flags) THROWSPEC;
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void nedpfree2(nedpool *p, void *mem, unsigned flags) THROWSPEC;
#endif
/*! \brief Returns information about the memory pool */
NEDMALLOCEXTSPEC struct nedmallinfo nedpmallinfo(nedpool *p) THROWSPEC;
/*! \brief Changes the operational parameters of the memory pool */
NEDMALLOCEXTSPEC int nedpmallopt(nedpool *p, int parno, int value) THROWSPEC;
/*! \brief Tries to release as much free memory back to the system as possible, leaving \em pad remaining per threadpool. */
NEDMALLOCEXTSPEC int nedpmalloc_trim(nedpool *p, size_t pad) THROWSPEC;
/*! \brief Prints some operational statistics to stdout. */
NEDMALLOCEXTSPEC void nedpmalloc_stats(nedpool *p) THROWSPEC;
/*! \brief Returns how much memory is currently in use by the memory pool */
NEDMALLOCEXTSPEC size_t nedpmalloc_footprint(nedpool *p) THROWSPEC;
/*! \brief Returns a series of guaranteed consecutive cleared memory allocations.
independent_calloc is similar to calloc, but instead of returning a
single cleared space, it returns an array of pointers to n_elements
independent elements that can hold contents of size elem_size, each
of which starts out cleared, and can be independently freed,
realloc'ed etc. The elements are guaranteed to be adjacently
allocated (this is not guaranteed to occur with multiple callocs or
mallocs), which may also improve cache locality in some
applications.
The "chunks" argument is optional (i.e., may be null, which is
probably the most typical usage). If it is null, the returned array
is itself dynamically allocated and should also be freed when it is
no longer needed. Otherwise, the chunks array must be of at least
n_elements in length. It is filled in with the pointers to the
chunks.
In either case, independent_calloc returns this pointer array, or
null if the allocation failed. If n_elements is zero and "chunks"
is null, it returns a chunk representing an array with zero elements
(which should be freed if not wanted).
Each element must be individually freed when it is no longer
needed. If you'd like to instead be able to free all at once, you
should instead use regular calloc and assign pointers into this
space to represent elements. (In this case though, you cannot
independently free elements.)
independent_calloc simplifies and speeds up implementations of many
kinds of pools. It may also be useful when constructing large data
structures that initially have a fixed number of fixed-sized nodes,
but the number is not known at compile time, and some of the nodes
may later need to be freed. For example:
struct Node { int item; struct Node* next; };
struct Node* build_list() {
struct Node** pool;
int n = read_number_of_nodes_needed();
if (n <= 0) return 0;
pool = (struct Node**)independent_calloc(n, sizeof(struct Node), 0);
if (pool == 0) die();
// organize into a linked list...
struct Node* first = pool[0];
for (int i = 0; i < n-1; ++i)
pool[i]->next = pool[i+1];
free(pool); // Can now free the array (or not, if it is needed later)
return first;
}
*/
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedpindependent_calloc(nedpool *p, size_t elemsno, size_t elemsize, void **chunks) THROWSPEC;
/*! \brief Returns a series of guaranteed consecutive allocations.
independent_comalloc allocates, all at once, a set of n_elements
chunks with sizes indicated in the "sizes" array. It returns
an array of pointers to these elements, each of which can be
independently freed, realloc'ed etc. The elements are guaranteed to
be adjacently allocated (this is not guaranteed to occur with
multiple callocs or mallocs), which may also improve cache locality
in some applications.
The "chunks" argument is optional (i.e., may be null). If it is null
the returned array is itself dynamically allocated and should also
be freed when it is no longer needed. Otherwise, the chunks array
must be of at least n_elements in length. It is filled in with the
pointers to the chunks.
In either case, independent_comalloc returns this pointer array, or
null if the allocation failed. If n_elements is zero and chunks is
null, it returns a chunk representing an array with zero elements
(which should be freed if not wanted).
Each element must be individually freed when it is no longer
needed. If you'd like to instead be able to free all at once, you
should instead use a single regular malloc, and assign pointers at
particular offsets in the aggregate space. (In this case though, you
cannot independently free elements.)
independent_comalloc differs from independent_calloc in that each
element may have a different size, and also that it does not
automatically clear elements.
independent_comalloc can be used to speed up allocation in cases
where several structs or objects must always be allocated at the
same time. For example:
struct Head { ... };
struct Foot { ... };
void send_message(char* msg) {
int msglen = strlen(msg);
size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) };
void* chunks[3];
if (independent_comalloc(3, sizes, chunks) == 0)
die();
struct Head* head = (struct Head*)(chunks[0]);
char* body = (char*)(chunks[1]);
struct Foot* foot = (struct Foot*)(chunks[2]);
// ...
}
In general though, independent_comalloc is worth using only for
larger values of n_elements. For small values, you probably won't
detect enough difference from series of malloc calls to bother.
Overuse of independent_comalloc can increase overall memory usage,
since it cannot reuse existing noncontiguous small chunks that
might be available for some of the elements.
*/
NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedpindependent_comalloc(nedpool *p, size_t elems, size_t *sizes, void **chunks) THROWSPEC;
#if defined(__cplusplus)
} /* namespace or extern "C" */
#include <new>
#include <memory>
#ifdef HAVE_CPP0XTYPETRAITS
#include <type_traits>
#endif
// Touch into existence for future platforms
namespace std { namespace tr1 { } }
/*! \defgroup C++ C++ language support
Thanks to the generous support of Applied Research Associates (USA), nedalloc has extensive
C++ language support which uses C++ metaprogramming techniques to provide a policy driven
STL container reimplementor. The metaprogramming silently overrides or replaces the STL implementation
on your system (MSVC and GCC are the two currently supported) to \b substantially improve
the performance of STL containers by making use of nedalloc's additional features.
Sounds difficult to use? Not really. Simply do this:
\code
using namespace nedalloc;
typedef nedallocatorise<std::vector, unsigned int,
nedpolicy::typeIsPOD<true>::policy,
nedpolicy::mmap<>::policy,
nedpolicy::reserveN<26>::policy // 1<<26 = 64Mb. 10,000,000 * sizeof(unsigned int) = 38Mb.
>::value myvectortype;
myvectortype a;
for(int n=0; n<10000000; n++)
a.push_back(n);
\endcode
The metaprogramming requires a new C++ compiler (> year 2008), and it will readily make use
of a C++0x compiler where it will use rvalue referencing, variadic templates, type traits and more.
Visual Studio 2008 or later is sufficient, as is GCC v4.4 or later.
nedalloc's metaprogramming is designed to be extensible, so the rest of this page is intended for those
wishing to customise the metaprogramming. If you simply wish to know how to use the
nedalloc::nedallocator STL allocator or the nedalloc::nedallocatorise STL reimplementor, please refer
to test.cpp which gives several examples of usage.
<h2>Extending the metaprogramming:</h2>
A nedallocator policy looks as follows:
\code
namespace nedpolicy {
template<size_t size, size_t alignment> struct sizedalign
{
template<class Base> class policy : public Base
{
template<class implementation> friend class nedallocatorI::baseimplementation;
protected:
size_t policy_alignment(size_t bytes) const
{
return (bytes < size) ? alignment : 0;
}
};
};
}
\endcode
The policy above implements a size based alignment, so if the block being allocated is
less than \em size then it causes \em alignment to be used, otherwise it does not align.
The sizedalign struct is merely a template parameter encapsulator used to capture
additional parameters, so the real policy is in fact the class policy held within it.
If you did not need to specify any additional parameters e.g. if you were defining
policy_nedpool(), then you would directly define a policy returning your nedpool and pass
it directly to nedallocator<>.
The primary policy functions which are intended to be overridden are listed in
nedalloc::nedallocatorI::baseimplementation in nedmalloc.h and are prefixed by "policy_".
However, there is absolutely no reason why the meatier functions such as
nedalloc::nedallocatorI::baseimplementation::allocate() cannot be overridden, and indeed
some of the policies defined in nedmalloc.h do just that.
Policy composition is handled by a dedicated recursive variadic template called
nedalloc::nedallocatorI::policycompositor. If you have \em really specialised needs, you
can partially specialise this class to make it do all sorts of interesting things - hence
its separation into its own class.
*/
/*! \brief The nedalloc namespace */
namespace nedalloc {
/*! \def NEDSTATIC_ASSERT(expr, msg)
\brief Compile-time assertion: compilation fails when (expr)==0.
\em msg is pasted into an identifier (or stringised when static_assert is available), so it
must contain no spaces or any other character which would be illegal in a variable name.
*/
#ifdef HAVE_CPP0XSTATICASSERT
#define NEDSTATIC_ASSERT(expr, msg) static_assert((expr)!=0, #msg )
#else
// Emulation for compilers without static_assert: only the <true> specialisation is ever
// defined, so declaring an object of type StaticAssert<false> is a compile error.
template<bool> struct StaticAssert;
template<> struct StaticAssert<true>
{
StaticAssert() { }
};
#define NEDSTATIC_ASSERT(expr, msg) \
nedalloc::StaticAssert<(expr)!=0> ERROR_##msg
#endif
/*! \brief Namespace holding every nedallocator policy. */
namespace nedpolicy
{
/*! \class empty
\ingroup C++
\brief The do-nothing policy: it inherits publicly from \em Base and adds nothing to it.
*/
template<class Base>
class empty : public Base { };
}
/*! \brief The implementation namespace where the internals live. */
namespace nedallocatorI
{
using namespace std;
using namespace tr1;
/* policycompositor<Impl, P1, P2, ...>::value names the type P1<P2<...<Impl>...>>: the first
policy is wrapped around the composition of all the policies after it, and the chain ends
in Impl. Variadic templates make this far less painful where they are available. */
#ifdef HAVE_CPP0XVARIADICTEMPLATES
template<class Impl, template<class> class... policies> class policycompositor;
// Recursive case: peel off the first policy and apply it to the composition of the rest.
template<class Impl, template<class> class First, template<class> class... Rest>
class policycompositor<Impl, First, Rest...>
{
typedef policycompositor<Impl, Rest...> remainder;
public:
typedef First<typename remainder::value> value;
};
#else
// Recursive case without variadics: fifteen policy slots, unused ones being nedpolicy::empty.
// Each step drops slot A and lets the last slot fall back to its default.
template<class Impl,
template<class> class A=nedpolicy::empty,
template<class> class B=nedpolicy::empty,
template<class> class C=nedpolicy::empty,
template<class> class D=nedpolicy::empty,
template<class> class E=nedpolicy::empty,
template<class> class F=nedpolicy::empty,
template<class> class G=nedpolicy::empty,
template<class> class H=nedpolicy::empty,
template<class> class I=nedpolicy::empty,
template<class> class J=nedpolicy::empty,
template<class> class K=nedpolicy::empty,
template<class> class L=nedpolicy::empty,
template<class> class M=nedpolicy::empty,
template<class> class N=nedpolicy::empty,
template<class> class O=nedpolicy::empty
>
class policycompositor
{
typedef policycompositor<Impl, B, C, D, E, F, G, H, I, J, K, L, M, N, O> remainder;
public:
typedef A<typename remainder::value> value;
};
#endif
// Terminal case: no policies are left (without variadics: every slot holds its default),
// so the composed type is simply Impl.
template<class Impl>
class policycompositor<Impl>
{
public:
typedef Impl value;
};
}
/*! \brief Forward declaration of the nedallocator class template.
\em T is followed by a list of policy templates: a variadic pack when
HAVE_CPP0XVARIADICTEMPLATES is defined, otherwise fifteen slots which each default to
nedpolicy::empty. It is declared here so that nedallocatorI::baseimplementation can be
partially specialised on it.
*/
template<typename T,
#ifdef HAVE_CPP0XVARIADICTEMPLATES
template<class> class... policies
#else
template<class> class policy1=nedpolicy::empty,
template<class> class policy2=nedpolicy::empty,
template<class> class policy3=nedpolicy::empty,
template<class> class policy4=nedpolicy::empty,
template<class> class policy5=nedpolicy::empty,
template<class> class policy6=nedpolicy::empty,
template<class> class policy7=nedpolicy::empty,
template<class> class policy8=nedpolicy::empty,
template<class> class policy9=nedpolicy::empty,
template<class> class policy10=nedpolicy::empty,
template<class> class policy11=nedpolicy::empty,
template<class> class policy12=nedpolicy::empty,
template<class> class policy13=nedpolicy::empty,
template<class> class policy14=nedpolicy::empty,
template<class> class policy15=nedpolicy::empty
#endif
> class nedallocator;
namespace nedallocatorI
{
/*! \brief The base implementation class (primary, unspecialised template).
This form has no members. The functionality is supplied by the partial specialisation for
nedallocator<T, policies...> which follows, so any other \em implementation argument
yields an empty class.
*/
template<class implementation> class baseimplementation
{
// NOTE(review): the assertion below is disabled; presumably it was meant to reject any
// argument which is not a nedallocator - confirm before re-enabling.
//NEDSTATIC_ASSERT(false, Bad_policies_specified);
};
/*! \brief The base implementation class */
template<typename T,
#ifdef HAVE_CPP0XVARIADICTEMPLATES
template<class> class... policies
#else
template<class> class policy1,
template<class> class policy2,
template<class> class policy3,
template<class> class policy4,
template<class> class policy5,
template<class> class policy6,
template<class> class policy7,
template<class> class policy8,
template<class> class policy9,
template<class> class policy10,
template<class> class policy11,
template<class> class policy12,
template<class> class policy13,
template<class> class policy14,
template<class> class policy15
#endif
> class baseimplementation<nedallocator<T,
#ifdef HAVE_CPP0XVARIADICTEMPLATES
policies...
#else
policy1, policy2, policy3, policy4, policy5,
policy6, policy7, policy8, policy9, policy10,
policy11, policy12, policy13, policy14, policy15
#endif
> >
{
protected:
//! \brief The most derived nedallocator implementation type
typedef nedallocator<T,
#ifdef HAVE_CPP0XVARIADICTEMPLATES
policies...
#else
policy1, policy2, policy3, policy4, policy5,
policy6, policy7, policy8, policy9, policy10,
policy11, policy12, policy13, policy14, policy15
#endif
> implementationType;
//! \brief Downcasts this object to a pointer to the most derived nedallocator implementation type
implementationType *_this()
{
return static_cast<implementationType *>(this);
}
//! \brief Const overload: downcasts this object to a const pointer to the most derived nedallocator implementation type
const implementationType *_this() const
{
return static_cast<const implementationType *>(this);
}
//! \brief Policy hook choosing the nedpool for a request of \em bytes. This default ignores \em bytes and returns zero (the system pool).
nedpool *policy_nedpool(size_t bytes) const { return 0; }
//! \brief Policy hook choosing the granularity for a request of \em bytes. This default hands \em bytes back unchanged (no granularity).
size_t policy_granularity(size_t bytes) const { return bytes; }
//! \brief Policy hook choosing the alignment for a request of \em bytes. This default ignores \em bytes and returns zero (no alignment).
size_t policy_alignment(size_t bytes) const { return 0; }
//! \brief Policy hook choosing the flags for a request of \em bytes. This default ignores \em bytes and returns zero (no flags).
unsigned policy_flags(size_t bytes) const { return 0; }
//! \brief Policy hook deciding what happens when an allocation fails. This default ignores \em bytes and throws std::bad_alloc.
void policy_throwbadalloc(size_t bytes) const { throw std::bad_alloc(); }
/*! \brief Specifies if the type may be treated as POD.
With HAVE_CPP0XTYPETRAITS defined this is std::is_trivially_copyable<T>::value, except
where the compiler reports itself as GCC older than 5.0, in which case std::is_pod<T>::value
is used instead. Without HAVE_CPP0XTYPETRAITS it is always false.
*/
static const bool policy_typeIsPOD=
#ifdef HAVE_CPP0XTYPETRAITS
// libstdc++ only gained std::is_trivially_copyable in GCC 5, so every earlier release
// (4.9.x included) must fall back to is_pod. Compilers which merely report an older GCC
// version through these macros (e.g. Clang, which identifies as GCC 4.2.1) take the same
// conservative path.
#if defined(__GNUC__) && (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) < 50000
is_pod<T>::value;
#else
is_trivially_copyable<T>::value;
#endif
#else
false;
#endif
public: