forked from DenizThatMenace/dlmalloc
-
Notifications
You must be signed in to change notification settings - Fork 0
/
malloc-2.6.2k2.c
2725 lines (2136 loc) · 84.8 KB
/
malloc-2.6.2k2.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
A version of malloc/free/realloc written by Doug Lea and released to the
public domain. Send questions/comments/complaints/performance data
* preliminary VERSION 2.6.2k Sun Dec 24 12:08:41 1995 Doug Lea (dl at gee)
Note: There may be an updated version of this malloc obtainable at
ftp://g.oswego.edu/pub/misc/malloc.c
Check before installing!
* Overview
Vital statistics:
Alignment: 8-byte
Assumed pointer representation: 4 bytes
Assumed size_t representation: 4 bytes
Minimum wastage per allocated chunk: 4 bytes
Maximum wastage per allocated chunk: 24 bytes
Minimum allocated size: 16 bytes (12 bytes usable, 4 overhead)
Maximum allocated size: 2147483640 (2^31 - 8) bytes
Explanations:
Malloced chunks have space overhead of 4 bytes for the size
field. When a chunk is in use, only the `front' size is used,
plus a bit in the NEXT adjacent chunk saying that its previous
chunk is in use.
When a chunk is freed, 12 additional bytes are needed; 4 for
the trailing size field and 8 bytes for free list
pointers. Thus, the minimum allocatable size is 16 bytes,
of which 12 bytes are usable.
It is assumed that 32 bits suffice to represent chunk sizes.
The maximum size chunk is 2^31 - 8 bytes.
malloc(0) returns a pointer to something of the minimum
allocatable size. Requests for negative sizes (when size_t is
signed) or those greater than (2^31 - 8) bytes will also return
a minimum-sized chunk.
8 byte alignment is currently hardwired into the design. This
seems to suffice for all current machines and C compilers.
Calling memalign will return a chunk that is both 8-byte
aligned and meets the requested (power of two) alignment.
Alignment demands, plus the minimum allocatable size restriction
make the worst-case wastage 24 bytes. This occurs only for
a request of zero. The worst case for requests >= 16 bytes is 15
bytes. (Empirically, average wastage is around 5 to 7 bytes.)
Structure:
This malloc, like any other, is a compromised design.
Chunks of memory are maintained using a `boundary tag' method as
described in e.g., Knuth or Standish. (See the paper by Paul
Wilson ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a
survey of such techniques.) Sizes of free chunks are stored both
in the front of each chunk and at the end. This makes
consolidating fragmented chunks into bigger chunks very fast. The
size fields also hold bits representing whether chunks are free or
in use.
Available chunks are kept in any of four places:
* `av': An array of chunks serving as bin headers for consolidated
chunks. Each bin is doubly linked. The bins are approximately
proportionally (log) spaced. There are a lot of these bins
(128). This may look excessive, but works very well in
practice. All procedures maintain the invariant that no
consolidated chunk physically borders another one. Chunks in
bins are kept in size order, with ties going to the
approximately least recently used chunk.
* `top': The top-most available chunk (i.e., the one bordering the
end of available memory) is treated specially. It is never
included in any bin, is always kept fully consolidated, is used
only if no other chunk is available, and is released back to
the system if it is very large (see TRIM_THRESHOLD).
* `last_remainder': A bin holding only the remainder of the
most recently split (non-top) chunk. This bin is checked
before other non-fitting chunks, so as to provide better
locality for runs of sequentially allocated chunks.
* `recycle_list': A list of chunks (all of sizes less than
the max_recycle_size) that have been returned via free
but not yet otherwise processed.
See below for further descriptions of these structures.
The main allocation algorithm contains aspects of some of the most
well-known memory allocation strategies:
* Best fit -- when using exact matches or scanning for smallest
usable chunks.
* (Roving) First fit -- when using the last remainder from a
previous request
* Address-ordered fit -- by keeping bins in approximately LRU
order, those with lower addresses tend to be used before
other equal-sized chunks. Also, by using top-most memory only
when necessary.
* Quick-lists -- Normal processing is bypassed for small
chunks that have been freed and again soon thereafter re-malloced.
Empirically none of these strategies alone appears as good (in
space, time, or usually both) as a mixed strategy.
* Descriptions of public routines
malloc:
The requested size is first converted into a usable form, `nb'.
This currently means to add 4 bytes overhead plus possibly more to
obtain 8-byte alignment and/or to obtain a size of at least
MINSIZE (currently 16 bytes), the smallest allocatable size.
(All fits are considered `exact' if they are within MINSIZE bytes.)
From there, the first successful of the following steps is
taken. A few steps differ slightly for `small' (< 504 bytes)
versus other requests:
1. If the most recently returned (via free) chunk is of exactly
the right size and borders another in-use chunk it is taken.
2. For small requests, the bin corresponding to the request size
is scanned, and if a chunk of exactly the right size is found,
it is taken.
3. The rest of the recycle_list is processed: If a chunk exactly
fitting is found, it is taken, otherwise the chunk is freed and
consolidated.
4. If a non-small request, the bin corresponding to the request
size is scanned, as in step (2). (The only reason these steps
are inverted for large and small requests is that for large
ones, consolidated recycled chunks could have generated a
chunk that was not an exact match but was of a size that
later turned out to be best-fitting.)
5. The most recently remaindered chunk is used if it is
big enough and any of the following hold:
* It is exactly the right size
* The remainder was created from a previous malloc call
with a request of the same size as the current request size.
* The request size is < 512 bytes (In other words, for this
step, consecutive small requests are treated as if they
were all of the same size.)
6. Other bins are scanned in increasing size order, using a
chunk big enough to fulfill the request, and splitting off any
remainder.
7. The chunk bordering the end of memory (`top') is split off.
If the current top is not big enough, it is extended by
obtaining more space from the system (normally using sbrk,
but definable to anything else via the MORECORE macro).
Memory is gathered from the system (in system page-sized
units) in a way that allows chunks obtained across different
sbrk calls to be consolidated, but does not require
contiguous memory. Thus, it should be safe to intersperse
mallocs with other sbrk calls.
free:
There are four cases:
1. free(0) has no effect.
2. If the size of the chunk is <= max_recycle_size, it
is placed on the recycle_list for later processing.
3. If a returned chunk borders the current high end of memory,
it is consolidated into the top, and if the total unused
topmost memory exceeds the trim threshold, malloc_trim is
called. The default value of the trim threshold is high enough
so that trimming should only occur if the program is
maintaining enough unused memory to be worth releasing.
4. Other chunks are consolidated as they arrive, and
placed in corresponding bins. (This includes the case of
consolidating with the current `last_remainder').
realloc:
Reallocation proceeds in the usual way. If a chunk can be extended,
it is, else a malloc-copy-free sequence is taken.
The old unix realloc convention of allowing the last-free'd chunk
to be used as an argument to realloc is no longer supported.
I don't know of any programs still relying on this feature,
and allowing it would also allow too many other incorrect
usages of realloc to be sensible.
Unless the #define REALLOC_ZERO_BYTES_FREES below is set,
realloc with a size argument of zero (re)allocates a minimum-sized
chunk.
memalign:
memalign requests more than enough space from malloc, finds a spot
within that chunk that meets the alignment request, and then
possibly frees the leading and trailing space. Overreliance on
memalign is a sure way to fragment space.
valloc:
valloc just invokes memalign with alignment argument equal
to the page size of the system (or as near to this as can
be figured out from all the includes/defines below.)
calloc:
calloc calls malloc, then zeroes out the allocated chunk.
cfree:
cfree just calls free.
malloc_trim:
This routine gives memory back to the system (via negative
arguments to sbrk) if there is unused memory at the `high' end of
the malloc pool. You can call this after freeing large blocks of
memory to potentially reduce the system-level memory requirements
of a program. However, it cannot guarantee to reduce memory. Under
some allocation patterns, some large free blocks of memory will be
locked between two used chunks, so they cannot be given back to
the system.
The `pad' argument to malloc_trim represents the amount of free
trailing space to leave untrimmed. If this argument is zero,
only the minimum amount of memory to maintain internal data
structures will be left (one page or less). Non-zero arguments
can be supplied to maintain enough trailing space to service
future expected allocations without having to re-obtain memory
from the system.
malloc_usable_size:
This routine tells you how many bytes you can actually use in
an allocated chunk, which may be up to 24 bytes more than you
requested (although typically much less; often 0). You can use
this many bytes without worrying about overwriting other allocated
objects. Not a particularly great programming practice, but still
sometimes useful.
malloc_stats:
Prints on stderr the amount of space obtained from the system, the
maximum amount (which may be more than current if malloc_trim got
called), and the current number of bytes allocated via malloc (or
realloc, etc) but not yet freed. (Note that this is the number of
bytes allocated, not the number requested. It will be larger than
the number requested because of overhead.)
mallinfo:
This version of malloc supports to the extent possible the
standard SVID/XPG mallinfo routine that returns a struct
containing the same kind of information you can get from
malloc_stats. It is included mainly for use on SVID/XPG compliant
systems that have a /usr/include/malloc.h defining struct
mallinfo. (If you'd like to install such a thing yourself, cut out
the preliminary declarations as described below and save them in a
malloc.h file. But there's no compelling reason to bother to do
this.)
mallinfo() returns (by-copy) a mallinfo struct. The SVID/XPG
mallinfo struct contains a bunch of fields, most of which are not
even meaningful in this version of malloc. They are left blank
(zero). (Actually, I don't even know what some of them mean. These
fields are filled with numbers that might possibly be of interest.)
The fields that are meaningful are:
int arena; -- total space allocated from system
int ordblks; -- number of non-inuse, non-recycling chunks
int smblks; -- number of chunks in recycle list
int fsmblks; -- total space in recycle list
int uordblks; -- total allocated space
int fordblks; -- total non-inuse, non-recycling space
int keepcost; -- top-most, releasable (via malloc_trim) space
mallopt:
mallopt is the general SVID/XPG interface to tunable parameters.
The format is to provide a (parameter-number, parameter-value) pair.
mallopt then sets the corresponding parameter to the argument
value if it can (i.e., so long as the value is meaningful),
and returns 1 if successful else 0.
To be compliant, several parameter numbers are predefined
that have no effect on this malloc. However the following
are supported:
M_MXFAST (parameter number 1) is the maximum size of chunks that
may be placed on the recycle_list when they are freed. This is a
form of quick-list. However, unlike most implementations of
quick-lists, space for such small chunks is NOT segregated. If
the space is needed for chunks of other sizes, it will be used.
For small chunk sizes, the time savings from bypassing normal
malloc processing can be significant (although hardly ever
excessively so; even for programs that constantly allocate and
free chunks all of the same size the observed savings is almost
always less than 10%).
But bypassing normal malloc processing usually also increases
fragmentation, and thus increases space usage. However, for
small enough chunk sizes, the observed additional space usage is
normally so small not to matter.
Using the M_MXFAST option allows you to decide whether and how
you'd like to make this trade-off.
The default value is 72 bytes. This was arrived at entirely
empirically by finding the best compromise value across a suite
of test programs.
Setting it to zero disables recycling altogether. Setting it
to a value of greater than about 500 bytes is unlikely to be very
effective, for two reasons: (1) The malloc implementation is
tuned for the assumption that the value is small. (2)
Empirically, it is most often fastest not to bypass normal
processing for larger chunk sizes.
A byproduct of setting M_MXFAST is that malloc_trim is NOT
called from free when chunks less than max_recycle_size are
freed. So if you want automatic trimming in programs that only
allocate small chunks, you need to set M_MXFAST to zero. In programs
that allocate mixtures of sizes, this generally won't matter --
trim will get called soon enough anyway.
M_TRIM_THRESHOLD (parameter number -1) is the maximum amount of
unused top-most memory to keep before releasing via malloc_trim
in free().
Automatic trimming is mainly useful in long-lived programs.
Because trimming can be slow, and can sometimes be wasteful (in
cases where programs immediately afterward allocate more large
chunks) the value should be high enough so that your overall
system performance would improve by releasing. As a rough
guide, you might set to a value close to the average size of a
process (program) running on your system. Releasing this much
memory would allow such a process to run in memory.
The default value of 256K bytes appears to be a good
compromise. Must be greater than page size to have any useful
effect. To disable trimming completely, you can set to
(unsigned long)(-1);
M_TOP_PAD (parameter number -2) is the amount of extra `padding'
space to allocate or retain whenever sbrk is called.
It is used in two ways internally:
* When sbrk is called to extend the top of the arena to satisfy
a new malloc request, this much padding is added to the sbrk
request.
* When malloc_trim is called automatically from free(),
it is used as the `pad' argument.
In both cases, the actual amount of padding is rounded
so that the end of the arena is always a system page boundary.
Default value is 2K bytes.
The main reason for using padding is to avoid calling sbrk so
often. Having even a small pad greatly reduces the likelihood
that nearly every malloc request during program start-up (or
after trimming) will invoke sbrk, which needlessly wastes
time. In systems where sbrk is relatively slow, it can pay to
increase this value, at the expense of carrying around more
top-most memory than the program needs. Setting it to 0 reduces
best-case (but not necessarily typical-case) memory usage to
a dead minimum.
* Debugging:
Because freed chunks may be overwritten with link fields, this
malloc will often die when freed memory is overwritten by user
programs. This can be very effective (albeit in an annoying way)
in helping users track down dangling pointers.
If you compile with -DDEBUG, a number of assertion checks are
enabled that will catch more memory errors. You probably won't be
able to make much sense of the actual assertion errors, but they
should help you locate incorrectly overwritten memory. The
checking is fairly extensive, and will slow down execution
noticeably. Calling malloc_stats or mallinfo with DEBUG set will
attempt to check every allocated and free chunk in the course of
computing the summaries.
Setting DEBUG may also be helpful if you are trying to modify
this code. The assertions in the check routines spell out in more
detail the assumptions and invariants underlying the algorithms.
* Performance differences from previous versions
Users of malloc-2.5.X will find that generally, the current
version conserves space better, especially when large chunks are
allocated amid many other small ones. For example, it wastes much
less memory when user programs occasionally do things like
allocate space for GIF images amid other requests. Because of the
additional processing that leads to better behavior, it is
just-barely detectably slower than version 2.5.3 for some (but not
all) programs that only allocate small uniform chunks.
Using the default mallopt settings, Version 2.6.2 has very
similar space characteristics as 2.6.1, but is normally
faster. (In test cases, observed space differences range from
about -5% to +5%, and speed improvements range from about -5% to
+15%. The space differences result in part from new page
alignment policies.)
* Concurrency
Except when compiled using the special defines below for Linux
libc using weak aliases, this malloc is NOT designed to work in
multithreaded applications. No semaphores or other concurrency
control are provided to ensure that multiple malloc or free calls
don't run at the same time, which could be disastrous. A single
semaphore could be used across malloc, realloc, and free. It would
be hard to obtain finer granularity.
* Implementation notes
(The following includes lightly edited explanations by Colin Plumb.)
An allocated chunk looks like this:
chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Size of chunk, in bytes |P|
mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| User data starts here... .
. .
. (malloc_usable_size() bytes) .
. |
nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Size of next chunk |1|
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
Where "chunk" is the front of the chunk for the purpose of most of
the malloc code, but "mem" is the pointer that is returned to the
user. "Nextchunk" is the beginning of the next contiguous chunk.
Chunks always begin on odd-word boundaries, so the mem portion
(which is returned to the user) is on an even word boundary, and
thus double-word aligned.
Free chunks are stored in circular doubly-linked lists, and look like this:
chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Size of chunk, in bytes |P|
mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Forward pointer to next chunk in list |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Back pointer to previous chunk in list |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Unused space (may be 0 bytes long) .
. .
. |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Size of chunk, in bytes |
nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Size of next chunk |0|
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
The P (PREV_INUSE) bit, stored in the unused low-order bit of the
chunk size (which is always a multiple of two words), is an in-use
bit for the *previous* chunk. If that bit is *clear*, then the
word before the current chunk size contains the previous chunk
size, and can be used to find the front of the previous chunk.
(The very first chunk allocated always has this bit set,
preventing access to non-existent (or non-owned) memory.)
The only exception to all this is the special chunk `top', which
doesn't bother using the trailing size field since there is no
next contiguous chunk that would have to index off it. (After
initialization, `top' is forced to always exist. If it would
become less than MINSIZE bytes long, it is replenished via
malloc_extend_top.)
The bins, `av_' are an array of pointers serving as the heads of
(initially empty) doubly-linked lists of chunks. Bins for sizes <
512 bytes contain chunks of all the same size, spaced 8 bytes
apart. Larger bins are approximately logarithmically spaced. (See
the table below.) The `av_' array is never mentioned directly
in the code, but instead via bin access macros.
The chunks in each bin are linked in decreasing sorted order by
size. This is irrelevant for the small bins, which all contain
the same-sized chunks, but facilitates best-fit allocation for
larger chunks. (These lists are just sequential. Keeping them in
order almost never requires enough traversal to warrant using
fancier ordered data structures.) Chunks of the same size are
linked with the most recently freed at the front, and allocations
are taken from the back. This results in LRU or FIFO allocation
order, which tends to give each chunk an equal opportunity to be
consolidated with adjacent freed chunks, resulting in larger free
chunks and less fragmentation.
The exception to this ordering is that freed chunks of size <=
max_recycle_size are scanned in LIFO order (i.e., the most
recently freed chunk is scanned first) and used if possible in
malloc (or if not usable, placed into a normal FIFO bin). This
ordering adapts better to size-phasing in user programs. The
recycle_list that holds these chunks is a simple singly-linked
list that uses the `fd' pointers of the chunks for linking.
The special chunks `top' and `last_remainder' get their own bins,
(this is implemented via yet more trickery with the av_ array),
although `top' is never properly linked to its bin since it is
always handled specially.
Search is generally via best-fit; i.e., the smallest (with ties
going to approximately the least recently used) chunk that fits is
selected. The use of `top' is in accord with this rule. In
effect, `top' is treated as larger (and thus less well fitting)
than any other available chunk since it can be extended to be as
large as necessary (up to system limitations).
The exception to this search rule is that in the absence of exact
fits, runs of same-sized (or merely `small') requests use the
remainder of the chunk used for the previous such request whenever
possible. This limited use of a `first-fit' style allocation
strategy tends to give contiguous chunks coextensive lifetimes,
which improves locality and sometimes reduces fragmentation in the
long run.
All allocations are made from the `lowest' part of any found
chunk. (The implementation invariant is that prev_inuse is always
true of any allocated chunk; i.e., that each allocated chunk
borders a previously allocated and still in-use chunk.) This
policy holds even for chunks on the recycle_list. Recycled chunks
that do not border used chunks are bypassed. (However, the policy
holds only approximately in this case. A taken chunk might border
one that is not really in use, but is instead still on the recycle
list.) This also tends to reduce fragmentation, improve locality,
and increase the likelihood that malloc_trim will actually release
memory.
To help compensate for the large number of bins, a one-level index
structure is used for bin-by-bin searching. `binblocks' is a
one-word bitvector recording whether groups of BINBLOCKWIDTH bins
have any (possibly) non-empty bins, so they can be skipped over
all at once during traversals. The bits are NOT always
cleared as soon as all bins in a block are empty, but instead only
when all are noticed to be empty during traversal in malloc.
* Style
The implementation is in straight, hand-tuned ANSI C. Among other
consequences, it uses a lot of macros. These would be nicer as
inlinable procedures, but using macros allows use with
non-inlining compilers. The use of macros etc., requires that, to
be at all usable, this code be compiled using an optimizing
compiler (for example gcc -O2) that can simplify expressions and
control paths. Also, because there are so many different twisty
paths through malloc steps, the code is not exactly elegant.
* History:
V2.6.2 Tue Dec 5 06:52:55 1995 Doug Lea (dl at gee)
* Re-introduce recycle_list, similar to `returns' list in V2.5.X.
* Use last_remainder in more cases.
* Pack bins using idea from [email protected]
* Use ordered bins instead of best-fit threshold
* Eliminate block-local decls to simplify tracing and debugging.
* Support another case of realloc via move into top
* Fix error occurring when initial sbrk_base not word-aligned.
* Rely on page size for units instead of SBRK_UNIT to
avoid surprises about sbrk alignment conventions.
* Add mallinfo, mallopt. Thanks to Raymond Nijssen
([email protected]) for the suggestion.
* Add `pad' argument to malloc_trim and top_pad mallopt parameter.
* More precautions for cases where other routines call sbrk,
courtesy of Wolfram Gloger ([email protected]).
* Added macros etc., allowing use in linux libc from
H.J. Lu ([email protected])
* Inverted this history list
V2.6.1 Sat Dec 2 14:10:57 1995 Doug Lea (dl at gee)
* Re-tuned and fixed to behave more nicely with V2.6.0 changes.
* Removed all preallocation code since under current scheme
the work required to undo bad preallocations exceeds
the work saved in good cases for most test programs.
* No longer use return list or unconsolidated bins since
no scheme using them consistently outperforms those that don't
given above changes.
* Use best fit for very large chunks to prevent some worst-cases.
* Added some support for debugging
V2.6.0 Sat Nov 4 07:05:23 1995 Doug Lea (dl at gee)
* Removed footers when chunks are in use. Thanks to
Paul Wilson ([email protected]) for the suggestion.
V2.5.4 Wed Nov 1 07:54:51 1995 Doug Lea (dl at gee)
* Added malloc_trim, with help from Wolfram Gloger
V2.5.3 Tue Apr 26 10:16:01 1994 Doug Lea (dl at g)
V2.5.2 Tue Apr 5 16:20:40 1994 Doug Lea (dl at g)
* realloc: try to expand in both directions
* malloc: swap order of clean-bin strategy;
* realloc: only conditionally expand backwards
* Try not to scavenge used bins
* Use bin counts as a guide to preallocation
* Occasionally bin return list chunks in first scan
* Add a few optimizations from [email protected]
V2.5.1 Sat Aug 14 15:40:43 1993 Doug Lea (dl at g)
* faster bin computation & slightly different binning
* merged all consolidations to one part of malloc proper
(eliminating old malloc_find_space & malloc_clean_bin)
* Scan 2 returns chunks (not just 1)
* Propagate failure in realloc if malloc returns 0
* Add stuff to allow compilation on non-ANSI compilers
from [email protected]
V2.5 Sat Aug 7 07:41:59 1993 Doug Lea (dl at g.oswego.edu)
* removed potential for odd address access in prev_chunk
* removed dependency on getpagesize.h
* misc cosmetics and a bit more internal documentation
* anticosmetics: mangled names in macros to evade debugger strangeness
* tested on sparc, hp-700, dec-mips, rs6000
with gcc & native cc (hp, dec only) allowing
Detlefs & Zorn comparison study (in SIGPLAN Notices.)
Trial version Fri Aug 28 13:14:29 1992 Doug Lea (dl at g.oswego.edu)
* Based loosely on libg++-1.2X malloc. (It retains some of the overall
structure of old version, but most details differ.)
*/
/* ---------- To make a malloc.h, start cutting here ------------ */
/* preliminaries */
/* Settle __STD_C to 1 when the compiler announces itself as standard C
   (__STDC__ defined) or as C++ (__cplusplus nonzero), and to 0 otherwise.
   A value supplied before this point is left untouched. */
#if !defined(__STD_C)
#if defined(__STDC__) || __cplusplus
#define __STD_C 1
#else
#define __STD_C 0
#endif /* __STDC__ || __cplusplus */
#endif /* !defined(__STD_C) */
/* Void_t expands to `void' when __STD_C is nonzero and to `char'
   otherwise.  An existing definition of Void_t takes precedence. */
#if !defined(Void_t)
#if !__STD_C
#define Void_t char
#else
#define Void_t void
#endif
#endif /* !defined(Void_t) */
#if __STD_C
#include <stddef.h> /* for size_t */
#else
#include <sys/types.h>
#endif
#include <stdio.h> /* needed for malloc_stats */
#if DEBUG /* define DEBUG to get run-time debug assertions */
#include <assert.h>
#else
#define assert(x) ((void)0)
#endif
#ifdef __cplusplus
extern "C" {
#endif
/*
Compile-time options
*/
/*
REALLOC_ZERO_BYTES_FREES should be set if a call to
realloc with zero bytes should be the same as a call to free.
Some people think it should. Otherwise, since this malloc
returns a unique pointer for malloc(0), so does realloc(p, 0).
*/
/* #define REALLOC_ZERO_BYTES_FREES */
/*
  HAVE_MEMCPY should be defined if you are not otherwise using
  ANSI STD C, but still have memcpy and memset in your C library
  and want to use them. By default defined.

  The guard keeps a definition already supplied by the build
  (for example -DHAVE_MEMCPY, which expands to 1) from being
  redefined here with a different replacement list.
*/
#ifndef HAVE_MEMCPY
#define HAVE_MEMCPY
#endif
/*
  How to zero out and copy memory (needed in calloc, realloc).

    MALLOC_ZERO(charp, nbytes)       clear nbytes bytes starting at charp
    MALLOC_COPY(dest, src, nbytes)   copy nbytes bytes from src to dest

  With __STD_C nonzero or HAVE_MEMCPY defined these are memset/memcpy.
  The prototypes are only spelled out in ANSI form when __STD_C is
  nonzero; an old-style compiler gets old-style declarations.

  Otherwise word-at-a-time loops are used.  Both forms must be followed
  by a semicolon at the call site.  The loop forms are wrapped in
  do { } while (0) so they act as a single statement (safe in an
  unbraced if/else), and every macro argument is parenthesized so that
  an argument such as `p + k' is cast as a whole.
*/
#if __STD_C || defined(HAVE_MEMCPY)
#if __STD_C
void* memset(void*, int, size_t);
void* memcpy(void*, const void*, size_t);
#else
Void_t* memset();
Void_t* memcpy();
#endif
#define MALLOC_ZERO(charp, nbytes) memset((charp), 0, (nbytes))
#define MALLOC_COPY(dest,src,nbytes) memcpy((dest), (src), (nbytes))
#else
/* We only invoke with multiples of size_t units, with size_t alignment */
#define MALLOC_ZERO(charp, nbytes) \
do { \
  size_t* mzp = (size_t*)(charp); \
  size_t mzn = (nbytes) / sizeof(size_t); \
  while (mzn-- > 0) *mzp++ = 0; \
} while (0)
#define MALLOC_COPY(dest,src,nbytes) \
do { \
  size_t* mcsrc = (size_t*)(src); \
  size_t* mcdst = (size_t*)(dest); \
  size_t mcn = (nbytes) / sizeof(size_t); \
  while (mcn-- > 0) *mcdst++ = *mcsrc++; \
} while (0)
#endif
/*
HAVE_MMAP, when nonzero, lets malloc() use mmap() to
allocate very large blocks. These will be returned to the
operating system immediately after a free().
It defaults to 1 below; predefine HAVE_MMAP as 0 to compile
without the mmap headers.
*/
/* mmap support is on unless the build predefines HAVE_MMAP otherwise */
#if !defined(HAVE_MMAP)
#  define HAVE_MMAP 1
#endif

#if HAVE_MMAP
#  include <unistd.h>
#  include <fcntl.h>
#  include <sys/mman.h>
   /* accept the MAP_ANON spelling where MAP_ANONYMOUS is not provided */
#  if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
#    define MAP_ANONYMOUS MAP_ANON
#  endif
#endif /* HAVE_MMAP */
/*
HAVE_USR_INCLUDE_MALLOC_H should be set if you have a
/usr/include/malloc.h file that includes an SVID2/XPG2 declaration
of struct mallinfo. If so, it is included; else an SVID2/XPG2
compliant version is declared within this file. Since these must be
precisely the same for mallinfo and mallopt to work anyway, the main
reason to define this would be to prevent multiple-declaration
errors in files already including malloc.h.
*/
/* #define HAVE_USR_INCLUDE_MALLOC_H 1 */
#if !HAVE_USR_INCLUDE_MALLOC_H

/*
  SVID2/XPG mallinfo structure, declared here because the system
  malloc.h is not being included.
*/
struct mallinfo {
  int arena;     /* total space allocated from system */
  int ordblks;   /* number of non-inuse, non-recycling chunks */
  int smblks;    /* number of chunks in recycle list */
  int hblks;     /* unused -- always zero */
  int hblkhd;    /* unused -- always zero */
  int usmblks;   /* unused -- always zero */
  int fsmblks;   /* total space in recycle list */
  int uordblks;  /* total allocated space */
  int fordblks;  /* total non-inuse, non-recycling space */
  int keepcost;  /* top-most, releasable (via malloc_trim) space */
};

/* SVID2/XPG mallopt options */
#define M_MXFAST 1
#define M_NLBLKS 2
#define M_GRAIN 3
#define M_KEEP 4

#else /* HAVE_USR_INCLUDE_MALLOC_H */

/* take struct mallinfo and the option numbers from the system header */
#include "/usr/include/malloc.h"

#endif /* HAVE_USR_INCLUDE_MALLOC_H */
/*
  mallopt options that actually do something.
  M_MXFAST keeps its SVID number; it is only defined here if nothing
  above has already done so.  The remaining options use negative
  numbers.
*/
#if !defined(M_MXFAST)
#  define M_MXFAST 1
#endif
#define M_TRIM_THRESHOLD -1
#define M_TOP_PAD -2
#define M_MMAP_THRESHOLD -3
#define M_MMAP_MAX -4
/*
Initial values of tunable parameters
*/
/* Each default below may be overridden by predefining the macro. */

#if !defined(DEFAULT_TRIM_THRESHOLD)
#  define DEFAULT_TRIM_THRESHOLD (256 * 1024)
#endif

#if !defined(DEFAULT_TOP_PAD)
#  define DEFAULT_TOP_PAD (2 * 1024)
#endif

#if !defined(DEFAULT_RECYCLE_SIZE)
#  define DEFAULT_RECYCLE_SIZE (72)
#endif

#if !defined(DEFAULT_MMAP_THRESHOLD)
#  define DEFAULT_MMAP_THRESHOLD (512 * 1024)
#endif

#if !defined(DEFAULT_MMAP_MAX)
#  define DEFAULT_MMAP_MAX (16)
#endif
/*
  MORECORE names the routine called to obtain more memory, and
  MORECORE_FAILURE is the value it is expected to return on failure.
*/
#ifndef INTERNAL_LINUX_C_LIB

/* Ordinary build: MORECORE is sbrk, declared here by hand. */
#  if __STD_C
extern Void_t* sbrk(ptrdiff_t);
#  else
extern Void_t* sbrk();
#  endif
#  define MORECORE sbrk
#  define MORECORE_FAILURE -1

#else /* INTERNAL_LINUX_C_LIB */

/*
  Built inside the Linux C library: MORECORE calls through the
  __morecore pointer, which starts out at __default_morecore_init.
*/
#  if __STD_C
Void_t * __default_morecore_init (ptrdiff_t);
Void_t *(*__morecore)(ptrdiff_t) = __default_morecore_init;
#  else
Void_t * __default_morecore_init ();
Void_t *(*__morecore)() = __default_morecore_init;
#  endif
#  define MORECORE (*__morecore)
#  define MORECORE_FAILURE 0

#endif /* INTERNAL_LINUX_C_LIB */
/*
  Names under which the public routines are defined.  Ordinarily these
  are the usual names.  Inside the Linux C library on ELF the routines
  get __libc_ names and the usual names are made weak aliases for them.
*/
#if !(defined(INTERNAL_LINUX_C_LIB) && defined(__ELF__))

#  define MALLOC malloc
#  define FREE free
#  define REALLOC realloc
#  define CALLOC calloc
#  define MEMALIGN memalign
#  define VALLOC valloc
#  define MALLINFO mallinfo
#  define MALLOPT mallopt

#else /* INTERNAL_LINUX_C_LIB && __ELF__ */

#  define MALLOC __libc_malloc
#  define FREE __libc_free
#  define REALLOC __libc_realloc
#  define CALLOC __libc_calloc
#  define MEMALIGN __libc_memalign
#  define VALLOC __libc_valloc
#  define MALLINFO __libc_mallinfo
#  define MALLOPT __libc_mallopt

#pragma weak calloc = __libc_calloc
#pragma weak free = __libc_free
#pragma weak cfree = __libc_free
#pragma weak malloc = __libc_malloc
#pragma weak memalign = __libc_memalign
#pragma weak realloc = __libc_realloc
#pragma weak valloc = __libc_valloc
#pragma weak mallinfo = __libc_mallinfo
#pragma weak mallopt = __libc_mallopt

#endif
/*
  malloc_getpagesize: the page size expression.
  Adapted from bsd/gnu getpagesize.h.  getpagesize() is called when the
  build says it exists; otherwise the first of EXEC_PAGESIZE, NBPG
  (times CLSIZE if that is defined), NBPC or PAGESIZE that
  <sys/param.h> provides is used, with 8192 as a last-resort guess.
*/
#if defined(BSD) || defined(DGUX) || defined(HAVE_GETPAGESIZE)
extern size_t getpagesize();
#  define malloc_getpagesize getpagesize()
#else
#  include <sys/param.h>
#  if defined(EXEC_PAGESIZE)
#    define malloc_getpagesize EXEC_PAGESIZE
#  elif defined(NBPG) && !defined(CLSIZE)
#    define malloc_getpagesize NBPG
#  elif defined(NBPG)
#    define malloc_getpagesize (NBPG * CLSIZE)
#  elif defined(NBPC)
#    define malloc_getpagesize NBPC
#  elif defined(PAGESIZE)
#    define malloc_getpagesize PAGESIZE
#  else
#    define malloc_getpagesize (8192) /* just guess */
#  endif
#endif
/*
  Declarations of public routines.

  MALLOC, FREE, REALLOC, MEMALIGN, VALLOC, CALLOC, MALLOPT and MALLINFO
  are macros defined earlier in this file that expand to the actual
  function names.  With __STD_C nonzero every routine gets a full
  prototype (malloc_stats included, as `(void)'); otherwise old-style
  declarations without parameter lists are used.
*/
#if __STD_C
Void_t* MALLOC(size_t);
void FREE(Void_t*);
Void_t* REALLOC(Void_t*, size_t);
Void_t* MEMALIGN(size_t, size_t);
Void_t* VALLOC(size_t);
Void_t* CALLOC(size_t, size_t);
void cfree(Void_t*);
int malloc_trim(size_t);
size_t malloc_usable_size(Void_t*);
void malloc_stats(void);
int MALLOPT(int, int);
struct mallinfo MALLINFO(void);
#else
Void_t* MALLOC();
void FREE();
Void_t* REALLOC();
Void_t* MEMALIGN();
Void_t* VALLOC();
Void_t* CALLOC();
void cfree();
int malloc_trim();
size_t malloc_usable_size();
void malloc_stats();
int MALLOPT();
struct mallinfo MALLINFO();
#endif
#ifdef __cplusplus
} /* end of extern "C" (no trailing ';': it would be an empty declaration) */
#endif
/* ---------- To make a malloc.h, end cutting here ------------ */
/* CHUNKS */
/*
  Chunk header.  `size' comes first; the two links are meaningful only
  while the chunk is free; `unused' exists only to pad the declaration
  out to the minimum chunk size.
*/
struct malloc_chunk {
  size_t               size;    /* Size in bytes, including overhead. */
  struct malloc_chunk *fd;      /* forward link  -- used only if free. */
  struct malloc_chunk *bk;      /* backward link -- used only if free. */
  size_t               unused;  /* to pad decl to min chunk size */
};
/* size field is or'ed with PREV_INUSE when previous adjacent chunk in use */