malloc and free

Overview

Yesterday I saw someone share their interview experience.

It contained this line: "For one, Java's memory allocation works differently from C/C++: in C/C++, every malloc or new request triggers system calls such as brk and mmap, and since system calls run in kernel space, each one requires a trap and context switch, which carries some overhead."

Since this candidate ultimately got an Alibaba offer, and the answer drew the response "(the interviewer kept nodding in agreement with my answer) Mm, looks like you really have this area down", I don't know whether the candidate misread the interviewer's reaction or the interviewer genuinely approved. Either way it left me with mixed feelings; Alibaba has always been an idol of mine.

I can't speak to Java, so no comment there, but I do know glibc's memory management, and in my view that answer is imprecise at best.

I asked the colleagues around me: some weren't sure, some thought memory is requested straight from the OS, some knew the rough shape of it, but hardly anyone could explain the underlying mechanism from end to end.

So today I'll walk through the question against the glibc source.
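Before diving in, here is a quick way to check the claim yourself. A minimal experiment (a sketch; "demo.c" is a hypothetical name): build it and run it under "strace -e trace=brk,mmap ./demo". brk/mmap show up only a handful of times around startup, not once per malloc.

#include <stdlib.h>

int main (void)
{
  for (int i = 0; i < 100000; i++)
    {
      void *p = malloc (64);  /* small request: served from the bins, no syscall */
      free (p);               /* goes back into a fastbin, also no syscall */
    }
  return 0;
}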

On malloc and free: the ptmalloc implementation Linux uses lives in glibc, in malloc/malloc.c.

First, the malloc and free prototypes we all know cannot be found verbatim in this file. One genuinely annoying thing about glibc is the number of build options: a single code base has to support an enormous amount, has been in service for decades, and accumulates every style.

malloc is defined through macros, and the macro names differ between old and new versions. Newer glibc has tidied this up somewhat, controlling visibility with the libc_hidden_def/libc_hidden_proto pair of macros; the old scheme was rigid enough that presumably even the maintainers couldn't stand it.

The version on my office machine is fairly old (I forget exactly which); there the prototype is Void_t *public_mALLOc (size_t bytes).

At home I run 2.20, where the prototype is void *__libc_malloc (size_t bytes). The function bodies are essentially the same.

I recommend reading this code in a newer version; the logic is clearer.

Whether it is called public_mALLOc or __libc_malloc, it is only a wrapper around _int_malloc (mstate av, size_t bytes), which is the real core of allocation.

Let's start from the outer layer:


void *
__libc_malloc (size_t bytes)
{
  mstate ar_ptr;
  void *victim;

//check for an allocation hook; if one is installed, call it and return
  void *(*hook) (size_t, const void *)
    = atomic_forced_read (__malloc_hook);
  if (__builtin_expect (hook != NULL, 0))
    return (*hook)(bytes, RETURN_ADDRESS (0));

//get an arena pointer; return on failure
  arena_lookup (ar_ptr);
  arena_lock (ar_ptr, bytes);
  if (!ar_ptr)
    return 0;

//call _int_malloc to do the actual allocation
  victim = _int_malloc (ar_ptr, bytes);

//allocation failed:
  if (!victim)
    {
      LIBC_PROBE (memory_malloc_retry, 1, bytes);

      //jump ahead to arena_get_retry below before reading on
      ar_ptr = arena_get_retry (ar_ptr, bytes);
      if (__builtin_expect (ar_ptr != NULL, 1))
        {

 //if we got a usable arena, allocate from it, then unlock
          victim = _int_malloc (ar_ptr, bytes);
          (void) mutex_unlock (&ar_ptr->mutex);
        }
    }
  else

     //allocation succeeded: release the lock and return the address
    (void) mutex_unlock (&ar_ptr->mutex);
  assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
          ar_ptr == arena_for_chunk (mem2chunk (victim)));
  return victim;
}

/* If we don't have the main arena, then maybe the failure is due to running
   out of mmapped areas, so we can try allocating on the main arena.
   Otherwise, it is likely that sbrk() has failed and there is still a chance
   to mmap(), so try one of the other arenas.  */

//if the failing arena is not the main arena, the failure may be because that arena's mmapped region is exhausted,

//so we can retry on the main arena. If it is the main arena, sbrk() has likely failed

//but mmap() may still succeed, so try one of the other arenas.
static mstate
arena_get_retry (mstate ar_ptr, size_t bytes)
{
  LIBC_PROBE (memory_arena_retry, 2, bytes, ar_ptr);

//not the main arena:
  if (ar_ptr != &main_arena)
    {

//release the current arena's lock
      (void) mutex_unlock (&ar_ptr->mutex);

     //take the main arena

      ar_ptr = &main_arena;

     //and lock it
      (void) mutex_lock (&ar_ptr->mutex);
    }
  else
    {

//this is the main arena: try another arena
      /* Grab ar_ptr->next prior to releasing its lock.  */
      mstate prev = ar_ptr->next ? ar_ptr : 0;
      (void) mutex_unlock (&ar_ptr->mutex);

      //get another arena
      ar_ptr = arena_get2 (prev, bytes, ar_ptr);
    }


  return ar_ptr;
}

Next comes the real allocation function, which allocates bytes of memory from arena av.

Before walking through it, here are the important data structures:

struct malloc_state
{
  /* Serialize access.  */
  mutex_t mutex;             // arena mutex
  /* Flags (formerly in max_fast).  */
  int flags;
  /* Fastbins */
  mfastbinptr fastbinsY[NFASTBINS];
  /* Base of the topmost chunk -- not otherwise kept in a bin */
  mchunkptr top;
  /* The remainder from the most recent split of a small request;
     ptmalloc keeps this pointer to improve locality for small requests.  */
  mchunkptr last_remainder;
  /* Normal bins packed as described above */
  mchunkptr bins[NBINS * 2 - 2];
  /* Bitmap of bins */
  unsigned int binmap[BINMAPSIZE];
  /* Linked list */
  struct malloc_state *next;
  /* Linked list for free arenas.  */
  struct malloc_state *next_free;
  /* Memory allocated from the system in this arena.  */
  INTERNAL_SIZE_T system_mem;
  INTERNAL_SIZE_T max_system_mem;
};

/*
  This struct declaration is misleading (but accurate and necessary).
  It declares a "view" into memory allowing access to necessary
  fields at known offsets from a given base. See explanation below.
*/

//ptmalloc's view of a block of memory
struct malloc_chunk {
  INTERNAL_SIZE_T      prev_size;  /* Size of previous chunk (if free).  */
  INTERNAL_SIZE_T      size;       /* Size in bytes, including overhead. */
  struct malloc_chunk* fd;         /* double links -- used only if free. */
  struct malloc_chunk* bk;
  /* Only used for large blocks: pointer to next larger size.  */
  struct malloc_chunk* fd_nextsize; /* double links -- used only if free. */
  struct malloc_chunk* bk_nextsize;
};
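Two small macros tie malloc_chunk to the pointer the user actually sees; simplified from malloc.c (the real definitions carry a few more casts):

/* The user pointer sits 2 * SIZE_SZ bytes past the chunk start, skipping
   prev_size and size.  fd/bk occupy what becomes user data once the chunk
   is allocated -- which is why a free chunk gets its link pointers for free. */
#define chunk2mem(p)   ((void *) ((char *) (p) + 2 * SIZE_SZ))
#define mem2chunk(mem) ((mchunkptr) ((char *) (mem) - 2 * SIZE_SZ))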



static void *
_int_malloc (mstate av, size_t bytes)
{
  INTERNAL_SIZE_T nb;               /* normalized request size */
  unsigned int idx;                 /* associated bin index */
  mbinptr bin;                      /* associated bin */


  mchunkptr victim;                 /* inspected/selected chunk */
  INTERNAL_SIZE_T size;             /* its size */
  int victim_index;                 /* its bin index */


  mchunkptr remainder;              /* remainder from a split */
  unsigned long remainder_size;     /* its size */


  unsigned int block;               /* bit map traverser */
  unsigned int bit;                 /* bit map traverser */
  unsigned int map;                 /* current word of binmap */


  mchunkptr fwd;                    /* misc temp for linking */
  mchunkptr bck;                    /* misc temp for linking */


  const char *errstr = NULL;


  /*
     Convert request size to internal form by adding SIZE_SZ bytes
     overhead plus possibly more to obtain necessary alignment and/or
     to obtain a size of at least MINSIZE, the smallest allocatable
     size. Also, checked_request2size traps (returning 0) request sizes
     that are so large that they wrap around zero when padded and
     aligned.
   */

//ptmalloc does all internal allocation in units of chunks; the chunk size decides how a chunk is obtained

//so before allocating, convert the requested size in bytes to the required chunk size nb
  checked_request2size (bytes, nb);


  /*
     If the size qualifies as a fastbin, first check corresponding bin.
     This code is safe to execute even if av is not yet initialized, so we
     can try it without checking, which saves some time on this fast path.
   */

//if nb is within the largest fastbin chunk size, try to allocate from the fastbins

//so what is a fastbin? the source comment says:

    An array of lists holding recently freed small chunks.  Fastbins
    are not doubly linked.  It is faster to single-link them, and
    since chunks are never removed from the middles of these lists,
    double linking is not necessary

Reading this far, we can already settle the question: not every malloc leads to a system call.

  if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
    {

//compute the fastbin index from nb
      idx = fastbin_index (nb);

//address of the bin for that index
      mfastbinptr *fb = &fastbin (av, idx);
      mchunkptr pp = *fb;

//a short digression: older malloc had an ATOMIC_FASTBINS macro controlling whether this optimization was built in

//for robustness (malloc and free can race with each other) and because most CPUs now support CAS,

//the newest versions dropped the macro and made the atomic path standard, at a slight cost in speed.

//ATOMIC_FASTBINS uses the compare-and-swap atomic operation to pop the head of a singly linked list lock-free:

//if the pointer still holds the expected value it is swapped to the new value; either way the original value is returned.
      do
        {
          victim = pp;//pp is the current head, victim the expected value
          if (victim == NULL)
            break;
        }
      while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim))//try to swap in victim->fd as the new head
             != victim);//the CAS returns the old head: equal to victim means the swap succeeded, so exit; otherwise retry with the new head
      if (victim != 0)
        {
          if (__builtin_expect (fastbin_index (chunksize (victim)) != idx, 0))
            {
              errstr = "malloc(): memory corruption (fast)";
            errout:
              malloc_printerr (check_action, errstr, chunk2mem (victim));
              return NULL;
            }
          check_remalloced_chunk (av, victim, nb);//debug-build check
          void *p = chunk2mem (victim);//strip the chunk header; return the address the user actually gets
          alloc_perturb (p, bytes);//initialize according to perturb_byte, which is set via mallopt
          return p;//success: hand the memory back
        }
    }
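That do/while is the classic lock-free pop of a singly linked list head. A standalone sketch of the same pattern in C11 atomics (hypothetical types, not glibc code; glibc can safely dereference the popped node's fd because chunks are never handed back to the OS mid-flight):

#include <stdatomic.h>
#include <stddef.h>

struct node { struct node *next; };

/* Pop the head of a lock-free stack; returns NULL if empty. */
static struct node *
pop (struct node *_Atomic *top)
{
  struct node *head = atomic_load (top);
  while (head != NULL
         && !atomic_compare_exchange_weak (top, &head, head->next))
    ;  /* CAS failed: head was reloaded with the current value, retry */
  return head;
}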


  /*
     If a small request, check regular bin.  Since these "smallbins"
     hold one size each, no searching within bins is necessary.
     (For a large request, we need to wait until unsorted chunks are
     processed to find best fit. But for small ones, fits are exact
     anyway, so we can check now, which is faster.)
   */


  if (in_smallbin_range (nb))

// i.e. nb < MIN_LARGE_SIZE, which is 64 * 2 * sizeof (size_t): 1024 on a

// typical 64-bit build, 512 on 32-bit. The exact value depends on the
// MALLOC_ALIGNMENT macro (powerpc32 differs; see the source comments for details)
    {
      idx = smallbin_index (nb);//compute idx from nb
      bin = bin_at (av, idx);//address of that bin inside the arena's bins array, computed as:
      //(mbinptr) (((char *) &((m)->bins[((i) - 1) * 2])) - offsetof (struct malloc_chunk, fd))

      if ((victim = last (bin)) != bin)
        {
          if (victim == 0) /* initialization check */
            malloc_consolidate (av);//tears down chunks held in fastbins and init check
          else
            {
              bck = victim->bk;
              if (__glibc_unlikely (bck->fd != victim))
                {
                  errstr = "malloc(): smallbin double linked list corrupted";
                  goto errout;
                }

              //mark the chunk in use and unlink it from the bin
              set_inuse_bit_at_offset (victim, nb);
              bin->bk = bck;
              bck->fd = bin;

              if (av != &main_arena)
                victim->size |= NON_MAIN_ARENA;//set the NON_MAIN_ARENA flag (third-lowest bit of size)
              check_malloced_chunk (av, victim, nb);
              void *p = chunk2mem (victim);//skip victim's header (2 * SIZE_SZ bytes) and return the usable area
              alloc_perturb (p, bytes);//initialize per perturb_byte
              return p;
            }
        }
    }
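For intuition, the small-bin index math on a typical 64-bit build (a hedged illustration; SMALLBIN_WIDTH is 16 there, and SMALLBIN_CORRECTION is 0 unless MALLOC_ALIGNMENT exceeds 2 * SIZE_SZ):

//  smallbin_index(32)   = 32 >> 4   = 2   -> the first usable small bin
//  smallbin_index(1008) = 1008 >> 4 = 63  -> the last small bin
//  each small bin holds exactly one size, hence "fits are exact anyway"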


  /*
     If this is a large request, consolidate fastbins before continuing.
     While it might look excessive to kill all fastbins before
     even seeing if there is space available, this avoids
     fragmentation problems normally associated with fastbins.
     Also, in practice, programs tend to have runs of either small or
     large requests, but less often mixtures, so consolidation is not
     invoked all that often in most programs. And the programs that
     it is called frequently in otherwise tend to fragment.

     In other words: for a large request, consolidate the fastbins first.
     Tearing down every fastbin before even checking whether space is
     available looks expensive, but it effectively avoids the fragmentation
     that fastbins otherwise cause.

     In practice, programs tend to run with mostly small or mostly large
     requests and rarely mix the two, so consolidation is not invoked all
     that often.

     ps: glibc's ptmalloc does embody a memory-pool abstraction, but one
     tuned to an assumed usage pattern -- essentially that of smaller
     programs. It does not serve the complex memory demands of large
     programs particularly well, and large programs usually have their own
     peculiar requirements anyway. That is why apache, compilers,
     interpreters and the like implement their own pools and avoid leaning
     on glibc directly: one design cannot satisfy every need.
   */
  else
    {
      idx = largebin_index (nb);//on an ordinary 32-bit machine this is the largebin_index_32 (sz) macro
      if (have_fastchunks (av))
        malloc_consolidate (av);
    }
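To make the "roll your own pool" point concrete, here is the kind of minimal fixed-size free-list pool such programs build when the general-purpose strategy does not fit. A single-threaded sketch with hypothetical names, not production code:

#include <stddef.h>

#define SLOT_SIZE 64            /* every allocation is the same size */
#define NSLOTS    1024

struct slot { struct slot *next; };

static _Alignas (void *) char backing[SLOT_SIZE * NSLOTS];
static struct slot *freelist;

static void
pool_init (void)
{
  for (int i = 0; i < NSLOTS; i++)
    {
      struct slot *s = (struct slot *) (backing + i * SLOT_SIZE);
      s->next = freelist;       /* push every slot onto the free list */
      freelist = s;
    }
}

static void *
pool_alloc (void)               /* O(1), never a system call */
{
  struct slot *s = freelist;
  if (s != NULL)
    freelist = s->next;
  return s;
}

static void
pool_free (void *p)             /* O(1), no headers, no coalescing */
{
  struct slot *s = p;
  s->next = freelist;
  freelist = s;
}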


  /*
     Process recently freed or remaindered chunks, taking one only if
     it is exact fit, or, if this a small request, the chunk is remainder from
     the most recent non-exact fit.  Place other traversed chunks in
     bins.  Note that this step is the only place in any routine where
     chunks are placed in bins.

     In other words: traverse recently freed or split-off chunks, taking one
     only if it fits exactly, or, for a small request, if it is the remainder
     of the most recent non-exact fit.

     Any other chunk we traverse gets sorted into its bin along the way.

     Note: this is the only place in any routine where chunks are placed into bins.

     The outer loop here is needed because we might not realize until
     near the end of malloc that we should have consolidated, so must
     do so and retry. This happens at most once, and only when we would
     otherwise need to expand memory to service a "small" request.

   */


  for (;; )
    {
      int iters = 0;

     //#define unsorted_chunks(M)          (bin_at (M, 1)) -- slot 1 of M's bins array is the unsorted bin
      while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))//once we have gone all the way around, the while exits
        {
          bck = victim->bk;//save the bk pointer first
          if (__builtin_expect (victim->size <= 2 * SIZE_SZ, 0)//size must exceed the minimum possible
              || __builtin_expect (victim->size > av->system_mem, 0))//and be less than the arena's total memory
            malloc_printerr (check_action, "malloc(): memory corruption",
                             chunk2mem (victim));
          size = chunksize (victim);


          /*
             If a small request, try to use last remainder if it is the
             only chunk in unsorted bin.  This helps promote locality for
             runs of consecutive small requests. This is the only
             exception to best-fit, and applies only when there is
             no exact fit for a small chunk.

             In other words: for a small request, use the most recently split-off remainder if it is the only chunk in the unsorted bin.
           */


          if (in_smallbin_range (nb) &&//small request, and the small bins above had no matching chunk
              bck == unsorted_chunks (av) &&//victim is the only chunk in the unsorted bin
              victim == av->last_remainder &&//and it is the most recent remainder
              (unsigned long) (size) > (unsigned long) (nb + MINSIZE))//bigger than nb + MINSIZE, so the leftover chunk is still usable
            {
              /* split and reattach remainder */
              remainder_size = size - nb;//what is left of this chunk
              remainder = chunk_at_offset (victim, nb);//address of the remainder
              unsorted_chunks (av)->bk = unsorted_chunks (av)->fd = remainder;
              av->last_remainder = remainder;//update the last_remainder pointer
              remainder->bk = remainder->fd = unsorted_chunks (av);
              if (!in_smallbin_range (remainder_size))//not small means large; it is alone in the unsorted bin, so NULL the nextsize links
                {
                  remainder->fd_nextsize = NULL;
                  remainder->bk_nextsize = NULL;
                }

              //write out the chunk headers
              set_head (victim, nb | PREV_INUSE |
                        (av != &main_arena ? NON_MAIN_ARENA : 0));
              set_head (remainder, remainder_size | PREV_INUSE);
              set_foot (remainder, remainder_size);


              check_malloced_chunk (av, victim, nb);
              void *p = chunk2mem (victim);
              alloc_perturb (p, bytes);
              return p;
            }


          /* remove from unsorted list */
          unsorted_chunks (av)->bk = bck;//update the links, removing the last chunk
          bck->fd = unsorted_chunks (av);


          /* Take now instead of binning if exact fit */

          //we hit a chunk whose size exactly matches the request: return it directly
          if (size == nb)
            {
              set_inuse_bit_at_offset (victim, size);
              if (av != &main_arena)
                victim->size |= NON_MAIN_ARENA;
              check_malloced_chunk (av, victim, nb);
              void *p = chunk2mem (victim);
              alloc_perturb (p, bytes);
              return p;
            }


          /* place chunk in bin */

          //the chunk belongs in a small bin: insert it at the head of the list for its index
          if (in_smallbin_range (size))
            {
              victim_index = smallbin_index (size);
              bck = bin_at (av, victim_index);
              fwd = bck->fd;
            }
          else//otherwise it is large; same treatment, insert at the head of the large bin for its index
            {
              victim_index = largebin_index (size);
              bck = bin_at (av, victim_index);
              fwd = bck->fd;


              /* maintain large bins in sorted order */
              if (fwd != bck)//the large bin is non-empty; keep it sorted by size, largest first
                {
                  /* Or with inuse bit to speed comparisons */
                  size |= PREV_INUSE;
                  /* if smaller than smallest, bypass loop below */
                  assert ((bck->bk->size & NON_MAIN_ARENA) == 0);
                  if ((unsigned long) (size) < (unsigned long) (bck->bk->size))//smaller than the smallest: drop it at the tail
                    {
                      fwd = bck;
                      bck = bck->bk;


                      victim->fd_nextsize = fwd->fd;
                      victim->bk_nextsize = fwd->fd->bk_nextsize;
                      fwd->fd->bk_nextsize = victim->bk_nextsize->fd_nextsize = victim;
                    }
                  else//walk the list to find the right spot and insert
                    {
                      assert ((fwd->size & NON_MAIN_ARENA) == 0);
                      while ((unsigned long) size < fwd->size)
                        {
                          fwd = fwd->fd_nextsize;
                          assert ((fwd->size & NON_MAIN_ARENA) == 0);
                        }


                      if ((unsigned long) size == (unsigned long) fwd->size)
                        /* Always insert in the second position.  */
                        fwd = fwd->fd;
                      else
                        {
                          victim->fd_nextsize = fwd;
                          victim->bk_nextsize = fwd->bk_nextsize;
                          fwd->bk_nextsize = victim;
                          victim->bk_nextsize->fd_nextsize = victim;
                        }
                      bck = fwd->bk;
                    }
                }
              else//the large bin is empty: just insert
                victim->fd_nextsize = victim->bk_nextsize = victim;
            }


          mark_bin (av, victim_index);
          victim->bk = bck;
          victim->fd = fwd;
          fwd->bk = victim;
          bck->fd = victim;


#define MAX_ITERS       10000

         //avoid burning too much time in this loop

         //this is another place where performance suffers: the cases are too varied for a targeted design,

         //so despite the compromises it cannot match a purpose-built pool
          if (++iters >= MAX_ITERS)
            break;
        }

//if this loop did not bail out early, every chunk in the unsorted bin has been processed by now

        //and the whole arena is sorted at this point
      /*
         If a large request, scan through the chunks of current bin in
         sorted order to find smallest that fits.  Use the skip list for this.
       */


      if (!in_smallbin_range (nb))//large request
        {
          bin = bin_at (av, idx);


          /* skip scan if empty or largest chunk is too small */
          if ((victim = first (bin)) != bin &&
              (unsigned long) (victim->size) >= (unsigned long) (nb))

            //the large bin is non-empty and its largest chunk covers the request
            {

              //walk the skip list to find the smallest chunk that fits
              victim = victim->bk_nextsize;
              while (((unsigned long) (size = chunksize (victim)) <
                      (unsigned long) (nb)))
                victim = victim->bk_nextsize;


              /* Avoid removing the first entry for a size so that the skip
                 list does not have to be rerouted.  */
              if (victim != last (bin) && victim->size == victim->fd->size)
                victim = victim->fd;

      //carve the requested size out of the chunk we found
              remainder_size = size - nb;
              unlink (victim, bck, fwd);


              /* Exhaust */

               //the remainder would be too small to stand as a chunk, so hand the whole thing to the application -- effectively giving a little extra
              if (remainder_size < MINSIZE)
                {
                  set_inuse_bit_at_offset (victim, size);
                  if (av != &main_arena)
                    victim->size |= NON_MAIN_ARENA;
                }
              /* Split */

              //perform the split, as already described above
              else
                {
                  remainder = chunk_at_offset (victim, nb);
                  /* We cannot assume the unsorted list is empty and therefore
                     have to perform a complete insert here.  */
                  bck = unsorted_chunks (av);
                  fwd = bck->fd;
                  if (__glibc_unlikely (fwd->bk != bck))
                    {
                      errstr = "malloc(): corrupted unsorted chunks";
                      goto errout;
                    }
                  remainder->bk = bck;
                  remainder->fd = fwd;
                  bck->fd = remainder;
                  fwd->bk = remainder;
                  if (!in_smallbin_range (remainder_size))
                    {
                      remainder->fd_nextsize = NULL;
                      remainder->bk_nextsize = NULL;
                    }
                  set_head (victim, nb | PREV_INUSE |
                            (av != &main_arena ? NON_MAIN_ARENA : 0));
                  set_head (remainder, remainder_size | PREV_INUSE);
                  set_foot (remainder, remainder_size);
                }
              check_malloced_chunk (av, victim, nb);
              void *p = chunk2mem (victim);
              alloc_perturb (p, bytes);
              return p;
            }
        }


      /*
         Search for a chunk by scanning bins, starting with next largest
         bin. This search is strictly by best-fit; i.e., the smallest
         (with ties going to approximately the least recently used) chunk
         that fits is selected.


         The bitmap avoids needing to check that most blocks are nonempty.
         The particular case of skipping all bins during warm-up phases
         when no chunks have been returned yet is faster than it might look.
       */


      ++idx;
      bin = bin_at (av, idx);
      block = idx2block (idx);
      map = av->binmap[block];//use the binmap to see whether any bin above idx has free chunks
      bit = idx2bit (idx);
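The binmap arithmetic is compact enough to spell out (hedged; BINMAPSHIFT is 5, so each 32-bit word of the map covers 32 bins):

//  idx2block(idx) = idx >> 5           which word of binmap to look at
//  idx2bit(idx)   = 1U << (idx & 31)   which bit inside that word
//  a set bit means "this bin is probably non-empty"; false positives are
//  cleared lazily below, which is why the code re-checks the bin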


      for (;; )
        {
          /* Skip rest of block if there are no more set bits in this block.  */
          if (bit > map || bit == 0)
            {
              do
                {
                  if (++block >= BINMAPSIZE) /* out of bins: past the last one */
                    goto use_top;
                }
              while ((map = av->binmap[block]) == 0);//skip blocks whose binmap word is 0 (each word covers 32 bins)

             //found a block with a set bit
              bin = bin_at (av, (block << BINMAPSHIFT));
              bit = 1;
            }


          /* Advance to bin with set bit. There must be one. */
          while ((bit & map) == 0)
            {
              bin = next_bin (bin);
              bit <<= 1;
              assert (bit != 0);
            }


          /* Inspect the bin. It is likely to be non-empty */
          victim = last (bin);


          /*  If a false alarm (empty bin), clear the bit. */
          if (victim == bin)//the last chunk equals the bin itself: empty
            {
              av->binmap[block] = map &= ~bit; /* Write through */
              bin = next_bin (bin);
              bit <<= 1;
            }
          else
            {

              //compute the chunk size
              size = chunksize (victim);

              /*  We know the first chunk in this bin is big enough to use. */
              assert ((unsigned long) (size) >= (unsigned long) (nb));


              remainder_size = size - nb;

              //then split, as above; not repeated here
              /* unlink */
              unlink (victim, bck, fwd);


              /* Exhaust */
              if (remainder_size < MINSIZE)
                {
                  set_inuse_bit_at_offset (victim, size);
                  if (av != &main_arena)
                    victim->size |= NON_MAIN_ARENA;
                }
              /* Split */
              else
                {
                  remainder = chunk_at_offset (victim, nb);


                  /* We cannot assume the unsorted list is empty and therefore
                     have to perform a complete insert here.  */
                  bck = unsorted_chunks (av);
                  fwd = bck->fd;
                  if (__glibc_unlikely (fwd->bk != bck))
                    {
                      errstr = "malloc(): corrupted unsorted chunks 2";
                      goto errout;
                    }
                  remainder->bk = bck;
                  remainder->fd = fwd;
                  bck->fd = remainder;
                  fwd->bk = remainder;


                  /* advertise as last remainder */
                  if (in_smallbin_range (nb))
                    av->last_remainder = remainder;
                  if (!in_smallbin_range (remainder_size))
                    {
                      remainder->fd_nextsize = NULL;
                      remainder->bk_nextsize = NULL;
                    }
                  set_head (victim, nb | PREV_INUSE |
                            (av != &main_arena ? NON_MAIN_ARENA : 0));
                  set_head (remainder, remainder_size | PREV_INUSE);
                  set_foot (remainder, remainder_size);
                }
              check_malloced_chunk (av, victim, nb);
              void *p = chunk2mem (victim);
              alloc_perturb (p, bytes);
              return p;
            }
        }

    //if we still have no memory at this point, either every bin has been used up

    //or the request exceeds what the regular bins hold, so allocate from the top chunk

    //note that the main arena's heap grows differently (via brk) from the other arenas' mmapped sub-heaps, so it is handled differently
    use_top:
      /*
         If large enough, split off the chunk bordering the end of memory
         (held in av->top). Note that this is in accord with the best-fit
         search rule.  In effect, av->top is treated as larger (and thus
         less well fitting) than any other available chunk since it can
         be extended to be as large as necessary (up to system
         limitations).


         We require that av->top always exists (i.e., has size >=
         MINSIZE) after initialization, so if it would otherwise be
         exhausted by current request, it is replenished. (The main
         reason for ensuring it exists is that we may need MINSIZE space
         to put in fenceposts in sysmalloc.)
       */


      victim = av->top;

      //take the top chunk and compute its size
      size = chunksize (victim);

      //larger than the request + MINSIZE (kept back as a fencepost)
      if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
        {

          //split the top chunk; the remainder becomes the new top, no fencepost update needed
          remainder_size = size - nb;
          remainder = chunk_at_offset (victim, nb);
          av->top = remainder;
          set_head (victim, nb | PREV_INUSE |
                    (av != &main_arena ? NON_MAIN_ARENA : 0));
          set_head (remainder, remainder_size | PREV_INUSE);


          check_malloced_chunk (av, victim, nb);
          void *p = chunk2mem (victim);
          alloc_perturb (p, bytes);
          return p;
        }


      /* When we are using atomic ops to free fast chunks we can get
         here for all block sizes.  */

      //one more roll of the dice: maybe someone freed into the fastbins since our first attempt at the top of this function

      else if (have_fastchunks (av))
        {
          malloc_consolidate (av);
          /* restore original bin index */
          if (in_smallbin_range (nb))
            idx = smallbin_index (nb);
          else
            idx = largebin_index (nb);
        }


      /*
         Otherwise, relay to handle system-dependent cases
       */
      else
        {
          //out of options: time to ask mom (the OS). Only here does a real system call happen.

          //in practice, once a program is running steadily, the chance of reaching this point is very low;

          //sysmalloc is fairly involved and deserves a separate write-up some day.
          void *p = sysmalloc (nb, av);
          if (p != NULL)
            alloc_perturb (p, bytes);
          return p;
        }
    }
}
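A quick way to watch the fast path from outside (a sketch; pointer reuse is an implementation detail of ptmalloc's fastbins, not something the C standard promises):

#include <stdio.h>
#include <stdlib.h>

int main (void)
{
  void *a = malloc (32);
  free (a);              /* the chunk lands in a fastbin */
  void *b = malloc (32); /* same size class: popped straight back off it */
  printf ("%p %p -> %s\n", a, b, a == b ? "reused" : "not reused");
  return 0;
}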


/*
   ------------------------------ free ------------------------------
 */


static void
_int_free (mstate av, mchunkptr p, int have_lock)
{
  INTERNAL_SIZE_T size;        /* its size */
  mfastbinptr *fb;             /* associated fastbin */
  mchunkptr nextchunk;         /* next contiguous chunk */
  INTERNAL_SIZE_T nextsize;    /* its size */
  int nextinuse;               /* true if nextchunk is used */
  INTERNAL_SIZE_T prevsize;    /* size of previous contiguous chunk */
  mchunkptr bck;               /* misc temp for linking */
  mchunkptr fwd;               /* misc temp for linking */


  const char *errstr = NULL;
  int locked = 0;


  size = chunksize (p);


  /* Little security check which won't hurt performance: the
     allocator never wrapps around at the end of the address space.
     Therefore we can exclude some size values which might appear
     here by accident or by "design" from some intruder.  */
  if (__builtin_expect ((uintptr_t) p > (uintptr_t) -size, 0)
      || __builtin_expect (misaligned_chunk (p), 0))
    {
      errstr = "free(): invalid pointer";
    errout:
      if (!have_lock && locked)
        (void) mutex_unlock (&av->mutex);
      malloc_printerr (check_action, errstr, chunk2mem (p));
      return;
    }
  /* We know that each chunk is at least MINSIZE bytes in size or a
     multiple of MALLOC_ALIGNMENT.  */
  if (__glibc_unlikely (size < MINSIZE || !aligned_OK (size)))
    {
      errstr = "free(): invalid size";
      goto errout;
    }


  check_inuse_chunk(av, p);


  /*
    If eligible, place chunk on a fastbin so it can be found
    and used quickly in malloc.
  */


  if ((unsigned long)(size) <= (unsigned long)(get_max_fast ())


#if TRIM_FASTBINS
      /*
         If TRIM_FASTBINS set, don't place chunks
         bordering top into fastbins
      */
      && (chunk_at_offset(p, size) != av->top)
#endif
      ) {


    if (__builtin_expect (chunk_at_offset (p, size)->size <= 2 * SIZE_SZ, 0)
        || __builtin_expect (chunksize (chunk_at_offset (p, size))
                             >= av->system_mem, 0))
      {
        /* We might not have a lock at this point and concurrent modifications
           of system_mem might have let to a false positive.  Redo the test
           after getting the lock.  */
        if (have_lock
            || ({ assert (locked == 0);
                  mutex_lock(&av->mutex);
                  locked = 1;
                  chunk_at_offset (p, size)->size <= 2 * SIZE_SZ
                    || chunksize (chunk_at_offset (p, size)) >= av->system_mem;
                }))
          {
            errstr = "free(): invalid next size (fast)";
            goto errout;
          }
        if (! have_lock)
          {
            (void)mutex_unlock(&av->mutex);
            locked = 0;
          }
      }


    free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);


    set_fastchunks(av);
    unsigned int idx = fastbin_index(size);
    fb = &fastbin (av, idx);


    /* Atomically link P to its fastbin: P->FD = *FB; *FB = P;  */
    mchunkptr old = *fb, old2;
    unsigned int old_idx = ~0u;
    do
      {
        /* Check that the top of the bin is not the record we are going to add
           (i.e., double free).  */
        if (__builtin_expect (old == p, 0))
          {
            errstr = "double free or corruption (fasttop)";
            goto errout;
          }
        /* Check that size of fastbin chunk at the top is the same as
           size of the chunk that we are adding.  We can dereference OLD
           only if we have the lock, otherwise it might have already been
           deallocated.  See use of OLD_IDX below for the actual check.  */
        if (have_lock && old != NULL)
          old_idx = fastbin_index(chunksize(old));
        p->fd = old2 = old;
      }
    while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2)) != old2);


    if (have_lock && old != NULL && __builtin_expect (old_idx != idx, 0))
      {
        errstr = "invalid fastbin entry (free)";
        goto errout;
      }
  }


  /*
    Consolidate other non-mmapped chunks as they arrive.
  */


  else if (!chunk_is_mmapped(p)) {
    if (! have_lock) {
      (void)mutex_lock(&av->mutex);
      locked = 1;
    }


    nextchunk = chunk_at_offset(p, size);


    /* Lightweight tests: check whether the block is already the
       top block.  */
    if (__glibc_unlikely (p == av->top))
      {
        errstr = "double free or corruption (top)";
        goto errout;
      }
    /* Or whether the next chunk is beyond the boundaries of the arena.  */
    if (__builtin_expect (contiguous (av)
                          && (char *) nextchunk
                          >= ((char *) av->top + chunksize(av->top)), 0))
      {
        errstr = "double free or corruption (out)";
        goto errout;
      }
    /* Or whether the block is actually not marked used.  */
    if (__glibc_unlikely (!prev_inuse(nextchunk)))
      {
        errstr = "double free or corruption (!prev)";
        goto errout;
      }


    nextsize = chunksize(nextchunk);
    if (__builtin_expect (nextchunk->size <= 2 * SIZE_SZ, 0)
        || __builtin_expect (nextsize >= av->system_mem, 0))
      {
        errstr = "free(): invalid next size (normal)";
        goto errout;
      }


    free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);


    /* consolidate backward */
    if (!prev_inuse(p)) {
      prevsize = p->prev_size;
      size += prevsize;
      p = chunk_at_offset(p, -((long) prevsize));
      unlink(p, bck, fwd);
    }


    if (nextchunk != av->top) {
      /* get and clear inuse bit */
      nextinuse = inuse_bit_at_offset(nextchunk, nextsize);


      /* consolidate forward */
      if (!nextinuse) {
        unlink(nextchunk, bck, fwd);
        size += nextsize;
      } else
        clear_inuse_bit_at_offset(nextchunk, 0);


      /*
        Place the chunk in unsorted chunk list. Chunks are
        not placed into regular bins until after they have
        been given one chance to be used in malloc.
      */


      bck = unsorted_chunks(av);
      fwd = bck->fd;
      if (__glibc_unlikely (fwd->bk != bck))
        {
          errstr = "free(): corrupted unsorted chunks";
          goto errout;
        }
      p->fd = fwd;
      p->bk = bck;
      if (!in_smallbin_range(size))
        {
          p->fd_nextsize = NULL;
          p->bk_nextsize = NULL;
        }
      bck->fd = p;
      fwd->bk = p;


      set_head(p, size | PREV_INUSE);
      set_foot(p, size);


      check_free_chunk(av, p);
    }


    /*
      If the chunk borders the current high end of memory,
      consolidate into top
    */


    else {
      size += nextsize;
      set_head(p, size | PREV_INUSE);
      av->top = p;
      check_chunk(av, p);
    }


    /*
      If freeing a large space, consolidate possibly-surrounding
      chunks. Then, if the total unused topmost memory exceeds trim
      threshold, ask malloc_trim to reduce top.


      Unless max_fast is 0, we don't know if there are fastbins
      bordering top, so we cannot tell for sure whether threshold
      has been reached unless fastbins are consolidated.  But we
      don't want to consolidate on each free.  As a compromise,
      consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD
      is reached.
    */


    if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
      if (have_fastchunks(av))
        malloc_consolidate(av);


      if (av == &main_arena) {
#ifndef MORECORE_CANNOT_TRIM
        if ((unsigned long)(chunksize(av->top)) >=
            (unsigned long)(mp_.trim_threshold))
          systrim(mp_.top_pad, av);
#endif
      } else {
        /* Always try heap_trim(), even if the top chunk is not
           large, because the corresponding heap might go away.  */
        heap_info *heap = heap_for_ptr(top(av));


        assert(heap->ar_ptr == av);
        heap_trim(heap, mp_.top_pad);
      }
    }


    if (! have_lock) {
      assert (locked);
      (void)mutex_unlock(&av->mutex);
    }
  }
  /*
    If the chunk was allocated via mmap, release via munmap().
  */


  else {
    munmap_chunk (p);
  }
}
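The fasttop check above (old == p) is easy to trigger deliberately; on a glibc with these checks, the sketch below aborts with "double free or corruption (fasttop)":

#include <stdlib.h>

int main (void)
{
  void *p = malloc (32);
  free (p);   /* p becomes the top of its fastbin */
  free (p);   /* old == p: caught by the fasttop check, abort */
  return 0;
}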
