Linux內存管理之slab機制(初始化) 3
第二階段代碼分析Start_kernel()->kmem_cache_init_late()
/*Slab系統初始化分兩個部分,先初始化一些基本的,待系統初始化工作進行的差不多時,再配置一些特殊功能。*/
/*
 * Late (second) phase of slab initialisation, run from start_kernel()
 * once the rest of the kernel is far enough along: resize every cache's
 * per-cpu arrays to their final sizes and hook up CPU-hotplug support.
 */
void __init kmem_cache_init_late(void)
{
struct kmem_cache *cachep;
/* During early init the local-cache sizes were fixed; recompute them
 * now from each cache's object size. */
/* 6) resize the head arrays to their final sizes */
mutex_lock(&cache_chain_mutex);
list_for_each_entry(cachep, &cache_chain, next)
if (enable_cpucache(cachep, GFP_NOWAIT))
BUG();
mutex_unlock(&cache_chain_mutex);
/* Done! */
/* All general (kmalloc) caches are now fully established. */
g_cpucache_up = FULL;
/* Annotate slab for lockdep -- annotate the malloc caches */
init_lock_keys();
/*
 * Register a cpu startup notifier callback that initializes
 * cpu_cache_get for all new cpus
 */
/* When a cpu comes online, the callback configures its local cache. */
register_cpu_notifier(&cpucache_notifier);
/*
 * The reap timers are started later, with a module init call: That part
 * of the kernel is not yet operational.
 */
}
/*
 * Slab initialisation happens in two phases: the basics are set up early,
 * and this late phase adds the final tuning once the rest of the system
 * is mostly up.  (Second listing of kmem_cache_init_late.)
 */
void __init kmem_cache_init_late(void)
{
	struct kmem_cache *cachep;

	/* During early init each local cache got a fixed size; recompute
	 * it now from the object size. */
	/* 6) resize the head arrays to their final sizes */
	mutex_lock(&cache_chain_mutex);
	list_for_each_entry(cachep, &cache_chain, next)
		if (enable_cpucache(cachep, GFP_NOWAIT))
			BUG();
	mutex_unlock(&cache_chain_mutex);

	/* Done!  All general (kmalloc) caches are now fully established. */
	g_cpucache_up = FULL;

	/* Annotate slab for lockdep -- annotate the malloc caches */
	init_lock_keys();

	/*
	 * Register a cpu startup notifier callback that initializes
	 * cpu_cache_get for all new cpus: when a cpu comes online its
	 * local cache is configured by the callback.
	 */
	register_cpu_notifier(&cpucache_notifier);

	/*
	 * The reap timers are started later, with a module init call: That part
	 * of the kernel is not yet operational.
	 */
}
/* Called with cache_chain_mutex held always */
/* Initialise a cache's per-cpu (local) caches with their final sizes. */
static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
{
int err;
int limit, shared;
/*
 * The head array serves three purposes:
 * - create a LIFO ordering, i.e. return objects that are cache-warm
 * - reduce the number of spinlock operations.
 * - reduce the number of linked list operations on the slab and
 * bufctl chains: array operations are cheaper.
 * The numbers are guessed, we should auto-tune as described by
 * Bonwick.
 */ /* Derive the local-cache object-count limit from the object size:
     * bigger objects get fewer per-cpu slots. */
if (cachep->buffer_size > 131072)
limit = 1;
else if (cachep->buffer_size > PAGE_SIZE)
limit = 8;
else if (cachep->buffer_size > 1024)
limit = 24;
else if (cachep->buffer_size > 256)
limit = 54;
else
limit = 120;
/*
 * CPU bound tasks (e.g. network routing) can exhibit cpu bound
 * allocation behaviour: Most allocs on one cpu, most free operations
 * on another cpu. For these cases, an efficient object passing between
 * cpus is necessary. This is provided by a shared array. The array
 * replaces Bonwick's magazine layer.
 * On uniprocessor, it's functionally equivalent (but less efficient)
 * to a larger limit. Thus disabled by default.
 */
shared = 0;
/* On SMP, enable a shared local cache (for small objects only). */
if (cachep->buffer_size <= PAGE_SIZE && num_possible_cpus() > 1)
shared = 8;
#if DEBUG
/*
 * With debugging enabled, large batchcount lead to excessively long
 * periods with disabled local interrupts. Limit the batchcount
 */
if (limit > 32)
limit = 32;
#endif
/* Install the new tuning; batchcount is half the limit (rounded up). */
err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared, gfp);
if (err)
printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
cachep->name, -err);
return err;
}
/* Called with cache_chain_mutex held always */
/* Initialise a cache's per-cpu (local) caches with their final sizes.
 * (Second listing of enable_cpucache.) */
static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
{
	int err;
	int limit, shared;

	/*
	 * The head array serves three purposes:
	 * - create a LIFO ordering, i.e. return objects that are cache-warm
	 * - reduce the number of spinlock operations.
	 * - reduce the number of linked list operations on the slab and
	 *   bufctl chains: array operations are cheaper.
	 * The numbers are guessed, we should auto-tune as described by
	 * Bonwick.
	 */
	/* Derive the local-cache object-count limit from the object size:
	 * bigger objects get fewer per-cpu slots. */
	if (cachep->buffer_size > 131072)
		limit = 1;
	else if (cachep->buffer_size > PAGE_SIZE)
		limit = 8;
	else if (cachep->buffer_size > 1024)
		limit = 24;
	else if (cachep->buffer_size > 256)
		limit = 54;
	else
		limit = 120;

	/*
	 * CPU bound tasks (e.g. network routing) can exhibit cpu bound
	 * allocation behaviour: Most allocs on one cpu, most free operations
	 * on another cpu. For these cases, an efficient object passing between
	 * cpus is necessary. This is provided by a shared array. The array
	 * replaces Bonwick's magazine layer.
	 * On uniprocessor, it's functionally equivalent (but less efficient)
	 * to a larger limit. Thus disabled by default.
	 */
	shared = 0;
	/* On SMP, enable a shared local cache (for small objects only). */
	if (cachep->buffer_size <= PAGE_SIZE && num_possible_cpus() > 1)
		shared = 8;

#if DEBUG
	/*
	 * With debugging enabled, large batchcount lead to excessively long
	 * periods with disabled local interrupts. Limit the batchcount
	 */
	if (limit > 32)
		limit = 32;
#endif
	/* Install the new tuning; batchcount is half the limit (rounded up). */
	err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared, gfp);
	if (err)
		printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
		       cachep->name, -err);
	return err;
}
/* Always called with the cache_chain_mutex held */
/*
 * Configure a cache's per-cpu local caches, its shared local cache and
 * the three per-node slab lists.  Always called with the
 * cache_chain_mutex held.
 *
 * NOTE(review): the pasted code had lost every "[...]" array subscript
 * (forum mangling) — e.g. "new->new" instead of "new->new[i]" — which is
 * not even valid C.  Restored per mm/slab.c.
 */
static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
			    int batchcount, int shared, gfp_t gfp)
{
	struct ccupdate_struct *new;
	int i;

	new = kzalloc(sizeof(*new), gfp);
	if (!new)
		return -ENOMEM;

	/* Allocate a fresh struct array_cache for every online cpu. */
	for_each_online_cpu(i) {
		new->new[i] = alloc_arraycache(cpu_to_node(i), limit,
					       batchcount, gfp);
		if (!new->new[i]) {
			/* Roll back the arrays allocated so far. */
			for (i--; i >= 0; i--)
				kfree(new->new[i]);
			kfree(new);
			return -ENOMEM;
		}
	}
	new->cachep = cachep;

	/*
	 * Swap the new array_cache objects in on every cpu.  On CPU-hotplug
	 * systems an offline cpu may still own a stale local cache (see
	 * __kmem_cache_destroy): if cpu B goes down, cache X is destroyed,
	 * and B later comes back up, the cache_chain update cannot fix X's
	 * per-B slot because X was already returned to cache_cache.  When X
	 * is handed out again, B's slot still holds the old local cache —
	 * hence the unconditional replacement done here.
	 */
	on_each_cpu(do_ccupdate_local, (void *)new, 1);

	check_irq_on();
	cachep->batchcount = batchcount;
	cachep->limit = limit;
	cachep->shared = shared;

	/* Free the old local caches that do_ccupdate_local() handed back. */
	for_each_online_cpu(i) {
		struct array_cache *ccold = new->new[i];
		if (!ccold)
			continue;
		spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
		/* Return the objects still cached in the old array. */
		free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i));
		spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
		/* Free the old struct array_cache itself. */
		kfree(ccold);
	}
	kfree(new);
	/* Initialise the shared local cache and the three slab lists. */
	return alloc_kmemlist(cachep, gfp);
}
/* Always called with the cache_chain_mutex held */
/*
 * Configure a cache's per-cpu local caches, its shared local cache and
 * the three per-node slab lists.  (Second listing of do_tune_cpucache.)
 *
 * NOTE(review): restored the "[...]" array subscripts that the forum
 * paste stripped ("new->new[i]", "nodelists[cpu_to_node(i)]"), per
 * mm/slab.c.
 */
static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
			    int batchcount, int shared, gfp_t gfp)
{
	struct ccupdate_struct *new;
	int i;

	new = kzalloc(sizeof(*new), gfp);
	if (!new)
		return -ENOMEM;

	/* Allocate a fresh struct array_cache for every online cpu. */
	for_each_online_cpu(i) {
		new->new[i] = alloc_arraycache(cpu_to_node(i), limit,
					       batchcount, gfp);
		if (!new->new[i]) {
			/* Roll back the arrays allocated so far. */
			for (i--; i >= 0; i--)
				kfree(new->new[i]);
			kfree(new);
			return -ENOMEM;
		}
	}
	new->cachep = cachep;

	/*
	 * Swap the new array_cache objects in on every cpu.  On CPU-hotplug
	 * systems an offline cpu may still own a stale local cache (see
	 * __kmem_cache_destroy); re-onlining the cpu cannot repair caches
	 * that were destroyed while it was down, so the slots are replaced
	 * unconditionally here.
	 */
	on_each_cpu(do_ccupdate_local, (void *)new, 1);

	check_irq_on();
	cachep->batchcount = batchcount;
	cachep->limit = limit;
	cachep->shared = shared;

	/* Free the old local caches that do_ccupdate_local() handed back. */
	for_each_online_cpu(i) {
		struct array_cache *ccold = new->new[i];
		if (!ccold)
			continue;
		spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
		/* Return the objects still cached in the old array. */
		free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i));
		spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
		/* Free the old struct array_cache itself. */
		kfree(ccold);
	}
	kfree(new);
	/* Initialise the shared local cache and the three slab lists. */
	return alloc_kmemlist(cachep, gfp);
}
/* Update each cpu's local cache (struct array_cache). */
/*
 * Runs on each cpu (via on_each_cpu): install that cpu's new
 * array_cache and hand the old one back through new->new[] for the
 * caller to free.
 *
 * NOTE(review): the pasted code read "new->cachep->array = new->new",
 * which assigns to an array lvalue and cannot compile; the
 * "[smp_processor_id()]" subscripts are restored per mm/slab.c.
 */
static void do_ccupdate_local(void *info)
{
	struct ccupdate_struct *new = info;
	struct array_cache *old;

	check_irq_off();
	old = cpu_cache_get(new->cachep);
	/* Point this cpu's slot at the new struct array_cache. */
	new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()];
	/* Save the old struct array_cache for the caller to release. */
	new->new[smp_processor_id()] = old;
}
/* Update each cpu's struct array_cache.  (Second listing of
 * do_ccupdate_local; "[smp_processor_id()]" subscripts restored per
 * mm/slab.c — the pasted version assigned to an array lvalue.) */
static void do_ccupdate_local(void *info)
{
	struct ccupdate_struct *new = info;
	struct array_cache *old;

	check_irq_off();
	old = cpu_cache_get(new->cachep);
	/* Point this cpu's slot at the new struct array_cache. */
	new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()];
	/* Save the old struct array_cache for the caller to release. */
	new->new[smp_processor_id()] = old;
}
/* Initialise the shared local cache and the three slab lists; when done
 * the lists contain no slabs yet. */
/*
 * Initialise each online node's shared local cache and its three slab
 * lists (full/partial/free).  When this returns successfully the lists
 * contain no slabs yet.
 *
 * NOTE(review): the "[node]" subscripts on cachep->nodelists were lost
 * in the paste; restored per mm/slab.c.
 */
static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
{
	int node;
	struct kmem_list3 *l3;
	struct array_cache *new_shared;
	struct array_cache **new_alien = NULL;

	for_each_online_node(node) {
		/* NUMA only: alien caches hold objects freed on a remote node. */
		if (use_alien_caches) {
			new_alien = alloc_alien_cache(node, cachep->limit, gfp);
			if (!new_alien)
				goto fail;
		}

		new_shared = NULL;
		if (cachep->shared) {
			/* Allocate the shared local cache.  0xbaadf00d is a
			 * poison batchcount — presumably never meant to be
			 * used as a real batch size; confirm against caller. */
			new_shared = alloc_arraycache(node,
				cachep->shared * cachep->batchcount,
				0xbaadf00d, gfp);
			if (!new_shared) {
				free_alien_cache(new_alien);
				goto fail;
			}
		}

		/* Fetch the node's existing slab lists, if any. */
		l3 = cachep->nodelists[node];
		if (l3) {
			/* Old lists exist: release the old resources before
			 * installing the new ones. */
			struct array_cache *shared = l3->shared;

			spin_lock_irq(&l3->list_lock);
			/* Return the old shared cache's objects to the slabs. */
			if (shared)
				free_block(cachep, shared->entry,
					   shared->avail, node);
			/* Install the new shared local cache. */
			l3->shared = new_shared;
			if (!l3->alien) {
				l3->alien = new_alien;
				new_alien = NULL;
			}
			/* Upper bound on free objects kept on this node. */
			l3->free_limit = (1 + nr_cpus_node(node)) *
					cachep->batchcount + cachep->num;
			spin_unlock_irq(&l3->list_lock);
			/* Free the old shared array_cache object itself. */
			kfree(shared);
			free_alien_cache(new_alien);
			continue;	/* next node */
		}

		/* No old l3: allocate and initialise a fresh one. */
		l3 = kmalloc_node(sizeof(struct kmem_list3), gfp, node);
		if (!l3) {
			free_alien_cache(new_alien);
			kfree(new_shared);
			goto fail;
		}

		kmem_list3_init(l3);
		l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
				((unsigned long)cachep) % REAPTIMEOUT_LIST3;
		l3->shared = new_shared;
		l3->alien = new_alien;
		l3->free_limit = (1 + nr_cpus_node(node)) *
					cachep->batchcount + cachep->num;
		cachep->nodelists[node] = l3;
	}
	return 0;

fail:
	if (!cachep->next.next) {
		/* Cache is not active yet. Roll back what we did */
		node--;
		while (node >= 0) {
			if (cachep->nodelists[node]) {
				l3 = cachep->nodelists[node];
				kfree(l3->shared);
				free_alien_cache(l3->alien);
				kfree(l3);
				cachep->nodelists[node] = NULL;
			}
			node--;
		}
	}
	return -ENOMEM;
}
/*
 * Initialise each online node's shared local cache and its three slab
 * lists.  (Second listing of alloc_kmemlist; the "[node]" subscripts on
 * cachep->nodelists are restored per mm/slab.c.)
 */
static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
{
	int node;
	struct kmem_list3 *l3;
	struct array_cache *new_shared;
	struct array_cache **new_alien = NULL;

	for_each_online_node(node) {
		/* NUMA only: alien caches hold objects freed on a remote node. */
		if (use_alien_caches) {
			new_alien = alloc_alien_cache(node, cachep->limit, gfp);
			if (!new_alien)
				goto fail;
		}

		new_shared = NULL;
		if (cachep->shared) {
			/* Allocate the shared local cache (0xbaadf00d is a
			 * poison batchcount). */
			new_shared = alloc_arraycache(node,
				cachep->shared * cachep->batchcount,
				0xbaadf00d, gfp);
			if (!new_shared) {
				free_alien_cache(new_alien);
				goto fail;
			}
		}

		/* Fetch the node's existing slab lists, if any. */
		l3 = cachep->nodelists[node];
		if (l3) {
			/* Old lists exist: release the old resources before
			 * installing the new ones. */
			struct array_cache *shared = l3->shared;

			spin_lock_irq(&l3->list_lock);
			/* Return the old shared cache's objects to the slabs. */
			if (shared)
				free_block(cachep, shared->entry,
					   shared->avail, node);
			/* Install the new shared local cache. */
			l3->shared = new_shared;
			if (!l3->alien) {
				l3->alien = new_alien;
				new_alien = NULL;
			}
			/* Upper bound on free objects kept on this node. */
			l3->free_limit = (1 + nr_cpus_node(node)) *
					cachep->batchcount + cachep->num;
			spin_unlock_irq(&l3->list_lock);
			/* Free the old shared array_cache object itself. */
			kfree(shared);
			free_alien_cache(new_alien);
			continue;	/* next node */
		}

		/* No old l3: allocate and initialise a fresh one. */
		l3 = kmalloc_node(sizeof(struct kmem_list3), gfp, node);
		if (!l3) {
			free_alien_cache(new_alien);
			kfree(new_shared);
			goto fail;
		}

		kmem_list3_init(l3);
		l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
				((unsigned long)cachep) % REAPTIMEOUT_LIST3;
		l3->shared = new_shared;
		l3->alien = new_alien;
		l3->free_limit = (1 + nr_cpus_node(node)) *
					cachep->batchcount + cachep->num;
		cachep->nodelists[node] = l3;
	}
	return 0;

fail:
	if (!cachep->next.next) {
		/* Cache is not active yet. Roll back what we did */
		node--;
		while (node >= 0) {
			if (cachep->nodelists[node]) {
				l3 = cachep->nodelists[node];
				kfree(l3->shared);
				free_alien_cache(l3->alien);
				kfree(l3);
				cachep->nodelists[node] = NULL;
			}
			node--;
		}
	}
	return -ENOMEM;
}
/* Helper: allocate a struct array_cache. */
/*
 * Allocate and initialise one struct array_cache (a local cache) on the
 * given node.  The entry[] pointer array lives directly behind the
 * struct, so a single allocation covers both.  Returns NULL on OOM.
 */
static struct array_cache *alloc_arraycache(int node, int entries,
					    int batchcount, gfp_t gfp)
{
	int memsize = sizeof(void *) * entries + sizeof(struct array_cache);
	/* kmalloc_node draws from the general caches on @node. */
	struct array_cache *ac = kmalloc_node(memsize, gfp, node);

	/*
	 * The array_cache structures contain pointers to free object.
	 * However, when such objects are allocated or transfered to another
	 * cache the pointers are not cleared and they could be counted as
	 * valid references during a kmemleak scan. Therefore, kmemleak must
	 * not scan such objects.
	 */
	kmemleak_no_scan(ac);
	if (ac) {
		/* Start empty; capacity and batch size as requested. */
		ac->avail = 0;
		ac->limit = entries;
		ac->batchcount = batchcount;
		ac->touched = 0;
		spin_lock_init(&ac->lock);
	}
	return ac;
}
/*
 * Allocate and initialise one struct array_cache on the given node.
 * (Second listing of alloc_arraycache; the forum artifact fused to the
 * closing brace has been split off.)
 */
static struct array_cache *alloc_arraycache(int node, int entries,
					    int batchcount, gfp_t gfp)
{
	/* The entry[] array follows the struct; one allocation covers both. */
	int memsize = sizeof(void *) * entries + sizeof(struct array_cache);
	struct array_cache *nc = NULL;

	/* kmalloc_node draws the local cache from the general caches. */
	nc = kmalloc_node(memsize, gfp, node);
	/*
	 * The array_cache structures contain pointers to free object.
	 * However, when such objects are allocated or transfered to another
	 * cache the pointers are not cleared and they could be counted as
	 * valid references during a kmemleak scan. Therefore, kmemleak must
	 * not scan such objects.
	 */
	kmemleak_no_scan(nc);
	/* Initialise the local cache (empty, with the requested limits). */
	if (nc) {
		nc->avail = 0;
		nc->limit = entries;
		nc->batchcount = batchcount;
		nc->touched = 0;
		spin_lock_init(&nc->lock);
	}
	return nc;
}
源代碼中涉及了slab的分配、釋放等操作在後面分析中陸續總結。slab相關數據結構、工作機制以及整體框架在分析完了slab的創建、釋放工作后再做總結,這樣可能會對slab機制有更好的了解。當然,從代碼中看運行機制會更有說服了,也是一種習慣。
《解決方案》
謝謝分享