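Here is a quick and easy way to profile memory with nothing more complicated than GNU libc. Just add this to your program source, together with `#include` lines for `<malloc.h>`, `<pthread.h>`, `<stdio.h>` and `<stdlib.h>` if they are not already there (you don't even have to call it):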
/* Hook values that were in place before ours were installed.  They are
   saved by noisy_init() and noisy_post(), and put back by noisy_pre()
   around each real allocator call. */
static void *(*prev_malloc_hook) (size_t __size, __const __malloc_ptr_t);
static void *(*prev_realloc_hook) (void *__ptr, size_t __size, __const __malloc_ptr_t);
static void (*prev_free_hook) (void *__ptr, __const __malloc_ptr_t);
/* Locked by noisy_pre() and unlocked by noisy_post(); initialised in
   noisy_init(). */
static pthread_mutex_t allocation_lock;
/* Forward declarations: the hook functions call these before they are
   defined further down. */
static void noisy_pre();
static void noisy_post();
/*
 * malloc hook: performs the real allocation and logs it to stderr.
 *
 * size   - number of bytes requested.
 * caller - address handed to the hook; logged as the call site.
 *
 * Returns whatever malloc() returned (possibly NULL).  The log line has
 * the form "allocate: <ptr> <size> bytes from <caller>".
 */
static void *noisy_malloc (size_t size, const void *caller)
{
    /* Take the lock and put the previous hooks back before calling the
       real malloc(). */
    noisy_pre();
    void *p = malloc(size);
    /* Re-install our hooks and release the lock. */
    noisy_post();

    /* size is a size_t, so it must be printed with %zu; passing it to %d
       is a type mismatch and prints garbage where size_t is wider than
       int.
       NOTE(review): logging happens after our hooks are back in place and
       the lock is released; if fprintf() ever allocates it will re-enter
       this hook - confirm that is acceptable. */
    fprintf(stderr, "allocate: %p %zu bytes from %p\n", p, size, caller);
    return p;
}
/*
 * realloc hook: performs the real reallocation and logs it to stderr.
 *
 * ptr    - block being resized (as passed by the caller of realloc()).
 * size   - requested new size in bytes.
 * caller - address handed to the hook; logged as the call site.
 *
 * Returns whatever realloc() returned (possibly NULL).  The log line has
 * the form "reallocate: <old> -> <new> to <size> bytes from <caller>".
 */
static void *noisy_realloc (void *ptr, size_t size, const void *caller)
{
    /* Take the lock and put the previous hooks back before calling the
       real realloc(). */
    noisy_pre();
    void *p = realloc(ptr, size);
    /* Re-install our hooks and release the lock. */
    noisy_post();

    /* size is a size_t, so it must be printed with %zu; passing it to %d
       is a type mismatch and prints garbage where size_t is wider than
       int. */
    fprintf(stderr, "reallocate: %p -> %p to %zu bytes from %p\n", ptr, p, size, caller);
    return p;
}
/*
 * free hook: releases the block through the real free() and logs it.
 *
 * ptr    - block being released.
 * caller - address handed to the hook; logged as the call site.
 *
 * The log line has the form "free: <ptr> from <caller>".
 */
static void noisy_free (void *ptr, const void *caller)
{
    /* Lock, and put the previous hooks back for the duration of free(). */
    noisy_pre();
    free(ptr);
    /* Our hooks go back in and the lock is dropped before we log. */
    noisy_post();

    fprintf(stderr, "free: %p from %p\n", ptr, caller);
}
/*
 * One-time setup: initialises allocation_lock, remembers the hooks that
 * are currently installed, and replaces them with the noisy_* versions.
 */
static void noisy_init (void)
{
    pthread_mutex_init(&allocation_lock, NULL);

    /* Remember what was installed before us ... */
    prev_malloc_hook  = __malloc_hook;
    prev_realloc_hook = __realloc_hook;
    prev_free_hook    = __free_hook;

    /* ... then take over all three entry points. */
    __malloc_hook  = noisy_malloc;
    __realloc_hook = noisy_realloc;
    __free_hook    = noisy_free;
}
/*
 * Called at the start of every noisy_* hook: acquires allocation_lock and
 * then puts the saved previous hooks back in place.  Paired with
 * noisy_post(), which undoes both steps.
 */
static void noisy_pre()
{
    /* The lock is taken first, before any hook variable is touched. */
    pthread_mutex_lock(&allocation_lock);

    /* Put the hooks we displaced back in place. */
    __malloc_hook  = prev_malloc_hook;
    __realloc_hook = prev_realloc_hook;
    __free_hook    = prev_free_hook;
}
/*
 * Called at the end of every noisy_* hook, after the real allocator call:
 * records the hooks that are installed now, re-installs the noisy_* hooks,
 * and finally releases allocation_lock.
 */
static void noisy_post()
{
    /* Save the current hooks again; they may have changed during the
       allocator call. */
    prev_malloc_hook  = __malloc_hook;
    prev_realloc_hook = __realloc_hook;
    prev_free_hook    = __free_hook;

    /* Put our own hooks back. */
    __malloc_hook  = noisy_malloc;
    __realloc_hook = noisy_realloc;
    __free_hook    = noisy_free;

    /* Unlock last, once every hook variable has been updated. */
    pthread_mutex_unlock(&allocation_lock);
}
/* Override the initializing hook from the C library so that it points at
   noisy_init(); this is what installs the hooks without the program
   having to call anything itself.
   NOTE(review): presumably the C library invokes this pointer once when
   malloc is first initialised, and these hook variables are deprecated in
   newer glibc releases - confirm against the libc version being targeted. */
void (*__malloc_initialize_hook) (void) = noisy_init;
Your program will write a line to stderr for every allocation, reallocation and free it performs. Pipe that stream (for example with `2>&1 |`) through this awk snippet to get a useful report:
awk '
# Summarise the allocation log: for every call-site address, print how many
# bytes allocated from there are still live, smallest total first.
#   size[addr]  - bytes currently held by the block at addr
#   owner[addr] - call site that allocated the block at addr
#   fun[site]   - live bytes attributed to that call site

# Forget a tracked block and give its bytes back to the owning call site.
# Addresses that were never recorded (a free or realloc of a null pointer,
# or of a block allocated before the hooks were installed) are ignored;
# indexing the arrays with them would create a bogus empty-owner entry
# that shows up in the report as " has 0 bytes".
function release(addr) {
    if (addr in size) {
        fun[owner[addr]] -= size[addr]
        delete size[addr]
        delete owner[addr]
    }
}

# free: <ptr> from <caller>
/^free: /       { release($2) }

# reallocate: <old> -> <new> to <size> bytes from <caller>
/^reallocate: / { release($2); size[$4] = $6; owner[$4] = $9; fun[$9] += $6 }

# allocate: <ptr> <size> bytes from <caller>
/^allocate: /   { size[$2] = $3; owner[$2] = $6; fun[$6] += $3 }

END { for (i in fun) { print i " has " fun[i] " bytes" } }' |
sort -k3n
See also the C++ new/delete profiler.