Poor Man's Memory Profiler

Here is a quick and easy way to profile memory with nothing more complicated than GNU libc. Just add this to your program source (you don't even have to call it; the last line of the snippet registers `noisy_init` as the C library's malloc initialization hook):

/* Headers for the glibc hook variables, the mutex and fprintf(); harmless if
   the surrounding program already includes them. */
#include <malloc.h>
#include <pthread.h>
#include <stdio.h>

/* Hook values that were in place before ours. noisy_pre() puts them back while
   the real allocator runs; noisy_post() re-saves them afterwards. The
   signatures match the noisy_* wrappers defined below. */
static void *(*prev_malloc_hook)  (size_t size, const void *caller);
static void *(*prev_realloc_hook) (void *ptr, size_t size, const void *caller);
static void  (*prev_free_hook)    (void *ptr, const void *caller);
/* Taken in noisy_pre() and released in noisy_post(). */
static pthread_mutex_t allocation_lock;
static void noisy_pre(void);
static void noisy_post(void);


/*
 * __malloc_hook replacement: performs the allocation with the previous hooks
 * in place and logs the result to stderr.
 *
 * size   - number of bytes requested.
 * caller - caller address passed in by the hook mechanism; only printed.
 * Returns whatever malloc() returned.
 */
static void *noisy_malloc (size_t size, const void *caller)
{
        noisy_pre();
        void *p = malloc(size);
        /* Log before noisy_post() reinstalls our hooks: if fprintf() allocates
           internally it must reach the real allocator instead of recursing
           into this function. size_t needs %zu, not %d. */
        fprintf(stderr, "allocate: %p %zu bytes from %p\n", p, size, caller);
        noisy_post();
        return p;
}
/*
 * __realloc_hook replacement: performs the reallocation with the previous
 * hooks in place and logs old pointer, new pointer and new size to stderr.
 *
 * ptr    - block being resized; passed straight to realloc().
 * size   - requested new size in bytes.
 * caller - caller address passed in by the hook mechanism; only printed.
 * Returns whatever realloc() returned.
 */
static void *noisy_realloc (void *ptr, size_t size, const void *caller)
{
        noisy_pre();
        void *p = realloc(ptr, size);
        /* Log before noisy_post() reinstalls our hooks: if fprintf() allocates
           internally it must reach the real allocator instead of recursing
           into the noisy_* wrappers. size_t needs %zu, not %d. */
        fprintf(stderr, "reallocate: %p -> %p to %zu bytes from %p\n", ptr, p, size, caller);
        noisy_post();
        return p;
}
/*
 * __free_hook replacement: releases the block with the previous hooks in
 * place and logs the freed pointer to stderr.
 *
 * ptr    - block to release; passed straight to free().
 * caller - caller address passed in by the hook mechanism; only printed.
 */
static void noisy_free (void *ptr, const void *caller)
{
        noisy_pre();
        free(ptr);
        /* Log before noisy_post() reinstalls our hooks: if fprintf() allocates
           internally it must reach the real allocator instead of recursing
           into the noisy_* wrappers. */
        fprintf(stderr, "free: %p from %p\n", ptr, caller);
        noisy_post();
}
/*
 * One-time setup: initializes allocation_lock with default attributes, then
 * for each of the three allocator hooks remembers the current value and
 * installs the matching noisy_* wrapper in its place.
 */
static void noisy_init (void)
{
        pthread_mutex_init(&allocation_lock, NULL);

        /* malloc */
        prev_malloc_hook  = __malloc_hook;
        __malloc_hook     = noisy_malloc;

        /* realloc */
        prev_realloc_hook = __realloc_hook;
        __realloc_hook    = noisy_realloc;

        /* free */
        prev_free_hook    = __free_hook;
        __free_hook       = noisy_free;
}
/*
 * Called at the top of every noisy_* wrapper: acquires allocation_lock and
 * swaps the saved hook values back in, so the allocator call that follows in
 * the wrapper does not go through the noisy_* functions again.
 */
static void noisy_pre()
{
        /* The lock must be held before any hook is touched. */
        pthread_mutex_lock(&allocation_lock);

        /* Put the saved hooks back in place. */
        __free_hook    = prev_free_hook;
        __realloc_hook = prev_realloc_hook;
        __malloc_hook  = prev_malloc_hook;
}
/*
 * Counterpart of noisy_pre(): for each hook, records the value that is
 * current now (it may have been changed while the allocator ran), reinstalls
 * the matching noisy_* wrapper, and finally drops allocation_lock.
 */
static void noisy_post()
{
        /* malloc: remember current hook, then take over again */
        prev_malloc_hook  = __malloc_hook;
        __malloc_hook     = noisy_malloc;

        /* realloc: remember current hook, then take over again */
        prev_realloc_hook = __realloc_hook;
        __realloc_hook    = noisy_realloc;

        /* free: remember current hook, then take over again */
        prev_free_hook    = __free_hook;
        __free_hook       = noisy_free;

        /* All hooks are back in place; let the next caller in. */
        pthread_mutex_unlock(&allocation_lock);
}

/* Override initializing hook from the C library so that noisy_init is
   registered without an explicit call. glibc headers that declare this
   variable with the __MALLOC_HOOK_VOLATILE qualifier require the definition to
   carry the same qualifier, otherwise the two declarations conflict; headers
   that do not define the macro get an empty fallback. */
#ifndef __MALLOC_HOOK_VOLATILE
# define __MALLOC_HOOK_VOLATILE
#endif
void (*__MALLOC_HOOK_VOLATILE __malloc_initialize_hook) (void) = noisy_init;

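Your program will spew a line on stderr for every allocation, reallocation and free it performs. Run that output (redirecting stderr, e.g. with `2>&1`) through this awk snippet to get a useful report of how many bytes each caller address still has allocated: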

awk '# size[p] and owner[p]: bytes and caller recorded for each live pointer p;
     # fun[c]: bytes currently attributed to caller c.
     # A pointer that was never recorded (free(NULL), realloc(NULL, n), or a
     # block allocated before logging began) is skipped on release, so it does
     # not add an empty-named caller to the report.
     /^free: /       { if ($2 in size) { fun[owner[$2]] -= size[$2]; delete size[$2]; delete owner[$2]; } }
     /^reallocate: / { if ($2 in size) { fun[owner[$2]] -= size[$2]; delete size[$2]; delete owner[$2]; }
                       size[$4] = $6; owner[$4] = $9; fun[$9] += $6; }
     /^allocate: /   { size[$2] = $3; owner[$2] = $6; fun[$6] += $3; }
     END { for(i in fun) { print i " has " fun[i] " bytes"; }}' |
sort -k3n

See also the C++ new/delete profiler.