3
votes

I'm trying to understand why Valgrind reports an "Invalid read of size 4" error. The code compiles and gives the correct output on a Linux console.
The goal is to build an array of struct record (up to 10 million items) that grows dynamically, and to organize its items by language through the struct list.

Code:

#include <errno.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>
#include "../crc64.c"

/*
 * One stored item. Instances live in the global `records` array and are
 * referenced by pointer from the per-language lists.
 *
 * NOTE(review): with this field order the compiler has to pad after `cat`
 * and after `id` on typical 64-bit ABIs. Any positional initializer of this
 * struct depends on the order below, so do not reorder the fields without
 * checking every initializer.
 */
typedef struct {
  /* Category tag supplied by the caller (the sample data uses 'l'). */
  char          cat;
  /* 64-bit checksum of the item's name, as returned by crc64(). */
  uint64_t      crc;
  /* Caller-supplied numeric identifier. */
  int           id;
} record;

/*
 * All records that share one language.
 */
typedef struct {
  /* Number of valid entries in `records`. */
  int           count;
  /* Language code used as the lookup key; the pointer is stored as given,
     the string is not copied. */
  char          *lang;
  /* Heap-allocated array of `count` pointers to record objects. The pointed-to
     records are not owned by the list. */
  record        **records;
} list;

/* Growable heap array holding every record, in insertion order. */
record *records = NULL;
/* Number of records currently stored in `records`. */
int record_count = 0;
/* Growable heap array with one entry per distinct language seen so far. */
list *lists = NULL;
/* Number of entries currently stored in `lists`. */
int list_count = 0;

/*
 * Append a pointer to record `r` to the list for language `lang`, creating
 * that list when no list with an equal language string exists yet.
 *
 * r    - record to reference; only the pointer is stored, so it must stay
 *        valid for as long as the list is used.
 * lang - NUL-terminated language code. When a new list is created the
 *        pointer itself is stored (the string is not copied), so the caller
 *        must keep it alive.
 *
 * On allocation failure a message is printed and the global state
 * (`lists`, `list_count` and the affected list) is left exactly as it was;
 * the function never dereferences a failed allocation.
 */
void addItemToList(record *r, char *lang){
  /* Existing language: grow that list's pointer array by one slot. */
  for(int i = 0; i < list_count; i++){
    list *l = &lists[i];
    if(strcmp(l->lang, lang) != 0)
      continue;

    record **grown = realloc(l->records, ((size_t)l->count + 1) * sizeof *grown);
    if(grown == NULL){
      /* l->records is still valid and unchanged. */
      printf("Problem on realloc - records/list\n");
      return;
    }
    grown[l->count] = r;
    l->records = grown;
    l->count++;
    return;
  }

  /*
   * New language. Allocate the one-slot pointer array first, so that a
   * failure at either step leaves `lists` and `list_count` untouched.
   */
  record **slots = malloc(sizeof *slots);
  if(slots == NULL){
    printf("Error on realloc records/list \n");
    return;
  }

  list *grown = realloc(lists, ((size_t)list_count + 1) * sizeof *grown);
  if(grown == NULL){
    /* `lists` is still valid; only the fresh slot array must be released. */
    printf("Error on realloc - list");
    free(slots);
    return;
  }

  slots[0] = r;
  lists = grown;
  lists[list_count].count = 1;
  lists[list_count].lang = lang;
  lists[list_count].records = slots;
  list_count++;
}

int addRecord(char cat, char *name, int id, char lang[3]){  
  record *tmp;
  if(record_count == 0){
    tmp = malloc(1 * sizeof(record));         
  }  
  else 
    tmp = realloc(records, (record_count + 1)  * sizeof(record));
  if(tmp == NULL){
    printf("Error on m(re)alloc records\n");
    return(1);
  }  

  records = tmp;
  record r = {cat, crc64(name), id};
  records[record_count ] = r; 
  addItemToList(&(records[record_count]), lang);
  record_count ++;
  return 0;  
}

int main(void){
  addRecord('l', "torino",1, "it");
  addRecord('l', "berlin",20, "de");
  addRecord('l', "paris",30, "fr");  
  addRecord('l', "hamburg",21, "de");
  addRecord('l', "sassari",2, "it");
  addRecord('l', "cagliari",3, "it");
  addRecord('l', "milano",4, "it");


  for(int i=0; i< list_count;i++){
    printf("lang: %s, count :%d\n", lists[i].lang, lists[i].count);  
    for (int z = 0; z < lists[i].count; z ++){
      printf("  crc:  %lu -   id: %d \n", lists[i].records[z]->crc, lists[i].records[z]->id);
    }
  }
  return 0;
}

Here is the output from Valgrind:

cc -std=c99  -O0 -g tt.c -o tt && valgrind --track-origins=yes ./tt
lang: it, count :4
==17435== Invalid read of size 4
==17435==    at 0x400BAC: main (tt.c:92)
==17435==  Address 0x51d0050 is 16 bytes inside a block of size 24 free'd
==17435==    at 0x4C29097: realloc (vg_replace_malloc.c:525)
==17435==    by 0x400990: addRecord (tt.c:65)
==17435==    by 0x400A8E: main (tt.c:81)
==17435== 
==17435== Invalid read of size 8
==17435==    at 0x400BE0: main (tt.c:92)
==17435==  Address 0x51d0048 is 8 bytes inside a block of size 24 free'd
==17435==    at 0x4C29097: realloc (vg_replace_malloc.c:525)
==17435==    by 0x400990: addRecord (tt.c:65)
==17435==    by 0x400A8E: main (tt.c:81)
==17435== 
  crc:  10540480176773849829 -   id: 1 
  crc:  5100567372334599520 -   id: 2 
  crc:  16805344662159858020 -   id: 3 
  crc:  16314500525507880138 -   id: 4 
lang: de, count :2
  crc:  3766391767329109829 -   id: 20 
  crc:  12127946872667643737 -   id: 21 
lang: fr, count :1
  crc:  2180538375615994033 -   id: 30 
1

1 Answers

5
votes

You are reallocating the records array, but you don't update the pointers into it that the lists already hold.

tmp = realloc(records, (record_count + 1)  * sizeof(record));

When you do this, all pointers to the old records array become invalid.

Here is a simpler example.

// Minimal illustration of the problem (error checking omitted for brevity).
record *array = malloc(sizeof(*array));
// r1 points into the block that 'array' currently refers to.
record *r1 = &array[0];
// realloc may move the data to a new block and free the old one.
array = realloc(array, sizeof(*array) * 2);
// r2 is taken after the realloc, so it points into the current block.
record *r2 = &array[1];
// r1 may now dangle: if realloc moved the block, r1 still points into the
// old, freed memory and must not be dereferenced.

There are a few ways you can fix this.

  1. When you realloc, go through and update all the pointers. This is a real pain.

  2. Alloc every record separately, instead of in one big array. (No, this won't waste memory. At least not compared to the 8 bytes per record already wasted due to field order.)

  3. Instead of pointers to records, use indexes into the record array. These don't need to be updated.