

I've found a bug in GCC 4.6.3 (default in Ubuntu LTS) or my code is wrong - uaygsfdbzf

So, there is this simple code which converts array of bytes to a hex string in a newly allocated buffer: [space limitation here, posted in comments]
======
uaygsfdbzf
So, there is this simple code which converts array of bytes to a hex string in
a newly allocated buffer:

    
    
        #include <stdlib.h>
    
        int data_to_hex(const char* s, int len, char** d) {
            int i, n;
            char ch;
    
            if (len < 1 || len > 1024 * 1024 * 1024) {
                return -1;
            }
    
            *d = (char*)malloc((2 * len) + 1);
            if (!(*d)) {
                return -1;
            }
    
            n = 0;
            for (i = 0; i < len; i++) {
    
                ch = (s[i] >> 4) & 0x0f;
                if (ch <= 0x09) {
                    (*d)[n] = ch + '0';
                } else {
                    (*d)[n] = ch - 10 + 'a';
                }
                n++;
    
                ch = s[i] & 0x0f;
                if (ch <= 0x09) {
                    (*d)[n] = ch + '0';
                } else {
                    (*d)[n]= ch - 10 + 'a';
                }
                n++;
            }
    
            (*d)[n] = '\0';
    
            return n;
        }
    

When compiled without optimizations it works OK, but segfaults with
optimizations ON. I've generated the assembly code using this GCC command:

gcc -Wall -Wextra -O2 -march=pentium -S -masm=intel gccbug.c

And here is the result:

    
    
            .file   "gccbug.c"
            .intel_syntax noprefix
            .text
            .p2align 4,,15
            .globl  data_to_hex
            .type   data_to_hex, @function
        data_to_hex:
        .LFB12:
            .cfi_startproc
            push    ebp
            .cfi_def_cfa_offset 8
            .cfi_offset 5, -8
            push    edi
            .cfi_def_cfa_offset 12
            .cfi_offset 7, -12
            push    esi
            .cfi_def_cfa_offset 16
            .cfi_offset 6, -16
            push    ebx
            .cfi_def_cfa_offset 20
            .cfi_offset 3, -20
            sub esp, 44
            .cfi_def_cfa_offset 64
            or  edi, -1
            mov esi, DWORD PTR [esp+68]
            mov ebx, DWORD PTR [esp+72]
            lea eax, [esi-1]
            cmp eax, 1073741823
            ja  .L2
            add esi, esi
            mov eax, esi
            mov DWORD PTR [esp+20], esi
            inc eax
            mov DWORD PTR [esp], eax
            call    malloc
            mov DWORD PTR [esp+24], eax
            mov DWORD PTR [ebx], eax
            test    eax, eax
            je  .L2
            mov ecx, DWORD PTR [esp+64]
            mov edi, 1
            xor edx, edx
            jmp .L8
            .p2align 4,,7
            .p2align 3
        .L12:
            lea ebp, [eax+48]
            mov esi, DWORD PTR [esp+24]
            mov eax, ebp
            mov BYTE PTR [esi+edx], al
            mov al, BYTE PTR [ecx]
            mov ebp, DWORD PTR [ebx]
            and eax, 15
            cmp al, 9
            jg  .L5
        .L13:
            add eax, 48
            add edx, 2
            mov BYTE PTR [ebp+0+edi], al
            inc ecx
            add edi, 2
            mov eax, DWORD PTR [ebx]
            cmp edx, DWORD PTR [esp+20]
            je  .L7
        .L14:
            mov DWORD PTR [esp+24], eax
        .L8:
            mov al, BYTE PTR [ecx]
            shr al, 4
            mov BYTE PTR [esp+31], al
            cmp al, 9
            jle .L12
            mov al, BYTE PTR [esp+31]
            mov esi, DWORD PTR [esp+24]
            lea ebp, [eax+87]
            mov eax, ebp
            mov BYTE PTR [esi+edx], al
            mov al, BYTE PTR [ecx]
            mov ebp, DWORD PTR [ebx]
            and eax, 15
            cmp al, 9
            jle .L13
        .L5:
            add eax, 87
            add edx, 2
            mov BYTE PTR [ebp+0+edi], al
            inc ecx
            add edi, 2
            mov eax, DWORD PTR [ebx]
            cmp edx, DWORD PTR [esp+20]
            jne .L14
        .L7:
            mov BYTE PTR [eax-2147483648], 0
            mov edi, -2147483648
        .L2:
            add esp, 44
            .cfi_def_cfa_offset 20
            mov eax, edi
            pop ebx
            .cfi_def_cfa_offset 16
            .cfi_restore 3
            pop esi
            .cfi_def_cfa_offset 12
            .cfi_restore 6
            pop edi
            .cfi_def_cfa_offset 8
            .cfi_restore 7
            pop ebp
            .cfi_def_cfa_offset 4
            .cfi_restore 5
            ret
            .cfi_endproc
        .LFE12:
            .size   data_to_hex, .-data_to_hex
            .ident  "GCC: 4.6.3"
            .section    .note.GNU-stack,"",@progbits
    

\-------------

Take notice of this code:

    
    
        .L7:
            mov BYTE PTR [eax-2147483648], 0
            mov edi, -2147483648
    

It will end-up something like this:

    
    
        (*d)[-2147483648] = '\0';
    
        return -2147483648;
    

instead of the written code:

    
    
        (*d)[n] = '\0';
    
        return n;
    

Why is this happening?

Thank you.

~~~
tokenrove
Well, it does look like a gcc bug at first glance. It's no surprise that the
constant involved is 2*1024^3, since that's the maximum value it expects n can
ever be. Since gcc assumes signed integer arithmetic does not overflow for
optimization purposes, it's usually a better idea to use unsigned types (such
as size_t) for natural numbers like len, n, and i, and I would assume your
code would compile fine with size_t instead of int. Removing the check of len
against 1024^3 would probably also stop this behavior from happening.

~~~
uaygsfdbzf
Thanks for confirming.

> Removing the check of len against 1024^3 would probably also stop this
> behavior from happening.

Yes, removing it or even checking against 1024 instead of 1024^3 is preventing
the bug. Strange behaviour.

> it's usually a better idea to use unsigned types (such as size_t) for
> natural numbers like len, n, and i, and I would assume your code would
> compile fine with size_t instead of int.

When I don't need all the space of the unsigned type I like to use signed
types to be able to return -1 as an indication of error. Well, should we
report a bug?

------
hhjj
HN is not gcc bug tracker nor stack overflow. Your code is wrong.

