zlib and gzip generating different data

Question

I have a chunk of data which is supposed to be zlib-compressed (I am not 100% sure).

I first tried to uncompress it with gzip by prepending "1F 8B 08 00 00 00 00 00". Just like in the accepted answer of this thread (https://unix.stackexchange.com/questions/22834/how-to-uncompress-zlib-data-in-unix). It worked out and it was probably the right approach, because the output contained a lot of human readable strings.

I then tried to implement this in a C++ program using zlib, but zlib seems to generate different output. Am I missing something? zlib and gzip should be basically the same (apart from the headers and trailers), shouldn't they? Or do I have a simple error in my code below? (The chunk of data is shortened for the sake of simplicity.)

// Caller snippet: dump the compressed input, inflate it, then dump the result.
// NOTE(review): `decompressed` is never initialised here, and inflateZlib takes
// the pointer by value, so the buffer it allocates with new[] is not reachable
// through this variable when the second printHex call reads from it.
unsigned char* decompressed;
unsigned char* dataChunk = /*...*/;
printHex(dataChunk, 160);
int error = inflateZlib(dataChunk, 160, decompressed, 1000);
printHex(decompressed, 160);
//zerr(error);

// Prints the first `n` bytes of `data` to std::cout as space-separated
// hexadecimal values (no zero padding), followed by a line containing "-".
// The stream is switched back to decimal output before returning.
void printHex(unsigned char* data, size_t n)
{
    for(size_t i = 0; i < n; i++)
    {
        // Widen the byte so it is printed as a number rather than as a character.
        std::cout << std::hex << static_cast<uint16_t>(data[i]) << " ";
    }
    std::cout << std::dec << "\n-\n";
}

// Inflates `length` compressed bytes at `data` into a buffer of
// `maxDecompressed` bytes with a single inflate() call and returns the status
// code of that inflate() call. The results of inflateInit() and inflate() are
// each passed to zerr().
// NOTE(review): `decompressed` is a by-value pointer parameter, so the buffer
// allocated below is assigned only to this function's local copy; the caller's
// pointer is left unchanged and the allocation is never freed in this function.
int inflateZlib(unsigned char* data, size_t length, unsigned char* decompressed, size_t maxDecompressed)
{
    decompressed = new unsigned char[maxDecompressed];

    z_stream infstream;
    // Z_NULL for the allocator hooks and the opaque pointer.
    infstream.zalloc = Z_NULL;
    infstream.zfree = Z_NULL;
    infstream.opaque = Z_NULL;
    infstream.avail_in = (uInt)(length); // size of input
    infstream.next_in = (Bytef *)data; // input char array
    infstream.avail_out = (uInt)maxDecompressed; // size of output
    infstream.next_out = (Bytef *)decompressed; // output char array
    // the actual DE-compression work.
    int ret = inflateInit(&infstream);
    zerr(ret);
    ret = inflate(&infstream, Z_NO_FLUSH);
    zerr(ret);
    inflateEnd(&infstream);

    // Status of the inflate() call above; the inflateInit() status was overwritten.
    return ret;
}

This produces the following output:

78 9c bd 58 4b 88 23 45 18 ee 3c 67 e3 24 93 cc ae 8a f8 42 10 c4 cb 1a 33 a3 7b f0 60 e6 e0 e6 e0 49 90 bd 29 4d 4d 77 25 dd 99 ee ea de aa ee 4c 32 82 2c e8 c1 93 ac 47 c5 45 f 82 8 5e 16 f ba 78 18 45 d0 83 7 95 15 5c d0 c3 aa b0 b2 ee 65 5c f0 e4 c5 bf aa 1f a9 ea 74 cf 64 7 31 c3 24 9d fa fe bf ea ab ff 59 15 ab 62 6a b5 5d 9b 8c 18 2a 5b 15 47 d3 b4 92 55 35 b5 ba b7 3d c6 46 b0 a3 35 3 1c 50 64 61 93 7a a4 67 d5 0 e1 c2 d8 e4 92 75 fe 56 b3 ca a6 76 c2 f0 1c 8f 
-
0 0 6 c0 83 50 0 0 16 b0 78 9c bd 58 4b 88 23 45 18 ee 3c 67 e3 24 93 cc ae 8a f8 42 10 c4 cb 1a 33 a3 7b f0 60 e6 e0 e6 e0 49 90 bd 29 4d 4d 77 25 dd 99 ee ea de aa ee 4c 32 82 2c e8 c1 93 ac 47 c5 45 f 82 8 5e 16 f ba 78 18 45 d0 83 7 95 15 5c d0 c3 aa b0 b2 ee 65 5c f0 e4 c5 bf aa 1f a9 ea 74 cf 64 7 31 c3 24 9d fa fe bf ea ab ff 59 15 ab 62 6a b5 5d 9b 8c 18 2a 5b 15 47 d3 b4 92 55 35 b5 ba b7 3d c6 46 b0 a3 35 3 1c 50 64 61 93 7a a4 67 d5 0 e1 c2 d8 e4 92 75 
-

which is not what I want. Whereas gzip:

printf "\x1f\x8b\x08\x00\x00\x00\x00\x00\x78\x9c\xbd\x58\x4b\x88\x23\x45\x18\xee\x3c\x67\xe3\x24\x93\xcc\xae\x8a\xf8\x42\x10\xc4\xcb\x1a\x33\xa3\x7b\xf0\x60\xe6\xe0\xe6\xe0\x49\x90\xbd\x29\x4d\x4d\x77\x25\xdd\x99\xee\xea\xde\xaa\xee\x4c\x32\x82\x2c\xe8\xc1\x93\xac\x47\xc5\x45\xf\x82\x8\x5e\x16\xf\xba\x78\x18\x45\xd0\x83\x7\x95\x15\x5c\xd0\xc3\xaa\xb0\xb2\xee\x65\x5c\xf0\xe4\xc5\xbf\xaa\x1f\xa9\xea\x74\xcf\x64\x7\x31\xc3\x24\x9d\xfa\xfe\xbf\xea\xab\xff\x59\x15\xab\x62\x6a\xb5\x5d\x9b\x8c\x18\x2a\x5b\x15\x47\xd3\xb4\x92\x55\x35\xb5\xba\xb7\x3d\xc6\x46\xb0\xa3\x35\x3\x1c\x50\x64\x61\x93\x7a\xa4\x67\xd5\x0\xe1\xc2\xd8\xe4\x92\x75\xfe\x56\xb3\xca\xa6\x76\xc2\xf0\x1c\x8f" | gzip -dc | hexdump -C

produces:

gzip: stdin: unexpected end of file
00000000  68 03 64 00 05 77 69 6e  67 73 61 02 68 03 6c 00  |h.d..wingsa.h.l.|
00000010  00 00 01 68 04 64 00 06  6f 62 6a 65 63 74 6b 00  |...h.d..objectk.|
00000020  0c 74 65 74 72 61 68 65  64 72 6f 6e 31 68 05 64  |.tetrahedron1h.d|
00000030  00 06 77 69 6e 67 65 64  6c 00 00 00 06 6c 00 00  |..wingedl....l..|
00000040  00 05 68 02 64 00 08 63  6f 6c 6f                 |..h.d..colo|
0000004b

which is what I want.

Retired Ninja · Accepted Answer · 2014-09-10T05:23:10

I was able to decode the data you provided by using zlib 1.2.8 and the inflateInit2 function with 32 for windowBits. I used 32 based on this information from the zlib documentation:

windowBits can also be zero to request that inflate use the window size in the zlib header of the compressed stream.

and

Add 32 to windowBits to enable zlib and gzip decoding with automatic header detection

Here's the full code. I stripped out error checking since I don't have a zerr function. It doesn't appear you're using Visual C++, so you will want to remove the #pragma to avoid a warning as well.

#include <iostream>
#include <iomanip>
#include <cstdint>
#include <cctype>
#include "zlib.h"

#pragma comment(lib, "zdll.lib")

// Number of bytes shown on each line of the hex dump.
const size_t block_size = 16;

// Prints one hex-dump line for the first `n` bytes at `data` to std::cout:
// the 8-digit, zero-padded hexadecimal `offset`, then block_size two-digit hex
// byte values split into two groups, then the same bytes as text between '|'.
// Positions at or beyond `n` are shown as 00 in the hex columns and as '.' in
// the text column; bytes that are not alphanumeric are also shown as '.'.
// Prints nothing when `n` is 0. The format flags and fill character of
// std::cout are restored before returning.
void printLine(unsigned char* data, size_t offset, size_t n)
{
    if(n == 0)
    {
        return;
    }

    // std::hex and std::setfill are sticky. Save the stream state so the offset
    // is always printed in the same base and the caller's formatting survives.
    const std::ios_base::fmtflags savedFlags = std::cout.flags();
    const char savedFill = std::cout.fill();

    std::cout << std::hex << std::setfill('0') << std::right
              << std::setw(8) << offset << " ";
    for(size_t x = 0; x < block_size; ++x)
    {
        // Extra space in front of each half of the line.
        if(x % (block_size/2) == 0) std::cout << " ";
        // Widen the byte so it is printed as a number rather than as a character.
        const unsigned value = x < n ? data[x] : 0u;
        std::cout << std::setw(2) << value << " ";
    }
    std::cout << "|";
    for(size_t x = 0; x < block_size; ++x)
    {
        const bool shown = x < n && std::isalnum(data[x]);
        std::cout << (shown ? static_cast<char>(data[x]) : '.');
    }
    std::cout << "|\n";

    std::cout.flags(savedFlags);
    std::cout.fill(savedFill);
}

void printHex(unsigned char* data, size_t n)
{
    const size_t blocks = n / block_size;
    const size_t remainder = n % block_size;
    for(size_t i = 0; i < blocks; i++)
    {
        size_t offset = i * block_size;
        printLine(&data[offset], offset, block_size);
    }
    size_t offset = blocks * block_size;
    printLine(&data[offset], offset, remainder);
    std::cout << "\n";
}

int inflateZlib(unsigned char* data, uint32_t length, unsigned char* decompressed, uint32_t maxDecompressed)
{
    z_stream infstream;
    infstream.zalloc = Z_NULL;
    infstream.zfree = Z_NULL;
    infstream.opaque = Z_NULL;
    infstream.avail_in = length;
    infstream.next_in = data;
    infstream.avail_out = maxDecompressed;
    infstream.next_out = decompressed;
    inflateInit2(&infstream, 32);
    inflate(&infstream, Z_FINISH);
    inflateEnd(&infstream);
    return infstream.total_out;
}

// Dumps the sample compressed data, inflates it with inflateZlib and dumps the
// bytes that were produced.
int main()
{
    unsigned char dataChunk[] = 
        "\x1f\x8b\x08\x00\x00\x00\x00\x00\x78\x9c\xbd\x58\x4b\x88\x23\x45"
        "\x18\xee\x3c\x67\xe3\x24\x93\xcc\xae\x8a\xf8\x42\x10\xc4\xcb\x1a"
        "\x33\xa3\x7b\xf0\x60\xe6\xe0\xe6\xe0\x49\x90\xbd\x29\x4d\x4d\x77"
        "\x25\xdd\x99\xee\xea\xde\xaa\xee\x4c\x32\x82\x2c\xe8\xc1\x93\xac"
        "\x47\xc5\x45\xf\x82\x8\x5e\x16\xf\xba\x78\x18\x45\xd0\x83\x7\x95"
        "\x15\x5c\xd0\xc3\xaa\xb0\xb2\xee\x65\x5c\xf0\xe4\xc5\xbf\xaa\x1f"
        "\xa9\xea\x74\xcf\x64\x07\x31\xc3\x24\x9d\xfa\xfe\xbf\xea\xab\xff"
        "\x59\x15\xab\x62\x6a\xb5\x5d\x9b\x8c\x18\x2a\x5b\x15\x47\xd3\xb4"
        "\x92\x55\x35\xb5\xba\xb7\x3d\xc6\x46\xb0\xa3\x35\x03\x1c\x50\x64"
        "\x61\x93\x7a\xa4\x67\xd5\x00\xe1\xc2\xd8\xe4\x92\x75\xfe\x56\xb3"
        "\xca\xa6\x76\xc2\xf0\x1c\x8f";
    // The string literal appends a terminating '\0' that is not part of the
    // compressed data, so leave it out of the length.
    const uint32_t dataLength = sizeof(dataChunk) - 1;
    unsigned char decompressed[1000] = {};

    printHex(dataChunk, dataLength);
    uint32_t len = inflateZlib(dataChunk, dataLength, decompressed, sizeof(decompressed));
    printHex(decompressed, len);
    return 0;
}

zlib and gzip generating different data

2 Answers