3
votes

I am new to System.IO.Compression

I am trying to compress and decompress some information.

For compression I used code from CodeProject and it seems to work. I am compressing a string at the moment.

For decompressing I would like to decompress a Stream.

This is what I have at the moment

        // Compress the request payload; the result is carried around as a string.
        var zipString = _compressor.Compress(request);
        // NOTE(review): StreamWriter writes the string as encoded text (UTF-8 unless
        // an encoding is supplied), so if zipString holds raw GZip bytes as chars,
        // values above 0x7F would presumably not reach the server unchanged - confirm.
        using (var sw = new StreamWriter(req.GetRequestStream()))
        {
            sw.Write(zipString);
            // Explicit Close inside a using block; the using also disposes sw on exit.
            sw.Close();
        }

        // Fetch the response and replace the raw response stream with whatever
        // UnCompress returns for it.
        WebResponse respStream = req.GetResponse();
        Stream resp = respStream.GetResponseStream();                      
        resp = _compressor.UnCompress(resp);

Here is the code for compressing

    /// <summary>
    /// GZip-compresses <paramref name="s"/> and returns the compressed bytes
    /// packed into a string, one char per compressed byte.
    /// </summary>
    /// <param name="s">Text to compress; every char is cast to a single byte.</param>
    /// <returns>
    /// A string whose chars each hold one byte of the GZip output. It is binary
    /// data in string form, not readable text.
    /// </returns>
    public string Compress(string s)
    {
        //Transforming string into byte
        // NOTE(review): the (byte) cast keeps one byte per char, so characters
        // above U+00FF presumably lose information here - confirm.
        var byteArray = new byte[s.Length];
        var indexBa = 0;
        foreach (var item in s.ToCharArray())
        {
            byteArray[indexBa++] = (byte) item;
        }

        //prepare for compress
        var ms = new MemoryStream();
        var sw = new GZipStream(ms, CompressionMode.Compress);

        //compress
        sw.Write(byteArray,0,byteArray.Length);

        //close sw, DO NOT FLUSH because bytes will go missing...
        sw.Close();

        //transform byte[] zip data to string
        // The compressed bytes are read from ms only after sw has been closed.
        byteArray = ms.ToArray();
        var sb = new StringBuilder(byteArray.Length);
        foreach (var item in byteArray)
        {
            sb.Append((char) item);
        }
        // sw was already closed above; the calls below release both streams again.
        ms.Close();
        sw.Dispose();
        ms.Dispose();
        return sb.ToString();
    }

And here is the code for decompressing

    /// <summary>
    /// Buffers <paramref name="s"/> in memory, passes the buffered bytes through a
    /// text decode / UTF-8 encode round trip, and GZip-decompresses the result.
    /// </summary>
    /// <param name="s">Stream that is read until Read returns 0.</param>
    /// <returns>A new MemoryStream with the decompressed bytes, positioned at 0.</returns>
    public Stream UnCompress(Stream s)
    {
        int readCount;
        MemoryStream mem = new MemoryStream();
        byte[] tmp = new byte[128];
        // Copy the whole input into mem, 128 bytes at a time.
        while ((readCount = s.Read(tmp, 0, 128)) != 0)
        {
            mem.Write(tmp, 0, readCount);
        }
        // Read the buffered bytes back as characters. buffer is sized with the
        // byte count of mem, not with a character count.
        StreamReader reader = new StreamReader(mem);
        char[] buffer = new char[mem.Length];
        mem.Position = 0;
        reader.ReadBlock(buffer, 0, (int)mem.Length);

        // NOTE(review): decoding arbitrary binary data as text and re-encoding it
        // with UTF8 is not guaranteed to give back the same bytes, so wr may not
        // start with a valid GZip header - a likely source of the reported
        // "magic number" error; confirm.
        byte[] zip = Encoding.UTF8.GetBytes(buffer, 0, (int)mem.Length);
        Stream wr = new MemoryStream();
        wr.Write(zip, 0, zip.Length);
        wr.Position = 0;
        using (var stream = new GZipStream(wr, CompressionMode.Decompress))
        {                
            // mem is re-pointed at a fresh stream that collects the decompressed output.
            mem = new MemoryStream();
            while ((readCount = stream.Read(tmp, 0, 128)) != 0)
            {
                mem.Write(tmp, 0, readCount);
            }
            // Rewind so the caller can read the result from the start.
            mem.Position = 0;
            return mem;
        }        
    }

At this point I get error while decompressing in following line:

// Excerpt from UnCompress: the exception is reported for the stream.Read call in
// the loop condition, so the loop body is never entered.
while ((readCount = stream.Read(tmp, 0, 128)) != 0)
{
     mem.Write(tmp, 0, readCount);
}

It just does not enter while loop and says:

The magic number in GZip header is not correct. Make sure you are passing in a GZip stream

My decompress method is based on following stackoverflow post

Try 2:

        /// <summary>
        /// Second attempt: buffers <paramref name="s"/> into a byte array, overwrites
        /// part of that array from s.ToString(), and GZip-decompresses it with a
        /// single Read call.
        /// </summary>
        /// <param name="s">Stream to read in 1024-byte chunks.</param>
        /// <returns>A MemoryStream containing the bytes written from the decompression buffer.</returns>
        public Stream UnCompress(Stream s)
        {
            byte[] byteArray = null;
            // Copy the input into memory, then snapshot it as an array.
            using (var m = new MemoryStream())
            {
                int count;
                do
                {
                    byte[] buf = new byte[1024];
                    count = s.Read(buf, 0, 1024);
                    m.Write(buf, 0, count);
                } while (s.CanRead && count > 0);
                byteArray = m.ToArray();
            }
            // NOTE(review): s.ToString() is a description of the stream object, not
            // its content (presumably the type name), so this loop overwrites the
            // first bytes of the data just read - including where the GZip header
            // would be. Likely the cause of the "magic number" error; confirm.
            var indexBa = 0;
            foreach (var item in s.ToString().ToCharArray())
            {
                byteArray[indexBa++] = (byte) item;
            }
            //Prepare for decompress
            var ms = new MemoryStream(byteArray);
            var sr = new GZipStream(ms,
                CompressionMode.Decompress);

            //Reset variable to collect uncompressed result
            // The output buffer gets the same length as the compressed input.
            byteArray = new byte[byteArray.Length];

            //Decompress
            // One Read call; rByte is the number of bytes it actually returned.
            var rByte = sr.Read(byteArray, 0, byteArray.Length);

            //Transform byte[] unzip data to string
            // rByte is passed as the MemoryStream's initial capacity here.
            var sB = new MemoryStream(rByte);

            // Each iteration writes the first rByte bytes of byteArray again, so the
            // block is written rByte times in total.
            for (var i = 0; i < rByte; i++)
            {
                sB.Write(byteArray,0,rByte);
            }
            sr.Close();
            ms.Close();
            sr.Dispose();
            ms.Dispose();
            // sB's position is not reset before it is returned.
            return sB;
        }

Result: same error on the line var rByte = sr.Read(byteArray, 0, byteArray.Length);

Update 1:

As given in code project decompressing string perfectly works.

/// <summary>
/// Decompresses a GZip payload that was packed into a string with one char per
/// compressed byte, and returns the decompressed bytes packed the same way.
/// </summary>
/// <param name="value">Compressed data; each char is cast to one byte.</param>
/// <returns>The decompressed data, one char per decompressed byte.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="value"/> is null.</exception>
public static string UnZip(string value)
{
    if (value == null)
    {
        throw new System.ArgumentNullException("value");
    }

    //Transform string into byte[]
    byte[] compressed = new byte[value.Length];
    for (int i = 0; i < value.Length; i++)
    {
        compressed[i] = (byte)value[i];
    }

    using (System.IO.MemoryStream input = new System.IO.MemoryStream(compressed))
    using (System.IO.Compression.GZipStream gzip = new System.IO.Compression.GZipStream(input,
        System.IO.Compression.CompressionMode.Decompress))
    using (System.IO.MemoryStream output = new System.IO.MemoryStream())
    {
        // Drain the decompressor completely. The decompressed data is normally
        // larger than the compressed input and a single Read may return fewer
        // bytes than requested, so a fixed buffer filled by one Read would
        // truncate the result.
        gzip.CopyTo(output);
        byte[] decompressed = output.ToArray();

        //Transform byte[] unzip data to string
        System.Text.StringBuilder sB = new System.Text.StringBuilder(decompressed.Length);
        foreach (byte item in decompressed)
        {
            sB.Append((char)item);
        }
        return sB.ToString();
    }
}
1
Aside from anything else, converting from arbitrary binary data to text like this is a really bad idea. You'll end up with text containing various unprintable characters that may well not be transmitted accurately in many cases. I'd strongly advise you to change your Compress method to accept and return a byte array; then you can investigate which of the multiple transformations involved is causing the problem. – Jon Skeet
First of all, I am blessed that you have commented on my post. I tried a few things, and a few times I got an error related to a hexadecimal value at some index. Does that mean that compression generates some characters that cannot be decompressed? – Cybercop
Don't think of compression as operating on text at all. Think of it as operating on binary data - binary input, binary output. Separately, think about how to convert your input string into binary (probably Encoding.UTF8.GetBytes) and how to convert the output into text if absolutely necessary (e.g. Convert.ToBase64String). – Jon Skeet

1 Answer

5
votes

Your code has many issues, so it's easier to just write another version from scratch. Note that doing this:

// Excerpt from the question's Compress: one byte is allocated per char and each
// char is cast straight to byte, with no text encoding involved.
var byteArray = new byte[s.Length];
var indexBa = 0;
foreach (var item in s.ToCharArray())
{
    byteArray[indexBa++] = (byte) item;
}

to convert a string to a byte array is not a good idea. Instead, choose an encoding and convert with it. Also, returning the result of compression as a string is not a good idea - return a byte array instead, because that byte array does not really represent any string - it's just arbitrary binary data.

Sample code:

/// <summary>GZip-compresses a byte array.</summary>
/// <param name="data">The uncompressed bytes.</param>
/// <returns>A new array holding the complete GZip output.</returns>
static byte[] Compress(byte[] data)
{
    using (var output = new MemoryStream())
    {
        using (var input = new MemoryStream(data))
        using (var zipper = new GZipStream(output, CompressionMode.Compress))
        {
            // Pump the entire input through the compressor into output.
            input.CopyTo(zipper);
        }

        // Collected only after the compressor above has been disposed.
        return output.ToArray();
    }
}

/// <summary>Encodes a string with the given encoding and GZip-compresses the bytes.</summary>
/// <param name="s">The text to compress.</param>
/// <param name="encoding">Encoding used to turn the text into bytes.</param>
/// <returns>The compressed bytes.</returns>
static byte[] CompressString(string s, Encoding encoding)
{
    byte[] rawBytes = encoding.GetBytes(s);
    return Compress(rawBytes);
}

/// <summary>Compresses a string and returns the compressed bytes as Base64 text.</summary>
/// <param name="s">The text to compress.</param>
/// <param name="encoding">Encoding used to turn the text into bytes before compression.</param>
/// <returns>The Base64 representation of the compressed bytes.</returns>
static string CompressStringToBase64(string s, Encoding encoding)
{
    byte[] packed = CompressString(s, encoding);
    return Convert.ToBase64String(packed);
}

/// <summary>Reads a GZip stream to its end and returns the decompressed bytes.</summary>
/// <param name="source">
/// Stream containing GZip data. It is wrapped in a GZipStream that is disposed
/// before this method returns.
/// </param>
/// <returns>A new array holding the decompressed data.</returns>
static byte[] Decompress(Stream source)
{
    using (var unzipper = new GZipStream(source, CompressionMode.Decompress))
    using (var output = new MemoryStream())
    {
        // Drain the decompressor into memory, then snapshot the result.
        unzipper.CopyTo(output);
        return output.ToArray();
    }
}

/// <summary>Decompresses GZip data held in a byte array.</summary>
/// <param name="data">The complete GZip payload.</param>
/// <returns>The decompressed bytes.</returns>
static byte[] Decompress(byte[] data)
{
    // Wrap the array in a stream and delegate to the stream-based overload.
    using (var packedStream = new MemoryStream(data))
    {
        byte[] unpacked = Decompress(packedStream);
        return unpacked;
    }
}

/// <summary>Decompresses a GZip stream and decodes the resulting bytes as text.</summary>
/// <param name="source">Stream containing GZip data.</param>
/// <param name="encoding">Encoding used to turn the decompressed bytes into a string.</param>
/// <returns>The decoded text.</returns>
static string DecompressString(Stream source, Encoding encoding)
{
    byte[] unpacked = Decompress(source);
    return encoding.GetString(unpacked);
}

Test

// Round trip: compress a string, decompress it again, and expect the original back.
string original = "Some string";
Encoding utf8 = Encoding.UTF8;
byte[] packed = CompressString(original, utf8);
Stream packedStream = new MemoryStream(packed);
string roundTripped = DecompressString(packedStream, utf8);
Debug.Assert(original == roundTripped);