CryptoJS AES CBC 256 decrypt adds additional byte in the middle of plaintext

Question

I am writing a Chrome extension which is using CryptoJS for some Apache Thrift work. I am currently trying to get CryptoJS working. I'm facing a problem with CryptoJS decryption of data encrypted by CryptoJS. I'm attaching a test case below, after the problem description.

What is happening is the following, I have an array of "bytes":

// Byte values of the message to be encrypted, one array element per byte.
var bArr = [11,0,1,0,0,0,6,100,105,103,101,115,116,11,0,2,0,0,0,152,67,119,65,66,65,65,65,65,69,109,78,111,99,109,57,116,90,83,49,48,90,88,78,48,76,87,78,115,97,87,86,117,100,65,103,65,65,103,65,65,49,68,69,75,65,65,77,65,65,65,65,65,86,75,102,66,85,103,115,65,66,65,65,65,65,67,81,49,90,68,99,119,77,71,73,120,78,67,48,121,78,84,90,107,76,84,81,119,77,109,81,116,79,84,65,48,90,105,48,52,79,84,86,105,78,68,73,50,89,109,78,108,78,84,99,76,65,65,85,65,65,65,65,85,89,50,104,121,98,50,49,108,76,87,78,115,97,87,86,117,100,67,49,122,90,87,78,121,90,88,81,65,11,0,3,0,0,0,36,52,51,52,55,54,56,98,53,45,50,48,102,102,45,52,99,100,102,45,56,53,97,50,45,57,49,49,56,50,98,55,98,51,102,57,53,0];
// Build a JavaScript string with one character per byte (char code == byte value).
var stringToEncode = String.fromCharCode.apply(null, bArr);

I encrypt it with CryptoJS and then decrypt. First 25 bytes before encryption:

11,0,1,0,0,0,6,100,105,103,101,115,116,11,0,2,0,0,0,152,67,119,65,66,65

After the decryption:

11,0,1,0,0,0,6,100,105,103,101,115,116,11,0,2,0,0,0,194,152,67,119,65,66

The only difference is an additional 194 at position 20. All other bytes are exactly the same, except for the padding, obviously. I'm trying to understand where this is coming from.

More info regarding what these bytes are. This is a Thrift structure with 3 fields; field 2 contains a Base64 representation of another, embedded Thrift struct. The first 20 bytes of the input explained:

11,0,1: Thrift field of type string, fid 1
0,0,0,6: int32 length of the value of fid 1
100,105,103,101,115,116: string "digest"
11,0,2: Thrift field of type string, fid 2
0,0,0,152: int32 length of the value of fid 2
67 ... until next byte 11: Base64 representation of the embedded struct

Because of the decryption issue, the Thrift parser reads the wrong length for the value of fid 2.

I believe I am using AES256 CBC with a 32 byte key (SHA256) and 16 byte IV in OpenSSL mode with PKCS7 padding.

This is my qunit test.

// QUnit test case reproducing the problem: encrypts a byte string with
// CryptoJS AES, decrypts it again, logs both byte sequences and compares
// the first 200 characters of the input and the decrypted output.
test("Decryption", function() {
  // Byte values of the message; note the value 152 at index 19, the first one above 127.
  var bArr = [11,0,1,0,0,0,6,100,105,103,101,115,116,11,0,2,0,0,0,152,67,119,65,66,65,65,65,65,69,109,78,111,99,109,57,116,90,83,49,48,90,88,78,48,76,87,78,115,97,87,86,117,100,65,103,65,65,103,65,65,49,68,69,75,65,65,77,65,65,65,65,65,86,75,102,66,85,103,115,65,66,65,65,65,65,67,81,49,90,68,99,119,77,71,73,120,78,67,48,121,78,84,90,107,76,84,81,119,77,109,81,116,79,84,65,48,90,105,48,52,79,84,86,105,78,68,73,50,89,109,78,108,78,84,99,76,65,65,85,65,65,65,65,85,89,50,104,121,98,50,49,108,76,87,78,115,97,87,86,117,100,67,49,122,90,87,78,121,90,88,81,65,11,0,3,0,0,0,36,52,51,52,55,54,56,98,53,45,50,48,102,102,45,52,99,100,102,45,56,53,97,50,45,57,49,49,56,50,98,55,98,51,102,57,53,0];
  // Build a JavaScript string with one character per byte (char code == byte value).
  var stringToEncode = String.fromCharCode.apply(null, bArr);
  var symmetricKey = "v3JElaRswYgxOt4b";

  // The AES key is the SHA-256 digest of the passphrase, round-tripped
  // through a Latin1 string back into a WordArray.
  var key = CryptoJS.enc.Latin1.parse( CryptoJS.enc.Latin1.stringify( CryptoJS.SHA256( symmetricKey ) ) );
  // Fresh random IV for every run (16 requested from WordArray.random).
  var iv  = CryptoJS.lib.WordArray.random( 16 );

  // NOTE(review): the plaintext is handed over as a plain JavaScript string,
  // not as a WordArray. CryptoJS appears to UTF-8-encode string input, which
  // would turn char code 152 into the two bytes 194,152 — confirm.
  // Only the raw ciphertext is kept, rendered as a Latin1 string.
  var encrypted = CryptoJS.AES.encrypt( stringToEncode,
                                        key,
                                        { iv: iv, format: CryptoJS.format.OpenSSL }
                                      ).ciphertext.toString(CryptoJS.enc.Latin1);

  // The Latin1 ciphertext string is parsed back into a WordArray and wrapped
  // in a minimal params object. NoPadding is requested here but not on
  // encrypt, so presumably the padding bytes stay at the end of the output.
  var decrypted = CryptoJS.AES.decrypt( { ciphertext: CryptoJS.enc.Latin1.parse(encrypted) },
                                          key,
                                          { iv: iv, padding: CryptoJS.pad.NoPadding }
                                      ).toString(CryptoJS.enc.Latin1);

  // Char codes of the decrypted string, for a byte-by-byte comparison.
  var buf = [];
  for (var i=0; i<decrypted.length; i++) {
    buf.push( decrypted.charCodeAt(i) );
  }

  // Comma-separated rendering of the original bytes...
  var bstr1 = "";
  for (var i=0; i<bArr.length; i++) {
    bstr1 += (i>0) ? ","+bArr[i] : bArr[i]+"";
  }
  // ...and of the decrypted bytes, so the two log lines can be compared by eye.
  var bstr2 = "";
  for (var i=0; i<buf.length; i++) {
    bstr2 += (i>0) ? ","+buf[i] : buf[i]+"";
  }

  console.log("------------------------------------------");
  console.log(bstr1);
  console.log(bstr2);
  console.log("------------------------------------------");

  // Compare only the first 200 characters of each string.
  equal( stringToEncode.slice(0,200), decrypted.slice(0,200) );
});

My test HTML wrapper loads these:

<!-- Third-party libraries installed under ../bower_components -->
<script src="../bower_components/jquery/dist/jquery.min.js"></script>
<script src="../bower_components/js-base64/base64.js"></script>
<script src="../bower_components/thrift/lib/js/src/thrift.js"></script>
<script src="../bower_components/underscore/underscore-min.js"></script>
<script src="../bower_components/qunit/qunit/qunit.js"></script>
<!-- CryptoJS components from the browserify-cryptojs package -->
<script src="../bower_components/browserify-cryptojs/components/core.js"></script>
<script src="../bower_components/browserify-cryptojs/components/sha256.js"></script>
<script src="../bower_components/browserify-cryptojs/components/enc-base64.js"></script>
<script src="../bower_components/browserify-cryptojs/components/cipher-core.js"></script>
<script src="../bower_components/browserify-cryptojs/components/format-hex.js"></script>
<script src="../bower_components/browserify-cryptojs/components/aes.js"></script>
<script src="../bower_components/browserify-cryptojs/components/pad-nopadding.js"></script>
<!-- The test suite itself -->
<script type="text/javascript" src="test-client.js" charset="utf-8"></script>
<!-- QUnit stylesheet -->
<link rel="stylesheet" href="../bower_components/qunit/qunit/qunit.css" type="text/css" media="screen" />

And my bower.json is:

{
  "name": "gossiperl-client-chrome",
  "version": "0.1.0",
  "main": "manifest.json",
  "dependencies": {
    "jquery": "~1.11.0",
    "underscore": "~1.7.0",
    "thrift": "radekg/thrift#js-binary-protocol",
    "js-base64": "~2.1.5",
    "qunit": "~1.14.0",
    "browserify-cryptojs": "~0.3.1"
  },
  "authors": [
    "radekg <[email protected]>"
  ],
  "description": "Gossiperl Chrome client with a sample application",
  "keywords": [
    "gossiperl",
    "client"
  ],
  "license": "MIT",
  "homepage": "http://....com",
  "private": true
}

C2 or 194 is part of a two-byte encoded character in UTF-8. Welcome to JavaScript's string / binary hell. Oh, and 152 is the first character that's not part of US-ASCII - requiring two bytes to be encoded in UTF-8. — Maarten Bodewes
I'm looking for it, but CryptoJS does not have a direct array to WordArray conversion. The answer is probably to convert to hexadecimals first, then create a WordArray out of it (which is pretty inefficient, but JavaScript generally is for byte array / crypto operations). — Maarten Bodewes
That's a bit sad. All other software in Erlang, java, mono, ruby expects the data in this format - without encoding the outside digest. Maybe creating a wordarray directly from my array would be an option as you say. Or look at NaCl :-/ — user56250
Hold your horses, just a bit of encoding / decoding should not stop you :P — Maarten Bodewes

Maarten Bodewes Maarten Bodewes · Accepted Answer · 2015-01-03T17:30:35

The issue is that CryptoJS treats the input as a string to be encoded as UTF-8 unless it is already a WordArray. This is of course a problem if your input is binary data rather than text. What you are seeing is that any value of 0x80 (128) or higher is converted into two bytes by the UTF-8 encoding: 152 (0x98) becomes 0xC2 0x98, i.e. 194, 152.

As long as CryptoJS does not support direct conversion from a byte array to a WordArray, you can convert the array to hexadecimals first and then parse the hex string into a WordArray. It's a bit strange that this feature is missing.

The following converts an array with unsigned byte values to hexadecimals (with a bit of a guard with regard to invalid byte values):

/**
 * Converts an array of unsigned byte values into a lowercase hexadecimal
 * string, two hex digits per byte.
 *
 * @param {Array<number>} unsignedByteArray - array-like of integers in 0..255
 * @returns {string} hex string of length 2 * unsignedByteArray.length
 * @throws {string} "Value not an unsigned byte in array" if any element is
 *         not an integer number in the range 0..255
 */
function tohex(unsignedByteArray) {
    var hex = "";
    for (var i = 0; i < unsignedByteArray.length; i++) {
        var c = unsignedByteArray[i];
        // A plain range check lets NaN, fractions, numeric strings and
        // undefined through (NaN < 0 and NaN > 255 are both false), which
        // would yield output such as "NaN" or "1.8". `c % 1 !== 0` is true
        // for fractions, NaN and +/-Infinity, so only integers pass.
        if (typeof c !== "number" || c % 1 !== 0 || c < 0 || c > 255) {
            throw "Value not an unsigned byte in array";
        }
        var h = c.toString(16);
        // Left-pad single-digit values so every byte occupies two characters.
        hex += (h.length === 1) ? "0" + h : h;
    }
    return hex;
}

/**
 * Decodes a hexadecimal string into an array of unsigned byte values,
 * consuming two hex digits (either case) per byte.
 *
 * @param {string} hex - hex string with an even number of digits
 * @returns {Array<number>} one value in 0..255 per pair of digits
 * @throws {string} if the length is odd, or if a pair contains a character
 *         that is not a hex digit (the message carries the pair's offset)
 */
function fromhex(hex) {
    var hasOddLength = hex.length % 2 !== 0;
    if (hasOddLength) {
        throw "Hex string should contain even number of hex digits, one per byte";
    }

    // Matches exactly two hex digits; no /g flag, so test() keeps no state.
    var hexPair = /^[0-9a-f]{2}$/i;
    var bytes = [];
    var offset = 0;
    while (offset < hex.length) {
        var pair = hex.substring(offset, offset + 2);
        if (hexPair.test(pair) === false) {
            throw "Invalid hexdigit at offset " + offset;
        }
        bytes.push(parseInt(pair, 16));
        offset += 2;
    }
    return bytes;
}

So you would be able to use these functions like this:

// Byte values to encrypt, one array element per byte.
var bArr = [11, 0, 1, 0, 0, 0, 6, 100, 105, 103, 101, 115, 116, 11, 0, 2, 0, 0, 0, 152, 67, 119, 65, 66, 65, 65, 65, 65, 69, 109, 78, 111, 99, 109, 57, 116, 90, 83, 49, 48, 90, 88, 78, 48, 76, 87, 78, 115, 97, 87, 86, 117, 100, 65, 103, 65, 65, 103, 65, 65, 49, 68, 69, 75, 65, 65, 77, 65, 65, 65, 65, 65, 86, 75, 102, 66, 85, 103, 115, 65, 66, 65, 65, 65, 65, 67, 81, 49, 90, 68, 99, 119, 77, 71, 73, 120, 78, 67, 48, 121, 78, 84, 90, 107, 76, 84, 81, 119, 77, 109, 81, 116, 79, 84, 65, 48, 90, 105, 48, 52, 79, 84, 86, 105, 78, 68, 73, 50, 89, 109, 78, 108, 78, 84, 99, 76, 65, 65, 85, 65, 65, 65, 65, 85, 89, 50, 104, 121, 98, 50, 49, 108, 76, 87, 78, 115, 97, 87, 86, 117, 100, 67, 49, 122, 90, 87, 78, 121, 90, 88, 81, 65, 11, 0, 3, 0, 0, 0, 36, 52, 51, 52, 55, 54, 56, 98, 53, 45, 50, 48, 102, 102, 45, 52, 99, 100, 102, 45, 56, 53, 97, 50, 45, 57, 49, 49, 56, 50, 98, 55, 98, 51, 102, 57, 53, 0];
// Hex-encode the bytes so they can be parsed into a WordArray.
var bArrHex = tohex(bArr);

// Despite its name this is a WordArray parsed from the hex string, not a
// JavaScript string.
var stringToEncode = CryptoJS.enc.Hex.parse(bArrHex);

var symmetricKey = "v3JElaRswYgxOt4b";

// The AES key is the SHA-256 digest of the passphrase, round-tripped
// through a Latin1 string back into a WordArray.
var key = CryptoJS.enc.Latin1.parse(CryptoJS.enc.Latin1.stringify(CryptoJS.SHA256(symmetricKey)));

// Fresh random IV for every run (16 requested from WordArray.random).
var iv = CryptoJS.lib.WordArray.random(16);

// The value returned by encrypt is kept as-is and passed straight to decrypt
// below, with the same key, IV and format option.
var encrypted = CryptoJS.AES.encrypt(stringToEncode, key, { iv: iv, format: CryptoJS.format.OpenSSL });

var decrypted = CryptoJS.AES.decrypt(encrypted, key, { iv: iv, format: CryptoJS.format.OpenSSL });

// Render the decrypted result as hex and decode that back into a byte array.
var result = fromhex(decrypted.toString(CryptoJS.enc.Hex));

console.log(result);

// Both arrays are compared through their comma-joined string forms.
if (bArr.toString() == result.toString()) {
    console.log("success");
}

Note that the `encrypted` object is automatically encoded as Base64 when it is used as a string. You cannot use Latin1 encoding for ciphertext. Note that your key should also consist of random bytes, not only printable characters as it does now.

Finally note that sending AES encrypted text without MAC is inherently unsafe, for instance because of padding oracle attacks and the fact that anybody can change the data in transit.

CryptoJS AES CBC 256 decrypt adds additional byte in the middle of plaintext

2 Answers