I found a paper describing a method that's perfect for this case — two bytes of overhead and simple to implement. Here's the code:
/*
 * Advance an 8-bit state by one step of a right-shifting Galois LFSR.
 *
 * The state is shifted right by one bit; if the bit that was shifted out
 * (the old least-significant bit) was set, the feedback mask 0x8E is XORed
 * into the result. An input of 0 yields 0.
 */
uint8_t lfsr(uint8_t x) {
    uint8_t next = (uint8_t)(x >> 1);

    if (x & 1u) {
        next ^= 0x8E;
    }
    return next;
}
/*
 * Compute the two check bytes over data[0 .. data_len-1].
 *
 *   *out_c0  receives the XOR of every data byte.
 *   *out_c1  receives an accumulator that, for each data byte in order,
 *            is XORed with the byte and then advanced by one lfsr() step.
 *
 * When data_len is zero or negative no bytes are read and both outputs
 * are set to 0. The outputs are written only once, after the whole buffer
 * has been processed.
 */
void eccComputeChecks(uint8_t *data, int data_len, uint8_t *out_c0, uint8_t *out_c1) {
    uint8_t parity = 0;
    uint8_t weighted = 0;
    const uint8_t *cursor = data;

    for (int remaining = data_len; remaining > 0; --remaining) {
        const uint8_t byte = *cursor++;

        parity ^= byte;
        weighted = lfsr(weighted ^ byte);
    }

    *out_c0 = parity;
    *out_c1 = weighted;
}
/*
 * Generate the check bytes for data[0 .. data_len-1].
 *
 * Thin wrapper around eccComputeChecks(): the first check byte is stored
 * in check[0] and the second in check[1].
 */
void eccEncode(uint8_t *data, int data_len, uint8_t check[2]) {
    uint8_t *first = check;
    uint8_t *second = check + 1;

    eccComputeChecks(data, data_len, first, second);
}
/*
 * Verify data[0 .. data_len-1] against its two check bytes and repair a
 * single corrupted data byte in place when its position can be determined.
 *
 * The check bytes are recomputed with eccComputeChecks() and XORed with
 * check[0] / check[1] to form two syndromes, s0 and s1:
 *
 *   - both zero:          no error detected; data is left untouched.
 *   - exactly one nonzero: a single bad data byte always makes both
 *                          syndromes nonzero, so this is not a single
 *                          data-byte error. No data byte is changed
 *                          (the pattern is consistent with damage to a
 *                          check byte itself). check[] is not modified.
 *   - both nonzero:       s0 is the XOR pattern of the error, and the
 *                          number of lfsr() steps needed to turn s1 into
 *                          s0 identifies the position of the bad byte.
 *
 * Returns true in the first two cases and after a successful repair.
 * Returns false when both syndromes are nonzero but no single, in-range,
 * unambiguous byte position explains them.
 *
 * The position search relies on lfsr() cycling through all 255 nonzero
 * states, so positions 255 apart are indistinguishable. For buffers longer
 * than 255 bytes a candidate index of 255 or more is therefore rejected
 * instead of risking a "repair" of the wrong byte.
 */
bool eccDecode(uint8_t *data, int data_len, uint8_t check[2]) {
    /* Number of lfsr() steps after which a nonzero state repeats. */
    enum { LFSR_PERIOD = 255 };

    uint8_t s0 = 0;
    uint8_t s1 = 0;
    eccComputeChecks(data, data_len, &s0, &s1);
    s0 ^= check[0];
    s1 ^= check[1];

    /* Nothing to repair in the data: either no error at all, or only one
     * syndrome is set (not a single data-byte error). */
    if (s0 == 0 || s1 == 0) {
        return true;
    }

    /* No data bytes exist, so nothing can be corrected. Bailing out here
     * also keeps the index arithmetic below free of signed overflow. */
    if (data_len <= 0) {
        return false;
    }

    /* Count how many lfsr() steps bring s1 onto s0. The cap is purely
     * defensive: it guarantees termination even if the two values are
     * never equal. */
    int steps = 0;
    while (s1 != s0) {
        if (steps >= LFSR_PERIOD) {
            return false;
        }
        s1 = lfsr(s1);
        ++steps;
    }

    /* An error at index i is advanced (data_len - i) times while the
     * checks are computed, so it needs LFSR_PERIOD - (data_len - i) more
     * steps to come back around to its original value. */
    int error_index = data_len - (LFSR_PERIOD - steps);
    if (error_index < 0 || error_index >= data_len) {
        return false;
    }

    /* error_index - LFSR_PERIOD would also be a valid position producing
     * the same syndromes: the location is ambiguous, so do not guess. */
    if (error_index >= LFSR_PERIOD) {
        return false;
    }

    data[error_index] ^= s0;
    return true;
}