I've got a DynamoDB table set up with a stream using the "New and old images" view type (NEW_AND_OLD_IMAGES). I then created a Lambda function and set a trigger for this DynamoDB table.
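For anyone reproducing the setup: the trigger amounts to an event source mapping between the stream and the function. A rough SDK sketch of the same wiring (the function name is a placeholder; the stream ARN is the one from the events below):

const AWS = require('aws-sdk');
const lambda = new AWS.Lambda({ region: 'us-west-2' });

// Map the table's stream to the function; StartingPosition is required for stream sources.
lambda.createEventSourceMapping({
  EventSourceArn: 'arn:aws:dynamodb:us-west-2:123456789123:table/dynamotabletest/stream/2020-02-07T19:24:06.547',
  FunctionName: 'my-stream-handler', // placeholder
  StartingPosition: 'LATEST',
  BatchSize: 100
}, (err, data) => {
  if (err) console.log(err, err.stack);
  else console.log(data);
});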
If I make a single change to an item in the DynamoDB table, I immediately see a log entry for the Lambda (i.e., it's working as expected). Then, after refreshing the Lambda log, I see a SECOND entry for what looks like the exact same event. With some console logging, I get the following output:
{
  eventID: '993816ae020b3e179be01be3b6483818',
  eventName: 'MODIFY',
  eventVersion: '1.1',
  eventSource: 'aws:dynamodb',
  awsRegion: 'us-west-2',
  dynamodb: {
    ApproximateCreationDateTime: 1583960109,
    Keys: { Guid: [Object], Timestamp: [Object] },
    NewImage: {
      Brand: [Object],
      'aws:rep:updatetime': [Object],
      EntryType: [Object],
      RequestId: [Object],
      KnownClients: [Object],
      Guid: [Object],
      'aws:rep:deleting': [Object],
      EntryCount: [Object],
      Timestamp: [Object],
      'aws:rep:updateregion': [Object]
    },
    OldImage: {
      Brand: [Object],
      'aws:rep:updatetime': [Object],
      EntryType: [Object],
      RequestId: [Object],
      KnownClients: [Object],
      Guid: [Object],
      'aws:rep:deleting': [Object],
      EntryCount: [Object],
      Timestamp: [Object],
      'aws:rep:updateregion': [Object]
    },
    SequenceNumber: '156626400000000015112302820',
    SizeBytes: 604,
    StreamViewType: 'NEW_AND_OLD_IMAGES'
  },
  eventSourceARN: 'arn:aws:dynamodb:us-west-2:123456789123:table/dynamotabletest/stream/2020-02-07T19:24:06.547'
}
And here's the output for the SECOND event, which is logged about one second after the first:
{
  eventID: 'bdc5295c3eabf7e5fb036a9cbd4fcca9',
  eventName: 'MODIFY',
  eventVersion: '1.1',
  eventSource: 'aws:dynamodb',
  awsRegion: 'us-west-2',
  dynamodb: {
    ApproximateCreationDateTime: 1583960109,
    Keys: { Guid: [Object], Timestamp: [Object] },
    NewImage: {
      Brand: [Object],
      'aws:rep:updatetime': [Object],
      EntryType: [Object],
      RequestId: [Object],
      KnownClients: [Object],
      Guid: [Object],
      'aws:rep:deleting': [Object],
      EntryCount: [Object],
      Timestamp: [Object],
      'aws:rep:updateregion': [Object]
    },
    OldImage: {
      Brand: [Object],
      'aws:rep:updatetime': [Object],
      EntryType: [Object],
      RequestId: [Object],
      KnownClients: [Object],
      Guid: [Object],
      'aws:rep:deleting': [Object],
      EntryCount: [Object],
      Timestamp: [Object],
      'aws:rep:updateregion': [Object]
    },
    SequenceNumber: '156626500000000015112302880',
    SizeBytes: 604,
    StreamViewType: 'NEW_AND_OLD_IMAGES'
  },
  eventSourceARN: 'arn:aws:dynamodb:us-west-2:123456789123:table/dynamotabletest/stream/2020-02-07T19:24:06.547'
}
I did a text compare, and the ONLY difference is the eventID and the SequenceNumber.
Anyone know what's happening here?
Here's my code for the Lambda that listens for the trigger event:
'use strict';
const AWS = require('aws-sdk');
// Converts a DynamoDB AttributeValue map back into a plain JS object.
const parse = AWS.DynamoDB.Converter.output;
const firehose = new AWS.Firehose({ region: 'us-east-2' });

exports.handler = (event, context, callback) => {
  const fireHoseInput = [];

  // Collect the new image of every inserted or modified item.
  event.Records.forEach((record) => {
    console.log(record);
    if (record.eventName === 'INSERT' || record.eventName === 'MODIFY') {
      fireHoseInput.push({ Data: JSON.stringify(parse({ M: record.dynamodb.NewImage })) });
    }
  });
  console.log('Firehose Input: ', fireHoseInput);

  const params = {
    DeliveryStreamName: 'test-dynamodb-stream',
    Records: fireHoseInput
  };

  if (fireHoseInput.length !== 0) {
    // Lambda waits for the event loop to drain by default, so this
    // in-flight call still completes even though callback fires first.
    firehose.putRecordBatch(params, function (err, data) {
      if (err) console.log(err, err.stack); // an error occurred
      else console.log(data);               // successful response
    });
  } else {
    console.log('No data to transmit');
  }

  callback(null, 'Successfully processed records.');
};
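Side note on the Firehose call (not the cause of the duplicates): putRecordBatch can partially fail without err being set, and the per-record outcome comes back in RequestResponses. A more defensive sketch of that callback:

firehose.putRecordBatch(params, function (err, data) {
  if (err) {
    console.log(err, err.stack); // transport-level error
  } else if (data.FailedPutCount > 0) {
    // RequestResponses[i] corresponds to Records[i]; failed entries carry
    // ErrorCode/ErrorMessage instead of a RecordId.
    data.RequestResponses.forEach(function (response, i) {
      if (response.ErrorCode) {
        console.log('Record', i, 'failed:', response.ErrorCode, response.ErrorMessage);
      }
    });
  } else {
    console.log(data); // all records delivered
  }
});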
Any ideas why a single change to an item in my DynamoDB table triggers two nearly identical events in the Lambda that's listening to that DynamoDB stream?
UPDATE: I did a little console logging earlier in the Lambda to capture the raw data from the DynamoDB Stream event. In the FIRST event that fires, NewImage shows the update I made to a field, while OldImage shows the field's old value. In the SECOND event, the field I changed shows the same newly updated value in both NewImage and OldImage; the only field that differs is aws:rep:updatetime (one of the aws:rep:* replication-metadata attributes that DynamoDB Global Tables maintains on each item). So presumably the change to the data field is captured in the first event, and then the replication process updates the replication timestamp, which causes the stream to emit a second event with the same data except for the new aws:rep:updatetime. It sounds like I need to update my Lambda to ignore that second event when the only field that changed is aws:rep:updatetime, which I don't care about in my data flow. Thoughts?
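If filtering is the right move, here's a rough sketch of what I have in mind (untested; the helper name is mine). It unmarshalls both images with the same parse converter from my handler, strips the aws:rep:* attributes, and treats the record as skippable when the rest is identical:

// Hypothetical helper: true when the only difference between the images is
// Global Tables replication metadata (aws:rep:*), so the event can be skipped.
function isReplicationOnlyUpdate(record) {
  const stripRepFields = (image) => {
    const plain = parse({ M: image || {} }); // unmarshall to a plain object
    Object.keys(plain)
      .filter((key) => key.startsWith('aws:rep:'))
      .forEach((key) => delete plain[key]);
    return plain;
  };
  // Key-order-sensitive comparison; good enough as a first pass.
  return JSON.stringify(stripRepFields(record.dynamodb.NewImage)) ===
         JSON.stringify(stripRepFields(record.dynamodb.OldImage));
}

The condition in the forEach would then become something like:

if ((record.eventName === 'INSERT' || record.eventName === 'MODIFY') && !isReplicationOnlyUpdate(record)) {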