56
votes

How to fetch/scan all items from AWS dynamodb using node.js. I am posting my code here.

// Query the "users" table for items whose user_status is "Y", re-issuing the
// query with ExclusiveStartKey for as long as a LastEvaluatedKey is returned.
var docClient = new aws.DynamoDB.DocumentClient();
    var params = {
    TableName:"users",
    // NOTE(review): the key condition names only user_status; the reported
    // ValidationException says the key schema element `user_id` is missing.
    KeyConditionExpression:"user_status=:status",
    ExpressionAttributeValues: {
        ":status": "Y"
    }
    };

    // Runs one query page and calls itself again until no LastEvaluatedKey
    // comes back; `callback` receives the error or the final result.
    var queryExecute = function(callback) {
        docClient.query(params,function(err,result) {
            if(err) {
                console.log(err)
                callback(err);
                } else {
                console.log(result);

                if(result.LastEvaluatedKey) {
                    // Resume the next page right after the last key that was read.
                    params.ExclusiveStartKey = result.LastEvaluatedKey;
                    queryExecute(callback);
                    } else {
                        // NOTE(review): `items` is not declared anywhere in this
                        // snippet, and each `result` page is only logged, never
                        // accumulated.
                        callback(err,items);
                    }
                }
            });
        }
        // NOTE(review): `callback` is not defined in the visible code.
        queryExecute(callback); 

This is giving me below error.

ValidationException: Query condition missed key schema element: `user_id`.

Here the primary key is user_id. I don't want to use it in my query condition, because I would have to supply a value for it if I mentioned the primary key in KeyConditionExpression. Maybe I am wrong. However, please suggest a good way to fetch all items from DynamoDB that have user_status = "Y".

12

12 Answers

59
votes

If you would like to get the data from DynamoDB without using Hash key value, you need to use Scan API.

Note: The Scan API reads all the items in the table to get the results. So, it is a costly operation in DynamoDB.

Alternative approach: use a GSI (global secondary index).

Scan code for the above scenario:

// Client used for every scan request below.
var docClient = new AWS.DynamoDB.DocumentClient();

// Scan request for the "users" table, filtered on user_status.
var params = {
    TableName: "users",
    // "#user_status" and ":user_status_val" are placeholders resolved through
    // ExpressionAttributeNames and ExpressionAttributeValues below.
    FilterExpression: "#user_status = :user_status_val",
    ExpressionAttributeNames: {
        "#user_status": "user_status",
    },
    ExpressionAttributeValues: { ":user_status_val": 'somestatus' }

};

// Start the first page; onScan asks for the following pages itself.
docClient.scan(params, onScan);
// Running number of items logged by onScan.
var count = 0;

/**
 * Callback for docClient.scan: logs every item of the received page and
 * requests the next page while the response carries a LastEvaluatedKey.
 *
 * @param {Error|null} err - Error reported by the scan call, if any.
 * @param {object} data - Scan response; `Items` and `LastEvaluatedKey` are read.
 */
function onScan(err, data) {
    // A failed page is only reported; no further page is requested.
    if (err) {
        console.error("Unable to scan the table. Error JSON:", JSON.stringify(err, null, 2));
        return;
    }

    console.log("Scan succeeded.");
    for (const entry of data.Items) {
        count++;
        console.log("Item :", count, JSON.stringify(entry));
    }

    // A LastEvaluatedKey means the scan stopped before the end of the table.
    const resumeKey = data.LastEvaluatedKey;
    if (typeof resumeKey !== "undefined") {
        console.log("Scanning for more...");
        params.ExclusiveStartKey = resumeKey;
        docClient.scan(params, onScan);
    }
}
72
votes

This is working for me:

/**
 * Reads every item of a table by scanning it page by page.
 *
 * @param {string} tableName - Name of the table to scan.
 * @returns {Promise<object[]>} All items from all pages, in the order returned.
 * @throws Rejects with whatever error the underlying scan call reports.
 */
export const scanTable = async (tableName) => {
    const params = {
        TableName: tableName,
    };

    const scanResults = [];
    // Must be `let`: it is reassigned for every page (a bare `const items;`
    // does not even parse).
    let page;
    do {
        page = await documentClient.scan(params).promise();
        for (const item of page.Items) {
            scanResults.push(item);
        }
        // Resume after the last key read; undefined on the final page.
        params.ExclusiveStartKey = page.LastEvaluatedKey;
    } while (typeof page.LastEvaluatedKey !== "undefined");

    return scanResults;
};
11
votes

The AWS documentation example didn't work for me. @Hank's approach did the trick.

Using handler inside a lambda:

const AWS = require('aws-sdk');
// DocumentClient created once at module load and used by the handler.
const docClient = new AWS.DynamoDB.DocumentClient({
    // optional tuning - 50% faster(cold) / 20% faster(hot)
    apiVersion: '2012-08-10',
    // NOTE(review): presumably makes the SDK send requests without TLS —
    // confirm this is acceptable before copying it.
    sslEnabled: false,
    // NOTE(review): presumably skips client-side validation of request
    // parameters — confirm.
    paramValidation: false,
    convertResponseTypes: false
});

// Name of the table the handler scans.
const tableName = 'series';

exports.handler = async (event, context, callback) => {
    let params = { TableName: tableName };

    let scanResults = [];
    let items;

    do {
        items = await docClient.scan(params).promise();
        items.Items.forEach((item) => scanResults.push(item));
        params.ExclusiveStartKey = items.LastEvaluatedKey;
    } while (typeof items.LastEvaluatedKey != "undefined");

    callback(null, scanResults);
};
8
votes

Using Promises and async

const aws = require('aws-sdk');
aws.config.update({ region: 'us-east-1' });
const documentClient = new aws.DynamoDB.DocumentClient();

/**
 * Scans with the given parameters and follows LastEvaluatedKey until every
 * page has been read.
 *
 * The caller's `params` object is never modified. Writing ExclusiveStartKey
 * into it would leave the key of the last page behind, so a second call with
 * the same object would silently skip all earlier pages.
 *
 * @param {object} params - Scan parameters (TableName, optional filters, ...).
 * @returns {Promise<object[]>} All items from all pages.
 * @throws Rejects with whatever error the underlying scan call reports.
 */
const scanAll = async (params) => {
  // Shallow copy: pagination state lives here, not on the caller's object.
  const request = { ...params };
  const itemsAll = [];
  do {
    const data = await documentClient.scan(request).promise();
    for (const item of data.Items) {
      itemsAll.push(item);
    }
    request.ExclusiveStartKey = data.LastEvaluatedKey;
  } while (request.ExclusiveStartKey);
  return itemsAll;
};

Use like this

// scanAll is async: without `await` this would hold a pending Promise, not the
// items. Run it inside an async function (or an ES module with top-level await).
const itemsAll = await scanAll(params);

The code is the same for query (just replace scan with query)

5
votes
const AWS = require('aws-sdk');
// DocumentClient created once at module load and used by the handler.
const docClient = new AWS.DynamoDB.DocumentClient({
    // optional tuning - 50% faster(cold) / 20% faster(hot)
    apiVersion: '2012-08-10',
    // NOTE(review): presumably makes the SDK send requests without TLS —
    // confirm this is acceptable before copying it.
    sslEnabled: false,
    // NOTE(review): presumably skips client-side validation of request
    // parameters — confirm.
    paramValidation: false,
    convertResponseTypes: false,
    region: 'us-east-2' // put your region
});
// Name of the table the handler scans.
const tableName = 'tableName'; // put your tablename

exports.handler = async (event, context, callback) => {
    let params = { TableName: tableName };

    let scanResults = [];
    let items;

    do {
        items = await docClient.scan(params).promise();
        items.Items.forEach((item) => scanResults.push(item));
        params.ExclusiveStartKey = items.LastEvaluatedKey;
    } while (typeof items.LastEvaluatedKey != "undefined");

    callback(null, scanResults);
};
4
votes

I use promises like this:

let AWS = require('aws-sdk');
let docClient = new AWS.DynamoDB.DocumentClient();

/**
 * Scans with the given parameters and returns the items of every page.
 *
 * Pagination state is kept on a private copy of `params`. Writing
 * ExclusiveStartKey onto the caller's object would leave the last page's key
 * behind and make a later call with the same object skip earlier pages.
 * Pages are read in a loop rather than one recursion level per page.
 *
 * @param {object} params - Scan parameters (TableName, optional filters, ...).
 * @returns {Promise<object[]>} All items from all pages.
 * @throws Rejects with whatever error the underlying scan call reports.
 */
async function dbRead(params) {
    const request = { ...params };
    const data = [];
    let result;
    do {
        result = await docClient.scan(request).promise();
        for (const item of result.Items) {
            data.push(item);
        }
        request.ExclusiveStartKey = result.LastEvaluatedKey;
    } while (result.LastEvaluatedKey);
    return data;
}

and to use it:

// Minimal scan request: only the table name is given.
let params = {
  TableName: 'Table'
};
// `await` needs an enclosing async function (or an ES module with top-level await).
let data = await dbRead(params);
2
votes

A node express solution that returns the data in JSON format:

// Accumulates the items of every scanned page; onScan reassigns it.
let datapack = [];
// Scan request: items whose `aws` attribute equals the `aws` variable and whose
// Id begins with "contact", returning only the projected attributes.
// Declared with `const` instead of being assigned undeclared: an undeclared
// assignment creates a global (presumably shared between concurrent requests
// in an Express app — verify against the surrounding route) and throws in
// strict mode. onScan only sets a property on it, which `const` allows.
const item = {
    TableName: ddbTable,
    FilterExpression: "aws = :e AND begins_with ( Id, :t )",
    ExpressionAttributeValues: {
        ":t": "contact",
        ":e": aws,
    },
    ProjectionExpression: "Id,FirstName,LastName,cEmail",
};
// Start the first page; onScan requests the following ones.
docClient.scan(item, onScan);
    /**
     * Callback for docClient.scan: appends the page's items to `datapack`,
     * requests the next page while a LastEvaluatedKey is returned, and sends
     * the collected items as JSON once the last page has arrived.
     *
     * On failure the error is logged and a 500 response is sent, so the HTTP
     * request does not hang without an answer.
     *
     * @param {Error|null} err - Error reported by the scan call, if any.
     * @param {object} data - Scan response; `Items` and `LastEvaluatedKey` are read.
     */
    function onScan(err, data) {
        if (err) {
            console.error("Unable to scan the table. Error JSON:", JSON.stringify(err, null, 2));
            res.status(500).json({ error: "Unable to scan the table" });
            return;
        }

        datapack = datapack.concat(data.Items);
        if (typeof data.LastEvaluatedKey !== "undefined") {
            // More pages remain: resume right after the last key read.
            item.ExclusiveStartKey = data.LastEvaluatedKey;
            docClient.scan(item, onScan);
        } else {
            res.json(datapack);
        }
    }
2
votes

Here is an answer that will deliver indexed results rather than using an expensive scan, also in JSON format node/express. Note use of docClient.query:

 // Accumulates the items of every queried page; onScan reassigns it.
 // Declared (like `item` below) instead of being assigned undeclared: an
 // undeclared assignment creates a global (presumably shared between
 // concurrent requests in an Express app — verify against the surrounding
 // route) and throws in strict mode.
 let datapack = [];
    // Query against the "cEmailIndex" index: `aws` must equal the `aws`
    // variable and cEmail must begin with `search`; only the projected
    // attributes are returned.
    const item = {
        TableName: ddbTable,
        IndexName: "cEmailIndex",
        KeyConditionExpression: "aws = :e AND begins_with ( cEmail, :t )",
        ExpressionAttributeValues: {
            ":t": search,
            ":e": aws,
        },
        ProjectionExpression: "Id,FirstName,LastName,cEmail",
    };
    // Start the first page; onScan requests the following ones.
    docClient.query(item, onScan);
    /**
     * Callback for docClient.query: appends the page's items to `datapack`,
     * requests the next page while a LastEvaluatedKey is returned, and sends
     * the collected items as JSON once the last page has arrived.
     *
     * On failure the error is logged and a 500 response is sent, so the HTTP
     * request does not hang without an answer.
     *
     * @param {Error|null} err - Error reported by the query call, if any.
     * @param {object} data - Query response; `Items` and `LastEvaluatedKey` are read.
     */
    function onScan(err, data) {
        if (err) {
            console.error("Unable to query the table. Error JSON:", JSON.stringify(err, null, 2));
            res.status(500).json({ error: "Unable to query the table" });
            return;
        }

        datapack = datapack.concat(data.Items);
        if (typeof data.LastEvaluatedKey !== "undefined") {
            // More pages remain: resume right after the last key read.
            item.ExclusiveStartKey = data.LastEvaluatedKey;
            docClient.query(item, onScan);
        } else {
            res.json(datapack);
        }
    }
1
votes

You can use the ScanPaginator from @aws/dynamodb-query-iterator:

import { ScanPaginator } from '@aws/dynamodb-query-iterator';
import DynamoDB = require('aws-sdk/clients/dynamodb');

// Paginator over a scan of the "users" table, filtered on user_status.
const paginator = new ScanPaginator(
  new DynamoDB.DocumentClient(),
  {
    TableName: "users",
    // "#user_status" and ":user_status_val" are placeholders resolved through
    // ExpressionAttributeNames and ExpressionAttributeValues below.
    FilterExpression: "#user_status = :user_status_val",
    ExpressionAttributeNames: {
      "#user_status": "user_status",
    },
    ExpressionAttributeValues: { ":user_status_val": 'somestatus' }
  }
);

// Consume the paginator with async iteration, one `page` per loop turn.
for await (const page of paginator) {
    // do something with `page`, e.g. myFunction(page.Items)
}
1
votes

For those who are NOT USING AWS.DynamoDB.DocumentClient, this solution will work. I have split the functionality into multiple modules for easy readability and using async/await.

const AWS = require("aws-sdk");
AWS.config.update({
    // update table region here
    region: "us-west-2"
});
var dynamodb = new AWS.DynamoDB();
/**
 * Promise adapter around the callback-style `dynamodb.scan`.
 *
 * @param {object} scanParams - Parameters passed to `dynamodb.scan` unchanged.
 * @returns {Promise<object>} Resolves with the scan response, rejects with the scan error.
 */
const performAsynScanOperation = (scanParams) => {
    const executor = (resolve, reject) => {
        const onDone = (err, responseData) => {
            if (err) {
                reject(err);
                return;
            }
            resolve(responseData);
        };
        dynamodb.scan(scanParams, onDone);
    };
    return new Promise(executor);
};

/**
 * Collects every item of a table by scanning page after page through
 * performAsynScanOperation.
 *
 * @param {string} tableName - Name of the table to scan.
 * @returns {Promise<Array>} Items of all pages, concatenated in page order.
 */
const getAllRecords = async (tableName) => {
    const scanParams = { TableName: tableName };
    let allItems = [];

    for (;;) {
        const responseData = await performAsynScanOperation(scanParams);
        allItems = allItems.concat(responseData.Items);

        const nextKey = responseData.LastEvaluatedKey;
        if (!nextKey) {
            // No continuation key: the whole table has been read.
            return allItems;
        }
        console.log('LastEvaluatedKey', nextKey);
        scanParams.ExclusiveStartKey = nextKey;
    }
};
// Print every record, and report a failed scan instead of leaving the
// rejection unhandled.
getAllRecords('<Name of table>')
  .then((allItems) => {
    console.log(allItems);
  })
  .catch((err) => {
    console.error(err);
  });
0
votes

This is a drop-in replacement to scan all records:

/**
 * Scans with the given parameters through the callback-style `db.scan` and
 * returns the items of every page.
 *
 * Pagination state is kept on a private copy of `params`. Writing
 * ExclusiveStartKey onto the caller's object would leave the last page's key
 * behind and make a later call with the same object skip earlier pages.
 *
 * @param {object} params - Scan parameters (TableName, optional filters, ...).
 * @returns {Promise<Array>} Items of all pages, concatenated in page order.
 * @throws Rejects with the error reported by `db.scan`.
 */
const scanAll = async (params) => {
    const request = { ...params };
    let all = [];
    for (;;) {
        // Adapt the callback API to a promise so each page can be awaited.
        const data = await new Promise((resolve, reject) => {
            db.scan(request, (err, page) => {
                if (err) {
                    reject(err);
                } else {
                    resolve(page);
                }
            });
        });
        all = all.concat(data.Items);
        if (!data.LastEvaluatedKey) {
            return all;
        }
        request.ExclusiveStartKey = data.LastEvaluatedKey;
    }
};

Usage:

// Handle the records first and attach the rejection handler last. With
// `.catch` placed before `.then`, a failed scan is swallowed and the success
// handler still runs with `records` undefined.
scanAll(query)
    .then((records) => {
        // use `records` here
    })
    .catch((err) => {
        console.error(err);
    });
0
votes

The scan method reads every item in the table and returns all the data in the table. You can provide an optional filter_expression, so that only the items matching your criteria are returned. However, the filter is applied only after the entire table has been scanned. ref

I'm sharing a refactored onScan function; I hope it helps.

var AWS = require("aws-sdk");

AWS.config.update({
    region: "us-west-2",
    endpoint: "http://localhost:8000"
});

var docClient = new AWS.DynamoDB.DocumentClient();

/**
 * Scans the whole table and resolves with the items of every page.
 *
 * @returns {Promise<Array>} Items of all pages, concatenated in page order.
 * @throws Rejects with the error reported by the scan call, so callers can
 *     inspect it (a bare `reject()` would hand them `undefined`).
 */
async function read() {
    const params = {
        TableName: "tableName"
        // options can be passed here e.g.
        // FilterExpression: "#yr between :start_yr and :end_yr",
    };

    let items = [];
    return new Promise((resolve, reject) => {
        function onScan(err, data) {
            if (err) {
                console.error("Unable to scan the table. Error JSON:", JSON.stringify(err, null, 2));
                reject(err);
                return;
            }

            items = items.concat(data.Items);

            // continue scanning if we have more items, because
            // scan can retrieve a maximum of 1MB of data
            if (typeof data.LastEvaluatedKey !== "undefined") {
                params.ExclusiveStartKey = data.LastEvaluatedKey;
                docClient.scan(params, onScan);
            } else {
                resolve(items);
            }
        }
        docClient.scan(params, onScan);
    });
}