Update DynamoDB Items with Node.js

On a previous post we proceeded into inserting items to DynamoDB using Node.js. DynamoDB also supports updating items.

We will use the Login table for the update examples.
When issuing an update you must specify the primary key of the item you want to update.

// Overwrites the fullname attribute of the Users item identified by email.
// email    - partition key of the item to update (mandatory for updates)
// fullName - new value for the fullname attribute
// callback - standard (err, data) callback; data holds the updated attributes
//            because ReturnValues is UPDATED_NEW
var updateName = function(email, fullName, callback) {

	var docClient = new AWS.DynamoDB.DocumentClient();

	var request = {
		TableName: "Users",
		Key: { email: email },
		UpdateExpression: "set fullname = :fullname",
		ExpressionAttributeValues: { ":fullname": fullName },
		ReturnValues: "UPDATED_NEW"
	};

	docClient.update(request, callback);
}

We can proceed on more advanced statements using conditional updates. Conditional updates can help us in many cases such as handling concurrent updates. In our case we will update an item’s Full name only if it starts with a certain prefix.

// Conditional update: sets fullname only when the stored fullname begins
// with the given prefix. If the condition fails DynamoDB rejects the write,
// which makes this pattern useful for handling concurrent updates.
var updateConditionally = function(email, fullName, prefix, callback) {

	var docClient = new AWS.DynamoDB.DocumentClient();

	var request = {
		TableName: "Users",
		Key: { email: email },
		UpdateExpression: "set fullname = :fullname",
		ConditionExpression: "begins_with(fullname,:prefix)",
		ExpressionAttributeValues: {
			":fullname": fullName,
			":prefix": prefix
		},
		ReturnValues: "UPDATED_NEW"
	};

	docClient.update(request, callback);
}

Another feature is atomic counters. We can issue updates to a DynamoDB item and increase the attribute values. We will add an extra field called count. Also we will add another update function, which once called will update the field specified, but will also increase the counter attribute. Thus the counter attribute will represent how many times an update was performed on a specific item.

// Initializes the "counter" attribute of a user item to zero.
// The #counter placeholder is used because "counter" may clash with
// DynamoDB's reserved words — TODO confirm against the reserved-word list.
var addUpdateCounter = function(email, callback) {

	var docClient = new AWS.DynamoDB.DocumentClient();

	var request = {
		TableName: "Users",
		Key: { email: email },
		UpdateExpression: "set #counter = :counter",
		ExpressionAttributeNames: { "#counter": "counter" },
		ExpressionAttributeValues: { ":counter": 0 },
		ReturnValues: "UPDATED_NEW"
	};

	docClient.update(request, callback);
}

// Sets fullname and, in the same update expression, atomically increments
// the "counter" attribute by one (ADD), so counter records how many times
// the item has been updated through this function.
var updateAndIncreaseCounter = function(email, fullName, callback) {

	var docClient = new AWS.DynamoDB.DocumentClient();

	var request = {
		TableName: "Users",
		Key: { email: email },
		UpdateExpression: "set fullname = :fullname ADD #counter :incva",
		ExpressionAttributeNames: { "#counter": "counter" },
		ExpressionAttributeValues: {
			":fullname": fullName,
			":incva": 1
		},
		ReturnValues: "UPDATED_NEW"
	};

	docClient.update(request, callback);
}

You can find the sourcecode on github.

Scan DynamoDB Items with Node.js

On previous posts we covered how to query a DynamoDB database
Query DynamoDB Part 1
Query DynamoDB Part 2.

Apart from issuing queries DynamoDB also offers Scan functionality.
What scan does is fetching all the Items you might have on your DynamoDB Table.
Therefore scan does not require any rules based on your partition key or your global/local secondary indexes.
What scan offers is filtering based on the items already fetched and return specific attributes from the items fetched.

The snippet below issues a scan on the Logins table by adding filtering and selecting only the email field.

// Scans the whole Logins table, keeping only the email attribute of items
// whose timestamp is earlier than the supplied Date. DynamoDB returns scan
// results in pages, so the scan is re-issued with ExclusiveStartKey until
// LastEvaluatedKey is absent; the callback then receives all collected items.
var scanLogins = function(date, callback) {

	var docClient = new AWS.DynamoDB.DocumentClient();

	var params = {
		TableName: "Logins",
		ProjectionExpression: "email",
		FilterExpression: "#timestamp < :from",
		// "timestamp" needs a name placeholder (reserved-word safe).
		ExpressionAttributeNames: { "#timestamp": "timestamp" },
		ExpressionAttributeValues: { ":from": date.getTime() }
	};

	var collected = [];

	var fetchPage = function(pageCallback) {

		docClient.scan(params, function(err, result) {

			if (err) {
				pageCallback(err);
				return;
			}

			collected = collected.concat(result.Items);

			if (!result.LastEvaluatedKey) {
				// Last page reached: hand back everything gathered so far.
				pageCallback(null, collected);
				return;
			}

			// More pages remain: continue from where this page stopped.
			params.ExclusiveStartKey = result.LastEvaluatedKey;
			fetchPage(pageCallback);
		});
	}

	fetchPage(callback);
};

Before using scan in an application we have to take into consideration that scan fetches all table items. Therefore it has a high cost both in charges and performance. Also it might consume your provisioned capacity.
It is better to stick to queries and avoid scans.

You can find the sourcecode on github.

Query DynamoDB Items with Node.js Part 2

On a previous post we had the chance to issue some basic DynamoDB query actions.

However apart from the basic actions the DynamoDB api provides us with some extra functionality.

Projections is a feature that has a select-like functionality.
You choose which attributes from a DynamoDB Item shall be fetched. Keep in mind that using projection will not have any impact on your query billing.

// Queries the Users table by email and projects only the registerDate
// attribute. Projection trims the returned payload but does not change
// what the query costs.
var getRegisterDate = function(email, callback) {

	var docClient = new AWS.DynamoDB.DocumentClient();

	var query = {
		TableName: "Users",
		KeyConditionExpression: "#email = :email",
		ExpressionAttributeNames: { "#email": "email" },
		ExpressionAttributeValues: { ":email": email },
		ProjectionExpression: 'registerDate'
	};

	docClient.query(query, callback);
}

Apart from selecting the attributes we can also specify the order according to our range key. We shall query the logins Table in a Descending order using scanIndexForward.

// Fetches the logins of a user ordered by the range key (timestamp) in
// descending order: ScanIndexForward false reverses the default ascending
// traversal.
var fetchLoginsDesc = function(email, callback) {

	var docClient = new AWS.DynamoDB.DocumentClient();

	var query = {
		TableName: "Logins",
		KeyConditionExpression: "#email = :emailValue",
		ExpressionAttributeNames: { "#email": "email" },
		ExpressionAttributeValues: { ":emailValue": email },
		ScanIndexForward: false
	};

	docClient.query(query, callback);
}

A common functionality of databases is counting the items persisted in a collection. In our case we want to count the login occurrences of a specific user. However pay extra attention since the count functionality does nothing more than counting the total items fetched, therefore it will cost you as if you fetched the items.

// Counts the login occurrences of a user. Select COUNT makes DynamoDB
// return only the count, but the read is still billed as if the matching
// items had been fetched.
var countLogins = function(email, callback) {

	var docClient = new AWS.DynamoDB.DocumentClient();

	var query = {
		TableName: "Logins",
		KeyConditionExpression: "#email = :emailValue",
		ExpressionAttributeNames: { "#email": "email" },
		ExpressionAttributeValues: { ":emailValue": email },
		Select: 'COUNT'
	};

	docClient.query(query, callback);
}

Another feature of DynamoDB is getting items in batches even if they belong on different tables. This is really helpful in cases where data that belong on a specific context are spread through different tables. Every get item is handled and charged as a DynamoDB read action. In case of batch get item all table keys should be specified since every query’s purpose on BatchGetItem is to fetch a single Item.
It is important to know that you can fetch up to 1 MB of data and up to 100 items per BatchGetItem request.

// Fetches the Users item keyed by email and the Supervisors item keyed by
// name in a single BatchGetItem round trip. Every request inside a
// BatchGetItem must specify the full key and fetches exactly one item.
var getMultipleInformation = function(email, name, callback) {

	// Bug fix: the low-level client was referenced but never created,
	// which threw a ReferenceError on `dynamodb` at call time.
	var dynamodb = new AWS.DynamoDB();

	var params = {
		"RequestItems": {
			"Users": {
				"Keys": [
					{ "email": { "S": email } }
				]
			},
			"Supervisors": {
				"Keys": [
					{ "name": { "S": name } }
				]
			}
		}
	};

	dynamodb.batchGetItem(params, callback);
};

You can find the sourcecode on github

Query DynamoDB Items with Node.js

On a previous post we proceeded on inserting data on a DynamoDB database.

On this tutorial we will issue some basic queries against our DynamoDB tables.

The main rule is that every query has to use the hash key.

The simplest form of query is using the hash key only. We will query the Users table on this one. There would be only one result, therefore there is no use on iterating the Items list.

// Looks up a single user by email, the table's hash key. At most one item
// can match, so callers do not need to iterate the Items list.
var getUser = function(email, callback) {

	var docClient = new AWS.DynamoDB.DocumentClient();

	var lookup = {
		TableName: "Users",
		KeyConditionExpression: "#email = :email",
		ExpressionAttributeNames: { "#email": "email" },
		ExpressionAttributeValues: { ":email": email }
	};

	docClient.query(lookup, callback);
};

However we can issue more complex queries using conditions.
Logins Table suits well for an example. We will issue a query that will fetch login attempts between two dates.

// Fetches all login entries for the given email whose range key (timestamp)
// lies between the two supplied Dates (BETWEEN is inclusive). DynamoDB
// paginates results, so the query is re-issued with ExclusiveStartKey until
// LastEvaluatedKey is absent; the callback receives the accumulated items.
// Fix: removed a leftover debug console.log of every raw result page.
var queryLogins = function(email, from, to, callback) {

	var docClient = new AWS.DynamoDB.DocumentClient();

	var params = {
	    TableName: "Logins",
	    // "timestamp" needs a name placeholder (reserved-word safe).
	    KeyConditionExpression: "#email = :emailValue and #timestamp BETWEEN :from AND :to",
	    ExpressionAttributeNames: {
	    	"#email": "email",
	    	"#timestamp": "timestamp"
	    },
	    ExpressionAttributeValues: {
	    	":emailValue": email,
	    	":from": from.getTime(),
	    	":to": to.getTime()
	    }
	};

	var items = [];

	var queryExecute = function(callback) {

		docClient.query(params, function(err, result) {

			if (err) {
				callback(err);
			} else {

				items = items.concat(result.Items);

				if (result.LastEvaluatedKey) {
					// More pages remain: continue from where this one stopped.
					params.ExclusiveStartKey = result.LastEvaluatedKey;
					queryExecute(callback);
				} else {
					callback(err, items);
				}
			}
		});
	}

	queryExecute(callback);
};

Keep in mind that DynamoDB Fetches data in pages, therefore you have to issue the same request more than once in case of multiple pages. Therefore you have to use the last evaluated key to your next request. In case of many entries be aware that you should handle the call stack size.

Last but not least querying on indexes is one of the basic actions. It is the same routine either for local or global secondary indexes.
Keep in mind that the results fetched depend on the projection type we specified once creating the Table. In our case the projection type is for all fields.

We shall use the Supervisors table.

	// Queries the FactoryIndex global secondary index of the Supervisors
	// table: company is the index hash key, factory the index range key.
	// `company`, `factory` and `callback` are assumed to be supplied by the
	// enclosing (not shown) function — snippet fragment.
	var docClient = new AWS.DynamoDB.DocumentClient();
	
	var params = {
		    TableName: "Supervisors",
		    IndexName: "FactoryIndex",
		    KeyConditionExpression:"#company = :companyValue and #factory = :factoryValue",
		    ExpressionAttributeNames: {
		    	"#company":"company",
		    	"#factory":"factory"
		    },
		    ExpressionAttributeValues: {
		    	":companyValue": company,
		    	":factoryValue": factory
		    }
		};

	docClient.query(params,callback);

You can find full source code with unit tests on github.

Insert Items to DynamoDB Tables using Node.js

On a previous article we learned how to create DynamoDB Tables using Node.js.

Next step is to insert items to the DynamoDB Tables previously created.

Keep in mind that for the insert action the most basic step is to specify the primary key.
For the table users the primary key is the attribute email. You can add as many attributes as you want however the cumulative size should not surpass 400 KB.

var AWS = require("aws-sdk");

	// Inserts a single Users item; email is the mandatory primary key.
	// Low-level API: attribute values carry explicit type tags ({ S: ... }).
	var dynamodb = new AWS.DynamoDB();
	var params = {
			TableName:"Users",
		    Item:{
		    	email : { S:"jon@doe.com"},
		        fullname: { S:"Jon Doe"}
		    }
		};
	
	// `callback` is assumed to be defined by the surrounding code — snippet.
	dynamodb.putItem(params,callback);

DynamoDB also supports Batch writes. In this case the main benefit lies on less I/O, however nothing changes regarding consumed capacity. In our case we will add a batch of login attempts.

var AWS = require("aws-sdk");

// Writes two login attempts for the same user in one BatchWriteItem call.
// Batching saves round trips but consumes the same write capacity as two
// individual puts. (Name kept as-is for compatibility with existing callers.)
var insetBatchLogins = function(callback) {

	var dynamodb = new AWS.DynamoDB();

	var batchRequest = {
		RequestItems: {
			"Logins": [
				{
					PutRequest: {
						Item: {
							"email": { S: "jon@doe.com" },
							"timestamp": { N: "1467041009976" }
						}
					}
				},
				{
					PutRequest: {
						Item: {
							"email": { S: "jon@doe.com" },
							"timestamp": { N: "1467041019976" }
						}
					}
				}
			]
		}
	};

	dynamodb.batchWriteItem(batchRequest, callback);
};

In case of an insert with a global/local secondary index all you have to do is to specify the corresponding attributes for the index. Take into consideration that you can have empty index related attributes or even duplicates.

	// Inserts a Supervisors item. company/factory feed the table's global
	// secondary index; index attributes may be absent or duplicated across
	// items. `callback` comes from the surrounding code — snippet fragment.
	var dynamodb = new AWS.DynamoDB();
	
	var params = {
			TableName:"Supervisors",
		    Item:{
		    	name: { S:"Random SuperVisor"},
		    	company: { S:"Random Company"},
		    	factory: { S:"Jon Doe"}
		    }
		};
	
	dynamodb.putItem(params,callback);

You can find the sourcecode on github.

Create DynamoDB tables with Node.js

On this post we will create Tables on a DynamoDB Database using node.js

Before getting started we need to have local dynamodb installed since we want to avoid any costs for dynamodb usage. There was a previous post on local dynamodb.

In case you use docker you can find a local dynamodb image or you can create one on your own as described here.

Using local DynamoDB and node.js is extremely handy for debugging. Local dynamodb provides us with a web user interface on http://localhost:8000/shell. The local dynamodb shell is a javascript shell, therefore the actions for node.js can be issued straight to the DynamoDB shell.

The actions would be the same as described on the corresponding java tutorial.

First step is to create a table with a hash key. In this case the email of the user would be the hash key.

// Creates the Users table with a single HASH key on the string attribute
// "email".
var createUsers = function(callback) {

	var dynamodb = new AWS.DynamoDB();

	var tableSpec = {
		TableName: "Users",
		KeySchema: [
			{ AttributeName: "email", KeyType: "HASH" }
		],
		AttributeDefinitions: [
			{ AttributeName: "email", AttributeType: "S" }
		],
		ProvisionedThroughput: {
			ReadCapacityUnits: 5,
			WriteCapacityUnits: 5
		}
	};

	dynamodb.createTable(tableSpec, callback);
};

The next table will be called Logins. Logins should keep track each time the user logged in. To do so apart from using a hash key we will also use a range key for the date it occurred.

// Creates the Logins table: email is the HASH key and the numeric
// timestamp of each login attempt is the RANGE key.
var createLogins = function(callback) {

	var dynamodb = new AWS.DynamoDB();

	var tableSpec = {
		TableName: "Logins",
		KeySchema: [
			{ AttributeName: "email", KeyType: "HASH" },
			{ AttributeName: "timestamp", KeyType: "RANGE" }
		],
		AttributeDefinitions: [
			{ AttributeName: "email", AttributeType: "S" },
			{ AttributeName: "timestamp", AttributeType: "N" }
		],
		ProvisionedThroughput: {
			ReadCapacityUnits: 5,
			WriteCapacityUnits: 5
		}
	};

	dynamodb.createTable(tableSpec, callback);
};

Next table is Supervisors. The hash key of Supervisor would be his name. A supervisor will work for a company. The company will be our global secondary index. Since the companies own more than one factories the field factory would be the range key.

// Creates the Supervisors table keyed by name, plus the FactoryIndex global
// secondary index on (company HASH, factory RANGE) projecting all
// attributes.
var createSupervisors = function(callback) {

	var dynamodb = new AWS.DynamoDB();

	var tableSpec = {
		TableName: "Supervisors",
		KeySchema: [
			{ AttributeName: "name", KeyType: "HASH" }
		],
		// Index key attributes must be declared here as well.
		AttributeDefinitions: [
			{ AttributeName: "name", AttributeType: "S" },
			{ AttributeName: "company", AttributeType: "S" },
			{ AttributeName: "factory", AttributeType: "S" }
		],
		ProvisionedThroughput: {
			ReadCapacityUnits: 5,
			WriteCapacityUnits: 5
		},
		GlobalSecondaryIndexes: [{
			IndexName: "FactoryIndex",
			KeySchema: [
				{ AttributeName: "company", KeyType: "HASH" },
				{ AttributeName: "factory", KeyType: "RANGE" }
			],
			Projection: {
				ProjectionType: "ALL"
			},
			// A GSI carries its own throughput, separate from the table's.
			ProvisionedThroughput: {
				ReadCapacityUnits: 1,
				WriteCapacityUnits: 1
			}
		}]
	};

	dynamodb.createTable(tableSpec, callback);
};

Next table would be the table Companies. The hash key would be the parent company and the range key the subsidiary company. Each company has a CEO. The CEO would be the range key for the local secondary index.

// Creates the Companies table: parent company name is the HASH key,
// subsidiary the RANGE key. The CeoIndex local secondary index shares the
// table's HASH key and uses ceo as its RANGE key, projecting all attributes.
var createCompanies = function(callback) {

	var dynamodb = new AWS.DynamoDB();

	var tableSpec = {
		TableName: "Companies",
		KeySchema: [
			{ AttributeName: "name", KeyType: "HASH" },
			{ AttributeName: "subsidiary", KeyType: "RANGE" }
		],
		// Index key attributes must be declared here as well.
		AttributeDefinitions: [
			{ AttributeName: "name", AttributeType: "S" },
			{ AttributeName: "subsidiary", AttributeType: "S" },
			{ AttributeName: "ceo", AttributeType: "S" }
		],
		ProvisionedThroughput: {
			ReadCapacityUnits: 5,
			WriteCapacityUnits: 5
		},
		LocalSecondaryIndexes: [{
			IndexName: "CeoIndex",
			KeySchema: [
				{ AttributeName: "name", KeyType: "HASH" },
				{ AttributeName: "ceo", KeyType: "RANGE" }
			],
			Projection: {
				ProjectionType: "ALL"
			}
		}]
	};

	dynamodb.createTable(tableSpec, callback);
};

You can find the source code on github.

Scheduling jobs on a Sails.js application

In one of my projects there was the need to put scheduled tasks on my Sails.js application.
Agenda and node-schedule are the tools of my choice when scheduling jobs on a node.js app. What we are going to cover is adding scheduling to our Sails.js application using node-schedule and agenda.

To get started let’s create our application

sails new SailsScheduling
cd SailsScheduling

My approach to use node-schedule is to add some configuration on the bootstrap.js file.

npm install node-schedule --save

We will add a service to our Sails.js application. Services on a Sails.js application reside on the api/services/ path.

Suppose that we implement a service that will send emails

/**
 * Created by gkatzioura on 6/20/16.
 */

var send = function (text,callback) {

  sails.log.info("Should send text: "+text)
  callback();
};

module.exports =  {
  send: send
}

Then we add our job triggering code on bootstrap.js.

/**
 * Bootstrap
 * (sails.config.bootstrap)
 *
 * An asynchronous bootstrap function that runs before your Sails app gets lifted.
 * This gives you an opportunity to set up your data model, run jobs, or perform some special logic.
 *
 * For more information on bootstrapping your app, check out:
 * http://sailsjs.org/#!/documentation/reference/sails.config/sails.config.bootstrap.html
 */
var scheduler = require('node-schedule');

// Bootstrap hook: schedules a job that asks EmailService to send an email
// once per minute. The cb argument must be invoked, otherwise the Sails app
// never finishes lifting.
// Fix: removed the dead local `var emailService = EmailService;` — it was
// assigned and never used.
module.exports.bootstrap = function(cb) {

  // Cron expression '* * * * *': fire every minute.
  var minuteJob  = scheduler.scheduleJob('* * * * *', function(){
    EmailService.send("Random text",function (err, result) {
      sails.log.info("Job executed")
    });
  });

  cb();
};

The next example would use agenda. Instead of rolling out our own configuration we will use sails-hook-jobs which integrates wonderfully to our sails application as a grunt task.

npm install mongodb@~1.4 --save
npm install sails-hook-jobs --save

We need mongodb 1.4 version for mongo-skin.

Agenda is backed by mongodb.
For docker users you can issue

docker run --name some-mongo -d mongo

and have a mongodb server up and running.

Next step is creating the file config/jobs.js containing the configuration.

/**
 * Default jobs configuration
 * (sails.config.jobs)
 *
 * For more information using jobs in your app, check out:
 * https://github.com/vbuzzano/sails-hook-jobs
 */

module.exports.jobs = {

  // Where are jobs files
  "jobsDirectory": "api/jobs",

  // agenda configuration. 
  // for more details about configuration,
  // check https://github.com/rschmukler/agenda
  "db": { 
    "address"    : "localhost:27017/jobs",
    "collection" : "agendaJobs" 
  },
  // Process name recorded with each job (see agenda docs for exact use).
  "name": "process name",
  // How often agenda polls mongodb for jobs that are due.
  "processEvery": "10 seconds",
  // Upper bound of concurrently running jobs across all definitions.
  "maxConcurrency": 20,
  // Default concurrency for each individual job definition.
  "defaultConcurrency": 5,
  // Milliseconds before a stuck job's lock expires and it may run again.
  "defaultLockLifetime": 10000
};

Next step is to create the directory jobs on our api folder.
In order to add a job we should create a javascript source file on the api/jobs folder.
Your file should have the ending Job.js. Pay special attention to this, you do not want to spend hours figuring out what went wrong like I did.

Our job would send an email every five minutes.

module.exports = function(agenda) {
  var job = {

    frequency: 'every 5 minutes',
    run: function(job, done) {
      EmailService.send("Test email",function (err,result) {

        if(err) {
          sails.log.error("Job was not executed properly");
          done(err);
        } else {
          sails.log.info("Agenda job was executed");
          done();
        }
      });
    },
  };
  return job;
}

All in all there are definitely more tools out there for Sails.js scheduling.
My personal choice is agenda, due to its approach on managing your jobs and integrating as a sails task.

You can find the source code on github.

Scheduling jobs on Node.js with agenda

There are many ways to schedule jobs in your application. A very common practice is to back our jobs with persistence.
By doing so we will be informed in the future if the job did fail or succeed and when it should be the next execution.

Agenda is a light-weight job scheduling library for node.js. It is backed with mongodb.

It is really simple to get started. The first thing we have to do is to configure the mongodb database connection string.

var Agenda = require('agenda');

// Agenda persists its jobs in mongodb: point it at the target database and
// the collection that will hold the job documents.
var connectionString = "127.0.0.1:27017/scheduled_jobs";
var agenda = new Agenda({db: { address: connectionString, collection: 'jobs' }});

Next step would be to specify jobs.

Suppose we have an EmailService like this

// Stub email service used by the job examples below. Assigned without
// `var`, so it becomes a global (works only in non-strict mode).
EmailService = {
        send:function(callback){
            console.log("sending email");
            callback();
        }
};

Then we shall define a job

// Defines the 'send email' job: delegate to EmailService and report the
// outcome through done(err).
agenda.define('send email', function(job, done) {
    EmailService.send(function(err,result) {
        if(err) {
            done(err);
        } else {
            done();  
        }
    });  
});

We just defined a job with agenda in a human way.

// Schedule the job once agenda's mongodb connection is ready, then start
// processing. 'every' accepts human-readable intervals.
agenda.on('ready',function() {
 agenda.every('1 day','send email'); 
 agenda.start();
});

Once we defined the jobs we need to set the time interval that the agenda instance will look up for new jobs.

// How often this agenda instance polls mongodb for jobs that are due.
agenda.processEvery('1 minute');

By querying to our mongodb database we are able to receive our job status.

>db.jobs.find({})
{ "_id" : ObjectId("5767110c779be08d4e1b3109"), "name" : "send email", "type" : "single", "data" : null, "priority" : 0, "repeatInterval" : "1 day", "repeatTimezone" : null, "lastModifiedBy" : null, "nextRunAt" : ISODate("2016-06-20T21:39:24.931Z"), "lockedAt" : null, "lastRunAt" : ISODate("2016-06-19T21:39:24.931Z"), "lastFinishedAt" : ISODate("2016-06-19T21:39:24.932Z") }

Agenda is pretty featureful.
For example you can use it with cron format too.

For example running our job every minute

// 'every' also accepts cron syntax: '* * * * *' fires the job every minute.
agenda.on('ready',function() {
 agenda.every('* * * * *','send email');
 agenda.start();
});

Another cool feature is retrieving jobs in a mongodb query format and modifying them.

// Jobs can be retrieved with mongodb-style query filters for inspection or
// modification.
agenda.jobs({name: 'send email'}, function(err, jobs) {
});

Last but not least agenda-ui is a great tool for visualizing agenda jobs.

Overall agenda is my personal favorite when it comes to adding jobs to my node.js application.
It is backed by mongodb and easy to configure.
I believe that one of its main strengths is giving you good control over your jobs.

Scheduling jobs on Node.js with node-schedule

Batching is a great part of today's software development. The business world runs on batch, from bank statements to promotion emails.

Node.js has some good libraries for such cases.

Node Schedule is a light cron like scheduler for node.

npm install node-schedule

In case you are used to cron and the cron expression format, it will be pretty easy for you.


var scheduler = require('node-schedule');

// Cron expression '0 0 1 * *': midnight on the first day of every month.
// Fix: corrected the snippet-local variable spelling (montlyJob -> monthlyJob).
var monthlyJob  = scheduler.scheduleJob('0 0 1 * *', function(){
  console.log('I run the first day of the month');
});

But you also have a javascript object approach

var scheduler = require('node-schedule');

// Recurrence rule: every day of the week (0-6) at 07:00.
// Fixes: the original mixed the undefined identifier `schedule` with
// `scheduler`, scheduled the job against an undefined `date` instead of the
// rule, and left a stray scheduleJob(rule, task) call with `task` undefined.
var rule = new scheduler.RecurrenceRule();
rule.hour = 7;
rule.dayOfWeek = new scheduler.Range(0,6);

var dailyJob = scheduler.scheduleJob(rule, function(){
  console.log('I run on days at 7:00');
});

Also you can have tasks submitted by giving a date

var scheduler = require('node-schedule');

// One-off job that fires once at the given Date.
// Fix: the Date constructor's month argument is zero-based, so New Year's
// Day 2017 is new Date(2017, 0, 1) — the original (2017, 1, 1) was Feb 1st.
var date = new Date(2017, 0, 1, 0, 0, 0);
var newYearJob = scheduler.scheduleJob(date, function() {
    console.log("Happy new year");
});

However in case your job is not needed you can cancel it pretty easy


// Unschedules the pending job so it will never fire.
newYearJob.cancel();

Writing unit tests for Sails.js app using mocha

Sails.js is a wonderful node.js framework.

Writing unit tests for Sails.js using mocha is pretty easy.
On the before method of a mocha test you have to lift the sails application and on the after function you have to lower it.

var Sails = require('sails');

// Mocha suite that lifts the Sails app before the tests and lowers it after.
describe('SailsMochaTest',function() {

    before(function(done) {
        // Lifting a Sails app can be slow; raise mocha's default timeout.
        this.timeout(50000);

        Sails.lift({},
            function(err,server) {
                if(err) {
                    done(err);
                } else {
                    // Fix: the original called done(err, sails) — `sails` is
                    // not defined in this scope and mocha's done() only takes
                    // an error argument.
                    done();
                }
            });
    });

    it('testmethod',function(done) {

        Sails.services.sampleService.fetchRecords()
            .then(function(results) {
                done();
            })
            .catch(function(err) {
                done(err);
            });
    });

    after(function(done) {
        Sails.lower(done);
    });
});

This works pretty well, however there is a gotcha. In case you want to execute tests simultaneously, for example using the --recursive argument on mocha, you will get an exception.

Cannot load or lift an app after it has already been lowered. 
You can make a new app instance with:
var SailsApp = require('sails').Sails;
var sails = new SailsApp();

For a case like this you can follow the solution recommended and lift a new sails app.

var SailsApp = require('sails').Sails;

// Lift a fresh Sails instance per suite so several test files can run in one
// mocha process without the "Cannot load or lift an app after it has already
// been lowered" error.
describe('SailsMochaTest',function() {

    var sails = new SailsApp();

    before(function(done) {
        sails.lift({}, function(err, server) {
            if (err) {
                return done(err);
            }
            done(err, sails);
        });
    });

    it('testmethod',function(done) {
        sails.services.sampleService.fetchRecords()
            .then(function(results) {
                done();
            })
            .catch(done);
    });

    after(function(done) {
        sails.lower(done);
    });
});