Cached sequential unique identifiers with Node.js and MongoDB
April 29, 2012 Leave a comment
Acquiring sequential id with MongoDB is simple, as it supports
$inc
command for atomic sequence increment. However, naive implementation requires hit to database every single time id is required, and this can create latency and overhead issues. Typical case is for user tracking where application needs to get unique global id for every user in load balanced array of node.js instances.
This is more optimized method and works if you’re running simultaneously several Node.js instances. This method fetches a unique number range from database, uses them from memory and fetches new range when it runs out. This example assumes you use https://github.com/mongodb/node-mongodb-native .
Initialize the id starting point.
On instance startup, the implementation initializes the id to starting value (if it does not exists) and fetch the current status from database. In this example the starting value is 1000.
function init_id( seqname, next ) { idcollection = new mongodb.Collection(client, 'ids'); function _findId() { idcollection.findOne({_id: seqname}, function(err, doc) { if ( err ) { console.log( 'ERROR MONGO', 'ids', err ); return next(err); } if( doc ) { return next( false, { _id: seqname, waiters: [], high: doc.index, index: doc.index } ) } idcollection.insert( {_id: seqname, index: 1000}, {safe: true}, function(err, doc) { if ( err ) { console.log( 'ERROR MONGO', 'ids', err ); return next(err); } return _findId(); }); }); } _findId(); }
callback ‘next’ is called with object initialized to the current range from database.
init_id( 'myseq', function(err, idstatus ) { // we have now id status } ...
Sequence generation function
Next we define function that is called to fetch the next id. Tricky part is that if code needs to fetch next batch of unique identifiers it needs to queue the other callers until fetch completes so we don’t end up fetching more than one range increment at a time.
The high and index properties were set to current value in initialization so first call to next_id will always trigger fetch.
var INDEX_STEP = 10; // range to prefetch per query function next_id( idstatus, next ) { if (idstatus.high > idstatus.index) { // id available from memory return next(false, idstatus.index++); } // need to fetch, put callback in wait list idstatus.waiters.push( next ) if (idstatus.infetch) { // already fetch in progress return; } // initiate fetch _fetch( INDEX_STEP ); function _fetch( step ) { // use findandmodify to increment index and return new value idstatus.infetch = true; idcollection.findAndModify( {_id: idstatus._id}, [['_id','asc']], {$inc: {index: step}}, {new: true}, _after_fetch); } function _after_fetch(err, object) { function _notify_waiters( err ) { // give id to all waiters while ( idstatus.waiters.length ) { if ( err ) { (idstatus.waiters.shift())( err ) } else { if (idstatus.high <= idstatus.index) { // we got more waiters during fetch and // exhausted this batch, get next batch return _fetch( INDEX_STEP ); } (idstatus.waiters.shift())( false, idstatus.index++ ) } } idstatus.infetch = false; } if (err) return _notify_waiters( err ) if (!object) return _notify_waiters('index not found') idstatus.high = object.index // the current index must be reset to the allocated range // start, because there could be several parallel nodes making // incremental queries to the db so each node does not get // sequential ranges. idstatus.index = object.index - INDEX_STEP _notify_waiters(); } }
Code gets next id as argument to callback
next_id( idstatus, function(err, id) { // 'id' is next unique id to use! });
Note.
- Identifiers are sequential (growing) but not incremental, as multiple node instances will at some point make requests at the same time.
- Each startup increments the current value of sequence in database by STEP_INDEX amount if next_id is called at least once
- INDEX_STEP must be large enough to avoid race condition, or optimally should implement some kind of exponential retry