Let’s take a look at a few options for sanitizing user input fields in both Node.js and the browser.

The easiest solution was to start with the sanitize-html npm module and use that, but I wanted it to be middleware using Express. Using middleware, I was able to sanitize every single value that was submitted to the server. In this particular use, node was acting as a middle layer between the browser and a REST API so this use case was perfect.

Here is the code for the actual module:

[sourcecode lang="javascript"]

/* jshint node:true, unused: vars, camelcase: false */

‘use strict’;

var sanitizer = require(‘sanitize-html’),

_ = require(‘underscore’);

module.exports = function(config, errors) {

return function(req, res, next) {

if (req.body) {

_.each(req.body, function(value, key) {

if(!parseInt(value,10) && value !== null) {

if(typeof value === ‘string’) {

value = value.replace(/>/gi, ‘>’);

value = value.replace(/</gi, ‘<‘);

value = value.replace(/(©|"|&)/gi, ”);

}

req.body[key] = sanitizer(value, {

allowedTags: []

});

}

});

}

return next();

};

};

[/sourcecode]

A few odd things I noticed when using the sanitize-html module:

  • It seemed to change nulls in a way that I couldn’t quite figure out. I just know my API started crying once I implemented it with null values.
  • Number values seemed to be converted to strings, which again my API was very unhappy about.
  • I manually convert the HTML entities for the angle brackets (&amp;lt; and &amp;gt;) into the actual characters before sanitizing so that they’d be caught by the sanitizer.
  • In my particular use, I’m being extremely strict in that I’m not allowing any HTML tags at all. The allowedTags array is empty, but we could have added a few there to allow certain things like bold or italics etc.

Here are the tests for the module so you can get a glimpse of what kind of XSS it will scrub out:

[sourcecode lang="javascript"]

/* global describe, expect, it, sinon, beforeEach */

/* jshint node: true, expr: true, camelcase: false, unused: vars */

var middleware;

// Spec for the sanitizer middleware: strings lose their markup, while null and
// numeric values must pass through unchanged.
describe('Middleware.sanitizer', function() {
    'use strict';

    var req, next;

    beforeEach(function() {
        middleware = require('../../middleware/sanitizer');

        // NOTE(review): the markup originally embedded in test2, test3 and
        // test6 was lost when this post was published; the payloads below are
        // representative reconstructions chosen to produce the expected
        // output. Replace them with the real fixtures if they are available.
        req = {
            body: {
                test1: 'This is clean',
                test2: 'This <img src=x onerror=alert(1)>isnt!',
                test3: 'Im <b>technically</b> allowed.',
                test4: null,
                test5: 1,
                test6: 'This &lt;img src=x onerror=alert(1)&gt;work'
            }
        };

        next = sinon.spy();
    });

    it('should be defined', function() {
        expect(middleware).to.exist;
    });

    it('should be a function', function() {
        expect(middleware).to.be.a('function');
    });

    it('should sanitize XSS from body', function() {
        middleware()(req, {}, next);

        expect(req.body.test1).to.equal('This is clean');
        expect(req.body.test2).to.equal('This isnt!');
        expect(req.body.test3).to.equal('Im technically allowed.');
        expect(req.body.test4).to.equal(null);
        expect(req.body.test5).to.equal(1);
        expect(req.body.test6).to.equal('This work');
    });

    it('should call next callback', function() {
        middleware()(req, {}, next);

        // Assert on the spy's own flag so the check still fails loudly when
        // the sinon-chai plugin is not loaded.
        expect(next.calledOnce).to.equal(true);
    });
});

[/sourcecode]

Finally, to implement it into our Express app we just added it to the configuration section:

[sourcecode lang="javascript"]

// … existing code …

// Options handed to the error-handling middleware registered last.
var errorHandlerOptions = {
    dumpExceptions: true,
    showStack: true
};

// The sanitizer is registered ahead of the router, and the error handler after it.
app.use(middleware.sanitizer());
app.use(app.router);
app.use(middleware.errorHandler(errorHandlerOptions));

[/sourcecode]

An interesting note is that we ensure that we are using more or less the same sanitizing logic on the front-end as well. If we weren’t, we could get stuck in a situation where the user would submit XSS into a required field, and pass the form field validation for it being required, but when it was submitted the field would be scrubbed and effectively submitted as a blank to the API. If your API handles that validation, that’s fine, or if you have model validation other than the sanitizer, that’s also fine – but it saves the user a round trip of getting their input sanitized and then returned back with some kind of error message. By implementing a sanitizer on the front-end you can give the user instant feedback that HTML basically isn’t allowed. Here’s a quick input field scrubber for the front-end:

[sourcecode lang="javascript"]

/**
 * Removes anything that looks like HTML from a form field's current value.
 *
 * Steps, in order:
 *   1. decode `&gt;` / `&lt;` so entity-encoded tags become real tags,
 *   2. strip every `<...>` tag, including tags that span several lines,
 *   3. remove any leftover angle bracket plus the `&copy;`, `&quot;` and
 *      `&amp;` entities.
 *
 * The field is only written back when the value actually changed.
 *
 * @param {Element|string|jQuery} input - Anything jQuery can wrap that
 *     resolves to the field to scrub.
 * @returns {undefined}
 */
var scrubInput = function(input) {
    var $input = $(input),
        curValue = $input.val();

    // .val() does not always return a string (for example it is undefined
    // when nothing was matched); there is nothing to scrub in that case.
    if (typeof curValue !== 'string') {
        return;
    }

    var newValue = curValue
        .replace(/&gt;/gi, '>')
        .replace(/&lt;/gi, '<')
        // [\s\S] matches every character, line terminators included.
        .replace(/<[\s\S]*?>/g, '')
        .replace(/(>|<|&copy;|&quot;|&amp;)/gi, '');

    if (curValue !== newValue) {
        $input.val(newValue);
    }
};

[/sourcecode]

Coupled with Backbone views, you can do something like:

[sourcecode lang="javascript"]

events: {

‘keyup input,textarea’ : ‘scrubInputs’,

‘change input,textarea’ : ‘scrubInputs’,

‘blur input,textarea’ : ‘scrubInputs’

},

scrubInputs: function(event) {

app.utility.scrubInput(event.target);

}

[/sourcecode]

This is what it looks like in action:

input_scrubbing

And of course, tests for the scrubber:

[sourcecode lang="javascript"]

// Spec for the front-end scrubber: every fixture must collapse to the same
// clean string once markup and entities have been removed.
describe('scrubInput', function() {
    it('should be defined', function() {
        expect(utility.scrubInput).toBeDefined();
    });

    it('should scrub DOM element value for HTML entities', function() {
        var input = document.createElement('INPUT');

        input.type = 'text';

        // NOTE(review): the markup in the second, third and fourth fixtures
        // and the entity names in the last two were lost when this post was
        // published; the values below are representative reconstructions.
        var values = [
            'This is OK',
            'This is OK<b>',
            'This is OK&lt;i&gt;',
            'This is OK<img src=x onerror=alert(1)>',
            'This is OK&copy;',
            'This is OK&quot;'
        ];

        for (var i = 0; i < values.length; i += 1) {
            input.value = values[i];

            utility.scrubInput(input);

            expect(input.value).toEqual('This is OK');
        }
    });
});

[/sourcecode]

Using a method like the above, albeit a little bit scorched earth, should get you pretty far in preventing users from submitting anything that even remotely looks like HTML or XSS.