Let’s take a look at a few different options for sanitizing user input fields in both Node.js and the browser.
The easiest solution was to start with the sanitize-html npm module and use that, but I wanted it to be middleware using Express. Using middleware, I was able to sanitize every single value that was submitted to the server. In this particular use, node was acting as a middle layer between the browser and a REST API so this use case was perfect.
Here is the code for the actual module:
[sourcecode lang="javascript"]
/* jshint node:true, unused: vars, camelcase: false */
‘use strict’;
var sanitizer = require(‘sanitize-html’),
_ = require(‘underscore’);
module.exports = function(config, errors) {
return function(req, res, next) {
if (req.body) {
_.each(req.body, function(value, key) {
if(!parseInt(value,10) && value !== null) {
if(typeof value === ‘string’) {
value = value.replace(/>/gi, ‘>’);
value = value.replace(/</gi, ‘<‘);
value = value.replace(/(©|"|&)/gi, ”);
}
req.body[key] = sanitizer(value, {
allowedTags: []
});
}
});
}
return next();
};
};
[/sourcecode]
A few odd things I noticed when using the sanitize-html module:
- It seemed to change nulls in a way that I couldn’t quite figure out. I just know my API started crying once I implemented it with null values.
- Number values seemed to be converted to strings, which again my API was very unhappy about.
- I manually convert the HTML entities for angle brackets (&lt; and &gt;) into the actual characters before sanitizing so that they’d be caught by the sanitizer.
- In my particular use, I’m being extremely strict in that I’m not allowing any HTML tags at all. The allowedTags array is empty, but we could have added a few there to allow certain things like bold or italics.
Here are the tests for the module so you can get a glimpse of what kind of XSS it will scrub out:
[sourcecode lang="javascript"]
/* global describe, expect, it, sinon, beforeEach */
/* jshint node: true, expr: true, camelcase: false, unused: vars */
var middleware;

// Spec for the sanitizer middleware: strings are scrubbed, while null and
// numeric values must come out exactly as they went in.
describe('Middleware.sanitizer', function() {
  'use strict';
  var req, next;

  beforeEach(function() {
    middleware = require('../../middleware/sanitizer');
    // NOTE(review): the markup in test2, test3 and test6 was reconstructed to
    // match the expectations below; confirm against the original fixtures.
    req = {
      body: {
        test1: 'This is clean',
        test2: 'This <script>alert(1);</script>isnt!',
        test3: 'Im <b>technically</b> allowed.',
        test4: null,
        test5: 1,
        test6: 'This &lt;script&gt;alert(1);&lt;/script&gt;work'
      }
    };
    next = sinon.spy();
  });

  it('should be defined', function() {
    expect(middleware).to.exist;
  });

  it('should be a function', function() {
    expect(middleware).to.be.a('function');
  });

  it('should sanitize XSS from body', function() {
    middleware()(req, {}, next);
    expect(req.body.test1).to.equal('This is clean');
    expect(req.body.test2).to.equal('This isnt!');
    expect(req.body.test3).to.equal('Im technically allowed.');
    expect(req.body.test4).to.equal(null);
    expect(req.body.test5).to.equal(1);
    expect(req.body.test6).to.equal('This work');
  });

  it('should call next callback', function() {
    middleware()(req, {}, next);
    // Read the spy's own flag so the check cannot silently pass when the
    // sinon-chai plugin is not loaded.
    expect(next.calledOnce).to.equal(true);
  });
});
[/sourcecode]
Finally, to implement it into our Express app we just added it to the configuration section:
[sourcecode lang="javascript"]
// … existing code …

// Options handed to the error-handling middleware registered last.
var errorHandlerOptions = {
  dumpExceptions: true,
  showStack: true
};

// The sanitizer is registered ahead of the router, so it runs on a request
// before the routes do.
// NOTE(review): assumes a body parser is registered in the elided code above
// so that req.body is populated by the time the sanitizer runs — confirm.
app.use(middleware.sanitizer());
app.use(app.router);
app.use(middleware.errorHandler(errorHandlerOptions));
[/sourcecode]
An interesting note is that we ensure that we are using more or less the same sanitizing logic on the front-end as well. If we weren’t, we could get stuck in a situation where the user would submit XSS into a required field, and pass the form field validation for it being required, but when it was submitted the field would be scrubbed and effectively submitted as a blank to the API. If your API handles that validation that’s fine, or if you have model validation other than the sanitizer that’s also fine – but it saves the user a round trip of getting their input sanitized and then returned back with some kind of error message. By implementing a sanitizer on the front-end you can give the user instant feedback that HTML basically isn’t allowed. Here’s a quick input field scrubber for the front-end:
[sourcecode lang="javascript"]
/**
 * Strip anything that looks like HTML from a form field, in place.
 *
 * The field's value is rewritten only when scrubbing actually changes it.
 * This is a convenience for instant user feedback; it does not replace
 * sanitizing on the server.
 *
 * @param {Element|string} input - whatever should be passed to jQuery `$()`
 *     to select the field.
 */
var scrubInput = function(input) {
  var field = $(input);
  var curValue = field.val();

  // Nothing to scrub when the field has no string value (for example when
  // the selection is empty); bail out instead of throwing on .replace().
  if (typeof curValue !== 'string') {
    return;
  }

  var newValue = curValue
    // Decode encoded angle brackets so encoded tags are caught below.
    .replace(/&gt;/gi, '>')
    .replace(/&lt;/gi, '<')
    // Drop every tag, including tags that span several lines.
    .replace(/<[\s\S]*?>/g, '')
    // Drop remaining entities, including any that only became complete once
    // the tags between their characters were removed.
    .replace(/(&gt;|&lt;|&copy;|&quot;|&amp;)/gi, '');

  if (curValue !== newValue) {
    field.val(newValue);
  }
};
[/sourcecode]
Coupled with Backbone views, you can do something like:
[sourcecode lang="javascript"]
events: {
‘keyup input,textarea’ : ‘scrubInputs’,
‘change input,textarea’ : ‘scrubInputs’,
‘blur input,textarea’ : ‘scrubInputs’
},
scrubInputs: function(event) {
app.utility.scrubInput(event.target);
}
[/sourcecode]
This is what it looks like in action:
And of course, tests for the scrubber:
[sourcecode lang="javascript"]
// Spec for the front-end scrubber: every fixture below must collapse to the
// plain text 'This is OK'.
describe('scrubInput', function() {
  it('should be defined', function() {
    expect(utility.scrubInput).toBeDefined();
  });

  it('should scrub DOM element value for HTML entities', function() {
    var input = document.createElement('INPUT');
    input.type = 'text';
    // NOTE(review): the markup in these fixtures was reconstructed so that
    // each one scrubs down to 'This is OK'; confirm against the originals.
    var values = [
      'This is OK',
      'This is <b>OK</b>',
      'This is <script>OK</script>',
      'This is &lt;b&gt;OK&lt;/b&gt;',
      'This is OK&copy;',
      'This is OK&quot;&amp;'
    ];
    for (var i = 0; i < values.length; i += 1) {
      input.value = values[i];
      utility.scrubInput(input);
      expect(input.value).toEqual('This is OK');
    }
  });
});
[/sourcecode]
Using a method like the above, albeit a little bit scorched earth, should get you pretty far in preventing users from submitting anything that even remotely looks like HTML or XSS.