Sanitizing XSS and HTML with Express middleware

Let’s take a look at a few different options for sanitizing user input fields in both Node.js and the browser.

The easiest solution was to start with the sanitize-html npm module and use that, but I wanted it to be middleware using Express. Using middleware, I was able to sanitize every single value that was submitted to the server. In this particular use, node was acting as a middle layer between the browser and a REST API so this use case was perfect.

Here is the code for the actual module:

[sourcecode lang="javascript"]

/* jshint node:true, unused: vars, camelcase: false */

‘use strict’;

var sanitizer = require(‘sanitize-html’),

_ = require(‘underscore’);

module.exports = function(config, errors) {

return function(req, res, next) {

if (req.body) {

_.each(req.body, function(value, key) {

if(!parseInt(value,10) && value !== null) {

if(typeof value === ‘string’) {

value = value.replace(/>/gi, ‘>’);

value = value.replace(/</gi, ‘<‘);

value = value.replace(/(©|"|&)/gi, ”);

}

req.body[key] = sanitizer(value, {

allowedTags: []

});

}

});

}

return next();

};

[/sourcecode]

A few odd things I noticed when using the sanitize-html module:

It seemed to change nulls in a way that I couldn’t quite figure out. I just know my API started crying once I implemented it with null values.
Number values seemed to be converted to strings, which again my API was very unhappy about.
I’d manually convert HTML entities into the actual characters before sanitizing so that they’d be caught by the sanitizer.
In my particular use, I’m being extremely strict in that I’m not allowing any HTML tags at all. The allowedTags array is empty, but we could have added a few there to allow certain things like bold or italics, etc.

Here are the tests for the module, so you can get a glimpse of what kind of XSS it will scrub out:

[sourcecode lang="javascript"]

/* global describe, expect, it, sinon, beforeEach */

/* jshint node: true, expr: true, camelcase: false, unused: vars */

var middleware;

// Spec for the sanitizer middleware: strings are scrubbed of markup, while
// null and numeric values must pass through with their type intact.
describe('Middleware.sanitizer', function() {
    'use strict';

    var req, next;

    beforeEach(function() {
        middleware = require('../../middleware/sanitizer');

        // NOTE(review): the published listing lost the markup inside these
        // fixtures (every input was identical to its expected output), so the
        // payloads in test2, test3 and test6 are reconstructed examples --
        // confirm against the original spec.
        req = {
            body: {
                // Plain text is left alone.
                test1: 'This is clean',
                // Script element: tag and contents are removed.
                test2: 'This isnt<script>alert(1)</script>!',
                // Harmless tag, still stripped because no tag is whitelisted.
                test3: '<b>Im technically allowed.</b>',
                // Non-string values keep their type.
                test4: null,
                test5: 1,
                // Entity-encoded markup is decoded, then stripped.
                test6: 'This &lt;script&gt;alert(1)&lt;/script&gt;work'
            }
        };

        next = sinon.spy();
    });

    it('should be defined', function() {
        expect(middleware).to.exist;
    });

    it('should be a function', function() {
        expect(middleware).to.be.a('function');
    });

    it('should sanitize XSS from body', function() {
        middleware()(req, {}, next);

        expect(req.body.test1).to.equal('This is clean');
        expect(req.body.test2).to.equal('This isnt!');
        expect(req.body.test3).to.equal('Im technically allowed.');
        expect(req.body.test4).to.equal(null);
        expect(req.body.test5).to.equal(1);
        expect(req.body.test6).to.equal('This work');
    });

    it('should call next callback', function() {
        middleware()(req, {}, next);

        expect(next).to.have.been.calledOnce;
    });
});

[/sourcecode]

Finally, to implement it into our Express app we just added it to the configuration section:

[sourcecode lang="javascript"]

// … existing code …

// Register the sanitizer ahead of the router so request bodies are scrubbed
// before the routes run.
// NOTE(review): assumes a body parser is registered in the elided code above
// so that req.body is populated -- confirm.
app.use(middleware.sanitizer());

app.use(app.router);

// The error handler is mounted last, after the router.
var errorHandlerOptions = {
    dumpExceptions: true,
    showStack: true
};

app.use(middleware.errorHandler(errorHandlerOptions));

[/sourcecode]

An interesting note is that we ensure that we are using more or less the same sanitizing logic on the front-end as well. If we weren’t, we could get stuck in a situation where the user would submit XSS into a required field, and pass the form field validation for it being required, but when it was submitted the field would be scrubbed and effectively submitted as a blank to the API. If your API handles that validation that’s fine, or if you have model validation other than the sanitizer that’s also fine – but it saves the user a round trip of getting their input sanitized and then returned back with some kind of error message. By implementing a sanitizer on the front-end you can give the user instant feedback that HTML basically isn’t allowed. Here’s a quick input field scrubber for the front-end:

[sourcecode lang="javascript"]

/**
 * Strips anything that looks like HTML from a form field, in place.
 *
 * Mirrors the server-side sanitizer: entity-encoded angle brackets are
 * decoded, the `&copy;`, `&quot;` and `&amp;` entities are dropped, and every
 * `<...>` tag is removed. The field is only written back when the value
 * actually changed.
 *
 * NOTE(review): the entity patterns are reconstructed; the published listing
 * showed them already decoded (e.g. `/>/` -> '>'), which would be a no-op.
 *
 * @param {Element|string} input - element (or selector) passed to jQuery.
 */
var scrubInput = function(input) {
    var $input = $(input);
    var curValue = $input.val();

    // Nothing to scrub when the field has no string value.
    if (typeof curValue !== 'string') {
        return;
    }

    var newValue = curValue
        .replace(/&gt;/gi, '>')
        .replace(/&lt;/gi, '<')
        .replace(/(&copy;|&quot;|&amp;)/gi, '')
        // [\s\S] also matches newlines, so tags split across lines in a
        // textarea are removed as well.
        .replace(/<[\s\S]*?>/g, '');

    if (curValue !== newValue) {
        $input.val(newValue);
    }
};

[/sourcecode]

Coupled with Backbone views, you can do something like:

[sourcecode lang="javascript"]

events: {

‘keyup input,textarea’ : ‘scrubInputs’,

‘change input,textarea’ : ‘scrubInputs’,

‘blur input,textarea’ : ‘scrubInputs’

scrubInputs: function(event) {

app.utility.scrubInput(event.target);

}

[/sourcecode]

This is what it looks like in action:

input_scrubbing

And of course, tests for the scrubber:

[sourcecode lang="javascript"]

// Spec for the front-end scrubber.
describe('scrubInput', function() {
    it('should be defined', function() {
        expect(utility.scrubInput).toBeDefined();
    });

    it('should scrub DOM element value for HTML entities', function() {
        var input = document.createElement('INPUT');

        input.type = 'text';

        // NOTE(review): the published listing showed these entities already
        // decoded (a literal copyright sign and a bare double quote);
        // restored to their encoded form, which is what the test title
        // describes -- confirm against the original spec.
        var values = [
            'This is OK',
            'This is OK&copy;',
            'This is OK&quot;'
        ];

        for (var i = 0; i < values.length; i += 1) {
            input.value = values[i];

            utility.scrubInput(input);

            expect(input.value).toEqual('This is OK');
        }
    });
});

[/sourcecode]

Using a method like the above, albeit a little bit scorched earth, should get you pretty far in preventing users from submitting anything that even remotely looks like HTML or XSS.