Sanitizing XSS and HTML with Express middleware

Let's take a look at a few different options for sanitizing user input fields in both Node.js and the browser.

The easiest solution was to start with the sanitize-html npm module and use that, but I wanted it to be middleware using Express. Using middleware, I was able to sanitize every single value that was submitted to the server. In this particular use, node was acting as a middle layer between the browser and a REST API so this use case was perfect.

Here is the code for the actual module:

/* jshint node:true, unused: vars, camelcase: false */
'use strict';
var sanitizer = require('sanitize-html'),
    _ = require('underscore');

module.exports = function(config, errors) {
    return function(req, res, next) {
        if (req.body) {
            _.each(req.body, function(value, key) {
                if(!parseInt(value,10) && value !== null) {
                    if(typeof value === 'string') {
                        value = value.replace(/>/gi, '>');
                        value = value.replace(/&lt;/gi, '<');
                        value = value.replace(/(&copy;|&quot;|&amp;)/gi, '');
                    }
                    req.body[key] = sanitizer(value, {
                        allowedTags: []
                    });
                }
            });
        }
        return next();
    };
};

A few odd things I noticed when using the sanitize-html module:

  • It seemed to change nulls in a way that I couldn’t quite figure out. I just know my API started crying once I implemented it with null values.
  • Number values seemed to be converted to strings, which again my API was very unhappy about.
  • I manually convert HTML entities into the actual characters before sanitizing so that they’d be caught by the sanitizer.
  • In my particular use, I’m being extremely strict in that I’m not allowing any HTML tags at all. The allowedTags array is empty, but we could have added a few there to allow certain things like bold or italics etc.

Here are the tests for the module so you can get a glimpse of what kind of XSS it will scrub out:

/* global describe, expect, it, sinon, beforeEach */
/* jshint node: true, expr: true, camelcase: false, unused: vars */

var middleware;

// Specs for the Express middleware that scrubs HTML out of req.body.
describe('Middleware.sanitizer', function() {
    'use strict';

    var req;
    var next;

    // Builds a fresh request body so no spec sees another spec's mutations.
    function buildBody() {
        return {
            test1: 'This is clean',
            test2: 'This <script>var isNot=true;</script> isnt!',
            test3: 'Im <b>technically</b> allowed.',
            test4: null,
            test5: 1,
            test6: 'This &lt;shouldnt&gt; work'
        };
    }

    // Invokes the middleware once against the current fixtures.
    function runMiddleware() {
        middleware()(req, {}, next);
    }

    beforeEach(function() {
        middleware = require('../../middleware/sanitizer');
        req = { body: buildBody() };
        next = sinon.spy();
    });

    it('should be defined', function() {
        expect(middleware).to.exist;
    });

    it('should be a function', function() {
        expect(middleware).to.be.a('function');
    });

    it('should sanitize XSS from body', function() {
        // Expected value of each body field after the middleware has run.
        var expected = {
            test1: 'This is clean',
            test2: 'This  isnt!',
            test3: 'Im technically allowed.',
            test4: null,
            test5: 1,
            test6: 'This  work'
        };

        runMiddleware();

        Object.keys(expected).forEach(function(field) {
            expect(req.body[field]).to.equal(expected[field]);
        });
    });

    it('should call next callback', function() {
        runMiddleware();
        expect(next).calledOnce;
    });
});

Finally, to implement it into our Express app we just added it to the configuration section:

// … existing code …
// The sanitizer is registered before app.router, so it is mounted ahead of
// the routes. It is called with no arguments here.
app.use(middleware.sanitizer());
app.use(app.router);
// The error handler is registered last, after the router.
app.use(middleware.errorHandler({
    dumpExceptions: true,
    showStack: true
}));

An interesting note is that we ensure that we are using more or less the same sanitizing logic on the front-end as well. If we weren’t, we could get stuck in a situation where the user would submit XSS into a required field, and pass the form field validation for it being required, but when it was submitted the field would be scrubbed and effectively submitted as a blank to the API. If your API handles that validation, that’s fine, or if you have model validation other than the sanitizer, that’s also fine – but it saves the user a round trip of getting their input sanitized and then returned back with some kind of error message. By implementing a sanitizer on the front-end you can give the user instant feedback that HTML basically isn’t allowed. Here’s a quick input field scrubber for the front-end:

/**
 * Removes anything that looks like HTML from a form field's current value
 * and writes the cleaned text back only when something actually changed.
 *
 * Steps: decode `&gt;` / `&lt;` into real angle brackets, strip every
 * `<...>` run (including ones that span line breaks), then drop the
 * remaining disallowed entities.
 *
 * @param {*} input Anything `$()` accepts that resolves to a form field.
 * @returns {undefined}
 */
var scrubInput = function(input) {
    var $input = $(input);
    var curValue = $input.val();

    // val() may not yield a string (e.g. nothing matched); there is then
    // nothing to scrub, and calling .replace() on it would throw.
    if (typeof curValue !== 'string') {
        return;
    }

    var newValue = curValue
        .replace(/&gt;/gi, '>')
        .replace(/&lt;/gi, '<')
        // [\s\S] matches every character including newlines, so a tag broken
        // across lines in a textarea is removed as well.
        .replace(/<[\s\S]*?>/g, '')
        // Runs after tag stripping on purpose: removing a tag can splice a
        // new entity together (e.g. "&g<b>t;" becomes "&gt;").
        .replace(/(&gt;|&lt;|&copy;|&quot;|&amp;)/gi, '');

    // Avoid touching the field when nothing changed.
    if (curValue !== newValue) {
        $input.val(newValue);
    }
};

Coupled with Backbone views, you can do something like:

// Route keyup, change and blur on every input and textarea to scrubInputs.
events: {
    'keyup input,textarea' : 'scrubInputs',
    'change input,textarea' : 'scrubInputs',
    'blur input,textarea' : 'scrubInputs'
},

// Event handler: passes the element that fired the event (event.target)
// to app.utility.scrubInput.
scrubInputs: function(event) {
    app.utility.scrubInput(event.target);
}

This is what it looks like in action:

input_scrubbing

And of course, tests for the scrubber:

// Specs for the front-end utility.scrubInput helper.
describe('scrubInput', function() {
    it('should be defined', function() {
        expect(utility.scrubInput).toBeDefined();
    });

    it('should scrub DOM element value for HTML entities', function() {
        var field = document.createElement('INPUT');
        field.type = 'text';

        // Every sample, clean or dirty, must end up as the same plain text.
        [
            'This is OK',
            'This is <not>OK</not>',
            'This is &lt;not&gt;OK</not>',
            'This is <not>OK',
            'This is OK&copy;',
            'This is <note>OK&quot;'
        ].forEach(function(sample) {
            field.value = sample;
            utility.scrubInput(field);
            expect(field.value).toEqual('This is OK');
        });
    });
});

Using a method like the above, albeit a little bit scorched earth, should get you pretty far in preventing users from submitting anything that even remotely looks like HTML or XSS.

One thought on “Sanitizing XSS and HTML with Express middleware”

  1. Great article. First of all you’re right about the null. For some reason sanitize(null) !== null. We decided to only sanitize strings (and recursively sanitize objects) in case it somehow affected the other primitive types too.

    Secondly, this may not be necessary in your app, but we often have nested objects coming through req.body. Therefore we had to recursively apply the sanitization to deeper levels.

    Thanks overall for this post. I haven’t found a whole lot of other information out there about sanitizing inputs.

    Like

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s