From 42af23120d2ed62c46a4d54236428a98b99e7cd1 Mon Sep 17 00:00:00 2001 From: Tom Spencer Date: Fri, 15 Jul 2016 14:28:11 +0100 Subject: [PATCH 1/3] Refactored for v1.0. - `parse_reply` function is now called `parse`. - Module is exported directly. - String.prototype no longer polluted. - Define dev dependencies in package.json. - Code updated according to jshint styleguide. --- .travis.yml | 10 +++ README.md | 41 ++++++++- lib/emailreplyparser.js | 146 ++++++++++++++------------------ package.json | 20 ++++- test/email_reply_parser_test.js | 115 ++++++++++++------------- 5 files changed, 187 insertions(+), 145 deletions(-) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..fca1e50 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,10 @@ +language: node_js +node_js: + - '0.4' + - '0.6' + - '0.8' + - '0.10' + - '0.12' + - '4' + - '5' + - '6' diff --git a/README.md b/README.md index 8e50175..833a281 100755 --- a/README.md +++ b/README.md @@ -2,6 +2,23 @@ Node.js port of Github's EmailReplyParser, a small library to parse plain text email content. +## Usage + +``` js +var EmailReplyParser = require('emailreplyparser') + +// To parse the reply from an email body +var parsed = EmailReplyParser.parse(emailBody) + +// To parse the reply from an email body, preserving signatures +var parsed = EmailReplyParser.parse(emailBody, true) + +// Reads in an email and produces an array of fragments. +// Each fragment represents a part of the email. +var fragments = EmailReplyParser.read(emailBody) +``` + +For examples, refer to the tests. 
## Known Issues (Taken from Github's version) @@ -65,6 +82,24 @@ Apparently, prefixing lines with `>` isn't universal either: To: Rick -### To run the tests -* Install nodeunit `npm install nodeunit` -* Run the tests: `nodeunit test/email_reply_parser_test.js` \ No newline at end of file +## To run the tests + +* Install dependencies `npm install` +* Run the tests: `npm test` + +## Upgrading to v1.0 + +- The `EmailReplyParser` is now exported directly. If upgrading from pre 1.0, change the following: + +``` js +var EmailReplyParser = require('emailreplyparser').EmailReplyParser +``` + +to: + +``` js +var EmailReplyParser = require('emailreplyparser') +``` + +- The `parse_reply` function is now called `parse`. +- The module no longer adds any methods to the `String` prototype. If your code was relying on the `trim`, `ltrim`, `strim`, `gsub`, `reverse` or `chomp` methods to be available on the prototype, you'll need to make changes. diff --git a/lib/emailreplyparser.js b/lib/emailreplyparser.js index 445a831..fc98da2 100644 --- a/lib/emailreplyparser.js +++ b/lib/emailreplyparser.js @@ -25,77 +25,36 @@ // // EmailReplyParser also attempts to figure out which of these blocks should // be hidden from users. -var EmailReplyParser = { - VERSION: "0.4", - // Public: Splits an email body into a list of Fragments. - // - // text - A String email body. - // - // Returns an Email instance. - read: function(text) { - var email = new Email(); - return email.read(text); - }, +'use strict'; - // Public: Get the text of the visible portions of the given email body. - // - // text - A String email body. - // [optional, default: false] include_signatures - Whether or not to include signatures in reply - // - // Returns a String. 
- parse_reply: function (text, include_signatures) { - if(typeof(include_signatures)==='undefined') include_signatures = false; - return this.read(text).visible_text(include_signatures); - } -}; +/* jshint eqnull: true */ -String.prototype.trim = function() { - return this.replace(/^\s*|\s*$/g, ""); -} +// String manipulation utilities +var trim = function(str) { + return str.replace(/^\s*|\s*$/g, ''); +}; -String.prototype.ltrim = function() { - return this.replace(/^\s*/g, ""); -} +var ltrim = function(str) { + return str.replace(/^\s*/g, ''); +}; -String.prototype.rtrim = function() { - return this.replace(/\s*$/g, ""); -} +var rtrim = function(str) { + return str.replace(/\s*$/g, ''); +}; -String.prototype.reverse = function() { - var s = ""; - var i = this.length; - while (i>0) { - s += this.substring(i-1,i); +var reverse = function(str) { + var s = ''; + var i = str.length; + while (i > 0) { + s += str.substring(i-1, i); i--; } return s; -} - -//http://flochip.com/2011/09/06/rubys-string-gsub-in-javascript/ -String.prototype.gsub = function(source, pattern, replacement) { - var match, result; - if (!((pattern != null) && (replacement != null))) { - return source; - } - result = ''; - while (source.length > 0) { - if ((match = source.match(pattern))) { - result += source.slice(0, match.index); - result += replacement; - source = source.slice(match.index + match[0].length); - } - else { - result += source; - source = ''; - } - } - return result; }; -//http://3dmdesign.com/development/extending-javascript-strings-with-chomp-using-prototypes -String.prototype.chomp = function() { - return this.replace(/(\n|\r)+$/, ''); +var chomp = function(str) { + return str.replace(/(\n|\r)+$/, ''); }; // An Email instance represents a parsed body String. @@ -117,8 +76,6 @@ Email.prototype = { // // Returns a String. 
visible_text: function(include_signatures) { - if(typeof(include_signatures)==='undefined') include_signatures = false; - var visible_text = []; for (var key in this.fragments) { if (!this.fragments[key].hidden || (include_signatures && this.fragments[key].signature)) { @@ -126,7 +83,7 @@ Email.prototype = { } } - return visible_text.join("\n").rtrim(); + return rtrim(visible_text.join('\n')); }, // Splits the given text into a list of Fragments. This is roughly done by @@ -153,11 +110,11 @@ Email.prototype = { // The text is reversed initially due to the way we check for hidden // fragments. - text = text.reverse(); + text = reverse(text); // This determines if any 'visible' Fragment has been found. Once any // visible Fragment is found, stop looking for hidden ones. - this.found_visible = false + this.found_visible = false; // This instance variable points to the current Fragment. If the matched // line fits, it should be added to this Fragment. Otherwise, finish it @@ -191,16 +148,16 @@ Email.prototype = { // // Returns nothing. scan_line: function(line) { - var SIG_REGEX = '(--|__|\\w-$)|(^(\\w+\\s*){1,3} ' + ("Sent from my").reverse() + '$)'; + var SIG_REGEX = '(--|__|\\w-$)|(^(\\w+\\s*){1,3} ' + reverse('Sent from my') + '$)'; - line = line.chomp('\n'); + line = chomp(line); if (!(new RegExp(SIG_REGEX)).test(line)) { - line = line.ltrim(); + line = ltrim(line); } // Mark the current Fragment as a signature if the current line is '' // and the Fragment starts with a common signature indicator. - if (this.fragment != null && line == '') { + if (this.fragment != null && line === '') { if ((new RegExp(SIG_REGEX)).test(this.fragment.lines[this.fragment.lines.length - 1])) { this.fragment.signature = true; this.finish_fragment(); @@ -214,7 +171,7 @@ Email.prototype = { // If the line matches the current fragment, add it. Note that a common // reply header also counts as part of the quoted Fragment, even though // it doesn't start with `>`. 
- if (this.fragment != null && ((this.fragment.quoted == is_quoted) || (this.fragment.quoted && (this.quote_header(line) || line == '')))) { + if (this.fragment != null && ((this.fragment.quoted === is_quoted) || (this.fragment.quoted && (this.quote_header(line) || line === '')))) { this.fragment.lines.push(line); } // Otherwise, finish the fragment and start a new one. @@ -257,30 +214,32 @@ Email.prototype = { // Player 2 (signature, hidden) // finish_fragment: function() { - if (this.fragment != null) { + if(this.fragment != null) { this.fragment.finish(); if (!this.found_visible) { - if (this.fragment.quoted || this.fragment.signature || this.fragment.to_s().trim() == '') + if (this.fragment.quoted || this.fragment.signature || trim(this.fragment.to_s()) === '') { this.fragment.hidden = true; - else + } + else { this.found_visible = true; + } } this.fragments.push(this.fragment); this.fragment = null; } } -} +}; // Fragments // Represents a group of paragraphs in the email sharing common attributes. // Paragraphs should get their own fragment if they are a quoted area or a // signature. -var Fragment = function(quoted, first_line) { - this.initialize(quoted, first_line) -}; +function Fragment(quoted, first_line) { + this.initialize(quoted, first_line); +} Fragment.prototype = { // This is an Array of String lines of content. Since the content is @@ -297,16 +256,15 @@ Fragment.prototype = { this.quoted = quoted; this.lines = [first_line]; this.content = null; - this.lines = this.lines.filter(function(){return true}); + this.lines = this.lines.filter(function() { return true; }); }, // Builds the string content by joining the lines and reversing them. // // Returns nothing. 
finish: function() { - this.content = this.lines.join("\n"); + this.content = reverse(this.lines.join('\n')); this.lines = []; - this.content = this.content.reverse(); }, to_s: function() { @@ -314,6 +272,30 @@ Fragment.prototype = { } }; -module.exports.EmailReplyParser = EmailReplyParser; +var EmailReplyParser = { + VERSION: '1.0', + + // Public: Splits an email body into a list of Fragments. + // + // text - A String email body. + // + // Returns an Email instance. + read: function(text) { + var email = new Email(); + return email.read(text); + }, + + // Public: Get the text of the visible portions of the given email body. + // + // text - A String email body. + // [optional, default: undefined] include_signatures - Whether or not to include signatures in reply + // + // Returns a String. + parse: function (text, include_signatures) { + return this.read(text).visible_text(include_signatures); + } +}; + +module.exports = EmailReplyParser; -//console.log(EmailReplyParser.read("I get proper rendering as well.\n\nSent from a magnificent torch of pixels\n\nOn Dec 16, 2011, at 12:47 PM, Corey Donohoe\n\nwrote:\n\n> Was this caching related or fixed already? I get proper rendering here.\n>\n> ![](https://img.skitch.com/20111216-m9munqjsy112yqap5cjee5wr6c.jpg)\n>\n> ---\n> Reply to this email directly or view it on GitHub:\n> https://github.com/github/github/issues/2278#issuecomment-3182418\n")); \ No newline at end of file +//console.log(EmailReplyParser.read("I get proper rendering as well.\n\nSent from a magnificent torch of pixels\n\nOn Dec 16, 2011, at 12:47 PM, Corey Donohoe\n\nwrote:\n\n> Was this caching related or fixed already? 
I get proper rendering here.\n>\n> ![](https://img.skitch.com/20111216-m9munqjsy112yqap5cjee5wr6c.jpg)\n>\n> ---\n> Reply to this email directly or view it on GitHub:\n> https://github.com/github/github/issues/2278#issuecomment-3182418\n")); diff --git a/package.json b/package.json index 4c7c9b0..9983d71 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "emailreplyparser", - "version": "0.0.5", + "version": "1.0.0", "description": "Node.js Port of GitHub's email_reply_parser.rb", "author": "Michael Owens (http://mowens.com/)", "repository": { @@ -8,6 +8,20 @@ "url": "https://github.com/mowens/emailreplyparser.git" }, "main": "./lib/emailreplyparser", - "engines": { "node": ">= 0.4.0" }, - "keywords": ["email", "parser", "emailreplyparser", "email_reply_parser"] + "engines": { + "node": ">= 0.4.0" + }, + "scripts": { + "test": "nodeunit test/email_reply_parser_test.js" + }, + "keywords": [ + "email", + "parser", + "emailreplyparser", + "email_reply_parser" + ], + "devDependencies": { + "lodash": "^4.13.1", + "nodeunit": "^0.9.1" + } } diff --git a/test/email_reply_parser_test.js b/test/email_reply_parser_test.js index 3165f3b..1c43231 100755 --- a/test/email_reply_parser_test.js +++ b/test/email_reply_parser_test.js @@ -1,12 +1,12 @@ -var fs = require('fs'); +'use strict'; -var _ = require('underscore'); +var fs = require('fs'); +var _ = require('lodash'); -var EmailReplyParser = require('../lib/emailreplyparser').EmailReplyParser; +var EmailReplyParser = require('../lib/emailreplyparser'); function get_email(name) { var data = fs.readFileSync(__dirname + '/emails/' + name + '.txt', 'ascii'); - return EmailReplyParser.read(data); } @@ -15,21 +15,21 @@ function get_raw_email(name) { } exports.test_reads_simple_body = function(test){ - reply = get_email('email_1_1'); + var reply = get_email('email_1_1'); test.equal(3, reply.fragments.length); test.deepEqual([false, false, false], _.map(reply.fragments, function(f) { return f.quoted; })); 
test.deepEqual([false, true, true], _.map(reply.fragments, function(f) { return f.signature; })); test.deepEqual([false, true, true], _.map(reply.fragments, function(f) { return f.hidden; })); - test.equal("Hi folks\n\nWhat is the best way to clear a Riak bucket of all key, values after\nrunning a test?\nI am currently using the Java HTTP API.\n", reply.fragments[0].to_s()); + test.equal('Hi folks\n\nWhat is the best way to clear a Riak bucket of all key, values after\nrunning a test?\nI am currently using the Java HTTP API.\n', reply.fragments[0].to_s()); - test.equal("-Abhishek Kona\n\n", reply.fragments[1].to_s()); + test.equal('-Abhishek Kona\n\n', reply.fragments[1].to_s()); test.done(); -} +}; exports.test_reads_top_post = function(test){ - reply = get_email('email_1_3'); + var reply = get_email('email_1_3'); test.equal(5, reply.fragments.length); test.deepEqual([false, false, true, false, false], _.map(reply.fragments, function(f) { return f.quoted; })); @@ -41,27 +41,27 @@ exports.test_reads_top_post = function(test){ test.ok((/^On [^\:]+\:/m).test(reply.fragments[2].to_s())); test.ok((new RegExp('^_')).test(reply.fragments[4].to_s())); test.done(); -} +}; exports.test_reads_bottom_post = function(test){ - reply = get_email('email_1_2'); + var reply = get_email('email_1_2'); test.equal(6, reply.fragments.length); test.deepEqual([false, true, false, true, false, false], _.map(reply.fragments, function(f) { return f.quoted; })); test.deepEqual([false, false, false, false, false, true], _.map(reply.fragments, function(f) { return f.signature; })); test.deepEqual([false, false, false, true, true, true], _.map(reply.fragments, function(f) { return f.hidden; })); - test.equal("Hi,", reply.fragments[0].to_s()); + test.equal('Hi,', reply.fragments[0].to_s()); test.ok((new RegExp('^On [^\:]+\:')).test(reply.fragments[1].to_s())); test.ok((/^You can list/m).test(reply.fragments[2].to_s())); test.ok((/^> /m).test(reply.fragments[3].to_s())); test.ok((new 
RegExp('^_')).test(reply.fragments[5].to_s())); test.done(); -} +}; exports.test_reads_inline_replies = function(test){ - reply = get_email('email_1_8'); + var reply = get_email('email_1_8'); test.equal(7, reply.fragments.length); test.deepEqual([true, false, true, false, true, false, false], _.map(reply.fragments, function(f) { return f.quoted; })); @@ -76,26 +76,26 @@ exports.test_reads_inline_replies = function(test){ test.equal('', reply.fragments[5].to_s().trim()); test.ok((new RegExp('^-')).test(reply.fragments[6].to_s())); test.done(); -} +}; exports.test_recognizes_date_string_above_quote = function(test){ - reply = get_email('email_1_4'); + var reply = get_email('email_1_4'); test.ok((/^Awesome/).test(reply.fragments[0].to_s())); test.ok((/^On/m).test(reply.fragments[1].to_s())); test.ok((/Loader/m).test(reply.fragments[1].to_s())); test.done(); -} +}; exports.test_a_complex_body_with_only_one_fragment = function(test){ - reply = get_email('email_1_5'); + var reply = get_email('email_1_5'); test.equal(1, reply.fragments.length); test.done(); -} +}; exports.test_reads_email_with_correct_signature = function(test){ - reply = get_email('correct_sig'); + var reply = get_email('correct_sig'); test.equal(2, reply.fragments.length); @@ -105,81 +105,82 @@ exports.test_reads_email_with_correct_signature = function(test){ test.ok((new RegExp('^-- \nrick')).test(reply.fragments[1].to_s())); test.done(); -} +}; exports.test_deals_with_multiline_reply_headers = function(test){ - reply = get_email('email_1_6'); + var reply = get_email('email_1_6'); test.ok((new RegExp('^I get')).test(reply.fragments[0].to_s())); test.ok((/^On/m).test(reply.fragments[1].to_s())); test.ok((new RegExp('Was this')).test(reply.fragments[1].to_s())); test.done(); -} +}; exports.test_does_not_modify_input_string = function(test){ - original = "The Quick Brown Fox Jumps Over The Lazy Dog"; + var original = 'The Quick Brown Fox Jumps Over The Lazy Dog'; EmailReplyParser.read(original); - 
test.equal("The Quick Brown Fox Jumps Over The Lazy Dog", original); + test.equal('The Quick Brown Fox Jumps Over The Lazy Dog', original); test.done(); -} +}; exports.test_returns_only_the_visible_fragments_as_a_string = function(test){ - reply = get_email('email_2_1'); + var reply = get_email('email_2_1'); String.prototype.rtrim = function() { - return this.replace(/\s*$/g, ""); - } + return this.replace(/\s*$/g, ''); + }; - var fragments = _.select(reply.fragments, function(f) { return !f.hidden; }); - var fragments = _.map(fragments, function(f) { return f.to_s(); }); - test.equal(fragments.join("\n").rtrim(), reply.visible_text()); + var fragments = _(reply.fragments) + .filter(function(f) { return !f.hidden; }) + .map(function(f) { return f.to_s(); }); + test.equal(fragments.join('\n').rtrim(), reply.visible_text()); test.done(); -} +}; exports.test_parse_out_just_top_for_outlook_reply = function(test){ - body = get_raw_email('email_2_1'); - test.equal("Outlook with a reply", EmailReplyParser.parse_reply(body)); + var body = get_raw_email('email_2_1'); + test.equal('Outlook with a reply', EmailReplyParser.parse(body)); test.done(); -} +}; exports.test_parse_out_sent_from_iPhone = function(test){ - body = get_raw_email('email_iPhone'); - test.equal("Here is another email", EmailReplyParser.parse_reply(body)); + var body = get_raw_email('email_iPhone'); + test.equal('Here is another email', EmailReplyParser.parse(body)); test.done(); -} +}; exports.test_parse_out_sent_from_BlackBerry = function(test){ - body = get_raw_email('email_BlackBerry'); - test.equal("Here is another email", EmailReplyParser.parse_reply(body)); + var body = get_raw_email('email_BlackBerry'); + test.equal('Here is another email', EmailReplyParser.parse(body)); test.done(); -} +}; exports.test_parse_out_send_from_multiword_mobile_device = function(test){ - body = get_raw_email('email_multi_word_sent_from_my_mobile_device'); - test.equal("Here is another email", 
EmailReplyParser.parse_reply(body)); + var body = get_raw_email('email_multi_word_sent_from_my_mobile_device'); + test.equal('Here is another email', EmailReplyParser.parse(body)); test.done(); -} +}; exports.test_do_not_parse_out_send_from_in_regular_sentence = function(test){ - body = get_raw_email('email_sent_from_my_not_signature'); - test.equal("Here is another email\n\nSent from my desk, is much easier then my mobile phone.", EmailReplyParser.parse_reply(body)); + var body = get_raw_email('email_sent_from_my_not_signature'); + test.equal('Here is another email\n\nSent from my desk, is much easier then my mobile phone.', EmailReplyParser.parse(body)); test.done(); -} +}; exports.test_retains_bullets = function(test){ - body = get_raw_email('email_bullets'); - test.equal("test 2 this should list second\n\nand have spaces\n\nand retain this formatting\n\n\n - how about bullets\n - and another", EmailReplyParser.parse_reply(body)); + var body = get_raw_email('email_bullets'); + test.equal('test 2 this should list second\n\nand have spaces\n\nand retain this formatting\n\n\n - how about bullets\n - and another', EmailReplyParser.parse(body)); test.done(); -} +}; -exports.test_parse_reply = function(test){ - body = get_raw_email('email_1_2'); - test.equal(EmailReplyParser.read(body).visible_text(), EmailReplyParser.parse_reply(body)); +exports.test_parse = function(test){ + var body = get_raw_email('email_1_2'); + test.equal(EmailReplyParser.read(body).visible_text(), EmailReplyParser.parse(body)); test.done(); -} +}; exports.test_correctly_reads_top_post_when_line_starts_with_On = function(test){ - reply = get_email('email_1_7'); + var reply = get_email('email_1_7'); test.equal(5, reply.fragments.length); test.deepEqual([false, false, true, false, false], _.map(reply.fragments, function(f) { return f.quoted; })); @@ -191,4 +192,4 @@ exports.test_correctly_reads_top_post_when_line_starts_with_On = function(test){ test.ok((/^On 
[^\:]+\:/m).test(reply.fragments[2].to_s())); test.ok((new RegExp('^_')).test(reply.fragments[4].to_s())); test.done(); -} +}; From efc1c89936f63f9d30133e0922c5a6d46c77bdf4 Mon Sep 17 00:00:00 2001 From: Tom Spencer Date: Fri, 15 Jul 2016 14:45:31 +0100 Subject: [PATCH 2/3] Normalise line endings --- lib/emailreplyparser.js | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/emailreplyparser.js b/lib/emailreplyparser.js index fc98da2..2a17eb2 100644 --- a/lib/emailreplyparser.js +++ b/lib/emailreplyparser.js @@ -97,6 +97,9 @@ Email.prototype = { // in 1.9 we want to operate on the raw bytes // text = text.dup.force_encoding('binary') if text.respond_to?(:force_encoding) + // Normalize every CRLF line ending (global regex; a string pattern would replace only the first match). + text = text.replace(/\r\n/g, '\n'); + // Check for multi-line reply headers. Some clients break up // the "On DATE, NAME wrote:" line into multiple lines. var patt = /^(On\s(\n|.)*wrote:)$/m; From 08348750e8788bf1d45c8996e6fad5de656821d0 Mon Sep 17 00:00:00 2001 From: Tom Spencer Date: Fri, 15 Jul 2016 15:06:16 +0100 Subject: [PATCH 3/3] Fix travis version range --- .travis.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index fca1e50..22e38a0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,8 +1,5 @@ language: node_js node_js: - - '0.4' - - '0.6' - - '0.8' - '0.10' - '0.12' - '4'