Skip to content

Commit 0a321af

Browse files
committed
Allow localhost URLs, if they are prefixed by a protocol (ex: 'http://localhost')
1 parent 64da649 commit 0a321af

File tree

2 files changed

+44
-14
lines changed

2 files changed

+44
-14
lines changed

src/Autolinker.js

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -203,18 +203,20 @@ Autolinker.prototype = {
203203
* address. Ex: 'me@my.com'
204204
* 5. Group that matches a URL in the input text. Ex: 'http://google.com', 'www.google.com', or just 'google.com'.
205205
* This also includes a path, url parameters, or hash anchors. Ex: google.com/path/to/file?q1=1&q2=2#myAnchor
206-
* 6. A protocol-relative ('//') match for the case of a 'www.' prefixed URL. Will be an empty string if it is not a
206+
* 6. Group that matches a protocol URL (i.e. 'http://google.com'). This is used to match protocol URLs with just a single
207+
* word, like 'http://localhost', where we won't double check that the domain name has at least one '.' in it.
208+
* 7. A protocol-relative ('//') match for the case of a 'www.' prefixed URL. Will be an empty string if it is not a
207209
* protocol-relative match. We need to know the character before the '//' in order to determine if it is a valid match
208210
* or the // was in a string we don't want to auto-link.
209-
* 7. A protocol-relative ('//') match for the case of a known TLD prefixed URL. Will be an empty string if it is not a
211+
* 8. A protocol-relative ('//') match for the case of a known TLD prefixed URL. Will be an empty string if it is not a
210212
* protocol-relative match. See #7 for more info.
211213
*/
212214
matcherRegex : (function() {
213215
var twitterRegex = /(^|[^\w])@(\w{1,15})/, // For matching a twitter handle. Ex: @gregory_jacobs
214216

215217
emailRegex = /(?:[\-;:&=\+\$,\w\.]+@)/, // something@ for email addresses (a.k.a. local-part)
216218

217-
protocolRegex = /(?:[A-Za-z]{3,9}:(?![A-Za-z]{3,9}:\/\/)(?:\/\/)?)/, // match protocol, allow in format http:// or mailto:
219+
protocolRegex = /(?:[A-Za-z]{3,9}:(?![A-Za-z]{3,9}:\/\/)(?:\/\/)?)/, // match protocol, allow in format "http://" or "mailto:". However, do not match the first part of something like 'link:http://www.google.com' (i.e. don't match "link:")
218220
wwwRegex = /(?:www\.)/, // starting with 'www.'
219221
domainNameRegex = /[A-Za-z0-9\.\-]*[A-Za-z0-9\-]/, // anything looking at all like a domain, non-unicode domains, not ending in a period
220222
tldRegex = /\.(?:international|construction|contractors|enterprises|photography|productions|foundation|immobilien|industries|management|properties|technology|christmas|community|directory|education|equipment|institute|marketing|solutions|vacations|bargains|boutique|builders|catering|cleaning|clothing|computer|democrat|diamonds|graphics|holdings|lighting|partners|plumbing|supplies|training|ventures|academy|careers|company|cruises|domains|exposed|flights|florist|gallery|guitars|holiday|kitchen|neustar|okinawa|recipes|rentals|reviews|shiksha|singles|support|systems|agency|berlin|camera|center|coffee|condos|dating|estate|events|expert|futbol|kaufen|luxury|maison|monash|museum|nagoya|photos|repair|report|social|supply|tattoo|tienda|travel|viajes|villas|vision|voting|voyage|actor|build|cards|cheap|codes|dance|email|glass|house|mango|ninja|parts|photo|shoes|solar|today|tokyo|tools|watch|works|aero|arpa|asia|best|bike|blue|buzz|camp|club|cool|coop|farm|fish|gift|guru|info|jobs|kiwi|kred|land|limo|link|menu|mobi|moda|name|pics|pink|post|qpon|rich|ruhr|sexy|tips|vote|voto|wang|wien|wiki|zone|bar|bid|biz|cab|cat|ceo|com|edu|gov|int|kim|mil|net|onl|org|pro|pub|red|tel|uno|wed|xxx|xyz|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cu|cv|cw|cx|cy|cz|de|dj|dk|dm|do|dz|ec|ee|eg|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|za|zm|zw)\b/, // match our known top level domains (TLDs)
@@ -242,23 +244,23 @@ Autolinker.prototype = {
242244

243245
'(', // *** Capturing group $5, which is used to match a URL
244246
'(?:', // parens to cover match for protocol (optional), and domain
245-
'(?:', // non-capturing paren for a protocol-prefixed url (ex: http://google.com)
247+
'(', // *** Capturing group $6, for a protocol-prefixed url (ex: http://google.com)
246248
protocolRegex.source,
247249
domainNameRegex.source,
248250
')',
249251

250252
'|',
251253

252254
'(?:', // non-capturing paren for a 'www.' prefixed url (ex: www.google.com)
253-
'(.?//)?', // *** Capturing group $6 for an optional protocol-relative URL. Must be at the beginning of the string or start with a non-word character
255+
'(.?//)?', // *** Capturing group $7 for an optional protocol-relative URL. Must be at the beginning of the string or start with a non-word character
254256
wwwRegex.source,
255257
domainNameRegex.source,
256258
')',
257259

258260
'|',
259261

260262
'(?:', // non-capturing paren for a known TLD url (ex: google.com)
261-
'(.?//)?', // *** Capturing group $7 for an optional protocol-relative URL. Must be at the beginning of the string or start with a non-word character
263+
'(.?//)?', // *** Capturing group $8 for an optional protocol-relative URL. Must be at the beginning of the string or start with a non-word character
262264
domainNameRegex.source,
263265
tldRegex.source,
264266
')',
@@ -437,7 +439,7 @@ Autolinker.prototype = {
437439
processTextNode : function( text ) {
438440
var me = this; // for closure
439441

440-
return text.replace( this.matcherRegex, function( matchStr, $1, $2, $3, $4, $5, $6, $7 ) {
442+
return text.replace( this.matcherRegex, function( matchStr, $1, $2, $3, $4, $5, $6, $7, $8 ) {
441443
var matchDescObj = me.processCandidateMatch.apply( me, arguments ); // match description object
442444

443445
// Return out with no changes for match types that are disabled (url, email, twitter), or for matches that are
@@ -468,6 +470,8 @@ Autolinker.prototype = {
468470
* @param {String} twitterHandle The actual Twitter user (i.e the word after the @ sign in a Twitter match).
469471
* @param {String} emailAddressMatch The matched email address for an email address match.
470472
* @param {String} urlMatch The matched URL string for a URL match.
473+
* @param {String} protocolUrlMatch The matched URL string for a protocol match. Ex: 'http://yahoo.com'. This is used to match
474+
* something like 'http://localhost', where we won't double check that the domain name has at least one '.' in it.
471475
* @param {String} wwwProtocolRelativeMatch The '//' for a protocol-relative match from a 'www' url, with the character that
472476
* comes before the '//'.
473477
* @param {String} tldProtocolRelativeMatch The '//' for a protocol-relative match from a TLD (top level domain) match, with
@@ -487,7 +491,7 @@ Autolinker.prototype = {
487491
*/
488492
processCandidateMatch : function(
489493
matchStr, twitterMatch, twitterHandlePrefixWhitespaceChar, twitterHandle,
490-
emailAddressMatch, urlMatch, wwwProtocolRelativeMatch, tldProtocolRelativeMatch
494+
emailAddressMatch, urlMatch, protocolUrlMatch, wwwProtocolRelativeMatch, tldProtocolRelativeMatch
491495
) {
492496
var protocolRelativeMatch = wwwProtocolRelativeMatch || tldProtocolRelativeMatch,
493497
match, // Will be an Autolinker.match.Match object
@@ -498,7 +502,7 @@ Autolinker.prototype = {
498502

499503
// Return out with `null` for match types that are disabled (url, email, twitter), or for matches that are
500504
// invalid (false positives from the matcherRegex, which can't use look-behinds since they are unavailable in JS).
501-
if( !this.isValidMatch( twitterMatch, emailAddressMatch, urlMatch, protocolRelativeMatch ) ) {
505+
if( !this.isValidMatch( twitterMatch, emailAddressMatch, urlMatch, protocolUrlMatch, protocolRelativeMatch ) ) {
502506
return null;
503507
}
504508

@@ -570,16 +574,18 @@ Autolinker.prototype = {
570574
* @param {String} emailAddressMatch The matched Email address, if there was one. Will be empty string if the match is not
571575
* an Email address match.
572576
* @param {String} urlMatch The matched URL, if there was one. Will be an empty string if the match is not a URL match.
577+
* @param {String} protocolUrlMatch The matched URL string for a protocol match. Ex: 'http://yahoo.com'. This is used to match
578+
* something like 'http://localhost', where we won't double check that the domain name has at least one '.' in it.
573579
* @param {String} protocolRelativeMatch The protocol-relative string for a URL match (i.e. '//'), possibly with a preceding
574580
* character (ex, a space, such as: ' //', or a letter, such as: 'a//'). The match is invalid if there is a word character
575581
* preceding the '//'.
576582
* @return {Boolean} `true` if the match given is valid and should be processed, or `false` if the match is invalid and/or
577583
* should just not be processed (such as, if it's a Twitter match, but {@link #twitter} matching is disabled).
578584
*/
579-
isValidMatch : function( twitterMatch, emailAddressMatch, urlMatch, protocolRelativeMatch ) {
580-
if(
585+
isValidMatch : function( twitterMatch, emailAddressMatch, urlMatch, protocolUrlMatch, protocolRelativeMatch ) {
586+
if(
581587
( twitterMatch && !this.twitter ) || ( emailAddressMatch && !this.email ) || ( urlMatch && !this.urls ) ||
582-
( urlMatch && urlMatch.indexOf( '.' ) === -1 ) || // At least one period ('.') must exist in the URL match for us to consider it an actual URL
588+
( urlMatch && ( !protocolUrlMatch || !(/:\/\//).test( protocolUrlMatch ) ) && urlMatch.indexOf( '.' ) === -1 ) || // At least one period ('.') must exist in the URL match for us to consider it an actual URL, *unless* it was a full protocol match (like 'http://localhost')
583589
( urlMatch && /^[A-Za-z]{3,9}:/.test( urlMatch ) && !/:.*?[A-Za-z]/.test( urlMatch ) ) || // At least one letter character must exist in the domain name after a protocol match. Ex: skip over something like "git:1.0"
584590
( protocolRelativeMatch && this.invalidProtocolRelMatchRegex.test( protocolRelativeMatch ) ) // a protocol-relative match which has a word character in front of it (so we can skip something like "abc//google.com")
585591
) {

tests/AutolinkerSpec.js

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,12 @@ describe( "Autolinker", function() {
2929
var result = autolinker.link( "Joe went to http://yahoo.com" );
3030
expect( result ).toBe( 'Joe went to <a href="http://yahoo.com">yahoo.com</a>' );
3131
} );
32+
33+
34+
it( "should automatically link localhost URLs when there is a protocol", function() {
35+
var result = autolinker.link( "Joe went to http://localhost today" );
36+
expect( result ).toBe( 'Joe went to <a href="http://localhost">localhost</a> today' );
37+
} );
3238

3339

3440
it( "should automatically link URLs in the form of http://www.yahoo.com (i.e. protocol and 'www' prefix)", function() {
@@ -85,18 +91,36 @@ describe( "Autolinker", function() {
8591
} );
8692

8793

94+
it( "should automatically link a localhost URL with a port number and a path", function() {
95+
var result = autolinker.link( "Joe went to http://localhost:8000/page today." );
96+
expect( result ).toBe( 'Joe went to <a href="http://localhost:8000/page">localhost:8000/page</a> today.' );
97+
} );
98+
99+
88100
it( "should automatically link URLs with a port number and a query string", function() {
89101
var result = autolinker.link( "Joe went to http://yahoo.com:8000?page=index today." );
90102
expect( result ).toBe( 'Joe went to <a href="http://yahoo.com:8000?page=index">yahoo.com:8000?page=index</a> today.' );
91103
} );
92104

93105

106+
it( "should automatically link a localhost URL with a port number and a query string", function() {
107+
var result = autolinker.link( "Joe went to http://localhost:8000?page=index today." );
108+
expect( result ).toBe( 'Joe went to <a href="http://localhost:8000?page=index">localhost:8000?page=index</a> today.' );
109+
} );
110+
111+
94112
it( "should automatically link URLs with a port number and a hash string", function() {
95113
var result = autolinker.link( "Joe went to http://yahoo.com:8000#page=index today." );
96114
expect( result ).toBe( 'Joe went to <a href="http://yahoo.com:8000#page=index">yahoo.com:8000#page=index</a> today.' );
97115
} );
98116

99117

118+
it( "should automatically link a localhost URL with a port number and a hash string", function() {
119+
var result = autolinker.link( "Joe went to http://localhost:8000#page=index today." );
120+
expect( result ).toBe( 'Joe went to <a href="http://localhost:8000#page=index">localhost:8000#page=index</a> today.' );
121+
} );
122+
123+
100124
it( "should NOT automatically link strings of the form 'git:d' (using the heuristic that the domain name does not have a '.' in it)", function() {
101125
var result = autolinker.link( 'Something like git:d should not be linked as a URL' );
102126
expect( result ).toBe( 'Something like git:d should not be linked as a URL' );
@@ -141,10 +165,10 @@ describe( "Autolinker", function() {
141165
}
142166
} );
143167

144-
it( "should NOT include preceding : introductions without a space", function() {
168+
it( "should NOT include preceding ':' introductions without a space", function() {
145169
var result = autolinker.link( 'the link:http://example.com/' );
146170
expect( result ).toBe( 'the link:<a href="http://example.com/">example.com</a>' );
147-
});
171+
} );
148172

149173
} );
150174

0 commit comments

Comments
 (0)