@@ -40,9 +40,9 @@ import type { StripPrefixConfigObj } from '../autolinker';
4040
4141/**
4242 * Context object containing all the state needed by the state machine functions.
43- *
43+ *
4444 * ## Historical note
45- *
45+ *
4646 * In v4.1.1, we used nested functions to handle the context via closures, but
4747 * this necessitated re-creating the functions for each call to `parseMatches()`,
4848 * which made them difficult for v8 to JIT optimize. In v4.1.2, we lifted all of
@@ -273,7 +273,7 @@ export function parseMatches(text: string, args: ParseMatchesArgs): Match[] {
273273 // String(charIdx),
274274 // char,
275275 // `10: ${char.charCodeAt(0)}\n0x: ${char.charCodeAt(0).toString(16)}\nU+${char.codePointAt(0)}`,
276- // stateMachines.map(machine => `${machine.type}${'matchType' in machine ? ` (${machine.matchType})` : ''}`).join('\n') || '(none)',
276+ // stateMachines.map(machine => `${StateMachineType[ machine.type] }${'matchType' in machine ? ` (${UrlStateMachineMatchType[ machine.matchType] })` : ''}`).join('\n') || '(none)',
277277 // stateMachines.map(machine => State[machine.state]).join('\n') || '(none)',
278278 // String(charIdx),
279279 // stateMachines.map(m => m.startIdx).join('\n'),
@@ -1102,7 +1102,7 @@ function captureMatchIfValidAndRemove(context: ParseMatchesContext, stateMachine
11021102 matchedText = excludeUnbalancedTrailingBracesAndPunctuation ( matchedText ) ;
11031103
11041104 switch ( stateMachine . type ) {
1105- case 'url' : {
1105+ case StateMachineType . Url : {
11061106 // We don't want to accidentally match a URL that is preceded by an
11071107 // '@' character, which would be an email address
11081108 const charBeforeUrlMatch = text . charAt ( stateMachine . startIdx - 1 ) ;
@@ -1116,9 +1116,8 @@ function captureMatchIfValidAndRemove(context: ParseMatchesContext, stateMachine
11161116 // that begin with 'www.' so that users may turn off 'www'
11171117 // matches. As such, we need to correct for that now if the
11181118 // URL begins with 'www.'
1119- const urlMatchType : UrlMatchType = stateMachine . matchType ;
1120- switch ( urlMatchType ) {
1121- case 'scheme' : {
1119+ switch ( stateMachine . matchType ) {
1120+ case UrlStateMachineMatchType . Scheme : {
11221121 // Autolinker accepts many characters in a url's scheme (like `fake://test.com`).
11231122 // However, in cases where a URL is missing whitespace before an obvious link,
11241123 // (for example: `nowhitespacehttp://www.test.com`), we only want the match to start
@@ -1139,14 +1138,14 @@ function captureMatchIfValidAndRemove(context: ParseMatchesContext, stateMachine
11391138 break ;
11401139 }
11411140
1142- case 'tld' : {
1141+ case UrlStateMachineMatchType . Tld : {
11431142 if ( ! isValidTldMatch ( matchedText ) ) {
11441143 return ; // not a valid match
11451144 }
11461145 break ;
11471146 }
11481147
1149- case 'ipV4' : {
1148+ case UrlStateMachineMatchType . IpV4 : {
11501149 if ( ! isValidIpV4Address ( matchedText ) ) {
11511150 return ; // not a valid match
11521151 }
@@ -1155,15 +1154,15 @@ function captureMatchIfValidAndRemove(context: ParseMatchesContext, stateMachine
11551154
11561155 /* istanbul ignore next */
11571156 default :
1158- assertNever ( urlMatchType ) ;
1157+ assertNever ( stateMachine ) ;
11591158 }
11601159
11611160 matches . push (
11621161 new UrlMatch ( {
11631162 tagBuilder : tagBuilder ,
11641163 matchedText : matchedText ,
11651164 offset : startIdx ,
1166- urlMatchType : urlMatchType ,
1165+ urlMatchType : toUrlMatchType ( stateMachine . matchType ) ,
11671166 url : matchedText ,
11681167 protocolRelativeMatch : matchedText . slice ( 0 , 2 ) === '//' ,
11691168
@@ -1177,7 +1176,7 @@ function captureMatchIfValidAndRemove(context: ParseMatchesContext, stateMachine
11771176 break ;
11781177 }
11791178
1180- case 'email' : {
1179+ case StateMachineType . Email : {
11811180 // if the email address has a valid TLD, add it to the list of matches
11821181 if ( isValidEmail ( matchedText ) ) {
11831182 matches . push (
@@ -1192,7 +1191,7 @@ function captureMatchIfValidAndRemove(context: ParseMatchesContext, stateMachine
11921191 break ;
11931192 }
11941193
1195- case 'hashtag' : {
1194+ case StateMachineType . Hashtag : {
11961195 if ( isValidHashtag ( matchedText ) ) {
11971196 matches . push (
11981197 new HashtagMatch ( {
@@ -1207,7 +1206,7 @@ function captureMatchIfValidAndRemove(context: ParseMatchesContext, stateMachine
12071206 break ;
12081207 }
12091208
1210- case 'mention' : {
1209+ case StateMachineType . Mention : {
12111210 if ( isValidMention ( matchedText , mentionServiceName ) ) {
12121211 matches . push (
12131212 new MentionMatch ( {
@@ -1222,7 +1221,7 @@ function captureMatchIfValidAndRemove(context: ParseMatchesContext, stateMachine
12221221 break ;
12231222 }
12241223
1225- case 'phone' : {
1224+ case StateMachineType . Phone : {
12261225 // remove any trailing spaces that were considered as "separator"
12271226 // chars by the state machine
12281227 matchedText = matchedText . replace ( / + $ / g, '' ) ;
@@ -1266,6 +1265,25 @@ const oppositeBrace: { [char: string]: string } = {
12661265 ']' : '[' ,
12671266} ;
12681267
1268+ /**
1269+ * Helper function to convert a UrlStateMachineMatchType value to its
1270+ * UrlMatchType equivalent.
1271+ */
1272+ function toUrlMatchType ( stateMachineMatchType : UrlStateMachineMatchType ) : UrlMatchType {
1273+ switch ( stateMachineMatchType ) {
1274+ case UrlStateMachineMatchType . Scheme :
1275+ return 'scheme' ;
1276+ case UrlStateMachineMatchType . Tld :
1277+ return 'tld' ;
1278+ case UrlStateMachineMatchType . IpV4 :
1279+ return 'ipV4' ;
1280+
1281+ /* istanbul ignore next */
1282+ default :
1283+ assertNever ( stateMachineMatchType ) ;
1284+ }
1285+ }
1286+
12691287/**
12701288 * Determines if a match found has unmatched closing parenthesis,
12711289 * square brackets or curly brackets. If so, these unbalanced symbol(s) will be
@@ -1398,6 +1416,16 @@ const enum State {
13981416 PhoneNumberPoundChar , // '#' for pound character
13991417}
14001418
1419+ // The type of state machine
1420+ // For debugging: temporarily remove `const` from `const enum`
1421+ const enum StateMachineType {
1422+ Url = 0 ,
1423+ Email ,
1424+ Hashtag ,
1425+ Mention ,
1426+ Phone ,
1427+ }
1428+
14011429type StateMachine =
14021430 | UrlStateMachine
14031431 | EmailStateMachine
@@ -1411,8 +1439,16 @@ interface AbstractStateMachine {
14111439 acceptStateReached : boolean ;
14121440}
14131441
1442+ // The type of URL state machine
1443+ // For debugging: temporarily remove `const` from `const enum`
1444+ const enum UrlStateMachineMatchType {
1445+ Scheme = 0 , // http://, https://, file://, etc. match
1446+ Tld , // Top-level Domain (TLD)
1447+ IpV4 , // 192.168.0.1
1448+ }
1449+
14141450interface AbstractUrlStateMachine extends AbstractStateMachine {
1415- readonly type : 'url' ;
1451+ readonly type : StateMachineType . Url ;
14161452}
14171453
14181454type UrlStateMachine = SchemeUrlStateMachine | TldUrlStateMachine | IpV4UrlStateMachine ;
@@ -1421,44 +1457,44 @@ type UrlStateMachine = SchemeUrlStateMachine | TldUrlStateMachine | IpV4UrlState
14221458 * State machine with metadata for capturing URLs that begin with a scheme (such as 'http://').
14221458 */
14231459interface SchemeUrlStateMachine extends AbstractUrlStateMachine {
1424- readonly matchType : 'scheme' ;
1460+ readonly matchType : UrlStateMachineMatchType . Scheme ;
14251461}
14261462
14271463/**
14281464 * State machine with metadata for capturing TLD (top-level domain) URLs.
14291465 */
14301466interface TldUrlStateMachine extends AbstractUrlStateMachine {
1431- readonly matchType : 'tld' ;
1467+ readonly matchType : UrlStateMachineMatchType . Tld ;
14321468}
14331469
14341470/**
14351471 * State machine for capturing IPv4 addresses that are not prefixed with a
14361472 * scheme (such as 'http://').
14371473 */
14381474interface IpV4UrlStateMachine extends AbstractUrlStateMachine {
1439- readonly matchType : 'ipV4' ;
1475+ readonly matchType : UrlStateMachineMatchType . IpV4 ;
14401476 octetsEncountered : number ; // if we encounter a number of octets other than 4, it's not an IPv4 address
14411477}
14421478
14431479/**
14441480 * State machine for capturing email addresses.
14451481 */
14461482interface EmailStateMachine extends AbstractStateMachine {
1447- readonly type : 'email' ;
1483+ readonly type : StateMachineType . Email ;
14481484}
14491485
14501486/**
14511487 * State machine for capturing hashtags.
14521488 */
14531489interface HashtagStateMachine extends AbstractStateMachine {
1454- readonly type : 'hashtag' ;
1490+ readonly type : StateMachineType . Hashtag ;
14551491}
14561492
14571493/**
14581494 * State machine for capturing mentions.
14591495 */
14601496interface MentionStateMachine extends AbstractStateMachine {
1461- readonly type : 'mention' ;
1497+ readonly type : StateMachineType . Mention ;
14621498}
14631499
14641500/**
@@ -1469,43 +1505,43 @@ interface MentionStateMachine extends AbstractStateMachine {
14691505 * otherwise potentially think a phone number is part of a domain label.
14701506 */
14711507interface PhoneNumberStateMachine extends AbstractStateMachine {
1472- readonly type : 'phone' ;
1508+ readonly type : StateMachineType . Phone ;
14731509}
14741510
14751511function createSchemeUrlStateMachine ( startIdx : number , state : State ) : SchemeUrlStateMachine {
14761512 return {
1477- type : 'url' ,
1513+ type : StateMachineType . Url ,
14781514 startIdx,
14791515 state,
14801516 acceptStateReached : false ,
1481- matchType : 'scheme' ,
1517+ matchType : UrlStateMachineMatchType . Scheme ,
14821518 } ;
14831519}
14841520
14851521function createTldUrlStateMachine ( startIdx : number , state : State ) : TldUrlStateMachine {
14861522 return {
1487- type : 'url' ,
1523+ type : StateMachineType . Url ,
14881524 startIdx,
14891525 state,
14901526 acceptStateReached : false ,
1491- matchType : 'tld' ,
1527+ matchType : UrlStateMachineMatchType . Tld ,
14921528 } ;
14931529}
14941530
14951531function createIpV4UrlStateMachine ( startIdx : number , state : State ) : IpV4UrlStateMachine {
14961532 return {
1497- type : 'url' ,
1533+ type : StateMachineType . Url ,
14981534 startIdx,
14991535 state,
15001536 acceptStateReached : false ,
1501- matchType : 'ipV4' ,
1537+ matchType : UrlStateMachineMatchType . IpV4 ,
15021538 octetsEncountered : 1 , // starts at 1 because we create this machine when encountering the first octet
15031539 } ;
15041540}
15051541
15061542function createEmailStateMachine ( startIdx : number , state : State ) : EmailStateMachine {
15071543 return {
1508- type : 'email' ,
1544+ type : StateMachineType . Email ,
15091545 startIdx,
15101546 state,
15111547 acceptStateReached : false ,
@@ -1514,7 +1550,7 @@ function createEmailStateMachine(startIdx: number, state: State): EmailStateMach
15141550
15151551function createHashtagStateMachine ( startIdx : number , state : State ) : HashtagStateMachine {
15161552 return {
1517- type : 'hashtag' ,
1553+ type : StateMachineType . Hashtag ,
15181554 startIdx,
15191555 state,
15201556 acceptStateReached : false ,
@@ -1523,7 +1559,7 @@ function createHashtagStateMachine(startIdx: number, state: State): HashtagState
15231559
15241560function createMentionStateMachine ( startIdx : number , state : State ) : MentionStateMachine {
15251561 return {
1526- type : 'mention' ,
1562+ type : StateMachineType . Mention ,
15271563 startIdx,
15281564 state,
15291565 acceptStateReached : false ,
@@ -1532,13 +1568,16 @@ function createMentionStateMachine(startIdx: number, state: State): MentionState
15321568
15331569function createPhoneNumberStateMachine ( startIdx : number , state : State ) : PhoneNumberStateMachine {
15341570 return {
1535- type : 'phone' ,
1571+ type : StateMachineType . Phone ,
15361572 startIdx,
15371573 state,
15381574 acceptStateReached : false ,
15391575 } ;
15401576}
15411577
15421578function isSchemeUrlStateMachine ( machine : StateMachine ) : machine is SchemeUrlStateMachine {
1543- return machine . type === 'url' && machine . matchType === 'scheme' ;
1579+ return (
1580+ machine . type === StateMachineType . Url &&
1581+ machine . matchType === UrlStateMachineMatchType . Scheme
1582+ ) ;
15441583}
0 commit comments