@@ -471,6 +471,8 @@ export function isEmpty(regex: ExtRegex): boolean {
471471 return regex . type === 'literal' && CharSet . isEmpty ( regex . charset )
472472}
473473
/**
 * Thrown when one of the memoization caches used while computing derivatives
 * has reached its size limit. Throwing is preferred over letting the process
 * run out of memory, because an error can be caught and handled by the caller.
 */
export class CacheOverflowError extends Error {
  constructor(...args: ConstructorParameters<typeof Error>) {
    super(...args)
    // Without this, logs and stack traces would show the generic name "Error".
    this.name = 'CacheOverflowError'
  }
}
475+
474476export function codePointDerivative ( codePoint : number , regex : StdRegex , cache : Table . Table < StdRegex > ) : StdRegex
475477export function codePointDerivative ( codePoint : number , regex : ExtRegex , cache : Table . Table < ExtRegex > ) : ExtRegex
476478export function codePointDerivative ( codePoint : number , regex : ExtRegex , cache : Table . Table < ExtRegex > ) : ExtRegex {
@@ -521,6 +523,13 @@ function codePointDerivativeAux(codePoint: number, regex: ExtRegex, cache: Table
521523function codePointDerivativeAux ( codePoint : number , regex : ExtRegex , cache : Table . Table < ExtRegex > ) : ExtRegex {
522524 const cachedResult = Table . get ( codePoint , regex . hash , cache )
523525 if ( cachedResult === undefined ) {
526+ // Rather throw an error when cache grows too large than getting OOM killed.
527+ // At least errors can be caught and handled. The limit is somewhat arbitrary.
528+ // TODO: maybe make this user configurable:
529+ if ( Table . size ( cache ) >= 10_000 ) {
530+ throw new CacheOverflowError ( 'Cache overflow while computing DFA transitions.' )
531+ }
532+
524533 const result = codePointDerivative ( codePoint , regex , cache )
525534 Table . set ( codePoint , regex . hash , result , cache )
526535 return result
@@ -608,6 +617,13 @@ function allNonEmptyIntersections(
608617 return resultCached
609618 }
610619
620+ // Rather throw an error when cache grows too large than getting OOM killed.
621+ // At least errors can be caught and handled. The limit is somewhat arbitrary.
622+ // TODO: maybe make this user configurable:
623+ if ( Table . size ( cache ) >= 10_000 ) {
624+ throw new CacheOverflowError ( )
625+ }
626+
611627 const result : CharSet . CharSet [ ] = [ ]
612628 for ( const classA of classesA ) {
613629 for ( const classB of classesB ) {
@@ -668,12 +684,20 @@ export function derivativeClasses(
668684 }
669685 checkedAllCases ( regex )
670686}
687+
671688function derivativeClassesAux (
672689 regex : ExtRegex ,
673690 cache : DerivativeClassesCache
674691) {
675692 const cachedResult = cache . classes . get ( regex . hash )
676693 if ( cachedResult === undefined ) {
694+ // Rather throw an error when cache grows too large than getting OOM killed.
695+ // At least errors can be caught and handled. The limit is somewhat arbitrary.
696+ // TODO: maybe make this user configurable:
697+ if ( cache . classes . size >= 10_000 ) {
698+ throw new CacheOverflowError ( )
699+ }
700+
677701 const result = derivativeClasses ( regex , cache )
678702 cache . classes . set ( regex . hash , result )
679703 return result
@@ -687,6 +711,8 @@ function derivativeClassesAux(
687711///// exclusive standard regex utils /////
688712//////////////////////////////////////////////
689713
/**
 * Thrown when a regex syntax tree has too many nodes to be rendered as a
 * string in reasonable time and memory.
 */
export class VeryLargeSyntaxTreeError extends Error {
  constructor(...args: ConstructorParameters<typeof Error>) {
    super(...args)
    // Without this, logs and stack traces would show the generic name "Error".
    this.name = 'VeryLargeSyntaxTreeError'
  }
}
715+
690716/**
691717 * TODO: docs
692718 *
@@ -697,7 +723,20 @@ export function toRegExp(regex: StdRegex): RegExp {
697723}
698724
/**
 * Renders `regex` as an anchored pattern string of the form `^(...)$`.
 *
 * @param regex - the expression to render.
 * @returns the pattern source string.
 * @throws VeryLargeSyntaxTreeError if the syntax tree has more than 1_000_000 nodes.
 */
export function toString(regex: ExtRegex): string {
  const maxNodeCount = 1_000_000
  const nonCapturingThreshold = 10_000

  const size = nodeCount(regex)
  if (size > maxNodeCount) {
    throw new VeryLargeSyntaxTreeError(
      "Won't try to convert to RegExp. Syntax tree has over 1_000_000 nodes."
    )
  }

  // Large trees are rendered with non-capturing groups, i.e. `/(?:abc)/` instead
  // of `/(abc)/`, because `new RegExp(...)` throws an error if there is a large
  // number of capturing groups. Non-capturing groups are a bit more verbose, but
  // at these sizes that no longer matters.
  const options = { useNonCapturingGroups: size > nonCapturingThreshold }

  const body = astToString(toRegExpAST(regex), options)
  return '^(' + body + ')$'
}
702741
703742// TODO: information is duplicated in parser:
@@ -786,37 +825,43 @@ function toRegExpAST(regex: ExtRegex): RegExpAST {
786825 checkedAllCases ( regex )
787826}
788827
/** Options that control how a `RegExpAST` is rendered to a string. */
type RenderOptions = {
  /** Render grouping parentheses as `(?:...)` instead of `(...)`. */
  useNonCapturingGroups: boolean
}

/**
 * Renders `ast` as a pattern string.
 *
 * Every child node is rendered through `maybeWithParens`, which decides
 * whether the child has to be wrapped in a group. Complement and intersection
 * are written with the symbols `¬` and `∩`.
 *
 * @param ast - the node to render.
 * @param options - rendering options, passed on unchanged to all child nodes.
 * @returns the textual form of `ast`.
 */
function astToString(ast: RegExpAST, options: RenderOptions): string {
  // Renders a direct child of `ast`, adding a group around it where needed.
  const child = (node: RegExpAST): string => maybeWithParens(node, ast, options)

  switch (ast.type) {
    case 'epsilon':
      return ''
    case 'literal':
      return CharSet.toString(ast.charset)
    case 'concat':
      return child(ast.left) + child(ast.right)
    case 'union':
      return child(ast.left) + '|' + child(ast.right)
    case 'star':
      return child(ast.inner) + '*'
    case 'plus':
      return child(ast.inner) + '+'
    case 'optional':
      return child(ast.inner) + '?'
    case 'boundedQuantifier':
      return child(ast.inner) + '{' + ast.count + '}'
    case 'complement':
      return '¬' + child(ast.inner)
    case 'intersection':
      return child(ast.left) + '∩' + child(ast.right)
  }
  checkedAllCases(ast)
}
814857
/**
 * Renders `ast` as a child of `parent`, wrapping it in a group when leaving
 * the group out could change how the result is parsed.
 *
 * No group is added when the child binds tighter than the parent (according
 * to `precLevel`), or when child and parent have the same node type and that
 * type is not a postfix quantifier. Directly nested identical quantifiers
 * always get a group: without it `(a*)*` would be rendered as `a**`, which
 * JavaScript's `RegExp` rejects, and `(a?)?` would become the lazy
 * quantifier `a??`.
 *
 * @param ast - the child node to render.
 * @param parent - the node that directly contains `ast`.
 * @param options - rendering options; selects `(?:...)` or `(...)` groups.
 * @returns the rendered child, with or without a surrounding group.
 */
function maybeWithParens(ast: RegExpAST, parent: RegExpAST, options: RenderOptions): string {
  const isPostfixQuantifier =
    ast.type === 'star' ||
    ast.type === 'plus' ||
    ast.type === 'optional' ||
    ast.type === 'boundedQuantifier'

  const sameTypeNeedsNoGroup = ast.type === parent.type && !isPostfixQuantifier

  if (sameTypeNeedsNoGroup || precLevel(ast.type) > precLevel(parent.type))
    return astToString(ast, options)
  else if (options.useNonCapturingGroups)
    return '(?:' + astToString(ast, options) + ')'
  else
    return '(' + astToString(ast, options) + ')'
}
821866
822867/**
@@ -938,6 +983,43 @@ function sizeMemoizedAux(
938983 }
939984}
940985
/**
 * Counts the nodes in the syntax tree of `regex`. A sub-expression that
 * occurs several times is counted once per occurrence.
 *
 * The traversal uses an explicit work stack instead of recursion, so that
 * very deeply nested expressions cannot overflow the call stack.
 *
 * @param regex - the expression whose nodes are counted.
 * @param cache - memo table from a node's `hash` to its node count. Entries
 *   that are already present are trusted and reused. On return it holds an
 *   entry for every proper sub-expression of `regex` that had to be counted;
 *   the count of `regex` itself is returned but not stored.
 * @returns the number of nodes in the syntax tree of `regex`.
 */
export function nodeCount(
  regex: ExtRegex,
  cache: Map<number, number> = new Map()
): number {
  // Direct sub-expressions of a node (none for leaves).
  const subexpressions = (node: ExtRegex): ExtRegex[] => {
    switch (node.type) {
      case 'epsilon':
      case 'literal':
        return []
      case 'concat':
      case 'union':
      case 'intersection':
        return [node.left, node.right]
      case 'star':
      case 'complement':
        return [node.inner]
    }
    checkedAllCases(node)
  }

  const isUncounted = (node: ExtRegex): boolean => !cache.has(node.hash)

  // Size of a node whose children are all in the cache: the node itself
  // plus the cached size of every child.
  const countFromChildren = (children: ExtRegex[]): number =>
    children.reduce((sum, child) => sum + (cache.get(child.hash) ?? 0), 1)

  const pending = subexpressions(regex).filter(isUncounted)
  for (let node = pending.pop(); node !== undefined; node = pending.pop()) {
    // The same sub-expression may have been queued more than once.
    if (cache.has(node.hash)) continue

    const children = subexpressions(node)
    const uncounted = children.filter(isUncounted)
    if (uncounted.length === 0) {
      cache.set(node.hash, countFromChildren(children))
    } else {
      // Revisit `node` once its missing children have been counted.
      pending.push(node, ...uncounted)
    }
  }

  return countFromChildren(subexpressions(regex))
}
1022+
9411023// export function equivalent(regex1: ExtRegex, regex2: ExtRegex): boolean {
9421024// if (equal(regex1, regex2)) {
9431025// return true
0 commit comments