@@ -6,7 +6,7 @@ import * as Table from './table';
66/**
77 * TODO
88 */
9- type StdRegexWithoutHash = (
9+ type StdRegexWithoutMetaInfo = (
1010 | { type : "epsilon" }
1111 | { type : "literal" , charset : CharSet . CharSet }
1212 | { type : "concat" , left : StdRegex , right : StdRegex }
@@ -17,7 +17,7 @@ type StdRegexWithoutHash = (
1717/**
1818 * TODO
1919 */
20- type ExtRegexWithoutHash = (
20+ type ExtRegexWithoutMetaInfo = (
2121 | { type : "epsilon" }
2222 | { type : "literal" , charset : CharSet . CharSet }
2323 | { type : "concat" , left : ExtRegex , right : ExtRegex }
@@ -31,40 +31,81 @@ type ExtRegexWithoutHash = (
3131/**
3232 * TODO: docs
3333 */
34- export type StdRegex = StdRegexWithoutHash & { hash : number }
34+ export type StdRegex = StdRegexWithoutMetaInfo & { hash : number , isStdRegex : true }
3535
3636/**
3737 * TODO: docs
3838 */
39- export type ExtRegex = ExtRegexWithoutHash & { hash : number }
39+ export type ExtRegex = ExtRegexWithoutMetaInfo & { hash : number , isStdRegex : boolean }
4040
41- export function withHash ( regex : StdRegexWithoutHash ) : StdRegex
42- export function withHash ( regex : ExtRegexWithoutHash ) : ExtRegex
43- export function withHash ( regex : ExtRegexWithoutHash ) : ExtRegex {
41+ export function withMetaInfo ( regex : StdRegexWithoutMetaInfo ) : StdRegex
42+ export function withMetaInfo ( regex : ExtRegexWithoutMetaInfo ) : ExtRegex
43+ export function withMetaInfo ( regex : ExtRegexWithoutMetaInfo ) : ExtRegex {
4444 if ( regex . type === 'epsilon' )
45- return { ...regex , hash : hashStr ( regex . type ) }
45+ return {
46+ ...regex ,
47+ hash : hashStr ( regex . type ) ,
48+ isStdRegex : true ,
49+ }
4650 else if ( regex . type === 'literal' )
47- return { ...regex , hash : hashNums ( [ hashStr ( regex . type ) , regex . charset . hash ] ) }
48- else if ( regex . type === 'concat' || regex . type === 'union' || regex . type === 'intersection' )
49- return { ...regex , hash : hashNums ( [
50- hashStr ( regex . type ) ,
51- // Need non-commutative hash operator for `concat`, otherwise "ac" and "ca" are the same:
52- regex . left . hash ,
53- regex . right . hash ,
54- ] ) }
55- else if ( regex . type === 'star' || regex . type === 'complement' )
56- return { ...regex , hash : hashNums ( [ hashStr ( regex . type ) , regex . inner . hash ] ) }
51+ return {
52+ ...regex ,
53+ hash : hashNums ( [ hashStr ( regex . type ) , regex . charset . hash ] ) ,
54+ isStdRegex : true ,
55+ }
56+ else if ( regex . type === 'concat' || regex . type === 'union' )
57+ return {
58+ ...regex ,
59+ hash : hashNums ( [
60+ hashStr ( regex . type ) ,
61+ // Need non-commutative hash operator for `concat`, otherwise "ac" and "ca" are the same:
62+ regex . left . hash ,
63+ regex . right . hash ,
64+ ] ) ,
65+ isStdRegex : regex . left . isStdRegex && regex . right . isStdRegex ,
66+ }
67+ else if ( regex . type === 'intersection' )
68+ return {
69+ ...regex ,
70+ hash : hashNums ( [
71+ hashStr ( regex . type ) ,
72+ regex . left . hash ,
73+ regex . right . hash ,
74+ ] ) ,
75+ isStdRegex : false ,
76+ }
77+ else if ( regex . type === 'star' )
78+ return {
79+ ...regex ,
80+ hash : hashNums ( [ hashStr ( regex . type ) , regex . inner . hash ] ) ,
81+ isStdRegex : regex . inner . isStdRegex ,
82+ }
83+ else if ( regex . type === 'complement' )
84+ return {
85+ ...regex ,
86+ hash : hashNums ( [ hashStr ( regex . type ) , regex . inner . hash ] ) ,
87+ isStdRegex : false
88+ }
5789 checkedAllCases ( regex )
5890}
5991
92+ /**
93+ * Type guard: returns the regex's `isStdRegex` flag, narrowing it to `StdRegex` when true.
94+ *
95+ * @public
96+ */
97+ export function isStdRegex ( regex : ExtRegex ) : regex is StdRegex {
98+ return regex . isStdRegex
99+ }
100+
60101//////////////////////////////////////////////
61102///// primitive composite constructors ///////
62103//////////////////////////////////////////////
63104
64- export const epsilon : StdRegex = withHash ( { type : 'epsilon' } )
105+ export const epsilon : StdRegex = withMetaInfo ( { type : 'epsilon' } )
65106
66107export function literal ( charset : CharSet . CharSet ) : StdRegex {
67- return withHash ( { type : 'literal' , charset } )
108+ return withMetaInfo ( { type : 'literal' , charset } )
68109}
69110
70111export const empty : StdRegex = literal ( CharSet . empty )
@@ -113,7 +154,7 @@ export function concat(left: ExtRegex, right: ExtRegex): ExtRegex {
113154 return concat ( left , right . right )
114155 }
115156
116- return withHash ( { type : 'concat' , left, right } )
157+ return withMetaInfo ( { type : 'concat' , left, right } )
117158}
118159
119160function extractFront ( regex : StdRegex ) : [ StdRegex , StdRegex ]
@@ -212,7 +253,7 @@ export function union(left: ExtRegex, right: ExtRegex): ExtRegex {
212253 // r + (s · r) = (s + ε) · r
213254 return concat ( union ( leftInit , rightInit ) , leftLast )
214255
215- return withHash ( { type : 'union' , left, right } )
256+ return withMetaInfo ( { type : 'union' , left, right } )
216257}
217258
218259export function star ( inner : StdRegex ) : StdRegex
@@ -231,7 +272,7 @@ export function star(inner: ExtRegex): ExtRegex {
231272 // (r∗ · s∗)∗ = (r + s)∗
232273 return star ( union ( inner . left . inner , inner . right . inner ) )
233274 else
234- return withHash ( { type : "star" , inner } )
275+ return withMetaInfo ( { type : "star" , inner } )
235276}
236277
237278export function intersection ( left : ExtRegex , right : ExtRegex ) : ExtRegex {
@@ -257,7 +298,7 @@ export function intersection(left: ExtRegex, right: ExtRegex): ExtRegex {
257298 // R & S ≈ R∩S
258299 return literal ( CharSet . intersection ( left . charset , right . charset ) )
259300
260- return withHash ( { type : "intersection" , left, right } )
301+ return withMetaInfo ( { type : "intersection" , left, right } )
261302}
262303
263304/**
@@ -274,7 +315,7 @@ export function complement(inner: ExtRegex): ExtRegex {
274315 // // ¬S ≈ Σ\S
275316 // return literal(CharSet.complement(inner.charset))
276317 else
277- return withHash ( { type : "complement" , inner } )
318+ return withMetaInfo ( { type : "complement" , inner } )
278319}
279320
280321//////////////////////////////////////////////
@@ -733,7 +774,7 @@ export function toString(regex: ExtRegex): string {
733774 // Render parentheses as non-capturing groups if there is a large number of them,
734775 // i.e. `/(?:abc)/` instead of `/(abc)/`. `new RegExp(...)` throws an error if there
735776 // is a large number of capturing groups. Non-capturing groups are a bit more verbose
736- // but at large sizes like this it doesn't matter anyway :
777+ // but at large sizes like this it doesn't matter anyway:
737778 const useNonCapturingGroups = size > 10_000
738779
739780 return '^(' + astToString ( toRegExpAST ( regex ) , { useNonCapturingGroups } ) + ')$'
0 commit comments