From 90314722be57e9f64df98e3382dada8080419ccb Mon Sep 17 00:00:00 2001 From: Jesse Date: Thu, 13 Aug 2015 18:00:34 +0800 Subject: [PATCH] Add re2 and pcre jit test and disable some test items that cannot obtain expected result by posix library. Updated gcc-performance.html for CentOS 6.4. --- doc/gcc-performance.html | 625 ++++++------------------------- performance/Jamfile.v2 | 28 +- performance/command_line.cpp | 82 +++- performance/input.html | 2 +- performance/main.cpp | 55 ++- performance/regex_comparison.hpp | 23 +- 6 files changed, 287 insertions(+), 528 deletions(-) diff --git a/doc/gcc-performance.html b/doc/gcc-performance.html index 5dcea95ab..80f38d7ff 100644 --- a/doc/gcc-performance.html +++ b/doc/gcc-performance.html @@ -1,543 +1,142 @@ - - Regular Expression Performance Comparison (gcc 3.2) - + Regular Expression Performance Comparison - + +

Regular Expression Performance Comparison

-

The following tables provide comparisons between the following regular +

+ The following tables provide comparisons between the following regular expression libraries:

+

GRETA.

The Boost regex library.

-

The GNU regular expression library.

-

Philip Hazel's PCRE library.

-

Details

-

Machine: Intel Pentium 4 2.8GHz PC.

-

Compiler: GNU C++ version 3.2 20020927 (prerelease).

-

C++ Standard Library: GNU libstdc++ version 20020927.

-

OS: Cygwin.

-

Boost version: 1.31.0.

-

PCRE version: 4.1.

-

As ever care should be taken in interpreting the results, only sensible regular +

Henry Spencer's regular expression library + - this is provided for comparison as a typical non-backtracking implementation.

+

Philip Hazel's PCRE library.

+

Details

+

Machine: Intel Xeon E5405 2.0GHz Server.

+

Compiler: GNU C++ version 4.4.7 20120313 (Red Hat 4.4.7-4).

+

C++ Standard Library: GNU libstdc++ version 20120313.

+

OS: CentOS 6.4.

+

Boost version: 1.56.0.

+

PCRE version: 8.37.

+

+ As ever care should be taken in interpreting the results, only sensible regular expressions (rather than pathological cases) are given, most are taken from the Boost regex examples, or from the Library of Regular Expressions. In addition, some variation in the relative performance of these libraries can be expected on other machines - as memory access and processor caching effects can be quite large for most finite state - machine algorithms. In each case the first figure given is the relative time - taken (so a value of 1.0 is as good as it gets), while the second figure is the - actual time taken.

-

Averages

-

The following are the average relative scores for all the tests: the perfect + machine algorithms.

+

Averages

+

The following are the average relative scores for all the tests: the perfect regular expression library would score 1, in practice anything less than 2 - is pretty good.

- - - - - - - - - - - - - -
BoostBoost + C++ localePOSIXPCRE
1.45031.49124108.3721.56255
-
-
+ is pretty good.

+

+ + + + + + + + + + + +
GRETAGRETA
(non-recursive mode)
BoostBoost + C++ localePOSIXPCREPCRE JITDynamic Xpressivegoogle RE2
5.015048.364695.777555.7310712.80164.812981.422273.867123.74945
+

Comparison 1: Long Search

For each of the following regular expressions the time taken to find all occurrences of the expression within a long English language text was measured (mtent12.txt from Project Gutenberg, 19Mb). 

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ExpressionBoostBoost + C++ localePOSIXPCRE
Twain3.49
- (0.205s)
4.09
- (0.24s)
65.2
- (3.83s)
1
- (0.0588s)
Huck[[:alpha:]]+3.86
- (0.203s)
4.52
- (0.238s)
100
- (5.26s)
1
- (0.0526s)
[[:alpha:]]+ing1.01
- (1.23s)
1
- (1.22s)
4.95
- (6.04s)
4.67
- (5.71s)
^[^ ]*?Twain1
- (0.31s)
1.05
- (0.326s)
NA3.32
- (1.03s)
Tom|Sawyer|Huckleberry|Finn1.02
- (0.125s)
1
- (0.123s)
165
- (20.3s)
1.08
- (0.133s)
(Tom|Sawyer|Huckleberry|Finn).{0,30}river|river.{0,30}(Tom|Sawyer|Huckleberry|Finn)1
- (0.345s)
1.03
- (0.355s)
NA1.71
- (0.59s)
-
-
+

+ + + + + + + +
ExpressionGRETAGRETA
(non-recursive mode)
BoostBoost + C++ localePOSIXPCREPCRE JITDynamic XpressiveRE2
Twain3.64
(0.0256s)
3.64
(0.0256s)
4.98
(0.035s)
5.16
(0.0362s)
3.96
(0.0278s)
4.8
(0.0338s)
2.84
(0.02s)
3.64
(0.0256s)
1
(0.00703s)
Huck[[:alpha:]]+4.96
(0.0259s)
4.96
(0.0259s)
6.45
(0.0338s)
6.33
(0.0331s)
4.9
(0.0256s)
6.09
(0.0319s)
3.7
(0.0194s)
4.78
(0.025s)
1
(0.00523s)
[[:alpha:]]+ing10.4
(1.46s)
19
(2.66s)
3.5
(0.49s)
3.57
(0.5s)
8.43
(1.18s)
14.2
(1.99s)
4.5
(0.63s)
3.11
(0.435s)
1
(0.14s)
^[^ +]*?Twain5.61
(0.47s)
20.4
(1.71s)
2.69
(0.225s)
2.69
(0.225s)
NA4.96
(0.415s)
1.24
(0.104s)
2.84
(0.237s)
1
(0.0838s)
Tom|Sawyer|Huckleberry|Finn6.94
(0.23s)
13.4
(0.445s)
1.38
(0.0456s)
1.36
(0.045s)
1
(0.0331s)
2
(0.0663s)
1.6
(0.0531s)
1.45
(0.0481s)
2.53
(0.0838s)
(Tom|Sawyer|Huckleberry|Finn).{0,30}river|river.{0,30}(Tom|Sawyer|Huckleberry|Finn)5.71
(0.45s)
6.86
(0.54s)
1.44
(0.114s)
1.48
(0.116s)
1.43
(0.113s)
2.6
(0.205s)
1
(0.0788s)
1.3
(0.102s)
1.05
(0.0825s)
+

Comparison 2: Medium Sized Search

For each of the following regular expressions the time taken to find all occurrences of the expression within a medium sized English language text was - measured (the first 50K from mtent12.txt). 

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ExpressionBoostBoost + C++ localePOSIXPCRE
Twain1.8
- (0.000519s)
2.14
- (0.000616s)
9.08
- (0.00262s)
1
- (0.000289s)
Huck[[:alpha:]]+3.65
- (0.000499s)
4.36
- (0.000597s)
1
- (0.000137s)
1.43
- (0.000196s)
[[:alpha:]]+ing1
- (0.00258s)
1
- (0.00258s)
5.28
- (0.0136s)
5.63
- (0.0145s)
^[^ ]*?Twain1
- (0.000929s)
1.03
- (0.000957s)
NA2.82
- (0.00262s)
Tom|Sawyer|Huckleberry|Finn1
- (0.000812s)
1
- (0.000812s)
60.1
- (0.0488s)
1.28
- (0.00104s)
(Tom|Sawyer|Huckleberry|Finn).{0,30}river|river.{0,30}(Tom|Sawyer|Huckleberry|Finn)1.02
- (0.00178s)
1
- (0.00174s)
242
- (0.421s)
1.3
- (0.00227s)
-
-
-

Comparison 3: C++ Code Search

-

For each of the following regular expressions the time taken to find all - occurrences of the expression within the C++ source file - boost/crc.hpp was measured. 

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ExpressionBoostBoost + C++ localePOSIXPCRE
^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?(class|struct)[[:space:]]*(\<\w+\>([ - ]*\([^)]*\))?[[:space:]]*)*(\<\w*\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?(\{|:[^;\{()]*\{)1.04
- (0.000144s)
1
- (0.000139s)
862
- (0.12s)
4.56
- (0.000636s)
(^[ - ]*#(?:[^\\\n]|\\[^\n_[:punct:][:alnum:]]*[\n[:punct:][:word:]])*)|(//[^\n]*|/\*.*?\*/)|\<([+-]?(?:(?:0x[[:xdigit:]]+)|(?:(?:[[:digit:]]*\.)?[[:digit:]]+(?:[eE][+-]?[[:digit:]]+)?))u?(?:(?:int(?:8|16|32|64))|L)?)\>|('(?:[^\\']|\\.)*'|"(?:[^\\"]|\\.)*")|\<(__asm|__cdecl|__declspec|__export|__far16|__fastcall|__fortran|__import|__pascal|__rtti|__stdcall|_asm|_cdecl|__except|_export|_far16|_fastcall|__finally|_fortran|_import|_pascal|_stdcall|__thread|__try|asm|auto|bool|break|case|catch|cdecl|char|class|const|const_cast|continue|default|delete|do|double|dynamic_cast|else|enum|explicit|extern|false|float|for|friend|goto|if|inline|int|long|mutable|namespace|new|operator|pascal|private|protected|public|register|reinterpret_cast|return|short|signed|sizeof|static|static_cast|struct|switch|template|this|throw|true|try|typedef|typeid|typename|union|unsigned|using|virtual|void|volatile|wchar_t|while)\>1
- (0.0139s)
1.01
- (0.0141s)
NA1.55
- (0.0216s)
^[ ]*#[ ]*include[ ]+("[^"]+"|<[^>]+>)1.04
- (0.000332s)
1
- (0.000318s)
130
- (0.0413s)
1.72
- (0.000547s)
^[ ]*#[ ]*include[ ]+("boost/[^"]+"|<boost/[^>]+>)1.02
- (0.000323s)
1
- (0.000318s)
150
- (0.0476s)
1.72
- (0.000547s)
-
-

-

Comparison 4: HTML Document Search + measured (the first 50K from mtent12.txt - up to the end of Chapter 1). 

+

+ + + + + + + +
ExpressionGRETAGRETA
(non-recursive mode)
BoostBoost + C++ localePOSIXPCREPCRE JITDynamic XpressiveRE2
Twain1.53
(5.98e-05s)
1.56
(6.1e-05s)
3.62
(0.000142s)
3.62
(0.000142s)
3.31
(0.000129s)
3.31
(0.000129s)
1.19
(4.64e-05s)
1.69
(6.59e-05s)
1
(3.91e-05s)
Huck[[:alpha:]]+3.48
(6.59e-05s)
3.48
(6.59e-05s)
6.06
(0.000115s)
5.81
(0.00011s)
4.77
(9.03e-05s)
5.35
(0.000101s)
2.52
(4.76e-05s)
3.35
(6.35e-05s)
1
(1.89e-05s)
[[:alpha:]]+ing12
(0.00375s)
22.2
(0.00695s)
3.44
(0.00107s)
3.5
(0.00109s)
9.25
(0.00289s)
15.5
(0.00484s)
5.19
(0.00162s)
3.56
(0.00111s)
1
(0.000313s)
^[^ +]*?Twain5.57
(0.00121s)
16.5
(0.00359s)
2.88
(0.000625s)
2.88
(0.000625s)
NA5.03
(0.00109s)
1.24
(0.000269s)
2.97
(0.000645s)
1
(0.000217s)
Tom|Sawyer|Huckleberry|Finn5.87
(0.000674s)
11.7
(0.00135s)
2.77
(0.000317s)
2.77
(0.000317s)
2.02
(0.000232s)
3.7
(0.000425s)
1
(0.000115s)
2.55
(0.000293s)
1.85
(0.000212s)
(Tom|Sawyer|Huckleberry|Finn).{0,30}river|river.{0,30}(Tom|Sawyer|Huckleberry|Finn)3.77
(0.000791s)
8.09
(0.0017s)
2.93
(0.000615s)
2.93
(0.000615s)
2.07
(0.000435s)
3.86
(0.000811s)
1.19
(0.000249s)
2.21
(0.000464s)
1
(0.00021s)
+

+

Comparison 3: C++ Code Search

+

For each of the following regular expressions the time taken to find all + occurrences of the expression within the C++ source file + boost/crc.hpp was measured. 

+

+ + + + + +
ExpressionGRETAGRETA
(non-recursive mode)
BoostBoost + C++ localePOSIXPCREPCRE JITDynamic XpressiveRE2
^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?(class|struct)[[:space:]]*(\<\w+\>([ ]*\([^)]*\))?[[:space:]]*)*(\<\w*\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?(\{|:[^;\{()]*\{)22.9
(0.00162s)
22.9
(0.00162s)
1.47
(0.000104s)
1.48
(0.000105s)
NA4.41
(0.000313s)
1
(7.08e-05s)
1.66
(0.000117s)
1.93
(0.000137s)
(^[ ]*#(?:[^\\\n]|\\[^\n_[:punct:][:alnum:]]*[\n[:punct:][:word:]])*)|(//[^\n]*|/\*.*?\*/)|\<([+-]?(?:(?:0x[[:xdigit:]]+)|(?:(?:[[:digit:]]*\.)?[[:digit:]]+(?:[eE][+-]?[[:digit:]]+)?))u?(?:(?:int(?:8|16|32|64))|L)?)\>|('(?:[^\\']|\\.)*'|"(?:[^\\"]|\\.)*")|\<(__asm|__cdecl|__declspec|__export|__far16|__fastcall|__fortran|__import|__pascal|__rtti|__stdcall|_asm|_cdecl|__except|_export|_far16|_fastcall|__finally|_fortran|_import|_pascal|_stdcall|__thread|__try|asm|auto|bool|break|case|catch|cdecl|char|class|const|const_cast|continue|default|delete|do|double|dynamic_cast|else|enum|explicit|extern|false|float|for|friend|goto|if|inline|int|long|mutable|namespace|new|operator|pascal|private|protected|public|register|reinterpret_cast|return|short|signed|sizeof|static|static_cast|struct|switch|template|this|throw|true|try|typedef|typeid|typename|union|unsigned|using|virtual|void|volatile|wchar_t|while)\>6.98
(0.0043s)
6.6
(0.00406s)
6.73
(0.00414s)
6.6
(0.00406s)
NA11.8
(0.00727s)
1
(0.000615s)
NANA
^[ ]*#[ ]*include[ ]+("[^"]+"|<[^>]+>)6.32
(0.000479s)
21.7
(0.00164s)
2.13
(0.000161s)
2.13
(0.000161s)
NA3.29
(0.000249s)
1
(7.57e-05s)
1.97
(0.000149s)
1.87
(0.000142s)
^[ ]*#[ ]*include[ ]+("boost/[^"]+"|<boost/[^>]+>)6.32
(0.000479s)
22.5
(0.0017s)
2.13
(0.000161s)
2.13
(0.000161s)
NA3.29
(0.000249s)
1
(7.57e-05s)
1.94
(0.000146s)
1.84
(0.000139s)
+

+

+

Comparison 4: HTML Document Search

-

For each of the following regular expressions the time taken to find all - occurrences of the expression within the html file libs/libraries.htm - was measured. 

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ExpressionBoostBoost + C++ localePOSIXPCRE
beman|john|dave1.03
- (0.000367s)
1
- (0.000357s)
47.4
- (0.0169s)
1.16
- (0.000416s)
<p>.*?</p>1.25
- (0.000459s)
1
- (0.000367s)
NA1.03
- (0.000376s)
<a[^>]+href=("[^"]*"|[^[:space:]]+)[^>]*>1
- (0.000509s)
1.02
- (0.000518s)
305
- (0.155s)
1.1
- (0.000558s)
<h[12345678][^>]*>.*?</h[12345678]>1.04
- (0.00025s)
1
- (0.00024s)
NA1.16
- (0.000279s)
<img[^>]+src=("[^"]*"|[^[:space:]]+)[^>]*>2.22
- (0.000489s)
1.69
- (0.000372s)
148
- (0.0326s)
1
- (0.00022s)
<font[^>]+face=("[^"]*"|[^[:space:]]+)[^>]*>.*?</font>1.71
- (0.000371s)
1.75
- (0.000381s)
NA1
- (0.000218s)
-
-
-

Comparison 3: Simple Matches

-

For each of the following regular expressions the time taken to match against +

For each of the following regular expressions the time taken to find all + occurrences of the expression within the html file libs/libraries.htm + was measured. 

+

+ + + + + + + +
ExpressionGRETAGRETA
(non-recursive mode)
BoostBoost + C++ localePOSIXPCREPCRE JITDynamic XpressiveRE2
beman|john|dave4.38
(0.000791s)
8.76
(0.00158s)
1.73
(0.000313s)
1.73
(0.000313s)
2.97
(0.000537s)
2.19
(0.000396s)
1
(0.000181s)
2.62
(0.000474s)
1.49
(0.000269s)
<a[^>]+href=("[^"]*"|[^[:space:]]+)[^>]*>2.45
(0.000425s)
3.49
(0.000605s)
3.44
(0.000596s)
3.44
(0.000596s)
51.4
(0.00891s)
2.76
(0.000479s)
1
(0.000173s)
4.51
(0.000781s)
32.9
(0.0057s)
<img[^>]+src=("[^"]*"|[^[:space:]]+)[^>]*>1.12
(6.47e-05s)
1.14
(6.59e-05s)
3.71
(0.000215s)
3.66
(0.000212s)
8.17
(0.000474s)
3.07
(0.000178s)
1
(5.8e-05s)
3.41
(0.000198s)
2.02
(0.000117s)
<p>.*?</p>1.21
(9.03e-05s)
1.26
(9.4e-05s)
2.85
(0.000212s)
2.85
(0.000212s)
NA2.52
(0.000188s)
1
(7.45e-05s)
3.41
(0.000254s)
6.82
(0.000508s)
<h[12345678][^>]*>.*?</h[12345678]>1.73
(0.000139s)
1.97
(0.000159s)
2.76
(0.000222s)
2.73
(0.00022s)
NA2.48
(0.0002s)
1
(8.06e-05s)
4.85
(0.000391s)
6.3
(0.000508s)
<font[^>]+face=("[^"]*"|[^[:space:]]+)[^>]*>.*?</font>1.27
(7.2e-05s)
1.31
(7.45e-05s)
3.74
(0.000212s)
3.96
(0.000225s)
NA3.18
(0.000181s)
1
(5.68e-05s)
3.1
(0.000176s)
1.02
(5.8e-05s)
+

+

Comparison 5: Simple Matches

+

+ For each of the following regular expressions the time taken to match against the text indicated was measured. 

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ExpressionTextBoostBoost + C++ localePOSIXPCRE
abcabc1.36
- (2.15e-07s)
1.36
- (2.15e-07s)
2.76
- (4.34e-07s)
1
- (1.58e-07s)
^([0-9]+)(\-| |$)(.*)$100- this is a line of ftp response which contains a message string1.55
- (7.26e-07s)
1.51
- (7.07e-07s)
319
- (0.000149s)
1
- (4.67e-07s)
([[:digit:]]{4}[- ]){3}[[:digit:]]{3,4}1234-5678-1234-4561.96
- (9.54e-07s)
1.96
- (9.54e-07s)
44.5
- (2.17e-05s)
1
- (4.87e-07s)
^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$john@johnmaddock.co.uk1.22
- (1.51e-06s)
1.23
- (1.53e-06s)
162
- (0.000201s)
1
- (1.24e-06s)
^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$foo12@foo.edu1.28
- (1.47e-06s)
1.3
- (1.49e-06s)
104
- (0.00012s)
1
- (1.15e-06s)
^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$bob.smith@foo.tv1.28
- (1.47e-06s)
1.3
- (1.49e-06s)
113
- (0.00013s)
1
- (1.15e-06s)
^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$EH10 2QQ1.38
- (4.68e-07s)
1.41
- (4.77e-07s)
13.5
- (4.59e-06s)
1
- (3.39e-07s)
^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$G1 1AA1.28
- (4.35e-07s)
1.25
- (4.25e-07s)
11.7
- (3.97e-06s)
1
- (3.39e-07s)
^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$SW1 1ZZ1.32
- (4.53e-07s)
1.31
- (4.49e-07s)
12.2
- (4.2e-06s)
1
- (3.44e-07s)
^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$4/1/20011.16
- (3.82e-07s)
1.2
- (3.96e-07s)
13.9
- (4.59e-06s)
1
- (3.29e-07s)
^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$12/12/20011.38
- (4.49e-07s)
1.38
- (4.49e-07s)
16
- (5.2e-06s)
1
- (3.25e-07s)
^[-+]?[[:digit:]]*\.?[[:digit:]]*$1231.19
- (7.64e-07s)
1.16
- (7.45e-07s)
7.51
- (4.81e-06s)
1
- (6.4e-07s)
^[-+]?[[:digit:]]*\.?[[:digit:]]*$+3.141591.32
- (8.97e-07s)
1.31
- (8.88e-07s)
14
- (9.48e-06s)
1
- (6.78e-07s)
^[-+]?[[:digit:]]*\.?[[:digit:]]*$-3.141591.32
- (8.97e-07s)
1.31
- (8.88e-07s)
14
- (9.48e-06s)
1
- (6.78e-07s)
-
-
+

+ + + + + + + + + + + + + + + +
ExpressionTextGRETAGRETA
(non-recursive mode)
BoostBoost + C++ localePOSIXPCREPCRE JITDynamic XpressiveRE2
abcabc3.12
(9.66e-08s)
4.62
(1.43e-07s)
11.5
(3.58e-07s)
11.7
(3.62e-07s)
4.54
(1.41e-07s)
5.08
(1.57e-07s)
1
(3.1e-08s)
6.46
(2e-07s)
3.77
(1.17e-07s)
^([0-9]+)(\-| |$)(.*)$100- this is a line of ftp response which contains a message string1.81
(2.77e-07s)
3.38
(5.15e-07s)
4.19
(6.39e-07s)
4.25
(6.48e-07s)
95
(1.45e-05s)
2.41
(3.67e-07s)
1
(1.53e-07s)
2.91
(4.43e-07s)
3.5
(5.34e-07s)
([[:digit:]]{4}[- ]){3}[[:digit:]]{3,4}1234-5678-1234-4564.95
(3.48e-07s)
8
(5.63e-07s)
12.5
(8.77e-07s)
13
(9.16e-07s)
3.32
(2.34e-07s)
5.36
(3.77e-07s)
1
(7.03e-08s)
6.64
(4.67e-07s)
4.54
(3.19e-07s)
^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$john@johnmaddock.co.uk5.64
(1.18e-06s)
5.64
(1.18e-06s)
7.18
(1.51e-06s)
7.27
(1.53e-06s)
30.2
(6.33e-06s)
4.18
(8.77e-07s)
1
(2.1e-07s)
5
(1.05e-06s)
1.61
(3.39e-07s)
^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$foo12@foo.edu6.86
(1.03e-06s)
6.86
(1.03e-06s)
8.76
(1.32e-06s)
9.27
(1.39e-06s)
29.5
(4.43e-06s)
4.89
(7.34e-07s)
1
(1.5e-07s)
6.1
(9.16e-07s)
2.03
(3.05e-07s)
^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$bob.smith@foo.tv7.1
(1.05e-06s)
7.1
(1.05e-06s)
8.9
(1.32e-06s)
8.65
(1.28e-06s)
31.5
(4.65e-06s)
4.84
(7.15e-07s)
1
(1.48e-07s)
6.32
(9.35e-07s)
2.13
(3.15e-07s)
^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$EH10 2QQ3.9
(1.81e-07s)
5.95
(2.77e-07s)
11.7
(5.44e-07s)
10.9
(5.05e-07s)
4.21
(1.96e-07s)
4.62
(2.15e-07s)
1
(4.65e-08s)
5.33
(2.48e-07s)
6.05
(2.81e-07s)
^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$G1 1AA3.54
(1.65e-07s)
5.85
(2.72e-07s)
10.7
(4.96e-07s)
10.9
(5.05e-07s)
3.9
(1.81e-07s)
4.62
(2.15e-07s)
1
(4.65e-08s)
5.54
(2.57e-07s)
5.95
(2.77e-07s)
^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$SW1 1ZZ3.85
(1.81e-07s)
5.87
(2.77e-07s)
10.5
(4.96e-07s)
10.1
(4.77e-07s)
3.95
(1.86e-07s)
4.56
(2.15e-07s)
1
(4.71e-08s)
5.27
(2.48e-07s)
5.97
(2.81e-07s)
^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$4/1/20013.32
(1.74e-07s)
5.09
(2.67e-07s)
8.73
(4.58e-07s)
8.91
(4.67e-07s)
3.68
(1.93e-07s)
4.14
(2.17e-07s)
1
(5.25e-08s)
4.64
(2.43e-07s)
2.91
(1.53e-07s)
^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$12/12/20013.27
(1.72e-07s)
5
(2.62e-07s)
9.27
(4.86e-07s)
9.64
(5.05e-07s)
3.91
(2.05e-07s)
4.14
(2.17e-07s)
1
(5.25e-08s)
5.36
(2.81e-07s)
5.55
(2.91e-07s)
^[-+]?[[:digit:]]*\.?[[:digit:]]*$1233.03
(1.34e-07s)
5.03
(2.22e-07s)
10.3
(4.53e-07s)
10.2
(4.48e-07s)
8.11
(3.58e-07s)
4.97
(2.19e-07s)
1
(4.41e-08s)
5.3
(2.34e-07s)
5.84
(2.57e-07s)
^[-+]?[[:digit:]]*\.?[[:digit:]]*$+3.141593
(1.57e-07s)
4.64
(2.43e-07s)
10.5
(5.53e-07s)
9.45
(4.96e-07s)
10.2
(5.34e-07s)
4.55
(2.38e-07s)
1
(5.25e-08s)
4.64
(2.43e-07s)
5.36
(2.81e-07s)
^[-+]?[[:digit:]]*\.?[[:digit:]]*$-3.141592.97
(1.57e-07s)
4.58
(2.43e-07s)
10.4
(5.53e-07s)
9.35
(4.96e-07s)
10.1
(5.34e-07s)
4.49
(2.38e-07s)
1
(5.3e-08s)
4.94
(2.62e-07s)
5.39
(2.86e-07s)
+


-

© Copyright John Maddock 2003

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

+

© Copyright John Maddock 2003

+

Use, modification and distribution are subject to the Boost Software License, + Version 1.0. (See accompanying file LICENSE_1_0.txt + or copy at http://www.boost.org/LICENSE_1_0.txt)

+ diff --git a/performance/Jamfile.v2 b/performance/Jamfile.v2 index ca47cb9ee..60eb1fbff 100644 --- a/performance/Jamfile.v2 +++ b/performance/Jamfile.v2 @@ -3,12 +3,14 @@ # (See accompanying file LICENSE_1_0.txt or copy at # http://www.boost.org/LICENSE_1_0.txt. -SOURCES = command_line main time_boost time_greta time_localised_boost time_pcre time_dynamic_xpressive time_posix time_safe_greta ; +SOURCES = command_line main time_boost time_greta time_localised_boost time_pcre time_pcre_jit time_dynamic_xpressive time_posix time_safe_greta time_re2 ; local HS_REGEX_PATH = [ modules.peek : HS_REGEX_PATH ] ; local USE_POSIX = [ modules.peek : USE_POSIX ] ; local PCRE_PATH = [ modules.peek : PCRE_PATH ] ; local USE_PCRE = [ modules.peek : USE_PCRE ] ; +local GRETA_PATH = [ modules.peek : GRETA_PATH ] ; +local USE_RE2 = [ modules.peek : USE_RE2 ] ; if $(HS_REGEX_PATH) { @@ -20,31 +22,51 @@ else if $(USE_POSIX) POSIX_OPTS = BOOST_HAS_POSIX=1 ; } -lib pcre : : pcre ; +lib pcre : : pcre /usr/local/lib ; if $(PCRE_PATH) { +# currently pcre have more source files PCRE_SOURCES = $(PCRE_PATH)/chartables.c $(PCRE_PATH)/get.c $(PCRE_PATH)/pcre.c $(PCRE_PATH)/study.c ; PCRE_OPTS = BOOST_HAS_PCRE=1 $(PCRE_PATH) ; } else if $(USE_PCRE) { - PCRE_OPTS = BOOST_HAS_PCRE=1 ; + PCRE_OPTS = BOOST_HAS_PCRE=1 BOOST_HAS_PCRE_JIT=1 ; PCRE_SOURCES = pcre ; } +if $(GRETA_PATH) +{ + GRETA_SOURCES = $(GRETA_PATH)/regexpr2.cpp $(GRETA_PATH)/syntax2.cpp ; + GRETA_OPTS = BOOST_HAS_GRETA=1 $(GRETA_PATH) ; +} + +lib re2 : : re2 ; + +if $(USE_RE2) +{ + RE2_OPTS = BOOST_HAS_RE2=1 ; + RE2_SOURCES = re2 ; +} + exe regex_comparison : $(SOURCES).cpp $(HS_SOURCES) $(PCRE_SOURCES) + $(GRETA_SOURCES) + $(RE2_SOURCES) ../build//boost_regex ../../test/build//boost_prg_exec_monitor/static : BOOST_REGEX_NO_LIB=1 BOOST_REGEX_STATIC_LINK=1 + BOOST_HAS_XPRESSIVE=1 $(POSIX_OPTS) $(PCRE_OPTS) + $(GRETA_OPTS) + $(RE2_OPTS) ; diff --git a/performance/command_line.cpp b/performance/command_line.cpp index 
2d2ac7ba1..dac2bcc4d 100644 --- a/performance/command_line.cpp +++ b/performance/command_line.cpp @@ -33,7 +33,9 @@ bool time_greta = false; bool time_safe_greta = false; bool time_posix = false; bool time_pcre = false; +bool time_pcre_jit = false; bool time_xpressive = false; +bool time_re2 = false; bool time_std = false; bool test_matches = false; @@ -55,7 +57,9 @@ double boost_total = 0; double locale_boost_total = 0; double posix_total = 0; double pcre_total = 0; +double pcre_jit_total = 0; double xpressive_total = 0; +double re2_total = 0; double std_total = 0; unsigned greta_test_count = 0; unsigned safe_greta_test_count = 0; @@ -63,7 +67,9 @@ unsigned boost_test_count = 0; unsigned locale_boost_test_count = 0; unsigned posix_test_count = 0; unsigned pcre_test_count = 0; +unsigned pcre_jit_test_count = 0; unsigned xpressive_test_count = 0; +unsigned re2_test_count = 0; unsigned std_test_count = 0; int handle_argument(const std::string& what) @@ -86,10 +92,18 @@ int handle_argument(const std::string& what) else if(what == "-pcre") time_pcre = true; #endif +#ifdef BOOST_HAS_PCRE_JIT + else if(what == "-pcrejit") + time_pcre_jit = true; +#endif #ifdef BOOST_HAS_XPRESSIVE else if(what == "-xpressive" || what == "-dxpr") time_xpressive = true; #endif +#ifdef BOOST_HAS_RE2 + else if(what == "-re2") + time_re2 = true; +#endif #ifndef BOOST_NO_CXX11_HDR_REGEX else if(what == "-std") time_std = true; @@ -108,9 +122,15 @@ int handle_argument(const std::string& what) #ifdef BOOST_HAS_PCRE time_pcre = true; #endif +#ifdef BOOST_HAS_PCRE_JIT + time_pcre_jit = true; +#endif #ifdef BOOST_HAS_XPRESSIVE time_xpressive = true; #endif +#ifdef BOOST_HAS_RE2 + time_re2 = true; +#endif #ifndef BOOST_NO_CXX11_HDR_REGEX time_std = true; #endif @@ -174,9 +194,15 @@ int show_usage() #ifdef BOOST_HAS_PCRE " -pcre Apply tests to PCRE library\n" #endif +#ifdef BOOST_HAS_PCRE_JIT + " -pcrejit Apply tests to PCRE library (int JIT mode)\n" +#endif #ifdef BOOST_HAS_XPRESSIVE " -dxpr Apply 
tests to dynamic xpressive library\n" #endif +#ifdef BOOST_HAS_RE2 + " -re2 Apply tests to google RE2 library\n" +#endif #ifndef BOOST_NO_CXX11_HDR_REGEX " -std Apply tests to std::regex.\n" #endif @@ -283,10 +309,18 @@ void output_html_results(bool show_description, const std::string& tagname) if(time_pcre == true) os << "PCRE"; #endif +#ifdef BOOST_HAS_PCRE_JIT + if(time_pcre_jit == true) + os << "PCRE JIT"; +#endif #ifdef BOOST_HAS_XPRESSIVE if(time_xpressive == true) os << "Dynamic Xpressive"; #endif +#ifdef BOOST_HAS_RE2 + if(time_re2 == true) + os << "RE2"; +#endif #ifndef BOOST_NO_CXX11_HDR_REGEX if(time_std == true) os << "std::regex"; @@ -362,6 +396,17 @@ void output_html_results(bool show_description, const std::string& tagname) } } #endif +#if defined(BOOST_HAS_PCRE_JIT) + if(time_pcre_jit == true) + { + print_result(os, first->pcre_jit_time, first->factor); + if(first->pcre_jit_time > 0) + { + pcre_jit_total += first->pcre_jit_time / first->factor; + ++pcre_jit_test_count; + } + } +#endif #if defined(BOOST_HAS_XPRESSIVE) if(time_xpressive == true) { @@ -373,6 +418,17 @@ void output_html_results(bool show_description, const std::string& tagname) } } #endif +#if defined(BOOST_HAS_RE2) + if(time_re2 == true) + { + print_result(os, first->re2_time, first->factor); + if(first->re2_time > 0) + { + re2_total += first->re2_time / first->factor; + ++re2_test_count; + } + } +#endif #ifndef BOOST_NO_CXX11_HDR_REGEX if(time_std == true) { @@ -450,12 +506,24 @@ std::string get_averages_table() os << "PCRE"; } #endif +#ifdef BOOST_HAS_PCRE_JIT + if(time_pcre_jit == true) + { + os << "PCRE JIT"; + } +#endif #ifdef BOOST_HAS_XPRESSIVE if(time_xpressive == true) { os << "Dynamic Xpressive"; } #endif +#ifdef BOOST_HAS_RE2 + if(time_re2 == true) + { + os << "google RE2"; + } +#endif #ifndef BOOST_NO_CXX11_HDR_REGEX if(time_std == true) { @@ -473,25 +541,31 @@ std::string get_averages_table() os << "" << (greta_total / greta_test_count) << "\n"; if(time_safe_greta == true) 
os << "" << (safe_greta_total / safe_greta_test_count) << "\n"; -#endif -#if defined(BOOST_HAS_POSIX) - if(time_boost == true) - os << "" << (boost_total / boost_test_count) << "\n"; #endif if(time_boost == true) os << "" << (boost_total / boost_test_count) << "\n"; if(time_localised_boost == true) os << "" << (locale_boost_total / locale_boost_test_count) << "\n"; +#if defined(BOOST_HAS_POSIX) if(time_posix == true) os << "" << (posix_total / posix_test_count) << "\n"; +#endif #if defined(BOOST_HAS_PCRE) if(time_pcre == true) os << "" << (pcre_total / pcre_test_count) << "\n"; #endif +#if defined(BOOST_HAS_PCRE_JIT) + if(time_pcre_jit == true) + os << "" << (pcre_jit_total / pcre_jit_test_count) << "\n"; +#endif #if defined(BOOST_HAS_XPRESSIVE) if(time_xpressive == true) os << "" << (xpressive_total / xpressive_test_count) << "\n"; #endif +#if defined(BOOST_HAS_RE2) + if(time_re2 == true) + os << "" << (re2_total / re2_test_count) << "\n"; +#endif #ifndef BOOST_NO_CXX11_HDR_REGEX if(time_std == true) os << "" << (std_total / std_test_count) << "\n"; diff --git a/performance/input.html b/performance/input.html index 425dedebc..706229392 100644 --- a/performance/input.html +++ b/performance/input.html @@ -60,7 +60,7 @@

Comparison 4: HTML Document Search

occurrences of the expression within the html file libs/libraries.htm was measured. 

%html_search%

-

Comparison 3: Simple Matches

+

Comparison 5: Simple Matches

For each of the following regular expressions the time taken to match against the text indicated was measured. 

diff --git a/performance/main.cpp b/performance/main.cpp index b7ba8a526..0bdd52e17 100644 --- a/performance/main.cpp +++ b/performance/main.cpp @@ -66,6 +66,14 @@ void test_match(const std::string& re, const std::string& text, const std::strin std::cout << "\tPCRE regex: " << time << "s\n"; } #endif +#ifdef BOOST_HAS_PCRE_JIT + if(time_pcre_jit == true) + { + time = pcrj::time_match(re, text, icase); + r.pcre_jit_time = time; + std::cout << "\tPCRE JIT regex: " << time << "s\n"; + } +#endif #ifdef BOOST_HAS_XPRESSIVE if(time_xpressive == true) { @@ -74,6 +82,14 @@ void test_match(const std::string& re, const std::string& text, const std::strin std::cout << "\txpressive regex: " << time << "s\n"; } #endif +#ifdef BOOST_HAS_RE2 + if(time_re2 == true) + { + time = gre2::time_match(re, text, icase); + r.re2_time = time; + std::cout << "\tRE2 regex: " << time << "s\n"; + } +#endif #ifndef BOOST_NO_CXX11_HDR_REGEX if(time_std == true) { @@ -135,6 +151,14 @@ void test_find_all(const std::string& re, const std::string& text, const std::st std::cout << "\tPCRE regex: " << time << "s\n"; } #endif +#ifdef BOOST_HAS_PCRE_JIT + if(time_pcre_jit == true) + { + time = pcrj::time_find_all(re, text, icase); + r.pcre_jit_time = time; + std::cout << "\tPCRE JIT regex: " << time << "s\n"; + } +#endif #ifdef BOOST_HAS_XPRESSIVE if(time_xpressive == true) { @@ -143,6 +167,14 @@ void test_find_all(const std::string& re, const std::string& text, const std::st std::cout << "\txpressive regex: " << time << "s\n"; } #endif +#ifdef BOOST_HAS_RE2 + if(time_re2 == true) + { + time = gre2::time_find_all(re, text, icase); + r.re2_time = time; + std::cout << "\tRE2 regex: " << time << "s\n"; + } +#endif #ifndef BOOST_NO_CXX11_HDR_REGEX if(time_std == true) { @@ -226,10 +258,13 @@ int cpp_main(int argc, char * argv[]) const char* boost_include_expression = "^[ \t]*#[ \t]*include[ \t]+(\"boost/[^\"]+\"|]+>)"; + bool time_posix_orig = time_posix; + time_posix = false; test_find_all(class_expression, 
file_contents); test_find_all(highlight_expression, file_contents); test_find_all(include_expression, file_contents); test_find_all(boost_include_expression, file_contents); + time_posix = time_posix_orig; } output_html_results(false, "%code_search%"); @@ -237,11 +272,15 @@ int cpp_main(int argc, char * argv[]) { load_file(file_contents, "../../../libs/libraries.htm"); test_find_all("beman|john|dave", file_contents, true); - test_find_all("

.*?

", file_contents, true); test_find_all("]+href=(\"[^\"]*\"|[^[:space:]]+)[^>]*>", file_contents, true); - test_find_all("]*>.*?", file_contents, true); test_find_all("]+src=(\"[^\"]*\"|[^[:space:]]+)[^>]*>", file_contents, true); + bool time_posix_orig = time_posix; + time_posix = false; + // POSIX-Extended unspport Non greedy repeats + test_find_all("

.*?

", file_contents, true); + test_find_all("]*>.*?", file_contents, true); test_find_all("]+face=(\"[^\"]*\"|[^[:space:]]+)[^>]*>.*?", file_contents, true); + time_posix = time_posix_orig; } output_html_results(false, "%html_search%"); @@ -252,7 +291,10 @@ int cpp_main(int argc, char * argv[]) test_find_all("Twain", file_contents); test_find_all("Huck[[:alpha:]]+", file_contents); test_find_all("[[:alpha:]]+ing", file_contents); + bool time_posix_orig = time_posix; + time_posix = false; test_find_all("^[^\n]*?Twain", file_contents); + time_posix = time_posix_orig; test_find_all("Tom|Sawyer|Huckleberry|Finn", file_contents); test_find_all("(Tom|Sawyer|Huckleberry|Finn).{0,30}river|river.{0,30}(Tom|Sawyer|Huckleberry|Finn)", file_contents); } @@ -260,16 +302,17 @@ int cpp_main(int argc, char * argv[]) if(test_long_twain) { - load_file(file_contents, "mtent13.txt"); + load_file(file_contents, "mtent12.txt"); test_find_all("Twain", file_contents); test_find_all("Huck[[:alpha:]]+", file_contents); test_find_all("[[:alpha:]]+ing", file_contents); - test_find_all("^[^\n]*?Twain", file_contents); - test_find_all("Tom|Sawyer|Huckleberry|Finn", file_contents); + bool time_posix_orig = time_posix; time_posix = false; + test_find_all("^[^\n]*?Twain", file_contents); // POSIX-Extended: the escape character is not "special" inside a character class declaration + time_posix = time_posix_orig; + test_find_all("Tom|Sawyer|Huckleberry|Finn", file_contents); test_find_all("(Tom|Sawyer|Huckleberry|Finn).{0,30}river|river.{0,30}(Tom|Sawyer|Huckleberry|Finn)", file_contents); - time_posix = true; } output_html_results(false, "%long_twain_search%"); diff --git a/performance/regex_comparison.hpp b/performance/regex_comparison.hpp index 4ed968fd8..db819cf95 100644 --- a/performance/regex_comparison.hpp +++ b/performance/regex_comparison.hpp @@ -26,7 +26,9 @@ extern bool time_greta; extern bool time_safe_greta; extern bool time_posix; extern bool time_pcre; +extern bool time_pcre_jit; extern 
bool time_xpressive; +extern bool time_re2; extern bool time_std; extern bool test_matches; @@ -55,7 +57,9 @@ struct results double safe_greta_time; double posix_time; double pcre_time; + double pcre_jit_time; double xpressive_time; + double re2_time; double std_time; double factor; std::string expression; @@ -67,7 +71,9 @@ struct results safe_greta_time(-1), posix_time(-1), pcre_time(-1), + pcre_jit_time(-1), xpressive_time(-1), + re2_time(-1), std_time(-1), factor((std::numeric_limits::max)()), expression(ex), @@ -87,8 +93,12 @@ struct results factor = posix_time; if((pcre_time >= 0) && (pcre_time < factor)) factor = pcre_time; + if((pcre_jit_time >= 0) && (pcre_jit_time < factor)) + factor = pcre_jit_time; if((xpressive_time >= 0) && (xpressive_time < factor)) factor = xpressive_time; + if((re2_time >= 0) && (re2_time < factor)) + factor = re2_time; if((std_time >= 0) && (std_time < factor)) factor = std_time; } @@ -114,6 +124,12 @@ namespace pcr { double time_match(const std::string& re, const std::string& text, bool icase); double time_find_all(const std::string& re, const std::string& text, bool icase); +} +namespace pcrj { +// pcre jit tests: +double time_match(const std::string& re, const std::string& text, bool icase); +double time_find_all(const std::string& re, const std::string& text, bool icase); + } namespace g { // greta tests: @@ -138,8 +154,13 @@ namespace dxpr { double time_match(const std::string& re, const std::string& text, bool icase); double time_find_all(const std::string& re, const std::string& text, bool icase); } +namespace gre2 { +// re2 tests: +double time_match(const std::string& re, const std::string& text, bool icase); +double time_find_all(const std::string& re, const std::string& text, bool icase); +} namespace stdr { -// xpressive tests: +// C11 tests: double time_match(const std::string& re, const std::string& text, bool icase); double time_find_all(const std::string& re, const std::string& text, bool icase); }