1313import sys
1414import numpy
1515import threading
16+ import re
1617
1718is_cpu_amd_intel = False # DEPRECATION WARNING: WILL BE REMOVED IN FUTURE RELEASE
1819from numexpr import interpreter , expressions , use_vml
@@ -259,10 +260,17 @@ def __init__(self, astnode):
# Text form of this node: 'Immediate(<n>)', where <n> is self.node.value
# rendered through the %d (integer) format specifier.
259260 def __str__ (self ):
260261 return 'Immediate(%d)' % (self .node .value ,)
261262
262-
# Pre-compiled guard pattern: matches `;`, `:`, `[` or a double underscore
# (`__`) anywhere in an expression string.  It is written as a raw string so
# the regex engine receives the backslash directly, instead of relying on
# Python passing through invalid string escapes such as `\;` and `\:`
# (which newer interpreters warn about).  The `[` is escaped explicitly so the
# character class cannot be misread as a nested set.
_forbidden_re = re.compile(r'[;:\[]|__')
263264def stringToExpression (s , types , context ):
264265 """Given a string, convert it to a tree of ExpressionNode's.
265266 """
267+ # sanitize the string for obvious attack vectors that NumExpr cannot
268+ # parse into its homebrew AST. This is to protect the call to `eval` below.
269+ # We forbid `;`, `:`, `[` and `__`.
270+ # We would like to forbid `.` but it is both a reference and decimal point.
271+ if _forbidden_re .search (s ) is not None :
272+ raise ValueError (f'Expression { s } has forbidden control characters.' )
273+
266274 old_ctx = expressions ._context .get_current_context ()
267275 try :
268276 expressions ._context .set_new_context (context )
@@ -285,8 +293,10 @@ def stringToExpression(s, types, context):
285293 t = types .get (name , default_type )
286294 names [name ] = expressions .VariableNode (name , type_to_kind [t ])
287295 names .update (expressions .functions )
296+
288297 # now build the expression
289298 ex = eval (c , names )
299+
290300 if expressions .isConstant (ex ):
291301 ex = expressions .ConstantNode (ex , expressions .getKind (ex ))
292302 elif not isinstance (ex , expressions .ExpressionNode ):
@@ -611,9 +621,7 @@ def NumExpr(ex, signature=(), **kwargs):
611621
612622 Returns a `NumExpr` object containing the compiled function.
613623 """
614- # NumExpr can be called either directly by the end-user, in which case
615- # kwargs need to be sanitized by getContext, or by evaluate,
616- # in which case kwargs are in already sanitized.
624+
617625 # In that case _frame_depth is wrong (it should be 2) but it doesn't matter
618626 # since it will not be used (because truediv='auto' has already been
619627 # translated to either True or False).
@@ -758,7 +766,7 @@ def getArguments(names, local_dict=None, global_dict=None, _frame_depth: int=2):
758766_names_cache = CacheDict (256 )
759767_numexpr_cache = CacheDict (256 )
760768_numexpr_last = {}
761-
769+ _numexpr_sanity = set ()
762770evaluate_lock = threading .Lock ()
763771
764772# MAYBE: decorate this function to add attributes instead of having the
@@ -861,7 +869,7 @@ def evaluate(ex: str,
861869 out : numpy .ndarray = None ,
862870 order : str = 'K' ,
863871 casting : str = 'safe' ,
864- _frame_depth : int = 3 ,
872+ _frame_depth : int = 3 ,
865873 ** kwargs ) -> numpy .ndarray :
866874 """
867875 Evaluate a simple array expression element-wise using the virtual machine.
@@ -909,6 +917,8 @@ def evaluate(ex: str,
909917 _frame_depth: int
910918 The calling frame depth. Unless you are a NumExpr developer you should
911919 not set this value.
920+
921+
912922 """
913923 # We could avoid code duplication if we called validate and then re_evaluate
914924 # here, but then we have difficulties with the `sys._getframe(2)` call in
@@ -921,10 +931,6 @@ def evaluate(ex: str,
921931 else :
922932 raise e
923933
924-
925-
926-
927-
928934def re_evaluate (local_dict : Optional [Dict ] = None ,
929935 _frame_depth : int = 2 ) -> numpy .ndarray :
930936 """
0 commit comments