22import avro .schema
33import sys
44import urlparse
5- from typing import Any
5+ from typing import Any , Union
66
class ValidationException(Exception):
    """Raised by validate_ex() when a datum does not match the expected schema."""
99
class ClassValidationException(ValidationException):
    """Validation failure of a record whose `class` field matched the schema.

    validate_ex() raises this instead of a plain ValidationException when a
    record's `class` value was accepted but other fields failed; the union
    branch re-raises it as-is rather than folding it into the list of
    per-alternative errors.
    """
12+
def validate(expected_schema, datum, identifiers=None, strict=False, foreign_properties=None):
    # type: (avro.schema.Schema, Any, Set[unicode], bool, Set[unicode]) -> bool
    """Return True if `datum` is an instance of `expected_schema`, else False.

    Non-raising front end to validate_ex(): it delegates with
    ``raise_ex=False`` so a mismatch is reported through the return value
    rather than by catching a ValidationException.

    :param expected_schema: the Avro schema to check against.
    :param datum: the Python value to check.
    :param identifiers: forwarded unchanged to validate_ex(); None (the
        default) is also validate_ex()'s own default.
    :param strict: forwarded to validate_ex() as ``strict``.
    :param foreign_properties: forwarded unchanged to validate_ex(); None
        (the default) is also validate_ex()'s own default.
    :returns: the boolean result of validate_ex().
    """
    # The defaults are None rather than set(): a set() literal in the
    # signature is created once and shared by every call.
    return validate_ex(expected_schema, datum, identifiers, strict=strict,
                       foreign_properties=foreign_properties, raise_ex=False)
1616
1717INT_MIN_VALUE = - (1 << 31 )
1818INT_MAX_VALUE = (1 << 31 ) - 1
1919LONG_MIN_VALUE = - (1 << 63 )
2020LONG_MAX_VALUE = (1 << 63 ) - 1
2121
def indent(v, nolead=False):  # type: (Union[str, unicode], bool) -> unicode
    """Indent the lines of `v` by one level.

    :param v: text to indent; it is split with ``splitlines()``.
    :param nolead: when True the first line is left unindented and only the
        lines after it are indented.
    :returns: the lines re-joined with a newline between each; empty input
        yields an empty string.
    """
    # NOTE(review): the pad is copied from the visible literal; whitespace in
    # this listing appears collapsed, so confirm the intended indent width.
    pad = u" "
    lines = v.splitlines()
    if not lines:
        # Empty input: indexing the first line would raise IndexError.
        return u""
    if nolead:
        # Join the untouched first line to the rest with a newline, so the
        # first two lines are not run together.
        return u"\n".join(lines[:1] + [pad + line for line in lines[1:]])
    return u"\n".join([pad + line for line in lines])
2727
2828def friendly (v ): # type: (Any) -> Any
2929 if isinstance (v , avro .schema .NamedSchema ):
@@ -37,11 +37,11 @@ def friendly(v): # type: (Any) -> Any
3737 else :
3838 return v
3939
def multi(v, q=""):  # type: (Union[str, unicode], Union[str, unicode]) -> unicode
    """Wrap `v` in the quote string `q`, adding a newline for multi-line text.

    :param v: the text to wrap.
    :param q: string placed immediately before and after `v` (default empty).
    :returns: ``q + v + q``, followed by a single newline when `v` itself
        contains a newline.
    """
    wrapped = u"%s%s%s" % (q, v, q)
    suffix = u"\n" if '\n' in v else u""
    return wrapped + suffix
4545
4646def vpformat (datum ): # type: (Any) -> str
4747 a = pprint .pformat (datum )
@@ -50,8 +50,8 @@ def vpformat(datum): # type: (Any) -> str
5050 return a
5151
5252def validate_ex (expected_schema , datum , identifiers = None , strict = False ,
53- foreign_properties = None ):
54- # type: (avro.schema.Schema, Any, Set[unicode], bool, Set[unicode]) -> bool
53+ foreign_properties = None , raise_ex = True ):
54+ # type: (avro.schema.Schema, Any, Set[unicode], bool, Set[unicode], bool ) -> bool
5555 """Determine if a python datum is an instance of a schema."""
5656
5757 if not identifiers :
@@ -66,93 +66,154 @@ def validate_ex(expected_schema, datum, identifiers=None, strict=False,
6666 if datum is None :
6767 return True
6868 else :
69- raise ValidationException ("the value `%s` is not null" % vpformat (datum ))
69+ if raise_ex :
70+ raise ValidationException (u"the value `%s` is not null" % vpformat (datum ))
71+ else :
72+ return False
7073 elif schema_type == 'boolean' :
7174 if isinstance (datum , bool ):
7275 return True
7376 else :
74- raise ValidationException ("the value `%s` is not boolean" % vpformat (datum ))
77+ if raise_ex :
78+ raise ValidationException (u"the value `%s` is not boolean" % vpformat (datum ))
79+ else :
80+ return False
7581 elif schema_type == 'string' :
7682 if isinstance (datum , basestring ):
7783 return True
7884 elif isinstance (datum , bytes ):
79- datum = datum .decode ("utf-8" )
85+ datum = datum .decode (u "utf-8" )
8086 return True
8187 else :
82- raise ValidationException ("the value `%s` is not string" % vpformat (datum ))
88+ if raise_ex :
89+ raise ValidationException (u"the value `%s` is not string" % vpformat (datum ))
90+ else :
91+ return False
8392 elif schema_type == 'bytes' :
8493 if isinstance (datum , str ):
8594 return True
8695 else :
87- raise ValidationException ("the value `%s` is not bytes" % vpformat (datum ))
96+ if raise_ex :
97+ raise ValidationException (u"the value `%s` is not bytes" % vpformat (datum ))
98+ else :
99+ return False
88100 elif schema_type == 'int' :
89101 if ((isinstance (datum , int ) or isinstance (datum , long ))
90102 and INT_MIN_VALUE <= datum <= INT_MAX_VALUE ):
91103 return True
92104 else :
93- raise ValidationException ("`%s` is not int" % vpformat (datum ))
105+ if raise_ex :
106+ raise ValidationException (u"`%s` is not int" % vpformat (datum ))
107+ else :
108+ return False
94109 elif schema_type == 'long' :
95110 if ((isinstance (datum , int ) or isinstance (datum , long ))
96111 and LONG_MIN_VALUE <= datum <= LONG_MAX_VALUE ):
97112 return True
98113 else :
99- raise ValidationException ("the value `%s` is not long" % vpformat (datum ))
114+ if raise_ex :
115+ raise ValidationException (u"the value `%s` is not long" % vpformat (datum ))
116+ else :
117+ return False
100118 elif schema_type in ['float' , 'double' ]:
101119 if (isinstance (datum , int ) or isinstance (datum , long )
102120 or isinstance (datum , float )):
103121 return True
104122 else :
105- raise ValidationException ("the value `%s` is not float or double" % vpformat (datum ))
123+ if raise_ex :
124+ raise ValidationException (u"the value `%s` is not float or double" % vpformat (datum ))
125+ else :
126+ return False
106127 elif isinstance (expected_schema , avro .schema .FixedSchema ):
107128 if isinstance (datum , str ) and len (datum ) == expected_schema .size :
108129 return True
109130 else :
110- raise ValidationException ("the value `%s` is not fixed" % vpformat (datum ))
131+ if raise_ex :
132+ raise ValidationException (u"the value `%s` is not fixed" % vpformat (datum ))
133+ else :
134+ return False
111135 elif isinstance (expected_schema , avro .schema .EnumSchema ):
112136 if expected_schema .name == "Any" :
113137 if datum is not None :
114138 return True
115139 else :
116- raise ValidationException ("Any type must be non-null" )
140+ if raise_ex :
141+ raise ValidationException (u"'Any' type must be non-null" )
142+ else :
143+ return False
117144 if datum in expected_schema .symbols :
118145 return True
119146 else :
120- raise ValidationException ("the value `%s`\n is not a valid symbol in enum %s, expected one of %s" % (vpformat (datum ), expected_schema .name , "'" + "', '" .join (expected_schema .symbols ) + "'" ))
147+ if raise_ex :
148+ raise ValidationException (u"the value `%s`\n is not a valid symbol in enum %s, expected one of %s" % (vpformat (datum ), expected_schema .name , "'" + "', '" .join (expected_schema .symbols ) + "'" ))
149+ else :
150+ return False
121151 elif isinstance (expected_schema , avro .schema .ArraySchema ):
122152 if isinstance (datum , list ):
123153 for i , d in enumerate (datum ):
124154 try :
125- validate_ex (expected_schema .items , d , identifiers , strict = strict , foreign_properties = foreign_properties )
155+ if not validate_ex (expected_schema .items , d , identifiers , strict = strict , foreign_properties = foreign_properties , raise_ex = raise_ex ):
156+ return False
126157 except ValidationException as v :
127- raise ValidationException ("At position %i\n %s" % (i , indent (str (v ))))
128- return True
129- else :
130- raise ValidationException ("the value `%s` is not a list, expected list of %s" % (vpformat (datum ), friendly (expected_schema .items )))
131- elif isinstance (expected_schema , avro .schema .MapSchema ):
132- if (isinstance (datum , dict ) and
133- False not in [isinstance (k , basestring ) for k in datum .keys ()] and
134- False not in [validate (expected_schema .values , v , strict = strict ) for v in datum .values ()]):
158+ if raise_ex :
159+ raise ValidationException (u"At position %i\n %s" % (i , indent (str (v ))))
160+ else :
161+ return False
135162 return True
136163 else :
137- raise ValidationException ("`%s` is not a valid map value, expected\n %s" % (vpformat (datum ), vpformat (expected_schema .values )))
164+ if raise_ex :
165+ raise ValidationException (u"the value `%s` is not a list, expected list of %s" % (vpformat (datum ), friendly (expected_schema .items )))
166+ else :
167+ return False
138168 elif isinstance (expected_schema , avro .schema .UnionSchema ):
139- if True in [validate (s , datum , identifiers , strict = strict ) for s in expected_schema .schemas ]:
140- return True
141- else :
142- errors = []
143- for s in expected_schema .schemas :
144- try :
145- validate_ex (s , datum , identifiers , strict = strict , foreign_properties = foreign_properties )
146- except ValidationException as e :
147- errors .append (str (e ))
148- raise ValidationException ("the value %s is not a valid type in the union, expected one of:\n %s" % (multi (vpformat (datum ), '`' ), "\n " .join (["- %s, but\n %s" % (friendly (expected_schema .schemas [i ]), indent (multi (errors [i ]))) for i in range (0 , len (expected_schema .schemas ))])))
169+ for s in expected_schema .schemas :
170+ if validate_ex (s , datum , identifiers , strict = strict , raise_ex = False ):
171+ return True
172+
173+ if not raise_ex :
174+ return False
175+
176+ errors = [] # type: List[unicode]
177+ for s in expected_schema .schemas :
178+ try :
179+ validate_ex (s , datum , identifiers , strict = strict , foreign_properties = foreign_properties , raise_ex = True )
180+ except ClassValidationException as e :
181+ raise
182+ except ValidationException as e :
183+ errors .append (unicode (e ))
184+
185+ raise ValidationException (u"the value %s is not a valid type in the union, expected one of:\n %s" % (
186+ multi (vpformat (datum ), '`' ), u"\n " .join ([
187+ u"- %s, but\n %s" % (
188+ friendly (expected_schema .schemas [i ]), indent (multi (errors [i ])))
189+ for i in range (0 , len (expected_schema .schemas ))])))
149190
150191 elif isinstance (expected_schema , avro .schema .RecordSchema ):
151192 if not isinstance (datum , dict ):
152- raise ValidationException ("`%s`\n is not a dict" % vpformat (datum ))
193+ if raise_ex :
194+ raise ValidationException (u"`%s`\n is not a dict" % vpformat (datum ))
195+ else :
196+ return False
197+
198+ classmatch = None
199+ for f in expected_schema .fields :
200+ if f .name == "class" :
201+ d = datum .get ("class" )
202+ if not d :
203+ if raise_ex :
204+ raise ValidationException (u"Missing 'class' field" )
205+ else :
206+ return False
207+ if not validate_ex (f .type , d , identifiers , strict = strict , foreign_properties = foreign_properties , raise_ex = raise_ex ):
208+ return False
209+ classmatch = d
210+ break
153211
154212 errors = []
155213 for f in expected_schema .fields :
214+ if f .name == "class" :
215+ continue
216+
156217 if f .name in datum :
157218 fieldval = datum [f .name ]
158219 else :
@@ -162,12 +223,14 @@ def validate_ex(expected_schema, datum, identifiers=None, strict=False,
162223 fieldval = None
163224
164225 try :
165- validate_ex (f .type , fieldval , identifiers , strict = strict , foreign_properties = foreign_properties )
226+ if not validate_ex (f .type , fieldval , identifiers , strict = strict , foreign_properties = foreign_properties , raise_ex = raise_ex ):
227+ return False
166228 except ValidationException as v :
167229 if f .name not in datum :
168- errors .append ("missing required field `%s`" % f .name )
230+ errors .append (u "missing required field `%s`" % f .name )
169231 else :
170- errors .append ("could not validate field `%s` because\n %s" % (f .name , multi (indent (str (v )))))
232+ errors .append (u"could not validate field `%s` because\n %s" % (f .name , multi (indent (str (v )))))
233+
171234 if strict :
172235 for d in datum :
173236 found = False
@@ -176,14 +239,25 @@ def validate_ex(expected_schema, datum, identifiers=None, strict=False,
176239 found = True
177240 if not found :
178241 if d not in identifiers and d not in foreign_properties and d [0 ] not in ("@" , "$" ):
242+ if not raise_ex :
243+ return False
179244 split = urlparse .urlsplit (d )
180245 if split .scheme :
181- errors .append ("could not validate extension field `%s` because it is not recognized and strict is True. Did you include a $schemas section?" % (d ))
246+ errors .append (u "could not validate extension field `%s` because it is not recognized and strict is True. Did you include a $schemas section?" % (d ))
182247 else :
183- errors .append ("could not validate field `%s` because it is not recognized and strict is True, valid fields are: %s" % (d , ", " .join (fn .name for fn in expected_schema .fields )))
248+ errors .append (u "could not validate field `%s` because it is not recognized and strict is True, valid fields are: %s" % (d , ", " .join (fn .name for fn in expected_schema .fields )))
184249
185250 if errors :
186- raise ValidationException ("\n " .join (errors ))
251+ if raise_ex :
252+ if classmatch :
253+ raise ClassValidationException (u"%s record %s" % (classmatch , "\n " .join (errors )))
254+ else :
255+ raise ValidationException (u"\n " .join (errors ))
256+ else :
257+ return False
187258 else :
188259 return True
189- raise ValidationException ("Unrecognized schema_type %s" % schema_type )
260+ if raise_ex :
261+ raise ValidationException (u"Unrecognized schema_type %s" % schema_type )
262+ else :
263+ return False
0 commit comments