55from data_diff .diff_tables import DiffResultWrapper
66
77
8- def jsonify_error (table1 : List [str ], table2 : List [str ], dbt_model : str , error : str ) -> ' FailedDiff' :
8+ def jsonify_error (table1 : List [str ], table2 : List [str ], dbt_model : str , error : str ) -> " FailedDiff" :
99 return FailedDiff (
1010 status = "failed" ,
1111 model = dbt_model ,
@@ -15,10 +15,12 @@ def jsonify_error(table1: List[str], table2: List[str], dbt_model: str, error: s
1515 ).json ()
1616
1717
18- def jsonify (diff : DiffResultWrapper ,
19- dbt_model : str ,
20- with_summary : bool = False ,
21- with_columns : Optional [Dict [str , List [str ]]] = None ) -> 'JsonDiff' :
18+ def jsonify (
19+ diff : DiffResultWrapper ,
20+ dbt_model : str ,
21+ with_summary : bool = False ,
22+ with_columns : Optional [Dict [str , List [str ]]] = None ,
23+ ) -> "JsonDiff" :
2224 """
2325 Converts the diff result into a JSON-serializable format.
2426 Optionally add stats summary and schema diff.
@@ -35,7 +37,6 @@ def jsonify(diff: DiffResultWrapper,
3537
3638 t1_exclusive_rows , t2_exclusive_rows , diff_rows = _group_rows (diff_info , schema )
3739
38-
3940 diff_rows_jsonified = []
4041 for row in diff_rows :
4142 diff_rows_jsonified .append (_jsonify_diff (row , key_columns ))
@@ -47,11 +48,11 @@ def jsonify(diff: DiffResultWrapper,
4748 t2_exclusive_rows_jsonified = []
4849 for row in t2_exclusive_rows :
4950 t2_exclusive_rows_jsonified .append (_jsonify_exclusive (row , key_columns ))
50-
51+
5152 summary = None
5253 if with_summary :
5354 summary = _jsonify_diff_summary (diff .get_stats_dict ())
54-
55+
5556 columns = None
5657 if with_columns :
5758 columns = _jsonify_columns_diff (with_columns , list (key_columns ))
@@ -60,11 +61,8 @@ def jsonify(diff: DiffResultWrapper,
6061 t1_exclusive_rows
6162 or t2_exclusive_rows
6263 or diff_rows
63- or with_columns and (
64- with_columns ['added' ]
65- or with_columns ['removed' ]
66- or with_columns ['changed' ]
67- )
64+ or with_columns
65+ and (with_columns ["added" ] or with_columns ["removed" ] or with_columns ["changed" ])
6866 )
6967 return JsonDiff (
7068 status = "success" ,
@@ -73,23 +71,20 @@ def jsonify(diff: DiffResultWrapper,
7371 dataset1 = list (table1 .table_path ),
7472 dataset2 = list (table2 .table_path ),
7573 rows = RowsDiff (
76- exclusive = ExclusiveDiff (
77- dataset1 = t1_exclusive_rows_jsonified ,
78- dataset2 = t2_exclusive_rows_jsonified
79- ),
74+ exclusive = ExclusiveDiff (dataset1 = t1_exclusive_rows_jsonified , dataset2 = t2_exclusive_rows_jsonified ),
8075 diff = diff_rows_jsonified ,
8176 ),
8277 summary = summary ,
8378 columns = columns ,
8479 ).json ()
8580
8681
87-
@dataclass
class JsonExclusiveRowValue:
    """
    Value of a single column in a row
    """

    isPK: bool  # _jsonify_exclusive sets this to `column_name in key_columns`
    value: Any  # raw column value copied from the row
9590
@@ -99,6 +94,7 @@ class JsonDiffRowValue:
9994 """
10095 Pair of diffed values for 2 rows with equal PKs
10196 """
97+
10298 dataset1 : Any
10399 dataset2 : Any
104100 isDiff : bool
@@ -163,38 +159,40 @@ class RowsDiff:
163159
@dataclass
class FailedDiff:
    """
    Payload describing a diff that failed; built by jsonify_error.
    """

    status: str  # Literal["failed"]
    model: str
    dataset1: List[str]
    dataset2: List[str]
    error: str

    version: str = "1.0.0"
169+
173170
@dataclass
class JsonDiff:
    """
    Payload describing a completed diff; built by jsonify.
    """

    status: str  # Literal["success"]
    result: str  # Literal["different", "identical"]
    model: str
    dataset1: List[str]
    dataset2: List[str]
    rows: RowsDiff
    summary: Optional[JsonDiffSummary]  # None unless jsonify is called with with_summary
    columns: Optional[JsonColumnsSummary]  # None unless jsonify is called with with_columns

    version: str = "1.0.0"
186183
187184
188- def _group_rows (diff_info : DiffResultWrapper ,
189- schema : List [str ]) -> Tuple [List [Dict [str , Any ]], List [Dict [str , Any ]], List [Dict [str , Any ]]]:
185+ def _group_rows (
186+ diff_info : DiffResultWrapper , schema : List [str ]
187+ ) -> Tuple [List [Dict [str , Any ]], List [Dict [str , Any ]], List [Dict [str , Any ]]]:
190188 t1_exclusive_rows = []
191189 t2_exclusive_rows = []
192190 diff_rows = []
193191
194192 for row in diff_info .diff :
195193 row_w_schema = dict (zip (schema , row ))
196- is_t1_exclusive = row_w_schema [' is_exclusive_a' ]
197- is_t2_exclusive = row_w_schema [' is_exclusive_b' ]
194+ is_t1_exclusive = row_w_schema [" is_exclusive_a" ]
195+ is_t2_exclusive = row_w_schema [" is_exclusive_b" ]
198196
199197 if is_t1_exclusive :
200198 t1_exclusive_rows .append (row_w_schema )
@@ -204,83 +202,72 @@ def _group_rows(diff_info: DiffResultWrapper,
204202
205203 else :
206204 diff_rows .append (row_w_schema )
207-
205+
208206 return t1_exclusive_rows , t2_exclusive_rows , diff_rows
209207
210208
def _jsonify_diff(row: Dict[str, Any], key_columns: List[str]) -> Dict[str, JsonDiffRowValue]:
    """
    Converts one diffed row into a mapping of column name -> JsonDiffRowValue.

    Fields of `row` are interpreted by name:
      - "is_exclusive_a" / "is_exclusive_b" are ignored;
      - "is_diff_<column>" sets `isDiff` (coerced to bool);
      - "<column>_a" sets `dataset1`, "<column>_b" sets `dataset2`;
        both also set `isPK` to whether <column> is in `key_columns`.
    Fields matching none of these patterns are ignored.
    """
    diff_prefix = "is_diff_"
    suffix_len = len("_a")  # "_a" and "_b" have the same length

    columns = collections.defaultdict(dict)
    for field, value in row.items():
        if field in ("is_exclusive_a", "is_exclusive_b"):
            continue

        if field.startswith(diff_prefix):
            # Slice off the prefix only: str.replace would also delete any later
            # "is_diff_" inside the column name.
            column_name = field[len(diff_prefix):]
            columns[column_name]["isDiff"] = bool(value)

        elif field.endswith("_a"):
            # Slice off the suffix only: str.replace("_a", "") would turn a column
            # such as "user_age" into "userge" and split it from its "_b" side.
            column_name = field[:-suffix_len]
            columns[column_name]["dataset1"] = value
            columns[column_name]["isPK"] = column_name in key_columns

        elif field.endswith("_b"):
            column_name = field[:-suffix_len]
            columns[column_name]["dataset2"] = value
            columns[column_name]["isPK"] = column_name in key_columns

    return {column: JsonDiffRowValue(**data) for column, data in columns.items()}
235230
236231
def _jsonify_exclusive(row: Dict[str, Any], key_columns: List[str]) -> Dict[str, JsonExclusiveRowValue]:
    """
    Converts a row into a mapping of column name -> JsonExclusiveRowValue.

    "is_exclusive_*" and "is_diff_*" fields are skipped. "<column>_b" fields are
    used only when row["is_exclusive_b"] is truthy, and "<column>_a" fields only
    when row["is_exclusive_a"] is truthy. Each emitted column records its value
    and whether it is in `key_columns`.
    """
    suffix_len = len("_a")  # "_a" and "_b" have the same length

    columns = collections.defaultdict(dict)
    for field, value in row.items():
        if field in ("is_exclusive_a", "is_exclusive_b"):
            continue
        if field.startswith("is_diff_"):
            continue
        if field.endswith("_b") and row["is_exclusive_b"]:
            # Slice off the suffix only: str.replace("_b", "") would also remove
            # "_b" from the middle of the column name (e.g. "is_bot" -> "isot").
            column_name = field[:-suffix_len]
            columns[column_name]["isPK"] = column_name in key_columns
            columns[column_name]["value"] = value
        elif field.endswith("_a") and row["is_exclusive_a"]:
            column_name = field[:-suffix_len]
            columns[column_name]["isPK"] = column_name in key_columns
            columns[column_name]["value"] = value
    return {column: JsonExclusiveRowValue(**data) for column, data in columns.items()}
256248
257249
def _jsonify_diff_summary(stats_dict: dict) -> JsonDiffSummary:
    """
    Builds the summary section of the JSON output from a stats dictionary.

    Reads the keys "rows_A", "rows_B", "exclusive_A", "exclusive_B", "updated",
    "unchanged", and the nested stats_dict["stats"]["diff_counts"]. A missing
    key raises KeyError.
    """
    return JsonDiffSummary(
        rows=Rows(
            total=Total(dataset1=stats_dict["rows_A"], dataset2=stats_dict["rows_B"]),
            exclusive=ExclusiveRows(
                dataset1=stats_dict["exclusive_A"],
                dataset2=stats_dict["exclusive_B"],
            ),
            updated=stats_dict["updated"],
            unchanged=stats_dict["unchanged"],
        ),
        stats=Stats(diffCounts=stats_dict["stats"]["diff_counts"]),
    )
276263
277264
def _jsonify_columns_diff(columns_diff: Dict[str, List[str]], key_columns: List[str]) -> JsonColumnsSummary:
    """
    Builds the schema-diff section of the JSON output.

    Columns under "added" are reported as exclusive to dataset2, columns under
    "removed" as exclusive to dataset1, and columns under "changed" as
    typeChanged. A missing key is treated as an empty list. Each list is copied.
    """
    return JsonColumnsSummary(
        primaryKey=key_columns,
        exclusive=ExclusiveColumns(
            dataset2=list(columns_diff.get("added", [])),
            dataset1=list(columns_diff.get("removed", [])),
        ),
        typeChanged=list(columns_diff.get("changed", [])),
    )
0 commit comments