Commit 31bf69b

v25.1.2
1 parent 3bcf7b6 commit 31bf69b

8 files changed: +40 / -25 lines changed

.gitignore

Lines changed: 20 additions & 15 deletions
@@ -4,27 +4,32 @@ drop_tables_in_schema.py
 *.pyc
 /.idea
 Vault-Direct-Data-Python-Scripts.iml
-/direct-data/*
-/resources/*
-hr/*
+/vaults/*
+/direct_data_testing_lr/*
+/finance/*
 
+/accelerators/databricks/accelerator_test.py
+/accelerators/redshift/accelerator_test.py
+/accelerators/snowflake/accelerator_test.py
+/accelerators/sql_database/accelerator_test.py
 /accelerators/databricks/resources/finance/
 /accelerators/databricks/resources/hr/
+/accelerators/databricks/resources/direct_data_testing_lr/
+/accelerators/databricks/resources/hr_demo/
 /accelerators/snowflake/resources/finance/
 /accelerators/snowflake/resources/hr/
+/accelerators/snowflake/resources/direct_data_testing_lr/
+/accelerators/snowflake/resources/hr_demo/
 /accelerators/redshift/resources/finance/
 /accelerators/redshift/resources/hr/
-/direct-data/
-/accelerators/redshift/resources/finance
-/accelerators/redshift/direct-data
-/accelerators/databricks/accelerator_test.py
-/accelerators/redshift/accelerator_test.py
-/accelerators/snowflake/accelerator_test.py
-/hr/
 /accelerators/redshift/resources/direct_data_testing_lr/
 /accelerators/redshift/resources/hr_demo/
-/accelerators/snowflake/resources/direct_data_testing_lr/
-/accelerators/snowflake/resources/hr_demo/
-/accelerators/databricks/resources/direct_data_testing_lr/
-/accelerators/databricks/resources/hr_demo/
-/hr_demo/
+/accelerators/sql_database/resources/finance/
+/accelerators/sql_database/resources/hr/
+/accelerators/sql_database/resources/direct_data_testing_lr/
+/accelerators/sql_database/resources/hr_demo/
+/direct-data/
+/hr/
+
+/vaults
+csv_to_parquet.py

accelerators/databricks/scripts/extract_doc_content.py

Lines changed: 4 additions & 1 deletion
@@ -3,6 +3,8 @@
 
 from pandas import Series
 
+import pyrfc6266
+
 from common.services.aws_s3_service import AwsS3Service
 from common.services.vault_service import VaultService
 from common.api.model.response.document_response import DocumentExportResponse
@@ -92,9 +94,10 @@ def run(s3_service: AwsS3Service, vault_service: VaultService, convert_to_parque
         # If the individual document export was successful, download it and put on S3.
         if exported_document.responseStatus == "SUCCESS":
             file_staging_response: VaultResponse = vault_service.download_item_from_file_staging(exported_document=exported_document)
+            filename: str = pyrfc6266.parse_filename(file_staging_response.headers.get("Content-Disposition"))
             log_message(log_level='Debug',
                         message=f'File Staging results: {file_staging_response.responseMessage}')
-            s3_service.put_object(key=f'{direct_data_folder}/{exported_document.id}_{exported_document.major_version_number__v}_{exported_document.minor_version_number__v}',
+            s3_service.put_object(key=f'{direct_data_folder}/{exported_document.id}/{exported_document.major_version_number__v}_{exported_document.minor_version_number__v}/{filename}',
                                   body=file_staging_response.binary_content)
             is_vault_job_finished = True
         else:
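
Note: this change (mirrored in the redshift and snowflake accelerators below) uses pyrfc6266 to recover the document's original filename from the RFC 6266 Content-Disposition header, and switches the S3 key from a flat id_major_minor name to a per-version prefix ending in that filename. A minimal sketch of the parsing step, with a hypothetical header value for illustration:

import pyrfc6266

# Hypothetical Content-Disposition value, of the kind a file staging
# download returns alongside the binary content.
header = 'attachment; filename="site_visit_report.pdf"'

# parse_filename() handles quoted filenames and the RFC 5987
# filename* form, returning just the name.
filename = pyrfc6266.parse_filename(header)
print(filename)  # site_visit_report.pdf

# The new key layout groups content by document and version:
# <direct_data_folder>/<id>/<major>_<minor>/<filename>
print(f"direct-data/101/1_0/{filename}")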

accelerators/databricks/scripts/load_data.py

Lines changed: 1 addition & 1 deletion
@@ -141,7 +141,7 @@ def handle_metadata_changes(s3_service: AwsS3Service,
 def run(s3_service: AwsS3Service, databricks_service: DatabricksService, direct_data_params: dict,
         convert_to_parquet: bool):
     log_message(log_level='Info',
-                message=f'---Executing load_data_into_databricks.py---')
+                message=f'---Executing load_data.py---')
     try:
         databricks_service.db_connection.open()
         starting_directory = f"{s3_service.direct_data_folder}/{s3_service.extract_folder}"

accelerators/redshift/scripts/extract_doc_content.py

Lines changed: 4 additions & 1 deletion
@@ -3,6 +3,8 @@
 
 from pandas import Series
 
+import pyrfc6266
+
 from common.services.aws_s3_service import AwsS3Service
 from common.services.vault_service import VaultService
 from common.api.model.response.document_response import DocumentExportResponse
@@ -93,9 +95,10 @@ def run(s3_service: AwsS3Service, vault_service: VaultService, convert_to_parque
         # If the individual document export was successful, download it and put on S3.
         if exported_document.responseStatus == "SUCCESS":
             file_staging_response: VaultResponse = vault_service.download_item_from_file_staging(exported_document=exported_document)
+            filename: str = pyrfc6266.parse_filename(file_staging_response.headers.get("Content-Disposition"))
             log_message(log_level='Debug',
                         message=f'File Staging results: {file_staging_response.responseMessage}')
-            s3_service.put_object(key=f'{direct_data_folder}/{exported_document.id}_{exported_document.major_version_number__v}_{exported_document.minor_version_number__v}',
+            s3_service.put_object(key=f'{direct_data_folder}/{exported_document.id}/{exported_document.major_version_number__v}_{exported_document.minor_version_number__v}/{filename}',
                                   body=file_staging_response.binary_content)
             is_vault_job_finished = True
         else:

accelerators/redshift/scripts/load_data.py

Lines changed: 4 additions & 4 deletions
@@ -102,9 +102,9 @@ def handle_metadata_deletes(s3_service: AwsS3Service,
             redshift_service.db_connection.execute_query(drop_table_command)
         else:
             if columns:
-                alter_command = f"""ALTER TABLE {redshift_service.schema}.{table_name}
-                                    {", ".join(f'DROP COLUMN "{col}"' for col in columns)}"""
-                redshift_service.db_connection.execute_query(alter_command)
+                for col in columns:
+                    alter_command = f"""ALTER TABLE {redshift_service.schema}.{table_name} DROP COLUMN "{col}";"""
+                    redshift_service.db_connection.execute_query(alter_command)
 
 
 def handle_metadata_changes(s3_service: AwsS3Service,
@@ -217,7 +217,7 @@ def load_data_into_tables(redshift_service: RedshiftService,
 
 def run(s3_service: AwsS3Service, redshift_service: RedshiftService, direct_data_params: dict):
     log_message(log_level='Info',
-                message=f'---Executing load_data_into_snowflake.py---')
+                message=f'---Executing load_data.py---')
     try:
         starting_directory = f"{s3_service.direct_data_folder}/{s3_service.extract_folder}"
         extract_type = direct_data_params['extract_type']
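
Note: the DROP COLUMN rewrite above works around a Redshift limitation: unlike PostgreSQL, Redshift's ALTER TABLE accepts only a single action per statement, so a comma-joined list of DROP COLUMN clauses is rejected and each column must be dropped in its own statement. A minimal sketch of the pattern, assuming a hypothetical execute_query callable in place of the accelerator's RedshiftService connection:

def drop_columns(execute_query, schema: str, table_name: str, columns: list[str]) -> None:
    """Issue one ALTER TABLE per column; Redshift does not allow
    multiple comma-separated DROP COLUMN clauses in one statement."""
    for col in columns:
        # Double-quote the identifier so mixed-case or reserved names survive.
        execute_query(f'ALTER TABLE {schema}.{table_name} DROP COLUMN "{col}";')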

accelerators/snowflake/scripts/extract_doc_content.py

Lines changed: 4 additions & 1 deletion
@@ -3,6 +3,8 @@
 
 from pandas import Series
 
+import pyrfc6266
+
 from common.services.aws_s3_service import AwsS3Service
 from common.services.vault_service import VaultService
 from common.api.model.response.document_response import DocumentExportResponse
@@ -91,9 +93,10 @@ def run(s3_service: AwsS3Service, vault_service: VaultService, convert_to_parque
         # If the individual document export was successful, download it and put on S3.
         if exported_document.responseStatus == "SUCCESS":
             file_staging_response: VaultResponse = vault_service.download_item_from_file_staging(exported_document=exported_document)
+            filename: str = pyrfc6266.parse_filename(file_staging_response.headers.get("Content-Disposition"))
             log_message(log_level='Debug',
                         message=f'File Staging results: {file_staging_response.responseMessage}')
-            s3_service.put_object(key=f'{direct_data_folder}/{exported_document.id}_{exported_document.major_version_number__v}_{exported_document.minor_version_number__v}',
+            s3_service.put_object(key=f'{direct_data_folder}/{exported_document.id}/{exported_document.major_version_number__v}_{exported_document.minor_version_number__v}/{filename}',
                                   body=file_staging_response.binary_content)
             is_vault_job_finished = True
         else:

accelerators/snowflake/scripts/load_data.py

Lines changed: 1 addition & 1 deletion
@@ -142,7 +142,7 @@ def handle_metadata_changes(s3_service: AwsS3Service,
 def run(s3_service: AwsS3Service, snowflake_service: SnowflakeService, direct_data_params: dict,
         convert_to_parquet: bool):
     log_message(log_level='Info',
-                message=f'---Executing load_data_into_snowflake.py---')
+                message=f'---Executing load_data.py---')
     try:
         starting_directory = f"{s3_service.direct_data_folder}/{s3_service.extract_folder}"
         extract_type = direct_data_params['extract_type']

requirements.txt

Lines changed: 2 additions & 1 deletion
@@ -20,4 +20,5 @@ fastparquet
 snowflake-connector-python
 pyodbc~=5.2.0
 cryptography~=44.0.0
-PyJWT~=2.10.1
+PyJWT~=2.10.1
+pyrfc6266~=1.0.2
