|
| 1 | +import os |
| 2 | + |
| 3 | +import yaml |
| 4 | + |
| 5 | +from datacontract.model.exceptions import DataContractException |
| 6 | + |
| 7 | + |
def to_athena_soda_configuration(server):
    """Build the Soda data source configuration for an Athena server.

    AWS credentials are read from the environment variables
    ``DATACONTRACT_S3_ACCESS_KEY_ID``, ``DATACONTRACT_S3_SECRET_ACCESS_KEY``
    and, optionally, ``DATACONTRACT_S3_SESSION_TOKEN``. The region is taken
    from ``DATACONTRACT_S3_REGION`` when set, otherwise from
    ``server.region_name`` when that attribute is present and non-empty.

    Args:
        server: Server configuration object. ``schema_`` and ``stagingDir``
            are required; ``region_name`` and ``catalog`` are optional;
            ``type`` is used to name the data source.

    Returns:
        A YAML string with a single top-level key
        ``data_source <server.type>`` holding the connection settings.

    Raises:
        DataContractException: If the access key id, the secret access key,
            the schema or the staging directory is missing, or if the
            staging directory does not start with ``s3://``.
    """
    s3_region = os.getenv("DATACONTRACT_S3_REGION")
    s3_access_key_id = os.getenv("DATACONTRACT_S3_ACCESS_KEY_ID")
    s3_secret_access_key = os.getenv("DATACONTRACT_S3_SECRET_ACCESS_KEY")
    s3_session_token = os.getenv("DATACONTRACT_S3_SESSION_TOKEN")

    # Validate required parameters
    if not s3_access_key_id:
        raise DataContractException(
            type="athena-connection",
            name="missing_access_key_id",
            reason="AWS access key ID is required. Set the DATACONTRACT_S3_ACCESS_KEY_ID environment variable.",
            engine="datacontract",
        )

    if not s3_secret_access_key:
        raise DataContractException(
            type="athena-connection",
            name="missing_secret_access_key",
            reason="AWS secret access key is required. Set the DATACONTRACT_S3_SECRET_ACCESS_KEY environment variable.",
            engine="datacontract",
        )

    # getattr with a None default covers both "attribute absent" and
    # "attribute empty", matching the original hasattr-and-truthiness check.
    schema = getattr(server, "schema_", None)
    if not schema:
        raise DataContractException(
            type="athena-connection",
            name="missing_schema",
            reason="Schema is required for Athena connection. Specify the schema where your tables exist in the server configuration.",
            engine="datacontract",
        )

    staging_dir = getattr(server, "stagingDir", None)
    if not staging_dir:
        raise DataContractException(
            type="athena-connection",
            name="missing_s3_staging_dir",
            reason="S3 staging directory is required for Athena connection. This should be the Amazon S3 Query Result Location (e.g., 's3://my-bucket/athena-results/').",
            engine="datacontract",
        )

    # Validate S3 staging directory format
    if not staging_dir.startswith("s3://"):
        # Report the value that was actually validated (stagingDir); the
        # previous message read a different, undefined attribute and failed
        # before this exception could be raised.
        raise DataContractException(
            type="athena-connection",
            name="invalid_s3_staging_dir",
            reason=f"S3 staging directory must start with 's3://'. Got: {staging_dir}. Example: 's3://my-bucket/athena-results/'",
            engine="datacontract",
        )

    data_source = {
        "type": "athena",
        "access_key_id": s3_access_key_id,
        "secret_access_key": s3_secret_access_key,
        "schema": schema,
        "staging_dir": staging_dir,
    }

    # The environment variable takes precedence over the server setting.
    # region_name is optional, so a server object without it is treated as
    # "no region configured" instead of failing.
    region_name = s3_region or getattr(server, "region_name", None)
    if region_name:
        data_source["region_name"] = region_name

    catalog = getattr(server, "catalog", None)
    if catalog:
        # Optional, Identify the name of the Data Source, also referred to as a Catalog. The default value is `awsdatacatalog`.
        data_source["catalog"] = catalog

    if s3_session_token:
        data_source["aws_session_token"] = s3_session_token

    soda_configuration = {f"data_source {server.type}": data_source}

    return yaml.dump(soda_configuration)
0 commit comments