Labels: api: dataproc (Issues related to the Dataproc API), type: question (Request for information or clarification. Not an issue.)
Description
I want to automate the creation of workflows in Dataproc. I manually created a workflow and captured its equivalent REST API request (POST) from the GCP console UI, and now I want to do the same thing with the Dataproc Python SDK. I looked at the documentation, but it was hard to follow, and the few examples it has don't cover a case like this. Can someone help me translate the following API request into its Python SDK equivalent?
"method": "POST",
"body": {
"id": "load1",
"name": "",
"labels": {},
"placement": {
"managedCluster": {
"clusterName": "cluster-1",
"config": {
"configBucket": "bucket1",
"gceClusterConfig": {
"serviceAccountScopes": [
"https://www.googleapis.com/auth/cloud-platform"
],
"networkUri": "",
"subnetworkUri": "",
"internalIpOnly": false,
"zoneUri": "",
"metadata": {},
"tags": [],
"shieldedInstanceConfig": {
"enableSecureBoot": false,
"enableVtpm": false,
"enableIntegrityMonitoring": false
}
},
"masterConfig": {
"numInstances": 1,
"machineTypeUri": "n1-standard-4",
"diskConfig": {
"bootDiskType": "pd-standard",
"bootDiskSizeGb": "150",
"numLocalSsds": 0,
"localSsdInterface": "SCSI"
},
"minCpuPlatform": "",
"imageUri": ""
},
"softwareConfig": {
"imageVersion": "2.0-ubuntu18",
"properties": {
"dataproc:dataproc.allow.zero.workers": "true"
},
"optionalComponents": []
},
"initializationActions": []
},
"labels": {}
}
},
"jobs": [
{
"pysparkJob": {
"mainPythonFileUri": "gs://temp.py",
"pythonFileUris": [],
"jarFileUris": [],
"fileUris": [],
"archiveUris": [],
"properties": {},
"args": [
"arg1"
]
},
"stepId": "start_job",
"labels": {},
"prerequisiteStepIds": []
},
{
"pysparkJob": {
"mainPythonFileUri": "gs://temp1.py",
"pythonFileUris": [],
"jarFileUris": [
"gs://spark-lib/bigquery/spark-bigquery-latest_2.12.jar"
],
"fileUris": [],
"archiveUris": [],
"properties": {},
"args": [
"arg1"
]
},
"stepId": "tb1",
"labels": {},
"prerequisiteStepIds": [
"start_job"
]
},
{
"pysparkJob": {
"mainPythonFileUri": "gs://temp1.py",
"pythonFileUris": [],
"jarFileUris": [
"gs://spark-lib/bigquery/spark-bigquery-latest_2.12.jar"
],
"fileUris": [],
"archiveUris": [],
"properties": {},
"args": [
"arg1"
]
},
"stepId": "tb2",
"labels": {},
"prerequisiteStepIds": [
"start_job"
]
},
{
"pysparkJob": {
"mainPythonFileUri": "gs://temp1.py",
"pythonFileUris": [],
"jarFileUris": [
"gs://spark-lib/bigquery/spark-bigquery-latest_2.12.jar"
],
"fileUris": [],
"archiveUris": [],
"properties": {},
"args": [
"arg1"
]
},
"stepId": "tb3",
"labels": {},
"prerequisiteStepIds": [
"start_job"
]
},
{
"pysparkJob": {
"mainPythonFileUri": "gs://temp1.py",
"pythonFileUris": [],
"jarFileUris": [
"gs://spark-lib/bigquery/spark-bigquery-latest_2.12.jar"
],
"fileUris": [],
"archiveUris": [],
"properties": {},
"args": [
"arg1"
]
},
"stepId": "tb4",
"labels": {},
"prerequisiteStepIds": [
"start_job"
]
},
{
"pysparkJob": {
"mainPythonFileUri": "gs://end_job.py",
"pythonFileUris": [],
"jarFileUris": [],
"fileUris": [],
"archiveUris": [],
"properties": {},
"args": [
"arg1"
]
},
"stepId": "end_job",
"labels": {},
"prerequisiteStepIds": [
"tb1",
"tb2",
"tb3",
"tb4"
]
}
],
"parameters": [],
"dagTimeout": "1800s"
},
"path": "/v1/projects/project1/regions/region-name/workflowTemplates/",
"params": {}
}```
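For anyone who lands here later, here is a rough, untested sketch of how the request above might map onto the `google-cloud-dataproc` Python client (`dataproc_v1.WorkflowTemplateServiceClient.create_workflow_template`). The project ID, region, cluster name, bucket, and `gs://` URIs are placeholders copied from the request; the `pyspark_step` helper and the `timedelta`-based `dagTimeout` are my own assumptions, not part of the original request.

```python
# Rough, untested sketch using google-cloud-dataproc (pip install google-cloud-dataproc).
# Field names are the snake_case equivalents of the JSON keys in the REST request;
# "project1" / "region-name" are placeholders taken from the request path.
import datetime

from google.cloud import dataproc_v1

project_id = "project1"   # placeholder
region = "region-name"    # placeholder

# Workflow templates must be created against the regional endpoint.
client = dataproc_v1.WorkflowTemplateServiceClient(
    client_options={"api_endpoint": f"{region}-dataproc.googleapis.com:443"}
)


def pyspark_step(step_id, main_uri, jars=(), prereqs=()):
    """Build one workflow step; mirrors the pysparkJob entries in the JSON body."""
    return {
        "step_id": step_id,
        "pyspark_job": {
            "main_python_file_uri": main_uri,
            "jar_file_uris": list(jars),
            "args": ["arg1"],
        },
        "prerequisite_step_ids": list(prereqs),
    }


bq_jar = "gs://spark-lib/bigquery/spark-bigquery-latest_2.12.jar"

template = {
    "id": "load1",
    "placement": {
        "managed_cluster": {
            "cluster_name": "cluster-1",
            "config": {
                "config_bucket": "bucket1",
                "gce_cluster_config": {
                    "service_account_scopes": [
                        "https://www.googleapis.com/auth/cloud-platform"
                    ],
                },
                "master_config": {
                    "num_instances": 1,
                    "machine_type_uri": "n1-standard-4",
                    "disk_config": {
                        "boot_disk_type": "pd-standard",
                        "boot_disk_size_gb": 150,
                    },
                },
                "software_config": {
                    "image_version": "2.0-ubuntu18",
                    "properties": {
                        "dataproc:dataproc.allow.zero.workers": "true"
                    },
                },
            },
        }
    },
    "jobs": [
        pyspark_step("start_job", "gs://temp.py"),
        pyspark_step("tb1", "gs://temp1.py", jars=[bq_jar], prereqs=["start_job"]),
        pyspark_step("tb2", "gs://temp1.py", jars=[bq_jar], prereqs=["start_job"]),
        pyspark_step("tb3", "gs://temp1.py", jars=[bq_jar], prereqs=["start_job"]),
        pyspark_step("tb4", "gs://temp1.py", jars=[bq_jar], prereqs=["start_job"]),
        pyspark_step(
            "end_job", "gs://end_job.py", prereqs=["tb1", "tb2", "tb3", "tb4"]
        ),
    ],
    # dagTimeout "1800s"; proto-plus should accept a timedelta for Duration fields.
    "dag_timeout": datetime.timedelta(seconds=1800),
}

created = client.create_workflow_template(
    parent=f"projects/{project_id}/regions/{region}", template=template
)
print(f"Created workflow template: {created.name}")
```

The template dict is passed directly to `create_workflow_template`, which accepts plain dicts and converts them to the `WorkflowTemplate` proto, so the structure stays close to the JSON body above; fields left empty in the console export (empty strings, empty lists, empty labels) are simply omitted.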