Convert a Dataproc workflow REST API (POST) request into the equivalent Python (dataproc_v1) SDK code #11086

Description

@Rstar1998

I want to automate the creation of workflows in Dataproc. I manually created a workflow and grabbed its equivalent REST API (POST) request from the GCP console UI, and now I want to do the same thing via the Dataproc Python SDK. I looked at the documentation, but it was very complex to understand, and the examples were few and not nearly as involved as my case. Can someone help me translate the following API request into its Python SDK equivalent? (See the sketch after the request body.)

    "method": "POST",
    "body": {
        "id": "load1",
        "name": "",
        "labels": {},
        "placement": {
            "managedCluster": {
                "clusterName": "cluster-1",
                "config": {
                    "configBucket": "bucket1",
                    "gceClusterConfig": {
                        "serviceAccountScopes": [
                            "https://www.googleapis.com/auth/cloud-platform"
                        ],
                        "networkUri": "",
                        "subnetworkUri": "",
                        "internalIpOnly": false,
                        "zoneUri": "",
                        "metadata": {},
                        "tags": [],
                        "shieldedInstanceConfig": {
                            "enableSecureBoot": false,
                            "enableVtpm": false,
                            "enableIntegrityMonitoring": false
                        }
                    },
                    "masterConfig": {
                        "numInstances": 1,
                        "machineTypeUri": "n1-standard-4",
                        "diskConfig": {
                            "bootDiskType": "pd-standard",
                            "bootDiskSizeGb": "150",
                            "numLocalSsds": 0,
                            "localSsdInterface": "SCSI"
                        },
                        "minCpuPlatform": "",
                        "imageUri": ""
                    },
                    "softwareConfig": {
                        "imageVersion": "2.0-ubuntu18",
                        "properties": {
                            "dataproc:dataproc.allow.zero.workers": "true"
                        },
                        "optionalComponents": []
                    },
                    "initializationActions": []
                },
                "labels": {}
            }
        },
        "jobs": [
            {
                "pysparkJob": {
                    "mainPythonFileUri": "gs://temp.py",
                    "pythonFileUris": [],
                    "jarFileUris": [],
                    "fileUris": [],
                    "archiveUris": [],
                    "properties": {},
                    "args": [
                        "arg1"
                    ]
                },
                "stepId": "start_job",
                "labels": {},
                "prerequisiteStepIds": []
            },
            {
                "pysparkJob": {
                    "mainPythonFileUri": "gs://temp1.py",
                    "pythonFileUris": [],
                    "jarFileUris": [
                        "gs://spark-lib/bigquery/spark-bigquery-latest_2.12.jar"
                    ],
                    "fileUris": [],
                    "archiveUris": [],
                    "properties": {},
                    "args": [
                        "arg1"
                    ]
                },
                "stepId": "tb1",
                "labels": {},
                "prerequisiteStepIds": [
                    "start_job"
                ]
            },
            {
                "pysparkJob": {
                    "mainPythonFileUri": "gs://temp1.py",
                    "pythonFileUris": [],
                    "jarFileUris": [
                        "gs://spark-lib/bigquery/spark-bigquery-latest_2.12.jar"
                    ],
                    "fileUris": [],
                    "archiveUris": [],
                    "properties": {},
                    "args": [
                        "arg1"
                    ]
                },
                "stepId": "tb2",
                "labels": {},
                "prerequisiteStepIds": [
                    "start_job"
                ]
            },
            {
                "pysparkJob": {
                    "mainPythonFileUri": "gs://temp1.py",
                    "pythonFileUris": [],
                    "jarFileUris": [
                        "gs://spark-lib/bigquery/spark-bigquery-latest_2.12.jar"
                    ],
                    "fileUris": [],
                    "archiveUris": [],
                    "properties": {},
                    "args": [
                        "arg1"
                       
                    ]
                },
                "stepId": "tb3",
                "labels": {},
                "prerequisiteStepIds": [
                    "start_job"
                ]
            },
            {
                "pysparkJob": {
                    "mainPythonFileUri": "gs://temp1.py",
                    "pythonFileUris": [],
                    "jarFileUris": [
                        "gs://spark-lib/bigquery/spark-bigquery-latest_2.12.jar"
                    ],
                    "fileUris": [],
                    "archiveUris": [],
                    "properties": {},
                    "args": [
                        "arg1"
                    ]
                },
                "stepId": "tb4",
                "labels": {},
                "prerequisiteStepIds": [
                    "start_job"
                ]
            },
            {
                "pysparkJob": {
                    "mainPythonFileUri": "gs://end_job.py",
                    "pythonFileUris": [],
                    "jarFileUris": [],
                    "fileUris": [],
                    "archiveUris": [],
                    "properties": {},
                    "args": [
                        "arg1"
                    ]
                },
                "stepId": "end_job",
                "labels": {},
                "prerequisiteStepIds": [
                    "tb1",
                    "tb2",
                    "tb3",
                    "tb4"
                ]
            }
        ],
        "parameters": [],
        "dagTimeout": "1800s"
    },
    "path": "/v1/projects/project1/regions/region-name/workflowTemplates/",
    "params": {}
}
```
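
For reference, here is a minimal sketch of the equivalent `dataproc_v1` call, assuming `google-cloud-dataproc` is installed and reusing the `project1` / `region-name` placeholders from the request path above. The client library accepts the template as a plain Python dict with snake_case keys (mirroring the camelCase REST fields), so the body translates almost mechanically; fields that were empty strings, empty lists, or all-false defaults in the console export are omitted here because they match the proto defaults anyway.

```python
import datetime

from google.cloud import dataproc_v1

project_id = "project1"  # placeholder from the REST path above
region = "region-name"   # placeholder from the REST path above

# Workflow template calls must go to the regional Dataproc endpoint.
client = dataproc_v1.WorkflowTemplateServiceClient(
    client_options={"api_endpoint": f"{region}-dataproc.googleapis.com:443"}
)

# First step has no prerequisites.
jobs = [
    {
        "step_id": "start_job",
        "pyspark_job": {"main_python_file_uri": "gs://temp.py", "args": ["arg1"]},
    }
]

# tb1..tb4 are identical apart from their step ids, so build them in a loop.
for step in ("tb1", "tb2", "tb3", "tb4"):
    jobs.append(
        {
            "step_id": step,
            "prerequisite_step_ids": ["start_job"],
            "pyspark_job": {
                "main_python_file_uri": "gs://temp1.py",
                "jar_file_uris": [
                    "gs://spark-lib/bigquery/spark-bigquery-latest_2.12.jar"
                ],
                "args": ["arg1"],
            },
        }
    )

# Final step waits on all four parallel steps.
jobs.append(
    {
        "step_id": "end_job",
        "prerequisite_step_ids": ["tb1", "tb2", "tb3", "tb4"],
        "pyspark_job": {"main_python_file_uri": "gs://end_job.py", "args": ["arg1"]},
    }
)

template = {
    "id": "load1",
    "placement": {
        "managed_cluster": {
            "cluster_name": "cluster-1",
            "config": {
                "config_bucket": "bucket1",
                "gce_cluster_config": {
                    "service_account_scopes": [
                        "https://www.googleapis.com/auth/cloud-platform"
                    ]
                },
                "master_config": {
                    "num_instances": 1,
                    "machine_type_uri": "n1-standard-4",
                    "disk_config": {
                        "boot_disk_type": "pd-standard",
                        "boot_disk_size_gb": 150,
                    },
                },
                "software_config": {
                    "image_version": "2.0-ubuntu18",
                    "properties": {"dataproc:dataproc.allow.zero.workers": "true"},
                },
            },
        }
    },
    "jobs": jobs,
    # dag_timeout is a protobuf Duration, so pass a timedelta for "1800s".
    "dag_timeout": datetime.timedelta(seconds=1800),
}

created = client.create_workflow_template(
    parent=f"projects/{project_id}/regions/{region}",
    template=template,
)
print(f"Created workflow template: {created.name}")
```

Once created, the template can be run with `client.instantiate_workflow_template(name=created.name)`, which returns a long-running operation you can call `result()` on to wait for completion.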

Metadata

Labels

api: dataproc (Issues related to the Dataproc API)
type: question (Request for information or clarification. Not an issue.)
