diff --git a/builder/deploy/deployer.go b/builder/deploy/deployer.go
index 5858752a..e5885e68 100644
--- a/builder/deploy/deployer.go
+++ b/builder/deploy/deployer.go
@@ -1047,13 +1047,19 @@ func (d *deployer) CheckResourceAvailable(ctx context.Context, clusterId string,
 	}
 
 	if clusterResources.Status == types.ClusterStatusUnavailable {
-		return false, fmt.Errorf("failed to check cluster available resource due to cluster %s status is %s",
+		err := fmt.Errorf("failed to check cluster available resource due to cluster %s status is %s",
 			clusterId, clusterResources.Status)
+		return false, errorx.ClusterUnavailable(err, errorx.Ctx().
+			Set("cluster ID", clusterId).
+			Set("region", clusterResources.Region))
 	}
 
 	if clusterResources.ResourceStatus != types.StatusUncertain && !CheckResource(clusterResources, hardWare) {
-		return false, fmt.Errorf("required resource on cluster %s is not enough with resource status %s",
+		err := fmt.Errorf("required resource on cluster %s is not enough with resource status %s",
 			clusterId, clusterResources.ResourceStatus)
+		return false, errorx.NotEnoughResource(err, errorx.Ctx().
+			Set("cluster ID", clusterId).
+			Set("region", clusterResources.Region))
 	}
 
 	return true, nil
diff --git a/common/errorx/error_task.go b/common/errorx/error_task.go
index bfa86bb2..a8d3c08e 100644
--- a/common/errorx/error_task.go
+++ b/common/errorx/error_task.go
@@ -7,6 +7,8 @@ const (
 	multiHostInferenceNotSupported
 	multiHostInferenceReplicaCount
 	multiHostNotebookNotSupported
+	notEnoughResource
+	clusterUnavailable
 )
 
 var (
@@ -61,6 +63,30 @@ var (
 	//
 	// zh-HK: 多主機推理僅支持大於 0 的最低副本數
 	ErrMultiHostInferenceReplicaCount = CustomError{prefix: errTaskPrefix, code: multiHostInferenceReplicaCount}
+	// not enough resource to run the task
+	//
+	// Description: The task requires more resources than are available in the cluster. This error occurs when the cluster does not have sufficient capacity to run the task.
+	//
+	// Description_ZH: 任务需要的资源超过了集群可用的资源。当集群资源不足时,会出现此错误。
+	//
+	// en-US: Not enough resource to run the task
+	//
+	// zh-CN: 集群资源不足
+	//
+	// zh-HK: 集群資源不足
+	ErrNotEnoughResource = CustomError{prefix: errTaskPrefix, code: notEnoughResource}
+	// cluster is unavailable to run the task
+	//
+	// Description: The cluster is currently unavailable, either due to maintenance or other reasons. This error occurs when the cluster is not ready to accept new tasks.
+	//
+	// Description_ZH: 集群当前不可用,可能是由于维护或其他原因。当集群未准备好接受新任务时,会出现此错误。
+	//
+	// en-US: Cluster is unavailable to run the task
+	//
+	// zh-CN: 集群当前不可用
+	//
+	// zh-HK: 集群當前不可用
+	ErrClusterUnavailable = CustomError{prefix: errTaskPrefix, code: clusterUnavailable}
 )
 
 func NoEntryFile(err error, ctx context) error {
@@ -71,3 +97,25 @@ func NoEntryFile(err error, ctx context) error {
 		code:    noEntryFile,
 	}
 }
+
+// NotEnoughResource returns a CustomError with the task error prefix and the
+// notEnoughResource code, wrapping err and carrying ctx as its context.
+func NotEnoughResource(err error, ctx context) error {
+	return CustomError{
+		prefix:  errTaskPrefix,
+		context: ctx,
+		err:     err,
+		code:    notEnoughResource,
+	}
+}
+
+// ClusterUnavailable returns a CustomError with the task error prefix and the
+// clusterUnavailable code, wrapping err and carrying ctx as its context.
+func ClusterUnavailable(err error, ctx context) error {
+	return CustomError{
+		prefix:  errTaskPrefix,
+		context: ctx,
+		err:     err,
+		code:    clusterUnavailable,
+	}
+}