Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions builder/deploy/deployer.go
Original file line number Diff line number Diff line change
Expand Up @@ -1047,13 +1047,19 @@ func (d *deployer) CheckResourceAvailable(ctx context.Context, clusterId string,
}

if clusterResources.Status == types.ClusterStatusUnavailable {
return false, fmt.Errorf("failed to check cluster available resource due to cluster %s status is %s",
err := fmt.Errorf("failed to check cluster available resource due to cluster %s status is %s",
clusterId, clusterResources.Status)
return false, errorx.ClusterUnavailable(err, errorx.Ctx().
Set("cluster ID", clusterId).
Set("region", clusterResources.Region))
}

if clusterResources.ResourceStatus != types.StatusUncertain && !CheckResource(clusterResources, hardWare) {
return false, fmt.Errorf("required resource on cluster %s is not enough with resource status %s",
err := fmt.Errorf("required resource on cluster %s is not enough with resource status %s",
clusterId, clusterResources.ResourceStatus)
return false, errorx.NotEnoughResource(err, errorx.Ctx().
Set("cluster ID", clusterId).
Set("region", clusterResources.Region))
}

return true, nil
Expand Down
44 changes: 44 additions & 0 deletions common/errorx/error_task.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ const (
multiHostInferenceNotSupported
multiHostInferenceReplicaCount
multiHostNotebookNotSupported
notEnoughResource
clusterUnavailable
)

var (
Expand Down Expand Up @@ -61,6 +63,30 @@ var (
//
// zh-HK: 多主機推理僅支持大於 0 的最低副本數
ErrMultiHostInferenceReplicaCount = CustomError{prefix: errTaskPrefix, code: multiHostInferenceReplicaCount}
// not enough resource to run the task
//
// Description: The task requires more resources than are available in the cluster. This error occurs when the cluster does not have sufficient capacity to run the task.
//
// Description_ZH: 任务需要的资源超过了集群可用的资源。当集群资源不足时,会出现此错误。
//
// en-US: Not enough resource to run the task
//
// zh-CN: 集群资源不足
//
// zh-HK: 集群資源不足
ErrNotEnoughResource = CustomError{prefix: errTaskPrefix, code: notEnoughResource}
// cluster is unavailable to run the task
//
// Description: The cluster is currently unavailable, either due to maintenance or other reasons. This error occurs when the cluster is not ready to accept new tasks.
//
// Description_ZH: 集群当前不可用,可能是由于维护或其他原因。当集群未准备好接受新任务时,会出现此错误。
//
// en-US: Cluster is unavailable to run the task
//
// zh-CN: 集群当前不可用
//
// zh-HK: 集群當前不可用
ErrClusterUnavailable = CustomError{prefix: errTaskPrefix, code: clusterUnavailable}
)

func NoEntryFile(err error, ctx context) error {
Expand All @@ -71,3 +97,21 @@ func NoEntryFile(err error, ctx context) error {
code: noEntryFile,
}
}

// NotEnoughResource builds a CustomError carrying the task error prefix and
// the notEnoughResource code. The supplied err is stored as the underlying
// error and ctx is attached as the error's context. The result is returned
// through the error interface.
func NotEnoughResource(err error, ctx context) error {
	taskErr := CustomError{
		code:    notEnoughResource,
		prefix:  errTaskPrefix,
		err:     err,
		context: ctx,
	}
	return taskErr
}

// ClusterUnavailable builds a CustomError carrying the task error prefix and
// the clusterUnavailable code. The supplied err is stored as the underlying
// error and ctx is attached as the error's context. The result is returned
// through the error interface.
func ClusterUnavailable(err error, ctx context) error {
	taskErr := CustomError{
		code:    clusterUnavailable,
		prefix:  errTaskPrefix,
		err:     err,
		context: ctx,
	}
	return taskErr
}
Loading