diff --git a/aigateway/component/openai.go b/aigateway/component/openai.go index 637ae5cd..3c28a71d 100644 --- a/aigateway/component/openai.go +++ b/aigateway/component/openai.go @@ -5,6 +5,7 @@ import ( "encoding/json" "fmt" "log/slog" + "strings" "time" "github.com/google/uuid" @@ -54,16 +55,19 @@ func (m *openaiComponentImpl) GetAvailableModels(c context.Context, userName str if err != nil { slog.Error("get deploy hardware ") } + // Check if engine_args contains tool-call-parser parameter + supportFunctionCall := strings.Contains(deploy.EngineArgs, "tool-call-parser") m := types.Model{ - Object: "model", - Created: deploy.CreatedAt.Unix(), - Task: string(deploy.Task), - CSGHubModelID: deploy.Repository.Path, - SvcName: deploy.SvcName, - SvcType: deploy.Type, - Hardware: hardwareInfo, - RuntimeFramework: deploy.RuntimeFramework, - ImageID: deploy.ImageID, + Object: "model", + Created: deploy.CreatedAt.Unix(), + Task: string(deploy.Task), + CSGHubModelID: deploy.Repository.Path, + SvcName: deploy.SvcName, + SvcType: deploy.Type, + Hardware: hardwareInfo, + RuntimeFramework: deploy.RuntimeFramework, + ImageID: deploy.ImageID, + SupportFunctionCall: supportFunctionCall, } modelName := "" if deploy.Repository.HFPath != "" { diff --git a/aigateway/types/openai.go b/aigateway/types/openai.go index 1db7485c..c6ca9598 100644 --- a/aigateway/types/openai.go +++ b/aigateway/types/openai.go @@ -16,9 +16,10 @@ type Model struct { SvcName string `json:"-"` // the internal service name in CSGHub SvcType int `json:"-"` // the internal service type like dedicated or serverless in CSGHub - Hardware types.HardWare `json:"-"` // the deployed hardware - RuntimeFramework string `json:"-"` // the deployed framework - ImageID string `json:"-"` // the deployed image + Hardware types.HardWare `json:"-"` // the deployed hardware + RuntimeFramework string `json:"-"` // the deployed framework + ImageID string `json:"-"` // the deployed image + SupportFunctionCall bool 
`json:"support_function_call,omitempty"` // whether the model supports function calling } // ModelList represents the model list response diff --git a/api/handler/cluster.go b/api/handler/cluster.go index 36d72630..a7973f4b 100644 --- a/api/handler/cluster.go +++ b/api/handler/cluster.go @@ -2,9 +2,11 @@ package handler import ( "encoding/csv" + "fmt" "log/slog" "net/http" "strconv" + "time" "github.com/gin-gonic/gin" "opencsg.com/csghub-server/api/httpbase" @@ -29,6 +31,11 @@ type ClusterHandler struct { c component.ClusterComponent } +const ( + deployTimeLayout = "2006-01-02 15:04:05" + deployDateOnlyLayout = "2006-01-02" +) + // Getclusters godoc // @Security ApiKey // @Summary Get cluster list @@ -146,6 +153,8 @@ func (h *ClusterHandler) GetDeploys(ctx *gin.Context) { // @Produce text/csv // @Param status query string false "status" default(all) Enums(all, running, stopped, deployfailed) // @Param search query string false "search" default("") +// @Param start_time query string false "filter deploys created after or at this time" +// @Param end_time query string false "filter deploys created before or at this time" // @Success 200 {string} string "CSV file" // @Failure 400 {object} types.APIBadRequest "Bad request" // @Failure 500 {object} types.APIInternalServerError "Internal server error" @@ -165,6 +174,11 @@ func (h *ClusterHandler) GetDeploysReport(ctx *gin.Context) { req.Status = []int{code.DeployFailed} } req.Query = ctx.Query("search") + if err := bindDeployDateRange(ctx, &req); err != nil { + slog.Error("Invalid date range for deploy report", slog.Any("error", err)) + httpbase.BadRequest(ctx, err.Error()) + return + } filename := "deploys_report.csv" ctx.Header("Content-Type", "text/csv; charset=utf-8") @@ -189,7 +203,6 @@ func (h *ClusterHandler) GetDeploysReport(ctx *gin.Context) { }) writer.Flush() - const timeLayout = "2006-01-02 15:04:05" totalProcessed := 0 for { @@ -207,7 +220,7 @@ func (h *ClusterHandler) GetDeploysReport(ctx *gin.Context) { 
d.DeployName, d.User.Username, d.Resource, - d.CreateTime.Local().Format(timeLayout), + d.CreateTime.Local().Format(deployTimeLayout), d.Status, strconv.Itoa(d.TotalTimeInMin), strconv.Itoa(d.TotalFeeInCents),
@@ -246,3 +259,53 @@ func (h *ClusterHandler) Update(ctx *gin.Context) {
 	}
 	httpbase.OK(ctx, result)
 }
+
+// bindDeployDateRange reads the optional start_time and end_time query
+// parameters and stores the parsed range on req.StartTime and req.EndTime.
+// When both parameters are absent req is left untouched. It returns an error
+// when only one bound is given, when a value cannot be parsed, or when the
+// end of the range precedes its start.
+func bindDeployDateRange(ctx *gin.Context, req *types.DeployReq) error {
+	startTime := ctx.Query("start_time")
+	endTime := ctx.Query("end_time")
+	if startTime == "" && endTime == "" {
+		return nil
+	}
+	if startTime == "" || endTime == "" {
+		return fmt.Errorf("start_time and end_time must be provided together")
+	}
+	parsedStart, err := parseDeployQueryTime(startTime, false)
+	if err != nil {
+		return err
+	}
+	parsedEnd, err := parseDeployQueryTime(endTime, true)
+	if err != nil {
+		return err
+	}
+	// A range whose end precedes its start cannot match any timestamp, so
+	// reject it explicitly.
+	if parsedEnd.Before(parsedStart) {
+		return fmt.Errorf("start_time must not be after end_time")
+	}
+	req.StartTime = &parsedStart
+	req.EndTime = &parsedEnd
+	return nil
+}
+
+// parseDeployQueryTime parses value in UTC, trying deployTimeLayout first and
+// deployDateOnlyLayout second. A date-only value used as the end of a range
+// (isEnd) is advanced to the last nanosecond of that day so the whole day is
+// covered. It returns an error when value matches neither layout.
+func parseDeployQueryTime(value string, isEnd bool) (time.Time, error) {
+	if parsed, err := time.ParseInLocation(deployTimeLayout, value, time.UTC); err == nil {
+		return parsed, nil
+	}
+	parsed, err := time.ParseInLocation(deployDateOnlyLayout, value, time.UTC)
+	if err != nil {
+		return time.Time{}, fmt.Errorf("invalid datetime format, use '%s' or '%s'", deployTimeLayout, deployDateOnlyLayout)
+	}
+	if isEnd {
+		parsed = parsed.Add(24*time.Hour - time.Nanosecond)
+	}
+	return parsed, nil
+}
diff --git a/api/handler/cluster_test.go b/api/handler/cluster_test.go index 4ef49761..29dcac81 100644 --- a/api/handler/cluster_test.go +++ b/api/handler/cluster_test.go @@ -4,6 +4,7 @@ import ( "context" "net/http" "testing" + "time" "github.com/gin-gonic/gin" "github.com/stretchr/testify/mock" @@ -98,12 +99,25 @@ func Test_GetDeploysReport(t *testing.T) { }, } + start := "2024-01-01 00:00:00" + end := "2024-01-31" + expectedStart, err := time.ParseInLocation(time.DateTime, start, time.UTC) + require.NoError(t, err) + endDate, err := 
time.ParseInLocation("2006-01-02", end, time.UTC) + require.NoError(t, err) + expectedEnd := endDate.Add(24*time.Hour - time.Nanosecond) tester.mocks.clusterComponent.EXPECT(). GetDeploys(context.Background(), mock.Anything). + Run(func(_ context.Context, req types.DeployReq) { + require.NotNil(t, req.StartTime) + require.True(t, req.StartTime.Equal(expectedStart)) + require.NotNil(t, req.EndTime) + require.True(t, req.EndTime.Equal(expectedEnd)) + }). Once(). Return(rows, len(rows), nil) - tester.Execute() + tester.WithQuery("start_time", start).WithQuery("end_time", end).Execute() // assert response headers and body resp := tester.Response() @@ -117,3 +131,21 @@ func Test_GetDeploysReport(t *testing.T) { require.Contains(t, body, "alice") require.Contains(t, body, "bob") } + +func Test_GetDeploysReport_InvalidDateRange(t *testing.T) { + tester := newClusterTester(t).withHandlerFunc(func(clusterHandler *ClusterHandler) gin.HandlerFunc { + return clusterHandler.GetDeploysReport + }) + + tester.WithQuery("start_time", "2024-01-01").Execute() + tester.ResponseEqSimple(t, http.StatusBadRequest, httpbase.R{Msg: "start_time and end_time must be provided together"}) +} + +func Test_GetDeploysReport_InvalidFormat(t *testing.T) { + tester := newClusterTester(t).withHandlerFunc(func(clusterHandler *ClusterHandler) gin.HandlerFunc { + return clusterHandler.GetDeploysReport + }) + + tester.WithQuery("start_time", "invalid").WithQuery("end_time", "2024-01-01").Execute() + tester.ResponseEqSimple(t, http.StatusBadRequest, httpbase.R{Msg: "invalid datetime format, use '2006-01-02 15:04:05' or '2006-01-02'"}) +} diff --git a/builder/store/database/deploy_task.go b/builder/store/database/deploy_task.go index 58f15c39..12684282 100644 --- a/builder/store/database/deploy_task.go +++ b/builder/store/database/deploy_task.go @@ -400,7 +400,14 @@ func (s *deployTaskStoreImpl) ListDeployByType(ctx context.Context, req types.De query = query.Where("deploy_name LIKE ? 
OR \"user\".\"username\" LIKE ? OR cluster_id LIKE ?", "%"+req.Query+"%", "%"+req.Query+"%", "%"+req.Query+"%") } - query = query.Order("created_at DESC").Limit(req.PageSize).Offset((req.Page - 1) * req.PageSize) + if req.StartTime != nil { + query = query.Where("deploy.created_at >= ?", req.StartTime) + } + if req.EndTime != nil { + query = query.Where("deploy.created_at <= ?", req.EndTime) + } + + query = query.Order("deploy.created_at DESC").Limit(req.PageSize).Offset((req.Page - 1) * req.PageSize) _, err := query.Exec(ctx, &result) if err != nil { err = errorx.HandleDBError(err, nil) diff --git a/builder/store/database/deploy_task_test.go b/builder/store/database/deploy_task_test.go index dab076b4..d3258cdd 100644 --- a/builder/store/database/deploy_task_test.go +++ b/builder/store/database/deploy_task_test.go @@ -3,6 +3,7 @@ package database_test import ( "context" "testing" + "time" "github.com/stretchr/testify/require" "opencsg.com/csghub-server/builder/deploy/common" @@ -567,6 +568,19 @@ func TestDeployTaskStore_ListDeployBytype(t *testing.T) { require.Nil(t, err) } + now := time.Now().UTC().Truncate(time.Second) + older := now.Add(-72 * time.Hour) + middle := now.Add(-12 * time.Hour) + latest := now.Add(-1 * time.Hour) + _, err := db.BunDB.ExecContext(ctx, "UPDATE deploys SET created_at = ?, updated_at = ? WHERE deploy_name = ?", older, older, "running1") + require.NoError(t, err) + _, err = db.BunDB.ExecContext(ctx, "UPDATE deploys SET created_at = ?, updated_at = ? WHERE deploy_name = ?", older.Add(24*time.Hour), older.Add(24*time.Hour), "stopped1") + require.NoError(t, err) + _, err = db.BunDB.ExecContext(ctx, "UPDATE deploys SET created_at = ?, updated_at = ? WHERE deploy_name = ?", middle, middle, "running2") + require.NoError(t, err) + _, err = db.BunDB.ExecContext(ctx, "UPDATE deploys SET created_at = ?, updated_at = ? 
WHERE deploy_name = ?", latest, latest, "deploy1") + require.NoError(t, err) + // Only test running ones var req types.DeployReq req.Page = 1 @@ -578,6 +592,25 @@ func TestDeployTaskStore_ListDeployBytype(t *testing.T) { result, _, err = store.ListDeployByType(ctx, req) require.Nil(t, err) require.Equal(t, 2, len(result)) + + startWindow := now.Add(-24 * time.Hour) + req.StartTime = &startWindow + endDate, err := time.ParseInLocation("2006-01-02", now.Format("2006-01-02"), time.UTC) + require.NoError(t, err) + endWindow := endDate.Add(24*time.Hour - time.Nanosecond) + req.EndTime = &endWindow + result, _, err = store.ListDeployByType(ctx, req) + require.Nil(t, err) + require.Equal(t, 1, len(result)) + require.Equal(t, "running2", result[0].DeployName) + + startWindow = now.Add(-5 * time.Hour) + req.StartTime = &startWindow + endWindow = now + req.EndTime = &endWindow + result, _, err = store.ListDeployByType(ctx, req) + require.Nil(t, err) + require.Equal(t, 0, len(result)) } func TestDeployTaskStore_DeleteDeployByID(t *testing.T) { db := tests.InitTestDB() diff --git a/builder/store/database/migrations/20251112120000_add_ms_swift_datasets.down.sql b/builder/store/database/migrations/20251112120000_add_ms_swift_datasets.down.sql new file mode 100644 index 00000000..636e0b60 --- /dev/null +++ b/builder/store/database/migrations/20251112120000_add_ms_swift_datasets.down.sql @@ -0,0 +1,6 @@ +SET statement_timeout = 0; + +--bun:split + +-- Rollback SQL statements for MS-SWIFT dataset tags +DELETE FROM tag_rules WHERE runtime_framework = 'ms-swift' AND repo_type = 'dataset' AND category = 'task' AND source = 'ms'; diff --git a/builder/store/database/migrations/20251112120000_add_ms_swift_datasets.up.sql b/builder/store/database/migrations/20251112120000_add_ms_swift_datasets.up.sql new file mode 100644 index 00000000..6b57e001 --- /dev/null +++ b/builder/store/database/migrations/20251112120000_add_ms_swift_datasets.up.sql @@ -0,0 +1,378 @@ +SET statement_timeout = 0; + 
+--bun:split + +-- Generated SQL statements for MS-SWIFT dataset tags +-- Source: https://swift.readthedocs.io/en/latest/Instruction/Supported-models-and-datasets.html#datasets + +INSERT INTO tags (name, category, "group", scope, built_in, show_name) VALUES ('ms-swift', 'task', 'finetune', 'dataset', false, 'ms-swift') ON CONFLICT ("name", category, scope) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-MO', 'NuminaMath-1.5', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-MO', 'NuminaMath-CoT', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-MO', 'NuminaMath-TIR', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'COIG-CQIA', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'CodeAlpaca-20k', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'DISC-Law-SFT', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES 
('AI-ModelScope', 'DISC-Med-SFT', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'Duet-v0.5', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'GuanacoDataset', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'LLaVA-Instruct-150K', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'LLaVA-Pretrain', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'LaTeX_OCR', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'LongAlpaca-12k', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'M3IT', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, 
runtime_framework, source) VALUES ('AI-ModelScope', 'MATH-lighteval', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'Magpie-Qwen2-Pro-200K-Chinese', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'Magpie-Qwen2-Pro-200K-English', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'Magpie-Qwen2-Pro-300K-Filtered', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'MathInstruct', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'MovieChat-1K-test', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'Open-Platypus', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'OpenO1-SFT', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO 
NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'OpenOrca', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'OpenOrca-Chinese', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'SFT-Nectar', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'ShareGPT-4o', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'ShareGPT4V', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'SkyPile-150B', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'WizardLM_evol_instruct_V2_196k', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'alpaca-cleaned', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON 
CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'alpaca-gpt4-data-en', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'alpaca-gpt4-data-zh', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'blossom-math-v2', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'captcha-images', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'chartqa_digit_r1v_format', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'clevr_cogen_a_train', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'coco', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 
'databricks-dolly-15k', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'deepctrl-sft-data', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'egoschema', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'firefly-train-1.1M', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'function-calling-chatml', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'generated_chat_0.4M', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'guanaco_belle_merge_v1.0', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'hh-rlhf', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, 
tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'hh_rlhf_cn', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'lawyer_llama_data', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'leetcode-solutions-python', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'lmsys-chat-1m', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'math-trn-format', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'ms_agent_for_agentfabric', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'orpo-dpo-mix-40k', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'pile', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + 
+INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'ruozhiba', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'school_math_0.25M', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'sharegpt_gpt4', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'sql-create-context', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'stack-exchange-paired', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'starcoderdata', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'synthetic_text_to_sql', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'texttosqlv2_25000_v2', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') 
ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'the-stack', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'tigerbot-law-plugin', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'train_0.5M_CN', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'train_1M_CN', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'train_2M_CN', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'tulu-v2-sft-mixture', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'ultrafeedback-binarized-preferences-cleaned-kto', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES 
('AI-ModelScope', 'webnovel_cn', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'wikipedia-cn-20230720-filtered', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('AI-ModelScope', 'zhihu_rlhf_3k', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('DAMO_NLP', 'jd', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('FreedomIntelligence', 'medical-o1-reasoning-SFT', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('HumanLLMs', 'Human-Like-DPO-Dataset', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('LLM-Research', 'xlam-function-calling-60k', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('MTEB', 'scidocs-reranking', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, 
category, tag_name, repo_type, runtime_framework, source) VALUES ('MTEB', 'stackoverflowdupquestions-reranking', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('OmniData', 'Zhihu-KOL', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('OmniData', 'Zhihu-KOL-More-Than-100-Upvotes', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('PowerInfer', 'LONGCOT-Refine-500K', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('PowerInfer', 'QWQ-LONGCOT-500K', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('ServiceNow-AI', 'R1-Distill-SFT', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('TIGER-Lab', 'MATH-plus', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('Tongyi-DataEngine', 'SA1B-Dense-Caption', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO 
NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('Tongyi-DataEngine', 'SA1B-Paired-Captions-Images', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('YorickHe', 'CoT', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('YorickHe', 'CoT_zh', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('ZhipuAI', 'LongWriter-6k', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('bespokelabs', 'Bespoke-Stratos-17k', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('codefuse-ai', 'CodeExercise-Python-27k', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('codefuse-ai', 'Evol-instruction-66k', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('damo', 'MSAgent-Bench', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT 
(namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('damo', 'nlp_polylm_multialpaca_sft', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('damo', 'zh_cls_fudan-news', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('damo', 'zh_ner-JAVE', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('hjh0119', 'shareAI-Llama3-DPO-zh-en-emoji', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('huangjintao', 'AgentInstruct_copy', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('iic', '100PoisonMpts', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('iic', 'DocQA-RL-1.6K', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('iic', 'MSAgent-MultiRole', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON 
CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('iic', 'MSAgent-Pro', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('iic', 'ms_agent', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('iic', 'ms_bench', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('liucong', 'Chinese-DeepSeek-R1-Distill-data-110k-SFT', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('lmms-lab', 'multimodal-open-r1-8k-verified', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('lvjianjin', 'AdvertiseGen', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('mapjack', 'openwebtext_dataset', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('modelscope', 'DuReader_robust-QG', 'task', 'ms-swift', 'dataset', 
'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('modelscope', 'MathR', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('modelscope', 'MathR-32B-Distill', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('modelscope', 'chinese-poetry-collection', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('modelscope', 'clue', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('modelscope', 'coco_2014_caption', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('modelscope', 'gsm8k', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('open-r1', 'verifiable-coding-problems-python', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('open-r1', 
'verifiable-coding-problems-python-10k', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('open-r1', 'verifiable-coding-problems-python-10k_decontaminated', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('open-r1', 'verifiable-coding-problems-python_decontaminated', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('open-thoughts', 'OpenThoughts-114k', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'self-cognition', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('sentence-transformers', 'stsb', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('shenweizhou', 'alpha-umi-toolbench-processed-v2', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('simpleai', 'HC3', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO 
tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('simpleai', 'HC3-Chinese', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('speech_asr', 'speech_asr_aishell1_trainsets', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'A-OKVQA', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'ChartQA', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'Chinese-Qwen3-235B-2507-Distill-data-110k-SFT', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'Chinese-Qwen3-235B-Thinking-2507-Distill-data-110k-SFT', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'GRIT', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'GenQA', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, 
category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'Infinity-Instruct', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'Mantis-Instruct', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'MideficsDataset', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'Multimodal-Mind2Web', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'OCR-VQA', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'OK-VQA_train', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'OpenHermes-2.5', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'RLAIF-V-Dataset', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; 
+ +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'RedPajama-Data-1T', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'RedPajama-Data-V2', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'ScienceQA', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'SlimOrca', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'TextCaps', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'ToolBench', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'VQAv2', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'VideoChatGPT', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, 
category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'WebInstructSub', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'aya_collection', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'chinese-c4', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'cinepile', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'classical_chinese_translate', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'cosmopedia-100k', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'dolma', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'dolphin', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, 
runtime_framework, source) VALUES ('swift', 'github-code', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'gpt4v-dataset', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'llava-data', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'llava-instruct-mix-vsft', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'llava-med-zh-instruct-60k', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'lnqa', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'longwriter-6k-filtered', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'medical_zh', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, 
runtime_framework, source) VALUES ('swift', 'moondream2-coyo-5M-captions', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'no_robots', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'orca_dpo_pairs', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'path-vqa', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'pile-val-backup', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'pixelprose', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'refcoco', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'refcocog', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES 
('swift', 'sharegpt', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'swift-sft-mixture', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'tagengo-gpt4', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'train_3.5M_CN', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'ultrachat_200k', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('swift', 'wikipedia', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('tany0699', 'garbage265', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('tastelikefeet', 'competition_math', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('wyj123456', 'GPT4all', 
'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('wyj123456', 'code_alpaca_en', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('wyj123456', 'finance_en', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('wyj123456', 'instinwild', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('wyj123456', 'instruct', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; + +INSERT INTO tag_rules (namespace, repo_name, category, tag_name, repo_type, runtime_framework, source) VALUES ('zouxuhong', 'Countdown-Tasks-3to4', 'task', 'ms-swift', 'dataset', 'ms-swift', 'ms') ON CONFLICT (namespace, repo_name, category) DO NOTHING; diff --git a/common/types/accounting.go b/common/types/accounting.go index 32a03db8..a194c525 100644 --- a/common/types/accounting.go +++ b/common/types/accounting.go @@ -219,6 +219,22 @@ type AcctStatementsRes struct { SkuDesc string `json:"sku_desc"` } +// AcctStatementsResFiltered is a filtered version of AcctStatementsRes that excludes certain fields +type AcctStatementsResFiltered struct { + ID int64 `json:"id"` + UserUUID string `json:"user_id"` + Value float64 `json:"value"` + Scene int `json:"scene"` + InstanceName string `json:"instance_name"` + CreatedAt time.Time `json:"created_at"` + Consumption float64 `json:"consumption"` + 
UserName string `json:"user_name"` + SkuID int64 `json:"sku_id"` + SkuType int `json:"sku_type"` + SkuKind int `json:"sku_kind"` + SkuDesc string `json:"sku_desc"` +} + type RechargeReq struct { Value float64 `json:"value" binding:"min=1"` OpUID string `json:"op_uid"` diff --git a/common/types/deploy.go b/common/types/deploy.go index 5104f234..77c9fae1 100644 --- a/common/types/deploy.go +++ b/common/types/deploy.go @@ -1,5 +1,7 @@ package types +import "time" + type DeployReq struct { CurrentUser string `json:"current_user"` PageOpts @@ -8,6 +10,8 @@ type DeployReq struct { DeployTypes []int `json:"deploy_types"` Status []int `json:"status"` Query string `json:"query"` + StartTime *time.Time `json:"start_time,omitempty"` + EndTime *time.Time `json:"end_time,omitempty"` } type ServiceEvent struct { diff --git a/common/types/repo.go b/common/types/repo.go index 1a6dfafc..0414d4ec 100644 --- a/common/types/repo.go +++ b/common/types/repo.go @@ -6,11 +6,13 @@ import ( var REPOCARD_FILENAME = "README.md" -type RepositoryType string -type RepositorySource string -type RepositorySyncStatus string -type PipelineTask string -type InferenceEngine string +type ( + RepositoryType string + RepositorySource string + RepositorySyncStatus string + PipelineTask string + InferenceEngine string +) type SensitiveCheckStatus int @@ -61,11 +63,11 @@ const ( SyncStatusCompleted RepositorySyncStatus = "completed" SyncStatusCanceled RepositorySyncStatus = "canceled" - SensitiveCheckFail SensitiveCheckStatus = -1 //sensitive content detected - SensitiveCheckPending SensitiveCheckStatus = 0 //default - SensitiveCheckPass SensitiveCheckStatus = 1 //pass - SensitiveCheckSkip SensitiveCheckStatus = 2 //skip - SensitiveCheckException SensitiveCheckStatus = 3 //error happen + SensitiveCheckFail SensitiveCheckStatus = -1 // sensitive content detected + SensitiveCheckPending SensitiveCheckStatus = 0 // default + SensitiveCheckPass SensitiveCheckStatus = 1 // pass + SensitiveCheckSkip 
SensitiveCheckStatus = 2 // skip + SensitiveCheckException SensitiveCheckStatus = 3 // error happen EndpointPublic int = 1 // public - anyone can access EndpointPrivate int = 2 // private - access with read permission @@ -95,8 +97,10 @@ const ( MaxFileTreeSize int = 500 ) -var Sorts = []string{"trending", "recently_update", "most_download", "most_favorite", "most_star"} -var Sources = []string{"opencsg", "huggingface", "local"} +var ( + Sorts = []string{"trending", "recently_update", "most_download", "most_favorite", "most_star"} + Sources = []string{"opencsg", "huggingface", "local"} +) type RepoRequest struct { Namespace string `json:"namespace"` @@ -160,56 +164,59 @@ type InstanceInfo struct { // repo object(cover model/space/code/dataset) for deployer type DeployRepo struct { - DeployID int64 `json:"deploy_id,omitempty"` - DeployName string `json:"deploy_name,omitempty"` - SpaceID int64 `json:"space_id,omitempty"` - Path string `json:"model_id,omitempty"` // csghub ask for model_id = namespace/name - Namespace string `json:"namespace,omitempty"` - Name string `json:"name,omitempty"` - Status string `json:"status"` - GitPath string `json:"git_path,omitempty"` - GitBranch string `json:"git_branch,omitempty"` - Sdk string `json:"sdk,omitempty"` - SdkVersion string `json:"sdk_version,omitempty"` - Env string `json:"env,omitempty"` - Secret string `json:"secret,omitempty"` - Template string `json:"template,omitempty"` - Hardware string `json:"hardware,omitempty"` - ImageID string `json:"image_id,omitempty"` - UserID int64 `json:"user_id,omitempty"` - ModelID int64 `json:"repo_model_id,omitempty"` // for URM code logic - RepoID int64 `json:"repository_id,omitempty"` - RuntimeFramework string `json:"runtime_framework,omitempty"` - ContainerPort int `json:"container_port,omitempty"` - Annotation string `json:"annotation,omitempty"` - MinReplica int `json:"min_replica,omitempty"` - MaxReplica int `json:"max_replica,omitempty"` - SvcName string 
`json:"svc_name,omitempty"` - Endpoint string `json:"endpoint,omitempty"` - CreatedAt time.Time `json:"created_at,omitempty"` - UpdatedAt time.Time `json:"updated_at,omitempty"` - ClusterID string `json:"cluster_id,omitempty"` - SecureLevel int `json:"secure_level,omitempty"` - ActualReplica int `json:"actual_replica,omitempty"` - DesiredReplica int `json:"desired_replica,omitempty"` - Instances []Instance `json:"instances,omitempty"` - InstanceName string `json:"instance_name,omitempty"` - Private bool `json:"private"` - Type int `json:"type,omitempty"` - ProxyEndpoint string `json:"proxy_endpoint,omitempty"` - UserUUID string `json:"user_uuid,omitempty"` - SKU string `json:"sku,omitempty"` - OrderDetailID int64 `json:"order_detail_id,omitempty"` - PayMode PayMode `json:"pay_mode,omitempty"` - Provider string `json:"provider,omitempty"` - ResourceType string `json:"resource_type,omitempty"` - RepoTag string `json:"repo_tag,omitempty"` - Task string `json:"task,omitempty"` - EngineArgs string `json:"engine_args,omitempty"` - Variables string `json:"variables,omitempty"` - Entrypoint string `json:"entrypoint,omitempty"` - Reason string `json:"reason,omitempty"` - Message string `json:"message,omitempty"` + DeployID int64 `json:"deploy_id,omitempty"` + DeployName string `json:"deploy_name,omitempty"` + SpaceID int64 `json:"space_id,omitempty"` + Path string `json:"model_id,omitempty"` // csghub ask for model_id = namespace/name + Namespace string `json:"namespace,omitempty"` + Name string `json:"name,omitempty"` + Status string `json:"status"` + GitPath string `json:"git_path,omitempty"` + GitBranch string `json:"git_branch,omitempty"` + Sdk string `json:"sdk,omitempty"` + SdkVersion string `json:"sdk_version,omitempty"` + Env string `json:"env,omitempty"` + Secret string `json:"secret,omitempty"` + Template string `json:"template,omitempty"` + Hardware string `json:"hardware,omitempty"` + ImageID string `json:"image_id,omitempty"` + UserID int64 
`json:"user_id,omitempty"` + ModelID int64 `json:"repo_model_id,omitempty"` // for URM code logic + RepoID int64 `json:"repository_id,omitempty"` + RuntimeFramework string `json:"runtime_framework,omitempty"` + ContainerPort int `json:"container_port,omitempty"` + Annotation string `json:"annotation,omitempty"` + MinReplica int `json:"min_replica,omitempty"` + MaxReplica int `json:"max_replica,omitempty"` + SvcName string `json:"svc_name,omitempty"` + Endpoint string `json:"endpoint,omitempty"` + CreatedAt time.Time `json:"created_at,omitempty"` + UpdatedAt time.Time `json:"updated_at,omitempty"` + ClusterID string `json:"cluster_id,omitempty"` + SecureLevel int `json:"secure_level,omitempty"` + ActualReplica int `json:"actual_replica,omitempty"` + DesiredReplica int `json:"desired_replica,omitempty"` + Instances []Instance `json:"instances,omitempty"` + InstanceName string `json:"instance_name,omitempty"` + Private bool `json:"private"` + Type int `json:"type,omitempty"` + ProxyEndpoint string `json:"proxy_endpoint,omitempty"` + UserUUID string `json:"user_uuid,omitempty"` + SKU string `json:"sku,omitempty"` + OrderDetailID int64 `json:"order_detail_id,omitempty"` + PayMode PayMode `json:"pay_mode,omitempty"` + Provider string `json:"provider,omitempty"` + ResourceType string `json:"resource_type,omitempty"` + RepoTag string `json:"repo_tag,omitempty"` + Task string `json:"task,omitempty"` + EngineArgs string `json:"engine_args,omitempty"` + Variables string `json:"variables,omitempty"` + Entrypoint string `json:"entrypoint,omitempty"` + Reason string `json:"reason,omitempty"` + Message string `json:"message,omitempty"` + SupportFunctionCall bool `json:"support_function_call,omitempty"` + + Since string `json:"since,omitempty"` } type RuntimeFrameworkReq struct { diff --git a/component/callback/git_callback.go b/component/callback/git_callback.go index ba60ba4f..0eba4966 100644 --- a/component/callback/git_callback.go +++ b/component/callback/git_callback.go @@ 
-437,13 +437,11 @@ func (c *gitCallbackComponentImpl) updateDatasetTags(ctx context.Context, namesp namespace, name := repo.OriginNamespaceAndName() if namespace == "" || name == "" { slog.Debug("not an evaluation dataset, ignore it", slog.Any("repo id", repo.Path)) - return nil } // use mirror namespace and name to find dataset evalDataset, err = c.tagRuleStore.FindByRepo(ctx, string(types.EvaluationCategory), namespace, name, string(types.DatasetRepo)) if err != nil { slog.Debug("not an evaluation dataset, ignore it", slog.Any("repo id", repo.Path)) - return nil } } else { slog.Error("failed to query evaluation dataset", slog.Any("repo id", repo.Path), slog.Any("error", err)) @@ -451,14 +449,46 @@ func (c *gitCallbackComponentImpl) updateDatasetTags(ctx context.Context, namesp } } + + // check if it's a task dataset (e.g., ms-swift) + taskDataset, err := c.tagRuleStore.FindByRepo(ctx, "task", namespace, repoName, string(types.DatasetRepo)) + slog.Info("taskDataset", slog.Any("taskDataset", taskDataset)) + if err != nil && !errors.Is(err, sql.ErrNoRows) { + slog.Error("failed to query task dataset", slog.Any("repo id", repo.Path), slog.Any("error", err)) + } + // if not found, check with mirror namespace and name + if errors.Is(err, sql.ErrNoRows) { + mirrorNamespace, mirrorName := repo.OriginNamespaceAndName() + if mirrorNamespace != "" && mirrorName != "" { + taskDataset, err = c.tagRuleStore.FindByRepo(ctx, "task", mirrorNamespace, mirrorName, string(types.DatasetRepo)) + if err != nil && !errors.Is(err, sql.ErrNoRows) { + slog.Error("failed to query task dataset with mirror path", slog.Any("repo id", repo.Path), slog.Any("error", err)) + } + } + } + tagIds := []int64{} - tagIds = append(tagIds, evalDataset.Tag.ID) - if evalDataset.RuntimeFramework != "" { + + if evalDataset != nil && evalDataset.RuntimeFramework != "" { + tagIds = append(tagIds, evalDataset.Tag.ID) rTag, _ := c.tagStore.FindTag(ctx, evalDataset.RuntimeFramework, string(types.DatasetRepo), 
"runtime_framework") if rTag != nil { tagIds = append(tagIds, rTag.ID) } } + // add task tag if found + if taskDataset != nil && taskDataset.Tag.ID > 0 { + tagIds = append(tagIds, taskDataset.Tag.ID) + if taskDataset.RuntimeFramework != "" { + taskRTag, _ := c.tagStore.FindTag(ctx, taskDataset.RuntimeFramework, string(types.DatasetRepo), "runtime_framework") + if taskRTag != nil { + tagIds = append(tagIds, taskRTag.ID) + } + } + } + if len(tagIds) == 0 { + return nil + } err = c.tagStore.UpsertRepoTags(ctx, repo.ID, []int64{}, tagIds) if err != nil { diff --git a/component/repo.go b/component/repo.go index aec32a81..c432e467 100644 --- a/component/repo.go +++ b/component/repo.go @@ -242,8 +242,8 @@ func (c *repoComponentImpl) CreateRepo(ctx context.Context, req types.CreateRepo temPath := strings.SplitN(uuid.NewString(), "-", 2) dbRepo := database.Repository{ UserID: user.ID, - //Path: path.Join(req.Namespace, req.Name), - //GitPath: fmt.Sprintf("%ss_%s/%s", string(req.RepoType), req.Namespace, req.Name), + // Path: path.Join(req.Namespace, req.Name), + // GitPath: fmt.Sprintf("%ss_%s/%s", string(req.RepoType), req.Namespace, req.Name), Path: path.Join(temPath[0], temPath[1]), GitPath: fmt.Sprintf("%ss_%s/%s", string(req.RepoType), temPath[0], temPath[1]), Name: req.Name, @@ -487,7 +487,7 @@ func (c *repoComponentImpl) PublicToUser(ctx context.Context, repoType types.Rep if !isAdmin { repoOwnerIDs = append(repoOwnerIDs, user.ID) - //get user's orgs + // get user's orgs for _, org := range user.Orgs { repoOwnerIDs = append(repoOwnerIDs, org.UserID) } @@ -1764,7 +1764,7 @@ func (c *repoComponentImpl) AllowAdminAccess(ctx context.Context, repoType types func (c *repoComponentImpl) GetUserRepoPermission(ctx context.Context, userName string, repo *database.Repository) (*types.UserRepoPermission, error) { if userName == "" { - //anonymous user only has read permission to public repo + // anonymous user only has read permission to public repo return 
&types.UserRepoPermission{CanRead: !repo.Private, CanWrite: false, CanAdmin: false}, nil } @@ -1783,7 +1783,7 @@ func (c *repoComponentImpl) GetUserRepoPermission(ctx context.Context, userName } if ns.NamespaceType == "user" { - //owner has full permission + // owner has full permission if userName == namespace { return &types.UserRepoPermission{ CanRead: true, @@ -1791,7 +1791,7 @@ func (c *repoComponentImpl) GetUserRepoPermission(ctx context.Context, userName CanAdmin: true, }, nil } else { - //other user has read permission to pubic repo + // other user has read permission to public repo return &types.UserRepoPermission{ CanRead: !repo.Private, CanWrite: false, CanAdmin: false, }, nil @@ -2010,7 +2010,6 @@ func (c *repoComponentImpl) MirrorFromSaas(ctx context.Context, namespace, name, mirror.MirrorTaskID = taskId m, err = c.mirrorStore.Create(ctx, &mirror) - if err != nil { return fmt.Errorf("failed to create mirror: %w", err) } @@ -2224,7 +2223,6 @@ func (c *repoComponentImpl) ListRuntimeFramework(ctx context.Context, repoType t Description: modelFrame.Description, Type: modelFrame.Type, }) - } return frameList, nil } @@ -2247,7 +2245,7 @@ func (c *repoComponentImpl) ListRuntimeFrameworkV2(ctx context.Context, repoType if systemDriverVersion != "" && modelFrame.ComputeType == string(types.ResourceTypeGPU) { frameDriverVersion, _ := version.NewVersion(modelFrame.DriverVersion) systemDriverVersion, _ := version.NewVersion(systemDriverVersion) - //ignore unsupported driver version + // ignore unsupported driver version if frameDriverVersion.GreaterThan(systemDriverVersion) { continue } @@ -2567,37 +2565,41 @@ func (c *repoComponentImpl) DeployDetail(ctx context.Context, detailReq types.De entrypoint = val } + // Check if engine_args contains tool-call-parser parameter + supportFunctionCall := strings.Contains(deploy.EngineArgs, "tool-call-parser") + resDeploy := types.DeployRepo{ - DeployID: deploy.ID, - DeployName: deploy.DeployName, - RepoID: deploy.RepoID, -
SvcName: deploy.SvcName, - Status: deployStatusCodeToString(code), - Hardware: deploy.Hardware, - Env: deploy.Env, - RuntimeFramework: deploy.RuntimeFramework, - ImageID: deploy.ImageID, - MinReplica: deploy.MinReplica, - MaxReplica: deploy.MaxReplica, - GitBranch: deploy.GitBranch, - ClusterID: deploy.ClusterID, - SecureLevel: deploy.SecureLevel, - CreatedAt: deploy.CreatedAt, - UpdatedAt: deploy.UpdatedAt, - Endpoint: endpoint, - ActualReplica: actualReplica, - DesiredReplica: desiredReplica, - Instances: instList, - Private: endpointPrivate, - Path: repoPath, - ProxyEndpoint: proxyEndPoint, - SKU: deploy.SKU, - Task: string(deploy.Task), - EngineArgs: deploy.EngineArgs, - Variables: deploy.Variables, - Entrypoint: entrypoint, - Reason: deploy.Reason, - Message: deploy.Message, + DeployID: deploy.ID, + DeployName: deploy.DeployName, + RepoID: deploy.RepoID, + SvcName: deploy.SvcName, + Status: deployStatusCodeToString(code), + Hardware: deploy.Hardware, + Env: deploy.Env, + RuntimeFramework: deploy.RuntimeFramework, + ImageID: deploy.ImageID, + MinReplica: deploy.MinReplica, + MaxReplica: deploy.MaxReplica, + GitBranch: deploy.GitBranch, + ClusterID: deploy.ClusterID, + SecureLevel: deploy.SecureLevel, + CreatedAt: deploy.CreatedAt, + UpdatedAt: deploy.UpdatedAt, + Endpoint: endpoint, + ActualReplica: actualReplica, + DesiredReplica: desiredReplica, + Instances: instList, + Private: endpointPrivate, + Path: repoPath, + ProxyEndpoint: proxyEndPoint, + SKU: deploy.SKU, + Task: string(deploy.Task), + EngineArgs: deploy.EngineArgs, + Variables: deploy.Variables, + Entrypoint: entrypoint, + Reason: deploy.Reason, + Message: deploy.Message, + SupportFunctionCall: supportFunctionCall, } return &resDeploy, nil @@ -3071,7 +3073,7 @@ func (c *repoComponentImpl) DeployUpdate(ctx context.Context, updateReq types.De if err != nil { return fmt.Errorf("cannot find available runtime framework by name , %w", err) } - //update runtime image once user changed cpu to gpu + // update 
runtime image once user changed cpu to gpu req.RuntimeFrameworkID = &frame.ID } } @@ -3169,8 +3171,23 @@ func (c *repoComponentImpl) DeployStart(ctx context.Context, startReq types.Depl } if exist { - // deploy instance is running - return errors.New("stop deploy first") + // check deploy status + _, status, _, err := c.deployer.Status(ctx, deployRepo, false) + if err != nil { + return fmt.Errorf("failed to get deploy status, %w", err) + } + + // if deploy is in running status, return error + const deployStatusRunning = 4 + if status == deployStatusRunning { + return errors.New("stop deploy first") + } + + // if deploy exists but not running, stop it first + err = c.deployer.Stop(ctx, deployRepo) + if err != nil { + return fmt.Errorf("failed to stop existing deploy, %w", err) + } } // start deploy @@ -3417,7 +3434,6 @@ func (c *repoComponentImpl) DiffBetweenTwoCommits(ctx context.Context, req types LeftCommitId: req.LeftCommitID, RightCommitId: rightCommit, }) - if err != nil { return nil, fmt.Errorf("failed to get diff, err: %w", err) } @@ -3773,7 +3789,7 @@ func (c *repoComponentImpl) ValidateYaml(ctx context.Context, req types.Validate } categoryContents := make(map[string]any) - //parse yaml string + // parse yaml string err := yaml.Unmarshal([]byte(meta), categoryContents) if err != nil { slog.Error("error unmarshall meta for tags", slog.Any("error", err), slog.String("meta", meta)) diff --git a/component/repo_ce_test.go b/component/repo_ce_test.go index bbe7711c..b728d7b8 100644 --- a/component/repo_ce_test.go +++ b/component/repo_ce_test.go @@ -118,6 +118,111 @@ func TestRepoComponent_DeployStart(t *testing.T) { require.Nil(t, err) } +func TestRepoComponent_DeployStart_ExistAndRunning(t *testing.T) { + ctx := context.TODO() + repo := initializeTestRepoComponent(ctx, t) + mockUserRepoAdminPermission(ctx, repo.mocks.stores, "user") + + deploy := &database.Deploy{ + ID: 1, + SpaceID: 2, + ModelID: 3, + SvcName: "svc", + ClusterID: "cluster", + RuntimeFramework: 
"fm", + SKU: "111", + } + repo.mocks.stores.DeployTaskMock().EXPECT().GetDeployByID(ctx, int64(1)).Return(deploy, nil) + + repo.mocks.stores.SpaceResourceMock().EXPECT().FindByID(ctx, int64(111)).Return(&database.SpaceResource{ + ID: 111, + Resources: `{ "gpu": { "type": "A10", "num": "1", "resource_name": "nvidia.com/gpu", "labels": { "aliyun.accelerator/nvidia_name": "NVIDIA-A10" } }, "cpu": { "type": "Intel", "num": "12" }, "memory": "46Gi" }`, + }, nil) + + repo.mocks.deployer.EXPECT().CheckResourceAvailable(ctx, "cluster", int64(0), mock.Anything).Return(true, nil) + + deployRepo := types.DeployRepo{ + DeployID: 1, + Namespace: "ns", + Name: "n", + SvcName: "svc", + ClusterID: "cluster", + SpaceID: 2, + ModelID: 3, + } + repo.mocks.deployer.EXPECT().Exist(ctx, deployRepo).Return(true, nil) + + // status 4 means running + repo.mocks.deployer.EXPECT().Status(ctx, deployRepo, false).Return("svc", 4, []types.Instance{}, nil) + + err := repo.DeployStart(ctx, types.DeployActReq{ + RepoType: types.ModelRepo, + Namespace: "ns", + Name: "n", + CurrentUser: "user", + DeployID: 1, + DeployType: 1, + InstanceName: "i1", + }) + require.NotNil(t, err) + require.Contains(t, err.Error(), "stop deploy first") +} + +func TestRepoComponent_DeployStart_ExistButNotRunning(t *testing.T) { + ctx := context.TODO() + repo := initializeTestRepoComponent(ctx, t) + mockUserRepoAdminPermission(ctx, repo.mocks.stores, "user") + + deploy := &database.Deploy{ + ID: 1, + SpaceID: 2, + ModelID: 3, + SvcName: "svc", + ClusterID: "cluster", + RuntimeFramework: "fm", + SKU: "111", + } + repo.mocks.stores.DeployTaskMock().EXPECT().GetDeployByID(ctx, int64(1)).Return(deploy, nil) + + repo.mocks.stores.SpaceResourceMock().EXPECT().FindByID(ctx, int64(111)).Return(&database.SpaceResource{ + ID: 111, + Resources: `{ "gpu": { "type": "A10", "num": "1", "resource_name": "nvidia.com/gpu", "labels": { "aliyun.accelerator/nvidia_name": "NVIDIA-A10" } }, "cpu": { "type": "Intel", "num": "12" }, "memory": 
"46Gi" }`, + }, nil) + + repo.mocks.deployer.EXPECT().CheckResourceAvailable(ctx, "cluster", int64(0), mock.Anything).Return(true, nil) + + deployRepo := types.DeployRepo{ + DeployID: 1, + Namespace: "ns", + Name: "n", + SvcName: "svc", + ClusterID: "cluster", + SpaceID: 2, + ModelID: 3, + } + repo.mocks.deployer.EXPECT().Exist(ctx, deployRepo).Return(true, nil) + + // status 2 means failed (not running) + repo.mocks.deployer.EXPECT().Status(ctx, deployRepo, false).Return("svc", 2, []types.Instance{}, nil) + + // should call Stop first + repo.mocks.deployer.EXPECT().Stop(ctx, deployRepo).Return(nil) + + // then start deploy + repo.mocks.deployer.EXPECT().StartDeploy(ctx, deploy).Return(nil) + + err := repo.DeployStart(ctx, types.DeployActReq{ + RepoType: types.ModelRepo, + Namespace: "ns", + Name: "n", + CurrentUser: "user", + DeployID: 1, + DeployType: 1, + InstanceName: "i1", + }) + require.Nil(t, err) +} + func TestRepoComponentImpl_Update(t *testing.T) { ctx := context.TODO() diff --git a/configs/evaluation/evalscope.json b/configs/evaluation/evalscope.json index 2b85898e..12793acd 100644 --- a/configs/evaluation/evalscope.json +++ b/configs/evaluation/evalscope.json @@ -5,14 +5,14 @@ "engine_images": [ { "compute_type": "cpu", - "image": "opencsghq/evalscope:0.15.1-cpu", - "engine_version": "0.15.1" + "image": "opencsghq/evalscope:1.1.1-cpu", + "engine_version": "1.1.1" }, { "compute_type": "gpu", - "image": "opencsghq/evalscope:0.15.1-cu120", - "driver_version": "12.1", - "engine_version": "0.15.1" + "image": "opencsghq/evalscope:1.1.1-cu124", + "driver_version": "12.4", + "engine_version": "1.1.1" } ], "supported_archs": [ diff --git a/configs/inference/nvidia-sglang.json b/configs/inference/nvidia-sglang.json new file mode 100644 index 00000000..da6a9742 --- /dev/null +++ b/configs/inference/nvidia-sglang.json @@ -0,0 +1,167 @@ +{ + "engine_name": "nvidia-sglang", + "container_port": 8000, + "enabled": 1, + "model_format": "safetensors", + "engine_images": 
[ + { + "compute_type": "gpu", + "image": "opencsghq/nvidia-sglang:25.10-py3", + "driver_version": "13.0", + "engine_version": "25.10-py3" + } + ], + "engine_args": [ + { + "name": "context-length", + "value": "8192", + "format": "--context-length %s" + }, + { + "name": "tensor-parallel-size", + "value": "1", + "format": "--tensor-parallel-size %s" + }, + { + "name": "chunked-prefill-size", + "value": "1024", + "format": "--chunked-prefill-size %s" + }, + { + "name": "cpu-offload-gb", + "value": "1", + "format": "--cpu-offload-gb %s" + }, + { + "name": "dp-size", + "value": "1", + "format": "--dp-size %s" + }, + { + "name": "enable-dp-attention", + "value": "enable", + "format": "--enable-dp-attention" + }, + { + "name": "enable-ep-moe", + "value": "enable", + "format": "--enable-ep-moe" + }, + { + "name": "custom-options", + "value": "", + "format": "%s" + } + ], + "supported_archs": [ + "ArceeForCausalLM", + "BaichuanForCausalLM", + "BailingMoeForCausalLM", + "BailingMoeV2ForCausalLM", + "BertForSequenceClassification", + "BertModel", + "CLIPModel", + "ChatGLMForConditionalGeneration", + "ChatGLMModel", + "Cohere2ForCausalLM", + "CohereForCausalLM", + "Contriever", + "DbrxForCausalLM", + "DeciLMForCausalLM", + "DeepseekForCausalLM", + "DeepseekV2ForCausalLM", + "DeepseekV3ForCausalLM", + "DeepseekV3ForCausalLMNextN", + "DeepseekVL2ForCausalLM", + "Ernie4_5_ForCausalLM", + "Ernie4_5_MoeForCausalLM", + "Ernie4_5_MoeForCausalLMMTP", + "ExaoneForCausalLM", + "GPT2LMHeadModel", + "GPTBigCodeForCausalLM", + "Gemma2ForCausalLM", + "Gemma2ForSequenceClassification", + "Gemma3ForCausalLM", + "Gemma3ForConditionalGeneration", + "Gemma3nForCausalLM", + "Gemma3nForConditionalGeneration", + "GemmaForCausalLM", + "Glm4ForCausalLM", + "Glm4MoeForCausalLM", + "Glm4MoeForCausalLMNextN", + "Glm4vForConditionalGeneration", + "Glm4vMoeForConditionalGeneration", + "GptOssForCausalLM", + "GraniteForCausalLM", + "GraniteMoeForCausalLM", + "GraniteMoeHybridForCausalLM", + 
"Grok1ForCausalLM", + "Grok1ModelForCausalLM", + "HunYuanDenseV1ForCausalLM", + "HunYuanMoEV1ForCausalLM", + "InternLM2ForCausalLM", + "InternLM2ForRewardModel", + "InternLM3ForCausalLM", + "InternS1ForConditionalGeneration", + "InternVLChatModel", + "KimiVLForConditionalGeneration", + "Llama4ForCausalLM", + "Llama4ForConditionalGeneration", + "LlamaEmbeddingModel", + "LlamaForCausalLM", + "LlamaForCausalLMEagle", + "LlamaForCausalLMEagle3", + "LlamaForClassification", + "LlavaVidForCausalLM", + "MiMoForCausalLM", + "MiMoMTP", + "MiniCPM3ForCausalLM", + "MiniCPMForCausalLM", + "MiniCPMO", + "MiniCPMV", + "MiniMaxM2ForCausalLM", + "Mistral3ForConditionalGeneration", + "MistralForCausalLM", + "MistralModel", + "MixtralForCausalLM", + "MllamaForConditionalGeneration", + "MultiModalityCausalLM", + "NemotronForCausalLM", + "Olmo2ForCausalLM", + "OlmoForCausalLM", + "OlmoeForCausalLM", + "PersimmonForCausalLM", + "Phi3ForCausalLM", + "Phi3SmallForCausalLM", + "Phi4FlashForCausalLM", + "Phi4MMForCausalLM", + "PhiForCausalLM", + "PhiMoEForCausalLM", + "PixtralVisionModel", + "QWenLMHeadModel", + "QuantMixtralForCausalLM", + "Qwen2AudioForConditionalGeneration", + "Qwen2ForCausalLM", + "Qwen2ForCausalLMEagle", + "Qwen2MoeForCausalLM", + "Qwen2VLForConditionalGeneration", + "Qwen2_5_VLForConditionalGeneration", + "Qwen3ForCausalLM", + "Qwen3MoeForCausalLM", + "Qwen3NextForCausalLM", + "SmolLM3ForCausalLM", + "SolarForCausalLM", + "StableLmForCausalLM", + "Starcoder2ForCausalLM", + "Step3VLForConditionalGeneration", + "TorchNativeLlamaForCausalLM", + "TorchNativePhi3ForCausalLM", + "TransformersForCausalLM", + "VILAForConditionalGeneration", + "XLMRobertaForSequenceClassification", + "XLMRobertaModel", + "XverseForCausalLM", + "XverseMoeForCausalLM", + "YiVLForCausalLM" + ] +} diff --git a/configs/inference/nvidia-vllm.json b/configs/inference/nvidia-vllm.json new file mode 100644 index 00000000..c3089b0c --- /dev/null +++ b/configs/inference/nvidia-vllm.json @@ -0,0 +1,208 
@@ +{ + "engine_name": "nvidia-vllm", + "enabled": 1, + "container_port": 8000, + "model_format": "safetensors", + "engine_images": [ + { + "compute_type": "gpu", + "image": "opencsghq/nvidia-vllm:25.10-py3", + "driver_version": "13.0", + "engine_version": "25.10-py3" + } + ], + "engine_args": [ + { + "name": "block-size", + "value": "128", + "format": "--block-size %s" + }, + { + "name": "dtype", + "value": "auto", + "format": "--dtype %s" + }, + { + "name": "gpu-memory-utilization", + "value": "0.8", + "format": "--gpu-memory-utilization %s" + }, + { + "name": "max-model-len", + "value": "8192", + "format": "--max-model-len %s" + }, + { + "name": "tensor-parallel-size", + "value": "1", + "format": "--tensor-parallel-size %s" + }, + { + "name": "max-num-seqs", + "value": "256", + "format": "--max-num-seqs %s" + }, + { + "name": "scheduling-policy", + "value": "fcfs", + "format": "--scheduling-policy %s" + }, + { + "name": "cpu-offload-gb", + "value": "0", + "format": "--cpu-offload-gb %s" + }, + { + "name": "pipeline-parallel-size", + "value": "1", + "format": "--pipeline-parallel-size %s" + }, + { + "name": "guided-decoding-backend", + "value": "xgrammar", + "format": "--guided-decoding-backend %s" + }, + { + "name": "swap-space", + "value": "4", + "format": "--swap-space %s" + }, + { + "name": "load-format", + "value": "auto", + "format": "--load-format %s" + }, + { + "name": "max-num-batched-tokens", + "value": "4096", + "format": "--max-num-batched-tokens %s" + }, + { + "name": "enable-prefix-caching", + "value": "enable", + "format": "--enable-prefix-caching" + }, + { + "name": "enable-chunked-prefill", + "value": "enable", + "format": "--enable-chunked-prefill" + }, + { + "name": "enforce-eager", + "value": "enable", + "format": "--enforce-eager" + }, + { + "name": "disable-custom-all-reduce", + "value": "enable", + "format": "--disable-custom-all-reduce" + }, + { + "name": "limit-mm-per-prompt", + "value": "image=5,video=5", + "format": 
"--limit-mm-per-prompt %s" + }, + { + "name": "custom-options", + "value": "", + "format": "%s" + } + ], + "supported_archs": [ + "ApertusForCausalLM", + "AquilaForCausalLM", + "ArceeForCausalLM", + "ArcticForCausalLM", + "BaiChuanForCausalLM", + "BailingMoeForCausalLM", + "BambaForCausalLM", + "BloomForCausalLM", + "BartForConditionalGeneration", + "MBartForConditionalGeneration", + "ChatGLMModel", + "ChatGLMForConditionalGeneration", + "CohereForCausalLM", + "Cohere2ForCausalLM", + "DbrxForCausalLM", + "DeciLMForCausalLM", + "DeepseekForCausalLM", + "DeepseekV2ForCausalLM", + "DeepseekV3ForCausalLM", + "Dots1ForCausalLM", + "Ernie4_5ForCausalLM", + "Ernie4_5_MoeForCausalLM", + "ExaoneForCausalLM", + "Exaone4ForCausalLM", + "Fairseq2LlamaForCausalLM", + "FalconForCausalLM", + "FalconMambaForCausalLM", + "FalconH1ForCausalLM", + "GemmaForCausalLM", + "Gemma2ForCausalLM", + "Gemma3ForCausalLM", + "Gemma3nForCausalLM", + "GlmForCausalLM", + "Glm4ForCausalLM", + "Glm4MoeForCausalLM", + "GPT2LMHeadModel", + "GPTBigCodeForCausalLM", + "GPTJForCausalLM", + "GPTNeoXForCausalLM", + "GptOssForCausalLM", + "GraniteForCausalLM", + "GraniteMoeForCausalLM", + "GraniteMoeHybridForCausalLM", + "GraniteMoeSharedForCausalLM", + "GritLM", + "Grok1ModelForCausalLM", + "HunYuanDenseV1ForCausalLM", + "HunYuanMoEV1ForCausalLM", + "HCXVisionForCausalLM", + "InternLMForCausalLM", + "InternLM2ForCausalLM", + "InternLM3ForCausalLM", + "JAISLMHeadModel", + "JambaForCausalLM", + "Lfm2ForCausalLM", + "LlamaForCausalLM", + "MambaForCausalLM", + "Mamba2ForCausalLM", + "MiMoForCausalLM", + "MiniCPMForCausalLM", + "MiniCPM3ForCausalLM", + "MistralForCausalLM", + "MixtralForCausalLM", + "MotifForCausalLM", + "MPTForCausalLM", + "NemotronForCausalLM", + "NemotronHForCausalLM", + "OLMoForCausalLM", + "OLMo2ForCausalLM", + "OLMoEForCausalLM", + "OPTForCausalLM", + "OrionForCausalLM", + "PhiForCausalLM", + "Phi3ForCausalLM", + "PhiMoEForCausalLM", + "Phi4FlashForCausalLM", + "PersimmonForCausalLM", + 
"Plamo2ForCausalLM", + "QWenLMHeadModel", + "Qwen2ForCausalLM", + "Qwen2MoeForCausalLM", + "Qwen3ForCausalLM", + "Qwen3MoeForCausalLM", + "Qwen3NextForCausalLM", + "SeedOssForCausalLM", + "StableLmForCausalLM", + "Starcoder2ForCausalLM", + "SolarForCausalLM", + "SmolLM3ForCausalLM", + "TeleChat2ForCausalLM", + "TeleFLMForCausalLM", + "XverseForCausalLM", + "MiniMaxM1ForCausalLM", + "MiniMaxText01ForCausalLM", + "Zamba2ForCausalLM" + ] +} diff --git a/docker/evaluation/Dockerfile.evalscope-cpu b/docker/evaluation/Dockerfile.evalscope-cpu index 7d0742bd..dfb540d2 100644 --- a/docker/evaluation/Dockerfile.evalscope-cpu +++ b/docker/evaluation/Dockerfile.evalscope-cpu @@ -2,8 +2,12 @@ FROM modelscope-registry.cn-hangzhou.cr.aliyuncs.com/modelscope-repo/modelscope: RUN apt-get update && apt-get -y install dumb-init \ && apt-get clean && rm -rf /var/lib/apt/lists/* RUN pip install --no-cache-dir minio oss2 csghub-sdk==0.7.2 +RUN pip install -U --no-cache-dir evalscope==1.1.1 +# Download NLTK resources required for BLEU and other metrics +RUN python -c "import nltk; nltk.download('punkt_tab', quiet=True)" COPY ./evalscope/ /etc/csghub/ -RUN chmod +x /etc/csghub/*.sh +RUN chmod +x /etc/csghub/*.sh && \ + chmod +x /etc/csghub/*.py WORKDIR /workspace/ ENV HUGGINGFACE_HUB_CACHE=/workspace/ \ HF_HUB_ENABLE_HF_TRANSFER=0 \ diff --git a/docker/evaluation/Dockerfile.evalscope-gpu b/docker/evaluation/Dockerfile.evalscope-gpu index e5d668bd..d8688e24 100644 --- a/docker/evaluation/Dockerfile.evalscope-gpu +++ b/docker/evaluation/Dockerfile.evalscope-gpu @@ -2,9 +2,13 @@ FROM modelscope-registry.cn-hangzhou.cr.aliyuncs.com/modelscope-repo/modelscope: RUN apt-get update && apt-get -y install dumb-init \ && apt-get clean && rm -rf /var/lib/apt/lists/* RUN pip install --no-cache-dir minio oss2 csghub-sdk==0.7.2 +RUN pip install -U --no-cache-dir evalscope==1.1.1 +# Download NLTK resources required for BLEU and other metrics +RUN python -c "import nltk; nltk.download('punkt_tab', 
quiet=True)" COPY ./evalscope/ /etc/csghub/ RUN ln -s /usr/bin/python3 /usr/bin/python &&\ - chmod +x /etc/csghub/*.sh + chmod +x /etc/csghub/*.sh && \ + chmod +x /etc/csghub/*.py WORKDIR /workspace/ ENV HUGGINGFACE_HUB_CACHE=/workspace/ \ HF_HUB_ENABLE_HF_TRANSFER=0 \ diff --git a/docker/evaluation/evalscope/CUSTOM_DATASETS.md b/docker/evaluation/evalscope/CUSTOM_DATASETS.md new file mode 100644 index 00000000..eb6c1a1c --- /dev/null +++ b/docker/evaluation/evalscope/CUSTOM_DATASETS.md @@ -0,0 +1,84 @@ +# Custom Datasets for Evalscope + +## 支持的自定义数据集 + +### civil_comments (任意组织) + +Civil Comments 数据集用于毒性检测评估。**支持来自任意组织的 civil_comments 数据集**。 + +**支持的数据集 ID 格式:** +- `google/civil_comments` - Google 官方版本 +- `abc/civil_comments` - ABC 组织的 fork 版本 +- `your-org/civil_comments` - 任意组织的版本 +- 任何包含 "civil_comments" 的数据集 ID + +**数据集划分 (splits):** +- `train` - 训练集(不用于评估) +- `validation` - 验证集(用于超参数调优) +- `test` - 测试集(**用于最终评估,默认使用**) + +**默认行为:** +系统默认使用 `test` 集进行评估,这是标准的评估实践。test 集包含模型从未见过的数据,能够真实反映模型的泛化能力。 + +**动态注册机制:** +系统会自动从环境变量 `DATASET_IDS` 中读取数据集 ID,识别包含 "civil_comments" 的数据集,并动态注册使用实际的 dataset_id。 + +**使用方式:** + +在运行评估时,将 `DATASET_IDS` 设置为包含 civil_comments 的数据集 ID,系统会自动识别并注册。 + +**示例:** + +```bash +# 使用 Google 官方版本 +DATASET_IDS="google/civil_comments" \ +MODEL_IDS="your-model-id" \ +./start.sh + +# 使用 ABC 组织的版本 +DATASET_IDS="abc/civil_comments" \ +MODEL_IDS="your-model-id" \ +./start.sh + +# 同时评估多个数据集(只有 civil_comments 会被注册) +DATASET_IDS="abc/civil_comments,test/hellaswag" \ +MODEL_IDS="your-model-id" \ +./start.sh +``` + +## 高级配置 + +### 使用不同的数据集划分 + +如果需要使用 `validation` 集而非 `test` 集进行评估,可以修改 `custom_datasets.py` 中的注册代码: + +```python +# 在 register_custom_datasets() 函数中修改 +civil_comments_meta = BenchmarkMeta( + name=benchmark_name, + dataset_id=dataset_id, + data_adapter=CivilCommentsDataAdapter, + subset_list=['default'], + metric_list=['acc'], + few_shot_num=0, + train_split=None, + eval_split='validation', # 修改这里:使用 validation 集 +) +``` +
+**注意**: 不推荐使用 `train` 集进行评估,因为会导致评估结果失真。 + +## 添加新的自定义数据集 + +要添加新的自定义数据集支持,请编辑 `custom_datasets.py` 文件: + +1. 创建一个继承自 `DefaultDataAdapter` 的数据适配器类 +2. 实现 `record_to_sample()` 方法(如需自定义提示词,可重写 `format_prompt_template()`) +3. 在 `register_custom_datasets()` 函数中注册新数据集 + +## 技术说明 + +- 自定义数据集在评估开始前自动注册 +- 不修改 evalscope 源码,仅通过插件方式扩展 +- 支持本地路径和远程数据集加载 + diff --git a/docker/evaluation/evalscope/README.md b/docker/evaluation/evalscope/README.md new file mode 100644 index 00000000..3c3a5f47 --- /dev/null +++ b/docker/evaluation/evalscope/README.md @@ -0,0 +1,135 @@ +# Civil Comments 自定义数据集支持 + +## 📋 快速开始 + +支持从环境变量 `DATASET_IDS` 动态注册任意组织的 civil_comments 数据集。 + +### 使用示例 + +```bash +export DATASET_IDS="James/civil_comments" +export MODEL_IDS="your-model" +export USE_CUSTOM_DATASETS="false" +./start.sh +``` + +### 预期日志 + +``` +[DEBUG] Successfully imported evalscope v1.1.1+ API +[DEBUG] Using BENCHMARK_REGISTRY from evalscope.benchmarks +✓ Custom dataset 'James/civil_comments' registered successfully as 'James_civil_comments' +[SUCCESS] Found task name: James_civil_comments +Loading civil_comments from remote: James/civil_comments, split: test +``` + +## 🎯 核心特性 + +- ✅ **动态注册** - 从 DATASET_IDS 自动识别 +- ✅ **多组织支持** - google、James、任意组织 +- ✅ **零配置** - 只需设置环境变量 +- ✅ **详细日志** - 完整的调试信息 + +## 📚 文档 + +| 文档 | 说明 | +|------|------| +| `README.md` | 本文档 - 快速开始 | +| `CUSTOM_DATASETS.md` | 详细使用说明 | +| `REGISTRY_FIX.md` | Evalscope v1.1.1 注册方法说明 | +| `WRAPPER_SOLUTION.md` | 包装脚本解决方案说明 | + +## 🔧 技术细节 + +### Evalscope v1.1.1 API + +**正确的导入:** +```python +from evalscope.api.benchmark import DefaultDataAdapter +from evalscope.api.benchmark.meta import BenchmarkMeta +from evalscope.api.registry import BENCHMARK_REGISTRY, register_benchmark +``` + +**注册方法:** +```python +# 创建 BenchmarkMeta +meta = BenchmarkMeta( + name='benchmark_name', + dataset_id='org/dataset', + data_adapter=DefaultDataAdapter, + eval_split='test', +) + +# 直接注册到 BENCHMARK_REGISTRY +BENCHMARK_REGISTRY['benchmark_name'] = meta +``` + +## 📝 支持的数据集 + +### civil_comments (任意组织) +
+- **格式**: `{organization}/civil_comments` +- **示例**: + - `google/civil_comments` + - `James/civil_comments` + - `your-org/civil_comments` +- **Split**: test(默认) +- **任务**: 毒性检测(二分类) + +## 🐛 故障排除 + +### 问题 1: ImportError + +**症状:** 各种导入错误 + +**解决方案:** 使用正确的导入路径: +```python +from evalscope.api.benchmark import DefaultDataAdapter +from evalscope.api.benchmark.meta import BenchmarkMeta +from evalscope.api.registry import BENCHMARK_REGISTRY +``` + +### 问题 2: dataset_tasks is empty + +**检查:** +1. 查看注册日志是否显示成功 +2. 检查 DATASET_IDS 是否正确传递 +3. 确认数据集名称包含 "civil_comments" + +参考 `REGISTRY_FIX.md` 获取详细信息。 + +## ✨ 文件结构 + +``` +evalscope/ +├── custom_datasets.py # 核心实现 +├── evalscope_wrapper.py # Evalscope 包装脚本(关键!) +├── register_custom.py # 注册脚本 +├── get_task.py # 任务查找(已修改) +├── start.sh # 启动脚本(已修改) +├── README.md # 本文档 +├── CUSTOM_DATASETS.md # 详细说明 +├── REGISTRY_FIX.md # 注册方法说明 +└── WRAPPER_SOLUTION.md # 包装脚本解决方案 +``` + +**关键组件:** +- **`evalscope_wrapper.py`**: 在同一进程中先注册数据集再运行 evalscope +- **`start.sh`**: 使用 `python evalscope_wrapper.py` 代替 `evalscope` 命令 + +## 🚀 扩展 + +添加新的自定义数据集,编辑 `custom_datasets.py`: + +```python +# 在 register_custom_datasets() 函数中 +for dataset_id in dataset_id_list: + if 'your_dataset' in dataset_id.lower(): + # 实现注册逻辑 +``` + +--- + +**版本**: Evalscope v1.1.1 +**状态**: ✅ 已测试并修复 + diff --git a/docker/evaluation/evalscope/custom_datasets.py b/docker/evaluation/evalscope/custom_datasets.py new file mode 100644 index 00000000..44aa27b1 --- /dev/null +++ b/docker/evaluation/evalscope/custom_datasets.py @@ -0,0 +1,181 @@ +""" +Custom dataset registration for evalscope v1.1.1+ +This module registers custom datasets that are not included in evalscope by default. 
+""" + +try: + # Evalscope v1.1.1+ API + from evalscope.api.benchmark import DefaultDataAdapter + from evalscope.api.benchmark.meta import BenchmarkMeta + from evalscope.api.registry import BENCHMARK_REGISTRY, register_benchmark + from evalscope.api.dataset import Sample # Correct import path + EVALSCOPE_AVAILABLE = True + print("[DEBUG] Successfully imported evalscope v1.1.1+ API") +except ImportError as e: + print(f"[ERROR] Failed to import evalscope: {e}") + print("[ERROR] Make sure evalscope v1.1.1+ is installed in the container") + print("[ERROR] Custom datasets will not be registered") + EVALSCOPE_AVAILABLE = False + import traceback + traceback.print_exc() + + +class CivilCommentsDataAdapter(DefaultDataAdapter): + """ + Custom data adapter for civil_comments datasets + + The civil_comments dataset contains text samples with toxicity labels. + Expected fields in the dataset: + - text: The comment text to classify + - toxicity: The toxicity label (binary: 0 or 1, or continuous: 0.0 to 1.0) + - Other optional fields: severe_toxicity, obscene, threat, insult, identity_attack, sexual_explicit + """ + + def __init__(self, *args, **kwargs): + """Initialize the adapter, ensuring parent class is properly initialized""" + super().__init__(*args, **kwargs) + print(f"[DEBUG] CivilCommentsDataAdapter initialized: {type(self).__name__}") + + def format_prompt_template(self, sample: Sample) -> str: + """ + Override to skip additional prompt formatting since we already format in record_to_sample + + Args: + sample: The Sample object with pre-formatted input + + Returns: + The sample.input as-is without additional formatting + """ + return sample.input + + def record_to_sample(self, record: dict) -> Sample: + """ + Convert a dataset record to evalscope Sample format + + Args: + record: A dictionary containing the dataset record with 'text' and 'toxicity' fields + + Returns: + Sample object with the converted data + """ + print(f"[DEBUG] 
CivilCommentsDataAdapter.record_to_sample called with record keys: {list(record.keys())}") + # Extract text and toxicity label + text = record.get('text', '') + toxicity = record.get('toxicity', 0) + + # Convert toxicity to binary label if it's a float (threshold at 0.5) + if isinstance(toxicity, float): + label = 1 if toxicity >= 0.5 else 0 + else: + label = int(toxicity) + + # Create a simple classification prompt + # The model should classify whether the comment is toxic or not + prompt = f"Classify the following comment as toxic (1) or not toxic (0):\n\nComment: {text}\n\nClassification:" + + # Return Sample with the correct field structure + return Sample( + input=prompt, # Required: the input prompt for the model + target=str(label), # Optional: the reference/ground truth answer + metadata={ # Optional: additional metadata + 'text': text, + 'toxicity': toxicity, + } + ) + + +def register_custom_datasets(): + """ + Register all custom datasets with evalscope + + Reads DATASET_IDS from environment variable and registers matching custom datasets. 
+ For example: + - DATASET_IDS="google/civil_comments" -> registers google/civil_comments + - DATASET_IDS="abc/civil_comments" -> registers abc/civil_comments + - DATASET_IDS="abc/civil_comments,test/hellaswag" -> registers abc/civil_comments + """ + + print("[DEBUG] register_custom_datasets() called") + + if not EVALSCOPE_AVAILABLE: + print("[WARNING] Evalscope not available, skipping custom dataset registration") + return + + import os + + print("[DEBUG] EVALSCOPE_AVAILABLE = True") + + # Read DATASET_IDS from environment variable + dataset_ids = os.environ.get('DATASET_IDS', '') + print(f"[DEBUG] Reading DATASET_IDS from environment: '{dataset_ids}'") + + if not dataset_ids: + print("[WARNING] No DATASET_IDS found in environment, skipping custom dataset registration") + return + + # Parse dataset IDs (comma-separated) + dataset_id_list = [ds.strip() for ds in dataset_ids.split(',')] + print(f"[DEBUG] Parsed dataset_id_list: {dataset_id_list}") + + # Register civil_comments datasets + registered_count = 0 + for dataset_id in dataset_id_list: + print(f"[DEBUG] Processing dataset_id: '{dataset_id}'") + if 'civil_comments' in dataset_id.lower(): + print(f"[DEBUG] Found civil_comments dataset: '{dataset_id}'") + try: + # Use the full dataset_id as the benchmark name to avoid conflicts + # e.g., "google_civil_comments" or "James_civil_comments" + benchmark_name = dataset_id.replace('/', '_').replace('-', '_') + + print(f"[DEBUG] Benchmark name: '{benchmark_name}'") + + # Create BenchmarkMeta with CivilCommentsDataAdapter + civil_comments_meta = BenchmarkMeta( + name=benchmark_name, + dataset_id=dataset_id, + data_adapter=CivilCommentsDataAdapter, + subset_list=['default'], + metric_list=['acc'], # Use 'acc' instead of 'accuracy' - it's registered in evalscope + few_shot_num=0, + train_split=None, + eval_split='test', + ) + + print(f"[DEBUG] Created BenchmarkMeta: name='{benchmark_name}', dataset_id='{dataset_id}'") + print(f"[DEBUG] BenchmarkMeta.data_adapter: 
{civil_comments_meta.data_adapter}") + print(f"[DEBUG] BenchmarkMeta.data_adapter type: {type(civil_comments_meta.data_adapter)}") + print( + f"[DEBUG] Is CivilCommentsDataAdapter: {civil_comments_meta.data_adapter is CivilCommentsDataAdapter}") + print( + f"[DEBUG] Has record_to_sample method: {hasattr(civil_comments_meta.data_adapter, 'record_to_sample')}") + if hasattr(civil_comments_meta.data_adapter, 'record_to_sample'): + import inspect + try: + source = inspect.getsource(civil_comments_meta.data_adapter.record_to_sample) + print(f"[DEBUG] record_to_sample source (first 200 chars): {source[:200]}") + except Exception as e: + print(f"[DEBUG] Could not get source: {e}") + # Check if it's our method by checking the qualname + print( + f"[DEBUG] record_to_sample qualname: {civil_comments_meta.data_adapter.record_to_sample.__qualname__}") + print( + f"[DEBUG] record_to_sample module: {civil_comments_meta.data_adapter.record_to_sample.__module__}") + + # Register benchmark using BENCHMARK_REGISTRY + BENCHMARK_REGISTRY[benchmark_name] = civil_comments_meta + + print(f"✓ Custom dataset '{dataset_id}' registered successfully as '{benchmark_name}'") + registered_count += 1 + except Exception as e: + print(f"[ERROR] Failed to register {dataset_id}: {e}") + import traceback + traceback.print_exc() + else: + print(f"[DEBUG] Skipping '{dataset_id}' (not a civil_comments dataset)") + + print(f"[INFO] Total civil_comments datasets registered: {registered_count}") + + +if __name__ == '__main__': + register_custom_datasets() diff --git a/docker/evaluation/evalscope/evalscope_wrapper.py b/docker/evaluation/evalscope/evalscope_wrapper.py new file mode 100644 index 00000000..f84abacb --- /dev/null +++ b/docker/evaluation/evalscope/evalscope_wrapper.py @@ -0,0 +1,25 @@ +#!/usr/bin/env python3 +""" +Wrapper script for evalscope that registers custom datasets before running. 
+""" +from evalscope.cli.cli import run_cmd +import sys +import os + +# Add custom datasets path to Python path +sys.path.insert(0, '/etc/csghub') + +# Register custom datasets BEFORE importing evalscope +try: + from custom_datasets import register_custom_datasets + register_custom_datasets() + print("[INFO] Custom datasets registered successfully") +except Exception as e: + print(f"[WARNING] Failed to register custom datasets: {e}") + import traceback + traceback.print_exc() + +# Now run evalscope CLI + +if __name__ == '__main__': + sys.exit(run_cmd()) diff --git a/docker/evaluation/evalscope/get_task.py b/docker/evaluation/evalscope/get_task.py index 070fe87e..b8ae9165 100644 --- a/docker/evaluation/evalscope/get_task.py +++ b/docker/evaluation/evalscope/get_task.py @@ -1,16 +1,62 @@ import argparse -from evalscope.benchmarks.benchmark import BENCHMARK_MAPPINGS +import sys +import os + +# Register custom datasets first +sys.path.insert(0, '/etc/csghub') +try: + from custom_datasets import register_custom_datasets + register_custom_datasets() +except Exception as e: + print(f"Warning: Failed to register custom datasets: {e}") + import traceback + traceback.print_exc() + +try: + from evalscope.api.registry import BENCHMARK_REGISTRY + BENCHMARK_MAPPINGS = BENCHMARK_REGISTRY + print("[DEBUG] Using BENCHMARK_REGISTRY from evalscope.api.registry") +except ImportError as e: + print(f"[ERROR] Failed to import benchmark registry: {e}") + BENCHMARK_MAPPINGS = {} def find_name_by_dataset_id(target_dataset_id): + """ + Find benchmark name by dataset_id. + Returns the benchmark name if found, empty string otherwise. 
+ """ + print(f"[DEBUG] Searching for dataset_id: {target_dataset_id}") + print(f"[DEBUG] Available benchmarks in registry: {len(BENCHMARK_MAPPINGS)} total") + + # First, try exact match for name, meta in BENCHMARK_MAPPINGS.items(): if meta.dataset_id == target_dataset_id: + print(f"[DEBUG] Found exact match: {name} -> {meta.dataset_id}") return name + + # If not found, print available civil_comments datasets for debugging + print(f"[DEBUG] No exact match found for '{target_dataset_id}'") + print(f"[DEBUG] Available civil_comments related benchmarks:") + for name, meta in BENCHMARK_MAPPINGS.items(): + if 'civil' in name.lower() or 'civil' in str(meta.dataset_id).lower(): + print(f"[DEBUG] - {name}: {meta.dataset_id}") + return "" if __name__ == '__main__': parser = argparse.ArgumentParser(description='Get dataset key by ms_id.') parser.add_argument('ms_id', type=str, help='The ms_id to search for') args = parser.parse_args() + + print(f"[DEBUG] get_task.py called with ms_id: {args.ms_id}") + print(f"[DEBUG] DATASET_IDS environment variable: {os.environ.get('DATASET_IDS', 'NOT SET')}") + task = find_name_by_dataset_id(args.ms_id) + + if task: + print(f"[SUCCESS] Found task name: {task}") + else: + print(f"[WARNING] No task found for dataset_id: {args.ms_id}") + with open("/tmp/task.txt", "w", encoding="utf-8") as f: f.write(task) \ No newline at end of file diff --git a/docker/evaluation/evalscope/register_custom.py b/docker/evaluation/evalscope/register_custom.py new file mode 100644 index 00000000..ea7a6d3c --- /dev/null +++ b/docker/evaluation/evalscope/register_custom.py @@ -0,0 +1,25 @@ +#!/usr/bin/env python3 +""" +Script to register custom datasets with evalscope before evaluation +""" + +import sys +import os + +# Add custom datasets path to Python path +sys.path.insert(0, '/etc/csghub') + +print(f"[DEBUG] register_custom.py starting...") +print(f"[DEBUG] DATASET_IDS environment variable: {os.environ.get('DATASET_IDS', 'NOT SET')}") + +try: + from 
custom_datasets import register_custom_datasets + print(f"[DEBUG] Calling register_custom_datasets()...") + register_custom_datasets() + print("[SUCCESS] Custom datasets registered successfully") +except Exception as e: + print(f"[ERROR] Failed to register custom datasets: {e}") + import traceback + traceback.print_exc() + # Don't fail the entire evaluation if custom registration fails + sys.exit(0) diff --git a/docker/evaluation/evalscope/start.sh b/docker/evaluation/evalscope/start.sh index d206ae03..a9d70c2d 100644 --- a/docker/evaluation/evalscope/start.sh +++ b/docker/evaluation/evalscope/start.sh @@ -11,9 +11,36 @@ download_model() { } get_subset_and_task() { repo=$1 - csv_file=$(find $repo -name "*_val.csv" -type f | head -n 1) - tsv_file=$(find $repo -name "*.tsv" -type f | head -n 1) - jsonl_files=$(find $repo -name "*.jsonl" -type f) + # repo is the dataset_id (e.g., xzgan001/civil_comments) + # The dataset is downloaded to /workspace/$repo + repo_path="/workspace/$repo" + + # Check if it's a civil_comments dataset (by dataset_id name) + # For civil_comments datasets, always use get_task.py to get the registered benchmark name + # This ensures compatibility with USE_CUSTOM_DATASETS regardless of file format + if echo "$repo" | grep -qi "civil_comments"; then + echo "[DEBUG] Detected civil_comments dataset, using get_task.py to get registered benchmark name" >&2 + rm -rf /tmp/task.txt + # Redirect get_task.py output to stderr to prevent it from being captured + python /etc/csghub/get_task.py "$repo" >&2 + if [ -f /tmp/task.txt ]; then + repo_task=$(cat /tmp/task.txt) + if [ ! 
-z "$repo_task" ]; then + # Return empty subset and the registered task name (output to stdout only) + echo "[DEBUG] Found registered benchmark name: $repo_task" >&2 + echo "|$repo_task" + return 0 + fi + fi + echo "[ERROR] Failed to get task name for registered civil_comments dataset $repo" >&2 + echo "[ERROR] Make sure the dataset is registered in custom_datasets.py" >&2 + exit 1 + fi + + # Fall back to original logic for other dataset types + csv_file=$(find "$repo_path" -name "*_val.csv" -type f | head -n 1) + tsv_file=$(find "$repo_path" -name "*.tsv" -type f | head -n 1) + jsonl_files=$(find "$repo_path" -name "*.jsonl" -type f) if [ -n "$csv_file" ]; then basename=$(basename "$csv_file") star_value="${basename%_val.csv}" @@ -35,13 +62,22 @@ get_subset_and_task() { done echo "$subset|general_qa" else - echo "No valid subset found for $repo" + echo "No valid subset found for $repo_path" exit 1 fi } export HF_TOKEN=$ACCESS_TOKEN mkdir -p /workspace/data + +# Ensure NLTK resources are available for BLEU and other metrics +echo "Checking NLTK resources..." +python -c "import nltk; nltk.download('punkt_tab', quiet=True)" 2>&1 || echo "[WARNING] Failed to download NLTK punkt_tab resource" + +# Register custom datasets +echo "Registering custom datasets..." +python /etc/csghub/register_custom.py + # download datasets IFS=',' read -r -a dataset_repos <<< "$DATASET_IDS" IFS=',' read -r -a dataset_revisions <<< "$DATASET_REVISIONS" @@ -127,7 +163,8 @@ for index in "${!model_repos[@]}"; do fi model_name=`basename $modelID` echo "Start evaluating model $model_name, dataset $dataset_tasks" - evalscope eval --model /workspace/$modelID --datasets $dataset_tasks --dataset-args "$dataset_tasks_args" --limit 10 + # Use wrapper script to ensure custom datasets are registered + python /etc/csghub/evalscope_wrapper.py eval --model /workspace/$modelID --datasets $dataset_tasks --dataset-args "$dataset_tasks_args" --limit 10 if [ $? 
-ne 0 ]; then echo "Evaluation failed for model $model_name." exit 1 diff --git a/docker/inference/Dockerfile.sglang-nvidia b/docker/inference/Dockerfile.sglang-nvidia new file mode 100644 index 00000000..426bd134 --- /dev/null +++ b/docker/inference/Dockerfile.sglang-nvidia @@ -0,0 +1,15 @@ +FROM nvcr.io/nvidia/sglang:25.10-py3 +RUN pip config set global.index-url https://mirrors.aliyun.com/pypi/simple && \ + apt-get update && \ + apt-get install -y dumb-init && apt-get clean && rm -rf /var/lib/apt/lists/* +RUN pip install --no-cache-dir csghub-sdk==0.7.6 +COPY ./sglang/ /etc/csghub/ +RUN chmod +x /etc/csghub/*.sh + +WORKDIR /workspace/ +ENV HUGGINGFACE_HUB_CACHE=/workspace/ \ + HF_HUB_ENABLE_HF_TRANSFER=0 +ENV PORT=8000 +EXPOSE 8000 +ENTRYPOINT [ "/usr/bin/dumb-init", "--" ] +CMD ["/etc/csghub/serve.sh"] \ No newline at end of file diff --git a/docker/inference/Dockerfile.vllm-nvidia b/docker/inference/Dockerfile.vllm-nvidia new file mode 100644 index 00000000..28028c67 --- /dev/null +++ b/docker/inference/Dockerfile.vllm-nvidia @@ -0,0 +1,26 @@ +FROM nvcr.io/nvidia/vllm:25.10-py3 +# Build with some basic utilities +RUN apt-get update && apt-get install -y \ + python3-pip apt-utils \ + wget curl vim \ + git git-lfs \ + supervisor \ + unzip + +# alias python='python3' +RUN pip config set global.index-url https://mirrors.aliyun.com/pypi/simple +RUN pip install --no-cache-dir csghub-sdk==0.7.6 supervisor +RUN mkdir -p /var/log/supervisord +COPY ./supervisord.conf /etc/supervisor/conf.d/supervisord.conf +COPY ./vllm/ /etc/csghub/ +COPY ./vllm/multi-node-serving.sh /vllm-workspace/examples/online_serving/multi-node-serving.sh +RUN chmod +x /etc/csghub/*.sh +RUN chmod +x /vllm-workspace/examples/online_serving/*.sh + +WORKDIR /workspace/ +ENV HUGGINGFACE_HUB_CACHE=/workspace/ \ + HF_HUB_ENABLE_HF_TRANSFER=0 + +EXPOSE 8000 + +ENTRYPOINT ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"] \ No newline at end of file