Skip to content

Commit 8d05db3

Browse files
authored
Merge branch 'develop' into new_add_prompt_logprobs_online
2 parents 66c8e66 + 109d48e commit 8d05db3

File tree

22 files changed

+593
-122
lines changed

22 files changed

+593
-122
lines changed

.github/workflows/_unit_test_coverage.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ jobs:
4343
runs-on: [self-hosted, GPU-h1z1-2Cards]
4444
timeout-minutes: 90
4545
needs: check_cov_skip
46+
if: needs.check_cov_skip.outputs.can-skip != 'true'
4647
outputs:
4748
diff_cov_file_url: ${{ steps.cov_upload.outputs.diff_cov_file_url }}
4849
unittest_failed_url: ${{ steps.cov_upload.outputs.unittest_failed_url }}
@@ -319,7 +320,7 @@ jobs:
319320
echo "All tests passed"
320321
321322
- name: Verify Code Coverage Threshold (80%)
322-
if: ${{ github.event_name == 'pull_request' && (needs.check_cov_skip.outputs['can-skip'] != 'true') }}
323+
if: ${{ github.event_name == 'pull_request' }}
323324
shell: bash
324325
run: |
325326
cd FastDeploy
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# Removes every label whose name starts with "skip-ci:" from a pull request
# each time new commits are pushed to it (the `synchronize` event).
name: Remove Skip-CI Labels

on:
  pull_request_target:
    types: [synchronize]

permissions:
  pull-requests: write

jobs:
  remove-skip-ci-labels:
    name: Remove skip-ci labels on new commits
    runs-on: ubuntu-latest
    steps:
      # Step 1: collect the names of all skip-ci labels currently on the PR and
      # expose them as a JSON array plus a 'true'/'false' flag.
      - name: Get PR labels
        id: get-labels
        uses: actions/github-script@v7
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            const { data: labels } = await github.rest.issues.listLabelsOnIssue({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number
            });

            const skipCiLabels = labels
              .filter(label => label.name.startsWith('skip-ci:'))
              .map(label => label.name);

            console.log('Found skip-ci labels:', skipCiLabels);
            core.setOutput('skip-ci-labels', JSON.stringify(skipCiLabels));
            core.setOutput('has-skip-ci-labels', skipCiLabels.length > 0 ? 'true' : 'false');

      # Step 2: remove each collected label.
      # The JSON list is handed over through an environment variable rather
      # than being expanded into the script text: label names are free-form,
      # and a quote or backslash pasted into a JavaScript string literal would
      # break the script or allow text to escape the literal.
      - name: Remove skip-ci labels
        if: steps.get-labels.outputs.has-skip-ci-labels == 'true'
        uses: actions/github-script@v7
        env:
          SKIP_CI_LABELS: ${{ steps.get-labels.outputs.skip-ci-labels }}
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            const skipCiLabels = JSON.parse(process.env.SKIP_CI_LABELS);

            for (const label of skipCiLabels) {
              console.log(`Removing label: ${label}`);
              await github.rest.issues.removeLabel({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: context.issue.number,
                name: label
              });
            }

            console.log(`Successfully removed ${skipCiLabels.length} skip-ci label(s)`);
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
max_model_len: 131072
2+
tensor_parallel_size: 1
3+
quantization: wint8
4+
max_num_seqs: 32
5+
reasoning_parser: ernie-45-vl-thinking
6+
tool_call_parser: ernie-45-vl-thinking
7+
load_choices: "default_v1"
8+
mm-processor-kwargs: '{"image_max_pixels": 12845056 }'
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
max_model_len: 32768
2+
tensor_parallel_size: 1
3+
quantization: wint8
4+
max_num_seqs: 32
5+
reasoning_parser: ernie-45-vl-thinking
6+
tool_call_parser: ernie-45-vl-thinking
7+
load_choices: "default_v1"
8+
mm-processor-kwargs: '{"image_max_pixels": 12845056 }'

custom_ops/gpu_ops/helper.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -725,6 +725,10 @@ inline void launchWithPdlWhenEnabled(KernelFn kernelFn,
725725
size_t dynamicShmSize,
726726
cudaStream_t stream,
727727
Args &&...args) {
728+
#ifdef PADDLE_WITH_CUSTOM_DEVICE_METAX_GPU
729+
(*kernelFn)<<<grid, block, dynamicShmSize, stream>>>(
730+
std::forward<Args>(args)...);
731+
#else
728732
cudaLaunchConfig_t kernelConfig;
729733
kernelConfig.gridDim = grid;
730734
kernelConfig.blockDim = block;
@@ -738,5 +742,6 @@ inline void launchWithPdlWhenEnabled(KernelFn kernelFn,
738742
kernelConfig.numAttrs = 1;
739743

740744
cudaLaunchKernelEx(&kernelConfig, kernelFn, std::forward<Args>(args)...);
745+
#endif
741746
}
742747
#endif

custom_ops/xpu_ops/build.sh

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,28 @@ SYSTEM_VERSION=`${python} -c "import platform; print(platform.system().lower())"
2626
PROCESSOR_VERSION=`${python} -c "import platform; print(platform.processor())"`
2727
WHEEL_NAME="fastdeploy_ops-${OPS_VERSION}-${PY_VERSION}-${SYSTEM_VERSION}-${PROCESSOR_VERSION}.egg"
2828

29+
# Add compatibility for modern python packaging methods
30+
WHEEL_MODERN_NAME="fastdeploy_ops"
31+
2932
${python} setup_ops.py install --install-lib ${OPS_TMP_DIR}
30-
mkdir -p ${OPS_TMP_DIR}/${WHEEL_NAME}/libs
31-
cp ${XVLLM_PATH}/xft_blocks/so/libxft_blocks.so ${OPS_TMP_DIR}/${WHEEL_NAME}/libs/
32-
cp ${XVLLM_PATH}/infer_ops/so/libapiinfer.so ${OPS_TMP_DIR}/${WHEEL_NAME}/libs/
33-
patchelf --set-rpath '$ORIGIN/libs' ${OPS_TMP_DIR}/${WHEEL_NAME}/fastdeploy_ops_pd_.so
33+
34+
# Handle directory compatibility between modern and legacy naming.
# The install step is expected to leave the ops in one of two directories under
# ${OPS_TMP_DIR}: the modern package directory (${WHEEL_MODERN_NAME}) or the
# legacy egg directory (${WHEEL_NAME}). Pick whichever exists, preferring the
# modern one, and stop if neither is present so a failed install is reported
# here rather than as a confusing patchelf error further down.
if [ -d "${OPS_TMP_DIR}/${WHEEL_MODERN_NAME}" ]; then
    echo -e "${GREEN}[Info]${NONE} Ready to use ops from modern directory ${WHEEL_MODERN_NAME}"
    # Use modern directory name
    TARGET_DIR="${OPS_TMP_DIR}/${WHEEL_MODERN_NAME}"
elif [ -d "${OPS_TMP_DIR}/${WHEEL_NAME}" ]; then
    echo -e "${YELLOW}[Warning]${NONE} ${WHEEL_NAME} directory exists. This is a deprecated packaging and distribution method."
    # Use legacy directory name
    TARGET_DIR="${OPS_TMP_DIR}/${WHEEL_NAME}"
else
    echo -e "${RED}[Error]${NONE} Neither modern nor legacy directory found in ${OPS_TMP_DIR}"
    exit 1
fi

# Bundle the runtime shared libraries next to the ops library and point its
# rpath at them so they are resolved relative to the installed .so.
mkdir -p "${TARGET_DIR}/libs"
cp "${XVLLM_PATH}/xft_blocks/so/libxft_blocks.so" "${TARGET_DIR}/libs/"
cp "${XVLLM_PATH}/infer_ops/so/libapiinfer.so" "${TARGET_DIR}/libs/"
patchelf --set-rpath '$ORIGIN/libs' "${TARGET_DIR}/fastdeploy_ops_pd_.so"

fastdeploy/config.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -557,6 +557,8 @@ def __init__(
557557
self.use_internode_ll_two_stage: bool = False
558558
# disable sequence parallel moe
559559
self.disable_sequence_parallel_moe: bool = False
560+
# enable async download features
561+
self.enable_async_download_features: bool = False
560562

561563
self.pod_ip: str = None
562564
# enable the custom all-reduce kernel and fall back to NCCL(dist.all_reduce).

fastdeploy/engine/args_utils.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -467,6 +467,11 @@ class EngineArgs:
467467
Url for router server, such as `0.0.0.0:30000`.
468468
"""
469469

470+
enable_async_download_features: bool = False
471+
"""
472+
Flag to enable async download features. Default is False (disabled).
473+
"""
474+
470475
def __post_init__(self):
471476
"""
472477
Post-initialization processing to set default tokenizer if not provided.
@@ -849,6 +854,12 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
849854
default=EngineArgs.enable_expert_parallel,
850855
help="Enable expert parallelism.",
851856
)
857+
parallel_group.add_argument(
858+
"--enable-async-download-features",
859+
action="store_true",
860+
default=EngineArgs.enable_async_download_features,
861+
help="Enable async download features.",
862+
)
852863

853864
# Load group
854865
load_group = parser.add_argument_group("Load Configuration")

fastdeploy/engine/common_engine.py

Lines changed: 27 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -51,14 +51,7 @@
5151
from fastdeploy.splitwise.splitwise_connector import SplitwiseConnector
5252
from fastdeploy.trace.constants import LoggingEventName
5353
from fastdeploy.trace.trace_logger import print as trace_print
54-
from fastdeploy.utils import (
55-
EngineError,
56-
check_download_links,
57-
envs,
58-
get_logger,
59-
init_bos_client,
60-
llm_logger,
61-
)
54+
from fastdeploy.utils import EngineError, envs, get_logger, llm_logger
6255

6356
try:
6457
TokenProcessor = load_token_processor_plugins()
@@ -808,7 +801,7 @@ def _fetch_request():
808801
else:
809802
raise
810803
# 2. Schedule requests
811-
tasks = self.resource_manager.schedule()
804+
tasks, error_tasks = self.resource_manager.schedule()
812805

813806
# 3. Send to engine
814807
if tasks:
@@ -833,7 +826,16 @@ def _fetch_request():
833826
trace_print(LoggingEventName.REQUEST_SCHEDULE_END, task.request_id, getattr(task, "user", ""))
834827
trace_print(LoggingEventName.INFERENCE_START, task.request_id, getattr(task, "user", ""))
835828
self.engine_worker_queue.put_tasks((tasks, self.resource_manager.real_bsz))
836-
else:
829+
830+
# 4. Send error responses for failed tasks
831+
if error_tasks:
832+
for request_id, failed in error_tasks:
833+
if failed is None:
834+
llm_logger.warning(f"Request {request_id} has no error, skip sending error response.")
835+
continue
836+
self._send_error_response(request_id, failed)
837+
838+
if not tasks and not error_tasks:
837839
time.sleep(0.005)
838840

839841
except RuntimeError as e:
@@ -909,24 +911,6 @@ def _insert_zmq_task_to_scheduler(self):
909911
self.llm_logger.error(f"Receive request error: {err_msg}")
910912
results.append((request.request_id, err_msg))
911913

912-
if self._has_features_info(request) and err_msg is None:
913-
if self.bos_client is None:
914-
self.bos_client = init_bos_client()
915-
916-
download_urls = []
917-
inputs = request.multimodal_inputs
918-
if inputs.get("video_feature_urls") is not None:
919-
download_urls.extend(inputs.get("video_feature_urls"))
920-
if inputs.get("image_feature_urls") is not None:
921-
download_urls.extend(inputs.get("image_feature_urls"))
922-
if inputs.get("audio_feature_urls") is not None:
923-
download_urls.extend(inputs.get("audio_feature_urls"))
924-
925-
err_msg = check_download_links(self.bos_client, download_urls)
926-
if err_msg:
927-
llm_logger.error(f"Receive request {request.request_id} download error: {err_msg}")
928-
results.append((request.request_id, err_msg))
929-
930914
if err_msg is None:
931915
insert_task.append(request)
932916

@@ -948,21 +932,27 @@ def _insert_zmq_task_to_scheduler(self):
948932
main_process_metrics.num_requests_waiting.inc(1)
949933
continue
950934

951-
error_result = RequestOutput(
952-
request_id=request_id,
953-
finished=True,
954-
error_code=500,
955-
error_msg=failed,
956-
)
957-
# Since the request is not in scheduler
958-
# Send result by zmq directly
959-
self.send_response_server.send_response(request_id, [error_result])
935+
self._send_error_response(request_id, failed)
960936
except Exception as e:
961937
self.llm_logger.error(
962938
f"Error happened while receiving new request from zmq, details={e}, "
963939
f"traceback={traceback.format_exc()}"
964940
)
965941

942+
def _send_error_response(self, request_id, error_msg, error_code: int = 500):
    """
    Log a failure and deliver a finished error result for ``request_id``.

    Args:
        request_id: Identifier of the request the error belongs to.
        error_msg: Error description stored in the result's ``error_msg`` field.
        error_code: Value stored in the result's ``error_code`` field. Defaults to 500.
    """
    log_line = (
        f"Send error response to client, request_id: {request_id}, error_msg: {error_msg}, error_code: {error_code}"
    )
    llm_logger.error(log_line)

    # The request is not held by the scheduler, so the result is sent
    # directly over zmq through the response server.
    payload = [
        RequestOutput(
            request_id=request_id,
            finished=True,
            error_code=error_code,
            error_msg=error_msg,
        )
    ]
    self.send_response_server.send_response(request_id, payload)
955+
966956
def _decode_token(self, token_ids, req_id, is_end):
967957
delta_text = ""
968958
if envs.FD_ENABLE_RETURN_TEXT:
@@ -977,19 +967,6 @@ def _decode_token(self, token_ids, req_id, is_end):
977967
del self.data_processor.decode_status[req_id]
978968
return delta_text, token_ids
979969

980-
def _has_features_info(self, task):
981-
inputs = task.multimodal_inputs
982-
if inputs is None or len(inputs) == 0:
983-
return False
984-
985-
if (
986-
(inputs.get("video_feature_urls") is not None and len(inputs["video_feature_urls"]) > 0)
987-
or (inputs.get("image_feature_urls") is not None and len(inputs["image_feature_urls"]) > 0)
988-
or (inputs.get("audio_feature_urls") is not None and len(inputs["audio_feature_urls"]) > 0)
989-
):
990-
return True
991-
return False
992-
993970
def _zmq_send_generated_tokens(self):
994971
"""
995972
Recieve output for zmq

fastdeploy/engine/request.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,10 @@ def __init__(
168168
# dp
169169
self.dp_rank = dp_rank
170170

171+
self.async_process_futures = []
172+
self.error_message = None
173+
self.error_code = None
174+
171175
@classmethod
172176
def from_dict(cls, d: dict):
173177
data_processor_logger.debug(f"{d}")

0 commit comments

Comments
 (0)