Skip to content

[Bug]: Flashinfer stuck with CUDA Graph #6086

@Juelianqvq

Description

@Juelianqvq

Your current environment

2 × RTX 3090 GPUs, running Llama-2-13B

@LiuXiaoxuanPKU
Traceback as follows:

🐛 Describe the bug

[416fa14255e5:25629:0:25629] Caught signal 11 (Segmentation fault: invalid permissions for mapped object at address 0x7f9bb9000000)
==== backtrace (tid:  25918) ====
 0 0x0000000000042520 __sigaction()  ???:0
 1 0x00000000008dfeb8 flashinfer::PartitionPagedKVCacheComputeAuxiliaryInfo<int>()  ???:0
 2 0x00000000009036b1 flashinfer::BatchDecodeHandler::BeginForwardDispatched<128u, (flashinfer::PageStorage)0, (flashinfer::LogitsPostHook)0, (flashinfer::QKVLayout)0, (flashinfer::PosEncodingMode)0, __half, __half, __half, int>()  ???:0
 3 0x0000000000892488 BatchDecodeWithPagedKVCachePyTorchWrapper::BeginForward(at::Tensor, at::Tensor, at::Tensor, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, float, at::Tensor, at::Tensor)::{lambda()#1}::operator()() const::{lambda()#1}::operator()() const::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda()#1}::operator()() const::{lambda()#1}::operator()() const::{lambda()#1}::operator()() const::{lambda()#1}::operator()() const::{lambda()#1}::operator()()  tmpxft_00000134_00000000-6_batch_decode.compute_90.cudafe1.cpp:0
 4 0x0000000000892ea8 BatchDecodeWithPagedKVCachePyTorchWrapper::BeginForward(at::Tensor, at::Tensor, at::Tensor, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, float, at::Tensor, at::Tensor)::{lambda()#1}::operator()() const::{lambda()#1}::operator()() const::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda()#1}::operator()()  tmpxft_00000134_00000000-6_batch_decode.compute_90.cudafe1.cpp:0
 5 0x00000000008b40b4 BatchDecodeWithPagedKVCachePyTorchWrapper::BeginForward(at::Tensor, at::Tensor, at::Tensor, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, float, at::Tensor, at::Tensor)::{lambda()#1}::operator()()  tmpxft_00000134_00000000-6_batch_decode.compute_90.cudafe1.cpp:0
 6 0x00000000008dc470 BatchDecodeWithPagedKVCachePyTorchWrapper::BeginForward()  ???:0
 7 0x0000000000e2aae9 pybind11::cpp_function::initialize<pybind11::cpp_function::initialize<void, BatchDecodeWithPagedKVCachePyTorchWrapper, at::Tensor, at::Tensor, at::Tensor, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, float, at::Tensor, at::Tensor, pybind11::name, pybind11::is_method, pybind11::sibling>(void (BatchDecodeWithPagedKVCachePyTorchWrapper::*)(at::Tensor, at::Tensor, at::Tensor, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, float, at::Tensor, at::Tensor), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&)::{lambda(BatchDecodeWithPagedKVCachePyTorchWrapper*, at::Tensor, at::Tensor, at::Tensor, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, float, at::Tensor, at::Tensor)#1}, void, BatchDecodeWithPagedKVCachePyTorchWrapper*, at::Tensor, at::Tensor, at::Tensor, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, float, at::Tensor, at::Tensor, pybind11::name, pybind11::is_method, pybind11::sibling>(pybind11::cpp_function::initialize<void, BatchDecodeWithPagedKVCachePyTorchWrapper, at::Tensor, at::Tensor, at::Tensor, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, float, at::Tensor, at::Tensor, pybind11::name, pybind11::is_method, pybind11::sibling>(void (BatchDecodeWithPagedKVCachePyTorchWrapper::*)(at::Tensor, at::Tensor, at::Tensor, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, float, at::Tensor, at::Tensor), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&)::{lambda(BatchDecodeWithPagedKVCachePyTorchWrapper*, at::Tensor, at::Tensor, at::Tensor, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, float, at::Tensor, at::Tensor)#1}&&, void (*)(BatchDecodeWithPagedKVCachePyTorchWrapper*, at::Tensor, at::Tensor, at::Tensor, unsigned int, unsigned int, 
unsigned int, unsigned int, unsigned int, unsigned int, float, at::Tensor, at::Tensor), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&)::{lambda(pybind11::detail::function_call&)#3}::_FUN()  :0
 8 0x0000000000e2123c pybind11::cpp_function::dispatcher()  :0
 9 0x000000000015fe0e PyObject_CallFunctionObjArgs()  ???:0
10 0x00000000001565eb _PyObject_MakeTpCall()  ???:0
11 0x000000000016e7bb PyMethod_New()  ???:0
12 0x000000000014e8a2 _PyEval_EvalFrameDefault()  ???:0
13 0x000000000016e4e1 PyMethod_New()  ???:0
14 0x000000000014a0d1 _PyEval_EvalFrameDefault()  ???:0
15 0x000000000016070c _PyFunction_Vectorcall()  ???:0
16 0x0000000000148f52 _PyEval_EvalFrameDefault()  ???:0
17 0x000000000016070c _PyFunction_Vectorcall()  ???:0
18 0x000000000014b2c1 _PyEval_EvalFrameDefault()  ???:0
19 0x000000000016070c _PyFunction_Vectorcall()  ???:0
20 0x0000000000148f52 _PyEval_EvalFrameDefault()  ???:0
21 0x000000000016e62e PyMethod_New()  ???:0
22 0x000000000015bd3b _PyType_LookupId()  ???:0
23 0x000000000014b2c1 _PyEval_EvalFrameDefault()  ???:0
24 0x000000000016070c _PyFunction_Vectorcall()  ???:0
25 0x0000000000148f52 _PyEval_EvalFrameDefault()  ???:0
26 0x000000000016070c _PyFunction_Vectorcall()  ???:0
27 0x000000000014b2c1 _PyEval_EvalFrameDefault()  ???:0
28 0x000000000016070c _PyFunction_Vectorcall()  ???:0
29 0x0000000000148f52 _PyEval_EvalFrameDefault()  ???:0
30 0x000000000016070c _PyFunction_Vectorcall()  ???:0
31 0x0000000000148f52 _PyEval_EvalFrameDefault()  ???:0
32 0x000000000016e741 PyMethod_New()  ???:0
33 0x000000000029562a _PyDict_SetItem_KnownHash()  ???:0
34 0x000000000028a9e8 _PyObject_RealIsInstance()  ???:0
35 0x0000000000094ac3 pthread_condattr_setpshared()  ???:0
36 0x0000000000125bf4 clone()  ???:0
=================================
==== backtrace (tid:  25629) ====
 0 0x0000000000042520 __sigaction()  ???:0
 1 0x00000000008dfeb8 flashinfer::PartitionPagedKVCacheComputeAuxiliaryInfo<int>()  ???:0
 2 0x00000000009036b1 flashinfer::BatchDecodeHandler::BeginForwardDispatched<128u, (flashinfer::PageStorage)0, (flashinfer::LogitsPostHook)0, (flashinfer::QKVLayout)0, (flashinfer::PosEncodingMode)0, __half, __half, __half, int>()  ???:0
 3 0x0000000000892488 BatchDecodeWithPagedKVCachePyTorchWrapper::BeginForward(at::Tensor, at::Tensor, at::Tensor, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, float, at::Tensor, at::Tensor)::{lambda()#1}::operator()() const::{lambda()#1}::operator()() const::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda()#1}::operator()() const::{lambda()#1}::operator()() const::{lambda()#1}::operator()() const::{lambda()#1}::operator()() const::{lambda()#1}::operator()()  tmpxft_00000134_00000000-6_batch_decode.compute_90.cudafe1.cpp:0
 4 0x0000000000892ea8 BatchDecodeWithPagedKVCachePyTorchWrapper::BeginForward(at::Tensor, at::Tensor, at::Tensor, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, float, at::Tensor, at::Tensor)::{lambda()#1}::operator()() const::{lambda()#1}::operator()() const::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda()#1}::operator()()  tmpxft_00000134_00000000-6_batch_decode.compute_90.cudafe1.cpp:0
 5 0x00000000008b40b4 BatchDecodeWithPagedKVCachePyTorchWrapper::BeginForward(at::Tensor, at::Tensor, at::Tensor, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, float, at::Tensor, at::Tensor)::{lambda()#1}::operator()()  tmpxft_00000134_00000000-6_batch_decode.compute_90.cudafe1.cpp:0
 6 0x00000000008dc470 BatchDecodeWithPagedKVCachePyTorchWrapper::BeginForward()  ???:0
 7 0x0000000000e2aae9 pybind11::cpp_function::initialize<pybind11::cpp_function::initialize<void, BatchDecodeWithPagedKVCachePyTorchWrapper, at::Tensor, at::Tensor, at::Tensor, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, float, at::Tensor, at::Tensor, pybind11::name, pybind11::is_method, pybind11::sibling>(void (BatchDecodeWithPagedKVCachePyTorchWrapper::*)(at::Tensor, at::Tensor, at::Tensor, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, float, at::Tensor, at::Tensor), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&)::{lambda(BatchDecodeWithPagedKVCachePyTorchWrapper*, at::Tensor, at::Tensor, at::Tensor, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, float, at::Tensor, at::Tensor)#1}, void, BatchDecodeWithPagedKVCachePyTorchWrapper*, at::Tensor, at::Tensor, at::Tensor, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, float, at::Tensor, at::Tensor, pybind11::name, pybind11::is_method, pybind11::sibling>(pybind11::cpp_function::initialize<void, BatchDecodeWithPagedKVCachePyTorchWrapper, at::Tensor, at::Tensor, at::Tensor, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, float, at::Tensor, at::Tensor, pybind11::name, pybind11::is_method, pybind11::sibling>(void (BatchDecodeWithPagedKVCachePyTorchWrapper::*)(at::Tensor, at::Tensor, at::Tensor, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, float, at::Tensor, at::Tensor), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&)::{lambda(BatchDecodeWithPagedKVCachePyTorchWrapper*, at::Tensor, at::Tensor, at::Tensor, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, float, at::Tensor, at::Tensor)#1}&&, void (*)(BatchDecodeWithPagedKVCachePyTorchWrapper*, at::Tensor, at::Tensor, at::Tensor, unsigned int, unsigned int, 
unsigned int, unsigned int, unsigned int, unsigned int, float, at::Tensor, at::Tensor), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&)::{lambda(pybind11::detail::function_call&)#3}::_FUN()  :0
 8 0x0000000000e2123c pybind11::cpp_function::dispatcher()  :0
 9 0x000000000015fe0e PyObject_CallFunctionObjArgs()  ???:0
10 0x00000000001565eb _PyObject_MakeTpCall()  ???:0
11 0x000000000016e7bb PyMethod_New()  ???:0
12 0x000000000014e8a2 _PyEval_EvalFrameDefault()  ???:0
13 0x000000000016e4e1 PyMethod_New()  ???:0
14 0x000000000014a0d1 _PyEval_EvalFrameDefault()  ???:0
15 0x000000000016070c _PyFunction_Vectorcall()  ???:0
16 0x0000000000148f52 _PyEval_EvalFrameDefault()  ???:0
17 0x000000000016070c _PyFunction_Vectorcall()  ???:0
18 0x000000000014b2c1 _PyEval_EvalFrameDefault()  ???:0
19 0x000000000016070c _PyFunction_Vectorcall()  ???:0
20 0x0000000000148f52 _PyEval_EvalFrameDefault()  ???:0
21 0x000000000016e4e1 PyMethod_New()  ???:0
22 0x000000000014a0d1 _PyEval_EvalFrameDefault()  ???:0
23 0x000000000016070c _PyFunction_Vectorcall()  ???:0
24 0x000000000014b2c1 _PyEval_EvalFrameDefault()  ???:0
25 0x000000000016e741 PyMethod_New()  ???:0
26 0x000000000014b2c1 _PyEval_EvalFrameDefault()  ???:0
27 0x000000000016070c _PyFunction_Vectorcall()  ???:0
28 0x000000000016f192 PyObject_Call()  ???:0
29 0x000000000014b2c1 _PyEval_EvalFrameDefault()  ???:0
30 0x000000000016070c _PyFunction_Vectorcall()  ???:0
31 0x0000000000148f52 _PyEval_EvalFrameDefault()  ???:0
32 0x000000000016e4e1 PyMethod_New()  ???:0
33 0x000000000014a0d1 _PyEval_EvalFrameDefault()  ???:0
34 0x000000000016070c _PyFunction_Vectorcall()  ???:0
35 0x0000000000148f52 _PyEval_EvalFrameDefault()  ???:0
36 0x0000000000155784 _PyObject_FastCallDictTstate()  ???:0
37 0x000000000016a744 _PyStack_AsDict()  ???:0
38 0x000000000015658c _PyObject_MakeTpCall()  ???:0
39 0x000000000014ec66 _PyEval_EvalFrameDefault()  ???:0
40 0x000000000016070c _PyFunction_Vectorcall()  ???:0
41 0x000000000014e8a2 _PyEval_EvalFrameDefault()  ???:0
42 0x000000000016070c _PyFunction_Vectorcall()  ???:0
43 0x0000000000148f52 _PyEval_EvalFrameDefault()  ???:0
44 0x0000000000155784 _PyObject_FastCallDictTstate()  ???:0
45 0x000000000016a744 _PyStack_AsDict()  ???:0
46 0x000000000015658c _PyObject_MakeTpCall()  ???:0
47 0x000000000014ec66 _PyEval_EvalFrameDefault()  ???:0
48 0x000000000016070c _PyFunction_Vectorcall()  ???:0
49 0x0000000000148e0d _PyEval_EvalFrameDefault()  ???:0
50 0x000000000016070c _PyFunction_Vectorcall()  ???:0
51 0x0000000000148f52 _PyEval_EvalFrameDefault()  ???:0
52 0x000000000016e4e1 PyMethod_New()  ???:0
53 0x000000000016f192 PyObject_Call()  ???:0
54 0x000000000014b2c1 _PyEval_EvalFrameDefault()  ???:0
55 0x000000000016e4e1 PyMethod_New()  ???:0
56 0x000000000016f192 PyObject_Call()  ???:0
=================================
/usr/lib/python3.10/multiprocessing/resource_tracker.py:224: UserWarning: resource_tracker: There appear to be 2 leaked shared_memory objects to clean up at shutdown
  warnings.warn('resource_tracker: There appear to be %d '
Segmentation fault (core dumped)

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug: Something isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions