@@ -1743,39 +1743,38 @@ class qnn_instance {
1743
1743
static constexpr const int _required_num_providers = 1 ;
1744
1744
1745
1745
private:
1746
- std::string _lib_path;
1747
- std::string _backend_name;
1748
- std::string _model_name; // name of prebuilt QNN model, might be used in the future
1749
- BackendIdType _backend_id;
1746
+ std::string _lib_path;
1747
+ std::string _backend_name;
1748
+ std::string _model_name; // name of prebuilt QNN model, might be used in the future
1749
+ BackendIdType _backend_id;
1750
1750
1751
1751
bool _debug_tensor = false ; // flag to indicate if requested graph is to be run in debug mode
1752
1752
bool _do_node_validations = true ; // flag to indicate whether all add_node calls need to be validated
1753
1753
QnnLog_Level_t _qnn_log_level = QNN_LOG_LEVEL_DEBUG;
1754
1754
1755
1755
ggml_qnn_profile_level _profile_level = ggml_qnn_profile_level::profile_detail;
1756
1756
1757
- qnn_interface _qnn_interface;
1758
-
1759
- void * _system_lib_handle = nullptr ;
1757
+ void * _system_lib_handle = nullptr ;
1760
1758
1761
- Qnn_GraphHandle_t _qnn_graph_handle = nullptr ;
1759
+ Qnn_GraphHandle_t _qnn_graph_handle = nullptr ;
1762
1760
1763
- Qnn_LogHandle_t _qnn_log_handle = nullptr ;
1761
+ Qnn_LogHandle_t _qnn_log_handle = nullptr ;
1764
1762
1765
1763
Qnn_ProfileHandle_t _qnn_profile_handle = nullptr ;
1766
1764
1767
- Qnn_DeviceHandle_t _qnn_device_handle = nullptr ;
1765
+ Qnn_DeviceHandle_t _qnn_device_handle = nullptr ;
1768
1766
1769
1767
Qnn_BackendHandle_t _qnn_backend_handle = nullptr ;
1770
1768
1771
1769
Qnn_ContextHandle_t _qnn_context_handle = nullptr ;
1772
1770
1773
1771
QnnSystemContext_Handle_t _qnn_system_handle = nullptr ;
1774
1772
1775
- QnnHtpDevice_PerfInfrastructure_t *_qnn_htp_perfinfra = nullptr ;
1776
- uint32_t _qnn_power_configid = 1 ;
1777
- uint32_t _qnn_rpc_pollingtime = 9999 ; // 0-10000 us for high performing
1773
+ QnnHtpDevice_PerfInfrastructure_t * _qnn_htp_perfinfra = nullptr ;
1774
+ uint32_t _qnn_power_configid = 1 ;
1775
+ uint32_t _qnn_rpc_pollingtime = 9999 ; // 0-10000 us for high performing
1778
1776
1777
+ qnn_interface _qnn_interface;
1779
1778
QNN_INTERFACE_VER_TYPE _qnn_raw_interface;
1780
1779
QNN_SYSTEM_INTERFACE_VER_TYPE _qnn_raw_system_interface;
1781
1780
@@ -1787,7 +1786,6 @@ class qnn_instance {
1787
1786
static std::unordered_map<std::string, BackendIdType> _lib_path_to_backend_id;
1788
1787
static std::unordered_map<BackendIdType, const QnnInterface_t *> _loaded_backend;
1789
1788
1790
- void * _rpc_lib_handle = nullptr ;
1791
1789
std::atomic_bool _rpcmem_initialized{false };
1792
1790
pfn_rpc_mem_alloc _pfn_rpc_mem_alloc;
1793
1791
pfn_rpc_mem_free _pfn_rpc_mem_free;
@@ -1796,12 +1794,13 @@ class qnn_instance {
1796
1794
pfn_rpc_mem_deinit _pfn_rpc_mem_deinit;
1797
1795
std::unordered_map<void *, void *> _rpcmem_store_map;
1798
1796
std::unordered_map<void *, size_t > _rpcmem_usage_map;
1799
- size_t _rpcmem_capacity = 512 ; // mempool size in Mbytes
1800
1797
size_t _rpcmem_usage = 0 ; // mempool usage in Mbytes
1798
+ size_t _rpcmem_capacity = 512 ; // mempool size in Mbytes
1801
1799
1802
1800
std::string _graph_name;
1803
1801
QNNBackend _device_id;
1804
- bool _enable_qnn_rpc = false ; // TODO:unknown issue with QNN RPC feature
1802
+ void * _rpc_lib_handle = nullptr ;
1803
+ bool _enable_qnn_rpc = false ; // TODO:unknown issue with QNN RPC feature
1805
1804
1806
1805
DISABLE_COPY (qnn_instance);
1807
1806
DISABLE_MOVE (qnn_instance);
@@ -1925,13 +1924,13 @@ int qnn_instance::register_rpcmem(void * p_data, Qnn_Tensor_t * p_tensor) {
1925
1924
1926
1925
if (is_rpcmem_registered ((QNN_VER_PTR (*p_tensor)->memHandle ))) {
1927
1926
GGMLQNN_LOG_WARN (" tensor %s has been registered shared memory\n " , (QNN_VER_PTR (*p_tensor)->name ));
1928
- return 4 ;
1927
+ return 3 ;
1929
1928
}
1930
1929
1931
1930
int32_t mem_fd = rpcmem_to_fd (p_data);
1932
1931
if (-1 == mem_fd) {
1933
1932
GGMLQNN_LOG_WARN (" failed to get file descriptor\n " );
1934
- return 5 ;
1933
+ return 4 ;
1935
1934
}
1936
1935
GGMLQNN_LOG_DEBUG (" mem_fd %d\n " , mem_fd);
1937
1936
Qnn_MemDescriptor_t descriptor = {
@@ -1947,9 +1946,8 @@ int qnn_instance::register_rpcmem(void * p_data, Qnn_Tensor_t * p_tensor) {
1947
1946
/* numDescriptors=*/ 1 ,
1948
1947
&handle);
1949
1948
if (error != QNN_SUCCESS) {
1950
- GGMLQNN_LOG_WARN (" failed to register shared memory, error %d, %s\n " , QNN_GET_ERROR_CODE (error),
1951
- strerror (error));
1952
- return 6 ;
1949
+ GGMLQNN_LOG_WARN (" failed to register shared memory, error %d, %s\n " , QNN_GET_ERROR_CODE (error), strerror (error));
1950
+ return 5 ;
1953
1951
} else {
1954
1952
GGMLQNN_LOG_INFO (" tensor %s successfully register shared memory\n " , (QNN_VER_PTR (*p_tensor)->name ));
1955
1953
}
@@ -1988,8 +1986,7 @@ Qnn_MemHandle_t qnn_instance::register_rpcmem(void * p_data, const uint32_t ran
1988
1986
{{mem_fd}}
1989
1987
};
1990
1988
Qnn_MemHandle_t handle = nullptr ;
1991
- auto error = _qnn_interface.qnn_mem_register (_qnn_context_handle, &descriptor,
1992
- /* numDescriptors=*/ 1 , &handle);
1989
+ auto error = _qnn_interface.qnn_mem_register (_qnn_context_handle, &descriptor, /* numDescriptors=*/ 1 , &handle);
1993
1990
if (error != QNN_SUCCESS) {
1994
1991
GGMLQNN_LOG_WARN (" failed to register shared memory, error %d, %s" , QNN_GET_ERROR_CODE (error), strerror (error));
1995
1992
return nullptr ;
@@ -2407,7 +2404,7 @@ int qnn_instance::qnn_init(const QnnSaver_Config_t ** saver_config) {
2407
2404
if (QNN_PROFILE_NO_ERROR != _qnn_raw_interface.profileCreate (
2408
2405
_qnn_backend_handle, QNN_PROFILE_LEVEL_BASIC, &_qnn_profile_handle)) {
2409
2406
GGMLQNN_LOG_WARN (" unable to create profile handle in the backend\n " );
2410
- return 7 ;
2407
+ return 6 ;
2411
2408
} else {
2412
2409
GGMLQNN_LOG_DEBUG (" initialize qnn profile successfully\n " );
2413
2410
}
@@ -2433,7 +2430,7 @@ int qnn_instance::qnn_init(const QnnSaver_Config_t ** saver_config) {
2433
2430
#endif
2434
2431
if (nullptr == _rpc_lib_handle) {
2435
2432
GGMLQNN_LOG_WARN (" failed to load qualcomm's rpc lib, error:%s\n " , dlerror ());
2436
- return 9 ;
2433
+ return 8 ;
2437
2434
} else {
2438
2435
GGMLQNN_LOG_DEBUG (" load rpcmem lib successfully\n " );
2439
2436
set_rpcmem_initialized (true );
@@ -2447,7 +2444,7 @@ int qnn_instance::qnn_init(const QnnSaver_Config_t ** saver_config) {
2447
2444
|| nullptr == _pfn_rpc_mem_to_fd) {
2448
2445
GGMLQNN_LOG_WARN (" unable to access symbols in QNN RPC lib. dlerror(): %s" , dlerror ());
2449
2446
dlclose (_rpc_lib_handle);
2450
- return 10 ;
2447
+ return 9 ;
2451
2448
}
2452
2449
2453
2450
if (nullptr != _pfn_rpc_mem_init) // make Qualcomm's SoC based low-end phone happy
@@ -2459,7 +2456,7 @@ int qnn_instance::qnn_init(const QnnSaver_Config_t ** saver_config) {
2459
2456
&_qnn_context_handle);
2460
2457
if (nullptr == _qnn_context_handle) {
2461
2458
GGMLQNN_LOG_WARN (" why failed to initialize qnn context, error:%s\n " , strerror (errno));
2462
- return 8 ;
2459
+ return 10 ;
2463
2460
} else {
2464
2461
GGMLQNN_LOG_DEBUG (" initialize qnn context successfully\n " );
2465
2462
}
@@ -2751,29 +2748,33 @@ static bool ggml_qnn_can_handle_op(const struct ggml_tensor * tensor) {
2751
2748
return true ;
2752
2749
}
2753
2750
if (ggml_is_empty (tensor) || tensor->op == GGML_OP_RESHAPE
2754
- || tensor->op == GGML_OP_TRANSPOSE || tensor->op == GGML_OP_VIEW
2755
- || tensor->op == GGML_OP_PERMUTE) {
2751
+ || tensor->op == GGML_OP_TRANSPOSE
2752
+ || tensor->op == GGML_OP_VIEW
2753
+ || tensor->op == GGML_OP_PERMUTE
2754
+ ) {
2756
2755
return false ;
2757
2756
}
2758
2757
2759
2758
// TODO: support other op
2760
- bool supported_op = ((tensor->op == GGML_OP_ADD) || (tensor->op == GGML_OP_MUL_MAT)
2761
- || (tensor->op == GGML_OP_MUL));
2759
+ bool supported_op = ((tensor->op == GGML_OP_ADD)
2760
+ || (tensor->op == GGML_OP_MUL_MAT)
2761
+ || (tensor->op == GGML_OP_MUL)
2762
+ );
2762
2763
if (!supported_op) {
2763
2764
return false ;
2764
2765
}
2765
2766
2766
2767
struct ggml_tensor * src0 = tensor->src [0 ];
2767
2768
struct ggml_tensor * src1 = tensor->src [1 ];
2768
2769
2769
- const int64_t ne00 = tensor->src [0 ]->ne [0 ];
2770
- const int64_t ne01 = tensor->src [0 ]->ne [1 ];
2770
+ const int64_t ne00 = tensor->src [0 ]->ne [0 ];
2771
+ const int64_t ne01 = tensor->src [0 ]->ne [1 ];
2771
2772
2772
- const int64_t ne10 = tensor->src [1 ]->ne [0 ];
2773
- const int64_t ne11 = tensor->src [1 ]->ne [1 ];
2773
+ const int64_t ne10 = tensor->src [1 ]->ne [0 ];
2774
+ const int64_t ne11 = tensor->src [1 ]->ne [1 ];
2774
2775
2775
- const int64_t ne0 = tensor->ne [0 ];
2776
- const int64_t ne1 = tensor->ne [1 ];
2776
+ const int64_t ne0 = tensor->ne [0 ];
2777
+ const int64_t ne1 = tensor->ne [1 ];
2777
2778
2778
2779
const uint32_t src0_rank = ggml_get_tensor_rank (src0);
2779
2780
const uint32_t src1_rank = ggml_get_tensor_rank (src1);
0 commit comments