@@ -888,72 +888,91 @@ def dpnp_det(a):
888
888
return det .reshape (shape )
889
889
890
890
891
- def dpnp_eigh (a , UPLO ):
891
+ def dpnp_eigh (a , UPLO , eigen_mode = "V" ):
892
892
"""
893
- dpnp_eigh(a, UPLO)
893
+ dpnp_eigh(a, UPLO, eigen_mode="V" )
894
894
895
895
Return the eigenvalues and eigenvectors of a complex Hermitian
896
896
(conjugate symmetric) or a real symmetric matrix.
897
+ Can return both eigenvalues and eigenvectors (`eigen_mode="V"`) or
898
+ only eigenvalues (`eigen_mode="N"`).
897
899
898
900
The main calculation is done by calling an extension function
899
901
for LAPACK library of OneMKL. Depending on input type of `a` array,
900
902
it will be either ``heevd`` (for complex types) or ``syevd`` (for others).
901
903
902
904
"""
903
905
904
- a_usm_type = a .usm_type
905
- a_sycl_queue = a .sycl_queue
906
- a_order = "C" if a .flags .c_contiguous else "F"
907
- a_usm_arr = dpnp .get_usm_ndarray (a )
906
+ # get resulting type of arrays with eigenvalues and eigenvectors
907
+ v_type = _common_type (a )
908
+ w_type = _real_type (v_type )
908
909
909
- # 'V' means both eigenvectors and eigenvalues will be calculated
910
- jobz = _jobz ["V" ]
910
+ if a .size == 0 :
911
+ w = dpnp .empty_like (a , shape = a .shape [:- 1 ], dtype = w_type )
912
+ if eigen_mode == "V" :
913
+ v = dpnp .empty_like (a , dtype = v_type )
914
+ return w , v
915
+ return w
916
+
917
+ # `eigen_mode` can be either "N" or "V", specifying the computation mode
918
+ # for OneMKL LAPACK `syevd` and `heevd` routines.
919
+ # "V" (default) means both eigenvectors and eigenvalues will be calculated
920
+ # "N" means only eigenvalues will be calculated
921
+ jobz = _jobz [eigen_mode ]
911
922
uplo = _upper_lower [UPLO ]
912
923
913
- # get resulting type of arrays with eigenvalues and eigenvectors
914
- a_dtype = a .dtype
915
- lapack_func = "_syevd"
916
- if dpnp .issubdtype (a_dtype , dpnp .complexfloating ):
917
- lapack_func = "_heevd"
918
- v_type = a_dtype
919
- w_type = dpnp .float64 if a_dtype == dpnp .complex128 else dpnp .float32
920
- elif dpnp .issubdtype (a_dtype , dpnp .floating ):
921
- v_type = w_type = a_dtype
922
- elif a_sycl_queue .sycl_device .has_aspect_fp64 :
923
- v_type = w_type = dpnp .float64
924
- else :
925
- v_type = w_type = dpnp .float32
924
+ # Get LAPACK function (_syevd for real or _heevd for complex data types)
925
+ # to compute all eigenvalues and, optionally, all eigenvectors
926
+ lapack_func = (
927
+ "_heevd" if dpnp .issubdtype (v_type , dpnp .complexfloating ) else "_syevd"
928
+ )
929
+
930
+ a_sycl_queue = a .sycl_queue
931
+ a_order = "C" if a .flags .c_contiguous else "F"
926
932
927
933
if a .ndim > 2 :
928
- w = dpnp .empty (
929
- a .shape [:- 1 ],
934
+ is_cpu_device = a .sycl_device .has_aspect_cpu
935
+ orig_shape = a .shape
936
+ # get 3d input array by reshape
937
+ a = a .reshape (- 1 , orig_shape [- 2 ], orig_shape [- 1 ])
938
+ a_usm_arr = dpnp .get_usm_ndarray (a )
939
+
940
+ # allocate a memory for dpnp array of eigenvalues
941
+ w = dpnp .empty_like (
942
+ a ,
943
+ shape = orig_shape [:- 1 ],
930
944
dtype = w_type ,
931
- usm_type = a_usm_type ,
932
- sycl_queue = a_sycl_queue ,
933
945
)
946
+ w_orig_shape = w .shape
947
+ # get 2d dpnp array with eigenvalues by reshape
948
+ w = w .reshape (- 1 , w_orig_shape [- 1 ])
934
949
935
950
# need to loop over the 1st dimension to get eigenvalues and eigenvectors of 3d matrix A
936
- op_count = a .shape [0 ]
937
- if op_count == 0 :
938
- return w , dpnp .empty_like (a , dtype = v_type )
939
-
940
- eig_vecs = [None ] * op_count
941
- ht_copy_ev = [None ] * op_count
942
- ht_lapack_ev = [None ] * op_count
943
- for i in range (op_count ):
951
+ batch_size = a .shape [0 ]
952
+ eig_vecs = [None ] * batch_size
953
+ ht_list_ev = [None ] * batch_size * 2
954
+ for i in range (batch_size ):
944
955
# oneMKL LAPACK assumes fortran-like array as input, so
945
956
# allocate a memory with 'F' order for dpnp array of eigenvectors
946
957
eig_vecs [i ] = dpnp .empty_like (a [i ], order = "F" , dtype = v_type )
947
958
948
959
# use DPCTL tensor function to fill the array of eigenvectors with content of input array
949
- ht_copy_ev [ i ], copy_ev = ti ._copy_usm_ndarray_into_usm_ndarray (
960
+ ht_list_ev [ 2 * i ], copy_ev = ti ._copy_usm_ndarray_into_usm_ndarray (
950
961
src = a_usm_arr [i ],
951
962
dst = eig_vecs [i ].get_array (),
952
963
sycl_queue = a_sycl_queue ,
953
964
)
954
965
966
+ # TODO: Remove this w/a when MKLD-17201 is solved.
967
+ # Waiting for a host task executing an OneMKL LAPACK syevd call
968
+ # on CPU causes deadlock due to serialization of all host tasks
969
+ # in the queue.
970
+ # We need to wait for each host tasks before calling _seyvd to avoid deadlock.
971
+ if lapack_func == "_syevd" and is_cpu_device :
972
+ ht_list_ev [2 * i ].wait ()
973
+
955
974
# call LAPACK extension function to get eigenvalues and eigenvectors of a portion of matrix A
956
- ht_lapack_ev [ i ], _ = getattr (li , lapack_func )(
975
+ ht_list_ev [ 2 * i + 1 ], _ = getattr (li , lapack_func )(
957
976
a_sycl_queue ,
958
977
jobz ,
959
978
uplo ,
@@ -962,29 +981,43 @@ def dpnp_eigh(a, UPLO):
962
981
depends = [copy_ev ],
963
982
)
964
983
965
- for i in range (op_count ):
966
- ht_lapack_ev [i ].wait ()
967
- ht_copy_ev [i ].wait ()
984
+ dpctl .SyclEvent .wait_for (ht_list_ev )
985
+
986
+ w = w .reshape (w_orig_shape )
987
+
988
+ if eigen_mode == "V" :
989
+ # combine the list of eigenvectors into a single array
990
+ v = dpnp .array (eig_vecs , order = a_order ).reshape (orig_shape )
991
+ return w , v
992
+ return w
968
993
969
- # combine the list of eigenvectors into a single array
970
- v = dpnp .array (eig_vecs , order = a_order )
971
- return w , v
972
994
else :
973
- # oneMKL LAPACK assumes fortran-like array as input, so
974
- # allocate a memory with 'F' order for dpnp array of eigenvectors
975
- v = dpnp . empty_like ( a , order = "F" , dtype = v_type )
995
+ a_usm_arr = dpnp . get_usm_ndarray ( a )
996
+ ht_list_ev = []
997
+ copy_ev = dpctl . SyclEvent ( )
976
998
977
- # use DPCTL tensor function to fill the array of eigenvectors with content of input array
978
- ht_copy_ev , copy_ev = ti ._copy_usm_ndarray_into_usm_ndarray (
979
- src = a_usm_arr , dst = v .get_array (), sycl_queue = a_sycl_queue
980
- )
999
+ # When `eigen_mode == "N"` (jobz == 0), OneMKL LAPACK does not overwrite the input array.
1000
+ # If the input array 'a' is already F-contiguous and matches the target data type,
1001
+ # we can avoid unnecessary memory allocation and data copying.
1002
+ if eigen_mode == "N" and a_order == "F" and a .dtype == v_type :
1003
+ v = a
1004
+
1005
+ else :
1006
+ # oneMKL LAPACK assumes fortran-like array as input, so
1007
+ # allocate a memory with 'F' order for dpnp array of eigenvectors
1008
+ v = dpnp .empty_like (a , order = "F" , dtype = v_type )
1009
+
1010
+ # use DPCTL tensor function to fill the array of eigenvectors with content of input array
1011
+ ht_copy_ev , copy_ev = ti ._copy_usm_ndarray_into_usm_ndarray (
1012
+ src = a_usm_arr , dst = v .get_array (), sycl_queue = a_sycl_queue
1013
+ )
1014
+ ht_list_ev .append (ht_copy_ev )
981
1015
982
1016
# allocate a memory for dpnp array of eigenvalues
983
- w = dpnp .empty (
984
- a .shape [:- 1 ],
1017
+ w = dpnp .empty_like (
1018
+ a ,
1019
+ shape = a .shape [:- 1 ],
985
1020
dtype = w_type ,
986
- usm_type = a_usm_type ,
987
- sycl_queue = a_sycl_queue ,
988
1021
)
989
1022
990
1023
# call LAPACK extension function to get eigenvalues and eigenvectors of matrix A
@@ -996,8 +1029,9 @@ def dpnp_eigh(a, UPLO):
996
1029
w .get_array (),
997
1030
depends = [copy_ev ],
998
1031
)
1032
+ ht_list_ev .append (ht_lapack_ev )
999
1033
1000
- if a_order != "F" :
1034
+ if eigen_mode == "V" and a_order != "F" :
1001
1035
# need to align order of eigenvectors with one of input matrix A
1002
1036
out_v = dpnp .empty_like (v , order = a_order )
1003
1037
ht_copy_out_ev , _ = ti ._copy_usm_ndarray_into_usm_ndarray (
@@ -1006,14 +1040,13 @@ def dpnp_eigh(a, UPLO):
1006
1040
sycl_queue = a_sycl_queue ,
1007
1041
depends = [lapack_ev ],
1008
1042
)
1009
- ht_copy_out_ev . wait ( )
1043
+ ht_list_ev . append ( ht_copy_out_ev )
1010
1044
else :
1011
1045
out_v = v
1012
1046
1013
- ht_lapack_ev .wait ()
1014
- ht_copy_ev .wait ()
1047
+ dpctl .SyclEvent .wait_for (ht_list_ev )
1015
1048
1016
- return w , out_v
1049
+ return ( w , out_v ) if eigen_mode == "V" else w
1017
1050
1018
1051
1019
1052
def dpnp_inv_batched (a , res_type ):
0 commit comments