33# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
44
55import ctypes
6- import warnings
76import weakref
87from contextlib import contextmanager
98from dataclasses import dataclass
109from typing import List , Optional
10+ from warnings import warn
1111
1212from cuda .core .experimental ._device import Device
1313from cuda .core .experimental ._module import ObjectCode
2323
2424
2525# Note: this function is reused in the tests
26- def _decide_nvjitlink_or_driver ():
26+ def _decide_nvjitlink_or_driver () -> bool :
2727 """Returns True if falling back to the cuLink* driver APIs."""
2828 global _driver_ver , _driver , _nvjitlink
2929 if _driver or _nvjitlink :
30- return
30+ return _driver is not None
3131
3232 _driver_ver = handle_return (driver .cuDriverGetVersion ())
3333 _driver_ver = (_driver_ver // 1000 , (_driver_ver % 1000 ) // 10 )
@@ -43,7 +43,7 @@ def _decide_nvjitlink_or_driver():
4343 _nvjitlink = None
4444
4545 if _nvjitlink is None :
46- warnings . warn (
46+ warn (
4747 "nvJitLink is not installed or too old (<12.3). Therefore it is not usable "
4848 "and the culink APIs will be used instead." ,
4949 stacklevel = 3 ,
@@ -98,78 +98,59 @@ class LinkerOptions:
9898 will be used.
9999 max_register_count : int, optional
100100 Maximum register count.
101- Maps to: ``-maxrregcount=<N>``.
102101 time : bool, optional
103102 Print timing information to the info log.
104- Maps to ``-time``.
105103 Default: False.
106104 verbose : bool, optional
107105 Print verbose messages to the info log.
108- Maps to ``-verbose``.
109106 Default: False.
110107 link_time_optimization : bool, optional
111108 Perform link time optimization.
112- Maps to: ``-lto``.
113109 Default: False.
114110 ptx : bool, optional
115- Emit PTX after linking instead of CUBIN; only supported with ``-lto``.
116- Maps to ``-ptx``.
111+ Emit PTX after linking instead of CUBIN; only supported with ``link_time_optimization=True``.
117112 Default: False.
118113 optimization_level : int, optional
119114 Set optimization level. Only 0 and 3 are accepted.
120- Maps to ``-O<N>``.
121115 debug : bool, optional
122116 Generate debug information.
123- Maps to ``-g``
124117 Default: False.
125118 lineinfo : bool, optional
126119 Generate line information.
127- Maps to ``-lineinfo``.
128120 Default: False.
129121 ftz : bool, optional
130122 Flush denormal values to zero.
131- Maps to ``-ftz=<n>``.
132123 Default: False.
133124 prec_div : bool, optional
134125 Use precise division.
135- Maps to ``-prec-div=<n>``.
136126 Default: True.
137127 prec_sqrt : bool, optional
138128 Use precise square root.
139- Maps to ``-prec-sqrt=<n>``.
140129 Default: True.
141130 fma : bool, optional
142131 Use fast multiply-add.
143- Maps to ``-fma=<n>``.
144132 Default: True.
145133 kernels_used : List[str], optional
146134 Pass list of kernels that are used; any not in the list can be removed. This option can be specified multiple
147135 times.
148- Maps to ``-kernels-used=<name>``.
149136 variables_used : List[str], optional
150137 Pass a list of variables that are used; any not in the list can be removed.
151- Maps to ``-variables-used=<name>``
152138 optimize_unused_variables : bool, optional
153139 Assume that if a variable is not referenced in device code, it can be removed.
154- Maps to: ``-optimize-unused-variables``
155140 Default: False.
156- xptxas : List[str], optional
141+ ptxas_options : List[str], optional
157142 Pass options to PTXAS.
158- Maps to: ``-Xptxas=<opt>``.
159143 split_compile : int, optional
160144 Split compilation maximum thread count. Use 0 to use all available processors. Value of 1 disables split
161145 compilation (default).
162- Maps to ``-split-compile=<N>``.
163146 Default: 1.
164147 split_compile_extended : int, optional
165148 A more aggressive form of split compilation available in LTO mode only. Accepts a maximum thread count value.
166149 Use 0 to use all available processors. Value of 1 disables extended split compilation (default). Note: This
167150 option can potentially impact performance of the compiled binary.
168- Maps to ``-split-compile-extended=<N>``.
169151 Default: 1.
170152 no_cache : bool, optional
171153 Do not cache the intermediate steps of nvJitLink.
172- Maps to ``-no-cache``.
173154 Default: False.
174155 """
175156
@@ -189,7 +170,7 @@ class LinkerOptions:
189170 kernels_used : Optional [List [str ]] = None
190171 variables_used : Optional [List [str ]] = None
191172 optimize_unused_variables : Optional [bool ] = None
192- xptxas : Optional [List [str ]] = None
173+ ptxas_options : Optional [List [str ]] = None
193174 split_compile : Optional [int ] = None
194175 split_compile_extended : Optional [int ] = None
195176 no_cache : Optional [bool ] = None
@@ -239,8 +220,8 @@ def _init_nvjitlink(self):
239220 self .formatted_options .append (f"-variables-used={ variable } " )
240221 if self .optimize_unused_variables is not None :
241222 self .formatted_options .append ("-optimize-unused-variables" )
242- if self .xptxas is not None :
243- for opt in self .xptxas :
223+ if self .ptxas_options is not None :
224+ for opt in self .ptxas_options :
244225 self .formatted_options .append (f"-Xptxas={ opt } " )
245226 if self .split_compile is not None :
246227 self .formatted_options .append (f"-split-compile={ self .split_compile } " )
@@ -290,21 +271,21 @@ def _init_driver(self):
290271 self .formatted_options .append (1 )
291272 self .option_keys .append (_driver .CUjit_option .CU_JIT_GENERATE_LINE_INFO )
292273 if self .ftz is not None :
293- raise ValueError ("ftz option is deprecated in the driver API" )
274+ warn ("ftz option is deprecated in the driver API" , DeprecationWarning , stacklevel = 3 )
294275 if self .prec_div is not None :
295- raise ValueError ("prec_div option is deprecated in the driver API" )
276+ warn ("prec_div option is deprecated in the driver API" , DeprecationWarning , stacklevel = 3 )
296277 if self .prec_sqrt is not None :
297- raise ValueError ("prec_sqrt option is deprecated in the driver API" )
278+ warn ("prec_sqrt option is deprecated in the driver API" , DeprecationWarning , stacklevel = 3 )
298279 if self .fma is not None :
299- raise ValueError ("fma options is deprecated in the driver API" )
280+ warn ("fma options is deprecated in the driver API" , DeprecationWarning , stacklevel = 3 )
300281 if self .kernels_used is not None :
301- raise ValueError ("kernels_used is deprecated in the driver API" )
282+ warn ("kernels_used is deprecated in the driver API" , DeprecationWarning , stacklevel = 3 )
302283 if self .variables_used is not None :
303- raise ValueError ("variables_used is deprecated in the driver API" )
284+ warn ("variables_used is deprecated in the driver API" , DeprecationWarning , stacklevel = 3 )
304285 if self .optimize_unused_variables is not None :
305- raise ValueError ("optimize_unused_variables is deprecated in the driver API" )
306- if self .xptxas is not None :
307- raise ValueError ("xptxas option is not supported by the driver API" )
286+ warn ("optimize_unused_variables is deprecated in the driver API" , DeprecationWarning , stacklevel = 3 )
287+ if self .ptxas_options is not None :
288+ raise ValueError ("ptxas_options option is not supported by the driver API" )
308289 if self .split_compile is not None :
309290 raise ValueError ("split_compile option is not supported by the driver API" )
310291 if self .split_compile_extended is not None :
0 commit comments