@@ -77,118 +77,92 @@ def _lazy_init():
77
77
78
78
@dataclass
79
79
class LinkerOptions :
80
- """Customizable :obj:`LinkerOptions` for nvJitLink or driver API. Some options are only available
81
- whenusing the cuda.bindings.nvjitlink backend. Some options are only available when using newer
82
- or older versions of cuda.
80
+ """Customizable :obj:`Linker` options.
83
81
82
+ Since the linker would choose to use nvJitLink or the driver APIs as the linking backend,
83
+ not all options are applicable.
84
84
85
85
Attributes
86
86
----------
87
87
arch : str
88
- Pass SM architecture value. Can use compute_<N> value instead if only generating PTX.
88
+ Pass the SM architecture value, such as ``-arch=sm_<CC>`` (for generating CUBIN) or
89
+ ``compute_<CC>`` (for generating PTX).
89
90
This is a required option.
90
- Acceptable value type: str
91
- Maps to: -arch=sm_<N>
92
91
max_register_count : int, optional
93
92
Maximum register count.
94
- Default: None
95
- Acceptable value type: int
96
- Maps to: -maxrregcount=<N>
93
+ Maps to: ``-maxrregcount=<N>``.
97
94
time : bool, optional
98
- Print timing information to InfoLog.
99
- Default: False
100
- Acceptable value type: bool
101
- Maps to: -time
95
+ Print timing information to the info log.
96
+ Maps to ``-time``.
97
+ Default: False.
102
98
verbose : bool, optional
103
- Print verbose messages to InfoLog.
104
- Default: False
105
- Acceptable value type: bool
106
- Maps to: -verbose
99
+ Print verbose messages to the info log.
100
+ Maps to ``-verbose``.
101
+ Default: False.
107
102
link_time_optimization : bool, optional
108
103
Perform link time optimization.
109
- Default: False
110
- Acceptable value type: bool
111
- Maps to: -lto
104
+ Maps to: ``-lto``.
105
+ Default: False.
112
106
ptx : bool, optional
113
- Emit PTX after linking instead of CUBIN; only supported with -lto.
114
- Default: False
115
- Acceptable value type: bool
116
- Maps to: -ptx
107
+ Emit PTX after linking instead of CUBIN; only supported with ``-lto``.
108
+ Maps to ``-ptx``.
109
+ Default: False.
117
110
optimization_level : int, optional
118
111
Set optimization level. Only 0 and 3 are accepted.
119
- Default: None
120
- Acceptable value type: int
121
- Maps to: -O<N>
112
+ Maps to ``-O<N>``.
122
113
debug : bool, optional
123
114
Generate debug information.
124
- Default: False
125
- Acceptable value type: bool
126
- Maps to: -g
115
+ Maps to ``-g``.
116
+ Default: False.
127
117
lineinfo : bool, optional
128
118
Generate line information.
129
- Default: False
130
- Acceptable value type: bool
131
- Maps to: -lineinfo
119
+ Maps to ``-lineinfo``.
120
+ Default: False.
132
121
ftz : bool, optional
133
122
Flush denormal values to zero.
134
- Default: False
135
- Acceptable value type: bool
136
- Maps to: -ftz=<n>
123
+ Maps to ``-ftz=<n>``.
124
+ Default: False.
137
125
prec_div : bool, optional
138
126
Use precise division.
139
- Default: True
140
- Acceptable value type: bool
141
- Maps to: -prec-div=<n>
127
+ Maps to ``-prec-div=<n>``.
128
+ Default: True.
142
129
prec_sqrt : bool, optional
143
130
Use precise square root.
144
- Default: True
145
- Acceptable value type: bool
146
- Maps to: -prec-sqrt=<n>
131
+ Maps to ``-prec-sqrt=<n>``.
132
+ Default: True.
147
133
fma : bool, optional
148
134
Use fast multiply-add.
149
- Default: True
150
- Acceptable value type: bool
151
- Maps to: -fma=<n>
135
+ Maps to ``-fma=<n>``.
136
+ Default: True.
152
137
kernels_used : List[str], optional
153
138
Pass a list of kernels that are used; any not in the list can be removed. This option can be specified multiple
154
139
times.
155
- Default: None
156
- Acceptable value type: list of str
157
- Maps to: -kernels-used=<name>
140
+ Maps to ``-kernels-used=<name>``.
158
141
variables_used : List[str], optional
159
- Pass list of variables that are used; any not in the list can be removed. This option can be specified multiple
160
- times.
161
- Default: None
162
- Acceptable value type: list of str
163
- Maps to: -variables-used=<name>
142
+ Pass a list of variables that are used; any not in the list can be removed.
143
+ Maps to ``-variables-used=<name>``.
164
144
optimize_unused_variables : bool, optional
165
145
Assume that if a variable is not referenced in device code, it can be removed.
166
- Default: False
167
- Acceptable value type: bool
168
- Maps to: -optimize-unused-variables
146
+ Maps to: ``-optimize-unused-variables``.
147
+ Default: False.
169
148
xptxas : List[str], optional
170
- Pass options to PTXAS. This option can be called multiple times.
171
- Default: None
172
- Acceptable value type: list of str
173
- Maps to: -Xptxas=<opt>
149
+ Pass options to PTXAS.
150
+ Maps to: ``-Xptxas=<opt>``.
174
151
split_compile : int, optional
175
152
Split compilation maximum thread count. Use 0 to use all available processors. Value of 1 disables split
176
153
compilation (default).
177
- Default: 1
178
- Acceptable value type: int
179
- Maps to: -split-compile=<N>
154
+ Maps to ``-split-compile=<N>``.
155
+ Default: 1.
180
156
split_compile_extended : int, optional
181
157
A more aggressive form of split compilation available in LTO mode only. Accepts a maximum thread count value.
182
158
Use 0 to use all available processors. Value of 1 disables extended split compilation (default). Note: This
183
159
option can potentially impact performance of the compiled binary.
184
- Default: 1
185
- Acceptable value type: int
186
- Maps to: -split-compile-extended=<N>
160
+ Maps to ``-split-compile-extended=<N>``.
161
+ Default: 1.
187
162
no_cache : bool, optional
188
163
Do not cache the intermediate steps of nvJitLink.
189
- Default: False
190
- Acceptable value type: bool
191
- Maps to: -no-cache
164
+ Maps to ``-no-cache``.
165
+ Default: False.
192
166
"""
193
167
194
168
arch : str
@@ -351,8 +325,11 @@ def _exception_manager(self):
351
325
352
326
353
327
class Linker :
354
- """
355
- Linker class for managing the linking of object codes with specified options.
328
+ """Represent the linking machinery that links one or more object codes into
329
+ :obj:`~cuda.core.experimental._module.ObjectCode` with the specified options.
330
+
331
+ This object provides a unified interface to multiple underlying
332
+ linker libraries (such as nvJitLink or cuLink* from CUDA driver).
356
333
357
334
Parameters
358
335
----------
@@ -442,7 +419,7 @@ def link(self, target_type) -> ObjectCode:
442
419
443
420
Note
444
421
------
445
- See nvrtc compiler options documnetation to ensure the input ObjectCodes are
422
+ See NVRTC compiler options documentation to ensure the input object codes are
446
423
correctly compiled for linking.
447
424
"""
448
425
if target_type not in ("cubin" , "ptx" ):
@@ -470,7 +447,8 @@ def get_error_log(self) -> str:
470
447
471
448
Returns
472
449
-------
473
- The error log.
450
+ str
451
+ The error log.
474
452
"""
475
453
if _nvjitlink :
476
454
log_size = _nvjitlink .get_error_log_size (self ._mnff .handle )
@@ -485,7 +463,8 @@ def get_info_log(self) -> str:
485
463
486
464
Returns
487
465
-------
488
- The info log.
466
+ str
467
+ The info log.
489
468
"""
490
469
if _nvjitlink :
491
470
log_size = _nvjitlink .get_info_log_size (self ._mnff .handle )
0 commit comments