Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
b9e5c94
Trigger tests on push to devel or main branch
EmilyBourne Mar 11, 2024
df24e81
Add cuda workflow to test cuda developments on CI
EmilyBourne Mar 11, 2024
31d7247
Trigger tests on push to devel or main branch
EmilyBourne Mar 11, 2024
17aa0e6
[init] Adding CUDA language/compiler and CodePrinter (#32)
bauom Feb 28, 2024
2c58573
Fix import handling (#49)
smazouz42 May 15, 2024
0d154f8
Add support for kernels (#42)
smazouz42 Jun 27, 2024
2ffa7fc
Updated CUDA Name Clash Checker By Added CUDA-specific keywords (#60)
smazouz42 Jul 3, 2024
8eef19d
add handle for custom device (#61)
smazouz42 Jul 3, 2024
e5feffb
improve kernel decorator
smazouz42 Jul 23, 2024
2547f6e
addinf doc string to all CudaThreadIndexing fucntions
smazouz42 Jul 23, 2024
f80eed5
update CHANGELOG
smazouz42 Jul 23, 2024
6df004d
Add missing docstring for internal_loop in KernelAccessor class
smazouz42 Jul 23, 2024
048bd16
Add missing docstring for internal_loop in KernelAccessor class
smazouz42 Jul 23, 2024
738371a
update CHANGELOG
smazouz42 Jul 24, 2024
ec738b3
refactoring the code
smazouz42 Jul 25, 2024
aa76f91
refactoring the code
smazouz42 Jul 25, 2024
528099f
move CudaThreadIndexing to pyccel/cuda
smazouz42 Jul 25, 2024
f1f63ef
cleaning upmy PR
smazouz42 Jul 25, 2024
1aa26b1
add final new line
smazouz42 Jul 25, 2024
ea1beb7
add final new line
smazouz42 Jul 25, 2024
0f076a0
Make sure tests are passing
smazouz42 Jul 25, 2024
57f977e
refactoring the code
smazouz42 Jul 25, 2024
26fcdc0
adding missing import to device test
smazouz42 Jul 25, 2024
9f58f02
adding missing import to kernel
smazouz42 Jul 25, 2024
c45c615
refactoring the code
smazouz42 Jul 25, 2024
572cdd8
refactoring the code
smazouz42 Jul 25, 2024
d969ebb
update doc
smazouz42 Jul 25, 2024
2b3085f
update doc
smazouz42 Jul 25, 2024
34d801f
update doc
smazouz42 Jul 25, 2024
e9436a9
update doc
smazouz42 Jul 25, 2024
1aeb5a5
work in progress
smazouz42 Jul 25, 2024
654afa3
update docs
smazouz42 Jul 26, 2024
c5e1986
Merge branch 'devel' of https://github.com/pyccel/pyccel-cuda into is…
smazouz42 Jul 26, 2024
ded14a7
Add tests for both thread_ndx and block_ndx
smazouz42 Jul 26, 2024
ab33bd0
fix doc string
smazouz42 Jul 26, 2024
5c8fa2d
fix linting
smazouz42 Jul 26, 2024
f6a5792
fix linting
smazouz42 Jul 26, 2024
cd9ce2b
fix linting
smazouz42 Jul 26, 2024
ef09298
.
smazouz42 Jul 26, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions docs/cuda.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,22 @@ def my_kernel():
my_kernel[1, 1]()

```
## Cuda Device Methods
The following methods are available for CUDA devices in Pyccel and can be called from either kernels or device functions. Currently, the only import syntax supported is:
```python
from pyccel import cuda
```
Using an alias for the import is not supported, so this is not allowed:

```python
from pyccel import cuda as py_cu
```

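| Method | Description |
|--------|-------------|
| `threadIdx(dim)` | Returns the index of the thread in the specified dimension of its block. |
| `blockIdx(dim)` | Returns the index of the block in the specified dimension. |
| `blockDim(dim)` | Returns the size of the block in the specified dimension. |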






88 changes: 88 additions & 0 deletions pyccel/cuda/cuda_thread_indexing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
#------------------------------------------------------------------------------------------#
# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. #
#------------------------------------------------------------------------------------------#
"""
This module contains all the CUDA thread indexing methods
"""
class CudaThreadIndexing:
    """
    Class representing the CUDA thread indexing.

    Class representing the CUDA thread indexing of a single emulated thread.
    An instance stores the indices of one (block, thread) pair and exposes
    them through methods named after the CUDA built-ins.

    Parameters
    ----------
    block_idx : int
        The index of the block in the x-dimension.

    thread_idx : int
        The index of the thread in the x-dimension.

    block_dim : int, default=0
        The number of threads per block in the x-dimension. The default of 0
        preserves the value historically returned by `blockDim` when the
        launch configuration is not provided.
    """
    def __init__(self, block_idx, thread_idx, block_dim=0):
        self._block_idx = block_idx
        self._thread_idx = thread_idx
        self._block_dim = block_dim

    def threadIdx(self, dim):
        """
        Get the thread index.

        Get the thread index. Only one index is stored, so the same value is
        returned whatever the value of `dim`.

        Parameters
        ----------
        dim : int
            The dimension of the indexing. It can be:
            - 0 for the x-dimension
            - 1 for the y-dimension
            - 2 for the z-dimension

        Returns
        -------
        int
            The index of the thread in the specified dimension of its block.
        """
        return self._thread_idx

    def blockIdx(self, dim):
        """
        Get the block index.

        Get the block index. Only one index is stored, so the same value is
        returned whatever the value of `dim`.

        Parameters
        ----------
        dim : int
            The dimension of the indexing. It can be:
            - 0 for the x-dimension
            - 1 for the y-dimension
            - 2 for the z-dimension

        Returns
        -------
        int
            The index of the block in the specified dimension.
        """
        return self._block_idx

    def blockDim(self, dim):
        """
        Get the block dimension.

        Get the block dimension. The value is the one passed to the
        constructor, whatever the value of `dim`.

        Parameters
        ----------
        dim : int
            The dimension of the indexing. It can be:
            - 0 for the x-dimension
            - 1 for the y-dimension
            - 2 for the z-dimension

        Returns
        -------
        int
            The size of the block in the specified dimension (0 if no size
            was given when the object was created).
        """
        return self._block_dim

20 changes: 19 additions & 1 deletion pyccel/decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"""
This module contains all the provided decorator methods.
"""
from pyccel.cuda.cuda_thread_indexing import CudaThreadIndexing
import warnings

__all__ = (
Expand Down Expand Up @@ -139,7 +140,24 @@ class KernelAccessor:
def __init__(self, f):
    # Keep the undecorated kernel: it is called once per emulated thread.
    self._f = f

def __getitem__(self, args):
    """
    Build the callable which emulates a kernel launch.

    Build the callable which emulates a kernel launch. The launch
    configuration is unpacked from the subscript and the kernel is then
    run sequentially for every (block, thread) pair.

    Parameters
    ----------
    args : tuple
        The launch configuration `(num_blocks, num_threads)`.

    Returns
    -------
    callable
        A function forwarding its arguments to the kernel once per
        emulated thread.
    """
    num_blocks, num_threads = args
    # The kernel resolves `cuda` through its own module globals, so that is
    # the namespace which must expose the indexing methods.
    kernel_globals = self._f.__globals__

    def internal_loop(*args, **kwargs):
        """
        The internal loop for kernel execution.

        The internal loop for kernel execution.
        """
        for b in range(num_blocks):
            for t in range(num_threads):
                cu = CudaThreadIndexing(b, t)
                if 'cuda' in kernel_globals:
                    # Rebind the indexing methods on the existing `cuda`
                    # object so they report the current block and thread.
                    cuda_obj = kernel_globals['cuda']
                    cuda_obj.threadIdx = cu.threadIdx
                    cuda_obj.blockIdx = cu.blockIdx
                    cuda_obj.blockDim = cu.blockDim
                else:
                    kernel_globals['cuda'] = cu
                self._f(*args, **kwargs)
    return internal_loop

return KernelAccessor(f)

Expand Down
15 changes: 15 additions & 0 deletions tests/pyccel/scripts/kernel/block_idx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# pylint: disable=missing-function-docstring, missing-module-docstring
from pyccel.decorators import kernel
from pyccel import cuda

# Kernel under test: every invocation prints the block index in dimension 0
# (the x-dimension).
@kernel
def print_block():
print(cuda.blockIdx(0)) # pylint: disable=no-member

# Launch the kernel with the configuration [5, 5], then call cuda.synchronize().
# NOTE(review): presumably [5, 5] means 5 blocks of 5 threads and synchronize()
# waits for the kernel to finish — confirm against the decorator and pyccel.cuda.
def f():
print_block[5,5]()
cuda.synchronize()

# Only launch the kernel when this file is executed as a script.
if __name__ == '__main__':
f()

2 changes: 1 addition & 1 deletion tests/pyccel/scripts/kernel/device_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# pylint: disable=missing-function-docstring, missing-module-docstring
from pyccel.decorators import device, kernel
from pyccel import cuda
from pyccel import cuda

@device
def device_call():
Expand Down
2 changes: 1 addition & 1 deletion tests/pyccel/scripts/kernel/hello_kernel.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# pylint: disable=missing-function-docstring, missing-module-docstring
from pyccel.decorators import kernel
from pyccel import cuda
from pyccel import cuda

@kernel
def say_hello(its_morning : bool):
Expand Down
15 changes: 15 additions & 0 deletions tests/pyccel/scripts/kernel/thread_idx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# pylint: disable=missing-function-docstring, missing-module-docstring
from pyccel.decorators import kernel
from pyccel import cuda

# Kernel under test: every invocation prints the thread index in dimension 0
# (the x-dimension).
# NOTE(review): the kernel is named print_block although it prints the thread
# index — consider renaming it (together with its call site) for clarity.
@kernel
def print_block():
print(cuda.threadIdx(0)) # pylint: disable=no-member

# Launch the kernel with the configuration [5, 5], then call cuda.synchronize().
# NOTE(review): presumably [5, 5] means 5 blocks of 5 threads and synchronize()
# waits for the kernel to finish — confirm against the decorator and pyccel.cuda.
def f():
print_block[5,5]()
cuda.synchronize()

# Only launch the kernel when this file is executed as a script.
if __name__ == '__main__':
f()

24 changes: 24 additions & 0 deletions tests/pyccel/test_pyccel.py
Original file line number Diff line number Diff line change
Expand Up @@ -730,6 +730,8 @@ def test_elemental(language):
pyccel_test("scripts/decorators_elemental.py", language = language)

#------------------------------------------------------------------------------


@pytest.mark.cuda
def test_hello_kernel(gpu_available):
types = str
Expand All @@ -743,7 +745,29 @@ def test_kernel_collision(gpu_available):
language="cuda", execute_code=gpu_available)

#------------------------------------------------------------------------------
def test_block_idx():
    """
    Check the block indices printed by the Python run of block_idx.py.

    The script is run with Python and its output is read as a list of
    integers. Each value from 0 to 4 must appear exactly 5 times.
    """
    script = get_abs_path("scripts/kernel/block_idx.py")
    script_dir = get_abs_path(os.path.dirname(script))

    output = get_python_output(script, script_dir)

    printed_indices = [int(token) for token in output.split()]

    for block in range(5):
        assert printed_indices.count(block) == 5
#------------------------------------------------------------------------------
def test_thread_idx():
    """
    Check the thread indices printed by the Python run of thread_idx.py.

    The script is run with Python and its output is read as a list of
    integers. Each value from 0 to 4 must appear exactly 5 times.
    """
    script = get_abs_path("scripts/kernel/thread_idx.py")
    script_dir = get_abs_path(os.path.dirname(script))

    output = get_python_output(script, script_dir)

    printed_indices = [int(token) for token in output.split()]

    for thread in range(5):
        assert printed_indices.count(thread) == 5

#------------------------------------------------------------------------------
@pytest.mark.cuda
def test_device_call(gpu_available):
types = str
Expand Down