From 3e15ebc0c5ea4ec7a9b10554cf891bb9476c11f1 Mon Sep 17 00:00:00 2001
From: ksimpson
Date: Thu, 5 Dec 2024 15:07:57 -0800
Subject: [PATCH 1/7] lazy load module in ObjectCode

---
 cuda_core/cuda/core/experimental/_module.py | 64 +++++++++++----------
 1 file changed, 35 insertions(+), 29 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py
index 69dbcd374..e5e235993 100644
--- a/cuda_core/cuda/core/experimental/_module.py
+++ b/cuda_core/cuda/core/experimental/_module.py
@@ -106,43 +106,20 @@ class ObjectCode:
 
     """
 
-    __slots__ = ("_handle", "_code_type", "_module", "_loader", "_sym_map")
+    __slots__ = ("_handle", "_backend_version", "_code_type", "_module", "_loader", "_sym_map")
     _supported_code_type = ("cubin", "ptx", "ltoir", "fatbin")
 
+
     def __init__(self, module, code_type, jit_options=None, *, symbol_mapping=None):
         if code_type not in self._supported_code_type:
             raise ValueError
         _lazy_init()
-        self._handle = None
 
-        backend = "new" if (_py_major_ver >= 12 and _driver_ver >= 12000) else "old"
-        self._loader = _backend[backend]
+        # handle is assigned during _lazy_load
+        self._handle = None
 
-        if isinstance(module, str):
-            # TODO: this option is only taken by the new library APIs, but we have
-            # a bug that we can't easily support it just yet (NVIDIA/cuda-python#73).
-            if jit_options is not None:
-                raise ValueError
-            module = module.encode()
-            self._handle = handle_return(self._loader["file"](module))
-        else:
-            assert isinstance(module, bytes)
-            if jit_options is None:
-                jit_options = {}
-            if backend == "new":
-                args = (
-                    module,
-                    list(jit_options.keys()),
-                    list(jit_options.values()),
-                    len(jit_options),
-                    # TODO: support library options
-                    [],
-                    [],
-                    0,
-                )
-            else:  # "old" backend
-                args = (module, len(jit_options), list(jit_options.keys()), list(jit_options.values()))
-            self._handle = handle_return(self._loader["data"](*args))
+        _backend_version = "new" if (_py_major_ver >= 12 and _driver_ver >= 12000) else "old"
+        self._loader = _backend[_backend_version]
 
         self._code_type = code_type
         self._module = module
@@ -168,7 +145,36 @@ def get_kernel(self, name):
             name = self._sym_map[name]
         except KeyError:
             name = name.encode()
+
+        self._lazy_load_module()
         data = handle_return(self._loader["kernel"](self._handle, name))
         return Kernel._from_obj(data, self)
 
+    def _lazy_load_module(self):
+        if isinstance(module, str):
+            # TODO: this option is only taken by the new library APIs, but we have
+            # a bug that we can't easily support it just yet (NVIDIA/cuda-python#73).
+            if jit_options is not None:
+                raise ValueError
+            module = module.encode()
+            self._handle = handle_return(self._loader["file"](module))
+        else:
+            assert isinstance(module, bytes)
+            if jit_options is None:
+                jit_options = {}
+            if self._backend_version == "new":
+                args = (
+                    module,
+                    list(jit_options.keys()),
+                    list(jit_options.values()),
+                    len(jit_options),
+                    # TODO: support library options
+                    [],
+                    [],
+                    0,
+                )
+            else:  # "old" backend
+                args = (module, len(jit_options), list(jit_options.keys()), list(jit_options.values()))
+            self._handle = handle_return(self._loader["data"](*args))
+
 # TODO: implement from_handle()

From 6ac39ba17ace7fffbe074733928deef551afc952 Mon Sep 17 00:00:00 2001
From: ksimpson
Date: Thu, 5 Dec 2024 15:16:30 -0800
Subject: [PATCH 2/7] lazy load module into code object

---
 cuda_core/cuda/core/experimental/_module.py | 31 +++++++++++----------
 cuda_core/tests/test_module.py              |  1 -
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py
index e5e235993..bd38b913c 100644
--- a/cuda_core/cuda/core/experimental/_module.py
+++ b/cuda_core/cuda/core/experimental/_module.py
@@ -106,7 +106,7 @@ class ObjectCode:
 
     """
 
-    __slots__ = ("_handle", "_backend_version", "_code_type", "_module", "_loader", "_sym_map")
+    __slots__ = ("_handle", "_backend_version", "_jit_options", "_code_type", "_module", "_loader", "_sym_map")
     _supported_code_type = ("cubin", "ptx", "ltoir", "fatbin")
 
 
@@ -117,9 +117,10 @@ def __init__(self, module, code_type, jit_options=None, *, symbol_mapping=None):
 
         # handle is assigned during _lazy_load
         self._handle = None
+        self._jit_options = jit_options
 
-        _backend_version = "new" if (_py_major_ver >= 12 and _driver_ver >= 12000) else "old"
-        self._loader = _backend[_backend_version]
+        self._backend_version = "new" if (_py_major_ver >= 12 and _driver_ver >= 12000) else "old"
+        self._loader = _backend[self._backend_version]
 
         self._code_type = code_type
         self._module = module
@@ -151,30 +152,30 @@ def get_kernel(self, name):
         return Kernel._from_obj(data, self)
 
     def _lazy_load_module(self):
-        if isinstance(module, str):
+        if isinstance(self._module, str):
             # TODO: this option is only taken by the new library APIs, but we have
             # a bug that we can't easily support it just yet (NVIDIA/cuda-python#73).
-            if jit_options is not None:
+            if self._jit_options is not None:
                 raise ValueError
-            module = module.encode()
-            self._handle = handle_return(self._loader["file"](module))
+            module = self._module.encode()
+            self._handle = handle_return(self._loader["file"](self._module))
         else:
-            assert isinstance(module, bytes)
-            if jit_options is None:
-                jit_options = {}
+            assert isinstance(self._module, bytes)
+            if self._jit_options is None:
+                self._jit_options = {}
             if self._backend_version == "new":
                 args = (
-                    module,
-                    list(jit_options.keys()),
-                    list(jit_options.values()),
-                    len(jit_options),
+                    self._module,
+                    list(self._jit_options.keys()),
+                    list(self._jit_options.values()),
+                    len(self._jit_options),
                     # TODO: support library options
                     [],
                     [],
                     0,
                 )
             else:  # "old" backend
-                args = (module, len(jit_options), list(jit_options.keys()), list(jit_options.values()))
+                args = (self._module, len(self._jit_options), list(self._jit_options.keys()), list(self._jit_options.values()))
             self._handle = handle_return(self._loader["data"](*args))
 
 # TODO: implement from_handle()

diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py
index a976726fa..e7fec356d 100644
--- a/cuda_core/tests/test_module.py
+++ b/cuda_core/tests/test_module.py
@@ -24,7 +24,6 @@ def test_object_code_initialization():
         obj_code = ObjectCode(module_data, code_type)
         assert obj_code._code_type == code_type
         assert obj_code._module == module_data
-        assert obj_code._handle is not None
 
     # Test with unsupported code type
     with pytest.raises(ValueError):

From 9fba2b7f2651dac121626ee12f76c3f14b17aefa Mon Sep 17 00:00:00 2001
From: ksimpson
Date: Mon, 9 Dec 2024 14:22:58 -0800
Subject: [PATCH 3/7] use precondition and update test

---
 cuda_core/cuda/core/experimental/_module.py | 55 +++++++++++----------
 cuda_core/tests/test_module.py              | 49 ++++++------------
 2 files changed, 43 insertions(+), 61 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py
index bd38b913c..e6a5e6863 100644
--- a/cuda_core/cuda/core/experimental/_module.py
+++ b/cuda_core/cuda/core/experimental/_module.py
@@ -5,7 +5,7 @@
 import importlib.metadata
 
 from cuda import cuda
-from cuda.core.experimental._utils import handle_return
+from cuda.core.experimental._utils import handle_return, precondition
 
 _backend = {
     "old": {
@@ -127,31 +127,10 @@ def __init__(self, module, code_type, jit_options=None, *, symbol_mapping=None):
         self._sym_map = {} if symbol_mapping is None else symbol_mapping
 
         # TODO: do we want to unload in a finalizer? Probably not..
-
-    def get_kernel(self, name):
-        """Return the :obj:`Kernel` of a specified name from this object code.
-
-        Parameters
-        ----------
-        name : Any
-            Name of the kernel to retrieve.
-
-        Returns
-        -------
-        :obj:`Kernel`
-            Newly created kernel object.
-
-        """
-        try:
-            name = self._sym_map[name]
-        except KeyError:
-            name = name.encode()
-
-        self._lazy_load_module()
-        data = handle_return(self._loader["kernel"](self._handle, name))
-        return Kernel._from_obj(data, self)
-
-    def _lazy_load_module(self):
+
+    def _lazy_load_module(self, *args, **kwargs):
+        if self._handle is not None:
+            return
         if isinstance(self._module, str):
             # TODO: this option is only taken by the new library APIs, but we have
             # a bug that we can't easily support it just yet (NVIDIA/cuda-python#73).
@@ -178,4 +157,28 @@ def _lazy_load_module(self, *args, **kwargs):
                 args = (self._module, len(self._jit_options), list(self._jit_options.keys()), list(self._jit_options.values()))
             self._handle = handle_return(self._loader["data"](*args))
 
+    @precondition(_lazy_load_module)
+    def get_kernel(self, name):
+        """Return the :obj:`Kernel` of a specified name from this object code.
+
+        Parameters
+        ----------
+        name : Any
+            Name of the kernel to retrieve.
+
+        Returns
+        -------
+        :obj:`Kernel`
+            Newly created kernel object.
+
+        """
+        try:
+            name = self._sym_map[name]
+        except KeyError:
+            name = name.encode()
+
+        data = handle_return(self._loader["kernel"](self._handle, name))
+        return Kernel._from_obj(data, self)
+
+
 # TODO: implement from_handle()

diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py
index e7fec356d..f952542bb 100644
--- a/cuda_core/tests/test_module.py
+++ b/cuda_core/tests/test_module.py
@@ -10,38 +10,17 @@
 
 import pytest
 
-from cuda.core.experimental._module import ObjectCode
-
-
-@pytest.mark.skipif(
-    int(importlib.metadata.version("cuda-python").split(".")[0]) < 12,
-    reason="Module loading for older drivers validate require valid module code.",
-)
-def test_object_code_initialization():
-    # Test with supported code types
-    for code_type in ["cubin", "ptx", "fatbin"]:
-        module_data = b"dummy_data"
-        obj_code = ObjectCode(module_data, code_type)
-        assert obj_code._code_type == code_type
-        assert obj_code._module == module_data
-
-    # Test with unsupported code type
-    with pytest.raises(ValueError):
-        ObjectCode(b"dummy_data", "unsupported_code_type")
-
-
-# TODO add ObjectCode tests which provide the appropriate data for cuLibraryLoadFromFile
-def test_object_code_initialization_with_str():
-    assert True
-
-
-def test_object_code_initialization_with_jit_options():
-    assert True
-
-
-def test_object_code_get_kernel():
-    assert True
-
-
-def test_kernel_from_obj():
-    assert True
+from cuda.core.experimental import Program
+
+
+def test_get_kernel():
+    kernel = """
+extern __device__ int B();
+extern __device__ int C(int a, int b);
+__global__ void A() { int result = C(B(), 1);}
+"""
+    object_code = Program(kernel, "c++").compile("ptx", options=("-rdc=true",))
+    assert object_code._handle is None
+    kernel = object_code.get_kernel("A")
+    assert object_code._handle is not None
+    assert kernel._handle is not None

From a55f322128097b22ec6f90cc75962f1028eada82 Mon Sep 17 00:00:00 2001
From: ksimpson
Date: Mon, 9 Dec 2024 15:10:16 -0800
Subject: [PATCH 4/7] handle ptx version too new

---
 cuda_core/cuda/core/experimental/_module.py | 14 ++++++++------
 cuda_core/tests/conftest.py                 | 10 ++++++++--
 cuda_core/tests/test_module.py              |  3 ++-
 cuda_core/tests/test_program.py             | 10 +---------
 4 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py
index e6a5e6863..c2330e910 100644
--- a/cuda_core/cuda/core/experimental/_module.py
+++ b/cuda_core/cuda/core/experimental/_module.py
@@ -109,13 +109,12 @@ class ObjectCode:
     __slots__ = ("_handle", "_backend_version", "_jit_options", "_code_type", "_module", "_loader", "_sym_map")
     _supported_code_type = ("cubin", "ptx", "ltoir", "fatbin")
 
-
     def __init__(self, module, code_type, jit_options=None, *, symbol_mapping=None):
        if code_type not in self._supported_code_type:
            raise ValueError
        _lazy_init()
 
-        # handle is assigned during _lazy_load 
+        # handle is assigned during _lazy_load
         self._handle = None
         self._jit_options = jit_options
 
@@ -127,7 +126,7 @@ def __init__(self, module, code_type, jit_options=None, *, symbol_mapping=None):
         self._sym_map = {} if symbol_mapping is None else symbol_mapping
 
         # TODO: do we want to unload in a finalizer? Probably not..
-
+
     def _lazy_load_module(self, *args, **kwargs):
         if self._handle is not None:
             return
@@ -136,7 +135,6 @@ def _lazy_load_module(self, *args, **kwargs):
             # a bug that we can't easily support it just yet (NVIDIA/cuda-python#73).
             if self._jit_options is not None:
                 raise ValueError
-            module = self._module.encode()
             self._handle = handle_return(self._loader["file"](self._module))
         else:
             assert isinstance(self._module, bytes)
@@ -154,7 +152,12 @@ def _lazy_load_module(self, *args, **kwargs):
                     0,
                 )
             else:  # "old" backend
-                args = (self._module, len(self._jit_options), list(self._jit_options.keys()), list(self._jit_options.values()))
+                args = (
+                    self._module,
+                    len(self._jit_options),
+                    list(self._jit_options.keys()),
+                    list(self._jit_options.values()),
+                )
             self._handle = handle_return(self._loader["data"](*args))
 
     @precondition(_lazy_load_module)
@@ -179,6 +182,5 @@ def get_kernel(self, name):
 
         data = handle_return(self._loader["kernel"](self._handle, name))
         return Kernel._from_obj(data, self)
 
-
 # TODO: implement from_handle()

diff --git a/cuda_core/tests/conftest.py b/cuda_core/tests/conftest.py
index fe755738e..7854abc0e 100644
--- a/cuda_core/tests/conftest.py
+++ b/cuda_core/tests/conftest.py
@@ -11,10 +11,10 @@
 import sys
 
 try:
-    from cuda.bindings import driver
+    from cuda.bindings import driver, nvrtc
 except ImportError:
     from cuda import cuda as driver
-
+    from cuda import nvrtc
 import pytest
 
 from cuda.core.experimental import Device, _device
@@ -65,3 +65,9 @@ def clean_up_cffi_files():
             os.remove(f)
         except FileNotFoundError:
             pass  # noqa: SIM105
+
+
+def can_load_generated_ptx():
+    _, driver_ver = driver.cuDriverGetVersion()
+    _, nvrtc_major, nvrtc_minor = nvrtc.nvrtcVersion()
+    return not nvrtc_major * 1000 + nvrtc_minor * 10 > driver_ver

diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py
index f952542bb..b2519c859 100644
--- a/cuda_core/tests/test_module.py
+++ b/cuda_core/tests/test_module.py
@@ -6,13 +6,14 @@
 # this software and related documentation outside the terms of the EULA
 # is strictly prohibited.
 
-import importlib
 
 import pytest
+from conftest import can_load_generated_ptx
 
 from cuda.core.experimental import Program
 
 
+@pytest.mark.xfail(not can_load_generated_ptx(), reason="PTX version too new")
 def test_get_kernel():
     kernel = """
 extern __device__ int B();

diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py
index f1c24b3e3..cca01af5b 100644
--- a/cuda_core/tests/test_program.py
+++ b/cuda_core/tests/test_program.py
@@ -7,20 +7,12 @@
 # is strictly prohibited.
 
 import pytest
+from conftest import can_load_generated_ptx
 
-from cuda import cuda, nvrtc
 from cuda.core.experimental import Device, Program
 from cuda.core.experimental._module import Kernel, ObjectCode
 
 
-def can_load_generated_ptx():
-    _, driver_ver = cuda.cuDriverGetVersion()
-    _, nvrtc_major, nvrtc_minor = nvrtc.nvrtcVersion()
-    if nvrtc_major * 1000 + nvrtc_minor * 10 > driver_ver:
-        return False
-    return True
-
-
 def test_program_init_valid_code_type():
     code = 'extern "C" __global__ void my_kernel() {}'
     program = Program(code, "c++")

From d0d528ba2df917883b1a20ba82031d4f8b1c03cd Mon Sep 17 00:00:00 2001
From: Keenan Simpson
Date: Tue, 10 Dec 2024 11:19:07 -0800
Subject: [PATCH 5/7] Update cuda_core/tests/conftest.py

Co-authored-by: Leo Fang
---
 cuda_core/tests/conftest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cuda_core/tests/conftest.py b/cuda_core/tests/conftest.py
index 7854abc0e..0ff1f37bd 100644
--- a/cuda_core/tests/conftest.py
+++ b/cuda_core/tests/conftest.py
@@ -70,4 +70,4 @@ def clean_up_cffi_files():
 def can_load_generated_ptx():
     _, driver_ver = driver.cuDriverGetVersion()
     _, nvrtc_major, nvrtc_minor = nvrtc.nvrtcVersion()
-    return not nvrtc_major * 1000 + nvrtc_minor * 10 > driver_ver
+    return not (nvrtc_major * 1000 + nvrtc_minor * 10 > driver_ver)

From a2555f99e3a234fb6709b0296ea238e6757a2d70 Mon Sep 17 00:00:00 2001
From: ksimpson
Date: Tue, 10 Dec 2024 12:06:52 -0800
Subject: [PATCH 6/7] use Ralf's logic for ptx check

---
 cuda_core/tests/conftest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cuda_core/tests/conftest.py b/cuda_core/tests/conftest.py
index 0ff1f37bd..30d80f6f8 100644
--- a/cuda_core/tests/conftest.py
+++ b/cuda_core/tests/conftest.py
@@ -70,4 +70,4 @@ def clean_up_cffi_files():
 def can_load_generated_ptx():
     _, driver_ver = driver.cuDriverGetVersion()
     _, nvrtc_major, nvrtc_minor = nvrtc.nvrtcVersion()
-    return not (nvrtc_major * 1000 + nvrtc_minor * 10 > driver_ver)
+    return nvrtc_major * 1000 + nvrtc_minor * 10 <= driver_ver

From e50030426c6b8161e057a86e15f04a867abcd322 Mon Sep 17 00:00:00 2001
From: ksimpson
Date: Tue, 10 Dec 2024 12:38:02 -0800
Subject: [PATCH 7/7] store attributes in function

---
 cuda_core/cuda/core/experimental/_module.py | 30 +++++++++++----------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py
index c2330e910..7a4fc0e2d 100644
--- a/cuda_core/cuda/core/experimental/_module.py
+++ b/cuda_core/cuda/core/experimental/_module.py
@@ -130,22 +130,24 @@ def __init__(self, module, code_type, jit_options=None, *, symbol_mapping=None):
     def _lazy_load_module(self, *args, **kwargs):
         if self._handle is not None:
             return
-        if isinstance(self._module, str):
+        jit_options = self._jit_options
+        module = self._module
+        if isinstance(module, str):
             # TODO: this option is only taken by the new library APIs, but we have
             # a bug that we can't easily support it just yet (NVIDIA/cuda-python#73).
-            if self._jit_options is not None:
+            if jit_options is not None:
                 raise ValueError
-            self._handle = handle_return(self._loader["file"](self._module))
+            self._handle = handle_return(self._loader["file"](module))
         else:
-            assert isinstance(self._module, bytes)
-            if self._jit_options is None:
-                self._jit_options = {}
+            assert isinstance(module, bytes)
+            if jit_options is None:
+                jit_options = {}
             if self._backend_version == "new":
                 args = (
-                    self._module,
-                    list(self._jit_options.keys()),
-                    list(self._jit_options.values()),
-                    len(self._jit_options),
+                    module,
+                    list(jit_options.keys()),
+                    list(jit_options.values()),
+                    len(jit_options),
                     # TODO: support library options
                     [],
                     [],
@@ -153,10 +155,10 @@ def _lazy_load_module(self, *args, **kwargs):
                 )
             else:  # "old" backend
                 args = (
-                    self._module,
-                    len(self._jit_options),
-                    list(self._jit_options.keys()),
-                    list(self._jit_options.values()),
+                    module,
+                    len(jit_options),
+                    list(jit_options.keys()),
+                    list(jit_options.values()),
                 )
             self._handle = handle_return(self._loader["data"](*args))
 
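Illustrative usage sketch (not part of the patch series; the kernel source below is hypothetical) of the behavior the series introduces, mirroring the test added in PATCH 3/7: constructing an ObjectCode no longer loads the module, and the handle is only materialized when get_kernel() first fires the _lazy_load_module precondition.

    from cuda.core.experimental import Program

    code = 'extern "C" __global__ void A() {}'
    object_code = Program(code, "c++").compile("ptx")
    assert object_code._handle is None        # nothing is loaded at construction time
    kernel = object_code.get_kernel("A")      # first lookup triggers _lazy_load_module()
    assert object_code._handle is not None    # the module handle now exists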