From 593e35d9cb0c06b7854183f6766194b975eef1f6 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Thu, 13 Mar 2025 13:45:02 -0700 Subject: [PATCH 1/3] `cuLinkAddData()` with `const_char_keep_alive` INCLUDING DEBUGGING CODE --- cuda_core/cuda/core/experimental/_linker.py | 18 +++++++++++++++--- cuda_core/cuda/core/experimental/_module.py | 2 ++ cuda_core/cuda/core/experimental/_program.py | 6 +++++- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_linker.py b/cuda_core/cuda/core/experimental/_linker.py index 5c5e1c3ce..90b39050a 100644 --- a/cuda_core/cuda/core/experimental/_linker.py +++ b/cuda_core/cuda/core/experimental/_linker.py @@ -38,6 +38,8 @@ def _decide_nvjitlink_or_driver() -> bool: _driver_ver = handle_return(driver.cuDriverGetVersion()) _driver_ver = (_driver_ver // 1000, (_driver_ver % 1000) // 10) try: + if 0: + import BLOCK_NVJITLINK_IMPORT from cuda.bindings import nvjitlink as _nvjitlink from cuda.bindings._internal import nvjitlink as inner_nvjitlink except ImportError: @@ -48,6 +50,10 @@ def _decide_nvjitlink_or_driver() -> bool: # binding is available, but nvJitLink is not installed _nvjitlink = None + if 0: + print("\nLOOOK FORCING _driver = driver", flush=True) + _driver = driver + return True if _nvjitlink is None: warn( "nvJitLink is not installed or too old (<12.3). Therefore it is not usable " @@ -349,11 +355,12 @@ class Linker: """ class _MembersNeededForFinalize: - __slots__ = ("handle", "use_nvjitlink") + __slots__ = ("handle", "use_nvjitlink", "const_char_keep_alive") def __init__(self, program_obj, handle, use_nvjitlink): self.handle = handle self.use_nvjitlink = use_nvjitlink + self.const_char_keep_alive = [] weakref.finalize(program_obj, self.close) def close(self): @@ -390,27 +397,30 @@ def _add_code_object(self, object_code: ObjectCode): data = object_code._module assert_type(data, bytes) with _exception_manager(self): + name_str = f"{object_code._handle}_{object_code._code_type}" if _nvjitlink: _nvjitlink.add_data( self._mnff.handle, self._input_type_from_code_type(object_code._code_type), data, len(data), - f"{object_code._handle}_{object_code._code_type}", + name_str, ) else: + name_bytes = name_str.encode() handle_return( _driver.cuLinkAddData( self._mnff.handle, self._input_type_from_code_type(object_code._code_type), data, len(data), - f"{object_code._handle}_{object_code._code_type}".encode(), + name_bytes, 0, None, None, ) ) + self._mnff.handle.const_char_keep_alive.append(name_bytes) def link(self, target_type) -> ObjectCode: """ @@ -435,6 +445,7 @@ def link(self, target_type) -> ObjectCode: raise ValueError(f"Unsupported target type: {target_type}") with _exception_manager(self): if _nvjitlink: + print("\nlink with _nvjitlink.complete", flush=True) _nvjitlink.complete(self._mnff.handle) if target_type == "cubin": get_size = _nvjitlink.get_linked_cubin_size @@ -446,6 +457,7 @@ def link(self, target_type) -> ObjectCode: code = bytearray(size) get_code(self._mnff.handle, code) else: + print("\nlink with _driver.cuLinkComplete", flush=True) addr, size = handle_return(_driver.cuLinkComplete(self._mnff.handle)) code = (ctypes.c_char * size).from_address(addr) diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py index 7a4c4623a..0d75c3539 100644 --- a/cuda_core/cuda/core/experimental/_module.py +++ b/cuda_core/cuda/core/experimental/_module.py @@ -281,6 +281,7 @@ def from_cubin(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = No should be mapped to the mangled names before trying to retrieve them (default to no mappings). """ + print("\nLOOOK ObjectCode.from_cubin", flush=True) return ObjectCode._init(module, "cubin", symbol_mapping=symbol_mapping) @staticmethod @@ -297,6 +298,7 @@ def from_ptx(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = None should be mapped to the mangled names before trying to retrieve them (default to no mappings). """ + print("\nLOOOK ObjectCode.from_ptx", flush=True) return ObjectCode._init(module, "ptx", symbol_mapping=symbol_mapping) # TODO: do we want to unload in a finalizer? Probably not.. diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index 3125cbb7f..cb2a6adc7 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -396,7 +396,7 @@ def __init__(self, code, code_type, options: ProgramOptions = None): # TODO: support pre-loaded headers & include names # TODO: allow tuples once NVIDIA/cuda-python#72 is resolved - self._mnff.handle = handle_return(nvrtc.nvrtcCreateProgram(code.encode(), b"", 0, [], [])) + self._mnff.handle = handle_return(nvrtc.nvrtcCreateProgram(code.encode(), b"HELLO", 0, [], [])) self._backend = "NVRTC" self._linker = None @@ -461,6 +461,7 @@ def compile(self, target_type, name_expressions=(), logs=None): Newly created code object. """ + print(f"\nLOOOK {target_type=} {name_expressions=}", flush=True) supported_target_types = ("ptx", "cubin", "ltoir") if target_type not in supported_target_types: raise ValueError(f'Unsupported target_type="{target_type}" ({supported_target_types=})') @@ -480,6 +481,7 @@ def compile(self, target_type, name_expressions=(), logs=None): handle=self._mnff.handle, ) options = self._options._as_bytes() + print(f"\nLOOOK {options=}", flush=True) handle_return( nvrtc.nvrtcCompileProgram(self._mnff.handle, len(options), options), handle=self._mnff.handle, @@ -505,11 +507,13 @@ def compile(self, target_type, name_expressions=(), logs=None): handle_return(nvrtc.nvrtcGetProgramLog(self._mnff.handle, log), handle=self._mnff.handle) logs.write(log.decode("utf-8", errors="backslashreplace")) + print("\nLOOOK compile using ObjectCode._init()", flush=True) return ObjectCode._init(data, target_type, symbol_mapping=symbol_mapping) supported_backends = ("nvJitLink", "driver") if self._backend not in supported_backends: raise ValueError(f'Unsupported backend="{self._backend}" ({supported_backends=})') + print("\nLOOOK compile using self._linker.link()", flush=True) return self._linker.link(target_type) @property From c01ff712a3a4d57090819692e93536b14673e8aa Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Thu, 13 Mar 2025 13:48:53 -0700 Subject: [PATCH 2/3] Remove all debugging code. --- cuda_core/cuda/core/experimental/_linker.py | 8 -------- cuda_core/cuda/core/experimental/_module.py | 2 -- cuda_core/cuda/core/experimental/_program.py | 6 +----- 3 files changed, 1 insertion(+), 15 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_linker.py b/cuda_core/cuda/core/experimental/_linker.py index 90b39050a..90cb75438 100644 --- a/cuda_core/cuda/core/experimental/_linker.py +++ b/cuda_core/cuda/core/experimental/_linker.py @@ -38,8 +38,6 @@ def _decide_nvjitlink_or_driver() -> bool: _driver_ver = handle_return(driver.cuDriverGetVersion()) _driver_ver = (_driver_ver // 1000, (_driver_ver % 1000) // 10) try: - if 0: - import BLOCK_NVJITLINK_IMPORT from cuda.bindings import nvjitlink as _nvjitlink from cuda.bindings._internal import nvjitlink as inner_nvjitlink except ImportError: @@ -50,10 +48,6 @@ def _decide_nvjitlink_or_driver() -> bool: # binding is available, but nvJitLink is not installed _nvjitlink = None - if 0: - print("\nLOOOK FORCING _driver = driver", flush=True) - _driver = driver - return True if _nvjitlink is None: warn( "nvJitLink is not installed or too old (<12.3). Therefore it is not usable " @@ -445,7 +439,6 @@ def link(self, target_type) -> ObjectCode: raise ValueError(f"Unsupported target type: {target_type}") with _exception_manager(self): if _nvjitlink: - print("\nlink with _nvjitlink.complete", flush=True) _nvjitlink.complete(self._mnff.handle) if target_type == "cubin": get_size = _nvjitlink.get_linked_cubin_size @@ -457,7 +450,6 @@ def link(self, target_type) -> ObjectCode: code = bytearray(size) get_code(self._mnff.handle, code) else: - print("\nlink with _driver.cuLinkComplete", flush=True) addr, size = handle_return(_driver.cuLinkComplete(self._mnff.handle)) code = (ctypes.c_char * size).from_address(addr) diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py index 0d75c3539..7a4c4623a 100644 --- a/cuda_core/cuda/core/experimental/_module.py +++ b/cuda_core/cuda/core/experimental/_module.py @@ -281,7 +281,6 @@ def from_cubin(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = No should be mapped to the mangled names before trying to retrieve them (default to no mappings). """ - print("\nLOOOK ObjectCode.from_cubin", flush=True) return ObjectCode._init(module, "cubin", symbol_mapping=symbol_mapping) @staticmethod @@ -298,7 +297,6 @@ def from_ptx(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = None should be mapped to the mangled names before trying to retrieve them (default to no mappings). """ - print("\nLOOOK ObjectCode.from_ptx", flush=True) return ObjectCode._init(module, "ptx", symbol_mapping=symbol_mapping) # TODO: do we want to unload in a finalizer? Probably not.. diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index cb2a6adc7..3125cbb7f 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -396,7 +396,7 @@ def __init__(self, code, code_type, options: ProgramOptions = None): # TODO: support pre-loaded headers & include names # TODO: allow tuples once NVIDIA/cuda-python#72 is resolved - self._mnff.handle = handle_return(nvrtc.nvrtcCreateProgram(code.encode(), b"HELLO", 0, [], [])) + self._mnff.handle = handle_return(nvrtc.nvrtcCreateProgram(code.encode(), b"", 0, [], [])) self._backend = "NVRTC" self._linker = None @@ -461,7 +461,6 @@ def compile(self, target_type, name_expressions=(), logs=None): Newly created code object. """ - print(f"\nLOOOK {target_type=} {name_expressions=}", flush=True) supported_target_types = ("ptx", "cubin", "ltoir") if target_type not in supported_target_types: raise ValueError(f'Unsupported target_type="{target_type}" ({supported_target_types=})') @@ -481,7 +480,6 @@ def compile(self, target_type, name_expressions=(), logs=None): handle=self._mnff.handle, ) options = self._options._as_bytes() - print(f"\nLOOOK {options=}", flush=True) handle_return( nvrtc.nvrtcCompileProgram(self._mnff.handle, len(options), options), handle=self._mnff.handle, @@ -507,13 +505,11 @@ def compile(self, target_type, name_expressions=(), logs=None): handle_return(nvrtc.nvrtcGetProgramLog(self._mnff.handle, log), handle=self._mnff.handle) logs.write(log.decode("utf-8", errors="backslashreplace")) - print("\nLOOOK compile using ObjectCode._init()", flush=True) return ObjectCode._init(data, target_type, symbol_mapping=symbol_mapping) supported_backends = ("nvJitLink", "driver") if self._backend not in supported_backends: raise ValueError(f'Unsupported backend="{self._backend}" ({supported_backends=})') - print("\nLOOOK compile using self._linker.link()", flush=True) return self._linker.link(target_type) @property From 5b46dce001694c09fe52b00481d070ac9c69f338 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Thu, 13 Mar 2025 14:17:21 -0700 Subject: [PATCH 3/3] Fix silly oversight. --- cuda_core/cuda/core/experimental/_linker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_core/cuda/core/experimental/_linker.py b/cuda_core/cuda/core/experimental/_linker.py index 90cb75438..fd5bbac0a 100644 --- a/cuda_core/cuda/core/experimental/_linker.py +++ b/cuda_core/cuda/core/experimental/_linker.py @@ -414,7 +414,7 @@ def _add_code_object(self, object_code: ObjectCode): None, ) ) - self._mnff.handle.const_char_keep_alive.append(name_bytes) + self._mnff.const_char_keep_alive.append(name_bytes) def link(self, target_type) -> ObjectCode: """