From db42da1654ccebe7449e2318b95d6df87c57712c Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Thu, 5 Jun 2025 23:17:59 +0000 Subject: [PATCH 1/4] allow ObjectCode to have a name --- cuda_core/cuda/core/experimental/_linker.py | 8 ++- cuda_core/cuda/core/experimental/_module.py | 66 ++++++++++++++----- cuda_core/cuda/core/experimental/_program.py | 10 ++- cuda_core/docs/source/release/0.3.0-notes.rst | 2 + cuda_core/tests/test_linker.py | 4 ++ cuda_core/tests/test_program.py | 15 +++-- 6 files changed, 78 insertions(+), 27 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_linker.py b/cuda_core/cuda/core/experimental/_linker.py index 915f38f1a..227f7bec5 100644 --- a/cuda_core/cuda/core/experimental/_linker.py +++ b/cuda_core/cuda/core/experimental/_linker.py @@ -100,6 +100,8 @@ class LinkerOptions: Attributes ---------- + name : str, optional + Name of the linker. If the linking succeeds, the name is passed down to the generated `ObjectCode`. arch : str, optional Pass the SM architecture value, such as ``sm_`` (for generating CUBIN) or ``compute_`` (for generating PTX). If not provided, the current device's architecture @@ -161,6 +163,7 @@ class LinkerOptions: Default: False. """ + name: Optional[str] = "" arch: Optional[str] = None max_register_count: Optional[int] = None time: Optional[bool] = None @@ -184,6 +187,7 @@ class LinkerOptions: def __post_init__(self): _lazy_init() + self._name = self.name.encode() self.formatted_options = [] if _nvjitlink: self._init_nvjitlink() @@ -393,7 +397,7 @@ def _add_code_object(self, object_code: ObjectCode): data = object_code._module assert_type(data, bytes) with _exception_manager(self): - name_str = f"{object_code._handle}_{object_code._code_type}" + name_str = f"{object_code.name}" if _nvjitlink: _nvjitlink.add_data( self._mnff.handle, @@ -455,7 +459,7 @@ def link(self, target_type) -> ObjectCode: addr, size = handle_return(_driver.cuLinkComplete(self._mnff.handle)) code = (ctypes.c_char * size).from_address(addr) - return ObjectCode._init(bytes(code), target_type) + return ObjectCode._init(bytes(code), target_type, name=self._options.name) def get_error_log(self) -> str: """Get the error log generated by the linker. diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py index 9c80c687b..45789802a 100644 --- a/cuda_core/cuda/core/experimental/_module.py +++ b/cuda_core/cuda/core/experimental/_module.py @@ -449,7 +449,7 @@ class ObjectCode: context. """ - __slots__ = ("_handle", "_backend_version", "_code_type", "_module", "_loader", "_sym_map") + __slots__ = ("_handle", "_backend_version", "_code_type", "_module", "_loader", "_sym_map", "_name") _supported_code_type = ("cubin", "ptx", "ltoir", "fatbin", "object", "library") def __new__(self, *args, **kwargs): @@ -459,7 +459,9 @@ def __new__(self, *args, **kwargs): ) @classmethod - def _init(cls, module, code_type, *, symbol_mapping: Optional[dict] = None): + def _init(cls, module, code_type, *, + name: str = "", + symbol_mapping: Optional[dict] = None): self = super().__new__(cls) assert code_type in self._supported_code_type, f"{code_type=} is not supported" _lazy_init() @@ -473,19 +475,22 @@ def _init(cls, module, code_type, *, symbol_mapping: Optional[dict] = None): self._code_type = code_type self._module = module self._sym_map = {} if symbol_mapping is None else symbol_mapping + self._name = name return self @classmethod - def _reduce_helper(self, module, code_type, symbol_mapping): + def _reduce_helper(self, module, code_type, name, symbol_mapping): # just for forwarding kwargs - return ObjectCode._init(module, code_type, symbol_mapping=symbol_mapping) + return ObjectCode._init(module, code_type, name=name, symbol_mapping=symbol_mapping) def __reduce__(self): - return ObjectCode._reduce_helper, (self._module, self._code_type, self._sym_map) + return ObjectCode._reduce_helper, (self._module, self._code_type, self._name, self._sym_map) @staticmethod - def from_cubin(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = None) -> "ObjectCode": + def from_cubin(module: Union[bytes, str], *, + name: str = "", + symbol_mapping: Optional[dict] = None) -> "ObjectCode": """Create an :class:`ObjectCode` instance from an existing cubin. Parameters @@ -493,15 +498,19 @@ def from_cubin(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = No module : Union[bytes, str] Either a bytes object containing the in-memory cubin to load, or a file path string pointing to the on-disk cubin to load. + name : Optional[str] + A human-readable identifier representing this code object. symbol_mapping : Optional[dict] A dictionary specifying how the unmangled symbol names (as keys) should be mapped to the mangled names before trying to retrieve them (default to no mappings). """ - return ObjectCode._init(module, "cubin", symbol_mapping=symbol_mapping) + return ObjectCode._init(module, "cubin", name=name, symbol_mapping=symbol_mapping) @staticmethod - def from_ptx(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = None) -> "ObjectCode": + def from_ptx(module: Union[bytes, str], *, + name: str = "", + symbol_mapping: Optional[dict] = None) -> "ObjectCode": """Create an :class:`ObjectCode` instance from an existing PTX. Parameters @@ -509,15 +518,19 @@ def from_ptx(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = None module : Union[bytes, str] Either a bytes object containing the in-memory ptx code to load, or a file path string pointing to the on-disk ptx file to load. + name : Optional[str] + A human-readable identifier representing this code object. symbol_mapping : Optional[dict] A dictionary specifying how the unmangled symbol names (as keys) should be mapped to the mangled names before trying to retrieve them (default to no mappings). """ - return ObjectCode._init(module, "ptx", symbol_mapping=symbol_mapping) + return ObjectCode._init(module, "ptx", name=name, symbol_mapping=symbol_mapping) @staticmethod - def from_ltoir(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = None) -> "ObjectCode": + def from_ltoir(module: Union[bytes, str], *, + name: str = "", + symbol_mapping: Optional[dict] = None) -> "ObjectCode": """Create an :class:`ObjectCode` instance from an existing LTOIR. Parameters @@ -525,15 +538,19 @@ def from_ltoir(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = No module : Union[bytes, str] Either a bytes object containing the in-memory ltoir code to load, or a file path string pointing to the on-disk ltoir file to load. + name : Optional[str] + A human-readable identifier representing this code object. symbol_mapping : Optional[dict] A dictionary specifying how the unmangled symbol names (as keys) should be mapped to the mangled names before trying to retrieve them (default to no mappings). """ - return ObjectCode._init(module, "ltoir", symbol_mapping=symbol_mapping) + return ObjectCode._init(module, "ltoir", name=name, symbol_mapping=symbol_mapping) @staticmethod - def from_fatbin(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = None) -> "ObjectCode": + def from_fatbin(module: Union[bytes, str], *, + name: str = "", + symbol_mapping: Optional[dict] = None) -> "ObjectCode": """Create an :class:`ObjectCode` instance from an existing fatbin. Parameters @@ -541,15 +558,19 @@ def from_fatbin(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = N module : Union[bytes, str] Either a bytes object containing the in-memory fatbin to load, or a file path string pointing to the on-disk fatbin to load. + name : Optional[str] + A human-readable identifier representing this code object. symbol_mapping : Optional[dict] A dictionary specifying how the unmangled symbol names (as keys) should be mapped to the mangled names before trying to retrieve them (default to no mappings). """ - return ObjectCode._init(module, "fatbin", symbol_mapping=symbol_mapping) + return ObjectCode._init(module, "fatbin", name=name, symbol_mapping=symbol_mapping) @staticmethod - def from_object(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = None) -> "ObjectCode": + def from_object(module: Union[bytes, str], *, + name: str = "", + symbol_mapping: Optional[dict] = None) -> "ObjectCode": """Create an :class:`ObjectCode` instance from an existing object code. Parameters @@ -557,15 +578,19 @@ def from_object(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = N module : Union[bytes, str] Either a bytes object containing the in-memory object code to load, or a file path string pointing to the on-disk object code to load. + name : Optional[str] + A human-readable identifier representing this code object. symbol_mapping : Optional[dict] A dictionary specifying how the unmangled symbol names (as keys) should be mapped to the mangled names before trying to retrieve them (default to no mappings). """ - return ObjectCode._init(module, "object", symbol_mapping=symbol_mapping) + return ObjectCode._init(module, "object", name=name, symbol_mapping=symbol_mapping) @staticmethod - def from_library(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = None) -> "ObjectCode": + def from_library(module: Union[bytes, str], *, + name: str = "", + symbol_mapping: Optional[dict] = None) -> "ObjectCode": """Create an :class:`ObjectCode` instance from an existing library. Parameters @@ -573,12 +598,14 @@ def from_library(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = module : Union[bytes, str] Either a bytes object containing the in-memory library to load, or a file path string pointing to the on-disk library to load. + name : Optional[str] + A human-readable identifier representing this code object. symbol_mapping : Optional[dict] A dictionary specifying how the unmangled symbol names (as keys) should be mapped to the mangled names before trying to retrieve them (default to no mappings). """ - return ObjectCode._init(module, "library", symbol_mapping=symbol_mapping) + return ObjectCode._init(module, "library", name=name, symbol_mapping=symbol_mapping) # TODO: do we want to unload in a finalizer? Probably not.. @@ -632,6 +659,11 @@ def code(self) -> CodeTypeT: """Return the underlying code object.""" return self._module + @property + def name(self) -> str: + """Return a human-readable name of this code object.""" + return self._name + @property @precondition(_lazy_load_module) def handle(self): diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index dd86bd250..e82b33a98 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -57,6 +57,8 @@ class ProgramOptions: Attributes ---------- + name : str, optional + Name of the program. If the compilation succeeds, the name is passed down to the generated `ObjectCode`. arch : str, optional Pass the SM architecture value, such as ``sm_`` (for generating CUBIN) or ``compute_`` (for generating PTX). If not provided, the current device's architecture @@ -180,6 +182,7 @@ class ProgramOptions: Default: False """ + name: Optional[str] = "" arch: Optional[str] = None relocatable_device_code: Optional[bool] = None extensible_whole_program: Optional[bool] = None @@ -222,6 +225,8 @@ class ProgramOptions: minimal: Optional[bool] = None def __post_init__(self): + self._name = self.name.encode() + self._formatted_options = [] if self.arch is not None: self._formatted_options.append(f"--gpu-architecture={self.arch}") @@ -396,7 +401,7 @@ def __init__(self, code, code_type, options: ProgramOptions = None): # TODO: support pre-loaded headers & include names # TODO: allow tuples once NVIDIA/cuda-python#72 is resolved - self._mnff.handle = handle_return(nvrtc.nvrtcCreateProgram(code.encode(), b"", 0, [], [])) + self._mnff.handle = handle_return(nvrtc.nvrtcCreateProgram(code.encode(), options._name, 0, [], [])) self._backend = "NVRTC" self._linker = None @@ -413,6 +418,7 @@ def __init__(self, code, code_type, options: ProgramOptions = None): def _translate_program_options(self, options: ProgramOptions) -> LinkerOptions: return LinkerOptions( + name=options.name, arch=options.arch, max_register_count=options.max_register_count, time=options.time, @@ -505,7 +511,7 @@ def compile(self, target_type, name_expressions=(), logs=None): handle_return(nvrtc.nvrtcGetProgramLog(self._mnff.handle, log), handle=self._mnff.handle) logs.write(log.decode("utf-8", errors="backslashreplace")) - return ObjectCode._init(data, target_type, symbol_mapping=symbol_mapping) + return ObjectCode._init(data, target_type, symbol_mapping=symbol_mapping, name=self._options.name) supported_backends = ("nvJitLink", "driver") if self._backend not in supported_backends: diff --git a/cuda_core/docs/source/release/0.3.0-notes.rst b/cuda_core/docs/source/release/0.3.0-notes.rst index 88a028b51..18c3ec223 100644 --- a/cuda_core/docs/source/release/0.3.0-notes.rst +++ b/cuda_core/docs/source/release/0.3.0-notes.rst @@ -22,6 +22,8 @@ New features - :class:`Kernel` adds :property:`Kernel.num_arguments` and :property:`Kernel.arguments_info` for introspection of kernel arguments. (#612) - Add pythonic access to kernel occupancy calculation functions via :property:`Kernel.occupancy`. (#648) +- A name can be assigned to :class:`ObjectCode` instances generated by both :class:`Program` and :class:`Linker` through their respective + options. New examples ------------ diff --git a/cuda_core/tests/test_linker.py b/cuda_core/tests/test_linker.py index f15e98a42..d1895921b 100644 --- a/cuda_core/tests/test_linker.py +++ b/cuda_core/tests/test_linker.py @@ -109,6 +109,7 @@ def test_linker_link_ptx_nvjitlink(compile_ltoir_functions): linker = Linker(*compile_ltoir_functions, options=options) linked_code = linker.link("ptx") assert isinstance(linked_code, ObjectCode) + assert linked_code.name == options.name @pytest.mark.skipif(not is_culink_backend, reason="nvjitlink requires lto for ptx linking") @@ -117,6 +118,7 @@ def test_linker_link_ptx_culink(compile_ptx_functions): linker = Linker(*compile_ptx_functions, options=options) linked_code = linker.link("ptx") assert isinstance(linked_code, ObjectCode) + assert linked_code.name == options.name def test_linker_link_cubin(compile_ptx_functions): @@ -124,6 +126,7 @@ def test_linker_link_cubin(compile_ptx_functions): linker = Linker(*compile_ptx_functions, options=options) linked_code = linker.link("cubin") assert isinstance(linked_code, ObjectCode) + assert linked_code.name == options.name def test_linker_link_ptx_multiple(compile_ptx_functions): @@ -132,6 +135,7 @@ def test_linker_link_ptx_multiple(compile_ptx_functions): linker = Linker(*ptxes, options=options) linked_code = linker.link("cubin") assert isinstance(linked_code, ObjectCode) + assert linked_code.name == options.name def test_linker_link_invalid_target_type(compile_ptx_functions): diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index 56cddb135..7d818657e 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -24,6 +24,7 @@ def ptx_code_object(): @pytest.mark.parametrize( "options", [ + ProgramOptions(name="abc"), ProgramOptions(device_code_optimize=True, debug=True), ProgramOptions(relocatable_device_code=True, max_register_count=32), ProgramOptions(ftz=True, prec_sqrt=False, prec_div=False), @@ -105,21 +106,23 @@ def test_program_init_invalid_code_format(): # This is tested against the current device's arch def test_program_compile_valid_target_type(init_cuda): code = 'extern "C" __global__ void my_kernel() {}' - program = Program(code, "c++") + program = Program(code, "c++", options={"name": "42"}) with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") ptx_object_code = program.compile("ptx") + assert isinstance(ptx_object_code, ObjectCode) + assert ptx_object_code.name == "42" if any("The CUDA driver version is older than the backend version" in str(warning.message) for warning in w): pytest.skip("PTX version too new for current driver") + ptx_kernel = ptx_object_code.get_kernel("my_kernel") + assert isinstance(ptx_kernel, Kernel) - program = Program(ptx_object_code._module.decode(), "ptx") + program = Program(ptx_object_code._module.decode(), "ptx", options={"name": "24"}) cubin_object_code = program.compile("cubin") - ptx_kernel = ptx_object_code.get_kernel("my_kernel") - cubin_kernel = cubin_object_code.get_kernel("my_kernel") - assert isinstance(ptx_object_code, ObjectCode) assert isinstance(cubin_object_code, ObjectCode) - assert isinstance(ptx_kernel, Kernel) + assert cubin_object_code.name == "24" + cubin_kernel = cubin_object_code.get_kernel("my_kernel") assert isinstance(cubin_kernel, Kernel) From f72b1c02e9bbdc565ed347b3b5b76f58d26829ab Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Thu, 5 Jun 2025 23:23:15 +0000 Subject: [PATCH 2/4] fix linter --- cuda_core/cuda/core/experimental/_module.py | 34 ++++++++------------- 1 file changed, 13 insertions(+), 21 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py index 45789802a..8bfc74e4a 100644 --- a/cuda_core/cuda/core/experimental/_module.py +++ b/cuda_core/cuda/core/experimental/_module.py @@ -459,9 +459,7 @@ def __new__(self, *args, **kwargs): ) @classmethod - def _init(cls, module, code_type, *, - name: str = "", - symbol_mapping: Optional[dict] = None): + def _init(cls, module, code_type, *, name: str = "", symbol_mapping: Optional[dict] = None): self = super().__new__(cls) assert code_type in self._supported_code_type, f"{code_type=} is not supported" _lazy_init() @@ -488,9 +486,7 @@ def __reduce__(self): return ObjectCode._reduce_helper, (self._module, self._code_type, self._name, self._sym_map) @staticmethod - def from_cubin(module: Union[bytes, str], *, - name: str = "", - symbol_mapping: Optional[dict] = None) -> "ObjectCode": + def from_cubin(module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None) -> "ObjectCode": """Create an :class:`ObjectCode` instance from an existing cubin. Parameters @@ -508,9 +504,7 @@ def from_cubin(module: Union[bytes, str], *, return ObjectCode._init(module, "cubin", name=name, symbol_mapping=symbol_mapping) @staticmethod - def from_ptx(module: Union[bytes, str], *, - name: str = "", - symbol_mapping: Optional[dict] = None) -> "ObjectCode": + def from_ptx(module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None) -> "ObjectCode": """Create an :class:`ObjectCode` instance from an existing PTX. Parameters @@ -528,9 +522,7 @@ def from_ptx(module: Union[bytes, str], *, return ObjectCode._init(module, "ptx", name=name, symbol_mapping=symbol_mapping) @staticmethod - def from_ltoir(module: Union[bytes, str], *, - name: str = "", - symbol_mapping: Optional[dict] = None) -> "ObjectCode": + def from_ltoir(module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None) -> "ObjectCode": """Create an :class:`ObjectCode` instance from an existing LTOIR. Parameters @@ -548,9 +540,9 @@ def from_ltoir(module: Union[bytes, str], *, return ObjectCode._init(module, "ltoir", name=name, symbol_mapping=symbol_mapping) @staticmethod - def from_fatbin(module: Union[bytes, str], *, - name: str = "", - symbol_mapping: Optional[dict] = None) -> "ObjectCode": + def from_fatbin( + module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None + ) -> "ObjectCode": """Create an :class:`ObjectCode` instance from an existing fatbin. Parameters @@ -568,9 +560,9 @@ def from_fatbin(module: Union[bytes, str], *, return ObjectCode._init(module, "fatbin", name=name, symbol_mapping=symbol_mapping) @staticmethod - def from_object(module: Union[bytes, str], *, - name: str = "", - symbol_mapping: Optional[dict] = None) -> "ObjectCode": + def from_object( + module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None + ) -> "ObjectCode": """Create an :class:`ObjectCode` instance from an existing object code. Parameters @@ -588,9 +580,9 @@ def from_object(module: Union[bytes, str], *, return ObjectCode._init(module, "object", name=name, symbol_mapping=symbol_mapping) @staticmethod - def from_library(module: Union[bytes, str], *, - name: str = "", - symbol_mapping: Optional[dict] = None) -> "ObjectCode": + def from_library( + module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None + ) -> "ObjectCode": """Create an :class:`ObjectCode` instance from an existing library. Parameters From be635661e59998b8012c8147b01e054e2c7960ce Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Fri, 6 Jun 2025 01:55:43 +0000 Subject: [PATCH 3/4] update cuLink default name --- cuda_core/tests/test_linker.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cuda_core/tests/test_linker.py b/cuda_core/tests/test_linker.py index d1895921b..d616faaf3 100644 --- a/cuda_core/tests/test_linker.py +++ b/cuda_core/tests/test_linker.py @@ -148,7 +148,7 @@ def test_linker_link_invalid_target_type(compile_ptx_functions): # this test causes an API error when using the culink API @skipif_testing_with_compute_sanitizer def test_linker_get_error_log(compile_ptx_functions): - options = LinkerOptions(arch=ARCH) + options = LinkerOptions(name="ABC", arch=ARCH) replacement_kernel = """ extern __device__ int Z(); @@ -165,7 +165,7 @@ def test_linker_get_error_log(compile_ptx_functions): assert isinstance(log, str) # TODO when 4902246 is addressed, we can update this to cover nvjitlink as well if is_culink_backend: - assert log.rstrip("\x00") == "error : Undefined reference to '_Z1Zv' in 'None_ptx'" + assert log.rstrip("\x00") == "error : Undefined reference to '_Z1Zv' in 'ABC'" def test_linker_get_info_log(compile_ptx_functions): From c8572b2aa4054f45cc97a06c960b99fdfb80d47b Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Fri, 6 Jun 2025 02:39:45 +0000 Subject: [PATCH 4/4] ensure we look at the right object --- cuda_core/tests/test_linker.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cuda_core/tests/test_linker.py b/cuda_core/tests/test_linker.py index d616faaf3..9b96d6847 100644 --- a/cuda_core/tests/test_linker.py +++ b/cuda_core/tests/test_linker.py @@ -155,7 +155,9 @@ def test_linker_get_error_log(compile_ptx_functions): extern __device__ int C(int a, int b); __global__ void A() { int result = C(Z(), 1);} """ - dummy_program = Program(replacement_kernel, "c++", ProgramOptions(relocatable_device_code=True)).compile("ptx") + dummy_program = Program( + replacement_kernel, "c++", ProgramOptions(name="CBA", relocatable_device_code=True) + ).compile("ptx") linker = Linker(dummy_program, *(compile_ptx_functions[1:]), options=options) try: linker.link("cubin") @@ -164,8 +166,9 @@ def test_linker_get_error_log(compile_ptx_functions): log = linker.get_error_log() assert isinstance(log, str) # TODO when 4902246 is addressed, we can update this to cover nvjitlink as well + # The error is coming from the input object that's being linked (CBA), not the output object (ABC). if is_culink_backend: - assert log.rstrip("\x00") == "error : Undefined reference to '_Z1Zv' in 'ABC'" + assert log.rstrip("\x00") == "error : Undefined reference to '_Z1Zv' in 'CBA'" def test_linker_get_info_log(compile_ptx_functions):