From 18ba714576e2e556e0ab8bfacfa3f5009fcbf003 Mon Sep 17 00:00:00 2001 From: luojia65 Date: Mon, 21 Mar 2022 15:34:18 +0800 Subject: [PATCH 1/4] Modify Zks functions to use LLVM intrinsics Add target_feature_11 to feature list Use LLVM intrinsics LLVM intrinsic types differ from the function types; transmute if necessary. --- crates/core_arch/src/lib.rs | 2 + crates/core_arch/src/riscv_shared/mod.rs | 81 +++++++++++------------- 2 files changed, 39 insertions(+), 44 deletions(-) diff --git a/crates/core_arch/src/lib.rs b/crates/core_arch/src/lib.rs index c73e309e72..7076be6592 100644 --- a/crates/core_arch/src/lib.rs +++ b/crates/core_arch/src/lib.rs @@ -17,8 +17,10 @@ stdsimd, staged_api, doc_cfg, + target_feature_11, tbm_target_feature, sse4a_target_feature, + riscv_target_feature, arm_target_feature, aarch64_target_feature, cmpxchg16b_target_feature, diff --git a/crates/core_arch/src/riscv_shared/mod.rs b/crates/core_arch/src/riscv_shared/mod.rs index 347735df1d..86a83a9ab3 100644 --- a/crates/core_arch/src/riscv_shared/mod.rs +++ b/crates/core_arch/src/riscv_shared/mod.rs @@ -602,13 +602,12 @@ pub unsafe fn hinval_gvma_all() { /// According to RISC-V Cryptography Extensions, Volume I, the execution latency of /// this instruction must always be independent from the data it operates on. #[inline] +#[target_feature(enable = "zksh")] pub fn sm3p0(x: u32) -> u32 { - let ans: u32; unsafe { - // asm!("sm3p0 {}, {}", out(reg) ans, in(reg) x, options(nomem, nostack)) - asm!(".insn i 0x13, 0x1, {}, {}, 0x108", out(reg) ans, in(reg) x, options(nomem, nostack)) - }; - ans + core::mem::transmute::<_, usize>(sm3p0_isize(core::mem::transmute::<_, i32>(x) as isize)) + as u32 + } } /// `P1` transformation function as is used in the SM3 hash algorithm @@ -634,13 +633,12 @@ pub fn sm3p0(x: u32) -> u32 { /// According to RISC-V Cryptography Extensions, Volume I, the execution latency of /// this instruction must always be independent from the data it operates on. 
#[inline] +#[target_feature(enable = "zksh")] pub fn sm3p1(x: u32) -> u32 { - let ans: u32; unsafe { - // asm!("sm3p1 {}, {}", out(reg) ans, in(reg) x, options(nomem, nostack)) - asm!(".insn i 0x13, 0x1, {}, {}, 0x109", out(reg) ans, in(reg) x, options(nomem, nostack)) - }; - ans + core::mem::transmute::<_, usize>(sm3p1_isize(core::mem::transmute::<_, i32>(x) as isize)) + as u32 + } } /// Accelerates the round function `F` in the SM4 block cipher algorithm @@ -684,25 +682,17 @@ pub fn sm3p1(x: u32) -> u32 { /// /// According to RISC-V Cryptography Extensions, Volume I, the execution latency of /// this instruction must always be independent from the data it operates on. +#[inline] +#[target_feature(enable = "zksed")] pub fn sm4ed(x: u32, a: u32) -> u32 { static_assert!(BS: u8 where BS <= 3); - let ans: u32; - match BS { - 0 => unsafe { - asm!(".insn r 0x33, 0, 0x18, {}, {}, {}", out(reg) ans, in(reg) x, in(reg) a, options(nomem, nostack)) - }, - 1 => unsafe { - asm!(".insn r 0x33, 0, 0x38, {}, {}, {}", out(reg) ans, in(reg) x, in(reg) a, options(nomem, nostack)) - }, - 2 => unsafe { - asm!(".insn r 0x33, 0, 0x58, {}, {}, {}", out(reg) ans, in(reg) x, in(reg) a, options(nomem, nostack)) - }, - 3 => unsafe { - asm!(".insn r 0x33, 0, 0x78, {}, {}, {}", out(reg) ans, in(reg) x, in(reg) a, options(nomem, nostack)) - }, - _ => unreachable!(), - }; - ans + unsafe { + core::mem::transmute::<_, usize>(sm4ed_isize( + core::mem::transmute::<_, i32>(x) as isize, + core::mem::transmute::<_, i32>(a) as isize, + BS as i8, + )) as u32 + } } /// Accelerates the key schedule operation in the SM4 block cipher algorithm @@ -749,23 +739,26 @@ pub fn sm4ed(x: u32, a: u32) -> u32 { /// /// According to RISC-V Cryptography Extensions, Volume I, the execution latency of /// this instruction must always be independent from the data it operates on. 
+#[inline] +#[target_feature(enable = "zksed")] pub fn sm4ks(x: u32, k: u32) -> u32 { static_assert!(BS: u8 where BS <= 3); - let ans: u32; - match BS { - 0 => unsafe { - asm!(".insn r 0x33, 0, 0x1A, {}, {}, {}", out(reg) ans, in(reg) x, in(reg) k, options(nomem, nostack)) - }, - 1 => unsafe { - asm!(".insn r 0x33, 0, 0x3A, {}, {}, {}", out(reg) ans, in(reg) x, in(reg) k, options(nomem, nostack)) - }, - 2 => unsafe { - asm!(".insn r 0x33, 0, 0x5A, {}, {}, {}", out(reg) ans, in(reg) x, in(reg) k, options(nomem, nostack)) - }, - 3 => unsafe { - asm!(".insn r 0x33, 0, 0x7A, {}, {}, {}", out(reg) ans, in(reg) x, in(reg) k, options(nomem, nostack)) - }, - _ => unreachable!(), - }; - ans + unsafe { + core::mem::transmute::<_, usize>(sm4ks_isize( + core::mem::transmute::<_, i32>(x) as isize, + core::mem::transmute::<_, i32>(k) as isize, + BS as i8, + )) as u32 + } +} + +extern "unadjusted" { + #[link_name = "llvm.riscv.sm3p0"] + fn sm3p0_isize(x: isize) -> isize; + #[link_name = "llvm.riscv.sm3p1"] + fn sm3p1_isize(x: isize) -> isize; + #[link_name = "llvm.riscv.sm4ed"] + fn sm4ed_isize(x: isize, a: isize, bs: i8) -> isize; + #[link_name = "llvm.riscv.sm4ks"] + fn sm4ks_isize(x: isize, a: isize, bs: i8) -> isize; } From 7882885339a053bdb678ad562ea9160e0cd9fdae Mon Sep 17 00:00:00 2001 From: luojia65 Date: Mon, 21 Mar 2022 17:21:20 +0800 Subject: [PATCH 2/4] Remove stable feature gate attributes The features `aarch64_target_feature` and `adx_target_feature` have been stable since 1.61.0 and no longer require an attribute to enable them. --- crates/core_arch/src/lib.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/crates/core_arch/src/lib.rs b/crates/core_arch/src/lib.rs index 7076be6592..7ec8a75960 100644 --- a/crates/core_arch/src/lib.rs +++ b/crates/core_arch/src/lib.rs @@ -22,14 +22,12 @@ sse4a_target_feature, riscv_target_feature, arm_target_feature, - aarch64_target_feature, cmpxchg16b_target_feature, avx512_target_feature, mips_target_feature, powerpc_target_feature, 
wasm_target_feature, abi_unadjusted, - adx_target_feature, rtm_target_feature, f16c_target_feature, allow_internal_unstable, From 569f549d89455f0749cabe64d6681784562927e2 Mon Sep 17 00:00:00 2001 From: luojia65 Date: Mon, 21 Mar 2022 17:38:19 +0800 Subject: [PATCH 3/4] Small fix --- crates/core_arch/src/riscv_shared/mod.rs | 34 ++++++------------------ 1 file changed, 8 insertions(+), 26 deletions(-) diff --git a/crates/core_arch/src/riscv_shared/mod.rs b/crates/core_arch/src/riscv_shared/mod.rs index 86a83a9ab3..cee825679d 100644 --- a/crates/core_arch/src/riscv_shared/mod.rs +++ b/crates/core_arch/src/riscv_shared/mod.rs @@ -604,10 +604,7 @@ pub unsafe fn hinval_gvma_all() { #[inline] #[target_feature(enable = "zksh")] pub fn sm3p0(x: u32) -> u32 { - unsafe { - core::mem::transmute::<_, usize>(sm3p0_isize(core::mem::transmute::<_, i32>(x) as isize)) - as u32 - } + unsafe { sm3p0_usize(x as usize) as u32 } } /// `P1` transformation function as is used in the SM3 hash algorithm @@ -635,10 +632,7 @@ pub fn sm3p0(x: u32) -> u32 { #[inline] #[target_feature(enable = "zksh")] pub fn sm3p1(x: u32) -> u32 { - unsafe { - core::mem::transmute::<_, usize>(sm3p1_isize(core::mem::transmute::<_, i32>(x) as isize)) - as u32 - } + unsafe { sm3p1_usize(x as usize) as u32 } } /// Accelerates the round function `F` in the SM4 block cipher algorithm @@ -686,13 +680,7 @@ pub fn sm3p1(x: u32) -> u32 { #[target_feature(enable = "zksed")] pub fn sm4ed(x: u32, a: u32) -> u32 { static_assert!(BS: u8 where BS <= 3); - unsafe { - core::mem::transmute::<_, usize>(sm4ed_isize( - core::mem::transmute::<_, i32>(x) as isize, - core::mem::transmute::<_, i32>(a) as isize, - BS as i8, - )) as u32 - } + unsafe { sm4ed_usize(x as usize, a as usize, BS as i8) as u32 } } /// Accelerates the key schedule operation in the SM4 block cipher algorithm @@ -743,22 +731,16 @@ pub fn sm4ed(x: u32, a: u32) -> u32 { #[target_feature(enable = "zksed")] pub fn sm4ks(x: u32, k: u32) -> u32 { static_assert!(BS: u8 
where BS <= 3); - unsafe { - core::mem::transmute::<_, usize>(sm4ks_isize( - core::mem::transmute::<_, i32>(x) as isize, - core::mem::transmute::<_, i32>(k) as isize, - BS as i8, - )) as u32 - } + unsafe { sm4ks_usize(x as usize, k as usize, BS as i8) as u32 } } extern "unadjusted" { #[link_name = "llvm.riscv.sm3p0"] - fn sm3p0_isize(x: isize) -> isize; + fn sm3p0_usize(x: usize) -> usize; #[link_name = "llvm.riscv.sm3p1"] - fn sm3p1_isize(x: isize) -> isize; + fn sm3p1_usize(x: usize) -> usize; #[link_name = "llvm.riscv.sm4ed"] - fn sm4ed_isize(x: isize, a: isize, bs: i8) -> isize; + fn sm4ed_usize(x: usize, a: usize, bs: i8) -> usize; #[link_name = "llvm.riscv.sm4ks"] - fn sm4ks_isize(x: isize, a: isize, bs: i8) -> isize; + fn sm4ks_usize(x: usize, k: usize, bs: i8) -> usize; } From 4df0783e4b51c81a7becfcf8b44d8fc5052259f9 Mon Sep 17 00:00:00 2001 From: luojia65 Date: Tue, 22 Mar 2022 13:48:18 +0800 Subject: [PATCH 4/4] Sign-extend Zks intrinsic function input If code after this commit generates more than one instruction, it should be reverted. --- crates/core_arch/src/riscv_shared/mod.rs | 30 +++++++++++++++++------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/crates/core_arch/src/riscv_shared/mod.rs b/crates/core_arch/src/riscv_shared/mod.rs index cee825679d..b287371022 100644 --- a/crates/core_arch/src/riscv_shared/mod.rs +++ b/crates/core_arch/src/riscv_shared/mod.rs @@ -1,6 +1,7 @@ //! 
Shared RISC-V intrinsics use crate::arch::asm; +use core::mem::transmute; /// Generates the `PAUSE` instruction /// @@ -604,7 +605,8 @@ pub unsafe fn hinval_gvma_all() { #[inline] #[target_feature(enable = "zksh")] pub fn sm3p0(x: u32) -> u32 { - unsafe { sm3p0_usize(x as usize) as u32 } + // sign extend parameter to isize + unsafe { sm3p0_isize(transmute::<_, i32>(x) as isize) as u32 } } /// `P1` transformation function as is used in the SM3 hash algorithm @@ -632,7 +634,7 @@ pub fn sm3p0(x: u32) -> u32 { #[inline] #[target_feature(enable = "zksh")] pub fn sm3p1(x: u32) -> u32 { - unsafe { sm3p1_usize(x as usize) as u32 } + unsafe { sm3p1_isize(transmute::<_, i32>(x) as isize) as u32 } } /// Accelerates the round function `F` in the SM4 block cipher algorithm @@ -680,7 +682,13 @@ pub fn sm3p1(x: u32) -> u32 { #[target_feature(enable = "zksed")] pub fn sm4ed(x: u32, a: u32) -> u32 { static_assert!(BS: u8 where BS <= 3); - unsafe { sm4ed_usize(x as usize, a as usize, BS as i8) as u32 } + unsafe { + sm4ed_isize( + transmute::<_, i32>(x) as isize, + transmute::<_, i32>(a) as isize, + BS as i8, + ) as u32 + } } /// Accelerates the key schedule operation in the SM4 block cipher algorithm @@ -731,16 +739,22 @@ pub fn sm4ed(x: u32, a: u32) -> u32 { #[target_feature(enable = "zksed")] pub fn sm4ks(x: u32, k: u32) -> u32 { static_assert!(BS: u8 where BS <= 3); - unsafe { sm4ks_usize(x as usize, k as usize, BS as i8) as u32 } + unsafe { + sm4ks_isize( + transmute::<_, i32>(x) as isize, + transmute::<_, i32>(k) as isize, + BS as i8, + ) as u32 + } } extern "unadjusted" { #[link_name = "llvm.riscv.sm3p0"] - fn sm3p0_usize(x: usize) -> usize; + fn sm3p0_isize(x: isize) -> isize; #[link_name = "llvm.riscv.sm3p1"] - fn sm3p1_usize(x: usize) -> usize; + fn sm3p1_isize(x: isize) -> isize; #[link_name = "llvm.riscv.sm4ed"] - fn sm4ed_usize(x: usize, a: usize, bs: i8) -> usize; + fn sm4ed_isize(x: isize, a: isize, bs: i8) -> isize; #[link_name = "llvm.riscv.sm4ks"] - fn 
sm4ks_usize(x: usize, k: usize, bs: i8) -> usize; + fn sm4ks_isize(x: isize, k: isize, bs: i8) -> isize; }