Skip to content

Commit 02e9ddf

Browse files
committed
Updated arm kernel type signature to match caller
1 parent 5c945be commit 02e9ddf

File tree

2 files changed

+11
-11
lines changed

2 files changed

+11
-11
lines changed

python/tvm/topi/arm_cpu/mprofile/dsp/micro_kernel/max_pool.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,7 @@ def _body():
4646
ib = tvm.tir.ir_builder.create()
4747
ib.emit(
4848
tvm.tir.call_extern(
49-
cc.dtype,
49+
"int32",
5050
f"{func_prefix}_{uniq_id}",
5151
aa.access_ptr("r"),
5252
cc.access_ptr("w"),
@@ -59,7 +59,7 @@ def _reduce_reset():
5959
ib = tvm.tir.ir_builder.create()
6060
ib.emit(
6161
tvm.tir.call_extern(
62-
cc.dtype, f"{func_prefix}_reset_{uniq_id}", cc.access_ptr("w"), cc.strides[0]
62+
"int32", f"{func_prefix}_reset_{uniq_id}", cc.access_ptr("w"), cc.strides[0]
6363
)
6464
)
6565
return ib.get()
@@ -96,7 +96,7 @@ def max_impl(uniq_id):
9696
#endif
9797
__attribute__((always_inline)) static inline int32_t max8_reset_{uniq_id}(
9898
int8_t *res,
99-
int N) {{
99+
int32_t N) {{
100100
memset(res, (int8_t)-128, N * sizeof(*res));
101101
return 0;
102102
}}
@@ -107,7 +107,7 @@ def max_impl(uniq_id):
107107
__attribute__((always_inline)) static inline int32_t max8_loop_{uniq_id}(
108108
int8_t *arg,
109109
int8_t *res,
110-
int N) {{
110+
int32_t N) {{
111111
for ( int i = 0; i < N; ++ i )
112112
if ( arg[i] > res[i] )
113113
res[i] = arg[i];
@@ -120,7 +120,7 @@ def max_impl(uniq_id):
120120
__attribute__((always_inline)) static inline int32_t max8_{uniq_id}(
121121
int8_t *arg,
122122
int8_t *res,
123-
int N) {{
123+
int32_t N) {{
124124
int32_t *parg32, *pres32;
125125
int una_arg = (int32_t)arg & 0x3, una_res = (int32_t)res & 0x3;
126126
int32_t retcode = 0;

python/tvm/topi/arm_cpu/mprofile/dsp/micro_kernel/tensordot.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -155,14 +155,14 @@ def _load_tensor_vars(halfwords, tensor_w) -> Iterator[str]:
155155
var_name = f"{_get_int16_alias(halfwords[i])}__{_get_int16_alias(halfwords[i+1])}"
156156
y, x = halfwords[i + 1] or halfwords[i]
157157
tensor_index = (y * tensor_w + x + offset) // 2
158-
yield f"int32_t tensor__{var_name} = tensor[{tensor_index}];"
158+
yield f"int32_t tensor__{var_name} = ((int32_t*)tensor)[{tensor_index}];"
159159

160160

161161
def _load_kernel_vars(halfwords) -> Iterator[str]:
162162
assert len(halfwords) % 2 == 0
163163
for i in range(0, len(halfwords), 2):
164164
var_name = f"{_get_int16_alias(halfwords[i])}__{_get_int16_alias(halfwords[i+1])}"
165-
yield f"int32_t kernel__{var_name} = kernel[{i // 2}];"
165+
yield f"int32_t kernel__{var_name} = ((int32_t*)kernel)[{i // 2}];"
166166

167167

168168
def _get_draft_macs(
@@ -280,7 +280,7 @@ def _write_sums_to_memory(num_outputs, offset, stride) -> Iterator[str]:
280280

281281
if stride > 1:
282282
for i in range(num_outputs):
283-
yield f"((int16_t*) output)[{i * stride + offset}] = (int16_t) requant_{i};"
283+
yield f"output[{i * stride + offset}] = (int16_t) requant_{i};"
284284

285285
else:
286286
num_packed = (num_outputs - offset) // 2
@@ -296,13 +296,13 @@ def _write_sums_to_memory(num_outputs, offset, stride) -> Iterator[str]:
296296
)
297297

298298
if offset == 1:
299-
yield "((int16_t*) output)[1] = (int16_t) requant_0;"
299+
yield "output[1] = (int16_t) requant_0;"
300300

301301
for i in range(num_packed):
302302
yield f"output[{offset + i}] = packed_res_{i};"
303303

304304
if (offset + num_outputs) % 2 == 1:
305-
yield f"((int16_t*) output)[{num_packed * 2}] = (int16_t) requant_{num_packed * 2};"
305+
yield f"output[{num_packed * 2}] = (int16_t) requant_{num_packed * 2};"
306306

307307

308308
def tensordot_int16_impl(
@@ -390,7 +390,7 @@ def insert_lines(lines):
390390
#define {function_name.upper()}_EXISTS
391391
#include <arm_acle.h>
392392
__attribute__((always_inline)) static inline int32_t {function_name}(
393-
int32_t *output, int32_t *tensor, int32_t *kernel, int32_t *bias, int32_t *scale
393+
int16_t *output, int16_t *tensor, int16_t *kernel, int32_t *bias, int32_t *scale
394394
) {{
395395
{_init_biased_accumulators(num_outputs)}
396396

0 commit comments

Comments
 (0)