Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,16 @@ bool WebAssemblyDAGToDAGISel::SelectAddrAddOperands(MVT OffsetType, SDValue N,
Addr = OtherOp;
return true;
}

// Fold Add of Global Address straight into load
if (Op.getOpcode() == WebAssemblyISD::Wrapper)
Op = Op.getOperand(0);

if (Op.getOpcode() == ISD::TargetGlobalAddress) {
Addr = OtherOp;
Offset = Op;
return true;
}
}
return false;
}
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/WebAssembly/address-offsets.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ define i32 @load_test0() {
; CHECK-LABEL: load_test0:
; CHECK: .functype load_test0 () -> (i32)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: global.get $push0=, g@GOT
; CHECK-NEXT: i32.load $push1=, 40($pop0)
; CHECK-NEXT: i32.const $push0=, 40
; CHECK-NEXT: i32.load $push1=, g@GOT($pop0)
; CHECK-NEXT: return $pop1
%t = load i32, ptr getelementptr inbounds ([0 x i32], ptr @g, i32 0, i32 10), align 4
ret i32 %t
Expand Down Expand Up @@ -395,8 +395,8 @@ define void @store_test0(i32 %i) {
; CHECK-LABEL: store_test0:
; CHECK: .functype store_test0 (i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: global.get $push0=, g@GOT
; CHECK-NEXT: i32.store 40($pop0), $0
; CHECK-NEXT: i32.const $push0=, 40
; CHECK-NEXT: i32.store g@GOT($pop0), $0
; CHECK-NEXT: return
store i32 %i, ptr getelementptr inbounds ([0 x i32], ptr @g, i32 0, i32 10), align 4
ret void
Expand Down
19 changes: 9 additions & 10 deletions llvm/test/CodeGen/WebAssembly/eh-lsda.ll
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
; RUN: llc < %s --mtriple=wasm32-unknown-unknown -wasm-disable-explicit-locals -wasm-keep-registers -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling | FileCheck %s -check-prefixes=CHECK,NOPIC -DPTR=32
; RUN: llc < %s --mtriple=wasm64-unknown-unknown -wasm-disable-explicit-locals -wasm-keep-registers -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling | FileCheck %s -check-prefixes=CHECK,NOPIC -DPTR=64
; RUN: llc < %s --mtriple=wasm32-unknown-emscripten -wasm-disable-explicit-locals -wasm-keep-registers -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling | FileCheck %s -check-prefixes=CHECK,NOPIC -DPTR=32
; RUN: llc < %s --mtriple=wasm64-unknown-emscripten -wasm-disable-explicit-locals -wasm-keep-registers -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling | FileCheck %s -check-prefixes=CHECK,NOPIC -DPTR=64
; RUN: llc < %s --mtriple=wasm32-unknown-emscripten -wasm-disable-explicit-locals -wasm-keep-registers -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling -relocation-model=pic | FileCheck %s -check-prefixes=CHECK,PIC -DPTR=32
; RUN: llc < %s --mtriple=wasm64-unknown-emscripten -wasm-disable-explicit-locals -wasm-keep-registers -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling -relocation-model=pic | FileCheck %s -check-prefixes=CHECK,PIC -DPTR=64
; RUN: llc < %s --mtriple=wasm32-unknown-unknown -wasm-disable-explicit-locals -wasm-keep-registers -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling | FileCheck %s -check-prefixes=CHECK,NOPIC -DPTR=32 -DALIGNMENT=4
; RUN: llc < %s --mtriple=wasm64-unknown-unknown -wasm-disable-explicit-locals -wasm-keep-registers -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling | FileCheck %s -check-prefixes=CHECK,NOPIC -DPTR=64 -DALIGNMENT=8
; RUN: llc < %s --mtriple=wasm32-unknown-emscripten -wasm-disable-explicit-locals -wasm-keep-registers -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling | FileCheck %s -check-prefixes=CHECK,NOPIC -DPTR=32 -DALIGNMENT=4
; RUN: llc < %s --mtriple=wasm64-unknown-emscripten -wasm-disable-explicit-locals -wasm-keep-registers -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling | FileCheck %s -check-prefixes=CHECK,NOPIC -DPTR=64 -DALIGNMENT=8
; RUN: llc < %s --mtriple=wasm32-unknown-emscripten -wasm-disable-explicit-locals -wasm-keep-registers -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling -relocation-model=pic | FileCheck %s -check-prefixes=CHECK,PIC -DPTR=32 -DALIGNMENT=4
; RUN: llc < %s --mtriple=wasm64-unknown-emscripten -wasm-disable-explicit-locals -wasm-keep-registers -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling -relocation-model=pic | FileCheck %s -check-prefixes=CHECK,PIC -DPTR=64 -DALIGNMENT=8

@_ZTIi = external constant ptr
@_ZTIf = external constant ptr
Expand Down Expand Up @@ -66,18 +66,17 @@ try.cont: ; preds = %entry, %catch.start

; CHECK-LABEL: test1:
; In static linking, we load GCC_except_table as a constant directly.
; NOPIC: i[[PTR]].const $push[[CONTEXT:.*]]=, __wasm_lpad_context
; NOPIC: i[[PTR]].const $push[[CONTEXT:.*]]=, [[ALIGNMENT]]
; NOPIC-NEXT: i[[PTR]].const $push[[EXCEPT_TABLE:.*]]=, GCC_except_table1
; NOPIC-NEXT: i[[PTR]].store {{[48]}}($pop[[CONTEXT]]), $pop[[EXCEPT_TABLE]]
; NOPIC-NEXT: i[[PTR]].store __wasm_lpad_context($pop[[CONTEXT]]), $pop[[EXCEPT_TABLE]]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's interesting that this transformation eagerly swaps these 2 constants, even though it's not actually an optimization. I'm a little bit ambivalent about this because the original actually looks more natural to me (i.e. the operand contains the actual address, and the offset field is actually an offset). OTOH adding more complexity to the transform to prevent this doesn't seem great either, considering that it's not really any worse.

It does suggest though that we might be able to do a further optimization of actually combining these constants in the compiler? In an isolated case like this we couldn't actually remove the i32.const entirely (since there still needs to be an operand), and the constant offset field is always there, so it would potentially be of no benefit. But if we combine them into the offset and leave behind an i32.const 0, it's plausible that some subsequent optimization could remove it where appropriate. And this is a point where we have the information that the add is nuw so it's legal to do, whereas some subsequent pass (or post-link optimizer like Binaryen) might not have it. So it still seems worthwhile.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suppose if we want to be really galaxy-brained, it's possible to have a situation where splitting an LEB-encoded value into 2 can actually reduce the overall size because of the way the encoding works; e.g. if one value is 128 and the other is 0, it takes 3 bytes to encode the first and one for the second, but if we split them into 2 64s, they would encode in 1 byte each!

... still, that seems like a bit of a stretch and maybe not as good as just leaving the 0 in the const field.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh wait... actually we can't do this when one of the operands is relocatable anyway. So it doesn't really apply for this PR. And for cases where they are just regular constants, it seems likely they'd have already been combined earlier in the compiler pipeline anyway. So, yeah there's probably nothing there, other than my original observation about eagerly swapping the operands when there's no actual add instruction to eliminate.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's interesting that this transformation eagerly swaps these 2 constants, even though it's not actually an optimization. I'm a little bit ambivalent about this because the original actually looks more natural to me

FWIW I had the same thought too. I guess we could just swap around the offset and addrs and see if that maintains the original order in this test? But it's probably dependent on the order of the operands in the add node.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i tried swapping Addr and Offset in WebAssemblyISelDAGToDAG and it seems it generates bad machine code on several test case, for example: cfg-stackify-eh.ll

*** Bad machine code: Expected a register operand. ***
- function:    two_catches
- basic block: %bb.1 catch.start (0x149811920)
- instruction: STORE_I32_A32 2, killed %3:i32, @__wasm_lpad_context, killed %2:i32, implicit-def dead $arguments :: (store (s32) into `ptr getelementptr inbounds ({ i32, ptr, i32 }, ptr @__wasm_lpad_context, i32 0, i32 1)`)
- operand 2:   @__wasm_lpad_context

*** Bad machine code: Expected a register operand. ***
- function:    two_catches
- basic block: %bb.1 catch.start (0x149811920)
- instruction: %1:i32 = LOAD_I32_A32 2, killed %6:i32, @__wasm_lpad_context, implicit-def dead $arguments :: (dereferenceable load (s32) from `ptr getelementptr inbounds ({ i32, ptr, i32 }, ptr @__wasm_lpad_context, i32 0, i32 2)`)
- operand 3:   @__wasm_lpad_context
LLVM ERROR: Found 2 machine code errors.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i'll push the changes to current nits (excluding swapping) but lmk if we'd like to pursue this avenue


; In case of PIC, we make GCC_except_table symbols a relative on based on
; __memory_base.
; PIC: global.get $push[[CONTEXT:.*]]=, __wasm_lpad_context@GOT
; PIC-NEXT: local.tee $push{{.*}}=, $[[CONTEXT_LOCAL:.*]]=, $pop[[CONTEXT]]
; PIC: global.get $push[[MEMORY_BASE:.*]]=, __memory_base
; PIC-NEXT: i[[PTR]].const $push[[EXCEPT_TABLE_REL:.*]]=, GCC_except_table1@MBREL
; PIC-NEXT: i[[PTR]].add $push[[EXCEPT_TABLE:.*]]=, $pop[[MEMORY_BASE]], $pop[[EXCEPT_TABLE_REL]]
; PIC-NEXT: i[[PTR]].store {{[48]}}($[[CONTEXT_LOCAL]]), $pop[[EXCEPT_TABLE]]
; PIC-NEXT: i[[PTR]].store __wasm_lpad_context@GOT(${{.*}}), $pop[[EXCEPT_TABLE]]

; CHECK: .section .rodata.gcc_except_table,"",@
; CHECK-NEXT: .p2align 2
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/WebAssembly/exception-legacy.ll
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ define void @throw(ptr %p) {
; CHECK: call foo
; CHECK: catch $[[EXN:[0-9]+]]=, __cpp_exception
; CHECK: global.set __stack_pointer
; CHECK: i32.{{store|const}} {{.*}} __wasm_lpad_context
; CHECK: i32.{{store|const}} {{.*}} 4
; CHECK: call $drop=, _Unwind_CallPersonality, $[[EXN]]
; CHECK: block
; CHECK: br_if 0
Expand Down
25 changes: 25 additions & 0 deletions llvm/test/CodeGen/WebAssembly/offset.ll
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,31 @@ define i32 @load_i32_with_folded_gep_offset_nuw(ptr %p) {
ret i32 %t
}

@global_data = hidden local_unnamed_addr global [12 x i8] c"Hello world\00", align 1

define hidden signext i8 @global_load_i32_with_folded_gep_offset_nonconst_nuw(i32 noundef %idx) local_unnamed_addr {
; CHECK-LABEL: global_load_i32_with_folded_gep_offset_nonconst_nuw:
; CHECK: .functype global_load_i32_with_folded_gep_offset_nonconst_nuw (i32) -> (i32)
; CHECK: i32.load8_s $push0=, global_data($0)
; CHECK: return $pop0
entry:
%arrayidx = getelementptr inbounds nuw [12 x i8], ptr @global_data, i32 0, i32 %idx
%0 = load i8, ptr %arrayidx, align 1
ret i8 %0
}

define hidden signext i8 @global_load_i32_with_folded_gep_offset_const_nuw() local_unnamed_addr {
; CHECK-LABEL: global_load_i32_with_folded_gep_offset_const_nuw:
; CHECK: .functype global_load_i32_with_folded_gep_offset_const_nuw () -> (i32)
; CHECK: i32.const $push0=, 0
; CHECK: i32.load8_s $push1=, global_data+2($pop0)
; CHECK: return $pop1
entry:
%0 = load i8, ptr getelementptr inbounds nuw (i8, ptr @global_data, i32 2), align 1
ret i8 %0
}


; We can't fold a negative offset though, even with an inbounds gep.

; CHECK-LABEL: load_i32_with_unfolded_gep_negative_offset:
Expand Down