Skip to content

Commit 9a51f40

Browse files
committed
[SOL] Add SBF compiler-rt builtins
1 parent e535821 commit 9a51f40

File tree

1 file changed

+195
-0
lines changed

1 file changed

+195
-0
lines changed
Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
//===-- udivmodti4.c - Implement __udivmodti4 -----------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file implements __udivmodti4 for the compiler_rt library.
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#include "../int_lib.h"
14+
15+
#ifdef CRT_HAS_128BIT
16+
17+
// Effects: if rem != 0, *rem = a % b
18+
// Returns: a / b
19+
20+
// Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide
21+
22+
COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int *rem) {
23+
const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
24+
const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT;
25+
utwords n;
26+
n.all = a;
27+
utwords d;
28+
d.all = b;
29+
utwords q;
30+
utwords r;
31+
unsigned sr;
32+
// special cases, X is unknown, K != 0
33+
if (n.s.high == 0) {
34+
if (d.s.high == 0) {
35+
// 0 X
36+
// ---
37+
// 0 X
38+
if (rem)
39+
*rem = n.s.low % d.s.low;
40+
return n.s.low / d.s.low;
41+
}
42+
// 0 X
43+
// ---
44+
// K X
45+
if (rem)
46+
*rem = n.s.low;
47+
return 0;
48+
}
49+
// n.s.high != 0
50+
if (d.s.low == 0) {
51+
if (d.s.high == 0) {
52+
// K X
53+
// ---
54+
// 0 0
55+
if (rem)
56+
*rem = n.s.high % d.s.low;
57+
return n.s.high / d.s.low;
58+
}
59+
// d.s.high != 0
60+
if (n.s.low == 0) {
61+
// K 0
62+
// ---
63+
// K 0
64+
if (rem) {
65+
r.s.high = n.s.high % d.s.high;
66+
r.s.low = 0;
67+
*rem = r.all;
68+
}
69+
return n.s.high / d.s.high;
70+
}
71+
// K K
72+
// ---
73+
// K 0
74+
if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */ {
75+
if (rem) {
76+
r.s.low = n.s.low;
77+
r.s.high = n.s.high & (d.s.high - 1);
78+
*rem = r.all;
79+
}
80+
return n.s.high >> __builtin_ctzll(d.s.high);
81+
}
82+
// K K
83+
// ---
84+
// K 0
85+
sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high);
86+
// 0 <= sr <= n_udword_bits - 2 or sr large
87+
if (sr > n_udword_bits - 2) {
88+
if (rem)
89+
*rem = n.all;
90+
return 0;
91+
}
92+
++sr;
93+
// 1 <= sr <= n_udword_bits - 1
94+
// q.all = n.all << (n_utword_bits - sr);
95+
q.s.low = 0;
96+
q.s.high = n.s.low << (n_udword_bits - sr);
97+
// r.all = n.all >> sr;
98+
r.s.high = n.s.high >> sr;
99+
r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
100+
} else /* d.s.low != 0 */ {
101+
if (d.s.high == 0) {
102+
// K X
103+
// ---
104+
// 0 K
105+
if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */ {
106+
if (rem)
107+
*rem = n.s.low & (d.s.low - 1);
108+
if (d.s.low == 1)
109+
return n.all;
110+
sr = __builtin_ctzll(d.s.low);
111+
q.s.high = n.s.high >> sr;
112+
q.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
113+
return q.all;
114+
}
115+
// K X
116+
// ---
117+
// 0 K
118+
sr = 1 + n_udword_bits + __builtin_clzll(d.s.low) -
119+
__builtin_clzll(n.s.high);
120+
// 2 <= sr <= n_utword_bits - 1
121+
// q.all = n.all << (n_utword_bits - sr);
122+
// r.all = n.all >> sr;
123+
if (sr == n_udword_bits) {
124+
q.s.low = 0;
125+
q.s.high = n.s.low;
126+
r.s.high = 0;
127+
r.s.low = n.s.high;
128+
} else if (sr < n_udword_bits) /* 2 <= sr <= n_udword_bits - 1 */ {
129+
q.s.low = 0;
130+
q.s.high = n.s.low << (n_udword_bits - sr);
131+
r.s.high = n.s.high >> sr;
132+
r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
133+
} else /* n_udword_bits + 1 <= sr <= n_utword_bits - 1 */ {
134+
q.s.low = n.s.low << (n_utword_bits - sr);
135+
q.s.high = (n.s.high << (n_utword_bits - sr)) |
136+
(n.s.low >> (sr - n_udword_bits));
137+
r.s.high = 0;
138+
r.s.low = n.s.high >> (sr - n_udword_bits);
139+
}
140+
} else {
141+
// K X
142+
// ---
143+
// K K
144+
sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high);
145+
// 0 <= sr <= n_udword_bits - 1 or sr large
146+
if (sr > n_udword_bits - 1) {
147+
if (rem)
148+
*rem = n.all;
149+
return 0;
150+
}
151+
++sr;
152+
// 1 <= sr <= n_udword_bits
153+
// q.all = n.all << (n_utword_bits - sr);
154+
// r.all = n.all >> sr;
155+
q.s.low = 0;
156+
if (sr == n_udword_bits) {
157+
q.s.high = n.s.low;
158+
r.s.high = 0;
159+
r.s.low = n.s.high;
160+
} else {
161+
r.s.high = n.s.high >> sr;
162+
r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
163+
q.s.high = n.s.low << (n_udword_bits - sr);
164+
}
165+
}
166+
}
167+
// Not a special case
168+
// q and r are initialized with:
169+
// q.all = n.all << (n_utword_bits - sr);
170+
// r.all = n.all >> sr;
171+
// 1 <= sr <= n_utword_bits - 1
172+
su_int carry = 0;
173+
for (; sr > 0; --sr) {
174+
// r:q = ((r:q) << 1) | carry
175+
r.s.high = (r.s.high << 1) | (r.s.low >> (n_udword_bits - 1));
176+
r.s.low = (r.s.low << 1) | (q.s.high >> (n_udword_bits - 1));
177+
q.s.high = (q.s.high << 1) | (q.s.low >> (n_udword_bits - 1));
178+
q.s.low = (q.s.low << 1) | carry;
179+
// carry = 0;
180+
// if (r.all >= d.all)
181+
// {
182+
// r.all -= d.all;
183+
// carry = 1;
184+
// }
185+
const ti_int s = (ti_int)(d.all - r.all - 1) >> (n_utword_bits - 1);
186+
carry = s & 1;
187+
r.all -= d.all & s;
188+
}
189+
q.all = (q.all << 1) | carry;
190+
if (rem)
191+
*rem = r.all;
192+
return q.all;
193+
}
194+
195+
#endif // CRT_HAS_128BIT

0 commit comments

Comments
 (0)