Skip to content

Commit 34fe416

Browse files
committed
Draft of Julia initialization refactor. This is mostly focused on making embedding Julia as a shared library behave better.
1 parent e4101b7 commit 34fe416

File tree

9 files changed

+143
-24
lines changed

9 files changed

+143
-24
lines changed

contrib/juliac.jl

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ outname = nothing
99
file = nothing
1010
add_ccallables = false
1111
verbose = false
12+
output_llvm = nothing
1213

1314
help = findfirst(x->x == "--help", ARGS)
1415
if help !== nothing
@@ -18,6 +19,7 @@ if help !== nothing
1819
--trim=<no,safe,unsafe,unsafe-warn> Only output code statically determined to be reachable
1920
--compile-ccallable Include all methods marked `@ccallable` in output
2021
--verbose Request verbose output
22+
--output-llvm=<opt,unopt> Output LLVM bitcode
2123
""")
2224
exit(0)
2325
end
@@ -42,6 +44,17 @@ let i = 1
4244
global add_ccallables = true
4345
elseif arg == "--verbose"
4446
global verbose = true
47+
elseif startswith(arg, "--output-llvm")
48+
arg = split(arg, '=')
49+
if length(arg) == 1
50+
global output_llvm = "--output-bc"
51+
elseif arg[2] == "opt"
52+
global output_llvm = "--output-bc"
53+
elseif arg[2] == "unopt"
54+
global output_llvm = "--output-unopt-bc"
55+
else
56+
error("Invalid argument to --output-llvm")
57+
end
4558
else
4659
if arg[1] == '-' || !isnothing(file)
4760
println("Unexpected argument `$arg`")
@@ -65,7 +78,6 @@ tmpdir = mktempdir(cleanup=false)
6578
initsrc_path = joinpath(tmpdir, "init.c")
6679
init_path = joinpath(tmpdir, "init.a")
6780
img_path = joinpath(tmpdir, "img.a")
68-
bc_path = joinpath(tmpdir, "img-bc.a")
6981

7082
open(initsrc_path, "w") do io
7183
print(io, """
@@ -80,6 +92,17 @@ open(initsrc_path, "w") do io
8092
end
8193

8294
static_call_graph_arg() = isnothing(trim) ? `` : `--trim=$(trim)`
95+
96+
if !isnothing(output_llvm)
97+
cmd = addenv(`$cmd --project=$(Base.active_project()) $output_llvm $(outname * ".a") --output-incremental=no --strip-ir --strip-metadata $(static_call_graph_arg()) $(joinpath(@__DIR__,"juliac-buildscript.jl")) $absfile $output_type $add_ccallables`, "OPENBLAS_NUM_THREADS" => 1, "JULIA_NUM_THREADS" => 1)
98+
verbose && println("Running: $cmd")
99+
if !success(pipeline(cmd; stdout, stderr))
100+
println(stderr, "\nFailed to compile $file")
101+
exit(1)
102+
end
103+
exit(0)
104+
end
105+
83106
cmd = addenv(`$cmd --project=$(Base.active_project()) --output-o $img_path --output-incremental=no --strip-ir --strip-metadata $(static_call_graph_arg()) $(joinpath(@__DIR__,"juliac-buildscript.jl")) $absfile $output_type $add_ccallables`, "OPENBLAS_NUM_THREADS" => 1, "JULIA_NUM_THREADS" => 1)
84107
verbose && println("Running: $cmd")
85108
if !success(pipeline(cmd; stdout, stderr))
@@ -100,11 +123,11 @@ end
100123
julia_libs = Base.shell_split(Base.isdebugbuild() ? "-ljulia-debug -ljulia-internal-debug" : "-ljulia -ljulia-internal")
101124
try
102125
if output_type == "--output-lib"
103-
run(`cc $(allflags) -o $outname -shared -Wl,$(Base.Linking.WHOLE_ARCHIVE) $img_path -Wl,$(Base.Linking.NO_WHOLE_ARCHIVE) $init_path $(julia_libs)`)
126+
run(`cc $(allflags) -o $outname -shared -Wl,$(Base.Linking.WHOLE_ARCHIVE) $img_path -Wl,$(Base.Linking.NO_WHOLE_ARCHIVE) $(julia_libs)`)
104127
elseif output_type == "--output-sysimage"
105128
run(`cc $(allflags) -o $outname -shared -Wl,$(Base.Linking.WHOLE_ARCHIVE) $img_path -Wl,$(Base.Linking.NO_WHOLE_ARCHIVE) $(julia_libs)`)
106129
else
107-
run(`cc $(allflags) -o $outname -Wl,$(Base.Linking.WHOLE_ARCHIVE) $img_path -Wl,$(Base.Linking.NO_WHOLE_ARCHIVE) $init_path $(julia_libs)`)
130+
run(`cc $(allflags) -o $outname -Wl,$(Base.Linking.WHOLE_ARCHIVE) $img_path -Wl,$(Base.Linking.NO_WHOLE_ARCHIVE) $(julia_libs)`)
108131
end
109132
catch
110133
println("\nCompilation failed.")

src/dlload.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,10 @@ void *jl_find_dynamic_library_by_addr(void *symbol) {
254254
jl_error("could not load base module");
255255
}
256256
handle = dlopen(info.dli_fname, RTLD_NOW | RTLD_NOLOAD | RTLD_LOCAL);
257-
dlclose(handle); // Undo ref count increment from `dlopen`
257+
if (handle == NULL && dlerror() == NULL) // We loaded the executable but got RTLD_DEFAULT back, give a real handle instead
258+
handle = dlopen("", RTLD_NOW | RTLD_NOLOAD | RTLD_LOCAL);
259+
if (handle) // We may get a null handle so don't segfault
260+
dlclose(handle); // Undo ref count increment from `dlopen`
258261
#endif
259262
return handle;
260263
}

src/init.c

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,7 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode) JL_NOTSAFEPOINT_ENTER
238238
jl_task_t *ct = jl_get_current_task();
239239

240240
if (ct == NULL && jl_base_module) {
241-
ct = container_of(jl_adopt_thread(), jl_task_t, gcstack);
241+
ct = container_of(jl_adopt_thread(NULL, NULL), jl_task_t, gcstack);
242242
}
243243
else if (ct != NULL) {
244244
// we are about to start tearing everything down, so lets try not to get
@@ -379,6 +379,8 @@ JL_DLLEXPORT void *jl_libjulia_internal_handle;
379379
JL_DLLEXPORT void *jl_libjulia_handle;
380380
JL_DLLEXPORT void *jl_RTLD_DEFAULT_handle;
381381
JL_DLLEXPORT void *jl_exe_handle;
382+
JL_DLLEXPORT void *jl_base_image_handle; // Used when the sysimage is bundled as an executable or a standalone dylib
383+
382384
#ifdef _OS_WINDOWS_
383385
void *jl_ntdll_handle;
384386
void *jl_kernel32_handle;
@@ -761,7 +763,6 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel)
761763
rec_backtrace(NULL, 0, 0);
762764
#endif
763765

764-
libsupport_init();
765766
jl_safepoint_init();
766767
jl_page_size = jl_getpagesize();
767768
htable_new(&jl_current_modules, 0);
@@ -843,9 +844,10 @@ static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_
843844
JL_TIMING(JULIA_INIT, JULIA_INIT);
844845
jl_resolve_sysimg_location(rel);
845846
// loads sysimg if available, and conditionally sets jl_options.cpu_target
847+
// If the image is in memory then jl_base_image_handle must be set before calling init
846848
if (rel == JL_IMAGE_IN_MEMORY) {
847-
jl_set_sysimg_so(jl_exe_handle);
848-
jl_options.image_file = jl_options.julia_bin;
849+
jl_set_sysimg_so(jl_base_image_handle);
850+
jl_options.image_file = jl_options.julia_bin; // XXX: Use dladdr on base_image_handle?
849851
}
850852
else if (jl_options.image_file)
851853
jl_preload_sysimg_so(jl_options.image_file);

src/jl_exported_funcs.inc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,7 @@
291291
XX(jl_load_) \
292292
XX(jl_load_and_lookup) \
293293
XX(jl_load_dynamic_library) \
294+
XX(jl_find_dynamic_library_by_addr) \
294295
XX(jl_load_file_string) \
295296
XX(jl_lookup_code_address) \
296297
XX(jl_lseek) \

src/jlapi.c

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,15 @@ extern "C" {
2525
#else
2626
#include <fenv.h>
2727
#endif
28+
_Atomic(int) jl_runtime_is_initialized = 0;
29+
uv_mutex_t initialization_lock;
2830

31+
// Should be called in an attribute constructor function to make initialization threadsafe when embedding as an SO.
32+
__attribute__((constructor)) void jl_preinit_runtime(void)
33+
{
34+
libsupport_init();
35+
uv_mutex_init(&initialization_lock);
36+
}
2937
/**
3038
* @brief Check if Julia is already initialized.
3139
*
@@ -36,7 +44,7 @@ extern "C" {
3644
*/
3745
JL_DLLEXPORT int jl_is_initialized(void)
3846
{
39-
return jl_main_module != NULL;
47+
return jl_atomic_load_acquire(&jl_runtime_is_initialized);
4048
}
4149

4250
/**
@@ -87,7 +95,7 @@ JL_DLLEXPORT void jl_init_with_image(const char *julia_bindir,
8795
{
8896
if (jl_is_initialized())
8997
return;
90-
libsupport_init();
98+
uv_mutex_lock(&initialization_lock);
9199
jl_options.julia_bindir = julia_bindir;
92100
if (image_path != NULL)
93101
jl_options.image_file = image_path;
@@ -1029,7 +1037,7 @@ JL_DLLEXPORT int jl_repl_entrypoint(int argc, char *argv[])
10291037
// No-op on non-windows
10301038
lock_low32();
10311039

1032-
libsupport_init();
1040+
jl_preinit_runtime();
10331041
int lisp_prompt = (argc >= 2 && strcmp((char*)argv[1],"--lisp") == 0);
10341042
if (lisp_prompt) {
10351043
memmove(&argv[1], &argv[2], (argc-2)*sizeof(void*));
@@ -1061,6 +1069,23 @@ JL_DLLEXPORT int jl_repl_entrypoint(int argc, char *argv[])
10611069
return ret;
10621070
}
10631071

1072+
int jl_init_runtime_adopt_thread(void* sysimg_handle) {
1073+
// Take the initialization lock, then re-check whether another thread already initialized the runtime
1074+
uv_mutex_lock(&initialization_lock);
1075+
if (jl_is_initialized()) {
1076+
uv_mutex_unlock(&initialization_lock); // Someone won the race
1077+
return 0;
1078+
}
1079+
//TODO: Implement option parsing
1080+
assert(sysimg_handle);
1081+
// This code path assumes that you are either an executable or dylib with the sysimage linked in
1082+
jl_base_image_handle = sysimg_handle;
1083+
julia_init(JL_IMAGE_IN_MEMORY);
1084+
jl_enter_threaded_region();
1085+
uv_mutex_unlock(&initialization_lock);
1086+
return 1;
1087+
}
1088+
10641089
#ifdef __cplusplus
10651090
}
10661091
#endif

src/julia.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2120,7 +2120,7 @@ JL_DLLEXPORT void jl_postoutput_hook(void);
21202120
JL_DLLEXPORT void JL_NORETURN jl_exit(int status);
21212121
JL_DLLEXPORT void JL_NORETURN jl_raise(int signo);
21222122
JL_DLLEXPORT const char *jl_pathname_for_handle(void *handle);
2123-
JL_DLLEXPORT jl_gcframe_t **jl_adopt_thread(void);
2123+
JL_DLLEXPORT jl_gcframe_t **jl_adopt_thread(void* sysimage_handle);
21242124

21252125
JL_DLLEXPORT int jl_deserialize_verify_header(ios_t *s);
21262126
JL_DLLEXPORT void jl_preload_sysimg_so(const char *fname);

src/julia_internal.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1058,13 +1058,15 @@ void jl_init_serializer(void);
10581058
void jl_init_uv(void);
10591059
void jl_init_int32_int64_cache(void);
10601060
JL_DLLEXPORT void jl_init_options(void);
1061+
int jl_init_runtime_adopt_thread(void* sysimg_handle);
10611062

10621063
void jl_set_base_ctx(char *__stk);
10631064

10641065
extern JL_DLLEXPORT ssize_t jl_tls_offset;
10651066
extern JL_DLLEXPORT const int jl_tls_elf_support;
10661067
void jl_init_threading(void);
10671068
void jl_start_threads(void);
1069+
JL_DLLEXPORT void jl_enter_threaded_region(void);
10681070

10691071
// Whether the GC is running
10701072
extern uv_mutex_t safepoint_lock;
@@ -1454,6 +1456,7 @@ JL_DLLEXPORT void jl_srand(uint64_t) JL_NOTSAFEPOINT;
14541456
JL_DLLEXPORT void jl_init_rand(void);
14551457

14561458
JL_DLLEXPORT extern void *jl_exe_handle;
1459+
JL_DLLEXPORT extern void *jl_base_image_handle;
14571460
JL_DLLEXPORT extern void *jl_libjulia_handle;
14581461
JL_DLLEXPORT extern void *jl_libjulia_internal_handle;
14591462
JL_DLLEXPORT extern void *jl_RTLD_DEFAULT_handle;
@@ -1468,7 +1471,7 @@ void win32_formatmessage(DWORD code, char *reason, int len) JL_NOTSAFEPOINT;
14681471
#endif
14691472

14701473
JL_DLLEXPORT void *jl_get_library_(const char *f_lib, int throw_err);
1471-
void *jl_find_dynamic_library_by_addr(void *symbol);
1474+
JL_DLLEXPORT void *jl_find_dynamic_library_by_addr(void *symbol);
14721475
#define jl_get_library(f_lib) jl_get_library_(f_lib, 1)
14731476
JL_DLLEXPORT void *jl_load_and_lookup(const char *f_lib, const char *f_name, _Atomic(void*) *hnd);
14741477
JL_DLLEXPORT void *jl_lazy_load_and_lookup(jl_value_t *lib_val, const char *f_name);

src/llvm-ptls.cpp

Lines changed: 66 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55
#include "support/dtypes.h"
66
#include "passes.h"
77

8+
#include "llvm/IR/BasicBlock.h"
9+
#include "llvm/IR/Instruction.h"
10+
#include "llvm/Support/Debug.h"
811
#include <llvm-c/Core.h>
912
#include <llvm-c/Types.h>
1013

@@ -151,7 +154,17 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter,
151154
return;
152155
}
153156
if (or_new) {
154-
// pgcstack();
157+
// pgcstack = pgcstack_intrinsic()
158+
159+
160+
// if (offset != 0)
161+
// pgcstack = tp + offset; // fast
162+
// else
163+
// pgcstack_getter = load pgcstack_func_slot
164+
// if pgcstack_getter == nullptr // Runtime not initialized
165+
// pgcstack = nullptr
166+
// else
167+
// pgcstack = pgcstack_getter();
155168
// if (pgcstack != nullptr)
156169
// last_gc_state = emit_gc_unsafe_enter(ctx);
157170
// phi = pgcstack; // fast
@@ -177,17 +190,25 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter,
177190
if (CFGModified)
178191
*CFGModified = true;
179192
// emit slow branch code
180-
CallInst *adopt = cast<CallInst>(pgcstack->clone());
181193
Function *adoptFunc = M->getFunction(XSTR(jl_adopt_thread));
182194
if (adoptFunc == NULL) {
183-
adoptFunc = Function::Create(pgcstack_getter->getFunctionType(),
195+
adoptFunc = Function::Create(FunctionType::get(builder.getPtrTy(), { builder.getPtrTy()}, false),
184196
pgcstack_getter->getLinkage(), pgcstack_getter->getAddressSpace(),
185197
XSTR(jl_adopt_thread), M);
186198
adoptFunc->copyAttributesFrom(pgcstack_getter);
187199
adoptFunc->copyMetadata(pgcstack_getter, 0);
188200
}
189-
adopt->setCalledFunction(adoptFunc);
190-
adopt->insertBefore(slowTerm);
201+
// Adopt thread takes in a handle to the sysimage and this is the easiest way to get it.
202+
Function *dladdr = M->getFunction(XSTR(jl_find_dynamic_library_by_addr)); // gets handle to sysimage
203+
if (dladdr == NULL) {
204+
dladdr = Function::Create(FunctionType::get(builder.getPtrTy(), { builder.getPtrTy()}, false),
205+
pgcstack_getter->getLinkage(), pgcstack_getter->getAddressSpace(),
206+
XSTR(jl_find_dynamic_library_by_addr), M);
207+
}
208+
builder.SetInsertPoint(slowTerm);
209+
auto this_func = builder.GetInsertBlock()->getParent();
210+
auto handle = builder.CreateCall(dladdr, {ConstantExpr::getBitCast(this_func, builder.getPtrTy())});
211+
auto adopt = builder.CreateCall(adoptFunc, {handle});
191212
phi->addIncoming(adopt, slowTerm->getParent());
192213
// emit fast branch code
193214
builder.SetInsertPoint(fastTerm->getParent());
@@ -213,17 +234,20 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter,
213234

214235
if (imaging_mode) {
215236
IRBuilder<> builder(pgcstack);
237+
SmallVector<uint32_t, 2> Weights{9, 1};
238+
MDBuilder MDB(pgcstack->getContext());
216239
if (jl_tls_elf_support) {
217240
// if (offset != 0)
218241
// pgcstack = tp + offset; // fast
219242
// else
220-
// pgcstack = getter(); // slow
243+
// if pgcstack_getter == null
244+
// pgcstack = null; // slow
245+
// else
246+
// pgcstack = pgcstack_getter(); // slow
221247
auto offset = builder.CreateLoad(T_size, pgcstack_offset);
222248
offset->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const);
223249
offset->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(pgcstack->getContext(), None));
224250
auto cmp = builder.CreateICmpNE(offset, Constant::getNullValue(offset->getType()));
225-
MDBuilder MDB(pgcstack->getContext());
226-
SmallVector<uint32_t, 2> Weights{9, 1};
227251
TerminatorInst *fastTerm;
228252
TerminatorInst *slowTerm;
229253
SplitBlockAndInsertIfThenElse(cmp, pgcstack, &fastTerm, &slowTerm,
@@ -240,21 +264,53 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter,
240264
// refresh the basic block in the builder
241265
builder.SetInsertPoint(pgcstack);
242266
auto getter = builder.CreateLoad(T_pgcstack_getter, pgcstack_func_slot);
267+
auto phi_value = cast<Instruction>(pgcstack);
268+
if (or_new) {
269+
// if pgcstack_func_slot is not initialized we set pgcstack to null to trigger the slow path
270+
TerminatorInst *nonNullTerm;
271+
TerminatorInst *nullTerm;
272+
auto is_null = builder.CreateICmpEQ(getter, Constant::getNullValue(builder.getPtrTy()));
273+
SplitBlockAndInsertIfThenElse(is_null, pgcstack, &nullTerm, &nonNullTerm,
274+
MDB.createBranchWeights(Weights));
275+
builder.SetInsertPoint(pgcstack);
276+
auto phi2 = builder.CreatePHI(T_pppjlvalue, 2, "pgcstack");
277+
pgcstack->moveBefore(nonNullTerm);
278+
phi2->addIncoming(pgcstack, nonNullTerm->getParent());
279+
phi2->addIncoming(Constant::getNullValue(T_pppjlvalue), nullTerm->getParent());
280+
phi_value = phi2;
281+
// Check if pgcstack_func_slot is initialized
282+
}
243283
getter->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const);
244284
getter->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(pgcstack->getContext(), None));
245285
pgcstack->setCalledFunction(pgcstack->getFunctionType(), getter);
246286
set_pgcstack_attrs(pgcstack);
247287

248288
phi->addIncoming(fastTLS, fastTLS->getParent());
249-
phi->addIncoming(pgcstack, pgcstack->getParent());
250-
289+
phi->addIncoming(phi_value, phi_value->getParent());
290+
if (or_new) {
291+
pgcstack->getParent()->getParent()->print(dbgs());
292+
}
251293
return;
252294
}
253295
// In imaging mode, we emit the function address as a load of a static
254296
// variable to be filled (in `staticdata.c`) at initialization time of the sysimg.
255297
// This way we can bypass the extra indirection in `jl_get_pgcstack`
256298
// since we may not know which getter function to use ahead of time.
257299
auto getter = builder.CreateLoad(T_pgcstack_getter, pgcstack_func_slot);
300+
if (or_new) {
301+
// if pgcstack_func_slot is not initialized we set pgcstack to null to trigger the slow path
302+
TerminatorInst *nonNullTerm;
303+
TerminatorInst *nullTerm;
304+
auto is_null = builder.CreateICmpEQ(getter, Constant::getNullValue(builder.getPtrTy()));
305+
SplitBlockAndInsertIfThenElse(is_null, pgcstack, &nullTerm, &nonNullTerm,
306+
MDB.createBranchWeights(Weights));
307+
builder.SetInsertPoint(pgcstack);
308+
auto phi2 = builder.CreatePHI(T_pppjlvalue, 2, "pgcstack");
309+
pgcstack->moveBefore(nonNullTerm);
310+
phi2->addIncoming(pgcstack, nonNullTerm->getParent());
311+
phi2->addIncoming(Constant::getNullValue(T_pppjlvalue), nullTerm->getParent());
312+
pgcstack->replaceAllUsesWith(phi2);
313+
}
258314
getter->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const);
259315
getter->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(pgcstack->getContext(), None));
260316
if (TargetTriple.isOSDarwin()) {

src/threading.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -401,8 +401,14 @@ jl_ptls_t jl_init_threadtls(int16_t tid)
401401
return ptls;
402402
}
403403

404-
JL_DLLEXPORT jl_gcframe_t **jl_adopt_thread(void)
404+
// Pass in the handle to the system image. This is used to initialize the runtime correctly in case we are a shared library
405+
JL_DLLEXPORT jl_gcframe_t **jl_adopt_thread(void* sysimg_handle)
405406
{
407+
if (!jl_is_initialized()) {
408+
if (jl_init_runtime_adopt_thread(sysimg_handle) == 1)
409+
return &jl_get_current_task()->gcstack;
410+
// We lost the race and need to be initialized as usual
411+
}
406412
// `jl_init_threadtls` puts us in a GC unsafe region, so ensure GC isn't running.
407413
// we can't use a normal safepoint because we don't have signal handlers yet.
408414
// we also can't use jl_safepoint_wait_gc because that assumes we're in a task.

0 commit comments

Comments
 (0)