diff --git a/erts/emulator/beam/beam_file.c b/erts/emulator/beam/beam_file.c index ccb37c4fa50b..03f488ceb8e2 100644 --- a/erts/emulator/beam/beam_file.c +++ b/erts/emulator/beam/beam_file.c @@ -241,6 +241,7 @@ static int parse_atom_chunk(BeamFile *beam, BeamReader reader; Sint32 count; int i; + bool long_counts = false; ASSERT(beam->atoms.entries == NULL); atoms = &beam->atoms; @@ -248,6 +249,10 @@ static int parse_atom_chunk(BeamFile *beam, beamreader_init(chunk->data, chunk->size, &reader); LoadAssert(beamreader_read_i32(&reader, &count)); + if (count < 0) { + long_counts = true; + count = -count; + } LoadAssert(CHECK_ITEM_COUNT(count, 1, sizeof(atoms->entries[0]))); /* Reserve a slot for the empty list, which is encoded as atom 0 as we @@ -264,12 +269,21 @@ static int parse_atom_chunk(BeamFile *beam, for (i = 1; i < count; i++) { const byte *string; - byte length; Eterm atom; + Uint length; - LoadAssert(beamreader_read_u8(&reader, &length)); - LoadAssert(beamreader_read_bytes(&reader, length, &string)); + if (long_counts) { + TaggedNumber len; + LoadAssert(beamreader_read_tagged(&reader, &len)); + LoadAssert(len.size == 0); + length = (Uint) len.word_value; + } else { + byte len; + LoadAssert(beamreader_read_u8(&reader, &len)); + length = len; + } + LoadAssert(beamreader_read_bytes(&reader, length, &string)); atom = erts_atom_put(string, length, ERTS_ATOM_ENC_UTF8, 1); LoadAssert(atom != THE_NON_VALUE); diff --git a/lib/compiler/src/beam_asm.erl b/lib/compiler/src/beam_asm.erl index 817d740b0206..d14185e0279d 100644 --- a/lib/compiler/src/beam_asm.erl +++ b/lib/compiler/src/beam_asm.erl @@ -137,8 +137,7 @@ build_file(Code, Attr, Dict, NumLabels, NumFuncs, ExtraChunks0, CompileInfo, Com Code), %% Create the atom table chunk. - {NumAtoms, AtomTab} = beam_dict:atom_table(Dict), - AtomChunk = chunk(<<"AtU8">>, <>, AtomTab), + AtomChunk = build_atom_table(CompilerOpts, Dict), %% Create the import table chunk. @@ -287,6 +286,19 @@ build_attributes(Attr, Compile, MD5) -> CompileBinary = term_to_binary([{version,?COMPILER_VSN}|Compile]), {AttrBinary,CompileBinary}. +build_atom_table(Options, Dict) -> + {NumAtoms, AtomTab0} = beam_dict:atom_table(Dict), + case member(no_long_atoms, Options) of + false -> + %% Build an atom table for Erlang/OTP 28 and later. + AtomTab = [[encode(?tag_u, Len),Text] || [Len,Text] <- AtomTab0], + chunk(<<"AtU8">>, <<-NumAtoms:32>>, AtomTab); + true -> + %% Build an atom table compatible with Erlang/OTP 27 + %% and earlier. + chunk(<<"AtU8">>, <>, AtomTab0) + end. + build_line_table(Dict, Options) -> {NumLineInstrs,NumFnames0,Fnames0,NumLines,Lines0,ExecLine} = beam_dict:line_table(Dict), diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl index b33b41bf7d08..b9164710e9e6 100644 --- a/lib/compiler/src/compile.erl +++ b/lib/compiler/src/compile.erl @@ -1040,7 +1040,9 @@ expand_opt(r25, Os) -> [no_ssa_opt_update_tuple, no_bs_match, no_min_max_bifs | expand_opt(r26, Os)]; expand_opt(r26, Os) -> - [no_bsm_opt | Os]; + [no_bsm_opt | expand_opt(r27, Os)]; +expand_opt(r27, Os) -> + [no_long_atoms | Os]; expand_opt({debug_info_key,_}=O, Os) -> [encrypt_debug_info,O|Os]; expand_opt(no_type_opt=O, Os) -> diff --git a/lib/compiler/test/compile_SUITE.erl b/lib/compiler/test/compile_SUITE.erl index 2d7ede128f99..18cbdd5930a5 100644 --- a/lib/compiler/test/compile_SUITE.erl +++ b/lib/compiler/test/compile_SUITE.erl @@ -943,19 +943,31 @@ test_sloppy() -> Turtle. utf8_atoms(Config) when is_list(Config) -> + do_utf8_atom(binary_to_atom(<<"こんにちは"/utf8>>, utf8)), + + LongAtom = binary_to_atom(binary:copy(<<240,159,159,166>>, 255)), + do_utf8_atom(LongAtom), + + ok. + +do_utf8_atom(Atom) -> + Mod = ?FUNCTION_NAME, Anno = erl_anno:new(1), - Atom = binary_to_atom(<<"こんにちは"/utf8>>, utf8), - Forms = [{attribute,Anno,compile,[export_all]}, + Forms = [{attribute,Anno,module,Mod}, + {attribute,Anno,compile,[export_all]}, {function,Anno,atom,0,[{clause,Anno,[],[],[{atom,Anno,Atom}]}]}], - Utf8AtomForms = [{attribute,Anno,module,utf8_atom}|Forms], - {ok,utf8_atom,Utf8AtomBin} = - compile:forms(Utf8AtomForms, [binary]), - {ok,{utf8_atom,[{atoms,_}]}} = - beam_lib:chunks(Utf8AtomBin, [atoms]), - code:load_binary(utf8_atom, "compile_SUITE", Utf8AtomBin), - Atom = utf8_atom:atom(), + {ok,Mod,Utf8AtomBin} = compile:forms(Forms, [binary,report]), + {ok,{Mod,[{atoms,_}]}} = beam_lib:chunks(Utf8AtomBin, [atoms]), + + code:load_binary(Mod, "compile_SUITE", Utf8AtomBin), + + Atom = Mod:atom(), true = is_atom(Atom), + + true = code:delete(Mod), + false = code:purge(Mod), + ok. utf8_functions(Config) when is_list(Config) -> diff --git a/lib/stdlib/src/beam_lib.erl b/lib/stdlib/src/beam_lib.erl index 95c3d3d7b563..4e8d74d315dc 100644 --- a/lib/stdlib/src/beam_lib.erl +++ b/lib/stdlib/src/beam_lib.erl @@ -977,8 +977,7 @@ scan_beam(FD, Pos, What, Mod, Data) -> get_atom_data(Cs, Id, FD, Size, Pos, Pos2, Data, Encoding) -> NewCs = del_chunk(Id, Cs), {NFD, Chunk} = get_chunk(Id, Pos, Size, FD), - <<_Num:32, Chunk2/binary>> = Chunk, - {Module, _} = extract_atom(Chunk2, Encoding), + Module = extract_module(Chunk, Encoding), C = case Cs of info -> {Id, Pos, Size}; @@ -1202,6 +1201,15 @@ ensure_atoms({empty, AT}, Cs) -> ensure_atoms(AT, _Cs) -> AT. +extract_module(<>, utf8) when Num < 0 -> + {Module, _} = extract_long_atom(B), + Module; +extract_module(<<_Num:32/signed-integer, B/binary>>, Encoding) -> + {Module, _} = extract_atom(B, Encoding), + Module. + +extract_atoms(<>, AT, utf8) when Num < 0 -> + extract_long_atoms(B, 1, AT); extract_atoms(<<_Num:32, B/binary>>, AT, Encoding) -> extract_atoms(B, 1, AT, Encoding). @@ -1216,6 +1224,27 @@ extract_atom(<>, Encoding) -> <> = B, {binary_to_atom(SB, Encoding), Tail}. +extract_long_atoms(<<>>, _I, _AT) -> + true; +extract_long_atoms(B, I, AT) -> + {Atom, B1} = extract_long_atom(B), + true = ets:insert(AT, {I, Atom}), + extract_long_atoms(B1, I+1, AT). + +extract_long_atom(B0) -> + {Len, B} = decode_arg_val(B0), + <> = B, + {binary_to_atom(SB, utf8), Tail}. + +%% Extract the value from variable-sized tagged argument. Only support +%% values from 0 through 2047, which is sufficient to handle the +%% length of atoms in the atom table. +decode_arg_val(<>) -> + {N, Code}; +decode_arg_val(<>) -> + N = (High bsl 8) bor Low, + {N, Code0}. + %%% Utils. -record(bb, {pos = 0 :: integer(), diff --git a/lib/stdlib/test/beam_lib_SUITE.erl b/lib/stdlib/test/beam_lib_SUITE.erl index 3413fee6fd7f..f7e25f246750 100644 --- a/lib/stdlib/test/beam_lib_SUITE.erl +++ b/lib/stdlib/test/beam_lib_SUITE.erl @@ -94,6 +94,7 @@ normal(Conf) when is_list(Conf) -> P0 = pps(), do_normal(Source, PrivDir, BeamFile, []), + do_normal(Source, PrivDir, BeamFile, [r27]), {ok,_} = compile:file(Source, [{outdir,PrivDir}, no_debug_info]), {ok, {simple, [{debug_info, {debug_info_v1, erl_abstract_code, {none, _}}}]}} = @@ -118,7 +119,7 @@ do_normal(Source, PrivDir, BeamFile, Opts) -> do_normal(BeamFile, Opts), do_normal(Binary, Opts). -do_normal(BeamFile, Opts) -> +do_normal(BeamFile, _Opts) -> Imports = {imports, [{erlang, get_module_info, 1}, {erlang, get_module_info, 2}, {lists, member, 2}]}, @@ -151,10 +152,8 @@ do_normal(BeamFile, Opts) -> %% Test reading optional chunks. All = ["Atom", "Code", "StrT", "ImpT", "ExpT", "FunT", "LitT", "AtU8"], {ok,{simple,Chunks}} = beam_lib:chunks(BeamFile, All, [allow_missing_chunks]), - case {verify_simple(Chunks),Opts} of - {{missing_chunk, AtomBin}, []} when is_binary(AtomBin) -> ok; - {{AtomBin, missing_chunk}, [no_utf8_atoms]} when is_binary(AtomBin) -> ok - end, + {missing_chunk, AtomBin} = verify_simple(Chunks), + true = is_binary(AtomBin), %% 'allow_missing_chunks' should work for named chunks too. {ok, {simple, StrippedBeam}} = beam_lib:strip(BeamFile),