Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions erts/emulator/beam/beam_file.c
Original file line number Diff line number Diff line change
Expand Up @@ -241,13 +241,18 @@ static int parse_atom_chunk(BeamFile *beam,
BeamReader reader;
Sint32 count;
int i;
bool long_counts = false;

ASSERT(beam->atoms.entries == NULL);
atoms = &beam->atoms;

beamreader_init(chunk->data, chunk->size, &reader);

LoadAssert(beamreader_read_i32(&reader, &count));
if (count < 0) {
long_counts = true;
count = -count;
}
LoadAssert(CHECK_ITEM_COUNT(count, 1, sizeof(atoms->entries[0])));

/* Reserve a slot for the empty list, which is encoded as atom 0 as we
Expand All @@ -264,12 +269,21 @@ static int parse_atom_chunk(BeamFile *beam,

for (i = 1; i < count; i++) {
const byte *string;
byte length;
Eterm atom;
Uint length;

LoadAssert(beamreader_read_u8(&reader, &length));
LoadAssert(beamreader_read_bytes(&reader, length, &string));
if (long_counts) {
TaggedNumber len;
LoadAssert(beamreader_read_tagged(&reader, &len));
LoadAssert(len.size == 0);
length = (Uint) len.word_value;
} else {
byte len;
LoadAssert(beamreader_read_u8(&reader, &len));
length = len;
}

LoadAssert(beamreader_read_bytes(&reader, length, &string));
atom = erts_atom_put(string, length, ERTS_ATOM_ENC_UTF8, 1);
LoadAssert(atom != THE_NON_VALUE);

Expand Down
16 changes: 14 additions & 2 deletions lib/compiler/src/beam_asm.erl
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,7 @@ build_file(Code, Attr, Dict, NumLabels, NumFuncs, ExtraChunks0, CompileInfo, Com
Code),

%% Create the atom table chunk.
{NumAtoms, AtomTab} = beam_dict:atom_table(Dict),
AtomChunk = chunk(<<"AtU8">>, <<NumAtoms:32>>, AtomTab),
AtomChunk = build_atom_table(CompilerOpts, Dict),

%% Create the import table chunk.

Expand Down Expand Up @@ -287,6 +286,19 @@ build_attributes(Attr, Compile, MD5) ->
CompileBinary = term_to_binary([{version,?COMPILER_VSN}|Compile]),
{AttrBinary,CompileBinary}.

build_atom_table(Options, Dict) ->
{NumAtoms, AtomTab0} = beam_dict:atom_table(Dict),
case member(no_long_atoms, Options) of
false ->
%% Build an atom table for Erlang/OTP 28 and later.
AtomTab = [[encode(?tag_u, Len),Text] || [Len,Text] <- AtomTab0],
chunk(<<"AtU8">>, <<-NumAtoms:32>>, AtomTab);
true ->
%% Build an atom table compatible with Erlang/OTP 27
%% and earlier.
chunk(<<"AtU8">>, <<NumAtoms:32>>, AtomTab0)
end.

build_line_table(Dict, Options) ->
{NumLineInstrs,NumFnames0,Fnames0,NumLines,Lines0,ExecLine} =
beam_dict:line_table(Dict),
Expand Down
4 changes: 3 additions & 1 deletion lib/compiler/src/compile.erl
Original file line number Diff line number Diff line change
Expand Up @@ -1040,7 +1040,9 @@ expand_opt(r25, Os) ->
[no_ssa_opt_update_tuple, no_bs_match, no_min_max_bifs |
expand_opt(r26, Os)];
expand_opt(r26, Os) ->
[no_bsm_opt | Os];
[no_bsm_opt | expand_opt(r27, Os)];
expand_opt(r27, Os) ->
[no_long_atoms | Os];
expand_opt({debug_info_key,_}=O, Os) ->
[encrypt_debug_info,O|Os];
expand_opt(no_type_opt=O, Os) ->
Expand Down
30 changes: 21 additions & 9 deletions lib/compiler/test/compile_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -943,19 +943,31 @@ test_sloppy() ->
Turtle.

utf8_atoms(Config) when is_list(Config) ->
do_utf8_atom(binary_to_atom(<<"こんにちは"/utf8>>, utf8)),

LongAtom = binary_to_atom(binary:copy(<<240,159,159,166>>, 255)),
do_utf8_atom(LongAtom),

ok.

do_utf8_atom(Atom) ->
Mod = ?FUNCTION_NAME,
Anno = erl_anno:new(1),
Atom = binary_to_atom(<<"こんにちは"/utf8>>, utf8),
Forms = [{attribute,Anno,compile,[export_all]},
Forms = [{attribute,Anno,module,Mod},
{attribute,Anno,compile,[export_all]},
{function,Anno,atom,0,[{clause,Anno,[],[],[{atom,Anno,Atom}]}]}],

Utf8AtomForms = [{attribute,Anno,module,utf8_atom}|Forms],
{ok,utf8_atom,Utf8AtomBin} =
compile:forms(Utf8AtomForms, [binary]),
{ok,{utf8_atom,[{atoms,_}]}} =
beam_lib:chunks(Utf8AtomBin, [atoms]),
code:load_binary(utf8_atom, "compile_SUITE", Utf8AtomBin),
Atom = utf8_atom:atom(),
{ok,Mod,Utf8AtomBin} = compile:forms(Forms, [binary,report]),
{ok,{Mod,[{atoms,_}]}} = beam_lib:chunks(Utf8AtomBin, [atoms]),

code:load_binary(Mod, "compile_SUITE", Utf8AtomBin),

Atom = Mod:atom(),
true = is_atom(Atom),

true = code:delete(Mod),
false = code:purge(Mod),

ok.

utf8_functions(Config) when is_list(Config) ->
Expand Down
33 changes: 31 additions & 2 deletions lib/stdlib/src/beam_lib.erl
Original file line number Diff line number Diff line change
Expand Up @@ -977,8 +977,7 @@ scan_beam(FD, Pos, What, Mod, Data) ->
get_atom_data(Cs, Id, FD, Size, Pos, Pos2, Data, Encoding) ->
NewCs = del_chunk(Id, Cs),
{NFD, Chunk} = get_chunk(Id, Pos, Size, FD),
<<_Num:32, Chunk2/binary>> = Chunk,
{Module, _} = extract_atom(Chunk2, Encoding),
Module = extract_module(Chunk, Encoding),
C = case Cs of
info ->
{Id, Pos, Size};
Expand Down Expand Up @@ -1202,6 +1201,15 @@ ensure_atoms({empty, AT}, Cs) ->
ensure_atoms(AT, _Cs) ->
AT.

extract_module(<<Num:32/signed-integer, B/binary>>, utf8) when Num < 0 ->
{Module, _} = extract_long_atom(B),
Module;
extract_module(<<_Num:32/signed-integer, B/binary>>, Encoding) ->
{Module, _} = extract_atom(B, Encoding),
Module.

extract_atoms(<<Num:32/signed-integer, B/binary>>, AT, utf8) when Num < 0 ->
extract_long_atoms(B, 1, AT);
extract_atoms(<<_Num:32, B/binary>>, AT, Encoding) ->
extract_atoms(B, 1, AT, Encoding).

Expand All @@ -1216,6 +1224,27 @@ extract_atom(<<Len, B/binary>>, Encoding) ->
<<SB:Len/binary, Tail/binary>> = B,
{binary_to_atom(SB, Encoding), Tail}.

extract_long_atoms(<<>>, _I, _AT) ->
true;
extract_long_atoms(B, I, AT) ->
{Atom, B1} = extract_long_atom(B),
true = ets:insert(AT, {I, Atom}),
extract_long_atoms(B1, I+1, AT).

extract_long_atom(B0) ->
{Len, B} = decode_arg_val(B0),
<<SB:Len/binary, Tail/binary>> = B,
{binary_to_atom(SB, utf8), Tail}.

%% Extract the value from variable-sized tagged argument. Only support
%% values from 0 through 2047, which is sufficient to handle the
%% length of atoms in the atom table.
decode_arg_val(<<N:4,0:1, _Tag:3, Code/binary>>) ->
{N, Code};
decode_arg_val(<<High:3,0:1,1:1, _Tag:3, Low, Code0/binary>>) ->
N = (High bsl 8) bor Low,
{N, Code0}.

%%% Utils.

-record(bb, {pos = 0 :: integer(),
Expand Down
9 changes: 4 additions & 5 deletions lib/stdlib/test/beam_lib_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ normal(Conf) when is_list(Conf) ->
P0 = pps(),

do_normal(Source, PrivDir, BeamFile, []),
do_normal(Source, PrivDir, BeamFile, [r27]),

{ok,_} = compile:file(Source, [{outdir,PrivDir}, no_debug_info]),
{ok, {simple, [{debug_info, {debug_info_v1, erl_abstract_code, {none, _}}}]}} =
Expand All @@ -118,7 +119,7 @@ do_normal(Source, PrivDir, BeamFile, Opts) ->
do_normal(BeamFile, Opts),
do_normal(Binary, Opts).

do_normal(BeamFile, Opts) ->
do_normal(BeamFile, _Opts) ->
Imports = {imports, [{erlang, get_module_info, 1},
{erlang, get_module_info, 2},
{lists, member, 2}]},
Expand Down Expand Up @@ -151,10 +152,8 @@ do_normal(BeamFile, Opts) ->
%% Test reading optional chunks.
All = ["Atom", "Code", "StrT", "ImpT", "ExpT", "FunT", "LitT", "AtU8"],
{ok,{simple,Chunks}} = beam_lib:chunks(BeamFile, All, [allow_missing_chunks]),
case {verify_simple(Chunks),Opts} of
{{missing_chunk, AtomBin}, []} when is_binary(AtomBin) -> ok;
{{AtomBin, missing_chunk}, [no_utf8_atoms]} when is_binary(AtomBin) -> ok
end,
{missing_chunk, AtomBin} = verify_simple(Chunks),
true = is_binary(AtomBin),

%% 'allow_missing_chunks' should work for named chunks too.
{ok, {simple, StrippedBeam}} = beam_lib:strip(BeamFile),
Expand Down
Loading