@@ -445,26 +445,71 @@ long atom_table_ensure_atom(struct AtomTable *table, AtomString string, enum Ato
445445 return new_index ;
446446}
447447
448- int atom_table_ensure_atoms (
449- struct AtomTable * table , const void * atoms , int count , int * translate_table )
/**
 * Decode a length value from the byte(s) at *len_bytes and advance the cursor.
 *
 * Two forms are handled, selected by bits 3 and 4 of the first byte:
 *   - bit 3 clear: the value is the upper 4 bits of the first byte
 *     (one byte consumed);
 *   - bit 3 set, bit 4 clear: the value is 11 bits wide, the upper 3 bits of
 *     the first byte are the most significant bits and the second byte holds
 *     the low 8 bits (two bytes consumed).
 * Any other form (bits 3 and 4 both set) is not supported.
 *
 * NOTE(review): the low 3 bits of the first byte are ignored here; presumably
 * they carry a tag that the caller does not need -- confirm against the
 * encoder.
 *
 * @param len_bytes in/out pointer to the read position; advanced past the
 *        consumed bytes on success, left untouched on failure.
 * @return the decoded length (0..2047), or -1 when the form is unsupported.
 */
static inline int read_encoded_len(const uint8_t **len_bytes)
{
    const uint8_t *cursor = *len_bytes;
    const uint8_t first = cursor[0];

    if ((first & 0x8) != 0) {
        // Extended form: bit 4 tells the 11-bit form apart from larger ones.
        if ((first & 0x10) != 0) {
            // Unsupported form: do not touch the caller's cursor.
            return -1;
        }

        const uint8_t second = cursor[1];
        *len_bytes = cursor + 2;
        return ((first >> 5) << 8) | second;
    }

    // Short form: the value lives entirely in the upper nibble.
    *len_bytes = cursor + 1;
    return first >> 4;
}
465+
466+ int atom_table_ensure_atoms (struct AtomTable * table , const void * atoms , int count ,
467+ int * translate_table , enum EnsureAtomsOpt opt )
468+ {
469+ bool is_long_format = (opt & EnsureLongEncoding ) != 0 ;
470+
451471 SMP_WRLOCK (table );
452472
453473 int new_atoms_count = 0 ;
454474
455475 const uint8_t * current_atom = atoms ;
456476
457477 for (int i = 0 ; i < count ; i ++ ) {
458- struct HNode * node = get_node (table , current_atom );
478+ struct HNode * node ;
479+ if (is_long_format ) {
480+ int atom_len = read_encoded_len (& current_atom );
481+ if (UNLIKELY (atom_len < 0 )) {
482+ fprintf (stderr , "Found invalid atom len." );
483+ SMP_UNLOCK (table );
484+ return ATOM_TABLE_INVALID_LEN ;
485+ } else if (UNLIKELY (atom_len > 255 )) {
486+ fprintf (stderr ,
487+ "Unsupported atom length %i bytes.\n"
488+ "Unlike OTP >= 28, AtomVM supports a maximum of 255 bytes"
489+ "regardeless the number of codepoints.\n"
490+ "If you are seeing this error please open an issue on GitHub:\n"
491+ "https://github.com/atomvm/AtomVM/issues\n" ,
492+ atom_len );
493+ SMP_UNLOCK (table );
494+ return ATOM_TABLE_INVALID_LEN ;
495+ }
496+ char tmp_old_fmt [256 ];
497+ tmp_old_fmt [0 ] = atom_len ;
498+ memcpy (tmp_old_fmt + 1 , current_atom , atom_len );
499+ node = get_node (table , tmp_old_fmt );
500+ current_atom += atom_len ;
501+ } else {
502+ node = get_node (table , current_atom );
503+ uint8_t atom_len = current_atom [0 ];
504+ current_atom += 1 + atom_len ;
505+ }
506+
459507 if (node ) {
460508 translate_table [i ] = node -> index ;
461509 } else {
462510 new_atoms_count ++ ;
463511 translate_table [i ] = ATOM_TABLE_NOT_FOUND ;
464512 }
465-
466- uint8_t atom_len = current_atom [0 ];
467- current_atom += 1 + atom_len ;
468513 }
469514
470515 maybe_rehash (table , new_atoms_count );
@@ -473,6 +518,19 @@ int atom_table_ensure_atoms(
473518 int remaining_atoms = new_atoms_count ;
474519 struct HNodeGroup * node_group = table -> last_node_group ;
475520 for (int i = 0 ; i < count ; i ++ ) {
521+
522+ const uint8_t * to_be_copied = NULL ;
523+ const uint8_t * next_atom = current_atom ;
524+ uint8_t atom_len ;
525+ if (is_long_format ) {
526+ atom_len = read_encoded_len (& next_atom );
527+ to_be_copied = next_atom ;
528+ next_atom += atom_len ;
529+ } else {
530+ atom_len = current_atom [0 ];
531+ next_atom += 1 + atom_len ;
532+ }
533+
476534 if (translate_table [i ] == ATOM_TABLE_NOT_FOUND ) {
477535 if (!table -> last_node_group_avail ) {
478536 node_group = new_node_group (table , remaining_atoms );
@@ -482,7 +540,19 @@ int atom_table_ensure_atoms(
482540 }
483541 }
484542
485- unsigned long hash = sdbm_hash (current_atom , atom_string_len (current_atom ));
543+ if (is_long_format ) {
544+ uint8_t * atom_copy = malloc (atom_len + 1 );
545+ if (IS_NULL_PTR (atom_copy )) {
546+ // we are not going to remove atoms that have already been added up to this one
547+ SMP_UNLOCK (table );
548+ return ATOM_TABLE_ALLOC_FAIL ;
549+ }
550+ atom_copy [0 ] = atom_len ;
551+ memcpy (atom_copy + 1 , to_be_copied , atom_len );
552+ current_atom = atom_copy ;
553+ }
554+
555+ unsigned long hash = sdbm_hash (current_atom , atom_len );
486556 unsigned long bucket_index = hash % table -> capacity ;
487557
488558 translate_table [i ] = insert_node (table , node_group , bucket_index , current_atom );
@@ -491,8 +561,7 @@ int atom_table_ensure_atoms(
491561 break ;
492562 }
493563 }
494- uint8_t atom_len = current_atom [0 ];
495- current_atom += 1 + atom_len ;
564+ current_atom = next_atom ;
496565 }
497566
498567 SMP_UNLOCK (table );
0 commit comments