From dd0663dcba56cddc5efce72da18401128b4af754 Mon Sep 17 00:00:00 2001 From: Ryan Lamb <4955475+kinyoklion@users.noreply.github.com> Date: Mon, 10 Nov 2025 12:42:00 -0800 Subject: [PATCH 1/2] fix: Handle redis errors during mutations. --- src/ldclient_instance_sup.erl | 5 +- src/ldclient_storage_redis.erl | 12 +++- src/ldclient_storage_redis_server.erl | 63 +++++++++++++++------ src/ldclient_storage_redis_sup.erl | 5 +- test-redis/ldclient_storage_redis_SUITE.erl | 50 +++++++++++++++- 5 files changed, 112 insertions(+), 23 deletions(-) diff --git a/src/ldclient_instance_sup.erl b/src/ldclient_instance_sup.erl index f903b28..22b385e 100644 --- a/src/ldclient_instance_sup.erl +++ b/src/ldclient_instance_sup.erl @@ -43,7 +43,10 @@ start_link(SupName, UpdateSupName, UpdateWorkerModule, EventSupName, Tag) -> -spec init(Args :: term()) -> {ok, {{supervisor:strategy(), non_neg_integer(), pos_integer()}, [supervisor:child_spec()]}}. init([UpdateSupName, UpdateWorkerModule, EventSupName, Tag]) -> - {ok, {{one_for_one, 1, 5}, children(UpdateSupName, UpdateWorkerModule, EventSupName, Tag)}}. + % Allow up to 5 restarts in 60 seconds before giving up. + % This provides resilience for transient failures in child supervisors + % (update processor, events, storage) while preventing infinite restart loops. + {ok, {{one_for_one, 5, 60}, children(UpdateSupName, UpdateWorkerModule, EventSupName, Tag)}}. %%=================================================================== %% Internal functions diff --git a/src/ldclient_storage_redis.erl b/src/ldclient_storage_redis.erl index 9febf78..8449b16 100644 --- a/src/ldclient_storage_redis.erl +++ b/src/ldclient_storage_redis.erl @@ -37,9 +37,15 @@ init(SupRef, Tag, _) -> StorageSup = ?CHILD(ldclient_storage_redis_sup, ldclient_storage_redis_sup, [SupRegName, WorkerRegName, Tag], supervisor), {ok, _} = supervisor:start_child(SupRef, StorageSup), ok = ldclient_storage_cache:init(SupRef, Tag, []), - % Pre-create features and segments buckets - ok = create(Tag, features), - ok = create(Tag, segments). + % Pre-create features and segments buckets only if not in daemon mode + UseLdd = ldclient_config:get_value(Tag, use_ldd), + case UseLdd of + false -> + ok = create(Tag, features), + ok = create(Tag, segments); + true -> + ok + end. -spec create(Tag :: atom(), Bucket :: atom()) -> ok | diff --git a/src/ldclient_storage_redis_server.erl b/src/ldclient_storage_redis_server.erl index f3363c3..68b3479 100644 --- a/src/ldclient_storage_redis_server.erl +++ b/src/ldclient_storage_redis_server.erl @@ -207,8 +207,13 @@ bucket_exists(Bucket, Buckets) when is_atom(Bucket) -> create_bucket(true, Bucket, _Client, _Prefix, Buckets) -> {{error, already_exists, "Redis hash " ++ atom_to_list(Bucket) ++ " already exists."}, Buckets}; create_bucket(false, Bucket, Client, Prefix, Buckets) -> - {ok, _} = eredis:q(Client, ["HSET", bucket_name(Prefix, Bucket), null, null]), - {ok, [Bucket | Buckets]}. + case eredis:q(Client, ["HSET", bucket_name(Prefix, Bucket), null, null]) of + {ok, _} -> + {ok, [Bucket | Buckets]}; + {error, Reason} -> + error_logger:error_msg("Redis connection error during create_bucket for ~p: ~p", [Bucket, Reason]), + {ok, [Bucket | Buckets]} + end. %% @doc Empty a bucket %% @private @@ -221,9 +226,14 @@ create_bucket(false, Bucket, Client, Prefix, Buckets) -> empty_bucket(false, Bucket, _Client, _Prefix) -> {error, bucket_not_found, "Redis hash " ++ atom_to_list(Bucket) ++ " does not exist."}; empty_bucket(true, Bucket, Client, Prefix) -> - {ok, _} = eredis:q(Client, ["DEL", bucket_name(Prefix, Bucket)]), - {ok, _} = create_bucket(false, Bucket, Client, Prefix, []), - ok. + case eredis:q(Client, ["DEL", bucket_name(Prefix, Bucket)]) of + {ok, _} -> + {ok, _} = create_bucket(false, Bucket, Client, Prefix, []), + ok; + {error, Reason} -> + error_logger:error_msg("Redis connection error during empty_bucket for ~p: ~p", [Bucket, Reason]), + ok + end. %% @doc List all items in a bucket %% @private @@ -300,14 +310,27 @@ lookup_key(true, Key, Bucket, Client, Prefix) -> upsert_items(false, _Items, Bucket, _Client, _Prefix) -> {error, bucket_not_found, "Redis hash " ++ atom_to_list(Bucket) ++ " does not exist."}; upsert_items(true, Items, Bucket, Client, Prefix) -> - {ok, <<"OK">>} = eredis:q(Client, ["WATCH", bucket_name(Prefix, Bucket)]), - ok = maps:fold( - fun(K, V, ok) -> - {ok, _} = eredis:q(Client, ["HSET", bucket_name(Prefix, Bucket), K, jsx:encode(V)]), + case eredis:q(Client, ["WATCH", bucket_name(Prefix, Bucket)]) of + {ok, <<"OK">>} -> + Result = maps:fold( + fun(K, V, ok) -> + case eredis:q(Client, ["HSET", bucket_name(Prefix, Bucket), K, jsx:encode(V)]) of + {ok, _} -> ok; + {error, Reason} -> + error_logger:error_msg("Redis connection error during HSET for ~p key ~p: ~p", [Bucket, K, Reason]), + ok + end + end, ok, Items), + case eredis:q(Client, ["UNWATCH"]) of + {ok, <<"OK">>} -> Result; + {error, Reason} -> + error_logger:error_msg("Redis connection error during UNWATCH for ~p: ~p", [Bucket, Reason]), + ok + end; + {error, Reason} -> + error_logger:error_msg("Redis connection error during WATCH for ~p: ~p", [Bucket, Reason]), ok - end, ok, Items), - {ok, <<"OK">>} = eredis:q(Client, ["UNWATCH"]), - ok. + end. %% @doc Empty bucket and upsert key value pairs %% @private @@ -334,8 +357,12 @@ upsert_clean_items(true, Items, Bucket, Client, Prefix) -> delete_key(false, _Key, Bucket, _Client, _Prefix) -> {error, bucket_not_found, "Redis hash " ++ atom_to_list(Bucket) ++ " does not exist."}; delete_key(true, Key, Bucket, Client, Prefix) -> - {ok, _} = eredis:q(Client, ["HDEL", bucket_name(Prefix, Bucket), Key]), - ok. + case eredis:q(Client, ["HDEL", bucket_name(Prefix, Bucket), Key]) of + {ok, _} -> ok; + {error, Reason} -> + error_logger:error_msg("Redis connection error during delete_key for ~p key ~p: ~p", [Bucket, Key, Reason]), + ok + end. bucket_name(Prefix, Bucket) -> lists:concat([Prefix, ":", Bucket]). @@ -350,8 +377,12 @@ format_error(Reason) when is_binary(Reason) -> binary_to_list(Reason). -spec set_init(Client :: client_pid(), Prefix :: string()) -> ok. set_init(Client, Prefix) -> - {ok, _} = eredis:q(Client, ["SET", lists:concat([Prefix, ":$inited"]), ""]), - ok. + case eredis:q(Client, ["SET", lists:concat([Prefix, ":$inited"]), ""]) of + {ok, _} -> ok; + {error, Reason} -> + error_logger:error_msg("Redis connection error during set_init: ~p", [Reason]), + ok + end. -spec get_init(Client :: client_pid(), Prefix :: string()) -> boolean(). get_init(Client, Prefix) -> diff --git a/src/ldclient_storage_redis_sup.erl b/src/ldclient_storage_redis_sup.erl index a6b4fe3..84ae612 100644 --- a/src/ldclient_storage_redis_sup.erl +++ b/src/ldclient_storage_redis_sup.erl @@ -27,7 +27,10 @@ start_link(SupRegName, WorkerRegName, Tag) when is_atom(SupRegName), is_atom(Wor -spec init(Args :: term()) -> {ok, {{supervisor:strategy(), non_neg_integer(), pos_integer()}, [supervisor:child_spec()]}}. init([WorkerRegName, Tag]) -> - {ok, {{one_for_one, 0, 1}, children(WorkerRegName, Tag)}}. + % Allow up to 10 restarts in 60 seconds before giving up. + % This provides resilience for transient Redis connection issues while + % preventing infinite restart loops. + {ok, {{one_for_one, 10, 60}, children(WorkerRegName, Tag)}}. %%=================================================================== %% Internal functions diff --git a/test-redis/ldclient_storage_redis_SUITE.erl b/test-redis/ldclient_storage_redis_SUITE.erl index b354bba..4108b00 100644 --- a/test-redis/ldclient_storage_redis_SUITE.erl +++ b/test-redis/ldclient_storage_redis_SUITE.erl @@ -17,7 +17,9 @@ server_upsert_clean/1, server_all/1, server_process_events_put/1, - server_process_events_patch/1 + server_process_events_patch/1, + buckets_precreated_non_daemon_mode/1, + buckets_not_precreated_daemon_mode/1 ]). %%==================================================================== @@ -32,7 +34,9 @@ all() -> server_upsert_clean, server_all, server_process_events_put, - server_process_events_patch + server_process_events_patch, + buckets_precreated_non_daemon_mode, + buckets_not_precreated_daemon_mode ]. init_per_suite(Config) -> @@ -313,3 +317,45 @@ server_process_events_patch(_) -> trackEvents := false,trackEventsFallthrough := false, variations := [],version := 1}} ] = lists:sort(ldclient_storage_redis:all(default, features)). + +buckets_precreated_non_daemon_mode(_) -> + % Start a new instance with use_ldd: false (non-daemon mode) + Tag = non_daemon_test, + Options = #{ + feature_store => ldclient_storage_redis, + stream => false, + polling_update_requestor => ldclient_update_requestor_test, + redis_prefix => "non_daemon_test", + use_ldd => false, + cache_ttl => 0 + }, + ldclient:start_instance("", Tag, Options), + + % Verify buckets were pre-created by trying to create them again + {error, already_exists, _} = ldclient_storage_redis:create(Tag, features), + {error, already_exists, _} = ldclient_storage_redis:create(Tag, segments), + + % Clean up + ok = ldclient:stop_instance(Tag). + +buckets_not_precreated_daemon_mode(_) -> + % Start a new instance with use_ldd: true (daemon mode) + Tag = daemon_test, + Options = #{ + feature_store => ldclient_storage_redis, + stream => false, + polling_update_requestor => ldclient_update_requestor_test, + redis_prefix => "daemon_test", + use_ldd => true, + cache_ttl => 0 + }, + ldclient:start_instance("", Tag, Options), + + % Verify buckets were NOT pre-created by successfully creating them + ok = ldclient_storage_redis:create(Tag, features), + ok = ldclient_storage_redis:create(Tag, segments), + + % Clean up + ok = ldclient_storage_redis:empty(Tag, features), + ok = ldclient_storage_redis:empty(Tag, segments), + ok = ldclient:stop_instance(Tag). From 07156affe0b90882e5a1bfc914e5f061d1f6362b Mon Sep 17 00:00:00 2001 From: Ryan Lamb <4955475+kinyoklion@users.noreply.github.com> Date: Mon, 10 Nov 2025 13:48:36 -0800 Subject: [PATCH 2/2] 10 in 60 --- src/ldclient_instance_sup.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ldclient_instance_sup.erl b/src/ldclient_instance_sup.erl index 22b385e..25b1be4 100644 --- a/src/ldclient_instance_sup.erl +++ b/src/ldclient_instance_sup.erl @@ -43,10 +43,10 @@ start_link(SupName, UpdateSupName, UpdateWorkerModule, EventSupName, Tag) -> -spec init(Args :: term()) -> {ok, {{supervisor:strategy(), non_neg_integer(), pos_integer()}, [supervisor:child_spec()]}}. init([UpdateSupName, UpdateWorkerModule, EventSupName, Tag]) -> - % Allow up to 5 restarts in 60 seconds before giving up. + % Allow up to 10 restarts in 60 seconds before giving up. % This provides resilience for transient failures in child supervisors % (update processor, events, storage) while preventing infinite restart loops. - {ok, {{one_for_one, 5, 60}, children(UpdateSupName, UpdateWorkerModule, EventSupName, Tag)}}. + {ok, {{one_for_one, 10, 60}, children(UpdateSupName, UpdateWorkerModule, EventSupName, Tag)}}. %%=================================================================== %% Internal functions