diff --git a/src/common/src/common-bits-private.h b/src/common/src/common-bits-private.h new file mode 100644 index 00000000000..dda727b8631 --- /dev/null +++ b/src/common/src/common-bits-private.h @@ -0,0 +1,50 @@ +/* + * Copyright 2009-present MongoDB, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "common-prelude.h" + +#ifndef MONGO_C_DRIVER_COMMON_BITS_PRIVATE_H +#define MONGO_C_DRIVER_COMMON_BITS_PRIVATE_H + +#include + + +// Round v up to a power of two (0 -> 1, exact powers unchanged). Saturates to UINT32_MAX on overflow. +static BSON_INLINE uint32_t +mcommon_next_power_of_two_u32 (uint32_t v) +{ + if (v == 0) { + return 1; + } + + // https://graphics.stanford.edu/%7Eseander/bithacks.html#RoundUpPowerOf2 + v--; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v++; + + if (v == 0) { + return UINT32_MAX; + } else { + return v; + } +} + + +#endif /* MONGO_C_DRIVER_COMMON_BITS_PRIVATE_H */ diff --git a/src/common/src/common-json-private.h b/src/common/src/common-json-private.h new file mode 100644 index 00000000000..d8e98980305 --- /dev/null +++ b/src/common/src/common-json-private.h @@ -0,0 +1,397 @@ +/* + * Copyright 2009-present MongoDB, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "common-prelude.h" + +#ifndef MONGO_C_DRIVER_COMMON_JSON_PRIVATE_H +#define MONGO_C_DRIVER_COMMON_JSON_PRIVATE_H + +#include "common-string-private.h" + +#define mcommon_iso8601_string_append COMMON_NAME (iso8601_string_append) +#define mcommon_json_append_escaped COMMON_NAME (json_append_escaped) +#define mcommon_json_append_value_double COMMON_NAME (json_append_value_double) +#define mcommon_json_append_value_decimal128 COMMON_NAME (json_append_value_decimal128) +#define mcommon_json_append_value_oid COMMON_NAME (json_append_value_oid) +#define mcommon_json_append_value_binary COMMON_NAME (json_append_value_binary) +#define mcommon_json_append_value_date_time COMMON_NAME (json_append_value_date_time) +#define mcommon_json_append_value_timestamp COMMON_NAME (json_append_value_timestamp) +#define mcommon_json_append_value_regex COMMON_NAME (json_append_value_regex) +#define mcommon_json_append_value_dbpointer COMMON_NAME (json_append_value_dbpointer) +#define mcommon_json_append_value_code COMMON_NAME (json_append_value_code) +#define mcommon_json_append_value_codewscope COMMON_NAME (json_append_value_codewscope) +#define mcommon_json_append_value_symbol COMMON_NAME (json_append_value_symbol) +#define mcommon_json_append_bson_values COMMON_NAME (json_append_bson_values) +#define mcommon_json_append_bson_document COMMON_NAME (json_append_bson_document) +#define mcommon_json_append_bson_array COMMON_NAME (json_append_bson_array) + +// Needed by libbson and common-json +#ifndef BSON_MAX_RECURSION +#define BSON_MAX_RECURSION 200 
+#endif + +// Needed by libbson and common-json +#define BSON_REGEX_OPTIONS_SORTED "ilmsux" + +/** + * @brief Append an ISO 8601 formatted date, given 64-bit milliseconds since the epoch + * @param append A bounded string append, initialized with mcommon_string_set_append() + * @param msec_since_epoch Milliseconds since Jan 1 1970 UTC + * @returns true on success, false if this 'append' has exceeded its max length + */ +bool +mcommon_iso8601_string_append (mcommon_string_append_t *append, int64_t msec_since_epoch); + +/** + * @brief Append a UTF-8 string with all special characters escaped + * + * @param append A bounded string append, initialized with mcommon_string_set_append() + * @param str UTF-8 string to escape and append + * @param len Length of 'str' in bytes + * @param allow_nul true if internal "00" bytes or "C0 80" sequences should be encoded as "\u0000", false to treat + * them as invalid data + * @returns true on success, false if this 'append' has exceeded its max length or if we encountered invalid UTF-8 or + * disallowed NUL bytes in 'str' + * + * The string may include internal NUL characters. It does not need to be NUL terminated. + * The two-byte sequence "C0 80" is also interpreted as an internal NUL, for historical reasons. This sequence is + * considered invalid according to RFC3629. 
+ */ +bool +mcommon_json_append_escaped (mcommon_string_append_t *append, const char *str, uint32_t len, bool allow_nul); + +/** + * @brief Append a comma separator string to appear between values + * @param append A bounded string append, initialized with mcommon_string_set_append() + * @returns true on success, false if this 'append' has exceeded its max length + */ +static BSON_INLINE bool +mcommon_json_append_separator (mcommon_string_append_t *append) +{ + return mcommon_string_append (append, ", "); +} + +/** + * @brief Append a quoted and escaped key and key-value separator + * @param append A bounded string append, initialized with mcommon_string_set_append() + * @param str UTF-8 string to escape and append + * @param len Length of 'str' in bytes + * @returns true on success, false if this 'append' has exceeded its max length or if we encountered invalid UTF-8 or + * disallowed NUL bytes in 'str' + * + * See mcommon_json_append_escaped. NUL values in keys are never allowed. + */ +static BSON_INLINE bool +mcommon_json_append_key (mcommon_string_append_t *append, const char *str, uint32_t len) +{ + return mcommon_string_append (append, "\"") && mcommon_json_append_escaped (append, str, len, false) && + mcommon_string_append (append, "\" : "); +} + +/** + * @brief Append a quoted and escaped string + * @param append A bounded string append, initialized with mcommon_string_set_append() + * @param str UTF-8 string to escape and append + * @param len Length of 'str' in bytes + * @param allow_nul true if internal "00" bytes or "C0 80" sequences should be encoded as "\u0000", false to treat them + * as invalid data + * @returns true on success, false if this 'append' has exceeded its max length or if we encountered invalid UTF-8 or + * disallowed NUL bytes in 'str' + * + * See mcommon_json_append_escaped. 
+ */ +static BSON_INLINE bool +mcommon_json_append_value_utf8 (mcommon_string_append_t *append, const char *str, uint32_t len, bool allow_nul) +{ + return mcommon_string_append (append, "\"") && mcommon_json_append_escaped (append, str, len, allow_nul) && + mcommon_string_append (append, "\""); +} + +/** + * @brief Append an int32_t value, serialized according to a bson_json_mode_t + * @param append A bounded string append, initialized with mcommon_string_set_append() + * @param value Integer value + * @param mode One of the JSON serialization modes, as a bson_json_mode_t. + * @returns true on success, false if this 'append' has exceeded its max length + */ +static BSON_INLINE bool +mcommon_json_append_value_int32 (mcommon_string_append_t *append, int32_t value, bson_json_mode_t mode) +{ + return mode == BSON_JSON_MODE_CANONICAL + ? mcommon_string_append_printf (append, "{ \"$numberInt\" : \"%" PRId32 "\" }", value) + : mcommon_string_append_printf (append, "%" PRId32, value); +} + +/** + * @brief Append an int64_t value, serialized according to a bson_json_mode_t + * @param append A bounded string append, initialized with mcommon_string_set_append() + * @param value Integer value + * @param mode One of the JSON serialization modes, as a bson_json_mode_t. + * @returns true on success, false if this 'append' has exceeded its max length + */ +static BSON_INLINE bool +mcommon_json_append_value_int64 (mcommon_string_append_t *append, int64_t value, bson_json_mode_t mode) +{ + return mode == BSON_JSON_MODE_CANONICAL + ? 
mcommon_string_append_printf (append, "{ \"$numberLong\" : \"%" PRId64 "\" }", value) + : mcommon_string_append_printf (append, "%" PRId64, value); +} + +/** + * @brief Append a JSON compatible bool value + * @param append A bounded string append, initialized with mcommon_string_set_append() + * @param value Boolean value + * @returns true on success, false if this 'append' has exceeded its max length + */ +static BSON_INLINE bool +mcommon_json_append_value_bool (mcommon_string_append_t *append, bool value) +{ + return mcommon_string_append (append, value ? "true" : "false"); +} + +/** + * @brief Append an $undefined value + * @param append A bounded string append, initialized with mcommon_string_set_append() + * @returns true on success, false if this 'append' has exceeded its max length + */ +static BSON_INLINE bool +mcommon_json_append_value_undefined (mcommon_string_append_t *append) +{ + return mcommon_string_append (append, "{ \"$undefined\" : true }"); +} + +/** + * @brief Append a null value + * @param append A bounded string append, initialized with mcommon_string_set_append() + * @returns true on success, false if this 'append' has exceeded its max length + */ +static BSON_INLINE bool +mcommon_json_append_value_null (mcommon_string_append_t *append) +{ + return mcommon_string_append (append, "null"); +} + +/** + * @brief Append a $minKey value + * @param append A bounded string append, initialized with mcommon_string_set_append() + * @returns true on success, false if this 'append' has exceeded its max length + */ +static BSON_INLINE bool +mcommon_json_append_value_minkey (mcommon_string_append_t *append) +{ + return mcommon_string_append (append, "{ \"$minKey\" : 1 }"); +} + +/** + * @brief Append a $maxKey value + * @param append A bounded string append, initialized with mcommon_string_set_append() + * @returns true on success, false if this 'append' has exceeded its max length + */ +static BSON_INLINE bool +mcommon_json_append_value_maxkey 
(mcommon_string_append_t *append) +{ + return mcommon_string_append (append, "{ \"$maxKey\" : 1 }"); +} + +/** + * @brief Append a double-precision floating point value + * @param append A bounded string append, initialized with mcommon_string_set_append() + * @param value Double-precision floating point value + * @param mode One of the JSON serialization modes, as a bson_json_mode_t. + * @returns true on success, false if this 'append' has exceeded its max length + */ +bool +mcommon_json_append_value_double (mcommon_string_append_t *append, double value, bson_json_mode_t mode); + +/** + * @brief Append a decimal128 value + * @param append A bounded string append, initialized with mcommon_string_set_append() + * @param value decimal128 value to copy + * @returns true on success, false if this 'append' has exceeded its max length + */ +bool +mcommon_json_append_value_decimal128 (mcommon_string_append_t *append, const bson_decimal128_t *value); + +/** + * @brief Append the $oid JSON serialization of an ObjectId value + * @param append A bounded string append, initialized with mcommon_string_set_append() + * @param value bson_oid_t value to copy + * @returns true on success, false if this 'append' has exceeded its max length + */ +bool +mcommon_json_append_value_oid (mcommon_string_append_t *append, const bson_oid_t *value); + +/** + * @brief Append the JSON serialization of a BSON binary value + * @param append A bounded string append, initialized with mcommon_string_set_append() + * @param subtype Subtype code, identifying the format within the base64-encoded binary block + * @param bytes Bytes to be base64 encoded + * @param byte_count Number of bytes + * @param mode One of the JSON serialization modes, as a bson_json_mode_t + * @returns true on success, false if this 'append' has exceeded its max length + */ +bool +mcommon_json_append_value_binary (mcommon_string_append_t *append, + bson_subtype_t subtype, + const uint8_t *bytes, + uint32_t byte_count, + 
bson_json_mode_t mode); + +/** + * @brief Append the JSON serialization of a BSON date and time + * @param append A bounded string append, initialized with mcommon_string_set_append() + * @param msec_since_epoch Milliseconds since Jan 1 1970 + * @param mode One of the JSON serialization modes, as a bson_json_mode_t + * @returns true on success, false if this 'append' has exceeded its max length + */ +bool +mcommon_json_append_value_date_time (mcommon_string_append_t *append, int64_t msec_since_epoch, bson_json_mode_t mode); + +/** + * @brief Append the JSON serialization of a BSON timestamp value + * @param append A bounded string append, initialized with mcommon_string_set_append() + * @param timestamp 32-bit timestamp value + * @param increment 32-bit increment value + * @returns true on success, false if this 'append' has exceeded its max length + */ +bool +mcommon_json_append_value_timestamp (mcommon_string_append_t *append, uint32_t timestamp, uint32_t increment); + +/** + * @brief Append the JSON serialization of a BSON regular expression + * @param append A bounded string append, initialized with mcommon_string_set_append() + * @param pattern Regular expression pattern, as a UTF-8 string + * @param pattern_len Length of pattern string, in bytes + * @param options Regular expression options, as a UTF-8 string + * @param options_len Length of the options string, in bytes + * @param mode One of the JSON serialization modes, as a bson_json_mode_t + * @returns true on success, false if this 'append' has exceeded its max length + */ +bool +mcommon_json_append_value_regex (mcommon_string_append_t *append, + const char *pattern, + uint32_t pattern_len, + const char *options, + size_t options_len, + bson_json_mode_t mode); + +/** + * @brief Append the JSON serialization of a BSON legacy DBPointer + * @param append A bounded string append, initialized with mcommon_string_set_append() + * @param collection Collection name, as a UTF-8 string + * @param collection_len 
Length of collection name string, in bytes + * @param oid Optional ObjectId reference, or NULL + * @param mode One of the JSON serialization modes, as a bson_json_mode_t + * @returns true on success, false if this 'append' has exceeded its max length + */ +bool +mcommon_json_append_value_dbpointer (mcommon_string_append_t *append, + const char *collection, + uint32_t collection_len, + const bson_oid_t *oid, + bson_json_mode_t mode); + +/** + * @brief Append the JSON serialization of a BSON legacy code object + * @param append A bounded string append, initialized with mcommon_string_set_append() + * @param code Code string, in UTF-8 + * @param code_len Length of code string, in bytes + * @returns true on success, false if this 'append' has exceeded its max length + */ +bool +mcommon_json_append_value_code (mcommon_string_append_t *append, const char *code, uint32_t code_len); + +/** + * @brief Append the JSON serialization of a BSON legacy code-with-scope object + * @param append A bounded string append, initialized with mcommon_string_set_append() + * @param code Code string, in UTF-8 + * @param code_len Length of code string, in bytes + * @param scope Scope as a bson_t document + * @param mode One of the JSON serialization modes, as a bson_json_mode_t + * @param max_depth Maximum allowed number of document/array nesting levels below this one + * @returns true if the input bson was valid, even if we reached max length. false on invalid BSON. 
+ */ +bool +mcommon_json_append_value_codewscope (mcommon_string_append_t *append, + const char *code, + uint32_t code_len, + const bson_t *scope, + bson_json_mode_t mode, + unsigned max_depth); + +/** + * @brief Append the JSON serialization of a BSON legacy symbol object + * @param append A bounded string append, initialized with mcommon_string_set_append() + * @param symbol Symbol string, in UTF-8 + * @param symbol_len Length of symbol string, in bytes + * @param mode One of the JSON serialization modes, as a bson_json_mode_t. + * @returns true on success, false if this 'append' has exceeded its max length + */ +bool +mcommon_json_append_value_symbol (mcommon_string_append_t *append, + const char *symbol, + uint32_t symbol_len, + bson_json_mode_t mode); + +/** + * @brief Append all JSON-serialized values from a bson_t + * @param append A bounded string append, initialized with mcommon_string_set_append() + * @param bson bson_t document or array + * @param mode One of the JSON serialization modes, as a bson_json_mode_t + * @param has_keys true if this is a document, false if this is an array + * @param max_depth Maximum allowed number of document/array nesting levels below this one + * @returns true if the input bson was valid, even if we reached max length. false on invalid BSON. + * + * This generates keys, values, and separators but does not enclose the result in {} or []. + * Note that the return value reflects the status of BSON decoding, not string appending. + * The append status can be read using mcommon_string_status_from_append() if needed. + * If encoding was stopped early due to the max depth limit or max length, invalid input may go unnoticed. 
+ */ +bool +mcommon_json_append_bson_values ( + mcommon_string_append_t *append, const bson_t *bson, bson_json_mode_t mode, bool has_keys, unsigned max_depth); + +/** + * @brief Append a BSON document serialized as a JSON document + * @param append A bounded string append, initialized with mcommon_string_set_append() + * @param bson bson_t document + * @param mode One of the JSON serialization modes, as a bson_json_mode_t + * @param max_depth Maximum allowed number of document/array nesting levels *including* this one. If zero, appends "{ + * ... }". + * @returns true if the input bson was valid, even if we reached max length. false on invalid BSON. + */ +bool +mcommon_json_append_bson_document (mcommon_string_append_t *append, + const bson_t *bson, + bson_json_mode_t mode, + unsigned max_depth); + +/** + * @brief Append a BSON document serialized as a JSON array + * @param append A bounded string append, initialized with mcommon_string_set_append() + * @param bson bson_t to interpret as an array + * @param mode One of the JSON serialization modes, as a bson_json_mode_t + * @param max_depth Maximum allowed number of document/array nesting levels *including* this one. If zero, appends "[ + * ... ]". + * @returns true if the input bson was valid, even if we reached max length. false on invalid BSON. + */ +bool +mcommon_json_append_bson_array (mcommon_string_append_t *append, + const bson_t *bson, + bson_json_mode_t mode, + unsigned max_depth); + +#endif /* MONGO_C_DRIVER_COMMON_JSON_PRIVATE_H */ diff --git a/src/common/src/common-json.c b/src/common/src/common-json.c new file mode 100644 index 00000000000..70573b9f3d4 --- /dev/null +++ b/src/common/src/common-json.c @@ -0,0 +1,766 @@ +/* + * Copyright 2009-present MongoDB, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#include +#include +#include +#include +#include +#include + + +typedef struct { + mcommon_string_append_t *append; + unsigned max_depth; + bson_json_mode_t mode; + bool has_keys; + bool not_first_item; + bool is_corrupt; +} mcommon_json_append_visit_t; + + +static bool +mcommon_json_append_visit_utf8 ( + const bson_iter_t *iter, const char *key, size_t v_utf8_len, const char *v_utf8, void *data) +{ + mcommon_json_append_visit_t *state = data; + BSON_UNUSED (iter); + BSON_UNUSED (key); + if (!mcommon_in_range_unsigned (uint32_t, v_utf8_len)) { + mcommon_string_append_overflow (state->append); + return true; + } + return !mcommon_json_append_value_utf8 (state->append, v_utf8, (uint32_t) v_utf8_len, true); +} + +static bool +mcommon_json_append_visit_int32 (const bson_iter_t *iter, const char *key, int32_t v_int32, void *data) +{ + mcommon_json_append_visit_t *state = data; + BSON_UNUSED (iter); + BSON_UNUSED (key); + return !mcommon_json_append_value_int32 (state->append, v_int32, state->mode); +} + +static bool +mcommon_json_append_visit_int64 (const bson_iter_t *iter, const char *key, int64_t v_int64, void *data) +{ + mcommon_json_append_visit_t *state = data; + BSON_UNUSED (iter); + BSON_UNUSED (key); + return !mcommon_json_append_value_int64 (state->append, v_int64, state->mode); +} + +static bool +mcommon_json_append_visit_decimal128 (const bson_iter_t *iter, + const char *key, + const bson_decimal128_t *value, + void *data) +{ + mcommon_json_append_visit_t *state = data; + BSON_UNUSED (iter); + BSON_UNUSED (key); + return 
!mcommon_json_append_value_decimal128 (state->append, value); +} + +static bool +mcommon_json_append_visit_double (const bson_iter_t *iter, const char *key, double v_double, void *data) +{ + mcommon_json_append_visit_t *state = data; + BSON_UNUSED (iter); + BSON_UNUSED (key); + return !mcommon_json_append_value_double (state->append, v_double, state->mode); +} + +static bool +mcommon_json_append_visit_undefined (const bson_iter_t *iter, const char *key, void *data) +{ + mcommon_json_append_visit_t *state = data; + BSON_UNUSED (iter); + BSON_UNUSED (key); + return !mcommon_json_append_value_undefined (state->append); +} + +static bool +mcommon_json_append_visit_null (const bson_iter_t *iter, const char *key, void *data) +{ + mcommon_json_append_visit_t *state = data; + BSON_UNUSED (iter); + BSON_UNUSED (key); + return !mcommon_json_append_value_null (state->append); +} + +static bool +mcommon_json_append_visit_oid (const bson_iter_t *iter, const char *key, const bson_oid_t *oid, void *data) +{ + mcommon_json_append_visit_t *state = data; + BSON_UNUSED (iter); + BSON_UNUSED (key); + return !mcommon_json_append_value_oid (state->append, oid); +} + +static bool +mcommon_json_append_visit_binary (const bson_iter_t *iter, + const char *key, + bson_subtype_t v_subtype, + size_t v_binary_len, + const uint8_t *v_binary, + void *data) +{ + mcommon_json_append_visit_t *state = data; + BSON_UNUSED (iter); + BSON_UNUSED (key); + if (!mcommon_in_range_unsigned (uint32_t, v_binary_len)) { + mcommon_string_append_overflow (state->append); + return true; + } + return !mcommon_json_append_value_binary (state->append, v_subtype, v_binary, (uint32_t) v_binary_len, state->mode); +} + +static bool +mcommon_json_append_visit_bool (const bson_iter_t *iter, const char *key, bool v_bool, void *data) +{ + mcommon_json_append_visit_t *state = data; + BSON_UNUSED (iter); + BSON_UNUSED (key); + return !mcommon_json_append_value_bool (state->append, v_bool); +} + +static bool 
+mcommon_json_append_visit_date_time (const bson_iter_t *iter, const char *key, int64_t msec_since_epoch, void *data) +{ + mcommon_json_append_visit_t *state = data; + BSON_UNUSED (iter); + BSON_UNUSED (key); + return !mcommon_json_append_value_date_time (state->append, msec_since_epoch, state->mode); +} + +static bool +mcommon_json_append_visit_regex ( + const bson_iter_t *iter, const char *key, const char *v_regex, const char *v_options, void *data) +{ + mcommon_json_append_visit_t *state = data; + size_t v_regex_len = strlen (v_regex); + size_t v_options_len = strlen (v_options); + BSON_UNUSED (iter); + BSON_UNUSED (key); + if (!mcommon_in_range_unsigned (uint32_t, v_regex_len)) { + mcommon_string_append_overflow (state->append); + return true; + } + return !mcommon_json_append_value_regex ( + state->append, v_regex, (uint32_t) v_regex_len, v_options, v_options_len, state->mode); +} + +static bool +mcommon_json_append_visit_timestamp ( + const bson_iter_t *iter, const char *key, uint32_t v_timestamp, uint32_t v_increment, void *data) +{ + mcommon_json_append_visit_t *state = data; + BSON_UNUSED (iter); + BSON_UNUSED (key); + return !mcommon_json_append_value_timestamp (state->append, v_timestamp, v_increment); +} + +static bool +mcommon_json_append_visit_dbpointer (const bson_iter_t *iter, + const char *key, + size_t v_collection_len, + const char *v_collection, + const bson_oid_t *v_oid, + void *data) +{ + mcommon_json_append_visit_t *state = data; + BSON_UNUSED (iter); + BSON_UNUSED (key); + if (!mcommon_in_range_unsigned (uint32_t, v_collection_len)) { + mcommon_string_append_overflow (state->append); + return true; + } + return !mcommon_json_append_value_dbpointer ( + state->append, v_collection, (uint32_t) v_collection_len, v_oid, state->mode); +} + +static bool +mcommon_json_append_visit_minkey (const bson_iter_t *iter, const char *key, void *data) +{ + mcommon_json_append_visit_t *state = data; + BSON_UNUSED (iter); + BSON_UNUSED (key); + return 
!mcommon_json_append_value_minkey (state->append); +} + +static bool +mcommon_json_append_visit_maxkey (const bson_iter_t *iter, const char *key, void *data) +{ + mcommon_json_append_visit_t *state = data; + BSON_UNUSED (iter); + BSON_UNUSED (key); + return !mcommon_json_append_value_maxkey (state->append); +} + +static bool +mcommon_json_append_visit_before (const bson_iter_t *iter, const char *key, void *data) +{ + mcommon_json_append_visit_t *state = data; + BSON_UNUSED (iter); + + if (!mcommon_string_status_from_append (state->append)) { + return true; + } + + if (state->not_first_item) { + if (!mcommon_json_append_separator (state->append)) { + return true; + } + } else { + state->not_first_item = true; + } + + if (state->has_keys) { + size_t key_len = strlen (key); + if (!mcommon_in_range_unsigned (uint32_t, key_len)) { + mcommon_string_append_overflow (state->append); + return true; + } + if (!mcommon_json_append_key (state->append, key, (uint32_t) key_len)) { + return true; + } + } + + return false; +} + +static bool +mcommon_json_append_visit_after (const bson_iter_t *iter, const char *key, void *data) +{ + mcommon_json_append_visit_t *state = data; + BSON_UNUSED (iter); + BSON_UNUSED (key); + return !mcommon_string_status_from_append (state->append); +} + +static void +mcommon_json_append_visit_corrupt (const bson_iter_t *iter, void *data) +{ + mcommon_json_append_visit_t *state = data; + BSON_UNUSED (iter); + state->is_corrupt = true; +} + +static bool +mcommon_json_append_visit_code ( + const bson_iter_t *iter, const char *key, size_t v_code_len, const char *v_code, void *data) +{ + mcommon_json_append_visit_t *state = data; + BSON_UNUSED (iter); + BSON_UNUSED (key); + if (!mcommon_in_range_unsigned (uint32_t, v_code_len)) { + mcommon_string_append_overflow (state->append); + return true; + } + return !mcommon_json_append_value_code (state->append, v_code, (uint32_t) v_code_len); +} + +static bool +mcommon_json_append_visit_symbol ( + const bson_iter_t 
*iter, const char *key, size_t v_symbol_len, const char *v_symbol, void *data) +{ + mcommon_json_append_visit_t *state = data; + BSON_UNUSED (iter); + BSON_UNUSED (key); + if (!mcommon_in_range_unsigned (uint32_t, v_symbol_len)) { + mcommon_string_append_overflow (state->append); + return true; + } + return !mcommon_json_append_value_symbol (state->append, v_symbol, (uint32_t) v_symbol_len, state->mode); +} + +static bool +mcommon_json_append_visit_codewscope ( + const bson_iter_t *iter, const char *key, size_t v_code_len, const char *v_code, const bson_t *v_scope, void *data) +{ + mcommon_json_append_visit_t *state = data; + BSON_UNUSED (iter); + BSON_UNUSED (key); + if (!mcommon_in_range_unsigned (uint32_t, v_code_len)) { + mcommon_string_append_overflow (state->append); + return true; + } + if (mcommon_json_append_value_codewscope ( + state->append, v_code, (uint32_t) v_code_len, v_scope, state->mode, state->max_depth)) { + return !mcommon_string_status_from_append (state->append); + } else { + state->is_corrupt = true; + return true; + } +} + +static bool +mcommon_json_append_visit_document (const bson_iter_t *iter, const char *key, const bson_t *v_document, void *data) +{ + mcommon_json_append_visit_t *state = data; + BSON_UNUSED (iter); + BSON_UNUSED (key); + if (mcommon_json_append_bson_document (state->append, v_document, state->mode, state->max_depth)) { + return !mcommon_string_status_from_append (state->append); + } else { + state->is_corrupt = true; + return true; + } +} + +static bool +mcommon_json_append_visit_array (const bson_iter_t *iter, const char *key, const bson_t *v_array, void *data) +{ + mcommon_json_append_visit_t *state = data; + BSON_UNUSED (iter); + BSON_UNUSED (key); + if (mcommon_json_append_bson_array (state->append, v_array, state->mode, state->max_depth)) { + return !mcommon_string_status_from_append (state->append); + } else { + state->is_corrupt = true; + return true; + } +} + +bool +mcommon_json_append_bson_values ( + 
mcommon_string_append_t *append, const bson_t *bson, bson_json_mode_t mode, bool has_keys, unsigned max_depth) +{ + mcommon_json_append_visit_t state = {.append = append, .max_depth = max_depth, .mode = mode, .has_keys = has_keys}; + bson_iter_t iter; + if (!bson_iter_init (&iter, bson)) { + return false; + } + static const bson_visitor_t visitors = { + mcommon_json_append_visit_before, mcommon_json_append_visit_after, mcommon_json_append_visit_corrupt, + mcommon_json_append_visit_double, mcommon_json_append_visit_utf8, mcommon_json_append_visit_document, + mcommon_json_append_visit_array, mcommon_json_append_visit_binary, mcommon_json_append_visit_undefined, + mcommon_json_append_visit_oid, mcommon_json_append_visit_bool, mcommon_json_append_visit_date_time, + mcommon_json_append_visit_null, mcommon_json_append_visit_regex, mcommon_json_append_visit_dbpointer, + mcommon_json_append_visit_code, mcommon_json_append_visit_symbol, mcommon_json_append_visit_codewscope, + mcommon_json_append_visit_int32, mcommon_json_append_visit_timestamp, mcommon_json_append_visit_int64, + mcommon_json_append_visit_maxkey, mcommon_json_append_visit_minkey, NULL, /* visit_unsupported_type */ + mcommon_json_append_visit_decimal128, + }; + /* Note that early exit from bson_iter_visit_all does not affect our success, which is based only on BSON validity. + * BSON errors will set is_corrupt if they prevent full traversal, but non-fatal parse errors (like invalid UTF-8) + * may let bson_iter_visit_all() succeed while leaving an error status in iter.err_off. 
*/ + (void) bson_iter_visit_all (&iter, &visitors, &state); + return iter.err_off == 0 && !state.is_corrupt; +} + +static BSON_INLINE bool +mcommon_json_append_bson_container (mcommon_string_append_t *append, + const bson_t *bson, + bson_json_mode_t mode, + unsigned max_depth, + bool has_keys, + const char *empty, + const char *begin_non_empty, + const char *end_non_empty, + const char *omitted) +{ + // Note that the return value here is bson validity, not append status. + if (bson_empty (bson)) { + (void) mcommon_string_append (append, empty); + return true; + } else if (max_depth == 0) { + (void) mcommon_string_append (append, omitted); + return true; + } else { + (void) mcommon_string_append (append, begin_non_empty); + bool result = mcommon_json_append_bson_values (append, bson, mode, has_keys, max_depth - 1u); + (void) mcommon_string_append (append, end_non_empty); + return result; + } +} + +bool +mcommon_json_append_bson_document (mcommon_string_append_t *append, + const bson_t *bson, + bson_json_mode_t mode, + unsigned max_depth) +{ + return mcommon_json_append_bson_container (append, bson, mode, max_depth, true, "{ }", "{ ", " }", "{ ... }"); +} + +bool +mcommon_json_append_bson_array (mcommon_string_append_t *append, + const bson_t *bson, + bson_json_mode_t mode, + unsigned max_depth) +{ + return mcommon_json_append_bson_container (append, bson, mode, max_depth, false, "[ ]", "[ ", " ]", "[ ... ]"); +} + +/** + * @brief Like mcommon_string_append_printf (append, "\\u%04x", c) but intended to be more optimizable. 
+ */ +static BSON_INLINE bool +mcommon_json_append_hex_char (mcommon_string_append_t *append, uint16_t c) +{ + static const char digit_table[] = "0123456789abcdef"; + char hex_char[6]; + hex_char[0] = '\\'; + hex_char[1] = 'u'; + hex_char[2] = digit_table[0xf & (c >> 12)]; + hex_char[3] = digit_table[0xf & (c >> 8)]; + hex_char[4] = digit_table[0xf & (c >> 4)]; + hex_char[5] = digit_table[0xf & c]; + return mcommon_string_append_bytes (append, hex_char, 6); +} + +/** + * @brief Test whether a byte may require special processing in mcommon_json_append_escaped. + * @returns true for bytes in the range 0x00 - 0x1F, '\\', '\"', and 0xC0. + */ +static BSON_INLINE bool +mcommon_json_append_escaped_considers_byte_as_special (uint8_t byte) +{ + static const uint64_t table[4] = { + 0x00000004ffffffffull, // 0x00-0x1F (control), 0x22 (") + 0x0000000010000000ull, // 0x5C (backslash) + 0x0000000000000000ull, // none + 0x0000000000000001ull, // 0xC0 (Possible two-byte NUL) + }; + return 0 != (table[byte >> 6] & (1ull << (byte & 0x3f))); +} + +/** + * @brief Measure the number of consecutive non-special bytes. + */ +static BSON_INLINE uint32_t +mcommon_json_append_escaped_count_non_special_bytes (const char *str, uint32_t len) +{ + uint32_t result = 0; + // Good candidate for architecture-specific optimizations. + // SSE4 strcspn is nearly what we want, but our table of special bytes would be too large (34 > 16) + while (len) { + if (mcommon_json_append_escaped_considers_byte_as_special ((uint8_t) *str)) { + break; + } + result++; + str++; + len--; + } + return result; +} + +bool +mcommon_json_append_escaped (mcommon_string_append_t *append, const char *str, uint32_t len, bool allow_nul) +{ + BSON_ASSERT_PARAM (append); + BSON_ASSERT_PARAM (str); + + // Repeatedly handle runs of zero or more non-special bytes punctuated by a potentially-special sequence.
+ uint32_t non_special_len = mcommon_json_append_escaped_count_non_special_bytes (str, len); + while (len) { + if (!mcommon_string_append_bytes (append, str, non_special_len)) { + return false; + } + str += non_special_len; + len -= non_special_len; + if (len) { + char c = *str; + switch (c) { + case '"': + if (!mcommon_string_append (append, "\\\"")) { + return false; + } + break; + case '\\': + if (!mcommon_string_append (append, "\\\\")) { + return false; + } + break; + case '\b': + if (!mcommon_string_append (append, "\\b")) { + return false; + } + break; + case '\f': + if (!mcommon_string_append (append, "\\f")) { + return false; + } + break; + case '\n': + if (!mcommon_string_append (append, "\\n")) { + return false; + } + break; + case '\r': + if (!mcommon_string_append (append, "\\r")) { + return false; + } + break; + case '\t': + if (!mcommon_string_append (append, "\\t")) { + return false; + } + break; + case '\0': + if (!allow_nul || !mcommon_json_append_hex_char (append, 0)) { + return false; + } + break; + case '\xc0': // Could be a 2-byte NUL, or could begin another non-special run + if (len >= 2 && str[1] == '\x80') { + if (!allow_nul || !mcommon_json_append_hex_char (append, 0)) { + return false; + } + str++; + len--; + } else { + // Wasn't "C0 80". Begin a non-special run with the "C0" byte, which is usually special. 
+ non_special_len = mcommon_json_append_escaped_count_non_special_bytes (str + 1, len - 1) + 1; + continue; + } + break; + default: + BSON_ASSERT (c > 0x00 && c < 0x20); + if (!mcommon_json_append_hex_char (append, c)) { + return false; + } + break; + } + str++; + len--; + non_special_len = mcommon_json_append_escaped_count_non_special_bytes (str, len); + } + } + return mcommon_string_status_from_append (append); +} + +bool +mcommon_iso8601_string_append (mcommon_string_append_t *append, int64_t msec_since_epoch) +{ + time_t t; + int64_t msec_part; + char buf[64]; + + msec_part = msec_since_epoch % 1000; + t = (time_t) (msec_since_epoch / 1000); + +#ifdef BSON_HAVE_GMTIME_R + { + struct tm posix_date; + gmtime_r (&t, &posix_date); + strftime (buf, sizeof buf, "%Y-%m-%dT%H:%M:%S", &posix_date); + } +#elif defined(_MSC_VER) + { + /* Windows gmtime_s is thread-safe */ + struct tm time_buf; + gmtime_s (&time_buf, &t); + strftime (buf, sizeof buf, "%Y-%m-%dT%H:%M:%S", &time_buf); + } +#else + strftime (buf, sizeof buf, "%Y-%m-%dT%H:%M:%S", gmtime (&t)); +#endif + + if (msec_part) { + return mcommon_string_append_printf (append, "%s.%03" PRId64 "Z", buf, msec_part); + } else { + return mcommon_string_append_printf (append, "%sZ", buf); + } +} + +bool +mcommon_json_append_value_double (mcommon_string_append_t *append, double value, bson_json_mode_t mode) +{ + /* Determine if legacy (i.e. unwrapped) output should be used. Relaxed mode + * will use this for nan and inf values, which we check manually since old + * platforms may not have isinf or isnan. 
*/ + bool legacy = + mode == BSON_JSON_MODE_LEGACY || (mode == BSON_JSON_MODE_RELAXED && !(value != value || value * 0 != 0)); + + if (!legacy) { + mcommon_string_append (append, "{ \"$numberDouble\" : \""); + } + + if (!legacy && value != value) { + mcommon_string_append (append, "NaN"); + } else if (!legacy && value * 0 != 0) { + if (value > 0) { + mcommon_string_append (append, "Infinity"); + } else { + mcommon_string_append (append, "-Infinity"); + } + } else { + const mcommon_string_t *string = mcommon_string_from_append (append); + uint32_t start_len = string->len; + if (mcommon_string_append_printf (append, "%.20g", value)) { + /* ensure trailing ".0" to distinguish "3" from "3.0" */ + if (strspn (&string->str[start_len], "0123456789-") == string->len - start_len) { + mcommon_string_append (append, ".0"); + } + } + } + + if (!legacy) { + mcommon_string_append (append, "\" }"); + } + + return mcommon_string_status_from_append (append); +} + +bool +mcommon_json_append_value_decimal128 (mcommon_string_append_t *append, const bson_decimal128_t *value) +{ + char decimal128_string[BSON_DECIMAL128_STRING]; + bson_decimal128_to_string (value, decimal128_string); + + return mcommon_string_append (append, "{ \"$numberDecimal\" : \"") && + mcommon_string_append (append, decimal128_string) && mcommon_string_append (append, "\" }"); +} + +bool +mcommon_json_append_value_oid (mcommon_string_append_t *append, const bson_oid_t *value) +{ + return mcommon_string_append (append, "{ \"$oid\" : \"") && mcommon_string_append_oid_as_hex (append, value) && + mcommon_string_append (append, "\" }"); +} + +bool +mcommon_json_append_value_binary (mcommon_string_append_t *append, + bson_subtype_t subtype, + const uint8_t *bytes, + uint32_t byte_count, + bson_json_mode_t mode) +{ + if (mode == BSON_JSON_MODE_CANONICAL || mode == BSON_JSON_MODE_RELAXED) { + return mcommon_string_append (append, "{ \"$binary\" : { \"base64\" : \"") && + mcommon_string_append_base64_encode (append, bytes, 
byte_count) && + /* %x expects unsigned int; the enum's underlying type is implementation-defined, so convert explicitly. */ + mcommon_string_append_printf (append, "\", \"subType\" : \"%02x\" } }", (unsigned int) subtype); + } else { + return mcommon_string_append (append, "{ \"$binary\" : \"") && + mcommon_string_append_base64_encode (append, bytes, byte_count) && + mcommon_string_append_printf (append, "\", \"$type\" : \"%02x\" }", (unsigned int) subtype); + } +} + +bool +mcommon_json_append_value_date_time (mcommon_string_append_t *append, int64_t msec_since_epoch, bson_json_mode_t mode) +{ + const int64_t y10k = 253402300800000; // 10000-01-01T00:00:00Z in milliseconds since the epoch. + + if (mode == BSON_JSON_MODE_CANONICAL || + (mode == BSON_JSON_MODE_RELAXED && (msec_since_epoch < 0 || msec_since_epoch >= y10k))) { + return mcommon_string_append_printf ( + append, "{ \"$date\" : { \"$numberLong\" : \"%" PRId64 "\" } }", msec_since_epoch); + } else if (mode == BSON_JSON_MODE_RELAXED) { + return mcommon_string_append (append, "{ \"$date\" : \"") && + mcommon_iso8601_string_append (append, msec_since_epoch) && mcommon_string_append (append, "\" }"); + } else { + return mcommon_string_append_printf (append, "{ \"$date\" : %" PRId64 " }", msec_since_epoch); + } +} + +/** + * @brief Append a timestamp value as { "$timestamp" : { "t" : <timestamp>, "i" : <increment> } }. + * @param append Append operation receiving the output. + * @param timestamp Value printed for the "t" member, in decimal. + * @param increment Value printed for the "i" member, in decimal. + * @returns The result of the underlying mcommon_string_append_printf call. + */ +bool +mcommon_json_append_value_timestamp (mcommon_string_append_t *append, uint32_t timestamp, uint32_t increment) +{ + BSON_ASSERT_PARAM (append); + /* PRIu32 rather than %u: uint32_t is not guaranteed to be 'unsigned int' on every target. */ + return mcommon_string_append_printf ( + append, "{ \"$timestamp\" : { \"t\" : %" PRIu32 ", \"i\" : %" PRIu32 " } }", timestamp, increment); +} + +bool +mcommon_json_append_value_regex (mcommon_string_append_t *append, + const char *pattern, + uint32_t pattern_len, + const char *options, + size_t options_len, + bson_json_mode_t mode) +{ + if (mode == BSON_JSON_MODE_CANONICAL || mode == BSON_JSON_MODE_RELAXED) { + return mcommon_string_append (append, "{ \"$regularExpression\" : { \"pattern\" : \"") && + mcommon_json_append_escaped (append, pattern, pattern_len, false) && + mcommon_string_append (append, "\", \"options\" : \"") && + mcommon_string_append_selected_chars (append, BSON_REGEX_OPTIONS_SORTED,
options, options_len) && + mcommon_string_append (append, "\" } }"); + } else { + return mcommon_string_append (append, "{ \"$regex\" : \"") && + mcommon_json_append_escaped (append, pattern, pattern_len, false) && + mcommon_string_append (append, "\", \"$options\" : \"") && + mcommon_string_append_selected_chars (append, BSON_REGEX_OPTIONS_SORTED, options, options_len) && + mcommon_string_append (append, "\" }"); + } +} + +/** + * @brief Append a DBPointer value as extended JSON. + * @param append Append operation receiving the output. + * @param collection Collection name bytes; escaped for JSON, and an embedded NUL makes the call fail. + * @param collection_len Length of 'collection', in bytes. + * @param oid Optional ObjectId. When NULL, the "$id" member is omitted entirely. + * @param mode Canonical and relaxed modes emit { "$dbPointer" : { "$ref" : "...", "$id" : { "$oid" : "..." } } }; + * any other mode emits { "$ref" : "...", "$id" : "<hex>" }. + * @returns false as soon as any constituent append fails, true otherwise. + */ +bool +mcommon_json_append_value_dbpointer (mcommon_string_append_t *append, + const char *collection, + uint32_t collection_len, + const bson_oid_t *oid, + bson_json_mode_t mode) +{ + if (mode == BSON_JSON_MODE_CANONICAL || mode == BSON_JSON_MODE_RELAXED) { + return mcommon_string_append (append, "{ \"$dbPointer\" : { \"$ref\" : \"") && + mcommon_json_append_escaped (append, collection, collection_len, false) && + mcommon_string_append (append, "\"") && + (!oid || + (mcommon_string_append (append, ", \"$id\" : ") && mcommon_json_append_value_oid (append, oid))) && + mcommon_string_append (append, " } }"); + } else { + return mcommon_string_append (append, "{ \"$ref\" : \"") && + mcommon_json_append_escaped (append, collection, collection_len, false) && + mcommon_string_append (append, "\"") && + /* The quote closing the "$id" string stays inside the optional group, so a NULL oid cannot leave a stray quote before the brace. */ + (!oid || (mcommon_string_append (append, ", \"$id\" : \"") && + mcommon_string_append_oid_as_hex (append, oid) && mcommon_string_append (append, "\""))) && + mcommon_string_append (append, " }"); + } +} + +bool +mcommon_json_append_value_code (mcommon_string_append_t *append, const char *code, uint32_t code_len) +{ + return mcommon_string_append (append, "{ \"$code\" : \"") && + mcommon_json_append_escaped (append, code, code_len, true) && mcommon_string_append (append, "\" }"); +} + +bool +mcommon_json_append_value_codewscope (mcommon_string_append_t *append, + const char *code, + uint32_t code_len, + const bson_t *scope, + bson_json_mode_t mode, + unsigned max_depth) +{ + // Note that the return value here is bson validity, not append status.
+ (void) mcommon_string_append (append, "{ \"$code\" : \""); + (void) mcommon_json_append_escaped (append, code, code_len, true); + (void) mcommon_string_append (append, "\", \"$scope\" : "); + bool result = mcommon_json_append_bson_document (append, scope, mode, max_depth); + (void) mcommon_string_append (append, " }"); + return result; +} + +bool +mcommon_json_append_value_symbol (mcommon_string_append_t *append, + const char *symbol, + uint32_t symbol_len, + bson_json_mode_t mode) +{ + if (mode == BSON_JSON_MODE_CANONICAL || mode == BSON_JSON_MODE_RELAXED) { + return mcommon_string_append (append, "{ \"$symbol\" : \"") && + mcommon_json_append_escaped (append, symbol, symbol_len, true) && mcommon_string_append (append, "\" }"); + } else { + return mcommon_json_append_value_utf8 (append, symbol, symbol_len, true); + } +} diff --git a/src/common/src/common-string-private.h b/src/common/src/common-string-private.h index 770497b8797..5134c8d58fb 100644 --- a/src/common/src/common-string-private.h +++ b/src/common/src/common-string-private.h @@ -20,75 +20,655 @@ #define MONGO_C_DRIVER_COMMON_STRING_PRIVATE_H #include -#include // BEGIN_IGNORE_DEPRECATIONS +#include +#include "common-cmp-private.h" -// mcommon_string_t is an internal string type intended to replace the deprecated bson_string_t. -// When bson_string_t is removed, migrate the implementation to mcommon_string_t. -typedef bson_string_t mcommon_string_t; +/* Until the deprecated bson_string_t is removed, this must have the same members in the same order, so we can safely + * cast between the two types. Afterward, we are free to modify the memory layout as needed. + * + * In mcommon_string_t, 'str' is guaranteed to be NUL terminated and SHOULD be valid UTF-8. mcommon_string_t operations + * MUST maintain the validity of valid UTF-8 strings. + * + * Unused portions of the buffer may be uninitialized, and must not be compared or copied. + * + * 'len' is measured in bytes, not including the NUL terminator. 
+ * + * 'alloc' is the actual length of the bson_malloc() allocation in bytes, including the required space for NUL + * termination. + * + * When we use 'capacity', it refers to the largest 'len' that the buffer could store. alloc == capacity + 1. + */ +typedef struct mcommon_string_t { + char *str; + uint32_t len; + uint32_t alloc; +} mcommon_string_t; + +/* Parameters and outcome for a bounded append operation on a mcommon_string_t. Individual type-specific append + * functions can consume this struct to communicate bounds info. "max_len_exceeded" can be tested any time an + * algorithmic exit is convenient; the actual appended content will be limited by max_len. Truncation is guaranteed not + * to split a valid UTF-8 byte sequence. + * + * Members are here to support inline definitions; not intended for direct access. + * + * Multiple mcommon_string_append_t may simultaneously refer to the same 'string' but this usage is not recommended. + * + * 'max_len_exceeded' only includes operations undertaken on this specific mcommon_string_append_t. It will not be set + * if the string was already overlong, or if a different mcommon_string_append_t experiences an overage. 
+ */ +typedef struct mcommon_string_append_t { + mcommon_string_t *_string; + uint32_t _max_len; + bool _max_len_exceeded; +} mcommon_string_append_t; + +#define mcommon_string_new_with_capacity COMMON_NAME (string_new_with_capacity) +#define mcommon_string_new_with_buffer COMMON_NAME (string_new_with_buffer) +#define mcommon_string_destroy COMMON_NAME (string_destroy) +#define mcommon_string_destroy_with_steal COMMON_NAME (string_destroy_with_steal) +#define mcommon_string_grow_to_capacity COMMON_NAME (string_grow_to_capacity) +#define mcommon_string_append_selected_chars COMMON_NAME (string_append_selected_chars) +#define mcommon_string_append_bytes_internal COMMON_NAME (string_append_bytes_internal) +#define mcommon_string_append_bytes_all_or_none COMMON_NAME (string_append_bytes_all_or_none) +#define mcommon_string_append_unichar_internal COMMON_NAME (string_append_unichar_internal) +#define mcommon_string_append_base64_encode COMMON_NAME (string_append_base64_encode) +#define mcommon_string_append_oid_as_hex COMMON_NAME (string_append_oid_as_hex) +#define mcommon_string_append_printf COMMON_NAME (string_append_printf) +#define mcommon_string_append_vprintf COMMON_NAME (string_append_vprintf) + +bool +mcommon_string_append_bytes_internal (mcommon_string_append_t *append, const char *str, uint32_t len); + +bool +mcommon_string_append_unichar_internal (mcommon_string_append_t *append, bson_unichar_t unichar); + +/** + * @brief Allocate a new mcommon_string_t with a copy of the supplied initializer string and an explicit buffer + * capacity. + * + * @param str Initializer string, should be valid UTF-8. + * @param length Length of initializer string, in bytes. + * @param min_capacity Minimum string capacity, in bytes, the buffer must be able to store without reallocating. Does + * not include the NUL terminator. Must be less than UINT32_MAX. 
+ * @returns A new mcommon_string_t that must be freed with mcommon_string_destroy() or + * mcommon_string_destroy_with_steal() and bson_free(). It will hold 'str' in its entirety, even if the requested + * min_capacity was smaller. + */ +mcommon_string_t * +mcommon_string_new_with_capacity (const char *str, uint32_t length, uint32_t min_capacity); + +/** + * @brief Allocate a new mcommon_string_t with a copy of the supplied initializer string and a minimum-capacity buffer + * + * @param str NUL terminated string, should be valid UTF-8. Must be less than UINT32_MAX bytes long, overlong input + * causes a runtime assertion failure. + * @returns A new mcommon_string_t that must be freed with mcommon_string_destroy() or + * mcommon_string_destroy_with_steal() and bson_free(). + */ static BSON_INLINE mcommon_string_t * mcommon_string_new (const char *str) { - BEGIN_IGNORE_DEPRECATIONS - return bson_string_new (str); - END_IGNORE_DEPRECATIONS + BSON_ASSERT_PARAM (str); + size_t length = strlen (str); + BSON_ASSERT (mcommon_in_range_unsigned (uint32_t, length) && (uint32_t) length < UINT32_MAX); + return mcommon_string_new_with_capacity (str, (uint32_t) length, 0); } -static BSON_INLINE char * -mcommon_string_free (mcommon_string_t *string, bool free_segment) + +/** + * @brief Allocate a new mcommon_string_t, taking ownership of an existing buffer + * + * @param buffer Buffer to adopt, suitable for bson_free() and bson_realloc(). + * @param length Length of the string data, in bytes, not including the required NUL terminator. If string data is + * present, it should be valid UTF-8. + * @param alloc Actual allocated size of the buffer, in bytes, including room for NUL termination. + * @returns A new mcommon_string_t that must be freed with mcommon_string_destroy() or + * mcommon_string_destroy_with_steal() and bson_free(). 
+ */ +mcommon_string_t * +mcommon_string_new_with_buffer (char *buffer, uint32_t length, uint32_t alloc); + +/** + * @brief Deallocate a mcommon_string_t and its internal buffer + * @param string String allocated with mcommon_string_new, or NULL. + */ +void +mcommon_string_destroy (mcommon_string_t *string); + +/** + * @brief Deallocate a mcommon_string_t and return its internal buffer as a NUL-terminated C string. + * @param string String allocated with mcommon_string_new, or NULL. + * @returns A freestanding NUL-terminated string in a buffer that must be freed with bson_free(), or NULL if 'string' + * was NULL. + */ +char * +mcommon_string_destroy_with_steal (mcommon_string_t *string); + +/** + * @brief Truncate the string to zero length without deallocating the buffer + * @param string String to clear + */ +static BSON_INLINE void +mcommon_string_clear (mcommon_string_t *string) +{ + BSON_ASSERT_PARAM (string); + string->len = 0; + string->str[0] = '\0'; +} + +/** + * @brief Test if the string has zero length + * @param string String to test + */ +static BSON_INLINE bool +mcommon_string_is_empty (const mcommon_string_t *string) +{ + BSON_ASSERT_PARAM (string); + return string->len == 0; +} + +/** + * @brief Test if the string begins with a C string + * @param string mcommon_string_t to test + * @param substring prefix to match, as a NUL terminated C string. 
+ */ +static BSON_INLINE bool +mcommon_string_starts_with_str (const mcommon_string_t *string, const char *substring) +{ + BSON_ASSERT_PARAM (string); + BSON_ASSERT_PARAM (substring); + + size_t substring_len = strlen (substring); + uint32_t string_len = string->len; + + if (mcommon_in_range_unsigned (uint32_t, substring_len) && (uint32_t) substring_len <= string_len) { + return 0 == memcmp (string->str, substring, substring_len); + } else { + return false; + } +} + +/** + * @brief Test if the string ends with a C string + * @param string mcommon_string_t to test + * @param substring suffix to match, as a NUL terminated C string. + */ +static BSON_INLINE bool +mcommon_string_ends_with_str (const mcommon_string_t *string, const char *substring) { - BEGIN_IGNORE_DEPRECATIONS - return bson_string_free (string, free_segment); - END_IGNORE_DEPRECATIONS + BSON_ASSERT_PARAM (string); + BSON_ASSERT_PARAM (substring); + + size_t substring_len = strlen (substring); + uint32_t string_len = string->len; + + if (mcommon_in_range_unsigned (uint32_t, substring_len) && (uint32_t) substring_len <= string_len) { + uint32_t offset = string_len - (uint32_t) substring_len; + return 0 == memcmp (string->str + offset, substring, substring_len); + } else { + return false; + } } + +/** + * @brief Grow a mcommon_string_t buffer if necessary to ensure a minimum capacity + * + * @param string String allocated with mcommon_string_new + * @param capacity Minimum string length, in bytes, the buffer must be able to store without reallocating. Does not + * include the NUL terminator. Must be less than UINT32_MAX. + * + * If a reallocation is necessary, the actual allocation size will be chosen as the next highest power-of-two above the + * minimum needed to store 'capacity' as well as the NUL terminator. 
+ */ +void +mcommon_string_grow_to_capacity (mcommon_string_t *string, uint32_t capacity); + +/** + * @brief Set an append operation for this string, with an explicit length limit + * @param string String allocated with mcommon_string_new + * @param new_append Pointer to an uninitialized mcommon_string_append_t + * @param max_len Maximum allowed length for the resulting string, in bytes. Must be less than UINT32_MAX. + * + * The mcommon_string_append_t does not need to be deallocated. It is no longer usable if the underlying + * mcommon_string_t is freed. + * + * If the string was already over maximum length, it will not be modified. All append operations are guaranteed not to + * lengthen the string beyond max_len. Truncations are guaranteed to happen at UTF-8 code point boundaries. + */ static BSON_INLINE void -mcommon_string_append (mcommon_string_t *string, const char *str) +mcommon_string_set_append_with_limit (mcommon_string_t *string, mcommon_string_append_t *new_append, uint32_t max_len) { - BEGIN_IGNORE_DEPRECATIONS - bson_string_append (string, str); - END_IGNORE_DEPRECATIONS + BSON_ASSERT_PARAM (string); + BSON_ASSERT_PARAM (new_append); + BSON_ASSERT (max_len < UINT32_MAX); + + new_append->_string = string; + new_append->_max_len = max_len; + new_append->_max_len_exceeded = false; } + +/** + * @brief Set an append operation for this string + * @param string String allocated with mcommon_string_new + * @param new_append Pointer to an uninitialized mcommon_string_append_t + * + * The mcommon_string_append_t does not need to be deallocated. It is no longer usable if the underlying + * mcommon_string_t is freed. + * + * The maximum string length will be set to the largest representable by the data type, UINT32_MAX - 1. 
+ */ static BSON_INLINE void -mcommon_string_append_c (mcommon_string_t *string, char str) +mcommon_string_set_append (mcommon_string_t *string, mcommon_string_append_t *new_append) { - BEGIN_IGNORE_DEPRECATIONS - bson_string_append_c (string, str); - END_IGNORE_DEPRECATIONS + BSON_ASSERT_PARAM (string); + BSON_ASSERT_PARAM (new_append); + + mcommon_string_set_append_with_limit (string, new_append, UINT32_MAX - 1u); } + +/** + * @brief Allocate an empty mcommon_string_t with the specified initial capacity, and set an append operation for it + * with maximum length + * @param new_append Pointer to an uninitialized mcommon_string_append_t + * @param capacity Initial capacity for the string, in bytes, not including NUL termination + * + * Allocates a new mcommon_string_t, which will need to be deallocated by the caller. + * The mcommon_string_append_t itself does not need to be deallocated. + * + * The initial mcommon_string_t buffer will be allocated to have room for the given number of string bytes, not + * including the NUL terminator. The maximum append length will be set to the largest representable by the data type, + * UINT32_MAX - 1. + * + * This is a shortcut for mcommon_string_new_with_capacity() combined with mcommon_string_set_append(). 
+ */ static BSON_INLINE void -mcommon_string_append_unichar (mcommon_string_t *string, bson_unichar_t unichar) +mcommon_string_new_with_capacity_as_append (mcommon_string_append_t *new_append, uint32_t capacity) { - BEGIN_IGNORE_DEPRECATIONS - bson_string_append_unichar (string, unichar); - END_IGNORE_DEPRECATIONS + BSON_ASSERT_PARAM (new_append); + + mcommon_string_set_append (mcommon_string_new_with_capacity ("", 0, capacity), new_append); } +/** + * @brief Allocate an empty mcommon_string_t with default initial capacity, and set an append operation for it with + * maximum length + * @param new_append Pointer to an uninitialized mcommon_string_append_t + * + * Allocates a new mcommon_string_t, which will need to be deallocated by the caller. + * The mcommon_string_append_t itself does not need to be deallocated. + * + * The maximum string length will be set to the largest representable by the data type, UINT32_MAX - 1. + * The new string will be allocated with a small default capacity. + * + * This method is intended to be the most convenient way to start growing a string. If a reasonable guess + * can be made about the final size of the string, it's better to call mcommon_string_new_with_capacity_as_append() + * or mcommon_string_new_with_capacity() and mcommon_string_set_append(). + */ static BSON_INLINE void -mcommon_string_append_printf (mcommon_string_t *string, const char *format, ...) BSON_GNUC_PRINTF (2, 3); +mcommon_string_new_as_append (mcommon_string_append_t *new_append) +{ + BSON_ASSERT_PARAM (new_append); + + mcommon_string_new_with_capacity_as_append (new_append, 32); +} +/** + * @brief Begin appending to a new empty mcommon_string_t with a given capacity and a matching max append length. + * @param new_append Pointer to an uninitialized mcommon_string_append_t + * @param capacity Fixed capacity for the string, in bytes, not including NUL termination + * + * Allocates a new mcommon_string_t, which will need to be deallocated by the caller. 
+ * The mcommon_string_append_t itself does not need to be deallocated. + * The string buffer will not need to resize for operations performed through the resulting mcommon_string_append_t. + */ static BSON_INLINE void -mcommon_string_append_printf (mcommon_string_t *string, const char *format, ...) +mcommon_string_new_as_fixed_capacity_append (mcommon_string_append_t *new_append, uint32_t capacity) { - va_list args; - char *ret; + BSON_ASSERT_PARAM (new_append); - BSON_ASSERT_PARAM (string); - BSON_ASSERT_PARAM (format); + mcommon_string_set_append_with_limit (mcommon_string_new_with_capacity ("", 0, capacity), new_append, capacity); +} + +/** + * @brief Check the status of an append operation. + * @param append Append operation, initialized with mcommon_string_set_append + * @returns true if the append operation has no permanent error status. false if the max length has been exceeded. + */ +static BSON_INLINE bool +mcommon_string_status_from_append (const mcommon_string_append_t *append) +{ + BSON_ASSERT_PARAM (append); + + return !append->_max_len_exceeded; +} + +/** + * @brief Get a mcommon_string_t pointer to a mcommon_string_append_t destination. + * @param append Append operation, initialized with mcommon_string_set_append + * @returns Pointer to the mcommon_string_t destination. + * + * The mcommon_string_append_t includes a plain mcommon_string_t pointer with no fixed ownership semantics. + * Depending on usage, it may be a string with borrowed ownership or the append operation may be its primary owner. + */ +static BSON_INLINE mcommon_string_t * +mcommon_string_from_append (const mcommon_string_append_t *append) +{ + BSON_ASSERT_PARAM (append); - va_start (args, format); - ret = bson_strdupv_printf (format, args); - va_end (args); - BEGIN_IGNORE_DEPRECATIONS - bson_string_append (string, ret); - END_IGNORE_DEPRECATIONS - bson_free (ret); + return append->_string; } +/** + * @brief Get the current string buffer for an mcommon_string_append_t destination. 
+ * @param append Append operation, initialized with mcommon_string_set_append + * @returns String buffer pointer, NUL terminated, invalidated if the string is destroyed and by any operation that may + * grow the string. + * + * Shortcut for mcommon_string_from_append(append)->str + */ +static BSON_INLINE char * +mcommon_str_from_append (const mcommon_string_append_t *append) +{ + BSON_ASSERT_PARAM (append); + + return mcommon_string_from_append (append)->str; +} + +/** + * @brief Get the current string length for an mcommon_string_append_t destination. + * @param append Append operation, initialized with mcommon_string_set_append + * @returns Snapshot of the current string length + * + * Shortcut for mcommon_string_from_append(append)->len + */ +static BSON_INLINE uint32_t +mcommon_strlen_from_append (const mcommon_string_append_t *append) +{ + BSON_ASSERT_PARAM (append); + + return mcommon_string_from_append (append)->len; +} + +/** + * @brief Deallocate the mcommon_string_t destination associated with an mcommon_string_append_t + * @param append Append operation, initialized with mcommon_string_set_append + * The append operation will no longer be usable after this call. + */ +static BSON_INLINE void +mcommon_string_from_append_destroy (const mcommon_string_append_t *append) +{ + BSON_ASSERT_PARAM (append); + + mcommon_string_destroy (mcommon_string_from_append (append)); +} + +/** + * @brief Truncate the append destination string to zero length without deallocating its buffer. + * @param append Append operation, initialized with mcommon_string_set_append + * This is equivalent to mcommon_string_clear() combined with mcommon_string_from_append(). 
+ */ static BSON_INLINE void -mcommon_string_truncate (mcommon_string_t *string, uint32_t len) +mcommon_string_from_append_clear (const mcommon_string_append_t *append) { - BEGIN_IGNORE_DEPRECATIONS - bson_string_truncate (string, len); - END_IGNORE_DEPRECATIONS + BSON_ASSERT_PARAM (append); + + mcommon_string_clear (mcommon_string_from_append (append)); } +/** + * @brief Deallocate the mcommon_string_t destination associated with an mcommon_string_append_t and return its internal + * buffer + * @param append Append operation, initialized with mcommon_string_set_append + * @returns A freestanding NUL-terminated string in a buffer that must be freed with bson_free() + * The append operation will no longer be usable after this call. + */ +static BSON_INLINE char * +mcommon_string_from_append_destroy_with_steal (const mcommon_string_append_t *append) +{ + BSON_ASSERT_PARAM (append); + + return mcommon_string_destroy_with_steal (mcommon_string_from_append (append)); +} + +/** + * @brief Test if the append destination ends with a C string + * @param append mcommon_string_append_t with the string to test + * @param substring suffix to match, as a NUL terminated C string.
+ */ +static BSON_INLINE bool +mcommon_string_from_append_ends_with_str (const mcommon_string_append_t *append, const char *substring) +{ + BSON_ASSERT_PARAM (append); + BSON_ASSERT_PARAM (substring); + + return mcommon_string_ends_with_str (mcommon_string_from_append (append), substring); +} + +/** + * @brief Test if the append destination has zero length + * @param append mcommon_string_append_t with the string to test + */ +static BSON_INLINE bool +mcommon_string_from_append_is_empty (const mcommon_string_append_t *append) +{ + BSON_ASSERT_PARAM (append); + + return mcommon_string_is_empty (mcommon_string_from_append (append)); +} + +/** + * @brief Signal an explicit overflow during string append + * @param append Append operation, initialized with mcommon_string_set_append + * + * Future calls to mcommon_string_status_from_append() return false, exactly as if an overlong append was attempted and + * failed. This should be used for cases when a logical overflow is occurring but it was detected early enough that no + * actual append was attempted. + */ +static BSON_INLINE void +mcommon_string_append_overflow (mcommon_string_append_t *append) +{ + BSON_ASSERT_PARAM (append); + + append->_max_len_exceeded = true; +} + +/** + * @brief Append selected characters from a template + * @param append Append operation, initialized with mcommon_string_set_append + * @param template UTF-8 string listing allowed characters in the desired order + * @param selector UTF-8 string that chooses which template characters are appended + * @param selector_len Length of the selector string, in bytes + * + * Sort and filter lists of option characters. The template should list all allowed options in their desired order. + * This implementation does not support multi-byte template characters. ASSERTs that each template character is <= + * '\x7f'. Selectors may contain untrusted data, template should not.
+ */ +bool +mcommon_string_append_selected_chars (mcommon_string_append_t *append, + const char *template, + const char *selector, + size_t selector_len); + +/** + * @brief Append a string with known length to the mcommon_string_t + * @param append Append operation, initialized with mcommon_string_set_append + * @param str String to append a copy of, should be valid UTF-8 + * @param len Length of 'str', in bytes + * @returns true if the append operation has no permanent error status. false if the max length has been exceeded. + * + * If the string must be truncated to fit in the limit set by mcommon_string_set_append_with_limit, it will always be + * split in-between UTF-8 code points. + */ +static BSON_INLINE bool +mcommon_string_append_bytes (mcommon_string_append_t *append, const char *str, uint32_t len) +{ + BSON_ASSERT_PARAM (append); + BSON_ASSERT_PARAM (str); + + if (BSON_UNLIKELY (!mcommon_string_status_from_append (append))) { + return false; + } + + mcommon_string_t *string = append->_string; + char *buffer = string->str; + uint64_t alloc = (uint64_t) string->alloc; + uint64_t old_len = (uint64_t) string->len; + uint64_t max_len = (uint64_t) append->_max_len; + uint64_t new_len = old_len + (uint64_t) len; + uint64_t new_len_with_nul = new_len + 1; + + // Fast path: no truncation, no buffer growing + if (BSON_LIKELY (new_len <= max_len && new_len_with_nul <= alloc)) { + memcpy (buffer + old_len, str, len); + buffer[new_len] = '\0'; + string->len = (uint32_t) new_len; + return true; + } + + // Other cases are not inlined + return mcommon_string_append_bytes_internal (append, str, len); +} + +/** + * @brief Append a NUL-terminated UTF-8 string to the mcommon_string_t + * @param append Append operation, initialized with mcommon_string_set_append + * @param str NUL-terminated string to append a copy of + * @returns true if the append operation has no permanent error status. false if the max length has been exceeded. 
+ * + * If the string must be truncated to fit in the limit set by mcommon_string_set_append_with_limit, it will always be + * split in-between UTF-8 code points. + */ +static BSON_INLINE bool +mcommon_string_append (mcommon_string_append_t *append, const char *str) +{ + BSON_ASSERT_PARAM (append); + BSON_ASSERT_PARAM (str); + + return mcommon_string_append_bytes (append, str, strlen (str)); +} + +/** + * @brief Append an entire string with known length to the mcommon_string_t or fail, without truncating. + * @param append Append operation, initialized with mcommon_string_set_append + * @param str UTF-8 string to append a copy of + * @param len Length of 'str', in bytes + * @returns true if the append operation has no permanent error status. false if the max length has been exceeded. + * + * Atomic version of mcommon_string_append_bytes. If string does not fit completely, it is not truncated. + * The destination string is only modified if the entire append operation can be completed. + */ +bool +mcommon_string_append_bytes_all_or_none (mcommon_string_append_t *append, const char *str, uint32_t len); + +/** + * @brief Append an entire NUL-terminated UTF-8 string to the mcommon_string_t or fail, without truncating. + * @param append Append operation, initialized with mcommon_string_set_append + * @param str NUL-terminated UTF-8 sequence to append a copy of + * @returns true if the append operation has no permanent error status. false if the max length has been exceeded. + * + * Atomic version of mcommon_string_append. If string does not fit completely, it is not truncated. + * The destination string is only modified if the entire append operation can be completed. 
+ */ +static BSON_INLINE bool +mcommon_string_append_all_or_none (mcommon_string_append_t *append, const char *str) +{ + BSON_ASSERT_PARAM (append); + BSON_ASSERT_PARAM (str); + + return mcommon_string_append_bytes_all_or_none (append, str, strlen (str)); +} + +/** + * @brief Append base64 encoded bytes to an mcommon_string_t + * @param append Append operation, initialized with mcommon_string_set_append + * @param bytes Bytes to be encoded + * @param len Number of bytes to encoded + * @returns true if the append operation has no permanent error status. false if the max length has been exceeded. + */ +bool +mcommon_string_append_base64_encode (mcommon_string_append_t *append, const uint8_t *bytes, uint32_t len); + +/** + * @brief Append an ObjectId as a hex string + * @param append Append operation, initialized with mcommon_string_set_append + * @param value bson_oid_t value to copy + * @returns true if the append operation has no permanent error status. false if the max length has been exceeded. + */ +bool +mcommon_string_append_oid_as_hex (mcommon_string_append_t *append, const bson_oid_t *value); + +/** + * @brief Append printf() formatted text to a mcommon_string_t + * @param append Append operation, initialized with mcommon_string_set_append + * @param format printf() format string + * @param ... Format string arguments + * @returns true if the append operation has no permanent error status, and this operation has succeeded. false if the + * max length has been surpassed or this printf() experienced an unrecoverable error. + * + * Writes the printf() result directly into the mcommon_string_t buffer, growing it as needed. + * + * If the string must be truncated to fit in the limit set by mcommon_string_set_append_with_limit, it will always be + * split in-between UTF-8 code points. + */ +bool +mcommon_string_append_printf (mcommon_string_append_t *append, const char *format, ...) 
BSON_GNUC_PRINTF (2, 3); + +/** + * @brief Variant of mcommon_string_append_printf() that takes a va_list + * @param append Append operation, initialized with mcommon_string_set_append + * @param format printf() format string + * @param args Format string arguments + * @returns true if the append operation has no permanent error status, and this operation has succeeded. false if the + * max length has been surpassed or this printf() experienced an unrecoverable error. + * + * Writes the printf() result directly into the mcommon_string_t buffer, growing it as needed. + * + * If the string must be truncated to fit in the limit set by mcommon_string_set_append_with_limit, it will always be + * split in-between UTF-8 code points. + */ +bool +mcommon_string_append_vprintf (mcommon_string_append_t *append, const char *format, va_list args) + BSON_GNUC_PRINTF (2, 0); + +/** + * @brief Append one code point to a mcommon_string_t + * @param append Append operation, initialized with mcommon_string_set_append + * @param unichar Code point to append, as a bson_unichar_t + * @returns true if the append operation has no permanent error status. false if the max length has been exceeded. + * + * Guaranteed not to truncate. The character will fully append or no change will be made. 
+ */ +static BSON_INLINE bool +mcommon_string_append_unichar (mcommon_string_append_t *append, bson_unichar_t unichar) +{ + BSON_ASSERT_PARAM (append); + + if (BSON_UNLIKELY (!mcommon_string_status_from_append (append))) { + return false; + } + + mcommon_string_t *string = append->_string; + BSON_ASSERT (string); + char *buffer = string->str; + uint64_t alloc = (uint64_t) string->alloc; + uint64_t old_len = (uint64_t) string->len; + uint64_t max_len = (uint64_t) append->_max_len; + + // Fast path: single-byte character, no truncation, no buffer growing + if (BSON_LIKELY (unichar <= 0x7f)) { + uint64_t new_len = old_len + 1; + uint64_t new_len_with_nul = new_len + 1; + if (BSON_LIKELY (new_len <= max_len && new_len_with_nul <= alloc)) { + buffer[old_len] = (char) unichar; + buffer[new_len] = '\0'; + /* new_len <= max_len, which was widened from a 32-bit field, so this narrowing is lossless. The cast is + * explicit to match mcommon_string_append_bytes(). */ + string->len = (uint32_t) new_len; + return true; + } + } + + // Other cases are not inlined + return mcommon_string_append_unichar_internal (append, unichar); +} + + #endif /* MONGO_C_DRIVER_COMMON_STRING_PRIVATE_H */ diff --git a/src/common/src/common-string.c b/src/common/src/common-string.c new file mode 100644 index 00000000000..efe40646eb1 --- /dev/null +++ b/src/common/src/common-string.c @@ -0,0 +1,374 @@ +/* + * Copyright 2009-present MongoDB, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "common-string-private.h" +#include "common-bits-private.h" +#include "common-utf8-private.h" +#include "common-b64-private.h" +#include "common-cmp-private.h" + + +/* Allocate a new string holding a copy of the first 'length' bytes of 'str', with room for at least + * BSON_MAX (length, min_capacity) bytes plus a NUL terminator. + */ +mcommon_string_t * +mcommon_string_new_with_capacity (const char *str, uint32_t length, uint32_t min_capacity) +{ + BSON_ASSERT_PARAM (str); + BSON_ASSERT (length < UINT32_MAX && min_capacity < UINT32_MAX); + uint32_t capacity = BSON_MAX (length, min_capacity); + uint32_t alloc = capacity + 1u; + char *buffer = bson_malloc (alloc); + memcpy (buffer, str, length); + buffer[length] = '\0'; + return mcommon_string_new_with_buffer (buffer, length, alloc); +} + +/* Wrap an existing NUL-terminated buffer without copying it. The string stores the pointer, which + * mcommon_string_destroy() later passes to bson_free(). + */ +mcommon_string_t * +mcommon_string_new_with_buffer (char *buffer, uint32_t length, uint32_t alloc) +{ + BSON_ASSERT_PARAM (buffer); + BSON_ASSERT (length < UINT32_MAX && alloc >= length + 1u); + BSON_ASSERT (buffer[length] == '\0'); + mcommon_string_t *string = bson_malloc0 (sizeof *string); + string->str = buffer; + string->len = length; + string->alloc = alloc; + return string; +} + +/* Free the string together with its buffer. A NULL string is ignored. */ +void +mcommon_string_destroy (mcommon_string_t *string) +{ + if (string) { + bson_free (mcommon_string_destroy_with_steal (string)); + } +} + +/* Free only the mcommon_string_t and hand its buffer back to the caller. Returns NULL for a NULL string. */ +char * +mcommon_string_destroy_with_steal (mcommon_string_t *string) +{ + if (string) { + char *buffer = string->str; + BSON_ASSERT (buffer[string->len] == '\0'); + bson_free (string); + return buffer; + } else { + return NULL; + } +} + +/* Make sure the buffer holds at least 'capacity' bytes plus NUL. A buffer that is too small is reallocated to the + * size returned by mcommon_next_power_of_two_u32(); a buffer that is already large enough is left alone. + */ +void +mcommon_string_grow_to_capacity (mcommon_string_t *string, uint32_t capacity) +{ + BSON_ASSERT_PARAM (string); + BSON_ASSERT (capacity < UINT32_MAX); + uint32_t min_alloc_needed = capacity + 1u; + if (string->alloc < min_alloc_needed) { + uint32_t alloc = mcommon_next_power_of_two_u32 (min_alloc_needed); + string->str = bson_realloc (string->str, alloc); + string->alloc = alloc; + } +} + +// Handle cases omitted from the inlined mcommon_string_append_bytes() +bool +mcommon_string_append_bytes_internal (mcommon_string_append_t *append, const char *str, uint32_t len) +{ + 
mcommon_string_t *string = append->_string; + BSON_ASSERT (string); + uint32_t old_len = string->len; + uint32_t max_len = append->_max_len; + BSON_ASSERT (max_len < UINT32_MAX); + + /* Bytes still available before the limit; zero once the string has reached max_len. */ + uint32_t max_append_len = old_len < max_len ? max_len - old_len : 0; + uint32_t truncated_append_len = len; + if (len > max_append_len) { + // Search for an actual append length, <= the maximum allowed, which preserves UTF-8 validity + append->_max_len_exceeded = true; + truncated_append_len = mcommon_utf8_truncate_len (str, max_append_len); + } + + uint32_t new_len = old_len + truncated_append_len; + BSON_ASSERT (new_len <= max_len); + mcommon_string_grow_to_capacity (string, new_len); + char *buffer = string->str; + + memcpy (buffer + old_len, str, truncated_append_len); + buffer[new_len] = '\0'; + string->len = new_len; + + /* Reports the overflow recorded above when the input had to be truncated. */ + return mcommon_string_status_from_append (append); +} + +// Variant of mcommon_string_append_bytes() that grows but never truncates +bool +mcommon_string_append_bytes_all_or_none (mcommon_string_append_t *append, const char *str, uint32_t len) +{ + BSON_ASSERT_PARAM (append); + BSON_ASSERT_PARAM (str); + + if (BSON_UNLIKELY (!mcommon_string_status_from_append (append))) { + return false; + } + + mcommon_string_t *string = append->_string; + BSON_ASSERT (string); + uint32_t old_len = string->len; + uint32_t max_len = append->_max_len; + BSON_ASSERT (max_len < UINT32_MAX); + + uint32_t max_append_len = old_len < max_len ? 
max_len - old_len : 0; + if (len > max_append_len) { + /* Does not fit: record the overflow and leave the destination untouched. */ + append->_max_len_exceeded = true; + return false; + } + + uint32_t new_len = old_len + len; + BSON_ASSERT (new_len <= max_len); + mcommon_string_grow_to_capacity (string, new_len); + char *buffer = string->str; + + memcpy (buffer + old_len, str, len); + buffer[new_len] = '\0'; + string->len = new_len; + + return mcommon_string_status_from_append (append); +} + +/* Handle cases omitted from the inlined mcommon_string_append_unichar() */ +bool +mcommon_string_append_unichar_internal (mcommon_string_append_t *append, bson_unichar_t unichar) +{ + mcommon_string_t *string = append->_string; + uint32_t old_len = string->len; + uint32_t max_len = append->_max_len; + BSON_ASSERT (max_len < UINT32_MAX); + + /* Sized to match the 6-byte output array that mcommon_utf8_from_unichar() is declared with. */ + char max_utf8_sequence[6]; + uint32_t max_append_len = old_len < max_len ? max_len - old_len : 0; + + // Usually we can write the UTF-8 sequence directly + if (BSON_LIKELY (max_append_len >= sizeof max_utf8_sequence)) { + uint32_t actual_sequence_len; + mcommon_string_grow_to_capacity (string, old_len + sizeof max_utf8_sequence); + char *buffer = string->str; + mcommon_utf8_from_unichar (unichar, buffer + old_len, &actual_sequence_len); + BSON_ASSERT (actual_sequence_len <= sizeof max_utf8_sequence); + BSON_ASSERT (append->_max_len_exceeded == false); + uint32_t new_len = old_len + actual_sequence_len; + buffer[new_len] = '\0'; + string->len = new_len; + return true; + } + + // If we are near max_len, avoid growing the buffer beyond it. 
+ uint32_t actual_sequence_len; + mcommon_utf8_from_unichar (unichar, max_utf8_sequence, &actual_sequence_len); + return mcommon_string_append_bytes_internal (append, max_utf8_sequence, actual_sequence_len); +} + +bool +mcommon_string_append_base64_encode (mcommon_string_append_t *append, const uint8_t *bytes, uint32_t len) +{ + BSON_ASSERT_PARAM (append); + BSON_ASSERT_PARAM (bytes); + + if (BSON_UNLIKELY (!mcommon_string_status_from_append (append))) { + return false; + } + + mcommon_string_t *string = append->_string; + uint32_t old_len = string->len; + uint32_t max_len = append->_max_len; + BSON_ASSERT (max_len < UINT32_MAX); + uint32_t max_append_len = old_len < max_len ? max_len - old_len : 0; + + // Note that mcommon_b64_ntop_calculate_target_size includes room for NUL. + // mcommon_b64_ntop includes NUL in the input (buffer size) but not in the return value (string length). + size_t encoded_target_len = mcommon_b64_ntop_calculate_target_size ((size_t) len) - 1; + + if (encoded_target_len <= (size_t) max_append_len) { + // No truncation needed. Grow the buffer and encode directly. + mcommon_string_grow_to_capacity (string, old_len + encoded_target_len); + BSON_ASSERT (encoded_target_len == + mcommon_b64_ntop (bytes, (size_t) len, string->str + old_len, encoded_target_len + 1)); + BSON_ASSERT (mcommon_in_range_unsigned (uint32_t, encoded_target_len)); + string->len = old_len + (uint32_t) encoded_target_len; + return true; + } else if (max_append_len == 0) { + // Truncation to a zero-length append + mcommon_string_append_overflow (append); + return false; + } else { + /* We expect to append at least one byte, and truncate. + * Encoding only produces single-byte UTF-8 sequences, so the result always has exactly the maximum length. + * + * mcommon_b64_ntop() can't truncate without failing. 
To do this without allocating a full size temporary buffer + * or rewriting mcommon_b64_ntop, we can partition the write into three parts: a 'direct' portion made from entire + * non-truncated units of 3 bytes in and 4 characters out, a truncated 'remainder', and an ignored portion. + * Remainders longer than 3 bytes in / 4 bytes out are never necessary, and further portions of the input data + * will not be used. + */ + mcommon_string_grow_to_capacity (string, max_len); + char *buffer = string->str; + + uint32_t remainder_truncated_len = max_append_len % 4; + uint32_t direct_encoded_len = max_append_len - remainder_truncated_len; + uint32_t direct_input_len = mcommon_b64_pton_calculate_target_size ((size_t) direct_encoded_len); + BSON_ASSERT (direct_input_len % 3 == 0); + BSON_ASSERT (direct_input_len < len); + BSON_ASSERT (direct_encoded_len == + mcommon_b64_ntop (bytes, (size_t) direct_input_len, string->str + old_len, direct_encoded_len + 1)); + + char remainder_buffer[5]; + uint32_t remainder_input_len = BSON_MIN (3, len - direct_input_len); + BSON_ASSERT (remainder_input_len > 0); + uint32_t remainder_encoded_len = mcommon_b64_ntop_calculate_target_size ((size_t) remainder_input_len) - 1; + BSON_ASSERT (remainder_encoded_len > remainder_truncated_len); + BSON_ASSERT (remainder_encoded_len == mcommon_b64_ntop (bytes + direct_input_len, + (size_t) remainder_input_len, + remainder_buffer, + sizeof remainder_buffer)); + /* Only remainder_truncated_len characters still fit below max_len. Copying the whole encoded group would write + * past the end of the buffer, which is only guaranteed to hold max_len bytes plus NUL. + */ + memcpy (buffer + old_len + direct_encoded_len, remainder_buffer, remainder_truncated_len); + + BSON_ASSERT (old_len + direct_encoded_len + remainder_truncated_len == max_len); + buffer[max_len] = '\0'; + string->len = max_len; + mcommon_string_append_overflow (append); + return false; + } +} + +bool +mcommon_string_append_oid_as_hex (mcommon_string_append_t *append, const bson_oid_t *value) +{ + BSON_ASSERT_PARAM (append); + BSON_ASSERT_PARAM (value); + + char oid_str[25]; + bson_oid_to_string (value, oid_str); + return mcommon_string_append 
(append, oid_str); +} + +bool +mcommon_string_append_selected_chars (mcommon_string_append_t *append, + const char *template, + const char *selector, + size_t selector_len) +{ + BSON_ASSERT_PARAM (append); + BSON_ASSERT_PARAM (template); + BSON_ASSERT_PARAM (selector); + + for (uint8_t template_char; (template_char = (uint8_t) * template); template ++) { + BSON_ASSERT (template_char <= 0x7f); + if (memchr (selector, template_char, selector_len) && !mcommon_string_append_unichar (append, template_char)) { + return false; + } + } + return mcommon_string_status_from_append (append); +} + +bool +mcommon_string_append_printf (mcommon_string_append_t *append, const char *format, ...) +{ + BSON_ASSERT_PARAM (append); + BSON_ASSERT_PARAM (format); + + va_list args; + va_start (args, format); + bool ret = mcommon_string_append_vprintf (append, format, args); + va_end (args); + return ret; +} + +bool +mcommon_string_append_vprintf (mcommon_string_append_t *append, const char *format, va_list args) +{ + BSON_ASSERT_PARAM (append); + BSON_ASSERT_PARAM (format); + + if (BSON_UNLIKELY (!mcommon_string_status_from_append (append))) { + return false; + } + + mcommon_string_t *string = append->_string; + uint32_t old_len = string->len; + uint32_t max_len = append->_max_len; + BSON_ASSERT (max_len < UINT32_MAX); + uint32_t max_append_len = old_len < max_len ? max_len - old_len : 0; + + // Initial minimum buffer length; increases on retry. + uint32_t min_format_buffer_capacity = 16; + + while (true) { + // Allocate room for a format buffer at the end of the string. + // It will be at least this round's min_format_buffer_capacity, but if we happen to have extra space allocated we + // do want that to be available to vsnprintf(). 
+ + min_format_buffer_capacity = BSON_MIN (min_format_buffer_capacity, max_append_len); + mcommon_string_grow_to_capacity (string, old_len + min_format_buffer_capacity); + uint32_t alloc = string->alloc; + BSON_ASSERT (alloc > 0 && alloc - 1u >= old_len); + char *format_buffer = string->str + old_len; + uint32_t actual_format_buffer_capacity = BSON_MIN (alloc - 1u - old_len, max_append_len); + BSON_ASSERT (actual_format_buffer_capacity >= min_format_buffer_capacity); + BSON_ASSERT (actual_format_buffer_capacity < UINT32_MAX); + uint32_t format_buffer_alloc = actual_format_buffer_capacity + 1u; + + va_list args_copy; + va_copy (args_copy, args); + int format_result = bson_vsnprintf (format_buffer, format_buffer_alloc, format, args_copy); + va_end (args_copy); + + if (format_result > -1 && mcommon_in_range_signed (uint32_t, format_result) && + (uint32_t) format_result <= actual_format_buffer_capacity) { + // Successful result, no truncation. + format_buffer[format_result] = '\0'; + string->len = old_len + (uint32_t) format_result; + BSON_ASSERT (string->len <= append->_max_len); + BSON_ASSERT (append->_max_len_exceeded == false); + return true; + } + + if (actual_format_buffer_capacity == max_append_len) { + // No more space to grow into, this must be the final result. + + if (format_result > -1 && mcommon_in_range_signed (uint32_t, format_result) && + (uint32_t) format_result < UINT32_MAX) { + // We have truncated output from vsnprintf. Clean it up by removing + // any partial UTF-8 sequences that might be left on the end. 
+ uint32_t truncated_append_len = mcommon_utf8_truncate_len ( + format_buffer, BSON_MIN (actual_format_buffer_capacity, (uint32_t) format_result)); + BSON_ASSERT (truncated_append_len <= actual_format_buffer_capacity); + format_buffer[truncated_append_len] = '\0'; + string->len = old_len + truncated_append_len; + append->_max_len_exceeded = true; + return false; + } + + // Error from vsnprintf; This operation fails, but we do not set max_len_exceeded. + // The buffer contents are unspecified after a failed vsnprintf, so put the terminator back at the old length + // to keep the string's buffer[len] == '\0' invariant. + format_buffer[0] = '\0'; + return false; + } + + // Choose a larger format_buffer_len and try again. Length will be clamped to max_append_len above. + if (format_result > -1 && mcommon_in_range_signed (uint32_t, format_result) && + (uint32_t) format_result < UINT32_MAX) { + min_format_buffer_capacity = (uint32_t) format_result + 1u; + } else if (min_format_buffer_capacity < UINT32_MAX / 2) { + min_format_buffer_capacity *= 2; + } else { + min_format_buffer_capacity = UINT32_MAX - 1u; + } + } +} diff --git a/src/common/src/common-utf8-private.h b/src/common/src/common-utf8-private.h new file mode 100644 index 00000000000..f3d10f423df --- /dev/null +++ b/src/common/src/common-utf8-private.h @@ -0,0 +1,182 @@ +/* + * Copyright 2009-present MongoDB, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "common-prelude.h" + +#ifndef MONGO_C_DRIVER_COMMON_UTF8_PRIVATE_H +#define MONGO_C_DRIVER_COMMON_UTF8_PRIVATE_H + +#include + + +/* + *-------------------------------------------------------------------------- + * + * mcommon_utf8_get_sequence -- + * + * Determine the sequence length of the first UTF-8 character in + * @utf8. The sequence length is stored in @seq_length and the mask + * for the first character is stored in @first_mask. + * + * Both outputs are set to 0 when the first byte cannot begin a + * sequence (a continuation byte, or 0xF8 and above). + * + * Returns: + * None. + * + * Side effects: + * @seq_length is set. + * @first_mask is set. + * + *-------------------------------------------------------------------------- + */ + +static BSON_INLINE void +mcommon_utf8_get_sequence (const char *utf8, /* IN */ + uint8_t *seq_length, /* OUT */ + uint8_t *first_mask) /* OUT */ +{ + unsigned char c = *(const unsigned char *) utf8; + uint8_t m; + uint8_t n; + + /* + * See the following[1] for a description of what the given multi-byte + * sequences will be based on the bits set of the first byte. We also need + * to mask the first byte based on that. All subsequent bytes are masked + * against 0x3F. + * + * [1] http://www.joelonsoftware.com/articles/Unicode.html + */ + + if ((c & 0x80) == 0) { + n = 1; + m = 0x7F; + } else if ((c & 0xE0) == 0xC0) { + n = 2; + m = 0x1F; + } else if ((c & 0xF0) == 0xE0) { + n = 3; + m = 0x0F; + } else if ((c & 0xF8) == 0xF0) { + n = 4; + m = 0x07; + } else { + n = 0; + m = 0; + } + + *seq_length = n; + *first_mask = m; +} + + +/* + *-------------------------------------------------------------------------- + * + * mcommon_utf8_from_unichar -- + * + * Converts the unichar to a sequence of utf8 bytes and stores those + * in @utf8. The number of bytes in the sequence is stored in @len. + * If @unichar is greater than 0x1FFFFF, nothing is written to @utf8 + * and @len is set to 0. + * + * Parameters: + * @unichar: A bson_unichar_t. + * @utf8: A location for the multi-byte sequence. + * @len: A location for number of bytes stored in @utf8. + * + * Returns: + * None. + * + * Side effects: + * @utf8 is set. + * @len is set. 
+ * + *-------------------------------------------------------------------------- + */ + +static BSON_INLINE void +mcommon_utf8_from_unichar (bson_unichar_t unichar, /* IN */ + char utf8[BSON_ENSURE_ARRAY_PARAM_SIZE (6)], /* OUT */ + uint32_t *len) /* OUT */ +{ + BSON_ASSERT_PARAM (len); + + if (unichar <= 0x7F) { + utf8[0] = unichar; + *len = 1; + } else if (unichar <= 0x7FF) { + *len = 2; + utf8[0] = 0xC0 | ((unichar >> 6) & 0x3F); + utf8[1] = 0x80 | ((unichar) & 0x3F); + } else if (unichar <= 0xFFFF) { + *len = 3; + utf8[0] = 0xE0 | ((unichar >> 12) & 0xF); + utf8[1] = 0x80 | ((unichar >> 6) & 0x3F); + utf8[2] = 0x80 | ((unichar) & 0x3F); + } else if (unichar <= 0x1FFFFF) { + *len = 4; + utf8[0] = 0xF0 | ((unichar >> 18) & 0x7); + utf8[1] = 0x80 | ((unichar >> 12) & 0x3F); + utf8[2] = 0x80 | ((unichar >> 6) & 0x3F); + utf8[3] = 0x80 | ((unichar) & 0x3F); + } else { + *len = 0; + } +} + + +/* + * @brief Calculate a truncation length that preserves UTF-8 validity + * @param str String data, at least 'len' bytes long. + * @param len Upper bound for the truncated length, in bytes. + * @returns A new length <= 'len' + * + * When 'str' is a valid UTF-8 string with length >= 'len' bytes, + * this calculates a new length, less than or equal to 'len', which + * guarantees that the string will be truncated in-between code points. 
+ */ + +static BSON_INLINE uint32_t +mcommon_utf8_truncate_len (const char *str, uint32_t len) +{ + uint32_t resulting_len = len; + while (resulting_len > 0) { + if (BSON_LIKELY ((uint8_t) str[resulting_len - 1u] <= 0x7f)) { + // Single-byte sequence, always a fine place to stop + return resulting_len; + } + + // Search for the last byte that could begin a UTF-8 sequence + uint32_t seq_begin_at = resulting_len - 1u; + while (((uint8_t) str[seq_begin_at] & 0xc0) == 0x80) { + if (seq_begin_at > 0) { + seq_begin_at--; + } else { + return 0; + } + } + + uint8_t seq_length, first_mask_unused; + mcommon_utf8_get_sequence (str + seq_begin_at, &seq_length, &first_mask_unused); + if (seq_begin_at + seq_length == resulting_len) { + // Sequence is complete, we can truncate here. + return resulting_len; + } + + // Sequence was truncated or invalid; resume search prior to its beginning. + resulting_len = seq_begin_at; + } + return 0; +} + + +#endif /* MONGO_C_DRIVER_COMMON_UTF8_PRIVATE_H */ diff --git a/src/libbson/NEWS b/src/libbson/NEWS index 662600906e7..c4a155c6309 100644 --- a/src/libbson/NEWS +++ b/src/libbson/NEWS @@ -1,3 +1,12 @@ +libbson 1.30.0 (Unreleased) +=========================== + +Fixes: + + * Truncated output of `bson_as_json_with_opts` is changed to no longer split valid UTF-8 sequences. + + + libbson 1.29.2 ============== diff --git a/src/libbson/doc/bson_string_truncate.rst b/src/libbson/doc/bson_string_truncate.rst index 55a10a2f8b6..e6204d0fb9f 100644 --- a/src/libbson/doc/bson_string_truncate.rst +++ b/src/libbson/doc/bson_string_truncate.rst @@ -28,4 +28,6 @@ Truncates the string so that it is ``len`` bytes in length. This must be smaller A ``\0`` byte will be placed where the end of the string occurs. +.. warning:: This function is oblivious to UTF-8 structure. If truncation occurs in the middle of a UTF-8 byte sequence, the resulting :symbol:`bson_string_t` will contain invalid UTF-8. + .. 
warning:: The length of the resulting string (including the ``NULL`` terminator) MUST NOT exceed ``UINT32_MAX``. diff --git a/src/libbson/doc/bson_utf8_escape_for_json.rst b/src/libbson/doc/bson_utf8_escape_for_json.rst index 797c1521bd0..a79983e5316 100644 --- a/src/libbson/doc/bson_utf8_escape_for_json.rst +++ b/src/libbson/doc/bson_utf8_escape_for_json.rst @@ -28,6 +28,9 @@ Both " and \\ characters will be backslash-escaped. If a NUL byte is found before ``utf8_len`` bytes, it is converted to "\\u0000". Other non-ASCII characters in the input are preserved. +The two-byte sequence "C0 80" is also interpreted as an internal NUL, for historical reasons. +This sequence is considered invalid according to RFC3629. + Returns ------- diff --git a/src/libbson/src/bson/bson-iso8601-private.h b/src/libbson/src/bson/bson-iso8601-private.h index ca4ea68279a..1f010419fbb 100644 --- a/src/libbson/src/bson/bson-iso8601-private.h +++ b/src/libbson/src/bson/bson-iso8601-private.h @@ -31,16 +31,6 @@ BSON_BEGIN_DECLS bool _bson_iso8601_date_parse (const char *str, int32_t len, int64_t *out, bson_error_t *error); -/** - * _bson_iso8601_date_format: - * @msecs_since_epoch: A positive number of milliseconds since Jan 1, 1970. - * @str: The string to append the ISO8601-formatted to. - * - * Appends a date formatted like "2012-12-24T12:15:30.500Z" to @str. 
- */ -void -_bson_iso8601_date_format (int64_t msecs_since_epoch, mcommon_string_t *str); - BSON_END_DECLS diff --git a/src/libbson/src/bson/bson-iso8601.c b/src/libbson/src/bson/bson-iso8601.c index 9dcbc550fcd..cf86cf9a1d3 100644 --- a/src/libbson/src/bson/bson-iso8601.c +++ b/src/libbson/src/bson/bson-iso8601.c @@ -279,39 +279,3 @@ _bson_iso8601_date_parse (const char *str, int32_t len, int64_t *out, bson_error return true; } - - -void -_bson_iso8601_date_format (int64_t msec_since_epoch, mcommon_string_t *str) -{ - time_t t; - int64_t msecs_part; - char buf[64]; - - msecs_part = msec_since_epoch % 1000; - t = (time_t) (msec_since_epoch / 1000); - -#ifdef BSON_HAVE_GMTIME_R - { - struct tm posix_date; - gmtime_r (&t, &posix_date); - strftime (buf, sizeof buf, "%Y-%m-%dT%H:%M:%S", &posix_date); - } -#elif defined(_MSC_VER) - { - /* Windows gmtime_s is thread-safe */ - struct tm time_buf; - gmtime_s (&time_buf, &t); - strftime (buf, sizeof buf, "%Y-%m-%dT%H:%M:%S", &time_buf); - } -#else - strftime (buf, sizeof buf, "%Y-%m-%dT%H:%M:%S", gmtime (&t)); -#endif - - if (msecs_part) { - mcommon_string_append_printf (str, "%s.%03" PRId64 "Z", buf, msecs_part); - } else { - mcommon_string_append (str, buf); - mcommon_string_append_c (str, 'Z'); - } -} diff --git a/src/libbson/src/bson/bson-private.h b/src/libbson/src/bson/bson-private.h index e56b4426e71..c79065980a0 100644 --- a/src/libbson/src/bson/bson-private.h +++ b/src/libbson/src/bson/bson-private.h @@ -84,8 +84,6 @@ typedef struct { BSON_STATIC_ASSERT2 (impl_alloc_t, sizeof (bson_impl_alloc_t) <= 128); -#define BSON_REGEX_OPTIONS_SORTED "ilmsux" - BSON_END_DECLS diff --git a/src/libbson/src/bson/bson-string-private.h b/src/libbson/src/bson/bson-string-private.h deleted file mode 100644 index 77acd139857..00000000000 --- a/src/libbson/src/bson/bson-string-private.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright 2009-present MongoDB, Inc. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - - -#ifndef BSON_STRING_PRIVATE_H -#define BSON_STRING_PRIVATE_H - -bson_string_t * -_bson_string_alloc (const size_t size); - -void -_bson_string_append_ex (bson_string_t *string, const char *str, const size_t len); - -#endif /* BSON_STRING_PRIVATE_H */ diff --git a/src/libbson/src/bson/bson-string.c b/src/libbson/src/bson/bson-string.c index a59835cf81a..432b7429465 100644 --- a/src/libbson/src/bson/bson-string.c +++ b/src/libbson/src/bson/bson-string.c @@ -22,9 +22,9 @@ #include #include #include +#include #include #include -#include #ifdef BSON_HAVE_STRINGS_H #include @@ -32,365 +32,114 @@ #include #endif -// `bson_next_power_of_two_u32` returns 0 on overflow. -static BSON_INLINE uint32_t -bson_next_power_of_two_u32 (uint32_t v) -{ - BSON_ASSERT (v > 0); - - // https://graphics.stanford.edu/%7Eseander/bithacks.html#RoundUpPowerOf2 - v--; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - v++; - - return v; -} - -// `bson_string_ensure_space` ensures `string` has enough room for `needed` + a null terminator. -static void -bson_string_ensure_space (bson_string_t *string, uint32_t needed) -{ - BSON_ASSERT_PARAM (string); - BSON_ASSERT (needed <= UINT32_MAX - 1u); - needed += 1u; // Add one for trailing NULL byte. - if (string->alloc >= needed) { - return; - } - // Get the next largest power of 2 if possible. 
- uint32_t alloc = bson_next_power_of_two_u32 (needed); - if (alloc == 0) { - // Overflowed: saturate at UINT32_MAX. - alloc = UINT32_MAX; - } - if (!string->str) { - string->str = bson_malloc (alloc); - } else { - string->str = bson_realloc (string->str, alloc); - } - string->alloc = alloc; -} - -/* - *-------------------------------------------------------------------------- - * - * bson_string_new -- - * - * Create a new bson_string_t from an existing char *. - * - * bson_string_t is a power-of-2 allocation growing string. Every - * time data is appended the next power of two size is chosen for - * the allocation. Pretty standard stuff. - * - * It is UTF-8 aware through the use of bson_string_append_unichar(). - * The proper UTF-8 character sequence will be used. - * - * Parameters: - * @str: a string to copy or NULL. - * - * Returns: - * A newly allocated bson_string_t that should be freed with - * bson_string_free(). - * - * Side effects: - * None. - * - *-------------------------------------------------------------------------- - */ bson_string_t * bson_string_new (const char *str) /* IN */ { - bson_string_t *ret; - - ret = bson_malloc0 (sizeof *ret); - const size_t len_sz = str == NULL ? 0u : strlen (str); - BSON_ASSERT (mcommon_in_range_unsigned (uint32_t, len_sz)); - const uint32_t len_u32 = (uint32_t) len_sz; - bson_string_ensure_space (ret, len_u32); - if (str) { - memcpy (ret->str, str, len_sz); - } - - ret->str[len_u32] = '\0'; - ret->len = len_u32; - return ret; -} - -/* - *-------------------------------------------------------------------------- - * - * _bson_string_alloc -- - * - * Create an empty bson_string_t and allocate memory for it. - * - * The amount of memory allocated will be the next power-of-two if the - * specified size is not already a power-of-two. - * - * Parameters: - * @size: Size of the string to allocate - * - * Returns: - * A newly allocated bson_string_t that should be freed with - * bson_string_free(). 
- * - * Side effects: - * None. - * - *-------------------------------------------------------------------------- - */ - -bson_string_t * -_bson_string_alloc (const size_t size) -{ - BSON_ASSERT (size < UINT32_MAX); - - bson_string_t *ret; - - ret = bson_malloc0 (sizeof *ret); - - bson_string_ensure_space (ret, (uint32_t) size); - - BSON_ASSERT (ret->alloc > 0); - ret->len = 0; - ret->str[ret->len] = '\0'; - return ret; + /* Compatibility wrapper; deprecated. + * New mcommon_string behavior is to use power of two rounding for resize but not for initial allocation unless + * extra capacity is explicitly requested. This emulates the old behavior, padding the allocation of all new strings. + */ + size_t len = str ? strlen (str) : 0; + BSON_ASSERT (mcommon_in_range_unsigned (uint32_t, len) && (uint32_t) len < UINT32_MAX); + uint32_t alloc = mcommon_next_power_of_two_u32 ((uint32_t) len + 1); + return (bson_string_t *) mcommon_string_new_with_capacity (str ? str : "", (uint32_t) len, alloc - 1); } char * bson_string_free (bson_string_t *string, /* IN */ bool free_segment) /* IN */ { - char *ret = NULL; - - if (!string) { + // Compatibility wrapper; deprecated. + if (free_segment) { + mcommon_string_destroy ((mcommon_string_t *) string); return NULL; - } - - if (!free_segment) { - ret = string->str; } else { - bson_free (string->str); + return mcommon_string_destroy_with_steal ((mcommon_string_t *) string); } - - bson_free (string); - - return ret; -} - - -/* - *-------------------------------------------------------------------------- - * - * _bson_string_append_ex -- - * - * Append the UTF-8 string @str of given length @len to @string. - * - * Returns: - * None. - * - * Side effects: - * None. 
- * - *-------------------------------------------------------------------------- - */ - -void -_bson_string_append_ex (bson_string_t *string, /* IN */ - const char *str, /* IN */ - const size_t len) /* IN */ -{ - BSON_ASSERT (string); - BSON_ASSERT (str); - - BSON_ASSERT (mcommon_in_range_unsigned (uint32_t, len)); - const uint32_t len_u32 = (uint32_t) len; - BSON_ASSERT (len_u32 <= UINT32_MAX - string->len); - const uint32_t new_len = len_u32 + string->len; - bson_string_ensure_space (string, new_len); - memcpy (string->str + string->len, str, len); - string->str[new_len] = '\0'; - string->len = new_len; } - -/* - *-------------------------------------------------------------------------- - * - * bson_string_append -- - * - * Append the UTF-8 string @str to @string. - * - * Returns: - * None. - * - * Side effects: - * None. - * - *-------------------------------------------------------------------------- - */ - void bson_string_append (bson_string_t *string, /* IN */ const char *str) /* IN */ { + // Compatibility wrapper; deprecated. BSON_ASSERT_PARAM (string); BSON_ASSERT_PARAM (str); - _bson_string_append_ex (string, str, strlen (str)); -} - - -/* - *-------------------------------------------------------------------------- - * - * bson_string_append_c -- - * - * Append the ASCII character @c to @string. - * - * Do not use this if you are working with UTF-8 sequences, - * use bson_string_append_unichar(). - * - * Returns: - * None. - * - * Side effects: - * None. 
- * - *-------------------------------------------------------------------------- - */ - -void -bson_string_append_c (bson_string_t *string, /* IN */ - char c) /* IN */ -{ - char cc[2]; - - BSON_ASSERT (string); - - if (BSON_UNLIKELY (string->alloc == (string->len + 1))) { - cc[0] = c; - cc[1] = '\0'; - mcommon_string_append (string, cc); - return; - } - string->str[string->len++] = c; - string->str[string->len] = '\0'; + mcommon_string_append_t append; + mcommon_string_set_append ((mcommon_string_t *) string, &append); + (void) mcommon_string_append (&append, str); } - -/* - *-------------------------------------------------------------------------- - * - * bson_string_append_unichar -- - * - * Append the bson_unichar_t @unichar to the string @string. - * - * Returns: - * None. - * - * Side effects: - * None. - * - *-------------------------------------------------------------------------- - */ - void bson_string_append_unichar (bson_string_t *string, /* IN */ bson_unichar_t unichar) /* IN */ { - uint32_t len; - char str[8]; - - BSON_ASSERT (string); - BSON_ASSERT (unichar); - - bson_utf8_from_unichar (unichar, str, &len); + // Compatibility wrapper; deprecated. + BSON_ASSERT_PARAM (string); - if (len <= 6) { - str[len] = '\0'; - mcommon_string_append (string, str); - } + mcommon_string_append_t append; + mcommon_string_set_append ((mcommon_string_t *) string, &append); + (void) mcommon_string_append_unichar (&append, unichar); } +void +bson_string_append_c (bson_string_t *string, /* IN */ + char c) /* IN */ +{ + // Compatibility wrapper; deprecated. + BSON_ASSERT_PARAM (string); -/* - *-------------------------------------------------------------------------- - * - * bson_string_append_printf -- - * - * Format a string according to @format and append it to @string. - * - * Returns: - * None. - * - * Side effects: - * None. 
- * - *-------------------------------------------------------------------------- - */ + mcommon_string_append_t append; + mcommon_string_set_append ((mcommon_string_t *) string, &append); + (void) mcommon_string_append_bytes (&append, &c, 1); +} void bson_string_append_printf (bson_string_t *string, const char *format, ...) { - va_list args; - char *ret; - - BSON_ASSERT (string); - BSON_ASSERT (format); + // Compatibility wrapper; deprecated. + BSON_ASSERT_PARAM (string); + BSON_ASSERT_PARAM (format); + va_list args; + mcommon_string_append_t append; + mcommon_string_set_append ((mcommon_string_t *) string, &append); va_start (args, format); - ret = bson_strdupv_printf (format, args); + (void) mcommon_string_append_vprintf (&append, format, args); va_end (args); - mcommon_string_append (string, ret); - bson_free (ret); } -/* - *-------------------------------------------------------------------------- - * - * bson_string_truncate -- - * - * Truncate the string @string to @len bytes. - * - * The underlying memory will be released via realloc() down to - * the minimum required size (at power-of-two boundary) specified by @len. - * - * Returns: - * None. - * - * Side effects: - * None. - * - *-------------------------------------------------------------------------- - */ - void bson_string_truncate (bson_string_t *string, /* IN */ uint32_t len) /* IN */ { + /* Does not preserve UTF-8 validity; deprecated. + * Although the documentation only describes truncation as decreasing the length, we have undocumented requirements: + * the string may grow or shrink, and the buffer is expected to be allocated using the same power-of-two scheme as + * when growing to append. No effect if 'string' already has the requested length, regardless of the allocation size. + * When extending string length, this implementation is guaranteed to fill with NUL bytes. Previous versions left the + * new buffer contents undefined. 
+ */ BSON_ASSERT_PARAM (string); - if (len == string->len) { - return; - } - uint32_t needed = len; - BSON_ASSERT (needed < UINT32_MAX); - needed += 1u; // Add one for trailing NULL byte. - uint32_t alloc = bson_next_power_of_two_u32 (needed); - if (alloc == 0) { - // Overflowed: saturate at UINT32_MAX. - alloc = UINT32_MAX; + BSON_ASSERT (len < UINT32_MAX); + + uint32_t old_len = string->len; + if (len != old_len) { + uint32_t alloc = mcommon_next_power_of_two_u32 (len + 1u); + char *buffer = bson_realloc (string->str, alloc); + string->str = buffer; + string->alloc = alloc; + string->len = len; + + if (len < old_len) { + buffer[len] = '\0'; + } else { + memset (buffer + old_len, 0, len + 1 - old_len); + } } - - string->str = bson_realloc (string->str, alloc); - string->alloc = alloc; - string->len = len; - - string->str[string->len] = '\0'; } diff --git a/src/libbson/src/bson/bson-utf8.c b/src/libbson/src/bson/bson-utf8.c index a44a01ea626..32506a28691 100644 --- a/src/libbson/src/bson/bson-utf8.c +++ b/src/libbson/src/bson/bson-utf8.c @@ -18,68 +18,12 @@ #include #include +#include +#include +#include #include +#include #include -#include - - -/* - *-------------------------------------------------------------------------- - * - * _bson_utf8_get_sequence -- - * - * Determine the sequence length of the first UTF-8 character in - * @utf8. The sequence length is stored in @seq_length and the mask - * for the first character is stored in @first_mask. - * - * Returns: - * None. - * - * Side effects: - * @seq_length is set. - * @first_mask is set. 
- * - *-------------------------------------------------------------------------- - */ - -static BSON_INLINE void -_bson_utf8_get_sequence (const char *utf8, /* IN */ - uint8_t *seq_length, /* OUT */ - uint8_t *first_mask) /* OUT */ -{ - unsigned char c = *(const unsigned char *) utf8; - uint8_t m; - uint8_t n; - - /* - * See the following[1] for a description of what the given multi-byte - * sequences will be based on the bits set of the first byte. We also need - * to mask the first byte based on that. All subsequent bytes are masked - * against 0x3F. - * - * [1] http://www.joelonsoftware.com/articles/Unicode.html - */ - - if ((c & 0x80) == 0) { - n = 1; - m = 0x7F; - } else if ((c & 0xE0) == 0xC0) { - n = 2; - m = 0x1F; - } else if ((c & 0xF0) == 0xE0) { - n = 3; - m = 0x0F; - } else if ((c & 0xF8) == 0xF0) { - n = 4; - m = 0x07; - } else { - n = 0; - m = 0; - } - - *seq_length = n; - *first_mask = m; -} /* @@ -97,10 +41,14 @@ _bson_utf8_get_sequence (const char *utf8, /* IN */ * However, some languages such as Python can send UTF-8 encoded * strings with NUL's in them. * + * Note that the two-byte sequence "C0 80" is also interpreted as an + * internal NUL, for historical reasons. This sequence is considered + * invalid according to RFC3629. + * * Parameters: * @utf8: A UTF-8 encoded string. * @utf8_len: The length of @utf8 in bytes. - * @allow_null: If \0 is allowed within @utf8, excluding trailing \0. + * @allow_null: If the single "00" byte or two-byte sequence "C0 80" are allowed internally within @utf8. * * Returns: * true if @utf8 is valid UTF-8. otherwise false. @@ -125,7 +73,7 @@ bson_utf8_validate (const char *utf8, /* IN */ BSON_ASSERT (utf8); for (i = 0; i < utf8_len; i += seq_length) { - _bson_utf8_get_sequence (&utf8[i], &seq_length, &first_mask); + mcommon_utf8_get_sequence (&utf8[i], &seq_length, &first_mask); /* * Ensure we have a valid multi-byte sequence length. 
@@ -231,116 +179,6 @@ bson_utf8_validate (const char *utf8, /* IN */ return true; } -/* - *-------------------------------------------------------------------------- - * - * _is_special_char -- - * - * Uses a bit mask to check if a character requires special formatting - * or not. Called from bson_utf8_escape_for_json. - * - * Parameters: - * @c: An unsigned char c. - * - * Returns: - * true if @c requires special formatting. otherwise false. - * - * Side effects: - * None. - * - *-------------------------------------------------------------------------- - */ - -static BSON_INLINE bool -_is_special_char (unsigned char c) -{ - /* - C++ equivalent: - std::bitset<256> charmap = [...] - return charmap[c]; - */ - static const bson_unichar_t charmap[8] = {0xffffffff, // control characters - 0x00000004, // double quote " - 0x10000000, // backslash - 0x00000000, - 0xffffffff, - 0xffffffff, - 0xffffffff, - 0xffffffff}; // non-ASCII - const int int_index = ((int) c) / ((int) sizeof (bson_unichar_t) * 8); - const int bit_index = ((int) c) & ((int) sizeof (bson_unichar_t) * 8 - 1); - return ((charmap[int_index] >> bit_index) & ((bson_unichar_t) 1)) != 0u; -} - -/* - *-------------------------------------------------------------------------- - * - * _bson_utf8_handle_special_char -- - * - * Appends a special character in the correct format when converting - * from UTF-8 to JSON. This includes characters that should be escaped - * as well as ASCII control characters. - * - * Normal ASCII characters and multi-byte UTF-8 sequences are handled - * in bson_utf8_escape_for_json, where this function is called from. - * - * Parameters: - * @c: A uint8_t ASCII codepoint. - * @str: A string to append the special character to. - * - * Returns: - * None. - * - * Side effects: - * None. 
- * - *-------------------------------------------------------------------------- - */ - -static BSON_INLINE void -_bson_utf8_handle_special_char (const uint8_t c, /* IN */ - mcommon_string_t *str) /* OUT */ -{ - BSON_ASSERT (c < 0x80u); - BSON_ASSERT (str); - - switch (c) { - case '"': - mcommon_string_append (str, "\\\""); - break; - case '\\': - mcommon_string_append (str, "\\\\"); - break; - case '\b': - mcommon_string_append (str, "\\b"); - break; - case '\f': - mcommon_string_append (str, "\\f"); - break; - case '\n': - mcommon_string_append (str, "\\n"); - break; - case '\r': - mcommon_string_append (str, "\\r"); - break; - case '\t': - mcommon_string_append (str, "\\t"); - break; - default: { - // ASCII control character - BSON_ASSERT (c < 0x20u); - - const char digits[] = "0123456789abcdef"; - char codepoint[6] = "\\u0000"; - - codepoint[4] = digits[(c >> 4) & 0x0fu]; - codepoint[5] = digits[c & 0x0fu]; - - _bson_string_append_ex (str, codepoint, sizeof (codepoint)); - break; - } - } -} /* *-------------------------------------------------------------------------- @@ -355,6 +193,10 @@ _bson_utf8_handle_special_char (const uint8_t c, /* IN */ * byte is found before @utf8_len bytes, it will be converted to the * two byte UTF-8 sequence. * + * The two-byte sequence "C0 80" is also interpreted as an internal NUL, + * for historical reasons. This sequence is considered invalid according + * to RFC3629. + * * Parameters: * @utf8: A UTF-8 encoded string. * @utf8_len: The length of @utf8 in bytes or -1 if NUL terminated. 
@@ -372,114 +214,43 @@ char * bson_utf8_escape_for_json (const char *utf8, /* IN */ ssize_t utf8_len) /* IN */ { - bool length_provided = true; - size_t utf8_ulen; - - BSON_ASSERT (utf8); + uint32_t len32; + bool allow_nul; if (utf8_len < 0) { - length_provided = false; - utf8_ulen = strlen (utf8); - } else { - utf8_ulen = (size_t) utf8_len; - } - - if (utf8_ulen == 0) { - return bson_strdup (""); - } - - const char *const end = utf8 + utf8_ulen; - - mcommon_string_t *const str = _bson_string_alloc (utf8_ulen); - - size_t normal_chars_seen = 0u; - - do { - const uint8_t current_byte = (uint8_t) utf8[normal_chars_seen]; - if (!_is_special_char (current_byte)) { - // Normal character, no need to do anything besides iterate - // Copy rest of the string if we reach the end - normal_chars_seen++; - utf8_ulen--; - if (utf8_ulen == 0) { - _bson_string_append_ex (str, utf8, normal_chars_seen); - break; - } - - continue; - } - - // Reached a special character. Copy over all of normal characters - // we have passed so far - if (normal_chars_seen > 0) { - _bson_string_append_ex (str, utf8, normal_chars_seen); - utf8 += normal_chars_seen; - normal_chars_seen = 0; - } - - // Check if expected char length goes past end - // bson_utf8_get_char will crash without this check - { - uint8_t mask; - uint8_t length_of_char; - - _bson_utf8_get_sequence (utf8, &length_of_char, &mask); - if (utf8 > end - length_of_char) { - goto invalid_utf8; - } - } - - // Check for null character - // Null characters are only allowed if the length is provided - if (utf8[0] == '\0' || (utf8[0] == '\xc0' && utf8[1] == '\x80')) { - if (!length_provided) { - goto invalid_utf8; - } - - mcommon_string_append (str, "\\u0000"); - utf8_ulen -= *utf8 ? 2u : 1u; - utf8 += *utf8 ? 
2 : 1; - continue; - } - - // Multi-byte UTF-8 sequence - if (current_byte > 0x7fu) { - const char *utf8_old = utf8; - size_t char_len; - - bson_unichar_t unichar = bson_utf8_get_char (utf8); - - if (!unichar) { - goto invalid_utf8; - } - - mcommon_string_append_unichar (str, unichar); - utf8 = bson_utf8_next_char (utf8); - - char_len = (size_t) (utf8 - utf8_old); - BSON_ASSERT (utf8_ulen >= char_len); - utf8_ulen -= char_len; - - continue; + size_t sizet_len = strlen (utf8); + if (sizet_len < UINT32_MAX) { + len32 = (uint32_t) sizet_len; + allow_nul = false; + } else { + return NULL; } - - // Special ASCII characters (control chars and misc.) - _bson_utf8_handle_special_char (current_byte, str); - - if (current_byte > 0) { - utf8++; + } else { + if (mcommon_in_range_signed (uint32_t, utf8_len) && (uint32_t) utf8_len < UINT32_MAX) { + len32 = utf8_len; + allow_nul = true; } else { - goto invalid_utf8; + return NULL; } + } - utf8_ulen--; - } while (utf8_ulen > 0); - - return mcommon_string_free (str, false); + /* The new private implementation of mcommon_json_append_escaped() avoids + * parsing UTF-8 sequences at all in most cases. It preserves the validity + * of valid sequences, but it will not catch most UTF-8 errors. For compatibility + * at the expense of performance, we emulate the old behavior in this wrapper. 
+ */ + if (!bson_utf8_validate (utf8, (size_t) len32, allow_nul)) { + return NULL; + } -invalid_utf8: - mcommon_string_free (str, true); - return NULL; + mcommon_string_append_t append; + mcommon_string_new_with_capacity_as_append (&append, len32); + if (mcommon_json_append_escaped (&append, utf8, len32, allow_nul)) { + return mcommon_string_from_append_destroy_with_steal (&append); + } else { + mcommon_string_from_append_destroy (&append); + return NULL; + } } @@ -512,7 +283,7 @@ bson_utf8_get_char (const char *utf8) /* IN */ BSON_ASSERT (utf8); - _bson_utf8_get_sequence (utf8, &num, &mask); + mcommon_utf8_get_sequence (utf8, &num, &mask); c = (*utf8) & mask; for (i = 1; i < num; i++) { @@ -551,7 +322,7 @@ bson_utf8_next_char (const char *utf8) /* IN */ BSON_ASSERT (utf8); - _bson_utf8_get_sequence (utf8, &num, &mask); + mcommon_utf8_get_sequence (utf8, &num, &mask); return utf8 + num; } @@ -585,28 +356,6 @@ bson_utf8_from_unichar (bson_unichar_t unichar, /* IN */ char utf8[BSON_ENSURE_ARRAY_PARAM_SIZE (6)], /* OUT */ uint32_t *len) /* OUT */ { - BSON_ASSERT (utf8); - BSON_ASSERT (len); - - if (unichar <= 0x7F) { - utf8[0] = unichar; - *len = 1; - } else if (unichar <= 0x7FF) { - *len = 2; - utf8[0] = 0xC0 | ((unichar >> 6) & 0x3F); - utf8[1] = 0x80 | ((unichar) & 0x3F); - } else if (unichar <= 0xFFFF) { - *len = 3; - utf8[0] = 0xE0 | ((unichar >> 12) & 0xF); - utf8[1] = 0x80 | ((unichar >> 6) & 0x3F); - utf8[2] = 0x80 | ((unichar) & 0x3F); - } else if (unichar <= 0x1FFFFF) { - *len = 4; - utf8[0] = 0xF0 | ((unichar >> 18) & 0x7); - utf8[1] = 0x80 | ((unichar >> 12) & 0x3F); - utf8[2] = 0x80 | ((unichar >> 6) & 0x3F); - utf8[3] = 0x80 | ((unichar) & 0x3F); - } else { - *len = 0; - } + // Inlined implementation from common-utf8-private + mcommon_utf8_from_unichar (unichar, utf8, len); } diff --git a/src/libbson/src/bson/bson.c b/src/libbson/src/bson/bson.c index b21279e810b..a1854b97fac 100644 --- a/src/libbson/src/bson/bson.c +++ b/src/libbson/src/bson/bson.c @@ 
-20,12 +20,10 @@ #include #include #include +#include #include -#include #include -#include "common-b64-private.h" - #include #include @@ -35,11 +33,6 @@ #endif -#ifndef BSON_MAX_RECURSION -#define BSON_MAX_RECURSION 200 -#endif - - typedef enum { BSON_VALIDATE_PHASE_START, BSON_VALIDATE_PHASE_TOP, @@ -63,29 +56,6 @@ typedef struct { } bson_validate_state_t; -typedef struct { - uint32_t count; - bool keys; - ssize_t *err_offset; - uint32_t depth; - mcommon_string_t *str; - bson_json_mode_t mode; - int32_t max_len; - bool max_len_reached; -} bson_json_state_t; - - -/* - * Forward declarations. - */ -static bool -_bson_as_json_visit_array (const bson_iter_t *iter, const char *key, const bson_t *v_array, void *data); -static bool -_bson_as_json_visit_document (const bson_iter_t *iter, const char *key, const bson_t *v_document, void *data); -static char * -_bson_as_json_visit_all ( - const bson_t *bson, size_t *length, bson_json_mode_t mode, int32_t max_len, bool is_outermost_array); - /* * Globals. */ @@ -1439,41 +1409,6 @@ bson_append_oid (bson_t *bson, const char *key, int key_length, const bson_oid_t } -/* - *-------------------------------------------------------------------------- - * - * _bson_append_regex_options_sorted -- - * - * Helper to append regex options to a buffer in a sorted order. - * Any duplicate or unsupported options will be ignored. - * - * Parameters: - * @buffer: Buffer to which sorted options will be appended - * @options: Regex options - * - * Returns: - * None. - * - * Side effects: - * None. 
- * - *-------------------------------------------------------------------------- - */ - -static BSON_INLINE void -_bson_append_regex_options_sorted (mcommon_string_t *buffer, /* IN */ - const char *options) /* IN */ -{ - const char *c; - - for (c = BSON_REGEX_OPTIONS_SORTED; *c; c++) { - if (strchr (options, *c)) { - mcommon_string_append_c (buffer, *c); - } - } -} - - bool bson_append_regex (bson_t *bson, const char *key, int key_length, const char *regex, const char *options) { @@ -1500,10 +1435,10 @@ bson_append_regex_w_len ( options = ""; } - mcommon_string_t *const options_sorted = _bson_string_alloc (strlen (options)); - _bson_append_regex_options_sorted (options_sorted, options); - - if (options_sorted->len > UINT32_MAX - 1u) { + size_t options_len = strlen (options); + mcommon_string_append_t options_sorted; + mcommon_string_new_with_capacity_as_append (&options_sorted, (uint32_t) options_len); + if (!mcommon_string_append_selected_chars (&options_sorted, BSON_REGEX_OPTIONS_SORTED, options, options_len)) { goto append_failure; } @@ -1514,14 +1449,15 @@ bson_append_regex_w_len ( BSON_APPEND_BYTES_ADD_ARGUMENT (args, &gZero, sizeof (gZero)); BSON_APPEND_BYTES_ADD_CHECKED_STRING (args, regex, regex_length); BSON_APPEND_BYTES_ADD_ARGUMENT (args, &gZero, sizeof (gZero)); - BSON_APPEND_BYTES_ADD_ARGUMENT (args, options_sorted->str, (options_sorted->len + 1u)); + BSON_APPEND_BYTES_ADD_ARGUMENT ( + args, mcommon_str_from_append (&options_sorted), 1u + mcommon_strlen_from_append (&options_sorted)); BSON_APPEND_BYTES_APPLY_ARGUMENTS (bson, args); ret = true; append_failure: - (void) mcommon_string_free (options_sorted, true); + mcommon_string_from_append_destroy (&options_sorted); return ret; } @@ -2387,745 +2323,40 @@ bson_equal (const bson_t *bson, const bson_t *other) } -static bool -_bson_as_json_visit_utf8 (const bson_iter_t *iter, const char *key, size_t v_utf8_len, const char *v_utf8, void *data) -{ - bson_json_state_t *state = data; - char *escaped; - - 
BSON_UNUSED (iter); - BSON_UNUSED (key); - - escaped = bson_utf8_escape_for_json (v_utf8, v_utf8_len); - - if (escaped) { - mcommon_string_append (state->str, "\""); - mcommon_string_append (state->str, escaped); - mcommon_string_append (state->str, "\""); - bson_free (escaped); - return false; - } - - return true; -} - - -static bool -_bson_as_json_visit_int32 (const bson_iter_t *iter, const char *key, int32_t v_int32, void *data) -{ - bson_json_state_t *state = data; - - BSON_UNUSED (iter); - BSON_UNUSED (key); - - if (state->mode == BSON_JSON_MODE_CANONICAL) { - mcommon_string_append_printf (state->str, "{ \"$numberInt\" : \"%" PRId32 "\" }", v_int32); - } else { - mcommon_string_append_printf (state->str, "%" PRId32, v_int32); - } - - return false; -} - - -static bool -_bson_as_json_visit_int64 (const bson_iter_t *iter, const char *key, int64_t v_int64, void *data) -{ - bson_json_state_t *state = data; - - BSON_UNUSED (iter); - BSON_UNUSED (key); - - if (state->mode == BSON_JSON_MODE_CANONICAL) { - mcommon_string_append_printf (state->str, "{ \"$numberLong\" : \"%" PRId64 "\" }", v_int64); - } else { - mcommon_string_append_printf (state->str, "%" PRId64, v_int64); - } - - return false; -} - - -static bool -_bson_as_json_visit_decimal128 (const bson_iter_t *iter, const char *key, const bson_decimal128_t *value, void *data) -{ - bson_json_state_t *state = data; - char decimal128_string[BSON_DECIMAL128_STRING]; - - BSON_UNUSED (iter); - BSON_UNUSED (key); - - bson_decimal128_to_string (value, decimal128_string); - - mcommon_string_append (state->str, "{ \"$numberDecimal\" : \""); - mcommon_string_append (state->str, decimal128_string); - mcommon_string_append (state->str, "\" }"); - - return false; -} - - -static bool -_bson_as_json_visit_double (const bson_iter_t *iter, const char *key, double v_double, void *data) -{ - bson_json_state_t *state = data; - mcommon_string_t *str = state->str; - uint32_t start_len; - bool legacy; - - BSON_UNUSED (iter); - 
BSON_UNUSED (key); - - /* Determine if legacy (i.e. unwrapped) output should be used. Relaxed mode - * will use this for nan and inf values, which we check manually since old - * platforms may not have isinf or isnan. */ - legacy = state->mode == BSON_JSON_MODE_LEGACY || - (state->mode == BSON_JSON_MODE_RELAXED && !(v_double != v_double || v_double * 0 != 0)); - - if (!legacy) { - mcommon_string_append (state->str, "{ \"$numberDouble\" : \""); - } - - if (!legacy && v_double != v_double) { - mcommon_string_append (str, "NaN"); - } else if (!legacy && v_double * 0 != 0) { - if (v_double > 0) { - mcommon_string_append (str, "Infinity"); - } else { - mcommon_string_append (str, "-Infinity"); - } - } else { - start_len = str->len; - mcommon_string_append_printf (str, "%.20g", v_double); - - /* ensure trailing ".0" to distinguish "3" from "3.0" */ - if (strspn (&str->str[start_len], "0123456789-") == str->len - start_len) { - mcommon_string_append (str, ".0"); - } - } - - if (!legacy) { - mcommon_string_append (state->str, "\" }"); - } - - return false; -} - - -static bool -_bson_as_json_visit_undefined (const bson_iter_t *iter, const char *key, void *data) -{ - bson_json_state_t *state = data; - - BSON_UNUSED (iter); - BSON_UNUSED (key); - - mcommon_string_append (state->str, "{ \"$undefined\" : true }"); - - return false; -} - - -static bool -_bson_as_json_visit_null (const bson_iter_t *iter, const char *key, void *data) -{ - bson_json_state_t *state = data; - - BSON_UNUSED (iter); - BSON_UNUSED (key); - - mcommon_string_append (state->str, "null"); - - return false; -} - - -static bool -_bson_as_json_visit_oid (const bson_iter_t *iter, const char *key, const bson_oid_t *oid, void *data) -{ - bson_json_state_t *state = data; - char str[25]; - - BSON_UNUSED (iter); - BSON_UNUSED (key); - - bson_oid_to_string (oid, str); - mcommon_string_append (state->str, "{ \"$oid\" : \""); - mcommon_string_append (state->str, str); - mcommon_string_append (state->str, "\" }"); - - 
return false; -} - - -static bool -_bson_as_json_visit_binary (const bson_iter_t *iter, - const char *key, - bson_subtype_t v_subtype, - size_t v_binary_len, - const uint8_t *v_binary, - void *data) -{ - bson_json_state_t *state = data; - size_t b64_len; - char *b64; - - BSON_UNUSED (iter); - BSON_UNUSED (key); - - b64_len = mcommon_b64_ntop_calculate_target_size (v_binary_len); - b64 = bson_malloc0 (b64_len); - BSON_ASSERT (mcommon_b64_ntop (v_binary, v_binary_len, b64, b64_len) != -1); - - if (state->mode == BSON_JSON_MODE_CANONICAL || state->mode == BSON_JSON_MODE_RELAXED) { - mcommon_string_append (state->str, "{ \"$binary\" : { \"base64\" : \""); - mcommon_string_append (state->str, b64); - mcommon_string_append (state->str, "\", \"subType\" : \""); - mcommon_string_append_printf (state->str, "%02x", v_subtype); - mcommon_string_append (state->str, "\" } }"); - } else { - mcommon_string_append (state->str, "{ \"$binary\" : \""); - mcommon_string_append (state->str, b64); - mcommon_string_append (state->str, "\", \"$type\" : \""); - mcommon_string_append_printf (state->str, "%02x", v_subtype); - mcommon_string_append (state->str, "\" }"); - } - - bson_free (b64); - - return false; -} - - -static bool -_bson_as_json_visit_bool (const bson_iter_t *iter, const char *key, bool v_bool, void *data) -{ - bson_json_state_t *state = data; - - BSON_UNUSED (iter); - BSON_UNUSED (key); - - mcommon_string_append (state->str, v_bool ? "true" : "false"); - - return false; -} - - -static bool -_bson_as_json_visit_date_time (const bson_iter_t *iter, const char *key, int64_t msec_since_epoch, void *data) -{ - bson_json_state_t *state = data; - - BSON_UNUSED (iter); - BSON_UNUSED (key); - - const int64_t msec_since_Y10K = 253402300800000; // Milliseconds since 10000-01-01T00:00:00Z. 
- - if (state->mode == BSON_JSON_MODE_CANONICAL || - (state->mode == BSON_JSON_MODE_RELAXED && (msec_since_epoch < 0 || msec_since_epoch >= msec_since_Y10K))) { - mcommon_string_append (state->str, "{ \"$date\" : { \"$numberLong\" : \""); - mcommon_string_append_printf (state->str, "%" PRId64, msec_since_epoch); - mcommon_string_append (state->str, "\" } }"); - } else if (state->mode == BSON_JSON_MODE_RELAXED) { - mcommon_string_append (state->str, "{ \"$date\" : \""); - _bson_iso8601_date_format (msec_since_epoch, state->str); - mcommon_string_append (state->str, "\" }"); - } else { - mcommon_string_append (state->str, "{ \"$date\" : "); - mcommon_string_append_printf (state->str, "%" PRId64, msec_since_epoch); - mcommon_string_append (state->str, " }"); - } - - return false; -} - - -static bool -_bson_as_json_visit_regex ( - const bson_iter_t *iter, const char *key, const char *v_regex, const char *v_options, void *data) -{ - bson_json_state_t *state = data; - char *escaped; - - BSON_UNUSED (iter); - BSON_UNUSED (key); - - escaped = bson_utf8_escape_for_json (v_regex, -1); - if (!escaped) { - return true; - } - - if (state->mode == BSON_JSON_MODE_CANONICAL || state->mode == BSON_JSON_MODE_RELAXED) { - mcommon_string_append (state->str, "{ \"$regularExpression\" : { \"pattern\" : \""); - mcommon_string_append (state->str, escaped); - mcommon_string_append (state->str, "\", \"options\" : \""); - _bson_append_regex_options_sorted (state->str, v_options); - mcommon_string_append (state->str, "\" } }"); - } else { - mcommon_string_append (state->str, "{ \"$regex\" : \""); - mcommon_string_append (state->str, escaped); - mcommon_string_append (state->str, "\", \"$options\" : \""); - _bson_append_regex_options_sorted (state->str, v_options); - mcommon_string_append (state->str, "\" }"); - } - - bson_free (escaped); - - return false; -} - - -static bool -_bson_as_json_visit_timestamp ( - const bson_iter_t *iter, const char *key, uint32_t v_timestamp, uint32_t 
v_increment, void *data) -{ - bson_json_state_t *state = data; - - BSON_UNUSED (iter); - BSON_UNUSED (key); - - mcommon_string_append (state->str, "{ \"$timestamp\" : { \"t\" : "); - mcommon_string_append_printf (state->str, "%u", v_timestamp); - mcommon_string_append (state->str, ", \"i\" : "); - mcommon_string_append_printf (state->str, "%u", v_increment); - mcommon_string_append (state->str, " } }"); - - return false; -} - - -static bool -_bson_as_json_visit_dbpointer (const bson_iter_t *iter, - const char *key, - size_t v_collection_len, - const char *v_collection, - const bson_oid_t *v_oid, - void *data) -{ - bson_json_state_t *state = data; - char *escaped; - char str[25]; - - BSON_UNUSED (iter); - BSON_UNUSED (key); - BSON_UNUSED (v_collection_len); - - escaped = bson_utf8_escape_for_json (v_collection, -1); - if (!escaped) { - return true; - } - - if (state->mode == BSON_JSON_MODE_CANONICAL || state->mode == BSON_JSON_MODE_RELAXED) { - mcommon_string_append (state->str, "{ \"$dbPointer\" : { \"$ref\" : \""); - mcommon_string_append (state->str, escaped); - mcommon_string_append (state->str, "\""); - - if (v_oid) { - bson_oid_to_string (v_oid, str); - mcommon_string_append (state->str, ", \"$id\" : { \"$oid\" : \""); - mcommon_string_append (state->str, str); - mcommon_string_append (state->str, "\" }"); - } - - mcommon_string_append (state->str, " } }"); - } else { - mcommon_string_append (state->str, "{ \"$ref\" : \""); - mcommon_string_append (state->str, escaped); - mcommon_string_append (state->str, "\""); - - if (v_oid) { - bson_oid_to_string (v_oid, str); - mcommon_string_append (state->str, ", \"$id\" : \""); - mcommon_string_append (state->str, str); - mcommon_string_append (state->str, "\""); - } - - mcommon_string_append (state->str, " }"); - } - - bson_free (escaped); - - return false; -} - - -static bool -_bson_as_json_visit_minkey (const bson_iter_t *iter, const char *key, void *data) -{ - bson_json_state_t *state = data; - - BSON_UNUSED 
(iter); - BSON_UNUSED (key); - - mcommon_string_append (state->str, "{ \"$minKey\" : 1 }"); - - return false; -} - - -static bool -_bson_as_json_visit_maxkey (const bson_iter_t *iter, const char *key, void *data) -{ - bson_json_state_t *state = data; - - BSON_UNUSED (iter); - BSON_UNUSED (key); - - mcommon_string_append (state->str, "{ \"$maxKey\" : 1 }"); - - return false; -} - - -static bool -_bson_as_json_visit_before (const bson_iter_t *iter, const char *key, void *data) -{ - bson_json_state_t *state = data; - char *escaped; - - BSON_UNUSED (iter); - - if (state->max_len_reached) { - return true; - } - - if (state->count) { - mcommon_string_append (state->str, ", "); - } - - if (state->keys) { - escaped = bson_utf8_escape_for_json (key, -1); - if (escaped) { - mcommon_string_append (state->str, "\""); - mcommon_string_append (state->str, escaped); - mcommon_string_append (state->str, "\" : "); - bson_free (escaped); - } else { - return true; - } - } - - state->count++; - - return false; -} - - -static bool -_bson_as_json_visit_after (const bson_iter_t *iter, const char *key, void *data) -{ - bson_json_state_t *state = data; - - BSON_UNUSED (iter); - BSON_UNUSED (key); - - if (state->max_len == BSON_MAX_LEN_UNLIMITED) { - return false; - } - - if (mcommon_cmp_greater_equal_us (state->str->len, state->max_len)) { - state->max_len_reached = true; - - if (mcommon_cmp_greater_us (state->str->len, state->max_len)) { - BSON_ASSERT (mcommon_in_range_signed (uint32_t, state->max_len)); - /* Truncate string to maximum length */ - mcommon_string_truncate (state->str, (uint32_t) state->max_len); - } - - return true; - } - - return false; -} - - -static void -_bson_as_json_visit_corrupt (const bson_iter_t *iter, void *data) -{ - *(((bson_json_state_t *) data)->err_offset) = iter->off; -} - - -static bool -_bson_as_json_visit_code (const bson_iter_t *iter, const char *key, size_t v_code_len, const char *v_code, void *data) -{ - bson_json_state_t *state = data; - char 
*escaped; - - BSON_UNUSED (iter); - BSON_UNUSED (key); - - escaped = bson_utf8_escape_for_json (v_code, v_code_len); - if (!escaped) { - return true; - } - - mcommon_string_append (state->str, "{ \"$code\" : \""); - mcommon_string_append (state->str, escaped); - mcommon_string_append (state->str, "\" }"); - bson_free (escaped); - - return false; -} - - -static bool -_bson_as_json_visit_symbol ( - const bson_iter_t *iter, const char *key, size_t v_symbol_len, const char *v_symbol, void *data) -{ - bson_json_state_t *state = data; - char *escaped; - - BSON_UNUSED (iter); - BSON_UNUSED (key); - - escaped = bson_utf8_escape_for_json (v_symbol, v_symbol_len); - if (!escaped) { - return true; - } - - if (state->mode == BSON_JSON_MODE_CANONICAL || state->mode == BSON_JSON_MODE_RELAXED) { - mcommon_string_append (state->str, "{ \"$symbol\" : \""); - mcommon_string_append (state->str, escaped); - mcommon_string_append (state->str, "\" }"); - } else { - mcommon_string_append (state->str, "\""); - mcommon_string_append (state->str, escaped); - mcommon_string_append (state->str, "\""); - } - - bson_free (escaped); - - return false; -} - - -static bool -_bson_as_json_visit_codewscope ( - const bson_iter_t *iter, const char *key, size_t v_code_len, const char *v_code, const bson_t *v_scope, void *data) -{ - bson_json_state_t *state = data; - char *code_escaped; - char *scope; - int32_t max_scope_len = BSON_MAX_LEN_UNLIMITED; - - BSON_UNUSED (iter); - BSON_UNUSED (key); - - code_escaped = bson_utf8_escape_for_json (v_code, v_code_len); - if (!code_escaped) { - return true; - } - - mcommon_string_append (state->str, "{ \"$code\" : \""); - mcommon_string_append (state->str, code_escaped); - mcommon_string_append (state->str, "\", \"$scope\" : "); - - bson_free (code_escaped); - - /* Encode scope with the same mode */ - if (state->max_len != BSON_MAX_LEN_UNLIMITED) { - BSON_ASSERT (mcommon_in_range_unsigned (int32_t, state->str->len)); - max_scope_len = BSON_MAX (0, state->max_len - 
(int32_t) state->str->len); - } - - scope = _bson_as_json_visit_all (v_scope, NULL, state->mode, max_scope_len, false); - - if (!scope) { - return true; - } - - mcommon_string_append (state->str, scope); - mcommon_string_append (state->str, " }"); - - bson_free (scope); - - return false; -} - - -static const bson_visitor_t bson_as_json_visitors = { - _bson_as_json_visit_before, _bson_as_json_visit_after, _bson_as_json_visit_corrupt, - _bson_as_json_visit_double, _bson_as_json_visit_utf8, _bson_as_json_visit_document, - _bson_as_json_visit_array, _bson_as_json_visit_binary, _bson_as_json_visit_undefined, - _bson_as_json_visit_oid, _bson_as_json_visit_bool, _bson_as_json_visit_date_time, - _bson_as_json_visit_null, _bson_as_json_visit_regex, _bson_as_json_visit_dbpointer, - _bson_as_json_visit_code, _bson_as_json_visit_symbol, _bson_as_json_visit_codewscope, - _bson_as_json_visit_int32, _bson_as_json_visit_timestamp, _bson_as_json_visit_int64, - _bson_as_json_visit_maxkey, _bson_as_json_visit_minkey, NULL, /* visit_unsupported_type */ - _bson_as_json_visit_decimal128, -}; - - -static bool -_bson_as_json_visit_document (const bson_iter_t *iter, const char *key, const bson_t *v_document, void *data) -{ - bson_json_state_t *state = data; - bson_json_state_t child_state = {0, true, state->err_offset}; - bson_iter_t child; - - BSON_UNUSED (iter); - BSON_UNUSED (key); - - if (state->depth >= BSON_MAX_RECURSION) { - mcommon_string_append (state->str, "{ ... 
}"); - return false; - } - - if (bson_iter_init (&child, v_document)) { - child_state.str = mcommon_string_new ("{ "); - child_state.depth = state->depth + 1; - child_state.mode = state->mode; - child_state.max_len = BSON_MAX_LEN_UNLIMITED; - if (state->max_len != BSON_MAX_LEN_UNLIMITED) { - BSON_ASSERT (mcommon_in_range_unsigned (int32_t, state->str->len)); - child_state.max_len = BSON_MAX (0, state->max_len - (int32_t) state->str->len); - } - - child_state.max_len_reached = child_state.max_len == 0; - - if (bson_iter_visit_all (&child, &bson_as_json_visitors, &child_state)) { - if (child_state.max_len_reached) { - mcommon_string_append (state->str, child_state.str->str); - } - - mcommon_string_free (child_state.str, true); - - /* If max_len was reached, we return a success state to ensure that - * VISIT_AFTER is still called - */ - return !child_state.max_len_reached; - } - - mcommon_string_append (child_state.str, " }"); - mcommon_string_append (state->str, child_state.str->str); - mcommon_string_free (child_state.str, true); - } - - return false; -} - - -static bool -_bson_as_json_visit_array (const bson_iter_t *iter, const char *key, const bson_t *v_array, void *data) +char * +bson_as_json_with_opts (const bson_t *bson, size_t *length, const bson_json_opts_t *opts) { - bson_json_state_t *state = data; - bson_json_state_t child_state = {0, false, state->err_offset}; - bson_iter_t child; - - BSON_UNUSED (iter); - BSON_UNUSED (key); - - if (state->depth >= BSON_MAX_RECURSION) { - mcommon_string_append (state->str, "{ ... 
}"); - return false; - } - - if (bson_iter_init (&child, v_array)) { - child_state.str = mcommon_string_new ("[ "); - child_state.depth = state->depth + 1; - child_state.mode = state->mode; - child_state.max_len = BSON_MAX_LEN_UNLIMITED; - if (state->max_len != BSON_MAX_LEN_UNLIMITED) { - BSON_ASSERT (mcommon_in_range_unsigned (int32_t, state->str->len)); - child_state.max_len = BSON_MAX (0, state->max_len - (int32_t) state->str->len); - } - - child_state.max_len_reached = child_state.max_len == 0; - - if (bson_iter_visit_all (&child, &bson_as_json_visitors, &child_state)) { - if (child_state.max_len_reached) { - mcommon_string_append (state->str, child_state.str->str); - } - - mcommon_string_free (child_state.str, true); - - /* If max_len was reached, we return a success state to ensure that - * VISIT_AFTER is still called - */ - return !child_state.max_len_reached; - } + BSON_ASSERT_PARAM (bson); + BSON_OPTIONAL_PARAM (length); + BSON_ASSERT_PARAM (opts); - mcommon_string_append (child_state.str, " ]"); - mcommon_string_append (state->str, child_state.str->str); - mcommon_string_free (child_state.str, true); + // Convert the API-specified max length into a literal byte count; max length is transformed from a special value + // (-1) to the maximum representable size. 
+ int32_t limit_i32 = opts->max_len; + uint32_t limit_u32 = 0; + if (limit_i32 == BSON_MAX_LEN_UNLIMITED) { + limit_u32 = UINT32_MAX - 1u; + } else if (limit_i32 > 0) { + limit_u32 = (uint32_t) limit_i32; } - return false; -} + // Use the bson length as an initial buffer capacity guess + mcommon_string_append_t append; + mcommon_string_set_append_with_limit (mcommon_string_new_with_capacity ("", 0, bson->len), &append, limit_u32); - -static char * -_bson_as_json_visit_all ( - const bson_t *bson, size_t *length, bson_json_mode_t mode, int32_t max_len, bool is_outermost_array) -{ - bson_json_state_t state; - bson_iter_t iter; - ssize_t err_offset = -1; - int32_t remaining; - - BSON_ASSERT (bson); - - if (length) { - *length = 0; - } - - if (bson_empty0 (bson)) { + if (opts->is_outermost_array ? mcommon_json_append_bson_array (&append, bson, opts->mode, BSON_MAX_RECURSION) + : mcommon_json_append_bson_document (&append, bson, opts->mode, BSON_MAX_RECURSION)) { if (length) { - *length = 3; + *length = (size_t) mcommon_strlen_from_append (&append); } - - return bson_strdup (is_outermost_array ? "[ ]" : "{ }"); - } - - if (!bson_iter_init (&iter, bson)) { - return NULL; - } - - state.count = 0; - state.keys = !is_outermost_array; - state.str = mcommon_string_new (is_outermost_array ? "[ " : "{ "); - state.depth = 0; - state.err_offset = &err_offset; - state.mode = mode; - state.max_len = max_len; - state.max_len_reached = false; - - if ((bson_iter_visit_all (&iter, &bson_as_json_visitors, &state) || err_offset != -1) && !state.max_len_reached) { - /* - * We were prematurely exited due to corruption or failed visitor. - */ - mcommon_string_free (state.str, true); + return mcommon_string_from_append_destroy_with_steal (&append); + } else { if (length) { *length = 0; } + mcommon_string_from_append_destroy (&append); return NULL; } - - /* Append closing space and } separately, in case we hit the max in between. 
- */ - remaining = state.max_len - state.str->len; - if (state.max_len == BSON_MAX_LEN_UNLIMITED || remaining > 1) { - mcommon_string_append (state.str, is_outermost_array ? " ]" : " }"); - } else if (remaining == 1) { - mcommon_string_append (state.str, " "); - } - - if (length) { - *length = state.str->len; - } - - return mcommon_string_free (state.str, false); -} - - -char * -bson_as_json_with_opts (const bson_t *bson, size_t *length, const bson_json_opts_t *opts) -{ - BSON_ASSERT_PARAM (bson); - BSON_OPTIONAL_PARAM (length); - BSON_ASSERT_PARAM (opts); - return _bson_as_json_visit_all (bson, length, opts->mode, opts->max_len, opts->is_outermost_array); } diff --git a/src/libbson/tests/test-bson-corpus.c b/src/libbson/tests/test-bson-corpus.c index 1ca35e580ec..e112304d2d8 100644 --- a/src/libbson/tests/test-bson-corpus.c +++ b/src/libbson/tests/test-bson-corpus.c @@ -42,23 +42,26 @@ skipped_corpus_test_t VS2013_SKIPPED_CORPUS_TESTS[] = { static void compare_data (const uint8_t *a, uint32_t a_len, const uint8_t *b, uint32_t b_len) { - mcommon_string_t *a_str; - mcommon_string_t *b_str; + mcommon_string_append_t a_str, b_str; uint32_t i; if (a_len != b_len || memcmp (a, b, (size_t) a_len)) { - a_str = mcommon_string_new (NULL); + mcommon_string_new_as_append (&a_str); for (i = 0; i < a_len; i++) { - mcommon_string_append_printf (a_str, "%02" PRIx8, a[i]); + mcommon_string_append_printf (&a_str, "%02" PRIx8, a[i]); } - b_str = mcommon_string_new (NULL); + mcommon_string_new_as_append (&b_str); for (i = 0; i < b_len; i++) { - mcommon_string_append_printf (b_str, "%02" PRIx8, b[i]); + mcommon_string_append_printf (&b_str, "%02" PRIx8, b[i]); } - fprintf ( - stderr, "unequal data of length %" PRIu32 " and %" PRIu32 ":\n%s\n%s\n", a_len, b_len, a_str->str, b_str->str); + fprintf (stderr, + "unequal data of length %" PRIu32 " and %" PRIu32 ":\n%s\n%s\n", + a_len, + b_len, + mcommon_str_from_append (&a_str), + mcommon_str_from_append (&b_str)); abort (); } diff --git 
a/src/libbson/tests/test-iso8601.c b/src/libbson/tests/test-iso8601.c index 156acb83dbb..74c89f3831d 100644 --- a/src/libbson/tests/test-iso8601.c +++ b/src/libbson/tests/test-iso8601.c @@ -3,6 +3,8 @@ #include "bson/bson-iso8601-private.h" #include "TestSuite.h" #include +#include +#include static const bool is_time_t_small = (sizeof (time_t) == sizeof (int32_t)); @@ -33,14 +35,13 @@ test_date (const char *str, int64_t millis) static void test_date_io (const char *str_in, const char *str_out, int64_t millis) { - mcommon_string_t *bson_str; - test_date (str_in, millis); - bson_str = mcommon_string_new (NULL); - _bson_iso8601_date_format (millis, bson_str); - ASSERT_CMPSTR (bson_str->str, str_out); - mcommon_string_free (bson_str, true); + mcommon_string_append_t bson_str; + mcommon_string_new_as_append (&bson_str); + mcommon_iso8601_string_append (&bson_str, millis); + ASSERT_CMPSTR (mcommon_str_from_append (&bson_str), str_out); + mcommon_string_from_append_destroy (&bson_str); } diff --git a/src/libbson/tests/test-json.c b/src/libbson/tests/test-json.c index 6a62370072f..9695c5120c9 100644 --- a/src/libbson/tests/test-json.c +++ b/src/libbson/tests/test-json.c @@ -8,7 +8,9 @@ #include "test-conveniences.h" #include #include +#include #include +#include static ssize_t test_bson_json_read_cb_helper (void *string, uint8_t *buf, size_t len) @@ -754,8 +756,7 @@ static void test_bson_json_read_buffering (void) { bson_t **bsons; - char *json_tmp; - mcommon_string_t *json; + mcommon_string_append_t json; bson_error_t error; bson_t bson_out = BSON_INITIALIZER; int i; @@ -767,7 +768,7 @@ test_bson_json_read_buffering (void) bson_json_reader_t *reader; int r; - json = mcommon_string_new (NULL); + mcommon_string_new_as_append (&json); /* parse between 1 and 10 JSON objects */ for (n_docs = 1; n_docs < 10; n_docs++) { @@ -786,16 +787,15 @@ test_bson_json_read_buffering (void) } /* append the BSON document's JSON representation to "json" */ - json_tmp = 
bson_as_legacy_extended_json (bsons[docs_idx], NULL); - BSON_ASSERT (json_tmp); - mcommon_string_append (json, json_tmp); - bson_free (json_tmp); + BSON_ASSERT ( + mcommon_json_append_bson_document (&json, bsons[docs_idx], BSON_JSON_MODE_LEGACY, BSON_MAX_RECURSION)); } reader = bson_json_data_reader_new (true /* "allow_multiple" is unused */, (size_t) RAND_R (&seed) % 100 /* bufsize*/); - bson_json_data_reader_ingest (reader, (uint8_t *) json->str, json->len); + bson_json_data_reader_ingest ( + reader, (uint8_t *) mcommon_str_from_append (&json), mcommon_strlen_from_append (&json)); for (docs_idx = 0; docs_idx < n_docs; docs_idx++) { bson_reinit (&bson_out); @@ -813,7 +813,7 @@ test_bson_json_read_buffering (void) ASSERT_CMPINT (0, ==, bson_json_reader_read (reader, &bson_out, &error)); bson_json_reader_destroy (reader); - mcommon_string_truncate (json, 0); + mcommon_string_from_append_clear (&json); for (docs_idx = 0; docs_idx < n_docs; docs_idx++) { bson_destroy (bsons[docs_idx]); @@ -823,7 +823,7 @@ test_bson_json_read_buffering (void) } } - mcommon_string_free (json, true); + mcommon_string_from_append_destroy (&json); bson_destroy (&bson_out); } @@ -2738,6 +2738,9 @@ truncate_string (const char *str, size_t len) return truncated; } +/* Run multiple tests against a particular bson_t and bson_json_mode_t, with expected output. + * This test assumes naive string truncation (not UTF-8 preserving) so it's unsuitable + * when expected json output includes multi-byte UTF-8 sequences. */ static void run_bson_as_json_with_opts_tests (bson_t *bson, bson_json_mode_t mode, const char *expected) { @@ -3193,6 +3196,114 @@ test_bson_as_json_with_opts_all_types (void) bson_destroy (&scope); } +/* Helper for test_bson_as_json_with_opts_utf8_truncation. Expects to be able to truncate the + * string output at all lengths except those identified in a variable-length (-1) terminated integer argument list. 
*/ +static void +_test_bson_json_utf8_truncation (bson_t *test_doc, bson_json_mode_t mode, const char *expected, ...) +{ + size_t expected_len = expected ? strlen (expected) : 64; + size_t last_valid_truncation = 0; + + for (size_t checking_len = 0; checking_len < expected_len; checking_len++) { + bool expect_truncation_here = true; + va_list ap; + va_start (ap, expected); + while (true) { + int arg = va_arg (ap, int); + if (arg < 0) { + BSON_ASSERT (arg == -1); + break; + } else if (arg == checking_len) { + expect_truncation_here = false; + } + } + va_end (ap); + if (expect_truncation_here) { + last_valid_truncation = checking_len; + } + + bson_json_opts_t *opts = bson_json_opts_new (mode, checking_len); + size_t json_len; + char *str = bson_as_json_with_opts (test_doc, &json_len, opts); + bson_json_opts_destroy (opts); + + if (str) { + BSON_ASSERT (expected); + ASSERT_CMPSIZE_T (json_len, ==, last_valid_truncation); + + char *str_trunc = bson_strndup (str, json_len); + char *expected_trunc = bson_strndup (expected, last_valid_truncation); + ASSERT_CMPSTR (str_trunc, expected_trunc); + bson_free (str_trunc); + bson_free (expected_trunc); + + bson_free (str); + } else { + BSON_ASSERT (!expected); + } + } + + bson_destroy (test_doc); +} + +static void +test_bson_as_json_with_opts_utf8_truncation (void) +{ + // Plain ASCII, in a subdocument. Just checking that all truncations here are okay. 
+ _test_bson_json_utf8_truncation (BCON_NEW ("doc", "{", "a", BCON_UTF8 ("b"), "}"), + BSON_JSON_MODE_CANONICAL, + "{ \"doc\" : { \"a\" : \"b\" } }", + -1); + + // Escape sequences, also fine to truncate anywhere + _test_bson_json_utf8_truncation (BCON_NEW ("doc", "{", "\x01", BCON_UTF8 ("\xc0\x80"), "}"), + BSON_JSON_MODE_CANONICAL, + "{ \"doc\" : { \"\\u0001\" : \"\\u0000\" } }", + -1); + + // Invalid UTF-8 sequence, rejected by bson_as_json_with_opts + _test_bson_json_utf8_truncation ( + BCON_NEW ("foo\xff\xff bar", "{", "a", BCON_UTF8 ("b"), "}"), BSON_JSON_MODE_CANONICAL, NULL, -1); + + // Valid 2-byte UTF-8 sequence + _test_bson_json_utf8_truncation (BCON_NEW ("foo \xc2\xa9", "{", "\xc2\xa9 bar", BCON_UTF8 ("foo \xc2\xa9 bar"), "}"), + BSON_JSON_MODE_CANONICAL, + "{ \"foo \xc2\xa9\" : { \"\xc2\xa9 bar\" : \"foo \xc2\xa9 bar\" } }", + 8, + 17, + 32, + -1); + + // Valid 3-byte UTF-8 sequence + _test_bson_json_utf8_truncation ( + BCON_NEW ("foo \xef\xbf\xbd", "{", "\xef\xbf\xbd bar", BCON_UTF8 ("foo \xef\xbf\xbd bar"), "}"), + BSON_JSON_MODE_CANONICAL, + "{ \"foo \xef\xbf\xbd\" : { \"\xef\xbf\xbd bar\" : \"foo \xef\xbf\xbd bar\" } }", + 8, + 9, + 18, + 19, + 34, + 35, + -1); + + // Valid 4-byte UTF-8 sequence + _test_bson_json_utf8_truncation ( + BCON_NEW ("foo \xf4\x8f\xbf\xbf", "{", "\xf4\x8f\xbf\xbf bar", BCON_UTF8 ("foo \xf4\x8f\xbf\xbf bar"), "}"), + BSON_JSON_MODE_CANONICAL, + "{ \"foo \xf4\x8f\xbf\xbf\" : { \"\xf4\x8f\xbf\xbf bar\" : \"foo \xf4\x8f\xbf\xbf bar\" } }", + 8, + 9, + 10, + 19, + 20, + 21, + 36, + 37, + 38, + -1); +} + static void test_decimal128_overflowing_exponent (void) { @@ -3782,6 +3893,7 @@ test_json_install (TestSuite *suite) TestSuite_Add (suite, "/bson/as_json_with_opts/maxkey", test_bson_as_json_with_opts_maxkey); TestSuite_Add (suite, "/bson/as_json_with_opts/decimal128", test_bson_as_json_with_opts_decimal128); TestSuite_Add (suite, "/bson/as_json_with_opts/all_types", test_bson_as_json_with_opts_all_types); + TestSuite_Add 
(suite, "/bson/as_json_with_opts/utf8_truncation", test_bson_as_json_with_opts_utf8_truncation); TestSuite_Add (suite, "/bson/parse_array", test_parse_array); TestSuite_Add (suite, "/bson/decimal128_overflowing_exponent", test_decimal128_overflowing_exponent); TestSuite_Add (suite, "/bson/as_json/all_formats", test_bson_as_json_all_formats); diff --git a/src/libbson/tests/test-oid.c b/src/libbson/tests/test-oid.c index 205d19ea4d9..34aa28c0fa1 100644 --- a/src/libbson/tests/test-oid.c +++ b/src/libbson/tests/test-oid.c @@ -30,6 +30,7 @@ #include "TestSuite.h" #include // BEGIN_IGNORE_DEPRECATIONS +#include #define N_THREADS 4 @@ -229,11 +230,12 @@ test_bson_oid_init_sequence (void) static char * get_time_as_string (const bson_oid_t *oid) { - mcommon_string_t *str = mcommon_string_new (NULL); + mcommon_string_append_t str; + mcommon_string_new_as_append (&str); time_t time = bson_oid_get_time_t (oid); - _bson_iso8601_date_format (time * 1000, str); - return mcommon_string_free (str, false); + mcommon_iso8601_string_append (&str, time * 1000); + return mcommon_string_from_append_destroy_with_steal (&str); } diff --git a/src/libbson/tests/test-string.c b/src/libbson/tests/test-string.c index 245a71b1acd..f4ec5633037 100644 --- a/src/libbson/tests/test-string.c +++ b/src/libbson/tests/test-string.c @@ -16,7 +16,8 @@ #include -#include +#include +#include #include "TestSuite.h" #include "test-libmongoc.h" @@ -110,6 +111,31 @@ test_bson_string_append_printf (void) } +static void +test_bson_string_append_printf_truncate (void) +{ + // mcommon_string_append_printf will always truncate strings between UTF-8 code points. + // Also see /bson/as_json_with_opts/utf8_truncation. Both tests exercise functionality implemented by + // mcommon_utf8_truncate_len(), but printf() uses a different path through mcommon_string. 
+ + for (uint32_t limit = 0; limit < 13; limit++) { + mcommon_string_append_t append; + mcommon_string_set_append_with_limit (mcommon_string_new (""), &append, limit); + mcommon_string_append_printf (&append, "foo \xf4%s%c%c bar", "\x8f", '\xbf', '\xbf'); + const char *expected = "foo \xf4\x8f\xbf\xbf bar"; + const char *str = mcommon_str_from_append (&append); + uint32_t len = mcommon_strlen_from_append (&append); + if (limit >= 4 && limit < 8) { + BSON_ASSERT (len == 4); + } else { + BSON_ASSERT (len == limit); + } + BSON_ASSERT (0 == memcmp (str, expected, len)); + mcommon_string_from_append_destroy (&append); + } +} + + static void test_bson_string_append_unichar (void) { @@ -358,6 +384,21 @@ test_bson_string_truncate (void) ASSERT_CMPUINT32 (str->alloc, ==, 4u); bson_string_free (str, true); } + + // Test truncating in the middle of a UTF-8 sequence, producing invalid UTF-8 as output. + // This is not especially desirable, but the behavior is maintained for compatibility. + { + // From RFC-3629 examples, "A<NOT IDENTICAL TO><ALPHA>." + bson_string_t *str = bson_string_new ("\x41\xe2\x89\xa2\xce\x91\x2e"); + ASSERT_CMPSIZE_T (str->len, ==, 7u); + ASSERT_CMPSIZE_T (str->alloc, ==, 8u); + + bson_string_truncate (str, 3); + ASSERT_CMPSTR (str->str, "\x41\xe2\x89"); + ASSERT_CMPSIZE_T (str->len, ==, 3u); + ASSERT_CMPSIZE_T (str->alloc, ==, 4u); + bson_string_free (str, true); + } } static void @@ -412,14 +453,6 @@ test_bson_string_capacity (void *unused) large_str[UINT32_MAX - 2u] = 's'; // Restore. } - // Test allocating the largest possible string. - { - bson_string_t *str = _bson_string_alloc (UINT32_MAX - 1u); - ASSERT_CMPUINT32 (str->alloc, ==, UINT32_MAX); - ASSERT_CMPUINT32 (str->len, ==, 0); - bson_string_free (str, true); - } - // Can truncate strings of length close to UINT32_MAX - 1. { large_str[UINT32_MAX - 1u] = '\0'; // Set size.
@@ -433,55 +466,6 @@ test_bson_string_capacity (void *unused) bson_free (large_str); } -static void -test_bson_string_append_ex (void) -{ - bson_string_t *str; - - str = bson_string_new (NULL); - _bson_string_append_ex (str, "the quick brown fox jumps over the lazy dog", 10); - ASSERT_CMPSTR (str->str, "the quick "); - bson_string_free (str, true); - - str = bson_string_new (NULL); - _bson_string_append_ex (str, "the quick brown fox jumps over the lazy dog", 0); - ASSERT_CMPSTR (str->str, ""); - bson_string_free (str, true); - - str = bson_string_new (NULL); - _bson_string_append_ex (str, "the quick\n brown fox jumps over the lazy dog", 10); - _bson_string_append_ex (str, "the\n quick brown fox jumps over the lazy dog", 5); - ASSERT_CMPSTR (str->str, "the quick\nthe\n "); - bson_string_free (str, true); -} - -static void -test_bson_string_alloc (void) -{ - bson_string_t *str; - - str = _bson_string_alloc (0); - ASSERT_CMPUINT32 (str->alloc, ==, 1); - ASSERT_CMPUINT32 (str->len, ==, 0); - bson_string_free (str, true); - - - str = _bson_string_alloc (1); - ASSERT_CMPUINT32 (str->alloc, ==, 2); - ASSERT_CMPUINT32 (str->len, ==, 0); - bson_string_free (str, true); - - str = _bson_string_alloc (2); - ASSERT_CMPUINT32 (str->alloc, ==, 4); - ASSERT_CMPUINT32 (str->len, ==, 0); - bson_string_free (str, true); - - str = _bson_string_alloc (10); - ASSERT_CMPUINT32 (str->alloc, ==, 16); - ASSERT_CMPUINT32 (str->len, ==, 0); - bson_string_free (str, true); -} - void test_string_install (TestSuite *suite) { @@ -489,6 +473,7 @@ test_string_install (TestSuite *suite) TestSuite_Add (suite, "/bson/string/append", test_bson_string_append); TestSuite_Add (suite, "/bson/string/append_c", test_bson_string_append_c); TestSuite_Add (suite, "/bson/string/append_printf", test_bson_string_append_printf); + TestSuite_Add (suite, "/bson/string/append_printf_truncate", test_bson_string_append_printf_truncate); TestSuite_Add (suite, "/bson/string/append_unichar", 
test_bson_string_append_unichar); TestSuite_Add (suite, "/bson/string/strdup", test_bson_strdup); TestSuite_Add (suite, "/bson/string/strdup_printf", test_bson_strdup_printf); @@ -500,7 +485,5 @@ test_string_install (TestSuite *suite) TestSuite_Add (suite, "/bson/string/strcasecmp", test_bson_strcasecmp); TestSuite_AddFull ( suite, "/bson/string/capacity", test_bson_string_capacity, NULL, NULL, skip_if_no_large_allocations); - TestSuite_Add (suite, "/bson/string/append_ex", test_bson_string_append_ex); - TestSuite_Add (suite, "/bson/string/alloc", test_bson_string_alloc); TestSuite_Add (suite, "/bson/string/truncate", test_bson_string_truncate); } diff --git a/src/libmongoc/CMakeLists.txt b/src/libmongoc/CMakeLists.txt index 0e31e0460b5..dab7ce87d1d 100644 --- a/src/libmongoc/CMakeLists.txt +++ b/src/libmongoc/CMakeLists.txt @@ -665,6 +665,8 @@ set (MONGOC_SOURCES ${mongo-c-driver_SOURCE_DIR}/src/common/src/common-atomic.c ${mongo-c-driver_SOURCE_DIR}/src/common/src/common-b64.c ${mongo-c-driver_SOURCE_DIR}/src/common/src/common-md5.c + ${mongo-c-driver_SOURCE_DIR}/src/common/src/common-json.c + ${mongo-c-driver_SOURCE_DIR}/src/common/src/common-string.c ${mongo-c-driver_SOURCE_DIR}/src/common/src/common-oid.c ${mongo-c-driver_SOURCE_DIR}/src/common/src/common-thread.c diff --git a/src/libmongoc/src/mongoc/mongoc-client-side-encryption.c b/src/libmongoc/src/mongoc/mongoc-client-side-encryption.c index af173d7eb90..b5849402c2b 100644 --- a/src/libmongoc/src/mongoc/mongoc-client-side-encryption.c +++ b/src/libmongoc/src/mongoc/mongoc-client-side-encryption.c @@ -1340,25 +1340,25 @@ _uri_construction_error (bson_error_t *error) static bool _do_spawn (const char *path, char **args, bson_error_t *error) { - mcommon_string_t *command; + mcommon_string_append_t command; char **arg; PROCESS_INFORMATION process_information; STARTUPINFO startup_info; /* Construct the full command, quote path and arguments. 
*/ - command = mcommon_string_new (""); - mcommon_string_append (command, "\""); + mcommon_string_new_as_append (&command); + mcommon_string_append (&command, "\""); if (path) { - mcommon_string_append (command, path); + mcommon_string_append (&command, path); } - mcommon_string_append (command, "mongocryptd.exe"); - mcommon_string_append (command, "\""); + mcommon_string_append (&command, "mongocryptd.exe"); + mcommon_string_append (&command, "\""); /* skip the "mongocryptd" first arg. */ arg = args + 1; while (*arg) { - mcommon_string_append (command, " \""); - mcommon_string_append (command, *arg); - mcommon_string_append (command, "\""); + mcommon_string_append (&command, " \""); + mcommon_string_append (&command, *arg); + mcommon_string_append (&command, "\""); arg++; } @@ -1368,7 +1368,7 @@ _do_spawn (const char *path, char **args, bson_error_t *error) startup_info.cb = sizeof (startup_info); if (!CreateProcessA (NULL, - command->str, + mcommon_str_from_append (&command), NULL, NULL, false /* inherit descriptors */, @@ -1395,11 +1395,11 @@ _do_spawn (const char *path, char **args, bson_error_t *error) "failed to spawn mongocryptd: %s", message); LocalFree (message); - mcommon_string_free (command, true); + mcommon_string_from_append_destroy (&command); return false; } - mcommon_string_free (command, true); + mcommon_string_from_append_destroy (&command); return true; } #else diff --git a/src/libmongoc/src/mongoc/mongoc-client.c b/src/libmongoc/src/mongoc/mongoc-client.c index b5f75b41116..f6ae2cb1fde 100644 --- a/src/libmongoc/src/mongoc/mongoc-client.c +++ b/src/libmongoc/src/mongoc/mongoc-client.c @@ -136,16 +136,15 @@ static bool txt_callback (const char *hostname, PDNS_RECORD pdns, mongoc_rr_data_t *rr_data, bson_error_t *error) { DWORD i; - mcommon_string_t *txt; - txt = mcommon_string_new (NULL); + mcommon_string_append_t txt; + mcommon_string_new_with_capacity_as_append (&txt, pdns->wDataLength); for (i = 0; i < pdns->Data.TXT.dwStringCount; i++) { - 
mcommon_string_append (txt, pdns->Data.TXT.pStringArray[i]); + mcommon_string_append (&txt, pdns->Data.TXT.pStringArray[i]); } - rr_data->txt_record_opts = bson_strdup (txt->str); - mcommon_string_free (txt, true); + rr_data->txt_record_opts = mcommon_string_from_append_destroy_with_steal (&txt); return true; } @@ -331,36 +330,36 @@ srv_callback (const char *hostname, ns_msg *ns_answer, ns_rr *rr, mongoc_rr_data static bool txt_callback (const char *hostname, ns_msg *ns_answer, ns_rr *rr, mongoc_rr_data_t *rr_data, bson_error_t *error) { - char s[256]; - const uint8_t *data; - mcommon_string_t *txt; - uint16_t pos, total; - uint8_t len; bool ret = false; BSON_UNUSED (ns_answer); - total = (uint16_t) ns_rr_rdlen (*rr); + uint16_t total = (uint16_t) ns_rr_rdlen (*rr); if (total < 1 || total > 255) { DNS_ERROR ("Invalid TXT record size %hu for \"%s\"", total, hostname); } - /* a TXT record has one or more strings, each up to 255 chars, each is - * prefixed by its length as 1 byte. thus endianness doesn't matter. */ - txt = mcommon_string_new (NULL); - pos = 0; - data = ns_rr_rdata (*rr); + /* a TXT record has one or more strings, each up to 255 chars, each is prefixed by its length as 1 byte. + * In this usage, they are all concatenated without any spacers. 
*/ + mcommon_string_append_t txt; + mcommon_string_new_with_capacity_as_append (&txt, total); + uint16_t pos = 0; + const uint8_t *data = ns_rr_rdata (*rr); while (pos < total) { - memcpy (&len, data + pos, sizeof (uint8_t)); - pos++; - bson_strncpy (s, (const char *) (data + pos), (size_t) len + 1); - mcommon_string_append (txt, s); + uint8_t len = data[pos++]; + if (total - pos < (uint16_t) len) { + DNS_ERROR ("Invalid TXT string size %hu at %hu in %hu-byte TXT record for \"%s\"", + (uint16_t) len, + pos, + total, + hostname); + } + mcommon_string_append_bytes (&txt, (const char *) (data + pos), (uint32_t) len); pos += len; } - rr_data->txt_record_opts = bson_strdup (txt->str); - mcommon_string_free (txt, true); + rr_data->txt_record_opts = mcommon_string_from_append_destroy_with_steal (&txt); ret = true; done: diff --git a/src/libmongoc/src/mongoc/mongoc-cluster-aws.c b/src/libmongoc/src/mongoc/mongoc-cluster-aws.c index e3bf652bc8c..2efa0541587 100644 --- a/src/libmongoc/src/mongoc/mongoc-cluster-aws.c +++ b/src/libmongoc/src/mongoc/mongoc-cluster-aws.c @@ -486,8 +486,8 @@ _obtain_creds_from_assumerolewithwebidentity (_mongoc_aws_credentials_t *creds, strerror (errno)); } - token_file_contents = mcommon_string_new (NULL); - + mcommon_string_append_t append; + mcommon_string_new_as_append (&append); for (;;) { char buf[128]; ssize_t got = mongoc_stream_read (fstream, @@ -497,9 +497,7 @@ _obtain_creds_from_assumerolewithwebidentity (_mongoc_aws_credentials_t *creds, 0 /* timeout_msec. Unused for file stream. */); if (got > 0) { - // add null terminator. - buf[got] = '\0'; - mcommon_string_append (token_file_contents, (const char *) buf); + mcommon_string_append_bytes (&append, (const char *) buf, (uint32_t) got); } else if (got == 0) { // EOF. 
break; @@ -509,6 +507,7 @@ _obtain_creds_from_assumerolewithwebidentity (_mongoc_aws_credentials_t *creds, strerror (errno)); } } + token_file_contents = mcommon_string_from_append (&append); } path_and_query = bson_strdup_printf ("/?Action=AssumeRoleWithWebIdentity" @@ -621,7 +620,7 @@ _obtain_creds_from_assumerolewithwebidentity (_mongoc_aws_credentials_t *creds, bson_destroy (response_bson); bson_free (http_response_headers); bson_free (http_response_body); - mcommon_string_free (token_file_contents, true /* free segment */); + mcommon_string_destroy (token_file_contents); mongoc_stream_destroy (fstream); bson_free (aws_role_session_name); bson_free (aws_role_arn); diff --git a/src/libmongoc/src/mongoc/mongoc-collection.c b/src/libmongoc/src/mongoc/mongoc-collection.c index bf69597eb00..ed62989862d 100644 --- a/src/libmongoc/src/mongoc/mongoc-collection.c +++ b/src/libmongoc/src/mongoc/mongoc-collection.c @@ -1189,7 +1189,7 @@ mongoc_collection_drop_index_with_opts (mongoc_collection_t *collection, char * mongoc_collection_keys_to_index_string (const bson_t *keys) { - mcommon_string_t *s; + mcommon_string_append_t append; bson_iter_t iter; bson_type_t type; int i = 0; @@ -1200,7 +1200,7 @@ mongoc_collection_keys_to_index_string (const bson_t *keys) return NULL; } - s = mcommon_string_new (NULL); + mcommon_string_new_as_append (&append); while (bson_iter_next (&iter)) { /* Index type can be specified as a string ("2d") or as an integer @@ -1208,18 +1208,19 @@ mongoc_collection_keys_to_index_string (const bson_t *keys) type = bson_iter_type (&iter); if (type == BSON_TYPE_UTF8) { mcommon_string_append_printf ( - s, (i++ ? "_%s_%s" : "%s_%s"), bson_iter_key (&iter), bson_iter_utf8 (&iter, NULL)); + &append, (i++ ? "_%s_%s" : "%s_%s"), bson_iter_key (&iter), bson_iter_utf8 (&iter, NULL)); } else if (type == BSON_TYPE_INT32) { - mcommon_string_append_printf (s, (i++ ? 
"_%s_%d" : "%s_%d"), bson_iter_key (&iter), bson_iter_int32 (&iter)); + mcommon_string_append_printf ( + &append, (i++ ? "_%s_%d" : "%s_%d"), bson_iter_key (&iter), bson_iter_int32 (&iter)); } else if (type == BSON_TYPE_INT64) { mcommon_string_append_printf ( - s, (i++ ? "_%s_%" PRId64 : "%s_%" PRId64), bson_iter_key (&iter), bson_iter_int64 (&iter)); + &append, (i++ ? "_%s_%" PRId64 : "%s_%" PRId64), bson_iter_key (&iter), bson_iter_int64 (&iter)); } else { - mcommon_string_free (s, true); + mcommon_string_from_append_destroy (&append); return NULL; } } - return mcommon_string_free (s, false); + return mcommon_string_from_append_destroy_with_steal (&append); } diff --git a/src/libmongoc/src/mongoc/mongoc-crypt.c b/src/libmongoc/src/mongoc/mongoc-crypt.c index 024ea5a5b6a..c1f9aa2ccb7 100644 --- a/src/libmongoc/src/mongoc/mongoc-crypt.c +++ b/src/libmongoc/src/mongoc/mongoc-crypt.c @@ -1139,10 +1139,10 @@ _parse_one_tls_opts (bson_iter_t *iter, mongoc_ssl_opt_t *out_opt, bson_error_t bson_t tls_opts_doc; const uint8_t *data; uint32_t len; - mcommon_string_t *errmsg; + mcommon_string_append_t errmsg; bson_iter_t permitted_iter; - errmsg = mcommon_string_new (NULL); + mcommon_string_new_as_append (&errmsg); kms_provider = bson_iter_key (iter); memset (out_opt, 0, sizeof (mongoc_ssl_opt_t)); @@ -1194,19 +1194,19 @@ _parse_one_tls_opts (bson_iter_t *iter, mongoc_ssl_opt_t *out_opt, bson_error_t goto fail; } - if (!_mongoc_ssl_opts_from_bson (out_opt, &tls_opts_doc, errmsg)) { + if (!_mongoc_ssl_opts_from_bson (out_opt, &tls_opts_doc, &errmsg)) { bson_set_error (error, MONGOC_ERROR_CLIENT_SIDE_ENCRYPTION, MONGOC_ERROR_CLIENT_INVALID_ENCRYPTION_ARG, "Error parsing TLS options for %s: %s", kms_provider, - errmsg->str); + mcommon_str_from_append (&errmsg)); goto fail; } ok = true; fail: - mcommon_string_free (errmsg, true /* free_segment */); + mcommon_string_from_append_destroy (&errmsg); return ok; } diff --git a/src/libmongoc/src/mongoc/mongoc-cyrus.c 
b/src/libmongoc/src/mongoc/mongoc-cyrus.c index ddaaae3043d..39d4737bc57 100644 --- a/src/libmongoc/src/mongoc/mongoc-cyrus.c +++ b/src/libmongoc/src/mongoc/mongoc-cyrus.c @@ -34,7 +34,8 @@ bool _mongoc_cyrus_set_mechanism (mongoc_cyrus_t *sasl, const char *mechanism, bson_error_t *error) { - mcommon_string_t *str = mcommon_string_new (""); + mcommon_string_append_t available_mechs_str; + mcommon_string_new_as_append (&available_mechs_str); const char **mechs = sasl_global_listmech (); int i = 0; bool ok = false; @@ -46,9 +47,9 @@ _mongoc_cyrus_set_mechanism (mongoc_cyrus_t *sasl, const char *mechanism, bson_e ok = true; break; } - mcommon_string_append (str, mechs[i]); + mcommon_string_append (&available_mechs_str, mechs[i]); if (mechs[i + 1]) { - mcommon_string_append (str, ","); + mcommon_string_append (&available_mechs_str, ","); } } @@ -62,10 +63,10 @@ _mongoc_cyrus_set_mechanism (mongoc_cyrus_t *sasl, const char *mechanism, bson_e "SASL Failure: Unsupported mechanism by client: %s. 
" "Available mechanisms: %s", mechanism, - str->str); + mcommon_str_from_append (&available_mechs_str)); } - mcommon_string_free (str, true); + mcommon_string_from_append_destroy (&available_mechs_str); return ok; } @@ -276,19 +277,23 @@ _mongoc_cyrus_is_failure (int status, bson_error_t *error) bson_set_error (error, MONGOC_ERROR_SASL, status, "SASL Failure: insufficient memory."); break; case SASL_NOMECH: { - mcommon_string_t *str = mcommon_string_new ("available mechanisms: "); + mcommon_string_append_t available_mechs_str; + mcommon_string_new_as_append (&available_mechs_str); const char **mechs = sasl_global_listmech (); int i = 0; for (i = 0; mechs[i]; i++) { - mcommon_string_append (str, mechs[i]); + mcommon_string_append (&available_mechs_str, mechs[i]); if (mechs[i + 1]) { - mcommon_string_append (str, ","); + mcommon_string_append (&available_mechs_str, ","); } } - bson_set_error ( - error, MONGOC_ERROR_SASL, status, "SASL Failure: failure to negotiate mechanism (%s)", str->str); - mcommon_string_free (str, 0); + bson_set_error (error, + MONGOC_ERROR_SASL, + status, + "SASL Failure: failure to negotiate mechanism (available mechanisms: %s)", + mcommon_str_from_append (&available_mechs_str)); + mcommon_string_from_append_destroy (&available_mechs_str); } break; case SASL_BADPARAM: bson_set_error (error, diff --git a/src/libmongoc/src/mongoc/mongoc-handshake.c b/src/libmongoc/src/mongoc/mongoc-handshake.c index 34ba09bac94..55186408548 100644 --- a/src/libmongoc/src/mongoc/mongoc-handshake.c +++ b/src/libmongoc/src/mongoc/mongoc-handshake.c @@ -201,13 +201,13 @@ _mongoc_handshake_get_config_hex_string (void) _set_bit (bf, byte_count, MONGOC_MD_FLAG_ENABLE_SRV); } - mcommon_string_t *const str = mcommon_string_new ("0x"); + mcommon_string_append_t append; + mcommon_string_set_append (mcommon_string_new_with_capacity ("0x", 2, 2 + byte_count * 2), &append); for (uint32_t i = 0u; i < byte_count; i++) { - mcommon_string_append_printf (str, "%02x", bf[i]); + 
mcommon_string_append_printf (&append, "%02x", bf[i]); } bson_free (bf); - /* free the mcommon_string_t, but keep the underlying char* alive. */ - return mcommon_string_free (str, false); + return mcommon_string_from_append_destroy_with_steal (&append); } static char * @@ -378,11 +378,7 @@ _free_driver_info (mongoc_handshake_t *handshake) static void _set_platform_string (mongoc_handshake_t *handshake) { - mcommon_string_t *str; - - str = mcommon_string_new (""); - - handshake->platform = mcommon_string_free (str, false); + handshake->platform = bson_strdup (""); } static void @@ -474,47 +470,44 @@ _get_env_info (mongoc_handshake_t *handshake) static void _set_compiler_info (mongoc_handshake_t *handshake) { - mcommon_string_t *str; - char *config_str; - - str = mcommon_string_new (""); + mcommon_string_append_t append; + mcommon_string_new_as_append (&append); - config_str = _mongoc_handshake_get_config_hex_string (); - mcommon_string_append_printf (str, "cfg=%s", config_str); + char *config_str = _mongoc_handshake_get_config_hex_string (); + mcommon_string_append_printf (&append, "cfg=%s", config_str); bson_free (config_str); #ifdef _POSIX_VERSION - mcommon_string_append_printf (str, " posix=%ld", _POSIX_VERSION); + mcommon_string_append_printf (&append, " posix=%ld", _POSIX_VERSION); #endif #ifdef __STDC_VERSION__ - mcommon_string_append_printf (str, " stdc=%ld", __STDC_VERSION__); + mcommon_string_append_printf (&append, " stdc=%ld", __STDC_VERSION__); #endif - mcommon_string_append_printf (str, " CC=%s", MONGOC_COMPILER); + mcommon_string_append_printf (&append, " CC=%s", MONGOC_COMPILER); #ifdef MONGOC_COMPILER_VERSION - mcommon_string_append_printf (str, " %s", MONGOC_COMPILER_VERSION); + mcommon_string_append_printf (&append, " %s", MONGOC_COMPILER_VERSION); #endif - handshake->compiler_info = mcommon_string_free (str, false); + handshake->compiler_info = mcommon_string_from_append_destroy_with_steal (&append); } static void _set_flags (mongoc_handshake_t 
*handshake) { - mcommon_string_t *str; - - str = mcommon_string_new (""); + mcommon_string_append_t append; + mcommon_string_new_as_append (&append); if (strlen (MONGOC_EVALUATE_STR (MONGOC_USER_SET_CFLAGS)) > 0) { - mcommon_string_append_printf (str, " CFLAGS=%s", MONGOC_EVALUATE_STR (MONGOC_USER_SET_CFLAGS)); + mcommon_string_append_printf (&append, " CFLAGS=%s", MONGOC_EVALUATE_STR (MONGOC_USER_SET_CFLAGS)); } if (strlen (MONGOC_EVALUATE_STR (MONGOC_USER_SET_LDFLAGS)) > 0) { - mcommon_string_append_printf (str, " LDFLAGS=%s", MONGOC_EVALUATE_STR (MONGOC_USER_SET_LDFLAGS)); + mcommon_string_append_printf (&append, " LDFLAGS=%s", MONGOC_EVALUATE_STR (MONGOC_USER_SET_LDFLAGS)); } - handshake->flags = mcommon_string_free (str, false); + handshake->flags = mcommon_string_from_append_destroy_with_steal (&append); } static void @@ -557,44 +550,36 @@ _append_platform_field (bson_t *doc, const char *platform, bool truncate) { char *compiler_info = _mongoc_handshake_get ()->compiler_info; char *flags = _mongoc_handshake_get ()->flags; - mcommon_string_t *combined_platform = mcommon_string_new (platform); - - /* Compute space left for platform field */ - const int max_platform_str_size = HANDSHAKE_MAX_SIZE - ((int) doc->len + - /* 1 byte for utf8 tag */ - 1 + - /* key size */ - (int) strlen (HANDSHAKE_PLATFORM_FIELD) + 1 + + const uint32_t overhead = (/* 1 byte for utf8 tag */ + 1 + + /* key size */ + (int) strlen (HANDSHAKE_PLATFORM_FIELD) + 1 + + /* 4 bytes for length of string */ + 4 + + /* NUL terminator */ + 1); - /* 4 bytes for length of string */ - 4); - - if (truncate && max_platform_str_size <= 0) { - mcommon_string_free (combined_platform, true); + if (truncate && doc->len >= HANDSHAKE_MAX_SIZE - overhead) { return; } - /* We opt to drop compiler info and flags if they can't fit, while the - * platform information is truncated - * Try to drop flags first, and if there is still not enough space also - * drop compiler info */ - if (!truncate || - 
mcommon_cmp_greater_equal_su (max_platform_str_size, combined_platform->len + strlen (compiler_info) + 1u)) { - mcommon_string_append (combined_platform, compiler_info); - } - if (!truncate || - mcommon_cmp_greater_equal_su (max_platform_str_size, combined_platform->len + strlen (flags) + 1u)) { - mcommon_string_append (combined_platform, flags); - } + mcommon_string_append_t combined_platform; + mcommon_string_set_append_with_limit (mcommon_string_new_with_capacity ("", 0, HANDSHAKE_MAX_SIZE - overhead), + &combined_platform, + truncate ? HANDSHAKE_MAX_SIZE - overhead - doc->len : UINT32_MAX - 1u); + + mcommon_string_append (&combined_platform, platform); + mcommon_string_append_all_or_none (&combined_platform, compiler_info); + mcommon_string_append_all_or_none (&combined_platform, flags); - /* We use the flags_index field to check if the CLAGS/LDFLAGS need to be - * truncated, and if so we drop them altogether */ - BSON_ASSERT (mcommon_in_range_unsigned (int, combined_platform->len)); - int length = truncate ? 
BSON_MIN (max_platform_str_size - 1, (int) combined_platform->len) : -1; - bson_append_utf8 (doc, HANDSHAKE_PLATFORM_FIELD, -1, combined_platform->str, length); + bson_append_utf8 (doc, + HANDSHAKE_PLATFORM_FIELD, + strlen (HANDSHAKE_PLATFORM_FIELD), + mcommon_str_from_append (&combined_platform), + mcommon_strlen_from_append (&combined_platform)); - mcommon_string_free (combined_platform, true); + mcommon_string_from_append_destroy (&combined_platform); } static bool diff --git a/src/libmongoc/src/mongoc/mongoc-http-private.h b/src/libmongoc/src/mongoc/mongoc-http-private.h index b367febd4e5..efe3367290d 100644 --- a/src/libmongoc/src/mongoc/mongoc-http-private.h +++ b/src/libmongoc/src/mongoc/mongoc-http-private.h @@ -18,6 +18,7 @@ #include "mongoc-ssl.h" #include "mongoc-prelude.h" +#include #ifndef MONGOC_HTTP_PRIVATE_H #define MONGOC_HTTP_PRIVATE_H @@ -52,17 +53,15 @@ _mongoc_http_response_cleanup (mongoc_http_response_t *response); /** * @brief Render the HTTP request head based on the given HTTP parameters. * + * @param append Destination for the HTTP request head, as an mcommon_string_append_t initialized with + * mcommon_string_set_append(). * @param req The request to render (required) - * @return bson_string_t* A new bson_string_t that contains the HTTP request - * head * * @note The request body (if applicable) is not included in the resulting * string. - * @note The returned bson_string_t must be freed, including the internal - * segment. 
*/ -bson_string_t * -_mongoc_http_render_request_head (const mongoc_http_request_t *req); +void +_mongoc_http_render_request_head (mcommon_string_append_t *append, const mongoc_http_request_t *req); /** diff --git a/src/libmongoc/src/mongoc/mongoc-http.c b/src/libmongoc/src/mongoc/mongoc-http.c index 3b1546bf57c..8f42af4d0e3 100644 --- a/src/libmongoc/src/mongoc/mongoc-http.c +++ b/src/libmongoc/src/mongoc/mongoc-http.c @@ -47,46 +47,43 @@ _mongoc_http_response_cleanup (mongoc_http_response_t *response) bson_free (response->body); } -mcommon_string_t * -_mongoc_http_render_request_head (const mongoc_http_request_t *req) +void +_mongoc_http_render_request_head (mcommon_string_append_t *append, const mongoc_http_request_t *req) { BSON_ASSERT_PARAM (req); - char *path = NULL; + + mcommon_string_append_printf (append, "%s ", req->method); // Default paths if (!req->path) { // Top path: - path = bson_strdup ("/"); + mcommon_string_append (append, "/"); } else if (req->path[0] != '/') { // Path MUST be prefixed with a separator - path = bson_strdup_printf ("/%s", req->path); + mcommon_string_append (append, "/"); + mcommon_string_append (append, req->path); } else { // Just copy the path - path = bson_strdup (req->path); + mcommon_string_append (append, req->path); } - mcommon_string_t *const string = mcommon_string_new (""); - // Set the request line - mcommon_string_append_printf (string, "%s %s HTTP/1.0\r\n", req->method, path); - // (We're done with the path string:) - bson_free (path); + mcommon_string_append (append, " HTTP/1.0\r\n"); /* Always add Host header. */ - mcommon_string_append_printf (string, "Host: %s:%d\r\n", req->host, req->port); + mcommon_string_append_printf (append, "Host: %s:%d\r\n", req->host, req->port); /* Always add Connection: close header to ensure server closes connection. 
*/ - mcommon_string_append_printf (string, "Connection: close\r\n"); + mcommon_string_append (append, "Connection: close\r\n"); /* Add Content-Length if body is included. */ if (req->body_len) { - mcommon_string_append_printf (string, "Content-Length: %d\r\n", req->body_len); + mcommon_string_append_printf (append, "Content-Length: %d\r\n", req->body_len); } // Add any extra headers if (req->extra_headers) { - mcommon_string_append (string, req->extra_headers); + mcommon_string_append (append, req->extra_headers); } // Final terminator - mcommon_string_append (string, "\r\n"); - return string; + mcommon_string_append (append, "\r\n"); } static int32_t @@ -110,7 +107,6 @@ _mongoc_http_send (const mongoc_http_request_t *req, bool ret = false; mongoc_iovec_t iovec; char *path = NULL; - mcommon_string_t *http_request = NULL; mongoc_buffer_t http_response_buf; char *http_response_str; char *ptr; @@ -118,6 +114,9 @@ _mongoc_http_send (const mongoc_http_request_t *req, const mcd_timer timer = mcd_timer_expire_after (mcd_milliseconds (timeout_ms)); + mcommon_string_append_t http_request; + mcommon_string_new_as_append (&http_request); + memset (res, 0, sizeof (*res)); _mongoc_buffer_init (&http_response_buf, NULL, 0, NULL, NULL); @@ -171,9 +170,10 @@ _mongoc_http_send (const mongoc_http_request_t *req, path = bson_strdup (req->path); } - http_request = _mongoc_http_render_request_head (req); - iovec.iov_base = http_request->str; - iovec.iov_len = http_request->len; + _mongoc_http_render_request_head (&http_request, req); + + iovec.iov_base = mcommon_str_from_append (&http_request); + iovec.iov_len = mcommon_strlen_from_append (&http_request); if (!_mongoc_stream_writev_full (stream, &iovec, 1, _mongoc_http_msec_remaining (timer), error)) { goto fail; @@ -282,9 +282,7 @@ _mongoc_http_send (const mongoc_http_request_t *req, fail: mongoc_stream_destroy (stream); - if (http_request) { - mcommon_string_free (http_request, true); - } + mcommon_string_from_append_destroy 
(&http_request); _mongoc_buffer_destroy (&http_response_buf); bson_free (path); return ret; diff --git a/src/libmongoc/src/mongoc/mongoc-log.c b/src/libmongoc/src/mongoc/mongoc-log.c index d96f806c2a6..6777d3af41e 100644 --- a/src/libmongoc/src/mongoc/mongoc-log.c +++ b/src/libmongoc/src/mongoc/mongoc-log.c @@ -227,8 +227,9 @@ mongoc_log_trace_bytes (const char *domain, const uint8_t *_b, size_t _l) { STOP_LOGGING_CHECK; - mcommon_string_t *const str = mcommon_string_new (NULL); - mcommon_string_t *const astr = mcommon_string_new (NULL); + mcommon_string_append_t str, astr; + mcommon_string_new_as_append (&str); + mcommon_string_new_as_append (&astr); size_t _i; for (_i = 0u; _i < _l; _i++) { @@ -236,38 +237,39 @@ mongoc_log_trace_bytes (const char *domain, const uint8_t *_b, size_t _l) const size_t rem = _i % 16u; if (rem == 0u) { - mcommon_string_append_printf (str, "%05zx: ", _i); + mcommon_string_append_printf (&str, "%05zx: ", _i); } - mcommon_string_append_printf (str, " %02x", _v); + mcommon_string_append_printf (&str, " %02x", _v); if (isprint (_v)) { - mcommon_string_append_printf (astr, " %c", _v); + mcommon_string_append_printf (&astr, " %c", _v); } else { - mcommon_string_append (astr, " ."); + mcommon_string_append (&astr, " ."); } if (rem == 15u) { - mongoc_log (MONGOC_LOG_LEVEL_TRACE, domain, "%s %s", str->str, astr->str); - mcommon_string_truncate (str, 0); - mcommon_string_truncate (astr, 0); + mongoc_log ( + MONGOC_LOG_LEVEL_TRACE, domain, "%s %s", mcommon_str_from_append (&str), mcommon_str_from_append (&astr)); + mcommon_string_from_append_clear (&str); + mcommon_string_from_append_clear (&astr); } else if (rem == 7u) { - mcommon_string_append (str, " "); - mcommon_string_append (astr, " "); + mcommon_string_append (&str, " "); + mcommon_string_append (&astr, " "); } } if (_i != 16u) { - mongoc_log (MONGOC_LOG_LEVEL_TRACE, domain, "%-56s %s", str->str, astr->str); + mongoc_log ( + MONGOC_LOG_LEVEL_TRACE, domain, "%-56s %s", 
mcommon_str_from_append (&str), mcommon_str_from_append (&astr)); } - mcommon_string_free (str, true); - mcommon_string_free (astr, true); + mcommon_string_from_append_destroy (&str); + mcommon_string_from_append_destroy (&astr); } void mongoc_log_trace_iovec (const char *domain, const mongoc_iovec_t *_iov, size_t _iovcnt) { - mcommon_string_t *str, *astr; const char *_b; unsigned _i = 0; unsigned _j = 0; @@ -282,8 +284,10 @@ mongoc_log_trace_iovec (const char *domain, const mongoc_iovec_t *_iov, size_t _ } _i = 0; - str = mcommon_string_new (NULL); - astr = mcommon_string_new (NULL); + + mcommon_string_append_t str, astr; + mcommon_string_new_as_append (&str); + mcommon_string_new_as_append (&astr); for (_j = 0; _j < _iovcnt; _j++) { _b = (char *) _iov[_j].iov_base; @@ -292,31 +296,36 @@ mongoc_log_trace_iovec (const char *domain, const mongoc_iovec_t *_iov, size_t _ for (_k = 0; _k < _l; _k++, _i++) { _v = *(_b + _k); if ((_i % 16) == 0) { - mcommon_string_append_printf (str, "%05x: ", _i); + mcommon_string_append_printf (&str, "%05x: ", _i); } - mcommon_string_append_printf (str, " %02x", _v); + mcommon_string_append_printf (&str, " %02x", _v); if (isprint (_v)) { - mcommon_string_append_printf (astr, " %c", _v); + mcommon_string_append_printf (&astr, " %c", _v); } else { - mcommon_string_append (astr, " ."); + mcommon_string_append (&astr, " ."); } if ((_i % 16) == 15) { - mongoc_log (MONGOC_LOG_LEVEL_TRACE, domain, "%s %s", str->str, astr->str); - mcommon_string_truncate (str, 0); - mcommon_string_truncate (astr, 0); + mongoc_log (MONGOC_LOG_LEVEL_TRACE, + domain, + "%s %s", + mcommon_str_from_append (&str), + mcommon_str_from_append (&astr)); + mcommon_string_from_append_clear (&str); + mcommon_string_from_append_clear (&astr); } else if ((_i % 16) == 7) { - mcommon_string_append (str, " "); - mcommon_string_append (astr, " "); + mcommon_string_append (&str, " "); + mcommon_string_append (&astr, " "); } } } if (_i != 16) { - mongoc_log 
(MONGOC_LOG_LEVEL_TRACE, domain, "%-56s %s", str->str, astr->str); + mongoc_log ( + MONGOC_LOG_LEVEL_TRACE, domain, "%-56s %s", mcommon_str_from_append (&str), mcommon_str_from_append (&astr)); } - mcommon_string_free (str, true); - mcommon_string_free (astr, true); + mcommon_string_from_append_destroy (&str); + mcommon_string_from_append_destroy (&astr); } diff --git a/src/libmongoc/src/mongoc/mongoc-secure-channel.c b/src/libmongoc/src/mongoc/mongoc-secure-channel.c index 93c65fcc079..79a326ed815 100644 --- a/src/libmongoc/src/mongoc/mongoc-secure-channel.c +++ b/src/libmongoc/src/mongoc/mongoc-secure-channel.c @@ -240,8 +240,8 @@ mongoc_secure_channel_setup_certificate (mongoc_stream_tls_secure_channel_t *sec return mongoc_secure_channel_setup_certificate_from_file (opt->pem_file); } -void -_bson_append_szoid (mcommon_string_t *retval, PCCERT_CONTEXT cert, const char *label, void *oid) +static void +_bson_append_szoid (mcommon_string_append_t *retval, PCCERT_CONTEXT cert, const char *label, void *oid) { DWORD oid_len = CertGetNameString (cert, CERT_NAME_ATTR_TYPE, 0, oid, NULL, 0); @@ -253,28 +253,28 @@ _bson_append_szoid (mcommon_string_t *retval, PCCERT_CONTEXT cert, const char *l bson_free (tmp); } } + char * _mongoc_secure_channel_extract_subject (const char *filename, const char *passphrase) { - mcommon_string_t *retval; PCCERT_CONTEXT cert; - cert = mongoc_secure_channel_setup_certificate_from_file (filename); if (!cert) { return NULL; } - retval = mcommon_string_new (""); - ; - _bson_append_szoid (retval, cert, "C=", szOID_COUNTRY_NAME); - _bson_append_szoid (retval, cert, ",ST=", szOID_STATE_OR_PROVINCE_NAME); - _bson_append_szoid (retval, cert, ",L=", szOID_LOCALITY_NAME); - _bson_append_szoid (retval, cert, ",O=", szOID_ORGANIZATION_NAME); - _bson_append_szoid (retval, cert, ",OU=", szOID_ORGANIZATIONAL_UNIT_NAME); - _bson_append_szoid (retval, cert, ",CN=", szOID_COMMON_NAME); - _bson_append_szoid (retval, cert, ",STREET=", szOID_STREET_ADDRESS); + 
mcommon_string_append_t retval; + mcommon_string_new_as_append (&retval); + + _bson_append_szoid (&retval, cert, "C=", szOID_COUNTRY_NAME); + _bson_append_szoid (&retval, cert, ",ST=", szOID_STATE_OR_PROVINCE_NAME); + _bson_append_szoid (&retval, cert, ",L=", szOID_LOCALITY_NAME); + _bson_append_szoid (&retval, cert, ",O=", szOID_ORGANIZATION_NAME); + _bson_append_szoid (&retval, cert, ",OU=", szOID_ORGANIZATIONAL_UNIT_NAME); + _bson_append_szoid (&retval, cert, ",CN=", szOID_COMMON_NAME); + _bson_append_szoid (&retval, cert, ",STREET=", szOID_STREET_ADDRESS); - return mcommon_string_free (retval, false); + return mcommon_string_from_append_destroy_with_steal (&retval); } bool diff --git a/src/libmongoc/src/mongoc/mongoc-secure-transport.c b/src/libmongoc/src/mongoc/mongoc-secure-transport.c index 5fe117cf363..6ee0064c70e 100644 --- a/src/libmongoc/src/mongoc/mongoc-secure-transport.c +++ b/src/libmongoc/src/mongoc/mongoc-secure-transport.c @@ -81,7 +81,7 @@ _mongoc_cfstringref_to_cstring (CFStringRef str) } static void -_bson_append_cftyperef (mcommon_string_t *retval, const char *label, CFTypeRef str) +_bson_append_cftyperef (mcommon_string_append_t *retval, const char *label, CFTypeRef str) { char *cs; @@ -125,7 +125,6 @@ char * _mongoc_secure_transport_RFC2253_from_cert (SecCertificateRef cert) { CFTypeRef value; - mcommon_string_t *retval; CFTypeRef subject_name; CFDictionaryRef cert_dict; @@ -146,49 +145,49 @@ _mongoc_secure_transport_RFC2253_from_cert (SecCertificateRef cert) return NULL; } - retval = mcommon_string_new (""); - ; + mcommon_string_append_t retval; + mcommon_string_new_as_append (&retval); value = _mongoc_secure_transport_dict_get (subject_name, kSecOIDCountryName); - _bson_append_cftyperef (retval, "C=", value); + _bson_append_cftyperef (&retval, "C=", value); value = _mongoc_secure_transport_dict_get (subject_name, kSecOIDStateProvinceName); - _bson_append_cftyperef (retval, ",ST=", value); + _bson_append_cftyperef (&retval, ",ST=", value); 
value = _mongoc_secure_transport_dict_get (subject_name, kSecOIDLocalityName); - _bson_append_cftyperef (retval, ",L=", value); + _bson_append_cftyperef (&retval, ",L=", value); value = _mongoc_secure_transport_dict_get (subject_name, kSecOIDOrganizationName); - _bson_append_cftyperef (retval, ",O=", value); + _bson_append_cftyperef (&retval, ",O=", value); value = _mongoc_secure_transport_dict_get (subject_name, kSecOIDOrganizationalUnitName); if (value) { /* Can be either one unit name, or array of unit names */ if (CFGetTypeID (value) == CFStringGetTypeID ()) { - _bson_append_cftyperef (retval, ",OU=", value); + _bson_append_cftyperef (&retval, ",OU=", value); } else if (CFGetTypeID (value) == CFArrayGetTypeID ()) { CFIndex len = CFArrayGetCount (value); if (len > 0) { - _bson_append_cftyperef (retval, ",OU=", CFArrayGetValueAtIndex (value, 0)); + _bson_append_cftyperef (&retval, ",OU=", CFArrayGetValueAtIndex (value, 0)); } if (len > 1) { - _bson_append_cftyperef (retval, ",", CFArrayGetValueAtIndex (value, 1)); + _bson_append_cftyperef (&retval, ",", CFArrayGetValueAtIndex (value, 1)); } if (len > 2) { - _bson_append_cftyperef (retval, ",", CFArrayGetValueAtIndex (value, 2)); + _bson_append_cftyperef (&retval, ",", CFArrayGetValueAtIndex (value, 2)); } } } value = _mongoc_secure_transport_dict_get (subject_name, kSecOIDCommonName); - _bson_append_cftyperef (retval, ",CN=", value); + _bson_append_cftyperef (&retval, ",CN=", value); value = _mongoc_secure_transport_dict_get (subject_name, kSecOIDStreetAddress); - _bson_append_cftyperef (retval, ",STREET", value); + _bson_append_cftyperef (&retval, ",STREET", value); CFRelease (cert_dict); - return mcommon_string_free (retval, false); + return mcommon_string_from_append_destroy_with_steal (&retval); } diff --git a/src/libmongoc/src/mongoc/mongoc-ssl-private.h b/src/libmongoc/src/mongoc/mongoc-ssl-private.h index bda7cadaf95..35a5f337c01 100644 --- a/src/libmongoc/src/mongoc/mongoc-ssl-private.h +++ 
b/src/libmongoc/src/mongoc/mongoc-ssl-private.h @@ -20,6 +20,7 @@ #define MONGOC_SSL_PRIVATE_H #include +#include #include "mongoc-uri-private.h" @@ -51,10 +52,10 @@ _mongoc_ssl_opts_cleanup (mongoc_ssl_opt_t *opt, bool free_internal); * from a BSON document. It is used to parse TLS options for the KMIP KMS * provider in CSFLE. * - ssl_opt must be a zero'd out ssl_opt struct. - * - errmsg must be an initialized bson_string_t. + * - errmsg must be an initialized mcommon_string_append_t. * - Returns false on error and appends to errmsg. */ bool -_mongoc_ssl_opts_from_bson (mongoc_ssl_opt_t *ssl_opt, const bson_t *bson, bson_string_t *errmsg); +_mongoc_ssl_opts_from_bson (mongoc_ssl_opt_t *ssl_opt, const bson_t *bson, mcommon_string_append_t *errmsg); BSON_END_DECLS diff --git a/src/libmongoc/src/mongoc/mongoc-ssl.c b/src/libmongoc/src/mongoc/mongoc-ssl.c index 4ea91f16b56..9d7e0c77ef6 100644 --- a/src/libmongoc/src/mongoc/mongoc-ssl.c +++ b/src/libmongoc/src/mongoc/mongoc-ssl.c @@ -168,7 +168,7 @@ _mongoc_ssl_opts_disable_ocsp_endpoint_check (const mongoc_ssl_opt_t *ssl_opt) } bool -_mongoc_ssl_opts_from_bson (mongoc_ssl_opt_t *ssl_opt, const bson_t *bson, mcommon_string_t *errmsg) +_mongoc_ssl_opts_from_bson (mongoc_ssl_opt_t *ssl_opt, const bson_t *bson, mcommon_string_append_t *errmsg) { bson_iter_t iter; diff --git a/src/libmongoc/src/mongoc/mongoc-stream-tls-secure-transport.c b/src/libmongoc/src/mongoc/mongoc-stream-tls-secure-transport.c index 5490d98e4af..de64d2b575f 100644 --- a/src/libmongoc/src/mongoc/mongoc-stream-tls-secure-transport.c +++ b/src/libmongoc/src/mongoc/mongoc-stream-tls-secure-transport.c @@ -410,35 +410,36 @@ _set_error_from_osstatus (OSStatus status, const char *prefix, bson_error_t *err static char * explain_trust_result (SecTrustRef trust, SecTrustResultType trust_result) { - mcommon_string_t *reason; CFArrayRef cfprops = NULL; CFIndex count, i; - reason = mcommon_string_new (""); + mcommon_string_append_t reason; + 
mcommon_string_new_as_append (&reason); + switch (trust_result) { case kSecTrustResultDeny: - mcommon_string_append (reason, "Certificate trust denied"); + mcommon_string_append (&reason, "Certificate trust denied"); break; case kSecTrustResultRecoverableTrustFailure: - mcommon_string_append (reason, "Certificate trust failure"); + mcommon_string_append (&reason, "Certificate trust failure"); break; case kSecTrustResultFatalTrustFailure: - mcommon_string_append (reason, "Certificate trust fatal failure"); + mcommon_string_append (&reason, "Certificate trust fatal failure"); break; case kSecTrustResultInvalid: - mcommon_string_append (reason, "Certificate trust evaluation failure"); + mcommon_string_append (&reason, "Certificate trust evaluation failure"); break; default: - mcommon_string_append_printf (reason, "Certificate trust failure #%d", (int) trust_result); + mcommon_string_append_printf (&reason, "Certificate trust failure #%d", (int) trust_result); break; } - mcommon_string_append (reason, ": "); + mcommon_string_append (&reason, ": "); cfprops = SecTrustCopyProperties (trust); /* This contains an array of dictionaries, each representing a cert in the * chain. Append the first failure reason found. 
*/ if (!cfprops) { - mcommon_string_append (reason, "Unable to retreive cause for trust failure"); + mcommon_string_append (&reason, "Unable to retreive cause for trust failure"); goto done; } @@ -451,7 +452,7 @@ explain_trust_result (SecTrustRef trust, SecTrustResultType trust_result) elem = CFArrayGetValueAtIndex (cfprops, i); if (CFGetTypeID (elem) != CFDictionaryGetTypeID ()) { - mcommon_string_append (reason, "Unable to parse cause for trust failure"); + mcommon_string_append (&reason, "Unable to parse cause for trust failure"); goto done; } @@ -461,24 +462,24 @@ explain_trust_result (SecTrustRef trust, SecTrustResultType trust_result) continue; } if (CFGetTypeID (reason_elem) != CFStringGetTypeID ()) { - mcommon_string_append (reason, "Unable to parse trust failure error"); + mcommon_string_append (&reason, "Unable to parse trust failure error"); goto done; } reason_str = _mongoc_cfstringref_to_cstring (reason_elem); if (reason_str) { - mcommon_string_append (reason, reason_str); + mcommon_string_append (&reason, reason_str); bson_free (reason_str); goto done; } else { - mcommon_string_append (reason, "Unable to express trust failure error"); + mcommon_string_append (&reason, "Unable to express trust failure error"); goto done; } } - mcommon_string_append (reason, "No trust failure reason available"); + mcommon_string_append (&reason, "No trust failure reason available"); done: CFReleaseSafe (cfprops); - return mcommon_string_free (reason, false); + return mcommon_string_from_append_destroy_with_steal (&reason); } /* Returns a boolean indicating success. 
If false is returned, then an error is diff --git a/src/libmongoc/src/mongoc/mongoc-topology-scanner.c b/src/libmongoc/src/mongoc/mongoc-topology-scanner.c index 1cef96f7b2d..4ebc73362c0 100644 --- a/src/libmongoc/src/mongoc/mongoc-topology-scanner.c +++ b/src/libmongoc/src/mongoc/mongoc-topology-scanner.c @@ -1171,21 +1171,21 @@ void _mongoc_topology_scanner_finish (mongoc_topology_scanner_t *ts) { mongoc_topology_scanner_node_t *node, *tmp; - bson_error_t *error = &ts->error; - mcommon_string_t *msg; + bson_error_t *error = &ts->error; memset (&ts->error, 0, sizeof (bson_error_t)); - msg = mcommon_string_new (NULL); + mcommon_string_append_t msg; + mcommon_string_new_as_fixed_capacity_append (&msg, sizeof error->message - 1u); DL_FOREACH_SAFE (ts->nodes, node, tmp) { if (node->last_error.code) { - if (msg->len) { - mcommon_string_append_c (msg, ' '); + if (!mcommon_string_from_append_is_empty (&msg)) { + mcommon_string_append (&msg, " "); } - mcommon_string_append_printf (msg, "[%s]", node->last_error.message); + mcommon_string_append_printf (&msg, "[%s]", node->last_error.message); /* last error domain and code win */ error->domain = node->last_error.domain; @@ -1193,8 +1193,8 @@ _mongoc_topology_scanner_finish (mongoc_topology_scanner_t *ts) } } - bson_strncpy ((char *) &error->message, msg->str, sizeof (error->message)); - mcommon_string_free (msg, true); + bson_strncpy ((char *) &error->message, mcommon_str_from_append (&msg), sizeof error->message); + mcommon_string_from_append_destroy (&msg); _delete_retired_nodes (ts); } diff --git a/src/libmongoc/src/mongoc/mongoc-topology.c b/src/libmongoc/src/mongoc/mongoc-topology.c index 800f1f0fca5..1546152f576 100644 --- a/src/libmongoc/src/mongoc/mongoc-topology.c +++ b/src/libmongoc/src/mongoc/mongoc-topology.c @@ -1159,8 +1159,10 @@ mongoc_topology_select_server_id (mongoc_topology_t *topology, uint32_t server_id; mc_shared_tpld td = mc_tpld_take_ref (topology); - mcommon_string_t *topology_type = 
mcommon_string_new (". Topology type: "); - mcommon_string_append (topology_type, mongoc_topology_description_type (td.ptr)); + mcommon_string_append_t topology_type; + mcommon_string_new_as_append (&topology_type); + mcommon_string_append (&topology_type, ". Topology type: "); + mcommon_string_append (&topology_type, mongoc_topology_description_type (td.ptr)); /* These names come from the Server Selection Spec pseudocode */ int64_t loop_start; /* when we entered this function */ @@ -1365,10 +1367,10 @@ mongoc_topology_select_server_id (mongoc_topology_t *topology, if (error && server_id == 0) { /* server_id set to zero indicates that an error has occured and that `error` is initialized */ if (error->domain == MONGOC_ERROR_SERVER_SELECTION) { - _mongoc_error_append (error, topology_type->str); + _mongoc_error_append (error, mcommon_str_from_append (&topology_type)); } } - mcommon_string_free (topology_type, true); + mcommon_string_from_append_destroy (&topology_type); mc_tpld_drop_ref (&td); return server_id; } @@ -1906,10 +1908,11 @@ static void _topology_collect_errors (const mongoc_topology_description_t *td, bson_error_t *error_out) { const mongoc_server_description_t *server_description; - mcommon_string_t *error_message; memset (error_out, 0, sizeof (bson_error_t)); - error_message = mcommon_string_new (""); + + mcommon_string_append_t error_message; + mcommon_string_new_as_fixed_capacity_append (&error_message, sizeof error_out->message - 1u); for (size_t i = 0u; i < mc_tpld_servers_const (td)->items_len; i++) { const bson_error_t *error; @@ -1917,18 +1920,18 @@ _topology_collect_errors (const mongoc_topology_description_t *td, bson_error_t server_description = mc_tpld_servers_const (td)->items[i].item; error = &server_description->error; if (error->code) { - if (error_message->len > 0) { - mcommon_string_append_c (error_message, ' '); + if (!mcommon_string_from_append_is_empty (&error_message)) { + mcommon_string_append (&error_message, " "); } - 
mcommon_string_append_printf (error_message, "[%s]", server_description->error.message); + mcommon_string_append_printf (&error_message, "[%s]", server_description->error.message); /* The last error's code and domain wins. */ error_out->code = error->code; error_out->domain = error->domain; } } - bson_strncpy ((char *) &error_out->message, error_message->str, sizeof (error_out->message)); - mcommon_string_free (error_message, true); + bson_strncpy ((char *) &error_out->message, mcommon_str_from_append (&error_message), sizeof (error_out->message)); + mcommon_string_from_append_destroy (&error_message); } void diff --git a/src/libmongoc/src/mongoc/mongoc-uri.c b/src/libmongoc/src/mongoc/mongoc-uri.c index b7c44394f80..9e9980ec3c2 100644 --- a/src/libmongoc/src/mongoc/mongoc-uri.c +++ b/src/libmongoc/src/mongoc/mongoc-uri.c @@ -2218,7 +2218,6 @@ char * mongoc_uri_unescape (const char *escaped_string) { bson_unichar_t c; - mcommon_string_t *str; unsigned int hex = 0; const char *ptr; const char *end; @@ -2239,7 +2238,9 @@ mongoc_uri_unescape (const char *escaped_string) ptr = escaped_string; end = ptr + len; - str = mcommon_string_new (NULL); + + mcommon_string_append_t append; + mcommon_string_new_with_capacity_as_append (&append, len); for (; *ptr; ptr = bson_utf8_next_char (ptr)) { c = bson_utf8_get_char (ptr); @@ -2252,28 +2253,32 @@ mongoc_uri_unescape (const char *escaped_string) (1 != sscanf (&ptr[1], "%02x", &hex)) #endif || 0 == hex) { - mcommon_string_free (str, true); + mcommon_string_from_append_destroy (&append); MONGOC_WARNING ("Invalid %% escape sequence"); return NULL; } - mcommon_string_append_c (str, hex); + + // This isn't guaranteed to be valid UTF-8, we check again below + char byte = (char) hex; + mcommon_string_append_bytes (&append, &byte, 1); ptr += 2; unescape_occurred = true; break; default: - mcommon_string_append_unichar (str, c); + mcommon_string_append_unichar (&append, c); break; } } /* Check that after unescaping, it is still valid 
UTF-8 */ - if (unescape_occurred && !bson_utf8_validate (str->str, str->len, false)) { + if (unescape_occurred && + !bson_utf8_validate (mcommon_str_from_append (&append), mcommon_strlen_from_append (&append), false)) { MONGOC_WARNING ("Invalid %% escape sequence: unescaped string contains invalid UTF-8"); - mcommon_string_free (str, true); + mcommon_string_from_append_destroy (&append); return NULL; } - return mcommon_string_free (str, false); + return mcommon_string_from_append_destroy_with_steal (&append); } diff --git a/src/libmongoc/src/mongoc/mongoc-write-command.c b/src/libmongoc/src/mongoc/mongoc-write-command.c index 152b8c3736d..476192d98f9 100644 --- a/src/libmongoc/src/mongoc/mongoc-write-command.c +++ b/src/libmongoc/src/mongoc/mongoc-write-command.c @@ -989,15 +989,16 @@ _set_error_from_response (bson_t *bson_array, { bson_iter_t array_iter; bson_iter_t doc_iter; - mcommon_string_t *compound_err; const char *errmsg = NULL; int32_t code = 0; uint32_t n_keys, i; - compound_err = mcommon_string_new (NULL); + mcommon_string_append_t compound_err; + mcommon_string_new_as_fixed_capacity_append (&compound_err, sizeof error->message - 1u); + n_keys = bson_count_keys (bson_array); if (n_keys > 1) { - mcommon_string_append_printf (compound_err, "Multiple %s errors: ", error_type); + mcommon_string_append_printf (&compound_err, "Multiple %s errors: ", error_type); } if (!bson_empty0 (bson_array) && bson_iter_init (&array_iter, bson_array)) { @@ -1016,13 +1017,13 @@ _set_error_from_response (bson_t *bson_array, /* build message like 'Multiple write errors: "foo", "bar"' */ if (n_keys > 1) { - mcommon_string_append_printf (compound_err, "\"%s\"", errmsg); + mcommon_string_append_printf (&compound_err, "\"%s\"", errmsg); if (i < n_keys - 1) { - mcommon_string_append (compound_err, ", "); + mcommon_string_append (&compound_err, ", "); } } else { /* single error message */ - mcommon_string_append (compound_err, errmsg); + mcommon_string_append (&compound_err, 
errmsg); } } } @@ -1031,12 +1032,12 @@ _set_error_from_response (bson_t *bson_array, } } - if (code && compound_err->len) { - bson_set_error (error, domain, (uint32_t) code, "%s", compound_err->str); + if (code && !mcommon_string_from_append_is_empty (&compound_err)) { + bson_set_error (error, domain, (uint32_t) code, "%s", mcommon_str_from_append (&compound_err)); } } - mcommon_string_free (compound_err, true); + mcommon_string_from_append_destroy (&compound_err); } diff --git a/src/libmongoc/tests/TestSuite.c b/src/libmongoc/tests/TestSuite.c index 2c408bd3208..0c5726e11f2 100644 --- a/src/libmongoc/tests/TestSuite.c +++ b/src/libmongoc/tests/TestSuite.c @@ -43,6 +43,7 @@ #include "test-libmongoc.h" #include "TestSuite.h" #include +#include #define SKIP_LINE_BUFFER_SIZE 1024 @@ -203,7 +204,7 @@ TestSuite_Init (TestSuite *suite, const char *name, int argc, char **argv) } else if (!strcmp (mock_server_log, "stderr")) { suite->mock_server_log = stderr; } else if (!strcmp (mock_server_log, "json")) { - suite->mock_server_log_buf = mcommon_string_new (NULL); + suite->mock_server_log_buf = mcommon_string_new_with_capacity ("", 0, 4096); } else { test_error ("Unrecognized option: MONGOC_TEST_SERVER_LOG=%s", mock_server_log); } @@ -506,8 +507,9 @@ TestSuite_RunFuncInChild (TestSuite *suite, /* IN */ if (suite->mock_server_log_buf) { close (pipefd[1]); while ((nread = read (pipefd[0], buf, sizeof (buf) - 1)) > 0) { - buf[nread] = '\0'; - mcommon_string_append (suite->mock_server_log_buf, buf); + mcommon_string_append_t append; + mcommon_string_set_append (suite->mock_server_log_buf, &append); + mcommon_string_append_bytes (&append, buf, nread); } } @@ -520,16 +522,6 @@ TestSuite_RunFuncInChild (TestSuite *suite, /* IN */ #endif -/* replace " with \", newline with \n, tab with four spaces */ -static void -_append_json_escaped (mcommon_string_t *buf, const char *s) -{ - char *escaped = bson_utf8_escape_for_json (s, -1); - mcommon_string_append (buf, escaped); - bson_free 
(escaped); -} - - /* returns 1 on failure, 0 on success */ static int TestSuite_RunTest (TestSuite *suite, /* IN */ @@ -538,14 +530,14 @@ TestSuite_RunTest (TestSuite *suite, /* IN */ { int64_t t1, t2, t3; char name[MAX_TEST_NAME_LENGTH]; - mcommon_string_t *buf; + mcommon_string_append_t buf; mcommon_string_t *mock_server_log_buf; size_t i; int status = 0; bson_snprintf (name, sizeof name, "%s%s", suite->name, test->name); - buf = mcommon_string_new (NULL); + mcommon_string_new_as_append (&buf); if (suite->flags & TEST_DEBUGOUTPUT) { test_msg ("Begin %s, seed %u", name, test->seed); @@ -559,15 +551,15 @@ TestSuite_RunTest (TestSuite *suite, /* IN */ test_msg ("@@ctest-skipped@@"); } if (!suite->silent) { - mcommon_string_append_printf (buf, + mcommon_string_append_printf (&buf, " { \"status\": \"skip\", \"test_file\": \"%s\"," " \"reason\": \"%s\" }%s", test->name, skip->reason, ((*count) == 1) ? "" : ","); - test_msg ("%s", buf->str); + test_msg ("%s", mcommon_str_from_append (&buf)); if (suite->outfile) { - fprintf (suite->outfile, "%s", buf->str); + fprintf (suite->outfile, "%s", mcommon_str_from_append (&buf)); fflush (suite->outfile); } } @@ -584,10 +576,10 @@ TestSuite_RunTest (TestSuite *suite, /* IN */ } if (!suite->silent) { mcommon_string_append_printf ( - buf, " { \"status\": \"skip\", \"test_file\": \"%s\" }%s", test->name, ((*count) == 1) ? "" : ","); - test_msg ("%s", buf->str); + &buf, " { \"status\": \"skip\", \"test_file\": \"%s\" }%s", test->name, ((*count) == 1) ? 
"" : ","); + test_msg ("%s", mcommon_str_from_append (&buf)); if (suite->outfile) { - fprintf (suite->outfile, "%s", buf->str); + fprintf (suite->outfile, "%s", mcommon_str_from_append (&buf)); fflush (suite->outfile); } } @@ -626,7 +618,7 @@ TestSuite_RunTest (TestSuite *suite, /* IN */ ASSERT_CMPINT64 (t2, >=, t1); t3 = t2 - t1; - mcommon_string_append_printf (buf, + mcommon_string_append_printf (&buf, " { \"status\": \"%s\", " "\"test_file\": \"%s\", " "\"seed\": \"%u\", " @@ -645,28 +637,27 @@ TestSuite_RunTest (TestSuite *suite, /* IN */ mock_server_log_buf = suite->mock_server_log_buf; - if (mock_server_log_buf && mock_server_log_buf->len) { - mcommon_string_append (buf, ", \"log_raw\": \""); - _append_json_escaped (buf, mock_server_log_buf->str); - mcommon_string_append (buf, "\""); - - mcommon_string_truncate (mock_server_log_buf, 0); + if (mock_server_log_buf && !mcommon_string_is_empty (mock_server_log_buf)) { + mcommon_string_append (&buf, ", \"log_raw\": \""); + mcommon_json_append_escaped (&buf, mock_server_log_buf->str, mock_server_log_buf->len, true); + mcommon_string_append (&buf, "\""); + mcommon_string_clear (mock_server_log_buf); } - mcommon_string_append_printf (buf, " }"); + mcommon_string_append (&buf, " }"); if (*count > 1) { - mcommon_string_append_printf (buf, ","); + mcommon_string_append (&buf, ","); } - test_msg ("%s", buf->str); + test_msg ("%s", mcommon_str_from_append (&buf)); if (suite->outfile) { - fprintf (suite->outfile, "%s", buf->str); + fprintf (suite->outfile, "%s", mcommon_str_from_append (&buf)); fflush (suite->outfile); } done: - mcommon_string_free (buf, true); + mcommon_string_from_append_destroy (&buf); return status ? 
1 : 0; } @@ -1123,9 +1114,7 @@ TestSuite_Destroy (TestSuite *suite) fclose (suite->outfile); } - if (suite->mock_server_log_buf) { - mcommon_string_free (suite->mock_server_log_buf, true); - } + mcommon_string_destroy (suite->mock_server_log_buf); bson_free (suite->name); bson_free (suite->prgname); @@ -1166,24 +1155,25 @@ MONGOC_PRINTF_FORMAT (1, 2) void test_suite_mock_server_log (const char *msg, ...) { - va_list ap; - char *formatted_msg; - bson_mutex_lock (&gTestMutex); - if (gTestSuite->mock_server_log || gTestSuite->mock_server_log_buf) { + if (gTestSuite->mock_server_log_buf) { + mcommon_string_append_t append; + mcommon_string_set_append (gTestSuite->mock_server_log_buf, &append); + + va_list ap; va_start (ap, msg); - formatted_msg = bson_strdupv_printf (msg, ap); + mcommon_string_append_vprintf (&append, msg, ap); va_end (ap); + mcommon_string_append (&append, "\n"); - if (gTestSuite->mock_server_log_buf) { - mcommon_string_append_printf (gTestSuite->mock_server_log_buf, "%s\n", formatted_msg); - } else { - fprintf (gTestSuite->mock_server_log, "%s\n", formatted_msg); - fflush (gTestSuite->mock_server_log); - } - - bson_free (formatted_msg); + } else if (gTestSuite->mock_server_log) { + va_list ap; + va_start (ap, msg); + vfprintf (gTestSuite->mock_server_log, msg, ap); + va_end (ap); + fputc ('\n', gTestSuite->mock_server_log); + fflush (gTestSuite->mock_server_log); } bson_mutex_unlock (&gTestMutex); diff --git a/src/libmongoc/tests/TestSuite.h b/src/libmongoc/tests/TestSuite.h index df3847bd100..4baaae8c921 100644 --- a/src/libmongoc/tests/TestSuite.h +++ b/src/libmongoc/tests/TestSuite.h @@ -25,6 +25,8 @@ #include #include +#include + #include "mongoc/mongoc-array-private.h" #include "mongoc/mongoc-util-private.h" @@ -672,7 +674,7 @@ struct _TestSuite { FILE *outfile; int flags; int silent; - bson_string_t *mock_server_log_buf; + mcommon_string_t *mock_server_log_buf; FILE *mock_server_log; mongoc_array_t failing_flaky_skips; }; diff --git 
a/src/libmongoc/tests/mock_server/mock-rs.c b/src/libmongoc/tests/mock_server/mock-rs.c index 2c196e9c896..3d42eb563a2 100644 --- a/src/libmongoc/tests/mock_server/mock-rs.c +++ b/src/libmongoc/tests/mock_server/mock-rs.c @@ -67,18 +67,19 @@ char * hosts (mongoc_array_t *servers) { const char *host_and_port; - mcommon_string_t *hosts_str = mcommon_string_new (""); + mcommon_string_append_t hosts_str; + mcommon_string_new_as_append (&hosts_str); for (size_t i = 0u; i < servers->len; i++) { host_and_port = mock_server_get_host_and_port (get_server (servers, i)); - mcommon_string_append_printf (hosts_str, "\"%s\"", host_and_port); + mcommon_string_append_printf (&hosts_str, "\"%s\"", host_and_port); if (i + 1u < servers->len) { - mcommon_string_append_printf (hosts_str, ", "); + mcommon_string_append_printf (&hosts_str, ", "); } } - return mcommon_string_free (hosts_str, false); /* detach buffer */ + return mcommon_string_from_append_destroy_with_steal (&hosts_str); } @@ -86,28 +87,31 @@ mongoc_uri_t * make_uri (mongoc_array_t *servers) { const char *host_and_port; - mcommon_string_t *uri_str = mcommon_string_new ("mongodb://"); + mcommon_string_append_t uri_str; + mcommon_string_new_as_append (&uri_str); mongoc_uri_t *uri; + mcommon_string_append (&uri_str, "mongodb://"); + for (size_t i = 0u; i < servers->len; i++) { host_and_port = mock_server_get_host_and_port (get_server (servers, i)); - mcommon_string_append_printf (uri_str, "%s", host_and_port); + mcommon_string_append (&uri_str, host_and_port); if (i + 1u < servers->len) { - mcommon_string_append_printf (uri_str, ","); + mcommon_string_append (&uri_str, ","); } } - mcommon_string_append_printf (uri_str, "/?replicaSet=rs"); + mcommon_string_append (&uri_str, "/?replicaSet=rs"); - uri = mongoc_uri_new (uri_str->str); + uri = mongoc_uri_new (mcommon_str_from_append (&uri_str)); // Many mock server tests do not expect retryable handshakes. 
Disable by // default: tests that expect or require retryable handshakes must opt-in. mongoc_uri_set_option_as_bool (uri, MONGOC_URI_RETRYREADS, false); mongoc_uri_set_option_as_bool (uri, MONGOC_URI_RETRYWRITES, false); - mcommon_string_free (uri_str, true); + mcommon_string_from_append_destroy (&uri_str); return uri; } diff --git a/src/libmongoc/tests/mock_server/mock-server.c b/src/libmongoc/tests/mock_server/mock-server.c index ed60bda2da6..46e78f45c54 100644 --- a/src/libmongoc/tests/mock_server/mock-server.c +++ b/src/libmongoc/tests/mock_server/mock-server.c @@ -29,6 +29,7 @@ #include "../test-libmongoc.h" #include "../TestSuite.h" #include +#include #include #ifdef BSON_HAVE_STRINGS_H @@ -1967,8 +1968,6 @@ reply_to_request_with_multiple_docs ( static void _mock_server_reply_with_stream (mock_server_t *server, reply_t *reply, mongoc_stream_t *client) { - char *doc_json; - mcommon_string_t *docs_json; uint8_t *buf; uint8_t *ptr; size_t len; @@ -1993,13 +1992,12 @@ _mock_server_reply_with_stream (mock_server_t *server, reply_t *reply, mongoc_st return; } - docs_json = mcommon_string_new (""); + mcommon_string_append_t docs_json; + mcommon_string_new_as_append (&docs_json); for (int i = 0; i < n_docs; i++) { - doc_json = bson_as_relaxed_extended_json (&docs[i], NULL); - mcommon_string_append (docs_json, doc_json); - bson_free (doc_json); + mcommon_json_append_bson_document (&docs_json, &docs[i], BSON_JSON_MODE_RELAXED, BSON_MAX_RECURSION); if (i < n_docs - 1) { - mcommon_string_append (docs_json, ", "); + mcommon_string_append (&docs_json, ", "); } } @@ -2010,7 +2008,7 @@ _mock_server_reply_with_stream (mock_server_t *server, reply_t *reply, mongoc_st reply->client_port, mock_server_get_port (server), is_op_msg ? 
"OP_MSG" : "OP_REPLY", - docs_json->str); + mcommon_str_from_append (&docs_json)); len = 0; @@ -2071,7 +2069,7 @@ _mock_server_reply_with_stream (mock_server_t *server, reply_t *reply, mongoc_st bson_free (iov); mcd_rpc_message_destroy (rpc); - mcommon_string_free (docs_json, true); + mcommon_string_from_append_destroy (&docs_json); bson_free (buf); } @@ -2094,7 +2092,6 @@ void rs_response_to_hello (mock_server_t *server, int max_wire_version, bool primary, int has_tags, ...) { va_list ap; - mcommon_string_t *hosts; bool first; mock_server_t *host; @@ -2103,7 +2100,8 @@ rs_response_to_hello (mock_server_t *server, int max_wire_version, bool primary, max_wire_version, WIRE_VERSION_MIN); - hosts = mcommon_string_new (""); + mcommon_string_append_t hosts; + mcommon_string_new_as_append (&hosts); va_start (ap, has_tags); @@ -2112,10 +2110,10 @@ rs_response_to_hello (mock_server_t *server, int max_wire_version, bool primary, if (first) { first = false; } else { - mcommon_string_append (hosts, ","); + mcommon_string_append (&hosts, ","); } - mcommon_string_append_printf (hosts, "'%s'", mock_server_get_host_and_port (host)); + mcommon_string_append_printf (&hosts, "'%s'", mock_server_get_host_and_port (host)); } va_end (ap); @@ -2137,7 +2135,7 @@ rs_response_to_hello (mock_server_t *server, int max_wire_version, bool primary, has_tags ? 
"'key': 'value'" : "", WIRE_VERSION_MIN, max_wire_version, - hosts->str); + mcommon_str_from_append (&hosts)); - mcommon_string_free (hosts, true); + mcommon_string_from_append_destroy (&hosts); } diff --git a/src/libmongoc/tests/mock_server/request.c b/src/libmongoc/tests/mock_server/request.c index e557c7d9bfa..8dc36fb5264 100644 --- a/src/libmongoc/tests/mock_server/request.c +++ b/src/libmongoc/tests/mock_server/request.c @@ -22,6 +22,7 @@ #include "../test-conveniences.h" #include "../TestSuite.h" #include +#include #include static bool @@ -39,8 +40,8 @@ request_from_getmore (request_t *request); static void request_from_op_msg (request_t *request); -static char * -query_flags_str (int32_t flags); +static void +query_flags_str (mcommon_string_append_t *str, int32_t flags); request_t * request_new (const mongoc_buffer_t *buffer, @@ -128,9 +129,18 @@ assert_request_matches_flags (const request_t *request, uint32_t flags) const int32_t request_flags = mcd_rpc_op_query_get_flags (request->rpc); if (mcommon_cmp_not_equal_su (request_flags, flags)) { + mcommon_string_append_t str_request_flags, str_flags; + mcommon_string_new_as_append (&str_request_flags); + mcommon_string_new_as_append (&str_flags); + query_flags_str (&str_request_flags, request_flags); + query_flags_str (&str_flags, (int32_t) flags); + test_error ("request's query flags are %s, expected %s", - query_flags_str (request_flags), - query_flags_str ((int32_t) flags)); + mcommon_str_from_append (&str_request_flags), + mcommon_str_from_append (&str_flags)); + + mcommon_string_from_append_destroy (&str_request_flags); + mcommon_string_from_append_destroy (&str_flags); } } @@ -462,11 +472,10 @@ is_command_ns (const char *ns) } -static char * -query_flags_str (int32_t flags) +static void +query_flags_str (mcommon_string_append_t *str, int32_t flags) { int flag = 1; - mcommon_string_t *str = mcommon_string_new (""); bool begun = false; if (flags == MONGOC_OP_QUERY_FLAG_NONE) { @@ -511,8 +520,6 @@ 
query_flags_str (int32_t flags) } } } - - return mcommon_string_free (str, false); /* detach buffer */ } @@ -532,8 +539,9 @@ static void request_from_query (request_t *request) { bson_iter_t iter; - mcommon_string_t *query_as_str = mcommon_string_new ("OP_QUERY "); - char *str; + mcommon_string_append_t query_as_str; + mcommon_string_new_as_append (&query_as_str); + mcommon_string_append (&query_as_str, "OP_QUERY "); const int32_t request_flags = mcd_rpc_op_query_get_flags (request->rpc); const char *const request_coll = mcd_rpc_op_query_get_full_collection_name (request->rpc); @@ -548,7 +556,7 @@ request_from_query (request_t *request) BSON_ASSERT (query); _mongoc_array_append_val (&request->docs, query); - mcommon_string_append_printf (query_as_str, "%s ", request_coll); + mcommon_string_append_printf (&query_as_str, "%s ", request_coll); if (is_command_ns (request_coll)) { request->is_command = true; @@ -560,9 +568,7 @@ request_from_query (request_t *request) } } - str = bson_as_relaxed_extended_json (query, NULL); - mcommon_string_append (query_as_str, str); - bson_free (str); + mcommon_json_append_bson_document (&query_as_str, query, BSON_JSON_MODE_RELAXED, BSON_MAX_RECURSION); } if (request_fields) { @@ -571,27 +577,22 @@ request_from_query (request_t *request) BSON_ASSERT (fields); _mongoc_array_append_val (&request->docs, fields); - str = bson_as_relaxed_extended_json (fields, NULL); - mcommon_string_append (query_as_str, " fields="); - mcommon_string_append (query_as_str, str); - bson_free (str); + mcommon_string_append (&query_as_str, " fields="); + mcommon_json_append_bson_document (&query_as_str, fields, BSON_JSON_MODE_RELAXED, BSON_MAX_RECURSION); } - mcommon_string_append (query_as_str, " flags="); - - str = query_flags_str (request_flags); - mcommon_string_append (query_as_str, str); - bson_free (str); + mcommon_string_append (&query_as_str, " flags="); + query_flags_str (&query_as_str, request_flags); if (request_skip) { - 
mcommon_string_append_printf (query_as_str, " skip=%" PRId32, request_skip); + mcommon_string_append_printf (&query_as_str, " skip=%" PRId32, request_skip); } if (request_return) { - mcommon_string_append_printf (query_as_str, " n_return=%" PRId32, request_return); + mcommon_string_append_printf (&query_as_str, " n_return=%" PRId32, request_return); } - request->as_str = mcommon_string_free (query_as_str, false); + request->as_str = mcommon_string_from_append_destroy_with_steal (&query_as_str); } @@ -616,7 +617,7 @@ request_from_getmore (request_t *request) static void -parse_op_msg_doc (request_t *request, const uint8_t *data, size_t data_len, mcommon_string_t *msg_as_str) +parse_op_msg_doc (request_t *request, const uint8_t *data, size_t data_len, mcommon_string_append_t *msg_as_str) { const uint8_t *pos = data; while (pos < data + data_len) { @@ -629,9 +630,7 @@ parse_op_msg_doc (request_t *request, const uint8_t *data, size_t data_len, mcom BSON_ASSERT (doc); _mongoc_array_append_val (&request->docs, doc); - char *const str = bson_as_relaxed_extended_json (doc, NULL); - mcommon_string_append (msg_as_str, str); - bson_free (str); + mcommon_json_append_bson_document (msg_as_str, doc, BSON_JSON_MODE_RELAXED, BSON_MAX_RECURSION); pos += doc_len; } @@ -641,30 +640,32 @@ parse_op_msg_doc (request_t *request, const uint8_t *data, size_t data_len, mcom static void request_from_op_msg (request_t *request) { - mcommon_string_t *msg_as_str = mcommon_string_new ("OP_MSG"); + mcommon_string_append_t msg_as_str; + mcommon_string_new_as_append (&msg_as_str); + mcommon_string_append (&msg_as_str, "OP_MSG"); const size_t sections_count = mcd_rpc_op_msg_get_sections_count (request->rpc); BSON_ASSERT (sections_count <= 2u); for (size_t index = 0; index < sections_count; ++index) { - mcommon_string_append (msg_as_str, (index > 0 ? ", " : " ")); + mcommon_string_append (&msg_as_str, (index > 0 ? 
", " : " ")); const uint8_t kind = mcd_rpc_op_msg_section_get_kind (request->rpc, index); switch (kind) { case 0: { /* a single BSON document */ const void *const body = mcd_rpc_op_msg_section_get_body (request->rpc, index); - parse_op_msg_doc (request, body, (size_t) length_prefix (body), msg_as_str); + parse_op_msg_doc (request, body, (size_t) length_prefix (body), &msg_as_str); break; } case 1: { /* a sequence of BSON documents */ - mcommon_string_append (msg_as_str, mcd_rpc_op_msg_section_get_identifier (request->rpc, index)); - mcommon_string_append (msg_as_str, ": ["); + mcommon_string_append (&msg_as_str, mcd_rpc_op_msg_section_get_identifier (request->rpc, index)); + mcommon_string_append (&msg_as_str, ": ["); parse_op_msg_doc (request, mcd_rpc_op_msg_section_get_document_sequence (request->rpc, index), mcd_rpc_op_msg_section_get_document_sequence_length (request->rpc, index), - msg_as_str); + &msg_as_str); - mcommon_string_append (msg_as_str, "]"); + mcommon_string_append (&msg_as_str, "]"); break; } @@ -673,7 +674,7 @@ request_from_op_msg (request_t *request) } } - request->as_str = mcommon_string_free (msg_as_str, false); + request->as_str = mcommon_string_from_append_destroy_with_steal (&msg_as_str); request->is_command = true; /* true for all OP_MSG requests */ if (request->docs.len) { diff --git a/src/libmongoc/tests/test-libmongoc.c b/src/libmongoc/tests/test-libmongoc.c index e6c9ad9a78d..adb65ecd98e 100644 --- a/src/libmongoc/tests/test-libmongoc.c +++ b/src/libmongoc/tests/test-libmongoc.c @@ -395,7 +395,9 @@ char * test_framework_get_unix_domain_socket_path_escaped (void) { char *path = test_framework_get_unix_domain_socket_path (), *c = path; - mcommon_string_t *escaped = mcommon_string_new (NULL); + + mcommon_string_append_t escaped; + mcommon_string_new_as_append (&escaped); /* Connection String Spec: "The host information cannot contain an unescaped * slash ("/"), if it does then an exception MUST be thrown informing users @@ -406,16 +408,15 
@@ test_framework_get_unix_domain_socket_path_escaped (void) */ do { if (*c == '/') { - mcommon_string_append (escaped, "%2F"); + mcommon_string_append (&escaped, "%2F"); } else { - mcommon_string_append_c (escaped, *c); + mcommon_string_append_bytes (&escaped, c, 1); } } while (*(++c)); - mcommon_string_append_c (escaped, '\0'); bson_free (path); - return mcommon_string_free (escaped, false /* free_segment */); + return mcommon_string_from_append_destroy_with_steal (&escaped); } static char * @@ -979,17 +980,17 @@ uri_str_has_db (mcommon_string_t *uri_string) static void -add_option_to_uri_str (mcommon_string_t *uri_string, const char *option, const char *value) +add_option_to_uri_str (mcommon_string_append_t *uri_string, const char *option, const char *value) { - if (strchr (uri_string->str, '?')) { + if (strchr (mcommon_str_from_append (uri_string), '?')) { /* already has some options */ - mcommon_string_append_c (uri_string, '&'); - } else if (uri_str_has_db (uri_string)) { + mcommon_string_append (uri_string, "&"); + } else if (uri_str_has_db (mcommon_string_from_append (uri_string))) { /* like "mongodb://host/db" */ - mcommon_string_append_c (uri_string, '?'); + mcommon_string_append (uri_string, "?"); } else { /* like "mongodb://host" */ - mcommon_string_append_printf (uri_string, "/?"); + mcommon_string_append (uri_string, "/?"); } mcommon_string_append_printf (uri_string, "%s=%s", option, value); @@ -1023,7 +1024,6 @@ test_framework_get_uri_str_no_auth (const char *database_name) { char *env_uri_str; bson_t hello_response; - mcommon_string_t *uri_string; char *name; bson_iter_t iter; bson_iter_t hosts_iter; @@ -1031,20 +1031,23 @@ test_framework_get_uri_str_no_auth (const char *database_name) char *host; uint16_t port; + mcommon_string_append_t uri_string; + mcommon_string_new_as_append (&uri_string); + env_uri_str = _uri_str_from_env (); if (env_uri_str) { - uri_string = mcommon_string_new (env_uri_str); + mcommon_string_append (&uri_string, env_uri_str); 
if (database_name) { - if (uri_string->str[uri_string->len - 1] != '/') { - mcommon_string_append (uri_string, "/"); + if (!mcommon_string_from_append_ends_with_str (&uri_string, "/")) { + mcommon_string_append (&uri_string, "/"); } - mcommon_string_append (uri_string, database_name); + mcommon_string_append (&uri_string, database_name); } bson_free (env_uri_str); } else { /* construct a direct connection or replica set connection URI */ call_hello (&hello_response); - uri_string = mcommon_string_new ("mongodb://"); + mcommon_string_append (&uri_string, "mongodb://"); if ((name = set_name (&hello_response))) { /* make a replica set URI */ @@ -1056,34 +1059,34 @@ test_framework_get_uri_str_no_auth (const char *database_name) while (bson_iter_next (&hosts_iter)) { BSON_ASSERT (BSON_ITER_HOLDS_UTF8 (&hosts_iter)); if (!first) { - mcommon_string_append (uri_string, ","); + mcommon_string_append (&uri_string, ","); } - mcommon_string_append (uri_string, bson_iter_utf8 (&hosts_iter, NULL)); + mcommon_string_append (&uri_string, bson_iter_utf8 (&hosts_iter, NULL)); first = false; } - mcommon_string_append (uri_string, "/"); + mcommon_string_append (&uri_string, "/"); if (database_name) { - mcommon_string_append (uri_string, database_name); + mcommon_string_append (&uri_string, database_name); } - add_option_to_uri_str (uri_string, MONGOC_URI_REPLICASET, name); + add_option_to_uri_str (&uri_string, MONGOC_URI_REPLICASET, name); bson_free (name); } else { host = test_framework_get_host (); port = test_framework_get_port (); - mcommon_string_append_printf (uri_string, "%s:%hu", host, port); - mcommon_string_append (uri_string, "/"); + mcommon_string_append_printf (&uri_string, "%s:%hu", host, port); + mcommon_string_append (&uri_string, "/"); if (database_name) { - mcommon_string_append (uri_string, database_name); + mcommon_string_append (&uri_string, database_name); } bson_free (host); } if (test_framework_get_ssl ()) { - add_option_to_uri_str (uri_string, MONGOC_URI_SSL, 
"true"); + add_option_to_uri_str (&uri_string, MONGOC_URI_SSL, "true"); } bson_destroy (&hello_response); @@ -1092,15 +1095,15 @@ test_framework_get_uri_str_no_auth (const char *database_name) if (test_framework_has_compressors ()) { char *compressors = test_framework_get_compressors (); - add_option_to_uri_str (uri_string, MONGOC_URI_COMPRESSORS, compressors); + add_option_to_uri_str (&uri_string, MONGOC_URI_COMPRESSORS, compressors); bson_free (compressors); } // Required by test-atlas-executor. Not required by normal unified test // runner, but make tests a little more resilient to transient errors. - add_option_to_uri_str (uri_string, MONGOC_URI_SERVERSELECTIONTRYONCE, "false"); + add_option_to_uri_str (&uri_string, MONGOC_URI_SERVERSELECTIONTRYONCE, "false"); - return mcommon_string_free (uri_string, false); + return mcommon_string_from_append_destroy_with_steal (&uri_string); } /* diff --git a/src/libmongoc/tests/test-mcd-azure-imds.c b/src/libmongoc/tests/test-mcd-azure-imds.c index 387705a2ab5..327cf9ae288 100644 --- a/src/libmongoc/tests/test-mcd-azure-imds.c +++ b/src/libmongoc/tests/test-mcd-azure-imds.c @@ -40,10 +40,12 @@ _test_http_req (void) // Test generating an HTTP request for the IMDS server mcd_azure_imds_request req; mcd_azure_imds_request_init (&req, "example.com", 9879, ""); - mcommon_string_t *req_str = _mongoc_http_render_request_head (&req.req); + mcommon_string_append_t req_str; + mcommon_string_new_as_append (&req_str); + _mongoc_http_render_request_head (&req_str, &req.req); mcd_azure_imds_request_destroy (&req); // Assert that we composed exactly the request that we expected - ASSERT_CMPSTR (req_str->str, + ASSERT_CMPSTR (mcommon_str_from_append (&req_str), "GET " "/metadata/identity/oauth2/" "token?api-version=2018-02-01&resource=https%3A%2F%2Fvault." 
@@ -53,7 +55,7 @@ _test_http_req (void) "Metadata: true\r\n" "Accept: application/json\r\n" "\r\n"); - mcommon_string_free (req_str, true); + mcommon_string_from_append_destroy (&req_str); } static const char * diff --git a/src/libmongoc/tests/test-mongoc-background-monitoring.c b/src/libmongoc/tests/test-mongoc-background-monitoring.c index 6a7a2af03ab..dd535b52281 100644 --- a/src/libmongoc/tests/test-mongoc-background-monitoring.c +++ b/src/libmongoc/tests/test-mongoc-background-monitoring.c @@ -58,7 +58,7 @@ typedef struct { tf_observations_t *observations; bson_mutex_t mutex; mongoc_cond_t cond; - mcommon_string_t *logs; + mcommon_string_append_t logs; } test_fixture_t; void @@ -73,14 +73,13 @@ tf_dump (test_fixture_t *tf) printf ("sd_type=%d\n", (int) tf->observations->sd_type); printf ("-- Test fixture logs --\n"); - printf ("%s", tf->logs->str); + printf ("%s", mcommon_str_from_append (&tf->logs)); printf ("== End dump ==\n"); } void BSON_GNUC_PRINTF (2, 3) tf_log (test_fixture_t *tf, const char *format, ...) { va_list ap; - char *str; char nowstr[32]; struct timeval tv; struct tm tt; @@ -100,14 +99,13 @@ void BSON_GNUC_PRINTF (2, 3) tf_log (test_fixture_t *tf, const char *format, ... #endif strftime (nowstr, sizeof nowstr, "%Y/%m/%d %H:%M:%S ", &tt); + mcommon_string_append (&tf->logs, nowstr); va_start (ap, format); - str = bson_strdupv_printf (format, ap); + mcommon_string_append_vprintf (&tf->logs, format, ap); va_end (ap); - mcommon_string_append (tf->logs, nowstr); - mcommon_string_append (tf->logs, str); - mcommon_string_append_c (tf->logs, '\n'); - bson_free (str); + + mcommon_string_append (&tf->logs, "\n"); } #define TF_LOG(_tf, ...) 
tf_log (_tf, __VA_ARGS__) @@ -241,7 +239,7 @@ tf_new (tf_flags_t flags) mock_server_autoresponds (tf->server, auto_respond_polling_hello, NULL, NULL); } tf->flags = flags; - tf->logs = mcommon_string_new (""); + mcommon_string_new_as_append (&tf->logs); tf->client = mongoc_client_pool_pop (tf->pool); return tf; } @@ -252,7 +250,7 @@ tf_destroy (test_fixture_t *tf) mock_server_destroy (tf->server); mongoc_client_pool_push (tf->pool, tf->client); mongoc_client_pool_destroy (tf->pool); - mcommon_string_free (tf->logs, true); + mcommon_string_from_append_destroy (&tf->logs); bson_mutex_destroy (&tf->mutex); mongoc_cond_destroy (&tf->cond); bson_free (tf->observations); diff --git a/src/libmongoc/tests/test-mongoc-cursor.c b/src/libmongoc/tests/test-mongoc-cursor.c index 6e8a3ef4874..1f48464adcf 100644 --- a/src/libmongoc/tests/test-mongoc-cursor.c +++ b/src/libmongoc/tests/test-mongoc-cursor.c @@ -1744,7 +1744,7 @@ typedef struct { static void -_make_reply_batch (mcommon_string_t *reply, uint32_t n_docs, bool first_batch, bool finished) +_make_reply_batch (mcommon_string_append_t *reply, uint32_t n_docs, bool first_batch, bool finished) { uint32_t j; @@ -1778,7 +1778,6 @@ _test_cursor_n_return_find_cmd (mongoc_cursor_t *cursor, mock_server_t *server, future_t *future; int j; int reply_no; - mcommon_string_t *reply; bool cursor_finished; BSON_APPEND_UTF8 (&find_cmd, "find", "coll"); @@ -1801,10 +1800,13 @@ _test_cursor_n_return_find_cmd (mongoc_cursor_t *cursor, mock_server_t *server, assert_match_bson (request_get_doc (request, 0), &find_cmd, true); - reply = mcommon_string_new (NULL); - _make_reply_batch (reply, (uint32_t) test->reply_length[0], true, false); - reply_to_request_simple (request, reply->str); - mcommon_string_free (reply, true); + { + mcommon_string_append_t reply; + mcommon_string_new_as_append (&reply); + _make_reply_batch (&reply, (uint32_t) test->reply_length[0], true, false); + reply_to_request_simple (request, mcommon_str_from_append (&reply)); + 
mcommon_string_from_append_destroy (&reply); + } ASSERT (future_get_bool (future)); future_destroy (future); @@ -1830,12 +1832,14 @@ _test_cursor_n_return_find_cmd (mongoc_cursor_t *cursor, mock_server_t *server, assert_match_bson (request_get_doc (request, 0), &getmore_cmd, true); - reply = mcommon_string_new (NULL); - cursor_finished = (reply_no == 2); - _make_reply_batch (reply, (uint32_t) test->reply_length[reply_no], false, cursor_finished); - - reply_to_request_simple (request, reply->str); - mcommon_string_free (reply, true); + { + mcommon_string_append_t reply; + mcommon_string_new_as_append (&reply); + cursor_finished = (reply_no == 2); + _make_reply_batch (&reply, (uint32_t) test->reply_length[reply_no], false, cursor_finished); + reply_to_request_simple (request, mcommon_str_from_append (&reply)); + mcommon_string_from_append_destroy (&reply); + } ASSERT (future_get_bool (future)); future_destroy (future); diff --git a/src/libmongoc/tests/test-mongoc-gridfs.c b/src/libmongoc/tests/test-mongoc-gridfs.c index d62289c0ced..e89a6ed0ff4 100644 --- a/src/libmongoc/tests/test-mongoc-gridfs.c +++ b/src/libmongoc/tests/test-mongoc-gridfs.c @@ -1506,7 +1506,8 @@ test_reading_multiple_chunks (void) // Read the entire file. 
{ - mcommon_string_t *str = mcommon_string_new (""); + mcommon_string_append_t str; + mcommon_string_new_as_append (&str); uint8_t buf[7] = {0}; mongoc_iovec_t iov = {.iov_base = (void *) buf, .iov_len = sizeof (buf)}; mongoc_gridfs_file_t *file = mongoc_gridfs_find_one_by_filename (gridfs, "test_file", &error); @@ -1518,7 +1519,7 @@ test_reading_multiple_chunks (void) mongoc_gridfs_file_readv (file, &iov, 1 /* iovcnt */, 1 /* min_bytes */, 0 /* timeout_msec */); ASSERT_CMPSSIZE_T (got, >=, 0); ASSERT (mcommon_in_range_int_signed (got)); - mcommon_string_append_printf (str, "%.*s", (int) got, (char *) buf); + mcommon_string_append_printf (&str, "%.*s", (int) got, (char *) buf); ASSERT_CMPSSIZE_T (got, ==, 4); } @@ -1528,12 +1529,12 @@ test_reading_multiple_chunks (void) mongoc_gridfs_file_readv (file, &iov, 1 /* iovcnt */, 1 /* min_bytes */, 0 /* timeout_msec */); ASSERT_CMPSSIZE_T (got, >=, 0); ASSERT (mcommon_in_range_int_signed (got)); - mcommon_string_append_printf (str, "%.*s", (int) got, (char *) buf); + mcommon_string_append_printf (&str, "%.*s", (int) got, (char *) buf); ASSERT_CMPSSIZE_T (got, ==, 3); } - ASSERT_CMPSTR (str->str, "foobar"); - mcommon_string_free (str, true); + ASSERT_CMPSTR (mcommon_str_from_append (&str), "foobar"); + mcommon_string_from_append_destroy (&str); mongoc_gridfs_file_destroy (file); } diff --git a/src/libmongoc/tests/test-mongoc-ssl.c b/src/libmongoc/tests/test-mongoc-ssl.c index b37bc2571d9..9de4ad05fff 100644 --- a/src/libmongoc/tests/test-mongoc-ssl.c +++ b/src/libmongoc/tests/test-mongoc-ssl.c @@ -141,16 +141,17 @@ test_mongoc_ssl_opts_from_bson (void) for (test = tests; test->bson != NULL; test++) { mongoc_ssl_opt_t ssl_opt = {0}; - mcommon_string_t *errmsg = mcommon_string_new (NULL); - bool ok = _mongoc_ssl_opts_from_bson (&ssl_opt, tmp_bson (test->bson), errmsg); + mcommon_string_append_t errmsg; + mcommon_string_new_as_append (&errmsg); + bool ok = _mongoc_ssl_opts_from_bson (&ssl_opt, tmp_bson (test->bson), &errmsg); 
MONGOC_DEBUG ("testcase: %s", test->bson); if (test->expect_error) { - ASSERT_CONTAINS (errmsg->str, test->expect_error); + ASSERT_CONTAINS (mcommon_str_from_append (&errmsg), test->expect_error); ASSERT (!ok); } else { if (!ok) { - test_error ("unexpected error parsing: %s", errmsg->str); + test_error ("unexpected error parsing: %s", mcommon_str_from_append (&errmsg)); } } @@ -186,7 +187,7 @@ test_mongoc_ssl_opts_from_bson (void) ASSERT (!ssl_opt.crl_file); _mongoc_ssl_opts_cleanup (&ssl_opt, true /* free_internal */); - mcommon_string_free (errmsg, true /* free_segment */); + mcommon_string_from_append_destroy (&errmsg); } } diff --git a/src/libmongoc/tests/test-mongoc-uri.c b/src/libmongoc/tests/test-mongoc-uri.c index 742c2d5eab9..8c3124fe548 100644 --- a/src/libmongoc/tests/test-mongoc-uri.c +++ b/src/libmongoc/tests/test-mongoc-uri.c @@ -2232,18 +2232,20 @@ test_parses_long_ipv6 (void) // Test the largest permitted IPv6 literal. { // Construct a string of repeating `:`. - mcommon_string_t *host = mcommon_string_new (NULL); + mcommon_string_append_t host; + mcommon_string_new_as_append (&host); for (int i = 0; i < BSON_HOST_NAME_MAX - 2; i++) { // Max IPv6 literal is two less due to including `[` and `]`. 
- mcommon_string_append (host, ":"); + mcommon_string_append (&host, ":"); } + const char *host_str = mcommon_str_from_append (&host); - char *host_and_port = bson_strdup_printf ("[%s]:27017", host->str); + char *host_and_port = bson_strdup_printf ("[%s]:27017", host_str); char *uri_string = bson_strdup_printf ("mongodb://%s", host_and_port); mongoc_uri_t *uri = mongoc_uri_new_with_error (uri_string, &error); ASSERT_OR_PRINT (uri, error); const mongoc_host_list_t *hosts = mongoc_uri_get_hosts (uri); - ASSERT_CMPSTR (hosts->host, host->str); + ASSERT_CMPSTR (hosts->host, host_str); ASSERT_CMPSTR (hosts->host_and_port, host_and_port); ASSERT_CMPUINT16 (hosts->port, ==, 27017); ASSERT (!hosts->next); @@ -2251,18 +2253,20 @@ test_parses_long_ipv6 (void) mongoc_uri_destroy (uri); bson_free (uri_string); bson_free (host_and_port); - mcommon_string_free (host, true /* free_segment */); + mcommon_string_from_append_destroy (&host); } // Test one character more than the largest IPv6 literal. { // Construct a string of repeating `:`. 
- mcommon_string_t *host = mcommon_string_new (NULL); + mcommon_string_append_t host; + mcommon_string_new_as_append (&host); for (int i = 0; i < BSON_HOST_NAME_MAX - 2 + 1; i++) { - mcommon_string_append (host, ":"); + mcommon_string_append (&host, ":"); } + const char *host_str = mcommon_str_from_append (&host); - char *host_and_port = bson_strdup_printf ("[%s]:27017", host->str); + char *host_and_port = bson_strdup_printf ("[%s]:27017", host_str); char *uri_string = bson_strdup_printf ("mongodb://%s", host_and_port); capture_logs (true); mongoc_uri_t *uri = mongoc_uri_new_with_error (uri_string, &error); @@ -2278,7 +2282,7 @@ test_parses_long_ipv6 (void) mongoc_uri_destroy (uri); bson_free (uri_string); bson_free (host_and_port); - mcommon_string_free (host, true /* free_segment */); + mcommon_string_from_append_destroy (&host); } } diff --git a/src/libmongoc/tests/test-service-gcp.c b/src/libmongoc/tests/test-service-gcp.c index 7b1b49ee7a6..47388a4fd7f 100644 --- a/src/libmongoc/tests/test-service-gcp.c +++ b/src/libmongoc/tests/test-service-gcp.c @@ -43,16 +43,18 @@ _test_gcp_http_request (void) // Test that we correctly build a http request for the GCP metadata server gcp_request req; gcp_request_init (&req, "helloworld.com", 1234, NULL); - mcommon_string_t *req_str = _mongoc_http_render_request_head (&req.req); + mcommon_string_append_t req_str; + mcommon_string_new_as_append (&req_str); + _mongoc_http_render_request_head (&req_str, &req.req); gcp_request_destroy (&req); - ASSERT_CMPSTR (req_str->str, + ASSERT_CMPSTR (mcommon_str_from_append (&req_str), "GET " "/computeMetadata/v1/instance/service-accounts/default/token HTTP/1.0\r\n" "Host: helloworld.com:1234\r\n" "Connection: close\r\n" "Metadata-Flavor: Google\r\n" "\r\n"); - mcommon_string_free (req_str, true); + mcommon_string_from_append_destroy (&req_str); } static const char * diff --git a/src/libmongoc/tests/unified/entity-map.c b/src/libmongoc/tests/unified/entity-map.c index 
686f6757244..286c8136fe3 100644 --- a/src/libmongoc/tests/unified/entity-map.c +++ b/src/libmongoc/tests/unified/entity-map.c @@ -2071,19 +2071,20 @@ entity_map_match ( char * event_list_to_string (event_t *events) { - mcommon_string_t *str = NULL; event_t *eiter = NULL; - str = mcommon_string_new (""); + mcommon_string_append_t str; + mcommon_string_new_as_append (&str); + LL_FOREACH (events, eiter) { - mcommon_string_append_printf (str, + mcommon_string_append_printf (&str, "- %s: %s (%s)\n", eiter->type, tmp_json (eiter->serialized), eiter->is_sensitive_command ? "marked SENSITIVE" : "not sensitive"); } - return mcommon_string_free (str, false); + return mcommon_string_from_append_destroy_with_steal (&str); } diff --git a/src/libmongoc/tests/unified/result.c b/src/libmongoc/tests/unified/result.c index 6ba0c22d80d..4e08fbcb619 100644 --- a/src/libmongoc/tests/unified/result.c +++ b/src/libmongoc/tests/unified/result.c @@ -50,27 +50,26 @@ result_new (void) static void _result_init (result_t *result, const bson_val_t *value, const bson_t *reply, const bson_error_t *error) { - mcommon_string_t *str; - - str = mcommon_string_new (""); + mcommon_string_append_t str; + mcommon_string_new_as_append (&str); if (value) { result->value = bson_val_copy (value); - mcommon_string_append_printf (str, "value=%s ", bson_val_to_json (value)); + mcommon_string_append_printf (&str, "value=%s ", bson_val_to_json (value)); } if (reply) { char *reply_str = bson_as_canonical_extended_json (reply, NULL); - mcommon_string_append_printf (str, "reply=%s ", reply_str); + mcommon_string_append_printf (&str, "reply=%s ", reply_str); result->reply = bson_copy (reply); bson_free (reply_str); } - mcommon_string_append_printf (str, "bson_error=%s", error->message); + mcommon_string_append_printf (&str, "bson_error=%s", error->message); memcpy (&result->error, error, sizeof (bson_error_t)); result->ok = (error->code == 0); - result->str = mcommon_string_free (str, false); + result->str = 
mcommon_string_from_append_destroy_with_steal (&str); result->write_errors = bson_new (); result->write_concern_errors = bson_new (); } diff --git a/src/libmongoc/tests/unified/runner.c b/src/libmongoc/tests/unified/runner.c index f0c5df59b47..d484c352275 100644 --- a/src/libmongoc/tests/unified/runner.c +++ b/src/libmongoc/tests/unified/runner.c @@ -773,11 +773,12 @@ check_run_on_requirement (test_runner_t *test_runner, static bool check_run_on_requirements (test_runner_t *test_runner, bson_t *run_on_requirements, const char **reason) { - mcommon_string_t *fail_reasons = NULL; bool requirements_satisfied = false; bson_iter_t iter; - fail_reasons = mcommon_string_new (""); + mcommon_string_append_t fail_reasons; + mcommon_string_new_as_append (&fail_reasons); + BSON_FOREACH (run_on_requirements, iter) { bson_t run_on_requirement; @@ -795,15 +796,15 @@ check_run_on_requirements (test_runner_t *test_runner, bson_t *run_on_requiremen } mcommon_string_append_printf ( - fail_reasons, "- Requirement %s failed because: %s\n", bson_iter_key (&iter), fail_reason); + &fail_reasons, "- Requirement %s failed because: %s\n", bson_iter_key (&iter), fail_reason); bson_free (fail_reason); } *reason = NULL; if (!requirements_satisfied) { - (*reason) = tmp_str ("runOnRequirements not satisfied:\n%s", fail_reasons->str); + *reason = tmp_str ("runOnRequirements not satisfied:\n%s", mcommon_str_from_append (&fail_reasons)); } - mcommon_string_free (fail_reasons, true); + mcommon_string_from_append_destroy (&fail_reasons); return requirements_satisfied; } diff --git a/src/libmongoc/tests/unified/test-diagnostics.c b/src/libmongoc/tests/unified/test-diagnostics.c index 52b6e957f11..c60a376684f 100644 --- a/src/libmongoc/tests/unified/test-diagnostics.c +++ b/src/libmongoc/tests/unified/test-diagnostics.c @@ -39,47 +39,50 @@ static char * test_diagnostics_error_string (bson_error_t *error) { msg_t *msg_iter = NULL; - mcommon_string_t *str = NULL; test_diagnostics_t *td = &diagnostics; + 
mcommon_string_append_t str; + mcommon_string_new_as_append (&str); + /* Give a large header / footer to make the error easily grep-able */ - str = mcommon_string_new ("****************************** BEGIN_MONGOC_ERROR " - "******************************\n"); + mcommon_string_append (&str, + "****************************** BEGIN_MONGOC_ERROR " + "******************************\n"); bson_mutex_lock (&td->mutex); if (td->test_info) { - mcommon_string_append (str, "test info:\n"); + mcommon_string_append (&str, "test info:\n"); } LL_FOREACH (td->test_info, msg_iter) { - mcommon_string_append (str, msg_iter->string); - mcommon_string_append (str, "\n"); + mcommon_string_append (&str, msg_iter->string); + mcommon_string_append (&str, "\n"); } - mcommon_string_append (str, "\n"); + mcommon_string_append (&str, "\n"); if (td->error_info) { - mcommon_string_append (str, "error context:\n"); + mcommon_string_append (&str, "error context:\n"); } LL_FOREACH (td->error_info, msg_iter) { - mcommon_string_append (str, msg_iter->string); - mcommon_string_append (str, "\n\n"); + mcommon_string_append (&str, msg_iter->string); + mcommon_string_append (&str, "\n\n"); } bson_mutex_unlock (&td->mutex); if (error && error->code != 0) { - mcommon_string_append_printf (str, "error: %s\n", error->message); + mcommon_string_append_printf (&str, "error: %s\n", error->message); } - mcommon_string_append (str, + mcommon_string_append (&str, "******************************* END_MONGOC_ERROR " "*******************************\n"); - return mcommon_string_free (str, false); + return mcommon_string_from_append_destroy_with_steal (&str); } static void