From 28b5a12cc18e0e4f900a680f3bc6751b02b807e0 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Wed, 14 Mar 2018 15:01:33 -0400 Subject: [PATCH 01/29] json_writer: new routines to create JSON data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add "struct json_writer" and a series of jw_ routines to compose JSON data into a string buffer. The resulting string may then be printed by commands wanting to support a JSON-like output format. The json_writer is limited to correctly formatting structured data for output. It does not attempt to build an object model of the JSON data. We say "JSON-like" because we do not enforce the Unicode (usually UTF-8) requirement on string fields. Internally, Git does not necessarily have Unicode/UTF-8 data for most fields, so it is currently unclear the best way to enforce that requirement. For example, on Linux pathnames can contain arbitrary 8-bit character data, so a command like "status" would not know how to encode the reported pathnames. We may want to revisit this (or double encode such strings) in the future. Helped-by: Eric Sunshine Helped-by: René Scharfe Helped-by: Wink Saville Helped-by: Ramsay Jones Signed-off-by: Jeff Hostetler --- Makefile | 2 + json-writer.c | 414 ++++++++++++++++++++++++++ json-writer.h | 105 +++++++ t/helper/test-json-writer.c | 565 ++++++++++++++++++++++++++++++++++++ t/helper/test-tool.c | 1 + t/helper/test-tool.h | 1 + t/t0019-json-writer.sh | 331 +++++++++++++++++++++ t/t0019/parse_json.perl | 52 ++++ 8 files changed, 1471 insertions(+) create mode 100644 json-writer.c create mode 100644 json-writer.h create mode 100644 t/helper/test-json-writer.c create mode 100755 t/t0019-json-writer.sh create mode 100644 t/t0019/parse_json.perl diff --git a/Makefile b/Makefile index 928c0e326489d2..48a54e54f43918 100644 --- a/Makefile +++ b/Makefile @@ -711,6 +711,7 @@ TEST_BUILTINS_OBJS += test-genrandom.o TEST_BUILTINS_OBJS += test-hashmap.o TEST_BUILTINS_OBJS += test-iconv.o TEST_BUILTINS_OBJS += test-index-version.o +TEST_BUILTINS_OBJS += test-json-writer.o TEST_BUILTINS_OBJS += test-lazy-init-name-hash.o TEST_BUILTINS_OBJS += test-match-trees.o TEST_BUILTINS_OBJS += test-mergesort.o @@ -874,6 +875,7 @@ LIB_OBJS += hashmap.o LIB_OBJS += help.o LIB_OBJS += hex.o LIB_OBJS += ident.o +LIB_OBJS += json-writer.o LIB_OBJS += kwset.o LIB_OBJS += levenshtein.o LIB_OBJS += line-log.o diff --git a/json-writer.c b/json-writer.c new file mode 100644 index 00000000000000..aadb9dbddc33ef --- /dev/null +++ b/json-writer.c @@ -0,0 +1,414 @@ +#include "cache.h" +#include "json-writer.h" + +void jw_init(struct json_writer *jw) +{ + strbuf_init(&jw->json, 0); + strbuf_init(&jw->open_stack, 0); + jw->need_comma = 0; + jw->pretty = 0; +} + +void jw_release(struct json_writer *jw) +{ + strbuf_release(&jw->json); + strbuf_release(&jw->open_stack); +} + +/* + * Append JSON-quoted version of the given string to 'out'. + */ +static void append_quoted_string(struct strbuf *out, const char *in) +{ + unsigned char c; + + strbuf_addch(out, '"'); + while ((c = *in++) != '\0') { + if (c == '"') + strbuf_addstr(out, "\\\""); + else if (c == '\\') + strbuf_addstr(out, "\\\\"); + else if (c == '\n') + strbuf_addstr(out, "\\n"); + else if (c == '\r') + strbuf_addstr(out, "\\r"); + else if (c == '\t') + strbuf_addstr(out, "\\t"); + else if (c == '\f') + strbuf_addstr(out, "\\f"); + else if (c == '\b') + strbuf_addstr(out, "\\b"); + else if (c < 0x20) + strbuf_addf(out, "\\u%04x", c); + else + strbuf_addch(out, c); + } + strbuf_addch(out, '"'); +} + +static void indent_pretty(struct json_writer *jw) +{ + int k; + + for (k = 0; k < jw->open_stack.len; k++) + strbuf_addstr(&jw->json, " "); +} + +/* + * Begin an object or array (either top-level or nested within the currently + * open object or array). + */ +static void begin(struct json_writer *jw, char ch_open, int pretty) +{ + jw->pretty = pretty; + + strbuf_addch(&jw->json, ch_open); + + strbuf_addch(&jw->open_stack, ch_open); + jw->need_comma = 0; +} + +/* + * Assert that the top of the open-stack is an object. + */ +static void assert_in_object(const struct json_writer *jw, const char *key) +{ + if (!jw->open_stack.len) + BUG("json-writer: object: missing jw_object_begin(): '%s'", key); + if (jw->open_stack.buf[jw->open_stack.len - 1] != '{') + BUG("json-writer: object: not in object: '%s'", key); +} + +/* + * Assert that the top of the open-stack is an array. + */ +static void assert_in_array(const struct json_writer *jw) +{ + if (!jw->open_stack.len) + BUG("json-writer: array: missing jw_array_begin()"); + if (jw->open_stack.buf[jw->open_stack.len - 1] != '[') + BUG("json-writer: array: not in array"); +} + +/* + * Add comma if we have already seen a member at this level. + */ +static void maybe_add_comma(struct json_writer *jw) +{ + if (jw->need_comma) + strbuf_addch(&jw->json, ','); + else + jw->need_comma = 1; +} + +static void fmt_double(struct json_writer *jw, int precision, + double value) +{ + if (precision < 0) { + strbuf_addf(&jw->json, "%f", value); + } else { + struct strbuf fmt = STRBUF_INIT; + strbuf_addf(&fmt, "%%.%df", precision); + strbuf_addf(&jw->json, fmt.buf, value); + strbuf_release(&fmt); + } +} + +static void object_common(struct json_writer *jw, const char *key) +{ + assert_in_object(jw, key); + maybe_add_comma(jw); + + if (jw->pretty) { + strbuf_addch(&jw->json, '\n'); + indent_pretty(jw); + } + + append_quoted_string(&jw->json, key); + strbuf_addch(&jw->json, ':'); + if (jw->pretty) + strbuf_addch(&jw->json, ' '); +} + +static void array_common(struct json_writer *jw) +{ + assert_in_array(jw); + maybe_add_comma(jw); + + if (jw->pretty) { + strbuf_addch(&jw->json, '\n'); + indent_pretty(jw); + } +} + +/* + * Assert that the given JSON object or JSON array has been properly + * terminated. (Has closing bracket.) + */ +static void assert_is_terminated(const struct json_writer *jw) +{ + if (jw->open_stack.len) + BUG("json-writer: object: missing jw_end(): '%s'", + jw->json.buf); +} + +void jw_object_begin(struct json_writer *jw, int pretty) +{ + begin(jw, '{', pretty); +} + +void jw_object_string(struct json_writer *jw, const char *key, const char *value) +{ + object_common(jw, key); + append_quoted_string(&jw->json, value); +} + +void jw_object_intmax(struct json_writer *jw, const char *key, intmax_t value) +{ + object_common(jw, key); + strbuf_addf(&jw->json, "%"PRIdMAX, value); +} + +void jw_object_double(struct json_writer *jw, const char *key, int precision, + double value) +{ + object_common(jw, key); + fmt_double(jw, precision, value); +} + +void jw_object_true(struct json_writer *jw, const char *key) +{ + object_common(jw, key); + strbuf_addstr(&jw->json, "true"); +} + +void jw_object_false(struct json_writer *jw, const char *key) +{ + object_common(jw, key); + strbuf_addstr(&jw->json, "false"); +} + +void jw_object_bool(struct json_writer *jw, const char *key, int value) +{ + if (value) + jw_object_true(jw, key); + else + jw_object_false(jw, key); +} + +void jw_object_null(struct json_writer *jw, const char *key) +{ + object_common(jw, key); + strbuf_addstr(&jw->json, "null"); +} + +static void increase_indent(struct strbuf *sb, + const struct json_writer *jw, + int indent) +{ + int k; + + strbuf_reset(sb); + for (k = 0; k < jw->json.len; k++) { + char ch = jw->json.buf[k]; + strbuf_addch(sb, ch); + if (ch == '\n') + strbuf_addchars(sb, ' ', indent); + } +} + +static void kill_indent(struct strbuf *sb, + const struct json_writer *jw) +{ + int k; + int eat_it = 0; + + strbuf_reset(sb); + for (k = 0; k < jw->json.len; k++) { + char ch = jw->json.buf[k]; + if (eat_it && ch == ' ') + continue; + if (ch == '\n') { + eat_it = 1; + continue; + } + eat_it = 0; + strbuf_addch(sb, ch); + } +} + +static void append_sub_jw(struct json_writer *jw, + const struct json_writer *value) +{ + /* + * If both are pretty, increase the indentation of the sub_jw + * to better fit under the super. + * + * If the super is pretty, but the sub_jw is compact, leave the + * sub_jw compact. (We don't want to parse and rebuild the sub_jw + * for this debug-ish feature.) + * + * If the super is compact, and the sub_jw is pretty, convert + * the sub_jw to compact. + * + * If both are compact, keep the sub_jw compact. + */ + if (jw->pretty && jw->open_stack.len && value->pretty) { + struct strbuf sb = STRBUF_INIT; + increase_indent(&sb, value, jw->open_stack.len * 2); + strbuf_addbuf(&jw->json, &sb); + strbuf_release(&sb); + return; + } + if (!jw->pretty && value->pretty) { + struct strbuf sb = STRBUF_INIT; + kill_indent(&sb, value); + strbuf_addbuf(&jw->json, &sb); + strbuf_release(&sb); + return; + } + + strbuf_addbuf(&jw->json, &value->json); +} + +/* + * Append existing (properly terminated) JSON sub-data (object or array) + * as-is onto the given JSON data. + */ +void jw_object_sub_jw(struct json_writer *jw, const char *key, + const struct json_writer *value) +{ + assert_is_terminated(value); + + object_common(jw, key); + append_sub_jw(jw, value); +} + +void jw_object_inline_begin_object(struct json_writer *jw, const char *key) +{ + object_common(jw, key); + + jw_object_begin(jw, jw->pretty); +} + +void jw_object_inline_begin_array(struct json_writer *jw, const char *key) +{ + object_common(jw, key); + + jw_array_begin(jw, jw->pretty); +} + +void jw_array_begin(struct json_writer *jw, int pretty) +{ + begin(jw, '[', pretty); +} + +void jw_array_string(struct json_writer *jw, const char *value) +{ + array_common(jw); + append_quoted_string(&jw->json, value); +} + +void jw_array_intmax(struct json_writer *jw, intmax_t value) +{ + array_common(jw); + strbuf_addf(&jw->json, "%"PRIdMAX, value); +} + +void jw_array_double(struct json_writer *jw, int precision, double value) +{ + array_common(jw); + fmt_double(jw, precision, value); +} + +void jw_array_true(struct json_writer *jw) +{ + array_common(jw); + strbuf_addstr(&jw->json, "true"); +} + +void jw_array_false(struct json_writer *jw) +{ + array_common(jw); + strbuf_addstr(&jw->json, "false"); +} + +void jw_array_bool(struct json_writer *jw, int value) +{ + if (value) + jw_array_true(jw); + else + jw_array_false(jw); +} + +void jw_array_null(struct json_writer *jw) +{ + array_common(jw); + strbuf_addstr(&jw->json, "null"); +} + +void jw_array_sub_jw(struct json_writer *jw, const struct json_writer *value) +{ + assert_is_terminated(value); + + array_common(jw); + append_sub_jw(jw, value); +} + +void jw_array_argc_argv(struct json_writer *jw, int argc, const char **argv) +{ + int k; + + for (k = 0; k < argc; k++) + jw_array_string(jw, argv[k]); +} + +void jw_array_argv(struct json_writer *jw, const char **argv) +{ + while (*argv) + jw_array_string(jw, *argv++); +} + +void jw_array_inline_begin_object(struct json_writer *jw) +{ + array_common(jw); + + jw_object_begin(jw, jw->pretty); +} + +void jw_array_inline_begin_array(struct json_writer *jw) +{ + array_common(jw); + + jw_array_begin(jw, jw->pretty); +} + +int jw_is_terminated(const struct json_writer *jw) +{ + return !jw->open_stack.len; +} + +void jw_end(struct json_writer *jw) +{ + char ch_open; + int len; + + if (!jw->open_stack.len) + BUG("json-writer: too many jw_end(): '%s'", jw->json.buf); + + len = jw->open_stack.len - 1; + ch_open = jw->open_stack.buf[len]; + + strbuf_setlen(&jw->open_stack, len); + jw->need_comma = 1; + + if (jw->pretty) { + strbuf_addch(&jw->json, '\n'); + indent_pretty(jw); + } + + if (ch_open == '{') + strbuf_addch(&jw->json, '}'); + else + strbuf_addch(&jw->json, ']'); +} diff --git a/json-writer.h b/json-writer.h new file mode 100644 index 00000000000000..fc18acc7d913d1 --- /dev/null +++ b/json-writer.h @@ -0,0 +1,105 @@ +#ifndef JSON_WRITER_H +#define JSON_WRITER_H + +/* + * JSON data structures are defined at: + * [1] http://www.ietf.org/rfc/rfc7159.txt + * [2] http://json.org/ + * + * The JSON-writer API allows one to build JSON data structures using a + * simple wrapper around a "struct strbuf" buffer. It is intended as a + * simple API to build output strings; it is not intended to be a general + * object model for JSON data. In particular, it does not re-order keys + * in an object (dictionary), it does not de-dup keys in an object, and + * it does not allow lookup or parsing of JSON data. + * + * All string values (both keys and string r-values) are properly quoted + * and escaped if they contain special characters. + * + * These routines create compact JSON data (with no unnecessary whitespace, + * newlines, or indenting). If you get an unexpected response, verify + * that you're not expecting a pretty JSON string. + * + * Both "JSON objects" (aka sets of k/v pairs) and "JSON array" can be + * constructed using a 'begin append* end' model. + * + * Nested objects and arrays can either be constructed bottom up (by + * creating sub object/arrays first and appending them to the super + * object/array) -or- by building them inline in one pass. This is a + * personal style and/or data shape choice. + * + * See t/helper/test-json-writer.c for various usage examples. + * + * LIMITATIONS: + * ============ + * + * The JSON specification [1,2] defines string values as Unicode data + * and probably UTF-8 encoded. The current json-writer API does not + * enforce this and will write any string as received. However, it will + * properly quote and backslash-escape them as necessary. It is up to + * the caller to UTF-8 encode their strings *before* passing them to this + * API. This layer should not have to try to guess the encoding or locale + * of the given strings. + */ + +struct json_writer +{ + /* + * Buffer of the in-progress JSON currently being composed. + */ + struct strbuf json; + + /* + * Simple stack of the currently open array and object forms. + * This is a string of '{' and '[' characters indicating the + * currently unterminated forms. This is used to ensure the + * properly closing character is used when popping a level and + * to know when the JSON is completely closed. + */ + struct strbuf open_stack; + + unsigned int need_comma:1; + unsigned int pretty:1; +}; + +#define JSON_WRITER_INIT { STRBUF_INIT, STRBUF_INIT, 0, 0 } + +void jw_init(struct json_writer *jw); +void jw_release(struct json_writer *jw); + +void jw_object_begin(struct json_writer *jw, int pretty); +void jw_array_begin(struct json_writer *jw, int pretty); + +void jw_object_string(struct json_writer *jw, const char *key, + const char *value); +void jw_object_intmax(struct json_writer *jw, const char *key, intmax_t value); +void jw_object_double(struct json_writer *jw, const char *key, int precision, + double value); +void jw_object_true(struct json_writer *jw, const char *key); +void jw_object_false(struct json_writer *jw, const char *key); +void jw_object_bool(struct json_writer *jw, const char *key, int value); +void jw_object_null(struct json_writer *jw, const char *key); +void jw_object_sub_jw(struct json_writer *jw, const char *key, + const struct json_writer *value); + +void jw_object_inline_begin_object(struct json_writer *jw, const char *key); +void jw_object_inline_begin_array(struct json_writer *jw, const char *key); + +void jw_array_string(struct json_writer *jw, const char *value); +void jw_array_intmax(struct json_writer *jw, intmax_t value); +void jw_array_double(struct json_writer *jw, int precision, double value); +void jw_array_true(struct json_writer *jw); +void jw_array_false(struct json_writer *jw); +void jw_array_bool(struct json_writer *jw, int value); +void jw_array_null(struct json_writer *jw); +void jw_array_sub_jw(struct json_writer *jw, const struct json_writer *value); +void jw_array_argc_argv(struct json_writer *jw, int argc, const char **argv); +void jw_array_argv(struct json_writer *jw, const char **argv); + +void jw_array_inline_begin_object(struct json_writer *jw); +void jw_array_inline_begin_array(struct json_writer *jw); + +int jw_is_terminated(const struct json_writer *jw); +void jw_end(struct json_writer *jw); + +#endif /* JSON_WRITER_H */ diff --git a/t/helper/test-json-writer.c b/t/helper/test-json-writer.c new file mode 100644 index 00000000000000..37c452535f8bb2 --- /dev/null +++ b/t/helper/test-json-writer.c @@ -0,0 +1,565 @@ +#include "test-tool.h" +#include "cache.h" +#include "json-writer.h" + +static const char *expect_obj1 = "{\"a\":\"abc\",\"b\":42,\"c\":true}"; +static const char *expect_obj2 = "{\"a\":-1,\"b\":2147483647,\"c\":0}"; +static const char *expect_obj3 = "{\"a\":0,\"b\":4294967295,\"c\":9223372036854775807}"; +static const char *expect_obj4 = "{\"t\":true,\"f\":false,\"n\":null}"; +static const char *expect_obj5 = "{\"abc\\tdef\":\"abc\\\\def\"}"; +static const char *expect_obj6 = "{\"a\":3.14}"; + +static const char *pretty_obj1 = ("{\n" + " \"a\": \"abc\",\n" + " \"b\": 42,\n" + " \"c\": true\n" + "}"); +static const char *pretty_obj2 = ("{\n" + " \"a\": -1,\n" + " \"b\": 2147483647,\n" + " \"c\": 0\n" + "}"); +static const char *pretty_obj3 = ("{\n" + " \"a\": 0,\n" + " \"b\": 4294967295,\n" + " \"c\": 9223372036854775807\n" + "}"); +static const char *pretty_obj4 = ("{\n" + " \"t\": true,\n" + " \"f\": false,\n" + " \"n\": null\n" + "}"); + +static struct json_writer obj1 = JSON_WRITER_INIT; +static struct json_writer obj2 = JSON_WRITER_INIT; +static struct json_writer obj3 = JSON_WRITER_INIT; +static struct json_writer obj4 = JSON_WRITER_INIT; +static struct json_writer obj5 = JSON_WRITER_INIT; +static struct json_writer obj6 = JSON_WRITER_INIT; + +static void make_obj1(int pretty) +{ + jw_object_begin(&obj1, pretty); + { + jw_object_string(&obj1, "a", "abc"); + jw_object_intmax(&obj1, "b", 42); + jw_object_true(&obj1, "c"); + } + jw_end(&obj1); +} + +static void make_obj2(int pretty) +{ + jw_object_begin(&obj2, pretty); + { + jw_object_intmax(&obj2, "a", -1); + jw_object_intmax(&obj2, "b", 0x7fffffff); + jw_object_intmax(&obj2, "c", 0); + } + jw_end(&obj2); +} + +static void make_obj3(int pretty) +{ + jw_object_begin(&obj3, pretty); + { + jw_object_intmax(&obj3, "a", 0); + jw_object_intmax(&obj3, "b", 0xffffffff); + jw_object_intmax(&obj3, "c", 0x7fffffffffffffffULL); + } + jw_end(&obj3); +} + +static void make_obj4(int pretty) +{ + jw_object_begin(&obj4, pretty); + { + jw_object_true(&obj4, "t"); + jw_object_false(&obj4, "f"); + jw_object_null(&obj4, "n"); + } + jw_end(&obj4); +} + +static void make_obj5(int pretty) +{ + jw_object_begin(&obj5, pretty); + { + jw_object_string(&obj5, "abc" "\x09" "def", "abc" "\\" "def"); + } + jw_end(&obj5); +} + +static void make_obj6(int pretty) +{ + jw_object_begin(&obj6, pretty); + { + jw_object_double(&obj6, "a", 2, 3.14159); + } + jw_end(&obj6); +} + +static const char *expect_arr1 = "[\"abc\",42,true]"; +static const char *expect_arr2 = "[-1,2147483647,0]"; +static const char *expect_arr3 = "[0,4294967295,9223372036854775807]"; +static const char *expect_arr4 = "[true,false,null]"; + +static const char *pretty_arr1 = ("[\n" + " \"abc\",\n" + " 42,\n" + " true\n" + "]"); +static const char *pretty_arr2 = ("[\n" + " -1,\n" + " 2147483647,\n" + " 0\n" + "]"); +static const char *pretty_arr3 = ("[\n" + " 0,\n" + " 4294967295,\n" + " 9223372036854775807\n" + "]"); +static const char *pretty_arr4 = ("[\n" + " true,\n" + " false,\n" + " null\n" + "]"); + +static struct json_writer arr1 = JSON_WRITER_INIT; +static struct json_writer arr2 = JSON_WRITER_INIT; +static struct json_writer arr3 = JSON_WRITER_INIT; +static struct json_writer arr4 = JSON_WRITER_INIT; + +static void make_arr1(int pretty) +{ + jw_array_begin(&arr1, pretty); + { + jw_array_string(&arr1, "abc"); + jw_array_intmax(&arr1, 42); + jw_array_true(&arr1); + } + jw_end(&arr1); +} + +static void make_arr2(int pretty) +{ + jw_array_begin(&arr2, pretty); + { + jw_array_intmax(&arr2, -1); + jw_array_intmax(&arr2, 0x7fffffff); + jw_array_intmax(&arr2, 0); + } + jw_end(&arr2); +} + +static void make_arr3(int pretty) +{ + jw_array_begin(&arr3, pretty); + { + jw_array_intmax(&arr3, 0); + jw_array_intmax(&arr3, 0xffffffff); + jw_array_intmax(&arr3, 0x7fffffffffffffffULL); + } + jw_end(&arr3); +} + +static void make_arr4(int pretty) +{ + jw_array_begin(&arr4, pretty); + { + jw_array_true(&arr4); + jw_array_false(&arr4); + jw_array_null(&arr4); + } + jw_end(&arr4); +} + +static char *expect_nest1 = + "{\"obj1\":{\"a\":\"abc\",\"b\":42,\"c\":true},\"arr1\":[\"abc\",42,true]}"; + +static struct json_writer nest1 = JSON_WRITER_INIT; + +static void make_nest1(int pretty) +{ + jw_object_begin(&nest1, pretty); + { + jw_object_sub_jw(&nest1, "obj1", &obj1); + jw_object_sub_jw(&nest1, "arr1", &arr1); + } + jw_end(&nest1); +} + +static char *expect_inline1 = + "{\"obj1\":{\"a\":\"abc\",\"b\":42,\"c\":true},\"arr1\":[\"abc\",42,true]}"; + +static char *pretty_inline1 = + ("{\n" + " \"obj1\": {\n" + " \"a\": \"abc\",\n" + " \"b\": 42,\n" + " \"c\": true\n" + " },\n" + " \"arr1\": [\n" + " \"abc\",\n" + " 42,\n" + " true\n" + " ]\n" + "}"); + +static struct json_writer inline1 = JSON_WRITER_INIT; + +static void make_inline1(int pretty) +{ + jw_object_begin(&inline1, pretty); + { + jw_object_inline_begin_object(&inline1, "obj1"); + { + jw_object_string(&inline1, "a", "abc"); + jw_object_intmax(&inline1, "b", 42); + jw_object_true(&inline1, "c"); + } + jw_end(&inline1); + jw_object_inline_begin_array(&inline1, "arr1"); + { + jw_array_string(&inline1, "abc"); + jw_array_intmax(&inline1, 42); + jw_array_true(&inline1); + } + jw_end(&inline1); + } + jw_end(&inline1); +} + +static char *expect_inline2 = + "[[1,2],[3,4],{\"a\":\"abc\"}]"; + +static char *pretty_inline2 = + ("[\n" + " [\n" + " 1,\n" + " 2\n" + " ],\n" + " [\n" + " 3,\n" + " 4\n" + " ],\n" + " {\n" + " \"a\": \"abc\"\n" + " }\n" + "]"); + +static struct json_writer inline2 = JSON_WRITER_INIT; + +static void make_inline2(int pretty) +{ + jw_array_begin(&inline2, pretty); + { + jw_array_inline_begin_array(&inline2); + { + jw_array_intmax(&inline2, 1); + jw_array_intmax(&inline2, 2); + } + jw_end(&inline2); + jw_array_inline_begin_array(&inline2); + { + jw_array_intmax(&inline2, 3); + jw_array_intmax(&inline2, 4); + } + jw_end(&inline2); + jw_array_inline_begin_object(&inline2); + { + jw_object_string(&inline2, "a", "abc"); + } + jw_end(&inline2); + } + jw_end(&inline2); +} + +/* + * When super is compact, we expect subs to be compacted (even if originally + * pretty). + */ +static const char *expect_mixed1 = + ("{\"obj1\":{\"a\":\"abc\",\"b\":42,\"c\":true}," + "\"arr1\":[\"abc\",42,true]}"); + +/* + * When super is pretty, a compact sub (obj1) is kept compact and a pretty + * sub (arr1) is re-indented. + */ +static const char *pretty_mixed1 = + ("{\n" + " \"obj1\": {\"a\":\"abc\",\"b\":42,\"c\":true},\n" + " \"arr1\": [\n" + " \"abc\",\n" + " 42,\n" + " true\n" + " ]\n" + "}"); + +static struct json_writer mixed1 = JSON_WRITER_INIT; + +static void make_mixed1(int pretty) +{ + jw_init(&obj1); + jw_init(&arr1); + + make_obj1(0); /* obj1 is compact */ + make_arr1(1); /* arr1 is pretty */ + + jw_object_begin(&mixed1, pretty); + { + jw_object_sub_jw(&mixed1, "obj1", &obj1); + jw_object_sub_jw(&mixed1, "arr1", &arr1); + } + jw_end(&mixed1); +} + +static void cmp(const char *test, const struct json_writer *jw, const char *exp) +{ + if (!strcmp(jw->json.buf, exp)) + return; + + printf("error[%s]: observed '%s' expected '%s'\n", + test, jw->json.buf, exp); + exit(1); +} + +#define t(v) do { make_##v(0); cmp(#v, &v, expect_##v); } while (0) +#define p(v) do { make_##v(1); cmp(#v, &v, pretty_##v); } while (0) + +/* + * Run some basic regression tests with some known patterns. + * These tests also demonstrate how to use the jw_ API. + */ +static int unit_tests(void) +{ + /* comptact (canonical) forms */ + t(obj1); + t(obj2); + t(obj3); + t(obj4); + t(obj5); + t(obj6); + + t(arr1); + t(arr2); + t(arr3); + t(arr4); + + t(nest1); + + t(inline1); + t(inline2); + + jw_init(&obj1); + jw_init(&obj2); + jw_init(&obj3); + jw_init(&obj4); + + jw_init(&arr1); + jw_init(&arr2); + jw_init(&arr3); + jw_init(&arr4); + + jw_init(&inline1); + jw_init(&inline2); + + /* pretty forms */ + p(obj1); + p(obj2); + p(obj3); + p(obj4); + + p(arr1); + p(arr2); + p(arr3); + p(arr4); + + p(inline1); + p(inline2); + + /* mixed forms */ + t(mixed1); + jw_init(&mixed1); + p(mixed1); + + return 0; +} + +static void get_s(int line_nr, char **s_in) +{ + *s_in = strtok(NULL, " "); + if (!*s_in) + die("line[%d]: expected: ", line_nr); +} + +static void get_i(int line_nr, intmax_t *s_in) +{ + char *s; + char *endptr; + + get_s(line_nr, &s); + + *s_in = strtol(s, &endptr, 10); + if (*endptr || errno == ERANGE) + die("line[%d]: invalid integer value", line_nr); +} + +static void get_d(int line_nr, double *s_in) +{ + char *s; + char *endptr; + + get_s(line_nr, &s); + + *s_in = strtod(s, &endptr); + if (*endptr || errno == ERANGE) + die("line[%d]: invalid float value", line_nr); +} + +static int pretty; + +#define MAX_LINE_LENGTH (64 * 1024) + +static char *get_trimmed_line(char *buf, int buf_size) +{ + int len; + + if (!fgets(buf, buf_size, stdin)) + return NULL; + + len = strlen(buf); + while (len > 0) { + char c = buf[len - 1]; + if (c == '\n' || c == '\r' || c == ' ' || c == '\t') + buf[--len] = 0; + else + break; + } + + while (*buf == ' ' || *buf == '\t') + buf++; + + return buf; +} + +static int scripted(void) +{ + struct json_writer jw = JSON_WRITER_INIT; + char buf[MAX_LINE_LENGTH]; + char *line; + int line_nr = 0; + + line = get_trimmed_line(buf, MAX_LINE_LENGTH); + if (!line) + return 0; + + if (!strcmp(line, "object")) + jw_object_begin(&jw, pretty); + else if (!strcmp(line, "array")) + jw_array_begin(&jw, pretty); + else + die("expected first line to be 'object' or 'array'"); + + while ((line = get_trimmed_line(buf, MAX_LINE_LENGTH)) != NULL) { + char *verb; + char *key; + char *s_value; + intmax_t i_value; + double d_value; + + line_nr++; + + verb = strtok(line, " "); + + if (!strcmp(verb, "end")) { + jw_end(&jw); + } + else if (!strcmp(verb, "object-string")) { + get_s(line_nr, &key); + get_s(line_nr, &s_value); + jw_object_string(&jw, key, s_value); + } + else if (!strcmp(verb, "object-int")) { + get_s(line_nr, &key); + get_i(line_nr, &i_value); + jw_object_intmax(&jw, key, i_value); + } + else if (!strcmp(verb, "object-double")) { + get_s(line_nr, &key); + get_i(line_nr, &i_value); + get_d(line_nr, &d_value); + jw_object_double(&jw, key, i_value, d_value); + } + else if (!strcmp(verb, "object-true")) { + get_s(line_nr, &key); + jw_object_true(&jw, key); + } + else if (!strcmp(verb, "object-false")) { + get_s(line_nr, &key); + jw_object_false(&jw, key); + } + else if (!strcmp(verb, "object-null")) { + get_s(line_nr, &key); + jw_object_null(&jw, key); + } + else if (!strcmp(verb, "object-object")) { + get_s(line_nr, &key); + jw_object_inline_begin_object(&jw, key); + } + else if (!strcmp(verb, "object-array")) { + get_s(line_nr, &key); + jw_object_inline_begin_array(&jw, key); + } + else if (!strcmp(verb, "array-string")) { + get_s(line_nr, &s_value); + jw_array_string(&jw, s_value); + } + else if (!strcmp(verb, "array-int")) { + get_i(line_nr, &i_value); + jw_array_intmax(&jw, i_value); + } + else if (!strcmp(verb, "array-double")) { + get_i(line_nr, &i_value); + get_d(line_nr, &d_value); + jw_array_double(&jw, i_value, d_value); + } + else if (!strcmp(verb, "array-true")) + jw_array_true(&jw); + else if (!strcmp(verb, "array-false")) + jw_array_false(&jw); + else if (!strcmp(verb, "array-null")) + jw_array_null(&jw); + else if (!strcmp(verb, "array-object")) + jw_array_inline_begin_object(&jw); + else if (!strcmp(verb, "array-array")) + jw_array_inline_begin_array(&jw); + else + die("unrecognized token: '%s'", verb); + } + + if (!jw_is_terminated(&jw)) + die("json not terminated: '%s'", jw.json.buf); + + printf("%s\n", jw.json.buf); + + strbuf_release(&jw.json); + return 0; +} + +int cmd__json_writer(int argc, const char **argv) +{ + argc--; /* skip over "json-writer" arg */ + argv++; + + if (argc > 0 && argv[0][0] == '-') { + if (!strcmp(argv[0], "-u") || !strcmp(argv[0], "--unit")) + return unit_tests(); + + if (!strcmp(argv[0], "-p") || !strcmp(argv[0], "--pretty")) + pretty = 1; + } + + return scripted(); +} diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c index b4ea8d08d84409..cf6f374c271556 100644 --- a/t/helper/test-tool.c +++ b/t/helper/test-tool.c @@ -21,6 +21,7 @@ static struct test_cmd cmds[] = { { "hashmap", cmd__hashmap }, { "iconv", cmd__iconv }, { "index-version", cmd__index_version }, + { "json-writer", cmd__json_writer }, { "lazy-init-name-hash", cmd__lazy_init_name_hash }, { "match-trees", cmd__match_trees }, { "mergesort", cmd__mergesort }, diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h index ca28ba49b4f618..fc0323c2a8de49 100644 --- a/t/helper/test-tool.h +++ b/t/helper/test-tool.h @@ -15,6 +15,7 @@ int cmd__genrandom(int argc, const char **argv); int cmd__hashmap(int argc, const char **argv); int cmd__iconv(int argc, const char **argv); int cmd__index_version(int argc, const char **argv); +int cmd__json_writer(int argc, const char **argv); int cmd__lazy_init_name_hash(int argc, const char **argv); int cmd__match_trees(int argc, const char **argv); int cmd__mergesort(int argc, const char **argv); diff --git a/t/t0019-json-writer.sh b/t/t0019-json-writer.sh new file mode 100755 index 00000000000000..3b0c336b38e4c7 --- /dev/null +++ b/t/t0019-json-writer.sh @@ -0,0 +1,331 @@ +#!/bin/sh + +test_description='test json-writer JSON generation' +. ./test-lib.sh + +test_expect_success 'unit test of json-writer routines' ' + test-tool json-writer -u +' + +test_expect_success 'trivial object' ' + cat >expect <<-\EOF && + {} + EOF + cat >input <<-\EOF && + object + end + EOF + test-tool json-writer actual && + test_cmp expect actual +' + +test_expect_success 'trivial array' ' + cat >expect <<-\EOF && + [] + EOF + cat >input <<-\EOF && + array + end + EOF + test-tool json-writer actual && + test_cmp expect actual +' + +test_expect_success 'simple object' ' + cat >expect <<-\EOF && + {"a":"abc","b":42,"c":3.14,"d":true,"e":false,"f":null} + EOF + cat >input <<-\EOF && + object + object-string a abc + object-int b 42 + object-double c 2 3.140 + object-true d + object-false e + object-null f + end + EOF + test-tool json-writer actual && + test_cmp expect actual +' + +test_expect_success 'simple array' ' + cat >expect <<-\EOF && + ["abc",42,3.14,true,false,null] + EOF + cat >input <<-\EOF && + array + array-string abc + array-int 42 + array-double 2 3.140 + array-true + array-false + array-null + end + EOF + test-tool json-writer actual && + test_cmp expect actual +' + +test_expect_success 'escape quoting string' ' + cat >expect <<-\EOF && + {"a":"abc\\def"} + EOF + cat >input <<-\EOF && + object + object-string a abc\def + end + EOF + test-tool json-writer actual && + test_cmp expect actual +' + +test_expect_success 'escape quoting string 2' ' + cat >expect <<-\EOF && + {"a":"abc\"def"} + EOF + cat >input <<-\EOF && + object + object-string a abc"def + end + EOF + test-tool json-writer actual && + test_cmp expect actual +' + +test_expect_success 'nested inline object' ' + cat >expect <<-\EOF && + {"a":"abc","b":42,"sub1":{"c":3.14,"d":true,"sub2":{"e":false,"f":null}}} + EOF + cat >input <<-\EOF && + object + object-string a abc + object-int b 42 + object-object sub1 + object-double c 2 3.140 + object-true d + object-object sub2 + object-false e + object-null f + end + end + end + EOF + test-tool json-writer actual && + test_cmp expect actual +' + +test_expect_success 'nested inline array' ' + cat >expect <<-\EOF && + ["abc",42,[3.14,true,[false,null]]] + EOF + cat >input <<-\EOF && + array + array-string abc + array-int 42 + array-array + array-double 2 3.140 + array-true + array-array + array-false + array-null + end + end + end + EOF + test-tool json-writer actual && + test_cmp expect actual +' + +test_expect_success 'nested inline object and array' ' + cat >expect <<-\EOF && + {"a":"abc","b":42,"sub1":{"c":3.14,"d":true,"sub2":[false,null]}} + EOF + cat >input <<-\EOF && + object + object-string a abc + object-int b 42 + object-object sub1 + object-double c 2 3.140 + object-true d + object-array sub2 + array-false + array-null + end + end + end + EOF + test-tool json-writer actual && + test_cmp expect actual +' + +test_expect_success 'nested inline object and array 2' ' + cat >expect <<-\EOF && + {"a":"abc","b":42,"sub1":{"c":3.14,"d":true,"sub2":[false,{"g":0,"h":1},null]}} + EOF + cat >input <<-\EOF && + object + object-string a abc + object-int b 42 + object-object sub1 + object-double c 2 3.140 + object-true d + object-array sub2 + array-false + array-object + object-int g 0 + object-int h 1 + end + array-null + end + end + end + EOF + test-tool json-writer actual && + test_cmp expect actual +' + +test_expect_success 'pretty nested inline object and array 2' ' + sed -e "s/^|//" >expect <<-\EOF && + |{ + | "a": "abc", + | "b": 42, + | "sub1": { + | "c": 3.14, + | "d": true, + | "sub2": [ + | false, + | { + | "g": 0, + | "h": 1 + | }, + | null + | ] + | } + |} + EOF + cat >input <<-\EOF && + object + object-string a abc + object-int b 42 + object-object sub1 + object-double c 2 3.140 + object-true d + object-array sub2 + array-false + array-object + object-int g 0 + object-int h 1 + end + array-null + end + end + end + EOF + test-tool json-writer -p actual && + test_cmp expect actual +' + +test_expect_success 'inline object with no members' ' + cat >expect <<-\EOF && + {"a":"abc","empty":{},"b":42} + EOF + cat >input <<-\EOF && + object + object-string a abc + object-object empty + end + object-int b 42 + end + EOF + test-tool json-writer actual && + test_cmp expect actual +' + +test_expect_success 'inline array with no members' ' + cat >expect <<-\EOF && + {"a":"abc","empty":[],"b":42} + EOF + cat >input <<-\EOF && + object + object-string a abc + object-array empty + end + object-int b 42 + end + EOF + test-tool json-writer actual && + test_cmp expect actual +' + +test_expect_success 'larger empty example' ' + cat >expect <<-\EOF && + {"a":"abc","empty":[{},{},{},[],{}],"b":42} + EOF + cat >input <<-\EOF && + object + object-string a abc + object-array empty + array-object + end + array-object + end + array-object + end + array-array + end + array-object + end + end + object-int b 42 + end + EOF + test-tool json-writer actual && + test_cmp expect actual +' + +test_lazy_prereq PERLJSON ' + perl -MJSON -e "exit 0" +' + +# As a sanity check, ask Perl to parse our generated JSON and recursively +# dump the resulting data in sorted order. Confirm that that matches our +# expectations. +test_expect_success PERLJSON 'parse JSON using Perl' ' + cat >expect <<-\EOF && + row[0].a abc + row[0].b 42 + row[0].sub1 hash + row[0].sub1.c 3.14 + row[0].sub1.d 1 + row[0].sub1.sub2 array + row[0].sub1.sub2[0] 0 + row[0].sub1.sub2[1] hash + row[0].sub1.sub2[1].g 0 + row[0].sub1.sub2[1].h 1 + row[0].sub1.sub2[2] null + EOF + cat >input <<-\EOF && + object + object-string a abc + object-int b 42 + object-object sub1 + object-double c 2 3.140 + object-true d + object-array sub2 + array-false + array-object + object-int g 0 + object-int h 1 + end + array-null + end + end + end + EOF + test-tool json-writer output.json && + perl "$TEST_DIRECTORY"/t0019/parse_json.perl actual && + test_cmp expect actual +' + +test_done diff --git a/t/t0019/parse_json.perl b/t/t0019/parse_json.perl new file mode 100644 index 00000000000000..ca4e5bfa7850ee --- /dev/null +++ b/t/t0019/parse_json.perl @@ -0,0 +1,52 @@ +#!/usr/bin/perl +use strict; +use warnings; +use JSON; + +sub dump_array { + my ($label_in, $ary_ref) = @_; + my @ary = @$ary_ref; + + for ( my $i = 0; $i <= $#{ $ary_ref }; $i++ ) + { + my $label = "$label_in\[$i\]"; + dump_item($label, $ary[$i]); + } +} + +sub dump_hash { + my ($label_in, $obj_ref) = @_; + my %obj = %$obj_ref; + + foreach my $k (sort keys %obj) { + my $label = (length($label_in) > 0) ? "$label_in.$k" : "$k"; + my $value = $obj{$k}; + + dump_item($label, $value); + } +} + +sub dump_item { + my ($label_in, $value) = @_; + if (ref($value) eq 'ARRAY') { + print "$label_in array\n"; + dump_array($label_in, $value); + } elsif (ref($value) eq 'HASH') { + print "$label_in hash\n"; + dump_hash($label_in, $value); + } elsif (defined $value) { + print "$label_in $value\n"; + } else { + print "$label_in null\n"; + } +} + +my $row = 0; +while (<>) { + my $data = decode_json( $_ ); + my $label = "row[$row]"; + + dump_hash($label, $data); + $row++; +} + From 0224682306ec9d738688b69915c06ced80991443 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Fri, 13 Apr 2018 12:43:10 -0400 Subject: [PATCH 02/29] structured-logging: design document Signed-off-by: Jeff Hostetler --- .../technical/structured-logging.txt | 816 ++++++++++++++++++ 1 file changed, 816 insertions(+) create mode 100644 Documentation/technical/structured-logging.txt diff --git a/Documentation/technical/structured-logging.txt b/Documentation/technical/structured-logging.txt new file mode 100644 index 00000000000000..794c614432f53e --- /dev/null +++ b/Documentation/technical/structured-logging.txt @@ -0,0 +1,816 @@ +Structured Logging +================== + +Structured Logging (SLOG) is an optional feature to allow Git to +generate structured log data for executed commands. This includes +command line arguments, command run times, error codes and messages, +child process information, time spent in various critical functions, +and repository data-shape information. Data is written to a target +log file in JSON[1,2,3] format. + +SLOG is disabled by default. Several steps are required to enable it: + +1. Add the compile-time flag "STRUCTURED_LOGGING=1" when building git + to include the SLOG routines in the git executable. + +2. Set "slog.*" config settings[5] to enable SLOG in your repo. + + +Motivation +========== + +Git users may be faced with scenarios that are surprisingly slow or +produce unexpected results. And Git developers may have difficulty +reproducing these experiences. Structured logging allows users to +provide developers with additional usage, performance and error data +that can help diagnose and debug issues. + +Many Git hosting providers and users with many developers have bespoke +efforts to help troubleshoot problems; for example, command wrappers, +custom pre- and post-command hooks, and custom instrumentation of Git +code. These are inefficient and/or difficult to maintain. The goal +of SLOG is to provide this data as efficiently as possible. + +And having structured rather than free format log data, will help +developers with their analysis. + + +Background (Git Merge 2018 Barcelona) +===================================== + +Performance and/or error logging was discussed during the contributor's +summit in Barcelona. Here are the relevant notes from the meeting +minutes[6]. + +> Performance misc (Ævar) +> ----------------------- +> [...] +> - central error reporting for git +> - `git status` logging +> - git config that collects data, pushes to known endpoint with `git push` +> - pre_command and post_command hooks, for logs +> - `gvfs diagnose` that looks at packfiles, etc +> - detect BSODs, etc +> - Dropbox writes out json with index properties and command-line +> information for status/fetch/push, fork/execs external tool to upload +> - windows trace facility; would be nice to have cross-platform +> - would hosting providers care? +> - zipfile of logs to give when debugging +> - sanitizing data is harder +> - more in a company setting +> - fileshare to upload zipfile +> - most of the errors are proxy when they shouldn't, wrong proxy, proxy +> specific to particular URL; so upload endpoint wouldn't work +> - GIT_TRACE is supposed to be that (for proxy) +> - but we need more trace variables +> - series to make tracing cheaper +> - except that curl selects the proxy +> - trace should have an API, so it can call an executable +> - dump to .git/traces/... and everything else happens externally +> - tools like visual studio can't set GIT_TRACE, so +> - sourcetree has seen user environments where commands just take forever +> - third-party tools like perf/strace - could we be better leveraging those? +> - distribute turn-key solution to handout to collect more data? + + +A Quick Example +=============== + +Note: JSON pretty-printing is enabled in all of the examples shown in +this document. When pretty-printing is turned off, each event is +written on a single line. Pretty-printing is intended for debugging. +It should be turned off in production to make post-processing easier. + + $ git config slog.pretty + +Here is a quick example showing SLOG data for "git status". This +example has all optional features turned off. It contains 2 events. +The first is generated when the command started and the second when it +ended. + +{ + "event": "cmd_start", + "clock_us": 1530273550667800, + "pid": 107270, + "sid": "1530273550667800-107270", + "command": "status", + "argv": [ + "./git", + "status" + ] +} +{ + "event": "cmd_exit", + "clock_us": 1530273550680460, + "pid": 107270, + "sid": "1530273550667800-107270", + "command": "status", + "argv": [ + "./git", + "status" + ], + "result": { + "exit_code": 0, + "elapsed_core_us": 12573, + "elapsed_total_us": 12660 + }, + "version": { + "git": "2.18.0.rc0.83.gde7fb7c", + "slog": 0 + }, + "config": { + "slog": { + "detail": 0, + "timers": 0, + "aux": 0 + } + } +} + +Both events fields describing the event name, the event time (in +microseconds since the epoch), the OS process-id, a unique session-id +(described later), the normalized command name, and a vector of the +original command line arguments. + +The "cmd_exit" event additionally contains information about how the +process exited, the elapsed time, the version of git and the SLOG +format, and important SLOG configuration settings. + +The fields in the "cmd_start" event are replicated in the "cmd_exit" +event. This allows log file post-processors to operate in 2 modes: + +1. Just look at the "cmd_exit" events. This us useful if you just + want to analyze the command summary data. + +2. Look at the "cmd_start" and "cmd_exit" events as bracketing a time + span and examine the detailed activity between. For example, SLOG + can optionally generate "detail" events when spawning child + processes and those processes may themselves generate "cmd_start" + and "cmd_exit" events. The (top-level) "cmd_start" event serves as + the starting bracket of all of that activity. + + +Target Log File +=============== + +SLOG writes events to a log file. File logging works much like +GIT_TRACE where events are appended to a file on disk. + +Logging is enabled if the config variable "slog.path" is set to an +absolute pathname. + +As with GIT_TRACE, this file is local and private to the user's +system. Log file management and rotation is beyond the scope of the +SLOG effort. + +Similarly, if a user wants to provide this data to a developer, they +must explicitly make these log files available; SLOG does not +broadcast any of this information. It is up to the users of this +system to decide if any sensitive information should be sanitized, and +how to export the logs. + + +Comparison with GIT_TRACE +========================= + +SLOG is very similar to the existing GIT_TRACE[4] API because both +write event messages at various points during a command. However, +there are some fundamental differences that warrant it being +considered a separate feature rather than just another +GIT_TRACE_: + +1. GIT_TRACE events are unrelated, line-by-line logging. SLOG has + line-by-line events that show command progress and can serve as + structured debug messages. SLOG also supports accumulating summary + data (such as timers) that are automatically added to the final + `cmd_exit` event. + +2. GIT_TRACE events are unstructured free format printf-style messages + which makes post-processing difficult. SLOG events are written in + JSON and can be easily parsed using Perl, Python, and other tools. + +3. SLOG uses a well-defined API to build SLOG events containing + well-defined fields to make post-command analysis easier. + +4. It should be easier to filter/redact sensitive information from + SLOG data than from free form data. + +5. GIT_TRACE events are controlled by one or more global environment + variables which makes it awkward to selectively log some repos and + not others. SLOG events are controlled by a few configuration + settings[5]. Users (or system administrators) can configure + logging using repo-local or global config settings. + +6. GIT_TRACE events do not identify the git process. This makes it + difficult to associate all of events from a particular command. + Each SLOG event contains a session id to allow all events for a + command to be identified. + +7. Some git commands spawn child git commands. GIT_TRACE has no + mechanism to associate events from a child process with the parent + process. SLOG session ids allow child/parent relationships to be + tracked (even if there is an intermediate /bin/sh process between + them). + +8. GIT_TRACE supports logging to a file or stderr. SLOG only logs to + a file. + +9. Smashing SLOG into GIT_TRACE doesn't feel like a good fit. The 2 + APIs share nothing other than the concept that they write logging + data. + + +[1] http://json.org/ +[2] http://www.ietf.org/rfc/rfc7159.txt +[3] See UTF-8 limitations described in json-writer.h +[4] Documentation/technical/api-trace.txt +[5] See "slog.*" in Documentation/config.txt +[6] https://public-inbox.org/git/20180313004940.GG61720@google.com/t/ + + +SLOG Format (V0) +================ + +SLOG writes a series of events to the log target. Each event is a +self-describing JSON object. + + LF + LF + LF + ... + +Each event record in the log file is an independent and complete JSON +object. JSON parsers should process the file line-by-line rather than +trying to parse the entire file into a single object. + + Note: It may be difficult for parsers to find record boundaries if + pretty-printing is enabled, so it recommended that pretty-printing + only be enabled for interactive debugging and analysis. + +Every contains the following fields (F1): + + "event" : + "clock_us" : + "pid" : + "sid" : + + "command" : + "sub_command" : (optional) + + is one of "cmd_start", "cmd_end", or "detail". + + is the time of the event in microseconds since the epoch. + + is the process-id (from getpid()). + + is a session-id. (Described later) + + is a (possibly normalized) command name. This is + usually taken from the cmd_struct[] table after git parses the + command line and calls the appropriate cmd_() function. + Having it in a top-level field saves post-processors from having + to re-parse the command line to discover it. + + further qualifies the command. This field is + present for common commands that have multiple command modes. For + example, checkout can either change branches and do a full + checkout or it can checkout (refresh) an individual file. A + post-processor wanting to compute percentiles for the time spent + by branch-changing checkouts could easily filter out the + individual file checkouts (and without having to re-parse the + command line). + + The set of sub_command values are command-specific and are not + listed here. + +"event": "cmd_start" +------------------- + +The "cmd_start" event is emitted when git starts when cmd_main() is +called. In addition to the F1 fields, it contains the following +fields (F2): + + "argv" : + + is an array of the original command line arguments given to the + command (before git.c has a chance to remove the global options + before the verb. + + +"event": "cmd_exit" +------------------- + +The "cmd_exit" event is emitted immediately before git exits (during +an atexit() routine). It contains the F1 and F2 fields as described +above. It also contains the the following fields (F3): + + "result.exit_code" : + "result.errors" : (optional) + "result.elapsed_core_us" : + "result.elapsed_total_us" : + "result.signal" : (optional) + + "verion.git" : + "version.slog" : + + "config.slog.detail" : + "config.slog.timers" : + "config.slog.aux" : + "config.*.*" : (optional) + + "timers" : (optional) + "aux" : (optional) + + "child_summary" : (optional) + + is the value passed to exit() or returned from main(). + + is an array of messages passed to the die() + and error() functions. + + is the elapsed time from start until exit() + was called or main() returned. + + is the elapsed time from start until the slog + atexit routine was called. This time will include any time + required to shut down or wait for the pager to complete. + + is present if the command was stopped by a single, + such as a SIGPIPE when the pager is quit. + + is the git version number as reported by "git version". + + is the SLOG format version. + + are the values of the corresponding + "slog.{detail,timers,aux}" config setting. Since these values + control optional SLOG features and filtering, these are present + to help post-processors know if an expected event did not happen + or was simply filtered out. (Described later) + + is a place for developers to add additional + important config settings to the log. This is not intended as a + dumping ground for all config settings, but rather only ones that + might affect performance or allow A/B testing in production. + + is a structure of any SLOG timers used during the process. + (Described later) + + is a structure of any "aux data" generated during the process. + (Described later) + + is a structure summarizing child processes by class. + (Described later) + + +"event": "detail" and config setting "slog.detail" +-------------------------------------------------- + +The "detail" event is used to report progress and/or debug information +during a command. It is a line-by-line (rather than summary) event. +Like GIT_TRACE_, detail events are classified by "category" and +may be included or omitted based on the "slog.detail" config setting. + +Here are 3 example "detail" events: + +{ + "event": "detail", + "clock_us": 1530273485479387, + "pid": 107253, + "sid": "1530273485473820-107253", + "command": "status", + "detail": { + "category": "index", + "label": "lazy_init_name_hash", + "data": { + "cache_nr": 3269, + "elapsed_us": 195, + "dir_count": 0, + "dir_tablesize": 4096, + "name_count": 3269, + "name_tablesize": 4096 + } + } +} +{ + "event": "detail", + "clock_us": 1530283184051338, + "pid": 109679, + "sid": "1530283180782876-109679", + "command": "fetch", + "detail": { + "category": "child", + "label": "child_starting", + "data": { + "child_id": 3, + "git_cmd": true, + "use_shell": false, + "is_interactive": false, + "child_argv": [ + "gc", + "--auto" + ] + } + } +} +{ + "event": "detail", + "clock_us": 1530283184053158, + "pid": 109679, + "sid": "1530283180782876-109679", + "command": "fetch", + "detail": { + "category": "child", + "label": "child_ended", + "data": { + "child_id": 3, + "git_cmd": true, + "use_shell": false, + "is_interactive": false, + "child_argv": [ + "gc", + "--auto" + ], + "child_pid": 109684, + "child_exit_code": 0, + "child_elapsed_us": 1819 + } + } +} + +A detail event contains the common F1 described earlier. It also +contains 2 fixed fields and 1 variable field: + + "detail.category" : + "detail.label" : + "detail.data" : + + is the "category" name for the event. This is + similar to GIT_TRACE_. In the example above we have 1 + "index" and 2 "child" category events. + + If the config setting "slog.detail" is true or contains this + category name, the event will be generated. If "slog.detail" + is false, no detail events will be generated. + + $ git config slog.detail true + $ git config slog.detail child,index,status + $ git config slog.detail false + + is a descriptive label for the event. It may be the + name of a function or any meaningful value. + + is a JSON structure containing context-specific data for + the event. This replaces the need for printf-like trace messages. + + +Child Detail Events +------------------- + +Child detail events build upon the generic detail event and are used +to log information about spawned child processes. + +A "child_starting" detail event is generated immediately before +spawning a child process. + + "event" : "detail: + "detail.category" : "child" + "detail.label" : "child_starting" + + "detail.data.child_id" : + "detail.data.git_cmd" : + "detail.data.use_shell" : + "detail.data.is_interactive" : + "detail.data.child_class" : + "detail.data.child_argv" : + + is a simple integer id number for the child. This helps + match up the "child_starting" and "child_ended" detail events. + (The child's PID is not available until it is started.) + + is true if git will try to run the command as a git + command. The reported argv[0] for the child is probably a git + command verb rather than "git". + + is true if gill will try to use the shell to run the + command. + + is true if the child is considered interactive. + Editor and pager processes are considered interactive. + + is a classification for the child process, such as + "editor", "pager", and "shell". + + is the array of arguments to be passed to the child. + + +A "child_ended" detail event is generated after the child process +terminates and has been reaped. + + "event" : "detail: + "detail.category" : "child" + "detail.label" : "child_ended" + + "detail.data.child_id" : + "detail.data.git_cmd" : + "detail.data.use_shell" : + "detail.data.is_interactive" : + "detail.data.child_class" : + "detail.data.child_argv" : + + "detail.data.child_pid" : + "detail.data.child_exit_code" : + "detail.data.child_elapsed_us" : + + is the OS process-id for the child process. + + is the child's exit code. + + is the elapsed time in microseconds since the + "child_starting" event was generated. This is the observed time + the current process waited for the child to complete. This value + will be slightly larger than the value that the child process + reports for itself. + + +Child Summary Data +================================== + +If child processes are spawned, a summary is written to the "cmd_exit" +event. (This is written even if child detail events are disabled.) +The summary is grouped by child class (editor, pager, etc.) and contains +the number of child processes and their total elapsed time. + +For example: + +{ + "event": "cmd_exit", + ..., + "child_summary": { + "pager": { + "total_us": 14994045, + "count": 1 + } + } +} + + "child_summary..total_us" : + "child_summary..count" : + +Note that the total child time may exceed the elapsed time for the +git process because child processes may have been run in parallel. + + +Timers and config setting "slog.timers" +================================================ + +SLOG provides a stopwatch-like timer facility to easily instrument +small spans of code. These timers are automatically added to the +"cmd_exit" event. These are lighter weight than using explicit +"detail" events or git_trace_performance_since()-style messages. +Also, having timer data included in the "cmd_exit" event makes it +easier for some kinds of post-processing. + +For example: + +{ + "event": "cmd_exit", + ..., + "timers": { + "index": { + "do_read_index": { + "count": 1, + "total_us": 488 + }, + "preload": { + "count": 1, + "total_us": 2394 + } + }, + "status": { + "changes_index": { + "count": 1, + "total_us": 574 + }, + "untracked": { + "count": 1, + "total_us": 5877 + }, + "worktree": { + "count": 1, + "total_us": 92 + } + } + } +} + +Timers have a "category" and a "name". Timers may be enabled or +disabled by category (much like GIT_TRACE_). The "slog.timers" +config setting controls which timers are enabled. For example: + + $ git config --local slog.timers true + $ git config --local slog.timers index,status + $ git config --local slog.timers false + +Data for the enabled timers is written in the "cmd_exit" event under +the "timers" structure. They are grouped by category. Each timer +contains the total elapsed time and the number of times the timer was +started. Min, max, and average times are included if the timer was +started/stopped more than once. And "force_stop" flag is set if the +timer was still running when the command finished. + + "timers...count" : + "timers...total_us" : + "timers...min_us" : (optional) + "timers...max_us" : (optional) + "timers...avg_us" : (optional) + "timers...force_stop" : (optional) + + +Aux Data and config setting "slog.aux" +============================================ + +"Aux" data is intended as a generic container for context-specific +fields, such as information about the size or shape of the repository. +This data is automatically added to the "cmd_exit" event. This is +data is lighter weight than using explicit detail events and may make +some kinds of post-processing easier. + +For example: + +{ + "event": "cmd_exit", + ..., + "aux": { + "index": [ + [ + "cache_nr", + 3269 + ], + [ + "sparse_checkout_count", + 1 + ] + ] + } + + +This API adds additional key/value pairs to the "cmd_exit" summary +data. Value may be scalars or any JSON structure or array. + +Like detail events and timers, each key/value pair is associated with +a "category" (much like GIT_TRACE_). The "slog.aux" config +setting controls which pairs are written or omitted. For example: + + $ git config --local slog.aux true + $ git config --local slog.aux index + $ git config --local slog.aux false + +Aux data is written in the "cmd_exit" event under the "aux" structure +and are grouped by category. Each key/value pair is written as an +array rather than a structure to allow for duplicate keys. + + "aux." : [ , , ... ] + + is 2-element array of [ , ]. + + +Session-Id +======================= + +A session id (SID) is a cheap, unique-enough string to associate all +of the events generated by a single process. A child git process inherits +the SID of their parent git process and incorporates it into their SID. + +SIDs are constructed as: + + SID ::= '-' [ ':' ] + +This scheme is used rather than a simple PID or {PPID, PID} because +PIDs are recycled by the OS (after sufficient time). This also allows +a git child process to be associated with their git parent process +even when there is an intermediate shell process. + +Note: we could use UUIDs or GUIDs for this, but that seemed overkill +at this point. It also required platform-specific code to generate +which muddied up the code. + + +Detailed Example +================ + +Here is a longer example for `git status` with all optional settings +turned on: + + $ git config slog.detail true + $ git config slog.timers true + $ git config slog.aux true + + $ git status + + # A "cmd_start" event is written when the command starts. + +{ + "event": "cmd_start", + "clock_us": 1531499671154813, + "pid": 14667, + "sid": "1531499671154813-14667", + "command": "status", + "argv": [ + "./git", + "status" + ] +} + + # An example detail event was added to lazy_init_name_hash() to + # dump the size of the index and the resulting hash tables. + +{ + "event": "detail", + "clock_us": 1531499671161042, + "pid": 14667, + "sid": "1531499671154813-14667", + "command": "status", + "detail": { + "category": "index", + "label": "lazy_init_name_hash", + "data": { + "cache_nr": 3266, + "elapsed_us": 214, + "dir_count": 0, + "dir_tablesize": 4096, + "name_count": 3266, + "name_tablesize": 4096 + } + } +} + + # The "cmd_exit" event includes the command result and elapsed + # time and the various configuration settings. During the run + # "index" category timers were placed around the do_read_index() + # and "preload()" calls and various "status" category timers were + # placed around the 3 major parts of the status computation. + # Lastly, an "index" category "aux" data item was added to report + # the size of the index. + +{ + "event": "cmd_exit", + "clock_us": 1531499671168303, + "pid": 14667, + "sid": "1531499671154813-14667", + "command": "status", + "argv": [ + "./git", + "status" + ], + "result": { + "exit_code": 0, + "elapsed_core_us": 13488, + "elapsed_total_us": 13490 + }, + "version": { + "git": "2.18.0.26.gebaccfc", + "slog": 0 + }, + "config": { + "slog": { + "detail": 1, + "timers": 1, + "aux": 1 + } + }, + "timers": { + "index": { + "do_read_index": { + "count": 1, + "total_us": 553 + }, + "preload": { + "count": 1, + "total_us": 2892 + } + }, + "status": { + "changes_index": { + "count": 1, + "total_us": 778 + }, + "untracked": { + "count": 1, + "total_us": 6136 + }, + "worktree": { + "count": 1, + "total_us": 106 + } + } + }, + "aux": { + "index": [ + [ + "cache_nr", + 3266 + ] + ] + } +} From ae74377a5d10470e761704b084d4a4c4b4d84e66 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Tue, 19 Jun 2018 15:26:04 +0000 Subject: [PATCH 03/29] structured-logging: add STRUCTURED_LOGGING=1 to Makefile Teach the Makefile to take STRUCTURED_LOGGING=1 variable to compile in/out structured logging feature. Signed-off-by: Jeff Hostetler --- Makefile | 8 ++++++++ structured-logging.c | 9 +++++++++ structured-logging.h | 13 +++++++++++++ 3 files changed, 30 insertions(+) create mode 100644 structured-logging.c create mode 100644 structured-logging.h diff --git a/Makefile b/Makefile index 48a54e54f43918..12d502cd3936df 100644 --- a/Makefile +++ b/Makefile @@ -442,6 +442,8 @@ all:: # When cross-compiling, define HOST_CPU as the canonical name of the CPU on # which the built Git will run (for instance "x86_64"). # +# Define STRUCTURED_LOGGING if you want structured logging to be available. +# # Define RUNTIME_PREFIX to configure Git to resolve its ancillary tooling and # support files relative to the location of the runtime binary, rather than # hard-coding them into the binary. Git installations built with RUNTIME_PREFIX @@ -959,6 +961,7 @@ LIB_OBJS += split-index.o LIB_OBJS += strbuf.o LIB_OBJS += streaming.o LIB_OBJS += string-list.o +LIB_OBJS += structured-logging.o LIB_OBJS += submodule.o LIB_OBJS += submodule-config.o LIB_OBJS += sub-process.o @@ -1334,6 +1337,10 @@ ifdef ZLIB_PATH endif EXTLIBS += -lz +ifdef STRUCTURED_LOGGING + BASIC_CFLAGS += -DSTRUCTURED_LOGGING +endif + ifndef NO_OPENSSL OPENSSL_LIBSSL = -lssl ifdef OPENSSLDIR @@ -2551,6 +2558,7 @@ GIT-BUILD-OPTIONS: FORCE @echo TAR=\''$(subst ','\'',$(subst ','\'',$(TAR)))'\' >>$@+ @echo NO_CURL=\''$(subst ','\'',$(subst ','\'',$(NO_CURL)))'\' >>$@+ @echo NO_EXPAT=\''$(subst ','\'',$(subst ','\'',$(NO_EXPAT)))'\' >>$@+ + @echo STRUCTURED_LOGGING=\''$(subst ','\'',$(subst ','\'',$(STRUCTURED_LOGGING)))'\' >>$@+ @echo USE_LIBPCRE1=\''$(subst ','\'',$(subst ','\'',$(USE_LIBPCRE1)))'\' >>$@+ @echo USE_LIBPCRE2=\''$(subst ','\'',$(subst ','\'',$(USE_LIBPCRE2)))'\' >>$@+ @echo NO_LIBPCRE1_JIT=\''$(subst ','\'',$(subst ','\'',$(NO_LIBPCRE1_JIT)))'\' >>$@+ diff --git a/structured-logging.c b/structured-logging.c new file mode 100644 index 00000000000000..702fd8484c4e96 --- /dev/null +++ b/structured-logging.c @@ -0,0 +1,9 @@ +#if !defined(STRUCTURED_LOGGING) +/* + * Structured logging is not available. + * Stub out all API routines. + */ + +#else + +#endif diff --git a/structured-logging.h b/structured-logging.h new file mode 100644 index 00000000000000..c9e8c1db916ce9 --- /dev/null +++ b/structured-logging.h @@ -0,0 +1,13 @@ +#ifndef STRUCTURED_LOGGING_H +#define STRUCTURED_LOGGING_H + +#if !defined(STRUCTURED_LOGGING) +/* + * Structured logging is not available. + * Stub out all API routines. + */ + +#else + +#endif /* STRUCTURED_LOGGING */ +#endif /* STRUCTURED_LOGGING_H */ From 6cc6a2cc4332b378dcb3a330b1c9bc9847d10123 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Tue, 19 Jun 2018 20:06:51 +0000 Subject: [PATCH 04/29] structured-logging: add structured logging framework Teach git to optionally generate structured logging data in JSON using the json-writer API. "cmd_start" and "cmd_end" events are generated. Structured logging is only available when git is built with STRUCTURED_LOGGING=1. Structured logging is only enabled when the config setting "slog.path" is set to an absolute pathname. Signed-off-by: Jeff Hostetler --- Documentation/config.txt | 8 + compat/mingw.h | 7 + config.c | 3 + git-compat-util.h | 9 + git.c | 8 +- structured-logging.c | 366 +++++++++++++++++++++++++++++++++++++++ structured-logging.h | 82 +++++++++ usage.c | 4 + 8 files changed, 486 insertions(+), 1 deletion(-) diff --git a/Documentation/config.txt b/Documentation/config.txt index 12ef9c3c5d77da..7f38c106b8f81d 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -3269,6 +3269,14 @@ showbranch.default:: The default set of branches for linkgit:git-show-branch[1]. See linkgit:git-show-branch[1]. +slog.path:: + (EXPERIMENTAL) Enable structured logging to a file. This must be + an absolute path. (Git must be compiled with STRUCTURED_LOGGING=1.) + +slog.pretty:: + (EXPERIMENTAL) Pretty-print structured log data when true. + (Git must be compiled with STRUCTURED_LOGGING=1.) + splitIndex.maxPercentChange:: When the split index feature is used, this specifies the percent of entries the split index can contain compared to the diff --git a/compat/mingw.h b/compat/mingw.h index 0614b4c81fa3dc..ba28d674e4641f 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -146,8 +146,15 @@ static inline int fcntl(int fd, int cmd, ...) errno = EINVAL; return -1; } + /* bash cannot reliably detect negative return codes as failure */ +#if defined(STRUCTURED_LOGGING) +#include "structured-logging.h" +#define exit(code) exit(slog_exit_code((code) & 0xff)) +#else #define exit(code) exit((code) & 0xff) +#endif + #define sigemptyset(x) (void)0 static inline int sigaddset(sigset_t *set, int signum) { return 0; } diff --git a/config.c b/config.c index 3d0519859a657c..a5ce3de87d9f0f 100644 --- a/config.c +++ b/config.c @@ -1488,6 +1488,9 @@ int git_default_config(const char *var, const char *value, void *cb) return 0; } + if (starts_with(var, "slog.")) + return slog_default_config(var, value); + /* Add other config variables here and to Documentation/config.txt. */ return 0; } diff --git a/git-compat-util.h b/git-compat-util.h index 3e186e6974108a..c4d0d851cfaad1 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -1316,4 +1316,13 @@ extern void unleak_memory(const void *ptr, size_t len); #define UNLEAK(var) do {} while (0) #endif +#include "structured-logging.h" +#if defined(STRUCTURED_LOGGING) && !defined(exit) +/* + * Intercept all calls to exit() so that exit-code can be included + * in the "cmd_exit" message written by the at-exit routine. + */ +#define exit(code) exit(slog_exit_code(code)) +#endif + #endif diff --git a/git.c b/git.c index 65b1c2e68bbcd2..88ca8290d21be1 100644 --- a/git.c +++ b/git.c @@ -475,6 +475,7 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv) die("pre-command hook aborted command"); trace_argv_printf(argv, "trace: built-in: git"); + slog_set_command_name(p->cmd); /* * Validate the state of the cache entries in the index before and @@ -802,7 +803,7 @@ static int run_argv(int *argcp, const char ***argv) return done_alias; } -int cmd_main(int argc, const char **argv) +static int real_cmd_main(int argc, const char **argv) { const char *cmd; int done_help = 0; @@ -892,3 +893,8 @@ int cmd_main(int argc, const char **argv) return 1; } + +int cmd_main(int argc, const char **argv) +{ + return slog_wrap_main(real_cmd_main, argc, argv); +} diff --git a/structured-logging.c b/structured-logging.c index 702fd8484c4e96..afa2224349dd51 100644 --- a/structured-logging.c +++ b/structured-logging.c @@ -1,3 +1,10 @@ +#include "cache.h" +#include "config.h" +#include "version.h" +#include "json-writer.h" +#include "sigchain.h" +#include "argv-array.h" + #if !defined(STRUCTURED_LOGGING) /* * Structured logging is not available. @@ -6,4 +13,363 @@ #else +#define SLOG_VERSION 0 + +static uint64_t my__start_time; +static uint64_t my__exit_time; +static int my__is_config_loaded; +static int my__is_enabled; +static int my__is_pretty; +static int my__signal; +static int my__exit_code; +static int my__pid; +static int my__wrote_start_event; +static int my__log_fd = -1; + +static char *my__log_path; +static char *my__command_name; +static char *my__sub_command_name; + +static struct argv_array my__argv = ARGV_ARRAY_INIT; +static struct json_writer my__errors = JSON_WRITER_INIT; + +/* + * Write a single event to the structured log file. + */ +static void emit_event(struct json_writer *jw, const char *event_name) +{ + if (my__log_fd == -1) { + my__log_fd = open(my__log_path, + O_WRONLY | O_APPEND | O_CREAT, + 0644); + if (my__log_fd == -1) { + warning("slog: could not open '%s' for logging: %s", + my__log_path, strerror(errno)); + my__is_enabled = 0; + return; + } + } + + /* + * A properly authored JSON string does not have a final NL + * (even when pretty-printing is enabled). Structured logging + * output should look like a series of terminated forms one + * per line. Temporarily append a NL to the buffer so that + * the disk write happens atomically. + */ + strbuf_addch(&jw->json, '\n'); + if (write(my__log_fd, jw->json.buf, jw->json.len) != jw->json.len) + warning("slog: could not write event '%s': %s", + event_name, strerror(errno)); + + strbuf_setlen(&jw->json, jw->json.len - 1); +} + +static void emit_start_event(void) +{ + struct json_writer jw = JSON_WRITER_INIT; + + /* build "cmd_start" event message */ + jw_object_begin(&jw, my__is_pretty); + { + jw_object_string(&jw, "event", "cmd_start"); + jw_object_intmax(&jw, "clock_us", (intmax_t)my__start_time); + jw_object_intmax(&jw, "pid", (intmax_t)my__pid); + + if (my__command_name && *my__command_name) + jw_object_string(&jw, "command", my__command_name); + if (my__sub_command_name && *my__sub_command_name) + jw_object_string(&jw, "sub_command", my__sub_command_name); + + jw_object_inline_begin_array(&jw, "argv"); + { + int k; + for (k = 0; k < my__argv.argc; k++) + jw_array_string(&jw, my__argv.argv[k]); + } + jw_end(&jw); + } + jw_end(&jw); + + emit_event(&jw, "cmd_start"); + jw_release(&jw); + + my__wrote_start_event = 1; +} + +static void emit_exit_event(void) +{ + struct json_writer jw = JSON_WRITER_INIT; + uint64_t atexit_time = getnanotime() / 1000; + + /* close unterminated forms */ + if (my__errors.json.len) + jw_end(&my__errors); + + /* build "cmd_exit" event message */ + jw_object_begin(&jw, my__is_pretty); + { + jw_object_string(&jw, "event", "cmd_exit"); + jw_object_intmax(&jw, "clock_us", (intmax_t)atexit_time); + jw_object_intmax(&jw, "pid", (intmax_t)my__pid); + + if (my__command_name && *my__command_name) + jw_object_string(&jw, "command", my__command_name); + if (my__sub_command_name && *my__sub_command_name) + jw_object_string(&jw, "sub_command", my__sub_command_name); + + jw_object_inline_begin_array(&jw, "argv"); + { + int k; + for (k = 0; k < my__argv.argc; k++) + jw_array_string(&jw, my__argv.argv[k]); + } + jw_end(&jw); + + jw_object_inline_begin_object(&jw, "result"); + { + jw_object_intmax(&jw, "exit_code", my__exit_code); + if (my__errors.json.len) + jw_object_sub_jw(&jw, "errors", &my__errors); + + if (my__signal) + jw_object_intmax(&jw, "signal", my__signal); + + if (my__exit_time > 0) + jw_object_intmax(&jw, "elapsed_core_us", + my__exit_time - my__start_time); + + jw_object_intmax(&jw, "elapsed_total_us", + atexit_time - my__start_time); + } + jw_end(&jw); + + jw_object_inline_begin_object(&jw, "version"); + { + jw_object_string(&jw, "git", git_version_string); + jw_object_intmax(&jw, "slog", SLOG_VERSION); + } + jw_end(&jw); + } + jw_end(&jw); + + emit_event(&jw, "cmd_exit"); + jw_release(&jw); +} + +static int cfg_path(const char *key, const char *value) +{ + if (is_absolute_path(value)) { + my__log_path = xstrdup(value); + my__is_enabled = 1; + } else { + warning("'%s' must be an absolute path: '%s'", + key, value); + } + + return 0; +} + +static int cfg_pretty(const char *key, const char *value) +{ + my__is_pretty = git_config_bool(key, value); + return 0; +} + +int slog_default_config(const char *key, const char *value) +{ + const char *sub; + + /* + * git_default_config() calls slog_default_config() with "slog.*" + * k/v pairs. git_default_config() MAY or MAY NOT be called when + * cmd_() calls git_config(). + * + * Remember if we've ever been called. + */ + my__is_config_loaded = 1; + + if (skip_prefix(key, "slog.", &sub)) { + if (!strcmp(sub, "path")) + return cfg_path(key, value); + if (!strcmp(sub, "pretty")) + return cfg_pretty(key, value); + } + + return 0; +} + +static int lazy_load_config_cb(const char *key, const char * value, void *data) +{ + return slog_default_config(key, value); +} + +/* + * If cmd_() did not cause slog_default_config() to be called + * during git_config(), we try to lookup our config settings the first + * time we actually need them. + * + * (We do this rather than using read_early_config() at initialization + * because we want any "-c key=value" arguments to be included.) + */ +static inline void lazy_load_config(void) +{ + if (my__is_config_loaded) + return; + my__is_config_loaded = 1; + + read_early_config(lazy_load_config_cb, NULL); +} + +int slog_is_enabled(void) +{ + lazy_load_config(); + + return my__is_enabled; +} + +static void do_final_steps(int in_signal) +{ + static int completed = 0; + + if (completed) + return; + completed = 1; + + if (slog_is_enabled()) { + if (!my__wrote_start_event) + emit_start_event(); + emit_exit_event(); + my__is_enabled = 0; + } + + if (my__log_fd != -1) + close(my__log_fd); + free(my__log_path); + free(my__command_name); + free(my__sub_command_name); + argv_array_clear(&my__argv); + jw_release(&my__errors); +} + +static void slog_atexit(void) +{ + do_final_steps(0); +} + +static void slog_signal(int signo) +{ + my__signal = signo; + + do_final_steps(1); + + sigchain_pop(signo); + raise(signo); +} + +static void intern_argv(int argc, const char **argv) +{ + int k; + + for (k = 0; k < argc; k++) + argv_array_push(&my__argv, argv[k]); +} + +/* + * Collect basic startup information before cmd_main() has a chance + * to alter the command line and before we have seen the config (to + * know if logging is enabled). And since the config isn't loaded + * until cmd_main() dispatches to cmd_(), we have to wait + * and lazy-write the "cmd_start" event. + * + * This also implies that commands such as "help" and "version" that + * don't need load the config won't generate any log data. + */ +static void initialize(int argc, const char **argv) +{ + my__start_time = getnanotime() / 1000; + my__pid = getpid(); + + intern_argv(argc, argv); + + atexit(slog_atexit); + + /* + * Put up backstop signal handler to ensure we get the "cmd_exit" + * event. This is primarily for when the pager throws SIGPIPE + * when the user quits. + */ + sigchain_push(SIGPIPE, slog_signal); +} + +int slog_wrap_main(slog_fn_main_t fn_main, int argc, const char **argv) +{ + int result; + + initialize(argc, argv); + result = fn_main(argc, argv); + slog_exit_code(result); + + return result; +} + +void slog_set_command_name(const char *command_name) +{ + /* + * Capture the command name even if logging is not enabled + * because we don't know if the config has been loaded yet by + * the cmd_() and/or it may be too early to force a + * lazy load. + */ + if (my__command_name) + free(my__command_name); + my__command_name = xstrdup(command_name); +} + +void slog_set_sub_command_name(const char *sub_command_name) +{ + /* + * Capture the sub-command name even if logging is not enabled + * because we don't know if the config has been loaded yet by + * the cmd_() and/or it may be too early to force a + * lazy load. + */ + if (my__sub_command_name) + free(my__sub_command_name); + my__sub_command_name = xstrdup(sub_command_name); +} + +int slog_is_pretty(void) +{ + return my__is_pretty; +} + +int slog_exit_code(int exit_code) +{ + my__exit_time = getnanotime() / 1000; + my__exit_code = exit_code; + + return exit_code; +} + +void slog_error_message(const char *prefix, const char *fmt, va_list params) +{ + struct strbuf em = STRBUF_INIT; + va_list copy_params; + + if (prefix && *prefix) + strbuf_addstr(&em, prefix); + + va_copy(copy_params, params); + strbuf_vaddf(&em, fmt, copy_params); + va_end(copy_params); + + if (!my__errors.json.len) + jw_array_begin(&my__errors, my__is_pretty); + jw_array_string(&my__errors, em.buf); + /* leave my__errors array unterminated for now */ + + strbuf_release(&em); +} + #endif diff --git a/structured-logging.h b/structured-logging.h index c9e8c1db916ce9..61e98e65a66649 100644 --- a/structured-logging.h +++ b/structured-logging.h @@ -1,13 +1,95 @@ #ifndef STRUCTURED_LOGGING_H #define STRUCTURED_LOGGING_H +typedef int (*slog_fn_main_t)(int, const char **); + #if !defined(STRUCTURED_LOGGING) /* * Structured logging is not available. * Stub out all API routines. */ +#define slog_is_available() (0) +#define slog_default_config(k, v) (0) +#define slog_wrap_main(real_cmd_main, argc, argv) ((real_cmd_main)((argc), (argv))) +#define slog_set_command_name(n) do { } while (0) +#define slog_set_sub_command_name(n) do { } while (0) +#define slog_is_enabled() (0) +#define slog_is_pretty() (0) +#define slog_exit_code(exit_code) (exit_code) +#define slog_error_message(prefix, fmt, params) do { } while (0) #else +/* + * Is structured logging available (compiled-in)? + */ +#define slog_is_available() (1) + +/* + * Process "slog.*" config settings. + */ +int slog_default_config(const char *key, const char *value); + +/* + * Wrapper for the "real" cmd_main(). Initialize structured logging if + * enabled, run the given real_cmd_main(), and capture the return value. + * + * Note: common-main.c is shared by many top-level commands. + * common-main.c:main() does common process setup before calling + * the version of cmd_main() found in the executable. Some commands + * SHOULD NOT do logging (such as t/helper/test-tool). Ones that do + * need some common initialization/teardown. + * + * Use this function for any top-level command that should do logging. + * + * Usage: + * + * static int real_cmd_main(int argc, const char **argv) + * { + * ....the actual code for the command.... + * } + * + * int cmd_main(int argc, const char **argv) + * { + * return slog_wrap_main(real_cmd_main, argc, argv); + * } + * + * + * See git.c for an example. + */ +int slog_wrap_main(slog_fn_main_t real_cmd_main, int argc, const char **argv); + +/* + * Record a canonical command name and optional sub-command name for the + * current process. For example, "checkout" and "switch-branch". + */ +void slog_set_command_name(const char *name); +void slog_set_sub_command_name(const char *name); + +/* + * Is structured logging enabled? + */ +int slog_is_enabled(void); + +/* + * Is JSON pretty-printing enabled? + */ +int slog_is_pretty(void); + +/* + * Register the process exit code with the structured logging layer + * and return it. This value will appear in the final "cmd_exit" event. + * + * Use this to wrap all calls to exit(). + * Use this before returning in main(). + */ +int slog_exit_code(int exit_code); + +/* + * Append formatted error message to the structured log result. + * Messages from this will appear in the final "cmd_exit" event. + */ +void slog_error_message(const char *prefix, const char *fmt, va_list params); + #endif /* STRUCTURED_LOGGING */ #endif /* STRUCTURED_LOGGING_H */ diff --git a/usage.c b/usage.c index cc803336bd5e67..5d48f6b629a7bd 100644 --- a/usage.c +++ b/usage.c @@ -27,12 +27,16 @@ static NORETURN void usage_builtin(const char *err, va_list params) static NORETURN void die_builtin(const char *err, va_list params) { + slog_error_message("fatal: ", err, params); + vreportf("fatal: ", err, params); exit(128); } static void error_builtin(const char *err, va_list params) { + slog_error_message("error: ", err, params); + vreportf("error: ", err, params); } From 16b346e04e5de99fd0d46d1bf1e7dca5ae86d02a Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Wed, 20 Jun 2018 18:12:49 +0000 Subject: [PATCH 05/29] structured-logging: add session-id to log events Teach git to create a unique session id (SID) during structured logging initialization and use that SID in all log events. This SID is exported into a transient environment variable and inherited by child processes. This allows git child processes to be related back to the parent git process event if there are intermediate /bin/sh processes between them. Update t0001 to ignore the environment variable GIT_SLOG_PARENT_SID. Signed-off-by: Jeff Hostetler --- Documentation/git.txt | 6 +++++ structured-logging.c | 52 +++++++++++++++++++++++++++++++++++++++++++ t/t0001-init.sh | 1 + 3 files changed, 59 insertions(+) diff --git a/Documentation/git.txt b/Documentation/git.txt index 54aab669b2f858..013d4dd2764c95 100644 --- a/Documentation/git.txt +++ b/Documentation/git.txt @@ -775,6 +775,12 @@ standard output. adequate and support for it is likely to be removed in the foreseeable future (along with the variable). +`GIT_SLOG_PARENT_SID`:: + (Experimental) A transient environment variable set by top-level + Git commands and inherited by child Git commands. It contains + a session id that will be written the structured logging output + to help associate child and parent processes. + Discussion[[Discussion]] ------------------------ diff --git a/structured-logging.c b/structured-logging.c index afa2224349dd51..289140feeb05cc 100644 --- a/structured-logging.c +++ b/structured-logging.c @@ -31,8 +31,56 @@ static char *my__command_name; static char *my__sub_command_name; static struct argv_array my__argv = ARGV_ARRAY_INIT; +static struct strbuf my__session_id = STRBUF_INIT; static struct json_writer my__errors = JSON_WRITER_INIT; +/* + * Compute a new session id for the current process. Build string + * with the start time and PID of the current process and append + * the inherited session id from our parent process (if present). + * The parent session id may include its parent session id. + * + * sid := '-' [ ':' [ ... ] ] + */ +static void compute_our_sid(void) +{ + const char *parent_sid; + + if (my__session_id.len) + return; + + /* + * A "session id" (SID) is a cheap, unique-enough string to + * associate child process with the hierarchy of invoking git + * processes. + * + * This is stronger than a simple parent-pid because we may + * have an intermediate shell between a top-level Git command + * and a child Git command. It also isolates from issues + * about how the OS recycles PIDs. + * + * This could be a UUID/GUID, but that is overkill for our + * needs here and more expensive to compute. + * + * Consumers should consider this an unordered opaque string + * in case we decide to switch to a real UUID in the future. + */ + strbuf_addf(&my__session_id, "%"PRIuMAX"-%"PRIdMAX, + (uintmax_t)my__start_time, (intmax_t)my__pid); + + parent_sid = getenv("GIT_SLOG_PARENT_SID"); + if (parent_sid && *parent_sid) { + strbuf_addch(&my__session_id, ':'); + strbuf_addstr(&my__session_id, parent_sid); + } + + /* + * Install our SID into the environment for our child processes + * to inherit. + */ + setenv("GIT_SLOG_PARENT_SID", my__session_id.buf, 1); +} + /* * Write a single event to the structured log file. */ @@ -75,6 +123,7 @@ static void emit_start_event(void) jw_object_string(&jw, "event", "cmd_start"); jw_object_intmax(&jw, "clock_us", (intmax_t)my__start_time); jw_object_intmax(&jw, "pid", (intmax_t)my__pid); + jw_object_string(&jw, "sid", my__session_id.buf); if (my__command_name && *my__command_name) jw_object_string(&jw, "command", my__command_name); @@ -112,6 +161,7 @@ static void emit_exit_event(void) jw_object_string(&jw, "event", "cmd_exit"); jw_object_intmax(&jw, "clock_us", (intmax_t)atexit_time); jw_object_intmax(&jw, "pid", (intmax_t)my__pid); + jw_object_string(&jw, "sid", my__session_id.buf); if (my__command_name && *my__command_name) jw_object_string(&jw, "command", my__command_name); @@ -250,6 +300,7 @@ static void do_final_steps(int in_signal) free(my__sub_command_name); argv_array_clear(&my__argv); jw_release(&my__errors); + strbuf_release(&my__session_id); } static void slog_atexit(void) @@ -289,6 +340,7 @@ static void initialize(int argc, const char **argv) { my__start_time = getnanotime() / 1000; my__pid = getpid(); + compute_our_sid(); intern_argv(argc, argv); diff --git a/t/t0001-init.sh b/t/t0001-init.sh index ceb07e7d7a835f..70179f33e29e66 100755 --- a/t/t0001-init.sh +++ b/t/t0001-init.sh @@ -92,6 +92,7 @@ test_expect_success 'No extra GIT_* on alias scripts' ' env | sed -n \ -e "/^GIT_PREFIX=/d" \ + -e "/^GIT_SLOG_PARENT_SID=/d" \ -e "/^GIT_TEXTDOMAINDIR=/d" \ -e "/^GIT_/s/=.*//p" | sort From 8e71b14487d70bfb2aad3f0d9426438eb3b0d89a Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Wed, 20 Jun 2018 19:33:24 +0000 Subject: [PATCH 06/29] structured-logging: set sub_command field for branch command Set sub-command field for the various forms of the branch command. Signed-off-by: Jeff Hostetler --- builtin/branch.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/builtin/branch.c b/builtin/branch.c index 5217ba3bdebc22..fba516f9e5fa7d 100644 --- a/builtin/branch.c +++ b/builtin/branch.c @@ -689,10 +689,12 @@ int cmd_branch(int argc, const char **argv, const char *prefix) setup_auto_pager("branch", 1); if (delete) { + slog_set_sub_command_name("delete"); if (!argc) die(_("branch name required")); return delete_branches(argc, argv, delete > 1, filter.kind, quiet); } else if (list) { + slog_set_sub_command_name("list"); /* git branch --local also shows HEAD when it is detached */ if ((filter.kind & FILTER_REFS_BRANCHES) && filter.detached) filter.kind |= FILTER_REFS_DETACHED_HEAD; @@ -716,6 +718,7 @@ int cmd_branch(int argc, const char **argv, const char *prefix) const char *branch_name; struct strbuf branch_ref = STRBUF_INIT; + slog_set_sub_command_name("edit"); if (!argc) { if (filter.detached) die(_("Cannot give description to detached HEAD")); @@ -741,6 +744,7 @@ int cmd_branch(int argc, const char **argv, const char *prefix) if (edit_branch_description(branch_name)) return 1; } else if (copy) { + slog_set_sub_command_name("copy"); if (!argc) die(_("branch name required")); else if (argc == 1) @@ -750,6 +754,7 @@ int cmd_branch(int argc, const char **argv, const char *prefix) else die(_("too many branches for a copy operation")); } else if (rename) { + slog_set_sub_command_name("rename"); if (!argc) die(_("branch name required")); else if (argc == 1) @@ -761,6 +766,7 @@ int cmd_branch(int argc, const char **argv, const char *prefix) } else if (new_upstream) { struct branch *branch = branch_get(argv[0]); + slog_set_sub_command_name("new_upstream"); if (argc > 1) die(_("too many arguments to set new upstream")); @@ -784,6 +790,7 @@ int cmd_branch(int argc, const char **argv, const char *prefix) struct branch *branch = branch_get(argv[0]); struct strbuf buf = STRBUF_INIT; + slog_set_sub_command_name("unset_upstream"); if (argc > 1) die(_("too many arguments to unset upstream")); @@ -806,6 +813,7 @@ int cmd_branch(int argc, const char **argv, const char *prefix) } else if (argc > 0 && argc <= 2) { struct branch *branch = branch_get(argv[0]); + slog_set_sub_command_name("create"); if (!branch) die(_("no such branch '%s'"), argv[0]); From 37a4792dbddea281c2600ee9eff624be9f1443ff Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Wed, 20 Jun 2018 19:47:54 +0000 Subject: [PATCH 07/29] structured-logging: set sub_command field for checkout command Signed-off-by: Jeff Hostetler --- builtin/checkout.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/builtin/checkout.c b/builtin/checkout.c index 48bb958adf8aba..efd24bb0932fd7 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -254,6 +254,8 @@ static int checkout_paths(const struct checkout_opts *opts, int errs = 0; struct lock_file lock_file = LOCK_INIT; + slog_set_sub_command_name(opts->patch_mode ? "patch" : "path"); + if (opts->track != BRANCH_TRACK_UNSPECIFIED) die(_("'%s' cannot be used with updating paths"), "--track"); @@ -925,6 +927,9 @@ static int switch_branches(const struct checkout_opts *opts, void *path_to_free; struct object_id rev; int flag, writeout_error = 0; + + slog_set_sub_command_name("switch_branch"); + memset(&old_branch_info, 0, sizeof(old_branch_info)); old_branch_info.path = path_to_free = resolve_refdup("HEAD", 0, &rev, &flag); if (old_branch_info.path) @@ -1136,6 +1141,8 @@ static int switch_unborn_to_new_branch(const struct checkout_opts *opts) int status; struct strbuf branch_ref = STRBUF_INIT; + slog_set_sub_command_name("switch_unborn_to_new_branch"); + if (!opts->new_branch) die(_("You are on a branch yet to be born")); strbuf_addf(&branch_ref, "refs/heads/%s", opts->new_branch); From ae9ddd10d4e8653761696260f55355ffa6f933d3 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Tue, 19 Jun 2018 21:21:01 +0000 Subject: [PATCH 08/29] structured-logging: t0420 basic tests Add structured logging prereq definition "SLOG" to test-lib.sh. Create t0420 test script with some basic tests. Signed-off-by: Jeff Hostetler --- t/t0420-structured-logging.sh | 143 ++++++++++++++++++++++++++++++++++ t/t0420/parse_json.perl | 52 +++++++++++++ t/test-lib.sh | 1 + 3 files changed, 196 insertions(+) create mode 100755 t/t0420-structured-logging.sh create mode 100644 t/t0420/parse_json.perl diff --git a/t/t0420-structured-logging.sh b/t/t0420-structured-logging.sh new file mode 100755 index 00000000000000..a594af321b574f --- /dev/null +++ b/t/t0420-structured-logging.sh @@ -0,0 +1,143 @@ +#!/bin/sh + +test_description='structured logging tests' + +. ./test-lib.sh + +if ! test_have_prereq SLOG +then + skip_all='skipping structured logging tests' + test_done +fi + +LOGFILE=$TRASH_DIRECTORY/test.log + +test_expect_success 'setup' ' + test_commit hello && + cat >key_cmd_start <<-\EOF && + "event":"cmd_start" + EOF + cat >key_cmd_exit <<-\EOF && + "event":"cmd_exit" + EOF + cat >key_exit_code_0 <<-\EOF && + "exit_code":0 + EOF + cat >key_exit_code_129 <<-\EOF && + "exit_code":129 + EOF + git config --local slog.pretty false && + git config --local slog.path "$LOGFILE" +' + +test_expect_success 'basic events' ' + test_when_finished "rm \"$LOGFILE\"" && + git status >/dev/null && + grep -f key_cmd_start "$LOGFILE" && + grep -f key_cmd_exit "$LOGFILE" && + grep -f key_exit_code_0 "$LOGFILE" +' + +test_expect_success 'basic error code and message' ' + test_when_finished "rm \"$LOGFILE\" event_exit" && + test_expect_code 129 git status --xyzzy >/dev/null 2>/dev/null && + grep -f key_cmd_exit "$LOGFILE" >event_exit && + grep -f key_exit_code_129 event_exit && + grep "\"errors\":" event_exit +' + +test_lazy_prereq PERLJSON ' + perl -MJSON -e "exit 0" +' + +# Let perl parse the resulting JSON and dump it out. +# +# Since the output contains PIDs, SIDs, clock values, and the full path to +# git[.exe] we cannot have a HEREDOC with the expected result, so we look +# for a few key fields. +# +test_expect_success PERLJSON 'parse JSON for basic command' ' + test_when_finished "rm \"$LOGFILE\" event_exit" && + git status >/dev/null && + + grep -f key_cmd_exit "$LOGFILE" >event_exit && + + perl "$TEST_DIRECTORY"/t0420/parse_json.perl parsed_exit && + + grep "row\[0\]\.version\.slog 0" /dev/null && + git branch --all >/dev/null && + git branch new_branch >/dev/null && + + grep -f key_cmd_exit "$LOGFILE" >event_exit && + + perl "$TEST_DIRECTORY"/t0420/parse_json.perl parsed_exit && + + grep "row\[0\]\.version\.slog 0" /dev/null && + git checkout master >/dev/null && + git checkout -- hello.t >/dev/null && + + grep -f key_cmd_exit "$LOGFILE" >event_exit && + + perl "$TEST_DIRECTORY"/t0420/parse_json.perl parsed_exit && + + grep "row\[0\]\.version\.slog 0" 0) ? "$label_in.$k" : "$k"; + my $value = $obj{$k}; + + dump_item($label, $value); + } +} + +sub dump_item { + my ($label_in, $value) = @_; + if (ref($value) eq 'ARRAY') { + print "$label_in array\n"; + dump_array($label_in, $value); + } elsif (ref($value) eq 'HASH') { + print "$label_in hash\n"; + dump_hash($label_in, $value); + } elsif (defined $value) { + print "$label_in $value\n"; + } else { + print "$label_in null\n"; + } +} + +my $row = 0; +while (<>) { + my $data = decode_json( $_ ); + my $label = "row[$row]"; + + dump_hash($label, $data); + $row++; +} + diff --git a/t/test-lib.sh b/t/test-lib.sh index fe886a53fbd3e8..8c1691fc6e1fbb 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -1104,6 +1104,7 @@ test -n "$USE_LIBPCRE1$USE_LIBPCRE2" && test_set_prereq PCRE test -n "$USE_LIBPCRE1" && test_set_prereq LIBPCRE1 test -n "$USE_LIBPCRE2" && test_set_prereq LIBPCRE2 test -z "$NO_GETTEXT" && test_set_prereq GETTEXT +test -z "$STRUCTURED_LOGGING" || test_set_prereq SLOG # Can we rely on git's output in the C locale? if test -n "$GETTEXT_POISON" From 00d1eaec65a5846bdfc209eb4182962ec81f4150 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 21 Jun 2018 15:35:53 +0000 Subject: [PATCH 09/29] structured-logging: add detail-event facility Add a generic "detail-event" to structured logging. This can be used to emit context-specific events for performance or debugging purposes. These are conceptually similar to the various GIT_TRACE_ messages. Signed-off-by: Jeff Hostetler --- Documentation/config.txt | 13 ++++++ structured-logging.c | 95 ++++++++++++++++++++++++++++++++++++++++ structured-logging.h | 16 +++++++ 3 files changed, 124 insertions(+) diff --git a/Documentation/config.txt b/Documentation/config.txt index 7f38c106b8f81d..6789db7d77442f 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -3277,6 +3277,19 @@ slog.pretty:: (EXPERIMENTAL) Pretty-print structured log data when true. (Git must be compiled with STRUCTURED_LOGGING=1.) +slog.detail:: + (EXPERIMENTAL) May be set to a boolean value or a list of comma + separated tokens. Controls which categories of optional "detail" + events are generated. Default to off. This is conceptually + similar to the different GIT_TRACE_ values. ++ +Detail events are generic events with a context-specific payload. This +may represent a single function call or a section of performance sensitive +code. ++ +This is intended to be an extendable facility where new events can easily +be added (possibly only for debugging or performance testing purposes). + splitIndex.maxPercentChange:: When the split index feature is used, this specifies the percent of entries the split index can contain compared to the diff --git a/structured-logging.c b/structured-logging.c index 289140feeb05cc..9cbf3bd4429d68 100644 --- a/structured-logging.c +++ b/structured-logging.c @@ -34,6 +34,34 @@ static struct argv_array my__argv = ARGV_ARRAY_INIT; static struct strbuf my__session_id = STRBUF_INIT; static struct json_writer my__errors = JSON_WRITER_INIT; +struct category_filter +{ + char *categories; + int want; +}; + +static struct category_filter my__detail_categories; + +static void set_want_categories(struct category_filter *cf, const char *value) +{ + FREE_AND_NULL(cf->categories); + + cf->want = git_parse_maybe_bool(value); + if (cf->want == -1) + cf->categories = xstrdup(value); +} + +static int want_category(const struct category_filter *cf, const char *category) +{ + if (cf->want == 0 || cf->want == 1) + return cf->want; + + if (!category || !*category) + return 0; + + return !!strstr(cf->categories, category); +} + /* * Compute a new session id for the current process. Build string * with the start time and PID of the current process and append @@ -207,6 +235,40 @@ static void emit_exit_event(void) jw_release(&jw); } +static void emit_detail_event(const char *category, const char *label, + const struct json_writer *data) +{ + struct json_writer jw = JSON_WRITER_INIT; + uint64_t clock_us = getnanotime() / 1000; + + /* build "detail" event */ + jw_object_begin(&jw, my__is_pretty); + { + jw_object_string(&jw, "event", "detail"); + jw_object_intmax(&jw, "clock_us", (intmax_t)clock_us); + jw_object_intmax(&jw, "pid", (intmax_t)my__pid); + jw_object_string(&jw, "sid", my__session_id.buf); + + if (my__command_name && *my__command_name) + jw_object_string(&jw, "command", my__command_name); + if (my__sub_command_name && *my__sub_command_name) + jw_object_string(&jw, "sub_command", my__sub_command_name); + + jw_object_inline_begin_object(&jw, "detail"); + { + jw_object_string(&jw, "category", category); + jw_object_string(&jw, "label", label); + if (data) + jw_object_sub_jw(&jw, "data", data); + } + jw_end(&jw); + } + jw_end(&jw); + + emit_event(&jw, "detail"); + jw_release(&jw); +} + static int cfg_path(const char *key, const char *value) { if (is_absolute_path(value)) { @@ -226,6 +288,12 @@ static int cfg_pretty(const char *key, const char *value) return 0; } +static int cfg_detail(const char *key, const char *value) +{ + set_want_categories(&my__detail_categories, value); + return 0; +} + int slog_default_config(const char *key, const char *value) { const char *sub; @@ -244,6 +312,8 @@ int slog_default_config(const char *key, const char *value) return cfg_path(key, value); if (!strcmp(sub, "pretty")) return cfg_pretty(key, value); + if (!strcmp(sub, "detail")) + return cfg_detail(key, value); } return 0; @@ -424,4 +494,29 @@ void slog_error_message(const char *prefix, const char *fmt, va_list params) strbuf_release(&em); } +int slog_want_detail_event(const char *category) +{ + return want_category(&my__detail_categories, category); +} + +void slog_emit_detail_event(const char *category, const char *label, + const struct json_writer *data) +{ + if (!my__wrote_start_event) + emit_start_event(); + + if (!slog_want_detail_event(category)) + return; + + if (!category || !*category) + BUG("no category for slog.detail event"); + if (!label || !*label) + BUG("no label for slog.detail event"); + if (data && !jw_is_terminated(data)) + BUG("unterminated slog.detail data: '%s' '%s' '%s'", + category, label, data->json.buf); + + emit_detail_event(category, label, data); +} + #endif diff --git a/structured-logging.h b/structured-logging.h index 61e98e65a66649..01ae55d95ec606 100644 --- a/structured-logging.h +++ b/structured-logging.h @@ -1,6 +1,8 @@ #ifndef STRUCTURED_LOGGING_H #define STRUCTURED_LOGGING_H +struct json_writer; + typedef int (*slog_fn_main_t)(int, const char **); #if !defined(STRUCTURED_LOGGING) @@ -17,6 +19,8 @@ typedef int (*slog_fn_main_t)(int, const char **); #define slog_is_pretty() (0) #define slog_exit_code(exit_code) (exit_code) #define slog_error_message(prefix, fmt, params) do { } while (0) +#define slog_want_detail_event(category) (0) +#define slog_emit_detail_event(category, label, data) do { } while (0) #else @@ -91,5 +95,17 @@ int slog_exit_code(int exit_code); */ void slog_error_message(const char *prefix, const char *fmt, va_list params); +/* + * Is detail logging enabled for this category? + */ +int slog_want_detail_event(const char *category); + +/* + * Write a detail event. + */ + +void slog_emit_detail_event(const char *category, const char *label, + const struct json_writer *data); + #endif /* STRUCTURED_LOGGING */ #endif /* STRUCTURED_LOGGING_H */ From 286c36431b450c7488685742e2b1e9f1d53f74a4 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 21 Jun 2018 15:42:32 +0000 Subject: [PATCH 10/29] structured-logging: add detail-event for lazy_init_name_hash Teach git to generate a structured logging detail-event for lazy_init_name_hash(). This is marked as an "index" category event and includes time and size data for the hashmaps. Signed-off-by: Jeff Hostetler --- name-hash.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/name-hash.c b/name-hash.c index 163849831c9f11..939b26aea32637 100644 --- a/name-hash.c +++ b/name-hash.c @@ -7,6 +7,8 @@ */ #define NO_THE_INDEX_COMPATIBILITY_MACROS #include "cache.h" +#include "json-writer.h" +#include "structured-logging.h" struct dir_entry { struct hashmap_entry ent; @@ -603,6 +605,30 @@ static void lazy_init_name_hash(struct index_state *istate) istate->name_hash_initialized = 1; trace_performance_since(start, "initialize name hash"); + + if (slog_want_detail_event("index")) { + struct json_writer jw = JSON_WRITER_INIT; + uint64_t now_ns = getnanotime(); + uint64_t elapsed_us = (now_ns - start) / 1000; + + jw_object_begin(&jw, slog_is_pretty()); + { + jw_object_intmax(&jw, "cache_nr", istate->cache_nr); + jw_object_intmax(&jw, "elapsed_us", elapsed_us); + jw_object_intmax(&jw, "dir_count", + hashmap_get_size(&istate->dir_hash)); + jw_object_intmax(&jw, "dir_tablesize", + istate->dir_hash.tablesize); + jw_object_intmax(&jw, "name_count", + hashmap_get_size(&istate->name_hash)); + jw_object_intmax(&jw, "name_tablesize", + istate->name_hash.tablesize); + } + jw_end(&jw); + + slog_emit_detail_event("index", "lazy_init_name_hash", &jw); + jw_release(&jw); + } } /* From ae3fb49fde22a53e47f3352f1db315a57c12c905 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 21 Jun 2018 17:57:28 +0000 Subject: [PATCH 11/29] structured-logging: add timer facility Add timer facility to structured logging. This allows stopwatch-like operations over the life of the git process. Timer data is summarized in the "cmd_exit" event. Signed-off-by: Jeff Hostetler --- Documentation/config.txt | 6 ++ structured-logging.c | 180 +++++++++++++++++++++++++++++++++++++++ structured-logging.h | 19 +++++ 3 files changed, 205 insertions(+) diff --git a/Documentation/config.txt b/Documentation/config.txt index 6789db7d77442f..9b8c99d40b7083 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -3290,6 +3290,12 @@ code. This is intended to be an extendable facility where new events can easily be added (possibly only for debugging or performance testing purposes). +slog.timers:: + (EXPERIMENTAL) May be set to a boolean value or a list of comma + separated tokens. Controls which categories of SLOG timers are + enabled. Defaults to off. Data for enabled timers is added to + the `cmd_exit` event. + splitIndex.maxPercentChange:: When the split index feature is used, this specifies the percent of entries the split index can contain compared to the diff --git a/structured-logging.c b/structured-logging.c index 9cbf3bd4429d68..215138c52b8286 100644 --- a/structured-logging.c +++ b/structured-logging.c @@ -15,6 +15,26 @@ #define SLOG_VERSION 0 +struct timer_data { + char *category; + char *name; + uint64_t total_ns; + uint64_t min_ns; + uint64_t max_ns; + uint64_t start_ns; + int count; + int started; +}; + +struct timer_data_array { + struct timer_data **array; + size_t nr, alloc; +}; + +static struct timer_data_array my__timers; +static void format_timers(struct json_writer *jw); +static void free_timers(void); + static uint64_t my__start_time; static uint64_t my__exit_time; static int my__is_config_loaded; @@ -41,6 +61,7 @@ struct category_filter }; static struct category_filter my__detail_categories; +static struct category_filter my__timer_categories; static void set_want_categories(struct category_filter *cf, const char *value) { @@ -228,6 +249,12 @@ static void emit_exit_event(void) jw_object_intmax(&jw, "slog", SLOG_VERSION); } jw_end(&jw); + + if (my__timers.nr) { + jw_object_inline_begin_object(&jw, "timers"); + format_timers(&jw); + jw_end(&jw); + } } jw_end(&jw); @@ -294,6 +321,12 @@ static int cfg_detail(const char *key, const char *value) return 0; } +static int cfg_timers(const char *key, const char *value) +{ + set_want_categories(&my__timer_categories, value); + return 0; +} + int slog_default_config(const char *key, const char *value) { const char *sub; @@ -314,6 +347,8 @@ int slog_default_config(const char *key, const char *value) return cfg_pretty(key, value); if (!strcmp(sub, "detail")) return cfg_detail(key, value); + if (!strcmp(sub, "timers")) + return cfg_timers(key, value); } return 0; @@ -371,6 +406,7 @@ static void do_final_steps(int in_signal) argv_array_clear(&my__argv); jw_release(&my__errors); strbuf_release(&my__session_id); + free_timers(); } static void slog_atexit(void) @@ -519,4 +555,148 @@ void slog_emit_detail_event(const char *category, const char *label, emit_detail_event(category, label, data); } +int slog_start_timer(const char *category, const char *name) +{ + int k; + struct timer_data *td; + + if (!want_category(&my__timer_categories, category)) + return SLOG_UNDEFINED_TIMER_ID; + if (!name || !*name) + return SLOG_UNDEFINED_TIMER_ID; + + for (k = 0; k < my__timers.nr; k++) { + td = my__timers.array[k]; + if (!strcmp(category, td->category) && !strcmp(name, td->name)) + goto start_timer; + } + + td = xcalloc(1, sizeof(struct timer_data)); + td->category = xstrdup(category); + td->name = xstrdup(name); + td->min_ns = UINT64_MAX; + + ALLOC_GROW(my__timers.array, my__timers.nr + 1, my__timers.alloc); + my__timers.array[my__timers.nr++] = td; + +start_timer: + if (td->started) + BUG("slog.timer '%s:%s' already started", + td->category, td->name); + + td->start_ns = getnanotime(); + td->started = 1; + + return k; +} + +static void stop_timer(struct timer_data *td) +{ + uint64_t delta_ns = getnanotime() - td->start_ns; + + td->count++; + td->total_ns += delta_ns; + if (delta_ns < td->min_ns) + td->min_ns = delta_ns; + if (delta_ns > td->max_ns) + td->max_ns = delta_ns; + td->started = 0; +} + +void slog_stop_timer(int tid) +{ + struct timer_data *td; + + if (tid == SLOG_UNDEFINED_TIMER_ID) + return; + if (tid >= my__timers.nr || tid < 0) + BUG("Invalid slog.timer id '%d'", tid); + + td = my__timers.array[tid]; + if (!td->started) + BUG("slog.timer '%s:%s' not started", td->category, td->name); + + stop_timer(td); +} + +static int sort_timers_cb(const void *a, const void *b) +{ + struct timer_data *td_a = *(struct timer_data **)a; + struct timer_data *td_b = *(struct timer_data **)b; + int r; + + r = strcmp(td_a->category, td_b->category); + if (r) + return r; + return strcmp(td_a->name, td_b->name); +} + +static void format_a_timer(struct json_writer *jw, struct timer_data *td, + int force_stop) +{ + jw_object_inline_begin_object(jw, td->name); + { + jw_object_intmax(jw, "count", td->count); + jw_object_intmax(jw, "total_us", td->total_ns / 1000); + if (td->count > 1) { + uint64_t avg_ns = td->total_ns / td->count; + + jw_object_intmax(jw, "min_us", td->min_ns / 1000); + jw_object_intmax(jw, "max_us", td->max_ns / 1000); + jw_object_intmax(jw, "avg_us", avg_ns / 1000); + } + if (force_stop) + jw_object_true(jw, "force_stop"); + } + jw_end(jw); +} + +static void format_timers(struct json_writer *jw) +{ + const char *open_category = NULL; + int k; + + QSORT(my__timers.array, my__timers.nr, sort_timers_cb); + + for (k = 0; k < my__timers.nr; k++) { + struct timer_data *td = my__timers.array[k]; + int force_stop = td->started; + + if (force_stop) + stop_timer(td); + + if (!open_category) { + jw_object_inline_begin_object(jw, td->category); + open_category = td->category; + } + else if (strcmp(open_category, td->category)) { + jw_end(jw); + jw_object_inline_begin_object(jw, td->category); + open_category = td->category; + } + + format_a_timer(jw, td, force_stop); + } + + if (open_category) + jw_end(jw); +} + +static void free_timers(void) +{ + int k; + + for (k = 0; k < my__timers.nr; k++) { + struct timer_data *td = my__timers.array[k]; + + free(td->category); + free(td->name); + free(td); + } + + FREE_AND_NULL(my__timers.array); + my__timers.nr = 0; + my__timers.alloc = 0; +} + #endif diff --git a/structured-logging.h b/structured-logging.h index 01ae55d95ec606..a29aa6e762af44 100644 --- a/structured-logging.h +++ b/structured-logging.h @@ -5,6 +5,8 @@ struct json_writer; typedef int (*slog_fn_main_t)(int, const char **); +#define SLOG_UNDEFINED_TIMER_ID (-1) + #if !defined(STRUCTURED_LOGGING) /* * Structured logging is not available. @@ -21,6 +23,8 @@ typedef int (*slog_fn_main_t)(int, const char **); #define slog_error_message(prefix, fmt, params) do { } while (0) #define slog_want_detail_event(category) (0) #define slog_emit_detail_event(category, label, data) do { } while (0) +#define slog_start_timer(category, name) (SLOG_UNDEFINED_TIMER_ID) +static inline void slog_stop_timer(int tid) { }; #else @@ -107,5 +111,20 @@ int slog_want_detail_event(const char *category); void slog_emit_detail_event(const char *category, const char *label, const struct json_writer *data); +/* + * Define and start or restart a structured logging timer. Stats for the + * timer will be added to the "cmd_exit" event. Use a timer when you are + * interested in the net time of an operation (such as part of a computation + * in a loop) but don't want a detail event for each iteration. + * + * Returns a timer id. + */ +int slog_start_timer(const char *category, const char *name); + +/* + * Stop the timer. + */ +void slog_stop_timer(int tid); + #endif /* STRUCTURED_LOGGING */ #endif /* STRUCTURED_LOGGING_H */ From ea9ec284b6b7b63325a2c1b1c01bc662a55c2025 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 21 Jun 2018 18:02:24 +0000 Subject: [PATCH 12/29] structured-logging: add timer around do_read_index Use a SLOG timer to record the time spent in do_read_index() and report it in the "cmd_exit" event. Signed-off-by: Jeff Hostetler --- read-cache.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/read-cache.c b/read-cache.c index 09baf7b864af44..69c95cbe13f4d8 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1992,6 +1992,7 @@ static void freshen_shared_index(const char *shared_index, int warn) int read_index_from(struct index_state *istate, const char *path, const char *gitdir) { + int slog_tid; uint64_t start = getnanotime(); struct split_index *split_index; int ret; @@ -2002,7 +2003,9 @@ int read_index_from(struct index_state *istate, const char *path, if (istate->initialized) return istate->cache_nr; + slog_tid = slog_start_timer("index", "do_read_index"); ret = do_read_index(istate, path, 0); + slog_stop_timer(slog_tid); trace_performance_since(start, "read cache %s", path); split_index = istate->split_index; @@ -2018,7 +2021,9 @@ int read_index_from(struct index_state *istate, const char *path, base_oid_hex = oid_to_hex(&split_index->base_oid); base_path = xstrfmt("%s/sharedindex.%s", gitdir, base_oid_hex); + slog_tid = slog_start_timer("index", "do_read_index"); ret = do_read_index(split_index->base, base_path, 1); + slog_stop_timer(slog_tid); if (oidcmp(&split_index->base_oid, &split_index->base->oid)) die("broken index, expect %s in %s, got %s", base_oid_hex, base_path, From d9a494f46959d5216ef3ea813b9c0429d3bae958 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 21 Jun 2018 18:10:19 +0000 Subject: [PATCH 13/29] structured-logging: add timer around do_write_index Use a SLOG timer to record the time spend in do_write_index() and report it in the "cmd_exit" event. Signed-off-by: Jeff Hostetler --- read-cache.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/read-cache.c b/read-cache.c index 69c95cbe13f4d8..1bc952ece09ac6 100644 --- a/read-cache.c +++ b/read-cache.c @@ -2563,7 +2563,9 @@ static int commit_locked_index(struct lock_file *lk) static int do_write_locked_index(struct index_state *istate, struct lock_file *lock, unsigned flags) { + int slog_timer = slog_start_timer("index", "do_write_index"); int ret = do_write_index(istate, lock->tempfile, 0); + slog_stop_timer(slog_timer); if (ret) return ret; if (flags & COMMIT_LOCK) @@ -2644,11 +2646,14 @@ static int clean_shared_index_files(const char *current_hex) static int write_shared_index(struct index_state *istate, struct tempfile **temp) { + int slog_tid = SLOG_UNDEFINED_TIMER_ID; struct split_index *si = istate->split_index; int ret; move_cache_to_base_index(istate); + slog_tid = slog_start_timer("index", "do_write_index"); ret = do_write_index(si->base, *temp, 1); + slog_stop_timer(slog_tid); if (ret) return ret; ret = adjust_shared_perm(get_tempfile_path(*temp)); From 6d80034375439f8a9be4280164cdc760dcc2e727 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 21 Jun 2018 18:27:54 +0000 Subject: [PATCH 14/29] structured-logging: add timer around wt-status functions Use a SLOG timer to record the time spend in wt_status_collect_worktree(), wt_status_collect_changes_initial(), wt_status_collect_changes_index(), and wt_status_collect_untracked(). These are reported in the "cmd_exit" event. Signed-off-by: Jeff Hostetler --- wt-status.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/wt-status.c b/wt-status.c index 78b3b0edfd0f2e..4118383e51c75b 100644 --- a/wt-status.c +++ b/wt-status.c @@ -582,8 +582,11 @@ static void wt_status_collect_updated_cb(struct diff_queue_struct *q, static void wt_status_collect_changes_worktree(struct wt_status *s) { + int slog_tid; struct rev_info rev; + slog_tid = slog_start_timer("status", "worktree"); + init_revisions(&rev, NULL); setup_revisions(0, NULL, &rev, NULL); rev.diffopt.output_format |= DIFF_FORMAT_CALLBACK; @@ -602,13 +605,18 @@ static void wt_status_collect_changes_worktree(struct wt_status *s) rev.diffopt.rename_score = s->rename_score >= 0 ? s->rename_score : rev.diffopt.rename_score; copy_pathspec(&rev.prune_data, &s->pathspec); run_diff_files(&rev, 0); + + slog_stop_timer(slog_tid); } static void wt_status_collect_changes_index(struct wt_status *s) { + int slog_tid; struct rev_info rev; struct setup_revision_opt opt; + slog_tid = slog_start_timer("status", "changes_index"); + init_revisions(&rev, NULL); memset(&opt, 0, sizeof(opt)); opt.def = s->is_initial ? empty_tree_oid_hex() : s->reference; @@ -638,12 +646,17 @@ static void wt_status_collect_changes_index(struct wt_status *s) rev.diffopt.rename_score = s->rename_score >= 0 ? s->rename_score : rev.diffopt.rename_score; copy_pathspec(&rev.prune_data, &s->pathspec); run_diff_index(&rev, 1); + + slog_stop_timer(slog_tid); } static void wt_status_collect_changes_initial(struct wt_status *s) { + int slog_tid; int i; + slog_tid = slog_start_timer("status", "changes_initial"); + for (i = 0; i < active_nr; i++) { struct string_list_item *it; struct wt_status_change_data *d; @@ -674,10 +687,13 @@ static void wt_status_collect_changes_initial(struct wt_status *s) oidcpy(&d->oid_index, &ce->oid); } } + + slog_stop_timer(slog_tid); } static void wt_status_collect_untracked(struct wt_status *s) { + int slog_tid; int i; struct dir_struct dir; uint64_t t_begin = getnanotime(); @@ -685,6 +701,8 @@ static void wt_status_collect_untracked(struct wt_status *s) if (!s->show_untracked_files) return; + slog_tid = slog_start_timer("status", "untracked"); + memset(&dir, 0, sizeof(dir)); if (s->show_untracked_files != SHOW_ALL_UNTRACKED_FILES) dir.flags |= @@ -727,6 +745,8 @@ static void wt_status_collect_untracked(struct wt_status *s) if (advice_status_u_option) s->untracked_in_ms = (getnanotime() - t_begin) / 1000000; + + slog_stop_timer(slog_tid); } void wt_status_collect(struct wt_status *s) From 0592b361dc028ffa94de876f666743cda0a41b49 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 21 Jun 2018 18:33:29 +0000 Subject: [PATCH 15/29] structured-logging: add timer around preload_index Use a SLOG timer to record the time spend in preload_index() and report it in the "cmd_exit" event. Signed-off-by: Jeff Hostetler --- preload-index.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/preload-index.c b/preload-index.c index 354d1df0dbf9b3..70c58b17a39146 100644 --- a/preload-index.c +++ b/preload-index.c @@ -116,8 +116,14 @@ void preload_index(struct index_state *index, const struct pathspec *pathspec) int read_index_preload(struct index_state *index, const struct pathspec *pathspec) { + int slog_tid; int retval = read_index(index); + slog_tid = slog_start_timer("index", "preload"); + preload_index(index, pathspec); + + slog_stop_timer(slog_tid); + return retval; } From 782a1499a596db5dc42d346de37131e91fa9332a Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Tue, 26 Jun 2018 18:23:33 +0000 Subject: [PATCH 16/29] structured-logging: t0420 tests for timers Signed-off-by: Jeff Hostetler --- t/t0420-structured-logging.sh | 48 +++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/t/t0420-structured-logging.sh b/t/t0420-structured-logging.sh index a594af321b574f..37c7e8337b1835 100755 --- a/t/t0420-structured-logging.sh +++ b/t/t0420-structured-logging.sh @@ -140,4 +140,52 @@ test_expect_success PERLJSON 'parse JSON for checkout command' ' grep "row\[2\]\.sub_command path" /dev/null && + + grep -f key_cmd_exit "$LOGFILE" >event_exit && + + perl "$TEST_DIRECTORY"/t0420/parse_json.perl parsed_exit && + + grep "row\[0\]\.version\.slog 0" /dev/null && + + grep -f key_cmd_exit "$LOGFILE" >event_exit && + + perl "$TEST_DIRECTORY"/t0420/parse_json.perl parsed_exit && + + grep "row\[0\]\.version\.slog 0" Date: Thu, 21 Jun 2018 19:26:17 +0000 Subject: [PATCH 17/29] structured-logging: add aux-data facility Add facility to add extra data to the structured logging data allowing arbitrary key/value pair data to be added to the "cmd_exit" event. Signed-off-by: Jeff Hostetler --- Documentation/config.txt | 6 ++ structured-logging.c | 116 +++++++++++++++++++++++++++++++++++++++ structured-logging.h | 21 +++++++ 3 files changed, 143 insertions(+) diff --git a/Documentation/config.txt b/Documentation/config.txt index 9b8c99d40b7083..a9d4d27f7abbc9 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -3296,6 +3296,12 @@ slog.timers:: enabled. Defaults to off. Data for enabled timers is added to the `cmd_exit` event. +slog.aux:: + (EXPERIMENTAL) May be set to a boolean value or a list of + comma separated tokens. Controls which categories of SLOG + "aux" data are enabled. Defaults to off. "Aux" data is added + to the `cmd_exit` event. + splitIndex.maxPercentChange:: When the split index feature is used, this specifies the percent of entries the split index can contain compared to the diff --git a/structured-logging.c b/structured-logging.c index 215138c52b8286..584f70a82f0dec 100644 --- a/structured-logging.c +++ b/structured-logging.c @@ -35,6 +35,19 @@ static struct timer_data_array my__timers; static void format_timers(struct json_writer *jw); static void free_timers(void); +struct aux_data { + char *category; + struct json_writer jw; +}; + +struct aux_data_array { + struct aux_data **array; + size_t nr, alloc; +}; + +static struct aux_data_array my__aux_data; +static void format_and_free_aux_data(struct json_writer *jw); + static uint64_t my__start_time; static uint64_t my__exit_time; static int my__is_config_loaded; @@ -62,6 +75,7 @@ struct category_filter static struct category_filter my__detail_categories; static struct category_filter my__timer_categories; +static struct category_filter my__aux_categories; static void set_want_categories(struct category_filter *cf, const char *value) { @@ -255,6 +269,12 @@ static void emit_exit_event(void) format_timers(&jw); jw_end(&jw); } + + if (my__aux_data.nr) { + jw_object_inline_begin_object(&jw, "aux"); + format_and_free_aux_data(&jw); + jw_end(&jw); + } } jw_end(&jw); @@ -327,6 +347,12 @@ static int cfg_timers(const char *key, const char *value) return 0; } +static int cfg_aux(const char *key, const char *value) +{ + set_want_categories(&my__aux_categories, value); + return 0; +} + int slog_default_config(const char *key, const char *value) { const char *sub; @@ -349,6 +375,8 @@ int slog_default_config(const char *key, const char *value) return cfg_detail(key, value); if (!strcmp(sub, "timers")) return cfg_timers(key, value); + if (!strcmp(sub, "aux")) + return cfg_aux(key, value); } return 0; @@ -699,4 +727,92 @@ static void free_timers(void) my__timers.alloc = 0; } +int slog_want_aux(const char *category) +{ + return want_category(&my__aux_categories, category); +} + +static struct aux_data *find_aux_data(const char *category) +{ + struct aux_data *ad; + int k; + + if (!slog_want_aux(category)) + return NULL; + + for (k = 0; k < my__aux_data.nr; k++) { + ad = my__aux_data.array[k]; + if (!strcmp(category, ad->category)) + return ad; + } + + ad = xcalloc(1, sizeof(struct aux_data)); + ad->category = xstrdup(category); + + jw_array_begin(&ad->jw, my__is_pretty); + /* leave per-category object unterminated for now */ + + ALLOC_GROW(my__aux_data.array, my__aux_data.nr + 1, my__aux_data.alloc); + my__aux_data.array[my__aux_data.nr++] = ad; + + return ad; +} + +#define add_to_aux(c, k, v, fn) \ + do { \ + struct aux_data *ad = find_aux_data((c)); \ + if (ad) { \ + jw_array_inline_begin_array(&ad->jw); \ + { \ + jw_array_string(&ad->jw, (k)); \ + (fn)(&ad->jw, (v)); \ + } \ + jw_end(&ad->jw); \ + } \ + } while (0) + +void slog_aux_string(const char *category, const char *key, const char *value) +{ + add_to_aux(category, key, value, jw_array_string); +} + +void slog_aux_intmax(const char *category, const char *key, intmax_t value) +{ + add_to_aux(category, key, value, jw_array_intmax); +} + +void slog_aux_bool(const char *category, const char *key, int value) +{ + add_to_aux(category, key, value, jw_array_bool); +} + +void slog_aux_jw(const char *category, const char *key, + const struct json_writer *value) +{ + add_to_aux(category, key, value, jw_array_sub_jw); +} + +static void format_and_free_aux_data(struct json_writer *jw) +{ + int k; + + for (k = 0; k < my__aux_data.nr; k++) { + struct aux_data *ad = my__aux_data.array[k]; + + /* terminate per-category form */ + jw_end(&ad->jw); + + /* insert per-category form into containing "aux" form */ + jw_object_sub_jw(jw, ad->category, &ad->jw); + + jw_release(&ad->jw); + free(ad->category); + free(ad); + } + + FREE_AND_NULL(my__aux_data.array); + my__aux_data.nr = 0; + my__aux_data.alloc = 0; +} + #endif diff --git a/structured-logging.h b/structured-logging.h index a29aa6e762af44..227259819e378c 100644 --- a/structured-logging.h +++ b/structured-logging.h @@ -25,6 +25,11 @@ typedef int (*slog_fn_main_t)(int, const char **); #define slog_emit_detail_event(category, label, data) do { } while (0) #define slog_start_timer(category, name) (SLOG_UNDEFINED_TIMER_ID) static inline void slog_stop_timer(int tid) { }; +#define slog_want_aux(c) (0) +#define slog_aux_string(c, k, v) do { } while (0) +#define slog_aux_intmax(c, k, v) do { } while (0) +#define slog_aux_bool(c, k, v) do { } while (0) +#define slog_aux_jw(c, k, v) do { } while (0) #else @@ -126,5 +131,21 @@ int slog_start_timer(const char *category, const char *name); */ void slog_stop_timer(int tid); +/* + * Add arbitrary extra key/value data to the "cmd_exit" event. + * These fields will appear under the "aux" object. This is + * intended for "interesting" config values or repo stats, such + * as the size of the index. + * + * These key/value pairs are written as an array-pair rather than + * an object/value because the keys may be repeated. + */ +int slog_want_aux(const char *category); +void slog_aux_string(const char *category, const char *key, const char *value); +void slog_aux_intmax(const char *category, const char *key, intmax_t value); +void slog_aux_bool(const char *category, const char *key, int value); +void slog_aux_jw(const char *category, const char *key, + const struct json_writer *value); + #endif /* STRUCTURED_LOGGING */ #endif /* STRUCTURED_LOGGING_H */ From 3f09ea0884ece1816085bd7024a6c6955d08a584 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 21 Jun 2018 19:28:15 +0000 Subject: [PATCH 18/29] structured-logging: add aux-data for index size Teach do_read_index() and do_write_index() to record the size of the index in aux-data. This will be reported in the "cmd_exit" event. Signed-off-by: Jeff Hostetler --- read-cache.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/read-cache.c b/read-cache.c index 1bc952ece09ac6..cb8c2130990ffa 100644 --- a/read-cache.c +++ b/read-cache.c @@ -2008,6 +2008,8 @@ int read_index_from(struct index_state *istate, const char *path, slog_stop_timer(slog_tid); trace_performance_since(start, "read cache %s", path); + slog_aux_intmax("index", "cache_nr", istate->cache_nr); + split_index = istate->split_index; if (!split_index || is_null_oid(&split_index->base_oid)) { post_read_index_from(istate); @@ -2029,6 +2031,8 @@ int read_index_from(struct index_state *istate, const char *path, base_oid_hex, base_path, oid_to_hex(&split_index->base->oid)); + slog_aux_intmax("index", "split_index_cache_nr", split_index->base->cache_nr); + freshen_shared_index(base_path, 0); merge_base_index(istate); post_read_index_from(istate); From 76bf16f3714b161fe48903faadd4589b0c755d2b Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 21 Jun 2018 19:39:49 +0000 Subject: [PATCH 19/29] structured-logging: add aux-data for size of sparse-checkout file Teach unpack_trees() to record the number of entries in the sparse-checkout file in the aux-data. This will be reported in the "cmd_exit" event. Signed-off-by: Jeff Hostetler --- unpack-trees.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/unpack-trees.c b/unpack-trees.c index 17ebd3c8e26640..f3539c0e25a1ea 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1303,12 +1303,15 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options if (!o->skip_sparse_checkout) { if (core_virtualfilesystem) { o->el = ⪙ + slog_aux_intmax("index", "sparse_checkout_count", el.nr); } else { char *sparse = git_pathdup("info/sparse-checkout"); if (add_excludes_from_file_to_list(sparse, "", 0, &el, NULL) < 0) o->skip_sparse_checkout = 1; - else + else { o->el = ⪙ + slog_aux_intmax("index", "sparse_checkout_count", el.nr); + } free(sparse); } } From c17dd26b28242a662d9cbd00c6ffe4c7faab0ba1 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Tue, 26 Jun 2018 18:50:53 +0000 Subject: [PATCH 20/29] structured-logging: t0420 tests for aux-data Signed-off-by: Jeff Hostetler --- t/t0420-structured-logging.sh | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/t/t0420-structured-logging.sh b/t/t0420-structured-logging.sh index 37c7e8337b1835..2e06cd79af77e5 100755 --- a/t/t0420-structured-logging.sh +++ b/t/t0420-structured-logging.sh @@ -188,4 +188,37 @@ test_expect_success PERLJSON 'turn on index timers only' ' test_expect_code 1 grep "row\[0\]\.timers\.status\.untracked\.total_us" .git/info/sparse-checkout && + git config --local core.sparsecheckout true && + git config --local slog.aux foo,index,bar && + rm -f "$LOGFILE" && + + git checkout HEAD && + + grep -f key_cmd_exit "$LOGFILE" >event_exit && + + perl "$TEST_DIRECTORY"/t0420/parse_json.perl parsed_exit && + + grep "row\[0\]\.version\.slog 0" ][0] cache_nr + # row[0].aux.index[][1] 1 + # row[0].aux.index[][0] sparse_checkout_count + # row[0].aux.index[][1] 1 + # + # But do not assume values for and (in case the sorting changes + # or other "aux" fields are added later). + + grep "row\[0\]\.aux\.index\[.*\]\[0\] cache_nr" Date: Mon, 25 Jun 2018 15:22:36 +0000 Subject: [PATCH 21/29] structured-logging: add structured logging to remote-curl remote-curl is not a builtin command and therefore, does not inherit the common cmd_main() startup in git.c Wrap cmd_main() with slog_cmd_main() in remote-curl to initialize logging. Add slog timers around push, fetch, and list verbs. Signed-off-by: Jeff Hostetler --- remote-curl.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/remote-curl.c b/remote-curl.c index 99b0bedc6ddc67..ed910f8428ef31 100644 --- a/remote-curl.c +++ b/remote-curl.c @@ -1322,8 +1322,9 @@ static int stateless_connect(const char *service_name) return 0; } -int cmd_main(int argc, const char **argv) +static int real_cmd_main(int argc, const char **argv) { + int slog_tid; struct strbuf buf = STRBUF_INIT; int nongit; @@ -1333,6 +1334,8 @@ int cmd_main(int argc, const char **argv) return 1; } + slog_set_command_name("remote-curl"); + options.verbosity = 1; options.progress = !!isatty(2); options.thin = 1; @@ -1362,14 +1365,20 @@ int cmd_main(int argc, const char **argv) if (starts_with(buf.buf, "fetch ")) { if (nongit) die("remote-curl: fetch attempted without a local repo"); + slog_tid = slog_start_timer("curl", "fetch"); parse_fetch(&buf); + slog_stop_timer(slog_tid); } else if (!strcmp(buf.buf, "list") || starts_with(buf.buf, "list ")) { int for_push = !!strstr(buf.buf + 4, "for-push"); + slog_tid = slog_start_timer("curl", "list"); output_refs(get_refs(for_push)); + slog_stop_timer(slog_tid); } else if (starts_with(buf.buf, "push ")) { + slog_tid = slog_start_timer("curl", "push"); parse_push(&buf); + slog_stop_timer(slog_tid); } else if (skip_prefix(buf.buf, "option ", &arg)) { char *value = strchr(arg, ' '); @@ -1411,3 +1420,8 @@ int cmd_main(int argc, const char **argv) return 0; } + +int cmd_main(int argc, const char **argv) +{ + return slog_wrap_main(real_cmd_main, argc, argv); +} From 2c8e58d2f680130380bccc7fd2c931c4567da66f Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Wed, 27 Jun 2018 15:56:44 +0000 Subject: [PATCH 22/29] structured-logging: add detail-events for child processes Teach git to emit "detail" events with category "child" before a child process is started and after it finishes. These events can be used to infer time spent by git waiting for child processes to complete. These events are controlled by the slog.detail config setting. Set to true or add the token "child" to it. Signed-off-by: Jeff Hostetler --- run-command.c | 14 +++- run-command.h | 1 + structured-logging.c | 154 ++++++++++++++++++++++++++++++++++++++++++- structured-logging.h | 15 +++++ 4 files changed, 181 insertions(+), 3 deletions(-) diff --git a/run-command.c b/run-command.c index a0c9dea4d3d339..5bc7b8d1625b42 100644 --- a/run-command.c +++ b/run-command.c @@ -711,6 +711,8 @@ int start_command(struct child_process *cmd) fflush(NULL); + cmd->slog_child_id = slog_child_starting(cmd); + #ifndef GIT_WINDOWS_NATIVE { int notify_pipe[2]; @@ -924,6 +926,9 @@ int start_command(struct child_process *cmd) close_pair(fderr); else if (cmd->err) close(cmd->err); + + slog_child_ended(cmd->slog_child_id, cmd->pid, failed_errno); + child_process_clear(cmd); errno = failed_errno; return -1; @@ -950,13 +955,20 @@ int start_command(struct child_process *cmd) int finish_command(struct child_process *cmd) { int ret = wait_or_whine(cmd->pid, cmd->argv[0], 0); + + slog_child_ended(cmd->slog_child_id, cmd->pid, ret); + child_process_clear(cmd); return ret; } int finish_command_in_signal(struct child_process *cmd) { - return wait_or_whine(cmd->pid, cmd->argv[0], 1); + int ret = wait_or_whine(cmd->pid, cmd->argv[0], 1); + + slog_child_ended(cmd->slog_child_id, cmd->pid, ret); + + return ret; } diff --git a/run-command.h b/run-command.h index e9c72b73a83358..1a840d612e172b 100644 --- a/run-command.h +++ b/run-command.h @@ -12,6 +12,7 @@ struct child_process { struct argv_array args; struct argv_array env_array; pid_t pid; + int slog_child_id; /* * Using .in, .out, .err: * - Specify 0 for no redirections (child inherits stdin, stdout, diff --git a/structured-logging.c b/structured-logging.c index 584f70a82f0dec..dbe60b785d404c 100644 --- a/structured-logging.c +++ b/structured-logging.c @@ -4,6 +4,7 @@ #include "json-writer.h" #include "sigchain.h" #include "argv-array.h" +#include "run-command.h" #if !defined(STRUCTURED_LOGGING) /* @@ -48,6 +49,23 @@ struct aux_data_array { static struct aux_data_array my__aux_data; static void format_and_free_aux_data(struct json_writer *jw); +struct child_data { + uint64_t start_ns; + uint64_t end_ns; + struct json_writer jw_argv; + unsigned int is_running:1; + unsigned int is_git_cmd:1; + unsigned int use_shell:1; +}; + +struct child_data_array { + struct child_data **array; + size_t nr, alloc; +}; + +static struct child_data_array my__child_data; +static void free_children(void); + static uint64_t my__start_time; static uint64_t my__exit_time; static int my__is_config_loaded; @@ -283,10 +301,11 @@ static void emit_exit_event(void) } static void emit_detail_event(const char *category, const char *label, + uint64_t clock_ns, const struct json_writer *data) { struct json_writer jw = JSON_WRITER_INIT; - uint64_t clock_us = getnanotime() / 1000; + uint64_t clock_us = clock_ns / 1000; /* build "detail" event */ jw_object_begin(&jw, my__is_pretty); @@ -435,6 +454,7 @@ static void do_final_steps(int in_signal) jw_release(&my__errors); strbuf_release(&my__session_id); free_timers(); + free_children(); } static void slog_atexit(void) @@ -580,7 +600,7 @@ void slog_emit_detail_event(const char *category, const char *label, BUG("unterminated slog.detail data: '%s' '%s' '%s'", category, label, data->json.buf); - emit_detail_event(category, label, data); + emit_detail_event(category, label, getnanotime(), data); } int slog_start_timer(const char *category, const char *name) @@ -815,4 +835,134 @@ static void format_and_free_aux_data(struct json_writer *jw) my__aux_data.alloc = 0; } +static struct child_data *alloc_child_data(const struct child_process *cmd) +{ + struct child_data *cd = xcalloc(1, sizeof(struct child_data)); + + cd->start_ns = getnanotime(); + cd->is_running = 1; + cd->is_git_cmd = cmd->git_cmd; + cd->use_shell = cmd->use_shell; + + jw_init(&cd->jw_argv); + + jw_array_begin(&cd->jw_argv, my__is_pretty); + { + jw_array_argv(&cd->jw_argv, cmd->argv); + } + jw_end(&cd->jw_argv); + + return cd; +} + +static int insert_child_data(struct child_data *cd) +{ + int child_id = my__child_data.nr; + + ALLOC_GROW(my__child_data.array, my__child_data.nr + 1, + my__child_data.alloc); + my__child_data.array[my__child_data.nr++] = cd; + + return child_id; +} + +int slog_child_starting(const struct child_process *cmd) +{ + struct child_data *cd; + int child_id; + + if (!slog_is_enabled()) + return SLOG_UNDEFINED_CHILD_ID; + + /* + * If we have not yet written a cmd_start event (and even if + * we do not emit this child_start event), force the cmd_start + * event now so that it appears in the log before any events + * that the child process itself emits. + */ + if (!my__wrote_start_event) + emit_start_event(); + + cd = alloc_child_data(cmd); + child_id = insert_child_data(cd); + + /* build data portion for a "detail" event */ + if (slog_want_detail_event("child")) { + struct json_writer jw_data = JSON_WRITER_INIT; + + jw_object_begin(&jw_data, my__is_pretty); + { + jw_object_intmax(&jw_data, "child_id", child_id); + jw_object_bool(&jw_data, "git_cmd", cd->is_git_cmd); + jw_object_bool(&jw_data, "use_shell", cd->use_shell); + jw_object_sub_jw(&jw_data, "child_argv", &cd->jw_argv); + } + jw_end(&jw_data); + + emit_detail_event("child", "child_starting", cd->start_ns, + &jw_data); + jw_release(&jw_data); + } + + return child_id; +} + +void slog_child_ended(int child_id, int child_pid, int child_exit_code) +{ + struct child_data *cd; + + if (!slog_is_enabled()) + return; + if (child_id == SLOG_UNDEFINED_CHILD_ID) + return; + if (child_id >= my__child_data.nr || child_id < 0) + BUG("Invalid slog.child id '%d'", child_id); + + cd = my__child_data.array[child_id]; + if (!cd->is_running) + BUG("slog.child '%d' already stopped", child_id); + + cd->end_ns = getnanotime(); + cd->is_running = 0; + + /* build data portion for a "detail" event */ + if (slog_want_detail_event("child")) { + struct json_writer jw_data = JSON_WRITER_INIT; + + jw_object_begin(&jw_data, my__is_pretty); + { + jw_object_intmax(&jw_data, "child_id", child_id); + jw_object_bool(&jw_data, "git_cmd", cd->is_git_cmd); + jw_object_bool(&jw_data, "use_shell", cd->use_shell); + jw_object_sub_jw(&jw_data, "child_argv", &cd->jw_argv); + + jw_object_intmax(&jw_data, "child_pid", child_pid); + jw_object_intmax(&jw_data, "child_exit_code", + child_exit_code); + jw_object_intmax(&jw_data, "child_elapsed_us", + (cd->end_ns - cd->start_ns) / 1000); + } + jw_end(&jw_data); + + emit_detail_event("child", "child_ended", cd->end_ns, &jw_data); + jw_release(&jw_data); + } +} + +static void free_children(void) +{ + int k; + + for (k = 0; k < my__child_data.nr; k++) { + struct child_data *cd = my__child_data.array[k]; + + jw_release(&cd->jw_argv); + free(cd); + } + + FREE_AND_NULL(my__child_data.array); + my__child_data.nr = 0; + my__child_data.alloc = 0; +} + #endif diff --git a/structured-logging.h b/structured-logging.h index 227259819e378c..7c98d336f9e2df 100644 --- a/structured-logging.h +++ b/structured-logging.h @@ -2,10 +2,12 @@ #define STRUCTURED_LOGGING_H struct json_writer; +struct child_process; typedef int (*slog_fn_main_t)(int, const char **); #define SLOG_UNDEFINED_TIMER_ID (-1) +#define SLOG_UNDEFINED_CHILD_ID (-1) #if !defined(STRUCTURED_LOGGING) /* @@ -30,6 +32,8 @@ static inline void slog_stop_timer(int tid) { }; #define slog_aux_intmax(c, k, v) do { } while (0) #define slog_aux_bool(c, k, v) do { } while (0) #define slog_aux_jw(c, k, v) do { } while (0) +#define slog_child_starting(cmd) (SLOG_UNDEFINED_CHILD_ID) +#define slog_child_ended(i, p, ec) do { } while (0) #else @@ -147,5 +151,16 @@ void slog_aux_bool(const char *category, const char *key, int value); void slog_aux_jw(const char *category, const char *key, const struct json_writer *value); +/* + * Emit a detail event of category "child" and label "child_starting" + * or "child_ending" with information about the child process. Note + * that this is in addition to any events that the child process itself + * generates. + * + * Set "slog.detail" to true or contain "child" to get these events. + */ +int slog_child_starting(const struct child_process *cmd); +void slog_child_ended(int child_id, int child_pid, int child_exit_code); + #endif /* STRUCTURED_LOGGING */ #endif /* STRUCTURED_LOGGING_H */ From 11c5fc7d97bde92604e978e3b59283162beac8c2 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Fri, 29 Jun 2018 22:27:20 +0000 Subject: [PATCH 23/29] structured-logging: add child process classification Teach git to classify child processes as "editor", "pager", "subprocess", "alias", "shell", or "other". Add the child process classification to the child detail events. Mark child processes of class "editor" or "pager" as interactive in the child detail event. Add child summary to cmd_exit event grouping child process by class. Signed-off-by: Jeff Hostetler --- editor.c | 1 + git.c | 2 + pager.c | 1 + run-command.h | 1 + structured-logging.c | 119 +++++++++++++++++++++++++++++++++++++++++++ sub-process.c | 1 + 6 files changed, 125 insertions(+) diff --git a/editor.c b/editor.c index 9a9b4e12d1db7f..6f5ccf36c4ca80 100644 --- a/editor.c +++ b/editor.c @@ -66,6 +66,7 @@ int launch_editor(const char *path, struct strbuf *buffer, const char *const *en p.argv = args; p.env = env; p.use_shell = 1; + p.slog_child_class = "editor"; if (start_command(&p) < 0) return error("unable to start editor '%s'", editor); diff --git a/git.c b/git.c index 88ca8290d21be1..b6c78243f71646 100644 --- a/git.c +++ b/git.c @@ -329,6 +329,7 @@ static int handle_alias(int *argcp, const char ***argv) commit_pager_choice(); child.use_shell = 1; + child.slog_child_class = "alias"; argv_array_push(&child.args, alias_string + 1); argv_array_pushv(&child.args, (*argv) + 1); @@ -723,6 +724,7 @@ static void execv_dashed_external(const char **argv) cmd.clean_on_exit = 1; cmd.wait_after_clean = 1; cmd.silent_exec_failure = 1; + cmd.slog_child_class = "alias"; if (run_pre_command_hook(cmd.args.argv)) die("pre-command hook aborted command"); diff --git a/pager.c b/pager.c index bdc75d0b4fad5b..3aea083da6f472 100644 --- a/pager.c +++ b/pager.c @@ -103,6 +103,7 @@ void prepare_pager_args(struct child_process *pager_process, const char *pager) argv_array_push(&pager_process->args, pager); pager_process->use_shell = 1; setup_pager_env(&pager_process->env_array); + pager_process->slog_child_class = "pager"; } void setup_pager(void) diff --git a/run-command.h b/run-command.h index 1a840d612e172b..cafb83c9c0ba3e 100644 --- a/run-command.h +++ b/run-command.h @@ -13,6 +13,7 @@ struct child_process { struct argv_array env_array; pid_t pid; int slog_child_id; + const char *slog_child_class; /* * Using .in, .out, .err: * - Specify 0 for no redirections (child inherits stdin, stdout, diff --git a/structured-logging.c b/structured-logging.c index dbe60b785d404c..2571e798ef57f8 100644 --- a/structured-logging.c +++ b/structured-logging.c @@ -49,13 +49,30 @@ struct aux_data_array { static struct aux_data_array my__aux_data; static void format_and_free_aux_data(struct json_writer *jw); +struct child_summary_data { + char *child_class; + uint64_t total_ns; + int count; +}; + +struct child_summary_data_array { + struct child_summary_data **array; + size_t nr, alloc; +}; + +static struct child_summary_data_array my__child_summary_data; +static void format_child_summary_data(struct json_writer *jw); +static void free_child_summary_data(void); + struct child_data { uint64_t start_ns; uint64_t end_ns; struct json_writer jw_argv; + char *child_class; unsigned int is_running:1; unsigned int is_git_cmd:1; unsigned int use_shell:1; + unsigned int is_interactive:1; }; struct child_data_array { @@ -293,6 +310,12 @@ static void emit_exit_event(void) format_and_free_aux_data(&jw); jw_end(&jw); } + + if (my__child_summary_data.nr) { + jw_object_inline_begin_object(&jw, "child_summary"); + format_child_summary_data(&jw); + jw_end(&jw); + } } jw_end(&jw); @@ -453,6 +476,7 @@ static void do_final_steps(int in_signal) argv_array_clear(&my__argv); jw_release(&my__errors); strbuf_release(&my__session_id); + free_child_summary_data(); free_timers(); free_children(); } @@ -835,6 +859,85 @@ static void format_and_free_aux_data(struct json_writer *jw) my__aux_data.alloc = 0; } +static struct child_summary_data *find_child_summary_data( + const struct child_data *cd) +{ + struct child_summary_data *csd; + char *child_class; + int k; + + child_class = cd->child_class; + if (!child_class || !*child_class) { + if (cd->use_shell) + child_class = "shell"; + child_class = "other"; + } + + for (k = 0; k < my__child_summary_data.nr; k++) { + csd = my__child_summary_data.array[k]; + if (!strcmp(child_class, csd->child_class)) + return csd; + } + + csd = xcalloc(1, sizeof(struct child_summary_data)); + csd->child_class = xstrdup(child_class); + + ALLOC_GROW(my__child_summary_data.array, my__child_summary_data.nr + 1, + my__child_summary_data.alloc); + my__child_summary_data.array[my__child_summary_data.nr++] = csd; + + return csd; +} + +static void add_child_to_summary_data(const struct child_data *cd) +{ + struct child_summary_data *csd = find_child_summary_data(cd); + + csd->total_ns += cd->end_ns - cd->start_ns; + csd->count++; +} + +static void format_child_summary_data(struct json_writer *jw) +{ + int k; + + for (k = 0; k < my__child_summary_data.nr; k++) { + struct child_summary_data *csd = my__child_summary_data.array[k]; + + jw_object_inline_begin_object(jw, csd->child_class); + { + jw_object_intmax(jw, "total_us", csd->total_ns / 1000); + jw_object_intmax(jw, "count", csd->count); + } + jw_end(jw); + } +} + +static void free_child_summary_data(void) +{ + int k; + + for (k = 0; k < my__child_summary_data.nr; k++) { + struct child_summary_data *csd = my__child_summary_data.array[k]; + + free(csd->child_class); + free(csd); + } + + free(my__child_summary_data.array); +} + +static unsigned int is_interactive(const char *child_class) +{ + if (child_class && *child_class) { + if (!strcmp(child_class, "editor")) + return 1; + if (!strcmp(child_class, "pager")) + return 1; + } + return 0; +} + static struct child_data *alloc_child_data(const struct child_process *cmd) { struct child_data *cd = xcalloc(1, sizeof(struct child_data)); @@ -843,6 +946,9 @@ static struct child_data *alloc_child_data(const struct child_process *cmd) cd->is_running = 1; cd->is_git_cmd = cmd->git_cmd; cd->use_shell = cmd->use_shell; + cd->is_interactive = is_interactive(cmd->slog_child_class); + if (cmd->slog_child_class && *cmd->slog_child_class) + cd->child_class = xstrdup(cmd->slog_child_class); jw_init(&cd->jw_argv); @@ -895,6 +1001,11 @@ int slog_child_starting(const struct child_process *cmd) jw_object_intmax(&jw_data, "child_id", child_id); jw_object_bool(&jw_data, "git_cmd", cd->is_git_cmd); jw_object_bool(&jw_data, "use_shell", cd->use_shell); + jw_object_bool(&jw_data, "is_interactive", + cd->is_interactive); + if (cd->child_class) + jw_object_string(&jw_data, "child_class", + cd->child_class); jw_object_sub_jw(&jw_data, "child_argv", &cd->jw_argv); } jw_end(&jw_data); @@ -925,6 +1036,8 @@ void slog_child_ended(int child_id, int child_pid, int child_exit_code) cd->end_ns = getnanotime(); cd->is_running = 0; + add_child_to_summary_data(cd); + /* build data portion for a "detail" event */ if (slog_want_detail_event("child")) { struct json_writer jw_data = JSON_WRITER_INIT; @@ -934,6 +1047,11 @@ void slog_child_ended(int child_id, int child_pid, int child_exit_code) jw_object_intmax(&jw_data, "child_id", child_id); jw_object_bool(&jw_data, "git_cmd", cd->is_git_cmd); jw_object_bool(&jw_data, "use_shell", cd->use_shell); + jw_object_bool(&jw_data, "is_interactive", + cd->is_interactive); + if (cd->child_class) + jw_object_string(&jw_data, "child_class", + cd->child_class); jw_object_sub_jw(&jw_data, "child_argv", &cd->jw_argv); jw_object_intmax(&jw_data, "child_pid", child_pid); @@ -957,6 +1075,7 @@ static void free_children(void) struct child_data *cd = my__child_data.array[k]; jw_release(&cd->jw_argv); + free(cd->child_class); free(cd); } diff --git a/sub-process.c b/sub-process.c index 8d2a1707cfe1a7..93f7a529a8f439 100644 --- a/sub-process.c +++ b/sub-process.c @@ -88,6 +88,7 @@ int subprocess_start(struct hashmap *hashmap, struct subprocess_entry *entry, co process->out = -1; process->clean_on_exit = 1; process->clean_on_exit_handler = subprocess_exit_handler; + process->slog_child_class = "subprocess"; err = start_command(process); if (err) { From 0de1490d6070048a6332ed5932f0b60c1e763e4c Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 28 Jun 2018 19:11:02 +0000 Subject: [PATCH 24/29] structured-logging: t0420 tests for child process detail events Signed-off-by: Jeff Hostetler --- t/t0420-structured-logging.sh | 39 +++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/t/t0420-structured-logging.sh b/t/t0420-structured-logging.sh index 2e06cd79af77e5..4ac404dd2b20f1 100755 --- a/t/t0420-structured-logging.sh +++ b/t/t0420-structured-logging.sh @@ -26,6 +26,9 @@ test_expect_success 'setup' ' cat >key_exit_code_129 <<-\EOF && "exit_code":129 EOF + cat >key_detail <<-\EOF && + "event":"detail" + EOF git config --local slog.pretty false && git config --local slog.path "$LOGFILE" ' @@ -221,4 +224,40 @@ test_expect_success PERLJSON 'turn on aux-data, verify a few fields' ' grep "row\[0\]\.aux\.index\[.*\]\[0\] sparse_checkout_count" event_exit && + grep -f key_detail "$LOGFILE" >event_detail && + + perl "$TEST_DIRECTORY"/t0420/parse_json.perl parsed_exit && + perl "$TEST_DIRECTORY"/t0420/parse_json.perl parsed_detail && + + grep "row\[0\]\.event cmd_exit" Date: Fri, 29 Jun 2018 22:57:51 +0000 Subject: [PATCH 25/29] structured-logging: t0420 tests for interacitve child_summary Test running a command with a fake pager and verify that a child_summary is generated. Signed-off-by: Jeff Hostetler --- t/t0420-structured-logging.sh | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/t/t0420-structured-logging.sh b/t/t0420-structured-logging.sh index 4ac404dd2b20f1..69f811aa9ea4ae 100755 --- a/t/t0420-structured-logging.sh +++ b/t/t0420-structured-logging.sh @@ -260,4 +260,34 @@ test_expect_success PERLJSON 'verify child start/end events during clone' ' grep "row\[1\]\.detail\.data\.child_exit_code 0" event_exit && + + perl "$TEST_DIRECTORY"/t0420/parse_json.perl parsed_exit && + + grep "row\[0\]\.child_summary\.pager\.count 1" Date: Thu, 28 Jun 2018 15:08:39 +0000 Subject: [PATCH 26/29] structured-logging: add config data facility Add "config" section to "cmd_exit" event to record important configuration settings in the log. Add the value of "slog.detail", "slog.timers", and "slog.aux" config settings to the log. These values control the filtering of the log. Knowing the filter settings can help post-processors reason about the contents of the log. Signed-off-by: Jeff Hostetler --- structured-logging.c | 132 +++++++++++++++++++++++++++++++++++++++++++ structured-logging.h | 13 +++++ 2 files changed, 145 insertions(+) diff --git a/structured-logging.c b/structured-logging.c index 2571e798ef57f8..0e3f79ee48627c 100644 --- a/structured-logging.c +++ b/structured-logging.c @@ -83,6 +83,20 @@ struct child_data_array { static struct child_data_array my__child_data; static void free_children(void); +struct config_data { + char *group; + struct json_writer jw; +}; + +struct config_data_array { + struct config_data **array; + size_t nr, alloc; +}; + +static struct config_data_array my__config_data; +static void format_config_data(struct json_writer *jw); +static void free_config_data(void); + static uint64_t my__start_time; static uint64_t my__exit_time; static int my__is_config_loaded; @@ -132,6 +146,15 @@ static int want_category(const struct category_filter *cf, const char *category) return !!strstr(cf->categories, category); } +static void set_config_data_from_category(const struct category_filter *cf, + const char *key) +{ + if (cf->want == 0 || cf->want == 1) + slog_set_config_data_intmax(key, cf->want); + else + slog_set_config_data_string(key, cf->categories); +} + /* * Compute a new session id for the current process. Build string * with the start time and PID of the current process and append @@ -249,6 +272,18 @@ static void emit_exit_event(void) struct json_writer jw = JSON_WRITER_INIT; uint64_t atexit_time = getnanotime() / 1000; + /* + * Copy important (and non-obvious) config settings into the + * "config" section of the "cmd_exit" event. The values of + * "slog.detail", "slog.timers", and "slog.aux" are used in + * category want filtering, so post-processors should know the + * filter settings so that they can tell if an event is missing + * because of filtering or an error. + */ + set_config_data_from_category(&my__detail_categories, "slog.detail"); + set_config_data_from_category(&my__timer_categories, "slog.timers"); + set_config_data_from_category(&my__aux_categories, "slog.aux"); + /* close unterminated forms */ if (my__errors.json.len) jw_end(&my__errors); @@ -299,6 +334,12 @@ static void emit_exit_event(void) } jw_end(&jw); + if (my__config_data.nr) { + jw_object_inline_begin_object(&jw, "config"); + format_config_data(&jw); + jw_end(&jw); + } + if (my__timers.nr) { jw_object_inline_begin_object(&jw, "timers"); format_timers(&jw); @@ -479,6 +520,7 @@ static void do_final_steps(int in_signal) free_child_summary_data(); free_timers(); free_children(); + free_config_data(); } static void slog_atexit(void) @@ -1084,4 +1126,94 @@ static void free_children(void) my__child_data.alloc = 0; } +/* + * Split into . (for example "slog.path" into "slog" and "path") + * Find or insert in config_data_array[]. + * + * Return config_data_arary[]. + */ +static struct config_data *find_config_data(const char *key, const char **sub_key) +{ + struct config_data *cd; + char *dot; + size_t group_len; + int k; + + dot = strchr(key, '.'); + if (!dot) + return NULL; + + *sub_key = dot + 1; + + group_len = dot - key; + + for (k = 0; k < my__config_data.nr; k++) { + cd = my__config_data.array[k]; + if (!strncmp(key, cd->group, group_len)) + return cd; + } + + cd = xcalloc(1, sizeof(struct config_data)); + cd->group = xstrndup(key, group_len); + + jw_object_begin(&cd->jw, my__is_pretty); + /* leave per-group object unterminated for now */ + + ALLOC_GROW(my__config_data.array, my__config_data.nr + 1, + my__config_data.alloc); + my__config_data.array[my__config_data.nr++] = cd; + + return cd; +} + +void slog_set_config_data_string(const char *key, const char *value) +{ + const char *sub_key; + struct config_data *cd = find_config_data(key, &sub_key); + + if (cd) + jw_object_string(&cd->jw, sub_key, value); +} + +void slog_set_config_data_intmax(const char *key, intmax_t value) +{ + const char *sub_key; + struct config_data *cd = find_config_data(key, &sub_key); + + if (cd) + jw_object_intmax(&cd->jw, sub_key, value); +} + +static void format_config_data(struct json_writer *jw) +{ + int k; + + for (k = 0; k < my__config_data.nr; k++) { + struct config_data *cd = my__config_data.array[k]; + + /* termminate per-group form */ + jw_end(&cd->jw); + + /* insert per-category form into containing "config" form */ + jw_object_sub_jw(jw, cd->group, &cd->jw); + } +} + +static void free_config_data(void) +{ + int k; + + for (k = 0; k < my__config_data.nr; k++) { + struct config_data *cd = my__config_data.array[k]; + + jw_release(&cd->jw); + free(cd->group); + free(cd); + } + + FREE_AND_NULL(my__config_data.array); + my__config_data.nr = 0; + my__config_data.alloc = 0; +} + #endif diff --git a/structured-logging.h b/structured-logging.h index 7c98d336f9e2df..2c90267e0975bb 100644 --- a/structured-logging.h +++ b/structured-logging.h @@ -34,6 +34,8 @@ static inline void slog_stop_timer(int tid) { }; #define slog_aux_jw(c, k, v) do { } while (0) #define slog_child_starting(cmd) (SLOG_UNDEFINED_CHILD_ID) #define slog_child_ended(i, p, ec) do { } while (0) +#define slog_set_config_data_string(k, v) do { } while (0) +#define slog_set_config_data_intmax(k, v) do { } while (0) #else @@ -162,5 +164,16 @@ void slog_aux_jw(const char *category, const char *key, int slog_child_starting(const struct child_process *cmd); void slog_child_ended(int child_id, int child_pid, int child_exit_code); +/* + * Add an important config key/value pair to the "cmd_event". Keys + * are assumed to be of the form ., such as "slog.path". + * The pair will appear under the "config" object in the resulting JSON + * as "config..:". + * + * This should only be used for important config settings. + */ +void slog_set_config_data_string(const char *key, const char *value); +void slog_set_config_data_intmax(const char *key, intmax_t value); + #endif /* STRUCTURED_LOGGING */ #endif /* STRUCTURED_LOGGING_H */ From 498b9706b2a5bd88dce6483b2800a15dc4eed6c7 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Fri, 27 Jul 2018 16:17:04 -0400 Subject: [PATCH 27/29] gvfs-structured-logging: log gvfs.* config settings Add all "gvfs.*" config settings to the cmd_exit event. Signed-off-by: Jeff Hostetler --- config.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/config.c b/config.c index a5ce3de87d9f0f..f73d1c83a1d0ba 100644 --- a/config.c +++ b/config.c @@ -1491,6 +1491,11 @@ int git_default_config(const char *var, const char *value, void *cb) if (starts_with(var, "slog.")) return slog_default_config(var, value); + if (starts_with(var, "gvfs.")) { + slog_set_config_data_string(var, value); + return 0; + } + /* Add other config variables here and to Documentation/config.txt. */ return 0; } From 81ded923d64dbc0b213f09df54f4443234afa2a2 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Fri, 27 Jul 2018 16:32:44 -0400 Subject: [PATCH 28/29] gvfs-structured-logging: add timer around read_object_process Add SLOG timer to capture total/min/max/avg time spent waiting for an object to be dynamically fetched. This will be reported in the "cmd_exit" event. Signed-off-by: Jeff Hostetler --- sha1-file.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sha1-file.c b/sha1-file.c index b79488c72dc6f9..034c64e217deee 100644 --- a/sha1-file.c +++ b/sha1-file.c @@ -742,6 +742,7 @@ static int read_object_process(const struct object_id *oid) struct strbuf status = STRBUF_INIT; const char *cmd = find_hook("read-object"); uint64_t start; + int slog_tid; start = getnanotime(); @@ -769,6 +770,8 @@ static int read_object_process(const struct object_id *oid) if (!(CAP_GET & entry->supported_capabilities)) return -1; + slog_tid = slog_start_timer("object", "read_object_process"); + sigchain_push(SIGPIPE, SIG_IGN); err = packet_write_fmt_gently(process->in, "command=get\n"); @@ -814,6 +817,8 @@ static int read_object_process(const struct object_id *oid) } } + slog_stop_timer(slog_tid); + trace_performance_since(start, "read_object_process"); return err; From 7bdc2e6788536edfed376632456624b02433fd4d Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Tue, 31 Jul 2018 17:34:25 -0400 Subject: [PATCH 29/29] gvfs-structured-logging: add aux data for status deserialization Add aux data describing results of attempting to deserialize status data using a status cache. When deserializing from a file: aux.status.deserialize = ( "ok" | "reject" ) aux.status.deserialize_wait = When deserializing from stdin: aux.status.deserialize_stdin = ( "ok" | "reject" ) Status will fallback and run a normal status scan when a "reject" is reported (unless "--deserialize-wait=fail"). If "ok" is reported, status was able to use the status cache and avoid scanning the workdir. Signed-off-by: Jeff Hostetler --- wt-status-deserialize.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/wt-status-deserialize.c b/wt-status-deserialize.c index 2e293057744c32..925393d1bbe017 100644 --- a/wt-status-deserialize.c +++ b/wt-status-deserialize.c @@ -732,8 +732,11 @@ static int try_deserialize_read_from_file(const struct wt_status *cmd_s, * For "fail" or "no", try exactly once to read the status cache. * Return an error if the file is stale. */ - if (dw == DESERIALIZE_WAIT__FAIL || dw == DESERIALIZE_WAIT__NO) - return try_deserialize_read_from_file_1(cmd_s, path, des_s); + if (dw == DESERIALIZE_WAIT__FAIL || dw == DESERIALIZE_WAIT__NO) { + result = try_deserialize_read_from_file_1(cmd_s, path, des_s); + k = 0; + goto done; + } /* * Wait for the status cache file to refresh. Wait duration can @@ -761,6 +764,12 @@ static int try_deserialize_read_from_file(const struct wt_status *cmd_s, trace_printf_key(&trace_deserialize, "wait polled=%d result=%d '%s'", k, result, path); + +done: + slog_aux_string("status", "deserialize", + ((result == DESERIALIZE_OK) ? "ok" : "reject")); + slog_aux_intmax("status", "deserialize_wait", k); + return result; } @@ -793,6 +802,8 @@ int wt_status_deserialize(const struct wt_status *cmd_s, * term, since we cannot read stdin multiple times. */ result = wt_deserialize_fd(cmd_s, &des_s, 0); + slog_aux_string("status", "deserialize_stdin", + ((result == DESERIALIZE_OK) ? "ok" : "reject")); } if (result == DESERIALIZE_OK)