diff --git a/.github/ISSUE_TEMPLATE/bug-report.md b/.github/ISSUE_TEMPLATE/bug-report.md index 0d2a3b627a..17710d9001 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.md +++ b/.github/ISSUE_TEMPLATE/bug-report.md @@ -37,7 +37,7 @@ assignees: '' ### Stack Trace -[If applicable, the stack trace which shows the error. Find it with `cortex logs -v `, or use `kubectl get pods -n cortex` and use the name of the failed pod in `kubectl logs -n cortex`] +[If applicable, the stack trace which shows the error. Find it with `cortex logs `, or use `kubectl get pods -n cortex` and use the name of the failed pod in `kubectl logs -n cortex`] ``` diff --git a/cli/cmd/lib_client.go b/cli/cmd/lib_client.go index ba7535c1b5..464e9c8d11 100644 --- a/cli/cmd/lib_client.go +++ b/cli/cmd/lib_client.go @@ -27,7 +27,6 @@ import ( "os" "os/signal" "regexp" - "strconv" "strings" "time" @@ -153,7 +152,7 @@ func HTTPUploadZip(endpoint string, zipInput *zip.Input, fileName string, qParam return HTTPUpload(endpoint, uploadInput, qParams...) } -func StreamLogs(appName string, resourceName string, resourceType string, verbose bool) error { +func StreamLogs(appName string, resourceName string, resourceType string) error { interrupt := make(chan os.Signal, 1) signal.Notify(interrupt, os.Interrupt) @@ -166,7 +165,6 @@ func StreamLogs(appName string, resourceName string, resourceType string, verbos values.Set("resourceName", resourceName) values.Set("resourceType", resourceType) values.Set("appName", appName) - values.Set("verbose", strconv.FormatBool(verbose)) req.URL.RawQuery = values.Encode() wsURL := req.URL.String() wsURL = strings.Replace(wsURL, "http", "ws", 1) diff --git a/cli/cmd/logs.go b/cli/cmd/logs.go index 0743033db0..4f7a538408 100644 --- a/cli/cmd/logs.go +++ b/cli/cmd/logs.go @@ -26,7 +26,6 @@ import ( func init() { addAppNameFlag(logsCmd) addEnvFlag(logsCmd) - addVerboseFlag(logsCmd) // addResourceTypesToHelp(logsCmd) } @@ -59,7 +58,7 @@ var logsCmd = &cobra.Command{ errors.Exit(err) } - err = StreamLogs(appName, resourceName, resourceTypeStr, flagVerbose) + err = StreamLogs(appName, resourceName, resourceTypeStr) if err != nil { errors.Exit(err) } diff --git a/docs/cluster/cli.md b/docs/cluster/cli.md index f76f79b77c..ed8f994dea 100644 --- a/docs/cluster/cli.md +++ b/docs/cluster/cli.md @@ -48,10 +48,9 @@ Flags: -d, --deployment string deployment name -e, --env string environment (default "dev") -h, --help help for logs - -v, --verbose show verbose output ``` -The `logs` command streams logs from the workload corresponding to the specified resource. For example, `cortex logs api my-api` will stream the logs from the most recent api named `my-api`. `RESOURCE_TYPE` is optional (unless there are name colisions), so `cortex logs my-api` will also work. Using the `-v` or `--verbose` flag will show all of the logs for the workload (not just Cortex's logs). +The `logs` command streams logs from the workload corresponding to the specified resource. For example, `cortex logs api my-api` will stream the logs from the most recent api named `my-api`. `RESOURCE_TYPE` is optional (unless there are name colisions), so `cortex logs my-api` will also work. ## refresh diff --git a/docs/deployments/request-handlers.md b/docs/deployments/request-handlers.md index 7eb6b8e99f..79d40a804d 100644 --- a/docs/deployments/request-handlers.md +++ b/docs/deployments/request-handlers.md @@ -102,4 +102,4 @@ def pre_inference(sample, metadata): ... ``` -The output of these logs can be viewed using `cortex logs -v `. +The output of these logs can be viewed using `cortex logs `. diff --git a/docs/deployments/statuses.md b/docs/deployments/statuses.md index 3ab0e12fdd..a30c36af5e 100644 --- a/docs/deployments/statuses.md +++ b/docs/deployments/statuses.md @@ -9,7 +9,7 @@ | creating | API is being created | | stopping | API is stopping | | stopped | API is stopped | -| error | API was not created due to an error; run `cortex logs -v ` to view the logs | +| error | API was not created due to an error; run `cortex logs ` to view the logs | | skipped | API was not created due to an error in another resource | | upstream error | API was not created due to an error in one of its dependencies; a previous version of this API may be ready | | upstream termination | API was not created because one of its dependencies was terminated; a previous version of this API may be ready | diff --git a/pkg/operator/endpoints/logs.go b/pkg/operator/endpoints/logs.go index c84dc4d539..ef3fcceedc 100644 --- a/pkg/operator/endpoints/logs.go +++ b/pkg/operator/endpoints/logs.go @@ -40,8 +40,6 @@ func ReadLogs(w http.ResponseWriter, r *http.Request) { return } - verbose := getOptionalBoolQParam("verbose", false, r) - workloadID := getOptionalQParam("workloadID", r) resourceID := getOptionalQParam("resourceID", r) resourceName := getOptionalQParam("resourceName", r) @@ -54,7 +52,7 @@ func ReadLogs(w http.ResponseWriter, r *http.Request) { if workloadID != "" { podLabels["workloadID"] = workloadID - readLogs(w, r, podLabels, appName, verbose) + readLogs(w, r, podLabels, appName) return } @@ -70,7 +68,7 @@ func ReadLogs(w http.ResponseWriter, r *http.Request) { } podLabels["workloadID"] = workloadID - readLogs(w, r, podLabels, appName, verbose) + readLogs(w, r, podLabels, appName) return } @@ -90,7 +88,7 @@ func ReadLogs(w http.ResponseWriter, r *http.Request) { } else { podLabels["workloadID"] = res.GetWorkloadID() } - readLogs(w, r, podLabels, appName, verbose) + readLogs(w, r, podLabels, appName) return } @@ -103,7 +101,7 @@ func ReadLogs(w http.ResponseWriter, r *http.Request) { } else { podLabels["workloadID"] = res.GetWorkloadID() } - readLogs(w, r, podLabels, appName, verbose) + readLogs(w, r, podLabels, appName) return } @@ -115,7 +113,7 @@ func ReadLogs(w http.ResponseWriter, r *http.Request) { workloadIDs = slices.UniqueStrings(workloadIDs) if len(workloadIDs) == 1 { podLabels["workloadID"] = workloadIDs[0] - readLogs(w, r, podLabels, appName, verbose) + readLogs(w, r, podLabels, appName) return } @@ -123,7 +121,7 @@ func ReadLogs(w http.ResponseWriter, r *http.Request) { return } -func readLogs(w http.ResponseWriter, r *http.Request, podLabels map[string]string, appName string, verbose bool) { +func readLogs(w http.ResponseWriter, r *http.Request, podLabels map[string]string, appName string) { upgrader := websocket.Upgrader{} socket, err := upgrader.Upgrade(w, r, nil) if err != nil { @@ -132,5 +130,5 @@ func readLogs(w http.ResponseWriter, r *http.Request, podLabels map[string]strin } defer socket.Close() - workloads.ReadLogs(appName, podLabels, verbose, socket) + workloads.ReadLogs(appName, podLabels, socket) } diff --git a/pkg/operator/workloads/logs.go b/pkg/operator/workloads/logs.go index 2c6f7836bd..e4ece75915 100644 --- a/pkg/operator/workloads/logs.go +++ b/pkg/operator/workloads/logs.go @@ -21,7 +21,6 @@ import ( "fmt" "io" "os" - "regexp" "sort" "strings" "time" @@ -48,7 +47,7 @@ const ( initLogTailLines = 100 ) -func ReadLogs(appName string, podSearchLabels map[string]string, verbose bool, socket *websocket.Conn) { +func ReadLogs(appName string, podSearchLabels map[string]string, socket *websocket.Conn) { wrotePending := false for true { @@ -75,26 +74,26 @@ func ReadLogs(appName string, podSearchLabels map[string]string, verbose bool, s switch { case len(podMap[k8s.PodStatusSucceeded]) > 0: - getKubectlLogs(podMap[k8s.PodStatusSucceeded], verbose, wrotePending, socket) + getKubectlLogs(podMap[k8s.PodStatusSucceeded], wrotePending, socket) case len(podMap[k8s.PodStatusRunning]) > 0: - getKubectlLogs(podMap[k8s.PodStatusRunning], verbose, wrotePending, socket) + getKubectlLogs(podMap[k8s.PodStatusRunning], wrotePending, socket) case len(podMap[k8s.PodStatusPending]) > 0: - getKubectlLogs(podMap[k8s.PodStatusPending], verbose, wrotePending, socket) + getKubectlLogs(podMap[k8s.PodStatusPending], wrotePending, socket) case len(podMap[k8s.PodStatusKilled]) > 0: - getKubectlLogs(podMap[k8s.PodStatusKilled], verbose, wrotePending, socket) + getKubectlLogs(podMap[k8s.PodStatusKilled], wrotePending, socket) case len(podMap[k8s.PodStatusKilledOOM]) > 0: - getKubectlLogs(podMap[k8s.PodStatusKilledOOM], verbose, wrotePending, socket) + getKubectlLogs(podMap[k8s.PodStatusKilledOOM], wrotePending, socket) case len(podMap[k8s.PodStatusFailed]) > 0: - getKubectlLogs(podMap[k8s.PodStatusFailed], verbose, wrotePending, socket) + getKubectlLogs(podMap[k8s.PodStatusFailed], wrotePending, socket) case len(podMap[k8s.PodStatusTerminating]) > 0: - getKubectlLogs(podMap[k8s.PodStatusTerminating], verbose, wrotePending, socket) + getKubectlLogs(podMap[k8s.PodStatusTerminating], wrotePending, socket) case len(podMap[k8s.PodStatusUnknown]) > 0: - getKubectlLogs(podMap[k8s.PodStatusUnknown], verbose, wrotePending, socket) + getKubectlLogs(podMap[k8s.PodStatusUnknown], wrotePending, socket) default: // unexpected if len(pods) > maxParallelPodLogging { pods = pods[:maxParallelPodLogging] } - getKubectlLogs(pods, verbose, wrotePending, socket) + getKubectlLogs(pods, wrotePending, socket) } return } @@ -108,7 +107,7 @@ func ReadLogs(appName string, podSearchLabels map[string]string, verbose bool, s } if isEnded { - getCloudWatchLogs(workloadID, verbose, socket) + getCloudWatchLogs(workloadID, socket) return } } @@ -124,7 +123,7 @@ func ReadLogs(appName string, podSearchLabels map[string]string, verbose bool, s } } -func getKubectlLogs(pods []kcore.Pod, verbose bool, wrotePending bool, socket *websocket.Conn) { +func getKubectlLogs(pods []kcore.Pod, wrotePending bool, socket *websocket.Conn) { if !wrotePending { isAllPending := true for _, pod := range pods { @@ -152,7 +151,7 @@ func getKubectlLogs(pods []kcore.Pod, verbose bool, wrotePending bool, socket *w podCheckCancel := make(chan struct{}) defer close(podCheckCancel) - go podCheck(podCheckCancel, socket, pods, verbose, wrotePending, inr) + go podCheck(podCheckCancel, socket, pods, wrotePending, inr) pumpStdin(socket, inw) podCheckCancel <- struct{}{} } @@ -221,7 +220,7 @@ func createKubectlProcess(logKey LogKey, attrs *os.ProcAttr) (*os.Process, error return process, nil } -func podCheck(podCheckCancel chan struct{}, socket *websocket.Conn, initialPodList []kcore.Pod, verbose bool, wrotePending bool, inr *os.File) { +func podCheck(podCheckCancel chan struct{}, socket *websocket.Conn, initialPodList []kcore.Pod, wrotePending bool, inr *os.File) { timer := time.NewTimer(0) defer timer.Stop() @@ -250,7 +249,7 @@ func podCheck(podCheckCancel chan struct{}, socket *websocket.Conn, initialPodLi socketWriterError := make(chan error, 1) defer close(socketWriterError) - go pumpStdout(socket, socketWriterError, outr, verbose, true) + go pumpStdout(socket, socketWriterError, outr) for { select { @@ -337,7 +336,7 @@ func deleteProcesses(processMap map[string]*os.Process) { } } -func getCloudWatchLogs(prefix string, verbose bool, socket *websocket.Conn) { +func getCloudWatchLogs(prefix string, socket *websocket.Conn) { logs, err := config.AWS.GetLogs(prefix, config.Cortex.LogGroup) if err != nil { config.Telemetry.ReportError(err) @@ -355,7 +354,7 @@ func getCloudWatchLogs(prefix string, verbose bool, socket *websocket.Conn) { socketWriterError := make(chan error) defer close(socketWriterError) - go pumpStdout(socket, socketWriterError, logsReader, verbose, false) + go pumpStdout(socket, socketWriterError, logsReader) inr, inw, err := os.Pipe() if err != nil { @@ -383,23 +382,17 @@ func pumpStdin(socket *websocket.Conn, writer io.Writer) { } } -func pumpStdout(socket *websocket.Conn, socketWriterError chan error, reader io.Reader, verbose bool, checkForLastLog bool) { +func pumpStdout(socket *websocket.Conn, socketWriterError chan error, reader io.Reader) { scanner := bufio.NewScanner(reader) for scanner.Scan() { socket.SetWriteDeadline(time.Now().Add(socketWriteDeadlineWait)) logBytes := scanner.Bytes() - isLastLog := false - if !verbose { - logBytes, isLastLog = cleanLogBytes(logBytes) - } + if logBytes != nil { if !writeSocketBytes(logBytes, socket) { break } } - if isLastLog && checkForLastLog && !verbose { - break - } } select { @@ -416,43 +409,6 @@ func pumpStdout(socket *websocket.Conn, socketWriterError chan error, reader io. socket.Close() } -// Pod name is added when streaming from kubectl logs but not for cloudwatch logs, match it if it present and filter it out -var cortexRegex = regexp.MustCompile(`^(\[[A-Za-z0-9\d\-_\s]*\]\ )?(DEBUG|INFO|WARNING|ERROR|CRITICAL):cortex:`) -var lastLogRe = regexp.MustCompile(`^workload: (\w+), completed: (\S+)`) - -func extractFromCortexLog(match string, loglevel string, logStr string) (*string, bool) { - if loglevel == "DEBUG" { - return nil, false - } - - cutStr := logStr[len(match):] - - isLastLog := false - if lastLogRe.MatchString(cutStr) { - isLastLog = true - } - - return &cutStr, isLastLog -} - -func cleanLog(logStr string) (*string, bool) { - matches := cortexRegex.FindStringSubmatch(logStr) - if len(matches) == 3 { - return extractFromCortexLog(matches[0], matches[2], logStr) - } - - return nil, false -} - -func cleanLogBytes(logBytes []byte) ([]byte, bool) { - logStr := string(logBytes) - cleanLogStr, isLastLog := cleanLog(logStr) - if cleanLogStr == nil { - return nil, isLastLog - } - return []byte(*cleanLogStr), isLastLog -} - func stopProcess(process *os.Process) { process.Signal(os.Interrupt) time.Sleep(5 * time.Second) diff --git a/pkg/workloads/cortex/onnx_serve/api.py b/pkg/workloads/cortex/onnx_serve/api.py index ca0e67dfd3..067288a2c5 100644 --- a/pkg/workloads/cortex/onnx_serve/api.py +++ b/pkg/workloads/cortex/onnx_serve/api.py @@ -229,15 +229,10 @@ def predict(app_name, api_name): except CortexException as e: e.wrap("error", "sample {}".format(i + 1)) - logger.error(str(e)) - logger.exception( - "An error occurred, see `cx logs -v api {}` for more details.".format(api["name"]) - ) + logger.exception(str(e)) return prediction_failed(str(e)) except Exception as e: - logger.exception( - "An error occurred, see `cx logs -v api {}` for more details.".format(api["name"]) - ) + logger.exception(str(e)) return prediction_failed(str(e)) predictions.append(result) @@ -296,19 +291,13 @@ def start(args): except CortexException as e: e.wrap("error") logger.error(str(e)) - if api is not None: - logger.exception( - "An error occured starting the api, see `cx logs -v api {}` for more details".format( - api["name"] - ) - ) sys.exit(1) if api.get("tracker") is not None and api["tracker"].get("model_type") == "classification": try: local_cache["class_set"] = api_utils.get_classes(ctx, api["name"]) except Exception as e: - logger.warn("An error occurred while attempting to load classes", exc_info=True) + logger.warn("an error occurred while attempting to load classes", exc_info=True) serve(app, listen="*:{}".format(args.port)) diff --git a/pkg/workloads/cortex/tf_api/api.py b/pkg/workloads/cortex/tf_api/api.py index 1be937defd..e6b35c016e 100644 --- a/pkg/workloads/cortex/tf_api/api.py +++ b/pkg/workloads/cortex/tf_api/api.py @@ -257,19 +257,10 @@ def predict(deployment_name, api_name): result = run_predict(sample, debug) except CortexException as e: e.wrap("error", "sample {}".format(i + 1)) - logger.error(str(e)) - logger.exception( - "An error occurred, see `cortex logs -v api {}` for more details.".format( - api["name"] - ) - ) + logger.exception(str(e)) return prediction_failed(str(e)) except Exception as e: - logger.exception( - "An error occurred, see `cortex logs -v api {}` for more details.".format( - api["name"] - ) - ) + logger.exception(str(e)) return prediction_failed(str(e)) predictions.append(result) @@ -308,15 +299,10 @@ def get_signature(app_name, api_name): local_cache["api"]["tf_serving"]["signature_key"], ) except CortexException as e: - logger.error(str(e)) - logger.exception( - "An error occurred, see `cortex logs -v api {}` for more details.".format(api["name"]) - ) + logger.exception(str(e)) return str(e), HTTP_404_NOT_FOUND except Exception as e: - logger.exception( - "An error occurred, see `cortex logs -v api {}` for more details.".format(api["name"]) - ) + logger.exception(str(e)) return str(e), HTTP_404_NOT_FOUND response = {"signature": metadata} @@ -366,21 +352,16 @@ def start(args): local_cache["request_handler"] = ctx.get_request_handler_impl(api["name"]) except CortexException as e: e.wrap("error") - logger.error(str(e)) - logger.exception( - "An error occurred, see `cortex logs -v api {}` for more details.".format(api["name"]) - ) + logger.exception(str(e)) sys.exit(1) except Exception as e: - logger.exception( - "An error occurred, see `cortex logs -v api {}` for more details.".format(api["name"]) - ) + logger.exception(str(e)) sys.exit(1) try: validate_model_dir(args.model_dir) except Exception as e: - logger.exception(e) + logger.exception("failed to validate model") sys.exit(1) if api.get("tracker") is not None and api["tracker"].get("model_type") == "classification": @@ -400,16 +381,10 @@ def start(args): break except Exception as e: if i > 6: - logger.exception( - "An error occurred when reading model metadata, retrying... See `cortex logs -v api {}` for more details.".format( - api["name"] - ) - ) + logger.warn("an error occurred when reading model metadata, retrying...") if i == limit - 1: logger.exception( - "An error occurred when reading model metadata, retry limit exceeded. See `cortex logs -v api {}` for more details.".format( - api["name"] - ) + "an error occurred when reading model metadata: retry limit exceeded" ) sys.exit(1)