Skip to content

Commit 0dab2d7

Browse files
authored
Reworking logs (#1778)
1 parent 93f2947 commit 0dab2d7

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

58 files changed

+1180
-1083
lines changed

CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ image_metrics_server: <account_id>.dkr.ecr.<region>.amazonaws.com/cortexlabs/met
186186
image_inferentia: <account_id>.dkr.ecr.<region>.amazonaws.com/cortexlabs/inferentia:latest
187187
image_neuron_rtd: <account_id>.dkr.ecr.<region>.amazonaws.com/cortexlabs/neuron-rtd:latest
188188
image_nvidia: <account_id>.dkr.ecr.<region>.amazonaws.com/cortexlabs/nvidia:latest
189-
image_fluentd: <account_id>.dkr.ecr.<region>.amazonaws.com/cortexlabs/fluentd:latest
189+
image_fluent_bit: <account_id>.dkr.ecr.<region>.amazonaws.com/cortexlabs/fluent-bit:latest
190190
image_statsd: <account_id>.dkr.ecr.<region>.amazonaws.com/cortexlabs/statsd:latest
191191
image_istio_proxy: <account_id>.dkr.ecr.<region>.amazonaws.com/cortexlabs/istio-proxy:latest
192192
image_istio_pilot: <account_id>.dkr.ecr.<region>.amazonaws.com/cortexlabs/istio-pilot:latest

build/images.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ non_dev_images_cluster=(
6767
"operator"
6868
"istio-proxy"
6969
"istio-pilot"
70+
"fluent-bit"
7071
)
7172
non_dev_images_aws=(
7273
# includes non_dev_images_cluster
@@ -76,7 +77,6 @@ non_dev_images_aws=(
7677
"inferentia"
7778
"neuron-rtd"
7879
"nvidia"
79-
"fluentd"
8080
"statsd"
8181
)
8282
non_dev_images_gcp=(

cli/cluster/logs.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,11 @@ import (
2525
"os/signal"
2626
"strings"
2727

28+
"github.com/cortexlabs/cortex/cli/lib/routines"
2829
"github.com/cortexlabs/cortex/pkg/consts"
2930
"github.com/cortexlabs/cortex/pkg/lib/errors"
3031
"github.com/cortexlabs/cortex/pkg/lib/exit"
3132
"github.com/cortexlabs/cortex/pkg/lib/json"
32-
"github.com/cortexlabs/cortex/pkg/lib/routines"
3333
"github.com/cortexlabs/cortex/pkg/operator/schema"
3434
"github.com/gorilla/websocket"
3535
)
@@ -120,7 +120,7 @@ func handleConnection(connection *websocket.Conn, done chan struct{}) {
120120
if err != nil {
121121
exit.Error(ErrorOperatorSocketRead(err))
122122
}
123-
fmt.Println(string(message))
123+
fmt.Print(string(message))
124124
}
125125
}, false)
126126
}

cli/cmd/cluster_gcp.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,7 @@ func createGKECluster(clusterConfig *clusterconfig.GCPConfig, gcpClient *gcp.Cli
435435
gkeClusterConfig := containerpb.Cluster{
436436
Name: clusterConfig.ClusterName,
437437
InitialClusterVersion: "1.17",
438+
LoggingService: "none",
438439
NodePools: []*containerpb.NodePool{
439440
{
440441
Name: "ng-cortex-operator",

cli/cmd/lib_cluster_config_aws.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -383,10 +383,10 @@ func setConfigFieldsFromCached(userClusterConfig *clusterconfig.Config, cachedCl
383383
}
384384
userClusterConfig.ImageNvidia = cachedClusterConfig.ImageNvidia
385385

386-
if s.Obj(cachedClusterConfig.ImageFluentd) != s.Obj(userClusterConfig.ImageFluentd) {
387-
return clusterconfig.ErrorConfigCannotBeChangedOnUpdate(clusterconfig.ImageFluentdKey, cachedClusterConfig.ImageFluentd)
386+
if s.Obj(cachedClusterConfig.ImageFluentBit) != s.Obj(userClusterConfig.ImageFluentBit) {
387+
return clusterconfig.ErrorConfigCannotBeChangedOnUpdate(clusterconfig.ImageFluentBitKey, cachedClusterConfig.ImageFluentBit)
388388
}
389-
userClusterConfig.ImageFluentd = cachedClusterConfig.ImageFluentd
389+
userClusterConfig.ImageFluentBit = cachedClusterConfig.ImageFluentBit
390390

391391
if s.Obj(cachedClusterConfig.ImageStatsd) != s.Obj(userClusterConfig.ImageStatsd) {
392392
return clusterconfig.ErrorConfigCannotBeChangedOnUpdate(clusterconfig.ImageStatsdKey, cachedClusterConfig.ImageStatsd)
@@ -691,8 +691,8 @@ func clusterConfigConfirmationStr(clusterConfig clusterconfig.Config, awsCreds A
691691
if clusterConfig.ImageNvidia != defaultConfig.ImageNvidia {
692692
items.Add(clusterconfig.ImageNvidiaUserKey, clusterConfig.ImageNvidia)
693693
}
694-
if clusterConfig.ImageFluentd != defaultConfig.ImageFluentd {
695-
items.Add(clusterconfig.ImageFluentdUserKey, clusterConfig.ImageFluentd)
694+
if clusterConfig.ImageFluentBit != defaultConfig.ImageFluentBit {
695+
items.Add(clusterconfig.ImageFluentBitUserKey, clusterConfig.ImageFluentBit)
696696
}
697697
if clusterConfig.ImageStatsd != defaultConfig.ImageStatsd {
698698
items.Add(clusterconfig.ImageStatsdUserKey, clusterConfig.ImageStatsd)

cli/cmd/lib_manager.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,13 @@ import (
2828
"syscall"
2929
"time"
3030

31+
"github.com/cortexlabs/cortex/cli/lib/routines"
3132
"github.com/cortexlabs/cortex/pkg/consts"
3233
"github.com/cortexlabs/cortex/pkg/lib/archive"
3334
"github.com/cortexlabs/cortex/pkg/lib/docker"
3435
"github.com/cortexlabs/cortex/pkg/lib/errors"
3536
"github.com/cortexlabs/cortex/pkg/lib/exit"
3637
"github.com/cortexlabs/cortex/pkg/lib/files"
37-
"github.com/cortexlabs/cortex/pkg/lib/routines"
3838
"github.com/cortexlabs/cortex/pkg/types/clusterconfig"
3939
"github.com/cortexlabs/yaml"
4040
dockertypes "github.com/docker/docker/api/types"

cli/cmd/logs.go

Lines changed: 28 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -17,28 +17,27 @@ limitations under the License.
1717
package cmd
1818

1919
import (
20-
"fmt"
21-
"net/http"
22-
"net/url"
23-
2420
"github.com/cortexlabs/cortex/cli/cluster"
25-
"github.com/cortexlabs/cortex/pkg/lib/console"
2621
"github.com/cortexlabs/cortex/pkg/lib/exit"
22+
"github.com/cortexlabs/cortex/pkg/lib/prompt"
2723
"github.com/cortexlabs/cortex/pkg/lib/telemetry"
28-
"github.com/cortexlabs/cortex/pkg/types"
2924
"github.com/spf13/cobra"
3025
)
3126

32-
var _flagLogsEnv string
27+
var (
28+
_flagLogsEnv string
29+
_flagLogsDisallowPrompt bool
30+
)
3331

3432
func logsInit() {
3533
_logsCmd.Flags().SortFlags = false
3634
_logsCmd.Flags().StringVarP(&_flagLogsEnv, "env", "e", "", "environment to use")
35+
_logsCmd.Flags().BoolVarP(&_flagLogsDisallowPrompt, "yes", "y", false, "skip prompts")
3736
}
3837

3938
var _logsCmd = &cobra.Command{
4039
Use: "logs API_NAME [JOB_ID]",
41-
Short: "stream logs from an api",
40+
Short: "stream logs from a single replica of an api or a single worker for a job",
4241
Args: cobra.RangeArgs(1, 2),
4342
Run: func(cmd *cobra.Command, args []string) {
4443
envName, err := getEnvFromFlag(_flagLogsEnv)
@@ -59,43 +58,39 @@ var _logsCmd = &cobra.Command{
5958
exit.Error(err)
6059
}
6160

61+
operatorConfig := MustGetOperatorConfig(env.Name)
6262
apiName := args[0]
63-
if env.Provider == types.AWSProviderType {
64-
if len(args) == 1 {
65-
err := cluster.StreamLogs(MustGetOperatorConfig(env.Name), apiName)
66-
if err != nil {
67-
exit.Error(err)
68-
}
63+
64+
if len(args) == 1 {
65+
apiResponse, err := cluster.GetAPI(operatorConfig, apiName)
66+
if err != nil {
67+
exit.Error(err)
6968
}
70-
if len(args) == 2 {
71-
err := cluster.StreamJobLogs(MustGetOperatorConfig(env.Name), apiName, args[1])
72-
if err != nil {
73-
exit.Error(err)
74-
}
69+
70+
if apiResponse[0].Status.Requested > 1 && !_flagLogsDisallowPrompt {
71+
prompt.YesOrExit("logs from a single random replica will be streamed\n\nfor aggregated logs please visit your cloud provider's logging dashboard; see https://docs.cortex.dev for details", "", "")
7572
}
76-
}
7773

78-
if env.Provider == types.GCPProviderType {
79-
gcpLogsResponse, err := cluster.GetGCPLogsURL(MustGetOperatorConfig(env.Name), apiName)
74+
err = cluster.StreamLogs(operatorConfig, apiName)
8075
if err != nil {
8176
exit.Error(err)
8277
}
83-
84-
gcpReq, err := http.NewRequest("GET", "https://console.cloud.google.com/logs/query", nil)
78+
}
79+
if len(args) == 2 {
80+
jobResponse, err := cluster.GetJob(operatorConfig, apiName, args[1])
8581
if err != nil {
8682
exit.Error(err)
8783
}
88-
query := ""
89-
for q, v := range gcpLogsResponse.QueryParams {
90-
query += fmt.Sprintf("%s=\"%s\"\n", q, v)
84+
85+
if jobResponse.JobStatus.Job.Workers > 1 && !_flagLogsDisallowPrompt {
86+
prompt.YesOrExit("logs from a single random worker will be streamed\n\nfor aggregated logs please visit your cloud provider's logging dashboard; see https://docs.cortex.dev for details", "", "")
9187
}
92-
queryValues := make(url.Values)
93-
queryValues.Add("query", query)
94-
gcpReq.URL.RawQuery = queryValues.Encode()
9588

96-
gcpLogsURL := gcpReq.URL.String()
97-
consoleOutput := console.Bold(fmt.Sprintf("visit the following link to view logs for api %s: ", apiName)) + gcpLogsURL
98-
fmt.Println(consoleOutput)
89+
err = cluster.StreamJobLogs(operatorConfig, apiName, args[1])
90+
if err != nil {
91+
exit.Error(err)
92+
}
9993
}
94+
10095
},
10196
}
File renamed without changes.

dev/versions.md

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -235,12 +235,11 @@ Note: overriding horizontal-pod-autoscaler-sync-period on EKS is currently not s
235235
1. In the [GitHub Repo](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/aws), set the tree to the tag for the chosen release, and open `cloudprovider/aws/examples/cluster-autoscaler-autodiscover.yaml` (e.g. <https://github.com/kubernetes/autoscaler/blob/cluster-autoscaler-1.16.5/cluster-autoscaler/cloudprovider/aws/examples/cluster-autoscaler-autodiscover.yaml>)
236236
1. Resolve merge conflicts with the template in `manager/manifests/cluster-autoscaler.yaml.j2`
237237

238-
## Fluentd
238+
## Fluent Bit
239239

240-
1. Find the latest release on [Dockerhub](https://hub.docker.com/r/fluent/fluentd-kubernetes-daemonset/)
241-
1. Update the base image version in `images/fluentd/Dockerfile`
242-
1. Update record-modifier in `images/fluentd/Dockerfile` to the latest version [here](https://github.com/repeatedly/fluent-plugin-record-modifier/blob/master/VERSION)
243-
1. Update `fluentd.yaml` as necessary (make sure to maintain all Cortex environment variables)
240+
1. Find the latest release on [Dockerhub](https://hub.docker.com/r/amazon/aws-for-fluent-bit/tags?page=1&ordering=last_updated)
241+
1. Update the base image version in `images/fluent-bit/Dockerfile`
242+
1. Update `fluent-bit.yaml` as necessary (make sure to maintain all Cortex environment variables)
244243

245244
## Statsd
246245

@@ -258,7 +257,7 @@ Note: overriding horizontal-pod-autoscaler-sync-period on EKS is currently not s
258257
## kubectl
259258

260259
1. Find the latest release [here](https://storage.googleapis.com/kubernetes-release/release/stable.txt)
261-
1. Update the version in `images/manager/Dockerfile`
260+
1. Update the version in `images/manager/Dockerfile` and `images/operator/Dockerfile`
262261
1. Update your local version and alert developers
263262
* Linux:
264263
1. `curl -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl`

docs/clients/cli.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,13 +35,14 @@ Flags:
3535
## logs
3636

3737
```text
38-
stream logs from an api
38+
stream logs from a single replica of an api or a single worker for a job
3939
4040
Usage:
4141
cortex logs API_NAME [JOB_ID] [flags]
4242
4343
Flags:
4444
-e, --env string environment to use
45+
-y, --yes skip prompts
4546
-h, --help help for logs
4647
```
4748

0 commit comments

Comments
 (0)