Skip to content

Adding aws-az-experiment, Disable availability zones for load-balancer #36

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Feb 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions bin/experiment/experiment.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#!/usr/bin/env python3

import experiments.aws_az.aws_az_chaos.experiment.aws_az_chaos as aws_az_chaos
import experiments.generic.pod_delete.experiment.pod_delete as pod_delete
import argparse
import logging
Expand All @@ -23,6 +24,8 @@ def main():
# invoke the corresponding experiment based on the (-name) flag
if args.name == "pod-delete":
pod_delete.PodDelete(clients)
elif args.name == "aws-az-chaos":
aws_az_chaos.AwsAzExperiment(clients)
else:
logging.error("Unsupported -name %s, please provide the correct value of -name args", args.name)
return
Expand Down
Empty file.
Empty file.
145 changes: 145 additions & 0 deletions chaosLib/litmus/aws_az_chaos/lib/aws_az_chaos.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@

import pkg.utils.common.common as common
import pkg.types.types as types
import pkg.events.events as events
import logging
from datetime import datetime
import pkg.maths.maths as maths

# PrepareAWSAZExperiment contains the preparation steps before chaos injection
def PrepareAWSAZExperiment(experimentsDetails, resultDetails, eventsDetails, chaosDetails, clients, statusAws):
    """Run the ramp-time waits around the chaos injection and dispatch by sequence.

    Reads ``experimentsDetails.RampTime`` and ``experimentsDetails.Sequence``.
    The sequence is matched case-insensitively and ignoring surrounding
    whitespace against "serial" and "parallel".

    Returns None on success. On failure it returns (does not raise) the error
    object handed back by the injection routine, or a ValueError instance when
    the sequence is not supported. The post-chaos ramp wait is skipped when the
    injection fails.
    """
    # Waiting for the ramp time before chaos injection
    if experimentsDetails.RampTime != 0:
        logging.info("[Ramp]: Waiting for the %s ramp time before injecting chaos", experimentsDetails.RampTime)
        common.WaitForDuration(experimentsDetails.RampTime)

    # mode for chaos injection; normalise once instead of per comparison
    sequence = experimentsDetails.Sequence.strip().lower()
    if sequence == "serial":
        injectChaos = injectChaosInSerialMode
    elif sequence == "parallel":
        injectChaos = injectChaosInParallelMode
    else:
        return ValueError("{} sequence is not supported".format(experimentsDetails.Sequence))

    err = injectChaos(experimentsDetails, chaosDetails, eventsDetails, resultDetails, clients, statusAws)
    if err is not None:
        return err

    # Waiting for the ramp time after chaos injection
    if experimentsDetails.RampTime != 0:
        logging.info("[Ramp]: Waiting for the %s ramp time after injecting chaos", experimentsDetails.RampTime)
        common.WaitForDuration(experimentsDetails.RampTime)

    return None

# injectChaosInSerialMode disables the target availability zones of the load balancer in serial mode (one by one)
def injectChaosInSerialMode(experimentsDetails, chaosDetails, eventsDetails, resultDetails, clients, statusAws):
    """Detach each target zone, wait for the chaos interval, then re-attach it.

    The cycle over all zones repeats until ``experimentsDetails.ChaosDuration``
    seconds have elapsed since the function started. The elapsed time is only
    checked between full passes over the zone list.

    A detached zone is always re-attached, even when the wait step returns an
    error or raises, so a failed run does not leave the load balancer without
    that zone.

    Returns None on success, otherwise the first error object returned by a
    step (detach, wait, attach or status check). Returns a ValueError instance
    when ``LoadBalancerZones`` contains no zone names.
    """
    # The zone list does not change between iterations, so parse it once.
    # Surrounding whitespace and empty entries ("a, b," style input) are dropped.
    targetZones = [zone.strip() for zone in experimentsDetails.LoadBalancerZones.split(",") if zone.strip()]
    if not targetZones:
        # Without this guard the loop below would spin without ever waiting.
        return ValueError("no target availability zone provided in LoadBalancerZones")

    # ChaosStartTimeStamp contains the start timestamp, when the chaos injection begins
    ChaosStartTimeStamp = datetime.now()

    # total_seconds() is used because timedelta.seconds wraps around after one day
    while (datetime.now() - ChaosStartTimeStamp).total_seconds() < experimentsDetails.ChaosDuration:

        logging.info("[Info]: Target available zone list, %s", targetZones)

        if experimentsDetails.EngineName != "":
            msg = "Injecting " + experimentsDetails.ExperimentName + " chaos on available zone"
            types.SetEngineEventAttributes(eventsDetails, types.ChaosInject, msg, "Normal", chaosDetails)
            events.GenerateEvents(eventsDetails, chaosDetails, "ChaosEngine", clients)

        # Detaching the target zones from the load balancer, one at a time
        for azone in targetZones:

            logging.info("[Info]: Detaching the following zone(s), Zone Name : %s", azone)
            err = statusAws.detachAZfromLB(experimentsDetails, azone)
            if err is not None:
                return err

            waitErr = None
            try:
                if chaosDetails.Randomness:
                    waitErr = common.RandomInterval(experimentsDetails.ChaosInterval)
                elif experimentsDetails.ChaosInterval != "":
                    # Waiting for the chaos interval after chaos injection
                    logging.info("[Wait]: Wait for the chaos interval %s", experimentsDetails.ChaosInterval)
                    waitTime = maths.atoi(experimentsDetails.ChaosInterval)
                    common.WaitForDuration(waitTime)
            finally:
                # Attaching the target available zone after the chaos injection.
                # Done in `finally` so the zone is restored on every exit path.
                logging.info("[Status]: Attach the available zone back to load balancer")
                attachErr = statusAws.attachAZtoLB(experimentsDetails, azone)

            if waitErr is not None:
                if attachErr is not None:
                    logging.error("Failed to attach zone %s back to the load balancer, err: %s", azone, attachErr)
                return waitErr
            if attachErr is not None:
                return attachErr

            # Verify the status of available zone after the chaos injection
            logging.info("[Status]: Checking AWS load balancer's AZ status")
            err = statusAws.CheckAWSStatus(experimentsDetails)
            if err is not None:
                return err

    logging.info("[Completion]: %s chaos is done", experimentsDetails.ExperimentName)

    return None

# injectChaosInParallelMode disables the target availability zones of the load balancer in parallel mode (all at once)
def injectChaosInParallelMode(experimentsDetails, chaosDetails, eventsDetails, resultDetails, clients, statusAws):
    """Detach all target zones, wait for the chaos interval, then re-attach them.

    The detach/wait/attach/status-check cycle repeats until
    ``experimentsDetails.ChaosDuration`` seconds have elapsed since the
    function started.

    Every zone that was successfully detached is re-attached before the
    function leaves the cycle, including when a later detach fails, the wait
    step returns an error, or an exception is raised. A failing re-attach does
    not stop the remaining zones from being re-attached.

    Returns None on success, otherwise the first error object returned by a
    step (detach, wait, attach or status check). Returns a ValueError instance
    when ``LoadBalancerZones`` contains no zone names.
    """
    # The zone list does not change between iterations, so parse it once.
    # Surrounding whitespace and empty entries ("a, b," style input) are dropped.
    targetZones = [zone.strip() for zone in experimentsDetails.LoadBalancerZones.split(",") if zone.strip()]
    if not targetZones:
        # Without this guard the loop below would spin without ever waiting.
        return ValueError("no target availability zone provided in LoadBalancerZones")

    # ChaosStartTimeStamp contains the start timestamp, when the chaos injection begins
    ChaosStartTimeStamp = datetime.now()

    # total_seconds() is used because timedelta.seconds wraps around after one day
    while (datetime.now() - ChaosStartTimeStamp).total_seconds() < experimentsDetails.ChaosDuration:

        logging.info("[Info]: Target available zone list, %s", targetZones)

        if experimentsDetails.EngineName != "":
            msg = "Injecting " + experimentsDetails.ExperimentName + " chaos on available zone"
            types.SetEngineEventAttributes(eventsDetails, types.ChaosInject, msg, "Normal", chaosDetails)
            events.GenerateEvents(eventsDetails, chaosDetails, "ChaosEngine", clients)

        detachedZones = []  # zones that must be restored before leaving this cycle
        chaosErr = None
        attachErr = None
        try:
            # Detaching the target zones from the load balancer
            for azone in targetZones:
                logging.info("[Info]: Detaching the following zone(s), Zone Name %s", azone)
                chaosErr = statusAws.detachAZfromLB(experimentsDetails, azone)
                if chaosErr is not None:
                    break
                detachedZones.append(azone)

            if chaosErr is None:
                if chaosDetails.Randomness:
                    chaosErr = common.RandomInterval(experimentsDetails.ChaosInterval)
                elif experimentsDetails.ChaosInterval != "":
                    # Waiting for the chaos interval after chaos injection
                    logging.info("[Wait]: Wait for the chaos interval %s", experimentsDetails.ChaosInterval)
                    waitTime = maths.atoi(experimentsDetails.ChaosInterval)
                    common.WaitForDuration(waitTime)
        finally:
            # Attaching the target available zones after the chaos injection.
            # Done in `finally` so detached zones are restored on every exit path.
            if detachedZones:
                logging.info("[Status]: Attach the available zone back to load balancer")
            for azone in detachedZones:
                err = statusAws.attachAZtoLB(experimentsDetails, azone)
                if err is not None:
                    logging.error("Failed to attach zone %s back to the load balancer, err: %s", azone, err)
                    if attachErr is None:
                        attachErr = err

        if chaosErr is not None:
            return chaosErr
        if attachErr is not None:
            return attachErr

        # Verify the status of available zone after the chaos injection
        logging.info("[Status]: Checking AWS load balancer's AZ status")
        err = statusAws.CheckAWSStatus(experimentsDetails)
        if err is not None:
            return err

    logging.info("[Completion]: %s chaos is done", experimentsDetails.ExperimentName)

    return None
Empty file.
Empty file.
Empty file.
129 changes: 129 additions & 0 deletions experiments/aws_az/aws_az_chaos/experiment/aws_az_chaos.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
import pkg.types.types as types
import pkg.aws_az.types.types as experimentDetails
import pkg.aws_az.environment.environment as experimentEnv
import pkg.events.events as events
import logging
import chaosLib.litmus.aws_az_chaos.lib.aws_az_chaos as litmusLIB
import pkg.result.chaosresult as chaosResults
import pkg.utils.common.common as common
import pkg.aws_status.status as awsStatus

# AwsAzExperiment contains steps to inject chaos
def AwsAzExperiment(clients):
    """Run the aws-az-chaos experiment end to end.

    Steps, in order: read the experiment ENV, create the chaos result (SOT),
    start the abort watcher, run the pre-chaos status check, inject chaos
    through the litmus lib, run the post-chaos status check, update the chaos
    result (EOT) and emit the verdict/summary events.

    Every failing step is logged and, except for the EOT update, recorded via
    ``result.RecordAfterFailure`` before returning early. The function always
    returns None.
    """
    # Initialising experimentDetails, resultDetails, eventsDetails, chaosDetails, status and result objects
    experimentsDetails = experimentDetails.ExperimentDetails()
    resultDetails = types.ResultDetails()
    eventsDetails = types.EventDetails()
    chaosDetails = types.ChaosDetails()
    result = chaosResults.ChaosResults()
    statusAws = awsStatus.AWS_AZ()

    # Fetching all the ENV passed from the runner pod
    experimentEnv.GetENV(experimentsDetails)

    logging.info("[PreReq]: Initialise Chaos Variables for the %s experiment", experimentsDetails.ExperimentName)

    # Initialise the chaos attributes
    experimentEnv.InitialiseChaosVariables(chaosDetails, experimentsDetails)

    # Initialise Chaos Result Parameters
    types.SetResultAttributes(resultDetails, chaosDetails)

    # Updating the chaos result in the beginning of experiment
    logging.info("[PreReq]: Updating the chaos result of %s experiment (SOT)", experimentsDetails.ExperimentName)
    err = result.ChaosResult(chaosDetails, resultDetails, "SOT", clients)
    if err is not None:
        logging.error("Unable to Create the Chaos Result, err: %s", err)
        failStep = "Updating the chaos result of aws-az-chaos experiment (SOT)"
        result.RecordAfterFailure(chaosDetails, resultDetails, failStep, eventsDetails, clients)
        return

    # Set the chaos result uid
    result.SetResultUID(resultDetails, chaosDetails, clients)

    # generating the event in chaosresult to mark the verdict as awaited
    msg = "Experiment " + experimentsDetails.ExperimentName + ", Result Awaited"
    types.SetResultEventAttributes(eventsDetails, types.AwaitedVerdict, msg, "Normal", resultDetails)
    events.GenerateEvents(eventsDetails, chaosDetails, "ChaosResult", clients)

    # DISPLAY THE LOADBALANCER INFORMATION
    logging.info("[Info]: The application information is as follows LoadBalancer Name=%s, LoadBalancer Zones=%s, Ramp Time=%s", experimentsDetails.LoadBalancerName, experimentsDetails.LoadBalancerZones, experimentsDetails.RampTime)

    # Calling AbortWatcher, it will continuously watch for the abort signal and generate the required events and result
    common.AbortWatcher(experimentsDetails.ExperimentName, resultDetails, chaosDetails, eventsDetails, clients)

    # PRE-CHAOS APPLICATION STATUS CHECK
    logging.info("[Status]: Verify that the AUT (Application Under Test) is running (pre-chaos)")
    err = statusAws.CheckAWSStatus(experimentsDetails)
    if err is not None:
        logging.error("Target available zone status check failed, err: %s", err)
        failStep = "Verify that the AUT (Application Under Test) is running (pre-chaos)"
        result.RecordAfterFailure(chaosDetails, resultDetails, failStep, eventsDetails, clients)
        return

    if experimentsDetails.EngineName != "":
        # marking AUT as running, as we already checked the status of application under test
        msg = "AUT: Running"
        # generating the event for the pre-chaos check
        types.SetEngineEventAttributes(eventsDetails, types.PreChaosCheck, msg, "Normal", chaosDetails)
        events.GenerateEvents(eventsDetails, chaosDetails, "ChaosEngine", clients)

    # Including the litmus lib for aws-az-chaos; it is the only lib handled here
    if experimentsDetails.ChaosLib == "litmus":
        err = litmusLIB.PrepareAWSAZExperiment(experimentsDetails, resultDetails, eventsDetails, chaosDetails, clients, statusAws)
        if err is not None:
            logging.error("Chaos injection failed, err: %s", err)
            failStep = "failed in chaos injection phase"
            result.RecordAfterFailure(chaosDetails, resultDetails, failStep, eventsDetails, clients)
            return
    else:
        # This is a failure path, so it is logged at error level like the others
        logging.error("[Invalid]: Please Provide the correct LIB")
        failStep = "no match found for specified lib"
        result.RecordAfterFailure(chaosDetails, resultDetails, failStep, eventsDetails, clients)
        return

    logging.info("[Confirmation]: %s chaos has been injected successfully", experimentsDetails.ExperimentName)
    resultDetails.Verdict = "Pass"

    # POST-CHAOS APPLICATION STATUS CHECK
    logging.info("[Status]: Verify that the AUT (Application Under Test) is running (post-chaos)")
    err = statusAws.CheckAWSStatus(experimentsDetails)
    if err is not None:
        # Same check as pre-chaos, so the same wording is used
        logging.error("Target available zone status check failed, err: %s", err)
        failStep = "Verify that the AUT (Application Under Test) is running (post-chaos)"
        result.RecordAfterFailure(chaosDetails, resultDetails, failStep, eventsDetails, clients)
        return

    if experimentsDetails.EngineName != "":
        # marking AUT as running, as we already checked the status of application under test
        msg = "AUT: Running"

        # generating post chaos event
        types.SetEngineEventAttributes(eventsDetails, types.PostChaosCheck, msg, "Normal", chaosDetails)
        events.GenerateEvents(eventsDetails, chaosDetails, "ChaosEngine", clients)

    # Updating the chaosResult in the end of experiment
    logging.info("[The End]: Updating the chaos result of %s experiment (EOT)", experimentsDetails.ExperimentName)
    err = result.ChaosResult(chaosDetails, resultDetails, "EOT", clients)
    if err is not None:
        logging.error("Unable to Update the Chaos Result, err: %s", err)
        return

    # generating the event in chaosresult to mark the verdict as pass/fail
    msg = "Experiment " + experimentsDetails.ExperimentName + ", Result " + resultDetails.Verdict
    reason = types.PassVerdict
    eventType = "Normal"
    if resultDetails.Verdict != "Pass":
        reason = types.FailVerdict
        eventType = "Warning"

    types.SetResultEventAttributes(eventsDetails, reason, msg, eventType, resultDetails)
    events.GenerateEvents(eventsDetails, chaosDetails, "ChaosResult", clients)
    if experimentsDetails.EngineName != "":
        # Verdict + "ed" yields e.g. "Passed"
        msg = experimentsDetails.ExperimentName + " experiment has been " + resultDetails.Verdict + "ed"
        types.SetEngineEventAttributes(eventsDetails, types.Summary, msg, "Normal", chaosDetails)
        events.GenerateEvents(eventsDetails, chaosDetails, "ChaosEngine", clients)
64 changes: 64 additions & 0 deletions experiments/aws_az/aws_az_chaos/test/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
---
# Test Deployment that keeps a pod alive with the aws-az-chaos experiment ENV set.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: litmus-experiment
spec:
  replicas: 1
  selector:
    matchLabels:
      app: litmus-experiment
  template:
    metadata:
      labels:
        app: litmus-experiment
    spec:
      # Kubernetes object names may not contain underscores
      serviceAccountName: aws-az-chaos-sa
      containers:
        - name: gotest
          image: busybox
          command:
            - sleep
            - "3600"
          env:
            - name: LOAD_BALANCER_NAME
              value: ''

            - name: LOAD_BALANCER_ZONES
              value: ''

            - name: LOAD_BALANCERNAME_ARN
              value: ''

            - name: AWS_SHARED_CREDENTIALS_FILE
              value: ""

            - name: TOTAL_CHAOS_DURATION
              value: ''

            - name: AWS_REGIONS
              value: ""

            ## Period to wait before injection of chaos in sec
            - name: RAMP_TIME
              value: ''

            ## env var that describes the library used to execute the chaos
            ## default: litmus. Supported values: litmus
            - name: LIB
              value: ''

            # provide the chaos namespace
            - name: CHAOS_NAMESPACE
              value: ''

            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name

            - name: CHAOS_SERVICE_ACCOUNT
              valueFrom:
                fieldRef:
                  fieldPath: spec.serviceAccountName


Empty file added pkg/aws_az/__init__.py
Empty file.
Empty file.
Loading