#!/bin/bash
#
# Azure Kubernetes Node Addition Script
#
# Automates adding a new Azure VM to an existing Kubernetes cluster:
#   1. checks that the required tools and paths exist,
#   2. looks up the VM's private IP and power state with the Azure CLI,
#   3. verifies SSH reachability,
#   4. backs up the Ansible inventory and appends the new host,
#   5. runs the Kubespray scale playbook,
#   6. waits for the node to become Ready and schedules a test pod on it.
#
# Usage: script [--help|-h] [--version|-v]
# With no arguments the script runs interactively and prompts for input.
#
# INVENTORY_FILE and KUBESPRAY_DIR are relative paths; they are resolved
# against the directory the script is started from.

set -e # Exit on any error

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Configuration
# shellcheck disable=SC2034  # SCRIPT_DIR is not referenced below; kept as-is.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
INVENTORY_FILE="freeleaps-ops/cluster/ansible/manifests/inventory.ini"
KUBESPRAY_DIR="freeleaps-ops/3rd/kubespray"
ANSIBLE_USER="wwwadmin@mathmast.com"

# Logging helpers. Each takes the message as $1.
# They write to stderr so that functions whose stdout is captured with $(...)
# (see get_vm_details) can log without corrupting the captured value.
print_status() {
  printf '%b[INFO]%b %s\n' "$BLUE" "$NC" "$1" >&2
}

print_success() {
  printf '%b[SUCCESS]%b %s\n' "$GREEN" "$NC" "$1" >&2
}

print_warning() {
  printf '%b[WARNING]%b %s\n' "$YELLOW" "$NC" "$1" >&2
}

print_error() {
  printf '%b[ERROR]%b %s\n' "$RED" "$NC" "$1" >&2
}

# Validate that a user-supplied value is non-empty.
# Arguments: $1 - value to check
# Returns:   0 if non-empty, 1 (after logging an error) otherwise
validate_input() {
  if [[ -z "$1" ]]; then
    print_error "Input cannot be empty"
    return 1
  fi
  return 0
}

# Check that every external tool and path the script relies on is present.
# Exits the script with status 1 on the first missing prerequisite.
check_prerequisites() {
  local entry

  print_status "Checking prerequisites..."

  # "command:label" pairs; the label is what appears in the error message.
  # ansible-inventory, ansible-playbook, ssh and timeout are checked up front
  # because they are only invoked after the inventory has been modified or
  # would otherwise produce a misleading "SSH connection failed" message.
  for entry in \
    "kubectl:kubectl" \
    "ansible:ansible" \
    "ansible-inventory:ansible-inventory" \
    "ansible-playbook:ansible-playbook" \
    "az:Azure CLI" \
    "ssh:ssh" \
    "timeout:timeout"; do
    if ! command -v "${entry%%:*}" &> /dev/null; then
      print_error "${entry#*:} is not installed"
      exit 1
    fi
  done

  # Check if inventory file exists
  if [[ ! -f "$INVENTORY_FILE" ]]; then
    print_error "Inventory file not found: $INVENTORY_FILE"
    exit 1
  fi

  # Check if kubespray directory exists
  if [[ ! -d "$KUBESPRAY_DIR" ]]; then
    print_error "Kubespray directory not found: $KUBESPRAY_DIR"
    exit 1
  fi

  print_success "All prerequisites are met"
}

# Look up a VM in Azure, make sure it is running, and print its private IP.
# Arguments: $1 - VM name
#            $2 - resource group
# Outputs:   the private IP address on stdout (and nothing else; all
#            messages go to stderr because callers capture stdout)
# Returns:   0 on success, 1 if the IP cannot be determined or the VM is
#            not running and the user declines (or fails) to start it
get_vm_details() {
  local vm_name="$1"
  local resource_group="$2"
  local private_ip power_state

  print_status "Getting VM details from Azure..."

  # --show-details is required: privateIps and powerState are only part of
  # the `az vm show` output when instance details are requested.
  private_ip=$(az vm show --show-details \
    --resource-group "$resource_group" --name "$vm_name" \
    --query "privateIps" -o tsv 2> /dev/null) || private_ip=""

  if [[ -z "$private_ip" ]]; then
    print_error "Failed to get private IP for VM: $vm_name"
    return 1
  fi

  # A VM with several private addresses reports them comma-separated;
  # use the first one so the value is usable as a single host address.
  if [[ "$private_ip" == *,* ]]; then
    print_warning "VM has multiple private IPs ($private_ip); using the first"
    private_ip="${private_ip%%,*}"
  fi

  # Get VM power state
  power_state=$(az vm show --show-details \
    --resource-group "$resource_group" --name "$vm_name" \
    --query "powerState" -o tsv 2> /dev/null) || power_state=""

  if [[ "$power_state" != "VM running" ]]; then
    print_warning "VM is not running. Current state: $power_state"
    read -r -p "Do you want to start the VM? (y/N): " -n 1
    echo >&2
    if [[ $REPLY =~ ^[Yy]$ ]]; then
      # Keep the CLI's own output off stdout, which carries the IP.
      if ! az vm start --resource-group "$resource_group" --name "$vm_name" >&2; then
        print_error "Failed to start VM: $vm_name"
        return 1
      fi
      print_status "Waiting for VM to start..."
      sleep 30
    else
      print_error "VM must be running to proceed"
      return 1
    fi
  fi

  printf '%s\n' "$private_ip"
}

# Verify that the Ansible user can open an SSH session to the host.
# Arguments: $1 - IP address to connect to
# Returns:   0 if the remote echo succeeds within the timeout, 1 otherwise
test_ssh_connectivity() {
  local ip_address="$1"

  print_status "Testing SSH connectivity to $ip_address..."

  # The login name is passed with -l because ANSIBLE_USER itself contains
  # an '@'.
  # NOTE(review): StrictHostKeyChecking=no accepts any host key without
  # verification; confirm this is acceptable for the target network.
  if timeout 10 ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no \
    -l "$ANSIBLE_USER" "$ip_address" \
    "echo 'SSH connection successful'" 2> /dev/null; then
    print_success "SSH connectivity verified"
    return 0
  else
    print_error "SSH connection failed to $ip_address"
    print_warning "Please ensure:"
    print_warning "1. VM is running"
    print_warning "2. Network security group allows SSH (port 22)"
    print_warning "3. SSH service is running on the VM"
    return 1
  fi
}

# Append the new host to the Ansible inventory, after taking a backup.
# Arguments: $1 - VM name (used as inventory host name)
#            $2 - IP address
#            $3 - node type: "worker" or "master"
# Returns:   0 on success (or if the identical host entry already exists),
#            1 on an invalid node type or if the file cannot be written
# NOTE(review): the entry is appended at the end of the file, so it lands in
# whichever INI section happens to be last; confirm that matches the
# inventory layout.
update_inventory() {
  local vm_name="$1"
  local ip_address="$2"
  local node_type="$3"
  local host_line

  print_status "Updating inventory file..."

  # Validate the node type before touching anything on disk.
  case "$node_type" in
    worker)
      host_line="$vm_name ansible_host=$ip_address ansible_user=$ANSIBLE_USER host_name=$vm_name"
      ;;
    master)
      host_line="$vm_name ansible_host=$ip_address ansible_user=$ANSIBLE_USER etcd_member_name=${vm_name}-etcd host_name=$vm_name"
      ;;
    *)
      print_error "Invalid node type: $node_type"
      return 1
      ;;
  esac

  # Re-running the script after a later step failed must not append the
  # same host definition a second time.
  if awk -v host="$vm_name" -v ip="$ip_address" '
      $1 == host {
        for (i = 2; i <= NF; i++) if ($i == "ansible_host=" ip) found = 1
      }
      END { exit !found }
    ' "$INVENTORY_FILE"; then
    print_warning "Host $vm_name ($ip_address) is already in the inventory; leaving it unchanged"
    return 0
  fi

  # Create backup of inventory file
  if ! cp -- "$INVENTORY_FILE" "${INVENTORY_FILE}.backup.$(date +%Y%m%d_%H%M%S)"; then
    print_error "Failed to back up inventory file: $INVENTORY_FILE"
    return 1
  fi

  # If the file does not end in a newline, add one first so the new entry
  # does not get glued onto the previous last line.
  if [[ -n "$(tail -c 1 -- "$INVENTORY_FILE")" ]]; then
    echo >> "$INVENTORY_FILE"
  fi

  if ! printf '%s\n' "$host_line" >> "$INVENTORY_FILE"; then
    print_error "Failed to write to inventory file: $INVENTORY_FILE"
    return 1
  fi

  print_success "Added $node_type node to inventory"
}

# Check that the inventory parses and that every host in it answers a ping.
# Returns: 0 if both checks pass, 1 otherwise
verify_inventory() {
  print_status "Verifying inventory configuration..."

  # Test inventory syntax
  if ansible-inventory -i "$INVENTORY_FILE" --list > /dev/null 2>&1; then
    print_success "Inventory syntax is valid"
  else
    print_error "Inventory syntax is invalid"
    return 1
  fi

  # Test connectivity to all nodes (-kK prompts for SSH and become passwords)
  print_status "Testing connectivity to all nodes..."
  if ansible -i "$INVENTORY_FILE" all -m ping -kK; then
    print_success "Connectivity to all nodes verified"
  else
    print_error "Connectivity test failed"
    return 1
  fi
}

# Run the Kubespray scale playbook against the inventory.
# Returns: 0 if the playbook succeeds, 1 otherwise
run_scale_playbook() {
  local inventory_dir inventory_name kubespray_abs

  print_status "Running Kubespray scale playbook..."

  inventory_dir=$(dirname -- "$INVENTORY_FILE")
  inventory_name=$(basename -- "$INVENTORY_FILE")

  # KUBESPRAY_DIR is relative to the starting directory, so it has to be
  # made absolute BEFORE changing into the inventory directory; otherwise
  # the playbook path would be looked up underneath the inventory directory.
  if ! kubespray_abs=$(cd -- "$KUBESPRAY_DIR" > /dev/null && pwd); then
    print_error "Kubespray directory not found: $KUBESPRAY_DIR"
    return 1
  fi

  # Run from the inventory directory inside a subshell so the caller's
  # working directory is left untouched.
  if (
    cd -- "$inventory_dir" > /dev/null \
      && ansible-playbook -i "$inventory_name" "$kubespray_abs/scale.yml" -kK -b
  ); then
    print_success "Scale playbook completed successfully"
  else
    print_error "Scale playbook failed"
    return 1
  fi
}

# Wait until the node is registered in the cluster and reports Ready.
# Arguments: $1 - node name
# Returns:   0 once the node is Ready, 1 if either wait times out
#            (30 attempts, 10 seconds apart, for each phase)
verify_node_addition() {
  local vm_name="$1"
  local max_attempts=30
  local attempt node_status
  local found=0 ready=0

  print_status "Verifying node addition..."

  # Wait for node to appear. Query the node by exact name rather than
  # grepping the node list, which would also match e.g. "node10" for "node1".
  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    if kubectl get node "$vm_name" > /dev/null 2>&1; then
      print_success "Node $vm_name found in cluster"
      found=1
      break
    fi
    print_status "Waiting for node to appear... (attempt $attempt/$max_attempts)"
    sleep 10
  done

  if ((!found)); then
    print_error "Node $vm_name did not appear in cluster"
    return 1
  fi

  # Wait for node to be ready
  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    node_status=$(kubectl get nodes "$vm_name" \
      -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2> /dev/null) || node_status=""
    if [[ "$node_status" == "True" ]]; then
      print_success "Node $vm_name is ready"
      ready=1
      break
    fi
    print_status "Waiting for node to be ready... (attempt $attempt/$max_attempts)"
    sleep 10
  done

  if ((!ready)); then
    print_error "Node $vm_name is not ready"
    kubectl describe node "$vm_name" || true
    return 1
  fi
}

# Schedule a throwaway nginx pod on the node to prove it accepts workloads.
# Arguments: $1 - node name (matched against the kubernetes.io/hostname label)
# Returns:   0 if the pod reaches Running, 1 otherwise. The pod is deleted
#            in both cases.
test_pod_scheduling() {
  local vm_name="$1"
  local test_pod_name max_attempts=30
  local attempt pod_status running=0

  print_status "Testing pod scheduling on new node..."

  # Create a test pod pinned to the new node
  test_pod_name="test-pod-$(date +%s)"
  if ! kubectl run "$test_pod_name" --image=nginx --restart=Never \
    --overrides="{\"spec\":{\"nodeSelector\":{\"kubernetes.io/hostname\":\"$vm_name\"}}}"; then
    print_error "Failed to create test pod $test_pod_name"
    return 1
  fi

  # Wait for pod to be scheduled
  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    pod_status=$(kubectl get pod "$test_pod_name" \
      -o jsonpath='{.status.phase}' 2> /dev/null) || pod_status=""
    if [[ "$pod_status" == "Running" ]]; then
      print_success "Test pod is running on node $vm_name"
      running=1
      break
    fi
    print_status "Waiting for test pod to be ready... (attempt $attempt/$max_attempts)"
    sleep 10
  done

  # On failure, collect diagnostics BEFORE the pod is removed; describing
  # a pod that has already been deleted yields nothing useful.
  if ((!running)); then
    print_error "Test pod failed to run on node $vm_name"
    kubectl describe pod "$test_pod_name" || true
  fi

  # Clean up test pod (exactly once, on both paths)
  if ! kubectl delete pod "$test_pod_name"; then
    print_warning "Could not delete test pod $test_pod_name; remove it manually"
  fi

  if ((!running)); then
    return 1
  fi
}

# Print a summary of the new node and suggested follow-up actions.
# Arguments: $1 - node name
display_final_status() {
  local vm_name="$1"

  print_success "Node addition completed successfully!"
  echo
  echo "=== Final Status ==="
  echo "Node Name: $vm_name"
  echo "Node Status: $(kubectl get nodes "$vm_name" -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}')"
  echo "Node IP: $(kubectl get nodes "$vm_name" -o jsonpath='{.status.addresses[?(@.type=="InternalIP")].address}')"
  echo "Node Capacity: $(kubectl get nodes "$vm_name" -o jsonpath='{.status.capacity.cpu}') CPU, $(kubectl get nodes "$vm_name" -o jsonpath='{.status.capacity.memory}') Memory"
  echo
  echo "=== Next Steps ==="
  echo "1. Monitor the node for any issues"
  echo "2. Update monitoring and alerting if needed"
  echo "3. Update documentation"
  echo "4. Consider running node maintenance tasks"
}

# Interactive entry point: gather input, confirm, then run every step in
# order. Exits with status 1 as soon as any step fails.
main() {
  local vm_name resource_group node_type ip_address

  echo "=========================================="
  echo "Azure Kubernetes Node Addition Script"
  echo "=========================================="
  echo

  # Check prerequisites
  check_prerequisites

  # Get user input
  echo "Please provide the following information:"
  echo

  read -r -p "VM Name: " vm_name
  validate_input "$vm_name" || exit 1

  read -r -p "Resource Group: " resource_group
  validate_input "$resource_group" || exit 1

  read -r -p "Node Type (worker/master): " node_type
  if [[ "$node_type" != "worker" && "$node_type" != "master" ]]; then
    print_error "Node type must be 'worker' or 'master'"
    exit 1
  fi

  echo
  print_status "Summary:"
  echo " VM Name: $vm_name"
  echo " Resource Group: $resource_group"
  echo " Node Type: $node_type"
  echo

  read -r -p "Proceed with node addition? (y/N): " -n 1
  echo
  if [[ ! $REPLY =~ ^[Yy]$ ]]; then
    print_status "Operation cancelled"
    exit 0
  fi

  # Get VM details. The failure is handled on the assignment itself: under
  # `set -e` a separate `$?` test afterwards would never be reached.
  ip_address=$(get_vm_details "$vm_name" "$resource_group") || exit 1

  print_success "VM IP Address: $ip_address"

  # Test SSH connectivity
  test_ssh_connectivity "$ip_address" || exit 1

  # Update inventory
  update_inventory "$vm_name" "$ip_address" "$node_type" || exit 1

  # Verify inventory
  verify_inventory || exit 1

  # Run scale playbook
  run_scale_playbook || exit 1

  # Verify node addition
  verify_node_addition "$vm_name" || exit 1

  # Test pod scheduling
  test_pod_scheduling "$vm_name" || exit 1

  # Display final status
  display_final_status "$vm_name"
}

# Handle script arguments
if [[ $# -eq 0 ]]; then
  main
else
  case "$1" in
    --help | -h)
      echo "Usage: $0 [OPTIONS]"
      echo
      echo "Options:"
      echo " --help, -h Show this help message"
      echo " --version, -v Show version information"
      echo
      echo "This script automates the process of adding new Azure VMs to an existing Kubernetes cluster."
      echo "It will prompt for necessary information and guide you through the process."
      exit 0
      ;;
    --version | -v)
      echo "Azure Kubernetes Node Addition Script v1.0"
      exit 0
      ;;
    *)
      print_error "Unknown option: $1"
      echo "Use --help for usage information"
      exit 1
      ;;
  esac
fi