#!/bin/bash
# =============================================================================
# One-click deployment script for NPU auto-occupancy monitoring system
#
# Must be run as root. In order, the script:
#   1. ensures the jenkins group, user and /home/jenkins exist,
#   2. allows jenkins to use crontab when /etc/cron.allow is present,
#   3. downloads the scripts listed in SCRIPTS from BASE_URL into DEPLOY_DIR,
#   4. installs a per-minute crontab entry for watchdog.sh (as jenkins),
#   5. starts watchdog.sh in the background,
#   6. installs the global helper command /usr/local/bin/stopwatchdog.
#
# Environment:
#   PROXY_URL  optional; overrides the built-in HTTP(S) proxy URL.
# =============================================================================

set -euo pipefail

BASE_URL="https://tools.mindspore.cn/tools/ci/watchdog"
SCRIPTS=(
  "npu_auto_occupy.sh"
  "occupy_npu_8cards.py"
  "start_occupy.sh"
  "stop_occupy.sh"
  "watchdog.sh"
)
DEPLOY_DIR="/home/jenkins/.npu-scripts"
LOG_FILE="/tmp/deploy_npu_scripts.log"
readonly BASE_URL SCRIPTS DEPLOY_DIR LOG_FILE

# ---------- Proxy configuration ----------
# NOTE(security): the fallback value embeds proxy credentials in plain text.
# It is kept only so existing invocations keep working; prefer passing
# PROXY_URL through the environment and rotating/removing the embedded value.
PROXY_URL="${PROXY_URL:-http://w60090183:925552414%40Wy@proxyhk.huawei.com:8080}"
export http_proxy="$PROXY_URL"
export https_proxy="$PROXY_URL"
# -----------------------------------------

#######################################
# Print a timestamped message to stdout and append it to LOG_FILE.
# Globals:   LOG_FILE (read)
# Arguments: message words
#######################################
log() {
  echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" | tee -a "$LOG_FILE"
}

#######################################
# Abort with status 1 unless the effective user is root.
#######################################
check_root() {
  if [ "$EUID" -ne 0 ]; then
    echo "Error: Please run this script as root." >&2
    exit 1
  fi
}

#######################################
# Create the jenkins group if it does not exist yet.
#######################################
create_jenkins_group() {
  if getent group jenkins >/dev/null 2>&1; then
    log "Group jenkins already exists."
  else
    log "Creating group jenkins ..."
    groupadd jenkins
  fi
}

#######################################
# Create the jenkins user (home /home/jenkins, primary group jenkins, bash
# login shell). For an existing user only the primary group is corrected.
#######################################
create_jenkins_user() {
  local current_group
  if id "jenkins" &>/dev/null; then
    log "User jenkins already exists."
    # Declared separately above so a failing `id` is not masked by `local`.
    current_group=$(id -gn jenkins)
    if [ "$current_group" != "jenkins" ]; then
      log "Changing jenkins primary group to jenkins ..."
      usermod -g jenkins jenkins
    fi
  else
    log "Creating user jenkins with home directory /home/jenkins, primary group jenkins ..."
    useradd -m -d /home/jenkins -g jenkins -s /bin/bash jenkins
    log "User jenkins created successfully."
  fi
}

#######################################
# Make sure /home/jenkins exists and is owned (recursively) by jenkins:jenkins.
#######################################
ensure_jenkins_home() {
  if [ ! -d "/home/jenkins" ]; then
    log "Directory /home/jenkins does not exist, creating..."
    mkdir -p /home/jenkins
  fi
  log "Setting /home/jenkins owner to jenkins:jenkins ..."
  chown -R jenkins:jenkins /home/jenkins
}

#######################################
# Add jenkins to /etc/cron.allow when that file exists and does not already
# contain an exact "jenkins" line. Does nothing when the file is absent.
#######################################
grant_crontab_permission() {
  local cron_allow="/etc/cron.allow"
  if [ -f "$cron_allow" ]; then
    # -x -F: match the whole line literally (same as the regex ^jenkins$).
    if ! grep -qxF -- "jenkins" "$cron_allow"; then
      log "Adding jenkins to $cron_allow ..."
      echo "jenkins" >> "$cron_allow"
    else
      log "jenkins already exists in $cron_allow, skipping."
    fi
  else
    log "$cron_allow does not exist, all users have crontab permissions by default."
  fi
}

#######################################
# Create DEPLOY_DIR and hand it over to jenkins:jenkins.
# Globals: DEPLOY_DIR (read)
#######################################
create_watchdog_dir() {
  log "Creating deployment directory $DEPLOY_DIR ..."
  mkdir -p "$DEPLOY_DIR"
  chown jenkins:jenkins "$DEPLOY_DIR"
}

#######################################
# Download every entry of SCRIPTS from BASE_URL into DEPLOY_DIR, using wget
# when available and curl otherwise. Exits 1 on the first failed download or
# when neither tool is installed. Leaves the working directory at DEPLOY_DIR.
# Globals: BASE_URL, SCRIPTS, DEPLOY_DIR (read)
#######################################
download_scripts() {
  local script url
  cd "$DEPLOY_DIR"
  for script in "${SCRIPTS[@]}"; do
    url="$BASE_URL/$script"
    log "Downloading $url -> $script"
    # NOTE(security): certificate verification is disabled for both tools
    # (--no-check-certificate / -k); the downloaded files are later executed.
    if command -v wget &>/dev/null; then
      wget --no-check-certificate -q "$url" -O "$script" || {
        log "Error: Failed to download $script"
        exit 1
      }
    elif command -v curl &>/dev/null; then
      # -f: treat HTTP 4xx/5xx as failure instead of saving the error page as
      # the script; -S: still print the reason although -s silences progress.
      curl -k -f -s -S -o "$script" "$url" || {
        log "Error: Failed to download $script"
        exit 1
      }
    else
      log "Error: Neither wget nor curl found on the system."
      exit 1
    fi
  done
  log "All scripts downloaded successfully."
}

#######################################
# Give jenkins ownership of everything in DEPLOY_DIR and set mode 755.
# Globals: DEPLOY_DIR (read)
#######################################
set_permissions() {
  cd "$DEPLOY_DIR"
  chown -R jenkins:jenkins .
  chmod -R 755 .
  # Best effort: the glob may match nothing, which must not abort the script.
  chmod +x ./*.py 2>/dev/null || true
  log "Execution permissions added and ownership fixed for all scripts."
}

#######################################
# Install a per-minute crontab entry for watchdog.sh in the jenkins user's
# crontab, dropping any existing line that mentions watchdog.sh first.
# Globals: DEPLOY_DIR (read)
#######################################
setup_crontab() {
  local cron_job="* * * * * $DEPLOY_DIR/watchdog.sh"
  log "Configuring crontab for jenkins user ..."
  # The here-document delimiter is unquoted on purpose: $cron_job is expanded
  # by this shell before the text is handed to the jenkins-owned bash.
  sudo -u jenkins bash <<EOF
(crontab -l 2>/dev/null | grep -v "watchdog.sh"; echo "$cron_job") | crontab -
EOF
  log "Crontab configured successfully."
}

#######################################
# Launch watchdog.sh as jenkins in the background, appending its output to
# DEPLOY_DIR/watchdog.log.
# Globals: DEPLOY_DIR (read)
#######################################
start_watchdog() {
  log "Starting watchdog in background ..."
  sudo -u jenkins nohup "$DEPLOY_DIR/watchdog.sh" >> "$DEPLOY_DIR/watchdog.log" 2>&1 &
  # $! is the PID of the backgrounded sudo process started on the line above.
  log "Watchdog started, PID: $!"
}

#######################################
# Install /usr/local/bin/stopwatchdog, a small script that kills processes
# matching occupy_npu_8cards.py.
#######################################
setup_stopwatchdog() {
  local cmd_path="/usr/local/bin/stopwatchdog"
  log "Creating global command $cmd_path ..."
  # Quoted delimiter: the helper script is written out literally.
  cat > "$cmd_path" << 'EOF'
#!/bin/bash
# Stop NPU occupancy process (does not affect monitoring script)
pkill -f "occupy_npu_8cards.py" && echo "Occupancy process stopped" || echo "No running occupancy process found"
EOF
  chmod 755 "$cmd_path"
  log "stopwatchdog command created. Users can type 'stopwatchdog' to stop NPU occupancy."
}

#######################################
# Log a summary of what was deployed and where.
# Globals: DEPLOY_DIR, SCRIPTS (read)
#######################################
show_summary() {
  local s
  log "==================== Deployment Complete ===================="
  log "Deployment directory: $DEPLOY_DIR"
  log "Deployed scripts:"
  for s in "${SCRIPTS[@]}"; do
    log "  - $DEPLOY_DIR/$s"
  done
  log "Crontab entry: * * * * * $DEPLOY_DIR/watchdog.sh"
  log "Monitor log: $DEPLOY_DIR/npu_auto_occupy.log"
  log "Manual stop command: stopwatchdog (available globally)"
  log "============================================================"
}

#######################################
# Entry point: run every deployment step in order.
#######################################
main() {
  check_root
  log "Starting one-click deployment of NPU auto-occupancy monitoring system..."
  create_jenkins_group
  create_jenkins_user
  ensure_jenkins_home
  grant_crontab_permission
  create_watchdog_dir
  download_scripts
  set_permissions
  setup_crontab
  start_watchdog
  setup_stopwatchdog
  show_summary
}

main "$@"