15.1 企业级部署架构
高可用架构设计
主从架构(Master-Slave):
# docker-compose.yml - Jenkins high-availability cluster
# Master + two inbound agents behind an nginx load balancer, with
# PostgreSQL and Redis as supporting services.
version: '3.8'

services:
  jenkins-master:
    image: jenkins/jenkins:lts
    container_name: jenkins-master
    ports:
      - "8080:8080"
      - "50000:50000"
    volumes:
      - jenkins_home:/var/jenkins_home
      # Mounting the Docker socket lets pipelines build images, but it is
      # root-equivalent access to the host - restrict who may run jobs.
      - /var/run/docker.sock:/var/run/docker.sock
    environment:
      # No surrounding quotes here: in list-form `environment` entries the
      # quotes would become a literal part of the value.
      - JENKINS_OPTS=--httpPort=8080
      - JAVA_OPTS=-Xmx2g -Xms1g -XX:+UseG1GC
    networks:
      - jenkins-network
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080/login"]
      interval: 30s
      timeout: 10s
      retries: 3

  jenkins-agent-1:
    image: jenkins/inbound-agent:latest
    container_name: jenkins-agent-1
    environment:
      - JENKINS_URL=http://jenkins-master:8080
      - JENKINS_SECRET=${JENKINS_AGENT_SECRET}
      - JENKINS_AGENT_NAME=agent-1
      - JENKINS_AGENT_WORKDIR=/home/jenkins/agent
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
      - agent1_workspace:/home/jenkins/agent
    networks:
      - jenkins-network
    depends_on:
      - jenkins-master
    restart: unless-stopped

  jenkins-agent-2:
    image: jenkins/inbound-agent:latest
    container_name: jenkins-agent-2
    environment:
      - JENKINS_URL=http://jenkins-master:8080
      - JENKINS_SECRET=${JENKINS_AGENT_SECRET}
      - JENKINS_AGENT_NAME=agent-2
      - JENKINS_AGENT_WORKDIR=/home/jenkins/agent
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
      - agent2_workspace:/home/jenkins/agent
    networks:
      - jenkins-network
    depends_on:
      - jenkins-master
    restart: unless-stopped

  nginx-lb:
    image: nginx:alpine
    container_name: jenkins-lb
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./nginx.conf:/etc/nginx/nginx.conf
      - ./ssl:/etc/nginx/ssl
    networks:
      - jenkins-network
    depends_on:
      - jenkins-master
    restart: unless-stopped

  postgres:
    image: postgres:13
    container_name: jenkins-db
    environment:
      - POSTGRES_DB=jenkins
      - POSTGRES_USER=jenkins
      - POSTGRES_PASSWORD=${DB_PASSWORD}
    volumes:
      - postgres_data:/var/lib/postgresql/data
    networks:
      - jenkins-network
    restart: unless-stopped

  redis:
    image: redis:alpine
    container_name: jenkins-cache
    volumes:
      - redis_data:/data
    networks:
      - jenkins-network
    restart: unless-stopped

volumes:
  jenkins_home:
  agent1_workspace:
  agent2_workspace:
  postgres_data:
  redis_data:

networks:
  jenkins-network:
    driver: bridge
Nginx负载均衡配置:
# nginx.conf - TLS-terminating reverse proxy / load balancer in front of Jenkins.
events {
    worker_connections 1024;
}

http {
    upstream jenkins {
        server jenkins-master:8080 max_fails=3 fail_timeout=30s;
        # Add more Jenkins instances here for load balancing, e.g.:
        # server jenkins-master-2:8080 max_fails=3 fail_timeout=30s;
    }

    # Per-client request rate limiting (10 req/s, 10 MB state zone).
    limit_req_zone $binary_remote_addr zone=jenkins:10m rate=10r/s;

    server {
        listen 80;
        server_name jenkins.company.com;
        # Redirect all plain-HTTP traffic to HTTPS.
        return 301 https://$server_name$request_uri;
    }

    server {
        listen 443 ssl http2;
        server_name jenkins.company.com;

        # SSL configuration
        ssl_certificate /etc/nginx/ssl/jenkins.crt;
        ssl_certificate_key /etc/nginx/ssl/jenkins.key;
        ssl_protocols TLSv1.2 TLSv1.3;
        ssl_ciphers ECDHE-RSA-AES256-GCM-SHA512:DHE-RSA-AES256-GCM-SHA512:ECDHE-RSA-AES256-GCM-SHA384:DHE-RSA-AES256-GCM-SHA384;
        ssl_prefer_server_ciphers off;
        ssl_session_cache shared:SSL:10m;
        ssl_session_timeout 10m;

        # Security headers
        add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always;
        add_header X-Frame-Options SAMEORIGIN;
        add_header X-Content-Type-Options nosniff;
        add_header X-XSS-Protection "1; mode=block";
        add_header Referrer-Policy "strict-origin-when-cross-origin";

        # Maximum request body size (large artifact uploads).
        client_max_body_size 100M;

        # Upstream timeouts
        proxy_connect_timeout 60s;
        proxy_send_timeout 60s;
        proxy_read_timeout 60s;

        location / {
            # Apply the rate limit with a small burst allowance.
            limit_req zone=jenkins burst=20 nodelay;

            proxy_pass http://jenkins;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
            proxy_set_header X-Forwarded-Port $server_port;

            # WebSocket support (Jenkins agents / Blue Ocean).
            proxy_http_version 1.1;
            proxy_set_header Upgrade $http_upgrade;
            proxy_set_header Connection "upgrade";

            # Disable buffering for long-running streaming responses (console logs).
            proxy_buffering off;
            proxy_request_buffering off;
        }

        # Static asset caching
        location ~* \.(css|js|png|jpg|jpeg|gif|ico|svg)$ {
            proxy_pass http://jenkins;
            proxy_set_header Host $host;
            expires 1y;
            add_header Cache-Control "public, immutable";
        }

        # Load-balancer health check endpoint (answered locally, not proxied).
        location /health {
            access_log off;
            return 200 "healthy\n";
            add_header Content-Type text/plain;
        }
    }
}
容器化部署
Kubernetes部署配置:
# jenkins-namespace.yaml
apiVersion: v1
kind: Namespace
metadata:
  name: jenkins
  labels:
    name: jenkins
---
# jenkins-rbac.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: jenkins
  namespace: jenkins
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: jenkins
rules:
  # Minimal permissions for the Kubernetes plugin to launch agent pods.
  - apiGroups: [""]
    resources: ["pods"]
    verbs: ["create", "delete", "get", "list", "patch", "update", "watch"]
  - apiGroups: [""]
    resources: ["pods/exec"]
    verbs: ["create", "delete", "get", "list", "patch", "update", "watch"]
  - apiGroups: [""]
    resources: ["pods/log"]
    verbs: ["get", "list", "watch"]
  - apiGroups: [""]
    resources: ["secrets"]
    verbs: ["get"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: jenkins
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: jenkins
subjects:
  - kind: ServiceAccount
    name: jenkins
    namespace: jenkins
---
# jenkins-pvc.yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: jenkins-pvc
  namespace: jenkins
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 50Gi
  storageClassName: fast-ssd
---
# jenkins-deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: jenkins
  namespace: jenkins
spec:
  # JENKINS_HOME on a ReadWriteOnce PVC - keep replicas at 1.
  replicas: 1
  selector:
    matchLabels:
      app: jenkins
  template:
    metadata:
      labels:
        app: jenkins
    spec:
      serviceAccountName: jenkins
      securityContext:
        fsGroup: 1000
        runAsUser: 1000
      containers:
        - name: jenkins
          image: jenkins/jenkins:lts
          ports:
            - containerPort: 8080
            - containerPort: 50000
          resources:
            limits:
              memory: "4Gi"
              cpu: "2000m"
            requests:
              memory: "2Gi"
              cpu: "1000m"
          env:
            - name: JAVA_OPTS
              value: "-Xmx3g -Xms2g -XX:+UseG1GC -Djenkins.install.runSetupWizard=false"
            - name: JENKINS_OPTS
              value: "--httpPort=8080"
          volumeMounts:
            - name: jenkins-home
              mountPath: /var/jenkins_home
            # NOTE(review): hostPath docker.sock grants host-root access and
            # ties the pod to Docker-runtime nodes - confirm this is intended.
            - name: docker-sock
              mountPath: /var/run/docker.sock
          livenessProbe:
            httpGet:
              path: /login
              port: 8080
            initialDelaySeconds: 60
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /login
              port: 8080
            initialDelaySeconds: 30
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
      volumes:
        - name: jenkins-home
          persistentVolumeClaim:
            claimName: jenkins-pvc
        - name: docker-sock
          hostPath:
            path: /var/run/docker.sock
---
# jenkins-service.yaml
apiVersion: v1
kind: Service
metadata:
  name: jenkins
  namespace: jenkins
spec:
  selector:
    app: jenkins
  ports:
    - name: http
      port: 8080
      targetPort: 8080
    - name: jnlp
      port: 50000
      targetPort: 50000
  type: ClusterIP
---
# jenkins-ingress.yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: jenkins
  namespace: jenkins
  annotations:
    # NOTE(review): the kubernetes.io/ingress.class annotation is deprecated
    # in favor of spec.ingressClassName - verify your controller version.
    kubernetes.io/ingress.class: nginx
    cert-manager.io/cluster-issuer: letsencrypt-prod
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
    nginx.ingress.kubernetes.io/proxy-body-size: "100m"
    nginx.ingress.kubernetes.io/proxy-read-timeout: "600"
    nginx.ingress.kubernetes.io/proxy-send-timeout: "600"
spec:
  tls:
    - hosts:
        - jenkins.company.com
      secretName: jenkins-tls
  rules:
    - host: jenkins.company.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: jenkins
                port:
                  number: 8080
15.2 安全最佳实践
身份认证和授权
LDAP集成配置:
// ldap-security-config.groovy
// Configures Jenkins to authenticate against a corporate LDAP directory and
// applies a matrix-based authorization strategy. Intended to run from the
// Jenkins script console or an init.groovy.d startup script.
import jenkins.model.*
import hudson.security.*
import jenkins.security.plugins.ldap.*
def instance = Jenkins.getInstance()
// LDAP realm configuration.
// NOTE(review): the manager password is hard-coded below - load it from a
// credential or environment variable instead of committing it to VCS.
def ldapRealm = new LDAPSecurityRealm(
"ldap://ldap.company.com:389", // LDAP server URL (plain LDAP; consider ldaps://)
"dc=company,dc=com", // root DN
"uid={0},ou=people,dc=company,dc=com", // user search template
"ou=groups,dc=company,dc=com", // group search base
"cn={0}", // group search filter
new FromGroupSearchLDAPGroupMembershipStrategy(""),
"cn=jenkins,ou=service,dc=company,dc=com", // manager DN used to bind
"password", // manager password
false, // inhibitInferRootDN
false, // disableMailAddressResolver
null, // environment properties
"displayName", // display-name attribute
"mail", // email attribute
IdStrategy.CASE_INSENSITIVE, // user-id strategy
IdStrategy.CASE_INSENSITIVE // group-id strategy
)
// Matrix-based authorization: grant permissions per user/group.
def strategy = new GlobalMatrixAuthorizationStrategy()
// Administrator permissions
strategy.add(Jenkins.ADMINISTER, "admin")
strategy.add(Jenkins.ADMINISTER, "jenkins-admins")
// Developer permissions (read + build/cancel + workspace access)
strategy.add(Jenkins.READ, "developers")
strategy.add(Item.BUILD, "developers")
strategy.add(Item.CANCEL, "developers")
strategy.add(Item.READ, "developers")
strategy.add(Item.WORKSPACE, "developers")
// Read-only viewer permissions
strategy.add(Jenkins.READ, "viewers")
strategy.add(Item.READ, "viewers")
// Apply and persist the configuration.
instance.setSecurityRealm(ldapRealm)
instance.setAuthorizationStrategy(strategy)
instance.save()
println "LDAP安全配置已应用"
基于角色的访问控制(RBAC):
// rbac-config.groovy
// Configures role-based access control via the Role Strategy plugin:
// defines global and per-project roles, then assigns users/groups to them.
// Requires the role-strategy plugin to be installed.
import com.michelin.cio.hudson.plugins.rolestrategy.*
import hudson.security.*
import jenkins.model.*
def instance = Jenkins.getInstance()
// Create a fresh role-based strategy (replaces the current one on apply).
def roleStrategy = new RoleBasedAuthorizationStrategy()
// Global roles: apply Jenkins-wide.
def globalRoles = [
"admin": [
Permission.fromId("hudson.model.Hudson.Administer"),
Permission.fromId("hudson.model.Hudson.Read")
],
"developer": [
Permission.fromId("hudson.model.Hudson.Read"),
Permission.fromId("hudson.model.Item.Build"),
Permission.fromId("hudson.model.Item.Cancel"),
Permission.fromId("hudson.model.Item.Read"),
Permission.fromId("hudson.model.Item.Workspace")
],
"viewer": [
Permission.fromId("hudson.model.Hudson.Read"),
Permission.fromId("hudson.model.Item.Read")
]
]
// Project roles: scoped by a job-name regex (".*" below matches all jobs).
def projectRoles = [
"project-admin": [
Permission.fromId("hudson.model.Item.Build"),
Permission.fromId("hudson.model.Item.Cancel"),
Permission.fromId("hudson.model.Item.Configure"),
Permission.fromId("hudson.model.Item.Create"),
Permission.fromId("hudson.model.Item.Delete"),
Permission.fromId("hudson.model.Item.Read"),
Permission.fromId("hudson.model.Item.Workspace")
],
"project-developer": [
Permission.fromId("hudson.model.Item.Build"),
Permission.fromId("hudson.model.Item.Cancel"),
Permission.fromId("hudson.model.Item.Read"),
Permission.fromId("hudson.model.Item.Workspace")
]
]
// Register the global roles.
globalRoles.each { roleName, permissions ->
def role = new Role(roleName, permissions as Set)
roleStrategy.addRole(RoleBasedAuthorizationStrategy.GLOBAL, role)
}
// Register the project roles (pattern ".*").
projectRoles.each { roleName, permissions ->
def role = new Role(roleName, ".*", permissions as Set)
roleStrategy.addRole(RoleBasedAuthorizationStrategy.PROJECT, role)
}
// Assign users/groups to global roles.
roleStrategy.assignRole(RoleBasedAuthorizationStrategy.GLOBAL, "admin", "admin")
roleStrategy.assignRole(RoleBasedAuthorizationStrategy.GLOBAL, "developer", "dev-team")
roleStrategy.assignRole(RoleBasedAuthorizationStrategy.GLOBAL, "viewer", "qa-team")
// Assign groups to project roles.
roleStrategy.assignRole(RoleBasedAuthorizationStrategy.PROJECT, "project-admin", "project-leads")
roleStrategy.assignRole(RoleBasedAuthorizationStrategy.PROJECT, "project-developer", "developers")
// Apply and persist.
instance.setAuthorizationStrategy(roleStrategy)
instance.save()
println "基于角色的访问控制已配置"
凭据管理
凭据安全配置:
// credentials-security.groovy
// Seeds the Jenkins system credentials store: creates a "production" domain
// scoped by hostname, plus username/password, SSH-key, secret-text and
// certificate credentials.
// NOTE(review): all secret values below are hard-coded placeholders - inject
// real values from a secret store / environment, never commit them to VCS.
import com.cloudbees.plugins.credentials.*
import com.cloudbees.plugins.credentials.domains.*
import com.cloudbees.plugins.credentials.impl.*
import hudson.util.Secret
import jenkins.model.Jenkins
import org.jenkinsci.plugins.plaincredentials.impl.*
def instance = Jenkins.getInstance()
// System-level credentials store (first SystemCredentialsProvider).
def store = instance.getExtensionList('com.cloudbees.plugins.credentials.SystemCredentialsProvider')[0].getStore()
// Domain restricting credentials to production hostnames.
def domain = new Domain("production", "生产环境凭据", [
new HostnameSpecification("*.prod.company.com", "")
])
// Username/password credential (scoped to the production domain below).
def usernamePassword = new UsernamePasswordCredentialsImpl(
CredentialsScope.GLOBAL,
"prod-db-credentials",
"生产数据库凭据",
"dbuser",
"secure_password"
)
// SSH private-key credential (key pasted directly; passphrase follows).
def sshKey = new BasicSSHUserPrivateKey(
CredentialsScope.GLOBAL,
"prod-ssh-key",
"deploy",
new BasicSSHUserPrivateKey.DirectEntryPrivateKeySource("""-----BEGIN OPENSSH PRIVATE KEY-----
...
-----END OPENSSH PRIVATE KEY-----"""),
"passphrase",
"生产服务器SSH密钥"
)
// Secret-text credential (API token).
def secretText = new StringCredentialsImpl(
CredentialsScope.GLOBAL,
"api-token",
"API访问令牌",
Secret.fromString("secret_api_token")
)
// Certificate credential backed by a keystore file on the controller.
def certificate = new CertificateCredentialsImpl(
CredentialsScope.GLOBAL,
"ssl-certificate",
"SSL证书",
"keystore_password",
new CertificateCredentialsImpl.FileOnMasterKeyStoreSource("/path/to/keystore.p12")
)
// Persist: domain-scoped credentials first, then global-domain ones.
store.addDomain(domain)
store.addCredentials(domain, usernamePassword)
store.addCredentials(domain, sshKey)
store.addCredentials(Domain.global(), secretText)
store.addCredentials(Domain.global(), certificate)
println "凭据已安全配置"
网络安全
防火墙配置脚本:
#!/bin/bash
# jenkins-firewall.sh
# Locks down a Jenkins host with iptables: default-deny inbound, allow SSH,
# LB-originated HTTP/S, internal Jenkins web/agent ports, with rate limiting
# and logging of rejected traffic.
# NOTE(review): running this over SSH briefly drops packets between setting
# the DROP policy and adding the ESTABLISHED rule - prefer iptables-restore
# with a complete ruleset for remote execution.
# Flush all existing rules and user chains in every table.
iptables -F
iptables -X
iptables -t nat -F
iptables -t nat -X
iptables -t mangle -F
iptables -t mangle -X
# Default policies: drop inbound/forwarded traffic, allow outbound.
iptables -P INPUT DROP
iptables -P FORWARD DROP
iptables -P OUTPUT ACCEPT
# Allow loopback traffic.
iptables -A INPUT -i lo -j ACCEPT
iptables -A OUTPUT -o lo -j ACCEPT
# Allow replies for established connections.
iptables -A INPUT -m state --state ESTABLISHED,RELATED -j ACCEPT
# Allow SSH, restricted to trusted source networks.
iptables -A INPUT -p tcp --dport 22 -s 192.168.1.0/24 -j ACCEPT
iptables -A INPUT -p tcp --dport 22 -s 10.0.0.0/8 -j ACCEPT
# Allow HTTP/HTTPS only from the load balancer.
iptables -A INPUT -p tcp --dport 80 -s 192.168.1.100 -j ACCEPT
iptables -A INPUT -p tcp --dport 443 -s 192.168.1.100 -j ACCEPT
# Allow the Jenkins web UI from internal networks.
iptables -A INPUT -p tcp --dport 8080 -s 192.168.1.0/24 -j ACCEPT
iptables -A INPUT -p tcp --dport 8080 -s 10.0.0.0/8 -j ACCEPT
# Allow Jenkins agent (JNLP) connections from internal networks.
iptables -A INPUT -p tcp --dport 50000 -s 192.168.1.0/24 -j ACCEPT
iptables -A INPUT -p tcp --dport 50000 -s 10.0.0.0/8 -j ACCEPT
# Rate-limited 8080 access (basic DoS protection).
# NOTE(review): this rule sits AFTER the unconditional subnet accepts, so it
# only matches sources outside those subnets - i.e. it opens 8080 to the
# whole world at a limited rate. Confirm that is the intent.
iptables -A INPUT -p tcp --dport 8080 -m limit --limit 25/minute --limit-burst 100 -j ACCEPT
# Log packets that fall through to the DROP policy (rate-limited).
iptables -A INPUT -m limit --limit 5/min -j LOG --log-prefix "iptables denied: " --log-level 7
# Persist the rules (consumed by iptables-restore / netfilter-persistent).
iptables-save > /etc/iptables/rules.v4
echo "防火墙规则已配置"
15.3 性能优化策略
JVM调优
Jenkins启动脚本优化:
#!/bin/bash
# jenkins-optimized-start.sh
# Launches Jenkins with JVM settings sized from the host's RAM/CPU:
# picks heap/new-gen sizes, chooses G1 vs Parallel GC, enables GC logging,
# heap dumps and JMX, then starts jenkins.war.
# Detect host resources.
TOTAL_MEM=$(free -m | awk 'NR==2{printf "%.0f", $2}')
CPU_CORES=$(nproc)
# Heap sizing tiers (roughly 50-60% of total memory).
# NOTE(review): $TOTAL_MEM is unquoted in the [ ] tests below; safe while it
# is always numeric, but quoting would be more defensive.
if [ $TOTAL_MEM -gt 16384 ]; then
HEAP_SIZE="8g"
NEW_SIZE="2g"
elif [ $TOTAL_MEM -gt 8192 ]; then
HEAP_SIZE="4g"
NEW_SIZE="1g"
elif [ $TOTAL_MEM -gt 4096 ]; then
HEAP_SIZE="2g"
NEW_SIZE="512m"
else
HEAP_SIZE="1g"
NEW_SIZE="256m"
fi
# GC selection: G1 for larger hosts, Parallel for small ones.
GC_OPTS=""
if [ $TOTAL_MEM -gt 4096 ]; then
# G1GC - recommended for large heaps.
GC_OPTS="-XX:+UseG1GC \
-XX:G1HeapRegionSize=16m \
-XX:G1ReservePercent=25 \
-XX:G1NewSizePercent=10 \
-XX:G1MaxNewSizePercent=25 \
-XX:MaxGCPauseMillis=200 \
-XX:G1HeapWastePercent=5"
else
# ParallelGC - suitable for small heaps.
GC_OPTS="-XX:+UseParallelGC \
-XX:ParallelGCThreads=$CPU_CORES"
fi
# Core JVM options (fixed heap, string dedup, compressed oops, headless).
# NOTE(review): -Xmn conflicts with G1's adaptive young-gen sizing - confirm
# it should be set together with the G1 percent options above.
JAVA_OPTS="-Xms$HEAP_SIZE \
-Xmx$HEAP_SIZE \
-Xmn$NEW_SIZE \
$GC_OPTS \
-XX:+AlwaysPreTouch \
-XX:+UseStringDeduplication \
-XX:+OptimizeStringConcat \
-XX:+UseCompressedOops \
-XX:+UseCompressedClassPointers \
-XX:ReservedCodeCacheSize=256m \
-XX:InitialCodeCacheSize=64m \
-Djava.awt.headless=true \
-Dfile.encoding=UTF-8 \
-Dsun.jnu.encoding=UTF-8 \
-Djava.net.preferIPv4Stack=true \
-Djenkins.install.runSetupWizard=false"
# GC logging.
# NOTE(review): these are JDK 8-era flags; JDK 9+ removed PrintGC* and
# UseGCLogFileRotation in favor of -Xlog:gc* - confirm the target JDK.
GC_LOG_OPTS="-Xloggc:/var/log/jenkins/gc.log \
-XX:+UseGCLogFileRotation \
-XX:NumberOfGCLogFiles=5 \
-XX:GCLogFileSize=100M \
-XX:+PrintGC \
-XX:+PrintGCDetails \
-XX:+PrintGCTimeStamps \
-XX:+PrintGCDateStamps \
-XX:+PrintGCApplicationStoppedTime"
# Diagnostics: heap dump on OOM plus a JMX endpoint.
# NOTE(review): JMX on port 9999 with authenticate=false/ssl=false is open
# to anyone who can reach the port - firewall it or enable auth.
DEBUG_OPTS="-XX:+HeapDumpOnOutOfMemoryError \
-XX:HeapDumpPath=/var/log/jenkins/ \
-XX:+PrintCommandLineFlags \
-Dcom.sun.management.jmxremote \
-Dcom.sun.management.jmxremote.port=9999 \
-Dcom.sun.management.jmxremote.authenticate=false \
-Dcom.sun.management.jmxremote.ssl=false"
# Jenkins (Winstone) HTTP tuning options.
JENKINS_OPTS="--httpPort=8080 \
--httpKeepAliveTimeout=30000 \
--httpMaxKeepAliveRequests=100 \
--handlerCountMax=300 \
--handlerCountMaxIdle=50"
# Report the chosen settings, then start Jenkins in the foreground.
echo "启动Jenkins with optimized settings..."
echo "Heap Size: $HEAP_SIZE"
echo "CPU Cores: $CPU_CORES"
echo "Total Memory: ${TOTAL_MEM}MB"
export JAVA_OPTS="$JAVA_OPTS $GC_LOG_OPTS $DEBUG_OPTS"
export JENKINS_OPTS="$JENKINS_OPTS"
# Launch (word-splitting of $JAVA_OPTS/$JENKINS_OPTS is intentional here).
java $JAVA_OPTS -jar /usr/share/jenkins/jenkins.war $JENKINS_OPTS
构建优化
高性能Pipeline模板:
// Jenkinsfile - high-performance build template
// Declarative pipeline that dynamically selects a build node, does a shallow
// checkout with dependency caching, runs tests/quality/security in parallel,
// packages artifacts, and builds+pushes a Docker image for main/develop/PRs.
pipeline {
agent none
options {
// Build retention policy.
buildDiscarder(logRotator(
numToKeepStr: '10',
daysToKeepStr: '30',
artifactNumToKeepStr: '5'
))
// Whole-pipeline timeout.
timeout(time: 30, unit: 'MINUTES')
// Disallow concurrent builds of this job.
disableConcurrentBuilds()
// Skip the implicit SCM checkout; done explicitly below.
skipDefaultCheckout()
// Timestamp console output.
timestamps()
}
environment {
// Shared dependency caches on the agent filesystem.
MAVEN_OPTS = '-Dmaven.repo.local=/var/jenkins_home/.m2/repository'
GRADLE_USER_HOME = '/var/jenkins_home/.gradle'
NPM_CONFIG_CACHE = '/var/jenkins_home/.npm'
// Parallelism passed to Maven -T.
PARALLEL_JOBS = '4'
// Enable Docker BuildKit.
DOCKER_BUILDKIT = '1'
BUILDKIT_PROGRESS = 'plain'
}
stages {
stage('Preparation') {
agent {
label 'master'
}
steps {
script {
// Pick the first online node labelled 'build'; fall back to master.
// NOTE(review): direct Jenkins API access from a sandboxed pipeline
// requires script approval - confirm this job is trusted.
def availableNodes = jenkins.model.Jenkins.instance.nodes.findAll { node ->
node.computer.isOnline() &&
!node.computer.isTemporarilyOffline() &&
node.labelString.contains('build')
}
if (availableNodes.size() > 0) {
env.BUILD_NODE = availableNodes[0].nodeName
} else {
env.BUILD_NODE = 'master'
}
echo "Selected build node: ${env.BUILD_NODE}"
}
}
}
stage('Checkout & Cache') {
agent {
label "${env.BUILD_NODE}"
}
steps {
// Shallow (depth 1), cleaned checkout to minimise clone time.
checkout([
$class: 'GitSCM',
branches: [[name: "${env.BRANCH_NAME}"]],
doGenerateSubmoduleConfigurations: false,
extensions: [
[$class: 'CloneOption', depth: 1, noTags: false, reference: '', shallow: true],
[$class: 'CheckoutOption', timeout: 20],
[$class: 'CleanBeforeCheckout']
],
submoduleCfg: [],
userRemoteConfigs: [[url: env.GIT_URL]]
])
// Warm the dependency caches for whichever build tool is present.
script {
if (fileExists('pom.xml')) {
sh 'mvn dependency:go-offline -B'
}
if (fileExists('package.json')) {
sh 'npm ci --cache /var/jenkins_home/.npm'
}
if (fileExists('build.gradle')) {
sh './gradlew dependencies'
}
}
}
}
stage('Parallel Build & Test') {
parallel {
stage('Unit Tests') {
agent {
label "${env.BUILD_NODE}"
}
steps {
script {
if (fileExists('pom.xml')) {
sh 'mvn test -B -T ${PARALLEL_JOBS}'
} else if (fileExists('package.json')) {
sh 'npm test'
} else if (fileExists('build.gradle')) {
sh './gradlew test --parallel'
}
}
}
post {
always {
// NOTE(review): 'publishTestResults' is plugin-provided; the
// built-in step for JUnit XML is 'junit' - verify availability.
publishTestResults testResultsPattern: '**/target/surefire-reports/*.xml, **/test-results/**/*.xml'
}
}
}
stage('Code Quality') {
agent {
label "${env.BUILD_NODE}"
}
steps {
script {
if (fileExists('pom.xml')) {
sh 'mvn sonar:sonar -B'
} else if (fileExists('package.json')) {
sh 'npm run lint'
}
}
}
}
stage('Security Scan') {
agent {
label "${env.BUILD_NODE}"
}
steps {
script {
if (fileExists('pom.xml')) {
sh 'mvn org.owasp:dependency-check-maven:check'
} else if (fileExists('package.json')) {
sh 'npm audit'
}
}
}
}
}
}
stage('Build Artifacts') {
agent {
label "${env.BUILD_NODE}"
}
steps {
script {
// Package only - tests already ran in the parallel stage.
if (fileExists('pom.xml')) {
sh 'mvn package -B -T ${PARALLEL_JOBS} -DskipTests'
} else if (fileExists('package.json')) {
sh 'npm run build'
} else if (fileExists('build.gradle')) {
sh './gradlew build -x test --parallel'
}
}
}
post {
success {
archiveArtifacts artifacts: '**/target/*.jar, **/dist/**, **/build/libs/*.jar', fingerprint: true
}
}
}
stage('Docker Build') {
when {
anyOf {
branch 'main'
branch 'develop'
changeRequest()
}
}
agent {
label "${env.BUILD_NODE}"
}
steps {
script {
def image = docker.build("${env.IMAGE_NAME}:${env.BUILD_NUMBER}")
// Push to the registry and scan the image concurrently.
parallel (
"Push to Registry": {
docker.withRegistry('https://registry.company.com', 'docker-registry-credentials') {
image.push()
image.push('latest')
}
},
"Security Scan": {
sh "docker run --rm -v /var/run/docker.sock:/var/run/docker.sock aquasec/trivy ${env.IMAGE_NAME}:${env.BUILD_NUMBER}"
}
)
}
}
}
}
post {
always {
node("${env.BUILD_NODE}") {
// Clean the workspace.
cleanWs()
// Reclaim Docker disk space on the agent.
sh 'docker system prune -f'
}
}
success {
script {
if (env.BRANCH_NAME == 'main') {
// Fire-and-forget trigger of the production deployment job.
build job: 'deploy-production',
parameters: [
string(name: 'IMAGE_TAG', value: env.BUILD_NUMBER)
],
wait: false
}
}
}
failure {
emailext (
subject: "Build Failed: ${env.JOB_NAME} - ${env.BUILD_NUMBER}",
body: "Build failed. Check console output at ${env.BUILD_URL}",
to: "${env.CHANGE_AUTHOR_EMAIL}"
)
}
}
}
15.4 运维管理规范
监控和告警
Prometheus监控配置:
# prometheus.yml - scrapes the Jenkins controller (Prometheus metrics plugin
# at /prometheus) plus node-exporter on the agents and the host.
global:
  scrape_interval: 15s
  evaluation_interval: 15s

rule_files:
  - "jenkins_rules.yml"

alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - alertmanager:9093

scrape_configs:
  - job_name: 'jenkins'
    static_configs:
      - targets: ['jenkins:8080']
    metrics_path: '/prometheus'
    scrape_interval: 30s

  - job_name: 'jenkins-nodes'
    static_configs:
      - targets:
          - 'jenkins-agent-1:9100'
          - 'jenkins-agent-2:9100'
    scrape_interval: 30s

  - job_name: 'system'
    static_configs:
      - targets: ['node-exporter:9100']
    scrape_interval: 30s
告警规则配置:
# jenkins_rules.yml - Prometheus alerting rules for the Jenkins stack.
groups:
  - name: jenkins
    rules:
      # Jenkins service availability
      - alert: JenkinsDown
        expr: up{job="jenkins"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Jenkins服务不可用"
          description: "Jenkins服务已停止响应超过1分钟"

      # Build queue backing up
      - alert: JenkinsBuildQueueHigh
        expr: jenkins_queue_size_value > 10
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Jenkins构建队列过长"
          description: "构建队列长度: {{ $value }}"

      # Agent nodes offline
      - alert: JenkinsNodeOffline
        expr: jenkins_node_offline_value > 0
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "Jenkins节点离线"
          description: "有 {{ $value }} 个节点离线"

      # Disk space running out on JENKINS_HOME
      - alert: JenkinsDiskSpaceHigh
        expr: (1 - (node_filesystem_avail_bytes{mountpoint="/var/lib/jenkins"} / node_filesystem_size_bytes{mountpoint="/var/lib/jenkins"})) * 100 > 85
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Jenkins磁盘空间不足"
          description: "磁盘使用率: {{ $value }}%"

      # Host memory pressure
      - alert: JenkinsMemoryHigh
        expr: (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 90
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Jenkins内存使用率过高"
          description: "内存使用率: {{ $value }}%"

      # Build failure rate
      - alert: JenkinsBuildFailureRateHigh
        expr: (rate(jenkins_builds_failed_build_count[5m]) / rate(jenkins_builds_last_build_result_ordinal[5m])) * 100 > 20
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "Jenkins构建失败率过高"
          description: "构建失败率: {{ $value }}%"
备份策略
自动化备份脚本:
#!/bin/bash
# jenkins-backup-advanced.sh
# Creates a compressed, verified backup of JENKINS_HOME (with lock-file
# protection against concurrent runs), writes a JSON metadata sidecar,
# syncs both to a remote backup host, prunes old backups locally and
# remotely, and optionally notifies Slack.
# Configuration
JENKINS_HOME="/var/lib/jenkins"
BACKUP_DIR="/backup/jenkins"
REMOTE_BACKUP_HOST="backup.company.com"
REMOTE_BACKUP_USER="backup"
REMOTE_BACKUP_PATH="/backup/jenkins"
RETENTION_DAYS=30
LOG_FILE="/var/log/jenkins_backup.log"
LOCK_FILE="/var/run/jenkins_backup.lock"
# Timestamped logger (console + log file).
log() {
echo "$(date '+%Y-%m-%d %H:%M:%S') - $1" | tee -a "$LOG_FILE"
}
# Bail out if another backup run holds the lock.
# NOTE(review): a stale lock left by a crashed run blocks future backups;
# consider checking whether the recorded PID is still alive.
if [ -f "$LOCK_FILE" ]; then
log "备份进程已在运行,退出"
exit 1
fi
# Record our PID in the lock file.
echo $$ > "$LOCK_FILE"
# Remove the lock on exit.
cleanup() {
rm -f "$LOCK_FILE"
log "备份进程结束"
}
# Ensure cleanup runs on any exit path.
trap cleanup EXIT
log "开始Jenkins备份"
# Ensure the backup directory exists.
mkdir -p "$BACKUP_DIR"
# Timestamped backup name.
BACKUP_NAME="jenkins_backup_$(date +%Y%m%d_%H%M%S)"
BACKUP_PATH="$BACKUP_DIR/$BACKUP_NAME.tar.gz"
# Record whether Jenkins is running (hot vs cold backup).
# NOTE(review): a hot backup may capture files mid-write; for strict
# consistency, quiesce Jenkins or use filesystem snapshots.
if systemctl is-active jenkins &>/dev/null; then
JENKINS_RUNNING=true
log "Jenkins正在运行,将进行热备份"
else
JENKINS_RUNNING=false
log "Jenkins未运行,进行冷备份"
fi
# Create the archive.
log "创建备份: $BACKUP_PATH"
# Paths excluded from the archive (caches, workspaces, logs).
EXCLUDE_PATTERNS=(
"--exclude=workspace/*"
"--exclude=builds/*/archive"
"--exclude=.m2/repository"
"--exclude=.gradle/caches"
"--exclude=.npm"
"--exclude=logs/*"
"--exclude=*.log"
"--exclude=*.tmp"
)
# Archive JENKINS_HOME relative to its parent directory.
if tar czf "$BACKUP_PATH" \
"${EXCLUDE_PATTERNS[@]}" \
-C "$(dirname "$JENKINS_HOME")" \
"$(basename "$JENKINS_HOME")"; then
log "备份创建成功: $BACKUP_PATH"
else
log "备份创建失败"
exit 1
fi
# Verify the archive is readable end-to-end.
if tar tzf "$BACKUP_PATH" > /dev/null 2>&1; then
log "备份验证成功"
else
log "备份验证失败"
exit 1
fi
# Record size and checksum for the metadata file.
BACKUP_SIZE=$(du -h "$BACKUP_PATH" | cut -f1)
BACKUP_MD5=$(md5sum "$BACKUP_PATH" | cut -d' ' -f1)
log "备份大小: $BACKUP_SIZE"
log "备份MD5: $BACKUP_MD5"
# Write a JSON metadata sidecar next to the archive.
cat > "$BACKUP_DIR/$BACKUP_NAME.meta" << EOF
{
"backup_name": "$BACKUP_NAME",
"backup_path": "$BACKUP_PATH",
"backup_time": "$(date -Iseconds)",
"jenkins_home": "$JENKINS_HOME",
"jenkins_running": $JENKINS_RUNNING,
"backup_size": "$BACKUP_SIZE",
"backup_md5": "$BACKUP_MD5",
"jenkins_version": "$(cat $JENKINS_HOME/jenkins.install.InstallUtil.lastExecVersion 2>/dev/null || echo 'unknown')",
"hostname": "$(hostname)",
"os_info": "$(uname -a)"
}
EOF
# Push archive + metadata to the remote backup host (best effort).
if [ -n "$REMOTE_BACKUP_HOST" ]; then
log "同步备份到远程服务器: $REMOTE_BACKUP_HOST"
if rsync -avz --progress \
"$BACKUP_PATH" \
"$BACKUP_DIR/$BACKUP_NAME.meta" \
"$REMOTE_BACKUP_USER@$REMOTE_BACKUP_HOST:$REMOTE_BACKUP_PATH/"; then
log "远程同步成功"
else
log "远程同步失败"
fi
fi
# Prune local backups older than the retention window.
log "清理超过 $RETENTION_DAYS 天的备份"
find "$BACKUP_DIR" -name "jenkins_backup_*.tar.gz" -mtime +$RETENTION_DAYS -delete
find "$BACKUP_DIR" -name "jenkins_backup_*.meta" -mtime +$RETENTION_DAYS -delete
# Prune remote backups the same way.
if [ -n "$REMOTE_BACKUP_HOST" ]; then
ssh "$REMOTE_BACKUP_USER@$REMOTE_BACKUP_HOST" \
"find $REMOTE_BACKUP_PATH -name 'jenkins_backup_*.tar.gz' -mtime +$RETENTION_DAYS -delete"
ssh "$REMOTE_BACKUP_USER@$REMOTE_BACKUP_HOST" \
"find $REMOTE_BACKUP_PATH -name 'jenkins_backup_*.meta' -mtime +$RETENTION_DAYS -delete"
fi
# Optional Slack notification (only if curl and a webhook are available).
if command -v curl &> /dev/null && [ -n "$SLACK_WEBHOOK" ]; then
curl -X POST -H 'Content-type: application/json' \
--data "{
\"text\": \"Jenkins备份完成\\n备份文件: $BACKUP_NAME\\n备份大小: $BACKUP_SIZE\\n备份时间: $(date)\"
}" \
"$SLACK_WEBHOOK"
fi
log "Jenkins备份完成"
15.5 团队协作模式
分支策略
GitFlow Pipeline配置:
// Jenkinsfile.gitflow
// GitFlow-aware pipeline: derives a build strategy and target environment
// from the branch name, runs quality/security/performance gates, requires
// manual approval for production-bound branches, then deploys via Ansible
// and reports the result to Slack.
pipeline {
agent none
parameters {
choice(
name: 'DEPLOY_ENV',
choices: ['dev', 'staging', 'production'],
description: '部署环境'
)
booleanParam(
name: 'SKIP_TESTS',
defaultValue: false,
description: '跳过测试'
)
}
environment {
DOCKER_REGISTRY = 'registry.company.com'
SLACK_CHANNEL = '#ci-cd'
}
stages {
stage('Branch Strategy') {
agent { label 'master' }
steps {
script {
// Map the branch name to a build strategy and deploy target,
// and decide which optional gates run.
def branchName = env.BRANCH_NAME
def buildStrategy = ''
switch(branchName) {
case 'main':
buildStrategy = 'production'
env.DEPLOY_TO = 'production'
env.RUN_SECURITY_SCAN = 'true'
env.RUN_PERFORMANCE_TEST = 'true'
break
case 'develop':
buildStrategy = 'integration'
env.DEPLOY_TO = 'staging'
env.RUN_SECURITY_SCAN = 'true'
env.RUN_PERFORMANCE_TEST = 'false'
break
case ~/^feature\/.*$/:
buildStrategy = 'feature'
env.DEPLOY_TO = 'dev'
env.RUN_SECURITY_SCAN = 'false'
env.RUN_PERFORMANCE_TEST = 'false'
break
case ~/^hotfix\/.*$/:
buildStrategy = 'hotfix'
env.DEPLOY_TO = 'staging'
env.RUN_SECURITY_SCAN = 'true'
env.RUN_PERFORMANCE_TEST = 'true'
break
case ~/^release\/.*$/:
buildStrategy = 'release'
env.DEPLOY_TO = 'staging'
env.RUN_SECURITY_SCAN = 'true'
env.RUN_PERFORMANCE_TEST = 'true'
break
default:
// Unrecognised branches build but never deploy.
buildStrategy = 'experimental'
env.DEPLOY_TO = 'none'
env.RUN_SECURITY_SCAN = 'false'
env.RUN_PERFORMANCE_TEST = 'false'
}
env.BUILD_STRATEGY = buildStrategy
echo "Branch: ${branchName}, Strategy: ${buildStrategy}"
}
}
}
stage('Quality Gates') {
parallel {
stage('Code Quality') {
when {
// NOTE(review): declarative 'when { not { ... } }' expects a nested
// condition, not a bare boolean; 'expression { !params.SKIP_TESTS }'
// is the usual form - verify this parses on your Jenkins version.
not { params.SKIP_TESTS }
}
agent { label 'build' }
steps {
sh 'mvn sonar:sonar -Dsonar.qualitygate.wait=true'
}
}
stage('Security Scan') {
when {
environment name: 'RUN_SECURITY_SCAN', value: 'true'
}
agent { label 'security' }
steps {
sh 'mvn org.owasp:dependency-check-maven:check'
sh 'docker run --rm -v $(pwd):/app securecodewarrior/docker-security-scan'
}
}
stage('Performance Test') {
when {
environment name: 'RUN_PERFORMANCE_TEST', value: 'true'
}
agent { label 'performance' }
steps {
sh 'mvn gatling:test'
}
post {
always {
publishHTML([
allowMissing: false,
alwaysLinkToLastBuild: true,
keepAll: true,
reportDir: 'target/gatling',
reportFiles: 'index.html',
reportName: 'Performance Report'
])
}
}
}
}
}
stage('Approval') {
when {
anyOf {
branch 'main'
branch 'release/*'
branch 'hotfix/*'
}
}
steps {
script {
// main requires both leads; other protected branches just tech-lead.
def approvers = []
if (env.BRANCH_NAME == 'main') {
approvers = ['tech-lead', 'devops-lead']
} else {
approvers = ['tech-lead']
}
// Abort if nobody approves within 24 hours.
timeout(time: 24, unit: 'HOURS') {
input(
message: "Approve deployment to ${env.DEPLOY_TO}?",
submitter: approvers.join(',')
)
}
}
}
}
stage('Deploy') {
when {
not { environment name: 'DEPLOY_TO', value: 'none' }
}
agent { label 'deploy' }
steps {
script {
// Deployment strategy varies by environment.
def deploymentStrategy = ''
switch(env.DEPLOY_TO) {
case 'production':
deploymentStrategy = 'blue-green'
break
case 'staging':
deploymentStrategy = 'rolling'
break
case 'dev':
deploymentStrategy = 'recreate'
break
}
sh "ansible-playbook -i inventory/${env.DEPLOY_TO} deploy.yml -e deployment_strategy=${deploymentStrategy}"
}
}
}
}
post {
always {
script {
// Summarise the build to Slack regardless of outcome.
def status = currentBuild.result ?: 'SUCCESS'
def color = status == 'SUCCESS' ? 'good' : 'danger'
def message = """
*${env.JOB_NAME}* - ${env.BUILD_NUMBER}
*Branch:* ${env.BRANCH_NAME}
*Strategy:* ${env.BUILD_STRATEGY}
*Status:* ${status}
*Duration:* ${currentBuild.durationString}
*Changes:* ${env.CHANGE_TITLE ?: 'No changes'}
""".stripIndent()
slackSend(
channel: env.SLACK_CHANNEL,
color: color,
message: message
)
}
}
}
}
代码审查集成
Pull Request Pipeline:
// Jenkinsfile.pr
// Pull-request validation pipeline: enforces PR title/target conventions,
// selects checks based on changed files, runs frontend/backend/docs/security
// checks in parallel, builds an image, runs integration tests, and reports
// status + comments back to GitHub.
pipeline {
agent none
triggers {
// Webhook-driven PR trigger.
// NOTE(review): 'githubPullRequests' comes from the GitHub PR plugin;
// verify the plugin is installed and this event list matches its API.
githubPullRequests(
triggerMode: 'HEAVY_HOOKS',
events: [
pullRequestOpened(),
pullRequestUpdated(),
pullRequestSynchronize()
]
)
}
environment {
// PR metadata exposed by the multibranch/PR plugin.
PR_NUMBER = "${env.CHANGE_ID}"
PR_TITLE = "${env.CHANGE_TITLE}"
PR_AUTHOR = "${env.CHANGE_AUTHOR}"
PR_TARGET = "${env.CHANGE_TARGET}"
}
stages {
stage('PR Validation') {
agent { label 'master' }
steps {
script {
// Enforce conventional-commit style PR titles.
def titlePattern = /^(feat|fix|docs|style|refactor|test|chore)(\(.+\))?: .+/
if (!env.PR_TITLE.matches(titlePattern)) {
error "PR标题格式不正确。应该遵循: type(scope): description"
}
// Only allow PRs into approved target branches.
def allowedTargets = ['main', 'develop']
if (!allowedTargets.contains(env.PR_TARGET)) {
error "不允许向 ${env.PR_TARGET} 分支提交PR"
}
// Collect changed files relative to the target branch.
def changedFiles = sh(
script: "git diff --name-only origin/${env.PR_TARGET}...HEAD",
returnStdout: true
).trim().split('\n')
env.CHANGED_FILES = changedFiles.join(',')
// Decide which check stages need to run (stored as "true"/"false"
// strings, since env values are always strings).
env.RUN_FRONTEND_TESTS = changedFiles.any { it.startsWith('frontend/') || it.endsWith('.js') || it.endsWith('.ts') || it.endsWith('.vue') }
env.RUN_BACKEND_TESTS = changedFiles.any { it.startsWith('backend/') || it.endsWith('.java') || it.endsWith('.py') }
env.RUN_DOCS_CHECK = changedFiles.any { it.endsWith('.md') || it.startsWith('docs/') }
}
}
}
stage('Parallel Checks') {
parallel {
stage('Frontend Tests') {
when {
environment name: 'RUN_FRONTEND_TESTS', value: 'true'
}
agent { label 'nodejs' }
steps {
sh 'npm ci'
sh 'npm run lint'
sh 'npm run test:unit'
sh 'npm run test:e2e'
}
post {
always {
// NOTE(review): 'publishTestResults' is plugin-provided; the
// built-in step for JUnit XML is 'junit' - verify availability.
publishTestResults testResultsPattern: 'test-results.xml'
publishHTML([
allowMissing: false,
alwaysLinkToLastBuild: true,
keepAll: true,
reportDir: 'coverage',
reportFiles: 'index.html',
reportName: 'Frontend Coverage Report'
])
}
}
}
stage('Backend Tests') {
when {
environment name: 'RUN_BACKEND_TESTS', value: 'true'
}
agent { label 'java' }
steps {
sh 'mvn clean compile'
sh 'mvn checkstyle:check'
sh 'mvn test'
sh 'mvn jacoco:report'
}
post {
always {
publishTestResults testResultsPattern: '**/target/surefire-reports/*.xml'
publishHTML([
allowMissing: false,
alwaysLinkToLastBuild: true,
keepAll: true,
reportDir: 'target/site/jacoco',
reportFiles: 'index.html',
reportName: 'Backend Coverage Report'
])
}
}
}
stage('Documentation Check') {
when {
environment name: 'RUN_DOCS_CHECK', value: 'true'
}
agent { label 'docs' }
steps {
sh 'markdownlint docs/'
sh 'vale docs/'
}
}
stage('Security Scan') {
agent { label 'security' }
steps {
sh 'mvn org.owasp:dependency-check-maven:check'
sh 'npm audit --audit-level moderate'
script {
// SAST scan annotated against the PR in SonarQube.
sh 'sonar-scanner -Dsonar.pullrequest.key=${PR_NUMBER} -Dsonar.pullrequest.branch=${BRANCH_NAME} -Dsonar.pullrequest.base=${PR_TARGET}'
}
}
}
}
}
stage('Build & Package') {
agent { label 'build' }
steps {
sh 'mvn clean package -DskipTests'
sh 'docker build -t ${DOCKER_REGISTRY}/app:pr-${PR_NUMBER} .'
}
}
stage('Integration Tests') {
agent { label 'integration' }
steps {
script {
// Spin up the dockerised test environment.
sh 'docker-compose -f docker-compose.test.yml up -d'
try {
// Crude readiness wait; a polling health check would be sturdier.
sh 'sleep 30'
// Run Failsafe integration tests.
sh 'mvn failsafe:integration-test failsafe:verify'
// API contract tests via Newman/Postman.
sh 'newman run postman/api-tests.json --environment postman/test-env.json'
} finally {
// Always tear the test environment down, including volumes.
sh 'docker-compose -f docker-compose.test.yml down -v'
}
}
}
}
}
post {
always {
script {
// Report the commit status back to GitHub.
// NOTE(review): the token is interpolated into a shell command -
// prefer withCredentials + single-quoted sh to keep it off the
// process list and out of the build log.
def status = currentBuild.result ?: 'SUCCESS'
def state = status == 'SUCCESS' ? 'success' : 'failure'
sh """
curl -X POST \
-H "Authorization: token ${env.GITHUB_TOKEN}" \
-H "Accept: application/vnd.github.v3+json" \
https://api.github.com/repos/company/project/statuses/${env.GIT_COMMIT} \
-d '{
"state": "${state}",
"target_url": "${env.BUILD_URL}",
"description": "Jenkins CI",
"context": "continuous-integration/jenkins"
}'
"""
// On failure, leave an explanatory PR comment.
if (status != 'SUCCESS') {
def comment = """
## 🚨 CI检查失败
**构建编号:** ${env.BUILD_NUMBER}
**失败阶段:** ${env.STAGE_NAME}
**构建日志:** [查看详情](${env.BUILD_URL}console)
请修复问题后重新提交。
""".stripIndent()
sh """
curl -X POST \
-H "Authorization: token ${env.GITHUB_TOKEN}" \
-H "Accept: application/vnd.github.v3+json" \
https://api.github.com/repos/company/project/issues/${PR_NUMBER}/comments \
-d '{"body": "${comment}"}'
"""
}
}
}
success {
script {
// On success, leave a summary comment with report links.
def comment = """
## ✅ CI检查通过
**构建编号:** ${env.BUILD_NUMBER}
**测试覆盖率:** [查看报告](${env.BUILD_URL}Backend_Coverage_Report/)
**代码质量:** [查看SonarQube](${env.SONAR_URL})
PR已准备好进行代码审查!
""".stripIndent()
sh """
curl -X POST \
-H "Authorization: token ${env.GITHUB_TOKEN}" \
-H "Accept: application/vnd.github.v3+json" \
https://api.github.com/repos/company/project/issues/${PR_NUMBER}/comments \
-d '{"body": "${comment}"}'
"""
}
}
}
}
15.6 持续改进
指标收集和分析
Jenkins指标收集脚本:
#!/usr/bin/env python3
# jenkins_metrics_collector.py
import requests
import json
import time
import sqlite3
from datetime import datetime, timedelta
import logging
from typing import Dict, List, Any
class JenkinsMetricsCollector:
    """Collects Jenkins build, system and per-job metrics into SQLite.

    Talks to the Jenkins REST API over HTTP basic auth and persists three
    kinds of metrics: raw per-build records, instance-wide snapshots
    (queue, executors, nodes, memory/disk) and 30-day per-job aggregates,
    so trends can be reported over time.
    """

    def __init__(self, jenkins_url: str, username: str, api_token: str,
                 db_path: str = 'jenkins_metrics.db'):
        """Create the API session, configure logging and open the database.

        Args:
            jenkins_url: Base URL of the Jenkins master; a trailing '/' is
                stripped.
            username: Jenkins user name for basic auth.
            api_token: API token for that user.
            db_path: SQLite database file. New, backward-compatible
                parameter; defaults to the previously hard-coded
                'jenkins_metrics.db'. ':memory:' works for testing.
        """
        self.jenkins_url = jenkins_url.rstrip('/')
        self.auth = (username, api_token)
        self.session = requests.Session()
        self.session.auth = self.auth
        self.db_path = db_path
        # NOTE: configures the root logger as a side effect (behavior kept
        # from the original implementation).
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s'
        )
        self.logger = logging.getLogger(__name__)
        self.init_database()

    def init_database(self):
        """Open ``self.db_path`` and create the metrics tables if missing."""
        self.conn = sqlite3.connect(self.db_path)
        cursor = self.conn.cursor()
        # One row per collected build.
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS build_metrics (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                job_name TEXT,
                build_number INTEGER,
                result TEXT,
                duration INTEGER,
                timestamp DATETIME,
                queue_time INTEGER,
                node_name TEXT,
                cause TEXT
            )
        ''')
        # One row per instance-wide snapshot (one per collection run).
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS system_metrics (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                timestamp DATETIME,
                queue_length INTEGER,
                active_executors INTEGER,
                total_executors INTEGER,
                nodes_online INTEGER,
                nodes_total INTEGER,
                memory_usage REAL,
                disk_usage REAL
            )
        ''')
        # Rolling per-job aggregates derived from build_metrics.
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS job_metrics (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                job_name TEXT,
                timestamp DATETIME,
                success_rate REAL,
                avg_duration REAL,
                failure_count INTEGER,
                total_builds INTEGER
            )
        ''')
        self.conn.commit()

    def get_jenkins_data(self, endpoint: str) -> Dict[str, Any]:
        """GET a Jenkins API endpoint and return the parsed JSON.

        Returns an empty dict on any request/parse failure, so one broken
        endpoint does not abort a whole collection run (best-effort by
        design — callers must tolerate `{}`).
        """
        url = f"{self.jenkins_url}/{endpoint}"
        try:
            response = self.session.get(url, timeout=30)
            response.raise_for_status()
            return response.json()
        except Exception as e:
            self.logger.error(f"获取数据失败 {url}: {e}")
            return {}

    def collect_build_metrics(self):
        """Fetch recent builds for every job and insert the new ones."""
        self.logger.info("收集构建指标...")
        jobs_data = self.get_jenkins_data('api/json?tree=jobs[name,url]')
        for job in jobs_data.get('jobs', []):
            job_name = job['name']
            builds_data = self.get_jenkins_data(
                f"job/{job_name}/api/json?tree=builds[number,result,duration,timestamp,actions[causes[shortDescription]],builtOn]"
            )
            # Only the 10 most recent builds per job.
            for build in builds_data.get('builds', [])[:10]:
                build_number = build.get('number')
                # FIX: an in-progress build reports `"result": null`; the
                # original `get('result', 'UNKNOWN')` stored None in that
                # case because the key exists with a null value.
                result = build.get('result') or 'UNKNOWN'
                duration = build.get('duration', 0)
                # Jenkins timestamps are epoch milliseconds.
                timestamp = datetime.fromtimestamp(build.get('timestamp', 0) / 1000)
                # 'builtOn' may be missing; default kept from the original.
                node_name = build.get('builtOn', 'master')
                # Trigger cause: first cause of the first causes action.
                cause = 'Unknown'
                for action in build.get('actions', []):
                    if 'causes' in action:
                        for c in action['causes']:
                            cause = c.get('shortDescription', 'Unknown')
                            break
                        break
                # Time the build spent queued before starting, if exposed.
                queue_time = 0
                build_details = self.get_jenkins_data(
                    f"job/{job_name}/{build_number}/api/json?tree=actions[queueDurationMillis]"
                )
                for action in build_details.get('actions', []):
                    if 'queueDurationMillis' in action:
                        queue_time = action['queueDurationMillis']
                        break
                # Insert only if this (job, build) pair has not been seen.
                cursor = self.conn.cursor()
                cursor.execute(
                    'SELECT id FROM build_metrics WHERE job_name = ? AND build_number = ?',
                    (job_name, build_number)
                )
                if not cursor.fetchone():
                    cursor.execute('''
                        INSERT INTO build_metrics
                        (job_name, build_number, result, duration, timestamp, queue_time, node_name, cause)
                        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                    ''', (job_name, build_number, result, duration, timestamp, queue_time, node_name, cause))
        self.conn.commit()
        self.logger.info("构建指标收集完成")

    def collect_system_metrics(self):
        """Snapshot queue length, executor/node counts and resource usage."""
        self.logger.info("收集系统指标...")
        system_data = self.get_jenkins_data('api/json?tree=quietingDown,numExecutors')
        queue_data = self.get_jenkins_data('queue/api/json?tree=items[*]')
        computer_data = self.get_jenkins_data('computer/api/json?tree=computer[displayName,offline,numExecutors,monitorData[*]]')
        queue_length = len(queue_data.get('items', []))
        total_executors = system_data.get('numExecutors', 0)
        active_executors = 0
        nodes_online = 0
        nodes_total = len(computer_data.get('computer', []))
        total_memory = 0
        used_memory = 0
        total_disk = 0
        used_disk = 0
        for computer in computer_data.get('computer', []):
            # Nodes with no 'offline' field are treated as offline.
            if not computer.get('offline', True):
                nodes_online += 1
                active_executors += computer.get('numExecutors', 0)
                monitor_data = computer.get('monitorData', {})
                # Physical memory figures come from the SwapSpaceMonitor
                # payload (despite the monitor's name).
                if 'hudson.node_monitors.SwapSpaceMonitor' in monitor_data:
                    swap_data = monitor_data['hudson.node_monitors.SwapSpaceMonitor']
                    if swap_data:
                        total_memory += swap_data.get('totalPhysicalMemory', 0)
                        used_memory += swap_data.get('totalPhysicalMemory', 0) - swap_data.get('availablePhysicalMemory', 0)
                # Disk usage from the DiskSpaceMonitor payload.
                if 'hudson.node_monitors.DiskSpaceMonitor' in monitor_data:
                    disk_data = monitor_data['hudson.node_monitors.DiskSpaceMonitor']
                    if disk_data:
                        total_disk += disk_data.get('size', 0)
                        used_disk += disk_data.get('size', 0) - disk_data.get('available', 0)
        # Percentages; guard against divide-by-zero when nothing reported.
        memory_usage = (used_memory / total_memory * 100) if total_memory > 0 else 0
        disk_usage = (used_disk / total_disk * 100) if total_disk > 0 else 0
        cursor = self.conn.cursor()
        cursor.execute('''
            INSERT INTO system_metrics
            (timestamp, queue_length, active_executors, total_executors,
            nodes_online, nodes_total, memory_usage, disk_usage)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        ''', (
            datetime.now(), queue_length, active_executors, total_executors,
            nodes_online, nodes_total, memory_usage, disk_usage
        ))
        self.conn.commit()
        self.logger.info("系统指标收集完成")

    def calculate_job_metrics(self):
        """Aggregate the last 30 days of build_metrics into job_metrics."""
        self.logger.info("计算作业指标...")
        cursor = self.conn.cursor()
        thirty_days_ago = datetime.now() - timedelta(days=30)
        cursor.execute('''
            SELECT job_name,
            COUNT(*) as total_builds,
            SUM(CASE WHEN result = 'SUCCESS' THEN 1 ELSE 0 END) as success_count,
            SUM(CASE WHEN result = 'FAILURE' THEN 1 ELSE 0 END) as failure_count,
            AVG(duration) as avg_duration
            FROM build_metrics
            WHERE timestamp >= ?
            GROUP BY job_name
        ''', (thirty_days_ago,))
        results = cursor.fetchall()
        for row in results:
            job_name, total_builds, success_count, failure_count, avg_duration = row
            success_rate = (success_count / total_builds * 100) if total_builds > 0 else 0
            cursor.execute('''
                INSERT INTO job_metrics
                (job_name, timestamp, success_rate, avg_duration, failure_count, total_builds)
                VALUES (?, ?, ?, ?, ?, ?)
            ''', (job_name, datetime.now(), success_rate, avg_duration, failure_count, total_builds))
        self.conn.commit()
        self.logger.info("作业指标计算完成")

    def generate_report(self) -> Dict[str, Any]:
        """Build a report dict from the latest stored metrics.

        Returns a dict with keys 'system_metrics' (latest snapshot, zeros
        when none exists), 'build_trends' (daily totals over 7 days),
        'top_jobs' (10 busiest) and 'problem_jobs' (success rate < 80%).
        """
        cursor = self.conn.cursor()
        # Latest system snapshot, if any.
        cursor.execute('''
            SELECT queue_length, active_executors, total_executors,
            nodes_online, nodes_total, memory_usage, disk_usage
            FROM system_metrics
            ORDER BY timestamp DESC
            LIMIT 1
        ''')
        system_metrics = cursor.fetchone()
        # Daily build trend over the past 7 days.
        seven_days_ago = datetime.now() - timedelta(days=7)
        cursor.execute('''
            SELECT DATE(timestamp) as date,
            COUNT(*) as total_builds,
            SUM(CASE WHEN result = 'SUCCESS' THEN 1 ELSE 0 END) as success_count,
            AVG(duration) as avg_duration
            FROM build_metrics
            WHERE timestamp >= ?
            GROUP BY DATE(timestamp)
            ORDER BY date
        ''', (seven_days_ago,))
        build_trends = cursor.fetchall()
        # Busiest jobs in the recent job_metrics rows.
        cursor.execute('''
            SELECT job_name, success_rate, avg_duration, total_builds
            FROM job_metrics
            WHERE timestamp >= ?
            ORDER BY total_builds DESC
            LIMIT 10
        ''', (seven_days_ago,))
        top_jobs = cursor.fetchall()
        # Jobs whose success rate dropped below 80%.
        cursor.execute('''
            SELECT job_name, success_rate, failure_count
            FROM job_metrics
            WHERE timestamp >= ? AND success_rate < 80
            ORDER BY success_rate ASC
        ''', (seven_days_ago,))
        problem_jobs = cursor.fetchall()
        return {
            'system_metrics': {
                'queue_length': system_metrics[0] if system_metrics else 0,
                'active_executors': system_metrics[1] if system_metrics else 0,
                'total_executors': system_metrics[2] if system_metrics else 0,
                'nodes_online': system_metrics[3] if system_metrics else 0,
                'nodes_total': system_metrics[4] if system_metrics else 0,
                'memory_usage': system_metrics[5] if system_metrics else 0,
                'disk_usage': system_metrics[6] if system_metrics else 0
            },
            'build_trends': [
                {
                    'date': row[0],
                    'total_builds': row[1],
                    'success_count': row[2],
                    'success_rate': (row[2] / row[1] * 100) if row[1] > 0 else 0,
                    'avg_duration': row[3]
                }
                for row in build_trends
            ],
            'top_jobs': [
                {
                    'job_name': row[0],
                    'success_rate': row[1],
                    'avg_duration': row[2],
                    'total_builds': row[3]
                }
                for row in top_jobs
            ],
            'problem_jobs': [
                {
                    'job_name': row[0],
                    'success_rate': row[1],
                    'failure_count': row[2]
                }
                for row in problem_jobs
            ]
        }

    def run_collection(self):
        """Run a full collection cycle and write a timestamped JSON report.

        Returns the report dict; re-raises any collection error after
        logging it.
        """
        try:
            self.collect_build_metrics()
            self.collect_system_metrics()
            self.calculate_job_metrics()
            report = self.generate_report()
            # Persist the report next to the script, timestamped.
            with open(f'jenkins_report_{datetime.now().strftime("%Y%m%d_%H%M%S")}.json', 'w') as f:
                json.dump(report, f, indent=2, default=str)
            self.logger.info("指标收集完成")
            return report
        except Exception as e:
            self.logger.error(f"指标收集失败: {e}")
            raise

    def close(self):
        """Close the SQLite connection if it was opened."""
        if hasattr(self, 'conn'):
            self.conn.close()
if __name__ == '__main__':
    import os
    import sys

    # Read connection settings from the environment; URL and username have
    # defaults, the API token is mandatory.
    jenkins_url = os.getenv('JENKINS_URL', 'http://localhost:8080')
    username = os.getenv('JENKINS_USERNAME', 'admin')
    api_token = os.getenv('JENKINS_API_TOKEN')
    if not api_token:
        print("请设置JENKINS_API_TOKEN环境变量")
        # FIX: use sys.exit() — the bare exit() builtin is injected by the
        # site module and is not guaranteed to exist in all interpreters.
        sys.exit(1)

    collector = JenkinsMetricsCollector(jenkins_url, username, api_token)
    try:
        report = collector.run_collection()
        print("指标收集完成,报告已生成")
        print(json.dumps(report, indent=2, default=str))
    finally:
        # Always release the SQLite connection, even on failure.
        collector.close()
本章小结
本章详细介绍了Jenkins的最佳实践,包括:
- 企业级部署架构:高可用架构设计、容器化部署和Kubernetes集成
- 安全最佳实践:身份认证授权、凭据管理和网络安全配置
- 性能优化策略:JVM调优、构建优化和系统级优化
- 运维管理规范:监控告警、备份策略和灾难恢复
- 团队协作模式:分支策略、代码审查集成和持续改进
- 指标收集分析:自动化指标收集、报告生成和性能分析
通过实施这些最佳实践,可以构建一个稳定、高效、安全的Jenkins CI/CD平台。
下一步学习建议
- 深入学习容器化技术:Docker、Kubernetes等
- 掌握云原生CI/CD:GitLab CI、GitHub Actions、Tekton等
- 学习DevOps工具链:Ansible、Terraform、Helm等
- 关注安全实践:DevSecOps、容器安全、密钥管理等
- 持续关注新技术:Serverless CI/CD、AI/ML在CI/CD中的应用等
理论练习
- 设计一个支持1000+开发者的Jenkins集群架构
- 制定适合你团队的分支策略和CI/CD流程
- 设计Jenkins的监控和告警体系
- 制定Jenkins的安全策略和访问控制规范
- 设计Jenkins的备份和灾难恢复方案
实践练习
- 部署一个高可用的Jenkins集群
- 配置基于LDAP的身份认证和基于角色的访问控制
- 实现Jenkins的自动化备份和监控
- 创建一个完整的GitFlow Pipeline
- 实现Jenkins指标收集和报告生成
思考题
- 如何在保证安全的前提下提高Jenkins的易用性?
- 如何设计Jenkins的多租户架构?
- 如何实现Jenkins的零停机升级?
- 如何在Jenkins中实现成本优化?
- 如何将AI/ML技术应用到Jenkins CI/CD流程中?
恭喜! 你已经完成了Jenkins持续集成的完整学习之旅。从基础概念到高级特性,从简单配置到企业级部署,相信你已经掌握了Jenkins的核心技能。继续实践和探索,成为CI/CD领域的专家!