AI 应用 CI/CD 流程设计
AI 应用的持续集成与部署有其特殊性。如何设计适合 AI 应用的 CI/CD 流程?如何保证部署质量?本文详解 AI 应用 CI/CD 的设计与实践。
一、CI/CD 流程设计
1.1 AI 应用特点
AI 应用 CI/CD 特点:
┌─────────────────────────────────────┐
│ 1. 模型版本管理 │
│ - 模型文件大 │
│ - 版本依赖复杂 │
│ - 需要回滚机制 │
├─────────────────────────────────────┤
│ 2. 测试特殊性 │
│ - 非确定性输出 │
│ - 需要质量评估 │
│ - 测试成本高 │
├─────────────────────────────────────┤
│ 3. 部署复杂性 │
│ - 资源需求大 │
│ - 需要灰度发布 │
│ - 监控要求高 │
├─────────────────────────────────────┤
│ 4. 数据依赖性 │
│ - 知识库更新 │
│ - 向量索引重建 │
│ - 数据迁移 │
└─────────────────────────────────────┘
1.2 完整流程
# .github/workflows/ai-cicd.yml
# Pipeline for an AI application: static checks -> tests -> quality gate ->
# image build -> staging deploy -> E2E -> canary -> full production rollout.
name: AI Application CI/CD

on:
  push:
    branches: [main, develop]
  pull_request:
    branches: [main]

jobs:
  # 1. Code quality checks
  code-quality:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Lint Code
        run: |
          pip install flake8 black
          flake8 .
          black --check .

  # 2. Unit tests
  unit-tests:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Run Unit Tests
        run: |
          pip install pytest pytest-cov
          pytest tests/unit --cov=src --cov-report=xml

  # 3. Prompt tests (call the real LLM, hence the API key)
  prompt-tests:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Run Prompt Tests
        env:
          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
        # NOTE(review): `--report` is not a built-in pytest option; presumably
        # a plugin or conftest.py in the repository provides it -- confirm.
        run: |
          pytest tests/prompts --report=prompt_report.json

  # 4. Integration tests
  integration-tests:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Run Integration Tests
        env:
          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
        run: |
          pytest tests/integration --report=integration_report.json

  # 5. Quality evaluation
  quality-evaluation:
    runs-on: ubuntu-latest
    needs: [prompt-tests, integration-tests]
    steps:
      - uses: actions/checkout@v3
      # Verify that the quality metrics meet the required thresholds.
      # NOTE(review): the JSON reports written by the two test jobs above are
      # not passed between jobs here; if the script reads them, they need to
      # be shared as artifacts -- confirm.
      - name: Evaluate Quality
        run: |
          python scripts/evaluate_quality.py

  # 6. Build the container image
  build:
    runs-on: ubuntu-latest
    needs: [quality-evaluation]
    steps:
      - uses: actions/checkout@v3
      # Build under the exact name that is pushed below; `docker push` can
      # only push a tag that exists locally.
      - name: Build Docker Image
        run: |
          docker build -t registry/ai-app:${{ github.sha }} .
      - name: Push to Registry
        run: |
          docker push registry/ai-app:${{ github.sha }}

  # 7. Deploy to staging
  deploy-staging:
    runs-on: ubuntu-latest
    needs: [build]
    environment: staging
    steps:
      - name: Deploy to Staging
        run: |
          kubectl set image deployment/ai-app ai-app=registry/ai-app:${{ github.sha }}

  # 8. End-to-end tests against staging
  e2e-tests:
    runs-on: ubuntu-latest
    needs: [deploy-staging]
    steps:
      # Every job starts on a fresh runner, so the test sources must be
      # checked out before pytest can find tests/e2e.
      - uses: actions/checkout@v3
      - name: Run E2E Tests
        env:
          STAGING_URL: ${{ secrets.STAGING_URL }}
        run: |
          pytest tests/e2e --base-url=$STAGING_URL

  # 9. Canary release
  deploy-canary:
    runs-on: ubuntu-latest
    needs: [e2e-tests]
    # Only pushes to main may reach production. Pull requests and pushes to
    # develop stop after staging + E2E; deploy-production is skipped as well
    # because it needs this job.
    if: github.event_name == 'push' && github.ref == 'refs/heads/main'
    environment: production
    steps:
      - name: Canary Deployment (5%)
        run: |
          kubectl set image deployment/ai-app-canary ai-app=registry/ai-app:${{ github.sha }}

  # 10. Full production rollout
  deploy-production:
    runs-on: ubuntu-latest
    needs: [deploy-canary]
    environment: production
    steps:
      - name: Full Production Deployment
        run: |
          kubectl set image deployment/ai-app ai-app=registry/ai-app:${{ github.sha }}
二、自动化测试
2.1 测试分层
# test_pyramid.py
"""
Test pyramid for AI applications:

        /\
       /  \
      / E2E \          10%
     /--------\
    /Integration\      20%
   /------------\
  /     Unit     \     70%
 /----------------\

Execution policy:
- Unit tests: run on every commit
- Integration tests: run before every merge
- E2E tests: run before every release
"""


class AITestPyramid:
    """Describe the three layers of the AI test pyramid.

    After construction, ``test_levels`` maps each layer name ('unit',
    'integration', 'e2e') to a dict with the keys 'scope', 'speed',
    'coverage' and 'run_frequency'.
    """

    def __init__(self):
        # Column names shared by every layer, in output order.
        columns = ('scope', 'speed', 'coverage', 'run_frequency')
        # One row of values per layer, aligned with ``columns``.
        rows = {
            'unit': (
                '单个函数/类', 'fast (<1s)', 'code paths', 'every commit',
            ),
            'integration': (
                '组件交互', 'medium (<1min)', 'APIs, data flow', 'before merge',
            ),
            'e2e': (
                '完整流程', 'slow (>1min)', 'user journeys', 'before release',
            ),
        }
        self.test_levels = {
            level: dict(zip(columns, values))
            for level, values in rows.items()
        }
2.2 测试自动化
# automated_testing.py
from typing import Dict, List
class AutomatedTestSuite:
    """Register shell-invoked test commands and run them one after another.

    Each registered test is a shell command; it passes when the command
    exits with status 0 before its timeout elapses.
    """

    def __init__(self):
        # Registered tests in insertion order. Every entry is a dict with the
        # keys 'name', 'type', 'script' and 'timeout'.
        self.tests: List[Dict] = []

    def add_test(
        self,
        name: str,
        test_type: str,
        script: str,
        timeout_seconds: int = 300
    ):
        """Register a test.

        Args:
            name: Label reported in the results.
            test_type: Free-form category stored with the test.
            script: Shell command line that runs the test.
            timeout_seconds: Maximum run time before the test is failed.
        """
        self.tests.append({
            'name': name,
            'type': test_type,
            'script': script,
            'timeout': timeout_seconds
        })

    def run_suite(self) -> Dict:
        """Run every registered test in order.

        Returns:
            A dict with 'total', 'passed' and 'failed' counts plus 'details',
            the list of per-test result dicts from ``_run_single_test``.
        """
        details = [self._run_single_test(test) for test in self.tests]
        passed = sum(1 for detail in details if detail['passed'])
        return {
            'total': len(self.tests),
            'passed': passed,
            'failed': len(details) - passed,
            'details': details
        }

    def _run_single_test(self, test: Dict) -> Dict:
        """Run one registered test and describe the outcome.

        Args:
            test: An entry of ``self.tests``.

        Returns:
            A dict with 'name', 'passed', 'duration' (seconds), 'output'
            (decoded stdout) and 'error' (decoded stderr for a failing
            command, 'Test timed out' on timeout, otherwise '').
        """
        import subprocess
        import time

        # A monotonic clock keeps the duration correct even if the system
        # wall clock is adjusted while the test runs.
        started = time.monotonic()
        try:
            # NOTE: shell=True is deliberate because callers register full
            # shell command lines. Only register trusted commands.
            completed = subprocess.run(
                test['script'],
                shell=True,
                capture_output=True,
                timeout=test['timeout']
            )
        except subprocess.TimeoutExpired:
            return {
                'name': test['name'],
                'passed': False,
                'duration': test['timeout'],
                'output': '',
                'error': 'Test timed out'
            }

        passed = completed.returncode == 0
        # errors='replace' keeps a test that prints non-UTF-8 bytes from
        # crashing the whole suite with UnicodeDecodeError.
        return {
            'name': test['name'],
            'passed': passed,
            'duration': time.monotonic() - started,
            'output': completed.stdout.decode(errors='replace'),
            'error': (
                completed.stderr.decode(errors='replace') if not passed else ''
            )
        }
# Usage example: register one command per test layer, then run them in order.
test_suite = AutomatedTestSuite()

# Unit tests
test_suite.add_test(name='Unit Tests', test_type='unit', script='pytest tests/unit -v')

# Prompt tests
test_suite.add_test(name='Prompt Tests', test_type='prompt', script='pytest tests/prompts -v')

# Integration tests
test_suite.add_test(name='Integration Tests', test_type='integration', script='pytest tests/integration -v')

# Run everything that was registered and keep the aggregate report.
results = test_suite.run_suite()
三、灰度发布
3.1 发布策略
# deployment_strategies.py
from typing import Dict, List
class DeploymentStrategies:
    """Factory helpers that describe a deployment strategy as a plain dict."""

    @staticmethod
    def canary_deployment(
        new_version: str,
        traffic_percentage: int = 5
    ) -> Dict:
        """Describe a canary release.

        Args:
            new_version: Version being rolled out.
            traffic_percentage: Share of traffic (0-100) sent to the new
                version in the initial split.

        Returns:
            A dict with the strategy name, version, initial traffic split,
            the fixed ramp-up steps and the rollback thresholds.

        Raises:
            ValueError: If ``traffic_percentage`` is outside 0-100, which
                would otherwise yield a negative or >100 split.
        """
        if not 0 <= traffic_percentage <= 100:
            raise ValueError(
                f"traffic_percentage must be between 0 and 100, "
                f"got {traffic_percentage}"
            )
        return {
            'strategy': 'canary',
            'version': new_version,
            'traffic_split': {
                'new': traffic_percentage,
                'old': 100 - traffic_percentage
            },
            # Ramp-up schedule; a duration of 0 marks the final step.
            'steps': [
                {'percentage': 5, 'duration_minutes': 30},
                {'percentage': 25, 'duration_minutes': 60},
                {'percentage': 50, 'duration_minutes': 120},
                {'percentage': 100, 'duration_minutes': 0}
            ],
            'rollback_trigger': {
                'error_rate_threshold': 0.05,
                'latency_threshold_ms': 3000
            }
        }

    @staticmethod
    def blue_green_deployment(
        new_version: str
    ) -> Dict:
        """Describe a blue/green deployment.

        Args:
            new_version: Version being rolled out.

        Returns:
            A dict with the strategy name, version, the role of each
            environment and the criteria required before switching.
        """
        return {
            'strategy': 'blue_green',
            'version': new_version,
            'environments': {
                'blue': 'current_production',
                'green': 'new_deployment'
            },
            'switch_criteria': {
                'health_check_pass': True,
                'e2e_tests_pass': True,
                'manual_approval': False
            }
        }

    @staticmethod
    def rolling_deployment(
        new_version: str,
        max_unavailable: int = 1
    ) -> Dict:
        """Describe a rolling update.

        Args:
            new_version: Version being rolled out.
            max_unavailable: Maximum number of instances that may be
                unavailable during the update.

        Returns:
            A dict with the strategy name, version and rolling-update
            settings.
        """
        return {
            'strategy': 'rolling',
            'version': new_version,
            'max_unavailable': max_unavailable,
            'update_strategy': 'RollingUpdate',
            'rolling_update': {
                'max_surge': '25%',
                # Use the caller's value here too; a hard-coded '25%' would
                # contradict the top-level 'max_unavailable' entry.
                'max_unavailable': max_unavailable
            }
        }
3.2 发布流程
# release_pipeline.py
from typing import Dict, List
class ReleasePipeline:
    """Run release stages in order, stopping at the first failed gate."""

    def __init__(self):
        # Stage dicts with the keys 'name', 'actions', 'gates' and 'status'.
        self.stages = []
        # Index of the stage most recently started by ``execute``.
        self.current_stage = 0

    def add_stage(
        self,
        name: str,
        actions: List[Dict],
        gates: List[Dict]
    ):
        """Append a stage to the pipeline.

        Args:
            name: Stage label used in logs and results.
            actions: Actions executed when the stage runs.
            gates: Checks that must all pass for the stage to complete.
        """
        self.stages.append({
            'name': name,
            'actions': actions,
            'gates': gates,
            'status': 'pending'
        })

    def execute(self) -> Dict:
        """Run the stages in order until one fails its gates.

        Returns:
            A dict with 'success', 'stages_completed' and 'details' (one
            ``{'stage', 'status'}`` entry per stage that ran). When a stage
            fails its gates, 'failed_at_stage' holds that stage's name.
        """
        results = {
            'success': True,
            'stages_completed': 0,
            'details': []
        }
        for index, stage in enumerate(self.stages):
            self.current_stage = index
            print(f"Executing stage: {stage['name']}")

            # Run the stage's actions.
            for action in stage['actions']:
                self._execute_action(action)

            # Evaluate the gates; all() stops at the first failing gate.
            gates_passed = all(
                self._check_gate(gate)
                for gate in stage['gates']
            )

            # Record the outcome on the stage itself so a failed stage is
            # distinguishable from one that never ran ('pending').
            stage['status'] = 'completed' if gates_passed else 'failed'
            results['details'].append({
                'stage': stage['name'],
                'status': stage['status']
            })

            if not gates_passed:
                results['success'] = False
                results['failed_at_stage'] = stage['name']
                break
            results['stages_completed'] += 1
        return results

    def _execute_action(self, action: Dict):
        """Execute one stage action (placeholder: currently does nothing)."""
        pass

    def _check_gate(self, gate: Dict) -> bool:
        """Dispatch a gate to its checker based on ``gate['type']``.

        Args:
            gate: Gate description; 'type' selects the checker.

        Returns:
            The checker's verdict. Gates with a missing or unrecognised type
            pass.
        """
        gate_type = gate.get('type')
        if gate_type == 'automated_tests':
            return self._check_automated_tests()
        elif gate_type == 'manual_approval':
            return self._check_manual_approval()
        elif gate_type == 'metrics_check':
            return self._check_metrics(gate)
        # NOTE(review): unknown gate types pass, so a typo in 'type' silently
        # disables the gate -- consider failing closed.
        return True

    def _check_automated_tests(self) -> bool:
        """Check automated test results (placeholder: always passes)."""
        return True

    def _check_manual_approval(self) -> bool:
        """Check for manual approval (placeholder: always passes)."""
        return True

    def _check_metrics(self, gate: Dict) -> bool:
        """Check metrics against the gate's limits (placeholder: always passes)."""
        return True
# Usage example: a three-stage progressive rollout.
pipeline = ReleasePipeline()

# Stage 1: 5% canary, gated on tests and a 1% error-rate ceiling.
pipeline.add_stage(
    name='Canary 5%',
    actions=[{'type': 'deploy', 'percentage': 5}],
    gates=[{'type': 'automated_tests'}, {'type': 'metrics_check', 'error_rate_max': 0.01}],
)

# Stage 2: 25% canary, gated on tests and a 2% error-rate ceiling.
pipeline.add_stage(
    name='Canary 25%',
    actions=[{'type': 'deploy', 'percentage': 25}],
    gates=[{'type': 'automated_tests'}, {'type': 'metrics_check', 'error_rate_max': 0.02}],
)

# Stage 3: full rollout, gated on manual approval.
pipeline.add_stage(
    name='Full Rollout',
    actions=[{'type': 'deploy', 'percentage': 100}],
    gates=[{'type': 'manual_approval'}],
)

# Run the release.
result = pipeline.execute()
四、回滚机制
4.1 自动回滚
# rollback_mechanism.py
from typing import Dict, List
class RollbackMechanism:
    """Evaluate metric-based rollback triggers and record rollbacks."""

    def __init__(self):
        # Trigger dicts with the keys 'name', 'condition', 'threshold', 'action'.
        self.rollback_triggers = []
        # One record per call to ``execute_rollback``, oldest first.
        self.rollback_history: List[Dict] = []

    def add_trigger(
        self,
        name: str,
        condition: str,
        threshold: float,
        action: str
    ):
        """Register a rollback trigger.

        Args:
            name: Label reported when the trigger fires.
            condition: Key of the metric to watch in the metrics dict.
            threshold: Limit the metric is compared against.
            action: Action label reported when the trigger fires.
        """
        self.rollback_triggers.append({
            'name': name,
            'condition': condition,
            'threshold': threshold,
            'action': action
        })

    def check_triggers(self, metrics: Dict) -> List[Dict]:
        """Return the triggers that fire for the given metrics.

        Args:
            metrics: Mapping of metric name to current value. Triggers whose
                metric is absent (or None) are skipped.

        Returns:
            One dict per fired trigger with 'trigger', 'current_value',
            'threshold' and 'action'.
        """
        triggered = []
        for trigger in self.rollback_triggers:
            metric_value = metrics.get(trigger['condition'])
            if metric_value is None:
                continue
            if self._should_rollback(
                metric_value, trigger['threshold'], trigger['condition']
            ):
                triggered.append({
                    'trigger': trigger['name'],
                    'current_value': metric_value,
                    'threshold': trigger['threshold'],
                    'action': trigger['action']
                })
        return triggered

    def _should_rollback(
        self,
        current: float,
        threshold: float,
        condition: str = ''
    ) -> bool:
        """Decide whether a metric value breaches its threshold.

        The comparison direction depends on the metric name: the numeric
        threshold carries no such information, so the name must be inspected.

        Args:
            current: Current metric value.
            threshold: Configured limit.
            condition: Metric name. Names containing 'error_rate' or
                'latency' breach when above the threshold; names containing
                'availability' breach when below it.

        Returns:
            True if the metric breaches its threshold; False otherwise,
            including for metric names that match none of the known kinds.
        """
        # Higher is worse: error rates and latencies.
        if 'error_rate' in condition or 'latency' in condition:
            return current > threshold
        # Lower is worse: availability.
        if 'availability' in condition:
            return current < threshold
        return False

    def execute_rollback(
        self,
        reason: str,
        target_version: str
    ) -> Dict:
        """Record a rollback to ``target_version`` (placeholder logic).

        Args:
            reason: Why the rollback is performed.
            target_version: Version to roll back to.

        Returns:
            The rollback record, which is also appended to
            ``rollback_history``.
        """
        # Imported locally so the method works without a file-level import.
        from datetime import datetime

        record = {
            'success': True,
            'reason': reason,
            'from_version': 'current',
            'to_version': target_version,
            'timestamp': datetime.now().isoformat()
        }
        self.rollback_history.append(record)
        return record
# Predefined rollback triggers. Each entry names the metric to watch
# ('condition'), its limit ('threshold') and the action label to report.
DEFAULT_ROLLBACK_TRIGGERS = [
    dict(
        name='High Error Rate',
        condition='error_rate',
        threshold=0.05,
        action='immediate_rollback',
    ),
    dict(
        name='High Latency',
        condition='p99_latency_ms',
        threshold=5000,
        action='rollback_after_5min',
    ),
    dict(
        name='Service Unavailable',
        condition='availability',
        threshold=0.99,
        action='immediate_rollback',
    ),
]
五、总结
5.1 核心要点
- CI/CD 流程
  - 代码质量检查
  - 自动化测试
  - 灰度发布
  - 自动回滚
- 测试策略
  - 单元测试 70%
  - 集成测试 20%
  - E2E 测试 10%
- 发布策略
  - 金丝雀发布
  - 蓝绿部署
  - 滚动发布
5.2 最佳实践
- 自动化优先
  - 自动化测试
  - 自动化部署
  - 自动化回滚
- 渐进式发布
  - 小流量验证
  - 逐步扩大
  - 快速回滚
- 监控驱动
  - 实时监控
  - 自动告警
  - 数据决策
参考资料