Skip to content
清晨的一缕阳光
返回

Sentinel 熔断降级

Sentinel 熔断降级

熔断降级原理

熔断器模式

          ┌─────────────┐
          │   关闭状态   │
          │   CLOSED    │
          └──────┬──────┘
                 │
          失败率超过阈值
                 │
                 ▼
          ┌─────────────┐
          │   打开状态   │
          │    OPEN     │
          └──────┬──────┘
                 │
            等待时间到
                 │
                 ▼
          ┌─────────────┐
          │   半开状态   │
          │   HALF_OPEN │
          └──────┬──────┘
                 │
          成功 → 关闭
          失败 → 打开

三种状态

  1. 关闭状态(CLOSED):正常状态,请求正常通过
  2. 打开状态(OPEN):熔断状态,直接拒绝请求
  3. 半开状态(HALF_OPEN):恢复探测状态,允许部分请求通过

熔断策略

1. 异常比例熔断

/**
 * Registers an exception-ratio circuit-breaking rule for the "getUserById" resource.
 */
@Configuration
public class ExceptionRatioDegradeRule {

    /**
     * Builds the rule and hands it to {@code DegradeRuleManager} once this bean
     * has been constructed.
     */
    @PostConstruct
    public void initRules() {
        DegradeRule ratioRule = new DegradeRule();
        ratioRule.setResource("getUserById");
        // Strategy: exception ratio.
        ratioRule.setGrade(RuleConstant.DEGRADE_GRADE_EXCEPTION_RATIO);
        // Threshold: trips when the exception ratio exceeds 50%.
        ratioRule.setCount(0.5);
        // Statistics window: 10 seconds (value is in milliseconds).
        ratioRule.setStatIntervalMs(10000);
        // Minimum number of requests inside the statistics window.
        ratioRule.setMinRequestAmount(10);
        // Circuit stays open for 10 seconds.
        ratioRule.setTimeWindow(10);

        List<DegradeRule> degradeRules = new ArrayList<>();
        degradeRules.add(ratioRule);
        DegradeRuleManager.loadRules(degradeRules);
    }
}

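工作原理

在统计时长(statIntervalMs)内,当请求数不少于最小请求数(minRequestAmount)且异常比例超过阈值(count)时触发熔断;经过熔断时长(timeWindow)后熔断器进入半开状态并放行探测请求:探测成功则关闭熔断器,探测失败则重新打开。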

2. 异常数熔断

/**
 * Registers an exception-count circuit-breaking rule for the "getUserById" resource.
 */
@Configuration
public class ExceptionCountDegradeRule {

    /**
     * Builds the rule and hands it to {@code DegradeRuleManager} once this bean
     * has been constructed.
     */
    @PostConstruct
    public void initRules() {
        DegradeRule countRule = new DegradeRule();
        countRule.setResource("getUserById");
        // Strategy: absolute exception count.
        countRule.setGrade(RuleConstant.DEGRADE_GRADE_EXCEPTION_COUNT);
        // Threshold: trips when more than 10 exceptions are seen.
        countRule.setCount(10);
        // Statistics window: 10 seconds (value is in milliseconds).
        countRule.setStatIntervalMs(10000);
        // Minimum number of requests.
        countRule.setMinRequestAmount(10);
        // Circuit stays open for 10 seconds.
        countRule.setTimeWindow(10);

        List<DegradeRule> degradeRules = new ArrayList<>();
        degradeRules.add(countRule);
        DegradeRuleManager.loadRules(degradeRules);
    }
}

适用场景

适用于调用量较小或波动较大的资源:此时少量失败就会让异常比例剧烈抖动,直接以异常数作为阈值更加稳定。

3. 慢调用比例熔断

/**
 * Registers a slow-call-ratio circuit-breaking rule for the "getUserById" resource.
 *
 * <p>For the slow-call strategy Sentinel reads {@code count} as the maximum
 * allowed response time in milliseconds and {@code slowRatioThreshold} as the
 * ratio (0.0 - 1.0) of slow calls that trips the breaker. The strategy constant
 * is {@code RuleConstant.DEGRADE_GRADE_RT}.
 */
@Configuration
public class SlowRequestRatioDegradeRule {

    /**
     * Builds the rule and hands it to {@code DegradeRuleManager} once this bean
     * has been constructed.
     */
    @PostConstruct
    public void initRules() {
        List<DegradeRule> rules = new ArrayList<>();

        DegradeRule rule = new DegradeRule();
        rule.setResource("getUserById");
        rule.setGrade(RuleConstant.DEGRADE_GRADE_RT);  // slow-call ratio strategy
        rule.setCount(3000);  // a call slower than 3000 ms (3 s) counts as slow
        rule.setSlowRatioThreshold(0.3);  // trip when more than 30% of calls are slow
        rule.setTimeWindow(10);  // circuit stays open for 10 seconds
        rule.setMinRequestAmount(10);  // minimum number of requests
        rule.setStatIntervalMs(10000);  // statistics window: 10 seconds

        rules.add(rule);
        DegradeRuleManager.loadRules(rules);
    }
}

适用场景

适用于对响应时间敏感的调用(如数据库查询、远程接口):下游尚未报错但已明显变慢时,可提前熔断,避免线程被慢调用长期占用。

降级处理

1. @SentinelResource 降级

/**
 * User lookup service whose {@code getUserById} call is guarded as the Sentinel
 * resource "getUserById".
 *
 * <p>NOTE(review): {@code log} is used below but not declared in the visible
 * class — presumably supplied by a logging annotation; confirm.
 */
@Service
public class UserService {

    @Autowired
    private UserMapper userMapper;

    /**
     * Loads a user through the mapper.
     *
     * @param id identifier forwarded to {@code userMapper.findById}
     * @return the mapper's result for {@code id}
     */
    @SentinelResource(
        value = "getUserById",
        blockHandler = "getUserByIdBlock",    // handler that receives a BlockException
        fallback = "getUserByIdFallback"      // handler that receives a Throwable
    )
    public User getUserById(Long id) {
        return userMapper.findById(id);
    }

    /**
     * Handler named by {@code blockHandler}: logs a warning and returns the default user.
     *
     * @param id the id of the rejected lookup
     * @param ex the {@code BlockException} passed to the handler
     * @return {@code User.getDefaultUser()}
     */
    public User getUserByIdBlock(Long id, BlockException ex) {
        log.warn("获取用户被限流,id={}", id);
        return User.getDefaultUser();
    }

    /**
     * Handler named by {@code fallback}: logs the failure and returns the default user.
     *
     * @param id the id of the failed lookup
     * @param ex the throwable passed to the handler
     * @return {@code User.getDefaultUser()}
     */
    public User getUserByIdFallback(Long id, Throwable ex) {
        log.error("获取用户失败,id={}", id, ex);
        // A default value is returned here; cached data would be an alternative.
        return User.getDefaultUser();
    }
}

2. Feign 降级

/**
 * Feign client for "user-service"; {@code UserFallback} is configured as its fallback.
 */
@FeignClient(name = "user-service", fallback = UserFallback.class)
public interface UserClient {

    /**
     * Issues {@code GET /users/{id}}.
     *
     * @param id value bound to the {@code id} path variable
     * @return the user wrapped in a {@code Result}
     */
    @GetMapping("/users/{id}")
    Result<User> getUser(@PathVariable("id") Long id);
}

/**
 * Fallback implementation of {@code UserClient}: logs a warning and returns a
 * failed {@code Result}.
 *
 * <p>NOTE(review): {@code log} is not declared in the visible class — confirm
 * how it is provided.
 */
@Component
public class UserFallback implements UserClient {

    /**
     * @param id the id of the user that could not be fetched
     * @return {@code Result.fail} carrying a fixed "service unavailable" message
     */
    @Override
    public Result<User> getUser(Long id) {
        log.warn("调用 user-service 失败,id={}", id);
        Result<User> unavailable = Result.fail("服务暂时不可用");
        return unavailable;
    }
}

3. 降级工厂

/**
 * Feign client for "user-service"; fallback instances are produced by
 * {@code UserFallbackFactory}.
 */
@FeignClient(name = "user-service", fallbackFactory = UserFallbackFactory.class)
public interface UserClient {

    /**
     * Issues {@code GET /users/{id}}.
     *
     * @param id value bound to the {@code id} path variable
     * @return the user wrapped in a {@code Result}
     */
    @GetMapping("/users/{id}")
    Result<User> getUser(@PathVariable("id") Long id);
}

/**
 * Creates {@code UserClient} fallbacks that have access to the triggering cause.
 *
 * <p>NOTE(review): {@code log} is not declared in the visible class — confirm
 * how it is provided.
 */
@Component
public class UserFallbackFactory implements FallbackFactory<UserClient> {

    /**
     * @param cause the throwable handed to the factory
     * @return a client whose {@code getUser} logs {@code cause} and returns a failed {@code Result}
     */
    @Override
    public UserClient create(Throwable cause) {
        // UserClient declares a single abstract method, so a lambda can implement it.
        return id -> {
            log.error("调用 user-service 失败", cause);
            return Result.fail("服务暂时不可用");
        };
    }
}

异常分类处理

自定义异常分类

/**
 * Maps a thrown exception to the exception class it should be classified as.
 */
@Component
public class CustomExceptionParser implements ExceptionParser {

    /**
     * @param throwable the exception to classify
     * @return {@code IgnoreException.class} for a {@code BusinessException},
     *         {@code SystemException.class} for a {@code SystemException},
     *         otherwise the runtime class of {@code throwable}
     */
    @Override
    public Class<? extends Throwable> parse(Throwable throwable) {
        Class<? extends Throwable> category;
        if (throwable instanceof BusinessException) {
            // Business exceptions are mapped to the "ignore" marker so they do not trip the breaker.
            category = IgnoreException.class;
        } else if (throwable instanceof SystemException) {
            // System exceptions are reported as such so they do trip the breaker.
            category = SystemException.class;
        } else {
            // Anything else keeps its own class.
            category = throwable.getClass();
        }
        return category;
    }
}

// Register the exception parser.
// NOTE(review): the line below is only a constant reference, not a registration
// call; SOURCE does not show how CustomExceptionParser is actually wired in — confirm.
RuleConstant.DEGRADE_GRADE_EXCEPTION_RATIO

忽略特定异常

/**
 * Registers an exception-ratio rule for "getUserById" and passes two exception
 * classes to {@code setExceptionTypes}.
 */
@Configuration
public class DegradeRuleConfig {

    /**
     * Builds the rule and hands it to {@code DegradeRuleManager} once this bean
     * has been constructed.
     */
    @PostConstruct
    public void initRules() {
        DegradeRule ratioRule = new DegradeRule();
        ratioRule.setResource("getUserById");
        ratioRule.setGrade(RuleConstant.DEGRADE_GRADE_EXCEPTION_RATIO);
        ratioRule.setTimeWindow(10);
        ratioRule.setCount(0.5);

        // Exception types that are meant to be ignored by the breaker.
        // NOTE(review): neither the existence of setExceptionTypes on DegradeRule nor
        // whether it includes or excludes the listed types is shown in SOURCE — confirm.
        ratioRule.setExceptionTypes(
            BusinessException.class,  // business exceptions should not trip the breaker
            ValidationException.class // parameter-validation exceptions should not trip the breaker
        );

        List<DegradeRule> degradeRules = new ArrayList<>();
        degradeRules.add(ratioRule);
        DegradeRuleManager.loadRules(degradeRules);
    }
}

熔断监控

实时监控

/**
 * Periodically logs QPS and exception figures for every resource that has a
 * degrade rule.
 *
 * <p>NOTE(review): {@code log} is not declared in the visible class — confirm
 * how it is provided.
 */
@Component
public class CircuitBreakerMonitor {

    @Autowired
    private MetricService metricService;

    /**
     * Runs at a fixed rate of 5000 ms. For each resource returned by
     * {@code DegradeRuleManager.getRules()} the current metric is fetched and,
     * when present, logged together with the exception percentage.
     */
    @Scheduled(fixedRate = 5000)
    public void checkCircuitBreakerStatus() {
        Map<String, DegradeRuleVo> rules = DegradeRuleManager.getRules();

        rules.forEach((resource, rule) -> {
            MetricVo metric = metricService.getMetric(resource);
            if (metric == null) {
                return;
            }

            log.info("资源:{}, QPS: {}, 异常数:{}, 异常比例:{}%",
                resource,
                metric.getQps(),
                metric.getExceptionQps(),
                // An idle resource has QPS 0: report 0% instead of dividing by zero.
                metric.getQps() == 0
                    ? 0
                    : metric.getExceptionQps() * 100 / metric.getQps()
            );
        });
    }
}

熔断事件监听

/**
 * Reacts to circuit-breaker events: sends an alert when a breaker opens and
 * logs when it closes.
 *
 * <p>NOTE(review): {@code log} is not declared in the visible class — confirm
 * how it is provided.
 */
@Component
public class CircuitBreakerEventListener {

    @Autowired
    private AlertService alertService;

    /**
     * @param event the circuit-breaker event; only {@code OPEN} and {@code CLOSE}
     *              types are acted on, every other type is ignored
     */
    @EventListener
    public void onCircuitBreakerEvent(CircuitBreakerEvent event) {
        if (event.getType() == CircuitBreakerEvent.Type.OPEN) {
            String alertText = String.format(
                "服务 %s 已熔断,原因:%s", event.getResource(), event.getCause());
            alertService.sendAlert(alertText);
            return;
        }

        if (event.getType() == CircuitBreakerEvent.Type.CLOSE) {
            log.info("服务 {} 已恢复", event.getResource());
        }
    }
}

级联熔断

配置级联规则

/**
 * Registers two exception-ratio rules: one for the mapper call and one for the
 * service call that sits on top of it.
 */
@Configuration
public class CascadeDegradeRuleConfig {

    /**
     * Builds both rules and hands them to {@code DegradeRuleManager} once this
     * bean has been constructed.
     */
    @PostConstruct
    public void initRules() {
        List<DegradeRule> cascade = new ArrayList<>();

        // Database call: circuit stays open for 30 seconds.
        cascade.add(exceptionRatioRule("com.example.mapper.UserMapper.findById", 30));

        // Service call (cascading level): circuit stays open for 10 seconds.
        cascade.add(exceptionRatioRule("getUserById", 10));

        DegradeRuleManager.loadRules(cascade);
    }

    /** Creates an exception-ratio rule with a 0.5 threshold for the given resource. */
    private static DegradeRule exceptionRatioRule(String resource, int timeWindow) {
        DegradeRule rule = new DegradeRule();
        rule.setResource(resource);
        rule.setGrade(RuleConstant.DEGRADE_GRADE_EXCEPTION_RATIO);
        rule.setCount(0.5);
        rule.setTimeWindow(timeWindow);
        return rule;
    }
}

熔断传播

/**
 * Order creation guarded as the Sentinel resource "createOrder".
 *
 * <p>NOTE(review): {@code orderRepository} and {@code log} are used below but
 * not declared in the visible class — confirm where they come from.
 */
@Service
public class OrderService {

    @Autowired
    private UserService userService;

    @Autowired
    private ProductService productService;

    /**
     * Looks up the order's user and product, then saves the order.
     *
     * @param order the order to persist
     * @return the result of {@code orderRepository.save(order)}
     * @throws BusinessException when the user lookup or the product lookup returns {@code null}
     */
    @SentinelResource(value = "createOrder", fallback = "createOrderFallback")
    public Order createOrder(Order order) {
        // A null user is treated as "user service unavailable".
        if (userService.getUserById(order.getUserId()) == null) {
            throw new BusinessException("用户服务不可用");
        }

        // A null product is treated as "product service unavailable".
        if (productService.getProductById(order.getProductId()) == null) {
            throw new BusinessException("产品服务不可用");
        }

        return orderRepository.save(order);
    }

    /**
     * Handler named by {@code fallback}: logs the failure and returns the default order.
     *
     * @param order the order that could not be created
     * @param ex    the throwable passed to the handler
     * @return {@code Order.getDefaultOrder()}
     */
    public Order createOrderFallback(Order order, Throwable ex) {
        log.error("创建订单失败", ex);
        // A default order is returned here; cached data would be an alternative.
        return Order.getDefaultOrder();
    }
}

最佳实践

1. 熔断阈值设置

// Core service: strict circuit breaking.
DegradeRule coreRule = new DegradeRule();
coreRule.setTimeWindow(30);  // circuit stays open for 30 seconds
coreRule.setCount(0.3);  // trips at a 30% exception ratio
coreRule.setGrade(RuleConstant.DEGRADE_GRADE_EXCEPTION_RATIO);
coreRule.setResource("coreService");

// Non-core service: lenient circuit breaking.
DegradeRule normalRule = new DegradeRule();
normalRule.setTimeWindow(10);  // circuit stays open for 10 seconds
normalRule.setCount(0.7);  // trips only at a 70% exception ratio
normalRule.setGrade(RuleConstant.DEGRADE_GRADE_EXCEPTION_RATIO);
normalRule.setResource("normalService");

2. 分级降级策略

/**
 * Assembles a {@code UserInfo} with tiered degradation: the user itself is
 * mandatory, the profile falls back to a default when its lookup fails.
 *
 * <p>NOTE(review): {@code userService}, {@code profileService} and {@code log}
 * are used below but not declared in the visible class — confirm where they come from.
 */
@Service
public class GradeDegradeService {

    /**
     * @param userId id of the user to assemble
     * @return the populated {@code UserInfo}
     */
    @SentinelResource(value = "getUserInfo", fallback = "getUserInfoFallback")
    public UserInfo getUserInfo(Long userId) {
        UserInfo assembled = new UserInfo();

        // Core data: a failure here propagates out of the method.
        assembled.setUser(userService.getUserById(userId));

        // Non-core data: any exception is logged and replaced by the default profile.
        try {
            assembled.setProfile(profileService.getProfile(userId));
        } catch (Exception e) {
            log.warn("获取用户画像失败", e);
            assembled.setProfile(Profile.getDefault());
        }

        return assembled;
    }

    /**
     * Handler named by {@code fallback}: full degradation to the default info.
     *
     * @param userId id of the user whose lookup failed
     * @param ex     the throwable passed to the handler
     * @return {@code UserInfo.getDefault()}
     */
    public UserInfo getUserInfoFallback(Long userId, Throwable ex) {
        return UserInfo.getDefault();
    }
}

3. 快速失败

/**
 * Registers a fail-fast exception-count rule for the "externalApi" resource.
 */
@Configuration
public class FastFailConfig {

    /**
     * Builds the rule and hands it to {@code DegradeRuleManager} once this bean
     * has been constructed.
     */
    @PostConstruct
    public void initRules() {
        // External API call: fail fast.
        DegradeRule externalApiRule = new DegradeRule();
        externalApiRule.setResource("externalApi");
        externalApiRule.setGrade(RuleConstant.DEGRADE_GRADE_EXCEPTION_COUNT);
        externalApiRule.setTimeWindow(60);  // circuit stays open for 60 seconds
        externalApiRule.setCount(5);  // trips after 5 exceptions

        List<DegradeRule> degradeRules = new ArrayList<>();
        degradeRules.add(externalApiRule);
        DegradeRuleManager.loadRules(degradeRules);
    }
}

4. 熔断恢复验证

/**
 * Periodically health-checks resources whose circuit is open and logs the ones
 * that look healthy again.
 *
 * <p>NOTE(review): {@code getCircuitBreakerResources()}, {@code isCircuitOpen(...)}
 * and {@code log} are used below but not defined in the visible class — confirm
 * where they come from.
 */
@Component
public class CircuitBreakerRecoveryChecker {

    @Autowired
    private HealthCheckService healthCheckService;

    /**
     * Runs at a fixed rate of 30000 ms. Resources whose circuit is not open are
     * skipped; the health check is only invoked for open circuits.
     */
    @Scheduled(fixedRate = 30000)
    public void checkRecovery() {
        for (String resource : getCircuitBreakerResources()) {
            if (!isCircuitOpen(resource)) {
                continue;
            }
            if (healthCheckService.check(resource)) {
                log.info("服务 {} 已恢复,准备重新开放", resource);
                // The circuit breaker could be reset manually at this point.
            }
        }
    }
}

常见问题

1. 熔断不生效

问题:配置了熔断规则但不生效

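排查步骤

  1. 确认规则中的资源名与 @SentinelResource 的 value(或实际埋点的资源名)完全一致。
  2. 确认统计时长内的请求数已达到最小请求数(minRequestAmount),否则不会触发熔断。
  3. 确认异常确实抛出了被保护的方法;在方法内部被 catch 掉的异常不会计入统计。
  4. DegradeRuleManager.loadRules 会整体替换已有规则;若多处调用,后加载的规则会覆盖先加载的规则。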

2. 频繁熔断

问题:服务频繁触发熔断

解决方案

适当调高熔断阈值或最小请求数、拉长统计时长,避免偶发失败触发熔断;将业务异常排除在异常统计之外;同时排查下游服务自身的稳定性问题。

3. 熔断后无法恢复

问题:熔断后服务一直不可用

解决方案

熔断时长结束后熔断器进入半开状态,只有探测请求成功才会关闭;若下游仍未恢复,探测失败会使熔断器再次打开。应先确认下游服务已恢复,再检查熔断时长(timeWindow)是否设置过长。

总结

Sentinel 熔断降级是保护系统稳定性的核心机制,通过合理的熔断策略和降级处理,可以有效防止故障传播和雪崩效应。

在生产环境中,建议根据服务重要性设置不同的熔断阈值,并建立完善的监控告警机制。


分享这篇文章到:

上一篇文章
Spring Boot 启动流程与生命周期
下一篇文章
Spring Boot SkyWalking 链路追踪