Gin 错误日志收集与告警
完善的错误日志收集与告警系统是保障服务稳定性的关键。
错误捕获中间件
全局错误捕获
Go
// ErrorCaptureMiddleware returns a gin middleware that runs the remaining
// handlers and then logs every error they attached to the context (c.Errors).
//
// Handling depends on the gin error type:
//   - gin.ErrorTypeBind: logged at warn level, no alert.
//   - gin.ErrorTypePrivate: logged at error level together with ginErr.Meta.
//   - gin.ErrorTypePublic: logged at error level and forwarded to TriggerAlert
//     as a "warning" alert.
//   - any other value (for example gin.ErrorTypeRender, or a value with
//     several type bits set): logged at error level so it is not lost.
func ErrorCaptureMiddleware() gin.HandlerFunc {
	return func(c *gin.Context) {
		c.Next()

		// Errors are only available after the downstream handlers have run.
		if len(c.Errors) == 0 {
			return
		}

		requestID := c.GetString("request_id")
		for _, ginErr := range c.Errors {
			switch ginErr.Type {
			case gin.ErrorTypeBind:
				// Parameter binding error: record it, but do not alert.
				logger.WithFields(logrus.Fields{
					"request_id": requestID,
					"error_type": "bind",
				}).Warn(ginErr.Error())
			case gin.ErrorTypePrivate:
				// Business error: record it with its metadata.
				logger.WithFields(logrus.Fields{
					"request_id": requestID,
					"error_type": "private",
					"meta":       ginErr.Meta,
				}).Error(ginErr.Error())
			case gin.ErrorTypePublic:
				// Public error: record it and raise an alert.
				logger.WithFields(logrus.Fields{
					"request_id": requestID,
					"error_type": "public",
				}).Error(ginErr.Error())
				// TriggerAlert is a function value; skip the call while no
				// alert sink has been registered instead of panicking.
				if TriggerAlert != nil {
					TriggerAlert(AlertConfig{
						Level:   "warning",
						Message: ginErr.Error(),
						Context: map[string]string{
							"request_id": requestID,
							"path":       c.Request.URL.Path,
						},
					})
				}
			default:
				// The type is a bit mask, so values outside the three cases
				// above are possible; log them rather than dropping them.
				logger.WithFields(logrus.Fields{
					"request_id": requestID,
					"error_type": "other",
					"meta":       ginErr.Meta,
				}).Error(ginErr.Error())
			}
		}
	}
}
Panic 恢复中间件
Go
// RecoveryMiddleware returns a gin middleware that recovers from panics raised
// by later handlers. On a panic it logs the panic value, stack trace, path and
// method, sends a "critical" alert through TriggerAlert when one is
// registered, and aborts the request with a 500 JSON body that carries the
// request ID.
func RecoveryMiddleware() gin.HandlerFunc {
	return func(c *gin.Context) {
		defer func() {
			err := recover()
			if err == nil {
				return
			}

			requestID := c.GetString("request_id")
			// Convert the stack once; the log entry and the alert both use it.
			stack := string(debug.Stack())

			logger.WithFields(logrus.Fields{
				"request_id": requestID,
				"panic":      err,
				"stack":      stack,
				"path":       c.Request.URL.Path,
				"method":     c.Request.Method,
			}).Error("服务 Panic")

			// TriggerAlert is a function value. Calling it while it is still
			// nil would panic inside this deferred function and the client
			// would never receive the 500 response below.
			if TriggerAlert != nil {
				TriggerAlert(AlertConfig{
					Level:   "critical",
					Message: fmt.Sprintf("Panic: %v", err),
					Context: map[string]string{
						"request_id": requestID,
						"path":       c.Request.URL.Path,
						"stack":      stack,
					},
				})
			}

			// Stop the handler chain and return a generic error to the client.
			c.AbortWithStatusJSON(500, gin.H{
				"error":      "服务内部错误",
				"request_id": requestID,
			})
		}()
		c.Next()
	}
}
// main builds a bare gin engine, installs the request-ID, panic-recovery and
// error-capture middlewares in that order, and serves on :8080.
func main() {
	r := gin.New()
	r.Use(RequestIDMiddleware())
	r.Use(RecoveryMiddleware())
	r.Use(ErrorCaptureMiddleware())

	// Run only returns when the server could not start or has stopped with an
	// error; surface that instead of exiting silently with status 0.
	if err := r.Run(":8080"); err != nil {
		panic(err)
	}
}
错误分级处理
错误级别定义
Go
// ErrorLevel is the severity assigned to an AppError.
type ErrorLevel string

// Severity levels, listed from debug up to critical.
const (
	LevelDebug ErrorLevel = "debug"
	LevelInfo ErrorLevel = "info"
	LevelWarning ErrorLevel = "warning"
	LevelError ErrorLevel = "error"
	LevelCritical ErrorLevel = "critical"
)
// AppError describes an application error: its severity, message, numeric
// code and optional diagnostic data.
type AppError struct {
	// Level is the severity; ErrorHandler uses it to pick the log method and
	// to decide whether an alert is raised.
	Level ErrorLevel
	// Message is the text written to the log.
	Message string
	// Code is logged under the "error_code" field.
	Code int
	// Context holds extra key/value pairs that are copied into the log fields.
	Context map[string]interface{}
	// Stack is set through WithStack.
	// NOTE(review): presumably a stack trace; nothing visible here reads it.
	Stack string
	// Timestamp is set to time.Now() by NewError.
	Timestamp time.Time
}
// NewError builds an AppError with the given severity, message and code and
// stamps it with the current time. Context and Stack are left at their zero
// values; use WithContext / WithStack to fill them in.
func NewError(level ErrorLevel, message string, code int) *AppError {
	appErr := new(AppError)
	appErr.Level = level
	appErr.Message = message
	appErr.Code = code
	appErr.Timestamp = time.Now()
	return appErr
}
// WithContext replaces e.Context with ctx (the map is stored as-is, not
// copied) and returns e so that calls can be chained.
func (e *AppError) WithContext(ctx map[string]interface{}) *AppError {
	e.Context = ctx
	return e
}
// WithStack stores stack in e.Stack and returns e so that calls can be chained.
func (e *AppError) WithStack(stack string) *AppError {
	e.Stack = stack
	return e
}
错误处理策略
Go
// ErrorHandler logs AppErrors and decides, per severity level, whether an
// alert is raised.
type ErrorHandler struct {
	alertThreshold map[ErrorLevel]bool // whether errors of a given level trigger an alert
	logLevel logrus.Level // log level; NOTE(review): not read by any method visible here
}
// NewErrorHandler returns an ErrorHandler with the default policy: error and
// critical levels trigger alerts, warning is explicitly disabled, and the log
// level is logrus.InfoLevel.
func NewErrorHandler() *ErrorHandler {
	thresholds := make(map[ErrorLevel]bool, 3)
	thresholds[LevelWarning] = false
	thresholds[LevelError] = true
	thresholds[LevelCritical] = true

	handler := &ErrorHandler{logLevel: logrus.InfoLevel}
	handler.alertThreshold = thresholds
	return handler
}
// Handle processes err for the request held by c: it always logs the error,
// raises an alert when the error's level is enabled in alertThreshold, and
// finally writes the response.
func (h *ErrorHandler) Handle(err *AppError, c *gin.Context) {
	reqID := c.GetString("request_id")

	h.logError(err, reqID)

	// Levels missing from the map read as false and therefore never alert.
	if shouldAlert := h.alertThreshold[err.Level]; shouldAlert {
		h.triggerAlert(err, reqID, c)
	}

	h.writeResponse(err, c)
}
// logError writes err to the logger using the log method that matches
// err.Level. The entry carries request_id, error_code and level plus every
// key/value pair from err.Context.
func (h *ErrorHandler) logError(err *AppError, requestID string) {
	fields := make(logrus.Fields, len(err.Context)+3)
	for k, v := range err.Context {
		fields[k] = v
	}
	// Set the fixed fields last so that a Context key with the same name
	// cannot overwrite the request ID, code or level of the entry.
	fields["request_id"] = requestID
	fields["error_code"] = err.Code
	fields["level"] = err.Level

	entry := logger.WithFields(fields)
	switch err.Level {
	case LevelDebug:
		entry.Debug(err.Message)
	case LevelInfo:
		entry.Info(err.Message)
	case LevelWarning:
		entry.Warn(err.Message)
	case LevelError, LevelCritical:
		entry.Error(err.Message)
	default:
		// Unknown or empty level: still log it instead of dropping the error.
		entry.Error(err.Message)
	}
}
告警系统实现
告警配置
Go
// AlertConfig is a single alert handed to the alert channels.
type AlertConfig struct {
	Level string // alert level
	Message string // alert message
	Context map[string]string // context information
	Time time.Time // alert time; overwritten with time.Now() by AlertManager.Trigger
}
// AlertChannel is a destination that can deliver an alert (for example
// e-mail or a webhook).
type AlertChannel interface {
	// Send delivers alert and returns an error when delivery fails.
	Send(alert AlertConfig) error
}
const (
	// alertRateWindow is the length of one rate-limit window.
	alertRateWindow = time.Hour
	// maxAlertsPerWindow is how many alerts with the same level and message
	// are accepted within one window.
	maxAlertsPerWindow = 5
)

// AlertManager rate-limits alerts, queues them, and fans each queued alert
// out to every registered AlertChannel from a background goroutine.
//
// Trigger and AddChannel are safe for concurrent use; Trigger is typically
// called from many request goroutines at once.
type AlertManager struct {
	mu          sync.Mutex // guards channels, rateLimit and windowStart
	channels    []AlertChannel
	queue       chan AlertConfig
	rateLimit   map[string]int // alerts accepted per key in the current window
	windowStart time.Time      // start of the current rate-limit window
}

// NewAlertManager creates an AlertManager with a queue of 100 alerts and
// starts the goroutine that delivers queued alerts. The goroutine runs for as
// long as the queue stays open, which here is the lifetime of the process.
func NewAlertManager() *AlertManager {
	am := &AlertManager{
		queue:       make(chan AlertConfig, 100),
		rateLimit:   make(map[string]int),
		windowStart: time.Now(),
	}
	// Start the alert delivery goroutine.
	go am.processAlerts()
	return am
}

// AddChannel registers channel as an additional delivery target.
func (am *AlertManager) AddChannel(channel AlertChannel) {
	am.mu.Lock()
	defer am.mu.Unlock()
	am.channels = append(am.channels, channel)
}

// Trigger stamps alert with the current time and enqueues it for delivery.
// Alerts are dropped when more than maxAlertsPerWindow alerts with the same
// level and message were already accepted in the current window, or when the
// queue is full (the latter is logged). Trigger never blocks.
func (am *AlertManager) Trigger(alert AlertConfig) {
	alert.Time = time.Now()

	// Rate-limit check.
	key := alert.Level + ":" + alert.Message
	if !am.allow(key, alert.Time) {
		return
	}

	// Enqueue without blocking the caller.
	select {
	case am.queue <- alert:
	default:
		logger.Warn("告警队列已满")
	}
}

// allow reports whether an alert with the given key may be sent at time now,
// and counts it when it may.
func (am *AlertManager) allow(key string, now time.Time) bool {
	am.mu.Lock()
	defer am.mu.Unlock()

	// Start a fresh window once the current one has expired. Dropping the
	// whole map also keeps it from growing without bound when messages are
	// mostly unique.
	if now.Sub(am.windowStart) >= alertRateWindow {
		am.rateLimit = make(map[string]int)
		am.windowStart = now
	}
	if am.rateLimit[key] >= maxAlertsPerWindow {
		return false
	}
	am.rateLimit[key]++
	return true
}

// processAlerts delivers queued alerts to every registered channel until the
// queue is closed. Delivery failures are logged and do not stop the loop.
func (am *AlertManager) processAlerts() {
	for alert := range am.queue {
		// Snapshot the channel list so the lock is not held during sends.
		am.mu.Lock()
		channels := append([]AlertChannel(nil), am.channels...)
		am.mu.Unlock()

		for _, channel := range channels {
			if err := channel.Send(alert); err != nil {
				logger.WithError(err).Error("告警发送失败")
			}
		}
	}
}
多渠道告警
Go
// EmailChannel is an AlertChannel that delivers alerts by e-mail.
type EmailChannel struct {
	// SMTPHost is the SMTP server host passed to sendEmail.
	SMTPHost string
	// SMTPPort is the SMTP server port.
	// NOTE(review): Send as written does not pass it to sendEmail.
	SMTPPort int
	// From is the sender address.
	From string
	// To lists the recipient addresses.
	To []string
}
// Send formats alert as a plain-text e-mail (level, time, message, context)
// and hands it to sendEmail. It returns whatever error sendEmail returns.
func (ec *EmailChannel) Send(alert AlertConfig) error {
	// Build the mail content.
	subject := fmt.Sprintf("[%s] 服务告警", alert.Level)
	// Format the time explicitly: printing a time.Time with %s yields the
	// debug representation (zone name, possibly a monotonic "m=+..." suffix),
	// unlike the fixed layouts used by the other channels.
	body := fmt.Sprintf("级别: %s\n时间: %s\n消息: %s\n上下文: %v",
		alert.Level, alert.Time.Format("2006-01-02 15:04:05"), alert.Message, alert.Context)
	// Send the mail (simplified example).
	// NOTE(review): ec.SMTPPort is not forwarded; confirm sendEmail's contract.
	return sendEmail(ec.SMTPHost, ec.From, ec.To, subject, body)
}
// WebhookChannel is an AlertChannel that POSTs alerts as JSON to a webhook
// (intended for DingTalk, WeCom and similar services).
type WebhookChannel struct {
	// URL is the webhook endpoint.
	URL string
	// Headers are set on every request.
	Headers map[string]string
}
// Send POSTs alert to wc.URL as a JSON object with the keys level, message,
// time (RFC 3339) and context. It returns an error when the payload cannot be
// encoded, the request cannot be built or sent, or the endpoint answers with
// a status code of 400 or above.
func (wc *WebhookChannel) Send(alert AlertConfig) error {
	payload := map[string]interface{}{
		"level":   alert.Level,
		"message": alert.Message,
		"time":    alert.Time.Format(time.RFC3339),
		"context": alert.Context,
	}
	body, err := json.Marshal(payload)
	if err != nil {
		return fmt.Errorf("webhook 载荷序列化失败: %w", err)
	}

	// An invalid URL makes NewRequest return a nil request; bail out here
	// instead of dereferencing it below.
	req, err := http.NewRequest(http.MethodPost, wc.URL, bytes.NewReader(body))
	if err != nil {
		return fmt.Errorf("webhook 请求创建失败: %w", err)
	}
	// Default content type for the JSON body; wc.Headers may override it.
	req.Header.Set("Content-Type", "application/json")
	for k, v := range wc.Headers {
		req.Header.Set(k, v)
	}

	client := &http.Client{Timeout: 5 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode >= 400 {
		return fmt.Errorf("webhook 返回错误: %d", resp.StatusCode)
	}
	return nil
}
// DingTalkChannel is an AlertChannel that posts alerts in the DingTalk robot
// text-message format.
type DingTalkChannel struct {
	// WebhookURL is the robot webhook endpoint the message is POSTed to.
	WebhookURL string
}
// Send POSTs alert to dc.WebhookURL as a DingTalk "text" message containing
// the level, time, message and the request_id taken from alert.Context. It
// returns an error when the payload cannot be encoded, the request cannot be
// built or sent, or the endpoint answers with a status code of 400 or above.
func (dc *DingTalkChannel) Send(alert AlertConfig) error {
	payload := map[string]interface{}{
		"msgtype": "text",
		"text": map[string]string{
			"content": fmt.Sprintf("【%s告警】\n时间: %s\n消息: %s\n请求ID: %s",
				alert.Level, alert.Time.Format("2006-01-02 15:04:05"),
				alert.Message, alert.Context["request_id"]),
		},
	}
	body, err := json.Marshal(payload)
	if err != nil {
		return fmt.Errorf("钉钉载荷序列化失败: %w", err)
	}

	// An invalid URL makes NewRequest return a nil request; bail out here
	// instead of dereferencing it below.
	req, err := http.NewRequest(http.MethodPost, dc.WebhookURL, bytes.NewReader(body))
	if err != nil {
		return fmt.Errorf("钉钉请求创建失败: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")

	client := &http.Client{Timeout: 5 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	// Same HTTP-level check as WebhookChannel.Send.
	// NOTE(review): DingTalk reportedly signals API-level failures through an
	// errcode field in a 200 response body — confirm and decode it if needed.
	if resp.StatusCode >= 400 {
		return fmt.Errorf("钉钉 webhook 返回错误: %d", resp.StatusCode)
	}
	return nil
}
日志收集到外部系统
集成 Elasticsearch
Go
import (
	"sync"

	"github.com/olivere/elastic/v7"
)
// ESLogger writes log entries to an Elasticsearch index.
type ESLogger struct {
	// client is the Elasticsearch client used for indexing.
	client *elastic.Client
	// indexName is the index every entry is written to.
	indexName string
}
// NewESLogger creates an Elasticsearch client for url and returns an ESLogger
// that writes to the given index. The client creation error, if any, is
// returned unchanged together with a nil logger.
func NewESLogger(url string, index string) (*ESLogger, error) {
	esClient, err := elastic.NewClient(elastic.SetURL(url))
	if err != nil {
		return nil, err
	}

	esl := new(ESLogger)
	esl.client = esClient
	esl.indexName = index
	return esl, nil
}
// Log indexes entry as a JSON document in esl.indexName using a background
// context, and returns the error reported by the index call.
func (esl *ESLogger) Log(entry LogEntry) error {
	indexCall := esl.client.Index().Index(esl.indexName).BodyJson(entry)
	if _, err := indexCall.Do(context.Background()); err != nil {
		return err
	}
	return nil
}
// LogEntry is the document that ESLogger.Log writes to Elasticsearch; the
// JSON tags define the field names of the stored document.
type LogEntry struct {
	Timestamp time.Time `json:"timestamp"`
	Level string `json:"level"`
	Message string `json:"message"`
	RequestID string `json:"request_id"`
	Path string `json:"path"`
	Method string `json:"method"`
	StatusCode int `json:"status_code"`
	// Context carries arbitrary additional key/value data.
	Context map[string]interface{} `json:"context"`
}
完整配置示例
Go
// main wires the complete example together: middlewares, the error handler,
// the alert manager with its e-mail and DingTalk channels, one sample route,
// and finally the HTTP server on :8080.
func main() {
	r := gin.New()

	// Request ID.
	r.Use(RequestIDMiddleware())

	// Panic recovery.
	r.Use(RecoveryMiddleware())

	// Error capture.
	errorHandler := NewErrorHandler()
	r.Use(ErrorCaptureMiddleware())

	// Alerting.
	alertManager := NewAlertManager()

	// Configure the alert channels.
	alertManager.AddChannel(&EmailChannel{
		SMTPHost: "smtp.example.com",
		From:     "alerts@example.com",
		To:       []string{"admin@example.com"},
	})
	alertManager.AddChannel(&DingTalkChannel{
		// DingTalk custom robots are addressed through /robot/send.
		WebhookURL: "https://oapi.dingtalk.com/robot/send?access_token=xxx",
	})

	// Register the global alert trigger before the server starts handling
	// requests, because the middlewares call it.
	TriggerAlert = alertManager.Trigger

	// Routes.
	r.GET("/api/data", func(c *gin.Context) {
		// Business logic.
		data, err := fetchData()
		if err != nil {
			errorHandler.Handle(
				NewError(LevelError, "数据获取失败", 500).
					WithContext(map[string]interface{}{"error": err.Error()}),
				c,
			)
			return
		}
		c.JSON(200, data)
	})

	// Run only returns when the server could not start or has stopped with an
	// error; surface that instead of exiting silently with status 0.
	if err := r.Run(":8080"); err != nil {
		panic(err)
	}
}
告警级别与处理策略
| 级别 | 场景 | 告警方式 | 频率限制 |
|---|---|---|---|
| warning | 潜在问题 | 日志记录 | 不告警 |
| error | 业务错误 | 邮件/Webhook | 5次/小时 |
| critical | 服务崩溃 | 多渠道+电话 | 立即告警 |
注意:告警频率限制用于避免告警轰炸——同类告警(级别与消息均相同)达到上限后会被直接丢弃,而不是合并后再发送。
要点总结
- 错误捕获:中间件统一捕获处理错误和 Panic
- 分级处理:根据错误级别决定记录和告警策略
- 告警系统:队列化处理、频率限制、多渠道通知
- 多渠道支持:邮件、钉钉、企业微信等
- 外部集成:Elasticsearch、日志平台等
- 频率控制:对同类告警限流(超出上限的告警被丢弃),避免告警轰炸
📝 发现内容有误?点击此处直接编辑