package persistence

import (
	"context"
	"fmt"
	"time"

	"github.com/ThreeDotsLabs/watermill/message"

	"go.yandata.net/wangsiyuan/go-trustlog/api/logger"
	"go.yandata.net/wangsiyuan/go-trustlog/api/model"
)

// RetryWorkerConfig is the configuration for the retry worker.
type RetryWorkerConfig struct {
	// RetryInterval is the interval between retry checks.
	RetryInterval time.Duration
	// MaxRetryCount is the maximum number of retries per record.
	MaxRetryCount int
	// BatchSize is the number of records processed per batch.
	BatchSize int
	// BackoffMultiplier scales the backoff between retries (2.0 doubles the interval each time).
	BackoffMultiplier float64
	// InitialBackoff is the initial backoff duration.
	InitialBackoff time.Duration
}

// DefaultRetryWorkerConfig returns the default retry worker configuration.
func DefaultRetryWorkerConfig() RetryWorkerConfig {
	return RetryWorkerConfig{
		RetryInterval:     30 * time.Second,
		MaxRetryCount:     5,
		BatchSize:         100,
		BackoffMultiplier: 2.0,
		InitialBackoff:    1 * time.Minute,
	}
}
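
// Minimal wiring sketch (illustrative only; it assumes the caller already has
// a *PersistenceManager, a watermill message.Publisher and a logger.Logger,
// none of which are constructed in this file):
//
//	worker := NewRetryWorker(DefaultRetryWorkerConfig(), manager, publisher, log)
//	go worker.Start(ctx) // runs until ctx is cancelled or Stop is called
//	defer worker.Stop()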

// RetryWorker is the retry worker responsible for handling failed trustlog operations.
type RetryWorker struct {
	config      RetryWorkerConfig
	manager     *PersistenceManager
	publisher   message.Publisher
	logger      logger.Logger
	stopChan    chan struct{} // closed by Stop to signal shutdown
	stoppedChan chan struct{} // closed by Start when its loop exits
}

// NewRetryWorker creates a retry worker.
func NewRetryWorker(
	config RetryWorkerConfig,
	manager *PersistenceManager,
	publisher message.Publisher,
	log logger.Logger,
) *RetryWorker {
	return &RetryWorker{
		config:      config,
		manager:     manager,
		publisher:   publisher,
		logger:      log,
		stopChan:    make(chan struct{}),
		stoppedChan: make(chan struct{}),
	}
}

// Start starts the retry worker and runs its loop until the context is
// cancelled or Stop is called.
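// It blocks the calling goroutine, so it is typically launched as
// `go worker.Start(ctx)`; every RetryInterval tick triggers one
// processRetries pass.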
func (w *RetryWorker) Start(ctx context.Context) {
	w.logger.InfoContext(ctx, "starting retry worker",
		"retryInterval", w.config.RetryInterval,
		"maxRetryCount", w.config.MaxRetryCount,
		"batchSize", w.config.BatchSize,
	)

	ticker := time.NewTicker(w.config.RetryInterval)
	defer ticker.Stop()
	defer close(w.stoppedChan)

	for {
		select {
		case <-ctx.Done():
			w.logger.InfoContext(ctx, "retry worker stopped by context")
			return
		case <-w.stopChan:
			w.logger.InfoContext(ctx, "retry worker stopped by signal")
			return
		case <-ticker.C:
			w.processRetries(ctx)
		}
	}
}

// Stop signals the retry worker to stop and blocks until its run loop has exited.
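// It must be called at most once and only after Start is running: a second
// call would close stopChan again and panic, and calling it before Start
// would block forever waiting on stoppedChan.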
func (w *RetryWorker) Stop() {
	w.logger.Info("stopping retry worker")
	close(w.stopChan)
	<-w.stoppedChan
	w.logger.Info("retry worker stopped")
}

// processRetries handles records that are pending retry.
// It reads the pending records directly from the retry table, so no cursor
// scan over the operation table is needed.
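// Records are processed one at a time; since time.Ticker drops ticks for slow
// receivers, a pass that takes longer than RetryInterval delays the next pass
// rather than overlapping with it.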
func (w *RetryWorker) processRetries(ctx context.Context) {
	w.logger.DebugContext(ctx, "processing retries from retry table")

	retryRepo := w.manager.GetRetryRepo()
	opRepo := w.manager.GetOperationRepo()

	// Look up pending records directly from the retry table (only records
	// whose retry time has been reached).
	records, err := retryRepo.FindPendingRetries(ctx, w.config.BatchSize)
	if err != nil {
		w.logger.ErrorContext(ctx, "failed to find pending retries",
			"error", err,
		)
		return
	}

	if len(records) == 0 {
		w.logger.DebugContext(ctx, "no pending retries found")
		return
	}

	w.logger.InfoContext(ctx, "found pending retries from retry table",
		"count", len(records),
		"batchSize", w.config.BatchSize,
	)

	// Handle each retry record.
	for _, record := range records {
		w.processRetry(ctx, record, retryRepo, opRepo)
	}
}

// processRetry handles a single retry record.
func (w *RetryWorker) processRetry(
	ctx context.Context,
	record RetryRecord,
	retryRepo RetryRepository,
	opRepo OperationRepository,
) {
	w.logger.DebugContext(ctx, "processing retry",
		"opID", record.OpID,
		"retryCount", record.RetryCount,
	)

	// Check whether the maximum retry count has been exceeded.
	if record.RetryCount >= w.config.MaxRetryCount {
		w.logger.WarnContext(ctx, "max retry count exceeded, marking as dead letter",
			"opID", record.OpID,
			"retryCount", record.RetryCount,
		)
		if err := retryRepo.MarkAsDeadLetter(ctx, record.OpID,
			fmt.Sprintf("exceeded max retry count (%d)", w.config.MaxRetryCount)); err != nil {
			w.logger.ErrorContext(ctx, "failed to mark as dead letter",
				"opID", record.OpID,
				"error", err,
			)
		}
		return
	}

	// Look up the operation record.
	op, status, err := opRepo.FindByID(ctx, record.OpID)
	if err != nil {
		w.logger.ErrorContext(ctx, "failed to find operation for retry",
			"opID", record.OpID,
			"error", err,
		)
		nextRetry := w.calculateNextRetry(record.RetryCount)
		retryRepo.IncrementRetry(ctx, record.OpID, err.Error(), nextRetry)
		return
	}

	// If the operation has already been trustlogged, delete the retry record.
	if status == StatusTrustlogged {
		w.logger.InfoContext(ctx, "operation already trustlogged, removing retry record",
			"opID", record.OpID,
		)
		if err := retryRepo.DeleteRetry(ctx, record.OpID); err != nil {
			w.logger.ErrorContext(ctx, "failed to delete retry record",
				"opID", record.OpID,
				"error", err,
			)
		}
		return
	}

	// Try to republish to the trustlog system. This has to follow the actual
	// trustlog logic; as an example, the operation is sent to the message queue.
	if err := w.republishOperation(ctx, op); err != nil {
		w.logger.ErrorContext(ctx, "failed to republish operation",
			"opID", record.OpID,
			"error", err,
		)
		nextRetry := w.calculateNextRetry(record.RetryCount)
		retryRepo.IncrementRetry(ctx, record.OpID, err.Error(), nextRetry)
		return
	}

	// Publish succeeded; update the status to trustlogged.
	if err := opRepo.UpdateStatus(ctx, record.OpID, StatusTrustlogged); err != nil {
		w.logger.ErrorContext(ctx, "failed to update operation status",
			"opID", record.OpID,
			"error", err,
		)
		return
	}

	// Delete the retry record.
	if err := retryRepo.DeleteRetry(ctx, record.OpID); err != nil {
		w.logger.ErrorContext(ctx, "failed to delete retry record",
			"opID", record.OpID,
			"error", err,
		)
		return
	}

	w.logger.InfoContext(ctx, "operation retry successful",
		"opID", record.OpID,
		"retryCount", record.RetryCount,
	)
}

// republishOperation republishes the operation to the trustlog system.
// Note: the payload has to be serialized into the Envelope format before it
// can be published.
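// A possible shape for the publish path, sketched here only as a comment: the
// topic name and the marshalOperationEnvelope helper are hypothetical and not
// defined in this package; message.NewMessage and Publisher.Publish are the
// real watermill APIs.
//
//	payload, err := marshalOperationEnvelope(op) // hypothetical Envelope serializer
//	if err != nil {
//		return fmt.Errorf("marshal envelope: %w", err)
//	}
//	msg := message.NewMessage(op.OpID, payload) // assumes OpID is a string UUID
//	return w.publisher.Publish("trustlog.operations", msg) // topic name is an assumption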
func (w *RetryWorker) republishOperation(ctx context.Context, op *model.Operation) error {
	// Simplified implementation: assume the publisher has already been
	// configured; the concrete publishing logic still has to be filled in.
	if w.publisher == nil {
		return fmt.Errorf("publisher not configured")
	}

	// Note: real usage requires Envelope serialization, and the concrete
	// implementation needs to be integrated in HighClient. For now this only
	// logs a warning and reports success.
	w.logger.WarnContext(ctx, "republish not implemented yet, needs Envelope serialization",
		"opID", op.OpID,
	)
	return nil
}

// calculateNextRetry calculates the next retry time using exponential backoff.
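// With the default configuration (InitialBackoff = 1 minute, BackoffMultiplier
// = 2.0) the computed delays are 1m, 2m, 4m, 8m and 16m for retryCount 0
// through 4.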
func (w *RetryWorker) calculateNextRetry(retryCount int) time.Time {
	backoff := float64(w.config.InitialBackoff)
	for i := 0; i < retryCount; i++ {
		backoff *= w.config.BackoffMultiplier
	}
	return time.Now().Add(time.Duration(backoff))
}