fix(feishu): invalidate cached token on auth error to enable retry recovery (#1318)

The Lark SDK v3's built-in token retry loop does not clear stale tokens
from cache when the server returns error 99991663 (tenant_access_token
invalid), causing all API calls to fail until the token naturally
expires (~2 hours).

- Add tokenCache struct (implementing larkcore.Cache) with
  Get/Set/InvalidateAll methods and proper expired-entry cleanup
- Wire custom cache into lark.NewClient via WithTokenCache()
- Add invalidateTokenOnAuthError helper called in all API methods
This commit is contained in:
Vast-stars
2026-03-18 19:07:49 +08:00
committed by GitHub
parent 578f90855e
commit 3e9b7ce9c1
2 changed files with 85 additions and 4 deletions
+33 -4
View File
@@ -29,11 +29,17 @@ import (
"github.com/sipeed/picoclaw/pkg/utils"
)
// errCodeTenantTokenInvalid is the Feishu API error code for an expired/revoked
// tenant_access_token. The Lark SDK's built-in retry does not clear its cache
// on this error, so we do it ourselves.
const errCodeTenantTokenInvalid = 99991663
type FeishuChannel struct {
*channels.BaseChannel
config config.FeishuConfig
client *lark.Client
wsClient *larkws.Client
config config.FeishuConfig
client *lark.Client
wsClient *larkws.Client
tokenCache *tokenCache // custom cache that supports invalidation
botOpenID atomic.Value // stores string; populated lazily for @mention detection
@@ -47,10 +53,12 @@ func NewFeishuChannel(cfg config.FeishuConfig, bus *bus.MessageBus) (*FeishuChan
channels.WithReasoningChannelID(cfg.ReasoningChannelID),
)
tc := newTokenCache()
ch := &FeishuChannel{
BaseChannel: base,
config: cfg,
client: lark.NewClient(cfg.AppID, cfg.AppSecret),
tokenCache: tc,
client: lark.NewClient(cfg.AppID, cfg.AppSecret, lark.WithTokenCache(tc)),
}
ch.SetOwner(ch)
return ch, nil
@@ -147,6 +155,7 @@ func (c *FeishuChannel) EditMessage(ctx context.Context, chatID, messageID, cont
return fmt.Errorf("feishu edit: %w", err)
}
if !resp.Success() {
c.invalidateTokenOnAuthError(resp.Code)
return fmt.Errorf("feishu edit api error (code=%d msg=%s)", resp.Code, resp.Msg)
}
return nil
@@ -186,6 +195,7 @@ func (c *FeishuChannel) SendPlaceholder(ctx context.Context, chatID string) (str
return "", fmt.Errorf("feishu placeholder send: %w", err)
}
if !resp.Success() {
c.invalidateTokenOnAuthError(resp.Code)
return "", fmt.Errorf("feishu placeholder api error (code=%d msg=%s)", resp.Code, resp.Msg)
}
@@ -226,6 +236,7 @@ func (c *FeishuChannel) ReactToMessage(ctx context.Context, chatID, messageID st
return func() {}, fmt.Errorf("feishu react: %w", err)
}
if !resp.Success() {
c.invalidateTokenOnAuthError(resp.Code)
logger.ErrorCF("feishu", "Reaction API error", map[string]any{
"emoji": chosenEmoji,
"message_id": messageID,
@@ -451,6 +462,7 @@ func (c *FeishuChannel) fetchBotOpenID(ctx context.Context) error {
return fmt.Errorf("bot info parse: %w", err)
}
if result.Code != 0 {
c.invalidateTokenOnAuthError(result.Code)
return fmt.Errorf("bot info api error (code=%d)", result.Code)
}
if result.Bot.OpenID == "" {
@@ -593,6 +605,7 @@ func (c *FeishuChannel) downloadResource(
return ""
}
if !resp.Success() {
c.invalidateTokenOnAuthError(resp.Code)
logger.ErrorCF("feishu", "Resource download api error", map[string]any{
"code": resp.Code,
"msg": resp.Msg,
@@ -705,6 +718,7 @@ func (c *FeishuChannel) sendCard(ctx context.Context, chatID, cardContent string
}
if !resp.Success() {
c.invalidateTokenOnAuthError(resp.Code)
return fmt.Errorf("feishu api error (code=%d msg=%s): %w", resp.Code, resp.Msg, channels.ErrTemporary)
}
@@ -730,6 +744,7 @@ func (c *FeishuChannel) sendImage(ctx context.Context, chatID string, file *os.F
return fmt.Errorf("feishu image upload: %w", err)
}
if !uploadResp.Success() {
c.invalidateTokenOnAuthError(uploadResp.Code)
return fmt.Errorf("feishu image upload api error (code=%d msg=%s)", uploadResp.Code, uploadResp.Msg)
}
if uploadResp.Data == nil || uploadResp.Data.ImageKey == nil {
@@ -754,6 +769,7 @@ func (c *FeishuChannel) sendImage(ctx context.Context, chatID string, file *os.F
return fmt.Errorf("feishu image send: %w", err)
}
if !resp.Success() {
c.invalidateTokenOnAuthError(resp.Code)
return fmt.Errorf("feishu image send api error (code=%d msg=%s)", resp.Code, resp.Msg)
}
return nil
@@ -784,6 +800,7 @@ func (c *FeishuChannel) sendFile(ctx context.Context, chatID string, file *os.Fi
return fmt.Errorf("feishu file upload: %w", err)
}
if !uploadResp.Success() {
c.invalidateTokenOnAuthError(uploadResp.Code)
return fmt.Errorf("feishu file upload api error (code=%d msg=%s)", uploadResp.Code, uploadResp.Msg)
}
if uploadResp.Data == nil || uploadResp.Data.FileKey == nil {
@@ -808,6 +825,7 @@ func (c *FeishuChannel) sendFile(ctx context.Context, chatID string, file *os.Fi
return fmt.Errorf("feishu file send: %w", err)
}
if !resp.Success() {
c.invalidateTokenOnAuthError(resp.Code)
return fmt.Errorf("feishu file send api error (code=%d msg=%s)", resp.Code, resp.Msg)
}
return nil
@@ -830,3 +848,14 @@ func extractFeishuSenderID(sender *larkim.EventSender) string {
return ""
}
// invalidateTokenOnAuthError clears the cached tenant_access_token when the
// Feishu API reports it as invalid (99991663), so the next request fetches a
// fresh one. The Lark SDK's built-in retry does not clear the cache, causing
// all API calls to fail until the token naturally expires (~2 hours).
func (c *FeishuChannel) invalidateTokenOnAuthError(code int) {
if code == errCodeTenantTokenInvalid {
c.tokenCache.InvalidateAll()
logger.WarnCF("feishu", "Invalidated cached token due to auth error", nil)
}
}
+52
View File
@@ -0,0 +1,52 @@
package feishu
import (
"context"
"sync"
"time"
)
// tokenCache implements larkcore.Cache with an extra InvalidateAll method.
// This works around a bug in the Lark SDK v3 where the built-in token retry
// loop does not clear stale tokens from cache on auth errors.
type tokenCache struct {
mu sync.RWMutex
store map[string]*tokenEntry
}
type tokenEntry struct {
value string
expireAt time.Time
}
func newTokenCache() *tokenCache {
return &tokenCache{store: make(map[string]*tokenEntry)}
}
func (c *tokenCache) Set(_ context.Context, key, value string, ttl time.Duration) error {
c.mu.Lock()
defer c.mu.Unlock()
c.store[key] = &tokenEntry{value: value, expireAt: time.Now().Add(ttl)}
return nil
}
func (c *tokenCache) Get(_ context.Context, key string) (string, error) {
c.mu.Lock()
defer c.mu.Unlock()
e, ok := c.store[key]
if !ok {
return "", nil
}
if e.expireAt.Before(time.Now()) {
delete(c.store, key)
return "", nil
}
return e.value, nil
}
// InvalidateAll removes all cached tokens, forcing fresh acquisition.
func (c *tokenCache) InvalidateAll() {
c.mu.Lock()
defer c.mu.Unlock()
clear(c.store)
}