Merge pull request #1885 from alexhoshina/fix-1884-qq-long-audio-file-fallback

fix(qq): send long audio as file
This commit is contained in:
daming大铭
2026-03-22 17:37:29 +08:00
committed by GitHub
3 changed files with 472 additions and 4 deletions
+231
View File
@@ -0,0 +1,231 @@
package qq
import (
"encoding/binary"
"io"
"os"
"path/filepath"
"strings"
"time"
)
// qqVoiceMaxDuration is the longest audio duration that is still treated as a
// QQ voice message; audio measured as longer than this is sent as a file.
const qqVoiceMaxDuration = 60 * time.Second
// qqAudioDuration measures the playback length of the audio file at localPath.
//
// The container format is chosen by qqAudioDurationFormat from the content
// type, the filename and the local path. The boolean result reports whether a
// duration could be determined; it is false (with a nil error) when localPath
// is empty or the format is not one of the supported containers. A non-nil
// error is whatever the format-specific probe returned.
func qqAudioDuration(localPath, filename, contentType string) (time.Duration, bool, error) {
	if localPath == "" {
		return 0, false, nil
	}

	// Select the probe first, then run it once at the bottom.
	var probe func(string) (time.Duration, bool, error)
	switch qqAudioDurationFormat(localPath, filename, contentType) {
	case "wav":
		probe = qqWAVDuration
	case "ogg":
		probe = qqOggDuration
	}
	if probe == nil {
		return 0, false, nil
	}
	return probe(localPath)
}
// qqAudioDurationFormat decides which container parser should be used to
// measure an audio file. It returns "wav", "ogg", or "" when the format is not
// recognised.
//
// The content type is consulted first, then the extension of filename, then
// the extension of localPath. Matching is case-insensitive. Media-type
// parameters (anything after ';') and surrounding whitespace are ignored, so
// "application/ogg; codecs=opus" is recognised just like "application/ogg".
func qqAudioDurationFormat(localPath, filename, contentType string) string {
	mediaType := strings.ToLower(contentType)
	if i := strings.IndexByte(mediaType, ';'); i >= 0 {
		mediaType = mediaType[:i]
	}
	mediaType = strings.TrimSpace(mediaType)

	switch {
	case strings.HasPrefix(mediaType, "audio/wav"), strings.HasPrefix(mediaType, "audio/x-wav"):
		return "wav"
	case strings.HasPrefix(mediaType, "audio/ogg"),
		mediaType == "application/ogg",
		mediaType == "application/x-ogg":
		return "ogg"
	}

	// Fall back to file extensions; the declared filename wins over the
	// on-disk path.
	for _, name := range []string{filename, localPath} {
		switch filepath.Ext(strings.ToLower(name)) {
		case ".wav":
			return "wav"
		case ".ogg", ".opus":
			return "ogg"
		}
	}
	return ""
}
// qqWAVDuration computes the playback length of a RIFF/RIFX WAVE file as
// data-chunk size divided by the byte rate stored in the "fmt " chunk.
//
// It returns (duration, true, nil) on success and (0, false, nil) when the
// file is not a WAVE container or lacks a usable "fmt "/"data" pair. I/O
// failures, including a file that ends in the middle of a chunk, are returned
// as errors.
//
// Only the first 12 bytes of the "fmt " chunk are buffered, so a bogus chunk
// size in an untrusted file cannot trigger a huge allocation. A missing pad
// byte after a final odd-sized chunk is tolerated.
func qqWAVDuration(localPath string) (time.Duration, bool, error) {
	file, err := os.Open(localPath)
	if err != nil {
		return 0, false, err
	}
	defer file.Close()

	var header [12]byte
	if _, err := io.ReadFull(file, header[:]); err != nil {
		return 0, false, err
	}

	// RIFF stores integers little-endian, RIFX big-endian.
	var order binary.ByteOrder
	switch string(header[:4]) {
	case "RIFF":
		order = binary.LittleEndian
	case "RIFX":
		order = binary.BigEndian
	default:
		return 0, false, nil
	}
	if string(header[8:12]) != "WAVE" {
		return 0, false, nil
	}

	// The byte rate occupies bytes 8..11 of the "fmt " chunk body, so the
	// first 12 bytes are all that is needed from it.
	const fmtPrefixLen = 12

	var (
		byteRate  uint32
		dataSize  uint32
		foundFmt  bool
		foundData bool
	)
	for !foundFmt || !foundData {
		var chunkHeader [8]byte
		if _, err := io.ReadFull(file, chunkHeader[:]); err != nil {
			if err == io.EOF {
				// Clean end of file between chunks.
				break
			}
			return 0, false, err
		}

		chunkSize := order.Uint32(chunkHeader[4:8])
		remaining := int64(chunkSize)

		switch string(chunkHeader[:4]) {
		case "fmt ":
			// A "fmt " chunk too short to hold the byte rate is skipped
			// like any unknown chunk.
			if chunkSize >= fmtPrefixLen {
				var prefix [fmtPrefixLen]byte
				if _, err := io.ReadFull(file, prefix[:]); err != nil {
					return 0, false, err
				}
				byteRate = order.Uint32(prefix[8:12])
				foundFmt = true
				remaining -= fmtPrefixLen
			}
		case "data":
			dataSize = chunkSize
			foundData = true
		}

		// Skip whatever is left of the chunk body. Reading (rather than
		// seeking) keeps a truncated chunk an error.
		if _, err := io.CopyN(io.Discard, file, remaining); err != nil {
			return 0, false, err
		}

		// Chunks are padded to an even length. Some writers omit the pad
		// after the last chunk, so hitting EOF here is not an error.
		if chunkSize%2 == 1 {
			if _, err := io.CopyN(io.Discard, file, 1); err != nil && err != io.EOF {
				return 0, false, err
			}
		}
	}

	if !foundFmt || !foundData || byteRate == 0 {
		return 0, false, nil
	}

	// dataSize < 2^32 and time.Second = 1e9, so the product fits in int64.
	durationNS := int64(dataSize) * int64(time.Second) / int64(byteRate)
	return time.Duration(durationNS), true, nil
}
// qqOggDuration computes the playback length of an Ogg file carrying Opus or
// Vorbis audio. It walks every page, remembers the last granule position that
// is not the "no packet ends here" marker (all bits set), and converts that
// sample count to a duration using the codec's sample rate.
//
// It returns (duration, true, nil) on success and (0, false, nil) when the
// file is not Ogg, the codec of the first packet is not recognised, or no page
// carries a granule position. I/O failures, including a truncated page, are
// returned as errors.
//
// Page payloads are only buffered until the codec has been identified; after
// that they are discarded without allocation.
func qqOggDuration(localPath string) (time.Duration, bool, error) {
	file, err := os.Open(localPath)
	if err != nil {
		return 0, false, err
	}
	defer file.Close()

	var (
		firstPacket []byte
		codec       string
		sampleRate  uint32
		lastGranule uint64
		haveGranule bool
		segTable    [255]byte // a page has at most 255 lacing values
	)
	for {
		var header [27]byte
		if _, err := io.ReadFull(file, header[:]); err != nil {
			if err == io.EOF {
				// Clean end of file between pages.
				break
			}
			return 0, false, err
		}
		if string(header[:4]) != "OggS" {
			return 0, false, nil
		}

		segments := segTable[:header[26]]
		if _, err := io.ReadFull(file, segments); err != nil {
			return 0, false, err
		}
		payloadLen := 0
		for _, segLen := range segments {
			payloadLen += int(segLen)
		}

		if codec != "" {
			// Codec already known: only the page header matters from here on.
			if _, err := io.CopyN(io.Discard, file, int64(payloadLen)); err != nil {
				return 0, false, err
			}
		} else {
			payload := make([]byte, payloadLen)
			if _, err := io.ReadFull(file, payload); err != nil {
				return 0, false, err
			}
			// Reassemble the leading packet from its lacing segments; a
			// segment shorter than 255 bytes terminates the packet.
			offset := 0
			for _, segLen := range segments {
				firstPacket = append(firstPacket, payload[offset:offset+int(segLen)]...)
				offset += int(segLen)
				if segLen < 255 {
					codec, sampleRate = qqParseOggCodec(firstPacket)
					break
				}
			}
		}

		granule := binary.LittleEndian.Uint64(header[6:14])
		if granule != ^uint64(0) {
			lastGranule = granule
			haveGranule = true
		}
	}

	if !haveGranule || codec == "" {
		return 0, false, nil
	}

	switch codec {
	case "opus":
		// Opus granule positions are converted at a fixed 48 kHz.
		return qqOggSamplesToDuration(lastGranule, 48000), true, nil
	case "vorbis":
		if sampleRate == 0 {
			return 0, false, nil
		}
		return qqOggSamplesToDuration(lastGranule, sampleRate), true, nil
	default:
		return 0, false, nil
	}
}

// qqOggSamplesToDuration converts a sample count at sampleRate (must be
// non-zero) to a duration without overflowing int64. Whole seconds and the
// sub-second remainder are converted separately, and counts too large to
// represent saturate at the maximum time.Duration, so an absurd granule
// position reads as "very long" rather than wrapping to a short value.
func qqOggSamplesToDuration(samples uint64, sampleRate uint32) time.Duration {
	const maxDuration = time.Duration(1<<63 - 1)

	rate := uint64(sampleRate)
	seconds := samples / rate
	if seconds >= uint64(maxDuration/time.Second) {
		return maxDuration
	}
	// remainder < rate < 2^32, so remainder * 1e9 fits comfortably in uint64.
	remainder := samples % rate
	return time.Duration(seconds)*time.Second + time.Duration(remainder*uint64(time.Second)/rate)
}

// qqParseOggCodec identifies the codec from the first packet of an Ogg stream.
// It returns ("opus", 48000) for a packet starting with "OpusHead",
// ("vorbis", rate) for a Vorbis identification header (type byte 0x01 followed
// by "vorbis") whose little-endian sample rate at bytes 12..15 is non-zero,
// and ("", 0) otherwise.
func qqParseOggCodec(packet []byte) (string, uint32) {
	if len(packet) >= 8 && string(packet[:8]) == "OpusHead" {
		return "opus", 48000
	}
	if len(packet) >= 16 && packet[0] == 0x01 && string(packet[1:7]) == "vorbis" {
		sampleRate := binary.LittleEndian.Uint32(packet[12:16])
		if sampleRate > 0 {
			return "vorbis", sampleRate
		}
	}
	return "", 0
}
+53 -4
View File
@@ -387,12 +387,11 @@ func (c *QQChannel) uploadMedia(
}
func (c *QQChannel) buildMediaUpload(part bus.MediaPart) (*qqMediaUpload, error) {
payload := &qqMediaUpload{
FileType: qqFileType(part.Type),
}
payload := &qqMediaUpload{}
mediaRef := part.Ref
if isHTTPURL(mediaRef) {
payload.FileType = qqFileType(c.outboundMediaType(part, ""))
payload.URL = mediaRef
return payload, nil
}
@@ -402,15 +401,23 @@ func (c *QQChannel) buildMediaUpload(part bus.MediaPart) (*qqMediaUpload, error)
return nil, fmt.Errorf("no media store available: %w", channels.ErrSendFailed)
}
resolved, err := store.Resolve(part.Ref)
resolved, meta, err := store.ResolveWithMeta(part.Ref)
if err != nil {
return nil, fmt.Errorf("qq resolve media ref %q: %v: %w", part.Ref, err, channels.ErrSendFailed)
}
if part.Filename == "" {
part.Filename = meta.Filename
}
if part.ContentType == "" {
part.ContentType = meta.ContentType
}
if isHTTPURL(resolved) {
payload.FileType = qqFileType(c.outboundMediaType(part, ""))
payload.URL = resolved
return payload, nil
}
payload.FileType = qqFileType(c.outboundMediaType(part, resolved))
if limitBytes := c.maxBase64FileSizeBytes(); limitBytes > 0 {
info, statErr := os.Stat(resolved)
@@ -437,6 +444,48 @@ func (c *QQChannel) buildMediaUpload(part bus.MediaPart) (*qqMediaUpload, error)
return payload, nil
}
// outboundMediaType returns the media type to use when uploading part.
//
// Non-audio parts keep their declared type. Audio stays "audio" only when its
// duration can be measured from localPath and does not exceed
// qqVoiceMaxDuration. In every other case (no local path, a probe error, an
// undetectable duration, or audio that is too long) the reason is logged and
// "file" is returned.
func (c *QQChannel) outboundMediaType(part bus.MediaPart, localPath string) string {
	if part.Type != "audio" {
		return part.Type
	}

	// Fields shared by every log entry below; built lazily so the common
	// "audio" path logs nothing and allocates nothing.
	baseFields := func() map[string]any {
		return map[string]any{
			"ref":      part.Ref,
			"filename": part.Filename,
		}
	}
	const unavailableMsg = "Sending audio as file because duration is unavailable"

	if localPath == "" {
		logger.InfoCF("qq", unavailableMsg, baseFields())
		return "file"
	}

	duration, ok, err := qqAudioDuration(localPath, part.Filename, part.ContentType)
	switch {
	case err != nil:
		fields := baseFields()
		fields["error"] = err.Error()
		logger.WarnCF("qq", "Failed to detect audio duration, sending as file", fields)
	case !ok:
		logger.InfoCF("qq", unavailableMsg, baseFields())
	case duration > qqVoiceMaxDuration:
		fields := baseFields()
		fields["duration_seconds"] = duration.Seconds()
		fields["limit_seconds"] = qqVoiceMaxDuration.Seconds()
		logger.InfoCF("qq", "Sending audio as file because it exceeds QQ voice limit", fields)
	default:
		return "audio"
	}
	return "file"
}
func (c *QQChannel) sendUploadedMedia(
ctx context.Context,
chatKind, chatID string,
+188
View File
@@ -1,8 +1,10 @@
package qq
import (
"bytes"
"context"
"encoding/base64"
"encoding/binary"
"encoding/json"
"errors"
"os"
@@ -264,6 +266,142 @@ func TestSendMedia_UploadsLocalFileAsBase64(t *testing.T) {
}
}
// TestSendMedia_AudioAt60SecondsUsesVoiceUpload checks the inclusive boundary:
// a WAV clip of exactly 60 seconds is still uploaded with file type 3.
func TestSendMedia_AudioAt60SecondsUsesVoiceUpload(t *testing.T) {
	const (
		clipLength          = 60 * time.Second
		wantUpload   uint64 = 3
	)
	assertAudioWAVUploadType(t, clipLength, wantUpload)
}
// TestSendMedia_AudioOver60SecondsFallsBackToFileUpload checks that a WAV clip
// of 61 seconds is uploaded with file type 4 instead of the voice type.
func TestSendMedia_AudioOver60SecondsFallsBackToFileUpload(t *testing.T) {
	const (
		clipLength          = 61 * time.Second
		wantUpload   uint64 = 4
	)
	assertAudioWAVUploadType(t, clipLength, wantUpload)
}
// assertAudioWAVUploadType stores a generated WAV file of the given duration
// in a file media store, sends it through a QQChannel backed by a fake API as
// an "audio" part to a group chat, and fails the test unless exactly one
// upload was made with file_type equal to wantFileType.
func assertAudioWAVUploadType(t *testing.T, duration time.Duration, wantFileType uint64) {
	t.Helper()

	msgBus := bus.NewMessageBus()
	mediaStore := media.NewFileMediaStore()

	wavPath := writeWAVFile(t, t.TempDir(), "voice.wav", duration)
	wavMeta := media.MediaMeta{
		Filename:    "voice.wav",
		ContentType: "audio/wav",
	}
	ref, err := mediaStore.Store(wavPath, wavMeta, "qq:test")
	if err != nil {
		t.Fatalf("Store() error = %v", err)
	}

	fakeAPI := &fakeQQAPI{
		transportResp: mustJSON(t, dto.Message{FileInfo: []byte("file-info")}),
	}
	channel := &QQChannel{
		BaseChannel: channels.NewBaseChannel("qq", nil, msgBus, nil),
		api:         fakeAPI,
		dedup:       make(map[string]time.Time),
		done:        make(chan struct{}),
		ctx:         context.Background(),
	}
	channel.SetRunning(true)
	channel.SetMediaStore(mediaStore)
	channel.chatType.Store("group-1", "group")

	// Filename and content type are deliberately left off the part so they
	// have to come from the store's metadata.
	outbound := bus.OutboundMediaMessage{
		ChatID: "group-1",
		Parts: []bus.MediaPart{{
			Type: "audio",
			Ref:  ref,
		}},
	}
	if err = channel.SendMedia(context.Background(), outbound); err != nil {
		t.Fatalf("SendMedia() error = %v", err)
	}

	calls := fakeAPI.transportCalls
	if len(calls) != 1 {
		t.Fatalf("transportCalls = %d, want 1", len(calls))
	}
	if got := calls[0].body.FileType; got != wantFileType {
		t.Fatalf("upload file_type = %d, want %d", got, wantFileType)
	}
}
// TestSendMedia_RemoteAudioFallsBackToFileUpload sends an "audio" part whose
// ref is an HTTPS URL to a direct chat, with no media store configured, and
// expects a single upload with file_type 4.
func TestSendMedia_RemoteAudioFallsBackToFileUpload(t *testing.T) {
	const remoteRef = "https://cdn.example.com/voice.ogg"

	msgBus := bus.NewMessageBus()
	fakeAPI := &fakeQQAPI{
		transportResp: mustJSON(t, dto.Message{FileInfo: []byte("remote-file-info")}),
	}
	channel := &QQChannel{
		BaseChannel: channels.NewBaseChannel("qq", nil, msgBus, nil),
		api:         fakeAPI,
		dedup:       make(map[string]time.Time),
		done:        make(chan struct{}),
		ctx:         context.Background(),
	}
	channel.SetRunning(true)
	channel.chatType.Store("user-1", "direct")

	outbound := bus.OutboundMediaMessage{
		ChatID: "user-1",
		Parts: []bus.MediaPart{{
			Type: "audio",
			Ref:  remoteRef,
		}},
	}
	if err := channel.SendMedia(context.Background(), outbound); err != nil {
		t.Fatalf("SendMedia() error = %v", err)
	}

	calls := fakeAPI.transportCalls
	if len(calls) != 1 {
		t.Fatalf("transportCalls = %d, want 1", len(calls))
	}
	if got := calls[0].body.FileType; got != 4 {
		t.Fatalf("upload file_type = %d, want 4", got)
	}
}
// TestSendMedia_LocalAudioWithUnknownDurationFallsBackToFileUpload stores a
// local file labelled as MP3 (audio/mpeg), sends it as an "audio" part to a
// group chat, and expects a single upload with file_type 4.
func TestSendMedia_LocalAudioWithUnknownDurationFallsBackToFileUpload(t *testing.T) {
	msgBus := bus.NewMessageBus()
	mediaStore := media.NewFileMediaStore()

	mp3Path := writeTempFile(t, t.TempDir(), "voice.mp3", []byte("not-a-real-mp3"))
	mp3Meta := media.MediaMeta{
		Filename:    "voice.mp3",
		ContentType: "audio/mpeg",
	}
	ref, err := mediaStore.Store(mp3Path, mp3Meta, "qq:test")
	if err != nil {
		t.Fatalf("Store() error = %v", err)
	}

	fakeAPI := &fakeQQAPI{
		transportResp: mustJSON(t, dto.Message{FileInfo: []byte("file-info")}),
	}
	channel := &QQChannel{
		BaseChannel: channels.NewBaseChannel("qq", nil, msgBus, nil),
		api:         fakeAPI,
		dedup:       make(map[string]time.Time),
		done:        make(chan struct{}),
		ctx:         context.Background(),
	}
	channel.SetRunning(true)
	channel.SetMediaStore(mediaStore)
	channel.chatType.Store("group-1", "group")

	outbound := bus.OutboundMediaMessage{
		ChatID: "group-1",
		Parts: []bus.MediaPart{{
			Type: "audio",
			Ref:  ref,
		}},
	}
	if err = channel.SendMedia(context.Background(), outbound); err != nil {
		t.Fatalf("SendMedia() error = %v", err)
	}

	calls := fakeAPI.transportCalls
	if len(calls) != 1 {
		t.Fatalf("transportCalls = %d, want 1", len(calls))
	}
	if got := calls[0].body.FileType; got != 4 {
		t.Fatalf("upload file_type = %d, want 4", got)
	}
}
func TestSendMedia_UsesRemoteURLUploadForC2C(t *testing.T) {
messageBus := bus.NewMessageBus()
api := &fakeQQAPI{
@@ -494,3 +632,53 @@ func writeTempFile(t *testing.T, dir, name string, content []byte) string {
}
return path
}
// writeWAVFile writes a silent little-endian RIFF/WAVE file (PCM, mono, 8-bit,
// 8000 Hz) named name into dir and returns its path. The data chunk holds
// duration worth of samples, rounded down to a whole byte.
//
// The payload size is computed from the full duration, so sub-second
// durations such as 60500*time.Millisecond are honoured rather than truncated
// to whole seconds. An odd-sized payload is followed by the pad byte RIFF
// requires. A negative duration fails the test.
func writeWAVFile(t *testing.T, dir, name string, duration time.Duration) string {
	t.Helper()

	const (
		sampleRate    = 8000
		numChannels   = 1
		bitsPerSample = 8
		byteRate      = sampleRate * numChannels * (bitsPerSample / 8)
		blockAlign    = numChannels * (bitsPerSample / 8)
	)

	if duration < 0 {
		t.Fatalf("writeWAVFile: negative duration %v", duration)
	}
	// Multiply before dividing so fractions of a second are not lost.
	dataSize := uint32(int64(duration) * byteRate / int64(time.Second))
	padSize := dataSize % 2

	var buf bytes.Buffer
	put := func(label string, value any) {
		t.Helper()
		if err := binary.Write(&buf, binary.LittleEndian, value); err != nil {
			t.Fatalf("binary.Write(%s) error = %v", label, err)
		}
	}

	// RIFF header: the size covers everything after this field.
	buf.WriteString("RIFF")
	put("riff size", uint32(36)+dataSize+padSize)
	buf.WriteString("WAVE")

	// 16-byte PCM "fmt " chunk.
	buf.WriteString("fmt ")
	put("fmt chunk size", uint32(16))
	put("audio format", uint16(1))
	put("channels", uint16(numChannels))
	put("sample rate", uint32(sampleRate))
	put("byte rate", uint32(byteRate))
	put("block align", uint16(blockAlign))
	put("bits per sample", uint16(bitsPerSample))

	// "data" chunk filled with zero bytes.
	buf.WriteString("data")
	put("data size", dataSize)
	buf.Write(make([]byte, dataSize+padSize))

	return writeTempFile(t, dir, name, buf.Bytes())
}