From 2c317444c5ca423f7be1cc8f256e1f045f8e6110 Mon Sep 17 00:00:00 2001
From: Hoshina
Date: Sun, 22 Mar 2026 17:19:11 +0800
Subject: [PATCH] fix(qq): send long audio as file

Downgrade outbound QQ audio to file upload when it exceeds the 60 second
voice limit or its duration cannot be detected.

Refs #1884
---
 pkg/channels/qq/audio_duration.go | 307 ++++++++++++++++++++++++++++++
 pkg/channels/qq/qq.go             |  60 +++++-
 pkg/channels/qq/qq_test.go        | 188 ++++++++++++++++++
 3 files changed, 551 insertions(+), 4 deletions(-)
 create mode 100644 pkg/channels/qq/audio_duration.go

diff --git a/pkg/channels/qq/audio_duration.go b/pkg/channels/qq/audio_duration.go
new file mode 100644
index 000000000..28a9b2e83
--- /dev/null
+++ b/pkg/channels/qq/audio_duration.go
@@ -0,0 +1,307 @@
+package qq
+
+import (
+	"encoding/binary"
+	"io"
+	"os"
+	"path/filepath"
+	"strings"
+	"time"
+)
+
+// qqVoiceMaxDuration is the longest clip still uploaded as a QQ voice message;
+// longer audio is sent as a regular file instead.
+const qqVoiceMaxDuration = 60 * time.Second
+
+// qqAudioDuration reports the playback duration of the audio file at localPath.
+//
+// filename and contentType are hints used to pick a parser. The boolean result
+// is false when the duration cannot be determined (empty path, unsupported
+// container or codec, unrecognized header). A non-nil error is returned for
+// I/O failures and truncated files.
+func qqAudioDuration(localPath, filename, contentType string) (time.Duration, bool, error) {
+	if localPath == "" {
+		return 0, false, nil
+	}
+
+	switch qqAudioDurationFormat(localPath, filename, contentType) {
+	case "wav":
+		return qqWAVDuration(localPath)
+	case "ogg":
+		return qqOggDuration(localPath)
+	default:
+		return 0, false, nil
+	}
+}
+
+// qqAudioDurationFormat returns the parser to use for an audio file: "wav",
+// "ogg", or "" when the format is not supported. The content type takes
+// precedence, then the extension of filename, then the extension of localPath.
+func qqAudioDurationFormat(localPath, filename, contentType string) string {
+	// Drop MIME parameters such as "; codecs=opus" before matching.
+	mediaType, _, _ := strings.Cut(contentType, ";")
+	mediaType = strings.ToLower(strings.TrimSpace(mediaType))
+
+	switch {
+	case strings.HasPrefix(mediaType, "audio/wav"),
+		strings.HasPrefix(mediaType, "audio/x-wav"),
+		mediaType == "audio/vnd.wave":
+		return "wav"
+	case strings.HasPrefix(mediaType, "audio/ogg"),
+		mediaType == "audio/opus",
+		mediaType == "application/ogg",
+		mediaType == "application/x-ogg":
+		return "ogg"
+	}
+
+	for _, name := range []string{filename, localPath} {
+		switch strings.ToLower(filepath.Ext(name)) {
+		case ".wav", ".wave":
+			return "wav"
+		case ".ogg", ".oga", ".opus":
+			return "ogg"
+		}
+	}
+
+	return ""
+}
+
+// qqWAVDuration computes the duration of a RIFF or RIFX WAVE file as the size
+// of its "data" chunk divided by the byte rate declared in its "fmt " chunk.
+//
+// The boolean result is false when the file is not a WAVE file, a required
+// chunk is missing, or the byte rate is zero. An error is returned when the
+// file cannot be read or a chunk runs past the end of the file.
+func qqWAVDuration(localPath string) (time.Duration, bool, error) {
+	file, err := os.Open(localPath)
+	if err != nil {
+		return 0, false, err
+	}
+	defer file.Close()
+
+	var header [12]byte
+	if _, err := io.ReadFull(file, header[:]); err != nil {
+		return 0, false, err
+	}
+
+	var order binary.ByteOrder
+	switch string(header[:4]) {
+	case "RIFF":
+		order = binary.LittleEndian
+	case "RIFX":
+		order = binary.BigEndian
+	default:
+		return 0, false, nil
+	}
+
+	if string(header[8:12]) != "WAVE" {
+		return 0, false, nil
+	}
+
+	// The byte rate occupies bytes 8..11 of the "fmt " chunk, so a fixed-size
+	// prefix is enough. Never size a buffer from the untrusted chunk length.
+	const fmtPrefixLen = 12
+
+	var (
+		byteRate  uint32
+		dataSize  uint32
+		foundFmt  bool
+		foundData bool
+	)
+
+	for {
+		var chunkHeader [8]byte
+		if _, err := io.ReadFull(file, chunkHeader[:]); err != nil {
+			if err == io.EOF {
+				break
+			}
+			return 0, false, err
+		}
+
+		chunkSize := order.Uint32(chunkHeader[4:8])
+		unread := int64(chunkSize)
+
+		switch string(chunkHeader[:4]) {
+		case "fmt ":
+			if chunkSize >= fmtPrefixLen {
+				var prefix [fmtPrefixLen]byte
+				if _, err := io.ReadFull(file, prefix[:]); err != nil {
+					return 0, false, err
+				}
+				byteRate = order.Uint32(prefix[8:12])
+				foundFmt = true
+				unread -= fmtPrefixLen
+			}
+		case "data":
+			dataSize = chunkSize
+			foundData = true
+		}
+
+		// Skip the rest of the chunk; this also rejects truncated files.
+		if _, err := io.CopyN(io.Discard, file, unread); err != nil {
+			return 0, false, err
+		}
+
+		if foundFmt && foundData {
+			break
+		}
+
+		// Chunks are word-aligned, so an odd-sized chunk is followed by a pad
+		// byte. It is only consumed when another chunk header must be read, so
+		// a file that omits the pad after its final chunk is still accepted.
+		if chunkSize%2 == 1 {
+			if _, err := io.CopyN(io.Discard, file, 1); err != nil {
+				return 0, false, err
+			}
+		}
+	}
+
+	if !foundFmt || !foundData || byteRate == 0 {
+		return 0, false, nil
+	}
+
+	// dataSize is below 2^32, so the product cannot overflow int64.
+	durationNS := int64(dataSize) * int64(time.Second) / int64(byteRate)
+	return time.Duration(durationNS), true, nil
+}
+
+// qqOggDuration computes the duration of an Ogg Opus or Ogg Vorbis file from
+// the granule position of its last page.
+//
+// The codec is identified from the first packet of the stream. The boolean
+// result is false for non-Ogg input, unsupported codecs, streams without a
+// granule position, and durations too large for time.Duration. An error is
+// returned when the file cannot be read or a page is truncated.
+func qqOggDuration(localPath string) (time.Duration, bool, error) {
+	file, err := os.Open(localPath)
+	if err != nil {
+		return 0, false, err
+	}
+	defer file.Close()
+
+	var (
+		firstPacket []byte
+		codec       string
+		sampleRate  uint32
+		preSkip     uint64
+		lastGranule uint64
+		haveGranule bool
+	)
+
+	for {
+		var header [27]byte
+		if _, err := io.ReadFull(file, header[:]); err != nil {
+			if err == io.EOF {
+				break
+			}
+			return 0, false, err
+		}
+
+		if string(header[:4]) != "OggS" {
+			return 0, false, nil
+		}
+
+		segments := make([]byte, int(header[26]))
+		if _, err := io.ReadFull(file, segments); err != nil {
+			return 0, false, err
+		}
+
+		payloadLen := 0
+		for _, segLen := range segments {
+			payloadLen += int(segLen)
+		}
+
+		payload := make([]byte, payloadLen)
+		if _, err := io.ReadFull(file, payload); err != nil {
+			return 0, false, err
+		}
+
+		// An all-ones granule position means no packet finishes on this page.
+		granule := binary.LittleEndian.Uint64(header[6:14])
+		if granule != ^uint64(0) {
+			lastGranule = granule
+			haveGranule = true
+		}
+
+		if codec != "" {
+			continue
+		}
+
+		offset := 0
+		for _, segLen := range segments {
+			end := offset + int(segLen)
+			firstPacket = append(firstPacket, payload[offset:end]...)
+			offset = end
+			if segLen == 255 {
+				// A full segment means the packet continues.
+				continue
+			}
+
+			codec, sampleRate = qqParseOggCodec(firstPacket)
+			if codec == "" {
+				// Unsupported codec: the duration can never be derived, so
+				// stop rather than buffer and rescan the rest of the file.
+				return 0, false, nil
+			}
+			if codec == "opus" && len(firstPacket) >= 12 {
+				// OpusHead stores the pre-skip sample count at bytes 10..11.
+				preSkip = uint64(binary.LittleEndian.Uint16(firstPacket[10:12]))
+			}
+			firstPacket = nil
+			break
+		}
+	}
+
+	if !haveGranule || codec == "" {
+		return 0, false, nil
+	}
+
+	// Opus granule positions count 48 kHz samples; Vorbis uses its own rate.
+	var rate uint64
+	switch codec {
+	case "opus":
+		rate = 48000
+	case "vorbis":
+		rate = uint64(sampleRate)
+	}
+	if rate == 0 {
+		return 0, false, nil
+	}
+
+	// The final granule position still includes the pre-skip samples that
+	// decoders discard, so subtract them to get the audible length.
+	var samples uint64
+	if lastGranule > preSkip {
+		samples = lastGranule - preSkip
+	}
+
+	// Split into whole seconds and a remainder so that a huge (corrupt or
+	// crafted) granule position cannot overflow time.Duration and wrap to a
+	// value that slips under the voice limit.
+	const maxSeconds = uint64(1<<63-1) / uint64(time.Second)
+	seconds := samples / rate
+	if seconds >= maxSeconds {
+		return 0, false, nil
+	}
+	fraction := samples % rate * uint64(time.Second) / rate
+	return time.Duration(seconds)*time.Second + time.Duration(fraction), true, nil
+}
+
+// qqParseOggCodec identifies the codec from the first packet of an Ogg stream.
+// It returns "opus" with the fixed 48 kHz granule rate, "vorbis" with the
+// sample rate read from the identification header, or "" and 0 when the packet
+// is neither (including a Vorbis header that declares a zero sample rate).
+func qqParseOggCodec(packet []byte) (string, uint32) {
+	if len(packet) >= 8 && string(packet[:8]) == "OpusHead" {
+		return "opus", 48000
+	}
+
+	if len(packet) >= 16 && packet[0] == 0x01 && string(packet[1:7]) == "vorbis" {
+		sampleRate := binary.LittleEndian.Uint32(packet[12:16])
+		if sampleRate > 0 {
+			return "vorbis", sampleRate
+		}
+	}
+
+	return "", 0
+}
diff --git a/pkg/channels/qq/qq.go b/pkg/channels/qq/qq.go
index 1a48369f8..2cd6e1747 100644
--- a/pkg/channels/qq/qq.go
+++ b/pkg/channels/qq/qq.go
@@ -387,12 +387,11 @@ func (c *QQChannel) uploadMedia(
 }
 
 func (c *QQChannel) buildMediaUpload(part bus.MediaPart) (*qqMediaUpload, error) {
-	payload := &qqMediaUpload{
-		FileType: qqFileType(part.Type),
-	}
+	payload := &qqMediaUpload{}
 
 	mediaRef := part.Ref
 	if isHTTPURL(mediaRef) {
+		payload.FileType = qqFileType(c.outboundMediaType(part, ""))
 		payload.URL = mediaRef
 		return payload, nil
 	}
@@ -402,15 +401,23 @@ func (c *QQChannel) buildMediaUpload(part bus.MediaPart) (*qqMediaUpload, error)
 		return nil, fmt.Errorf("no media store available: %w", channels.ErrSendFailed)
 	}
 
-	resolved, err := store.Resolve(part.Ref)
+	resolved, meta, err := store.ResolveWithMeta(part.Ref)
 	if err != nil {
 		return nil, fmt.Errorf("qq resolve media ref %q: %v: %w", part.Ref, err, channels.ErrSendFailed)
 	}
+	if part.Filename == "" {
+		part.Filename = meta.Filename
+	}
+	if part.ContentType == "" {
+		part.ContentType = meta.ContentType
+	}
 
 	if isHTTPURL(resolved) {
+		payload.FileType = qqFileType(c.outboundMediaType(part, ""))
 		payload.URL = resolved
 		return payload, nil
 	}
+	payload.FileType = qqFileType(c.outboundMediaType(part, resolved))
 
 	if limitBytes := c.maxBase64FileSizeBytes(); limitBytes > 0 {
 		info, statErr := os.Stat(resolved)
@@ -437,6 +444,51 @@ func (c *QQChannel) buildMediaUpload(part bus.MediaPart) (*qqMediaUpload, error)
 	return payload, nil
 }
 
+// outboundMediaType returns the media type to upload part as. Audio stays
+// "audio" only when localPath holds a clip whose duration is known and within
+// qqVoiceMaxDuration; all other audio is sent as "file".
+func (c *QQChannel) outboundMediaType(part bus.MediaPart, localPath string) string {
+	if part.Type != "audio" {
+		return part.Type
+	}
+
+	if localPath == "" {
+		logger.InfoCF("qq", "Sending audio as file because duration is unavailable", map[string]any{
+			"ref":      part.Ref,
+			"filename": part.Filename,
+		})
+		return "file"
+	}
+
+	duration, ok, err := qqAudioDuration(localPath, part.Filename, part.ContentType)
+	if err != nil {
+		logger.WarnCF("qq", "Failed to detect audio duration, sending as file", map[string]any{
+			"ref":      part.Ref,
+			"filename": part.Filename,
+			"error":    err.Error(),
+		})
+		return "file"
+	}
+	if !ok {
+		logger.InfoCF("qq", "Sending audio as file because duration is unavailable", map[string]any{
+			"ref":      part.Ref,
+			"filename": part.Filename,
+		})
+		return "file"
+	}
+	if duration > qqVoiceMaxDuration {
+		logger.InfoCF("qq", "Sending audio as file because it exceeds QQ voice limit", map[string]any{
+			"ref":              part.Ref,
+			"filename":         part.Filename,
+			"duration_seconds": duration.Seconds(),
+			"limit_seconds":    qqVoiceMaxDuration.Seconds(),
+		})
+		return "file"
+	}
+
+	return "audio"
+}
+
 func (c *QQChannel) sendUploadedMedia(
 	ctx context.Context,
 	chatKind, chatID string,
diff --git a/pkg/channels/qq/qq_test.go b/pkg/channels/qq/qq_test.go
index 3cb3d39bd..108965c00 100644
--- a/pkg/channels/qq/qq_test.go
+++ b/pkg/channels/qq/qq_test.go
@@ -1,8 +1,10 @@
 package qq
 
 import (
+	"bytes"
 	"context"
 	"encoding/base64"
+	"encoding/binary"
 	"encoding/json"
 	"errors"
 	"os"
@@ -264,6 +266,142 @@ func TestSendMedia_UploadsLocalFileAsBase64(t *testing.T) {
 	}
 }
 
+func TestSendMedia_AudioAt60SecondsUsesVoiceUpload(t *testing.T) {
+	assertAudioWAVUploadType(t, 60*time.Second, 3)
+}
+
+func TestSendMedia_AudioOver60SecondsFallsBackToFileUpload(t *testing.T) {
+	assertAudioWAVUploadType(t, 61*time.Second, 4)
+}
+
+func assertAudioWAVUploadType(t *testing.T, duration time.Duration, wantFileType uint64) {
+	t.Helper()
+
+	messageBus := bus.NewMessageBus()
+	store := media.NewFileMediaStore()
+
+	localPath := writeWAVFile(t, t.TempDir(), "voice.wav", duration)
+	ref, err := store.Store(localPath, media.MediaMeta{
+		Filename:    "voice.wav",
+		ContentType: "audio/wav",
+	}, "qq:test")
+	if err != nil {
+		t.Fatalf("Store() error = %v", err)
+	}
+
+	api := &fakeQQAPI{
+		transportResp: mustJSON(t, dto.Message{FileInfo: []byte("file-info")}),
+	}
+	ch := &QQChannel{
+		BaseChannel: channels.NewBaseChannel("qq", nil, messageBus, nil),
+		api:         api,
+		dedup:       make(map[string]time.Time),
+		done:        make(chan struct{}),
+		ctx:         context.Background(),
+	}
+	ch.SetRunning(true)
+	ch.SetMediaStore(store)
+	ch.chatType.Store("group-1", "group")
+
+	err = ch.SendMedia(context.Background(), bus.OutboundMediaMessage{
+		ChatID: "group-1",
+		Parts: []bus.MediaPart{{
+			Type: "audio",
+			Ref:  ref,
+		}},
+	})
+	if err != nil {
+		t.Fatalf("SendMedia() error = %v", err)
+	}
+
+	if len(api.transportCalls) != 1 {
+		t.Fatalf("transportCalls = %d, want 1", len(api.transportCalls))
+	}
+	if api.transportCalls[0].body.FileType != wantFileType {
+		t.Fatalf("upload file_type = %d, want %d", api.transportCalls[0].body.FileType, wantFileType)
+	}
+}
+
+func TestSendMedia_RemoteAudioFallsBackToFileUpload(t *testing.T) {
+	messageBus := bus.NewMessageBus()
+	api := &fakeQQAPI{
+		transportResp: mustJSON(t, dto.Message{FileInfo: []byte("remote-file-info")}),
+	}
+	ch := &QQChannel{
+		BaseChannel: channels.NewBaseChannel("qq", nil, messageBus, nil),
+		api:         api,
+		dedup:       make(map[string]time.Time),
+		done:        make(chan struct{}),
+		ctx:         context.Background(),
+	}
+	ch.SetRunning(true)
+	ch.chatType.Store("user-1", "direct")
+
+	err := ch.SendMedia(context.Background(), bus.OutboundMediaMessage{
+		ChatID: "user-1",
+		Parts: []bus.MediaPart{{
+			Type: "audio",
+			Ref:  "https://cdn.example.com/voice.ogg",
+		}},
+	})
+	if err != nil {
+		t.Fatalf("SendMedia() error = %v", err)
+	}
+
+	if len(api.transportCalls) != 1 {
+		t.Fatalf("transportCalls = %d, want 1", len(api.transportCalls))
+	}
+	if api.transportCalls[0].body.FileType != 4 {
+		t.Fatalf("upload file_type = %d, want 4", api.transportCalls[0].body.FileType)
+	}
+}
+
+func TestSendMedia_LocalAudioWithUnknownDurationFallsBackToFileUpload(t *testing.T) {
+	messageBus := bus.NewMessageBus()
+	store := media.NewFileMediaStore()
+
+	localPath := writeTempFile(t, t.TempDir(), "voice.mp3", []byte("not-a-real-mp3"))
+	ref, err := store.Store(localPath, media.MediaMeta{
+		Filename:    "voice.mp3",
+		ContentType: "audio/mpeg",
+	}, "qq:test")
+	if err != nil {
+		t.Fatalf("Store() error = %v", err)
+	}
+
+	api := &fakeQQAPI{
+		transportResp: mustJSON(t, dto.Message{FileInfo: []byte("file-info")}),
+	}
+	ch := &QQChannel{
+		BaseChannel: channels.NewBaseChannel("qq", nil, messageBus, nil),
+		api:         api,
+		dedup:       make(map[string]time.Time),
+		done:        make(chan struct{}),
+		ctx:         context.Background(),
+	}
+	ch.SetRunning(true)
+	ch.SetMediaStore(store)
+	ch.chatType.Store("group-1", "group")
+
+	err = ch.SendMedia(context.Background(), bus.OutboundMediaMessage{
+		ChatID: "group-1",
+		Parts: []bus.MediaPart{{
+			Type: "audio",
+			Ref:  ref,
+		}},
+	})
+	if err != nil {
+		t.Fatalf("SendMedia() error = %v", err)
+	}
+
+	if len(api.transportCalls) != 1 {
+		t.Fatalf("transportCalls = %d, want 1", len(api.transportCalls))
+	}
+	if api.transportCalls[0].body.FileType != 4 {
+		t.Fatalf("upload file_type = %d, want 4", api.transportCalls[0].body.FileType)
+	}
+}
+
 func TestSendMedia_UsesRemoteURLUploadForC2C(t *testing.T) {
 	messageBus := bus.NewMessageBus()
 	api := &fakeQQAPI{
@@ -494,3 +632,53 @@ func writeTempFile(t *testing.T, dir, name string, content []byte) string {
 	}
 	return path
 }
+
+func writeWAVFile(t *testing.T, dir, name string, duration time.Duration) string {
+	t.Helper()
+
+	const (
+		sampleRate    = 8000
+		numChannels   = 1
+		bitsPerSample = 8
+	)
+
+	dataSize := uint32(duration / time.Second * sampleRate * numChannels * (bitsPerSample / 8))
+	byteRate := uint32(sampleRate * numChannels * (bitsPerSample / 8))
+	blockAlign := uint16(numChannels * (bitsPerSample / 8))
+
+	var buf bytes.Buffer
+	buf.WriteString("RIFF")
+	if err := binary.Write(&buf, binary.LittleEndian, uint32(36)+dataSize); err != nil {
+		t.Fatalf("binary.Write(riff size) error = %v", err)
+	}
+	buf.WriteString("WAVE")
+	buf.WriteString("fmt ")
+	if err := binary.Write(&buf, binary.LittleEndian, uint32(16)); err != nil {
+		t.Fatalf("binary.Write(fmt chunk size) error = %v", err)
+	}
+	if err := binary.Write(&buf, binary.LittleEndian, uint16(1)); err != nil {
+		t.Fatalf("binary.Write(audio format) error = %v", err)
+	}
+	if err := binary.Write(&buf, binary.LittleEndian, uint16(numChannels)); err != nil {
+		t.Fatalf("binary.Write(channels) error = %v", err)
+	}
+	if err := binary.Write(&buf, binary.LittleEndian, uint32(sampleRate)); err != nil {
+		t.Fatalf("binary.Write(sample rate) error = %v", err)
+	}
+	if err := binary.Write(&buf, binary.LittleEndian, byteRate); err != nil {
+		t.Fatalf("binary.Write(byte rate) error = %v", err)
+	}
+	if err := binary.Write(&buf, binary.LittleEndian, blockAlign); err != nil {
+		t.Fatalf("binary.Write(block align) error = %v", err)
+	}
+	if err := binary.Write(&buf, binary.LittleEndian, uint16(bitsPerSample)); err != nil {
+		t.Fatalf("binary.Write(bits per sample) error = %v", err)
+	}
+	buf.WriteString("data")
+	if err := binary.Write(&buf, binary.LittleEndian, dataSize); err != nil {
+		t.Fatalf("binary.Write(data size) error = %v", err)
+	}
+	buf.Write(make([]byte, dataSize))
+
+	return writeTempFile(t, dir, name, buf.Bytes())
+}