mirror of
https://github.com/QuantumNous/new-api.git
synced 2026-03-30 04:40:59 +00:00
feat: implement audio duration retrieval without ffmpeg dependencies
This commit is contained in:
295
common/audio.go
Normal file
295
common/audio.go
Normal file
@@ -0,0 +1,295 @@
|
||||
package common
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
"github.com/abema/go-mp4"
|
||||
"github.com/go-audio/aiff"
|
||||
"github.com/go-audio/wav"
|
||||
"github.com/jfreymuth/oggvorbis"
|
||||
"github.com/mewkiz/flac"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/tcolgate/mp3"
|
||||
"github.com/yapingcat/gomedia/go-codec"
|
||||
)
|
||||
|
||||
// GetAudioDuration 使用纯 Go 库获取音频文件的时长(秒)。
|
||||
// 它不再依赖外部的 ffmpeg 或 ffprobe 程序。
|
||||
func GetAudioDuration(ctx context.Context, f io.ReadSeeker, ext string) (duration float64, err error) {
|
||||
SysLog(fmt.Sprintf("GetAudioDuration: ext=%s", ext))
|
||||
// 根据文件扩展名选择解析器
|
||||
switch ext {
|
||||
case ".mp3":
|
||||
duration, err = getMP3Duration(f)
|
||||
case ".wav":
|
||||
duration, err = getWAVDuration(f)
|
||||
case ".flac":
|
||||
duration, err = getFLACDuration(f)
|
||||
case ".m4a", ".mp4":
|
||||
duration, err = getM4ADuration(f)
|
||||
case ".ogg", ".oga":
|
||||
duration, err = getOGGDuration(f)
|
||||
case ".opus":
|
||||
duration, err = getOpusDuration(f)
|
||||
case ".aiff", ".aif", ".aifc":
|
||||
duration, err = getAIFFDuration(f)
|
||||
case ".webm":
|
||||
duration, err = getWebMDuration(f)
|
||||
case ".aac":
|
||||
duration, err = getAACDuration(f)
|
||||
default:
|
||||
return 0, fmt.Errorf("unsupported audio format: %s", ext)
|
||||
}
|
||||
SysLog(fmt.Sprintf("GetAudioDuration: duration=%f", duration))
|
||||
return duration, err
|
||||
}
|
||||
|
||||
// getMP3Duration 解析 MP3 文件以获取时长。
|
||||
// 注意:对于 VBR (Variable Bitrate) MP3,这个估算可能不完全精确,但通常足够好。
|
||||
// FFmpeg 在这种情况下会扫描整个文件来获得精确值,但这里的库提供了快速估算。
|
||||
func getMP3Duration(r io.Reader) (float64, error) {
|
||||
d := mp3.NewDecoder(r)
|
||||
var f mp3.Frame
|
||||
skipped := 0
|
||||
duration := 0.0
|
||||
|
||||
for {
|
||||
if err := d.Decode(&f, &skipped); err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
return 0, errors.Wrap(err, "failed to decode mp3 frame")
|
||||
}
|
||||
duration += f.Duration().Seconds()
|
||||
}
|
||||
return duration, nil
|
||||
}
|
||||
|
||||
// getWAVDuration 解析 WAV 文件头以获取时长。
|
||||
func getWAVDuration(r io.ReadSeeker) (float64, error) {
|
||||
dec := wav.NewDecoder(r)
|
||||
if !dec.IsValidFile() {
|
||||
return 0, errors.New("invalid wav file")
|
||||
}
|
||||
d, err := dec.Duration()
|
||||
if err != nil {
|
||||
return 0, errors.Wrap(err, "failed to get wav duration")
|
||||
}
|
||||
return d.Seconds(), nil
|
||||
}
|
||||
|
||||
// getFLACDuration 解析 FLAC 文件的 STREAMINFO 块。
|
||||
func getFLACDuration(r io.Reader) (float64, error) {
|
||||
stream, err := flac.Parse(r)
|
||||
if err != nil {
|
||||
return 0, errors.Wrap(err, "failed to parse flac stream")
|
||||
}
|
||||
defer stream.Close()
|
||||
|
||||
// 时长 = 总采样数 / 采样率
|
||||
duration := float64(stream.Info.NSamples) / float64(stream.Info.SampleRate)
|
||||
return duration, nil
|
||||
}
|
||||
|
||||
// getM4ADuration 解析 M4A/MP4 文件的 'mvhd' box。
|
||||
func getM4ADuration(r io.ReadSeeker) (float64, error) {
|
||||
// go-mp4 库需要 ReadSeeker 接口
|
||||
info, err := mp4.Probe(r)
|
||||
if err != nil {
|
||||
return 0, errors.Wrap(err, "failed to probe m4a/mp4 file")
|
||||
}
|
||||
// 时长 = Duration / Timescale
|
||||
return float64(info.Duration) / float64(info.Timescale), nil
|
||||
}
|
||||
|
||||
// getOGGDuration 解析 OGG/Vorbis 文件以获取时长。
|
||||
func getOGGDuration(r io.ReadSeeker) (float64, error) {
|
||||
// 重置 reader 到开头
|
||||
if _, err := r.Seek(0, io.SeekStart); err != nil {
|
||||
return 0, errors.Wrap(err, "failed to seek ogg file")
|
||||
}
|
||||
|
||||
reader, err := oggvorbis.NewReader(r)
|
||||
if err != nil {
|
||||
return 0, errors.Wrap(err, "failed to create ogg vorbis reader")
|
||||
}
|
||||
|
||||
// 计算时长 = 总采样数 / 采样率
|
||||
// 需要读取整个文件来获取总采样数
|
||||
channels := reader.Channels()
|
||||
sampleRate := reader.SampleRate()
|
||||
|
||||
// 估算方法:读取到文件结尾
|
||||
var totalSamples int64
|
||||
buf := make([]float32, 4096*channels)
|
||||
for {
|
||||
n, err := reader.Read(buf)
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
return 0, errors.Wrap(err, "failed to read ogg samples")
|
||||
}
|
||||
totalSamples += int64(n / channels)
|
||||
}
|
||||
|
||||
duration := float64(totalSamples) / float64(sampleRate)
|
||||
return duration, nil
|
||||
}
|
||||
|
||||
// getOpusDuration returns the duration in seconds of an Opus stream stored in
// an Ogg container.
//
// It walks the Ogg pages from the start of r, remembers the largest granule
// position seen, subtracts the pre-skip declared in the OpusHead packet of
// the first page, and divides by 48000 (Opus granule positions always count
// 48 kHz samples).
//
// Page headers are read with io.ReadFull so that a reader returning short
// reads does not end the scan prematurely. A truncated file ends the scan and
// the duration found so far is returned. Bytes that do not start an Ogg page
// are skipped one at a time until the next "OggS" capture pattern. An error
// is returned when r cannot be rewound or read, or when it contains no Ogg
// page at all.
func getOpusDuration(r io.ReadSeeker) (float64, error) {
	const (
		oggPageHeaderSize = 27      // fixed part of an Ogg page header
		opusGranuleRate   = 48000.0 // granule positions are in 48 kHz samples
	)

	if _, err := r.Seek(0, io.SeekStart); err != nil {
		return 0, fmt.Errorf("failed to seek opus file: %w", err)
	}

	var (
		header      [oggPageHeaderSize]byte
		lastGranule int64
		preSkip     int64
		pages       int
	)

	for {
		if _, err := io.ReadFull(r, header[:]); err != nil {
			if err == io.EOF || err == io.ErrUnexpectedEOF {
				break // end of file, possibly in the middle of a header
			}
			return 0, fmt.Errorf("failed to read opus/ogg page: %w", err)
		}

		// Not at a page boundary: step forward by a single byte and retry.
		if string(header[:4]) != "OggS" {
			if _, err := r.Seek(-(oggPageHeaderSize - 1), io.SeekCurrent); err != nil {
				break
			}
			continue
		}
		pages++

		// Granule position: bytes 6-13, little endian. A value of -1 marks a
		// page on which no packet ends; it never exceeds lastGranule.
		granule := int64(binary.LittleEndian.Uint64(header[6:14]))
		if granule > lastGranule {
			lastGranule = granule
		}

		// The segment table gives the size of the page body.
		segments := make([]byte, int(header[26]))
		if _, err := io.ReadFull(r, segments); err != nil {
			break
		}
		bodySize := 0
		for _, s := range segments {
			bodySize += int(s)
		}

		// The first page carries the OpusHead packet; pre-skip is the
		// little-endian uint16 at offset 10 of that packet.
		if pages == 1 {
			body := make([]byte, bodySize)
			if _, err := io.ReadFull(r, body); err != nil {
				break
			}
			if len(body) >= 12 && string(body[:8]) == "OpusHead" {
				preSkip = int64(binary.LittleEndian.Uint16(body[10:12]))
			}
			continue
		}

		// Every other page body is skipped without being read.
		if _, err := r.Seek(int64(bodySize), io.SeekCurrent); err != nil {
			break
		}
	}

	if pages == 0 {
		return 0, errors.New("no ogg pages found in opus file")
	}

	samples := lastGranule - preSkip
	if samples < 0 {
		samples = 0
	}
	return float64(samples) / opusGranuleRate, nil
}
|
||||
|
||||
// getAIFFDuration 解析 AIFF 文件头以获取时长。
|
||||
func getAIFFDuration(r io.ReadSeeker) (float64, error) {
|
||||
if _, err := r.Seek(0, io.SeekStart); err != nil {
|
||||
return 0, errors.Wrap(err, "failed to seek aiff file")
|
||||
}
|
||||
|
||||
dec := aiff.NewDecoder(r)
|
||||
if !dec.IsValidFile() {
|
||||
return 0, errors.New("invalid aiff file")
|
||||
}
|
||||
|
||||
d, err := dec.Duration()
|
||||
if err != nil {
|
||||
return 0, errors.Wrap(err, "failed to get aiff duration")
|
||||
}
|
||||
|
||||
return d.Seconds(), nil
|
||||
}
|
||||
|
||||
// errEBMLMalformed marks structurally invalid EBML data (as opposed to an I/O
// failure of the underlying reader).
var errEBMLMalformed = errors.New("malformed ebml data")

// getWebMDuration returns the duration in seconds of a WebM (Matroska) file.
//
// It performs a minimal EBML walk: the file must start with an EBML header,
// then the Segment and its Info element are entered and the TimecodeScale
// (nanoseconds per tick, default 1,000,000) and Duration (float, in ticks)
// elements are read. All other elements are skipped with Seek, so the media
// data itself is never read. The walk stops at the end of Info, at the first
// Cluster, or at the end of the file.
//
// An error is returned when r cannot be rewound or read, when it does not
// start with an EBML header, or when no Duration element is found before the
// first Cluster.
func getWebMDuration(r io.ReadSeeker) (float64, error) {
	const (
		idEBMLHeader    = 0x1A45DFA3
		idSegment       = 0x18538067
		idInfo          = 0x1549A966
		idTimecodeScale = 0x2AD7B1
		idDuration      = 0x4489
		idCluster       = 0x1F43B675

		defaultTimecodeScale = 1000000 // nanoseconds per tick
	)

	if _, err := r.Seek(0, io.SeekStart); err != nil {
		return 0, fmt.Errorf("failed to seek webm file: %w", err)
	}

	// valueError maps a failure to read an element onto the error to return:
	// truncated or malformed data is a parse failure, anything else is I/O.
	valueError := func(err error) error {
		if err == io.EOF || err == io.ErrUnexpectedEOF || err == errEBMLMalformed {
			return errors.New("failed to parse webm file")
		}
		return fmt.Errorf("failed to read webm file: %w", err)
	}

	id, size, err := readEBMLElementHeader(r)
	if err != nil {
		return 0, valueError(err)
	}
	if id != idEBMLHeader || size < 0 {
		return 0, errors.New("failed to parse webm file")
	}
	if _, err := r.Seek(size, io.SeekCurrent); err != nil {
		return 0, fmt.Errorf("failed to seek webm file: %w", err)
	}

	var (
		scale   uint64 = defaultTimecodeScale
		ticks   float64
		found   bool
		inInfo  bool
		infoEnd int64 = -1 // absolute end offset of Info, -1 while unknown
	)

scan:
	for {
		// Stop once the whole Info element has been consumed.
		if infoEnd >= 0 {
			pos, err := r.Seek(0, io.SeekCurrent)
			if err != nil {
				return 0, fmt.Errorf("failed to seek webm file: %w", err)
			}
			if pos >= infoEnd {
				break
			}
		}

		id, size, err := readEBMLElementHeader(r)
		if err != nil {
			if err == io.EOF || err == io.ErrUnexpectedEOF || err == errEBMLMalformed {
				break // nothing more that can be parsed
			}
			return 0, fmt.Errorf("failed to read webm file: %w", err)
		}

		switch {
		case id == idSegment:
			// Descend: the children follow immediately.
		case id == idInfo:
			inInfo = true
			if size >= 0 {
				pos, err := r.Seek(0, io.SeekCurrent)
				if err != nil {
					return 0, fmt.Errorf("failed to seek webm file: %w", err)
				}
				infoEnd = pos + size
			}
		case inInfo && id == idTimecodeScale:
			v, err := readEBMLUint(r, size)
			if err != nil {
				return 0, valueError(err)
			}
			if v == 0 {
				return 0, errors.New("failed to parse webm file")
			}
			scale = v
		case inInfo && id == idDuration:
			v, err := readEBMLFloat(r, size)
			if err != nil {
				return 0, valueError(err)
			}
			ticks, found = v, true
		case id == idCluster:
			// Media data starts here; Info was not found before it.
			break scan
		default:
			if size < 0 {
				// Unknown-size element that cannot be skipped.
				break scan
			}
			if _, err := r.Seek(size, io.SeekCurrent); err != nil {
				return 0, fmt.Errorf("failed to seek webm file: %w", err)
			}
		}
	}

	if !found {
		return 0, errors.New("webm file does not declare a duration")
	}

	seconds := ticks * float64(scale) / 1e9
	// Reject negative, NaN and infinite results (x*0 is NaN for Inf and NaN).
	if !(seconds >= 0) || seconds*0 != 0 {
		return 0, errors.New("failed to parse webm file")
	}
	return seconds, nil
}

// readEBMLElementHeader reads one EBML element ID and data size from r.
// size is -1 when the element uses the reserved "unknown size" encoding.
func readEBMLElementHeader(r io.Reader) (id uint32, size int64, err error) {
	rawID, _, err := readEBMLVint(r, 4, true)
	if err != nil {
		return 0, 0, err
	}
	rawSize, width, err := readEBMLVint(r, 8, false)
	if err != nil {
		if err == io.EOF {
			err = io.ErrUnexpectedEOF // the ID was present, the size is missing
		}
		return 0, 0, err
	}
	if rawSize == (uint64(1)<<(7*uint(width)))-1 {
		return uint32(rawID), -1, nil
	}
	return uint32(rawID), int64(rawSize), nil
}

// readEBMLVint reads one EBML variable-length integer of at most maxWidth
// bytes. With keepMarker the length-marker bit stays in the value (element
// IDs); without it the bit is cleared (data sizes).
func readEBMLVint(r io.Reader, maxWidth int, keepMarker bool) (uint64, int, error) {
	var buf [8]byte
	if _, err := io.ReadFull(r, buf[:1]); err != nil {
		return 0, 0, err
	}

	// The number of leading zero bits of the first byte gives the width.
	first := buf[0]
	width := 1
	for marker := byte(0x80); marker != 0 && first&marker == 0; marker >>= 1 {
		width++
	}
	if width > maxWidth {
		return 0, 0, errEBMLMalformed
	}

	if _, err := io.ReadFull(r, buf[1:width]); err != nil {
		if err == io.EOF {
			err = io.ErrUnexpectedEOF
		}
		return 0, 0, err
	}

	value := uint64(first)
	if !keepMarker {
		value &^= uint64(0x80) >> uint(width-1)
	}
	for _, b := range buf[1:width] {
		value = value<<8 | uint64(b)
	}
	return value, width, nil
}

// readEBMLUint reads a big-endian unsigned integer payload of size bytes (0-8).
func readEBMLUint(r io.Reader, size int64) (uint64, error) {
	if size < 0 || size > 8 {
		return 0, errEBMLMalformed
	}
	var buf [8]byte
	if _, err := io.ReadFull(r, buf[:size]); err != nil {
		return 0, err
	}
	var v uint64
	for _, b := range buf[:size] {
		v = v<<8 | uint64(b)
	}
	return v, nil
}

// readEBMLFloat reads a big-endian IEEE-754 float payload of 0, 4 or 8 bytes.
func readEBMLFloat(r io.Reader, size int64) (float64, error) {
	switch size {
	case 0:
		return 0, nil
	case 4:
		var v float32
		if err := binary.Read(r, binary.BigEndian, &v); err != nil {
			return 0, err
		}
		return float64(v), nil
	case 8:
		var v float64
		if err := binary.Read(r, binary.BigEndian, &v); err != nil {
			return 0, err
		}
		return v, nil
	default:
		return 0, errEBMLMalformed
	}
}
|
||||
|
||||
// getAACDuration 解析 AAC (ADTS格式) 文件以获取时长。
|
||||
// 使用 gomedia 库来解析 AAC ADTS 帧
|
||||
func getAACDuration(r io.ReadSeeker) (float64, error) {
|
||||
if _, err := r.Seek(0, io.SeekStart); err != nil {
|
||||
return 0, errors.Wrap(err, "failed to seek aac file")
|
||||
}
|
||||
|
||||
// 读取整个文件内容
|
||||
data, err := io.ReadAll(r)
|
||||
if err != nil {
|
||||
return 0, errors.Wrap(err, "failed to read aac file")
|
||||
}
|
||||
|
||||
var totalFrames int64
|
||||
var sampleRate int
|
||||
|
||||
// 使用 gomedia 的 SplitAACFrame 函数来分割 AAC 帧
|
||||
codec.SplitAACFrame(data, func(aac []byte) {
|
||||
// 解析 ADTS 头部以获取采样率信息
|
||||
if len(aac) >= 7 {
|
||||
// 使用 ConvertADTSToASC 来获取音频配置信息
|
||||
asc, err := codec.ConvertADTSToASC(aac)
|
||||
if err == nil && sampleRate == 0 {
|
||||
sampleRate = codec.AACSampleIdxToSample(int(asc.Sample_freq_index))
|
||||
}
|
||||
totalFrames++
|
||||
}
|
||||
})
|
||||
|
||||
if sampleRate == 0 || totalFrames == 0 {
|
||||
return 0, errors.New("no valid aac frames found")
|
||||
}
|
||||
|
||||
// 每个 AAC ADTS 帧包含 1024 个采样
|
||||
totalSamples := totalFrames * 1024
|
||||
duration := float64(totalSamples) / float64(sampleRate)
|
||||
return duration, nil
|
||||
}
|
||||
@@ -163,7 +163,7 @@ func parseFormData(data []byte, v any) error {
|
||||
return err
|
||||
}
|
||||
|
||||
return json.Unmarshal(jsonData, v)
|
||||
return Unmarshal(jsonData, v)
|
||||
}
|
||||
|
||||
func parseMultipartFormData(c *gin.Context, data []byte, v any) error {
|
||||
@@ -174,7 +174,7 @@ func parseMultipartFormData(c *gin.Context, data []byte, v any) error {
|
||||
}
|
||||
|
||||
if boundary == "" {
|
||||
return json.Unmarshal(data, v) // Fallback to JSON
|
||||
return Unmarshal(data, v) // Fallback to JSON
|
||||
}
|
||||
|
||||
reader := multipart.NewReader(bytes.NewReader(data), boundary)
|
||||
@@ -191,10 +191,10 @@ func parseMultipartFormData(c *gin.Context, data []byte, v any) error {
|
||||
formMap[key] = vals
|
||||
}
|
||||
}
|
||||
jsonData, err := json.Marshal(formMap)
|
||||
jsonData, err := Marshal(formMap)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return json.Unmarshal(jsonData, v)
|
||||
return Unmarshal(jsonData, v)
|
||||
}
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
package common
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
crand "crypto/rand"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
@@ -329,43 +327,6 @@ func SaveTmpFile(filename string, data io.Reader) (string, error) {
|
||||
return f.Name(), nil
|
||||
}
|
||||
|
||||
// GetAudioDuration returns the duration of an audio file in seconds.
|
||||
func GetAudioDuration(ctx context.Context, filename string, ext string) (float64, error) {
|
||||
// ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 {{input}}
|
||||
c := exec.CommandContext(ctx, "ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", filename)
|
||||
output, err := c.Output()
|
||||
if err != nil {
|
||||
return 0, errors.Wrap(err, "failed to get audio duration")
|
||||
}
|
||||
durationStr := string(bytes.TrimSpace(output))
|
||||
if durationStr == "N/A" {
|
||||
// Create a temporary output file name
|
||||
tmpFp, err := os.CreateTemp("", "audio-*"+ext)
|
||||
if err != nil {
|
||||
return 0, errors.Wrap(err, "failed to create temporary file")
|
||||
}
|
||||
tmpName := tmpFp.Name()
|
||||
// Close immediately so ffmpeg can open the file on Windows.
|
||||
_ = tmpFp.Close()
|
||||
defer os.Remove(tmpName)
|
||||
|
||||
// ffmpeg -y -i filename -vcodec copy -acodec copy <tmpName>
|
||||
ffmpegCmd := exec.CommandContext(ctx, "ffmpeg", "-y", "-i", filename, "-vcodec", "copy", "-acodec", "copy", tmpName)
|
||||
if err := ffmpegCmd.Run(); err != nil {
|
||||
return 0, errors.Wrap(err, "failed to run ffmpeg")
|
||||
}
|
||||
|
||||
// Recalculate the duration of the new file
|
||||
c = exec.CommandContext(ctx, "ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", tmpName)
|
||||
output, err := c.Output()
|
||||
if err != nil {
|
||||
return 0, errors.Wrap(err, "failed to get audio duration after ffmpeg")
|
||||
}
|
||||
durationStr = string(bytes.TrimSpace(output))
|
||||
}
|
||||
return strconv.ParseFloat(durationStr, 64)
|
||||
}
|
||||
|
||||
// BuildURL concatenates base and endpoint, returns the complete url string
|
||||
func BuildURL(base string, endpoint string) string {
|
||||
u, err := url.Parse(base)
|
||||
|
||||
Reference in New Issue
Block a user