mirror of
https://github.com/HugeFrog24/go-telegram-bot.git
synced 2026-04-30 23:32:19 +00:00
116 lines
3.7 KiB
Go
116 lines
3.7 KiB
Go
package main
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"mime/multipart"
|
|
"net/http"
|
|
|
|
tgbot "github.com/go-telegram/bot"
|
|
)
|
|
|
|
// ElevenLabs API settings shared by the speech helpers in this file.
const (
	// elevenLabsDefaultModel is the TTS model used when the bot configuration
	// does not name one.
	elevenLabsDefaultModel = "eleven_multilingual_v2"

	// elevenLabsTTSURL is the text-to-speech endpoint prefix; the voice ID is
	// appended directly after the trailing slash.
	elevenLabsTTSURL = "https://api.elevenlabs.io/v1/text-to-speech/"

	// elevenLabsSTTURL is the speech-to-text endpoint.
	elevenLabsSTTURL = "https://api.elevenlabs.io/v1/speech-to-text"
)
|
|
|
|
// generateSpeech converts text to an mp3 audio stream via ElevenLabs TTS.
|
|
func (b *Bot) generateSpeech(ctx context.Context, text string) (io.Reader, error) {
|
|
model := b.config.ElevenLabsModel
|
|
if model == "" {
|
|
model = elevenLabsDefaultModel
|
|
}
|
|
body, err := json.Marshal(map[string]string{
|
|
"text": text,
|
|
"model_id": model,
|
|
})
|
|
if err != nil {
|
|
return nil, fmt.Errorf("elevenlabs TTS marshal error: %w", err)
|
|
}
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost,
|
|
elevenLabsTTSURL+b.config.ElevenLabsVoiceID, bytes.NewReader(body))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("elevenlabs TTS request error: %w", err)
|
|
}
|
|
req.Header.Set("Content-Type", "application/json")
|
|
req.Header.Set("xi-api-key", b.config.ElevenLabsAPIKey)
|
|
|
|
resp, err := http.DefaultClient.Do(req)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("elevenlabs TTS error: %w", err)
|
|
}
|
|
if resp.StatusCode != http.StatusOK {
|
|
defer resp.Body.Close()
|
|
errBody, _ := io.ReadAll(resp.Body)
|
|
return nil, fmt.Errorf("elevenlabs TTS error: status %d: %s", resp.StatusCode, errBody)
|
|
}
|
|
return resp.Body, nil
|
|
}
|
|
|
|
// transcribeVoice downloads a Telegram voice file and transcribes it via ElevenLabs STT.
|
|
// Uses a direct multipart HTTP call instead of the SDK wrapper to avoid a bug in the
|
|
// ogen-generated encoder: AdditionalFormats (nil slice) is always written as an empty
|
|
// string with Content-Type: application/json, which ElevenLabs rejects with 400.
|
|
func (b *Bot) transcribeVoice(ctx context.Context, fileID string) (string, error) {
|
|
// 1. Resolve and download the voice file from Telegram.
|
|
fileInfo, err := b.tgBot.GetFile(ctx, &tgbot.GetFileParams{FileID: fileID})
|
|
if err != nil {
|
|
return "", fmt.Errorf("telegram GetFile error: %w", err)
|
|
}
|
|
downloadURL := b.tgBot.FileDownloadLink(fileInfo)
|
|
audioResp, err := http.Get(downloadURL) //nolint:noctx
|
|
if err != nil {
|
|
return "", fmt.Errorf("voice download error: %w", err)
|
|
}
|
|
defer audioResp.Body.Close()
|
|
|
|
// 2. Build multipart body with binary audio — bypasses SDK encoding issues.
|
|
var buf bytes.Buffer
|
|
mw := multipart.NewWriter(&buf)
|
|
if err := mw.WriteField("model_id", "scribe_v1"); err != nil {
|
|
return "", fmt.Errorf("multipart write error: %w", err)
|
|
}
|
|
part, err := mw.CreateFormFile("file", "audio.ogg")
|
|
if err != nil {
|
|
return "", fmt.Errorf("multipart create file error: %w", err)
|
|
}
|
|
if _, err := io.Copy(part, audioResp.Body); err != nil {
|
|
return "", fmt.Errorf("multipart copy error: %w", err)
|
|
}
|
|
if err := mw.Close(); err != nil {
|
|
return "", fmt.Errorf("multipart close error: %w", err)
|
|
}
|
|
|
|
// 3. POST to ElevenLabs STT.
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost,
|
|
elevenLabsSTTURL, &buf)
|
|
if err != nil {
|
|
return "", fmt.Errorf("create STT request error: %w", err)
|
|
}
|
|
req.Header.Set("Content-Type", mw.FormDataContentType())
|
|
req.Header.Set("xi-api-key", b.config.ElevenLabsAPIKey)
|
|
|
|
sttResp, err := http.DefaultClient.Do(req)
|
|
if err != nil {
|
|
return "", fmt.Errorf("elevenlabs STT request error: %w", err)
|
|
}
|
|
defer sttResp.Body.Close()
|
|
|
|
if sttResp.StatusCode != http.StatusOK {
|
|
body, _ := io.ReadAll(sttResp.Body)
|
|
return "", fmt.Errorf("elevenlabs STT error: status %d: %s", sttResp.StatusCode, body)
|
|
}
|
|
|
|
var result struct {
|
|
Text string `json:"text"`
|
|
}
|
|
if err := json.NewDecoder(sttResp.Body).Decode(&result); err != nil {
|
|
return "", fmt.Errorf("elevenlabs STT decode error: %w", err)
|
|
}
|
|
return result.Text, nil
|
|
}
|