mirror of
https://github.com/HugeFrog24/go-telegram-bot.git
synced 2026-04-30 23:32:19 +00:00
Design
This commit is contained in:
+115
@@ -0,0 +1,115 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"mime/multipart"
|
||||
"net/http"
|
||||
|
||||
tgbot "github.com/go-telegram/bot"
|
||||
)
|
||||
|
||||
// ElevenLabs HTTP API endpoints and defaults used by generateSpeech and
// transcribeVoice.
const (
	// elevenLabsTTSURL is the text-to-speech endpoint prefix. The configured
	// voice ID is appended directly, hence the trailing slash.
	elevenLabsTTSURL = "https://api.elevenlabs.io/v1/text-to-speech/"
	// elevenLabsSTTURL is the speech-to-text endpoint that receives the
	// multipart audio upload.
	elevenLabsSTTURL = "https://api.elevenlabs.io/v1/speech-to-text"
	// elevenLabsDefaultModel is the TTS model used when the bot configuration
	// leaves the model empty.
	elevenLabsDefaultModel = "eleven_multilingual_v2"
)
|
||||
|
||||
// generateSpeech converts text to an mp3 audio stream via ElevenLabs TTS.
|
||||
func (b *Bot) generateSpeech(ctx context.Context, text string) (io.Reader, error) {
|
||||
model := b.config.ElevenLabsModel
|
||||
if model == "" {
|
||||
model = elevenLabsDefaultModel
|
||||
}
|
||||
body, err := json.Marshal(map[string]string{
|
||||
"text": text,
|
||||
"model_id": model,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("elevenlabs TTS marshal error: %w", err)
|
||||
}
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost,
|
||||
elevenLabsTTSURL+b.config.ElevenLabsVoiceID, bytes.NewReader(body))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("elevenlabs TTS request error: %w", err)
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req.Header.Set("xi-api-key", b.config.ElevenLabsAPIKey)
|
||||
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("elevenlabs TTS error: %w", err)
|
||||
}
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
defer resp.Body.Close()
|
||||
errBody, _ := io.ReadAll(resp.Body)
|
||||
return nil, fmt.Errorf("elevenlabs TTS error: status %d: %s", resp.StatusCode, errBody)
|
||||
}
|
||||
return resp.Body, nil
|
||||
}
|
||||
|
||||
// transcribeVoice downloads a Telegram voice file and transcribes it via ElevenLabs STT.
|
||||
// Uses a direct multipart HTTP call instead of the SDK wrapper to avoid a bug in the
|
||||
// ogen-generated encoder: AdditionalFormats (nil slice) is always written as an empty
|
||||
// string with Content-Type: application/json, which ElevenLabs rejects with 400.
|
||||
func (b *Bot) transcribeVoice(ctx context.Context, fileID string) (string, error) {
|
||||
// 1. Resolve and download the voice file from Telegram.
|
||||
fileInfo, err := b.tgBot.GetFile(ctx, &tgbot.GetFileParams{FileID: fileID})
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("telegram GetFile error: %w", err)
|
||||
}
|
||||
downloadURL := b.tgBot.FileDownloadLink(fileInfo)
|
||||
audioResp, err := http.Get(downloadURL) //nolint:noctx
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("voice download error: %w", err)
|
||||
}
|
||||
defer audioResp.Body.Close()
|
||||
|
||||
// 2. Build multipart body with binary audio — bypasses SDK encoding issues.
|
||||
var buf bytes.Buffer
|
||||
mw := multipart.NewWriter(&buf)
|
||||
if err := mw.WriteField("model_id", "scribe_v1"); err != nil {
|
||||
return "", fmt.Errorf("multipart write error: %w", err)
|
||||
}
|
||||
part, err := mw.CreateFormFile("file", "audio.ogg")
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("multipart create file error: %w", err)
|
||||
}
|
||||
if _, err := io.Copy(part, audioResp.Body); err != nil {
|
||||
return "", fmt.Errorf("multipart copy error: %w", err)
|
||||
}
|
||||
if err := mw.Close(); err != nil {
|
||||
return "", fmt.Errorf("multipart close error: %w", err)
|
||||
}
|
||||
|
||||
// 3. POST to ElevenLabs STT.
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost,
|
||||
elevenLabsSTTURL, &buf)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("create STT request error: %w", err)
|
||||
}
|
||||
req.Header.Set("Content-Type", mw.FormDataContentType())
|
||||
req.Header.Set("xi-api-key", b.config.ElevenLabsAPIKey)
|
||||
|
||||
sttResp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("elevenlabs STT request error: %w", err)
|
||||
}
|
||||
defer sttResp.Body.Close()
|
||||
|
||||
if sttResp.StatusCode != http.StatusOK {
|
||||
body, _ := io.ReadAll(sttResp.Body)
|
||||
return "", fmt.Errorf("elevenlabs STT error: status %d: %s", sttResp.StatusCode, body)
|
||||
}
|
||||
|
||||
var result struct {
|
||||
Text string `json:"text"`
|
||||
}
|
||||
if err := json.NewDecoder(sttResp.Body).Decode(&result); err != nil {
|
||||
return "", fmt.Errorf("elevenlabs STT decode error: %w", err)
|
||||
}
|
||||
return result.Text, nil
|
||||
}
|
||||
Reference in New Issue
Block a user