mirror of
https://github.com/HugeFrog24/go-telegram-bot.git
synced 2026-06-29 22:07:12 +00:00
243 lines
9.2 KiB
Go
243 lines
9.2 KiB
Go
package main
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"net/http"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/anthropics/anthropic-sdk-go"
|
|
)
|
|
|
|
// fileNotFoundPrefix is the exact prefix Anthropic uses in its 404 error body
|
|
// when a referenced file_id no longer exists. Used by extractMissingFileID to
|
|
// identify the offender for the runtime self-heal path.
|
|
// The trailing space is part of the prefix: parseMissingFileIDFromBody slices
// the body immediately after it, so the file_id must begin at the very next byte.
const fileNotFoundPrefix = "File not found: "
|
|
|
|
// formatUploadFilename builds the canonical name under which a Telegram photo
// is uploaded to the Anthropic Files API: "tg-<botID>-<chatID>-<tgMessageID>.<ext>".
//
// The "tg-" prefix marks the file as bot-owned, so a reconciliation job can
// tell our uploads apart from foreign files living in the same workspace.
// Every photo of an album arrives as its own Telegram message with a distinct
// message_id, so the (botID, chatID, tgMessageID) triple does not collide
// across album items.
//
// ext is inserted verbatim after the dot; pass it without a leading ".".
func formatUploadFilename(botID uint, chatID int64, tgMessageID int, ext string) string {
	const layout = "tg-%d-%d-%d.%s"
	name := fmt.Sprintf(layout, botID, chatID, tgMessageID, ext)
	return name
}
|
|
|
|
// uploadImageToAnthropic uploads raw image bytes to the Anthropic Files API and
|
|
// returns the resulting file_id. The filename should follow the formatUploadFilename
|
|
// convention so the reconciliation job can identify the file as bot-owned.
|
|
func (b *Bot) uploadImageToAnthropic(ctx context.Context, data []byte, filename, contentType string) (string, error) {
|
|
resp, err := b.anthropicClient.Beta.Files.Upload(ctx, anthropic.BetaFileUploadParams{
|
|
File: anthropic.File(bytes.NewReader(data), filename, contentType),
|
|
Betas: []anthropic.AnthropicBeta{anthropic.AnthropicBetaFilesAPI2025_04_14},
|
|
})
|
|
if err != nil {
|
|
return "", fmt.Errorf("anthropic files upload: %w", err)
|
|
}
|
|
return resp.ID, nil
|
|
}
|
|
|
|
// deleteFileFromAnthropic removes a file from the Anthropic Files API. A 404
|
|
// is treated as success — the file is already gone, which is the same effective
|
|
// outcome the caller wants. This makes the deletion idempotent and safe for the
|
|
// reconciliation job's retries.
|
|
func (b *Bot) deleteFileFromAnthropic(ctx context.Context, fileID string) error {
|
|
_, err := b.anthropicClient.Beta.Files.Delete(ctx, fileID, anthropic.BetaFileDeleteParams{
|
|
Betas: []anthropic.AnthropicBeta{anthropic.AnthropicBetaFilesAPI2025_04_14},
|
|
})
|
|
if err == nil {
|
|
return nil
|
|
}
|
|
var apiErr *anthropic.Error
|
|
if errors.As(err, &apiErr) && apiErr.StatusCode == http.StatusNotFound {
|
|
return nil
|
|
}
|
|
return fmt.Errorf("anthropic files delete %s: %w", fileID, err)
|
|
}
|
|
|
|
// compensatingDelete fires Delete calls for a set of file_ids that were uploaded
|
|
// successfully but couldn't be committed downstream. Errors are logged rather
|
|
// than returned — the caller has already entered an error path, and orphans on
|
|
// Anthropic are harmless (storage is free until the 500 GB workspace cap and the
|
|
// reconciliation job will mop them up).
|
|
func (b *Bot) compensatingDelete(ctx context.Context, fileIDs []string) {
|
|
for _, fid := range fileIDs {
|
|
if err := b.deleteFileFromAnthropic(ctx, fid); err != nil {
|
|
ErrorLogger.Printf("[%s] compensating delete for %s: %v", b.config.ID, fid, err)
|
|
}
|
|
}
|
|
}
|
|
|
|
// extractMissingFileID inspects an Anthropic API error and returns the file_id
|
|
// that triggered a "File not found:" 404, if any. Returns empty string if the
|
|
// error is not a file-not-found error. Used by the runtime self-heal path to
|
|
// identify which file_id to strip from replay.
|
|
func extractMissingFileID(err error) string {
|
|
if err == nil {
|
|
return ""
|
|
}
|
|
var apiErr *anthropic.Error
|
|
if !errors.As(err, &apiErr) {
|
|
return ""
|
|
}
|
|
if apiErr.StatusCode != http.StatusNotFound {
|
|
return ""
|
|
}
|
|
return parseMissingFileIDFromBody(apiErr.RawJSON())
|
|
}
|
|
|
|
// parseMissingFileIDFromBody pulls a file_id out of a raw "File not found:"
|
|
// 404 body. Split out from extractMissingFileID so the string-parsing logic
|
|
// is unit-testable without having to synthesize an *anthropic.Error (whose
|
|
// JSON.raw field is private to the SDK).
|
|
func parseMissingFileIDFromBody(raw string) string {
|
|
idx := strings.Index(raw, fileNotFoundPrefix)
|
|
if idx == -1 {
|
|
return ""
|
|
}
|
|
rest := raw[idx+len(fileNotFoundPrefix):]
|
|
// File IDs are file_<base62>; the message embeds them with no surrounding
|
|
// quotes, so the id ends at the first character outside the alphanumeric +
|
|
// underscore set.
|
|
end := strings.IndexFunc(rest, func(r rune) bool {
|
|
return (r < 'a' || r > 'z') &&
|
|
(r < 'A' || r > 'Z') &&
|
|
(r < '0' || r > '9') &&
|
|
r != '_'
|
|
})
|
|
if end == -1 {
|
|
return rest
|
|
}
|
|
return rest[:end]
|
|
}
|
|
|
|
// hardDeleteScope performs the three-step hard-delete pattern on every Message
|
|
// row matching the given WHERE clause:
|
|
//
|
|
// 1. Soft-delete the rows (GORM Delete) — they become invisible to replay
|
|
// immediately, regardless of how the Anthropic-side cleanup unfolds.
|
|
// 2. For each row, call Anthropic Files.Delete on its ImageFileIDs. 404 is
|
|
// treated as success (already gone).
|
|
// 3. Rows whose file cleanup succeeded are Unscoped().Delete'd. Rows whose
|
|
// file cleanup failed remain soft-deleted with FilesCleanedAt NULL — the
|
|
// reconciliation job will retry them.
|
|
//
|
|
// This gives hard-delete eventually-consistent semantics across the DB and
|
|
// Anthropic, while still presenting the user with an instant "history cleared"
|
|
// outcome (the soft-delete in step 1 hides the rows from any further reads).
|
|
func (b *Bot) hardDeleteScope(ctx context.Context, query string, args ...interface{}) error {
|
|
// Unscoped on the scan: include already-soft-deleted rows so a hard-delete
|
|
// after a prior soft-delete still removes them completely. Matches the
|
|
// existing "erase and bust all caches" semantics for /clear_hard.
|
|
var rows []Message
|
|
if err := b.db.Unscoped().Where(query, args...).Find(&rows).Error; err != nil {
|
|
return fmt.Errorf("scan rows: %w", err)
|
|
}
|
|
if len(rows) == 0 {
|
|
return nil
|
|
}
|
|
// Soft-delete any rows that aren't already soft-deleted (graceful degradation:
|
|
// if Anthropic-side file cleanup fails, the row stays invisible to replay).
|
|
// Already-soft-deleted rows are unaffected by Delete without Unscoped.
|
|
if err := b.db.Where(query, args...).Delete(&Message{}).Error; err != nil {
|
|
return fmt.Errorf("soft delete: %w", err)
|
|
}
|
|
|
|
hardDeletable := make([]uint, 0, len(rows))
|
|
for _, row := range rows {
|
|
if b.deleteRowFiles(ctx, row) {
|
|
hardDeletable = append(hardDeletable, row.ID)
|
|
}
|
|
}
|
|
if len(hardDeletable) == 0 {
|
|
return nil
|
|
}
|
|
if err := b.db.Unscoped().Where("id IN ?", hardDeletable).Delete(&Message{}).Error; err != nil {
|
|
return fmt.Errorf("hard delete: %w", err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// deleteRowFiles tries to delete every file_id referenced by row from the
|
|
// Anthropic Files API. Returns true iff all deletes succeeded (or the row had
|
|
// no images), making the row eligible for hard-delete. False means at least
|
|
// one delete failed and the row should stay soft-deleted for retry.
|
|
func (b *Bot) deleteRowFiles(ctx context.Context, row Message) bool {
|
|
if len(row.ImageFileIDs) == 0 {
|
|
return true
|
|
}
|
|
allOk := true
|
|
for _, fid := range row.ImageFileIDs {
|
|
if err := b.deleteFileFromAnthropic(ctx, fid); err != nil {
|
|
ErrorLogger.Printf("[%s] anthropic delete %s (row %d): %v", b.config.ID, fid, row.ID, err)
|
|
allOk = false
|
|
}
|
|
}
|
|
return allOk
|
|
}
|
|
|
|
// stripDeadFileIDs filters src down to the ids that are absent from deadSet,
// preserving their order, and reports whether anything was dropped.
//
// The returned slice is always freshly allocated and never nil, so src is
// left untouched; an empty or nil src yields (empty, false).
func stripDeadFileIDs(src []string, deadSet map[string]struct{}) (survivors []string, dirty bool) {
	kept := make([]string, 0, len(src))
	for i := range src {
		if _, isDead := deadSet[src[i]]; !isDead {
			kept = append(kept, src[i])
		}
	}
	// Something was removed exactly when fewer ids came out than went in.
	return kept, len(kept) != len(src)
}
|
|
|
|
// markFilesPendingCleanup removes a set of dead file_ids from any stored Message
|
|
// rows that reference them, and stamps FilesCleanedAt on the affected rows so
|
|
// the reconciliation job can see they've been touched. Called by the runtime
|
|
// self-heal path after a "File not found:" 404 surfaces during message-create.
|
|
// Returns the number of rows updated.
|
|
func (b *Bot) markFilesPendingCleanup(ctx context.Context, chatID int64, deadFileIDs []string) (int, error) {
|
|
if len(deadFileIDs) == 0 {
|
|
return 0, nil
|
|
}
|
|
deadSet := make(map[string]struct{}, len(deadFileIDs))
|
|
for _, id := range deadFileIDs {
|
|
deadSet[id] = struct{}{}
|
|
}
|
|
var rows []Message
|
|
if err := b.db.WithContext(ctx).
|
|
Where("bot_id = ? AND chat_id = ? AND image_file_ids IS NOT NULL", b.botID, chatID).
|
|
Find(&rows).Error; err != nil {
|
|
return 0, fmt.Errorf("scan rows for cleanup: %w", err)
|
|
}
|
|
now := time.Now()
|
|
updated := 0
|
|
for _, row := range rows {
|
|
survivors, dirty := stripDeadFileIDs(row.ImageFileIDs, deadSet)
|
|
if !dirty {
|
|
continue
|
|
}
|
|
if len(survivors) == 0 {
|
|
// All files in this row are gone; mark fully cleaned so a future
|
|
// reconciliation job's `WHERE files_cleaned_at IS NULL` filter
|
|
// correctly excludes it from retries.
|
|
row.ImageFileIDs = nil
|
|
row.FilesCleanedAt = &now
|
|
} else {
|
|
// Surviving file_ids are still alive on Anthropic. Leave
|
|
// FilesCleanedAt NULL so a later death of one of them remains
|
|
// visible to the reconciliation job's filter.
|
|
row.ImageFileIDs = survivors
|
|
}
|
|
if err := b.db.WithContext(ctx).Save(&row).Error; err != nil {
|
|
return updated, fmt.Errorf("update row %d: %w", row.ID, err)
|
|
}
|
|
updated++
|
|
}
|
|
return updated, nil
|
|
}
|