package main

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"net/http"
	"strings"
	"time"

	"github.com/anthropics/anthropic-sdk-go"
)

// fileNotFoundPrefix is the exact text Anthropic places in front of the
// offending file_id in the body of a 404 for a file that no longer exists.
// The runtime self-heal path (extractMissingFileID) keys off it to work out
// which file_id was rejected.
const fileNotFoundPrefix = "File not found: "

// formatUploadFilename builds the canonical name given to a Telegram photo when
// it is uploaded to the Anthropic Files API.
//
// The leading "tg-" marks the file as bot-owned, which lets a future
// reconciliation job tell our uploads apart from foreign files in the same
// workspace. (botID, chatID, tgMessageID) is unique within Telegram's scope:
// every photo of an album arrives as its own message with its own message_id,
// so album items cannot collide.
func formatUploadFilename(botID uint, chatID int64, tgMessageID int, ext string) string {
	const layout = "tg-%d-%d-%d.%s"
	return fmt.Sprintf(layout, botID, chatID, tgMessageID, ext)
}

// uploadImageToAnthropic sends raw image bytes to the Anthropic Files API and
// hands back the file_id of the created file. Callers should name the file via
// formatUploadFilename so the reconciliation job recognises it as bot-owned.
func (b *Bot) uploadImageToAnthropic(ctx context.Context, data []byte, filename, contentType string) (string, error) {
	params := anthropic.BetaFileUploadParams{
		File:  anthropic.File(bytes.NewReader(data), filename, contentType),
		Betas: []anthropic.AnthropicBeta{anthropic.AnthropicBetaFilesAPI2025_04_14},
	}

	uploaded, err := b.anthropicClient.Beta.Files.Upload(ctx, params)
	if err != nil {
		return "", fmt.Errorf("anthropic files upload: %w", err)
	}
	return uploaded.ID, nil
}

// deleteFileFromAnthropic deletes fileID from the Anthropic Files API.
//
// A 404 counts as success: the file is already gone, which is exactly the end
// state the caller asked for. That keeps the call idempotent and therefore
// safe for the reconciliation job to retry. Any other failure is returned
// wrapped with the file_id.
func (b *Bot) deleteFileFromAnthropic(ctx context.Context, fileID string) error {
	params := anthropic.BetaFileDeleteParams{
		Betas: []anthropic.AnthropicBeta{anthropic.AnthropicBetaFilesAPI2025_04_14},
	}
	_, err := b.anthropicClient.Beta.Files.Delete(ctx, fileID, params)

	var apiErr *anthropic.Error
	switch {
	case err == nil:
		return nil
	case errors.As(err, &apiErr) && apiErr.StatusCode == http.StatusNotFound:
		// Already deleted on Anthropic's side; nothing left to do.
		return nil
	default:
		return fmt.Errorf("anthropic files delete %s: %w", fileID, err)
	}
}

// compensatingDelete issues a Delete for each file_id that was uploaded
// successfully but could not be committed downstream.
//
// Failures are only logged, never returned: the caller is already on an error
// path, and an orphan left on Anthropic is harmless (storage is free until the
// 500 GB workspace cap and the reconciliation job will mop it up).
func (b *Bot) compensatingDelete(ctx context.Context, fileIDs []string) {
	for i := range fileIDs {
		orphan := fileIDs[i]
		err := b.deleteFileFromAnthropic(ctx, orphan)
		if err == nil {
			continue
		}
		ErrorLogger.Printf("[%s] compensating delete for %s: %v", b.config.ID, orphan, err)
	}
}

// extractMissingFileID examines an Anthropic API error and, when it is a
// "File not found:" 404, returns the file_id named in the body. Every other
// input (nil, a non-API error, a non-404 status) yields the empty string. The
// runtime self-heal path uses the result to decide which file_id to strip from
// replay.
func extractMissingFileID(err error) string {
	var apiErr *anthropic.Error
	if err == nil || !errors.As(err, &apiErr) || apiErr.StatusCode != http.StatusNotFound {
		return ""
	}
	return parseMissingFileIDFromBody(apiErr.RawJSON())
}

// parseMissingFileIDFromBody extracts a file_id from a raw "File not found:"
// 404 body. It is kept separate from extractMissingFileID so the string
// parsing can be unit-tested without synthesizing an *anthropic.Error (whose
// JSON.raw field is private to the SDK).
func parseMissingFileIDFromBody(raw string) string { idx := strings.Index(raw, fileNotFoundPrefix) if idx == -1 { return "" } rest := raw[idx+len(fileNotFoundPrefix):] // File IDs are file_; the message embeds them with no surrounding // quotes, so the id ends at the first character outside the alphanumeric + // underscore set. end := strings.IndexFunc(rest, func(r rune) bool { return (r < 'a' || r > 'z') && (r < 'A' || r > 'Z') && (r < '0' || r > '9') && r != '_' }) if end == -1 { return rest } return rest[:end] } // hardDeleteScope performs the three-step hard-delete pattern on every Message // row matching the given WHERE clause: // // 1. Soft-delete the rows (GORM Delete) — they become invisible to replay // immediately, regardless of how the Anthropic-side cleanup unfolds. // 2. For each row, call Anthropic Files.Delete on its ImageFileIDs. 404 is // treated as success (already gone). // 3. Rows whose file cleanup succeeded are Unscoped().Delete'd. Rows whose // file cleanup failed remain soft-deleted with FilesCleanedAt NULL — the // reconciliation job will retry them. // // This gives hard-delete eventually-consistent semantics across the DB and // Anthropic, while still presenting the user with an instant "history cleared" // outcome (the soft-delete in step 1 hides the rows from any further reads). func (b *Bot) hardDeleteScope(ctx context.Context, query string, args ...interface{}) error { // Unscoped on the scan: include already-soft-deleted rows so a hard-delete // after a prior soft-delete still removes them completely. Matches the // existing "erase and bust all caches" semantics for /clear_hard. var rows []Message if err := b.db.Unscoped().Where(query, args...).Find(&rows).Error; err != nil { return fmt.Errorf("scan rows: %w", err) } if len(rows) == 0 { return nil } // Soft-delete any rows that aren't already soft-deleted (graceful degradation: // if Anthropic-side file cleanup fails, the row stays invisible to replay). 
// Already-soft-deleted rows are unaffected by Delete without Unscoped. if err := b.db.Where(query, args...).Delete(&Message{}).Error; err != nil { return fmt.Errorf("soft delete: %w", err) } hardDeletable := make([]uint, 0, len(rows)) for _, row := range rows { if b.deleteRowFiles(ctx, row) { hardDeletable = append(hardDeletable, row.ID) } } if len(hardDeletable) == 0 { return nil } if err := b.db.Unscoped().Where("id IN ?", hardDeletable).Delete(&Message{}).Error; err != nil { return fmt.Errorf("hard delete: %w", err) } return nil } // deleteRowFiles tries to delete every file_id referenced by row from the // Anthropic Files API. Returns true iff all deletes succeeded (or the row had // no images), making the row eligible for hard-delete. False means at least // one delete failed and the row should stay soft-deleted for retry. func (b *Bot) deleteRowFiles(ctx context.Context, row Message) bool { if len(row.ImageFileIDs) == 0 { return true } allOk := true for _, fid := range row.ImageFileIDs { if err := b.deleteFileFromAnthropic(ctx, fid); err != nil { ErrorLogger.Printf("[%s] anthropic delete %s (row %d): %v", b.config.ID, fid, row.ID, err) allOk = false } } return allOk } // stripDeadFileIDs returns the subset of src whose ids are NOT in deadSet, and // reports whether any were removed. Empty/nil src yields (empty, false). func stripDeadFileIDs(src []string, deadSet map[string]struct{}) (survivors []string, dirty bool) { survivors = make([]string, 0, len(src)) for _, fid := range src { if _, dead := deadSet[fid]; dead { dirty = true continue } survivors = append(survivors, fid) } return survivors, dirty } // markFilesPendingCleanup removes a set of dead file_ids from any stored Message // rows that reference them, and stamps FilesCleanedAt on the affected rows so // the reconciliation job can see they've been touched. Called by the runtime // self-heal path after a "File not found:" 404 surfaces during message-create. // Returns the number of rows updated. 
func (b *Bot) markFilesPendingCleanup(ctx context.Context, chatID int64, deadFileIDs []string) (int, error) { if len(deadFileIDs) == 0 { return 0, nil } deadSet := make(map[string]struct{}, len(deadFileIDs)) for _, id := range deadFileIDs { deadSet[id] = struct{}{} } var rows []Message if err := b.db.WithContext(ctx). Where("bot_id = ? AND chat_id = ? AND image_file_ids IS NOT NULL", b.botID, chatID). Find(&rows).Error; err != nil { return 0, fmt.Errorf("scan rows for cleanup: %w", err) } now := time.Now() updated := 0 for _, row := range rows { survivors, dirty := stripDeadFileIDs(row.ImageFileIDs, deadSet) if !dirty { continue } if len(survivors) == 0 { // All files in this row are gone; mark fully cleaned so a future // reconciliation job's `WHERE files_cleaned_at IS NULL` filter // correctly excludes it from retries. row.ImageFileIDs = nil row.FilesCleanedAt = &now } else { // Surviving file_ids are still alive on Anthropic. Leave // FilesCleanedAt NULL so a later death of one of them remains // visible to the reconciliation job's filter. row.ImageFileIDs = survivors } if err := b.db.WithContext(ctx).Save(&row).Error; err != nil { return updated, fmt.Errorf("update row %d: %w", row.ID, err) } updated++ } return updated, nil }