"use client";
import { useState, useRef, useEffect, useCallback, useMemo } from "react";
import { useChat, type UIMessage } from "@ai-sdk/react";
import {
DefaultChatTransport,
lastAssistantMessageIsCompleteWithToolCalls,
} from "ai";
import { useTranslations, useLocale } from "next-intl";
import {
XMarkIcon,
PaperAirplaneIcon,
ArrowPathIcon,
TrashIcon,
MicrophoneIcon,
StopIcon,
Cog6ToothIcon,
} from "@heroicons/react/24/outline";
import type { ChatToolHandler } from "@/lib/chat-tools";
import { blobToWav, blobToBase64, isWavSilent } from "@/lib/wav-encoder";
import { toSpeechLang } from "@/lib/locales";
import type {
SpeechRecognitionEvent,
SpeechRecognitionErrorEvent,
SpeechRecognitionInstance,
} from "@/lib/speech-recognition";
import { getSpeechRecognition } from "@/lib/speech-recognition";
// ── Chat message persistence ──────────────────────────────────────
// sessionStorage key under which the serialized chat messages are kept.
// Key is intentionally locale-independent so the conversation survives
// language switches (which re-mount the entire [locale] layout tree).
export const CHAT_STORAGE_KEY = "chat-messages";
/**
 * Minimal structural check for one persisted message.
 * Only the fields this file reads while rendering (`role`, `parts`) are
 * verified; it is not a full schema validation.
 */
function isStoredMessage(value: unknown): value is UIMessage {
  if (typeof value !== "object" || value === null) return false;
  const candidate = value as { role?: unknown; parts?: unknown };
  return typeof candidate.role === "string" && Array.isArray(candidate.parts);
}

/**
 * Read previously stored messages from sessionStorage (runs once on mount).
 *
 * @returns The stored conversation, or `undefined` when running without a
 *   `window`, when nothing is stored, when storage is inaccessible, or when
 *   the stored value is not a non-empty array of message-shaped objects.
 */
function loadStoredMessages(): UIMessage[] | undefined {
  if (typeof window === "undefined") return undefined;
  try {
    const raw = sessionStorage.getItem(CHAT_STORAGE_KEY);
    if (!raw) return undefined;
    // JSON.parse returns arbitrary data; treat it as unknown until checked.
    const parsed: unknown = JSON.parse(raw);
    if (!Array.isArray(parsed) || parsed.length === 0) return undefined;
    // A single malformed entry discards the whole snapshot rather than
    // handing a partially valid conversation to the renderer.
    return parsed.every(isStoredMessage) ? parsed : undefined;
  } catch {
    // Malformed JSON or blocked storage: start with an empty conversation.
    return undefined;
  }
}
// ── Dictation mode types ──────────────────────────────────────────
// "browser" refers to the Web Speech API path, "server" to the path that
// records audio and posts it for transcription.
type DictationMode = "browser" | "server";
// localStorage key under which the selected dictation mode is persisted.
const DICTATION_STORAGE_KEY = "chat-dictation-mode";
/**
 * Format an elapsed duration as `m:ss` for the recording timer.
 *
 * Fractional seconds are truncated, and negative or non-finite input is
 * treated as zero, so the label is always well-formed. Minutes are not
 * wrapped into hours (3600 → "60:00").
 *
 * @param seconds Elapsed time in seconds.
 * @returns The formatted label, e.g. `"1:05"`.
 */
function formatRecordingTime(seconds: number): string {
  const total = Number.isFinite(seconds) ? Math.max(0, Math.floor(seconds)) : 0;
  const minutes = Math.floor(total / 60);
  const remainder = total % 60;
  return `${minutes}:${remainder.toString().padStart(2, "0")}`;
}
/**
* Reusable chat panel — "I am the window", not the button that opens me.
*
* The parent controls visibility (`open` / `onClose`).
* The parent also controls positioning (e.g. fixed bottom-right, sidebar,
* modal — whatever the layout needs). This component just fills whatever
* container it's placed in.
*
* Wrap it in a positioned container to get floating-window behaviour:
*
* ```tsx
* {isOpen && (
*   <div className="fixed bottom-4 right-4">
*     <ChatWidget open={isOpen} onClose={() => setIsOpen(false)} />
*   </div>
* )}
* ```
*/
/** Props accepted by the chat panel component. */
interface ChatWidgetProps {
  /** Whether the panel is visible. */
  open: boolean;
  /** Called when the user clicks the close button inside the panel. */
  onClose: () => void;
  /** Override the API endpoint (defaults to "/api/chat") */
  apiEndpoint?: string;
  /**
   * Client-side tool handlers keyed by tool name.
   * Each handler receives the tool input and returns a short result string
   * that the model will see as the tool output.
   *
   * Tools are declared on the server (no `execute`), so the model can call
   * them; the handlers here run on the client when the call arrives.
   */
  clientTools?: Record<string, ChatToolHandler>;
  /** Extra key-value pairs merged into every request body (e.g. theme state). */
  extraBody?: Record<string, unknown>;
  /** Explicit user-initiated reset action from the header bin button. */
  onReset?: () => void;
  /**
   * Fires once when the chat stream finishes and status returns to "ready".
   * Useful for deferring side-effects (e.g. navigation) until the model's
   * response has been fully streamed and persisted.
   */
  onReady?: () => void;
}
export default function ChatWidget({
open,
onClose,
apiEndpoint = "/api/chat",
clientTools,
extraBody,
onReset,
onReady,
}: ChatWidgetProps) {
// Translator and active locale from next-intl. The locale also selects the
// speech-recognition language and is sent along with chat requests.
const t = useTranslations();
const locale = useLocale();
const speechLang = useMemo(() => toSpeechLang(locale), [locale]);
// Draft text of the message being composed.
const [input, setInput] = useState("");
// Element scrolled into view whenever the message list changes.
// NOTE(review): element type assumed to be a <div> — confirm against the JSX.
const messagesEndRef = useRef<HTMLDivElement>(null);
// The composer textarea: focused when the panel opens, resized as text grows.
const inputRef = useRef<HTMLTextAreaElement>(null);
// ── Dictation mode (browser vs server) ──────────────────────────
// Lazy initializer: read the saved preference once. Anything other than a
// known mode, or an inaccessible localStorage, falls back to "server".
const [dictationMode, setDictationMode] = useState<DictationMode>(() => {
  if (typeof window === "undefined") return "server";
  try {
    const stored = localStorage.getItem(DICTATION_STORAGE_KEY);
    if (stored === "browser" || stored === "server") return stored;
  } catch {
    /* private browsing / quota */
  }
  return "server";
});
// Whether the settings panel is expanded.
const [settingsOpen, setSettingsOpen] = useState(false);
// Persist dictation mode preference
useEffect(() => {
  try {
    localStorage.setItem(DICTATION_STORAGE_KEY, dictationMode);
  } catch {
    /* private browsing / quota */
  }
}, [dictationMode]);
// ── Speech-to-text dictation (browser mode) ─────────────────────
const [isListening, setIsListening] = useState(false);
// The running recognition session, or null when none is active.
const recognitionRef = useRef<SpeechRecognitionInstance | null>(null);
// Tracks whether we *want* to be listening. Chrome's SpeechRecognition
// can fire `onend` at any time (silence, network blip, etc.) even with
// `continuous: true`. When this ref is true, the `onend` handler will
// automatically restart the engine.
const shouldListenRef = useRef(false);
// Check once whether the browser supports the Web Speech API.
const [speechSupported, setSpeechSupported] = useState(false);
// Set after a "network" recognition error; blocks further start attempts.
const [speechTemporarilyUnavailable, setSpeechTemporarilyUnavailable] =
  useState(false);
useEffect(() => {
  const supported = getSpeechRecognition() !== null;
  setSpeechSupported(supported);
  // If "browser" was stored but this browser doesn't support it, auto-correct
  if (!supported) {
    setDictationMode((prev) => (prev === "browser" ? "server" : prev));
  }
}, []);
// ── Server-side recording state ─────────────────────────────────
const [isRecording, setIsRecording] = useState(false);
const [isTranscribing, setIsTranscribing] = useState(false);
// Whole seconds elapsed in the current recording (drives the timer label).
const [recordingSeconds, setRecordingSeconds] = useState(0);
// Recorder for the current capture, or null when none exists.
const mediaRecorderRef = useRef<MediaRecorder | null>(null);
// Audio chunks collected from the recorder's `dataavailable` events.
const audioChunksRef = useRef<Blob[]>([]);
// Handle of the one-second interval that advances `recordingSeconds`.
const recordingTimerRef = useRef<ReturnType<typeof setInterval> | null>(
  null,
);
// Microphone stream backing the recorder; its tracks are stopped on release.
const streamRef = useRef<MediaStream | null>(null);
// Abort handle for the in-flight transcription request.
const transcribeAbortRef = useRef<AbortController | null>(null);
// ── Sound effects ────────────────────────────────────────────────
// Audio elements for the two cues; null until the mount effect creates them.
const sentSoundRef = useRef<HTMLAudioElement | null>(null);
const receivedSoundRef = useRef<HTMLAudioElement | null>(null);
// Preload audio files once on mount (safe in "use client" components).
useEffect(() => {
  sentSoundRef.current = new Audio("/sounds/message_sent.mp3");
  receivedSoundRef.current = new Audio("/sounds/new_notification.mp3");
}, []);
// Play the "sent" cue from the start. A rejected play() promise is
// deliberately ignored: the sound is a nicety, not a requirement.
const playSent = useCallback(() => {
  const s = sentSoundRef.current;
  if (!s) return;
  s.currentTime = 0;
  s.play().catch(() => {});
}, []);
// Same as playSent, for the "received" cue.
const playReceived = useCallback(() => {
  const s = receivedSoundRef.current;
  if (!s) return;
  s.currentTime = 0;
  s.play().catch(() => {});
}, []);
// ── Restore persisted conversation (computed once on mount) ──────
// The function itself (not its result) is passed, so useState treats it as a
// lazy initializer.
const [storedMessages] = useState(loadStoredMessages);
// Keep a ref so the onToolCall closure always sees the latest handlers
// without needing to re-create the useChat config on every render.
const clientToolsRef = useRef(clientTools);
// Refreshed on every render so the ref never lags behind the prop.
clientToolsRef.current = clientTools;
// Serialise extraBody so useMemo only re-creates the transport when the
// values actually change (object identity would change every render).
const extraBodyKey = JSON.stringify(extraBody ?? {});
// Transport handed to useChat, configured with the endpoint and a request
// body of `locale` plus the extraBody entries. extraBody is spread last, so
// a `locale` key inside it would override the active locale.
const transport = useMemo(
() =>
new DefaultChatTransport({
api: apiEndpoint,
body: { locale, ...extraBody },
}),
[apiEndpoint, locale, extraBodyKey], // extraBodyKey is the stable proxy for extraBody
);
// Chat state and actions. The conversation is seeded with the messages
// restored from sessionStorage (undefined when nothing was stored).
const { messages, sendMessage, regenerate, addToolOutput, status, error, clearError } = useChat({
messages: storedMessages,
transport,
// After all client-side tool results are available, automatically send
// them back so the model can generate a follow-up text response.
sendAutomaticallyWhen: clientTools
? lastAssistantMessageIsCompleteWithToolCalls
: undefined,
// Runs the matching client-side handler for a tool call and reports its
// result (or a generic failure) back through addToolOutput.
async onToolCall({ toolCall }) {
// Read through the ref so the newest handlers are used.
const handler = clientToolsRef.current?.[toolCall.toolName];
// No handler registered for this tool: nothing is reported from here.
if (!handler) return;
try {
// NOTE(review): the cast target `Record` carries no type arguments in
// this view — confirm the intended key/value types.
const output = await handler(
toolCall.input as Record,
);
addToolOutput({
tool: toolCall.toolName,
toolCallId: toolCall.toolCallId,
output,
});
} catch {
// The handler threw: report a fixed error text; the thrown value itself
// is not forwarded.
addToolOutput({
tool: toolCall.toolName,
toolCallId: toolCall.toolCallId,
state: "output-error",
errorText: "Tool execution failed.",
});
}
},
// Play the notification sound when the assistant finishes a text response.
// Skip aborts, disconnects, errors, and intermediate tool-call turns
// (those will be resubmitted automatically by sendAutomaticallyWhen).
onFinish({ isAbort, isDisconnect, isError, finishReason }) {
if (isAbort || isDisconnect || isError) return;
if (finishReason === "tool-calls") return;
playReceived();
},
onError() {
// handled via the `error` return value — no need to rethrow
},
});
// True while a request has been submitted or a response is streaming.
const isLoading = status === "streaming" || status === "submitted";
// ── Persist messages to sessionStorage on every change ──────────
// Declared BEFORE the status watcher so messages are saved before
// onReady can trigger navigation (React runs effects in order).
useEffect(() => {
try {
if (messages.length > 0) {
sessionStorage.setItem(CHAT_STORAGE_KEY, JSON.stringify(messages));
} else {
// An empty conversation removes the key instead of storing "[]".
sessionStorage.removeItem(CHAT_STORAGE_KEY);
}
} catch { /* private browsing / quota */ }
}, [messages]);
// ── Notify parent when a stream *truly* completes (active → ready) ─
// Guard: skip the intermediate "ready" after a tool-call message —
// sendAutomaticallyWhen is about to fire another round-trip. Only
// signal when the final text response is done.
// Status seen by the previous run of the effect below.
const prevStatusRef = useRef(status);
useEffect(() => {
const wasActive =
prevStatusRef.current === "streaming" ||
prevStatusRef.current === "submitted";
// Record the current status before any early return, so the next run
// compares against it. Re-runs caused only by the other dependencies then
// see wasActive === false and do not fire onReady again.
prevStatusRef.current = status;
if (!wasActive || status !== "ready") return;
// If the last assistant message is a complete tool call, the auto-send
// will resubmit momentarily — don't fire onReady yet.
if (clientTools && lastAssistantMessageIsCompleteWithToolCalls({ messages })) return;
onReady?.();
}, [status, messages, clientTools, onReady]);
// Keep the newest message visible: smooth-scroll the end marker into view
// each time the message list changes.
useEffect(() => {
  const endMarker = messagesEndRef.current;
  endMarker?.scrollIntoView({ behavior: "smooth" });
}, [messages]);
// When the panel opens, focus the input after a short delay. The pending
// timer is cancelled if `open` changes or the component unmounts first.
useEffect(() => {
  if (!open) return undefined;
  const focusTimer = setTimeout(() => {
    inputRef.current?.focus();
  }, 150);
  return () => {
    clearTimeout(focusTimer);
  };
}, [open]);
/**
 * Send the current draft as a user message.
 *
 * Does nothing when the trimmed draft is empty or a response is still in
 * flight. On send, the "sent" cue is played and the composer is cleared.
 */
const handleSubmit = (e: React.FormEvent) => {
  e.preventDefault();
  const trimmed = input.trim();
  if (!trimmed || isLoading) return;
  sendMessage({ text: trimmed });
  playSent();
  setInput("");
  // Browser dictation rebuilds the textarea from this buffer on every
  // recognition result. Clear it together with the input, otherwise the
  // text that was just sent reappears as soon as the next result arrives.
  committedRef.current = "";
  // Reset textarea height after sending
  if (inputRef.current) {
    inputRef.current.style.height = "auto";
  }
};
// Fit the textarea height to its content. The height is reset to "auto"
// first so that scrollHeight reflects the current text rather than the
// previous size.
const autoResize = useCallback(() => {
  const textarea = inputRef.current;
  if (textarea) {
    textarea.style.height = "auto";
    textarea.style.height = `${textarea.scrollHeight}px`;
  }
}, []);
// Enter sends, Shift+Enter inserts a newline.
const handleKeyDown = (e: React.KeyboardEvent) => {
  if (e.key !== "Enter" || e.shiftKey) return;
  // While an IME composition is active (e.g. Japanese, Chinese or Korean
  // input), Enter confirms the candidate text. Submitting here would send a
  // half-composed message.
  if (e.nativeEvent.isComposing) return;
  e.preventDefault();
  handleSubmit(e);
};
// ── Speech-to-text helpers ─────────────────────────────────────
// Finalized transcript gathered so far. Kept in a ref so the words survive
// recognition restarts.
const committedRef = useRef("");
/**
 * Deliberately end browser dictation: clear the restart intent, abort the
 * running session (if any), drop the reference to it and update the UI flag.
 */
const stopListening = useCallback(() => {
  // Cleared first so the session's `onend` handler does not restart it.
  shouldListenRef.current = false;
  const active = recognitionRef.current;
  if (active) {
    active.abort();
  }
  recognitionRef.current = null;
  setIsListening(false);
}, []);
/**
 * Tear down any in-progress server recording / transcription.
 *
 * Aborts the in-flight transcription request, stops the timer, stops the
 * recorder without letting its handlers run, releases the microphone and
 * resets the related state.
 */
const cleanupRecording = useCallback(() => {
  transcribeAbortRef.current?.abort();
  transcribeAbortRef.current = null;
  if (recordingTimerRef.current) {
    clearInterval(recordingTimerRef.current);
    recordingTimerRef.current = null;
  }
  const recorder = mediaRecorderRef.current;
  if (recorder) {
    // Detach the handlers before stopping. stop() still fires
    // `dataavailable` and `stop`, and the `onstop` handler would otherwise
    // transcribe the discarded audio and write into the input after teardown.
    recorder.ondataavailable = null;
    recorder.onstop = null;
    if (recorder.state !== "inactive") {
      try {
        recorder.stop();
      } catch {
        /* already stopped */
      }
    }
  }
  streamRef.current?.getTracks().forEach((track) => track.stop());
  streamRef.current = null;
  mediaRecorderRef.current = null;
  audioChunksRef.current = [];
  setIsRecording(false);
  setIsTranscribing(false);
  setRecordingSeconds(0);
}, []);
// Ensure all dictation is stopped when the widget closes or unmounts.
useEffect(() => {
// Panel is closed: stop both dictation paths right away.
if (!open) {
stopListening();
cleanupRecording();
}
// The cleanup runs on unmount and also before the effect re-runs when a
// dependency changes, so dictation is stopped on every `open` transition.
return () => {
stopListening();
cleanupRecording();
};
}, [open, stopListening, cleanupRecording]);
/**
 * Create, configure, and start a SpeechRecognition instance.
 * Extracted so both `startListening` and the auto-restart in
 * `onend` can share the same setup logic.
 *
 * Events from a session that is no longer the current one (stopped through
 * `stopListening`, or replaced by a newer session) are ignored. Without that
 * check, a late `onend` from the old session would clear the ref of the
 * session that replaced it and start a duplicate next to it.
 *
 * Rethrows whatever `recognition.start()` throws, after clearing the ref.
 */
const bootRecognition = useCallback(() => {
  const SR = getSpeechRecognition();
  if (!SR) return;
  const recognition = new SR();
  recognition.lang = speechLang;
  recognition.interimResults = true;
  recognition.continuous = true;

  // Fatal errors that mean we should truly stop.
  const fatal = new Set([
    "not-allowed",
    "service-not-allowed",
    "language-not-supported",
    "network",
  ]);
  // True while this instance is the one the component is tracking.
  const isCurrent = () => recognitionRef.current === recognition;

  recognition.onresult = (event: SpeechRecognitionEvent) => {
    if (!isCurrent()) return;
    let interim = "";
    for (let i = event.resultIndex; i < event.results.length; i++) {
      const transcript = event.results[i][0].transcript;
      if (event.results[i].isFinal) {
        const trimmed = transcript.trim();
        if (trimmed) {
          committedRef.current +=
            (committedRef.current ? " " : "") + trimmed;
        }
      } else {
        interim += transcript;
      }
    }
    // Show committed + interim preview in the textarea
    const preview = interim
      ? committedRef.current + (committedRef.current ? " " : "") + interim
      : committedRef.current;
    setInput(preview);
    // Auto-grow the textarea to fit the dictated text
    requestAnimationFrame(autoResize);
  };

  recognition.onerror = (event: SpeechRecognitionErrorEvent) => {
    // Helpful during integration/testing: shows why no text is emitted.
    console.warn("[chat] SpeechRecognition error:", event.error);
    // An error from a session we already dropped must not affect the
    // session that is running now.
    if (!isCurrent()) return;
    if (fatal.has(event.error)) {
      shouldListenRef.current = false;
      if (event.error === "network") {
        // Constructor exists but backend speech service is unreachable.
        // Disable dictation button for this page session to avoid a retry loop.
        setSpeechTemporarilyUnavailable(true);
      }
    }
    // Non-fatal errors (no-speech, aborted, audio-capture)
    // will trigger `onend`, which will auto-restart if shouldListenRef
    // is still true.
  };

  recognition.onend = () => {
    // Stopped intentionally (ref already cleared) or replaced by a newer
    // session: whoever did that has already updated the shared state.
    if (!isCurrent()) return;
    recognitionRef.current = null;
    // Auto-restart if we haven't explicitly stopped.
    if (shouldListenRef.current) {
      try {
        bootRecognition();
      } catch {
        shouldListenRef.current = false;
        setIsListening(false);
      }
      return;
    }
    setIsListening(false);
  };

  recognitionRef.current = recognition;
  try {
    recognition.start();
  } catch (err) {
    // Do not keep a reference to a session that never started.
    recognitionRef.current = null;
    throw err;
  }
}, [speechLang, autoResize]);
/**
 * Start browser speech recognition and stream results into the textarea.
 *
 * No-op when the Web Speech API is missing, when dictation was disabled
 * after a network error, or when a session is already wanted/running.
 * If the engine fails to start, the listening state is rolled back.
 */
const startListening = useCallback(() => {
  if (!getSpeechRecognition() || speechTemporarilyUnavailable) return;
  // Already listening: starting again would run a second session in parallel.
  if (shouldListenRef.current) return;
  // Seed the committed buffer with whatever text is already in the input
  committedRef.current = input;
  shouldListenRef.current = true;
  setIsListening(true);
  try {
    bootRecognition();
  } catch (err) {
    // Construction or start() failed: do not leave the UI stuck in
    // "listening" with no session behind it.
    console.warn("[chat] Could not start speech recognition:", err);
    shouldListenRef.current = false;
    recognitionRef.current = null;
    setIsListening(false);
  }
}, [input, bootRecognition, speechTemporarilyUnavailable]);
// ── Server-side recording helpers ──────────────────────────────
/**
 * Start capturing audio via MediaRecorder (server transcription mode).
 *
 * When the recorder stops, the audio is converted to WAV, checked for
 * silence, posted to `/api/transcribe`, and the returned text is appended
 * to the input. The flow stops early if the recording was torn down or the
 * transcription was aborted in the meantime.
 *
 * Failures are logged, never thrown. If setup fails after the microphone
 * was acquired, the stream is released again.
 */
const startRecording = useCallback(async () => {
  // Already capturing: a second start would orphan the first stream and timer.
  if (mediaRecorderRef.current?.state === "recording") return;
  let stream: MediaStream | null = null;
  try {
    stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    const micStream = stream;
    const recorder = new MediaRecorder(micStream);
    streamRef.current = micStream;
    mediaRecorderRef.current = recorder;
    audioChunksRef.current = [];
    recorder.ondataavailable = (e) => {
      if (e.data.size > 0) audioChunksRef.current.push(e.data);
    };
    recorder.onstop = async () => {
      const blob = new Blob(audioChunksRef.current, {
        type: recorder.mimeType,
      });
      audioChunksRef.current = [];
      // Release mic. Stop this recording's own tracks, not whatever
      // streamRef points at by now.
      micStream.getTracks().forEach((track) => track.stop());
      if (streamRef.current === micStream) streamRef.current = null;
      // The recorder is no longer the tracked one (teardown cleared the
      // ref): the audio was discarded, so there is nothing to transcribe.
      if (mediaRecorderRef.current !== recorder) return;
      if (blob.size === 0) return;
      // Register the abort handle before the first await so the whole
      // pipeline, not only the fetch, can be cancelled.
      const controller = new AbortController();
      transcribeAbortRef.current = controller;
      setIsTranscribing(true);
      try {
        const wavBlob = await blobToWav(blob);
        if (controller.signal.aborted) return;
        // ── Silence / too-short guard ───────────────────────────
        // Whisper-family models hallucinate plausible text on silent
        // audio. Catch it client-side to save an API round-trip.
        const silent = await isWavSilent(wavBlob);
        if (controller.signal.aborted) return;
        if (silent) {
          console.log("[chat] Recording was silent — skipping transcription");
          return; // finally block resets isTranscribing
        }
        const base64 = await blobToBase64(wavBlob);
        if (controller.signal.aborted) return;
        const res = await fetch("/api/transcribe", {
          method: "POST",
          headers: { "Content-Type": "application/json" },
          body: JSON.stringify({
            audio: base64,
            mimeType: "audio/wav",
            locale,
          }),
          signal: controller.signal,
        });
        // The body may not be JSON (e.g. an HTML error page); treat an
        // unreadable body as "no payload" rather than as an exception.
        let data: { text?: unknown; error?: unknown } | null = null;
        try {
          data = await res.json();
        } catch {
          data = null;
        }
        if (controller.signal.aborted) return;
        const text = data?.text;
        if (typeof text === "string" && text) {
          setInput((prev) => {
            const separator = prev.trim() ? " " : "";
            return prev + separator + text;
          });
          // Auto-grow textarea
          requestAnimationFrame(autoResize);
        } else if (data?.error || !res.ok) {
          console.warn(
            "[chat] Transcription failed:",
            data?.error ?? `HTTP ${res.status}`,
          );
        }
      } catch (err) {
        if (err instanceof DOMException && err.name === "AbortError") return;
        console.warn("[chat] Transcription error:", err);
      } finally {
        // Only clear the handle if it is still ours.
        if (transcribeAbortRef.current === controller) {
          transcribeAbortRef.current = null;
        }
        setIsTranscribing(false);
      }
    };
    recorder.start();
    setIsRecording(true);
    setRecordingSeconds(0);
    recordingTimerRef.current = setInterval(() => {
      setRecordingSeconds((s) => s + 1);
    }, 1000);
  } catch (err) {
    if (stream) {
      // The microphone was granted but recorder setup failed: release the
      // stream so the browser's recording indicator does not stay on.
      const acquired = stream;
      acquired.getTracks().forEach((track) => track.stop());
      if (streamRef.current === acquired) streamRef.current = null;
      if (mediaRecorderRef.current?.stream === acquired) {
        mediaRecorderRef.current = null;
      }
      console.warn("[chat] Could not start recording:", err);
    } else {
      console.warn("[chat] Microphone access denied:", err);
    }
  }
}, [locale, autoResize]);
/**
 * End the current capture: halt the elapsed-time timer and stop the
 * recorder if it is recording, which in turn fires its `onstop` handler.
 */
const stopRecording = useCallback(() => {
  const timer = recordingTimerRef.current;
  if (timer) {
    clearInterval(timer);
    recordingTimerRef.current = null;
  }
  const recorder = mediaRecorderRef.current;
  if (recorder && recorder.state === "recording") {
    recorder.stop();
  }
  setIsRecording(false);
}, []);
// Render nothing while the panel is closed. This early return sits after
// every hook call above, so the hook order is the same on every render.
if (!open) return null;
return (
{/* Header */}
{t("Chat.title")}
{/* Settings panel */}
{settingsOpen && (
)}
{/* Messages area */}
{messages.length === 0 && (
{t("Chat.emptyState")}
)}
{messages.map((message) => {
const isUser = message.role === "user";
return (
{message.parts.map((part, i) => {
if (part.type === "text") {
return {part.text};
}
return null;
})}
);
})}
{isLoading &&
messages[messages.length - 1]?.role !== "assistant" && (
)}
{/* Error banner with retry */}
{error && (
{t("Chat.errorMessage")}
)}
{/* Input area */}
);
}