// Name: Multi-Method Audio Transcriber
// Description: Transcribe audio/video via local Whisper or Deepgram with auto ffmpeg extraction, batch, and save/copy options.
// Author: dallascrilley
// GitHub: dallascrilley

import "@johnlindquist/kit"

/** Transcription method the user can pick; "auto" is resolved to a concrete engine at runtime. */
type Method = "auto" | "whisper" | "deepgram"

/** Concrete engine that actually performs the transcription. */
type Engine = Exclude<Method, "auto">

/** File extensions (lower-case, no dot) treated as audio input. */
const audioExts = new Set([
  "mp3", "wav", "m4a", "flac", "aac", "ogg", "oga", "opus", "wma", "aiff", "aif", "alac", "amr",
  "midi", "mid", "caf", "ra", "rm", "mp2", "mka", "mp1", "aifc", "pcm", "wavpack", "wv", "ape",
])

/** File extensions (lower-case, no dot) treated as video input. */
const videoExts = new Set([
  "mp4", "mov", "avi", "mkv", "webm", "wmv", "flv", "m4v", "mpg", "mpeg", "3gp", "3g2", "mts",
  "m2ts", "ts", "vob", "ogv", "mxf", "rmvb", "divx", "asf", "f4v", "mpe",
])

/** Extracts a printable message from anything that can be thrown. */
const errorMessage = (e: unknown): string => (e instanceof Error ? e.message : String(e))

/**
 * Quotes a single argument for the command strings passed to `exec`.
 *
 * JSON.stringify is not shell quoting: inside double quotes a POSIX shell still
 * expands `$VAR`, backticks and `$(...)`, so a file called "Cost $5.mp3" would be
 * mangled (or worse, executed). Single quotes disable all expansion.
 * On Windows the original double-quoted form is kept.
 * NOTE(review): assumes `exec` runs the command string through a shell — confirm.
 */
const shellQuote = (value: string): string =>
  process.platform === "win32"
    ? JSON.stringify(value)
    : `'${value.replace(/'/g, `'\\''`)}'`

/**
 * Throws if ffmpeg cannot be located.
 * NOTE(review): `which` is used as a synchronous lookup here, as in the original — confirm.
 */
const ensureFfmpeg = () => {
  if (!which("ffmpeg")) {
    throw new Error("ffmpeg not found. Please install ffmpeg and ensure it's on your PATH.")
  }
}

/** Prompts the user for the transcription method. */
const chooseMethod = async (): Promise<Method> => {
  const choice = await arg("Choose transcription method", [
    { name: "Auto (Prefer Local Whisper, fallback to Deepgram)", value: "auto" },
    { name: "Whisper (Local)", value: "whisper" },
    { name: "Deepgram (API)", value: "deepgram" },
  ])
  return choice as Method
}

/** Lower-cased extension of `p` without the leading dot ("" when there is none). */
const getExt = (p: string) => path.extname(p).toLowerCase().slice(1)
const isAudio = (p: string) => audioExts.has(getExt(p))
const isVideo = (p: string) => videoExts.has(getExt(p))

/**
 * Converts (or extracts the audio track of) `inputPath` into a 128 kbps mp3 in the
 * temp directory and returns the path of the new file.
 * A partially written output is removed when ffmpeg fails.
 */
const toMp3_128 = async (inputPath: string) => {
  ensureFfmpeg()
  const stem = path.basename(inputPath, path.extname(inputPath))
  const out = tmpPath(`${stem}-${uuid().slice(0, 8)}.mp3`)
  const cmd = `ffmpeg -y -i ${shellQuote(inputPath)} -vn -acodec libmp3lame -b:a 128k ${shellQuote(out)}`
  try {
    await exec(cmd)
  } catch (e: unknown) {
    // Best effort: do not leave a truncated mp3 behind.
    await remove(out).catch(() => {})
    throw e
  }
  return out
}

/**
 * Sends the mp3 at `mp3Path` to Deepgram and returns the trimmed transcript.
 * Uses the first channel/alternative, falling back to joined utterances, then "".
 */
const deepgramTranscribe = async (mp3Path: string, apiKey: string) => {
  const url = `https://api.deepgram.com/v1/listen?model=nova-3&smart_format=true`
  const data = await readFile(mp3Path)
  const { data: resp } = await post(url, data, {
    headers: {
      Authorization: `Token ${apiKey}`,
      "Content-Type": "audio/mp3",
    },
    maxBodyLength: Infinity,
  })
  const transcript =
    resp?.results?.channels?.[0]?.alternatives?.[0]?.transcript ||
    resp?.results?.utterances?.map((u: any) => u?.transcript).filter(Boolean).join("\n") ||
    ""
  return String(transcript || "").trim()
}

/**
 * Runs the local `whisper` CLI on `mp3Path` with the given model and returns the
 * trimmed text output. The temporary output directory is always removed.
 */
const whisperTranscribe = async (mp3Path: string, model: string) => {
  const whisperBin = which("whisper")
  if (!whisperBin) throw new Error("Local 'whisper' CLI not found.")

  const outDir = tmpPath(`whisper-${uuid().slice(0, 8)}`)
  await ensureDir(outDir)
  try {
    const cmd = [
      shellQuote(String(whisperBin)),
      shellQuote(mp3Path),
      "--model",
      shellQuote(model),
      "--output_format txt",
      "--output_dir",
      shellQuote(outDir),
    ].join(" ")
    await exec(cmd)

    // The output is expected at "<input stem>.txt". The original looked for
    // "<stem>.mp3*.txt", which never matched and always fell through to the fallback.
    const stem = path.basename(mp3Path, path.extname(mp3Path))
    let txt: string
    try {
      txt = await readFile(path.join(outDir, `${stem}.txt`), "utf8")
    } catch {
      // Fallback: any .txt in outDir. Globbing relative to `cwd` avoids treating
      // characters of the directory path itself as glob syntax.
      const [fallback] = await globby("*.txt", { cwd: outDir, absolute: true })
      if (!fallback) throw new Error("Whisper output .txt not found.")
      txt = await readFile(fallback, "utf8")
    }
    return txt.trim()
  } finally {
    await remove(outDir).catch(() => {})
  }
}

/** Asks the user to drop files and returns their paths; exits when nothing usable was dropped. */
const pickFiles = async (): Promise<string[]> => {
  const infos = await drop({
    placeholder: "Drop audio/video files to transcribe",
    enter: "Use Dropped Files",
  })
  if (typeof infos === "string") {
    await div(md(`Please drop files, not text.`))
    exit()
    return []
  }
  const paths = infos?.map((i: any) => i?.path).filter(Boolean) || []
  if (!paths.length) {
    await div(md(`No files dropped.`))
    exit()
    return []
  }
  return paths
}

/** Reads the Whisper model name from WHISPER_MODEL (prompting if needed); defaults to "base". */
const pickWhisperModel = async () => {
  const m = await env("WHISPER_MODEL", {
    placeholder: "Whisper model name (e.g., base, small, medium, large-v3)",
    hint: "Press enter for default 'base'",
  })
  return m?.trim() || "base"
}

/** Reads the Deepgram API key from DEEPGRAM_API_KEY (prompting if needed). */
const ensureDeepgramKey = async () => {
  const key = await env("DEEPGRAM_API_KEY", {
    secret: true,
    placeholder: "Enter your Deepgram API Key",
    hint: "Required for Deepgram transcription",
  })
  return key.trim()
}

/** True when a local `whisper` binary can be located. */
const preferWhisper = async () => Boolean(which("whisper"))

/**
 * Turns the user's choice into a concrete engine.
 * "auto" picks Whisper when the binary is present, otherwise Deepgram.
 * @throws when "whisper" is requested explicitly but the binary is missing.
 */
const resolveMethod = async (method: Method): Promise<Engine> => {
  if (method === "deepgram") return "deepgram"
  const hasWhisper = await preferWhisper()
  if (method === "whisper") {
    if (!hasWhisper) throw new Error("Local Whisper not available.")
    return "whisper"
  }
  // auto
  return hasWhisper ? "whisper" : "deepgram"
}

/** Shows "file current of total" progress in the panel. */
const updateProgress = async (current: number, total: number, fileName: string) => {
  const lines = [
    `### Transcribing ${current}/${total}`,
    `- File: ${path.basename(fileName)}`,
    `- Please wait...`,
  ]
  await setPanel(md(lines.join("\n")))
}

/** Renders success/failure counts plus one line per failed file with its error. */
const summarizeResults = (results: { file: string; ok: boolean; error?: string }[]) => {
  const failures = results.filter(r => !r.ok)
  const ok = results.length - failures.length
  const lines = [`### Done`, `- Success: ${ok}`, `- Failed: ${failures.length}`]
  if (failures.length) {
    lines.push("", "#### Failures")
    for (const failure of failures) {
      // Only the first line: command errors can carry many lines of stderr.
      const reason = (failure.error ?? "unknown error").split("\n")[0]
      lines.push(`- ${path.basename(failure.file)}: ${reason}`)
    }
  }
  return md(lines.join("\n"))
}

/** Script entry point: pick files and method, transcribe each file, then copy/save/view. */
const main = async () => {
  const inputPaths = await pickFiles()
  const methodChoice = await chooseMethod()

  let methodResolved: Engine
  try {
    methodResolved = await resolveMethod(methodChoice)
  } catch (e: unknown) {
    await div(md(`Error resolving method: ${errorMessage(e)}`))
    exit()
    return
  }

  let whisperModel = "base"
  let deepgramKey = ""
  if (methodResolved === "whisper") {
    whisperModel = await pickWhisperModel()
  } else {
    deepgramKey = await ensureDeepgramKey()
  }

  const transcripts: Record<string, string> = {}
  const results: { file: string; ok: boolean; error?: string }[] = []

  await setPanel(md(`Preparing files...`))

  for (const [index, filePath] of inputPaths.entries()) {
    let mp3: string | undefined
    try {
      if (!(isAudio(filePath) || isVideo(filePath))) {
        throw new Error(`Unsupported format: ${path.extname(filePath) || "unknown"}`)
      }
      await updateProgress(index + 1, inputPaths.length, filePath)

      // Always convert/extract to 128kbps mp3 for compatibility
      mp3 = await toMp3_128(filePath)
      const text =
        methodResolved === "whisper"
          ? await whisperTranscribe(mp3, whisperModel)
          : await deepgramTranscribe(mp3, deepgramKey)

      transcripts[filePath] = text
      results.push({ file: filePath, ok: true })
    } catch (err: unknown) {
      results.push({ file: filePath, ok: false, error: errorMessage(err) })
    } finally {
      // Remove the temporary mp3 whether or not transcription succeeded.
      if (mp3) await remove(mp3).catch(() => {})
    }
  }

  const summary = summarizeResults(results)
  const entries = Object.entries(transcripts)

  if (!entries.length) {
    // Nothing to copy or save: show what went wrong instead of offering empty options.
    await div(summary)
    return
  }

  await setPanel(summary)

  const combined = entries
    .map(([file, text]) => `# ${path.basename(file)}\n\n${text}\n`)
    .join("\n")

  if (combined.trim().length) {
    await copy(combined)
    await notify(`Transcripts copied to clipboard`)
  }

  // Offer to save individual transcript files
  const saveChoice = await arg("Save transcripts as .txt files?", ["Yes", "No"])
  if (saveChoice === "Yes") {
    const folder = await selectFolder("Select folder to save transcripts")
    if (folder) {
      // Inputs that share a stem (talk.mp3 / talk.mp4, or the same name in two
      // folders) get "-2", "-3", ... so they do not overwrite each other.
      // Compared case-insensitively for case-insensitive filesystems.
      const usedNames = new Set<string>()
      for (const [file, text] of entries) {
        const base = path.basename(file, path.extname(file))
        let name = `${base}.txt`
        for (let n = 2; usedNames.has(name.toLowerCase()); n++) {
          name = `${base}-${n}.txt`
        }
        usedNames.add(name.toLowerCase())
        await writeFile(path.join(folder, name), text, "utf8")
      }
      await notify(`Saved ${entries.length} transcript(s)`)
      await revealInFinder(folder)
    }
  }

  // Show a quick summary and allow viewing combined
  const view = await arg(
    {
      placeholder: "View results?",
      enter: "Open summary",
      strict: true,
    },
    [
      { name: "Open Combined Transcript", value: "open" },
      { name: "Close", value: "close" },
    ]
  )

  if (view === "open") {
    await editor(combined || "No transcripts")
  }
}

try {
  await main()
} catch (error: unknown) {
  await div(md(`## Error\n\n${errorMessage(error)}`))
}