// Deepgram Video Transcriber

// Name: Deepgram Video Transcriber
// Description: Transcribes selected video/audio files via Deepgram using smart formatting with automatic audio extraction to 128kbps MP3, batch processing, and optional file saving.
// Author: dallascrilley
// GitHub: dallascrilley

import "@johnlindquist/kit"

/**
 * Outcome of processing one input file.
 *
 * A populated `transcript` marks a success; a populated `error` marks a failure.
 */
interface TranscribeResult {
  /** Path of the source file this result belongs to. */
  filePath: string
  /** Transcript text, present when transcription produced output. */
  transcript?: string
  /** Failure description, present when no transcript could be produced. */
  error?: string
}

// Deepgram API key, resolved through env() under the name DEEPGRAM_API_KEY.
// The options mark the value as secret and supply the placeholder and hint text.
const deepgramKeyOptions = {
  placeholder: "Enter your Deepgram API Key",
  secret: true,
  hint: md("Get a key at https://deepgram.com"),
}
const DG_KEY = await env("DEEPGRAM_API_KEY", deepgramKeyOptions)

// Extensions (lower-case, no leading dot) treated as video input.
const videoExts: string[] = [
  "3g2", "3gp", "amv", "asf", "avi", "drc", "dv", "f4v", "flv",
  "gifs", "gifv",
  "m2ts", "m2v", "m4v", "mkv", "mng", "mov",
  "mp2", "mp4", "mpe", "mpeg", "mpg", "mpv", "mts", "mxf",
  "nsv", "ogg", "ogv", "qt", "rm", "rmvb", "roq", "svi", "ts",
  "vob", "webm", "wmv", "yuv",
]

// Audio extensions that are uploaded as-is, without an ffmpeg conversion step.
const audioDirectExts: string[] = ["mp3", "wav", "m4a"]

// Remaining audio extensions; these are listed separately from the direct-upload set.
const audioOtherExts: string[] = [
  "aac", "aiff", "amr", "ape", "au",
  "caf", "dts", "flac", "gsm",
  "mka", "mp2",
  "oga", "ogg", "opus",
  "ra", "tta", "voc", "wma", "wv",
]

// Union of every accepted extension; the Set collapses entries that appear in more than one list.
const allSupportedExts = new Set<string>(videoExts.concat(audioDirectExts, audioOtherExts))

// Ask for the input files through the drop prompt.
const dropped = await drop({
  enter: "Transcribe",
  hint: "Supports most common video/audio formats. Press Enter to start.",
  placeholder: "Drop video/audio files to transcribe",
})

// Keep only entries that expose a string `path`; a non-array drop result yields no files.
const files: string[] = []
if (Array.isArray(dropped)) {
  for (const entry of dropped as any[]) {
    if (typeof entry?.path === "string") {
      files.push(entry.path)
    }
  }
}

// Nothing usable was dropped: tell the user and stop.
if (!files.length) {
  await div(md("No files dropped. Exiting."))
  exit()
}

// Build the work list: pair each path with its lower-cased, dot-less extension
// and keep only the formats present in allSupportedExts (others are warned about).
const workList: { filePath: string; ext: string }[] = []
for (const filePath of files) {
  const ext = path.extname(filePath).toLowerCase().slice(1)
  if (allSupportedExts.has(ext)) {
    workList.push({ filePath, ext })
  } else {
    warn(`Skipping unsupported format: ${filePath}`)
  }
}

// Every dropped file was filtered out: tell the user and stop.
if (!workList.length) {
  await div(md("No supported files found. Exiting."))
  exit()
}

// If any queued item has a video or non-direct audio extension, ffmpeg will be
// invoked for it, so verify the binary can be located before starting work.
const requiresFfmpeg = workList.some(item => videoExts.includes(item.ext) || audioOtherExts.includes(item.ext))
if (requiresFfmpeg) {
  // NOTE(review): which implementation of `which` the runtime provides is not
  // visible here. Some return a value synchronously, others return a promise
  // and reject/throw when the binary is missing. Awaiting inside try/catch is
  // correct for all of them; an un-awaited promise would always look truthy
  // and the "not found" message below would never be shown.
  let ffmpegFound: unknown = null
  try {
    ffmpegFound = await which("ffmpeg")
  } catch {
    ffmpegFound = null
  }

  if (!ffmpegFound) {
    await div(
      md(
        "FFmpeg not found. Please install FFmpeg and try again.\n\n- macOS: brew install ffmpeg\n- Windows: choco install ffmpeg\n- Linux: consult your distro package manager"
      )
    )
    exit()
  }
}

// Initial progress value, set before any file is processed.
await setProgress(2)

// Counters used to derive the progress percentage.
const total = workList.length
let completed = 0

// Accumulators shared by the processing helpers.
const results: Array<TranscribeResult> = [] // one entry per processed file
const tmpFiles: Array<string> = [] // converted files to delete once the batch is done
const logLines: Array<string> = [] // "Processing: ..." lines recorded per file
/**
 * Reports batch progress as the rounded percentage of completed files,
 * capped at 98.
 */
const updateStatus = async () => {
  const percent = Math.round((completed / total) * 100)
  await setProgress(percent > 98 ? 98 : percent)
}

/**
 * Returns the path of the audio file to upload for `inputPath`.
 *
 * Files whose extension is listed in `audioDirectExts` are returned unchanged.
 * Anything else is converted with ffmpeg to a 128 kbps, 44.1 kHz MP3 at a
 * temporary path, which is recorded in `tmpFiles` for later cleanup.
 *
 * @param inputPath Path of the source media file.
 * @param ext Extension of `inputPath` without the leading dot; matched against
 *   the lower-case entries of `audioDirectExts`.
 * @returns The original path, or the path of the converted MP3.
 * @throws Whatever the ffmpeg invocation rejects with.
 */
const ensureMp3128 = async (inputPath: string, ext: string) => {
  if (audioDirectExts.includes(ext)) {
    return inputPath
  }

  // Strip the file's actual extension rather than "." + ext: basename's suffix
  // match is case-sensitive, so a lower-cased ext would leave ".MP4" in the name.
  const stem = path.basename(inputPath, path.extname(inputPath))
  const outPath = tmpPath(`${stem}-${uuid().slice(0, 8)}.mp3`)

  // Register the output before running ffmpeg so that a partial file left by a
  // failed conversion is still picked up by the temp-file cleanup.
  tmpFiles.push(outPath)

  // -vn drops any video stream; it is harmless for audio-only input.
  await $`ffmpeg -y -i ${inputPath} -vn -acodec libmp3lame -ab 128k -ar 44100 ${outPath}`
  return outPath
}

/**
 * Guesses the Content-Type of audio data from the container signature in its
 * first 12 bytes.
 *
 * - "RIFF"...."WAVE" -> "audio/wav"
 * - "ftyp" at offset 4 (ISO base media, e.g. M4A) -> "audio/mp4"
 * - anything else, including data shorter than 12 bytes -> "audio/mpeg"
 *
 * @param buffer Raw audio file contents.
 * @returns The MIME type to send with the upload.
 */
const detectAudioContentType = (buffer: Buffer): string => {
  if (buffer.length >= 12) {
    const head = buffer.toString("latin1", 0, 12)
    if (head.startsWith("RIFF") && head.slice(8, 12) === "WAVE") return "audio/wav"
    if (head.slice(4, 8) === "ftyp") return "audio/mp4"
  }
  return "audio/mpeg"
}

/**
 * Uploads audio data to Deepgram's `/v1/listen` endpoint (model `nova-3`,
 * smart formatting on) and returns the transcript text.
 *
 * The Content-Type header is derived from the data itself, because callers may
 * pass WAV or M4A files unconverted as well as MP3.
 *
 * @param buffer Raw audio file contents.
 * @returns The transcript, or an empty string when the response carries none.
 * @throws Whatever `post` rejects with (network or HTTP failures).
 */
const transcribeBuffer = async (buffer: Buffer): Promise<string> => {
  const url = "https://api.deepgram.com/v1/listen?model=nova-3&smart_format=true"
  const { data } = await post(url, buffer, {
    headers: {
      Authorization: `Token ${DG_KEY}`,
      "Content-Type": detectAudioContentType(buffer),
    },
  })

  // Probe several possible response shapes; the first non-nullish transcript wins.
  const transcript =
    data?.results?.channels?.[0]?.alternatives?.[0]?.transcript ??
    data?.channel?.alternatives?.[0]?.transcript ??
    data?.results?.alternatives?.[0]?.transcript ??
    data?.transcript ??
    ""

  return transcript
}

/**
 * Runs the pipeline for a single file: obtain an uploadable audio file, read
 * it into memory, and request a transcript.
 *
 * Errors raised while converting, reading, or transcribing are caught and
 * returned as an error result. The completed counter is incremented and the
 * progress updated whether the file succeeded or failed.
 *
 * @param filePath Path of the source file.
 * @param ext Extension of `filePath`, forwarded to `ensureMp3128`.
 * @returns A result carrying either `transcript` or `error`.
 */
const processOne = async (filePath: string, ext: string): Promise<TranscribeResult> => {
  try {
    logLines.push(`Processing: ${path.basename(filePath)}`)

    const audioPath = await ensureMp3128(filePath, ext)
    const audioBytes = await readFile(audioPath)
    const text = await transcribeBuffer(audioBytes)

    // An empty transcript is reported as a failure rather than a blank success.
    return text ? { filePath, transcript: text } : { filePath, error: "Empty transcript" }
  } catch (failure: any) {
    const message = failure?.message || String(failure)
    return { filePath, error: message }
  } finally {
    completed += 1
    await updateStatus()
  }
}

// Process every queued file concurrently. allSettled waits for all of them, so
// one rejected promise does not discard the other results.
await setProgress(5)
const settled = await Promise.allSettled(workList.map(w => processOne(w.filePath, w.ext)))

// allSettled returns outcomes in input order, so entry i belongs to workList[i].
// That lets a rejected entry be reported against its file instead of an empty path.
settled.forEach((outcome, index) => {
  if (outcome.status === "fulfilled") {
    results.push(outcome.value)
    return
  }
  results.push({
    filePath: workList[index]?.filePath ?? "",
    error: outcome.reason?.message || String(outcome.reason),
  })
})

// Best-effort removal of the converted temp files, one at a time.
// A failure to delete one file is ignored so the remaining ones are still attempted.
for (let i = 0; i < tmpFiles.length; i++) {
  try {
    await remove(tmpFiles[i])
  } catch {
    // intentionally ignored
  }
}

// All processing and cleanup is done.
await setProgress(100)

// Split results: anything with a transcript counts as a success, anything with
// an error counts as a failure (the two checks are independent).
const successes: TranscribeResult[] = []
const failures: TranscribeResult[] = []
for (const r of results) {
  if (r.transcript) successes.push(r)
  if (r.error) failures.push(r)
}

// One "=== name ===" section per successful file, separated by a blank line.
const sections: string[] = []
for (const { filePath, transcript } of successes) {
  const label = filePath ? path.basename(filePath) : "Unknown"
  sections.push(`=== ${label} ===\n${transcript?.trim()}\n`)
}
const combined = sections.join("\n")

// Copy the combined text when there is any; otherwise just notify.
const hasOutput = combined.trim().length > 0
if (!hasOutput) {
  await notify("No transcripts produced")
} else {
  await copy(combined)
  await toast("Transcripts copied to clipboard")
}

// Summary shown to the user: counts first, then one line per failure.
const countLines = [
  `Processed: ${total}`,
  `Succeeded: ${successes.length}`,
  `Failed: ${failures.length}`,
]
let summary = countLines.join("\n")

if (failures.length > 0) {
  const errorLines = failures.map(f => {
    // Prefix with the file name when the failure is tied to a known path.
    const prefix = f.filePath ? path.basename(f.filePath) + ": " : ""
    return `- ${prefix}${f.error}`
  })
  summary += "\n\nErrors:\n" + errorLines.join("\n")
}

// Render the summary inside a fenced block under a heading.
const report = ["## Deepgram Video Transcriber", "", "```", summary, "```"].join("\n")
await div(md(report))

// Optionally write each transcript to "<source name>.transcript.txt" in the
// same directory as its source file.
if (successes.length > 0) {
  const saveChoice = await arg("Save transcripts as .txt next to source files?", ["Yes", "No"])
  if (saveChoice === "Yes") {
    // Count writes that actually succeeded so the final notification is accurate.
    let savedCount = 0
    for (const r of successes) {
      try {
        const dir = path.dirname(r.filePath)
        const base = path.basename(r.filePath, path.extname(r.filePath))
        const outPath = path.join(dir, `${base}.transcript.txt`)
        const content = `=== ${path.basename(r.filePath)} ===\n${r.transcript?.trim()}\n`
        await writeFile(outPath, content, "utf8")
        savedCount++
      } catch (e: unknown) {
        // A failed write is reported and skipped; the remaining files are still attempted.
        const reason = (e instanceof Error && e.message) || String(e)
        warn(`Failed to save transcript for ${path.basename(r.filePath)}: ${reason}`)
      }
    }

    // Only claim success for what was written; previously this reported
    // "Saved transcript files" even when every write had failed.
    if (savedCount === successes.length) {
      await notify("Saved transcript files")
    } else if (savedCount > 0) {
      await notify(`Saved ${savedCount} of ${successes.length} transcript files`)
    } else {
      await notify("Failed to save transcript files")
    }
  }
}