// Name: OCR Clipboard Image to Markdown
// Description: OCR a clipboard image using a local Nanonets OCR model
// Author: benhaotang
// GitHub: benhaotang

import "@johnlindquist/kit"

// Read the image currently on the clipboard (returned as a PNG buffer)
const clipboardImage = await clipboard.readImage()

if (!clipboardImage || clipboardImage.byteLength === 0) {
  await div(md("# No image found in clipboard"))
  exit()
}

// Convert the image buffer to base64 for the data URL
const base64Image = clipboardImage.toString("base64")

// OpenAI-compatible endpoint (placeholder URL; defaults to a local Ollama server)
const OCR_ENDPOINT = await env(
  "OLLAMA_ENDPOINT",
  "http://localhost:11434/v1/chat/completions"
)
const API_KEY = await env("OLLAMA_API_KEY", "sk-1234")

try {
  setStatus({ message: "Processing image...", status: "busy" })

  const response = await post(
    OCR_ENDPOINT,
    {
      model: "Nanonets-OCR-s",
      messages: [
        {
          role: "user",
          content: [
            {
              // OCR instruction prompt (left empty; the model transcribes the attached image)
              type: "text",
              text: "",
            },
            {
              type: "image_url",
              image_url: {
                url: `data:image/png;base64,${base64Image}`,
              },
            },
          ],
        },
      ],
    },
    {
      headers: {
        Authorization: `Bearer ${API_KEY}`,
        "Content-Type": "application/json",
      },
    }
  )

  const extractedText = response.data.choices[0].message.content.trim()

  // Copy the extracted text to the clipboard
  await clipboard.writeText(extractedText)
  await toast("Text extracted and copied to clipboard!")

  // Show the extracted text for review
  await editor({
    value: extractedText,
    hint: "Extracted text (already copied to clipboard)",
  })
} catch (error) {
  await div(md(`# Error processing image\n\n${error.message}`))
}