🚀 The latest model release (moondream2-2025-01-09) is currently only available via Hugging Face and can be used with the Hugging Face Transformers library. We are actively working on integrating it into our client libraries.
Installing pyvips (required for local Hugging Face installation)
For Linux/macOS users, run:

```bash
pip install pyvips-binary pyvips
```
For Windows users:
- Download `vips-dev-w64-all-8.16.0.zip` (64-bit) or `vips-dev-w32-all-8.16.0.zip` (32-bit) from the libvips Windows releases
- Extract the archive and copy the DLLs from `vips-dev-8.16\bin` to your project root directory (easier) or to `System32` (requires admin privileges)
- If you instead add the `bin` directory to your system `PATH`, restart your terminal session for the change to take effect
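To confirm pyvips can find the libvips libraries, a minimal sanity check (if the import fails on Windows, the DLLs are not being located):

```python
# Quick sanity check that pyvips loads and can see libvips.
import pyvips

print("pyvips binding version:", pyvips.__version__)
# version(0/1/2) returns the major/minor/micro version of the libvips library.
print("libvips version:", pyvips.version(0), pyvips.version(1), pyvips.version(2))
```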
Using the Latest Moondream2 via Hugging Face
The latest version of Moondream2 can be used directly with Hugging Face Transformers:
```python
from transformers import AutoModelForCausalLM
from PIL import Image

model = AutoModelForCausalLM.from_pretrained(
    "vikhyatk/moondream2",
    revision="2025-01-09",
    trust_remote_code=True,
    # Uncomment to run on GPU.
    # device_map={"": "cuda"}
)

# Load the image to analyze
image = Image.open("./path/to/image.jpg")

# Captioning
print("Short caption:")
print(model.caption(image, length="short")["caption"])

print("\nNormal caption:")
for t in model.caption(image, length="normal", stream=True)["caption"]:
    # Streaming generation example, supported for caption() and query()
    print(t, end="", flush=True)

# Visual Querying
print("\nVisual query: 'How many people are in the image?'")
print(model.query(image, "How many people are in the image?")["answer"])

# Object Detection
print("\nObject detection: 'face'")
objects = model.detect(image, "face")["objects"]
print(f"Found {len(objects)} face(s)")

# Pointing
print("\nPointing: 'person'")
points = model.point(image, "person")["points"]
print(f"Found {len(points)} person(s)")
```
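The detection and pointing results are plain dictionaries, so they are straightforward to overlay on the image. Below is a minimal sketch, assuming (as in recent Moondream releases) that `detect()` returns boxes as normalized `x_min`/`y_min`/`x_max`/`y_max` values and `point()` returns normalized `x`/`y` centers; check the model card for the exact schema of the revision you load:

```python
from PIL import Image, ImageDraw

# Assumes `model` is the Moondream model loaded above, and that all
# coordinates are normalized to [0, 1] (an assumption to verify).
image = Image.open("./path/to/image.jpg").convert("RGB")
draw = ImageDraw.Draw(image)
w, h = image.size

# Draw a box around each detected object.
for obj in model.detect(image, "face")["objects"]:
    draw.rectangle(
        (obj["x_min"] * w, obj["y_min"] * h, obj["x_max"] * w, obj["y_max"] * h),
        outline="red",
        width=3,
    )

# Mark each point result with a small dot.
for pt in model.point(image, "person")["points"]:
    x, y = pt["x"] * w, pt["y"] * h
    draw.ellipse((x - 5, y - 5, x + 5, y + 5), fill="blue")

image.save("annotated.jpg")
```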
Quick Start
Moondream runs either through the hosted Cloud API or as a local deployment (see the Hugging Face instructions above). The steps below walk through the Cloud API.
Get Your API Key
Visit console.moondream.ai to create an account and get your API key.
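Once the client library is installed (next step), the key can also be read from an environment variable instead of being hardcoded; in this sketch `MOONDREAM_API_KEY` is an illustrative name of our choosing, since the client simply takes whatever string you pass to `api_key`:

```python
import os

import moondream as md

# MOONDREAM_API_KEY is an arbitrary variable name chosen for this example.
model = md.vl(api_key=os.environ["MOONDREAM_API_KEY"])
```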
Installation and Usage
```python
# Install dependencies in your project directory:
#   pip install moondream

import moondream as md
from PIL import Image

# Initialize with API key
model = md.vl(api_key="your-api-key")

# Load an image
image = Image.open("./path/to/image.jpg")

# Encode image (recommended for multiple operations)
encoded_image = model.encode_image(image)

# Generate a caption (length options: "short" or "normal" (default))
caption = model.caption(encoded_image)["caption"]
print("Caption:", caption)

# Stream the caption
for chunk in model.caption(encoded_image, stream=True)["caption"]:
    print(chunk, end="", flush=True)

# Ask a question
answer = model.query(encoded_image, "What's in this image?")["answer"]
print("Answer:", answer)

# Stream the answer
for chunk in model.query(encoded_image, "What's in this image?", stream=True)["answer"]:
    print(chunk, end="", flush=True)

# Detect objects
detect_result = model.detect(image, "subject")  # change "subject" to what you want to detect
print("Detected objects:", detect_result["objects"])

# Point at an object
point_result = model.point(image, "subject")  # change "subject" to what you want to point at
print("Points:", point_result["points"])
```
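Since `encode_image()` runs once and its result can be reused, batching several questions against one image avoids repeated encoding work. A small usage sketch building on the variables above:

```python
# Reuse the encoded image across multiple queries instead of re-encoding.
questions = [
    "What's in this image?",
    "What colors stand out the most?",
]
for question in questions:
    answer = model.query(encoded_image, question)["answer"]
    print(f"{question} -> {answer}")
```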
Explore Our Cloud Endpoints
| Endpoint | Description |
|---|---|
| 💬 `/query` | Ask natural language questions about images and receive detailed answers |
| 📝 `/caption` | Generate accurate and natural image captions |
| 🔍 `/detect` | Detect and locate objects in images |
| 📍 `/point` | Get precise coordinate locations for objects in images |
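The client library wraps these endpoints for you, but they can also be called directly over HTTP. The sketch below shows the general shape of a raw `/query` call; the endpoint URL, auth header, and payload field names are assumptions based on Moondream's public API docs, so confirm them against the current API reference before relying on this:

```python
import base64

import requests

# Encode the image as a base64 data URL for the request body.
with open("./path/to/image.jpg", "rb") as f:
    image_b64 = base64.b64encode(f.read()).decode("utf-8")

response = requests.post(
    "https://api.moondream.ai/v1/query",           # assumed endpoint URL
    headers={"X-Moondream-Auth": "your-api-key"},  # assumed auth header name
    json={
        "image_url": f"data:image/jpeg;base64,{image_b64}",  # assumed field name
        "question": "What's in this image?",
    },
)
print(response.json())
```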