import OpenAI from "openai"

const openai = new OpenAI({
  baseURL: "https://api.aiapilab.com/v1",
  // Read the key from the environment rather than hard-coding it.
  apiKey: process.env.AIAPILAB_API_KEY,
})

async function main() {
  const completion = await openai.chat.completions.create({
    model: "meta-llama/llama-3.2-90b-vision-instruct",
    messages: [
      {
        role: "user",
        content: [
          {
            type: "text",
            text: "What's in this image?",
          },
          {
            type: "image_url",
            image_url: {
              url: "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
            },
          },
        ],
      },
    ],
  })

  console.log(completion.choices[0].message)
}

main()
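If the image lives on disk rather than at a public URL, it can be inlined as a base64 data URL instead. The sketch below assumes the endpoint accepts OpenAI-style data URLs in the `image_url` field, mirroring the request shape above; `describeLocalImage` and the file path are illustrative, not part of the provider's documented API.

```javascript
import fs from "node:fs"
import OpenAI from "openai"

const openai = new OpenAI({
  baseURL: "https://api.aiapilab.com/v1",
  apiKey: process.env.AIAPILAB_API_KEY,
})

async function describeLocalImage(path) {
  // Read the image from disk and encode it as a base64 data URL.
  const base64 = fs.readFileSync(path).toString("base64")

  const completion = await openai.chat.completions.create({
    model: "meta-llama/llama-3.2-90b-vision-instruct",
    messages: [
      {
        role: "user",
        content: [
          { type: "text", text: "What's in this image?" },
          {
            type: "image_url",
            image_url: { url: `data:image/jpeg;base64,${base64}` },
          },
        ],
      },
    ],
  })

  return completion.choices[0].message
}

// Hypothetical local file; replace with your own image path.
describeLocalImage("./boardwalk.jpg").then(console.log)
```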
| Feature/Aspect | Model A (e.g., GPT-4) | Model B (e.g., Gemini) | Llama 3.2 90B Vision Instruct |
|---|---|---|---|
| Parameters | 175 billion | 70 billion | 90 billion |
| Context Length | 32,000 tokens | 64,000 tokens | 128,000 tokens |
| Supported Modalities | Text only | Text and Image | Text and Image |
| Fine-tuning Techniques | RLHF and instruction tuning | Pretraining with diverse datasets | Supervised fine-tuning and RLHF |
| Performance Benchmarking | Strong performance in general tasks | Competitive in visual tasks | High accuracy in VQA, image captioning |
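The 128,000-token context window is the most practical differentiator in the table for long, multi-turn prompts. Below is a minimal pre-flight length check, assuming a rough 4-characters-per-token heuristic rather than the model's actual tokenizer; `estimateTokens` and `fitsInContext` are illustrative helpers, not part of any SDK.

```javascript
// Rough guard against exceeding the 128K-token context window listed above.
// The ~4 characters-per-token ratio is only a rule of thumb; exact counts
// would require the model's own tokenizer.
const CONTEXT_LIMIT = 128_000

function estimateTokens(text) {
  return Math.ceil(text.length / 4)
}

function fitsInContext(messages, reservedForReply = 1_024) {
  const promptTokens = messages
    .map((m) => (typeof m.content === "string" ? m.content : JSON.stringify(m.content)))
    .reduce((sum, text) => sum + estimateTokens(text), 0)
  return promptTokens + reservedForReply <= CONTEXT_LIMIT
}

// Example: check a long transcript before sending it to the model.
const messages = [{ role: "user", content: "..." }]
console.log(fitsInContext(messages))
```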