import OpenAI from "openai"
// Client for the AIAPILab endpoint, driven through the OpenAI SDK.
// The API key is read from the AIAPILAB_API_KEY environment variable:
// `$AIAPILAB_API_KEY` is shell syntax and, in JavaScript, is just an
// undeclared identifier that throws a ReferenceError at load time.
const openai = new OpenAI({
  baseURL: "https://api.aiapilab.com/v1",
  apiKey: process.env.AIAPILAB_API_KEY,
});
/**
 * Sends a single chat-completion request containing a text question and an
 * image URL to the "google/gemini-pro-1.5" model, then logs the message of
 * the first returned choice.
 */
async function main() {
  const imageUrl =
    "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg";

  // One user turn made of two content parts: the question and the image.
  const userMessage = {
    role: "user",
    content: [
      { type: "text", text: "What's in this image?" },
      { type: "image_url", image_url: { url: imageUrl } },
    ],
  };

  const request = {
    model: "google/gemini-pro-1.5",
    messages: [userMessage],
  };

  const completion = await openai.chat.completions.create(request);
  const { choices } = completion;
  console.log(choices[0].message);
}
// Run the example. The returned promise is handled explicitly so a failed
// request is reported and the process exits with a non-zero status instead
// of relying on the runtime's unhandled-rejection behavior.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
Feature/Aspect | GPT-4 | Claude 3.5 | Google Gemini Pro 1.5 |
---|---|---|---|
Context Window | Up to 128,000 tokens | Up to 200,000 tokens | Up to 2 million tokens |
Multimodal Capabilities | Primarily text, with limited image support | Supports text and images | Supports text, images, audio, and video |
Real-World Applications | Effective for conversational tasks and coding | Excels in creative tasks and nuanced interactions | Analyzes lengthy documents, videos, and codebases efficiently |
Long Context Understanding | Performance degrades with larger prompts | Good but less effective with large inputs | Excellent at maintaining context across large datasets |
Reasoning and Comprehension | Strong reasoning but limited by context size | Good reasoning with nuanced understanding | High accuracy in complex tasks, near-perfect recall in long contexts |