# Vision Language Models (VLM)

## Usage
import base64
from openai import OpenAI
# Function to encode the image
def encode_image(image_path):
    """Read the file at *image_path* and return its contents as a
    base64-encoded ASCII string (suitable for embedding in a data: URL).
    """
    # Binary mode ("rb"): image bytes must not pass through text decoding.
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")
# Path to the image the model should describe.
image_path = "path/to/your/image.jpg"

# Inline the image as a base64 string so it can travel in the request body.
base64_image = encode_image(image_path)

# Client pointed at the Crynux LLM bridge; modest timeout and a single retry.
client = OpenAI(
    base_url="https://bridge.crynux.io/v1/llm",
    api_key="YOUR_API_KEY",  # Replace with your actual API key
    timeout=60,
    max_retries=1,
)

# One user turn that pairs a text prompt with the inline image (data: URL).
vision_messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "What is in this image?"},
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
            },
        ],
    }
]

response = client.chat.completions.create(
    model="Qwen/Qwen2.5-VL-3B-Instruct",
    messages=vision_messages,
    max_tokens=300,
    # Ensure the node has enough VRAM
    extra_body={"vram_limit": 24},
)
print(response.choices[0].message.content)

## VRAM Requirement
Last updated