Skip to content

Crashing when processing image using MiniCPM 2.6 #1087

@jabberjabberjabber

Description

@jabberjabberjabber

Describe the Issue
When a specific image is sent to Kobold while using MiniCPM 2.6, the Kobold server crashes. The same image works with Llava 1.5.

Additional Information:
Image causing crash:
2024-02-24_171639
Crash log:
crash.txt
Script used:

import argparse
import base64
import requests
from PIL import Image
import io

class ImageProcessor:
    """Converts an image file into a base64-encoded JPEG string."""

    def __init__(self):
        pass

    def process_image(self, file_path):
        """Load ``file_path``, re-encode it as JPEG and return it as base64 text.

        Images whose mode is not ``'RGB'`` are converted to RGB first. The
        JPEG is written at quality 95 into an in-memory buffer.

        Returns:
            The base64 string (decoded as UTF-8), or ``None`` if anything
            goes wrong; in that case the error is printed to stdout.
        """
        try:
            with Image.open(file_path) as source:
                # Only convert when the mode is not already RGB.
                rgb_image = source if source.mode == 'RGB' else source.convert('RGB')

                buffer = io.BytesIO()
                rgb_image.save(buffer, format='JPEG', quality=95)

            # getvalue() returns the whole buffer regardless of stream position.
            raw_jpeg = buffer.getvalue()
            return base64.b64encode(raw_jpeg).decode('utf-8')

        except Exception as e:
            print(f"Error processing image: {str(e)}")
            return None
    


class LLMProcessor:
    """Small client that posts an image to an LLM API's generate endpoint.

    Args:
        api_url: Base URL of the API server. A trailing slash is tolerated.
        api_password: Sent as a ``Bearer`` token in the Authorization header.
        timeout: Optional timeout passed to ``requests.post``. The default
            ``None`` waits indefinitely, matching the previous behaviour.
    """

    def __init__(self, api_url, api_password, timeout=None):
        self.api_url = api_url
        # Strip trailing slashes so "http://host:5001/" does not yield
        # "http://host:5001//api/v1/generate".
        self.generate_url = api_url.rstrip("/") + "/api/v1/generate"
        self.timeout = timeout
        self.headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_password}",
        }

    def send_image_to_llm(self, base64_image):
        """Send one base64-encoded image with a fixed prompt and return the text.

        Args:
            base64_image: Base64 string of the image to describe.

        Returns:
            The ``"text"`` field of the first entry in the response's
            ``"results"`` list, or ``None`` if the request fails, the server
            answers with a non-200 status, or the response body does not have
            the expected shape. Failures are printed to stdout.
        """
        payload = {
            "prompt": "Describe this image in detail.",
            "max_length": 300,
            "images": [base64_image],
        }

        try:
            response = requests.post(
                self.generate_url,
                json=payload,
                headers=self.headers,
                timeout=self.timeout,
            )
        except requests.RequestException as e:
            # Covers connection failures (e.g. the server going down while
            # handling the request) and timeouts; report them the same way as
            # other failures instead of raising a traceback.
            print(f"Error: request to {self.generate_url} failed - {e}")
            return None

        if response.status_code != 200:
            print(f"Error: {response.status_code} - {response.text}")
            return None

        try:
            return response.json()["results"][0].get("text")
        except (ValueError, KeyError, IndexError, TypeError, AttributeError) as e:
            # A 200 response whose body is not JSON or lacks "results".
            print(f"Error: unexpected response format - {e!r}")
            return None

def main():
    """Command-line entry point: encode an image and ask the LLM to describe it.

    Parses the image path plus optional ``--api-url`` and ``--api-password``
    arguments, converts the image to base64, sends it to the API and prints
    either the response or a failure message.
    """
    cli = argparse.ArgumentParser(description="Send an image to LLM API")
    cli.add_argument("image_path", help="Path to the image file")
    cli.add_argument("--api-url", default="http://localhost:5001", help="URL for the LLM API")
    cli.add_argument("--api-password", default="", help="Password for the LLM API")
    options = cli.parse_args()

    encoder = ImageProcessor()
    client = LLMProcessor(options.api_url, options.api_password)

    encoded = encoder.process_image(options.image_path)
    if not encoded:
        print("Failed to process the image.")
        return

    answer = client.send_image_to_llm(encoded)
    if not answer:
        print("Failed to get a response from the LLM.")
        return

    print("LLM Response:")
    print(answer)

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions