Function
pipe
HuggingFace_LLama_Vision
Function ID
huggingface_llama__vision
Creator
@abhinavasr
Downloads
580+
Implementation of HuggingFace LLama Vision (Needs CUDA)
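Since the function needs a CUDA-capable GPU and a Hugging Face token with access to the gated Llama weights, a quick pre-flight check can save a failed run. The snippet below is a minimal, illustrative sketch and not part of the function itself; it only assumes the HUGGINGFACE_API_TOKEN environment variable that the valve below reads, plus torch and huggingface_hub being installed.

# Hypothetical pre-flight check before installing the function: it only
# verifies that a CUDA device is visible and that the Hugging Face token
# read from HUGGINGFACE_API_TOKEN actually authenticates.
import os

import torch
from huggingface_hub import HfApi

if not torch.cuda.is_available():
    raise SystemExit("No CUDA device found; the pipe calls .cuda() on the model.")

token = os.getenv("HUGGINGFACE_API_TOKEN", "")
if not token:
    raise SystemExit("Set HUGGINGFACE_API_TOKEN so the gated Llama weights can be downloaded.")

user = HfApi(token=token).whoami()  # raises if the token is invalid
print(f"Token OK for {user.get('name', 'unknown user')}; CUDA device: {torch.cuda.get_device_name(0)}")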
README
No README available
Function Code
from pydantic import BaseModel, Field
from typing import Union, Generator, Iterator
import os
import logging
import torch
from PIL import Image  # Used to load image files from disk
from transformers import AutoProcessor, MllamaForConditionalGeneration
from huggingface_hub import HfApi

logging.basicConfig(level=logging.INFO)


class Pipe:
    class Valves(BaseModel):
        MODEL_NAME: str = Field(
            default="meta-llama/Llama-3.2-11B-Vision-Instruct",
            description="Model name for Llama Vision.",
        )
        HUGGINGFACE_API_TOKEN: str = Field(
            default=os.getenv("HUGGINGFACE_API_TOKEN", ""),
            description="API token for authenticating requests to the Hugging Face API.",
        )

    def __init__(self):
        self.valves = self.Valves()
        self.model = None
        self.processor = None

    def validate_token(self, access_token) -> HfApi:
        # Build a Hugging Face API client with the supplied token.
        # Constructing HfApi does not itself verify the token;
        # api.whoami() would raise on an invalid one.
        api = HfApi(token=access_token)
        return api

    def load_model(self, access_token):
        # Lazily download and cache the processor and model on first use.
        if self.model is None or self.processor is None:
            self.processor = AutoProcessor.from_pretrained(
                self.valves.MODEL_NAME,
                token=access_token,
            )
            # MllamaForConditionalGeneration is the class for the Llama 3.2
            # Vision checkpoints; AutoModelForCausalLM cannot consume the
            # image inputs produced by the processor.
            self.model = MllamaForConditionalGeneration.from_pretrained(
                self.valves.MODEL_NAME,
                token=access_token,
                torch_dtype=torch.float16,
            ).cuda()

    def pipe(self, body: dict, __user__: dict) -> Union[str, Generator, Iterator]:
        access_token = self.valves.HUGGINGFACE_API_TOKEN
        logging.info(body)
        try:
            # Get the prompt from the body, defaulting to an empty string
            prompt = body.get("prompt", "")
            if (
                "image" in body
                and isinstance(body["image"], list)
                and len(body["image"]) > 0
            ):
                image_path = body["image"][0]  # Use the first image in the list
            else:
                image_path = None
            logging.info(prompt)
            logging.info(image_path)

            self.validate_token(access_token)
            self.load_model(access_token)
            logging.info("Model loaded successfully")

            if image_path:
                image = Image.open(image_path)  # Open the image with PIL
                # The Llama 3.2 Vision processor expects an <|image|> token in
                # the text for every image it receives.
                if "<|image|>" not in prompt:
                    prompt = "<|image|>" + prompt
                inputs = self.processor(
                    images=image, text=prompt, return_tensors="pt"
                ).to(self.model.device)
            else:
                inputs = self.processor(text=prompt, return_tensors="pt").to(
                    self.model.device
                )

            output = self.model.generate(**inputs, max_new_tokens=30)
            return self.processor.decode(output[0], skip_special_tokens=True)
        except Exception as e:
            logging.error(f"Failed to process request: {e}")
            return f"Error: {e}"


if __name__ == "__main__":
    pipe = Pipe()
    # Example usage: images are passed as a list of paths, and the second
    # argument stands in for the __user__ dict Open WebUI supplies at runtime.
    result = pipe.pipe(
        {"prompt": "Describe this image", "image": ["path/to/image.jpg"]}, {}
    )
    print(result)
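As a point of reference for callers, the Hugging Face model card for Llama 3.2 Vision builds prompts through the processor's chat template, which inserts the <|image|> token that the pipe above adds by hand. The sketch below shows that standalone flow; the model ID matches the valve default, the image path is a placeholder, and the rest is illustrative rather than part of the function.

# Hypothetical standalone check of the same model/processor pair, using the
# processor's chat template to format the prompt for one attached image.
import torch
from PIL import Image
from transformers import AutoProcessor, MllamaForConditionalGeneration

model_id = "meta-llama/Llama-3.2-11B-Vision-Instruct"
processor = AutoProcessor.from_pretrained(model_id)
model = MllamaForConditionalGeneration.from_pretrained(
    model_id, torch_dtype=torch.float16
).cuda()

messages = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": "Describe this image"},
        ],
    }
]
text = processor.apply_chat_template(messages, add_generation_prompt=True)

image = Image.open("path/to/image.jpg")  # placeholder path, as in the example above
inputs = processor(
    images=image, text=text, add_special_tokens=False, return_tensors="pt"
).to(model.device)

output = model.generate(**inputs, max_new_tokens=30)
print(processor.decode(output[0], skip_special_tokens=True))

If this runs on the target machine, the same image should work through pipe() once it is passed as a one-element list under the "image" key of the body.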