import torch
from PIL import Image
import open_clip

model, _, preprocess = open_clip.create_model_and_transforms('ViT-g-14', pretrained='laion2b_s12b_b42k')
model.eval()  # model in train mode by default, impacts some models with BatchNorm or stochastic depth active
tokenizer = open_clip.get_tokenizer('ViT-g-14')

image = preprocess(Image.open("test/ts.jpg")).unsqueeze(0)
text = tokenizer(["a picture of taylor swift", "a picture of AOC"])
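The snippet above only prepares the model and its inputs; to actually compare the image against the two prompts, the usual OpenCLIP zero-shot pattern (encode, normalize, softmax over scaled cosine similarities) would follow, roughly like this:

with torch.no_grad(), torch.cuda.amp.autocast():
    image_features = model.encode_image(image)
    text_features = model.encode_text(text)
    image_features /= image_features.norm(dim=-1, keepdim=True)
    text_features /= text_features.norm(dim=-1, keepdim=True)
    # Scaled cosine similarities turned into one probability per prompt
    text_probs = (100.0 * image_features @ text_features.T).softmax(dim=-1)

print("Label probs:", text_probs)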
url = 'http://localhost:8000/classify/'
files = {'images': open('path_to_image.jpg', 'rb')}
data = {'prompts': ['a picture of taylor swift', 'a picture of AOC']}
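The request itself is not shown here; assuming the FastAPI service below is running locally, posting this multipart payload with requests would look roughly like:

import requests

# Send the image file and the prompts as multipart form data
response = requests.post(url, files=files, data=data)
print(response.json())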
from fastapi import FastAPI, File, Form, UploadFile, HTTPException
from fastapi.responses import JSONResponse
import torch
from PIL import Image
import open_clip
from io import BytesIO
import numpy as np
app = FastAPI()
# Load the CLIP model
model, _, preprocess = open_clip.create_model_and_transforms('ViT-g-14', pretrained='laion2b_s12b_b42k')
model.eval()
tokenizer = open_clip.get_tokenizer('ViT-g-14')
@app.post("/classify/") asyncdefclassify_images(images: list[UploadFile] = File(...), prompts: list[str]): ifnot images ornot prompts: raise HTTPException(status_code=400, detail="Images and prompts cannot be empty.")
    try:
        # Tokenize the prompts once for all images
        text_tokens = tokenizer(prompts)

        # Load and preprocess all images, then stack into a single tensor
        image_tensors = []
        for image_file in images:
            pil_image = Image.open(BytesIO(await image_file.read()))
            processed_image = preprocess(pil_image)
            image_tensors.append(processed_image)
        image_batch = torch.stack(image_tensors)

        # Perform inference in a single pass
        with torch.no_grad(), torch.cuda.amp.autocast():
            image_features = model.encode_image(image_batch)
            text_features = model.encode_text(text_tokens)
            image_features /= image_features.norm(dim=-1, keepdim=True)
            text_features /= text_features.norm(dim=-1, keepdim=True)
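            # NOTE: the step that builds `results` was missing from the original listing.
            # This is an assumed reconstruction using standard CLIP scoring:
            # scaled cosine similarities, softmaxed over the prompts for each image.
            text_probs = (100.0 * image_features @ text_features.T).softmax(dim=-1)

        # One dict per image, mapping each prompt to its probability
        results = [
            {prompt: float(prob) for prompt, prob in zip(prompts, image_probs)}
            for image_probs in text_probs
        ]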
        return JSONResponse(content={"results": results})
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
Step 2: How to test the modified service

To test the service, you can write a script that sends the base64-encoded images together with the prompts as JSON. Here is how to do that with Python's requests library:
import requests
import base64
url = 'http://localhost:8000/classify/'
# Encode the image to base64
def encode_image_to_base64(filepath):
    with open(filepath, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')

# Prepare data
data = {
    "images": [encode_image_to_base64("path_to_image1.jpg"), encode_image_to_base64("path_to_image2.jpg")],
    "prompts": ["a picture of taylor swift", "a picture of AOC"]
}
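The listing stops after building the payload. Assuming the modified endpoint accepts this JSON body, the request itself would be something like:

# Send the base64-encoded images and prompts as a JSON body
response = requests.post(url, json=data)
print(response.status_code)
print(response.json())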
import requests
import base64
from PIL import Image
from io import BytesIO
url = 'http://localhost:8000/classify/'
# Function to encode a PIL image to base64
def encode_pil_image_to_base64(pil_img):
    buffered = BytesIO()
    pil_img.save(buffered, format="JPEG")  # You can change the format to PNG or other supported types depending on the image
    return base64.b64encode(buffered.getvalue()).decode('utf-8')

# Load or create a PIL image (example)
image1 = Image.open("path_to_image1.jpg")  # Assume this is how you might have loaded the image
image2 = Image.open("path_to_image2.jpg")  # Another example image

# Prepare data with the images already loaded into PIL
data = {
    "images": [encode_pil_image_to_base64(image1), encode_pil_image_to_base64(image2)],
    "prompts": ["a picture of taylor swift", "a picture of AOC"]
}
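As with the previous script, the POST call itself is not shown; assuming the same JSON contract, it would again be roughly:

# Send the in-memory images (base64-encoded) and the prompts as JSON
response = requests.post(url, json=data)
print(response.json())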