BLIP reports an error #54

Description

@ycformal

I'm using this code to test GQA:

```python
import os
os.environ["HF_HOME"] = "..."
os.environ['TORCH_HOME'] = '...'
import sys
import json
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from main_simple_lib import *

from tqdm import tqdm

folder_name = 'results_vipergpt'
if not os.path.exists(folder_name):
    os.makedirs(folder_name)
# test my method
data_GQA = json.load(open('../visagent/sampled_GQA/sampled_data.json'))

for data in tqdm(data_GQA):
    image = load_image('../visagent/sampled_GQA/' + data['imageId'] + '.jpg')
    question = data['question']
    print(question)
    answer = data['answer']
    print('reference answer:', answer)
    try:
        code = get_code(question)
    except Exception as e:
        try:
            code = get_code(question)
        except Exception as e:
            code = get_code(question)
    print(f'code:\n```\n{code}\n```')
    try:
        answer = execute_code(code, image, show_intermediate_steps=False)
    except Exception as e:
        answer = 'Runtime error: ' + str(e)

    print('Answer:', answer)
    print('\n')
```
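The triple-nested try/except above is just "retry get_code up to three times, re-raising on the final failure"; a loop-based equivalent, as a minimal sketch (the helper name is hypothetical, not part of main_simple_lib):

```python
def get_code_with_retries(question, attempts=3):
    # Retry Codex code generation up to `attempts` times;
    # re-raise the last exception if every attempt fails.
    for i in range(attempts):
        try:
            return get_code(question)  # get_code comes from main_simple_lib
        except Exception:
            if i == attempts - 1:
                raise
```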

The base config:

```yaml
multiprocessing: False                              # Run the models and samples in parallel
path_pretrained_models: './pretrained_models'       # Path to the pretrained models
execute_code: False                                 # Execute the code after generating it. Only applies to main_batch
prompt: ./prompts/benchmarks/gqa.prompt

dataset:                                            # Dataset configuration
    dataset_name: 'MyDataset'                       # Dataset name
    data_path: 'data'                               # Dataset path
    split: ''                                       # Dataset split. If '', it assumes there is only one split
    max_samples:                                    # Maximum number of samples to load
    batch_size: 20                                  # Batch size
    start_sample: 0                                 # Start sample index. Only used if max_samples is not None

load_models:
    maskrcnn: True
    clip: False
    glip: True
    owlvit: False
    tcl: False
    gpt3_list: False
    gpt3_qa: True
    gpt3_guess: False
    gpt3_general: True
    depth: False
    blip: True
    saliency: False
    codex: True
    xvlm: True

detect_thresholds:                                  # Thresholds for the models that perform detection
    glip: 0.5
    maskrcnn: 0.8
    owlvit: 0.1
ratio_box_area_to_image_area: 0.0                   # Any detected patch under this size will not be returned
crop_larger_margin: True                            # Increase size of crop by 10% to include more context

verify_property:                                    # Parameters for verify_property
    model: xvlm                                     # Model to use for verify_property
    thresh_clip: 0.6
    thresh_tcl: 0.25
    thresh_xvlm: 0.6

best_match_model: xvlm                              # Which model to use for best_[image, text]_match

gpt3:                                               # GPT-3 configuration
    n_votes: 1                                      # Number of tries to use for GPT-3. Use with temperature > 0
    qa_prompt: ./prompts/gpt3/gpt3_qa.txt
    guess_prompt: ./prompts/gpt3/gpt3_process_guess.txt
    temperature: 0.                                 # Temperature for GPT-3. Almost deterministic if 0
    model: chatgpt                                  # See openai.Model.list() for available models

codex:
    temperature: 0.                                 # Temperature for Codex. (Almost) deterministic if 0
    best_of: 1                                      # Number of tries to choose from. Use when temperature > 0
    max_tokens: 512                                 # Maximum number of tokens to generate for Codex
    prompt: ./prompts/chatapi.prompt                # Codex prompt file, which defines the API (doesn't support video for now due to token limits)
    model: gpt-3.5-turbo                            # Codex model to use. [code-davinci-002, gpt-3.5-turbo, gpt-4]. See openai.Model.list() for available models

# Saving and loading parameters
save: True                                          # Save the results to a file
save_new_results: True                              # If False, overwrite the results file
results_dir: ./results/                             # Directory to save the results
use_cache: True                                     # Use cache for the models that support it (now, GPT-3)
clear_cache: False                                  # Clear stored cache
use_cached_codex: False                             # Use previously-computed Codex results
cached_codex_path: ''                               # Path to the csv results file from which to load Codex results
log_every: 20                                       # Log accuracy every n batches
wandb: False                                        # Use Weights and Biases

blip_half_precision: True                           # Use 8-bit (faster but slightly less accurate) for BLIP if True
blip_v2_model_type: blip2-flan-t5-xxl               # Which model to use for BLIP-2

use_fixed_code: False                               # Use a fixed code for all samples (do not generate with Codex)
fixed_code_file: ./prompts/fixed_code/blip2.prompt  # Path to the fixed code file
```
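Note the comment on `blip_half_precision`: it says 8-bit. With a HuggingFace-style loader that corresponds to bitsandbytes' `load_in_8bit` path, which is what routes BLIP-2's matmuls through cublasLt. A minimal sketch of such a loader, with the model id inferred from `blip_v2_model_type` above — this is illustrative, not the repo's actual loading code:

```python
from transformers import Blip2ForConditionalGeneration, Blip2Processor

# Illustrative only: what the "Use 8-bit" comment on blip_half_precision
# presumably toggles. Model id assumed from blip_v2_model_type above.
model_id = "Salesforce/blip2-flan-t5-xxl"
processor = Blip2Processor.from_pretrained(model_id)
model = Blip2ForConditionalGeneration.from_pretrained(
    model_id,
    load_in_8bit=True,   # bitsandbytes int8 path; matmuls go through cublasLt
    device_map="auto",   # the 8-bit loader needs a device map
)
```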

Running `python main.py`, I get the BLIP error on some samples:

```
error detected
cuBLAS API failed with status 15
A: torch.Size([257, 1408]), B: torch.Size([4224, 1408]), C: (257, 4224); (lda, ldb, ldc): (c_int(8224), c_int(135168), c_int(8224)); (m, n, k): (c_int(257), c_int(4224), c_int(1408))
Error in blip model: cublasLt ran into an error!
```

```python
def execute_command(image) -> str:
    image_patch = ImagePatch(image)
    appliance_patches = image_patch.find('appliance')
    rounded_appliances = [patch for patch in appliance_patches if patch.verify_property('appliance', 'rounded')]
    if rounded_appliances:
        return rounded_appliances[0].simple_query('What is this?')
    else:
        return 'No rounded appliance found in the image.'
```
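For context, cuBLAS status 15 is CUBLAS_STATUS_NOT_SUPPORTED, and the A/B/C shape dump is bitsandbytes' debug print from its int8 cublasLt matmul, so the failure is inside BLIP-2's 8-bit path rather than in the generated program (the 257×1408 activation against a 4224×1408 weight looks like the vision tower's fused QKV projection, since 4224 = 3 × 1408). bitsandbytes' int8 kernels typically need int8 tensor cores (compute capability 7.5, i.e. Turing, or newer); a quick check, with that threshold being an assumption on my part:

```python
import torch

# cuBLAS status 15 is CUBLAS_STATUS_NOT_SUPPORTED; bitsandbytes' int8 matmul
# typically requires int8 tensor cores (compute capability >= 7.5).
major, minor = torch.cuda.get_device_capability()
if (major, minor) < (7, 5):
    print(f'Compute capability {major}.{minor}: the 8-bit kernels are likely '
          'unsupported on this GPU.')
```

If that is the cause, setting `blip_half_precision: False` in the config (loading BLIP-2 without the 8-bit path) should avoid the cublasLt call entirely, at the cost of more GPU memory.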
