I'm using this code to test GQA:

````python
import os
os.environ["HF_HOME"] = "..."
os.environ['TORCH_HOME'] = '...'
import sys
import json

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from main_simple_lib import *
from tqdm import tqdm

folder_name = f'results_vipergpt'
if not os.path.exists(folder_name):
    os.makedirs(folder_name)

# test my method
data_GQA = json.load(open('../visagent/sampled_GQA/sampled_data.json'))
for data in tqdm(data_GQA):
    image = load_image('../visagent/sampled_GQA/' + data['imageId'] + '.jpg')
    question = data['question']
    print(question)
    answer = data['answer']
    print('reference answer:', answer)
    try:
        code = get_code(question)
    except Exception as e:
        try:
            code = get_code(question)
        except Exception as e:
            code = get_code(question)
    print(f'code:\n```\n{code}\n```')
    try:
        answer = execute_code(code, image, show_intermediate_steps=False)
    except Exception as e:
        answer = 'Runtime error: ' + str(e)
    print('Answer:', answer)
    print('\n')
````
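As an aside, the nested `try`/`except` above is just a crude way to retry `get_code` up to three times. A loop-based sketch with the same behavior (`get_code_with_retries` is my own name, not part of the repo; it assumes `get_code` from `main_simple_lib` is in scope):

```python
# Sketch only: retry get_code a few times before giving up.
def get_code_with_retries(question, attempts=3):
    for attempt in range(attempts):
        try:
            return get_code(question)
        except Exception:
            if attempt == attempts - 1:  # last attempt: propagate the error
                raise
```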
The base config:

```yaml
multiprocessing: False                              # Run the models and samples in parallel
path_pretrained_models: './pretrained_models'       # Path to the pretrained models
execute_code: False                                 # Execute the code after generating it. Only applies to main_batch
prompt: ./prompts/benchmarks/gqa.prompt

dataset:                                            # Dataset configuration
    dataset_name: 'MyDataset'                       # Dataset name
    data_path: 'data'                               # Dataset path
    split: ''                                       # Dataset split. If '', it assumes there is only one split
    max_samples:                                    # Maximum number of samples to load
    batch_size: 20                                  # Batch size
    start_sample: 0                                 # Start sample index. Only used if max_samples is not None

load_models:
    maskrcnn: True
    clip: False
    glip: True
    owlvit: False
    tcl: False
    gpt3_list: False
    gpt3_qa: True
    gpt3_guess: False
    gpt3_general: True
    depth: False
    blip: True
    saliency: False
    codex: True
    xvlm: True

detect_thresholds:                                  # Thresholds for the models that perform detection
    glip: 0.5
    maskrcnn: 0.8
    owlvit: 0.1
ratio_box_area_to_image_area: 0.0                   # Any detected patch under this size will not be returned
crop_larger_margin: True                            # Increase size of crop by 10% to include more context

verify_property:                                    # Parameters for verify_property
    model: xvlm                                     # Model to use for verify_property
    thresh_clip: 0.6
    thresh_tcl: 0.25
    thresh_xvlm: 0.6
best_match_model: xvlm                              # Which model to use for best_[image, text]_match

gpt3:                                               # GPT-3 configuration
    n_votes: 1                                      # Number of tries to use for GPT-3. Use with temperature > 0
    qa_prompt: ./prompts/gpt3/gpt3_qa.txt
    guess_prompt: ./prompts/gpt3/gpt3_process_guess.txt
    temperature: 0.                                 # Temperature for GPT-3. Almost deterministic if 0
    model: chatgpt                                  # See openai.Model.list() for available models

codex:
    temperature: 0.                                 # Temperature for Codex. (Almost) deterministic if 0
    best_of: 1                                      # Number of tries to choose from. Use when temperature > 0
    max_tokens: 512                                 # Maximum number of tokens to generate for Codex
    prompt: ./prompts/chatapi.prompt                # Codex prompt file, which defines the API (doesn't support video for now due to token limits)
    model: gpt-3.5-turbo                            # Codex model to use. [code-davinci-002, gpt-3.5-turbo, gpt-4]. See openai.Model.list() for available models

# Saving and loading parameters
save: True                                          # Save the results to a file
save_new_results: True                              # If False, overwrite the results file
results_dir: ./results/                             # Directory to save the results
use_cache: True                                     # Use cache for the models that support it (now, GPT-3)
clear_cache: False                                  # Clear stored cache
use_cached_codex: False                             # Use previously-computed Codex results
cached_codex_path: ''                               # Path to the csv results file from which to load Codex results
log_every: 20                                       # Log accuracy every n batches
wandb: False                                        # Use Weights and Biases

blip_half_precision: True                           # Use 8 bit (faster but slightly less accurate) for BLIP if True
blip_v2_model_type: blip2-flan-t5-xxl               # Which model to use for BLIP-2

use_fixed_code: False                               # Use a fixed code for all samples (do not generate with Codex)
fixed_code_file: ./prompts/fixed_code/blip2.prompt  # Path to the fixed code file
```
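(Not part of the repo, just how I sanity-check the file: a quick PyYAML snippet to confirm it parses and the BLIP flags are what I think they are. `configs/my_config.yaml` is a placeholder path.)

```python
import yaml

# Placeholder path: wherever the config above is saved on disk.
with open('configs/my_config.yaml') as f:
    cfg = yaml.safe_load(f)

# The flags most relevant to the error below.
print(cfg['load_models']['blip'], cfg['blip_half_precision'], cfg['blip_v2_model_type'])
```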
Running `python main.py`, I get the BLIP error on some cases:

```
error detected
cuBLAS API failed with status 15
A: torch.Size([257, 1408]), B: torch.Size([4224, 1408]), C: (257, 4224); (lda, ldb, ldc): (c_int(8224), c_int(135168), c_int(8224)); (m, n, k): (c_int(257), c_int(4224), c_int(1408))
Error in blip model: cublasLt ran into an error!
```
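The A/B shapes (257×1408 against 4224×1408) look like a BLIP-2 ViT forward pass hitting the 8-bit matmul path that `blip_half_precision: True` enables. To isolate it, I can trigger the BLIP-backed call directly on a failing image (sketch only; it assumes `ImagePatch` and `load_image` are in scope via `from main_simple_lib import *` as in my script, and `<imageId>` is a placeholder):

```python
# Load one of the images that triggers the cuBLAS error (placeholder id).
image = load_image('../visagent/sampled_GQA/<imageId>.jpg')
patch = ImagePatch(image)

# simple_query goes through BLIP-2, so this should exercise the same failing path.
print(patch.simple_query('What is this?'))
```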
The generated code for one of the failing samples:

```python
def execute_command(image) -> str:
    image_patch = ImagePatch(image)
    appliance_patches = image_patch.find('appliance')
    rounded_appliances = [patch for patch in appliance_patches if patch.verify_property('appliance', 'rounded')]
    if rounded_appliances:
        return rounded_appliances[0].simple_query('What is this?')
    else:
        return 'No rounded appliance found in the image.'
```
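As I understand it, `execute_code` runs this generated function on the loaded image; doing it by hand (a sketch, assuming `code` holds the string above and `ImagePatch` is already in scope) looks like:

```python
# Define execute_command from the generated code string, then call it directly.
exec(code)
print(execute_command(image))
```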