# NOTE(review): the lines below ("Spaces: / Runtime error / Runtime error") are
# HuggingFace-Spaces page-scrape residue, not part of the configuration.
model:
  arch: mm_gpt4
  model_type: pretrain_vicuna
  freeze_imagebind: True
  freeze_qformer: False
  max_txt_len: 160
  end_sym: "###"
  low_resource: False
  prompt_path: "prompts/alignment.txt"
  prompt_template: '###Human: {} ###Assistant: '
  ckpt: [
    "bubogpt/output/mmgpt4_stage2_mm_blipvision_13b/20230701204/checkpoint_4.pth",
  ]
  with_bind_head: False
  use_blip_vision: True
  proj_model: "checkpoints/prerained_minigpt4_13b.pth"
  llama_model: "/mnt/bn/bykang/chixma/data/pretrained_models/vicuna-13b-v0/"
  joiner_cfg:
    # NOTE: uncomment below to share qformer across modalities
    # share_key: vision
    vision:
      feat_dim: 1408
      post_dims: [768,]
      num_query_token: 32
      freeze_qformer: True
    audio:
      feat_dim: 768
datasets:
  default: # Double check
    vis_processor:
      eval:
        name: "imagebind_vision_eval"
        image_size: 224
    text_processor:
      eval:
        name: "imagebind_caption"
    audio_processor:
      eval:
        name: "imagebind_audio_eval"
        # d2c18
        # clip_duration: 2
        # clips_per_video: 18
        # d5c6
        use_global: True
        clip_duration: 5
        clips_per_video: 6
run:
  task: image_text_pretrain
  evaluate: True