Namespace(annotations_path='/projects/nlp_mgr-AUDIT/data/dataset/sxk199/mscoco/annotations', features_path='/projects/nlp_mgr-AUDIT/data/dataset/sxk199/mscoco/resnet101_faster_rcnn_genome_imgfeats', dataset='mscoco', masked_splits=None, dataset_splits_dir=None, from_pretrained='/home/sxk199/mnt/volta/cFGANaAtmN', config_file='/home/sxk199/code/multimodal-gender-bias/src/volta/config/original_lxmert.json', resume_file='', output_dir='/home/sxk199/experiments-output/multimodal/pretrain/lxmert_original', logdir='logs/mscoco', timestamp=True, max_seq_length=20, train_batch_size=256, learning_rate=5e-05, grad_acc_steps=1, num_train_epochs=1.0, p_neutral_cap=0.15, warmup_proportion=0.05, warmup_steps=None, last_epoch=None, seed=42, local_rank=-1, num_workers=25, distributed=False, objective=1, adam_epsilon=1e-06, adam_betas=[0.9, 0.999], weight_decay=0.01, clip_grad_norm=1.0, use_wandb=True, wandb_entity='lcp', wandb_run_name='lxmert_original_train') { "add_global_imgfeat": null, "attention_probs_dropout_prob": 0.1, "bert_layer2attn_sublayer": { "0": 0, "1": 2, "10": 22, "11": 25, "2": 4, "3": 6, "4": 8, "5": 10, "6": 12, "7": 14, "8": 16, "9": 19 }, "bert_layer2ff_sublayer": { "0": 1, "1": 3, "10": 23, "11": 26, "2": 5, "3": 7, "4": 9, "5": 11, "6": 13, "7": 15, "8": 17, "9": 20 }, "bert_model": "bert-base-uncased", "clf_hidden_size": 1536, "do_lower_case": true, "fixed_embs": null, "fixed_layers": [], "fusion_act": "relu", "fusion_method": "text", "has_mapping": "linear", "has_mapping_bias": false, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "image_embeddings": "lxmert", "image_head_ln": true, "initializer_range": 0.02, "intermediate_size": 3072, "itm_dim": 2, "layer_norm_eps": 1e-12, "load_x_model": false, "m_encoder": null, "m_layer": 0, "max_position_embeddings": 512, "model": "bert", "norm_embeddings": false, "num_attention_heads": 12, "num_locs": 4, "objective": 0, "pad_token_id": 0, "pooler_size": 768, "qa_num_answers": 9500, "qa_task_weight": 1, "shared_sublayers": [ 18, 21, 24, 27, 30 ], "single_ln_sublayers": [], "sublayer2attn_hidden_size": {}, "sublayer2intermediate_size": {}, "sublayer2num_attention_heads": {}, "sublayer2v_attn_hidden_size": {}, "sublayer2v_intermediate_size": {}, "sublayer2v_num_attention_heads": {}, "t_ff_sublayers": [ 1, 3, 5, 7, 9, 11, 13, 15, 17, 20, 23, 26, 29, 32 ], "tt_attn_sublayers": [ 0, 2, 4, 6, 8, 10, 12, 14, 16, 19, 22, 25, 28, 31 ], "tv_attn_sublayers": [ 18, 21, 24, 27, 30 ], "type_vocab_size": 2, "v_attention_probs_dropout_prob": 0.1, "v_coordinate_embeddings_dim": null, "v_feature_size": 2048, "v_ff_sublayers": [ 1, 3, 5, 7, 9, 20, 23, 26, 29, 32 ], "v_hidden_act": "gelu", "v_hidden_dropout_prob": 0.1, "v_hidden_size": 768, "v_initializer_range": 0.02, "v_intermediate_size": 3072, "v_layers": [], "v_num_attention_heads": 12, "v_pooler_size": 1024, "visual_target_weights": { "3": 6.667, "4": 6.667, "5": 6.667 }, "visualization": false, "vocab_size": 30522, "vt_attn_sublayers": [ 18, 21, 24, 27, 30 ], "vv_attn_sublayers": [ 0, 2, 4, 6, 8, 19, 22, 25, 28, 31 ] }