import copy

import transformers
from transformers import (AutoConfig, PretrainedConfig, Qwen2Config,
                          Qwen2ForCausalLM, SiglipVisionConfig,
                          SiglipVisionModel)


class FlashVLStaticConfig(PretrainedConfig):
    """Composite config pairing a SigLIP vision tower with a Qwen2 language model."""

    model_type = 'FlashVLStaticConfig'
    is_composition = True

    def __init__(
        self,
        vision_config=dict(model_type='siglip_vision_model'),
        llm_config=dict(architectures=['Qwen2ForCausalLM']),
        **kwargs
    ):
        super().__init__(**kwargs)
        # Promote the nested dicts into full config objects.
        self.vision_config = SiglipVisionConfig(**vision_config)
        self.llm_config = Qwen2Config(**llm_config)

    def to_dict(self):
        # Serialize both sub-configs back to plain dicts so the composite
        # config can be written to and read from config.json.
        output = copy.deepcopy(self.__dict__)
        output['vision_config'] = self.vision_config.to_dict()
        output['llm_config'] = self.llm_config.to_dict()
        return output
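A minimal usage sketch of the composite config (the `hidden_size` value below is illustrative, not taken from the original):

```python
# Instantiate the composite config from nested dicts and inspect its parts.
config = FlashVLStaticConfig(
    vision_config=dict(model_type='siglip_vision_model'),
    llm_config=dict(architectures=['Qwen2ForCausalLM'], hidden_size=1536),  # hidden_size is illustrative
)

assert isinstance(config.vision_config, SiglipVisionConfig)
assert isinstance(config.llm_config, Qwen2Config)

# to_dict() flattens both sub-configs into plain dicts, which is what lets
# the usual PretrainedConfig serialization (e.g. save_pretrained) work.
serialized = config.to_dict()
print(serialized['llm_config']['hidden_size'])  # 1536
```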