openbmb/InfLLM-V2-Short-Dense-Base

@@ -48,8 +48,8 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
 # Load model and tokenizer
 model_id = "openbmb/InfLLM-V2-Short-Dense-Base"
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-model = AutoModelForCausalLM.from_pretrained(model_id).to(device)
 # Create a prompt
 prompt = "The capital of France is"

 # Load model and tokenizer
 model_id = "openbmb/InfLLM-V2-Short-Dense-Base"
+tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(model_id,trust_remote_code=True).to(device,dtype=torch.bfloat16)
 # Create a prompt
 prompt = "The capital of France is"