dacorvo HF Staff committed on
Commit
1fdf53e
·
verified ·
1 Parent(s): 5531e20

Update inference-cache-config/trn1/granite.json

Browse files
inference-cache-config/trn1/granite.json CHANGED
@@ -62,5 +62,13 @@
62
  "tensor_parallel_size": 8,
63
  "instance_type" : "trn1"
64
  }
 
 
 
 
 
 
 
 
65
  ]
66
  }
 
62
  "tensor_parallel_size": 8,
63
  "instance_type" : "trn1"
64
  }
65
+ ],
66
+ "ibm-granite/granite-3.3-8b-instruct": [
67
+ {
68
+ "batch_size": 1,
69
+ "sequence_length": 16384,
70
+ "tensor_parallel_size": 8,
71
+ "instance_type" : "trn1"
72
+ }
73
  ]
74
  }