Update inference-cache-config/trn1/granite.json
Browse files
inference-cache-config/trn1/granite.json
CHANGED
|
@@ -62,5 +62,13 @@
|
|
| 62 |
"tensor_parallel_size": 8,
|
| 63 |
"instance_type" : "trn1"
|
| 64 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
]
|
| 66 |
}
|
|
|
|
| 62 |
"tensor_parallel_size": 8,
|
| 63 |
"instance_type" : "trn1"
|
| 64 |
}
|
| 65 |
+
],
|
| 66 |
+
"ibm-granite/granite-3.3-8b-instruct": [
|
| 67 |
+
{
|
| 68 |
+
"batch_size": 1,
|
| 69 |
+
"sequence_length": 16384,
|
| 70 |
+
"tensor_parallel_size": 8,
|
| 71 |
+
"instance_type" : "trn1"
|
| 72 |
+
}
|
| 73 |
]
|
| 74 |
}
|