Update README.md
Browse files
[DOC] Update vLLM startup command
README.md
CHANGED
|
@@ -58,6 +58,11 @@ see [Official vLLM Deepseek-V3.2 Guide](https://docs.vllm.ai/projects/recipes/en
|
|
| 58 |
|
| 59 |
```
|
| 60 |
export VLLM_USE_DEEP_GEMM=0 # ATM, this line is a "must" for Hopper devices
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
CONTEXT_LENGTH=32768
|
| 62 |
vllm serve \
|
| 63 |
__YOUR_PATH__/QuantTrio/DeepSeek-V3.2-AWQ \
|
|
|
|
| 58 |
|
| 59 |
```
|
| 60 |
export VLLM_USE_DEEP_GEMM=0 # ATM, this line is a "must" for Hopper devices
|
| 61 |
+
export TORCH_ALLOW_TF32_CUBLAS_OVERRIDE=1
|
| 62 |
+
export VLLM_USE_FLASHINFER_MOE_FP16=1
|
| 63 |
+
export VLLM_USE_FLASHINFER_SAMPLER=0
|
| 64 |
+
export OMP_NUM_THREADS=4
|
| 65 |
+
|
| 66 |
CONTEXT_LENGTH=32768
|
| 67 |
vllm serve \
|
| 68 |
__YOUR_PATH__/QuantTrio/DeepSeek-V3.2-AWQ \
|