add readme
Browse files- README.md +1 -0
- torch-ext/sage_attention/core.py +1 -1
README.md
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
This is a build of [SageAttention](https://github.com/thu-ml/SageAttention) compatible with the `kernels` library.
|
torch-ext/sage_attention/core.py
CHANGED
|
@@ -116,7 +116,7 @@ def sageattn(
|
|
| 116 |
- The tensors `q`, `k`, and `v` must have the dtype ``torch.float16`` or ``torch.bfloat16``
|
| 117 |
- All tensors must be on the same cuda device.
|
| 118 |
"""
|
| 119 |
-
|
| 120 |
arch = get_cuda_arch_versions()[q.device.index]
|
| 121 |
if arch == "sm80":
|
| 122 |
return sageattn_qk_int8_pv_fp16_cuda(
|
|
|
|
| 116 |
- The tensors `q`, `k`, and `v` must have the dtype ``torch.float16`` or ``torch.bfloat16``
|
| 117 |
- All tensors must be on the same cuda device.
|
| 118 |
"""
|
| 119 |
+
print("########tensor_layout", tensor_layout)
|
| 120 |
arch = get_cuda_arch_versions()[q.device.index]
|
| 121 |
if arch == "sm80":
|
| 122 |
return sageattn_qk_int8_pv_fp16_cuda(
|