Add files using upload-large-folder tool
Browse files- README.md +1 -1
- model-00001-of-00010.safetensors +2 -2
- model-00002-of-00010.safetensors +2 -2
- model-00003-of-00010.safetensors +2 -2
- model-00004-of-00010.safetensors +2 -2
- model-00005-of-00010.safetensors +2 -2
- model-00006-of-00010.safetensors +2 -2
- model-00007-of-00010.safetensors +2 -2
- model-00008-of-00010.safetensors +2 -2
- model-00009-of-00010.safetensors +2 -2
- model-00010-of-00010.safetensors +2 -2
- model.safetensors.index.json +28 -28
README.md
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
license: mit
|
| 3 |
pipeline_tag: text-generation
|
| 4 |
library_name: mlx
|
| 5 |
-
base_model: moonshotai/Kimi-Linear-48B-A3B-Instruct
|
| 6 |
tags:
|
| 7 |
- mlx
|
|
|
|
| 8 |
---
|
| 9 |
|
| 10 |
# mlx-community/Kimi-Linear-48B-A3B-Instruct-8bit
|
|
|
|
| 2 |
license: mit
|
| 3 |
pipeline_tag: text-generation
|
| 4 |
library_name: mlx
|
|
|
|
| 5 |
tags:
|
| 6 |
- mlx
|
| 7 |
+
base_model: moonshotai/Kimi-Linear-48B-A3B-Instruct
|
| 8 |
---
|
| 9 |
|
| 10 |
# mlx-community/Kimi-Linear-48B-A3B-Instruct-8bit
|
model-00001-of-00010.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fba1f96929c5a64a530bec39d458b1d75e3a8b4a2563ed2e6db4037a9d88c0ea
|
| 3 |
+
size 5134894271
|
model-00002-of-00010.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f3fb508aeda057d3c401f017355631a99908b559eb6f97e6c2468c1d0dba1145
|
| 3 |
+
size 5284444567
|
model-00003-of-00010.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:061631ab19d4471ccb3ebc986b24d7fbfffc8082b1f00c6ea1c000d284bc053d
|
| 3 |
+
size 5223130674
|
model-00004-of-00010.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a803fae387e4bac59871354d296341971ef018f8fe1c2093c73e58800c2635cf
|
| 3 |
+
size 5273335273
|
model-00005-of-00010.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a81405870982a8c3162c2de832e2dde61c0ced11cbcf41a2f46f5289978cf632
|
| 3 |
+
size 5284444719
|
model-00006-of-00010.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4c044009c311faa489c31b7cd7442b2098ea2aabf95033d5f96706343e2d3b1e
|
| 3 |
+
size 5223130790
|
model-00007-of-00010.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:15b7e7e35b4c6d1af49410205787b790e2809a0fa299870aecb856163260c083
|
| 3 |
+
size 5273335293
|
model-00008-of-00010.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e57c3796b3ecc1e3964ded2b88067799609a9eb544873694b9a41c91ffff037d
|
| 3 |
+
size 5284444753
|
model-00009-of-00010.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:07633e50a3f4a8c68689cb3ce6dcae1f42e584af4cd72d0b42f32dc2bc76b428
|
| 3 |
+
size 5223130744
|
model-00010-of-00010.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b18a4830f9d00bb608033374ac7e92de091d628ee19a7cd91b05ef12acd360bc
|
| 3 |
+
size 4990018667
|
model.safetensors.index.json
CHANGED
|
@@ -192,10 +192,10 @@
|
|
| 192 |
"model.layers.11.mlp.switch_mlp.up_proj.scales": "model-00005-of-00010.safetensors",
|
| 193 |
"model.layers.11.mlp.switch_mlp.up_proj.weight": "model-00005-of-00010.safetensors",
|
| 194 |
"model.layers.11.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
|
| 195 |
-
"model.layers.11.self_attn.
|
| 196 |
-
"model.layers.11.self_attn.
|
| 197 |
-
"model.layers.11.self_attn.
|
| 198 |
-
"model.layers.11.self_attn.
|
| 199 |
"model.layers.11.self_attn.kv_b_proj.biases": "model-00004-of-00010.safetensors",
|
| 200 |
"model.layers.11.self_attn.kv_b_proj.scales": "model-00004-of-00010.safetensors",
|
| 201 |
"model.layers.11.self_attn.kv_b_proj.weight": "model-00004-of-00010.safetensors",
|
|
@@ -400,10 +400,10 @@
|
|
| 400 |
"model.layers.15.mlp.switch_mlp.up_proj.scales": "model-00006-of-00010.safetensors",
|
| 401 |
"model.layers.15.mlp.switch_mlp.up_proj.weight": "model-00006-of-00010.safetensors",
|
| 402 |
"model.layers.15.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
|
| 403 |
-
"model.layers.15.self_attn.
|
| 404 |
-
"model.layers.15.self_attn.
|
| 405 |
-
"model.layers.15.self_attn.
|
| 406 |
-
"model.layers.15.self_attn.
|
| 407 |
"model.layers.15.self_attn.kv_b_proj.biases": "model-00006-of-00010.safetensors",
|
| 408 |
"model.layers.15.self_attn.kv_b_proj.scales": "model-00006-of-00010.safetensors",
|
| 409 |
"model.layers.15.self_attn.kv_b_proj.weight": "model-00006-of-00010.safetensors",
|
|
@@ -608,10 +608,10 @@
|
|
| 608 |
"model.layers.19.mlp.switch_mlp.up_proj.scales": "model-00008-of-00010.safetensors",
|
| 609 |
"model.layers.19.mlp.switch_mlp.up_proj.weight": "model-00008-of-00010.safetensors",
|
| 610 |
"model.layers.19.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
|
| 611 |
-
"model.layers.19.self_attn.
|
| 612 |
-
"model.layers.19.self_attn.
|
| 613 |
-
"model.layers.19.self_attn.
|
| 614 |
-
"model.layers.19.self_attn.
|
| 615 |
"model.layers.19.self_attn.kv_b_proj.biases": "model-00007-of-00010.safetensors",
|
| 616 |
"model.layers.19.self_attn.kv_b_proj.scales": "model-00007-of-00010.safetensors",
|
| 617 |
"model.layers.19.self_attn.kv_b_proj.weight": "model-00007-of-00010.safetensors",
|
|
@@ -873,10 +873,10 @@
|
|
| 873 |
"model.layers.23.mlp.switch_mlp.up_proj.scales": "model-00009-of-00010.safetensors",
|
| 874 |
"model.layers.23.mlp.switch_mlp.up_proj.weight": "model-00009-of-00010.safetensors",
|
| 875 |
"model.layers.23.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
|
| 876 |
-
"model.layers.23.self_attn.
|
| 877 |
-
"model.layers.23.self_attn.
|
| 878 |
-
"model.layers.23.self_attn.
|
| 879 |
-
"model.layers.23.self_attn.
|
| 880 |
"model.layers.23.self_attn.kv_b_proj.biases": "model-00009-of-00010.safetensors",
|
| 881 |
"model.layers.23.self_attn.kv_b_proj.scales": "model-00009-of-00010.safetensors",
|
| 882 |
"model.layers.23.self_attn.kv_b_proj.weight": "model-00009-of-00010.safetensors",
|
|
@@ -1024,10 +1024,10 @@
|
|
| 1024 |
"model.layers.26.mlp.switch_mlp.up_proj.scales": "model-00010-of-00010.safetensors",
|
| 1025 |
"model.layers.26.mlp.switch_mlp.up_proj.weight": "model-00010-of-00010.safetensors",
|
| 1026 |
"model.layers.26.post_attention_layernorm.weight": "model-00010-of-00010.safetensors",
|
| 1027 |
-
"model.layers.26.self_attn.
|
| 1028 |
-
"model.layers.26.self_attn.
|
| 1029 |
-
"model.layers.26.self_attn.
|
| 1030 |
-
"model.layers.26.self_attn.
|
| 1031 |
"model.layers.26.self_attn.kv_b_proj.biases": "model-00010-of-00010.safetensors",
|
| 1032 |
"model.layers.26.self_attn.kv_b_proj.scales": "model-00010-of-00010.safetensors",
|
| 1033 |
"model.layers.26.self_attn.kv_b_proj.weight": "model-00010-of-00010.safetensors",
|
|
@@ -1061,10 +1061,10 @@
|
|
| 1061 |
"model.layers.3.mlp.switch_mlp.up_proj.scales": "model-00002-of-00010.safetensors",
|
| 1062 |
"model.layers.3.mlp.switch_mlp.up_proj.weight": "model-00002-of-00010.safetensors",
|
| 1063 |
"model.layers.3.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
|
| 1064 |
-
"model.layers.3.self_attn.
|
| 1065 |
-
"model.layers.3.self_attn.
|
| 1066 |
-
"model.layers.3.self_attn.
|
| 1067 |
-
"model.layers.3.self_attn.
|
| 1068 |
"model.layers.3.self_attn.kv_b_proj.biases": "model-00001-of-00010.safetensors",
|
| 1069 |
"model.layers.3.self_attn.kv_b_proj.scales": "model-00001-of-00010.safetensors",
|
| 1070 |
"model.layers.3.self_attn.kv_b_proj.weight": "model-00001-of-00010.safetensors",
|
|
@@ -1269,10 +1269,10 @@
|
|
| 1269 |
"model.layers.7.mlp.switch_mlp.up_proj.scales": "model-00003-of-00010.safetensors",
|
| 1270 |
"model.layers.7.mlp.switch_mlp.up_proj.weight": "model-00003-of-00010.safetensors",
|
| 1271 |
"model.layers.7.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
|
| 1272 |
-
"model.layers.7.self_attn.
|
| 1273 |
-
"model.layers.7.self_attn.
|
| 1274 |
-
"model.layers.7.self_attn.
|
| 1275 |
-
"model.layers.7.self_attn.
|
| 1276 |
"model.layers.7.self_attn.kv_b_proj.biases": "model-00003-of-00010.safetensors",
|
| 1277 |
"model.layers.7.self_attn.kv_b_proj.scales": "model-00003-of-00010.safetensors",
|
| 1278 |
"model.layers.7.self_attn.kv_b_proj.weight": "model-00003-of-00010.safetensors",
|
|
|
|
| 192 |
"model.layers.11.mlp.switch_mlp.up_proj.scales": "model-00005-of-00010.safetensors",
|
| 193 |
"model.layers.11.mlp.switch_mlp.up_proj.weight": "model-00005-of-00010.safetensors",
|
| 194 |
"model.layers.11.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
|
| 195 |
+
"model.layers.11.self_attn.kv_a_layernorm.weight": "model-00004-of-00010.safetensors",
|
| 196 |
+
"model.layers.11.self_attn.kv_a_proj_with_mqa.biases": "model-00004-of-00010.safetensors",
|
| 197 |
+
"model.layers.11.self_attn.kv_a_proj_with_mqa.scales": "model-00004-of-00010.safetensors",
|
| 198 |
+
"model.layers.11.self_attn.kv_a_proj_with_mqa.weight": "model-00004-of-00010.safetensors",
|
| 199 |
"model.layers.11.self_attn.kv_b_proj.biases": "model-00004-of-00010.safetensors",
|
| 200 |
"model.layers.11.self_attn.kv_b_proj.scales": "model-00004-of-00010.safetensors",
|
| 201 |
"model.layers.11.self_attn.kv_b_proj.weight": "model-00004-of-00010.safetensors",
|
|
|
|
| 400 |
"model.layers.15.mlp.switch_mlp.up_proj.scales": "model-00006-of-00010.safetensors",
|
| 401 |
"model.layers.15.mlp.switch_mlp.up_proj.weight": "model-00006-of-00010.safetensors",
|
| 402 |
"model.layers.15.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
|
| 403 |
+
"model.layers.15.self_attn.kv_a_layernorm.weight": "model-00006-of-00010.safetensors",
|
| 404 |
+
"model.layers.15.self_attn.kv_a_proj_with_mqa.biases": "model-00006-of-00010.safetensors",
|
| 405 |
+
"model.layers.15.self_attn.kv_a_proj_with_mqa.scales": "model-00006-of-00010.safetensors",
|
| 406 |
+
"model.layers.15.self_attn.kv_a_proj_with_mqa.weight": "model-00006-of-00010.safetensors",
|
| 407 |
"model.layers.15.self_attn.kv_b_proj.biases": "model-00006-of-00010.safetensors",
|
| 408 |
"model.layers.15.self_attn.kv_b_proj.scales": "model-00006-of-00010.safetensors",
|
| 409 |
"model.layers.15.self_attn.kv_b_proj.weight": "model-00006-of-00010.safetensors",
|
|
|
|
| 608 |
"model.layers.19.mlp.switch_mlp.up_proj.scales": "model-00008-of-00010.safetensors",
|
| 609 |
"model.layers.19.mlp.switch_mlp.up_proj.weight": "model-00008-of-00010.safetensors",
|
| 610 |
"model.layers.19.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
|
| 611 |
+
"model.layers.19.self_attn.kv_a_layernorm.weight": "model-00007-of-00010.safetensors",
|
| 612 |
+
"model.layers.19.self_attn.kv_a_proj_with_mqa.biases": "model-00007-of-00010.safetensors",
|
| 613 |
+
"model.layers.19.self_attn.kv_a_proj_with_mqa.scales": "model-00007-of-00010.safetensors",
|
| 614 |
+
"model.layers.19.self_attn.kv_a_proj_with_mqa.weight": "model-00007-of-00010.safetensors",
|
| 615 |
"model.layers.19.self_attn.kv_b_proj.biases": "model-00007-of-00010.safetensors",
|
| 616 |
"model.layers.19.self_attn.kv_b_proj.scales": "model-00007-of-00010.safetensors",
|
| 617 |
"model.layers.19.self_attn.kv_b_proj.weight": "model-00007-of-00010.safetensors",
|
|
|
|
| 873 |
"model.layers.23.mlp.switch_mlp.up_proj.scales": "model-00009-of-00010.safetensors",
|
| 874 |
"model.layers.23.mlp.switch_mlp.up_proj.weight": "model-00009-of-00010.safetensors",
|
| 875 |
"model.layers.23.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
|
| 876 |
+
"model.layers.23.self_attn.kv_a_layernorm.weight": "model-00009-of-00010.safetensors",
|
| 877 |
+
"model.layers.23.self_attn.kv_a_proj_with_mqa.biases": "model-00009-of-00010.safetensors",
|
| 878 |
+
"model.layers.23.self_attn.kv_a_proj_with_mqa.scales": "model-00009-of-00010.safetensors",
|
| 879 |
+
"model.layers.23.self_attn.kv_a_proj_with_mqa.weight": "model-00009-of-00010.safetensors",
|
| 880 |
"model.layers.23.self_attn.kv_b_proj.biases": "model-00009-of-00010.safetensors",
|
| 881 |
"model.layers.23.self_attn.kv_b_proj.scales": "model-00009-of-00010.safetensors",
|
| 882 |
"model.layers.23.self_attn.kv_b_proj.weight": "model-00009-of-00010.safetensors",
|
|
|
|
| 1024 |
"model.layers.26.mlp.switch_mlp.up_proj.scales": "model-00010-of-00010.safetensors",
|
| 1025 |
"model.layers.26.mlp.switch_mlp.up_proj.weight": "model-00010-of-00010.safetensors",
|
| 1026 |
"model.layers.26.post_attention_layernorm.weight": "model-00010-of-00010.safetensors",
|
| 1027 |
+
"model.layers.26.self_attn.kv_a_layernorm.weight": "model-00010-of-00010.safetensors",
|
| 1028 |
+
"model.layers.26.self_attn.kv_a_proj_with_mqa.biases": "model-00010-of-00010.safetensors",
|
| 1029 |
+
"model.layers.26.self_attn.kv_a_proj_with_mqa.scales": "model-00010-of-00010.safetensors",
|
| 1030 |
+
"model.layers.26.self_attn.kv_a_proj_with_mqa.weight": "model-00010-of-00010.safetensors",
|
| 1031 |
"model.layers.26.self_attn.kv_b_proj.biases": "model-00010-of-00010.safetensors",
|
| 1032 |
"model.layers.26.self_attn.kv_b_proj.scales": "model-00010-of-00010.safetensors",
|
| 1033 |
"model.layers.26.self_attn.kv_b_proj.weight": "model-00010-of-00010.safetensors",
|
|
|
|
| 1061 |
"model.layers.3.mlp.switch_mlp.up_proj.scales": "model-00002-of-00010.safetensors",
|
| 1062 |
"model.layers.3.mlp.switch_mlp.up_proj.weight": "model-00002-of-00010.safetensors",
|
| 1063 |
"model.layers.3.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
|
| 1064 |
+
"model.layers.3.self_attn.kv_a_layernorm.weight": "model-00001-of-00010.safetensors",
|
| 1065 |
+
"model.layers.3.self_attn.kv_a_proj_with_mqa.biases": "model-00001-of-00010.safetensors",
|
| 1066 |
+
"model.layers.3.self_attn.kv_a_proj_with_mqa.scales": "model-00001-of-00010.safetensors",
|
| 1067 |
+
"model.layers.3.self_attn.kv_a_proj_with_mqa.weight": "model-00001-of-00010.safetensors",
|
| 1068 |
"model.layers.3.self_attn.kv_b_proj.biases": "model-00001-of-00010.safetensors",
|
| 1069 |
"model.layers.3.self_attn.kv_b_proj.scales": "model-00001-of-00010.safetensors",
|
| 1070 |
"model.layers.3.self_attn.kv_b_proj.weight": "model-00001-of-00010.safetensors",
|
|
|
|
| 1269 |
"model.layers.7.mlp.switch_mlp.up_proj.scales": "model-00003-of-00010.safetensors",
|
| 1270 |
"model.layers.7.mlp.switch_mlp.up_proj.weight": "model-00003-of-00010.safetensors",
|
| 1271 |
"model.layers.7.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
|
| 1272 |
+
"model.layers.7.self_attn.kv_a_layernorm.weight": "model-00003-of-00010.safetensors",
|
| 1273 |
+
"model.layers.7.self_attn.kv_a_proj_with_mqa.biases": "model-00003-of-00010.safetensors",
|
| 1274 |
+
"model.layers.7.self_attn.kv_a_proj_with_mqa.scales": "model-00003-of-00010.safetensors",
|
| 1275 |
+
"model.layers.7.self_attn.kv_a_proj_with_mqa.weight": "model-00003-of-00010.safetensors",
|
| 1276 |
"model.layers.7.self_attn.kv_b_proj.biases": "model-00003-of-00010.safetensors",
|
| 1277 |
"model.layers.7.self_attn.kv_b_proj.scales": "model-00003-of-00010.safetensors",
|
| 1278 |
"model.layers.7.self_attn.kv_b_proj.weight": "model-00003-of-00010.safetensors",
|