kernelpool committed on
Commit
757f137
·
verified ·
1 Parent(s): 0badec7

Add files using upload-large-folder tool

Browse files
README.md CHANGED
@@ -2,9 +2,9 @@
2
  license: mit
3
  pipeline_tag: text-generation
4
  library_name: mlx
5
- base_model: moonshotai/Kimi-Linear-48B-A3B-Instruct
6
  tags:
7
  - mlx
 
8
  ---
9
 
10
  # mlx-community/Kimi-Linear-48B-A3B-Instruct-8bit
 
2
  license: mit
3
  pipeline_tag: text-generation
4
  library_name: mlx
 
5
  tags:
6
  - mlx
7
+ base_model: moonshotai/Kimi-Linear-48B-A3B-Instruct
8
  ---
9
 
10
  # mlx-community/Kimi-Linear-48B-A3B-Instruct-8bit
model-00001-of-00010.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65e0033ca7270344f647017cf7d1d7f976ba81185205d9e045fe00ad533f2da6
3
- size 5134894221
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fba1f96929c5a64a530bec39d458b1d75e3a8b4a2563ed2e6db4037a9d88c0ea
3
+ size 5134894271
model-00002-of-00010.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e5ad3385dcd775c0bd1f2d8b09c131655862f3728c5cbfa3ae4c224ec56f379
3
- size 5284444549
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3fb508aeda057d3c401f017355631a99908b559eb6f97e6c2468c1d0dba1145
3
+ size 5284444567
model-00003-of-00010.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf1517a248f2384f1fff5c967fa68dd280a374d40e3b8bcee9674db5941596c6
3
- size 5223130628
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:061631ab19d4471ccb3ebc986b24d7fbfffc8082b1f00c6ea1c000d284bc053d
3
+ size 5223130674
model-00004-of-00010.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b35008d906ba2e4a6f150a92db3ba4d8686f05dbd5d3b3aeaad484556cfb008c
3
- size 5273335229
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a803fae387e4bac59871354d296341971ef018f8fe1c2093c73e58800c2635cf
3
+ size 5273335273
model-00005-of-00010.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b32a6972e8c919ac3e50d4eff68b79f563d1e7ef1b0ccb246d4b0a29a2223d42
3
- size 5284444701
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a81405870982a8c3162c2de832e2dde61c0ced11cbcf41a2f46f5289978cf632
3
+ size 5284444719
model-00006-of-00010.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72fc8748ab6022923e3728691e81f7c241378999ebb573f4ac62456954dcdfa6
3
- size 5223130754
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c044009c311faa489c31b7cd7442b2098ea2aabf95033d5f96706343e2d3b1e
3
+ size 5223130790
model-00007-of-00010.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:125d9202d0a93df77109cbca6d9b13e00f6b078e12413f72659c7c2f95ce65fb
3
- size 5273335249
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15b7e7e35b4c6d1af49410205787b790e2809a0fa299870aecb856163260c083
3
+ size 5273335293
model-00008-of-00010.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15f79195e3ad13a5c0b71227f518b53b910a9e36f8eb73f5f051f6c7d24f7b84
3
- size 5284444735
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e57c3796b3ecc1e3964ded2b88067799609a9eb544873694b9a41c91ffff037d
3
+ size 5284444753
model-00009-of-00010.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d7c79465b89d0e189d030d157053734d96050c53904cfa77f95b64e162556bdf
3
- size 5223130706
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07633e50a3f4a8c68689cb3ce6dcae1f42e584af4cd72d0b42f32dc2bc76b428
3
+ size 5223130744
model-00010-of-00010.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb3791eb5cf69ff5f09248234a02ba363994a71e91aa99c2e3d2b1f091613478
3
- size 4990018635
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b18a4830f9d00bb608033374ac7e92de091d628ee19a7cd91b05ef12acd360bc
3
+ size 4990018667
model.safetensors.index.json CHANGED
@@ -192,10 +192,10 @@
192
  "model.layers.11.mlp.switch_mlp.up_proj.scales": "model-00005-of-00010.safetensors",
193
  "model.layers.11.mlp.switch_mlp.up_proj.weight": "model-00005-of-00010.safetensors",
194
  "model.layers.11.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
195
- "model.layers.11.self_attn.kv_a_norm.weight": "model-00004-of-00010.safetensors",
196
- "model.layers.11.self_attn.kv_a_proj.biases": "model-00004-of-00010.safetensors",
197
- "model.layers.11.self_attn.kv_a_proj.scales": "model-00004-of-00010.safetensors",
198
- "model.layers.11.self_attn.kv_a_proj.weight": "model-00004-of-00010.safetensors",
199
  "model.layers.11.self_attn.kv_b_proj.biases": "model-00004-of-00010.safetensors",
200
  "model.layers.11.self_attn.kv_b_proj.scales": "model-00004-of-00010.safetensors",
201
  "model.layers.11.self_attn.kv_b_proj.weight": "model-00004-of-00010.safetensors",
@@ -400,10 +400,10 @@
400
  "model.layers.15.mlp.switch_mlp.up_proj.scales": "model-00006-of-00010.safetensors",
401
  "model.layers.15.mlp.switch_mlp.up_proj.weight": "model-00006-of-00010.safetensors",
402
  "model.layers.15.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
403
- "model.layers.15.self_attn.kv_a_norm.weight": "model-00006-of-00010.safetensors",
404
- "model.layers.15.self_attn.kv_a_proj.biases": "model-00006-of-00010.safetensors",
405
- "model.layers.15.self_attn.kv_a_proj.scales": "model-00006-of-00010.safetensors",
406
- "model.layers.15.self_attn.kv_a_proj.weight": "model-00006-of-00010.safetensors",
407
  "model.layers.15.self_attn.kv_b_proj.biases": "model-00006-of-00010.safetensors",
408
  "model.layers.15.self_attn.kv_b_proj.scales": "model-00006-of-00010.safetensors",
409
  "model.layers.15.self_attn.kv_b_proj.weight": "model-00006-of-00010.safetensors",
@@ -608,10 +608,10 @@
608
  "model.layers.19.mlp.switch_mlp.up_proj.scales": "model-00008-of-00010.safetensors",
609
  "model.layers.19.mlp.switch_mlp.up_proj.weight": "model-00008-of-00010.safetensors",
610
  "model.layers.19.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
611
- "model.layers.19.self_attn.kv_a_norm.weight": "model-00007-of-00010.safetensors",
612
- "model.layers.19.self_attn.kv_a_proj.biases": "model-00007-of-00010.safetensors",
613
- "model.layers.19.self_attn.kv_a_proj.scales": "model-00007-of-00010.safetensors",
614
- "model.layers.19.self_attn.kv_a_proj.weight": "model-00007-of-00010.safetensors",
615
  "model.layers.19.self_attn.kv_b_proj.biases": "model-00007-of-00010.safetensors",
616
  "model.layers.19.self_attn.kv_b_proj.scales": "model-00007-of-00010.safetensors",
617
  "model.layers.19.self_attn.kv_b_proj.weight": "model-00007-of-00010.safetensors",
@@ -873,10 +873,10 @@
873
  "model.layers.23.mlp.switch_mlp.up_proj.scales": "model-00009-of-00010.safetensors",
874
  "model.layers.23.mlp.switch_mlp.up_proj.weight": "model-00009-of-00010.safetensors",
875
  "model.layers.23.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
876
- "model.layers.23.self_attn.kv_a_norm.weight": "model-00009-of-00010.safetensors",
877
- "model.layers.23.self_attn.kv_a_proj.biases": "model-00009-of-00010.safetensors",
878
- "model.layers.23.self_attn.kv_a_proj.scales": "model-00009-of-00010.safetensors",
879
- "model.layers.23.self_attn.kv_a_proj.weight": "model-00009-of-00010.safetensors",
880
  "model.layers.23.self_attn.kv_b_proj.biases": "model-00009-of-00010.safetensors",
881
  "model.layers.23.self_attn.kv_b_proj.scales": "model-00009-of-00010.safetensors",
882
  "model.layers.23.self_attn.kv_b_proj.weight": "model-00009-of-00010.safetensors",
@@ -1024,10 +1024,10 @@
1024
  "model.layers.26.mlp.switch_mlp.up_proj.scales": "model-00010-of-00010.safetensors",
1025
  "model.layers.26.mlp.switch_mlp.up_proj.weight": "model-00010-of-00010.safetensors",
1026
  "model.layers.26.post_attention_layernorm.weight": "model-00010-of-00010.safetensors",
1027
- "model.layers.26.self_attn.kv_a_norm.weight": "model-00010-of-00010.safetensors",
1028
- "model.layers.26.self_attn.kv_a_proj.biases": "model-00010-of-00010.safetensors",
1029
- "model.layers.26.self_attn.kv_a_proj.scales": "model-00010-of-00010.safetensors",
1030
- "model.layers.26.self_attn.kv_a_proj.weight": "model-00010-of-00010.safetensors",
1031
  "model.layers.26.self_attn.kv_b_proj.biases": "model-00010-of-00010.safetensors",
1032
  "model.layers.26.self_attn.kv_b_proj.scales": "model-00010-of-00010.safetensors",
1033
  "model.layers.26.self_attn.kv_b_proj.weight": "model-00010-of-00010.safetensors",
@@ -1061,10 +1061,10 @@
1061
  "model.layers.3.mlp.switch_mlp.up_proj.scales": "model-00002-of-00010.safetensors",
1062
  "model.layers.3.mlp.switch_mlp.up_proj.weight": "model-00002-of-00010.safetensors",
1063
  "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
1064
- "model.layers.3.self_attn.kv_a_norm.weight": "model-00001-of-00010.safetensors",
1065
- "model.layers.3.self_attn.kv_a_proj.biases": "model-00001-of-00010.safetensors",
1066
- "model.layers.3.self_attn.kv_a_proj.scales": "model-00001-of-00010.safetensors",
1067
- "model.layers.3.self_attn.kv_a_proj.weight": "model-00001-of-00010.safetensors",
1068
  "model.layers.3.self_attn.kv_b_proj.biases": "model-00001-of-00010.safetensors",
1069
  "model.layers.3.self_attn.kv_b_proj.scales": "model-00001-of-00010.safetensors",
1070
  "model.layers.3.self_attn.kv_b_proj.weight": "model-00001-of-00010.safetensors",
@@ -1269,10 +1269,10 @@
1269
  "model.layers.7.mlp.switch_mlp.up_proj.scales": "model-00003-of-00010.safetensors",
1270
  "model.layers.7.mlp.switch_mlp.up_proj.weight": "model-00003-of-00010.safetensors",
1271
  "model.layers.7.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
1272
- "model.layers.7.self_attn.kv_a_norm.weight": "model-00003-of-00010.safetensors",
1273
- "model.layers.7.self_attn.kv_a_proj.biases": "model-00003-of-00010.safetensors",
1274
- "model.layers.7.self_attn.kv_a_proj.scales": "model-00003-of-00010.safetensors",
1275
- "model.layers.7.self_attn.kv_a_proj.weight": "model-00003-of-00010.safetensors",
1276
  "model.layers.7.self_attn.kv_b_proj.biases": "model-00003-of-00010.safetensors",
1277
  "model.layers.7.self_attn.kv_b_proj.scales": "model-00003-of-00010.safetensors",
1278
  "model.layers.7.self_attn.kv_b_proj.weight": "model-00003-of-00010.safetensors",
 
192
  "model.layers.11.mlp.switch_mlp.up_proj.scales": "model-00005-of-00010.safetensors",
193
  "model.layers.11.mlp.switch_mlp.up_proj.weight": "model-00005-of-00010.safetensors",
194
  "model.layers.11.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
195
+ "model.layers.11.self_attn.kv_a_layernorm.weight": "model-00004-of-00010.safetensors",
196
+ "model.layers.11.self_attn.kv_a_proj_with_mqa.biases": "model-00004-of-00010.safetensors",
197
+ "model.layers.11.self_attn.kv_a_proj_with_mqa.scales": "model-00004-of-00010.safetensors",
198
+ "model.layers.11.self_attn.kv_a_proj_with_mqa.weight": "model-00004-of-00010.safetensors",
199
  "model.layers.11.self_attn.kv_b_proj.biases": "model-00004-of-00010.safetensors",
200
  "model.layers.11.self_attn.kv_b_proj.scales": "model-00004-of-00010.safetensors",
201
  "model.layers.11.self_attn.kv_b_proj.weight": "model-00004-of-00010.safetensors",
 
400
  "model.layers.15.mlp.switch_mlp.up_proj.scales": "model-00006-of-00010.safetensors",
401
  "model.layers.15.mlp.switch_mlp.up_proj.weight": "model-00006-of-00010.safetensors",
402
  "model.layers.15.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
403
+ "model.layers.15.self_attn.kv_a_layernorm.weight": "model-00006-of-00010.safetensors",
404
+ "model.layers.15.self_attn.kv_a_proj_with_mqa.biases": "model-00006-of-00010.safetensors",
405
+ "model.layers.15.self_attn.kv_a_proj_with_mqa.scales": "model-00006-of-00010.safetensors",
406
+ "model.layers.15.self_attn.kv_a_proj_with_mqa.weight": "model-00006-of-00010.safetensors",
407
  "model.layers.15.self_attn.kv_b_proj.biases": "model-00006-of-00010.safetensors",
408
  "model.layers.15.self_attn.kv_b_proj.scales": "model-00006-of-00010.safetensors",
409
  "model.layers.15.self_attn.kv_b_proj.weight": "model-00006-of-00010.safetensors",
 
608
  "model.layers.19.mlp.switch_mlp.up_proj.scales": "model-00008-of-00010.safetensors",
609
  "model.layers.19.mlp.switch_mlp.up_proj.weight": "model-00008-of-00010.safetensors",
610
  "model.layers.19.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
611
+ "model.layers.19.self_attn.kv_a_layernorm.weight": "model-00007-of-00010.safetensors",
612
+ "model.layers.19.self_attn.kv_a_proj_with_mqa.biases": "model-00007-of-00010.safetensors",
613
+ "model.layers.19.self_attn.kv_a_proj_with_mqa.scales": "model-00007-of-00010.safetensors",
614
+ "model.layers.19.self_attn.kv_a_proj_with_mqa.weight": "model-00007-of-00010.safetensors",
615
  "model.layers.19.self_attn.kv_b_proj.biases": "model-00007-of-00010.safetensors",
616
  "model.layers.19.self_attn.kv_b_proj.scales": "model-00007-of-00010.safetensors",
617
  "model.layers.19.self_attn.kv_b_proj.weight": "model-00007-of-00010.safetensors",
 
873
  "model.layers.23.mlp.switch_mlp.up_proj.scales": "model-00009-of-00010.safetensors",
874
  "model.layers.23.mlp.switch_mlp.up_proj.weight": "model-00009-of-00010.safetensors",
875
  "model.layers.23.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
876
+ "model.layers.23.self_attn.kv_a_layernorm.weight": "model-00009-of-00010.safetensors",
877
+ "model.layers.23.self_attn.kv_a_proj_with_mqa.biases": "model-00009-of-00010.safetensors",
878
+ "model.layers.23.self_attn.kv_a_proj_with_mqa.scales": "model-00009-of-00010.safetensors",
879
+ "model.layers.23.self_attn.kv_a_proj_with_mqa.weight": "model-00009-of-00010.safetensors",
880
  "model.layers.23.self_attn.kv_b_proj.biases": "model-00009-of-00010.safetensors",
881
  "model.layers.23.self_attn.kv_b_proj.scales": "model-00009-of-00010.safetensors",
882
  "model.layers.23.self_attn.kv_b_proj.weight": "model-00009-of-00010.safetensors",
 
1024
  "model.layers.26.mlp.switch_mlp.up_proj.scales": "model-00010-of-00010.safetensors",
1025
  "model.layers.26.mlp.switch_mlp.up_proj.weight": "model-00010-of-00010.safetensors",
1026
  "model.layers.26.post_attention_layernorm.weight": "model-00010-of-00010.safetensors",
1027
+ "model.layers.26.self_attn.kv_a_layernorm.weight": "model-00010-of-00010.safetensors",
1028
+ "model.layers.26.self_attn.kv_a_proj_with_mqa.biases": "model-00010-of-00010.safetensors",
1029
+ "model.layers.26.self_attn.kv_a_proj_with_mqa.scales": "model-00010-of-00010.safetensors",
1030
+ "model.layers.26.self_attn.kv_a_proj_with_mqa.weight": "model-00010-of-00010.safetensors",
1031
  "model.layers.26.self_attn.kv_b_proj.biases": "model-00010-of-00010.safetensors",
1032
  "model.layers.26.self_attn.kv_b_proj.scales": "model-00010-of-00010.safetensors",
1033
  "model.layers.26.self_attn.kv_b_proj.weight": "model-00010-of-00010.safetensors",
 
1061
  "model.layers.3.mlp.switch_mlp.up_proj.scales": "model-00002-of-00010.safetensors",
1062
  "model.layers.3.mlp.switch_mlp.up_proj.weight": "model-00002-of-00010.safetensors",
1063
  "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
1064
+ "model.layers.3.self_attn.kv_a_layernorm.weight": "model-00001-of-00010.safetensors",
1065
+ "model.layers.3.self_attn.kv_a_proj_with_mqa.biases": "model-00001-of-00010.safetensors",
1066
+ "model.layers.3.self_attn.kv_a_proj_with_mqa.scales": "model-00001-of-00010.safetensors",
1067
+ "model.layers.3.self_attn.kv_a_proj_with_mqa.weight": "model-00001-of-00010.safetensors",
1068
  "model.layers.3.self_attn.kv_b_proj.biases": "model-00001-of-00010.safetensors",
1069
  "model.layers.3.self_attn.kv_b_proj.scales": "model-00001-of-00010.safetensors",
1070
  "model.layers.3.self_attn.kv_b_proj.weight": "model-00001-of-00010.safetensors",
 
1269
  "model.layers.7.mlp.switch_mlp.up_proj.scales": "model-00003-of-00010.safetensors",
1270
  "model.layers.7.mlp.switch_mlp.up_proj.weight": "model-00003-of-00010.safetensors",
1271
  "model.layers.7.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
1272
+ "model.layers.7.self_attn.kv_a_layernorm.weight": "model-00003-of-00010.safetensors",
1273
+ "model.layers.7.self_attn.kv_a_proj_with_mqa.biases": "model-00003-of-00010.safetensors",
1274
+ "model.layers.7.self_attn.kv_a_proj_with_mqa.scales": "model-00003-of-00010.safetensors",
1275
+ "model.layers.7.self_attn.kv_a_proj_with_mqa.weight": "model-00003-of-00010.safetensors",
1276
  "model.layers.7.self_attn.kv_b_proj.biases": "model-00003-of-00010.safetensors",
1277
  "model.layers.7.self_attn.kv_b_proj.scales": "model-00003-of-00010.safetensors",
1278
  "model.layers.7.self_attn.kv_b_proj.weight": "model-00003-of-00010.safetensors",