Skip to content

Commit 84a71b3

Browse files
committed
ggml-cpu: refactor repack, format
1 parent d09f5df commit 84a71b3

2 files changed

Lines changed: 258 additions & 206 deletions

File tree

ggml/src/ggml-cpu/arch-fallback.h

Lines changed: 24 additions & 124 deletions
Original file line number | Diff line number | Diff line change
@@ -36,8 +36,6 @@
3636
#define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
3737
#define ggml_quantize_mat_q8_K_4x4_generic ggml_quantize_mat_q8_K_4x4
3838
#define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
39-
#define ggml_repack_mat_f16_7x1_generic ggml_repack_mat_f16_7x1
40-
#define ggml_repack_mat_f32_7x1_generic ggml_repack_mat_f32_7x1
4139
#define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
4240
#define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
4341
#define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
@@ -49,14 +47,8 @@
4947
#define ggml_gemv_q6_K_8x8_q8_K_generic ggml_gemv_q6_K_8x8_q8_K
5048
#define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
5149
#define ggml_gemv_iq4_nl_8x8_q8_0_generic ggml_gemv_iq4_nl_8x8_q8_0
52-
#define ggml_gemv_f16_1x16_f16_generic ggml_gemv_f16_1x16_f16
53-
#define ggml_gemv_f16_1x32_f16_generic ggml_gemv_f16_1x32_f16
54-
#define ggml_gemv_f16_1x64_f16_generic ggml_gemv_f16_1x64_f16
55-
#define ggml_gemv_f16_1x128_f16_generic ggml_gemv_f16_1x128_f16
56-
#define ggml_gemv_f32_1x16_f32_generic ggml_gemv_f32_1x16_f32
57-
#define ggml_gemv_f32_1x32_f32_generic ggml_gemv_f32_1x32_f32
58-
#define ggml_gemv_f32_1x64_f32_generic ggml_gemv_f32_1x64_f32
59-
#define ggml_gemv_f32_1x128_f32_generic ggml_gemv_f32_1x128_f32
50+
#define ggml_gemv_q8_0_4x4_q8_0_generic ggml_gemv_q8_0_4x4_q8_0
51+
#define ggml_gemv_q8_0_4x8_q8_0_generic ggml_gemv_q8_0_4x8_q8_0
6052
#define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
6153
#define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
6254
#define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0
@@ -68,74 +60,38 @@
6860
#define ggml_gemm_q6_K_8x8_q8_K_generic ggml_gemm_q6_K_8x8_q8_K
6961
#define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
7062
#define ggml_gemm_iq4_nl_8x8_q8_0_generic ggml_gemm_iq4_nl_8x8_q8_0
71-
#define ggml_gemm_f16_7x1x16_f16_generic ggml_gemm_f16_7x1x16_f16
72-
#define ggml_gemm_f16_7x1x32_f16_generic ggml_gemm_f16_7x1x32_f16
73-
#define ggml_gemm_f16_7x1x64_f16_generic ggml_gemm_f16_7x1x64_f16
74-
#define ggml_gemm_f16_7x1x128_f16_generic ggml_gemm_f16_7x1x128_f16
75-
#define ggml_gemm_f32_7x1x16_f32_generic ggml_gemm_f32_7x1x16_f32
76-
#define ggml_gemm_f32_7x1x32_f32_generic ggml_gemm_f32_7x1x32_f32
77-
#define ggml_gemm_f32_7x1x64_f32_generic ggml_gemm_f32_7x1x64_f32
78-
#define ggml_gemm_f32_7x1x128_f32_generic ggml_gemm_f32_7x1x128_f32
63+
#define ggml_gemm_q8_0_4x4_q8_0_generic ggml_gemm_q8_0_4x4_q8_0
64+
#define ggml_gemm_q8_0_4x8_q8_0_generic ggml_gemm_q8_0_4x8_q8_0
7965
#elif defined(__aarch64__) || defined(__arm__) || defined(_M_ARM) || defined(_M_ARM64)
8066
// repack.cpp
8167
#define ggml_quantize_mat_q8_K_4x4_generic ggml_quantize_mat_q8_K_4x4
8268
#define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
8369
#define ggml_gemv_iq4_nl_8x8_q8_0_generic ggml_gemv_iq4_nl_8x8_q8_0
8470
#define ggml_gemv_q2_K_8x8_q8_K_generic ggml_gemv_q2_K_8x8_q8_K
85-
#define ggml_gemv_f16_1x16_f16_generic ggml_gemv_f16_1x16_f16
86-
#define ggml_gemv_f16_1x32_f16_generic ggml_gemv_f16_1x32_f16
87-
#define ggml_gemv_f16_1x64_f16_generic ggml_gemv_f16_1x64_f16
88-
#define ggml_gemv_f16_1x128_f16_generic ggml_gemv_f16_1x128_f16
89-
#define ggml_gemv_f32_1x16_f32_generic ggml_gemv_f32_1x16_f32
90-
#define ggml_gemv_f32_1x32_f32_generic ggml_gemv_f32_1x32_f32
91-
#define ggml_gemv_f32_1x64_f32_generic ggml_gemv_f32_1x64_f32
92-
#define ggml_gemv_f32_1x128_f32_generic ggml_gemv_f32_1x128_f32
9371
#define ggml_gemm_iq4_nl_8x8_q8_0_generic ggml_gemm_iq4_nl_8x8_q8_0
9472
#define ggml_gemm_q2_K_8x8_q8_K_generic ggml_gemm_q2_K_8x8_q8_K
95-
#define ggml_gemm_f16_7x1x16_f16_generic ggml_gemm_f16_7x1x16_f16
96-
#define ggml_gemm_f16_7x1x32_f16_generic ggml_gemm_f16_7x1x32_f16
97-
#define ggml_gemm_f16_7x1x64_f16_generic ggml_gemm_f16_7x1x64_f16
98-
#define ggml_gemm_f16_7x1x128_f16_generic ggml_gemm_f16_7x1x128_f16
99-
#define ggml_gemm_f32_7x1x16_f32_generic ggml_gemm_f32_7x1x16_f32
100-
#define ggml_gemm_f32_7x1x32_f32_generic ggml_gemm_f32_7x1x32_f32
101-
#define ggml_gemm_f32_7x1x64_f32_generic ggml_gemm_f32_7x1x64_f32
102-
#define ggml_gemm_f32_7x1x128_f32_generic ggml_gemm_f32_7x1x128_f32
10373
#elif defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64)
10474
// repack.cpp
10575
#define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
10676
#define ggml_quantize_mat_q8_K_4x4_generic ggml_quantize_mat_q8_K_4x4
107-
#define ggml_repack_mat_f16_7x1_generic ggml_repack_mat_f16_7x1
108-
#define ggml_repack_mat_f32_7x1_generic ggml_repack_mat_f32_7x1
10977
#define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
11078
#define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
11179
#define ggml_gemv_q4_K_8x4_q8_K_generic ggml_gemv_q4_K_8x4_q8_K
11280
#define ggml_gemv_q5_K_8x8_q8_K_generic ggml_gemv_q5_K_8x8_q8_K
11381
#define ggml_gemv_q6_K_8x4_q8_K_generic ggml_gemv_q6_K_8x4_q8_K
11482
#define ggml_gemv_q6_K_8x8_q8_K_generic ggml_gemv_q6_K_8x8_q8_K
11583
#define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
116-
#define ggml_gemv_f16_1x16_f16_generic ggml_gemv_f16_1x16_f16
117-
#define ggml_gemv_f16_1x32_f16_generic ggml_gemv_f16_1x32_f16
118-
#define ggml_gemv_f16_1x64_f16_generic ggml_gemv_f16_1x64_f16
119-
#define ggml_gemv_f16_1x128_f16_generic ggml_gemv_f16_1x128_f16
120-
#define ggml_gemv_f32_1x16_f32_generic ggml_gemv_f32_1x16_f32
121-
#define ggml_gemv_f32_1x32_f32_generic ggml_gemv_f32_1x32_f32
122-
#define ggml_gemv_f32_1x64_f32_generic ggml_gemv_f32_1x64_f32
123-
#define ggml_gemv_f32_1x128_f32_generic ggml_gemv_f32_1x128_f32
84+
#define ggml_gemv_q8_0_4x4_q8_0_generic ggml_gemv_q8_0_4x4_q8_0
85+
#define ggml_gemv_q8_0_4x8_q8_0_generic ggml_gemv_q8_0_4x8_q8_0
12486
#define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
12587
#define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
12688
#define ggml_gemm_q4_K_8x4_q8_K_generic ggml_gemm_q4_K_8x4_q8_K
12789
#define ggml_gemm_q5_K_8x8_q8_K_generic ggml_gemm_q5_K_8x8_q8_K
12890
#define ggml_gemm_q6_K_8x4_q8_K_generic ggml_gemm_q6_K_8x4_q8_K
12991
#define ggml_gemm_q6_K_8x8_q8_K_generic ggml_gemm_q6_K_8x8_q8_K
13092
#define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
131-
#define ggml_gemm_f16_7x1x16_f16_generic ggml_gemm_f16_7x1x16_f16
132-
#define ggml_gemm_f16_7x1x32_f16_generic ggml_gemm_f16_7x1x32_f16
133-
#define ggml_gemm_f16_7x1x64_f16_generic ggml_gemm_f16_7x1x64_f16
134-
#define ggml_gemm_f16_7x1x128_f16_generic ggml_gemm_f16_7x1x128_f16
135-
#define ggml_gemm_f32_7x1x16_f32_generic ggml_gemm_f32_7x1x16_f32
136-
#define ggml_gemm_f32_7x1x32_f32_generic ggml_gemm_f32_7x1x32_f32
137-
#define ggml_gemm_f32_7x1x64_f32_generic ggml_gemm_f32_7x1x64_f32
138-
#define ggml_gemm_f32_7x1x128_f32_generic ggml_gemm_f32_7x1x128_f32
93+
#define ggml_gemm_q8_0_4x4_q8_0_generic ggml_gemm_q8_0_4x4_q8_0
94+
#define ggml_gemm_q8_0_4x8_q8_0_generic ggml_gemm_q8_0_4x8_q8_0
13995
#elif defined(__POWERPC__) || defined(__powerpc__)
14096
// ref: https://github.com/ggml-org/llama.cpp/pull/14146#issuecomment-2972561679
14197
// quants.c
@@ -148,8 +104,6 @@
148104
#define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
149105
#define ggml_quantize_mat_q8_K_4x4_generic ggml_quantize_mat_q8_K_4x4
150106
#define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
151-
#define ggml_repack_mat_f16_7x1_generic ggml_repack_mat_f16_7x1
152-
#define ggml_repack_mat_f32_7x1_generic ggml_repack_mat_f32_7x1
153107
#define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
154108
#define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
155109
#define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
@@ -161,14 +115,8 @@
161115
#define ggml_gemv_q6_K_8x8_q8_K_generic ggml_gemv_q6_K_8x8_q8_K
162116
#define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
163117
#define ggml_gemv_iq4_nl_8x8_q8_0_generic ggml_gemv_iq4_nl_8x8_q8_0
164-
#define ggml_gemv_f16_1x16_f16_generic ggml_gemv_f16_1x16_f16
165-
#define ggml_gemv_f16_1x32_f16_generic ggml_gemv_f16_1x32_f16
166-
#define ggml_gemv_f16_1x64_f16_generic ggml_gemv_f16_1x64_f16
167-
#define ggml_gemv_f16_1x128_f16_generic ggml_gemv_f16_1x128_f16
168-
#define ggml_gemv_f32_1x16_f32_generic ggml_gemv_f32_1x16_f32
169-
#define ggml_gemv_f32_1x32_f32_generic ggml_gemv_f32_1x32_f32
170-
#define ggml_gemv_f32_1x64_f32_generic ggml_gemv_f32_1x64_f32
171-
#define ggml_gemv_f32_1x128_f32_generic ggml_gemv_f32_1x128_f32
118+
#define ggml_gemv_q8_0_4x4_q8_0_generic ggml_gemv_q8_0_4x4_q8_0
119+
#define ggml_gemv_q8_0_4x8_q8_0_generic ggml_gemv_q8_0_4x8_q8_0
172120
#define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
173121
#define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
174122
#define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0
@@ -180,14 +128,8 @@
180128
#define ggml_gemm_q6_K_8x8_q8_K_generic ggml_gemm_q6_K_8x8_q8_K
181129
#define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
182130
#define ggml_gemm_iq4_nl_8x8_q8_0_generic ggml_gemm_iq4_nl_8x8_q8_0
183-
#define ggml_gemm_f16_7x1x16_f16_generic ggml_gemm_f16_7x1x16_f16
184-
#define ggml_gemm_f16_7x1x32_f16_generic ggml_gemm_f16_7x1x32_f16
185-
#define ggml_gemm_f16_7x1x64_f16_generic ggml_gemm_f16_7x1x64_f16
186-
#define ggml_gemm_f16_7x1x128_f16_generic ggml_gemm_f16_7x1x128_f16
187-
#define ggml_gemm_f32_7x1x16_f32_generic ggml_gemm_f32_7x1x16_f32
188-
#define ggml_gemm_f32_7x1x32_f32_generic ggml_gemm_f32_7x1x32_f32
189-
#define ggml_gemm_f32_7x1x64_f32_generic ggml_gemm_f32_7x1x64_f32
190-
#define ggml_gemm_f32_7x1x128_f32_generic ggml_gemm_f32_7x1x128_f32
131+
#define ggml_gemm_q8_0_4x4_q8_0_generic ggml_gemm_q8_0_4x4_q8_0
132+
#define ggml_gemm_q8_0_4x8_q8_0_generic ggml_gemm_q8_0_4x8_q8_0
191133
#elif defined(__loongarch64)
192134
// quants.c
193135
#define quantize_row_q8_K_generic quantize_row_q8_K
@@ -200,8 +142,6 @@
200142
#define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
201143
#define ggml_quantize_mat_q8_K_4x4_generic ggml_quantize_mat_q8_K_4x4
202144
#define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
203-
#define ggml_repack_mat_f16_7x1_generic ggml_repack_mat_f16_7x1
204-
#define ggml_repack_mat_f32_7x1_generic ggml_repack_mat_f32_7x1
205145
#define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
206146
#define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
207147
#define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
@@ -213,14 +153,8 @@
213153
#define ggml_gemv_q6_K_8x8_q8_K_generic ggml_gemv_q6_K_8x8_q8_K
214154
#define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
215155
#define ggml_gemv_iq4_nl_8x8_q8_0_generic ggml_gemv_iq4_nl_8x8_q8_0
216-
#define ggml_gemv_f16_1x16_f16_generic ggml_gemv_f16_1x16_f16
217-
#define ggml_gemv_f16_1x32_f16_generic ggml_gemv_f16_1x32_f16
218-
#define ggml_gemv_f16_1x64_f16_generic ggml_gemv_f16_1x64_f16
219-
#define ggml_gemv_f16_1x128_f16_generic ggml_gemv_f16_1x128_f16
220-
#define ggml_gemv_f32_1x16_f32_generic ggml_gemv_f32_1x16_f32
221-
#define ggml_gemv_f32_1x32_f32_generic ggml_gemv_f32_1x32_f32
222-
#define ggml_gemv_f32_1x64_f32_generic ggml_gemv_f32_1x64_f32
223-
#define ggml_gemv_f32_1x128_f32_generic ggml_gemv_f32_1x128_f32
156+
#define ggml_gemv_q8_0_4x4_q8_0_generic ggml_gemv_q8_0_4x4_q8_0
157+
#define ggml_gemv_q8_0_4x8_q8_0_generic ggml_gemv_q8_0_4x8_q8_0
224158
#define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
225159
#define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
226160
#define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0
@@ -232,14 +166,8 @@
232166
#define ggml_gemm_q6_K_8x8_q8_K_generic ggml_gemm_q6_K_8x8_q8_K
233167
#define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
234168
#define ggml_gemm_iq4_nl_8x8_q8_0_generic ggml_gemm_iq4_nl_8x8_q8_0
235-
#define ggml_gemm_f16_7x1x16_f16_generic ggml_gemm_f16_7x1x16_f16
236-
#define ggml_gemm_f16_7x1x32_f16_generic ggml_gemm_f16_7x1x32_f16
237-
#define ggml_gemm_f16_7x1x64_f16_generic ggml_gemm_f16_7x1x64_f16
238-
#define ggml_gemm_f16_7x1x128_f16_generic ggml_gemm_f16_7x1x128_f16
239-
#define ggml_gemm_f32_7x1x16_f32_generic ggml_gemm_f32_7x1x16_f32
240-
#define ggml_gemm_f32_7x1x32_f32_generic ggml_gemm_f32_7x1x32_f32
241-
#define ggml_gemm_f32_7x1x64_f32_generic ggml_gemm_f32_7x1x64_f32
242-
#define ggml_gemm_f32_7x1x128_f32_generic ggml_gemm_f32_7x1x128_f32
169+
#define ggml_gemm_q8_0_4x4_q8_0_generic ggml_gemm_q8_0_4x4_q8_0
170+
#define ggml_gemm_q8_0_4x8_q8_0_generic ggml_gemm_q8_0_4x8_q8_0
243171
#elif defined(__riscv)
244172
// quants.c
245173
#define quantize_row_q8_K_generic quantize_row_q8_K
@@ -304,8 +232,6 @@
304232
#define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
305233
#define ggml_quantize_mat_q8_K_4x4_generic ggml_quantize_mat_q8_K_4x4
306234
#define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
307-
#define ggml_repack_mat_f16_7x1_generic ggml_repack_mat_f16_7x1
308-
#define ggml_repack_mat_f32_7x1_generic ggml_repack_mat_f32_7x1
309235
#define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
310236
#define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
311237
#define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
@@ -317,14 +243,8 @@
317243
#define ggml_gemv_q6_K_8x8_q8_K_generic ggml_gemv_q6_K_8x8_q8_K
318244
#define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
319245
#define ggml_gemv_iq4_nl_8x8_q8_0_generic ggml_gemv_iq4_nl_8x8_q8_0
320-
#define ggml_gemv_f16_1x16_f16_generic ggml_gemv_f16_1x16_f16
321-
#define ggml_gemv_f16_1x32_f16_generic ggml_gemv_f16_1x32_f16
322-
#define ggml_gemv_f16_1x64_f16_generic ggml_gemv_f16_1x64_f16
323-
#define ggml_gemv_f16_1x128_f16_generic ggml_gemv_f16_1x128_f16
324-
#define ggml_gemv_f32_1x16_f32_generic ggml_gemv_f32_1x16_f32
325-
#define ggml_gemv_f32_1x32_f32_generic ggml_gemv_f32_1x32_f32
326-
#define ggml_gemv_f32_1x64_f32_generic ggml_gemv_f32_1x64_f32
327-
#define ggml_gemv_f32_1x128_f32_generic ggml_gemv_f32_1x128_f32
246+
#define ggml_gemv_q8_0_4x4_q8_0_generic ggml_gemv_q8_0_4x4_q8_0
247+
#define ggml_gemv_q8_0_4x8_q8_0_generic ggml_gemv_q8_0_4x8_q8_0
328248
#define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
329249
#define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
330250
#define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0
@@ -336,14 +256,8 @@
336256
#define ggml_gemm_q6_K_8x8_q8_K_generic ggml_gemm_q6_K_8x8_q8_K
337257
#define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
338258
#define ggml_gemm_iq4_nl_8x8_q8_0_generic ggml_gemm_iq4_nl_8x8_q8_0
339-
#define ggml_gemm_f16_7x1x16_f16_generic ggml_gemm_f16_7x1x16_f16
340-
#define ggml_gemm_f16_7x1x32_f16_generic ggml_gemm_f16_7x1x32_f16
341-
#define ggml_gemm_f16_7x1x64_f16_generic ggml_gemm_f16_7x1x64_f16
342-
#define ggml_gemm_f16_7x1x128_f16_generic ggml_gemm_f16_7x1x128_f16
343-
#define ggml_gemm_f32_7x1x16_f32_generic ggml_gemm_f32_7x1x16_f32
344-
#define ggml_gemm_f32_7x1x32_f32_generic ggml_gemm_f32_7x1x32_f32
345-
#define ggml_gemm_f32_7x1x64_f32_generic ggml_gemm_f32_7x1x64_f32
346-
#define ggml_gemm_f32_7x1x128_f32_generic ggml_gemm_f32_7x1x128_f32
259+
#define ggml_gemm_q8_0_4x4_q8_0_generic ggml_gemm_q8_0_4x4_q8_0
260+
#define ggml_gemm_q8_0_4x8_q8_0_generic ggml_gemm_q8_0_4x8_q8_0
347261
#elif defined(__wasm__)
348262
// quants.c
349263
#define ggml_vec_dot_q4_1_q8_1_generic ggml_vec_dot_q4_1_q8_1
@@ -364,8 +278,6 @@
364278
#define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
365279
#define ggml_quantize_mat_q8_K_4x4_generic ggml_quantize_mat_q8_K_4x4
366280
#define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
367-
#define ggml_repack_mat_f16_7x1_generic ggml_repack_mat_f16_7x1
368-
#define ggml_repack_mat_f32_7x1_generic ggml_repack_mat_f32_7x1
369281
#define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
370282
#define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
371283
#define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
@@ -377,14 +289,8 @@
377289
#define ggml_gemv_q6_K_8x8_q8_K_generic ggml_gemv_q6_K_8x8_q8_K
378290
#define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
379291
#define ggml_gemv_iq4_nl_8x8_q8_0_generic ggml_gemv_iq4_nl_8x8_q8_0
380-
#define ggml_gemv_f16_1x16_f16_generic ggml_gemv_f16_1x16_f16
381-
#define ggml_gemv_f16_1x32_f16_generic ggml_gemv_f16_1x32_f16
382-
#define ggml_gemv_f16_1x64_f16_generic ggml_gemv_f16_1x64_f16
383-
#define ggml_gemv_f16_1x128_f16_generic ggml_gemv_f16_1x128_f16
384-
#define ggml_gemv_f32_1x16_f32_generic ggml_gemv_f32_1x16_f32
385-
#define ggml_gemv_f32_1x32_f32_generic ggml_gemv_f32_1x32_f32
386-
#define ggml_gemv_f32_1x64_f32_generic ggml_gemv_f32_1x64_f32
387-
#define ggml_gemv_f32_1x128_f32_generic ggml_gemv_f32_1x128_f32
292+
#define ggml_gemv_q8_0_4x4_q8_0_generic ggml_gemv_q8_0_4x4_q8_0
293+
#define ggml_gemv_q8_0_4x8_q8_0_generic ggml_gemv_q8_0_4x8_q8_0
388294
#define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
389295
#define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
390296
#define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0
@@ -396,12 +302,6 @@
396302
#define ggml_gemm_q6_K_8x8_q8_K_generic ggml_gemm_q6_K_8x8_q8_K
397303
#define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
398304
#define ggml_gemm_iq4_nl_8x8_q8_0_generic ggml_gemm_iq4_nl_8x8_q8_0
399-
#define ggml_gemm_f16_7x1x16_f16_generic ggml_gemm_f16_7x1x16_f16
400-
#define ggml_gemm_f16_7x1x32_f16_generic ggml_gemm_f16_7x1x32_f16
401-
#define ggml_gemm_f16_7x1x64_f16_generic ggml_gemm_f16_7x1x64_f16
402-
#define ggml_gemm_f16_7x1x128_f16_generic ggml_gemm_f16_7x1x128_f16
403-
#define ggml_gemm_f32_7x1x16_f32_generic ggml_gemm_f32_7x1x16_f32
404-
#define ggml_gemm_f32_7x1x32_f32_generic ggml_gemm_f32_7x1x32_f32
405-
#define ggml_gemm_f32_7x1x64_f32_generic ggml_gemm_f32_7x1x64_f32
406-
#define ggml_gemm_f32_7x1x128_f32_generic ggml_gemm_f32_7x1x128_f32
305+
#define ggml_gemm_q8_0_4x4_q8_0_generic ggml_gemm_q8_0_4x4_q8_0
306+
#define ggml_gemm_q8_0_4x8_q8_0_generic ggml_gemm_q8_0_4x8_q8_0
407307
#endif

0 commit comments

Comments
 (0)