Skip to content

Commit 5e3fce6

Browse files
committed
optimizing
1 parent c22e9f4 commit 5e3fce6

File tree

2 files changed

+89
-222
lines changed

2 files changed

+89
-222
lines changed

quaddtype/numpy_quaddtype/src/casts.cpp

Lines changed: 28 additions & 220 deletions
Original file line numberDiff line numberDiff line change
@@ -250,50 +250,11 @@ unicode_to_quad_convert(const Py_UCS4 *ucs4_str, npy_intp unicode_size_chars,
250250
return 0;
251251
}
252252

253+
template <bool Aligned>
253254
static int
254-
unicode_to_quad_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[],
255-
npy_intp const dimensions[], npy_intp const strides[],
256-
void *NPY_UNUSED(auxdata))
257-
{
258-
npy_intp N = dimensions[0];
259-
char *in_ptr = data[0];
260-
char *out_ptr = data[1];
261-
npy_intp in_stride = strides[0];
262-
npy_intp out_stride = strides[1];
263-
264-
PyArray_Descr *const *descrs = context->descriptors;
265-
QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)descrs[1];
266-
QuadBackendType backend = descr_out->backend;
267-
268-
// Unicode strings are stored as UCS4 (4 bytes per character)
269-
npy_intp unicode_size_chars = descrs[0]->elsize / 4;
270-
271-
while (N--) {
272-
Py_UCS4 *ucs4_str = (Py_UCS4 *)in_ptr;
273-
quad_value out_val;
274-
275-
if (unicode_to_quad_convert(ucs4_str, unicode_size_chars, backend, &out_val) < 0) {
276-
return -1;
277-
}
278-
279-
if (backend == BACKEND_SLEEF) {
280-
memcpy(out_ptr, &out_val.sleef_value, sizeof(Sleef_quad));
281-
}
282-
else {
283-
memcpy(out_ptr, &out_val.longdouble_value, sizeof(long double));
284-
}
285-
286-
in_ptr += in_stride;
287-
out_ptr += out_stride;
288-
}
289-
290-
return 0;
291-
}
292-
293-
static int
294-
unicode_to_quad_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[],
295-
npy_intp const dimensions[], npy_intp const strides[],
296-
void *NPY_UNUSED(auxdata))
255+
unicode_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[],
256+
npy_intp const dimensions[], npy_intp const strides[],
257+
void *NPY_UNUSED(auxdata))
297258
{
298259
npy_intp N = dimensions[0];
299260
char *in_ptr = data[0];
@@ -316,12 +277,7 @@ unicode_to_quad_strided_loop_aligned(PyArrayMethod_Context *context, char *const
316277
return -1;
317278
}
318279

319-
if (backend == BACKEND_SLEEF) {
320-
*(Sleef_quad *)out_ptr = out_val.sleef_value;
321-
}
322-
else {
323-
*(long double *)out_ptr = out_val.longdouble_value;
324-
}
280+
store_quad<Aligned>(out_ptr, out_val, backend);
325281

326282
in_ptr += in_stride;
327283
out_ptr += out_stride;
@@ -417,10 +373,11 @@ copy_string_to_ucs4(const char *str, Py_UCS4 *out_ucs4, npy_intp unicode_size_ch
417373
}
418374
}
419375

376+
template <bool Aligned>
420377
static int
421-
quad_to_unicode_loop_unaligned(PyArrayMethod_Context *context, char *const data[],
422-
npy_intp const dimensions[], npy_intp const strides[],
423-
void *NPY_UNUSED(auxdata))
378+
quad_to_unicode_loop(PyArrayMethod_Context *context, char *const data[],
379+
npy_intp const dimensions[], npy_intp const strides[],
380+
void *NPY_UNUSED(auxdata))
424381
{
425382
npy_intp N = dimensions[0];
426383
char *in_ptr = data[0];
@@ -433,70 +390,9 @@ quad_to_unicode_loop_unaligned(PyArrayMethod_Context *context, char *const data[
433390
QuadBackendType backend = descr_in->backend;
434391

435392
npy_intp unicode_size_chars = descrs[1]->elsize / 4;
436-
size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double);
437393

438394
while (N--) {
439-
quad_value in_val;
440-
if (backend == BACKEND_SLEEF) {
441-
memcpy(&in_val.sleef_value, in_ptr, sizeof(Sleef_quad));
442-
}
443-
else {
444-
memcpy(&in_val.longdouble_value, in_ptr, sizeof(long double));
445-
}
446-
447-
// Convert to Sleef_quad for Dragon4
448-
Sleef_quad sleef_val = quad_to_sleef_quad(&in_val, backend);
449-
450-
// Get string representation with adaptive notation
451-
PyObject *py_str = quad_to_string_adaptive(&sleef_val, unicode_size_chars);
452-
if (py_str == NULL) {
453-
return -1;
454-
}
455-
456-
const char *temp_str = PyUnicode_AsUTF8(py_str);
457-
if (temp_str == NULL) {
458-
Py_DECREF(py_str);
459-
return -1;
460-
}
461-
462-
// Convert char string to UCS4 and store in output
463-
Py_UCS4 *out_ucs4 = (Py_UCS4 *)out_ptr;
464-
copy_string_to_ucs4(temp_str, out_ucs4, unicode_size_chars);
465-
466-
Py_DECREF(py_str);
467-
468-
in_ptr += in_stride;
469-
out_ptr += out_stride;
470-
}
471-
472-
return 0;
473-
}
474-
475-
static int
476-
quad_to_unicode_loop_aligned(PyArrayMethod_Context *context, char *const data[],
477-
npy_intp const dimensions[], npy_intp const strides[],
478-
void *NPY_UNUSED(auxdata))
479-
{
480-
npy_intp N = dimensions[0];
481-
char *in_ptr = data[0];
482-
char *out_ptr = data[1];
483-
npy_intp in_stride = strides[0];
484-
npy_intp out_stride = strides[1];
485-
486-
PyArray_Descr *const *descrs = context->descriptors;
487-
QuadPrecDTypeObject *descr_in = (QuadPrecDTypeObject *)descrs[0];
488-
QuadBackendType backend = descr_in->backend;
489-
490-
npy_intp unicode_size_chars = descrs[1]->elsize / 4;
491-
492-
while (N--) {
493-
quad_value in_val;
494-
if (backend == BACKEND_SLEEF) {
495-
in_val.sleef_value = *(Sleef_quad *)in_ptr;
496-
}
497-
else {
498-
in_val.longdouble_value = *(long double *)in_ptr;
499-
}
395+
quad_value in_val = load_quad<Aligned>(in_ptr, backend);
500396

501397
// Convert to Sleef_quad for Dragon4
502398
Sleef_quad sleef_val = quad_to_sleef_quad(&in_val, backend);
@@ -598,44 +494,11 @@ bytes_to_quad_convert(const char *bytes_str, npy_intp bytes_size,
598494
return 0;
599495
}
600496

497+
template <bool Aligned>
601498
static int
602-
bytes_to_quad_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[],
603-
npy_intp const dimensions[], npy_intp const strides[],
604-
void *NPY_UNUSED(auxdata))
605-
{
606-
npy_intp N = dimensions[0];
607-
char *in_ptr = data[0];
608-
char *out_ptr = data[1];
609-
npy_intp in_stride = strides[0];
610-
npy_intp out_stride = strides[1];
611-
612-
PyArray_Descr *const *descrs = context->descriptors;
613-
QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)descrs[1];
614-
QuadBackendType backend = descr_out->backend;
615-
616-
npy_intp bytes_size = descrs[0]->elsize;
617-
size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double);
618-
619-
while (N--) {
620-
quad_value out_val;
621-
622-
if (bytes_to_quad_convert(in_ptr, bytes_size, backend, &out_val) < 0) {
623-
return -1;
624-
}
625-
626-
memcpy(out_ptr, &out_val, elem_size);
627-
628-
in_ptr += in_stride;
629-
out_ptr += out_stride;
630-
}
631-
632-
return 0;
633-
}
634-
635-
static int
636-
bytes_to_quad_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[],
637-
npy_intp const dimensions[], npy_intp const strides[],
638-
void *NPY_UNUSED(auxdata))
499+
bytes_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[],
500+
npy_intp const dimensions[], npy_intp const strides[],
501+
void *NPY_UNUSED(auxdata))
639502
{
640503
npy_intp N = dimensions[0];
641504
char *in_ptr = data[0];
@@ -656,12 +519,7 @@ bytes_to_quad_strided_loop_aligned(PyArrayMethod_Context *context, char *const d
656519
return -1;
657520
}
658521

659-
if (backend == BACKEND_SLEEF) {
660-
*(Sleef_quad *)(out_ptr) = out_val.sleef_value;
661-
}
662-
else {
663-
*(long double *)(out_ptr) = out_val.longdouble_value;
664-
}
522+
store_quad<Aligned>(out_ptr, out_val, backend);
665523

666524
in_ptr += in_stride;
667525
out_ptr += out_stride;
@@ -718,10 +576,11 @@ copy_string_to_bytes(const char *str, char *out_bytes, npy_intp bytes_size)
718576
}
719577
}
720578

579+
template <bool Aligned>
721580
static int
722-
quad_to_bytes_loop_unaligned(PyArrayMethod_Context *context, char *const data[],
723-
npy_intp const dimensions[], npy_intp const strides[],
724-
void *NPY_UNUSED(auxdata))
581+
quad_to_bytes_loop(PyArrayMethod_Context *context, char *const data[],
582+
npy_intp const dimensions[], npy_intp const strides[],
583+
void *NPY_UNUSED(auxdata))
725584
{
726585
npy_intp N = dimensions[0];
727586
char *in_ptr = data[0];
@@ -734,16 +593,9 @@ quad_to_bytes_loop_unaligned(PyArrayMethod_Context *context, char *const data[],
734593
QuadBackendType backend = descr_in->backend;
735594

736595
npy_intp bytes_size = descrs[1]->elsize;
737-
size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double);
738596

739597
while (N--) {
740-
quad_value in_val;
741-
if (backend == BACKEND_SLEEF) {
742-
memcpy(&in_val.sleef_value, in_ptr, sizeof(Sleef_quad));
743-
}
744-
else {
745-
memcpy(&in_val.longdouble_value, in_ptr, sizeof(long double));
746-
}
598+
quad_value in_val = load_quad<Aligned>(in_ptr, backend);
747599
Sleef_quad sleef_val = quad_to_sleef_quad(&in_val, backend);
748600
PyObject *py_str = quad_to_string_adaptive(&sleef_val, bytes_size);
749601
if (py_str == NULL) {
@@ -766,50 +618,6 @@ quad_to_bytes_loop_unaligned(PyArrayMethod_Context *context, char *const data[],
766618
return 0;
767619
}
768620

769-
static int
770-
quad_to_bytes_loop_aligned(PyArrayMethod_Context *context, char *const data[],
771-
npy_intp const dimensions[], npy_intp const strides[],
772-
void *NPY_UNUSED(auxdata))
773-
{
774-
npy_intp N = dimensions[0];
775-
char *in_ptr = data[0];
776-
char *out_ptr = data[1];
777-
npy_intp in_stride = strides[0];
778-
npy_intp out_stride = strides[1];
779-
780-
PyArray_Descr *const *descrs = context->descriptors;
781-
QuadPrecDTypeObject *descr_in = (QuadPrecDTypeObject *)descrs[0];
782-
QuadBackendType backend = descr_in->backend;
783-
784-
npy_intp bytes_size = descrs[1]->elsize;
785-
786-
while (N--) {
787-
quad_value in_val;
788-
if (backend == BACKEND_SLEEF) {
789-
in_val.sleef_value = *(Sleef_quad *)in_ptr;
790-
}
791-
else {
792-
in_val.longdouble_value = *(long double *)in_ptr;
793-
}
794-
Sleef_quad sleef_val = quad_to_sleef_quad(&in_val, backend);
795-
PyObject *py_str = quad_to_string_adaptive(&sleef_val, bytes_size);
796-
if (py_str == NULL) {
797-
return -1;
798-
}
799-
const char *temp_str = PyUnicode_AsUTF8(py_str);
800-
if (temp_str == NULL) {
801-
Py_DECREF(py_str);
802-
return -1;
803-
}
804-
805-
copy_string_to_bytes(temp_str, out_ptr, bytes_size); Py_DECREF(py_str);
806-
in_ptr += in_stride;
807-
out_ptr += out_stride;
808-
}
809-
810-
return 0;
811-
}
812-
813621
// Tag dispatching to ensure npy_bool/npy_ubyte and npy_half/npy_ushort do not alias in templates
814622
// see e.g. https://stackoverflow.com/q/32522279
815623
struct spec_npy_bool {};
@@ -1528,8 +1336,8 @@ init_casts_internal(void)
15281336
PyArray_DTypeMeta **unicode_to_quad_dtypes = new PyArray_DTypeMeta *[2]{&PyArray_UnicodeDType, &QuadPrecDType};
15291337
PyType_Slot *unicode_to_quad_slots = new PyType_Slot[4]{
15301338
{NPY_METH_resolve_descriptors, (void *)&unicode_to_quad_resolve_descriptors},
1531-
{NPY_METH_strided_loop, (void *)&unicode_to_quad_strided_loop_aligned},
1532-
{NPY_METH_unaligned_strided_loop, (void *)&unicode_to_quad_strided_loop_unaligned},
1339+
{NPY_METH_strided_loop, (void *)&unicode_to_quad_strided_loop<true>},
1340+
{NPY_METH_unaligned_strided_loop, (void *)&unicode_to_quad_strided_loop<false>},
15331341
{0, nullptr}};
15341342

15351343
PyArrayMethod_Spec *unicode_to_quad_spec = new PyArrayMethod_Spec{
@@ -1547,8 +1355,8 @@ init_casts_internal(void)
15471355
PyArray_DTypeMeta **quad_to_unicode_dtypes = new PyArray_DTypeMeta *[2]{&QuadPrecDType, &PyArray_UnicodeDType};
15481356
PyType_Slot *quad_to_unicode_slots = new PyType_Slot[4]{
15491357
{NPY_METH_resolve_descriptors, (void *)&quad_to_unicode_resolve_descriptors},
1550-
{NPY_METH_strided_loop, (void *)&quad_to_unicode_loop_aligned},
1551-
{NPY_METH_unaligned_strided_loop, (void *)&quad_to_unicode_loop_unaligned},
1358+
{NPY_METH_strided_loop, (void *)&quad_to_unicode_loop<true>},
1359+
{NPY_METH_unaligned_strided_loop, (void *)&quad_to_unicode_loop<false>},
15521360
{0, nullptr}};
15531361

15541362
PyArrayMethod_Spec *quad_to_unicode_spec = new PyArrayMethod_Spec{
@@ -1566,8 +1374,8 @@ init_casts_internal(void)
15661374
PyArray_DTypeMeta **bytes_to_quad_dtypes = new PyArray_DTypeMeta *[2]{&PyArray_BytesDType, &QuadPrecDType};
15671375
PyType_Slot *bytes_to_quad_slots = new PyType_Slot[4]{
15681376
{NPY_METH_resolve_descriptors, (void *)&bytes_to_quad_resolve_descriptors},
1569-
{NPY_METH_strided_loop, (void *)&bytes_to_quad_strided_loop_aligned},
1570-
{NPY_METH_unaligned_strided_loop, (void *)&bytes_to_quad_strided_loop_unaligned},
1377+
{NPY_METH_strided_loop, (void *)&bytes_to_quad_strided_loop<true>},
1378+
{NPY_METH_unaligned_strided_loop, (void *)&bytes_to_quad_strided_loop<false>},
15711379
{0, nullptr}};
15721380

15731381
PyArrayMethod_Spec *bytes_to_quad_spec = new PyArrayMethod_Spec{
@@ -1585,8 +1393,8 @@ init_casts_internal(void)
15851393
PyArray_DTypeMeta **quad_to_bytes_dtypes = new PyArray_DTypeMeta *[2]{&QuadPrecDType, &PyArray_BytesDType};
15861394
PyType_Slot *quad_to_bytes_slots = new PyType_Slot[4]{
15871395
{NPY_METH_resolve_descriptors, (void *)&quad_to_bytes_resolve_descriptors},
1588-
{NPY_METH_strided_loop, (void *)&quad_to_bytes_loop_aligned},
1589-
{NPY_METH_unaligned_strided_loop, (void *)&quad_to_bytes_loop_unaligned},
1396+
{NPY_METH_strided_loop, (void *)&quad_to_bytes_loop<true>},
1397+
{NPY_METH_unaligned_strided_loop, (void *)&quad_to_bytes_loop<false>},
15901398
{0, nullptr}};
15911399

15921400
PyArrayMethod_Spec *quad_to_bytes_spec = new PyArrayMethod_Spec{

0 commit comments

Comments
 (0)