@@ -250,50 +250,11 @@ unicode_to_quad_convert(const Py_UCS4 *ucs4_str, npy_intp unicode_size_chars,
250250 return 0 ;
251251}
252252
253+ template <bool Aligned>
253254static int
254- unicode_to_quad_strided_loop_unaligned (PyArrayMethod_Context *context, char *const data[],
255- npy_intp const dimensions[], npy_intp const strides[],
256- void *NPY_UNUSED (auxdata))
257- {
258- npy_intp N = dimensions[0 ];
259- char *in_ptr = data[0 ];
260- char *out_ptr = data[1 ];
261- npy_intp in_stride = strides[0 ];
262- npy_intp out_stride = strides[1 ];
263-
264- PyArray_Descr *const *descrs = context->descriptors ;
265- QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)descrs[1 ];
266- QuadBackendType backend = descr_out->backend ;
267-
268- // Unicode strings are stored as UCS4 (4 bytes per character)
269- npy_intp unicode_size_chars = descrs[0 ]->elsize / 4 ;
270-
271- while (N--) {
272- Py_UCS4 *ucs4_str = (Py_UCS4 *)in_ptr;
273- quad_value out_val;
274-
275- if (unicode_to_quad_convert (ucs4_str, unicode_size_chars, backend, &out_val) < 0 ) {
276- return -1 ;
277- }
278-
279- if (backend == BACKEND_SLEEF) {
280- memcpy (out_ptr, &out_val.sleef_value , sizeof (Sleef_quad));
281- }
282- else {
283- memcpy (out_ptr, &out_val.longdouble_value , sizeof (long double ));
284- }
285-
286- in_ptr += in_stride;
287- out_ptr += out_stride;
288- }
289-
290- return 0 ;
291- }
292-
293- static int
294- unicode_to_quad_strided_loop_aligned (PyArrayMethod_Context *context, char *const data[],
295- npy_intp const dimensions[], npy_intp const strides[],
296- void *NPY_UNUSED (auxdata))
255+ unicode_to_quad_strided_loop (PyArrayMethod_Context *context, char *const data[],
256+ npy_intp const dimensions[], npy_intp const strides[],
257+ void *NPY_UNUSED (auxdata))
297258{
298259 npy_intp N = dimensions[0 ];
299260 char *in_ptr = data[0 ];
@@ -316,12 +277,7 @@ unicode_to_quad_strided_loop_aligned(PyArrayMethod_Context *context, char *const
316277 return -1 ;
317278 }
318279
319- if (backend == BACKEND_SLEEF) {
320- *(Sleef_quad *)out_ptr = out_val.sleef_value ;
321- }
322- else {
323- *(long double *)out_ptr = out_val.longdouble_value ;
324- }
280+ store_quad<Aligned>(out_ptr, out_val, backend);
325281
326282 in_ptr += in_stride;
327283 out_ptr += out_stride;
@@ -417,10 +373,11 @@ copy_string_to_ucs4(const char *str, Py_UCS4 *out_ucs4, npy_intp unicode_size_ch
417373 }
418374}
419375
376+ template <bool Aligned>
420377static int
421- quad_to_unicode_loop_unaligned (PyArrayMethod_Context *context, char *const data[],
422- npy_intp const dimensions[], npy_intp const strides[],
423- void *NPY_UNUSED (auxdata))
378+ quad_to_unicode_loop (PyArrayMethod_Context *context, char *const data[],
379+ npy_intp const dimensions[], npy_intp const strides[],
380+ void *NPY_UNUSED (auxdata))
424381{
425382 npy_intp N = dimensions[0 ];
426383 char *in_ptr = data[0 ];
@@ -433,70 +390,9 @@ quad_to_unicode_loop_unaligned(PyArrayMethod_Context *context, char *const data[
433390 QuadBackendType backend = descr_in->backend ;
434391
435392 npy_intp unicode_size_chars = descrs[1 ]->elsize / 4 ;
436- size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof (Sleef_quad) : sizeof (long double );
437393
438394 while (N--) {
439- quad_value in_val;
440- if (backend == BACKEND_SLEEF) {
441- memcpy (&in_val.sleef_value , in_ptr, sizeof (Sleef_quad));
442- }
443- else {
444- memcpy (&in_val.longdouble_value , in_ptr, sizeof (long double ));
445- }
446-
447- // Convert to Sleef_quad for Dragon4
448- Sleef_quad sleef_val = quad_to_sleef_quad (&in_val, backend);
449-
450- // Get string representation with adaptive notation
451- PyObject *py_str = quad_to_string_adaptive (&sleef_val, unicode_size_chars);
452- if (py_str == NULL ) {
453- return -1 ;
454- }
455-
456- const char *temp_str = PyUnicode_AsUTF8 (py_str);
457- if (temp_str == NULL ) {
458- Py_DECREF (py_str);
459- return -1 ;
460- }
461-
462- // Convert char string to UCS4 and store in output
463- Py_UCS4 *out_ucs4 = (Py_UCS4 *)out_ptr;
464- copy_string_to_ucs4 (temp_str, out_ucs4, unicode_size_chars);
465-
466- Py_DECREF (py_str);
467-
468- in_ptr += in_stride;
469- out_ptr += out_stride;
470- }
471-
472- return 0 ;
473- }
474-
475- static int
476- quad_to_unicode_loop_aligned (PyArrayMethod_Context *context, char *const data[],
477- npy_intp const dimensions[], npy_intp const strides[],
478- void *NPY_UNUSED (auxdata))
479- {
480- npy_intp N = dimensions[0 ];
481- char *in_ptr = data[0 ];
482- char *out_ptr = data[1 ];
483- npy_intp in_stride = strides[0 ];
484- npy_intp out_stride = strides[1 ];
485-
486- PyArray_Descr *const *descrs = context->descriptors ;
487- QuadPrecDTypeObject *descr_in = (QuadPrecDTypeObject *)descrs[0 ];
488- QuadBackendType backend = descr_in->backend ;
489-
490- npy_intp unicode_size_chars = descrs[1 ]->elsize / 4 ;
491-
492- while (N--) {
493- quad_value in_val;
494- if (backend == BACKEND_SLEEF) {
495- in_val.sleef_value = *(Sleef_quad *)in_ptr;
496- }
497- else {
498- in_val.longdouble_value = *(long double *)in_ptr;
499- }
395+ quad_value in_val = load_quad<Aligned>(in_ptr, backend);
500396
501397 // Convert to Sleef_quad for Dragon4
502398 Sleef_quad sleef_val = quad_to_sleef_quad (&in_val, backend);
@@ -598,44 +494,11 @@ bytes_to_quad_convert(const char *bytes_str, npy_intp bytes_size,
598494 return 0 ;
599495}
600496
497+ template <bool Aligned>
601498static int
602- bytes_to_quad_strided_loop_unaligned (PyArrayMethod_Context *context, char *const data[],
603- npy_intp const dimensions[], npy_intp const strides[],
604- void *NPY_UNUSED (auxdata))
605- {
606- npy_intp N = dimensions[0 ];
607- char *in_ptr = data[0 ];
608- char *out_ptr = data[1 ];
609- npy_intp in_stride = strides[0 ];
610- npy_intp out_stride = strides[1 ];
611-
612- PyArray_Descr *const *descrs = context->descriptors ;
613- QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)descrs[1 ];
614- QuadBackendType backend = descr_out->backend ;
615-
616- npy_intp bytes_size = descrs[0 ]->elsize ;
617- size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof (Sleef_quad) : sizeof (long double );
618-
619- while (N--) {
620- quad_value out_val;
621-
622- if (bytes_to_quad_convert (in_ptr, bytes_size, backend, &out_val) < 0 ) {
623- return -1 ;
624- }
625-
626- memcpy (out_ptr, &out_val, elem_size);
627-
628- in_ptr += in_stride;
629- out_ptr += out_stride;
630- }
631-
632- return 0 ;
633- }
634-
635- static int
636- bytes_to_quad_strided_loop_aligned (PyArrayMethod_Context *context, char *const data[],
637- npy_intp const dimensions[], npy_intp const strides[],
638- void *NPY_UNUSED (auxdata))
499+ bytes_to_quad_strided_loop (PyArrayMethod_Context *context, char *const data[],
500+ npy_intp const dimensions[], npy_intp const strides[],
501+ void *NPY_UNUSED (auxdata))
639502{
640503 npy_intp N = dimensions[0 ];
641504 char *in_ptr = data[0 ];
@@ -656,12 +519,7 @@ bytes_to_quad_strided_loop_aligned(PyArrayMethod_Context *context, char *const d
656519 return -1 ;
657520 }
658521
659- if (backend == BACKEND_SLEEF) {
660- *(Sleef_quad *)(out_ptr) = out_val.sleef_value ;
661- }
662- else {
663- *(long double *)(out_ptr) = out_val.longdouble_value ;
664- }
522+ store_quad<Aligned>(out_ptr, out_val, backend);
665523
666524 in_ptr += in_stride;
667525 out_ptr += out_stride;
@@ -718,10 +576,11 @@ copy_string_to_bytes(const char *str, char *out_bytes, npy_intp bytes_size)
718576 }
719577}
720578
579+ template <bool Aligned>
721580static int
722- quad_to_bytes_loop_unaligned (PyArrayMethod_Context *context, char *const data[],
723- npy_intp const dimensions[], npy_intp const strides[],
724- void *NPY_UNUSED (auxdata))
581+ quad_to_bytes_loop (PyArrayMethod_Context *context, char *const data[],
582+ npy_intp const dimensions[], npy_intp const strides[],
583+ void *NPY_UNUSED (auxdata))
725584{
726585 npy_intp N = dimensions[0 ];
727586 char *in_ptr = data[0 ];
@@ -734,16 +593,9 @@ quad_to_bytes_loop_unaligned(PyArrayMethod_Context *context, char *const data[],
734593 QuadBackendType backend = descr_in->backend ;
735594
736595 npy_intp bytes_size = descrs[1 ]->elsize ;
737- size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof (Sleef_quad) : sizeof (long double );
738596
739597 while (N--) {
740- quad_value in_val;
741- if (backend == BACKEND_SLEEF) {
742- memcpy (&in_val.sleef_value , in_ptr, sizeof (Sleef_quad));
743- }
744- else {
745- memcpy (&in_val.longdouble_value , in_ptr, sizeof (long double ));
746- }
598+ quad_value in_val = load_quad<Aligned>(in_ptr, backend);
747599 Sleef_quad sleef_val = quad_to_sleef_quad (&in_val, backend);
748600 PyObject *py_str = quad_to_string_adaptive (&sleef_val, bytes_size);
749601 if (py_str == NULL ) {
@@ -766,50 +618,6 @@ quad_to_bytes_loop_unaligned(PyArrayMethod_Context *context, char *const data[],
766618 return 0 ;
767619}
768620
769- static int
770- quad_to_bytes_loop_aligned (PyArrayMethod_Context *context, char *const data[],
771- npy_intp const dimensions[], npy_intp const strides[],
772- void *NPY_UNUSED (auxdata))
773- {
774- npy_intp N = dimensions[0 ];
775- char *in_ptr = data[0 ];
776- char *out_ptr = data[1 ];
777- npy_intp in_stride = strides[0 ];
778- npy_intp out_stride = strides[1 ];
779-
780- PyArray_Descr *const *descrs = context->descriptors ;
781- QuadPrecDTypeObject *descr_in = (QuadPrecDTypeObject *)descrs[0 ];
782- QuadBackendType backend = descr_in->backend ;
783-
784- npy_intp bytes_size = descrs[1 ]->elsize ;
785-
786- while (N--) {
787- quad_value in_val;
788- if (backend == BACKEND_SLEEF) {
789- in_val.sleef_value = *(Sleef_quad *)in_ptr;
790- }
791- else {
792- in_val.longdouble_value = *(long double *)in_ptr;
793- }
794- Sleef_quad sleef_val = quad_to_sleef_quad (&in_val, backend);
795- PyObject *py_str = quad_to_string_adaptive (&sleef_val, bytes_size);
796- if (py_str == NULL ) {
797- return -1 ;
798- }
799- const char *temp_str = PyUnicode_AsUTF8 (py_str);
800- if (temp_str == NULL ) {
801- Py_DECREF (py_str);
802- return -1 ;
803- }
804-
805- copy_string_to_bytes (temp_str, out_ptr, bytes_size); Py_DECREF (py_str);
806- in_ptr += in_stride;
807- out_ptr += out_stride;
808- }
809-
810- return 0 ;
811- }
812-
813621// Tag dispatching to ensure npy_bool/npy_ubyte and npy_half/npy_ushort do not alias in templates
814622// see e.g. https://stackoverflow.com/q/32522279
815623struct spec_npy_bool {};
@@ -1528,8 +1336,8 @@ init_casts_internal(void)
15281336 PyArray_DTypeMeta **unicode_to_quad_dtypes = new PyArray_DTypeMeta *[2 ]{&PyArray_UnicodeDType, &QuadPrecDType};
15291337 PyType_Slot *unicode_to_quad_slots = new PyType_Slot[4 ]{
15301338 {NPY_METH_resolve_descriptors, (void *)&unicode_to_quad_resolve_descriptors},
1531- {NPY_METH_strided_loop, (void *)&unicode_to_quad_strided_loop_aligned },
1532- {NPY_METH_unaligned_strided_loop, (void *)&unicode_to_quad_strided_loop_unaligned },
1339+ {NPY_METH_strided_loop, (void *)&unicode_to_quad_strided_loop< true > },
1340+ {NPY_METH_unaligned_strided_loop, (void *)&unicode_to_quad_strided_loop< false > },
15331341 {0 , nullptr }};
15341342
15351343 PyArrayMethod_Spec *unicode_to_quad_spec = new PyArrayMethod_Spec{
@@ -1547,8 +1355,8 @@ init_casts_internal(void)
15471355 PyArray_DTypeMeta **quad_to_unicode_dtypes = new PyArray_DTypeMeta *[2 ]{&QuadPrecDType, &PyArray_UnicodeDType};
15481356 PyType_Slot *quad_to_unicode_slots = new PyType_Slot[4 ]{
15491357 {NPY_METH_resolve_descriptors, (void *)&quad_to_unicode_resolve_descriptors},
1550- {NPY_METH_strided_loop, (void *)&quad_to_unicode_loop_aligned },
1551- {NPY_METH_unaligned_strided_loop, (void *)&quad_to_unicode_loop_unaligned },
1358+ {NPY_METH_strided_loop, (void *)&quad_to_unicode_loop< true > },
1359+ {NPY_METH_unaligned_strided_loop, (void *)&quad_to_unicode_loop< false > },
15521360 {0 , nullptr }};
15531361
15541362 PyArrayMethod_Spec *quad_to_unicode_spec = new PyArrayMethod_Spec{
@@ -1566,8 +1374,8 @@ init_casts_internal(void)
15661374 PyArray_DTypeMeta **bytes_to_quad_dtypes = new PyArray_DTypeMeta *[2 ]{&PyArray_BytesDType, &QuadPrecDType};
15671375 PyType_Slot *bytes_to_quad_slots = new PyType_Slot[4 ]{
15681376 {NPY_METH_resolve_descriptors, (void *)&bytes_to_quad_resolve_descriptors},
1569- {NPY_METH_strided_loop, (void *)&bytes_to_quad_strided_loop_aligned },
1570- {NPY_METH_unaligned_strided_loop, (void *)&bytes_to_quad_strided_loop_unaligned },
1377+ {NPY_METH_strided_loop, (void *)&bytes_to_quad_strided_loop< true > },
1378+ {NPY_METH_unaligned_strided_loop, (void *)&bytes_to_quad_strided_loop< false > },
15711379 {0 , nullptr }};
15721380
15731381 PyArrayMethod_Spec *bytes_to_quad_spec = new PyArrayMethod_Spec{
@@ -1585,8 +1393,8 @@ init_casts_internal(void)
15851393 PyArray_DTypeMeta **quad_to_bytes_dtypes = new PyArray_DTypeMeta *[2 ]{&QuadPrecDType, &PyArray_BytesDType};
15861394 PyType_Slot *quad_to_bytes_slots = new PyType_Slot[4 ]{
15871395 {NPY_METH_resolve_descriptors, (void *)&quad_to_bytes_resolve_descriptors},
1588- {NPY_METH_strided_loop, (void *)&quad_to_bytes_loop_aligned },
1589- {NPY_METH_unaligned_strided_loop, (void *)&quad_to_bytes_loop_unaligned },
1396+ {NPY_METH_strided_loop, (void *)&quad_to_bytes_loop< true > },
1397+ {NPY_METH_unaligned_strided_loop, (void *)&quad_to_bytes_loop< false > },
15901398 {0 , nullptr }};
15911399
15921400 PyArrayMethod_Spec *quad_to_bytes_spec = new PyArrayMethod_Spec{
0 commit comments