@@ -2612,8 +2612,7 @@ AK_DR_ProcessRecord(AK_DelimitedReader *dr,
26122612 PyObject * line_select
26132613 )
26142614{
2615- Py_UCS4 c ;
2616- Py_ssize_t pos , linelen ;
2615+ Py_ssize_t linelen ;
26172616 unsigned int kind ;
26182617 const void * data ;
26192618 PyObject * record ;
@@ -2667,20 +2666,43 @@ AK_DR_ProcessRecord(AK_DelimitedReader *dr,
26672666
26682667 kind = PyUnicode_KIND (record );
26692668 data = PyUnicode_DATA (record );
2670- pos = 0 ;
26712669 linelen = PyUnicode_GET_LENGTH (record );
2672- while (linelen -- ) {
2673- c = PyUnicode_READ (kind , data , pos );
2674- if (c == '\0' ) {
2675- Py_DECREF (record );
2676- PyErr_Format (PyExc_RuntimeError , "line contains NUL" );
2677- return -1 ;
2670+
2671+ // NOTE: we used to check that the read character was not \0; this seems rare enough that it does not need to be handled explicitly, as AK_DR_process_char will treat it as an end of record
2672+ switch (kind ) {
2673+ case PyUnicode_1BYTE_KIND : {
2674+ Py_UCS1 * uc = (Py_UCS1 * )data ;
2675+ Py_UCS1 * uc_end = uc + linelen ;
2676+ while (uc < uc_end ) {
2677+ if (AK_DR_process_char (dr , cpg , * uc ++ )) {
2678+ Py_DECREF (record );
2679+ return -1 ;
2680+ }
2681+ }
2682+ break ;
26782683 }
2679- if (AK_DR_process_char (dr , cpg , c )) {
2680- Py_DECREF (record );
2681- return -1 ;
2684+ case PyUnicode_2BYTE_KIND : {
2685+ Py_UCS2 * uc = (Py_UCS2 * )data ;
2686+ Py_UCS2 * uc_end = uc + linelen ;
2687+ while (uc < uc_end ) {
2688+ if (AK_DR_process_char (dr , cpg , * uc ++ )) {
2689+ Py_DECREF (record );
2690+ return -1 ;
2691+ }
2692+ }
2693+ break ;
2694+ }
2695+ case PyUnicode_4BYTE_KIND : {
2696+ Py_UCS4 * uc = (Py_UCS4 * )data ;
2697+ Py_UCS4 * uc_end = uc + linelen ;
2698+ while (uc < uc_end ) {
2699+ if (AK_DR_process_char (dr , cpg , * uc ++ )) {
2700+ Py_DECREF (record );
2701+ return -1 ;
2702+ }
2703+ }
2704+ break ;
26822705 }
2683- pos ++ ;
26842706 }
26852707 Py_DECREF (record );
26862708 // force signaling we are at the end of a line
0 commit comments