@@ -425,61 +425,98 @@ dchar decode(in dchar[] s, ref size_t idx)
425425 return c; // dummy return
426426 }
427427
428-
429428/* =================== Encode ======================= */
430429
431- /* ******************************
432- * Encodes character c and appends it to array s[].
430+ /**
431+ * Encodes `c` into the static array `buf`.
432+ *
433+ * Params:
434+ * buf = destination of encoded character
435+ * c = character to encode
436+ *
437+ * Returns:
438+ * The length of the encoded character (a number between `1` and `4` for
439+ * `char[4]` buffers and a number between `1` and `2` for `wchar[2]` buffers)
440+ * or `0` in case of failure (no branch matches, i.e. `c` is above `0x10FFFF`).
433441 */
434- @safe pure nothrow
435- void encode (ref char [] s, dchar c)
436- in
442+ @nogc nothrow pure @safe
443+ size_t encode (out char [4 ] buf, dchar c)
444+ in
445+ {
446+ assert (isValidDchar(c));
447+ }
448+ do
449+ {
450+ if (c <= 0x7F ) // one code unit: the value is stored as-is
437451 {
438- assert (isValidDchar(c));
452+ buf[0 ] = cast (char ) c;
453+ return 1 ;
439454 }
440- do
455+ else if (c <= 0x7FF ) // two code units: 0xC0 lead, one 0x80 trail of 6 bits
441456 {
442- char [] r = s;
443-
444- if (c <= 0x7F )
445- {
446- r ~= cast (char ) c;
447- }
448- else
449- {
450- char [4 ] buf;
451- uint L;
457+ buf[0 ] = cast (char )(0xC0 | (c >> 6 ));
458+ buf[1 ] = cast (char )(0x80 | (c & 0x3F ));
459+ return 2 ;
460+ }
461+ else if (c <= 0xFFFF ) // three code units: 0xE0 lead, two 0x80 trails
462+ {
463+ buf[0 ] = cast (char )(0xE0 | (c >> 12 ));
464+ buf[1 ] = cast (char )(0x80 | ((c >> 6 ) & 0x3F ));
465+ buf[2 ] = cast (char )(0x80 | (c & 0x3F ));
466+ return 3 ;
467+ }
468+ else if (c <= 0x10FFFF ) // four code units: 0xF0 lead, three 0x80 trails
469+ {
470+ buf[0 ] = cast (char )(0xF0 | (c >> 18 ));
471+ buf[1 ] = cast (char )(0x80 | ((c >> 12 ) & 0x3F ));
472+ buf[2 ] = cast (char )(0x80 | ((c >> 6 ) & 0x3F ));
473+ buf[3 ] = cast (char )(0x80 | (c & 0x3F ));
474+ return 4 ;
475+ }
476+ return 0 ; // c > 0x10FFFF: no branch above stored anything
477+ }
452478
453- if (c <= 0x7FF )
454- {
455- buf[0 ] = cast (char )(0xC0 | (c >> 6 ));
456- buf[1 ] = cast (char )(0x80 | (c & 0x3F ));
457- L = 2 ;
458- }
459- else if (c <= 0xFFFF )
460- {
461- buf[0 ] = cast (char )(0xE0 | (c >> 12 ));
462- buf[1 ] = cast (char )(0x80 | ((c >> 6 ) & 0x3F ));
463- buf[2 ] = cast (char )(0x80 | (c & 0x3F ));
464- L = 3 ;
465- }
466- else if (c <= 0x10FFFF )
467- {
468- buf[0 ] = cast (char )(0xF0 | (c >> 18 ));
469- buf[1 ] = cast (char )(0x80 | ((c >> 12 ) & 0x3F ));
470- buf[2 ] = cast (char )(0x80 | ((c >> 6 ) & 0x3F ));
471- buf[3 ] = cast (char )(0x80 | (c & 0x3F ));
472- L = 4 ;
473- }
474- else
475- {
476- assert (0 );
477- }
478- r ~= buf[0 .. L];
479- }
480- s = r;
479+ /// ditto
480+ @nogc nothrow pure @safe
481+ size_t encode (out wchar [2 ] buf, dchar c)
482+ in
483+ {
484+ assert (isValidDchar(c));
485+ }
486+ do
487+ {
488+ if (c <= 0xFFFF ) // fits in one 16-bit code unit
489+ {
490+ buf[0 ] = cast (wchar ) c;
491+ return 1 ;
481492 }
493+ else if (c <= 0x10FFFF ) // needs two code units (surrogate pair)
494+ {
495+ buf[0 ] = cast (wchar ) ((((c - 0x10000 ) >> 10 ) & 0x3FF ) + 0xD800 ); // upper 10 bits of (c - 0x10000), offset by 0xD800
496+ buf[1 ] = cast (wchar ) (((c - 0x10000 ) & 0x3FF ) + 0xDC00 ); // lower 10 bits of (c - 0x10000), offset by 0xDC00
497+ return 2 ;
498+ }
499+ return 0 ; // c > 0x10FFFF: neither branch above stored anything
500+ }
482501
502+ /**
503+ * Encodes character `c` into a temporary `char[4]` buffer and appends the resulting code units to the array `s`.
504+ */
505+ nothrow pure @safe
506+ void encode (ref char [] s, dchar c)
507+ in
508+ {
509+ assert (isValidDchar(c));
510+ }
511+ do
512+ {
513+ char [4 ] buf; // scratch space filled by the fixed-buffer encode call below
514+ size_t L = encode(buf, c); // L = number of code units placed in buf
515+ assert (L); // If L is 0, then encode has failed
516+ s ~= buf[0 .. L]; // append only the L code units that were produced
517+ }
518+
519+ ///
483520unittest
484521{
485522 debug (utf) printf(" utf.encode.unittest\n " );
@@ -499,43 +536,32 @@ unittest
499536 assert (s == " abcda\xC2\xA9\xE2\x89\xA0 " );
500537}
501538
502- /* * ditto */
503- @safe pure nothrow
539+ /// ditto
540+ nothrow pure @safe
504541void encode (ref wchar [] s, dchar c)
505- in
506- {
507- assert (isValidDchar(c));
508- }
509- do
510- {
511- wchar [] r = s;
512-
513- if (c <= 0xFFFF )
514- {
515- r ~= cast (wchar ) c;
516- }
517- else
518- {
519- wchar [2 ] buf;
520-
521- buf[0 ] = cast (wchar ) ((((c - 0x10000 ) >> 10 ) & 0x3FF ) + 0xD800 );
522- buf[1 ] = cast (wchar ) (((c - 0x10000 ) & 0x3FF ) + 0xDC00 );
523- r ~= buf;
524- }
525- s = r;
526- }
542+ in
543+ {
544+ assert (isValidDchar(c));
545+ }
546+ do
547+ {
548+ wchar [2 ] buf; // scratch space filled by the fixed-buffer encode call below
549+ size_t L = encode(buf, c); // L = number of code units placed in buf
550+ assert (L); // a zero length means the fixed-buffer encode reported failure
551+ s ~= buf[0 .. L]; // append only the L code units that were produced
552+ }
527553
528- /* * ditto */
529- @safe pure nothrow
554+ /// ditto
555+ nothrow pure @safe
530556void encode (ref dchar [] s, dchar c)
531- in
532- {
533- assert (isValidDchar(c));
534- }
535- do
536- {
537- s ~= c;
538- }
557+ in
558+ {
559+ assert (isValidDchar(c));
560+ }
561+ do
562+ {
563+ s ~= c; // c is appended unchanged as a single element
564+ }
539565
540566/**
541567Returns the code length of $(D c) in the encoding using $(D C) as a
0 commit comments