|
| 1 | +{-# LANGUAGE BangPatterns #-} |
| 2 | +{-# LANGUAGE PatternSynonyms #-} |
| 3 | + |
| 4 | +-- | @Data.Builder.Catenable@ specialized to @ShortText@. |
| 5 | +module Data.Builder.Catenable.ShortText |
| 6 | + ( -- * Type |
| 7 | + Builder (..) |
| 8 | + |
| 9 | + -- * Convenient infix operators |
| 10 | + , pattern (:<) |
| 11 | + , pattern (:>) |
| 12 | + |
| 13 | + -- * Run |
| 14 | + , run |
| 15 | + |
| 16 | + -- * Properties |
| 17 | + , length |
| 18 | + |
| 19 | + -- * Create |
| 20 | + , shortText |
| 21 | + , char |
| 22 | + , word32Dec |
| 23 | + , word64Dec |
| 24 | + , int32Dec |
| 25 | + , int64Dec |
| 26 | + ) where |
| 27 | + |
| 28 | +import Prelude hiding (length) |
| 29 | + |
| 30 | +import Control.Monad.ST (ST, runST) |
| 31 | +import Data.ByteString.Short.Internal (ShortByteString (SBS)) |
| 32 | +import Data.Bytes.Chunks (Chunks (ChunksNil)) |
| 33 | +import Data.Int (Int32, Int64) |
| 34 | +import Data.Primitive (ByteArray (ByteArray)) |
| 35 | +import Data.String (IsString (fromString)) |
| 36 | +import Data.Text.Short (ShortText) |
| 37 | +import Data.Word (Word32, Word64) |
| 38 | + |
| 39 | +import qualified Arithmetic.Nat as Nat |
| 40 | +import qualified Data.Bytes.Builder as BB |
| 41 | +import qualified Data.Bytes.Builder.Bounded as Bounded |
| 42 | +import qualified Data.Bytes.Builder.Unsafe as BBU |
| 43 | +import qualified Data.Bytes.Chunks as Chunks |
| 44 | +import qualified Data.Text.Short as TS |
| 45 | +import qualified Data.Text.Short.Unsafe as TS |
| 46 | + |
| 47 | +infixr 5 :< |
| 48 | +infixl 5 :> |
| 49 | + |
| 50 | +data Builder |
| 51 | + = Empty |
| 52 | + | Cons !ShortText !Builder |
| 53 | + | Snoc !Builder !ShortText |
| 54 | + | Append !Builder !Builder |
| 55 | + |
| 56 | +shortText :: ShortText -> Builder |
| 57 | +shortText !t = Cons t Empty |
| 58 | + |
| 59 | +char :: Char -> Builder |
| 60 | +char !c = Cons (TS.singleton c) Empty |
| 61 | + |
| 62 | +word32Dec :: Word32 -> Builder |
| 63 | +word32Dec !i = Cons (ba2st (Bounded.run Nat.constant (Bounded.word32Dec i))) Empty |
| 64 | + |
| 65 | +word64Dec :: Word64 -> Builder |
| 66 | +word64Dec !i = Cons (ba2st (Bounded.run Nat.constant (Bounded.word64Dec i))) Empty |
| 67 | + |
| 68 | +int32Dec :: Int32 -> Builder |
| 69 | +int32Dec !i = Cons (ba2st (Bounded.run Nat.constant (Bounded.int32Dec i))) Empty |
| 70 | + |
| 71 | +int64Dec :: Int64 -> Builder |
| 72 | +int64Dec !i = Cons (ba2st (Bounded.run Nat.constant (Bounded.int64Dec i))) Empty |
| 73 | + |
| 74 | +-- | Number of Unicode code points in the sequence. |
| 75 | +length :: Builder -> Int |
| 76 | +length b0 = case b0 of |
| 77 | + Empty -> 0 |
| 78 | + Cons x b1 -> TS.length x + length b1 |
| 79 | + Snoc b1 x -> TS.length x + length b1 |
| 80 | + Append x y -> length x + length y |
| 81 | + |
| 82 | +{- | Note: The choice of appending to the left side of @Empty@ instead |
| 83 | +of the right side of arbitrary. Under ordinary use, this difference |
| 84 | +cannot be observed by the user. |
| 85 | +-} |
| 86 | +instance IsString Builder where |
| 87 | + fromString t = Cons (TS.pack t) Empty |
| 88 | + |
| 89 | +instance Monoid Builder where |
| 90 | + {-# INLINE mempty #-} |
| 91 | + mempty = Empty |
| 92 | + |
| 93 | +instance Semigroup Builder where |
| 94 | + {-# INLINE (<>) #-} |
| 95 | + (<>) = Append |
| 96 | + |
| 97 | +{- | Not structural equality. Converts builders to chunks and then |
| 98 | +compares the chunks. |
| 99 | +-} |
| 100 | +instance Eq Builder where |
| 101 | + a == b = run a == run b |
| 102 | + |
| 103 | +instance Show Builder where |
| 104 | + show b = TS.unpack (ba2st (Chunks.concatU (run b))) |
| 105 | + |
| 106 | +ba2st :: ByteArray -> ShortText |
| 107 | +{-# INLINE ba2st #-} |
| 108 | +ba2st (ByteArray x) = TS.fromShortByteStringUnsafe (SBS x) |
| 109 | + |
| 110 | +pattern (:<) :: ShortText -> Builder -> Builder |
| 111 | +pattern (:<) x y = Cons x y |
| 112 | + |
| 113 | +pattern (:>) :: Builder -> ShortText -> Builder |
| 114 | +pattern (:>) x y = Snoc x y |
| 115 | + |
| 116 | +{- | The result is chunks, but this is guaranteed to be UTF-8 encoded |
| 117 | +text, so if needed, you can flatten out the chunks and convert back |
| 118 | +to @ShortText@. |
| 119 | +-} |
| 120 | +run :: Builder -> Chunks |
| 121 | +{-# NOINLINE run #-} |
| 122 | +run b = runST $ do |
| 123 | + bldr0 <- BBU.newBuilderState 128 |
| 124 | + bldr1 <- pushCatenable bldr0 b |
| 125 | + BBU.reverseCommitsOntoChunks ChunksNil (BBU.closeBuilderState bldr1) |
| 126 | + |
| 127 | +pushCatenable :: BBU.BuilderState s -> Builder -> ST s (BBU.BuilderState s) |
| 128 | +pushCatenable !bldr0 b = case b of |
| 129 | + Empty -> pure bldr0 |
| 130 | + Cons x b1 -> do |
| 131 | + bldr1 <- BBU.pasteST (BB.shortTextUtf8 x) bldr0 |
| 132 | + pushCatenable bldr1 b1 |
| 133 | + Snoc b1 x -> do |
| 134 | + bldr1 <- pushCatenable bldr0 b1 |
| 135 | + BBU.pasteST (BB.shortTextUtf8 x) bldr1 |
| 136 | + Append x y -> do |
| 137 | + bldr1 <- pushCatenable bldr0 x |
| 138 | + pushCatenable bldr1 y |
0 commit comments