Skip to content

Commit e64db09

Browse files
committed
Add true text builder and release 0.2.0.0
1 parent 789d0ac commit e64db09

File tree

4 files changed

+167
-16
lines changed

4 files changed

+167
-16
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
# Revision history for array-builder
22

3+
## 0.2.0.0 -- 2025-09-08
4+
5+
* Rename `Data.Builder.Catenable.Text` to `Data.Builder.Catenable.ShortText`
6+
* Add `Data.Builder.Catenable.Text` that uses the `Text` type.
7+
38
## 0.1.4.1 -- 2024-02-01
49

510
* Update package metadata.

array-builder.cabal

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
cabal-version: 2.2
22
name: array-builder
3-
version: 0.1.4.1
3+
version: 0.2.0.0
44
synopsis: Builders for arrays
55
description: Builders for arrays.
66
homepage: https://github.com/byteverse/array-builder
@@ -24,6 +24,7 @@ library
2424
Data.Builder
2525
Data.Builder.Catenable
2626
Data.Builder.Catenable.Bytes
27+
Data.Builder.Catenable.ShortText
2728
Data.Builder.Catenable.Text
2829
Data.Builder.ST
2930

@@ -37,6 +38,7 @@ library
3738
, natural-arithmetic >=0.1.3 && <0.3
3839
, primitive >=0.6.4 && <0.10
3940
, text-short >=0.1.3 && <0.2
41+
, text >=2.1
4042

4143
hs-source-dirs: src
4244

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
{-# LANGUAGE BangPatterns #-}
2+
{-# LANGUAGE PatternSynonyms #-}
3+
4+
-- | @Data.Builder.Catenable@ specialized to @ShortText@.
5+
module Data.Builder.Catenable.ShortText
6+
( -- * Type
7+
Builder (..)
8+
9+
-- * Convenient infix operators
10+
, pattern (:<)
11+
, pattern (:>)
12+
13+
-- * Run
14+
, run
15+
16+
-- * Properties
17+
, length
18+
19+
-- * Create
20+
, shortText
21+
, char
22+
, word32Dec
23+
, word64Dec
24+
, int32Dec
25+
, int64Dec
26+
) where
27+
28+
import Prelude hiding (length)
29+
30+
import Control.Monad.ST (ST, runST)
31+
import Data.ByteString.Short.Internal (ShortByteString (SBS))
32+
import Data.Bytes.Chunks (Chunks (ChunksNil))
33+
import Data.Int (Int32, Int64)
34+
import Data.Primitive (ByteArray (ByteArray))
35+
import Data.String (IsString (fromString))
36+
import Data.Text.Short (ShortText)
37+
import Data.Word (Word32, Word64)
38+
39+
import qualified Arithmetic.Nat as Nat
40+
import qualified Data.Bytes.Builder as BB
41+
import qualified Data.Bytes.Builder.Bounded as Bounded
42+
import qualified Data.Bytes.Builder.Unsafe as BBU
43+
import qualified Data.Bytes.Chunks as Chunks
44+
import qualified Data.Text.Short as TS
45+
import qualified Data.Text.Short.Unsafe as TS
46+
47+
infixr 5 :<
48+
infixl 5 :>
49+
50+
data Builder
51+
= Empty
52+
| Cons !ShortText !Builder
53+
| Snoc !Builder !ShortText
54+
| Append !Builder !Builder
55+
56+
shortText :: ShortText -> Builder
57+
shortText !t = Cons t Empty
58+
59+
char :: Char -> Builder
60+
char !c = Cons (TS.singleton c) Empty
61+
62+
word32Dec :: Word32 -> Builder
63+
word32Dec !i = Cons (ba2st (Bounded.run Nat.constant (Bounded.word32Dec i))) Empty
64+
65+
word64Dec :: Word64 -> Builder
66+
word64Dec !i = Cons (ba2st (Bounded.run Nat.constant (Bounded.word64Dec i))) Empty
67+
68+
int32Dec :: Int32 -> Builder
69+
int32Dec !i = Cons (ba2st (Bounded.run Nat.constant (Bounded.int32Dec i))) Empty
70+
71+
int64Dec :: Int64 -> Builder
72+
int64Dec !i = Cons (ba2st (Bounded.run Nat.constant (Bounded.int64Dec i))) Empty
73+
74+
-- | Number of Unicode code points in the sequence.
75+
length :: Builder -> Int
76+
length b0 = case b0 of
77+
Empty -> 0
78+
Cons x b1 -> TS.length x + length b1
79+
Snoc b1 x -> TS.length x + length b1
80+
Append x y -> length x + length y
81+
82+
{- | Note: The choice of appending to the left side of @Empty@ instead
83+
of the right side of arbitrary. Under ordinary use, this difference
84+
cannot be observed by the user.
85+
-}
86+
instance IsString Builder where
87+
fromString t = Cons (TS.pack t) Empty
88+
89+
instance Monoid Builder where
90+
{-# INLINE mempty #-}
91+
mempty = Empty
92+
93+
instance Semigroup Builder where
94+
{-# INLINE (<>) #-}
95+
(<>) = Append
96+
97+
{- | Not structural equality. Converts builders to chunks and then
98+
compares the chunks.
99+
-}
100+
instance Eq Builder where
101+
a == b = run a == run b
102+
103+
instance Show Builder where
104+
show b = TS.unpack (ba2st (Chunks.concatU (run b)))
105+
106+
ba2st :: ByteArray -> ShortText
107+
{-# INLINE ba2st #-}
108+
ba2st (ByteArray x) = TS.fromShortByteStringUnsafe (SBS x)
109+
110+
pattern (:<) :: ShortText -> Builder -> Builder
111+
pattern (:<) x y = Cons x y
112+
113+
pattern (:>) :: Builder -> ShortText -> Builder
114+
pattern (:>) x y = Snoc x y
115+
116+
{- | The result is chunks, but this is guaranteed to be UTF-8 encoded
117+
text, so if needed, you can flatten out the chunks and convert back
118+
to @ShortText@.
119+
-}
120+
run :: Builder -> Chunks
121+
{-# NOINLINE run #-}
122+
run b = runST $ do
123+
bldr0 <- BBU.newBuilderState 128
124+
bldr1 <- pushCatenable bldr0 b
125+
BBU.reverseCommitsOntoChunks ChunksNil (BBU.closeBuilderState bldr1)
126+
127+
pushCatenable :: BBU.BuilderState s -> Builder -> ST s (BBU.BuilderState s)
128+
pushCatenable !bldr0 b = case b of
129+
Empty -> pure bldr0
130+
Cons x b1 -> do
131+
bldr1 <- BBU.pasteST (BB.shortTextUtf8 x) bldr0
132+
pushCatenable bldr1 b1
133+
Snoc b1 x -> do
134+
bldr1 <- pushCatenable bldr0 b1
135+
BBU.pasteST (BB.shortTextUtf8 x) bldr1
136+
Append x y -> do
137+
bldr1 <- pushCatenable bldr0 x
138+
pushCatenable bldr1 y

src/Data/Builder/Catenable/Text.hs

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ module Data.Builder.Catenable.Text
1818

1919
-- * Create
2020
, shortText
21+
, text
2122
, char
2223
, word32Dec
2324
, word64Dec
@@ -33,6 +34,7 @@ import Data.Bytes.Chunks (Chunks (ChunksNil))
3334
import Data.Int (Int32, Int64)
3435
import Data.Primitive (ByteArray (ByteArray))
3536
import Data.String (IsString (fromString))
37+
import Data.Text (Text)
3638
import Data.Text.Short (ShortText)
3739
import Data.Word (Word32, Word64)
3840

@@ -41,6 +43,7 @@ import qualified Data.Bytes.Builder as BB
4143
import qualified Data.Bytes.Builder.Bounded as Bounded
4244
import qualified Data.Bytes.Builder.Unsafe as BBU
4345
import qualified Data.Bytes.Chunks as Chunks
46+
import qualified Data.Text as T
4447
import qualified Data.Text.Short as TS
4548
import qualified Data.Text.Short.Unsafe as TS
4649

@@ -49,42 +52,45 @@ infixl 5 :>
4952

5053
data Builder
5154
= Empty
52-
| Cons !ShortText !Builder
53-
| Snoc !Builder !ShortText
55+
| Cons !Text !Builder
56+
| Snoc !Builder !Text
5457
| Append !Builder !Builder
5558

5659
shortText :: ShortText -> Builder
57-
shortText !t = Cons t Empty
60+
shortText !t = Cons (TS.toText t) Empty
61+
62+
text :: Text -> Builder
63+
text !t = Cons t Empty
5864

5965
char :: Char -> Builder
60-
char !c = Cons (TS.singleton c) Empty
66+
char !c = Cons (T.singleton c) Empty
6167

6268
word32Dec :: Word32 -> Builder
63-
word32Dec !i = Cons (ba2st (Bounded.run Nat.constant (Bounded.word32Dec i))) Empty
69+
word32Dec !i = Cons (TS.toText (ba2st (Bounded.run Nat.constant (Bounded.word32Dec i)))) Empty
6470

6571
word64Dec :: Word64 -> Builder
66-
word64Dec !i = Cons (ba2st (Bounded.run Nat.constant (Bounded.word64Dec i))) Empty
72+
word64Dec !i = Cons (TS.toText (ba2st (Bounded.run Nat.constant (Bounded.word64Dec i)))) Empty
6773

6874
int32Dec :: Int32 -> Builder
69-
int32Dec !i = Cons (ba2st (Bounded.run Nat.constant (Bounded.int32Dec i))) Empty
75+
int32Dec !i = Cons (TS.toText (ba2st (Bounded.run Nat.constant (Bounded.int32Dec i)))) Empty
7076

7177
int64Dec :: Int64 -> Builder
72-
int64Dec !i = Cons (ba2st (Bounded.run Nat.constant (Bounded.int64Dec i))) Empty
78+
int64Dec !i = Cons (TS.toText (ba2st (Bounded.run Nat.constant (Bounded.int64Dec i)))) Empty
7379

7480
-- | Number of Unicode code points in the sequence.
7581
length :: Builder -> Int
7682
length b0 = case b0 of
7783
Empty -> 0
78-
Cons x b1 -> TS.length x + length b1
79-
Snoc b1 x -> TS.length x + length b1
84+
Cons x b1 -> T.length x + length b1
85+
Snoc b1 x -> T.length x + length b1
8086
Append x y -> length x + length y
8187

8288
{- | Note: The choice of appending to the left side of @Empty@ instead
8389
of the right side of arbitrary. Under ordinary use, this difference
8490
cannot be observed by the user.
8591
-}
8692
instance IsString Builder where
87-
fromString t = Cons (TS.pack t) Empty
93+
fromString t = Cons (T.pack t) Empty
8894

8995
instance Monoid Builder where
9096
{-# INLINE mempty #-}
@@ -107,10 +113,10 @@ ba2st :: ByteArray -> ShortText
107113
{-# INLINE ba2st #-}
108114
ba2st (ByteArray x) = TS.fromShortByteStringUnsafe (SBS x)
109115

110-
pattern (:<) :: ShortText -> Builder -> Builder
116+
pattern (:<) :: Text -> Builder -> Builder
111117
pattern (:<) x y = Cons x y
112118

113-
pattern (:>) :: Builder -> ShortText -> Builder
119+
pattern (:>) :: Builder -> Text -> Builder
114120
pattern (:>) x y = Snoc x y
115121

116122
{- | The result is chunks, but this is guaranteed to be UTF-8 encoded
@@ -128,11 +134,11 @@ pushCatenable :: BBU.BuilderState s -> Builder -> ST s (BBU.BuilderState s)
128134
pushCatenable !bldr0 b = case b of
129135
Empty -> pure bldr0
130136
Cons x b1 -> do
131-
bldr1 <- BBU.pasteST (BB.shortTextUtf8 x) bldr0
137+
bldr1 <- BBU.pasteST (BB.textUtf8 x) bldr0
132138
pushCatenable bldr1 b1
133139
Snoc b1 x -> do
134140
bldr1 <- pushCatenable bldr0 b1
135-
BBU.pasteST (BB.shortTextUtf8 x) bldr1
141+
BBU.pasteST (BB.textUtf8 x) bldr1
136142
Append x y -> do
137143
bldr1 <- pushCatenable bldr0 x
138144
pushCatenable bldr1 y

0 commit comments

Comments
 (0)