11"""
2- convnextblock(planes, drop_path_rate = 0., λ = 1f -6)
2+ convnextblock(planes::Integer , drop_path_rate = 0.0, layerscale_init = 1.0f -6)
33
44Creates a single block of ConvNeXt.
55([reference](https://arxiv.org/abs/2201.03545))
@@ -8,61 +8,64 @@ Creates a single block of ConvNeXt.
88
99 - `planes`: number of input channels.
1010 - `drop_path_rate`: Stochastic depth rate.
11- - `λ `: Initial value for [`LayerScale`](#)
11+ - `layerscale_init `: Initial value for [`LayerScale`](#)
1212"""
13- function convnextblock (planes, drop_path_rate = 0.0 , λ = 1.0f-6 )
13+ function convnextblock (planes:: Integer , drop_path_rate = 0.0 , layerscale_init = 1.0f-6 )
1414 layers = SkipConnection (Chain (DepthwiseConv ((7 , 7 ), planes => planes; pad = 3 ),
1515 swapdims ((3 , 1 , 2 , 4 )),
1616 LayerNorm (planes; ϵ = 1.0f-6 ),
1717 mlp_block (planes, 4 * planes),
18- LayerScale (planes, λ ),
18+ LayerScale (planes, layerscale_init ),
1919 swapdims ((2 , 3 , 1 , 4 )),
2020 DropPath (drop_path_rate)), + )
2121 return layers
2222end
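Since the 7×7 depthwise convolution uses `pad = 3`, the branch keeps the input's spatial and channel sizes, and the `SkipConnection` can add it back onto its input. A minimal sketch of that property (assuming this version of the non-exported `convnextblock` and its Metalhead/Flux helpers are available):

```julia
using Metalhead: convnextblock   # non-exported; defined in this file

# One block over 96 channels, no stochastic depth, default LayerScale init.
block = convnextblock(96, 0.0, 1.0f-6)

# Dummy input in Flux's WHCN layout: a 56×56 feature map with 96 channels, batch of 1.
x = rand(Float32, 56, 56, 96, 1)

# The residual branch is shape-preserving, so the output matches the input size.
@assert size(block(x)) == size(x)
```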

"""
-    convnext(depths, planes; inchannels = 3, drop_path_rate = 0., λ = 1f-6, nclasses = 1000)
+    convnext(depths::AbstractVector{<:Integer}, planes::AbstractVector{<:Integer};
+             drop_path_rate = 0.0, layerscale_init = 1.0f-6, inchannels::Integer = 3,
+             nclasses::Integer = 1000)

Creates the layers for a ConvNeXt model.
([reference](https://arxiv.org/abs/2201.03545))

# Arguments

-  - `inchannels`: number of input channels.
  - `depths`: list with configuration for depth of each block
  - `planes`: list with configuration for number of output channels in each block
  - `drop_path_rate`: Stochastic depth rate.
-  - `λ`: Initial value for [`LayerScale`](#)
+  - `layerscale_init`: Initial value for [`LayerScale`](#)
    ([reference](https://arxiv.org/abs/2103.17239))
+  - `inchannels`: number of input channels.
  - `nclasses`: number of output classes
"""
-function convnext(depths, planes; inchannels = 3, drop_path_rate = 0.0, λ = 1.0f-6,
-                  nclasses = 1000)
+function convnext(depths::AbstractVector{<:Integer}, planes::AbstractVector{<:Integer};
+                  drop_path_rate = 0.0, layerscale_init = 1.0f-6, inchannels::Integer = 3,
+                  nclasses::Integer = 1000)
    @assert length(depths) == length(planes)
    "`planes` should have exactly one value for each block"
    downsample_layers = []
-    stem = Chain(Conv((4, 4), inchannels => planes[1]; stride = 4),
-                 ChannelLayerNorm(planes[1]))
-    push!(downsample_layers, stem)
+    push!(downsample_layers,
+          Chain(conv_norm((4, 4), inchannels => planes[1]; stride = 4,
+                          norm_layer = ChannelLayerNorm)...))
    for m in 1:(length(depths) - 1)
-        downsample_layer = Chain(ChannelLayerNorm(planes[m]),
-                                 Conv((2, 2), planes[m] => planes[m + 1]; stride = 2))
-        push!(downsample_layers, downsample_layer)
+        push!(downsample_layers,
+              Chain(conv_norm((2, 2), planes[m] => planes[m + 1]; stride = 2,
+                              norm_layer = ChannelLayerNorm, revnorm = true)...))
    end
    stages = []
    dp_rates = linear_scheduler(drop_path_rate; depth = sum(depths))
    cur = 0
    for i in eachindex(depths)
-        push!(stages, [convnextblock(planes[i], dp_rates[cur + j], λ) for j in 1:depths[i]])
+        push!(stages,
+              [convnextblock(planes[i], dp_rates[cur + j], layerscale_init)
+               for j in 1:depths[i]])
        cur += depths[i]
    end
    backbone = collect(Iterators.flatten(Iterators.flatten(zip(downsample_layers, stages))))
-    head = Chain(GlobalMeanPool(),
-                 MLUtils.flatten,
-                 LayerNorm(planes[end]),
-                 Dense(planes[end], nclasses))
-    return Chain(Chain(backbone), head)
+    classifier = Chain(GlobalMeanPool(), MLUtils.flatten,
+                       LayerNorm(planes[end]), Dense(planes[end], nclasses))
+    return Chain(Chain(backbone...), classifier)
end
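Taking the `:tiny` values from `CONVNEXT_CONFIGS` just below (`depths = [3, 3, 9, 3]`, `planes = [96, 192, 384, 768]`), a rough usage sketch of the non-exported `convnext` builder, assuming this version of Metalhead is loaded:

```julia
using Metalhead: convnext   # non-exported; defined in this file

depths, planes = [3, 3, 9, 3], [96, 192, 384, 768]   # the :tiny configuration
model = convnext(depths, planes; inchannels = 3, nclasses = 1000)

# The stem (stride 4) plus three stride-2 downsample layers take a 224×224 input
# to a 7×7×768 feature map, which the classifier pools and maps to `nclasses` logits.
x = rand(Float32, 224, 224, 3, 1)
@assert size(model(x)) == (1000, 1)
```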

# Configurations for ConvNeXt models
@@ -72,30 +75,28 @@ const CONVNEXT_CONFIGS = Dict(:tiny => ([3, 3, 9, 3], [96, 192, 384, 768]),
                              :large => ([3, 3, 27, 3], [192, 384, 768, 1536]),
                              :xlarge => ([3, 3, 27, 3], [256, 512, 1024, 2048]))

-struct ConvNeXt
-    layers::Any
-end
-@functor ConvNeXt
-
"""
-    ConvNeXt(mode::Symbol = :base; inchannels = 3, drop_path_rate = 0., λ = 1f-6, nclasses = 1000)
+    ConvNeXt(config::Symbol; inchannels::Integer = 3, nclasses::Integer = 1000)

Creates a ConvNeXt model.
([reference](https://arxiv.org/abs/2201.03545))

# Arguments

+  - `config`: The size of the model, one of `tiny`, `small`, `base`, `large` or `xlarge`.
  - `inchannels`: The number of channels in the input.
-  - `drop_path_rate`: Stochastic depth rate.
-  - `λ`: Init value for [LayerScale](https://arxiv.org/abs/2103.17239)
  - `nclasses`: number of output classes

See also [`Metalhead.convnext`](#).
"""
-function ConvNeXt(mode::Symbol = :base; inchannels = 3, drop_path_rate = 0.0, λ = 1.0f-6,
-                  nclasses = 1000)
-    _checkconfig(mode, keys(CONVNEXT_CONFIGS))
-    layers = convnext(CONVNEXT_CONFIGS[mode]...; inchannels, drop_path_rate, λ, nclasses)
+struct ConvNeXt
+    layers::Any
+end
+@functor ConvNeXt
+
+function ConvNeXt(config::Symbol; inchannels::Integer = 3, nclasses::Integer = 1000)
+    _checkconfig(config, keys(CONVNEXT_CONFIGS))
+    layers = convnext(CONVNEXT_CONFIGS[config]...; inchannels, nclasses)
    return ConvNeXt(layers)
end
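A short sketch of the new user-facing constructor, assuming this version of Metalhead is loaded; the valid config symbols are exactly the keys of `CONVNEXT_CONFIGS` above:

```julia
using Metalhead

# `_checkconfig` rejects anything that is not :tiny, :small, :base, :large or :xlarge.
model = ConvNeXt(:tiny; inchannels = 3, nclasses = 1000)

# ConvNeXt is a thin wrapper struct; the Chain built by `convnext` lives in `model.layers`.
x = rand(Float32, 224, 224, 3, 1)
@assert size(model.layers(x)) == (1000, 1)
```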