@@ -176,7 +176,8 @@ def from_pretrained(
         )

         # set of all quantization schemes
-        quantization_schemes: Set[QuantizationScheme] = set()
+        # TODO: make quant config/scheme/args frozen/hashable and use a set
+        quantization_schemes: List[QuantizationScheme] = list()

         # use any status from modules (in practice, use the last module)
         model_status = None
@@ -198,8 +199,9 @@ def from_pretrained(
             if is_module_quantized(submodule):
                 # add to running set of schemes/layer_type_names
                 model_status = getattr(submodule, "quantization_status", model_status)
-                quantization_schemes.add(submodule.quantization_scheme)
                 quantization_type_names.add(layer_type)
+                if submodule.quantization_scheme not in quantization_schemes:
+                    quantization_schemes.append(submodule.quantization_scheme)

                 # attention quantization implies kv cache quantization
                 if is_attention_module(submodule):
@@ -225,7 +227,7 @@ def from_pretrained(

         # create config groups from all unique schemes
         config_groups = {}
-        for idx, scheme in enumerate(list(quantization_schemes)):
+        for idx, scheme in enumerate(quantization_schemes):
            group_name = "group_" + str(idx)
            config_groups[group_name] = scheme

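As the TODO in the diff notes, `QuantizationScheme` is not yet frozen/hashable, so it cannot be stored in a `set`; the change instead keeps a `List[QuantizationScheme]` and guards each append with a membership check, which also preserves the order in which schemes are first encountered. Below is a minimal, self-contained sketch of that dedup pattern; the `Scheme` dataclass and `collect_unique` helper are illustrative stand-ins, not part of the library.

```python
from dataclasses import dataclass
from typing import List


@dataclass  # equality-comparable, but (like the real scheme) not hashable/frozen
class Scheme:
    targets: str
    num_bits: int


def collect_unique(schemes: List[Scheme]) -> List[Scheme]:
    unique: List[Scheme] = []
    for scheme in schemes:
        # `in` relies on __eq__, so duplicates are dropped without needing __hash__
        if scheme not in unique:
            unique.append(scheme)
    return unique


if __name__ == "__main__":
    seen = collect_unique(
        [Scheme("Linear", 8), Scheme("Linear", 8), Scheme("Embedding", 4)]
    )
    # group names mirror the "group_<idx>" naming used in the diff
    config_groups = {f"group_{idx}": s for idx, s in enumerate(seen)}
    print(config_groups)
```

The membership check is O(n) per insert, which is fine for the small number of distinct schemes a model typically has; switching back to a `set` once the scheme objects are made frozen/hashable (per the TODO) would restore O(1) lookups.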