@@ -94,34 +94,34 @@ def smiles2mol(
9494
9595 Functional usage example:
9696
97- >>> import pandas as pd
98- >>> import janitor.chemistry
97+ >>> import pandas as pd
98+ >>> import janitor.chemistry
9999
100- >>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
100+ >>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
101101
102- >>> df = janitor.chemistry.smiles2mol(
103- ... df=df,
104- ... smiles_column_name='smiles',
105- ... mols_column_name='mols'
106- ... )
102+ >>> df = janitor.chemistry.smiles2mol(
103+ ... df=df,
104+ ... smiles_column_name='smiles',
105+ ... mols_column_name='mols'
106+ ... )
107107
108- >>> df.mols[0].GetNumAtoms(), df.mols[0].GetNumBonds()
109- (3, 2)
110- >>> df.mols[1].GetNumAtoms(), df.mols[1].GetNumBonds()
111- (5, 4)
108+ >>> df.mols[0].GetNumAtoms(), df.mols[0].GetNumBonds()
109+ (3, 2)
110+ >>> df.mols[1].GetNumAtoms(), df.mols[1].GetNumBonds()
111+ (5, 4)
112112
113113 Method chaining usage example:
114114
115- >>> import pandas as pd
116- >>> import janitor.chemistry
115+ >>> import pandas as pd
116+ >>> import janitor.chemistry
117117
118- >>> df = df.smiles2mol(
119- ... smiles_column_name='smiles',
120- ... mols_column_name='rdkmol'
121- ... )
118+ >>> df = df.smiles2mol(
119+ ... smiles_column_name='smiles',
120+ ... mols_column_name='rdkmol'
121+ ... )
122122
123- >>> df.rdkmol[0].GetNumAtoms(), df.rdkmol[0].GetNumBonds()
124- (3, 2)
123+ >>> df.rdkmol[0].GetNumAtoms(), df.rdkmol[0].GetNumBonds()
124+ (3, 2)
125125
126126 A progress bar can optionally be used.
127127
@@ -184,78 +184,78 @@ def morgan_fingerprint(
184184
185185 Functional usage example:
186186
187- >>> import pandas as pd
188- >>> import janitor.chemistry
187+ >>> import pandas as pd
188+ >>> import janitor.chemistry
189189
190- >>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
190+ >>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
191191
192- # For "counts" kind
193- >>> morgans = janitor.chemistry.morgan_fingerprint(
194- ... df=df.smiles2mol('smiles', 'mols'),
195- ... mols_column_name='mols',
196- ... radius=3, # Defaults to 3
197- ... nbits=2048, # Defaults to 2048
198- ... kind='counts' # Defaults to "counts"
199- ... )
192+ # For "counts" kind
193+ >>> morgans = janitor.chemistry.morgan_fingerprint(
194+ ... df=df.smiles2mol('smiles', 'mols'),
195+ ... mols_column_name='mols',
196+ ... radius=3, # Defaults to 3
197+ ... nbits=2048, # Defaults to 2048
198+ ... kind='counts' # Defaults to "counts"
199+ ... )
200200
201- >>> set(morgans.iloc[0])
202- {0.0, 1.0, 2.0}
201+ >>> set(morgans.iloc[0])
202+ {0.0, 1.0, 2.0}
203203
204- # For "bits" kind
205- >>> morgans = janitor.chemistry.morgan_fingerprint(
206- ... df=df.smiles2mol('smiles', 'mols'),
207- ... mols_column_name='mols',
208- ... radius=3, # Defaults to 3
209- ... nbits=2048, # Defaults to 2048
210- ... kind='bits' # Defaults to "counts"
211- ... )
204+ # For "bits" kind
205+ >>> morgans = janitor.chemistry.morgan_fingerprint(
206+ ... df=df.smiles2mol('smiles', 'mols'),
207+ ... mols_column_name='mols',
208+ ... radius=3, # Defaults to 3
209+ ... nbits=2048, # Defaults to 2048
210+ ... kind='bits' # Defaults to "counts"
211+ ... )
212212
213- >>> set(morgans.iloc[0])
214- {0.0, 1.0}
213+ >>> set(morgans.iloc[0])
214+ {0.0, 1.0}
215215
216216 Method chaining usage example:
217217
218- >>> import pandas as pd
219- >>> import janitor.chemistry
220-
221- >>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
222-
223- # For "counts" kind
224- >>> morgans = (
225- ... df.smiles2mol('smiles', 'mols')
226- ... .morgan_fingerprint(
227- ... mols_column_name='mols',
228- ... radius=3, # Defaults to 3
229- ... nbits=2048, # Defaults to 2048
230- ... kind='counts' # Defaults to "counts"
231- ... )
232- ... )
233-
234- >>> set(morgans.iloc[0])
235- {0.0, 1.0, 2.0}
236-
237- # For "bits" kind
238- >>> morgans = (
239- ... df
240- ... .smiles2mol('smiles', 'mols')
241- ... .morgan_fingerprint(
242- ... mols_column_name='mols',
243- ... radius=3, # Defaults to 3
244- ... nbits=2048, # Defaults to 2048
245- ... kind='bits' # Defaults to "counts"
246- ... )
247- ... )
248-
249- >>> set(morgans.iloc[0])
250- {0.0, 1.0}
218+ >>> import pandas as pd
219+ >>> import janitor.chemistry
220+
221+ >>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
222+
223+ # For "counts" kind
224+ >>> morgans = (
225+ ... df.smiles2mol('smiles', 'mols')
226+ ... .morgan_fingerprint(
227+ ... mols_column_name='mols',
228+ ... radius=3, # Defaults to 3
229+ ... nbits=2048, # Defaults to 2048
230+ ... kind='counts' # Defaults to "counts"
231+ ... )
232+ ... )
233+
234+ >>> set(morgans.iloc[0])
235+ {0.0, 1.0, 2.0}
236+
237+ # For "bits" kind
238+ >>> morgans = (
239+ ... df
240+ ... .smiles2mol('smiles', 'mols')
241+ ... .morgan_fingerprint(
242+ ... mols_column_name='mols',
243+ ... radius=3, # Defaults to 3
244+ ... nbits=2048, # Defaults to 2048
245+ ... kind='bits' # Defaults to "counts"
246+ ... )
247+ ... )
248+
249+ >>> set(morgans.iloc[0])
250+ {0.0, 1.0}
251251
252252 If you wish to join the morgan fingerprints back into the original
253253 dataframe, this can be accomplished by doing a `join`,
254254 because the indices are preserved:
255255
256- >>> joined = df.join(morgans)
257- >>> len(joined.columns)
258- 2050
256+ >>> joined = df.join(morgans)
257+ >>> len(joined.columns)
258+ 2050
259259
260260 :param df: A pandas DataFrame.
261261 :param mols_column_name: The name of the column that has the RDKit
@@ -324,47 +324,47 @@ def molecular_descriptors(
324324
325325 Functional usage example:
326326
327- >>> import pandas as pd
328- >>> import janitor.chemistry
327+ >>> import pandas as pd
328+ >>> import janitor.chemistry
329329
330- >>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
330+ >>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
331331
332- >>> mol_desc = (
333- ... janitor.chemistry.molecular_descriptors(
334- ... df=df.smiles2mol('smiles', 'mols'),
335- ... mols_column_name='mols'
336- ... )
337- ... )
332+ >>> mol_desc = (
333+ ... janitor.chemistry.molecular_descriptors(
334+ ... df=df.smiles2mol('smiles', 'mols'),
335+ ... mols_column_name='mols'
336+ ... )
337+ ... )
338338
339- >>> mol_desc.TPSA
340- 0 34.14
341- 1 37.30
342- Name: TPSA, dtype: float64
339+ >>> mol_desc.TPSA
340+ 0 34.14
341+ 1 37.30
342+ Name: TPSA, dtype: float64
343343
344344 Method chaining usage example:
345345
346- >>> import pandas as pd
347- >>> import janitor.chemistry
346+ >>> import pandas as pd
347+ >>> import janitor.chemistry
348348
349- >>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
349+ >>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
350350
351- >>> mol_desc = (
352- ... df.smiles2mol('smiles', 'mols')
353- ... .molecular_descriptors(mols_column_name='mols')
354- ... )
351+ >>> mol_desc = (
352+ ... df.smiles2mol('smiles', 'mols')
353+ ... .molecular_descriptors(mols_column_name='mols')
354+ ... )
355355
356- >>> mol_desc.TPSA
357- 0 34.14
358- 1 37.30
359- Name: TPSA, dtype: float64
356+ >>> mol_desc.TPSA
357+ 0 34.14
358+ 1 37.30
359+ Name: TPSA, dtype: float64
360360
361361 If you wish to join the molecular descriptors back into the original
362362 dataframe, this can be accomplished by doing a `join`,
363363 because the indices are preserved:
364364
365- >>> joined = df.join(mol_desc)
366- >>> len(joined.columns)
367- 41
365+ >>> joined = df.join(mol_desc)
366+ >>> len(joined.columns)
367+ 41
368368
369369 :param df: A pandas DataFrame.
370370 :param mols_column_name: The name of the column that has the RDKit mol
@@ -435,33 +435,33 @@ def maccs_keys_fingerprint(
435435
436436 Functional usage example:
437437
438- >>> import pandas as pd
439- >>> import janitor.chemistry
438+ >>> import pandas as pd
439+ >>> import janitor.chemistry
440440
441- >>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
441+ >>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
442442
443- >>> maccs = janitor.chemistry.maccs_keys_fingerprint(
444- ... df=df.smiles2mol('smiles', 'mols'),
445- ... mols_column_name='mols'
446- ... )
443+ >>> maccs = janitor.chemistry.maccs_keys_fingerprint(
444+ ... df=df.smiles2mol('smiles', 'mols'),
445+ ... mols_column_name='mols'
446+ ... )
447447
448- >>> len(maccs.columns)
449- 167
448+ >>> len(maccs.columns)
449+ 167
450450
451451 Method chaining usage example:
452452
453- >>> import pandas as pd
454- >>> import janitor.chemistry
453+ >>> import pandas as pd
454+ >>> import janitor.chemistry
455455
456- >>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
456+ >>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
457457
458- >>> maccs = (
459- ... df.smiles2mol('smiles', 'mols')
460- ... .maccs_keys_fingerprint(mols_column_name='mols')
461- ... )
458+ >>> maccs = (
459+ ... df.smiles2mol('smiles', 'mols')
460+ ... .maccs_keys_fingerprint(mols_column_name='mols')
461+ ... )
462462
463- >>> len(maccs.columns)
464- 167
463+ >>> len(maccs.columns)
464+ 167
465465
466466 If you wish to join the maccs keys fingerprints back into the
467467 original dataframe, this can be accomplished by doing a `join`,
0 commit comments