RDKit
Open-source cheminformatics and machine learning.
MolOps.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2001-2021 Greg Landrum and Rational Discovery LLC
3 // Copyright (c) 2014, Novartis Institutes for BioMedical Research Inc.
4 //
5 // @@ All Rights Reserved @@
6 // This file is part of the RDKit.
7 // The contents are covered by the terms of the BSD license
8 // which is included in the file license.txt, found at the root
9 // of the RDKit source tree.
10 //
11 #include <RDGeneral/export.h>
12 #ifndef _RD_MOL_OPS_H_
13 #define _RD_MOL_OPS_H_
14 
15 #include <vector>
16 #include <map>
17 #include <list>
19 #include <boost/smart_ptr.hpp>
20 #include <boost/dynamic_bitset.hpp>
22 #include <RDGeneral/types.h>
23 #include "SanitException.h"
24 
25 RDKIT_GRAPHMOL_EXPORT extern const int ci_LOCAL_INF;
26 namespace RDKit {
27 class ROMol;
28 class RWMol;
29 class Atom;
30 class Bond;
31 class Conformer;
32 typedef std::vector<double> INVAR_VECT;  //!< a vector of doubles
33 typedef INVAR_VECT::iterator INVAR_VECT_I;  //!< mutable iterator over an INVAR_VECT
34 typedef INVAR_VECT::const_iterator INVAR_VECT_CI;  //!< read-only iterator over an INVAR_VECT
35 
36 //! \brief Groups a variety of molecular query and transformation operations.
37 namespace MolOps {
38 
39 //! return the number of electrons available on an atom to donate for
40 /// aromaticity
41 /*!
42  The result is determined using the default valency, number of lone pairs,
43  number of bonds and the formal charge. Note that the atom may not donate
44  all of these electrons to a ring for aromaticity (also used in Conjugation
45  and hybridization code).
46 
47  \param at the atom of interest
48 
49  \return the number of electrons
50 */
52 
53 //! sums up all atomic formal charges and returns the result
55 
56 //! returns whether or not the given Atom is involved in a conjugated bond
58 
59 //! find fragments (disconnected components of the molecular graph)
60 /*!
61 
62  \param mol the molecule of interest
63  \param mapping used to return the mapping of Atoms->fragments.
64  On return \c mapping will be <tt>mol->getNumAtoms()</tt> long
65  and will contain the fragment assignment for each Atom
66 
67  \return the number of fragments found.
68 
69 */
70 RDKIT_GRAPHMOL_EXPORT unsigned int getMolFrags(const ROMol &mol,
71  std::vector<int> &mapping);
72 //! find fragments (disconnected components of the molecular graph)
73 /*!
74 
75  \param mol the molecule of interest
76  \param frags used to return the Atoms in each fragment
77 On return \c frags will be \c numFrags long, and each entry
78  will contain the indices of the Atoms in that fragment.
79 
80  \return the number of fragments found.
81 
82 */
84  const ROMol &mol, std::vector<std::vector<int>> &frags);
85 
86 //! splits a molecule into its component fragments
87 /// (disconnected components of the molecular graph)
88 /*!
89 
90  \param mol the molecule of interest
91  \param sanitizeFrags toggles sanitization of the fragments after
92  they are built
93  \param frags used to return the mapping of Atoms->fragments.
94  if provided, \c frags will be <tt>mol->getNumAtoms()</tt> long
95  on return and will contain the fragment assignment for each Atom
96  \param fragsMolAtomMapping used to return the Atoms in each fragment
97 On return \c fragsMolAtomMapping will be \c numFrags long, and each entry
98  will contain the indices of the Atoms in that fragment.
99  \param copyConformers toggles copying conformers of the fragments after
100  they are built
101  \return a vector of the fragments as smart pointers to ROMols
102 
103 */
104 RDKIT_GRAPHMOL_EXPORT std::vector<boost::shared_ptr<ROMol>> getMolFrags(
105  const ROMol &mol, bool sanitizeFrags = true,
106  std::vector<int> *frags = nullptr,
107  std::vector<std::vector<int>> *fragsMolAtomMapping = nullptr,
108  bool copyConformers = true);
109 
110 //! splits a molecule into pieces based on labels assigned using a query
111 /*!
112 
113  \param mol the molecule of interest
114  \param query the query used to "label" the molecule for fragmentation
115  \param sanitizeFrags toggles sanitization of the fragments after
116  they are built
117  \param whiteList if provided, only labels in the list will be kept
118  \param negateList if true, the white list logic will be inverted: only labels
119  not in the list will be kept
120 
121  \return a map of the fragments and their labels
122 
123 */
124 template <typename T>
125 RDKIT_GRAPHMOL_EXPORT std::map<T, boost::shared_ptr<ROMol>>
126 getMolFragsWithQuery(const ROMol &mol, T (*query)(const ROMol &, const Atom *),
127  bool sanitizeFrags = true,
128  const std::vector<T> *whiteList = nullptr,
129  bool negateList = false);
130 
131 #if 0
132  //! finds a molecule's minimum spanning tree (MST)
133  /*!
134  \param mol the molecule of interest
135  \param mst used to return the MST as a vector of bond indices
136  */
137  RDKIT_GRAPHMOL_EXPORT void findSpanningTree(const ROMol &mol,std::vector<int> &mst);
138 #endif
139 
140 //! DEPRECATED calculates Balaban's J index for the molecule
141 /*!
142  \param mol the molecule of interest
143  \param useBO toggles inclusion of the bond order in the calculation
144  (when false, we're not really calculating the J value)
145  \param force forces the calculation (instead of using cached results)
146  \param bondPath when included, only paths using bonds whose indices occur
147  in this vector will be included in the calculation
148  \param cacheIt If this is true, the calculated value will be cached
149  as a property on the molecule
150  \return the J index
151 
152 */
154  const ROMol &mol, bool useBO = true, bool force = false,
155  const std::vector<int> *bondPath = nullptr, bool cacheIt = true);
156 //! DEPRECATED \overload
/// NOTE(review): the parameters are not documented here; presumably
/// \c distMat is a precomputed distance matrix, \c nb a bond count and
/// \c nAts an atom count -- confirm against the implementation.
157 RDKIT_GRAPHMOL_EXPORT double computeBalabanJ(double *distMat, int nb, int nAts);
158 
159 //! \name Dealing with hydrogens
160 //@{
161 
162 //! returns a copy of a molecule with hydrogens added in as explicit Atoms
163 /*!
164  \param mol the molecule to add Hs to
165 \param explicitOnly (optional) if this is \c true, only explicit Hs will be
166  added
167  \param addCoords (optional) If this is true, estimates for the atomic
168  coordinates
169  of the added Hs will be used.
170  \param onlyOnAtoms (optional) if provided, this should be a vector of
171  IDs of the atoms that will be considered for H addition.
172  \param addResidueInfo (optional) if this is true, add residue info to
173  hydrogen atoms (useful for PDB files).
174 
175  \return the new molecule
176 
177  <b>Notes:</b>
178  - it makes no sense to use the \c addCoords option if the molecule's
179  heavy
180  atoms don't already have coordinates.
181  - the caller is responsible for <tt>delete</tt>ing the pointer this
182  returns.
183  */
184 RDKIT_GRAPHMOL_EXPORT ROMol *addHs(const ROMol &mol, bool explicitOnly = false,
185  bool addCoords = false,
186  const UINT_VECT *onlyOnAtoms = nullptr,
187  bool addResidueInfo = false);
188 //! \overload
189 /// modifies the molecule in place
190 RDKIT_GRAPHMOL_EXPORT void addHs(RWMol &mol, bool explicitOnly = false,
191  bool addCoords = false,
192  const UINT_VECT *onlyOnAtoms = nullptr,
193  bool addResidueInfo = false);
194 
195 //! Sets Cartesian coordinates for a terminal atom.
196 //! Useful for growing an atom off a molecule with sensible
197 //! coordinates based on the geometry of the neighbor.
198 /*!
199  NOTE: this sets appropriate coordinates in all of the molecule's conformers.
200  \param mol the molecule the atoms belong to
201  \param idx index of the terminal atom whose coordinates are set
202  \param otherIdx index of the bonded neighbor atom
203 */
204 
206  unsigned int otherIdx);
207 
208 //! returns a copy of a molecule with hydrogens removed
209 /*!
210  \param mol the molecule to remove Hs from
211 \param implicitOnly (optional) if this is \c true, only implicit Hs will be
212  removed
213  \param updateExplicitCount (optional) If this is \c true, when explicit Hs
214  are removed
215  from the graph, the heavy atom to which they are bound will have its
216  counter of
217  explicit Hs increased.
218 \param sanitize (optional) If this is \c true, the final molecule will be
219  sanitized
220 
221  \return the new molecule
222 
223  <b>Notes:</b>
224  - Hydrogens which aren't connected to a heavy atom will not be
225  removed. This prevents molecules like <tt>"[H][H]"</tt> from having
226  all atoms removed.
227  - Labelled hydrogen (e.g. atoms with atomic number=1, but mass > 1),
228  will not be removed.
229  - two coordinate Hs, like the central H in C[H-]C, will not be removed
230  - Hs connected to dummy atoms will not be removed
231  - Hs that are part of the definition of double bond Stereochemistry
232  will not be removed
233  - Hs that are not connected to anything else will not be removed
234  - Hs that have a query defined (i.e. hasQuery() returns true) will not
235  be removed
236 
237  - the caller is responsible for <tt>delete</tt>ing the pointer this
238  returns.
239 */
240 
242  bool implicitOnly = false,
243  bool updateExplicitCount = false,
244  bool sanitize = true);
245 //! \overload
246 /// modifies the molecule in place
247 RDKIT_GRAPHMOL_EXPORT void removeHs(RWMol &mol, bool implicitOnly = false,
248  bool updateExplicitCount = false,
249  bool sanitize = true);
251  bool removeDegreeZero = false; /**< hydrogens that have no bonds */
252  bool removeHigherDegrees = false; /**< hydrogens with two (or more) bonds */
253  bool removeOnlyHNeighbors =
254  false; /**< hydrogens with bonds only to other hydrogens */
255  bool removeIsotopes = false; /**< hydrogens with non-default isotopes */
256  bool removeAndTrackIsotopes = false; /**< removes hydrogens with non-default
257  isotopes and keeps track of the heavy atom the isotopes were attached to in
258  the private _isotopicHs atom property, so they are re-added by AddHs() as the
259  original isotopes if possible*/
260  bool removeDummyNeighbors =
261  false; /**< hydrogens with at least one dummy-atom neighbor */
262  bool removeDefiningBondStereo =
263  false; /**< hydrogens defining bond stereochemistry */
264  bool removeWithWedgedBond = true; /**< hydrogens with wedged bonds to them */
265  bool removeWithQuery = false; /**< hydrogens with queries defined */
266  bool removeMapped = true; /**< mapped hydrogens */
267  bool removeInSGroups = false; /**< part of a SubstanceGroup */
268  bool showWarnings = true; /**< display warnings for Hs that are not removed */
269  bool removeNonimplicit = true; /**< DEPRECATED equivalent of !implicitOnly */
270  bool updateExplicitCount =
271  false; /**< DEPRECATED equivalent of updateExplicitCount */
272  bool removeHydrides = true; /**< Removing Hydrides */
273 };
274 //! \overload
275 /// modifies the molecule in place
277  bool sanitize = true);
278 //! \overload
279 /// The caller owns the pointer this returns
281  const RemoveHsParameters &ps,
282  bool sanitize = true);
283 
284 //! removes all Hs from a molecule
/// modifies the molecule in place (takes a non-const RWMol and returns nothing)
/// NOTE(review): \c sanitize presumably triggers sanitization of the result,
/// as documented for removeHs() -- confirm against the implementation.
285 RDKIT_GRAPHMOL_EXPORT void removeAllHs(RWMol &mol, bool sanitize = true);
286 //! \overload
287 /// The caller owns the pointer this returns
289  bool sanitize = true);
290 
291 //! returns a copy of a molecule with hydrogens removed and added as queries
292 //! to the heavy atoms to which they are bound.
293 /*!
294  This is really intended to be used with molecules that contain QueryAtoms
295 
296  \param mol the molecule to remove Hs from
297 
298  \return the new molecule
299 
300  <b>Notes:</b>
301  - Atoms that do not already have hydrogen count queries will have one
302  added, other H-related queries will not be touched. Examples:
303  - C[H] -> [C;!H0]
304  - [C;H1][H] -> [C;H1]
305  - [C;H2][H] -> [C;H2]
306  - Hydrogens which aren't connected to a heavy atom will not be
307  removed. This prevents molecules like <tt>"[H][H]"</tt> from having
308  all atoms removed.
309  - the caller is responsible for <tt>delete</tt>ing the pointer this
310  returns.
311  - By default all hydrogens are removed, however if
312  mergeUnmappedOnly is true, any hydrogen participating
313  in an atom map will be retained
314 
315 */
317  bool mergeUnmappedOnly = false);
318 //! \overload
319 /// modifies the molecule in place
321  bool mergeUnmappedOnly = false);
322 
323 typedef enum {
330  ADJUST_IGNOREALL = 0xFFFFFFF
332 
333 //! Parameters controlling the behavior of MolOps::adjustQueryProperties
334 /*!
335 
336  Note that some of the options here are either directly contradictory or make
337  no sense when combined with each other. We generally assume that client code
338  is doing something sensible and don't attempt to detect possible conflicts or
339  problems.
340 
341 */
343  bool adjustDegree = true; /**< add degree queries */
344  std::uint32_t adjustDegreeFlags = ADJUST_IGNOREDUMMIES | ADJUST_IGNORECHAINS;
345 
346  bool adjustRingCount = false; /**< add ring-count queries */
347  std::uint32_t adjustRingCountFlags =
349 
350  bool makeDummiesQueries = true; /**< convert dummy atoms without isotope
351  labels to any-atom queries */
352 
353  bool aromatizeIfPossible = true; /**< perceive and set aromaticity */
354 
355  bool makeBondsGeneric =
356  false; /**< convert bonds to generic queries (any bonds) */
357  std::uint32_t makeBondsGenericFlags = ADJUST_IGNORENONE;
358 
359  bool makeAtomsGeneric =
360  false; /**< convert atoms to generic queries (any atoms) */
361  std::uint32_t makeAtomsGenericFlags = ADJUST_IGNORENONE;
362 
363  bool adjustHeavyDegree = false; /**< adjust the heavy-atom degree instead of
364  overall degree */
365  std::uint32_t adjustHeavyDegreeFlags =
367 
368  bool adjustRingChain = false; /**< add ring-chain queries */
369  std::uint32_t adjustRingChainFlags = ADJUST_IGNORENONE;
370 
371  bool useStereoCareForBonds =
372  false; /**< remove stereochemistry info from double bonds that do not have
373  the stereoCare property set */
374 
375  bool adjustConjugatedFiveRings =
376  false; /**< sets bond queries in conjugated five-rings to
377  SINGLE|DOUBLE|AROMATIC */
378 
379  bool setMDLFiveRingAromaticity =
380  false; /**< uses the 5-ring aromaticity behavior of the (former) MDL
381  software as documented in the Chemical Representation Guide */
382 
383  bool adjustSingleBondsToDegreeOneNeighbors =
384  false; /**< sets single bonds between aromatic atoms and degree one
385  neighbors to SINGLE|AROMATIC */
386 
387  bool adjustSingleBondsBetweenAromaticAtoms =
388  false; /**< sets non-ring single bonds between two aromatic atoms to
389  SINGLE|AROMATIC */
390  //! \brief returns an AdjustQueryParameters object with all adjustments
391  //! disabled
394  res.adjustDegree = false;
395  res.makeDummiesQueries = false;
396  res.aromatizeIfPossible = false;
397  return res;
398  }
400 };
401 
402 //! updates an AdjustQueryParameters object from a JSON string
404  MolOps::AdjustQueryParameters &p, const std::string &json);
405 
406 //! returns a copy of a molecule with query properties adjusted
407 /*!
408  \param mol the molecule to adjust
409  \param params controls the adjustments made
410 
411  \return the new molecule, the caller owns the memory
412 */
414  const ROMol &mol, const AdjustQueryParameters *params = nullptr);
415 //! \overload
416 /// modifies the molecule in place
418  RWMol &mol, const AdjustQueryParameters *params = nullptr);
419 
420 //! returns a copy of a molecule with the atoms renumbered
421 /*!
422 
423  \param mol the molecule to work with
424  \param newOrder the new ordering of the atoms (should be numAtoms long)
425  for example: if newOrder is [3,2,0,1], then atom 3 in the original
426  molecule will be atom 0 in the new one
427 
428  \return the new molecule
429 
430  <b>Notes:</b>
431  - the caller is responsible for <tt>delete</tt>ing the pointer this
432  returns.
433 
434 */
436  const ROMol &mol, const std::vector<unsigned int> &newOrder);
437 
438 //@}
439 
440 //! \name Sanitization
441 /// {
442 
443 typedef enum {
455  SANITIZE_ALL = 0xFFFFFFF
457 
458 //! \brief carries out a collection of tasks for cleaning up a molecule and
459 /// ensuring
460 //! that it makes "chemical sense"
461 /*!
462 This function calls the following in sequence
463  -# MolOps::cleanUp()
464  -# mol.updatePropertyCache()
465  -# MolOps::symmetrizeSSSR()
466  -# MolOps::Kekulize()
467  -# MolOps::assignRadicals()
468  -# MolOps::setAromaticity()
469  -# MolOps::setConjugation()
470  -# MolOps::setHybridization()
471  -# MolOps::cleanupChirality()
472  -# MolOps::adjustHs()
473 
474  \param mol : the RWMol to be cleaned
475 
476  \param operationThatFailed : the first (if any) sanitization operation that
477  fails is set here.
478  The values are taken from the \c SanitizeFlags
479  enum. On success, the value is \c
480  SanitizeFlags::SANITIZE_NONE
481 
482  \param sanitizeOps : the bits here are used to set which sanitization
483  operations are carried out. The elements of the \c
484  SanitizeFlags enum define the operations.
485 
486  <b>Notes:</b>
487  - If there is a failure in the sanitization, a \c MolSanitizeException
488  will be thrown.
489  - in general the user of this function should cast the molecule following
490  this function to a ROMol, so that new atoms and bonds cannot be added to
491  the molecule and screw up the sanitizing that has been done here
492 */
494  unsigned int &operationThatFailed,
495  unsigned int sanitizeOps = SANITIZE_ALL);
496 //! \overload
498 
499 //! \brief Identifies chemistry problems (things that don't make chemical
500 //! sense) in a molecule
501 /*!
502 This function uses the operations in sanitizeMol but does not change
503 the input structure and returns a list of the problems encountered instead
504 of stopping at the first failure.
505 
506  The problems this looks for come from the sanitization operations:
507  -# mol.updatePropertyCache() : Unreasonable valences
508  -# MolOps::Kekulize() : Unkekulizable ring systems, aromatic atoms not
509  in rings, aromatic bonds to non-aromatic atoms.
510 
511  \param mol : the ROMol to be cleaned
512 
513  \param sanitizeOps : the bits here are used to set which sanitization
514  operations are carried out. The elements of the \c
515  SanitizeFlags enum define the operations.
516 
517  \return a vector of \c MolSanitizeException values that indicate what
518  problems were encountered
519 
520 */
522 std::vector<std::unique_ptr<MolSanitizeException>> detectChemistryProblems(
523  const ROMol &mol, unsigned int sanitizeOps = SANITIZE_ALL);
524 
525 //! Possible aromaticity models
526 /*!
527 - \c AROMATICITY_DEFAULT at the moment always uses \c AROMATICITY_RDKIT
528 - \c AROMATICITY_RDKIT is the standard RDKit model (as documented in the RDKit
529 Book)
530 - \c AROMATICITY_SIMPLE only considers 5- and 6-membered simple rings (it
531 does not consider the outer envelope of fused rings)
532 - \c AROMATICITY_MDL
533 - \c AROMATICITY_CUSTOM uses a caller-provided function
534 */
535 typedef enum {
536  AROMATICITY_DEFAULT = 0x0, ///< future proofing
540  AROMATICITY_CUSTOM = 0xFFFFFFF ///< use a function
542 
543 //! Sets up the aromaticity for a molecule
544 /*!
545 
546  This is what happens here:
547  -# find all the simple rings by calling the findSSSR function
548  -# loop over all the Atoms in each ring and mark them if they are
549  candidates
550  for aromaticity. A ring atom is a candidate if it can spare electrons
551  to the ring and if it's from the first two rows of the periodic table.
552  -# based on the candidate atoms, mark the rings to be either candidates
553  or non-candidates. A ring is a candidate only if all its atoms are
554  candidates
555  -# apply Hueckel rule to each of the candidate rings to check if the ring
556  can be
557  aromatic
558 
559  \param mol the RWMol of interest
560  \param model the aromaticity model to use
561  \param func a custom function for assigning aromaticity (only used when
562  model=\c AROMATICITY_CUSTOM)
563 
564  \return >0 on success, <= 0 otherwise
565 
566  <b>Assumptions:</b>
567  - Kekulization has been done (i.e. \c MolOps::Kekulize() has already
568  been called)
569 
570 */
573  int (*func)(RWMol &) = nullptr);
574 
575 //! Designed to be called by the sanitizer to handle special cases before
576 /// anything is done.
577 /*!
578 
579  Currently this:
580  - modifies nitro groups, so that the nitrogen does not have an
581  unreasonable valence of 5, as follows:
582  - the nitrogen gets a positive charge
583 - one of the oxygens gets a negative charge and the double bond to
584 this oxygen is changed to a single bond. The net result is that nitro groups
585  can be counted on to be: \c "[N+](=O)[O-]"
586  - modifies halogen-oxygen containing species as follows:
587  \c [Cl,Br,I](=O)(=O)(=O)O -> [X+3]([O-])([O-])([O-])O
588  \c [Cl,Br,I](=O)(=O)O -> [X+3]([O-])([O-])O
589  \c [Cl,Br,I](=O)O -> [X+]([O-])O
590  - converts the substructure [N,C]=P(=O)-* to [N,C]=[P+](-[O-])-*
591 
592  \param mol the molecule of interest
593 
594 */
596 
597 //! Called by the sanitizer to assign radical counts to atoms
599 
600 //! adjust the number of implicit and explicit Hs for special cases
601 /*!
602 
603  Currently this:
604  - modifies aromatic nitrogens so that, when appropriate, they have an
605 explicit H marked (e.g. so that we get things like \c "c1cc[nH]cc1")
606 
607  \param mol the molecule of interest
608 
609  <b>Assumptions</b>
610  - this is called after the molecule has been sanitized,
611  aromaticity has been perceived, and the implicit valence of
612  everything has been calculated.
613 
614 */
616 
617 //! Kekulizes the molecule
618 /*!
619 
620  \param mol the molecule of interest
621 
622  \param markAtomsBonds if this is set to true, \c isAromatic boolean settings
623  on both the Bonds and Atoms are turned to false following the Kekulization,
624  otherwise they are left alone in their original state.
625 
626  \param maxBackTracks the maximum number of attempts at back-tracking. The
627  algorithm uses a back-tracking procedure to revisit a previous setting of
628  double bond if we hit a wall in the kekulization process
629 
630  <b>Notes:</b>
631  - even if \c markAtomsBonds is \c false the \c BondType for all aromatic
632  bonds will be changed from \c RDKit::Bond::AROMATIC to \c
633  RDKit::Bond::SINGLE or RDKit::Bond::DOUBLE during Kekulization.
634 
635 */
636 RDKIT_GRAPHMOL_EXPORT void Kekulize(RWMol &mol, bool markAtomsBonds = true,
637  unsigned int maxBackTracks = 100);
638 //! Kekulizes the molecule if possible. If the kekulization fails the molecule
639 //! will not be modified
640 /*!
641 
642  \param mol the molecule of interest
643 
644  \param markAtomsBonds if this is set to true, \c isAromatic boolean settings
645  on both the Bonds and Atoms are turned to false following the Kekulization,
646  otherwise they are left alone in their original state.
647 
648  \param maxBackTracks the maximum number of attempts at back-tracking. The
649  algorithm uses a back-tracking procedure to revisit a previous setting of
650  double bond if we hit a wall in the kekulization process
651 
652  \returns whether or not the kekulization succeeded
653 
654  <b>Notes:</b>
655  - even if \c markAtomsBonds is \c false the \c BondType for all aromatic
656  bonds will be changed from \c RDKit::Bond::AROMATIC to \c
657  RDKit::Bond::SINGLE or RDKit::Bond::DOUBLE during Kekulization.
658 
659 */
661  bool markAtomsBonds = true,
662  unsigned int maxBackTracks = 100);
663 
664 //! flags the molecule's conjugated bonds
666 
667 //! calculates and sets the hybridization of all of a molecule's Atoms
669 
670 // @}
671 
672 //! \name Ring finding and SSSR
673 //@{
674 
675 //! finds a molecule's Smallest Set of Smallest Rings
676 /*!
677  Currently this implements a modified form of Figueras algorithm
678  (JCICS - Vol. 36, No. 5, 1996, 986-991)
679 
680  \param mol the molecule of interest
681  \param res used to return the vector of rings. Each entry is a vector with
682  atom indices. This information is also stored in the molecule's
683  RingInfo structure, so this argument is optional (see overload)
684 
685  \return number of smallest rings found
686 
687  Base algorithm:
688  - The original algorithm starts by finding representative degree 2
689  nodes.
690  - Representative because if a series of deg 2 nodes are found only
691  one of them is picked.
692  - The smallest ring around each of them is found.
693 - The bonds that connect to this degree 2 node are then chopped off,
694  yielding
695  new deg two nodes
696  - The process is repeated on the new deg 2 nodes.
697  - If no deg 2 nodes are found, a deg 3 node is picked. The smallest ring
698  with it is found. A bond from this is "carefully" (look in the paper)
699  selected and chopped, yielding deg 2 nodes. The process is same as
700  above once this is done.
701 
702  Our Modifications:
703  - If available, more than one smallest ring around a representative deg 2
704  node will be computed and stored
705  - Typically 3 rings are found around a degree 3 node (when no deg 2s are
706  available)
707 and all the bonds to that node are chopped.
708  - The extra rings that were found in this process are removed after all
709  the nodes have been covered.
710 
711  These changes were motivated by several factors:
712  - We believe the original algorithm fails to find the correct SSSR
713  (finds the correct number of them but the wrong ones) on some sample
714  mols
715  - Since SSSR may not be unique, a post-SSSR step to symmetrize may be
716  done. The extra rings this process adds can be quite useful.
717 */
719  std::vector<std::vector<int>> &res);
720 //! \overload
722  const ROMol &mol, std::vector<std::vector<int>> *res = nullptr);
723 
724 //! use a DFS algorithm to identify ring bonds and atoms in a molecule
725 /*!
726  \b NOTE: though the RingInfo structure is populated by this function,
727  the only really reliable calls that can be made are to check if
728  mol.getRingInfo().numAtomRings(idx) or mol.getRingInfo().numBondRings(idx)
729  return values >0
730 */
732 
734 
735 //! symmetrize the molecule's Smallest Set of Smallest Rings
736 /*!
737 SSSR rings obtained from "findSSSR" can be non-unique in some cases.
738  For example, cubane has five SSSR rings, not six as one would hope.
739 
740  This function adds additional rings to the SSSR list if necessary
741  to make the list symmetric, e.g. all atoms in cubane will be part of the
742 same number of SSSRs. This function chooses these extra rings from the extra
743 rings computed and discarded during findSSSR. The new rings are chosen such
744  that:
745  - replacing a same sized ring in the SSSR list with an extra ring yields
746  the same union of bond IDs as the original SSSR list
747 
748  \param mol - the molecule of interest
749  \param res used to return the vector of rings. Each entry is a vector with
750  atom indices. This information is also stored in the molecule's
751  RingInfo structure, so this argument is optional (see overload)
752 
753  \return the total number of rings = (new rings + old SSSRs)
754 
755  <b>Notes:</b>
756  - if no SSSR rings are found on the molecule - MolOps::findSSSR() is called
757  first
758 */
760  std::vector<std::vector<int>> &res);
761 //! \overload
763 
764 //@}
765 
766 //! \name Shortest paths and other matrices
767 //@{
768 
769 //! returns a molecule's adjacency matrix
770 /*!
771  \param mol the molecule of interest
772  \param useBO toggles use of bond orders in the matrix
773  \param emptyVal sets the empty value (for non-adjacent atoms)
774  \param force forces calculation of the matrix, even if already
775  computed
776  \param propNamePrefix used to set the cached property name
777 
778  \return the adjacency matrix.
779 
780  <b>Notes</b>
781  - The result of this is cached in the molecule's local property
782  dictionary, which will handle deallocation. The caller should <b>not</b> \c
783  delete this pointer.
784 
785 */
787  const ROMol &mol, bool useBO = false, int emptyVal = 0, bool force = false,
788  const char *propNamePrefix = nullptr,
789  const boost::dynamic_bitset<> *bondsToUse = nullptr);
790 
791 //! Computes the molecule's topological distance matrix
792 /*!
793  Uses the Floyd-Warshall all-pairs-shortest-paths algorithm.
794 
795  \param mol the molecule of interest
796  \param useBO toggles use of bond orders in the matrix
797  \param useAtomWts sets the diagonal elements of the result to
798  6.0/(atomic number) so that the matrix can be used to calculate
799  Balaban J values. This does not affect the bond weights.
800  \param force forces calculation of the matrix, even if already
801  computed
802  \param propNamePrefix used to set the cached property name
803 
804  \return the distance matrix.
805 
806  <b>Notes</b>
807  - The result of this is cached in the molecule's local property
808  dictionary, which will handle deallocation. The caller should <b>not</b> \c
809  delete this pointer.
810 
811 
812 */
814  const ROMol &mol, bool useBO = false, bool useAtomWts = false,
815  bool force = false, const char *propNamePrefix = nullptr);
816 
817 //! Computes the molecule's topological distance matrix
818 /*!
819  Uses the Floyd-Warshall all-pairs-shortest-paths algorithm.
820 
821  \param mol the molecule of interest
822  \param activeAtoms only elements corresponding to these atom indices
823  will be included in the calculation
824  \param bonds only bonds found in this list will be included in the
825  calculation
826  \param useBO toggles use of bond orders in the matrix
827  \param useAtomWts sets the diagonal elements of the result to
828  6.0/(atomic number) so that the matrix can be used to calculate
829  Balaban J values. This does not affect the bond weights.
830 
831  \return the distance matrix.
832 
833  <b>Notes</b>
834  - The results of this call are not cached, the caller <b>should</b> \c
835  delete
836  this pointer.
837 
838 
839 */
841  const ROMol &mol, const std::vector<int> &activeAtoms,
842  const std::vector<const Bond *> &bonds, bool useBO = false,
843  bool useAtomWts = false);
844 
845 //! Computes the molecule's 3D distance matrix
846 /*!
847 
848  \param mol the molecule of interest
849  \param confId the conformer to use
850  \param useAtomWts sets the diagonal elements of the result to
851  6.0/(atomic number)
852  \param force forces calculation of the matrix, even if already
853  computed
854  \param propNamePrefix used to set the cached property name
855  (if set to an empty string, the matrix will not be
856  cached)
857 
858  \return the distance matrix.
859 
860  <b>Notes</b>
861  - If propNamePrefix is not empty the result of this is cached in the
862  molecule's local property dictionary, which will handle deallocation.
863  In other cases the caller is responsible for freeing the memory.
864 
865 */
867  const ROMol &mol, int confId = -1, bool useAtomWts = false,
868  bool force = false, const char *propNamePrefix = nullptr);
869 //! Find the shortest path between two atoms
870 /*!
871  Uses the Bellman-Ford algorithm
872 
873  \param mol molecule of interest
874  \param aid1 index of the first atom
875  \param aid2 index of the second atom
876 
877  \return an std::list with the indices of the atoms along the shortest
878  path
879 
880  <b>Notes:</b>
881  - the starting and end atoms are included in the path
882  - if no path is found, an empty path is returned
883 
884 */
885 RDKIT_GRAPHMOL_EXPORT std::list<int> getShortestPath(const ROMol &mol, int aid1,
886  int aid2);
887 
888 //@}
889 
890 //! \name Stereochemistry
891 //@{
892 
893 //! removes bogus chirality markers (those on non-sp3 centers):
895 
896 //! \brief Uses a conformer to assign ChiralType to a molecule's atoms
897 /*!
898  \param mol the molecule of interest
899  \param confId the conformer to use
900  \param replaceExistingTags if this flag is true, any existing atomic chiral
901  tags will be replaced
902 
903  If the conformer provided is not a 3D conformer, nothing will be done.
904 */
906  ROMol &mol, int confId = -1, bool replaceExistingTags = true);
907 
908 //! \brief Uses a conformer to assign ChiralTypes to a molecule's atoms and
909 //! stereo flags to its bonds
910 /*!
911 
912  \param mol the molecule of interest
913  \param confId the conformer to use
914  \param replaceExistingTags if this flag is true, any existing info about
915  stereochemistry will be replaced
916 
917  If the conformer provided is not a 3D conformer, nothing will be done.
918 */
920  ROMol &mol, int confId = -1, bool replaceExistingTags = true);
921 
922 //! \brief Use bond directions to assign ChiralTypes to a molecule's atoms and
923 //! stereo flags to its bonds
924 /*!
925 
926  \param mol the molecule of interest
927  \param confId the conformer to use
928  \param replaceExistingTags if this flag is true, any existing info about
929  stereochemistry will be replaced
930 */
932  ROMol &mol, int confId = -1, bool replaceExistingTags = true);
933 
934 //! \deprecated: this function will be removed in a future release. Use
935 //! setDoubleBondNeighborDirections() instead
937  int confId = -1);
938 //! Sets bond directions based on double bond stereochemistry
940  ROMol &mol, const Conformer *conf = nullptr);
941 
942 //! Assign CIS/TRANS bond stereochemistry tags based on neighboring directions
944 
945 //! Assign stereochemistry tags to atoms (i.e. R/S) and bonds (i.e. Z/E)
946 /*!
947  Does the CIP stereochemistry assignment for the molecule's atoms
948  (R/S) and double bond (Z/E). Chiral atoms will have a property
949  '_CIPCode' indicating their chiral code.
950 
951  \param mol the molecule to use
952  \param cleanIt if true, any existing values of the property `_CIPCode`
953  will be cleared, atoms with a chiral specifier that aren't
954  actually chiral (e.g. atoms with duplicate
955  substituents or only 2 substituents, etc.) will have
956  their chiral code set to CHI_UNSPECIFIED. Bonds with
957  STEREOCIS/STEREOTRANS specified that have duplicate
958  substituents based upon the CIP atom ranks will be
959  marked STEREONONE.
960  \param force causes the calculation to be repeated even if it has
961  already been done
962  \param flagPossibleStereoCenters set the _ChiralityPossible property on
963  atoms that are possible stereocenters
964 
965  <b>Notes:</b>
966  - Throughout we assume that we're working with a hydrogen-suppressed
967  graph.
968 
969 */
971  ROMol &mol, bool cleanIt = false, bool force = false,
972  bool flagPossibleStereoCenters = false);
973 //! Removes all stereochemistry information from atoms (i.e. R/S) and bonds
974 /// (i.e. Z/E)
975 /*!
976 
977  \param mol the molecule of interest
978 */
980 
981 //! \brief finds bonds that could be cis/trans in a molecule and marks them as
982 //! Bond::STEREOANY.
983 /*!
984  \param mol the molecule of interest
985  \param cleanIt toggles removal of stereo flags from double bonds that can
986  not have stereochemistry
987 
988  This function finds any double bonds that can potentially be part of
989  a cis/trans system. No attempt is made here to mark them cis or
990  trans. No attempt is made to detect double bond stereo in ring systems.
991 
992  This function is useful in the following situations:
993  - when parsing a mol file; for the bonds marked here, coordinate
994  information on the neighbors can be used to identify cis or trans states
995  - when writing a mol file; bonds that can be cis/trans but not marked as
996  either need to be specially marked in the mol file
997  - finding double bonds with unspecified stereochemistry so they
998  can be enumerated for downstream 3D tools
999 
1000  The CIPranks on the neighboring atoms are checked in this function. The
1001  _CIPCode property is set to any on the double bond.
1002 */
1004  bool cleanIt = false);
1005 //! \brief Uses the molParity atom property to assign ChiralType to a molecule's
1006 //! atoms
1007 /*!
1008  \param mol the molecule of interest
1009  \param replaceExistingTags if this flag is true, any existing atomic chiral
1010  tags will be replaced
1011 */
1013  ROMol &mol, bool replaceExistingTags = true);
1014 
1015 //@}
1016 
1017 //! returns the number of atoms which have a particular property set
1019  const ROMol &mol, std::string prop);
1020 
1021 //! returns whether or not a molecule needs to have Hs added to it.
1023 
1024 namespace details {
1025 //! not recommended for use in other code
1027  RWMol &mol, const boost::dynamic_bitset<> &atomsToUse,
1028  const boost::dynamic_bitset<> &bondsToUse, bool markAtomsBonds = true,
1029  unsigned int maxBackTracks = 100);
1030 } // namespace details
1031 
1032 } // namespace MolOps
1033 } // namespace RDKit
1034 
1035 #endif
RDKIT_GRAPHMOL_EXPORT const int ci_LOCAL_INF
The class for representing atoms.
Definition: Atom.h:68
The class for representing 2D or 3D conformation of a molecule.
Definition: Conformer.h:45
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:32
#define RDKIT_GRAPHMOL_EXPORT
Definition: export.h:209
RDKIT_GRAPHMOL_EXPORT void KekulizeFragment(RWMol &mol, const boost::dynamic_bitset<> &atomsToUse, const boost::dynamic_bitset<> &bondsToUse, bool markAtomsBonds=true, unsigned int maxBackTracks=100)
not recommended for use in other code
RDKIT_GRAPHMOL_EXPORT double * get3DDistanceMat(const ROMol &mol, int confId=-1, bool useAtomWts=false, bool force=false, const char *propNamePrefix=nullptr)
Computes the molecule's 3D distance matrix.
RDKIT_GRAPHMOL_EXPORT void cleanUp(RWMol &mol)
RDKIT_GRAPHMOL_EXPORT void assignStereochemistry(ROMol &mol, bool cleanIt=false, bool force=false, bool flagPossibleStereoCenters=false)
Assign stereochemistry tags to atoms (i.e. R/S) and bonds (i.e. Z/E)
RDKIT_GRAPHMOL_EXPORT bool KekulizeIfPossible(RWMol &mol, bool markAtomsBonds=true, unsigned int maxBackTracks=100)
RDKIT_GRAPHMOL_EXPORT std::vector< std::unique_ptr< MolSanitizeException > > detectChemistryProblems(const ROMol &mol, unsigned int sanitizeOps=SANITIZE_ALL)
Identifies chemistry problems (things that don't make chemical sense) in a molecule.
RDKIT_GRAPHMOL_EXPORT double * getAdjacencyMatrix(const ROMol &mol, bool useBO=false, int emptyVal=0, bool force=false, const char *propNamePrefix=nullptr, const boost::dynamic_bitset<> *bondsToUse=nullptr)
returns a molecule's adjacency matrix
RDKIT_GRAPHMOL_EXPORT ROMol * mergeQueryHs(const ROMol &mol, bool mergeUnmappedOnly=false)
RDKIT_GRAPHMOL_EXPORT void assignChiralTypesFromBondDirs(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Use bond directions to assign ChiralTypes to a molecule's atoms and stereo flags to its bonds.
RDKIT_GRAPHMOL_EXPORT int setAromaticity(RWMol &mol, AromaticityModel model=AROMATICITY_DEFAULT, int(*func)(RWMol &)=nullptr)
Sets up the aromaticity for a molecule.
RDKIT_GRAPHMOL_EXPORT void findRingFamilies(const ROMol &mol)
RDKIT_GRAPHMOL_EXPORT bool needsHs(const ROMol &mol)
returns whether or not a molecule needs to have Hs added to it.
RDKIT_GRAPHMOL_EXPORT void fastFindRings(const ROMol &mol)
use a DFS algorithm to identify ring bonds and atoms in a molecule
RDKIT_GRAPHMOL_EXPORT double * getDistanceMat(const ROMol &mol, bool useBO=false, bool useAtomWts=false, bool force=false, const char *propNamePrefix=nullptr)
Computes the molecule's topological distance matrix.
RDKIT_GRAPHMOL_EXPORT int getFormalCharge(const ROMol &mol)
sums up all atomic formal charges and returns the result
AromaticityModel
Possible aromaticity models.
Definition: MolOps.h:535
@ AROMATICITY_RDKIT
Definition: MolOps.h:537
@ AROMATICITY_MDL
Definition: MolOps.h:539
@ AROMATICITY_CUSTOM
use a function
Definition: MolOps.h:540
@ AROMATICITY_DEFAULT
future proofing
Definition: MolOps.h:536
@ AROMATICITY_SIMPLE
Definition: MolOps.h:538
RDKIT_GRAPHMOL_EXPORT void setTerminalAtomCoords(ROMol &mol, unsigned int idx, unsigned int otherIdx)
RDKIT_GRAPHMOL_EXPORT std::map< T, boost::shared_ptr< ROMol > > getMolFragsWithQuery(const ROMol &mol, T(*query)(const ROMol &, const Atom *), bool sanitizeFrags=true, const std::vector< T > *whiteList=nullptr, bool negateList=false)
splits a molecule into pieces based on labels assigned using a query
RDKIT_GRAPHMOL_EXPORT void removeStereochemistry(ROMol &mol)
RDKIT_GRAPHMOL_EXPORT ROMol * addHs(const ROMol &mol, bool explicitOnly=false, bool addCoords=false, const UINT_VECT *onlyOnAtoms=nullptr, bool addResidueInfo=false)
returns a copy of a molecule with hydrogens added in as explicit Atoms
RDKIT_GRAPHMOL_EXPORT double computeBalabanJ(const ROMol &mol, bool useBO=true, bool force=false, const std::vector< int > *bondPath=nullptr, bool cacheIt=true)
DEPRECATED calculates Balaban's J index for the molecule.
RDKIT_GRAPHMOL_EXPORT void assignChiralTypesFromMolParity(ROMol &mol, bool replaceExistingTags=true)
Uses the molParity atom property to assign ChiralType to a molecule's atoms.
RDKIT_GRAPHMOL_EXPORT unsigned int getMolFrags(const ROMol &mol, std::vector< int > &mapping)
find fragments (disconnected components of the molecular graph)
RDKIT_GRAPHMOL_EXPORT void adjustHs(RWMol &mol)
adjust the number of implicit and explicit Hs for special cases
RDKIT_GRAPHMOL_EXPORT void assignStereochemistryFrom3D(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Uses a conformer to assign ChiralTypes to a molecule's atoms and stereo flags to its bonds.
@ SANITIZE_ALL
Definition: MolOps.h:455
@ SANITIZE_SETAROMATICITY
Definition: MolOps.h:450
@ SANITIZE_NONE
Definition: MolOps.h:444
@ SANITIZE_PROPERTIES
Definition: MolOps.h:446
@ SANITIZE_SETCONJUGATION
Definition: MolOps.h:451
@ SANITIZE_SYMMRINGS
Definition: MolOps.h:447
@ SANITIZE_ADJUSTHS
Definition: MolOps.h:454
@ SANITIZE_CLEANUPCHIRALITY
Definition: MolOps.h:453
@ SANITIZE_FINDRADICALS
Definition: MolOps.h:449
@ SANITIZE_KEKULIZE
Definition: MolOps.h:448
@ SANITIZE_SETHYBRIDIZATION
Definition: MolOps.h:452
@ SANITIZE_CLEANUP
Definition: MolOps.h:445
RDKIT_GRAPHMOL_EXPORT int countAtomElec(const Atom *at)
RDKIT_GRAPHMOL_EXPORT void detectBondStereochemistry(ROMol &mol, int confId=-1)
RDKIT_GRAPHMOL_EXPORT void sanitizeMol(RWMol &mol, unsigned int &operationThatFailed, unsigned int sanitizeOps=SANITIZE_ALL)
carries out a collection of tasks for cleaning up a molecule and ensuring that it makes "chemical sen...
RDKIT_GRAPHMOL_EXPORT ROMol * removeHs(const ROMol &mol, bool implicitOnly=false, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
RDKIT_GRAPHMOL_EXPORT void parseAdjustQueryParametersFromJSON(MolOps::AdjustQueryParameters &p, const std::string &json)
updates an AdjustQueryParameters object from a JSON string
RDKIT_GRAPHMOL_EXPORT void removeAllHs(RWMol &mol, bool sanitize=true)
removes all Hs from a molecule
RDKIT_GRAPHMOL_EXPORT ROMol * adjustQueryProperties(const ROMol &mol, const AdjustQueryParameters *params=nullptr)
returns a copy of a molecule with query properties adjusted
RDKIT_GRAPHMOL_EXPORT void setBondStereoFromDirections(ROMol &mol)
Assign CIS/TRANS bond stereochemistry tags based on neighboring directions.
RDKIT_GRAPHMOL_EXPORT ROMol * renumberAtoms(const ROMol &mol, const std::vector< unsigned int > &newOrder)
returns a copy of a molecule with the atoms renumbered
RDKIT_GRAPHMOL_EXPORT int findSSSR(const ROMol &mol, std::vector< std::vector< int >> &res)
finds a molecule's Smallest Set of Smallest Rings
RDKIT_GRAPHMOL_EXPORT bool atomHasConjugatedBond(const Atom *at)
returns whether or not the given Atom is involved in a conjugated bond
RDKIT_GRAPHMOL_EXPORT void cleanupChirality(RWMol &mol)
removes bogus chirality markers (those on non-sp3 centers):
RDKIT_GRAPHMOL_EXPORT void Kekulize(RWMol &mol, bool markAtomsBonds=true, unsigned int maxBackTracks=100)
Kekulizes the molecule.
RDKIT_GRAPHMOL_EXPORT void assignRadicals(RWMol &mol)
Called by the sanitizer to assign radical counts to atoms.
RDKIT_GRAPHMOL_EXPORT void findPotentialStereoBonds(ROMol &mol, bool cleanIt=false)
finds bonds that could be cis/trans in a molecule and marks them as Bond::STEREOANY.
RDKIT_GRAPHMOL_EXPORT void setHybridization(ROMol &mol)
calculates and sets the hybridization of all a molecule's Atoms
RDKIT_GRAPHMOL_EXPORT std::list< int > getShortestPath(const ROMol &mol, int aid1, int aid2)
Find the shortest path between two atoms.
RDKIT_GRAPHMOL_EXPORT unsigned getNumAtomsWithDistinctProperty(const ROMol &mol, std::string prop)
returns the number of atoms which have a particular property set
RDKIT_GRAPHMOL_EXPORT void assignChiralTypesFrom3D(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Uses a conformer to assign ChiralType to a molecule's atoms.
RDKIT_GRAPHMOL_EXPORT int symmetrizeSSSR(ROMol &mol, std::vector< std::vector< int >> &res)
symmetrize the molecule's Smallest Set of Smallest Rings
RDKIT_GRAPHMOL_EXPORT void setConjugation(ROMol &mol)
flags the molecule's conjugated bonds
RDKIT_GRAPHMOL_EXPORT void setDoubleBondNeighborDirections(ROMol &mol, const Conformer *conf=nullptr)
Sets bond directions based on double bond stereochemistry.
AdjustQueryWhichFlags
Definition: MolOps.h:323
@ ADJUST_IGNORERINGS
Definition: MolOps.h:326
@ ADJUST_IGNORENONE
Definition: MolOps.h:324
@ ADJUST_IGNOREMAPPED
Definition: MolOps.h:329
@ ADJUST_IGNORENONDUMMIES
Definition: MolOps.h:328
@ ADJUST_IGNOREDUMMIES
Definition: MolOps.h:327
@ ADJUST_IGNORECHAINS
Definition: MolOps.h:325
@ ADJUST_IGNOREALL
Definition: MolOps.h:330
Std stuff.
Definition: Abbreviations.h:18
std::vector< double > INVAR_VECT
Definition: MolOps.h:31
INVAR_VECT::iterator INVAR_VECT_I
Definition: MolOps.h:33
INVAR_VECT::const_iterator INVAR_VECT_CI
Definition: MolOps.h:34
std::vector< UINT > UINT_VECT
Definition: types.h:293
Parameters controlling the behavior of MolOps::adjustQueryProperties.
Definition: MolOps.h:342
static AdjustQueryParameters noAdjustments()
returns an AdjustQueryParameters object with all adjustments disabled
Definition: MolOps.h:392