RDKit
Open-source cheminformatics and machine learning.
MolEnumerator.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2020-2021 Greg Landrum and T5 Informatics GmbH
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #ifndef RDKIT_MOLENUMERATOR_H
11 #define RDKIT_MOLENUMERATOR_H
12 
13 #include <RDGeneral/export.h>
14 #include <GraphMol/RDKitBase.h>
15 #include <GraphMol/MolBundle.h>
16 
17 #include <vector>
18 #include <map>
19 #include <string>
20 #include <memory>
21 
22 namespace RDKit {
23 class ChemicalReaction;
24 namespace MolEnumerator {
25 
26 namespace detail {
27 extern const std::string idxPropName;
30 } // namespace detail
31 
32 //! abstract base class for a molecule enumeration operation
34  public:
36  virtual ~MolEnumeratorOp() = default;  //!< virtual: copy() hands out base-class pointers
37  //! returns a vector with the number of possible variations at each
38  //! variability point covered by this operation
39  virtual std::vector<size_t> getVariationCounts() const = 0;
40  //! returns the molecule corresponding to a particular variation
41  /*! which.size() should be equal to the number of variation counts.
42  */
43  virtual std::unique_ptr<ROMol> operator()(
44  const std::vector<size_t> &which) const = 0;
45  //! initializes this operation to work on a particular molecule
46  virtual void initFromMol(const ROMol &mol) = 0;
47  //! polymorphic copy
48  virtual std::unique_ptr<MolEnumeratorOp> copy() const = 0;
49 };
50 
51 //! Molecule enumeration operation corresponding to position variation bonds
52 /*! This uses ATTACH and ENDPTS properties on bonds and requires that the bond
53  * has one dummy atom (which will be discarded). The other atom of the bond will
54  * be connected to the atoms listed in the ENDPTS property
55  */
57  public:
59  PositionVariationOp(const std::shared_ptr<ROMol> mol) : dp_mol(mol) {
60  PRECONDITION(mol, "bad molecule");
61  initFromMol();
62  }
63  PositionVariationOp(const ROMol &mol) : dp_mol(new ROMol(mol)) {
64  initFromMol();
65  }
67  : dp_mol(other.dp_mol), d_variationPoints(other.d_variationPoints) {}
69  if (&other == this) {
70  return *this;
71  }
72  dp_mol = other.dp_mol;
73  d_variationPoints = other.d_variationPoints;
74  return *this;
75  }
76  //! \override
77  std::vector<size_t> getVariationCounts() const override;
78 
79  //! \override
80  std::unique_ptr<ROMol> operator()(
81  const std::vector<size_t> &which) const override;
82 
83  //! \override
84  void initFromMol(const ROMol &mol) override;
85 
86  //! \override returns a newly allocated copy-constructed duplicate of this op
87  std::unique_ptr<MolEnumeratorOp> copy() const override {
88  return std::unique_ptr<MolEnumeratorOp>{new PositionVariationOp{*this}};
89  }
90 
91  private:
92  std::shared_ptr<ROMol> dp_mol{nullptr};
93  std::vector<std::pair<unsigned int, std::vector<unsigned int>>>
94  d_variationPoints{};
95  std::vector<size_t> d_dummiesAtEachPoint{};
96  void initFromMol();
97 };
98 
99 //! Molecule enumeration operation corresponding to LINKNODES
100 /*!
101  */
103  public:
105  LinkNodeOp(const std::shared_ptr<ROMol> mol) : dp_mol(mol) {
106  PRECONDITION(mol, "bad molecule");
107  initFromMol();
108  }
109  LinkNodeOp(const ROMol &mol) : dp_mol(new ROMol(mol)) { initFromMol(); }
110  //! memberwise copy; the molecule and frame pointers are shared, not cloned
111  LinkNodeOp(const LinkNodeOp &other) = default;
118  //! memberwise assignment; the molecule and frame pointers are shared, not cloned
119  LinkNodeOp &operator=(const LinkNodeOp &other) = default;
131  //! \override
132  std::vector<size_t> getVariationCounts() const override;
133 
134  //! \override
135  std::unique_ptr<ROMol> operator()(
136  const std::vector<size_t> &which) const override;
137 
138  //! \override
139  void initFromMol(const ROMol &mol) override;
140 
141  //! \override returns a newly allocated copy-constructed duplicate of this op
142  std::unique_ptr<MolEnumeratorOp> copy() const override {
143  return std::unique_ptr<MolEnumeratorOp>{new LinkNodeOp{*this}};
144  }
145 
146  private:
147  std::shared_ptr<ROMol> dp_mol{nullptr};
148  std::shared_ptr<RWMol> dp_frame{nullptr};
149  std::vector<size_t> d_countAtEachPoint{};
150  std::vector<std::tuple<unsigned, unsigned, unsigned>> d_variations;
151  std::vector<std::pair<unsigned, unsigned>> d_pointRanges;
152  std::map<unsigned, unsigned> d_isotopeMap;
153  std::map<unsigned, Atom *> d_atomMap;
154 
155  void initFromMol();
156 };
157 
158 //! Molecule enumeration operation corresponding to SRUs
159 /*!
160  This should be considered a work-in-progress and to be somewhat fragile.
161 
162  Known limitations:
163  - Overlapping SRUs, i.e. where one monomer is contained within another, are
164  not supported
165 
166  */
168  public:
170  RepeatUnitOp(const std::shared_ptr<ROMol> mol) : dp_mol(mol) {
171  PRECONDITION(mol, "bad molecule");
172  initFromMol();
173  };
174  RepeatUnitOp(const ROMol &mol) : dp_mol(new ROMol(mol)) { initFromMol(); };
176  : d_defaultRepeatCount(other.d_defaultRepeatCount),
177  dp_mol(other.dp_mol),
178  dp_frame(other.dp_frame),
179  d_repeats(other.d_repeats),
180  d_countAtEachPoint(other.d_countAtEachPoint),
181  d_variations(other.d_variations),
182  d_pointRanges(other.d_pointRanges),
183  d_isotopeMap(other.d_isotopeMap),
184  d_atomMap(other.d_atomMap){};
186  if (&other == this) {
187  return *this;
188  }
189  dp_mol = other.dp_mol;
190  dp_frame = other.dp_frame;
191  d_repeats = other.d_repeats;
192  d_countAtEachPoint = other.d_countAtEachPoint;
193  d_variations = other.d_variations;
194  d_pointRanges = other.d_pointRanges;
195  d_isotopeMap = other.d_isotopeMap;
196  d_atomMap = other.d_atomMap;
197  d_defaultRepeatCount = other.d_defaultRepeatCount;
198  return *this;
199  };
200  //! \override
201  std::vector<size_t> getVariationCounts() const override;
202 
203  //! \override
204  std::unique_ptr<ROMol> operator()(
205  const std::vector<size_t> &which) const override;
206 
207  //! \override
208  void initFromMol(const ROMol &mol) override;
209 
210  //! \override returns a newly allocated copy-constructed duplicate of this op
211  std::unique_ptr<MolEnumeratorOp> copy() const override {
212  return std::unique_ptr<MolEnumeratorOp>{new RepeatUnitOp{*this}};
213  }
214 
215  size_t d_defaultRepeatCount =
216  4; //! from mol files we typically don't know the repeat count. This is
217  //! what we use instead
218  private:
219  std::shared_ptr<ROMol> dp_mol{nullptr};
220  std::shared_ptr<RWMol> dp_frame{nullptr};
221  std::vector<std::shared_ptr<RWMol>> d_repeats;
222  std::vector<RWMol> dp_repeatUnits{};
223  std::vector<size_t> d_countAtEachPoint{};
224  std::vector<unsigned> d_sruOrder{};
225  std::vector<std::tuple<unsigned, unsigned, unsigned>> d_variations;
226  std::vector<std::pair<unsigned, unsigned>> d_pointRanges;
227  std::map<unsigned, unsigned> d_isotopeMap;
228  std::map<unsigned, Atom *> d_atomMap;
229 
230  void initFromMol();
231 };
232 
233 //! Parameters used to control the molecule enumeration
235  bool sanitize = false;
236  size_t maxToEnumerate = 1000;
237  bool doRandom = false; //< not yet implemented
238  int randomSeed = -1; //< not yet implemented
239  std::shared_ptr<MolEnumeratorOp> dp_operation;
240 };
241 
242 //! Returns a MolBundle containing the molecules resulting from applying the
243 //! operators contained in \c paramsList to \c mol.
244 //! The operators are applied in order.
245 /*!
246 NOTE: the current implementation does not support molecules which include
247 both LINKNODE and SRU features.
248 
249 */
251 enumerate(const ROMol &mol, const std::vector<MolEnumeratorParams> &paramsList);
252 
253 //! Returns a MolBundle containing the molecules resulting from applying the
254 //! enumerable operators contained in \c mol.
255 /*!
256 \param maxPerOperation the maximum number of molecules which an individual
257 operation is allowed to generate
258 
259 NOTE: the current implementation does not support molecules which include
260 both LINKNODE and SRU features.
261 
262 */
264  size_t maxPerOperation = 0);
265 
266 //! Convenience overload: returns a MolBundle containing the molecules
267 //! resulting from applying the single operator in \c params to \c mol.
268 inline MolBundle enumerate(const ROMol &mol,
269  const MolEnumeratorParams &params) {
270  // wrap the lone parameter set and defer to the list-based overload
271  return enumerate(mol, std::vector<MolEnumeratorParams>(1, params));
272 }
273 } // namespace MolEnumerator
274 } // namespace RDKit
275 
276 #endif
#define PRECONDITION(expr, mess)
Definition: Invariant.h:109
Defines a class for managing bundles of molecules.
pulls in the core RDKit functionality
MolBundle contains a collection of related ROMols.
Definition: MolBundle.h:39
Molecule enumeration operation corresponding to LINKNODES.
LinkNodeOp(const LinkNodeOp &other)
LinkNodeOp & operator=(const LinkNodeOp &other)
void initFromMol(const ROMol &mol) override
\override
LinkNodeOp(const std::shared_ptr< ROMol > mol)
std::unique_ptr< MolEnumeratorOp > copy() const override
\override
std::vector< size_t > getVariationCounts() const override
\override
std::unique_ptr< ROMol > operator()(const std::vector< size_t > &which) const override
\override
abstract base class for a molecule enumeration operation
Definition: MolEnumerator.h:33
virtual std::unique_ptr< MolEnumeratorOp > copy() const =0
polymorphic copy
virtual std::unique_ptr< ROMol > operator()(const std::vector< size_t > &which) const =0
returns the molecule corresponding to a particular variation
virtual std::vector< size_t > getVariationCounts() const =0
virtual void initFromMol(const ROMol &mol)=0
initializes this operation to work on a particular molecule
Molecule enumeration operation corresponding to position variation bonds.
Definition: MolEnumerator.h:56
std::unique_ptr< MolEnumeratorOp > copy() const override
\override
Definition: MolEnumerator.h:87
std::vector< size_t > getVariationCounts() const override
\override
PositionVariationOp & operator=(const PositionVariationOp &other)
Definition: MolEnumerator.h:68
void initFromMol(const ROMol &mol) override
\override
PositionVariationOp(const PositionVariationOp &other)
Definition: MolEnumerator.h:66
PositionVariationOp(const std::shared_ptr< ROMol > mol)
Definition: MolEnumerator.h:59
std::unique_ptr< ROMol > operator()(const std::vector< size_t > &which) const override
\override
Molecule enumeration operation corresponding to SRUs.
void initFromMol(const ROMol &mol) override
\override
RepeatUnitOp(const RepeatUnitOp &other)
std::unique_ptr< ROMol > operator()(const std::vector< size_t > &which) const override
\override
std::unique_ptr< MolEnumeratorOp > copy() const override
\override
RepeatUnitOp(const std::shared_ptr< ROMol > mol)
std::vector< size_t > getVariationCounts() const override
\override
RepeatUnitOp & operator=(const RepeatUnitOp &other)
#define RDKIT_MOLENUMERATOR_EXPORT
Definition: export.h:281
const std::string idxPropName
void removeOrigIndices(ROMol &mol)
void preserveOrigIndices(ROMol &mol)
RDKIT_MOLENUMERATOR_EXPORT MolBundle enumerate(const ROMol &mol, const std::vector< MolEnumeratorParams > &paramsList)
Std stuff.
Definition: Abbreviations.h:18
Parameters used to control the molecule enumeration.
std::shared_ptr< MolEnumeratorOp > dp_operation