User 870ab5b546
01-05-2011 20:13:59
The code:
/**
 * Runs a full-structure search to decide whether a response molecule (the
 * "target" in ChemAxon parlance) matches an author's molecule (the "query").
 * <p>
 * Unless stereochemistry is ignored, every stereo bond in the author's
 * molecule must also be present in the response molecule, whereas a
 * nonstereo bond in the author's molecule matches any stereo bond (or
 * none) in the response molecule.
 * <p>
 * setImplicitHMatching() is not called here, so it keeps its default of
 * IMPLICIT_H_MATCHING_ENABLED. DISABLED would mean that explicit H atoms
 * in the author's substructure would have to be explicit in the response.
 *
 * @param respMol a response molecule (search target)
 * @param authMol an author's molecule (search query)
 * @param stereoType flags for treating stereochemistry
 * @return true if the response molecule matches the author's molecule
 * @throws MolFileException if the underlying search throws a
 * SearchException
 */
public static boolean matchExact(Molecule respMol, Molecule authMol,
        int stereoType) throws MolFileException {
    final String SELF = "MolFunctions.matchExact: ";

    // Setter order is kept as in the original in case one option
    // influences another.
    final MolSearchOptions opts = new MolSearchOptions();
    opts.setSearchType(FULL);
    opts.setVagueBondLevel(VAGUE_BOND_OFF);
    // required for comparing nonaromatized aromatic rings
    // NOTE(review): unclear whether the remark above belongs to the
    // vague-bond level or to the stereo model -- confirm.
    opts.setStereoModel(STEREO_MODEL_GLOBAL);
    opts.setChargeMatching(CHARGE_MATCHING_EXACT);
    opts.setIsotopeMatching(ISOTOPE_MATCHING_EXACT);
    opts.setRadicalMatching(RADICAL_MATCHING_EXACT);
    opts.setValenceMatching(true);
    setStereoOptions(opts, stereoType);

    // Response is the target, author's structure is the query.
    final MolSearch searcher = new MolSearch();
    searcher.setSearchOptions(opts);
    searcher.setTarget(respMol);
    searcher.setQuery(authMol);

    debugPrint(SELF + "stereotype = ", stereoType);
    debugPrintMRV(SELF + "response:\n", respMol);
    debugPrintMRV(SELF + "author structure:\n", authMol);

    try {
        final boolean matched = searcher.isMatching();
        debugPrint(SELF + "search result is ", matched);
        return matched;
    } catch (SearchException e2) {
        // Report the failure, then convert it to the exception type
        // that callers of this method expect.
        Utils.alwaysPrint("Error in " + SELF);
        e2.printStackTrace();
        throw new MolFileException(ERROR + e2.getMessage());
    }
} // matchExact(Molecule, Molecule, int)
/**
 * Configures the stereochemistry-related settings of a set of search
 * options according to the given flags.
 * <ul>
 * <li>Double-bond stereo matching is switched to DBS_NONE when the
 * IGNORE_DBL_BOND_STEREO flag is set, otherwise to DBS_ALL.</li>
 * <li>When the IGNORE_TETRAHEDRAL_STEREO flag is set, the stereo search
 * type becomes STEREO_IGNORE. Otherwise it becomes STEREO_SPECIFIC,
 * except that it is left untouched when the search type is DUPLICATE;
 * in addition, when the WAVY_AND flag is set, query order is kept and a
 * WavyBondMatcher is registered as a user comparator.</li>
 * </ul>
 *
 * @param searchOpts contains the search options; modified in place
 * @param stereoType flags for treating stereochemistry
 */
private static void setStereoOptions(MolSearchOptions searchOpts,
        int stereoType) {
    final String SELF = "MolFunctions.setStereoOptions: ";
    final int searchType = searchOpts.getSearchType();
    debugPrint(SELF + "searchType = ",
            (searchType == DUPLICATE ? "DUPLICATE"
                    : searchType == FULL ? "FULL" : searchType));

    // Double-bond (2D) stereochemistry.
    final boolean heedDoubleBonds =
            (stereoType & IGNORE_DBL_BOND_STEREO) == 0;
    debugPrint(SELF + (heedDoubleBonds
            ? "pay attention to 2D stereochemistry."
            : "ignoring 2D stereochemistry."));
    searchOpts.setDoubleBondStereoMatchingMode(
            heedDoubleBonds ? DBS_ALL : DBS_NONE);

    // Tetrahedral (3D) stereochemistry: nothing more to do when ignored.
    if ((stereoType & IGNORE_TETRAHEDRAL_STEREO) != 0) {
        debugPrint(SELF + "ignoring 3D stereochemistry.");
        searchOpts.setStereoSearchType(STEREO_IGNORE);
        return;
    }

    debugPrint(SELF + "pay attention to 3D stereochemistry.");
    if (searchType != DUPLICATE) {
        // search initially set to FULL, not DUPLICATE
        searchOpts.setStereoSearchType(STEREO_SPECIFIC);
    }

    if ((stereoType & WAVY_AND) == 0) {
        debugPrint(SELF + "not adding WavyBondMatcher.");
        return;
    }
    searchOpts.setKeepQueryOrder(true); // ChemAxon says it's needed
    debugPrint(SELF + "adding WavyBondMatcher.");
    searchOpts.addUserComparator(new WavyBondMatcher());
} // setStereoOptions(MolSearchOptions, int)
MolFunctions.setStereoOptions: searchType = FULL
MolFunctions.setStereoOptions: pay attention to 2D stereochemistry.
MolFunctions.setStereoOptions: pay attention to 3D stereochemistry.
MolFunctions.setStereoOptions: adding WavyBondMatcher.
MolFunctions.matchExact: stereotype = 1
MolFunctions.matchExact: response:
<?xml version="1.0" ?>
<cml>
<MDocument>
<MChemicalStruct>
<molecule molID="m1">
<atomArray
atomID="a1 a2 a3 a4 a5 a6 a7 a8 a9 a10"
elementType="C C C C C C C C C C"
x2="54.23687744140625 52.90320810035799 52.90320810035799 54.23687744140625 55.570546782454514 55.570546782454514 56.90421612350277 56.90421612350277 58.237885464551034 58.237885464551034"
y2="32.687014166762616 31.91699722620357 30.376963345085493 29.606946404526447 30.376963345085493 31.91699722620357 32.687014166762616 29.606946404526447 30.376963345085493 31.91699722620357"
/>
<bondArray>
<bond atomRefs2="a1 a2" order="1" />
<bond atomRefs2="a1 a6" order="1" />
<bond atomRefs2="a2 a3" order="1" />
<bond atomRefs2="a3 a4" order="1" />
<bond atomRefs2="a4 a5" order="1" />
<bond atomRefs2="a6 a5" order="2" />
<bond atomRefs2="a5 a8" order="1" />
<bond atomRefs2="a7 a6" order="1" />
<bond atomRefs2="a7 a10" order="1" />
<bond atomRefs2="a8 a9" order="1" />
<bond atomRefs2="a9 a10" order="1" />
</bondArray>
</molecule>
</MChemicalStruct>
</MDocument>
</cml>
MolFunctions.matchExact: author structure:
<?xml version="1.0" ?>
<cml>
<MDocument>
<MChemicalStruct>
<molecule molID="m1">
<atomArray
atomID="a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11"
elementType="C C C C C C C C C C H"
x2="54.26373105015538 52.93006170910712 52.93006170910712 54.26373105015538 55.59740039120364 55.59740039120364 56.9310697322519 56.9310697322519 58.26473907330016 58.26473907330016 56.9310697322519"
y2="18.06352453710796 17.293507596548913 15.753473715430836 14.98345677487179 15.753473715430836 17.293507596548913 18.06352453710796 14.98345677487179 15.753473715430836 17.293507596548913 13.443456774871791"
/>
<bondArray>
<bond atomRefs2="a1 a2" order="1" />
<bond atomRefs2="a1 a6" order="1" />
<bond atomRefs2="a2 a3" order="1" />
<bond atomRefs2="a3 a4" order="1" />
<bond atomRefs2="a4 a5" order="1" />
<bond atomRefs2="a6 a5" order="2" />
<bond atomRefs2="a5 a8" order="1" />
<bond atomRefs2="a7 a6" order="1" />
<bond atomRefs2="a7 a10" order="1" />
<bond atomRefs2="a8 a9" order="1" />
<bond atomRefs2="a8 a11" order="1" />
<bond atomRefs2="a9 a10" order="1" />
</bondArray>
</molecule>
</MChemicalStruct>
</MDocument>
</cml>
MolFunctions.matchExact: search result is false
MolFunctions.setStereoOptions: searchType = FULL
MolFunctions.setStereoOptions: pay attention to 2D stereochemistry.
MolFunctions.setStereoOptions: pay attention to 3D stereochemistry.
MolFunctions.setStereoOptions: adding WavyBondMatcher.
MolFunctions.matchExact: stereotype = 1
MolFunctions.matchExact: response:
<?xml version="1.0" ?>
<cml>
<MDocument>
<MChemicalStruct>
<molecule molID="m1">
<atomArray
atomID="a1 a2 a3 a4 a5 a6 a7 a8 a9 a10"
elementType="C C C C C C C C C C"
x2="54.23687744140625 52.90320810035799 52.90320810035799 54.23687744140625 55.570546782454514 55.570546782454514 56.90421612350277 56.90421612350277 58.237885464551034 58.237885464551034"
y2="32.687014166762616 31.91699722620357 30.376963345085493 29.606946404526447 30.376963345085493 31.91699722620357 32.687014166762616 29.606946404526447 30.376963345085493 31.91699722620357"
/>
<bondArray>
<bond atomRefs2="a1 a2" order="1" />
<bond atomRefs2="a2 a3" order="1" />
<bond atomRefs2="a3 a4" order="1" />
<bond atomRefs2="a4 a5" order="1" />
<bond atomRefs2="a1 a6" order="1" />
<bond atomRefs2="a7 a6" order="1" />
<bond atomRefs2="a6 a5" order="2" />
<bond atomRefs2="a5 a8" order="1" />
<bond atomRefs2="a8 a9" order="1" />
<bond atomRefs2="a9 a10" order="1" />
<bond atomRefs2="a7 a10" order="1" />
</bondArray>
</molecule>
</MChemicalStruct>
</MDocument>
</cml>
MolFunctions.matchExact: author structure:
<?xml version="1.0" ?>
<cml>
<MDocument>
<MChemicalStruct>
<molecule molID="m1">
<atomArray
atomID="a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11"
elementType="C C C C C C C C C C H"
x2="54.26373105015538 52.93006170910712 52.93006170910712 54.26373105015538 55.59740039120364 55.59740039120364 56.9310697322519 56.9310697322519 58.26473907330016 58.26473907330016 56.9310697322519"
y2="18.06352453710796 17.293507596548913 15.753473715430836 14.98345677487179 15.753473715430836 17.293507596548913 18.06352453710796 14.98345677487179 15.753473715430836 17.293507596548913 13.443456774871791"
/>
<bondArray>
<bond atomRefs2="a1 a2" order="1" />
<bond atomRefs2="a2 a3" order="1" />
<bond atomRefs2="a3 a4" order="1" />
<bond atomRefs2="a4 a5" order="1" />
<bond atomRefs2="a1 a6" order="1" />
<bond atomRefs2="a7 a6" order="1" />
<bond atomRefs2="a6 a5" order="2" />
<bond atomRefs2="a5 a8" order="1" />
<bond atomRefs2="a8 a9" order="1" />
<bond atomRefs2="a9 a10" order="1" />
<bond atomRefs2="a7 a10" order="1" />
<bond atomRefs2="a8 a11" order="1" />
</bondArray>
</molecule>
</MChemicalStruct>
</MDocument>
</cml>
MolFunctions.matchExact: search result is true
I cannot for the life of me figure out why the molecules give a false result in the first case and a true result in the second. Any ideas? The behavior is consistent. I notice that the order of the bonds in the bond arrays differs between the two cases (the atoms and their coordinates are identical), but that shouldn't matter -- or maybe it does?
Here's some additional information about the incorrect matching behavior (the first case). If I add an H atom anywhere to the target, it matches correctly to the query. If I add a second H atom to the C of the query that already has one H, the original target does not match it. If the query has one H atom on one, two, or three allylic C atoms, the target does not match it, but if it has one H atom on all four allylic C atoms, then it does. However, if the query has one H atom on three allylic C atoms and two on the fourth, then the target does not match it. So there seems to be some symmetry issue going on here: the symmetry of the H atom substitution pattern in the query matters to whether the target matches. But not always, as shown by the other submission.