parity matching problem

User 870ab5b546

30-03-2010 16:13:40

The target:


<?xml version="1.0" ?>
<cml>
<MDocument>
<MChemicalStruct>
<molecule molID="m1">
<propertyList>
<property dictRef="Reactor index" title="Reactor index">
<scalar>1</scalar>
</property>
</propertyList>
<atomArray
atomID="a1 a2 a3 a4 a5 a6 a7 a8 a9"
elementType="C C C C C C C O O"
/>
<bondArray>
<bond atomRefs2="a7 a1" order="1" />
<bond atomRefs2="a4 a2" order="1" />
<bond atomRefs2="a3 a4" order="1" />
<bond atomRefs2="a4 a5" order="1" />
<bond atomRefs2="a4 a9" order="1" />
<bond atomRefs2="a5 a6" order="1" />
<bond atomRefs2="a6 a7" order="1" />
<bond atomRefs2="a7 a8" order="1" />
</bondArray>
</molecule>
</MChemicalStruct>
</MDocument>
</cml>

The query:


<?xml version="1.0" ?>
<cml>
<MDocument>
<MChemicalStruct>
<molecule molID="m1">
<propertyList>
<property dictRef="Reactor index" title="Reactor index">
<scalar>1</scalar>
</property>
</propertyList>
<atomArray
atomID="a1 a2 a3 a4 a5 a6 a7 a8 a9"
elementType="C C C C C C C O O"
/>
<bondArray>
<bond atomRefs2="a4 a1" order="1" />
<bond atomRefs2="a7 a2" order="1" />
<bond atomRefs2="a3 a4" order="1" />
<bond atomRefs2="a4 a5" order="1" />
<bond atomRefs2="a4 a9" order="1" />
<bond atomRefs2="a5 a6" order="1" />
<bond atomRefs2="a6 a7" order="1" />
<bond atomRefs2="a7 a8" order="1" />
</bondArray>
</molecule>
</MChemicalStruct>
</MDocument>
</cml>

The code:


    /**
     * Runs a JChem {@code MolSearch} of type {@code FULL} with the author's
     * structure as the query and the response structure as the target, and
     * reports whether the search finds a match.
     *
     * @param respMol    the response molecule; used as the search target
     * @param authMol    the author's molecule; used as the search query
     * @param stereoType bit flags tested against
     *                   {@code IGNORE_TETRAHEDRAL_STEREO}, {@code WAVY_AND}
     *                   and {@code IGNORE_DBL_BOND_STEREO}
     * @return the value returned by {@code MolSearch.isMatching()}
     * @throws MolFileException if the search throws a {@code SearchException}
     */
    public static boolean matchExact(Molecule respMol, Molecule authMol,
            int stereoType) throws MolFileException {
        // NOTE: The option combination below was arrived at by extensive
        // testing of MolSearch parameter combinations. The calls are kept in
        // their original order; if anything here is changed, re-verify the
        // expected behavior for all input combinations.
        final String where = "MolFunctions.matchExact: ";
        final MolSearch molSearch = new MolSearch();
        final MolSearchOptions opts = molSearch.getSearchOptions();
        opts.setSearchType(SearchConstants.FULL);

        // Tetrahedral stereo handling.
        if ((stereoType & IGNORE_TETRAHEDRAL_STEREO) == 0) {
            opts.setStereoSearchType(SearchConstants.STEREO_SPECIFIC);
            if ((stereoType & WAVY_AND) == WAVY_AND) {
                debugPrint(where + "adding WavyBondMatcher.");
                molSearch.addComparator(new WavyBondMatcher());
                // needed to avoid a bug in JChem 5.1.3_2 and earlier
                opts.setKeepQueryOrder(true);
            } else {
                // WAVY_XOR
                debugPrint(where + "not adding WavyBondMatcher.");
            }
        } else {
            debugPrint(where + "ignoring 3D stereochemistry.");
            opts.setStereoSearchType(SearchConstants.STEREO_IGNORE);
        }

        // Double-bond stereo handling.
        final boolean skipDoubleBondStereo =
                (stereoType & IGNORE_DBL_BOND_STEREO) != 0;
        if (skipDoubleBondStereo) {
            debugPrint(where + "ignoring 2D stereochemistry.");
        }
        opts.setDoubleBondStereoMatchingMode(skipDoubleBondStereo
                ? StereoConstants.DBS_NONE
                : StereoConstants.DBS_ALL);

        opts.setVagueBondLevel(SearchConstants.VAGUE_BOND_OFF);
        // required for comparing nonaromatized aromatic rings
        opts.setStereoModel(SearchConstants.STEREO_MODEL_GLOBAL);
        opts.setChargeMatching(SearchConstants.CHARGE_MATCHING_EXACT);
        opts.setIsotopeMatching(SearchConstants.ISOTOPE_MATCHING_EXACT);
        opts.setRadicalMatching(SearchConstants.RADICAL_MATCHING_EXACT);
        opts.setValenceMatching(true);
        molSearch.setSearchOptions(opts);

        molSearch.setTarget(respMol);
        molSearch.setQuery(authMol);
        debugPrintMRV(where + "Match exact response:\n", respMol);
        debugPrintMRV(where + "Match exact author structure:\n", authMol);

        try {
            final boolean found = molSearch.isMatching();
            debugPrint(where + "JChem search result is ", found);
            return found;
        } catch (SearchException searchFailure) {
            // Report the failure, then surface it as a MolFileException.
            Utils.alwaysPrint("Error in " + where);
            searchFailure.printStackTrace();
            throw new MolFileException(ERROR + searchFailure.getMessage());
        }
    } // matchExact(Molecule, Molecule, int)

/**
 * Atom comparator that rejects a query/target atom pairing when the query
 * atom's local parity is {@code PARITY_EITHER} but the target atom's is not.
 * Every other pairing is accepted.
 */
public final class WavyBondMatcher extends MolComparator
        implements StereoConstants {

    /**
     * Compares the local parities of a query atom and a target atom.
     *
     * @param queryAtomNum  atom index passed to {@code getOrigQueryAtom}
     * @param targetAtomNum atom index passed to {@code getOrigTargetAtom}
     * @return {@code false} only when the query atom's local parity is
     *         {@code PARITY_EITHER} and the target atom's is not;
     *         {@code true} otherwise, including when either original atom
     *         index is -1
     */
    public boolean compareAtoms(int queryAtomNum, int targetAtomNum) {
        final int origQueryIdx = getOrigQueryAtom(queryAtomNum);
        final int origTargetIdx = getOrigTargetAtom(targetAtomNum);

        // An index of -1 is treated as an automatic match (per the original
        // author's note, this is the implicit-H case).
        if (origQueryIdx == -1 || origTargetIdx == -1) {
            return true;
        }

        final int queryParity = query.getLocalParity(origQueryIdx);
        final int targetParity = target.getLocalParity(origTargetIdx);
        final boolean atomsMatch =
                queryParity != PARITY_EITHER || targetParity == PARITY_EITHER;
        final String verdict = atomsMatch ? "match" : "do not match";

        debugPrint("WavyBondMatcher.compareAtoms: "
                + "query = ", query, ", target = ", target,
                "; \nqAtom ", query.getAtom(origQueryIdx), origQueryIdx + 1,
                " has parity ", queryParity,
                "; tAtom ", target.getAtom(origTargetIdx), origTargetIdx + 1,
                " has parity ", targetParity,
                " (PARITY_EITHER = ", PARITY_EITHER, "); atoms ",
                verdict);
        return atomsMatch;
    } // compareAtoms(int, int)

} // WavyBondMatcher

By all indications, these structures are identical.  However, when I submit them to matchExact(), I am told that they are different.  The problem appears to be in the MolComparator:


WavyBondMatcher.compareAtoms: query = CC(O)CCC(C)(C)O, target = CC(O)CCC(C)(C)O; 
qAtom C1 has parity 0; tAtom C1 has parity 0 (PARITY_EITHER = 3); atoms match
WavyBondMatcher.compareAtoms: query = CC(O)CCC(C)(C)O, target = CC(O)CCC(C)(C)O;
qAtom C1 has parity 0; tAtom C2 has parity 0 (PARITY_EITHER = 3); atoms match
WavyBondMatcher.compareAtoms: query = CC(O)CCC(C)(C)O, target = CC(O)CCC(C)(C)O;
qAtom C1 has parity 0; tAtom C3 has parity 0 (PARITY_EITHER = 3); atoms match
WavyBondMatcher.compareAtoms: query = CC(O)CCC(C)(C)O, target = CC(O)CCC(C)(C)O;
qAtom C1 has parity 0; tAtom C4 has parity 0 (PARITY_EITHER = 3); atoms match
WavyBondMatcher.compareAtoms: query = CC(O)CCC(C)(C)O, target = CC(O)CCC(C)(C)O;
qAtom C1 has parity 0; tAtom C5 has parity 0 (PARITY_EITHER = 3); atoms match
WavyBondMatcher.compareAtoms: query = CC(O)CCC(C)(C)O, target = CC(O)CCC(C)(C)O;
qAtom C1 has parity 0; tAtom C6 has parity 0 (PARITY_EITHER = 3); atoms match
WavyBondMatcher.compareAtoms: query = CC(O)CCC(C)(C)O, target = CC(O)CCC(C)(C)O;
qAtom C1 has parity 0; tAtom C7 has parity 3 (PARITY_EITHER = 3); atoms match
WavyBondMatcher.compareAtoms: query = CC(O)CCC(C)(C)O, target = CC(O)CCC(C)(C)O;
qAtom C2 has parity 0; tAtom C1 has parity 0 (PARITY_EITHER = 3); atoms match
WavyBondMatcher.compareAtoms: query = CC(O)CCC(C)(C)O, target = CC(O)CCC(C)(C)O;
qAtom C2 has parity 0; tAtom C2 has parity 0 (PARITY_EITHER = 3); atoms match
WavyBondMatcher.compareAtoms: query = CC(O)CCC(C)(C)O, target = CC(O)CCC(C)(C)O;
qAtom C2 has parity 0; tAtom C3 has parity 0 (PARITY_EITHER = 3); atoms match
WavyBondMatcher.compareAtoms: query = CC(O)CCC(C)(C)O, target = CC(O)CCC(C)(C)O;
qAtom C2 has parity 0; tAtom C4 has parity 0 (PARITY_EITHER = 3); atoms match
WavyBondMatcher.compareAtoms: query = CC(O)CCC(C)(C)O, target = CC(O)CCC(C)(C)O;
qAtom C2 has parity 0; tAtom C5 has parity 0 (PARITY_EITHER = 3); atoms match
WavyBondMatcher.compareAtoms: query = CC(O)CCC(C)(C)O, target = CC(O)CCC(C)(C)O;
qAtom C2 has parity 0; tAtom C6 has parity 0 (PARITY_EITHER = 3); atoms match
WavyBondMatcher.compareAtoms: query = CC(O)CCC(C)(C)O, target = CC(O)CCC(C)(C)O;
qAtom C2 has parity 0; tAtom C7 has parity 3 (PARITY_EITHER = 3); atoms match
WavyBondMatcher.compareAtoms: query = CC(O)CCC(C)(C)O, target = CC(O)CCC(C)(C)O;
qAtom C3 has parity 0; tAtom C1 has parity 0 (PARITY_EITHER = 3); atoms match
WavyBondMatcher.compareAtoms: query = CC(O)CCC(C)(C)O, target = CC(O)CCC(C)(C)O;
qAtom C3 has parity 0; tAtom C2 has parity 0 (PARITY_EITHER = 3); atoms match
WavyBondMatcher.compareAtoms: query = CC(O)CCC(C)(C)O, target = CC(O)CCC(C)(C)O;
qAtom C3 has parity 0; tAtom C3 has parity 0 (PARITY_EITHER = 3); atoms match
WavyBondMatcher.compareAtoms: query = CC(O)CCC(C)(C)O, target = CC(O)CCC(C)(C)O;
qAtom C3 has parity 0; tAtom C4 has parity 0 (PARITY_EITHER = 3); atoms match
WavyBondMatcher.compareAtoms: query = CC(O)CCC(C)(C)O, target = CC(O)CCC(C)(C)O;
qAtom C3 has parity 0; tAtom C5 has parity 0 (PARITY_EITHER = 3); atoms match
WavyBondMatcher.compareAtoms: query = CC(O)CCC(C)(C)O, target = CC(O)CCC(C)(C)O;
qAtom C3 has parity 0; tAtom C6 has parity 0 (PARITY_EITHER = 3); atoms match
WavyBondMatcher.compareAtoms: query = CC(O)CCC(C)(C)O, target = CC(O)CCC(C)(C)O;
qAtom C3 has parity 0; tAtom C7 has parity 3 (PARITY_EITHER = 3); atoms match
WavyBondMatcher.compareAtoms: query = CC(O)CCC(C)(C)O, target = CC(O)CCC(C)(C)O;
qAtom C4 has parity 3; tAtom C1 has parity 0 (PARITY_EITHER = 3); atoms do not match
WavyBondMatcher.compareAtoms: query = CC(O)CCC(C)(C)O, target = CC(O)CCC(C)(C)O;
qAtom C4 has parity 3; tAtom C2 has parity 0 (PARITY_EITHER = 3); atoms do not match
WavyBondMatcher.compareAtoms: query = CC(O)CCC(C)(C)O, target = CC(O)CCC(C)(C)O;
qAtom C4 has parity 3; tAtom C3 has parity 0 (PARITY_EITHER = 3); atoms do not match
WavyBondMatcher.compareAtoms: query = CC(O)CCC(C)(C)O, target = CC(O)CCC(C)(C)O;
qAtom C4 has parity 3; tAtom C4 has parity 0 (PARITY_EITHER = 3); atoms do not match
WavyBondMatcher.compareAtoms: query = CC(O)CCC(C)(C)O, target = CC(O)CCC(C)(C)O;
qAtom C4 has parity 3; tAtom C5 has parity 0 (PARITY_EITHER = 3); atoms do not match
WavyBondMatcher.compareAtoms: query = CC(O)CCC(C)(C)O, target = CC(O)CCC(C)(C)O;
qAtom C4 has parity 3; tAtom C6 has parity 0 (PARITY_EITHER = 3); atoms do not match
WavyBondMatcher.compareAtoms: query = CC(O)CCC(C)(C)O, target = CC(O)CCC(C)(C)O;
qAtom C4 has parity 3; tAtom C7 has parity 3 (PARITY_EITHER = 3); atoms match
MolFunctions.matchExact: JChem search result is false

Why does getLocalParity() return 3 (PARITY_EITHER, which I use to detect a wavy bond) for query atom C4, an atom that neither has a wavy bond nor is a stereocenter?


Both molecules are generated by Reactor.  Strangely, in the debugging output, the target is sometimes printed as [#6]C(O)CCC([#6])([#6])O and sometimes as CC(O)CCC(C)(C)O.

User 870ab5b546

31-03-2010 02:49:56

I figured this one out.  Neither the target nor the query has atomic coordinates (both are zero-dimensional), and the local parity calculation is apparently not valid for 0D structures.  The workaround is to run a 2D clean on both molecules before comparing them.