User 22337819af
30-01-2012 21:44:26
Hi
I have written a multi-threaded application to carry out r-group decomposition. I have a set of cores/query molecules (~30) that I am running against a library of structures (~100000). Each thread is assigned 1/n (n = # threads) of the structures. In each thread, I create a Molecule object for each core and an RGroupDecomposition object, and then iterate over the assigned structures: creating a Molecule object for each one, running the r-group decomposition, and then storing the result in memory.
The performance is about half of what we expect it to be. We have an "8 threaded" Intel Core i7 vPro chip, on which (with other code) we get gains as we increase the number of threads up to 8. With the r-group decomposition, we only get gains when increasing up to 4 threads. When we run a profiler (YourKit) we observe lots of blocking on the threads. We have identified these classes/methods (from a snapshot) as being possibly responsible for the blocking:
chemaxon.enumeration.QueryEnumeratorFactory.getFirstEnumeratorClone(Molecule, List, int, boolean, String)
chemaxon.enumeration.SearchEnumerator.getMarkushFactory(int)
chemaxon.enumeration.getQueryFactory(int)
chemaxon.license.LicenseHandler.checkLicense(String, String)
java.util.LinkedHashMap.newEntryIterator()
code:
// Worker body (fragment of a larger method; coreSet, structureList, mixComp and
// threadName are declared outside this excerpt). For every structure in
// structureList it tries each core in coreSet as an R-group decomposition query
// and records the first matching core, its ligands and the R/S configuration
// of the core's stereocenters on the structure.

// Build the Core -> query Molecule map once, before the structure loop, so the
// same Molecule is reused for every structure (see the lookup inside the loop).
final Map<Core, Molecule> coreMoleculeMap;
try {
coreMoleculeMap = buildCoreMoleculeMap(coreSet);
} catch (MolFormatException e2) {
e2.printStackTrace();
throw new RuntimeException("", e2);
}
// One Standardizer for the whole loop; it is applied to a clone of each
// structure before matching.
// NOTE(review): the config string presumably aromatizes and clears chirality -
// confirm against the Standardizer documentation.
Standardizer standardizer;
try {
standardizer = new Standardizer("aromatize..clearstereo:chirality");
} catch (StandardizerException e1) {
e1.printStackTrace();
throw new RuntimeException("", e1);
}
// A single RGroupDecomposition instance is reused for all structures and cores;
// only its target and query are swapped inside the loops below.
RGroupDecomposition rGroupDecomp = new RGroupDecomposition();
rGroupDecomp.setAttachmentType(RGroupDecomposition.ATTACHMENT_RLABEL);
// Within this fragment only setMolecule(...) is ever called on this object;
// nothing is read back from it here.
Stereochemistry stereoChemistry = new Stereochemistry();
// Counts processed structures; used only for the periodic progress message.
int structureProgress = 0;
for (Structure curStruct : structureList) {
// Parse the structure from its SMILES. A parse failure aborts the whole loop
// (not just this structure) because it is rethrown as a RuntimeException.
Molecule curMol;
try {
curMol = MolImporter.importMol(curStruct.getSmiles());
} catch (MolFormatException e1) {
e1.printStackTrace();
throw new RuntimeException("", e1);
}
// Standardize a clone so that curMol itself is left untouched; curMol is the
// molecule whose chirality is read further down.
Molecule curWithoutStereoMol = curMol.cloneMolecule();
try {
standardizer.standardize(curWithoutStereoMol);
} catch (SearchException e1) {
e1.printStackTrace();
throw new RuntimeException("", e1);
} catch (LicenseException e1) {
e1.printStackTrace();
throw new RuntimeException("", e1);
}
// The target is set once per structure; the query changes once per core.
rGroupDecomp.setTarget(curWithoutStereoMol);
for (Core core : coreSet) {
Molecule coreMol = coreMoleculeMap.get(core);
rGroupDecomp.setQuery(coreMol);
// getFirstDecompAndHandleErrors is defined outside this excerpt.
final Decomposition rDecomp = getFirstDecompAndHandleErrors(rGroupDecomp);
if (rDecomp != null) { //assuming that decomp returns null if no match found
// A core was already assigned by an earlier iteration: report the conflict on
// stderr and keep the first match (the structure is not modified again).
if (curStruct.getCore() != null) {
System.err.println("more than one match found for structure " + curStruct.getBroadCoreId() + " "
+ curStruct.getCollectionName() + " " + curStruct.getSmiles() + " " + curStruct.getCore().getCxSmiles() + " " + core.getCxSmiles());
//warning / error - a previous match was found with a previous core
//and this molecule
} else {
// First match for this structure: record the core and every non-null ligand,
// keyed by the index returned from findRIndexForLigand (defined elsewhere).
curStruct.setCore(core);
for (Molecule ligandMol : rDecomp.getLigands()) {
if (ligandMol != null) {
curStruct.addRGroup(findRIndexForLigand(ligandMol), ligandMol);
}
}
// groupHit is indexed below as groupHit[coreAtomIndex][0].
// NOTE(review): presumably maps each query (core) atom index to the matching
// target atom index/indices - confirm against the RGroupDecomposition docs.
int[][] groupHit = rDecomp.getGroupHit();
// Placeholder: the condition is evaluated but no action is taken yet.
if (groupHit[0].length != 1) {
//warning / error either more than one hit, or no hit was found
}
stereoChemistry.setMolecule(coreMol);
// For each stereocenter atom index of the core, look up the matched atom in
// the structure and record its R/S label.
// NOTE(review): the chirality is read from curMol (the original import), while
// the hit indices come from matching the standardized clone - this assumes
// atom indices are identical in both molecules; confirm.
for (int coreAsymmIndex : core.getStereocenterAtomIndexSet()) {
final int structAsymmIndex = groupHit[coreAsymmIndex][0];
final String rsVal;
if (StereoConstants.CHIRALITY_R == curMol.getChirality(structAsymmIndex)) {
rsVal = "R";
} else if (StereoConstants.CHIRALITY_S == curMol.getChirality(structAsymmIndex)) {
rsVal = "S";
} else {
// Any value other than R or S aborts the whole loop.
throw new RuntimeException("RGroupDecomposition chirality is neither R or S for asymmetric atom " + structAsymmIndex + " of molecule " + curStruct.getSmiles());
}
curStruct.addStereocenterConfig(coreAsymmIndex, rsVal);
}
// mixComp and handleMixture are declared outside this excerpt.
if (mixComp != null) {
handleMixture(mixComp, curStruct, rGroupDecomp, groupHit, coreMol, curMol);
}
}
}
}
// No core matched this structure: report it on stderr.
if (curStruct.getCore() == null) {
System.err.println(threadName + " No matching core found for " + curStruct.getBroadCoreId() + " " + curStruct.getSmiles());
}
// Progress message every 1000 structures; the counter is tested before it is
// incremented, so the first message is printed for structure 0.
if (structureProgress % 1000 == 0) {
System.out.println(threadName + " progress " + structureProgress);
}
structureProgress++;
}
Edit: attached screenshot of profile showing extensive blocking during multi-thread operation