User 6a59cb749d
12-10-2013 09:19:32
I want to establish a structure database and import pubchem sdf into it. There are more than 2500 sdf files . It is not efficient enough to use JChem Manager to import one by one. So I use JChem API to build a java program.
At the beggining , the program worked quite well. It imported one sdf within less than 1 minute. But after several hours, the performance became worse and worse. Eventually, it costs 10 minutes to import one sdf.
I think my program should be optimized to improve the performance. Maybe the java program produced too much redundant objects and the java virsual machine has to suspend the program and do some clean up.
below is my java code, I hope some one can help me optimize my code. Thanks.
import java.io.IOException;
import java.sql.SQLException;
import java.util.Iterator;
import java.util.List;
import org.apache.log4j.Logger;
import chemaxon.jchem.db.Importer;
import chemaxon.jchem.db.TransferException;
import chemaxon.util.ConnectionHandler;
/**
* Provide methods that will be used to import sdf files into database.
* @author Administrator
*
*/
public class InImporter {
private Log log=Log.getLog();
public void importSDF() {
Logger logger=log.logger;
ConnectionHandler connHandler = new ConnectionHandler(true);
//Setting database connection
connHandler.setDriver("com.microsoft.sqlserver.jdbc.SQLServerDriver");
connHandler.setUrl("jdbc:sqlserver://localhost:1433;databaseName=pubchem_db");
connHandler.setLoginName("sa");
connHandler.setPassword("111111");
try {
connHandler.connectToDatabase();
} catch (SQLException e2) {
e2.printStackTrace();
logger.fatal("", e2);
} catch (ClassNotFoundException e2) {
e2.printStackTrace();
logger.fatal("", e2);
}
Importer importer = new Importer();
importer.setConnectionHandler(connHandler);
//dizhi.txt contains all the file paths of pubchem sdfs. every line is a file path, such as E:/pubchemSDF/Compound_040050001_040075000.sdf
String filePath="E:/pubchemSDF/dizhi.txt";
ReadTextLine readTextLine=new ReadTextLine(filePath);
List <String> sdfNameList = null;
try {
sdfNameList=readTextLine.read();
} catch (IOException e2) {
e2.printStackTrace();
logger.error("", e2);
}
String fileName;
importer.setTableName("pubchem_structure");
importer.setLinesToCheck(100);
importer.setHaltOnError(true);
importer.setFieldConnections("PUBCHEM_COMPOUND_CID=PUBCHEM_COMPOUND_CID");
Iterator<String> iterator=sdfNameList.iterator();
//StringBuffer currentInfo=new StringBuffer("Importing structures from ");
//StringBuffer finishInfo=new StringBuffer("Imported ");
int imported = 0;
while(iterator.hasNext()){
fileName = (String)iterator.next();//String object
importer.setInput(fileName);
//System.out.println("Collecting file information ...");
try {
importer.init();
} catch (TransferException e1) {
e1.printStackTrace();
logger.error("", e1);
}
logger.info("Importing structures from " + fileName + " ...");
//logger.info(currentInfo.append(fileName));
imported=0;
try {
imported = importer.importMols();
} catch (TransferException e) {
e.printStackTrace();
logger.fatal("", e);
}
//logger.info(finishInfo.append(imported).append(" structures successfully"));
logger.info("Imported "+imported+" structures successfully");
//currentInfo;
//finishInfo;
}
}
/**
* @param args
*/
public static void main(String[] args) {
InImporter integleImporter=new InImporter();
integleImporter.importSDF();
}
}