Split a large file into smaller parts using molconvert

User 2b2c2ee399

18-04-2012 10:28:31

Hi,


I'd like to know if it is possible to split a big file into 4 smaller parts, e.g. 4 parts of 1000 compounds each, with molconvert?


So far I use molconvert -m, but it splits the file according to the number of molecules in it...


Many thanks,


 


Best

ChemAxon a202a732bf

18-04-2012 16:07:00

Hi Florent,


It is not possible to do this using molconvert, but it can be done quite easily using the ChemAxon API. If you are interested, I can send you a code example.


Best regards,


Zsuzsa

User 2b2c2ee399

20-04-2012 09:53:25

Sure, it can help me !


Thanks,

ChemAxon a202a732bf

20-04-2012 21:17:26

Hi Florent,


Below you can find the code example which generates the split files (it is also attached: SplitToFiles.java).


Usage: SplitToFiles <filename.extension> <number of molecules per export file>


Best regards,


Zsuzsa


package chemaxon.util;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;

import chemaxon.formats.MolExporter;
import chemaxon.formats.MolFormatException;
import chemaxon.formats.MolImporter;
import chemaxon.struc.Molecule;

/**
 * Exports molecules in the given file into several others with the same name a the
 * given file and indexes in the end.
 * @author Zsuzsanna Szabo
 */
public class SplitToFiles {

    /**
     * @param args program arguments: name of file(first argument) and number of molecules
     * per export file(second argument)  
     */
    public static void main(String[] args) {
    if (args.length != 2) {
        System.out.println("Usage: SplitToFiles <filename.extension> <number of molecules per export file>\n" +
            "Name of export files <filename_<index>.extension>,\n" +
            "Example: file Benzene.mol is exported to Benzene_1.mol, Benzene_2.mol etc.");
        System.exit(0);
    }

    String inFileName = args[0];
    int exportFileCount = Integer.parseInt(args[1]);
    if (exportFileCount < 1) {
        System.out.println("Wrong number of molecules per file: " + exportFileCount);
        System.exit(0);
    }
    try {
        splitToFiles(inFileName, exportFileCount);
    } catch (MolFormatException e) {
        System.err.println(inFileName + ": error in molecule file format.");
        e.printStackTrace();
        System.exit(1);
    } catch (FileNotFoundException e) {
        System.err.println(inFileName + ": file is not found.");
        e.printStackTrace();
        System.exit(1);
    } catch (IOException e) {
        System.err.println(inFileName + ": input/output error.");
        e.printStackTrace();
        System.exit(1);
    }
    }
    
    private static void splitToFiles(String inFileName, int exportFileCount)
            throws MolFormatException, FileNotFoundException, IOException {
    MolImporter importer = new MolImporter(new FileInputStream(inFileName));
    Molecule mol = importer.read();
    for (int fileIndex = 1; mol != null; fileIndex++) {
        int dotIndex = inFileName.indexOf('.');
        String outFileName = inFileName.substring(0, dotIndex);
        outFileName += "_" + fileIndex;
        outFileName += inFileName.substring(dotIndex);
        System.out.println("Generating file " + outFileName);        
        MolExporter exporter = new MolExporter(
            new FileOutputStream(outFileName), importer.getFormat());
        for (int i = 0; i < exportFileCount && mol != null; i++) {
        exporter.write(mol);
        mol = importer.read();
        }
        exporter.close();
    }
    importer.close();
    }
}