User 8c68bb23cf
12-12-2008 01:49:33
Hi,
JChem substructure search appears to be failing in my case. To keep things testable, I have included below the code I'm using, as well as the input coordinate files for both the query and the target. The target contains the query structure and should be detected as a hit, but the search returns no hits.
When the query is a simple aromatic ring, the search succeeds, but as soon as the query gets more complex, it starts failing.
Any suggestions would be very much appreciated.
Thanks
Sasha
####### code #####
String query = ... (read from the query SDF file, below)
String target = ... (read from the target mol2 file, below)
#get the query
MolImporter mi = new MolImporter(new ByteArrayInputStream(record.getBytes()));
Molecule mol = mi.read();
mi.close();
mol.aromatize();
MolSearch search = new MolSearch();
search.setQuery(mol);
#get the target
mi = new MolImporter(new ByteArrayInputStream(struct.getBytes()));
Molecule molecule = mi.read();
mi.close();
molecule.aromatize();
search.setTarget(molecule);
int[][] hits = null;
hits = search.findAll();
########## end of code ###########
Query
18 20 0 0 0 0 0 0 0 V2000
11.0644 -7.8213 7.1522 C 0 0 0 0 0 0 0 0 0 0 0
9.8914 -8.4451 6.7512 C 0 0 0 0 0 0 0 0 0 0 0
8.9223 -7.7472 6.0883 C 0 0 0 0 0 0 0 0 0 0 0
9.1092 -6.3895 5.8084 C 0 0 0 0 0 0 0 0 0 0 0
10.2979 -5.7575 6.2148 C 0 0 0 0 0 0 0 0 0 0 0
11.2709 -6.4839 6.8874 C 0 0 0 0 0 0 0 0 0 0 0
10.1971 -4.4439 5.8005 N 0 0 0 0 0 0 0 0 0 0 0
8.9853 -4.3074 5.1790 C 0 0 0 0 0 0 0 0 0 0 0
8.3523 -5.4547 5.1841 N 0 0 0 0 0 0 0 0 0 0 0
8.4702 -3.0543 4.5851 C 0 0 0 0 0 0 0 0 0 0 0
9.2350 -1.8879 4.6389 C 0 0 0 0 0 0 0 0 0 0 0
8.7520 -0.7199 4.0850 C 0 0 0 0 0 0 0 0 0 0 0
7.5142 -0.7015 3.4697 C 0 0 0 0 0 0 0 0 0 0 0
6.7428 -1.8561 3.4160 C 0 0 0 0 0 0 0 0 0 0 0
7.2192 -3.0345 3.9702 C 0 0 0 0 0 0 0 0 0 0 0
5.4882 -1.8309 2.7980 N 0 0 0 0 0 0 0 0 0 0 0
4.4827 -2.5858 3.2833 C 0 0 0 0 0 0 0 0 0 0 0
4.6876 -3.3515 4.2053 O 0 0 0 0 0 0 0 0 0 0 0
1 6 4 0 0 0 0
1 2 4 0 0 0 0
2 3 4 0 0 0 0
3 4 4 0 0 0 0
4 5 4 0 0 0 0
4 9 1 0 0 0 0
5 6 4 0 0 0 0
5 7 1 0 0 0 0
7 8 1 0 0 0 0
8 9 2 0 0 0 0
8 10 1 0 0 0 0
10 11 4 0 0 0 0
10 15 4 0 0 0 0
11 12 4 0 0 0 0
12 13 4 0 0 0 0
13 14 4 0 0 0 0
14 15 4 0 0 0 0
14 16 1 0 0 0 0
16 17 1 0 0 0 0
17 18 2 0 0 0 0
M END
$$$$
@<TRIPOS>MOLECULE
TARGET
42 46 0 0 0
SMALL
USER_CHARGES
@<TRIPOS>ATOM
1 C1 9.5294 -8.8201 6.6505 C.ar 1 <0> -0.0966
2 C2 10.6479 -8.1869 7.1739 C.ar 1 <0> -0.1288
3 C3 10.8495 -6.8508 6.9744 C.ar 1 <0> -0.0593
4 C4 9.9218 -6.1085 6.2362 C.ar 1 <0> 0.0448
5 C5 8.7882 -6.7501 5.7064 C.ar 1 <0> 0.0569
6 C6 8.6011 -8.1089 5.9199 C.ar 1 <0> -0.1143
7 N1 8.0708 -5.7767 5.0390 N.pl3 1 <0> -0.5946
8 H1 7.2085 -5.9059 4.5494 H 1 <0> 0.4298
9 C7 8.7608 -4.6021 5.1729 C.2 1 <0> 0.3138
10 N2 9.8502 -4.8049 5.8725 N.2 1 <0> -0.4503
11 C8 8.3430 -3.3010 4.6075 C.ar 1 <0> -0.0005
12 C9 7.1039 -3.1765 3.9795 C.ar 1 <0> -0.0952
13 C10 6.7192 -1.9542 3.4516 C.ar 1 <0> 0.1524
14 C11 7.5692 -0.8580 3.5439 C.ar 1 <0> -0.1153
15 C12 8.7947 -0.9789 4.1705 C.ar 1 <0> -0.0891
16 C13 9.1879 -2.1930 4.7002 C.ar 1 <0> -0.0128
17 Cl1 10.7318 -2.3411 5.4800 Cl 1 <0> -0.0200
18 N3 5.4772 -1.8248 2.8204 N.am 1 <0> -0.6668
19 C14 4.9589 -2.8623 2.1337 C.2 1 <0> 0.5690
20 O1 5.6103 -3.8780 1.9835 O.2 1 <0> -0.5119
21 C15 3.6009 -2.7655 1.5641 C.ar 1 <0> -0.1342
22 C16 2.8508 -1.5994 1.7360 C.ar 1 <0> -0.0692
23 C17 1.5830 -1.5112 1.2042 C.ar 1 <0> -0.1075
24 C18 1.0463 -2.5782 0.4960 C.ar 1 <0> 0.0685
25 C19 1.7904 -3.7457 0.3208 C.ar 1 <0> 0.0257
26 C20 3.0619 -3.8401 0.8519 C.ar 1 <0> -0.0374
27 O2 1.0452 -4.6324 -0.4022 O.3 1 <0> -0.3027
28 C21 0.0047 -3.8487 -1.0155 C.3 1 <0> 0.2117
29 O3 -0.1616 -2.7365 -0.1166 O.3 1 <0> -0.3024
30 H2 9.3848 -9.8774 6.8169 H 1 <0> 0.1313
31 H3 11.3666 -8.7565 7.7444 H 1 <0> 0.1306
32 H4 11.7235 -6.3682 7.3863 H 1 <0> 0.1302
33 H5 7.7325 -8.6069 5.5151 H 1 <0> 0.1237
34 H6 6.4461 -4.0298 3.9052 H 1 <0> 0.1423
35 H7 7.2690 0.0923 3.1278 H 1 <0> 0.1428
36 H8 9.4493 -0.1229 4.2426 H 1 <0> 0.1482
37 H9 4.9891 -0.9884 2.8769 H 1 <0> 0.4154
38 H10 3.2649 -0.7671 2.2857 H 1 <0> 0.1435
39 H11 1.0048 -0.6089 1.3382 H 1 <0> 0.1474
40 H12 3.6388 -4.7430 0.7166 H 1 <0> 0.1514
41 H13 -0.9177 -4.4252 -1.0862 H 1 <0> 0.1384
42 H14 0.3169 -3.5015 -2.0004 H 1 <0> 0.0911
@<TRIPOS>BOND
1 1 6 ar
2 1 2 ar
3 1 30 1
4 2 3 ar
5 2 31 1
6 3 4 ar
7 3 32 1
8 4 10 1
9 4 5 ar
10 5 6 ar
11 5 7 1
12 6 33 1
13 7 8 1
14 7 9 1
15 9 10 2
16 9 11 1
17 11 16 ar
18 11 12 ar
19 12 13 ar
20 12 34 1
21 13 14 ar
22 13 18 1
23 14 15 ar
24 14 35 1
25 15 16 ar
26 15 36 1
27 16 17 1
28 18 19 am
29 18 37 1
30 19 20 2
31 19 21 1
32 21 26 ar
33 21 22 ar
34 22 23 ar
35 22 38 1
36 23 24 ar
37 23 39 1
38 24 29 1
39 24 25 ar
40 25 26 ar
41 25 27 1
42 26 40 1
43 27 28 1
44 28 29 1
45 28 41 1
46 28 42 1
JChem substructure search appears to be failing in my case. To keep things testable, I have included below the code I'm using, as well as the input coordinate files for both the query and the target. The target contains the query structure and should be detected as a hit, but the search returns no hits.
When the query is a simple aromatic ring, the search succeeds, but as soon as the query gets more complex, it starts failing.
Any suggestions would be very much appreciated.
Thanks
Sasha
####### code #####
String query = ... (read from the query SDF file, below)
String target = ... (read from the target mol2 file, below)
#get the query
MolImporter mi = new MolImporter(new ByteArrayInputStream(record.getBytes()));
Molecule mol = mi.read();
mi.close();
mol.aromatize();
MolSearch search = new MolSearch();
search.setQuery(mol);
#get the target
mi = new MolImporter(new ByteArrayInputStream(struct.getBytes()));
Molecule molecule = mi.read();
mi.close();
molecule.aromatize();
search.setTarget(molecule);
int[][] hits = null;
hits = search.findAll();
########## end of code ###########
Query
18 20 0 0 0 0 0 0 0 V2000
11.0644 -7.8213 7.1522 C 0 0 0 0 0 0 0 0 0 0 0
9.8914 -8.4451 6.7512 C 0 0 0 0 0 0 0 0 0 0 0
8.9223 -7.7472 6.0883 C 0 0 0 0 0 0 0 0 0 0 0
9.1092 -6.3895 5.8084 C 0 0 0 0 0 0 0 0 0 0 0
10.2979 -5.7575 6.2148 C 0 0 0 0 0 0 0 0 0 0 0
11.2709 -6.4839 6.8874 C 0 0 0 0 0 0 0 0 0 0 0
10.1971 -4.4439 5.8005 N 0 0 0 0 0 0 0 0 0 0 0
8.9853 -4.3074 5.1790 C 0 0 0 0 0 0 0 0 0 0 0
8.3523 -5.4547 5.1841 N 0 0 0 0 0 0 0 0 0 0 0
8.4702 -3.0543 4.5851 C 0 0 0 0 0 0 0 0 0 0 0
9.2350 -1.8879 4.6389 C 0 0 0 0 0 0 0 0 0 0 0
8.7520 -0.7199 4.0850 C 0 0 0 0 0 0 0 0 0 0 0
7.5142 -0.7015 3.4697 C 0 0 0 0 0 0 0 0 0 0 0
6.7428 -1.8561 3.4160 C 0 0 0 0 0 0 0 0 0 0 0
7.2192 -3.0345 3.9702 C 0 0 0 0 0 0 0 0 0 0 0
5.4882 -1.8309 2.7980 N 0 0 0 0 0 0 0 0 0 0 0
4.4827 -2.5858 3.2833 C 0 0 0 0 0 0 0 0 0 0 0
4.6876 -3.3515 4.2053 O 0 0 0 0 0 0 0 0 0 0 0
1 6 4 0 0 0 0
1 2 4 0 0 0 0
2 3 4 0 0 0 0
3 4 4 0 0 0 0
4 5 4 0 0 0 0
4 9 1 0 0 0 0
5 6 4 0 0 0 0
5 7 1 0 0 0 0
7 8 1 0 0 0 0
8 9 2 0 0 0 0
8 10 1 0 0 0 0
10 11 4 0 0 0 0
10 15 4 0 0 0 0
11 12 4 0 0 0 0
12 13 4 0 0 0 0
13 14 4 0 0 0 0
14 15 4 0 0 0 0
14 16 1 0 0 0 0
16 17 1 0 0 0 0
17 18 2 0 0 0 0
M END
$$$$
@<TRIPOS>MOLECULE
TARGET
42 46 0 0 0
SMALL
USER_CHARGES
@<TRIPOS>ATOM
1 C1 9.5294 -8.8201 6.6505 C.ar 1 <0> -0.0966
2 C2 10.6479 -8.1869 7.1739 C.ar 1 <0> -0.1288
3 C3 10.8495 -6.8508 6.9744 C.ar 1 <0> -0.0593
4 C4 9.9218 -6.1085 6.2362 C.ar 1 <0> 0.0448
5 C5 8.7882 -6.7501 5.7064 C.ar 1 <0> 0.0569
6 C6 8.6011 -8.1089 5.9199 C.ar 1 <0> -0.1143
7 N1 8.0708 -5.7767 5.0390 N.pl3 1 <0> -0.5946
8 H1 7.2085 -5.9059 4.5494 H 1 <0> 0.4298
9 C7 8.7608 -4.6021 5.1729 C.2 1 <0> 0.3138
10 N2 9.8502 -4.8049 5.8725 N.2 1 <0> -0.4503
11 C8 8.3430 -3.3010 4.6075 C.ar 1 <0> -0.0005
12 C9 7.1039 -3.1765 3.9795 C.ar 1 <0> -0.0952
13 C10 6.7192 -1.9542 3.4516 C.ar 1 <0> 0.1524
14 C11 7.5692 -0.8580 3.5439 C.ar 1 <0> -0.1153
15 C12 8.7947 -0.9789 4.1705 C.ar 1 <0> -0.0891
16 C13 9.1879 -2.1930 4.7002 C.ar 1 <0> -0.0128
17 Cl1 10.7318 -2.3411 5.4800 Cl 1 <0> -0.0200
18 N3 5.4772 -1.8248 2.8204 N.am 1 <0> -0.6668
19 C14 4.9589 -2.8623 2.1337 C.2 1 <0> 0.5690
20 O1 5.6103 -3.8780 1.9835 O.2 1 <0> -0.5119
21 C15 3.6009 -2.7655 1.5641 C.ar 1 <0> -0.1342
22 C16 2.8508 -1.5994 1.7360 C.ar 1 <0> -0.0692
23 C17 1.5830 -1.5112 1.2042 C.ar 1 <0> -0.1075
24 C18 1.0463 -2.5782 0.4960 C.ar 1 <0> 0.0685
25 C19 1.7904 -3.7457 0.3208 C.ar 1 <0> 0.0257
26 C20 3.0619 -3.8401 0.8519 C.ar 1 <0> -0.0374
27 O2 1.0452 -4.6324 -0.4022 O.3 1 <0> -0.3027
28 C21 0.0047 -3.8487 -1.0155 C.3 1 <0> 0.2117
29 O3 -0.1616 -2.7365 -0.1166 O.3 1 <0> -0.3024
30 H2 9.3848 -9.8774 6.8169 H 1 <0> 0.1313
31 H3 11.3666 -8.7565 7.7444 H 1 <0> 0.1306
32 H4 11.7235 -6.3682 7.3863 H 1 <0> 0.1302
33 H5 7.7325 -8.6069 5.5151 H 1 <0> 0.1237
34 H6 6.4461 -4.0298 3.9052 H 1 <0> 0.1423
35 H7 7.2690 0.0923 3.1278 H 1 <0> 0.1428
36 H8 9.4493 -0.1229 4.2426 H 1 <0> 0.1482
37 H9 4.9891 -0.9884 2.8769 H 1 <0> 0.4154
38 H10 3.2649 -0.7671 2.2857 H 1 <0> 0.1435
39 H11 1.0048 -0.6089 1.3382 H 1 <0> 0.1474
40 H12 3.6388 -4.7430 0.7166 H 1 <0> 0.1514
41 H13 -0.9177 -4.4252 -1.0862 H 1 <0> 0.1384
42 H14 0.3169 -3.5015 -2.0004 H 1 <0> 0.0911
@<TRIPOS>BOND
1 1 6 ar
2 1 2 ar
3 1 30 1
4 2 3 ar
5 2 31 1
6 3 4 ar
7 3 32 1
8 4 10 1
9 4 5 ar
10 5 6 ar
11 5 7 1
12 6 33 1
13 7 8 1
14 7 9 1
15 9 10 2
16 9 11 1
17 11 16 ar
18 11 12 ar
19 12 13 ar
20 12 34 1
21 13 14 ar
22 13 18 1
23 14 15 ar
24 14 35 1
25 15 16 ar
26 15 36 1
27 16 17 1
28 18 19 am
29 18 37 1
30 19 20 2
31 19 21 1
32 21 26 ar
33 21 22 ar
34 22 23 ar
35 22 38 1
36 23 24 ar
37 23 39 1
38 24 29 1
39 24 25 ar
40 25 26 ar
41 25 27 1
42 26 40 1
43 27 28 1
44 28 29 1
45 28 41 1
46 28 42 1