import java.awt.image.BufferedImage;
import java.io.File;
import javax.imageio.ImageIO;
//生成等比例高质量缩略图
/**
 * Produces down-scaled copies ("thumbnails") of images with a separable
 * Lanczos filter: every output pixel is a weighted sum of the source pixels
 * around it, first along rows, then along columns.
 *
 * <p>The kernel tables live in static fields, so the class is NOT thread-safe;
 * callers that resize concurrently must serialise access themselves.
 */
public class thumbnailutil {
    /** Source image width of the resize currently in progress. */
    private static int width;
    /** Source image height of the resize currently in progress. */
    private static int height;
    /** Requested output width of the resize currently in progress. */
    private static int scalewidth;
    /** Half-width of the Lanczos window, in output-pixel units. */
    static double support = (double) 3.0;
    static double pi = Math.PI;
    /** Raw (un-normalised) kernel weights; index {@code nhalfdots} is the centre tap. */
    static double[] contrib;
    /** Kernel weights normalised to sum to one (used away from the image edges). */
    static double[] normcontrib;
    /** Scratch table holding weights re-normalised over a window clipped by an image edge. */
    static double[] tmpcontrib;
    static int startcontrib, stopcontrib;
    /** Number of taps in the current kernel ({@code 2 * nhalfdots + 1}). */
    static int ndots;
    /** Number of taps on each side of the centre tap. */
    static int nhalfdots;

    /**
     * Convenience overload of {@link #saveimage(File, File, int, int)} taking path strings.
     *
     * @param fromfilestr   path of the image to read
     * @param savetofilestr path of the JPEG file to write
     * @param formatwidth   requested width, or a value &lt;= 0 for "derive from height"
     * @param formatheight  requested height, or a value &lt;= 0 for "derive from width"
     * @throws Exception if the source cannot be read or the result cannot be written
     */
    public static void saveimage(String fromfilestr, String savetofilestr, int formatwidth, int formatheight) throws Exception {
        saveimage(new File(fromfilestr), new File(savetofilestr), formatwidth, formatheight);
    }

    /**
     * Reads {@code fromfile}, scales it and writes the result to {@code savefile} as JPEG.
     *
     * <p>When both format dimensions are positive the output has exactly that size.
     * When only one is positive the image is shrunk proportionally to fit it (never
     * enlarged); when neither is positive the original size is kept.
     *
     * @param fromfile     image to read
     * @param savefile     JPEG file to write
     * @param formatwidth  requested width, or a value &lt;= 0 for "derive from height"
     * @param formatheight requested height, or a value &lt;= 0 for "derive from width"
     * @throws Exception if the source cannot be decoded or no JPEG writer accepts the result
     */
    public static void saveimage(File fromfile, File savefile, int formatwidth, int formatheight) throws Exception {
        BufferedImage srcimage = ImageIO.read(fromfile);
        if (srcimage == null) {
            // ImageIO.read returns null (no exception) when no registered reader understands the file.
            throw new java.io.IOException("Unsupported or unreadable image: " + fromfile);
        }
        int[] target = targetsize(srcimage.getWidth(), srcimage.getHeight(), formatwidth, formatheight);
        BufferedImage scaled = imagezoomout(srcimage, target[0], target[1]);
        if (!ImageIO.write(scaled, "jpeg", savefile)) {
            // ImageIO.write signals "no suitable writer" through its return value only.
            throw new java.io.IOException("No JPEG writer could encode the image for " + savefile);
        }
    }

    /**
     * Chooses the output size for {@code saveimage}.
     *
     * @return {@code {width, height}}, both at least 1
     */
    private static int[] targetsize(int imagewidth, int imageheight, int formatwidth, int formatheight) {
        if (formatwidth > 0 && formatheight > 0) {
            return new int[] {formatwidth, formatheight};
        }
        int targetwidth = imagewidth;
        int targetheight = imageheight;
        if (formatwidth > 0 && imagewidth > formatwidth) {
            targetwidth = formatwidth;
            // long arithmetic: the product of two image dimensions can overflow int
            targetheight = Math.max(1, (int) ((long) imageheight * formatwidth / imagewidth));
        } else if (formatheight > 0 && imageheight > formatheight) {
            targetheight = formatheight;
            targetwidth = Math.max(1, (int) ((long) imagewidth * formatheight / imageheight));
        }
        return new int[] {targetwidth, targetheight};
    }

    /**
     * Scales {@code srcbufferimage} to {@code w} x {@code h} pixels.
     *
     * @param srcbufferimage image to scale
     * @param w              output width, must be positive
     * @param h              output height, must be positive
     * @return a new {@code TYPE_INT_RGB} image, or {@code srcbufferimage} itself when
     *         neither dimension would shrink (this class never enlarges on both axes)
     * @throws IllegalArgumentException if {@code w} or {@code h} is not positive
     */
    public static BufferedImage imagezoomout(BufferedImage srcbufferimage, int w, int h) {
        if (w <= 0 || h <= 0) {
            throw new IllegalArgumentException("Target size must be positive: " + w + "x" + h);
        }
        width = srcbufferimage.getWidth();
        height = srcbufferimage.getHeight();
        scalewidth = w;
        if (determineresultsize(w, h) == 1) {
            return srcbufferimage;
        }
        // Each axis gets a kernel built from its own scale ratio; reusing the
        // horizontal kernel vertically is wrong whenever the two ratios differ.
        calcontrib(width, w);
        BufferedImage pbout = horizontalfiltering(srcbufferimage, w);
        calcontrib(height, h);
        return verticalfiltering(pbout, h);
    }

    /**
     * Decides whether any filtering is needed.
     *
     * @return 1 when both axes would be enlarged or unchanged (no work to do), otherwise 0
     */
    private static int determineresultsize(int w, int h) {
        double scaleh = (double) w / (double) width;
        double scalev = (double) h / (double) height;
        // no enlargement is performed
        if (scaleh >= 1.0 && scalev >= 1.0) {
            return 1;
        }
        return 0;
    }

    /** Lanczos weight of the source pixel {@code i} taps away from the centre. */
    private static double lanczos(int i, int inwidth, int outwidth, double support) {
        double x = (double) i * (double) outwidth / (double) inwidth;
        return Math.sin(x * pi) / (x * pi) * Math.sin(x * pi / support)
                / (x * pi / support);
    }

    /** Rebuilds the kernel tables for scaling one axis from {@code insize} to {@code outsize} pixels. */
    private static void calcontrib(int insize, int outsize) {
        nhalfdots = (int) ((double) insize * support / (double) outsize);
        ndots = nhalfdots * 2 + 1;
        contrib = new double[ndots];
        normcontrib = new double[ndots];
        tmpcontrib = new double[ndots];
        int center = nhalfdots;
        contrib[center] = 1.0;
        double weight = 0.0;
        for (int i = 1; i <= center; i++) {
            contrib[center + i] = lanczos(i, insize, outsize, support);
            weight += contrib[center + i];
        }
        // the kernel is symmetric: mirror the right half onto the left half
        for (int i = center - 1; i >= 0; i--) {
            contrib[i] = contrib[center * 2 - i];
        }
        weight = weight * 2 + 1.0;
        for (int i = 0; i <= center; i++) {
            normcontrib[i] = contrib[i] / weight;
        }
        for (int i = center + 1; i < ndots; i++) {
            normcontrib[i] = normcontrib[center * 2 - i];
        }
    }

    /** Edge handling: re-normalises the taps {@code start..stop} so that they sum to one. */
    private static void caltempcontrib(int start, int stop) {
        double weight = 0;
        for (int i = start; i <= stop; i++) {
            weight += contrib[i];
        }
        for (int i = start; i <= stop; i++) {
            tmpcontrib[i] = contrib[i] / weight;
        }
    }

    private static int getredvalue(int rgbvalue) {
        int temp = rgbvalue & 0x00ff0000;
        return temp >> 16;
    }

    private static int getgreenvalue(int rgbvalue) {
        int temp = rgbvalue & 0x0000ff00;
        return temp >> 8;
    }

    private static int getbluevalue(int rgbvalue) {
        return rgbvalue & 0x000000ff;
    }

    /** Packs three 0..255 channel values into one {@code 0xRRGGBB} integer. */
    private static int comrgb(int redvalue, int greenvalue, int bluevalue) {
        return (redvalue << 16) + (greenvalue << 8) + bluevalue;
    }

    /** Filters one output pixel of row {@code y} from source columns {@code startx..stopx}. */
    private static int horizontalfilter(BufferedImage bufimg, int startx, int stopx,
            int start, int stop, int y, double[] pcontrib) {
        double valuered = 0.0;
        double valuegreen = 0.0;
        double valueblue = 0.0;
        for (int i = startx, j = start; i <= stopx; i++, j++) {
            int valuergb = bufimg.getRGB(i, y);
            valuered += getredvalue(valuergb) * pcontrib[j];
            valuegreen += getgreenvalue(valuergb) * pcontrib[j];
            valueblue += getbluevalue(valuergb) * pcontrib[j];
        }
        return comrgb(clip((int) valuered), clip((int) valuegreen), clip((int) valueblue));
    }

    /** Horizontal pass: resamples every row of {@code bufimage} to {@code ioutw} columns. */
    private static BufferedImage horizontalfiltering(BufferedImage bufimage, int ioutw) {
        int dwinw = bufimage.getWidth();
        int dwinh = bufimage.getHeight();
        BufferedImage pbout = new BufferedImage(ioutw, dwinh, BufferedImage.TYPE_INT_RGB);
        for (int x = 0; x < ioutw; x++) {
            int center = (int) (((double) x) * ((double) dwinw) / ((double) ioutw) + 0.5);
            // On an enlarged axis the rounding above can land one past the last
            // column, which left the window empty and the pixel black.
            center = Math.min(center, dwinw - 1);
            int startx = center - nhalfdots;
            int start = 0;
            if (startx < 0) {
                startx = 0;
                start = nhalfdots - center;
            }
            int stopx = center + nhalfdots;
            int stop = nhalfdots * 2;
            if (stopx > (dwinw - 1)) {
                stopx = dwinw - 1;
                stop = nhalfdots + (dwinw - 1 - center);
            }
            double[] weights = normcontrib;
            if (start > 0 || stop < ndots - 1) {
                // window clipped by an edge: re-normalise the surviving taps
                caltempcontrib(start, stop);
                weights = tmpcontrib;
            }
            for (int y = 0; y < dwinh; y++) {
                pbout.setRGB(x, y, horizontalfilter(bufimage, startx, stopx, start, stop, y, weights));
            }
        }
        return pbout;
    }

    /** Filters one output pixel of column {@code x} from source rows {@code starty..stopy}. */
    private static int verticalfilter(BufferedImage pbinimage, int starty, int stopy,
            int start, int stop, int x, double[] pcontrib) {
        double valuered = 0.0;
        double valuegreen = 0.0;
        double valueblue = 0.0;
        for (int i = starty, j = start; i <= stopy; i++, j++) {
            int valuergb = pbinimage.getRGB(x, i);
            valuered += getredvalue(valuergb) * pcontrib[j];
            valuegreen += getgreenvalue(valuergb) * pcontrib[j];
            valueblue += getbluevalue(valuergb) * pcontrib[j];
        }
        return comrgb(clip((int) valuered), clip((int) valuegreen), clip((int) valueblue));
    }

    /** Vertical pass: resamples every column of {@code pbimage} to {@code iouth} rows. */
    private static BufferedImage verticalfiltering(BufferedImage pbimage, int iouth) {
        int iw = pbimage.getWidth();
        int ih = pbimage.getHeight();
        BufferedImage pbout = new BufferedImage(iw, iouth, BufferedImage.TYPE_INT_RGB);
        for (int y = 0; y < iouth; y++) {
            int center = (int) (((double) y) * ((double) ih) / ((double) iouth) + 0.5);
            center = Math.min(center, ih - 1); // same guard as in the horizontal pass
            int starty = center - nhalfdots;
            int start = 0;
            if (starty < 0) {
                starty = 0;
                start = nhalfdots - center;
            }
            int stopy = center + nhalfdots;
            int stop = nhalfdots * 2;
            if (stopy > (ih - 1)) {
                stopy = ih - 1;
                stop = nhalfdots + (ih - 1 - center);
            }
            double[] weights = normcontrib;
            if (start > 0 || stop < ndots - 1) {
                caltempcontrib(start, stop);
                weights = tmpcontrib;
            }
            for (int x = 0; x < iw; x++) {
                pbout.setRGB(x, y, verticalfilter(pbimage, starty, stopy, start, stop, x, weights));
            }
        }
        return pbout;
    }

    /** Clamps a channel value to the range 0..255. */
    static int clip(int x) {
        if (x < 0) {
            return 0;
        }
        if (x > 255) {
            return 255;
        }
        return x;
    }
}
首先在pl/sql中分别执行:
create or replace and compile java source named testjava1 as
public class testjava1
{
  /** Writes the text "hello" to standard output. */
  public static void test()
  {
    // Java is case-sensitive: the JDK class is System, not system
    System.out.println("hello");
  }
}
-- PL/SQL call specification publishing testjava1.test() as a stored procedure
create or replace procedure testjava1 as language java name 'testjava1.test()';
---------------------------------------------------------------------------------------------------------
在sqlplus中
c:\windows\system32>sqlplus nc5520110105/nc5520110105@192.168.10.87
sql*plus: release 11.2.0.1.0 production on fri apr 1 14:06:02 2011
Copyright (c) 1982, 2010, Oracle.  All rights reserved.
connected to:
oracle database 10g enterprise edition release 10.2.0.1.0 - 64bit production
with the partitioning, olap and data mining options
sql> set serveroutput on;
sql> show serveroutput;
serveroutput on size unlimited format word_wrapped
sql> call dbms_java.set_output(2000);
call completed.
sql>
sql> show serveroutput;
serveroutput on size unlimited format word_wrapped
sql> exec testjava1();
hello
pl/sql procedure successfully completed.
sql>
---------------------------------------------------------------------------------------------------------
再看一个例子:
在pl/sql中执行:
--用java编写oracle存储过程。
create or replace and compile java source named test as
public class mytest
{
  /** Stores a + b in ret[0]; the one-element array is how Java models a PL/SQL OUT parameter. */
  public static void myproc(int a,int b,int[] ret){
    ret[0]=a+b;
  }
  /** Returns a + b. */
  public static int myfunc(int a,int b){
    return a+b;
  }
}
-- Create the stored procedure (call specification for mytest.myproc)
create or replace procedure myproc(a in number, b in number, ret out number) as
language java name 'mytest.myproc(int,int,int[])';
-- Create the function (call specification for mytest.myfunc)
create or replace function myfunc(a in number, b in number) return number is
language java name 'mytest.myfunc(int,int) return int';
然后在sqlplus中测试存储过程——
sql> set serveroutput on
sql> declare a integer;
2 begin
3 myproc(1, 2, a);
4 dbms_output.put_line(a);
5 end;
6 /
3
pl/sql procedure successfully completed.
sql> select myfunc(1,2) from dual;
myfunc(1,2)
-----------
3
sql>
the basic steps you'll need to follow to configure ehcache for web page caching are (note that these steps assume you already have ehcache installed in your application):
the following settings should help you setup web caching for your application.
the first thing you'll need to do is add a filter to enable page caching.
the following web.xml settings will enable a servlet filter for page caching:
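<filter>
  <filter-name>SimplePageCachingFilter</filter-name>
  <filter-class>net.sf.ehcache.constructs.web.filter.SimplePageCachingFilter</filter-class>
</filter>
<filter-mapping>
  <filter-name>SimplePageCachingFilter</filter-name>
  <url-pattern>/*</url-pattern>
</filter-mapping>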
the second step to enabling web page caching is to configure ehcache with an appropriate ehcache.xml.
the following ehcache.xml file should configure a reasonable default ehcache:
now start your application server. pages should be cached.
package com.founder.opsin;
import nu.xom.element;
import uk.ac.cam.ch.wwmm.opsin.nametoinchi;
import uk.ac.cam.ch.wwmm.opsin.nametostructure;
import uk.ac.cam.ch.wwmm.opsin.nametostructureconfig;
import uk.ac.cam.ch.wwmm.opsin.nametostructureexception;
import uk.ac.cam.ch.wwmm.opsin.opsinresult;
/**
 * Small demo that asks OPSIN to parse a chemical name and prints what it got back.
 * NOTE(review): every identifier in this listing is lower case, which looks like
 * extraction damage; the real OPSIN/JDK class and method casing must be restored
 * before this compiles - confirm against the OPSIN API.
 */
public class opsintest {
/**
 * Parses the name "acetonitrile" and prints the parse status, the SMILES string
 * and the InChI string, one per line.
 *
 * @param args not used
 * @author zhou rui
 * @throws nametostructureexception declared by this method; which call raises it
 *         is not visible here - TODO confirm
 */
public static void main(string[] args) throws nametostructureexception {
// obtain the converter and a configuration object for it
nametostructure n2s = nametostructure.getinstance();
nametostructureconfig n2sconfig = new nametostructureconfig();
// parse the name; the result carries a status plus the derived structure
opsinresult result = n2s.parsechemicalname("acetonitrile", n2sconfig);
system.out.println(result.getstatus());
string smiles = result.getsmiles();
// the InChI is derived from the parse result by a separate helper class
string inchi = nametoinchi.convertresulttoinchi(result);
system.out.println(smiles);
system.out.println(inchi);
}
}
输出结果如下:
success
c(c)#n
inchi=1/c2h3n/c1-2-3/h1h3
计算插件(脂水分配系数/考虑电解时的脂水分配系数、极性表面积、溶解性、电解常数、lipinski五规则)
all are supported except solubility, in jchembase, cartridge, knime, pipeline pilot, instant jchem, jchem for excel and in marvin. see full list of our calculating lipinski rule of 5:
2. build and maintain project data viewer (sar understanding)
sar: structure-activity relationship, 结构与活性关系,简称构效关系
we have , also a viewer in jchem for excel, . that can be used for sar understanding.
3. library enumeration, cleanup, profile and analysis
, , , , , , knime, pipeline pilot
some presentations on the topic:
4. customized spotfire view
yes this is the tibco spotfire tool. marvin is integrated into spotfire, i think even jchem cartridge can communicate with spotfire, our new project is instant jchem integration which is under development
5.similarity search
yes, jchembase, cartridge, instant jchem, jchem for excel
for a more sophisticated approach of similarity, we provide .
6.clustering
jklustor, libmcs
7.generate sar tables
生成构效关系表格
we do not support directly but we have rgroup decomposition, fragmentation toolkit that can be visualized and analysed later.
8.ligand binding efficiency
配体结合效果
le can be calculated if the database contains the activity value, heavy atom counts can be calculated in jchem for excel, instant jchem
9.structure visualization
结构可视化
marvin
10.overlay/docking
叠合/对接
no, we do not support docking. can be done in marvin,, and a standalone gui for low throughput screening.
11.build predictive admet models
建立预测admet模型。admet分别代表吸收、分布、代谢、排泄和毒性。
we do not support directly, although we have some calculation plugins that can be further used for these property calculations such as pka, logp/d, atom counts, psa.
import org.rosuda.rengine.rexp;
import org.rosuda.rengine.rexpmismatchexception;
import org.rosuda.rengine.rserve.rconnection;
import org.rosuda.rengine.rserve.rserveexception;
public class rtest {
/**
* @param args
* @author zhou rui
* @throws rserveexception
* @throws rexpmismatchexception
*/
public static void main(string[] args) throws rserveexception, rexpmismatchexception {
rconnection c = new rconnection();
rexp x = c.eval("r.version.string");
system.out.println(x.asstring());
}
}
解决方案:
javaee版本和javamail的版本不一致,请将sun公司上下载最新版本.http://java.sun.com/products/javamail/downloads/index.html
例如:javamail 1.3以下的如果在javaee5上就会出现上面的错误,
如果还出现此问题,则是因为javaee5中包含有javamail的类但是却不全面,所以出本身的javamail
包冲突.用rar打开x:/program files/myeclipse 6.0/myeclipse/eclipse/plugins/com.genuitec.eclipse.j2eedt.core_x.x.x.zmyeclipsexxxxxxxxx/data/libraryset/ee_5/javaee.jar
,然后删除mail,一切就ok了.
写完部署什么都没问题,可当我写了测试类进行测试时发现主键的初始值竟然是50,其步长亦是50,在同事的帮助下发现原来是hibernate在做鬼,@sequencegenerator中添加两个参数(allocationsize = 1, initialvalue = 1)就ok。通过查找hibernate的资料发现原来是因为allocationsize的默认值是50.具体请参考http://www.oracle.com/technology/global/cn/products/ias/toplink/jpa/resources/toplink-jpa-annotations.html#sequencegenerator
只需要增加allocationsize = 1就可以
select * from dba_registry where comp_id = 'JAVAVM'
为空,则未安装,请执行 $oracle_home/javavm/install/initjvm.sql安装.
创建函数
-- Call specification that publishes the static Java method caller.call(...) as a
-- PL/SQL function. The "java" keyword and the java.lang qualifier of the return
-- type had been lost from this statement.
-- NOTE(review): Java names in a call spec are case-sensitive and must be fully
-- qualified. Confirm the class name casing ("caller") and whether the two numeric
-- parameters are java.lang.Integer or int against the real Java source.
create or replace function fn_oraclecall(marea in varchar2,mdevid in number,mport in number)
return varchar2
as
language java name 'caller.call(java.lang.String,java.lang.Integer,java.lang.Integer) return java.lang.String';
创建存储过程
-- Procedure wrapper around fn_oraclecall: forwards the three inputs and hands
-- the function result back through the OUT parameter.
create or replace procedure chk_setcab_num (
  marea  in  varchar2,
  mdevid in  number,
  mport  in  number,
  v_out  out varchar2
) is
begin
  v_out := fn_oraclecall(marea  => marea,
                         mdevid => mdevid,
                         mport  => mport);
end chk_setcab_num;
loadjava
loadjava -u sys/sys@sid -oci8 -verbose -grant user -synonym -resolve -schema user d:\caller.jar
--这里也可以是class文件,注意兼容oracle的jre版本
注意编写的java文件里,即caller.java的call()方法,需要是static
import .util.bitset;
import org.openscience.cdk.defaultchemobjectbuilder;
import org.openscience.cdk.exception.cdkexception;
import org.openscience.cdk.exception.invalidsmilesexception;
import org.openscience.cdk.fingerprint.extendedfingerprinter;
import org.openscience.cdk.smiles.smilesparser;
/**
 * Demo that computes a CDK fingerprint for one SMILES string.
 * NOTE(review): identifiers are lower case throughout, apparently from text
 * extraction; restore the CDK/JDK casing before compiling - confirm against the CDK API.
 */
public class fingerprintertest {
/**
 * Parses the SMILES "c2ccc1ccccc1c2" and asks an extended fingerprinter for its
 * fingerprint. The resulting bit set is assigned to a local and not used further.
 *
 * @param args not used
 * @throws cdkexception declared by this method
 * @throws invalidsmilesexception declared by this method
 */
public static void main(string[] args) throws invalidsmilesexception, cdkexception {
extendedfingerprinter fingerprinter = new extendedfingerprinter();
// the parser is created from the default chem-object builder
smilesparser sp = new smilesparser(defaultchemobjectbuilder.getinstance());
bitset bt = fingerprinter.getfingerprint(sp.parsesmiles("c2ccc1ccccc1c2"));
}
}
# Fragment of a longer listing: fp2 is computed earlier, outside this excerpt.
mol3 = pybel.readstring('smi', 'c1ccccc1')
fp3 = mol3.calcfp()
# "|" invokes Fingerprint.__or__, i.e. the similarity value of the two fingerprints
print(fp3 | fp2)  # compute the similarity value
4. 读取sdf文件
#encoding=utf-8
import pybel

# Walk every molecule in the SD file and print its fingerprint of type "fp2".
for mymol in pybel.readfile("sdf", "structures_all.sdf"):
    fp = mymol.calcfp("fp2")
    print(fp)
5. 输出txt文件和sdf文件
import javax.servlet.http.httpservletresponse;
import javax.vecmath.point2d;
import org.apache.log4j.logger;
import org.openscience.cdk.molecule;
import org.openscience.cdk.interfaces.iatom;
import org.openscience.cdk.interfaces.imolecule;
import org.openscience.cdk.io.mdlreader;
import org.openscience.cdk.layout.structurediagramgenerator;
import org.openscience.cdk.renderer.renderer2dmodel;
import org.openscience.cdk.renderer.simplerenderer2d;
/**
 * Renders a molecule, given as MDL text, into a square PNG and streams it to a
 * servlet response.
 * NOTE(review): this listing is extraction-damaged - identifiers are lower case,
 * binary "+" operators are missing (e.g. in the scale/centre/offset expressions)
 * and the statement that declares "itatoms" is cut off after "iterator". Restore
 * from the original source before use.
 */
public class imagetypeexporterutil {
private static final logger logger = logger.getlogger(imagetypeexporterutil.class);
/**
 * Reads the structure, generates 2D coordinates, scales and centres them into a
 * length x length square, paints the molecule and writes it as PNG.
 *
 * @param stucture molecule structure text, read through an MDL reader
 * @param length width and height of the image
 * @param response servlet response that receives the image bytes
 * @throws exception
 * if an exception occurs it is propagated, except a failure of the
 * coordinate generation, which is only printed
 */
public static void showasimage(string stucture, integer length, httpservletresponse response) throws exception {
logger.debug("imagetypeexporterutil.showasimage..");
stringreader mdl = new stringreader(stucture);
mdlreader cdkmdl = new mdlreader(mdl);
molecule mol = new molecule();
cdkmdl.read(mol);
// drop any coordinates that came with the input
// NOTE(review): the line below is truncated; presumably it declared itatoms as
// an iterator over the atoms of mol - confirm against the original
iterator
while (itatoms.hasnext()) {
iatom atom = itatoms.next();
atom.setpoint2d(null);
atom.setpoint3d(null);
}
// generate 2d coordinates
structurediagramgenerator sdg = new structurediagramgenerator();
sdg.setmolecule(mol);
try {
sdg.generatecoordinates();
} catch (exception ex) {
// layout failure is not fatal here: it is printed and rendering continues
ex.printstacktrace();
}
imolecule layedoutmol = sdg.getmolecule();
// scale molecule: find the bounding box; undef_pos marks "no value seen yet"
final double undef_pos = 100000;
double minx = undef_pos, miny = undef_pos, maxx = undef_pos, maxy = undef_pos;
itatoms = layedoutmol.atoms();
while (itatoms.hasnext()) {
iatom atom = itatoms.next();
point2d point2d = atom.getpoint2d();
if (minx == undef_pos || minx > point2d.x)
minx = point2d.x;
if (miny == undef_pos || miny > point2d.y)
miny = point2d.y;
if (maxx == undef_pos || maxx < point2d.x)
maxx = point2d.x;
if (maxy == undef_pos || maxy < point2d.y)
maxy = point2d.y;
}
// one scale factor for both axes (the smaller one), then offsets that move the
// scaled centre to the middle of the image
// NOTE(review): operators are missing in the next five expressions
double scalex = length / (maxx - minx 1);
double scaley = length / (maxy - miny 1);
double scale = scalex > scaley ? scaley : scalex;
double centrex = scale * (maxx minx) / 2.;
double centrey = scale * (maxy miny) / 2.;
double offsetx = length / 2. - centrex;
double offsety = length / 2. - centrey;
// apply scale and offset to every atom
itatoms = layedoutmol.atoms();
while (itatoms.hasnext()) {
iatom atom = itatoms.next();
point2d a = atom.getpoint2d();
point2d b = new point2d();
b.x = a.x * scale offsetx;
b.y = a.y * scale offsety;
atom.setpoint2d(b);
}
// set rendering properties
renderer2dmodel r2dm = new renderer2dmodel();
r2dm.setdrawnumbers(false);
r2dm.setuseantialiasing(true);
r2dm.setcoloratomsbytype(true);
r2dm.setshowatomtypenames(false);
r2dm.setshowaromaticity(true);
r2dm.setshowimplicithydrogens(false);
r2dm.setshowreactionboxes(false);
r2dm.setkekulestructure(false);
dimension dim = new dimension();
dim.setsize(length, length);
r2dm.setbackgrounddimension(dim);
r2dm.setbackcolor(java.awt.color.white);
// render the image onto a white length x length canvas
simplerenderer2d renderer = new simplerenderer2d();
renderer.setrenderer2dmodel(r2dm);
bufferedimage bufferedimage = new bufferedimage(length, length,
bufferedimage.type_int_rgb);
graphics2d graphics = bufferedimage.creategraphics();
graphics.setpaint(java.awt.color.white);
rectangle2d.float rectangle = new rectangle2d.float(0, 0, length, length);
graphics.fill(rectangle);
renderer.paintmolecule(layedoutmol, graphics);
// write the image to response; the stream is closed even if writing fails
response.setcontenttype("image/png");
outputstream out = response.getoutputstream();
try {
javax.imageio.imageio.write(bufferedimage, "png", out);
} finally {
out.close();
}
}
}
package com.founder.cdk;
import .io.file;
import .io.filenotfoundexception;
import .io.filereader;
import .util.arraylist;
import .util.list;
import org.openscience.cdk.chemfile;
import org.openscience.cdk.chemobject;
import org.openscience.cdk.exception.cdkexception;
import org.openscience.cdk.interfaces.iatomcontainer;
import org.openscience.cdk.io.mdlv2000reader;
import org.openscience.cdk.smiles.smarts.smartsquerytool;
import org.openscience.cdk.tools.manipulator.chemfilemanipulator;
/**
 * Demo that reads molecules from an SD file and lists those matching a
 * substructure query.
 * NOTE(review): extraction-damaged listing - identifiers are lower case and the
 * two declarations starting with "list" are cut off; presumably they declared
 * "containerslist" and "substructurelist", which the code below uses - confirm.
 */
public class smartsquerytooltest {
// Shared query tool, created once in the static initializer. A failure there is
// swallowed by the empty catch, which leaves sqt null.
static smartsquerytool sqt;static {
try {
sqt = new smartsquerytool("c2ccc1ccccc1c2");
} catch (cdkexception e) {
}
}
/**
 * Reads h:\molecules.sdf, matches every container against the query pattern,
 * then prints the number of matches followed by the "id" property of each match.
 * Read and file-not-found errors are printed, not propagated.
 *
 * @param args not used
 */
public static void main(string[] args) {
string filename = "h:\\molecules.sdf";
try {
mdlv2000reader reader = new mdlv2000reader(new filereader(new file(filename)));
chemfile chemfile = (chemfile) reader.read((chemobject) new chemfile());
list
list
sqt.setsmarts("c1ccc3c(c1)ccc4c2ccccc2ccc34"); // replace the pattern to match against
boolean matched = false;
for (iatomcontainer molecule : containerslist) {
matched = sqt.matches(molecule);
if (matched){
substructurelist.add(molecule);
}
}
system.out.println(substructurelist.size());
for (iatomcontainer molecule : substructurelist) {
system.out.println(molecule.getproperty("id"));
}
} catch (cdkexception e) {
e.printstacktrace();
} catch (filenotfoundexception e) {
e.printstacktrace();
}
}
}
通过测试, matches方法速度很慢, 一般一个结构需要200ms-1000ms左右.
import .io.file;
import .io.filenotfoundexception;
import .io.filereader;
import .util.list;
import org.openscience.cdk.chemfile;
import org.openscience.cdk.chemobject;
import org.openscience.cdk.molecule;
import org.openscience.cdk.exception.cdkexception;
import org.openscience.cdk.interfaces.iatomcontainer;
import org.openscience.cdk.io.mdlreader;
import org.openscience.cdk.io.mdlv2000reader;
import org.openscience.cdk.tools.manipulator.chemfilemanipulator;
/**
 * Demo that reads an SD file and prints the properties of every molecule in it.
 * NOTE(review): extraction-damaged listing - identifiers are lower case and the
 * declaration starting with "list" is cut off; presumably it declared
 * "containerslist", which the loop below iterates - confirm.
 */
public class readsdftest {
/**
 * Reads h:\molecules.sdf and, for each container, prints its property map and
 * its "cd_molweight" property.
 *
 * @param args not used
 * @throws cdkexception declared by this method
 * @throws filenotfoundexception declared by this method
 */
public static void main(string[] args) throws cdkexception, filenotfoundexception {
string filename = "h:\\molecules.sdf";
// inputstream ins = readsdftest.class.getclassloader().getresourceasstream(filename);
// mdlreader reader = new mdlreader(ins);
//alternatively, you can specify a file directly
mdlv2000reader reader = new mdlv2000reader(new filereader(new file(filename)));
chemfile chemfile = (chemfile)reader.read((chemobject)new chemfile());
list
molecule molecule = null;
for (iatomcontainer mol : containerslist) {
// each container is cast to the concrete molecule type before use
molecule = (molecule) mol;
system.out.println(molecule.getproperties());
system.out.println(molecule.getproperty("cd_molweight"));
// fingerprinter fp = new fingerprinter();
// bitset bt = fp.getfingerprint(molecule);
// system.out.println(bt);
}
}
}
import .io.stringreader;
import .sql.connection;
import .sql.resultset;
import .sql.sqlexception;
import .util.arraylist;
import .util.bitset;
import .util.list;
import org.openscience.cdk.molecule;
import org.openscience.cdk.exception.cdkexception;
import org.openscience.cdk.fingerprint.fingerprinter;
import org.openscience.cdk.io.mdlreader;
import org.openscience.cdk.similarity.tanimoto;
/**
 * Timing demo: loads every structure from a MySQL table, then collects the
 * molecules whose fingerprint similarity to one reference molecule exceeds 0.9.
 * NOTE(review): extraction-damaged listing - identifiers are lower case, "+" is
 * missing inside the println arguments, "java" is missing before
 * ".sql.drivermanager", and the two declarations starting with "list" are cut off
 * (presumably "list" and "resultlist", which the code below uses) - confirm.
 */
public class cdktest {
/**
 * Runs the load-then-search sequence and prints the element counts and the
 * elapsed milliseconds of each phase. Setup and SQL errors are printed, not
 * propagated.
 *
 * @param args not used
 */
public static void main(string[] args) {
// mysql
long t1 = system.currenttimemillis();
try {
class.forname("com.mysql.jdbc.driver").newinstance();
connection con = .sql.drivermanager
.getconnection(
"jdbc:mysql://localhost/coocoo?useunicode=true&characterencoding=utf-8&zerodatetimebehavior=converttonull",
"root", "root");
resultset results = null;
string querysql = "select id, structure from structure ";
results = con.createstatement().executequery(querysql);
// dump out the results
list
fingerprinter fp = new fingerprinter();
bitset bt = null;
while (results.next()) {
long id = results.getlong("id");
// build a molecule object from the stored structure text
stringreader mdl = new stringreader(results.getstring("structure"));
mdlreader cdkmdl = new mdlreader(mdl);
molecule molecule = new molecule();
cdkmdl.read(molecule);
// the row with id 1220 supplies the reference fingerprint; bt stays null if
// no such row exists
if (id == 1220) {
bt = fp.getfingerprint(molecule);
}
list.add(molecule);
}
system.out.println("size:=" list.size());
list
long t2 = system.currenttimemillis();
system.out.println("thread: collection data in " (t2 - t1) " ms.");
for (molecule molecule : list) {
try {
float coefficient = tanimoto.calculate(fp.getfingerprint(molecule), bt); // compute the similarity
if (coefficient > 0.9) {
resultlist.add(molecule);
}
} catch (cdkexception e) {
// a molecule whose comparison fails is skipped without any report
}
}
long t3 = system.currenttimemillis();
system.out.println(resultlist.size());
system.out.println("thread: search in " (t3 - t2) " ms.");
// the connection is closed only on this success path
con.close();
} catch (instantiationexception e) {
e.printstacktrace();
} catch (illegalaccessexception e) {
e.printstacktrace();
} catch (classnotfoundexception e) {
e.printstacktrace();
} catch (sqlexception e) {
e.printstacktrace();
} catch (cdkexception e) {
e.printstacktrace();
}
long t4 = system.currenttimemillis();
system.out.println("thread: all in " (t4 - t1) " ms.");
}
}
A great series of posts named "Fast Substructure Search Using Open Source Tools" provides details on substructure search with MySQL. However, the poor binary-data operation functions of MySQL limit the implementation of similar-structure search, which typically depends on calculating the Tanimoto coefficient. We are going to use Java & CDK to add this feature.
As the default output of a CDK fingerprint, java.util.BitSet with the Serializable interface is a perfect data format for fingerprint storage. Java itself provides several collections such as the ArrayList, LinkedList and Vector classes in package java.util. To provide web access to the search engine, the thread-unsafe ArrayList and LinkedList have to be kicked out. How about Vector? Once all the fingerprint data is prepared, the only collection operation we need for similarity search is iteration: no add, no delete. So a lightweight array is enough.
most of the molecule information is stored in mysql database, so we are going to map fingerprint to corresponding row in data table. here is the moldfdata class, we use a long variable to store corresponding primary key in data table.
/**
 * Pairs a fingerprint with the primary key of the database row it was computed
 * from, so a similarity hit can be mapped back to its record.
 *
 * <p>JDK types are written fully qualified so the class does not depend on
 * import lines outside this listing.
 */
public class moldfdata implements java.io.Serializable {
    /** Primary key of the corresponding row in the data table. */
    private long id;
    /** Fingerprint bits of the molecule stored in that row. */
    private java.util.BitSet fingerprint;

    /**
     * @param id          primary key of the corresponding row
     * @param fingerprint fingerprint of the molecule; stored by reference, not copied
     */
    public moldfdata(long id, java.util.BitSet fingerprint) {
        this.id = id;
        this.fingerprint = fingerprint;
    }

    /** @return the primary key of the corresponding row */
    public long getid() {
        return id;
    }

    /** @param id the primary key of the corresponding row */
    public void setid(long id) {
        this.id = id;
    }

    /** @return the stored fingerprint (the same instance that was passed in) */
    public java.util.BitSet getfingerprint() {
        return fingerprint;
    }

    /** @param fingerprint the fingerprint to store; kept by reference */
    public void setfingerprint(java.util.BitSet fingerprint) {
        this.fingerprint = fingerprint;
    }
}
This is how we store our fingerprints.
// All fingerprints, held in a plain array because the search only iterates.
// The element type is the moldfdata class shown in this article; the former
// spelling "molfpdata" named no declared class.
private moldfdata[] arraydata;
no big deal with similarity search. just calculate the tanimoto coefficient, if it’s bigger than minimal similarity you set, add this one into result.
public list searchtanimoto(bitset bt, float minsimlarity) {
list resultlist = new linkedlist();
int i;
for (i = 0; i < arraydata.length; i ) {
moldfdata alistdata = arraydata[i];
try {
float coefficient = tanimoto.calculate(alistdata.getfingerprint(), bt);
if (coefficient > minsimlarity) {
resultlist.add(new searchresultdata(alistdata.getid(), coefficient));
}
} catch (cdkexception e) {
}
collections.sort(resultlist);
}
return resultlist;
}
Pretty ugly code? Maybe. But it really works, at an acceptable speed.
Tests were done using the code below on a MacBook (Intel Core Duo 1.83 GHz, 2 GB RAM).
// Time a single similarity search with a minimum similarity of 0.8
// ("se" and "bs" are set up outside this excerpt).
long t3 = System.currentTimeMillis();
java.util.List listresult = se.searchtanimoto(bs, 0.8f);
long t4 = System.currentTimeMillis();
System.out.println("thread: search done in " + (t4 - t3) + " ms.");
in my database of 87364 commercial compounds, it takes 335 ms.
import org.openscience.cdk.annotations.testclass;
import org.openscience.cdk.annotations.testmethod;
import org.openscience.cdk.exception.cdkexception;
import .util.bitset;
/**
* calculates the tanimoto coefficient for a given pair of two
* fingerprint bitsets or real valued feature vectors.
*
* the tanimoto coefficient is one way to
* quantitatively measure the "distance" or similarity of
* two chemical structures.
*
*
you can use the fingerprinter class to retrieve two fingerprint bitsets.
* we assume that you have two structures stored in cdk.molecule objects.
* a tanimoto coefficient can then be calculated like:
*
* bitset fingerprint1 = fingerprinter.getfingerprint(molecule1);
* bitset fingerprint2 = fingerprinter.getfingerprint(molecule2);
* float tanimoto_coefficient = tanimoto.calculate(fingerprint1, fingerprint2);
*
the fingerprinter assumes that hydrogens are explicitely given, if this
* is desired!
*
note that the continuous tanimoto coefficient does not lead to a metric space
*
*@author steinbeck
* @cdk.githash
*@cdk.created 2005-10-19
*@cdk.keyword jaccard
*@cdk.keyword similarity, tanimoto
* @cdk.module fingerprint
*/
@testclass("org.openscience.cdk.similarity.tanimototest")
public class tanimoto
{
/**
* evaluates tanimoto coefficient for two bit sets.
*
* @param bitset1 a bitset (such as a fingerprint) for the first molecule
* @param bitset2 a bitset (such as a fingerprint) for the second molecule
* @return the tanimoto coefficient
* @throws org.openscience.cdk.exception.cdkexception if bitsets are not of the same length
*/
@testmethod("testtanimoto1,testtanimoto2")
public static float calculate(bitset bitset1, bitset bitset2) throws cdkexception
{
float _bitset1_cardinality = bitset1.cardinality();
float _bitset2_cardinality = bitset2.cardinality();
if (bitset1.size() != bitset2.size()) {
throw new cdkexception("bisets must have the same bit length");
}
bitset one_and_two = (bitset)bitset1.clone();
one_and_two.and(bitset2);
float _common_bit_count = one_and_two.cardinality();
return _common_bit_count/(_bitset1_cardinality _bitset2_cardinality - _common_bit_count);
}
/**
* evaluates the continuous tanimoto coefficient for two real valued vectors.
*
* @param features1 the first feature vector
* @param features2 the second feature vector
* @return the continuous tanimoto coefficient
* @throws org.openscience.cdk.exception.cdkexception if the features are not of the same length
*/
@testmethod("testtanimoto3")
public static float calculate(double[] features1, double[] features2) throws cdkexception {
if (features1.length != features2.length) {
throw new cdkexception("features vectors must be of the same length");
}
int n = features1.length;
double ab = 0.0;
double a2 = 0.0;
double b2 = 0.0;
for (int i = 0; i < n; i ) {
ab = features1[i] * features2[i];
a2 = features1[i]*features1[i];
b2 = features2[i]*features2[i];
}
return (float)ab/(float)(a2 b2-ab);
}
}
通过源码可以看出calculate(bitset bitset1, bitset bitset2)方法,是通过比较两个分子的fingerprint的位,来计算相似度.通过bitset的and操作得到共同的个数,然后在除以总共为true的个数,这样就得到相似值.