lucene 索引HTML文档
深未来技术
1、大部分WEB文档采用HTML格式。
2、本例用如下HTML文档
<html>
<head>
<title>
Laptop power supplies are avaliable in First class only
</title>
</head>
<body>
<h1>code,write,fly</h1>
</body>
</html>
3、使用JTidy
JTidy是由Andy Quick编写的Tidy的Java版本。
public class JTidyHTMLHandler implements DocumentHandler{
publicorg.apache.lucene.document.Document getDocument(InputStreamis)
throwsDocumentHandlerException{ //´«ÈëÒ»¸ö´ú±íHTMLÎĵµµÄInputStream¶ÔÏó
Tidy tidy=new Tidy();
tidy.setQuiet(true);
tidy.setShowWarnings(false);
//½âÎö´ú±íHTMLÎĵµµÄInputStream¶ÔÏó
org.w3c.dom.Documentroot=tidy.parseDOM(is,null);
ElementrawDoc=root.getDocumentElement();
org.apache.lucene.document.Document doc=neworg.apache.lucene.document.Document();
Stringtitle=getTitle(rawDoc);//»ñµÃ±êÌâ
Stringbody=getBody(rawDoc);//»ñµÃ<body>ºÍ</body>Ö®¼äËùÓÐÔªËØ
if((title!=null)&&(!title.equals(""))) {
doc.add(Field.Text("title",title));
}
if((body!=null)&&(!body.equals(""))){
doc.add(Field.Text("body",body));
}
return doc;
}
protected String getTitle(Element rawDoc){
if(rawDoc==null){
returnnull;
}
Stringtitle="";
NodeListchildren=rawDoc.getElementsB
Ïà¹ØÎĵµ£º
转载:http://jiangzhengjun.javaeye.com/blog/480996
事件
DOM同时支持两种事件模式:捕获型事件和冒泡型事件,但是,捕获型事件先发生。两种事件流会触及DOM中的所有对象,从document对象开
始,也在document对象结束(大部分兼容标准的浏览器会继续将事件捕获/冒泡延续至window对象),DOM中的元素都会连续收到两次事 ......
<Html>
<Head>
<SCRIPT LANGUAGE="JavaScript">
<!--
//定义 select 原值
var oldValue,oldText;
//select下拉框的onkeydown事件,修改下拉框的值
function catch_keydown(sel){
switch(event.keyCode) {
case 13: //ȯ ......
<html>
<frameset rows="10%,*">
<frame src="http://g.cn" scrolling="no">
<frameset cols="25%,*">
<frame src="http://g.cn" scrolling="no">
<frameset rows="10%,*">
<frame src="http://g.cn" scrolling="no">
......
PageReleaser需要一种HTML的压缩算法,Google了很久,发现如果只是简单去除空白和注释的话,使用XLinq就可以轻易地实现。先看看MSDN是怎么说的:一种常用方案是读取缩进的 XML,在内存中创建一个没有任何空白文本节点(即不保留空白)的 XML 树,对该 XML 执行某些操作,然后保存带缩进的 XML。在序列化带格式的 XML 时 ......
<html>
<script language="javascript" defer>
function isvalid()
{
Input_Str=document.getElementById("tsinput").value;
document.getElementById("ts").innerHTML=Input_Str;
if(Input_Str!=document.getElementById("ts").innerText)
alert("include Html Element... ......