使用python获取html页面的内容
import urllib
from HTMLParser import HTMLParser
class TitleParser(HTMLParser):
    """Collect the text of the <title> element and of every <div> element.

    After ``feed()`` has been called, ``title`` holds the text seen inside
    <title> and ``divcontent`` holds the concatenated text seen inside any
    <div>, including text that follows a nested <div> inside its parent.
    """

    def __init__(self):
        # Text accumulated so far for each target.
        self.title = ''
        self.divcontent = ''
        # Non-zero while the parser is inside <title>.
        self.readingtitle = 0
        # Number of <div> elements currently open.  A depth counter (rather
        # than an on/off flag) keeps capturing text after an inner </div>
        # until the outermost <div> is closed.  It is still truthy exactly
        # when the parser is inside a <div>.
        self.readingdiv = 0
        # Called explicitly rather than through super(): the Python 2
        # HTMLParser module this file imports provides an old-style class.
        HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        """Start capturing when a <title> or <div> opens."""
        if tag == 'title':
            self.readingtitle = 1
        elif tag == 'div':
            # Exact match, mirroring handle_endtag; a substring test would
            # also fire for unrelated tags whose name merely contains "div".
            self.readingdiv += 1

    def handle_data(self, data):
        """Append ``data`` to whichever targets are currently being read."""
        if self.readingtitle:
            # Ordinarily, this is slow and a bad practice, but
            # we can get away with it because a title is usually
            # small and simple.
            self.title += data
        if self.readingdiv:
            self.divcontent += data

    def handle_endtag(self, tag):
        """Stop capturing when the matching element closes."""
        if tag == 'title':
            self.readingtitle = 0
        elif tag == 'div' and self.readingdiv > 0:
            # Guarded so a stray </div> cannot drive the depth negative.
            self.readingdiv -= 1

    def gettitle(self):
        """Return the text collected from <title>."""
        return self.title

    def getdiv(self):
        """Return the text collected from all <div> elements."""
        return self.divcontent
def getweb(url):
    """Download ``url`` and return the raw response body.

    The body is returned exactly as read from the response object (a byte
    string); no decoding is performed.  Errors raised by ``urlopen``
    propagate to the caller.
    """
    # urlopen lives in urllib.request on Python 3; fall back to the
    # Python 2 location so the function works on either interpreter.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen
    # Fetch the URL that was passed in rather than a fixed address.
    response = urlopen(url)
    try:
        return response.read()
    finally:
        # Release the connection even if read() raises.
        response.close()
# Fetch the article page and run it through the parser.
web = getweb('http://blog.chinaunix.net/u3/105068/showart_2223566.html')
test = TitleParser()
test.feed(web)
# Write the title, a CRLF separator, then the collected <div> text.
# The with-block closes the file even if one of the writes raises.
with open('abinfile', 'w') as file_object:
    file_object.write(test.title)
    file_object.write("\r\n")
    file_object.write(test.divcontent)
相关文档：
1 所有的 .java|.jsp|.html|.xml 源文件均使用utf-8编码格式保存到系统磁盘。
如：在Eclipse中编辑文件，选中文件打开右键菜单选择属性，将文本文件编码设置为其他并选择UTF-8；也可以在
Eclipse——首选项——常规——内容类型中设置各种文件的缺省编码，这样以后所有的文本文件都使用统一 ......
假设你的操作系统是WinXP：
开始菜单-> 运行-> cmd
然后就可以敲命令了。
如果你用IDLE，可以用IDLE打开这个.py文件，然后在菜单 "Run "下有一项 "Run Module "，点击即可。 ......
你真的知道一个HTML及资源是如何load的吗(了解各个部分是何时下载和执行的)
原文地址：http://www.cnblogs.com/mindsbook/archive/2009/12/03/sequence_of_response.html
本博客所有内容采用 Creative Commons Licenses 许可使用. 引用本内容时，请保留 朱涛, 出处 ，并且 非商业& ......
1． 首先就是在编译器中把python安装目录include/与libs/加入，对于这点我在vc6中可以，但是在dev c++中即使加入了编译也会出错，说找不到python头文件，这点比较郁闷，不过考虑到一般windows编程都用的是vc，所以并没有什么影响吧！！！
然后用#include <Python.h>就可以把python的主头文件包含进来了。
但是 ......
前两天理解了unicode、utf-8、gb2312这些编码之间的关系以后，今天终于弄明白了在python里面的编码问题。我们在写python脚本时如果有中文的字符串，在运行的时候有可能会报错也有可能会出现乱码。一般加上# -*- coding:utf-8 -*-就不会报错了，但是还可能有乱码问题，而且同样的代码在不同的编辑器中得出的结果 ......