使用python获取html页面的内容
import urllib
from HTMLParser import HTMLParser
class TitleParser(HTMLParser):
    """Collect the text inside ``<title>`` and inside ``<div>`` elements.

    Feed markup with ``feed()``. Afterwards ``title`` holds every piece of
    text that appeared while a ``<title>`` element was open, and
    ``divcontent`` holds every piece of text that appeared while at least one
    ``<div>`` element (nested or not) was open.
    """

    def __init__(self):
        # Explicit base-class call so the parser's own state is initialised
        # without relying on super().
        HTMLParser.__init__(self)
        self.title = ''
        self.divcontent = ''
        self.readingtitle = 0  # 1 while a <title> element is open
        self.readingdiv = 0    # 1 while at least one <div> element is open
        self._divdepth = 0     # number of <div> elements currently open

    def handle_starttag(self, tag, attrs):
        """Switch text collection on when a ``<title>`` or ``<div>`` opens."""
        if tag == 'title':
            self.readingtitle = 1
        elif tag == 'div':
            # Exact match, mirroring handle_endtag: a tag that merely contains
            # "div" would otherwise enable collection and never disable it.
            self._divdepth += 1
            self.readingdiv = 1

    def handle_data(self, data):
        """Append *data* to whichever collectors are currently active."""
        if self.readingtitle:
            # Ordinarily, this is slow and a bad practice, but
            # we can get away with it because a title is usually
            # small and simple.
            self.title += data
        if self.readingdiv:
            self.divcontent += data

    def handle_endtag(self, tag):
        """Switch text collection off when the matching element closes."""
        if tag == 'title':
            self.readingtitle = 0
        elif tag == 'div' and self._divdepth:
            # Only stop collecting once the outermost <div> has closed, so
            # text following a nested </div> is still captured. A stray
            # </div> with nothing open is ignored.
            self._divdepth -= 1
            if not self._divdepth:
                self.readingdiv = 0

    def gettitle(self):
        """Return the text collected from ``<title>``."""
        return self.title

    def getdiv(self):
        """Return the text collected from ``<div>`` elements."""
        return self.divcontent
def getweb(url):
    """Download the page at *url* and return its body.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body exactly as returned by the response's ``read()``.
    """
    # Open the address supplied by the caller rather than a fixed one.
    response = urllib.urlopen(url)
    try:
        return response.read()
    finally:
        # Release the connection even if read() raises.
        response.close()
# Fetch the page, run it through TitleParser, then write the collected title
# and <div> text to the file 'abinfile', separated by "\r\n".
web = getweb('http://blog.chinaunix.net/u3/105068/showart_2223566.html')
test = TitleParser()
test.feed(web)
# The with-block guarantees the file is closed even if a write fails.
with open('abinfile', 'w') as file_object:
    file_object.write(test.title)
    file_object.write("\r\n")
    file_object.write(test.divcontent)
相关文档：
1、str类型可以理解为一个二进制block，或multibyte
2、multibyte_str.decode("<multibyte_encode_method>") -> unicode
3、unicode_str.encode("<multibyte_encode_method>") -> multibyte_str(binary block)
4、unicode_str 的操作参数也应为unicode，如：unicode_str.find("样本".deco ......
需求：
A域有页面a.html，其中有iframe包含B域的页面b.html，现在要通过a.html上的一个按钮，来把a.html页面上一个文本框的值传递到b.html页面的文本框。
注：这里b.html是html网页，不能接收其他网站post过来的值，所以不能用直接post的方法来传值，但是，如果接收页面是b.aspx或者b.asp 呢，那不是可以直接post了么？答 ......
你真的知道一个HTML及资源是如何load的吗(了解各个部分是何时下载和执行的)
原文地址：http://www.cnblogs.com/mindsbook/archive/2009/12/03/sequence_of_response.html
本博客所有内容采用 Creative Commons Licenses 许可使用. 引用本内容时，请保留 朱涛, 出处 ，并且 非商业& ......
1.改变本文件的属性
import os
import stat

# Set the mode of `filename` to stat.S_IWRITE (owner-write) so the file is no
# longer read-only. `filename` must be defined by the surrounding code.
# NOTE(review): on POSIX this replaces the whole mode with owner-write only;
# confirm that is intended outside Windows.
os.chmod(filename, stat.S_IWRITE)
2.改变本目录及其子目录属性
import os

# Run the DOS `attrib` command to clear the read-only flag (-r) on every file
# matching <path>\*.*, recursing into subdirectories (/s). `path` must be
# defined by the surrounding code.
# A space separates the -r switch from the path, and the path is quoted so
# directories containing spaces are passed as a single argument.
os.system(r'attrib -r "' + path + '\\*.*" /s')
3.介绍改变文件属性的dos指令
Attrib
显示、设置或删除指派给文件或目录的只读、存档、系统以及隐藏属性。如果在不含参数的情 ......