# Original article (原文地址): http://soft.zhiding.cn/software_zone/2003/0820/93769.shtml
# URL extractor
import sys, string
from htmllib import *
from formatter import *
class URLExtractParser(HTMLParser):
def init (self, verbose = 0):
self.anchors = []
formatter = NullFormatter()
HTMLParser.init(self, formatter, 1)
def anchor_bgn(self, href, name, type):
self.save_bgn()
self.anchor = href
def anchor_end(self):
tagname = string.strip(self.save_end())
if self.anchor and tagname:
self.anchors.append(tagname)
def getAnchors (self):
return self.anchors
class URLExtractor:
    """COM server class exposing ``extract`` through win32com.

    The underscore-wrapped attributes below are the names win32com looks up
    when registering the class and dispatching calls to it.
    """

    # Methods callable by COM clients.
    _public_methods_ = ["extract"]
    # ProgID that clients use to create the object.
    _reg_progid_ = "URLExtractor.Extractor"
    # Has to be unique per class. Use a guid generator
    # (e.g. pythoncom.CreateGuid()) and paste the result here; the value is
    # left empty exactly as in the original.
    _reg_clsid_ = ""

    def extract(self, strHTML):
        """Parse ``strHTML`` and return the anchors found by URLExtractParser."""
        parser = URLExtractParser()
        parser.feed(strHTML)
        # close() makes the parser process any data it is still buffering.
        parser.close()
        return parser.getAnchors()
# Register (or unregister, depending on command-line flags handled by
# win32com) the COM server when this file is run as a script.
if __name__ == "__main__":
    import traceback

    import win32com.server.register

    try:
        win32com.server.register.UseCommandLine(URLExtractor)
        # Parenthesised single-argument print works on Python 2 and 3.
        print("URLExtractor successfully registed.")
    except Exception:
        # Catch Exception rather than everything so Ctrl-C and SystemExit
        # still propagate, and show the cause instead of hiding it.
        print("URLExtractor failed to register")
        traceback.print_exc()