Here is the code...
import urllib
import sys
import os.path
import sgmllib
# Author banner, written to stdout every time the script starts.
# (Single-argument print(...) behaves the same as the print statement here.)
print("\n\n\t\tlipun4u[at]gmail[dot]com\n\t\t------------------------")

# Name this script was invoked as (directory part removed); used in the
# usage text.
appname = os.path.split(sys.argv[0])[1]
class MyParser(sgmllib.SGMLParser):
"A simple parser class."
def parse(self, s):
"Parse the given string 's'."
self.feed(s)
self.close()
def __init__(self, verbose=0):
"Initialise an object, passing 'verbose' to the superclass."
sgmllib.SGMLParser.__init__(self, verbose)
self.hyperlinks = []
def start_a(self, attributes):
"Process a hyperlink and its 'attributes'."
for name, value in attributes:
if name == "href":
self.hyperlinks.append(value)
def get_hyperlinks(self):
"Return the list of hyperlinks."
return self.hyperlinks
def print_usage():
    """Print the usage line and an example invocation to stdout."""
    print("Usage : " + appname)
    print("e.g. : " + appname + " www.google.com ")


def normalise_site(arg):
    """Return 'arg' as a URL with a lower-cased scheme and host.

    A leading "http://" or "https://" (matched case-insensitively) is kept;
    anything else gets "http://" prepended. Only the scheme and the part up
    to the first "/", "?" or "#" are lower-cased, so paths and query
    strings keep their case and any "http://" embedded later in the
    argument is left alone.
    """
    if arg.lower().startswith(("http://", "https://")):
        scheme, _, rest = arg.partition("://")
    else:
        scheme, rest = "http", arg

    # Find where the host part ends: the earliest of "/", "?" or "#".
    host_end = len(rest)
    for delimiter in "/?#":
        position = rest.find(delimiter)
        if position != -1:
            host_end = min(host_end, position)

    return scheme.lower() + "://" + rest[:host_end].lower() + rest[host_end:]


def download(site):
    """Open 'site' with urllib and return everything read from it.

    The handle is closed even if reading fails. Errors raised by
    urllib.urlopen() or read() are left for the caller to handle.
    """
    site_data = urllib.urlopen(site)
    try:
        return site_data.read()
    finally:
        site_data.close()


def main(argv=None):
    """Scan one site for hyperlinks and print them.

    'argv' defaults to sys.argv and must hold exactly one argument besides
    the program name. If the count is wrong, or the argument is "-h" or
    "--help", the usage text is printed and the process exits with status 1.
    An IOError while fetching the page is reported and also exits with
    status 1.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) != 2 or argv[1] in ("-h", "--help"):
        print_usage()
        sys.exit(1)

    site = normalise_site(argv[1])
    print("Target : " + site)

    try:
        page = download(site)
    except IOError as msg:
        # Two spaces before the URL: the original print statement joined a
        # string ending in a space with the URL using its own separator.
        print("Error in connecting site  " + site)
        print(msg)
        sys.exit(1)

    parser = MyParser()
    parser.parse(page)
    links = parser.get_hyperlinks()

    print("Total no. of hyperlinks : " + str(len(links)))
    print("")
    for link in links:
        print(link)


if __name__ == "__main__":
    main()
Here is the usage message, followed by a sample run:
I:\Python26>linkscan1.py
lipun4u[at]gmail[dot]com
------------------------
Usage : linkscan1.py
e.g. : linkscan1.py www.google.com
I:\Python26>linkscan1.py www.iter.ac.in
lipun4u[at]gmail[dot]com
------------------------
Target : http://www.iter.ac.in
Total no. of hyperlinks : 12
http://iter.ac.in
default.asp
contactus.asp
http://iter.ac.in:8383
time-table.xls
http://www.soauniversity.ac.in/saat_2009.htm
images/advertisement_Saat2009.gif
#
#
#
#
http://www.allindiaonline.in/
I:\Python26>
But some people added their own spice to it; look what they made...
No comments:
Post a Comment