需要:MySQLdb
下面是数据表结构:
/*
Navicat MySQL Data Transfer

Source Server         : 127.0.0.1
Source Server Version : 50509
Source Host           : 127.0.0.1:3306
Source Database       : wooyun

Target Server Type    : MYSQL
Target Server Version : 50509
File Encoding         : 65001

Date: 2015-09-24 17:38:14
*/

SET FOREIGN_KEY_CHECKS=0;

-- ----------------------------
-- Table structure for wooyun_vul
-- ----------------------------
-- One row per vendor: its display name, its website URL and the number of
-- vulnerabilities recorded for it.
-- The default charset is utf8 (the dump originally said latin1): the loader
-- script talks to the server with "set names 'utf8'", and a latin1 table
-- cannot store vendor names that contain non-Latin-1 characters.
DROP TABLE IF EXISTS `wooyun_vul`;
CREATE TABLE `wooyun_vul` (
  `id` int(8) NOT NULL AUTO_INCREMENT,
  `corpsname` varchar(255) DEFAULT NULL,
  `corpsurl` varchar(255) DEFAULT NULL,
  `vulcount` int(255) DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
python 脚本:
#conding=utf-8import urllib2import urllibimport reimport MySQLdb url = "http://wooyun.org/corps/page/"def getWooyuncorps(url): request = urllib2.Request(url) request.add_header('User-Agent','Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36') reponse = urllib2.urlopen(request) content = reponse.read() pattern1 = re.compile(r'.*?<\/a><\/td>') pattern2 = re.compile(r'.*?(\d+).*') count = pattern.findall(content) return count corpslist = []corpsurllist = []countlist = []for i in range(1,37): corps,corpsUrl = getWooyuncorps(url+str(i)) for corp in corps: corpslist.append(corp) for urls in corpsUrl: corpsurllist.append(urls)print len(corpslist),len(corpsurllist) for i in range(0,len(corpslist)): newurl = "http://www.wooyun.org/corps/"+urllib.quote(corpslist[i]) #print newurl count = getcorpscount(newurl) #print count for countA in count: countlist.append(countA) #print len(countlist)conn = MySQLdb.connect('localhost','root','','wooyun')cur = conn.cursor()sql = "set names 'utf8'"cur.execute(sql)conn.commit() for s in range(0,len(countlist)): sql = 'insert into wooyun_vul(corpsname,corpsurl,vulcount) values("%s","%s",%d)' %(corpslist[s],corpsurllist[s],int(countlist[s])) print sql cur.execute(sql) conn.commit() conn.close()print "success"