imac 2006-7-14 09:29 PM
the python code for holyokemall/hampshiremall deal info
[code]
#!/usr/local/bin/python
import datetime
def gendatestr(diff):
    """Return the date `diff` days from today as an "M/D/YYYY" string.

    diff: day offset relative to today (0 is today, negative values lie
        in the past).

    Month and day are not zero-padded, e.g. "7/4/2006".
    """
    thisday = datetime.date.today() + datetime.timedelta(days=diff)
    return "%d/%d/%d" % (thisday.month, thisday.day, thisday.year)
import urllib
def crawler(baseurl, diff):
    """Download the calendar page for the day `diff` days from today.

    baseurl: URL prefix; the "M/D/YYYY" string from gendatestr(diff) is
        appended to it to form the request URL.
    diff: day offset relative to today.

    Returns the page body as text, or False when opening or reading the
    URL fails with IOError.
    """
    # Python 2 exposes urlopen on urllib itself; Python 3 moved it to
    # urllib.request. Importing locally keeps the function usable on both.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    request = baseurl + gendatestr(diff)
    try:
        conn = urlopen(request)
    except IOError:
        return False
    try:
        page = conn.read()
    except IOError:
        return False
    finally:
        # Release the connection even when the read fails.
        conn.close()
    # Python 3 hands back bytes. Decode as ISO-8859-1, the encoding that
    # writer declares in its XML header, so callers always receive text.
    if not isinstance(page, str):
        page = page.decode("iso-8859-1")
    return page
import re
# Patterns are compiled once instead of once per table. Raw strings avoid
# the invalid "\/" escape sequences.
_COMMENT_RE = re.compile(r'<!--.*?-->', re.I | re.M | re.S)
_EVENT_TABLE_RE = re.compile(
    r'<table border="0" cellpadding="0" cellspacing="0"'
    r' class="listheadertable">.*?</table>',
    re.I | re.M | re.S)
_TITLE_RE = re.compile(
    r'(?<=<td colspan="4" class="calendarlisttitle">).*?(?=</td>)',
    re.I | re.M | re.S)
_DATE_RE = re.compile(
    r'(?<=<td colspan="4" class="upcoming_datelist">).*?(?=</td>)',
    re.I | re.M | re.S)
_CONTENT_RE = re.compile(
    r'(?<=<td colspan="4">).*?(?=</td>)',
    re.I | re.M | re.S)


def parser(s):
    """Extract the event entries from a calendar page.

    s: HTML text of the page. A falsy value (for example the False that
        crawler returns when a download fails) yields an empty list
        instead of raising TypeError inside the regex engine.

    Returns a list with one [names, dates, contents] entry per
    "listheadertable" table found after HTML comments are removed. Each
    of the three items is the list of strings matched inside that table
    for the title cell, the date cell and the plain colspan="4" cell.
    """
    if not s:
        return []
    # Strip comments first so commented-out tables are not picked up.
    s = _COMMENT_RE.sub("", s)
    triplets = []
    for table in _EVENT_TABLE_RE.findall(s):
        triplets.append([
            _TITLE_RE.findall(table),
            _DATE_RE.findall(table),
            _CONTENT_RE.findall(table),
        ])
    return triplets
def writer(filename, sumlist):
    """Write the collected events to "<filename>.xml".

    filename: output path without the ".xml" extension; the same string
        is used as the name of the XML root element.
    sumlist: sequence of entries where entry[0] is the element name for
        one day and entry[1] is a sequence of events. Each event is
        indexed 0..2 for name, date and content.

    NOTE: values are written with str() and are not XML-escaped, so list
    values appear in their Python repr form and any markup in the content
    is passed through unchanged.
    """
    parts = ["<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n<" + filename + ">\n"]
    for ele in sumlist:
        day_tag = str(ele[0])
        parts.append("<" + day_tag + ">\n")
        for k in ele[1]:
            parts.append("<tname>\n" + str(k[0]) + "\n</tname>\n")
            parts.append("<tdate>\n" + str(k[1]) + "\n</tdate>\n")
            parts.append("<tcontent>\n" + str(k[2]) + "\n</tcontent>\n")
        parts.append("</" + day_tag + ">\n")
    parts.append("</" + filename + ">\n")
    # The with-block closes the file even if the write fails.
    with open(filename + '.xml', 'wt') as f:
        f.write("".join(parts))
def _field_text(value):
    """Return one event field as plain text; list items are space-joined."""
    if isinstance(value, (list, tuple)):
        return " ".join([str(item) for item in value])
    return str(value)


def writer_html(filename, sumlist):
    """Write the collected events to "<filename>.html" as a one-column table.

    filename: output path without the ".html" extension.
    sumlist: sequence of entries where entry[0] is the heading for one
        day and entry[1] is a sequence of events. Each event is indexed
        0..2 for name, date and content; each of those is normally the
        list of strings produced by parser.

    For every event the name, date and content text are concatenated.
    Tabs become spaces, CRLF pairs become "<br />" and empty "<p></p>"
    pairs become a single space.
    """
    parts = ["<html>\n<table border=1>\n"]
    for ele in sumlist:
        parts.append("<tr>\n<td>\n------" + str(ele[0]) + "------<br /><br />")
        for k in ele[1]:
            # Work on the field text itself rather than slicing the repr()
            # of the list, which mangles quotes, backslashes and non-ASCII
            # characters.
            text = _field_text(k[0]) + _field_text(k[1]) + _field_text(k[2])
            text = text.replace("\t", " ")
            text = text.replace("\r\n", "<br />")
            text = text.replace("<p></p>", " ")
            parts.append(text)
        parts.append("</td></tr>\n")
    parts.append("</table></html>\n")
    # The with-block closes the file even if the write fails.
    with open(filename + '.html', 'wt') as f:
        f.write("".join(parts))
# Main script: crawl one week of calendar pages for each mall and export
# the events as XML and HTML.
http_hampshiremall = "http://www.hampshiremall.com/calendar.asp?action=detail&incDate="
http_holyokemall = "http://www.holyokemall.com/calendar.asp?action=detail&incDate="


def _export_mall(filename, baseurl, maxdays=7):
    """Crawl `maxdays` days starting today and write filename.xml/.html.

    filename: base name of the two output files.
    baseurl: calendar URL prefix passed to crawler.
    maxdays: number of consecutive days to fetch, starting with today.
    """
    sumlist = []
    sumlist_html = []
    for days in range(0, maxdays):
        sdate = gendatestr(days)
        page = crawler(baseurl, days)
        # crawler returns False when the download fails; record the day
        # with no events rather than handing False to the parser.
        if page:
            triplets = parser(page)
        else:
            triplets = []
        # "/" is not allowed in an XML element name, so the XML variant
        # uses an underscore-separated tag.
        sumlist.append(["M_D_Y" + sdate.replace("/", "_"), triplets])
        sumlist_html.append([sdate, triplets])
    writer(filename, sumlist)
    writer_html(filename, sumlist_html)


if __name__ == "__main__":
    _export_mall("hampshiremall", http_hampshiremall)
    _export_mall("holyokemall", http_holyokemall)
[/code]
imac 2006-7-14 09:31 PM
faint... HTML strips out all my tab indentation
Python needs indentation to identify the logical structure...
so if anyone wants to use this code, email me to ask for it or add the indentation yourself (it is not difficult if you can understand the code...)
good luck
chao 2006-7-14 09:31 PM
顶啊!
大家很快就能在主页上看到local的一些deal了
[url]http://www.umasscssa.org/[/url]
那一片俗 2006-7-21 01:21 AM
虽然对编程一窍不通,但是。。。。。。。。。。。。。。还是要来灌一次水
嘿嘿
freei 2006-9-8 05:01 PM
呵呵,偶在上baby course, 191B, special interest group里面有python group, 会不会特别难~
[quote]原帖由 [i]那一片俗[/i] 于 2006-7-21 12:21 AM 发表
虽然对编程一窍不通,但是。。。。。。。。。。。。。。还是要来灌一次水
嘿嘿 [/quote]
yuxi 2006-9-8 05:05 PM
俺们系要开python seminar,看来俺要去上一个。
chao 2006-9-8 05:19 PM
欢迎参加python discussion!
有什么问题放上来吧
imac免费解答!:):)
chao 2006-9-8 09:34 PM
[quote]原帖由 [i]满弓刀[/i] 于 2006-9-8 05:37 PM 发表
往哪放啊? [/quote]
贴上在这个版啊