Python代码分享
邕圣祐
# encoding=utf-8
from
utils
import
util, dbmysql
import
re, time,datetime,requests
from
lxml
import
etree
def rule(path, content):
    """Return the first ``re.findall`` hit of ``path`` inside ``content``.

    Args:
        path: Regular-expression pattern passed to ``re.findall``.
        content: Text that is searched.

    Returns:
        The first element of the ``re.findall`` result (a string, or a
        tuple when the pattern has several groups), or ``''`` when the
        pattern does not match at all.
    """
    matches = re.findall(path, content)
    return matches[0] if matches else ''
# Minimal request headers: only a desktop Chrome User-Agent string.
head = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/74.0.3729.157 Safari/537.36"
    ),
}
def headers():
    """Build the HTTP headers used when requesting a detail page.

    Returns:
        dict: A newly created dictionary with the ``Accept`` and
        ``User-Agent`` headers. A fresh object is returned on every call,
        so callers may modify it freely.
    """
    return {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.157 Safari/537.36",
    }
def _ssjzw_clean(raw):
    """Decode a GB2312 response body and drop newlines, carriage returns and tabs."""
    text = raw.decode('gb2312', 'ignore')
    return text.replace('\n', '').replace('\r', '').replace('\t', '')


def _ssjzw_company(html):
    """Return the hiring company named on a detail page, or '' when none is found."""
    # Prefer the variant where the company name is wrapped in a link.
    linked = re.findall('<ul>招聘单位:<a href=.*?target="_blank">(.*?)</a></ul>', html)
    if linked:
        return linked[0]
    # Fall back to the plain-text variant.
    plain = re.findall('<ul>招聘单位:(.*?)</ul>', html)
    return plain[0] if plain else ''


def ssjzw(pages=range(1, 100), delay=2):
    """Crawl ssjzw.com job listings and store unseen postings in table ``ssjzw``.

    For every listing page the detail links are extracted; a link whose URL
    is already present in the ``ssjzw`` table is skipped, otherwise the
    detail page is fetched, the company, area, the ``innerHTML='...'`` value
    (stored as ``phonenum``) and the update time are extracted, and a row is
    inserted together with the current local timestamp.

    Args:
        pages: Iterable of listing page numbers to visit. Defaults to
            pages 1 through 99.
        delay: Seconds to sleep after each processed link. Defaults to 2.

    Returns:
        None. Progress and errors are reported with ``print``.

    Errors are never propagated: a failure on one detail page is printed and
    the next link is tried; a failure at page level (listing fetch or the
    existence query) is printed and the crawl moves on to the next page.
    """
    for page in pages:
        try:
            res = util.get('https://www.ssjzw.com/jzzp/pn{}'.format(page))
            content = _ssjzw_clean(res['data'])
            urls = re.findall('<li class="tys" style="float:left;width:360px;"><a href="(.*?)" target=', content)
            for i, num in enumerate(urls, start=1):
                # NOTE(review): both SQL statements below are built by string
                # interpolation from scraped text, so a value containing a
                # quote breaks the statement (and is an injection risk).
                # Switch to parameterized queries if dbmysql supports them.
                sql = "select * from ssjzw where url='%s'" % num
                existing = dbmysql.fetchall(sql)
                if len(existing) != 0:
                    print("上上兼职网第{}页第{}条已存在".format(page, i))
                    # NOTE(review): the source's indentation was lost; this
                    # pause is assumed to belong to the "already exists"
                    # branch -- confirm against the original script.
                    time.sleep(delay)
                    continue
                try:
                    detail = util.get(num, headers=headers())
                    content_detail = _ssjzw_clean(detail['data'])
                    company = _ssjzw_company(content_detail)
                    area = rule('<ul>兼职地区:<a href=.*?>(.*?)兼职', content_detail)
                    phonenum = rule("innerHTML='(.*?)'", content_detail)
                    uploadTime = rule('<ul>更新时间:(.*?)</ul>', content_detail)
                    sql2 = "insert into ssjzw (url,company,area,phonenum,uploadTime,insertTime) value ('%s','%s','%s','%s','%s','%s')" % (
                        num, company, area, phonenum, uploadTime, datetime.datetime.now())
                    ok = dbmysql.execute(sql2)
                    # Equality (not identity) is kept on purpose: the return
                    # contract of dbmysql.execute is not visible here.
                    if ok == False:
                        print("上上兼职网第{}页第{}条insert失败".format(page, i))
                    print("正在insert上上兼职网第{}页第{}条{}".format(page, i, num))
                    time.sleep(delay)
                except Exception as e:
                    # Best effort: report the failing link and carry on.
                    print(num)
                    print(e)
        except Exception as e:
            # A page-level failure abandons this page only.
            print(e)
# Start the crawl immediately; there is no __main__ guard, so importing this
# module runs the crawl as well.
ssjzw()
你的回复
回复请先登录，或注册
相关内容推荐
- 分享|非常全的ds interview preparation resources
- 学习方法|给非coding专业想转码的友友提供一些个人学习思路...
- 转码之后 | 从测试到iOS,从iOS 到 Python-NLP
- 经验分享|未来5-10年懂编程能踩到风口
- AI应用专区|用了多年的DeepL被我用OpenAI Translator替代了
- .NET平台和Csharp编程开发学习教程
- 软件测试入门基础视频教程 性能测试loadrunner 自动化测试QTP
- 分享经验|分享最近使用的量化学习的软件
- 2020年3月 - Tableau + MySQL + SAS + 复习ML
- Coursera上最受欢迎的Python课几乎免费啦
最新讨论 ( 更多 )
- 唯美英文诗词四首 (邕圣祐)
- 宿陈处士书斋 (邕圣祐)
- 次韵蒲左丞游虎丘十首 (邕圣祐)
- 宋代·朱长文《次韵司封使君和程给中越来溪三章》 (邕圣祐)
- Python代码分享 (邕圣祐)