{"id":586,"date":"2019-10-15T16:23:48","date_gmt":"2019-10-15T08:23:48","guid":{"rendered":"https:\/\/www.linuxdevops.cn\/?p=586"},"modified":"2019-10-15T16:26:59","modified_gmt":"2019-10-15T08:26:59","slug":"python-crawler-exercise-second-hand-room-data","status":"publish","type":"post","link":"https:\/\/www.linuxdevops.cn\/2019\/10\/python-crawler-exercise-second-hand-room-data\/","title":{"rendered":"Python\u722c\u866b\u7ec3\u4e60-\u4e8c\u624b\u623f\u6570\u636e"},"content":{"rendered":"\n
from pyquery import PyQuery as pq\nimport re\nimport pymysql\n\ndef huoqushuju():\n for num in range(1,10):\n #\u8bf7\u6c42\u4fe1\u606f\n headers = {'User-Agent': 'Mozilla\/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko\/20100101 Firefox\/23.0'}\n list_url = \"https:\/\/url....\"+ str(num) + \".....\"\n doc = pq(url=list_url,encoding='utf-8',headers=headers)\n items = doc('.list-ershou')\n lis = items.find('li').items()\n list=[]\n for i in lis:\n xiaoqu = i('.xicon-xq').siblings().text()\n weizhi = i('.xicon-addr').siblings().text()\n xiangqing = i('.list-houstTab').text()\n guige = re.findall('\\s\\d\u5ba4\\s\\d\u5385\\s\\d\u536b', xiangqing)\n if not guige:\n guige = 'null'\n louceng = re.findall('\u7b2c\\d+\u5c42\\s\u5171\\d+\u5c42', xiangqing)\n if not louceng:\n louceng = 'null'\n fangling = re.findall('\u623f\u9f84\\s\\d+\u5e74', xiangqing)\n if not fangling:\n fangling = 'null'\n zongjia = i('.price1').text()\n zongjia = re.findall('\\d+', zongjia)\n zhongjie = i('.tab').eq(1).text()\n try:\n zhongjie = re.split('\uff1a',zhongjie)[1]\n except:\n zhongjie ='null'\n jiage = i('.price2').text()\n jiage = re.findall('\\d+',jiage)\n # print(xiaoqu, zongjia, jiage, weizhi, louceng, guige, fangling, zhongjie)\n\n db = pymysql.connect(host='localhost',user='root',password='123123',port=3306, db='test')\n cursor = db.cursor()\n sql = 'insert into ershoufang2 (xiaoqu,zongjia,jiage,weizhi,louceng,guige,fangling,zhongjie) values(%s,%s,%s,%s,%s,%s,%s,%s)'\n # cursor.execute(sql, (xiaoqu, zongjia, jiage, weizhi, louceng, guige, fangling, zhongjie))\n try:\n cursor.execute(sql, (xiaoqu,zongjia,jiage,weizhi,louceng,guige,fangling,zhongjie))\n db.commit()\n print('\u63d2\u5165\u6210\u529f\uff01')\n except:\n print('\u63d2\u5165\u5931\u8d25!!!!!!!!!!!!!!!\uff01')\n db.close()\n\nif __name__ == '__main__':\n huoqushuju()\n<\/pre>\n","protected":false},"excerpt":{"rendered":"from pyquery import PyQuery as pq import re import pymy<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[13],"tags":[32],"yoast_head":"\n
Python\u722c\u866b\u7ec3\u4e60-\u4e8c\u624b\u623f\u6570\u636e - Linux\u81ea\u52a8\u5316\u8fd0\u7ef4<\/title>\n\n\n\n\n\n\n\n\n\n\n\n\n