{"id":586,"date":"2019-10-15T16:23:48","date_gmt":"2019-10-15T08:23:48","guid":{"rendered":"https:\/\/www.linuxdevops.cn\/?p=586"},"modified":"2019-10-15T16:26:59","modified_gmt":"2019-10-15T08:26:59","slug":"python-crawler-exercise-second-hand-room-data","status":"publish","type":"post","link":"https:\/\/www.linuxdevops.cn\/2019\/10\/python-crawler-exercise-second-hand-room-data\/","title":{"rendered":"Python\u722c\u866b\u7ec3\u4e60-\u4e8c\u624b\u623f\u6570\u636e"},"content":{"rendered":"\n
from pyquery import PyQuery as pq\nimport re\nimport pymysql\n\ndef huoqushuju():\n    for num in range(1,10):\n        #\u8bf7\u6c42\u4fe1\u606f\n        headers = {'User-Agent': 'Mozilla\/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko\/20100101 Firefox\/23.0'}\n        list_url = \"https:\/\/url....\"+ str(num) + \".....\"\n        doc = pq(url=list_url,encoding='utf-8',headers=headers)\n        items = doc('.list-ershou')\n        lis = items.find('li').items()\n        list=[]\n        for i in lis:\n            xiaoqu = i('.xicon-xq').siblings().text()\n            weizhi = i('.xicon-addr').siblings().text()\n            xiangqing = i('.list-houstTab').text()\n            guige = re.findall('\\s\\d\u5ba4\\s\\d\u5385\\s\\d\u536b', xiangqing)\n            if not guige:\n                guige = 'null'\n            louceng = re.findall('\u7b2c\\d+\u5c42\\s\u5171\\d+\u5c42', xiangqing)\n            if not louceng:\n                louceng = 'null'\n            fangling = re.findall('\u623f\u9f84\\s\\d+\u5e74', xiangqing)\n            if not fangling:\n                fangling = 'null'\n            zongjia = i('.price1').text()\n            zongjia = re.findall('\\d+', zongjia)\n            zhongjie = i('.tab').eq(1).text()\n            try:\n                zhongjie = re.split('\uff1a',zhongjie)[1]\n            except:\n                zhongjie ='null'\n            jiage = i('.price2').text()\n            jiage = re.findall('\\d+',jiage)\n            # print(xiaoqu, zongjia, jiage, weizhi, louceng, guige, fangling, zhongjie)\n\n            db = pymysql.connect(host='localhost',user='root',password='123123',port=3306, db='test')\n            cursor = db.cursor()\n            sql = 'insert into ershoufang2 (xiaoqu,zongjia,jiage,weizhi,louceng,guige,fangling,zhongjie) values(%s,%s,%s,%s,%s,%s,%s,%s)'\n            # cursor.execute(sql, (xiaoqu, zongjia, jiage, weizhi, louceng, guige, fangling, zhongjie))\n            try:\n                cursor.execute(sql, (xiaoqu,zongjia,jiage,weizhi,louceng,guige,fangling,zhongjie))\n                db.commit()\n                print('\u63d2\u5165\u6210\u529f\uff01')\n            except:\n                print('\u63d2\u5165\u5931\u8d25!!!!!!!!!!!!!!!\uff01')\n                db.close()\n\nif __name__ == '__main__':\n    huoqushuju()\n<\/pre>\n","protected":false},"excerpt":{"rendered":"

from pyquery import PyQuery as pq import re import pymy<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[13],"tags":[32],"yoast_head":"\nPython\u722c\u866b\u7ec3\u4e60-\u4e8c\u624b\u623f\u6570\u636e - Linux\u81ea\u52a8\u5316\u8fd0\u7ef4<\/title>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/www.linuxdevops.cn\/2019\/10\/python-crawler-exercise-second-hand-room-data\/\" \/>\n<meta property=\"og:locale\" content=\"zh_CN\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"Python\u722c\u866b\u7ec3\u4e60-\u4e8c\u624b\u623f\u6570\u636e - Linux\u81ea\u52a8\u5316\u8fd0\u7ef4\" \/>\n<meta property=\"og:description\" content=\"from pyquery import PyQuery as pq import re import pymy\" \/>\n<meta property=\"og:url\" content=\"https:\/\/www.linuxdevops.cn\/2019\/10\/python-crawler-exercise-second-hand-room-data\/\" \/>\n<meta property=\"og:site_name\" content=\"Linux\u81ea\u52a8\u5316\u8fd0\u7ef4\" \/>\n<meta property=\"article:published_time\" content=\"2019-10-15T08:23:48+00:00\" \/>\n<meta property=\"article:modified_time\" content=\"2019-10-15T08:26:59+00:00\" \/>\n<meta name=\"author\" content=\"\u7ba1\u7406\u5458\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<script type=\"application\/ld+json\" class=\"yoast-schema-graph\">{\"@context\":\"https:\/\/schema.org\",\"@graph\":[{\"@type\":\"Article\",\"@id\":\"https:\/\/www.linuxdevops.cn\/2019\/10\/python-crawler-exercise-second-hand-room-data\/#article\",\"isPartOf\":{\"@id\":\"https:\/\/www.linuxdevops.cn\/2019\/10\/python-crawler-exercise-second-hand-room-data\/\"},\"author\":{\"name\":\"\u7ba1\u7406\u5458\",\"@id\":\"https:\/\/www.linuxdevops.cn\/#\/schema\/person\/3e206335d5796fdd8679e449df72a0d1\"},\"headline\":\"Python\u722c\u866b\u7ec3\u4e60-\u4e8c\u624b\u623f\u6570\u636e\",\"datePublished\":\"2019-10-15T08:23:48+00:00\",\"dateModified\":\"2019-10-15T08:26:59+00:00\",\"mainEntityOfPage\":{\"@id\":\"https:\/\/www.linuxdevops.cn\/2019\/10\/python-crawler-exercise-second-hand-room-data\/\"},\"wordCount\":2,\"commentCount\":0,\"publisher\":{\"@id\":\"https:\/\/www.linuxdevops.cn\/#\/schema\/person\/3e206335d5796fdd8679e449df72a0d1\"},\"keywords\":[\"Python\u722c\u866b\"],\"articleSection\":[\"Python\"],\"inLanguage\":\"zh-CN\",\"potentialAction\":[{\"@type\":\"CommentAction\",\"name\":\"Comment\",\"target\":[\"https:\/\/www.linuxdevops.cn\/2019\/10\/python-crawler-exercise-second-hand-room-data\/#respond\"]}]},{\"@type\":\"WebPage\",\"@id\":\"https:\/\/www.linuxdevops.cn\/2019\/10\/python-crawler-exercise-second-hand-room-data\/\",\"url\":\"https:\/\/www.linuxdevops.cn\/2019\/10\/python-crawler-exercise-second-hand-room-data\/\",\"name\":\"Python\u722c\u866b\u7ec3\u4e60-\u4e8c\u624b\u623f\u6570\u636e - Linux\u81ea\u52a8\u5316\u8fd0\u7ef4\",\"isPartOf\":{\"@id\":\"https:\/\/www.linuxdevops.cn\/#website\"},\"datePublished\":\"2019-10-15T08:23:48+00:00\",\"dateModified\":\"2019-10-15T08:26:59+00:00\",\"breadcrumb\":{\"@id\":\"https:\/\/www.linuxdevops.cn\/2019\/10\/python-crawler-exercise-second-hand-room-data\/#breadcrumb\"},\"inLanguage\":\"zh-CN\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\/\/www.linuxdevops.cn\/2019\/10\/python-crawler-exercise-second-hand-room-data\/\"]}]},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\/\/www.linuxdevops.cn\/2019\/10\/python-crawler-exercise-second-hand-room-data\/#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"\u9996\u9875\",\"item\":\"https:\/\/www.linuxdevops.cn\/\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"Python\",\"item\":\"https:\/\/www.linuxdevops.cn\/python\/\"},{\"@type\":\"ListItem\",\"position\":3,\"name\":\"Python\u722c\u866b\u7ec3\u4e60-\u4e8c\u624b\u623f\u6570\u636e\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\/\/www.linuxdevops.cn\/#website\",\"url\":\"https:\/\/www.linuxdevops.cn\/\",\"name\":\"Linux\u81ea\u52a8\u5316\u8fd0\u7ef4\",\"description\":\"Linux\u81ea\u52a8\u5316\u8fd0\u7ef4\u7b14\u8bb0\",\"publisher\":{\"@id\":\"https:\/\/www.linuxdevops.cn\/#\/schema\/person\/3e206335d5796fdd8679e449df72a0d1\"},\"alternateName\":\"linuxdevops\",\"potentialAction\":[{\"@type\":\"SearchAction\",\"target\":{\"@type\":\"EntryPoint\",\"urlTemplate\":\"https:\/\/www.linuxdevops.cn\/?s={search_term_string}\"},\"query-input\":\"required name=search_term_string\"}],\"inLanguage\":\"zh-CN\"},{\"@type\":[\"Person\",\"Organization\"],\"@id\":\"https:\/\/www.linuxdevops.cn\/#\/schema\/person\/3e206335d5796fdd8679e449df72a0d1\",\"name\":\"\u7ba1\u7406\u5458\",\"image\":{\"@type\":\"ImageObject\",\"inLanguage\":\"zh-CN\",\"@id\":\"https:\/\/www.linuxdevops.cn\/#\/schema\/person\/image\/\",\"url\":\"https:\/\/www.linuxdevops.cn\/wp-content\/uploads\/2019\/07\/cropped-index.jpg\",\"contentUrl\":\"https:\/\/www.linuxdevops.cn\/wp-content\/uploads\/2019\/07\/cropped-index.jpg\",\"width\":512,\"height\":512,\"caption\":\"\u7ba1\u7406\u5458\"},\"logo\":{\"@id\":\"https:\/\/www.linuxdevops.cn\/#\/schema\/person\/image\/\"},\"description\":\"\u7ba1\u7406\u5458\",\"url\":\"https:\/\/www.linuxdevops.cn\/author\/root\/\"}]}<\/script>\n<!-- \/ Yoast SEO plugin. -->","yoast_head_json":{"title":"Python\u722c\u866b\u7ec3\u4e60-\u4e8c\u624b\u623f\u6570\u636e - Linux\u81ea\u52a8\u5316\u8fd0\u7ef4","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/www.linuxdevops.cn\/2019\/10\/python-crawler-exercise-second-hand-room-data\/","og_locale":"zh_CN","og_type":"article","og_title":"Python\u722c\u866b\u7ec3\u4e60-\u4e8c\u624b\u623f\u6570\u636e - Linux\u81ea\u52a8\u5316\u8fd0\u7ef4","og_description":"from pyquery import PyQuery as pq import re import pymy","og_url":"https:\/\/www.linuxdevops.cn\/2019\/10\/python-crawler-exercise-second-hand-room-data\/","og_site_name":"Linux\u81ea\u52a8\u5316\u8fd0\u7ef4","article_published_time":"2019-10-15T08:23:48+00:00","article_modified_time":"2019-10-15T08:26:59+00:00","author":"\u7ba1\u7406\u5458","twitter_card":"summary_large_image","schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"Article","@id":"https:\/\/www.linuxdevops.cn\/2019\/10\/python-crawler-exercise-second-hand-room-data\/#article","isPartOf":{"@id":"https:\/\/www.linuxdevops.cn\/2019\/10\/python-crawler-exercise-second-hand-room-data\/"},"author":{"name":"\u7ba1\u7406\u5458","@id":"https:\/\/www.linuxdevops.cn\/#\/schema\/person\/3e206335d5796fdd8679e449df72a0d1"},"headline":"Python\u722c\u866b\u7ec3\u4e60-\u4e8c\u624b\u623f\u6570\u636e","datePublished":"2019-10-15T08:23:48+00:00","dateModified":"2019-10-15T08:26:59+00:00","mainEntityOfPage":{"@id":"https:\/\/www.linuxdevops.cn\/2019\/10\/python-crawler-exercise-second-hand-room-data\/"},"wordCount":2,"commentCount":0,"publisher":{"@id":"https:\/\/www.linuxdevops.cn\/#\/schema\/person\/3e206335d5796fdd8679e449df72a0d1"},"keywords":["Python\u722c\u866b"],"articleSection":["Python"],"inLanguage":"zh-CN","potentialAction":[{"@type":"CommentAction","name":"Comment","target":["https:\/\/www.linuxdevops.cn\/2019\/10\/python-crawler-exercise-second-hand-room-data\/#respond"]}]},{"@type":"WebPage","@id":"https:\/\/www.linuxdevops.cn\/2019\/10\/python-crawler-exercise-second-hand-room-data\/","url":"https:\/\/www.linuxdevops.cn\/2019\/10\/python-crawler-exercise-second-hand-room-data\/","name":"Python\u722c\u866b\u7ec3\u4e60-\u4e8c\u624b\u623f\u6570\u636e - Linux\u81ea\u52a8\u5316\u8fd0\u7ef4","isPartOf":{"@id":"https:\/\/www.linuxdevops.cn\/#website"},"datePublished":"2019-10-15T08:23:48+00:00","dateModified":"2019-10-15T08:26:59+00:00","breadcrumb":{"@id":"https:\/\/www.linuxdevops.cn\/2019\/10\/python-crawler-exercise-second-hand-room-data\/#breadcrumb"},"inLanguage":"zh-CN","potentialAction":[{"@type":"ReadAction","target":["https:\/\/www.linuxdevops.cn\/2019\/10\/python-crawler-exercise-second-hand-room-data\/"]}]},{"@type":"BreadcrumbList","@id":"https:\/\/www.linuxdevops.cn\/2019\/10\/python-crawler-exercise-second-hand-room-data\/#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"\u9996\u9875","item":"https:\/\/www.linuxdevops.cn\/"},{"@type":"ListItem","position":2,"name":"Python","item":"https:\/\/www.linuxdevops.cn\/python\/"},{"@type":"ListItem","position":3,"name":"Python\u722c\u866b\u7ec3\u4e60-\u4e8c\u624b\u623f\u6570\u636e"}]},{"@type":"WebSite","@id":"https:\/\/www.linuxdevops.cn\/#website","url":"https:\/\/www.linuxdevops.cn\/","name":"Linux\u81ea\u52a8\u5316\u8fd0\u7ef4","description":"Linux\u81ea\u52a8\u5316\u8fd0\u7ef4\u7b14\u8bb0","publisher":{"@id":"https:\/\/www.linuxdevops.cn\/#\/schema\/person\/3e206335d5796fdd8679e449df72a0d1"},"alternateName":"linuxdevops","potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemplate":"https:\/\/www.linuxdevops.cn\/?s={search_term_string}"},"query-input":"required name=search_term_string"}],"inLanguage":"zh-CN"},{"@type":["Person","Organization"],"@id":"https:\/\/www.linuxdevops.cn\/#\/schema\/person\/3e206335d5796fdd8679e449df72a0d1","name":"\u7ba1\u7406\u5458","image":{"@type":"ImageObject","inLanguage":"zh-CN","@id":"https:\/\/www.linuxdevops.cn\/#\/schema\/person\/image\/","url":"https:\/\/www.linuxdevops.cn\/wp-content\/uploads\/2019\/07\/cropped-index.jpg","contentUrl":"https:\/\/www.linuxdevops.cn\/wp-content\/uploads\/2019\/07\/cropped-index.jpg","width":512,"height":512,"caption":"\u7ba1\u7406\u5458"},"logo":{"@id":"https:\/\/www.linuxdevops.cn\/#\/schema\/person\/image\/"},"description":"\u7ba1\u7406\u5458","url":"https:\/\/www.linuxdevops.cn\/author\/root\/"}]}},"_links":{"self":[{"href":"https:\/\/www.linuxdevops.cn\/wp-json\/wp\/v2\/posts\/586"}],"collection":[{"href":"https:\/\/www.linuxdevops.cn\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.linuxdevops.cn\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.linuxdevops.cn\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.linuxdevops.cn\/wp-json\/wp\/v2\/comments?post=586"}],"version-history":[{"count":3,"href":"https:\/\/www.linuxdevops.cn\/wp-json\/wp\/v2\/posts\/586\/revisions"}],"predecessor-version":[{"id":677,"href":"https:\/\/www.linuxdevops.cn\/wp-json\/wp\/v2\/posts\/586\/revisions\/677"}],"wp:attachment":[{"href":"https:\/\/www.linuxdevops.cn\/wp-json\/wp\/v2\/media?parent=586"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.linuxdevops.cn\/wp-json\/wp\/v2\/categories?post=586"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.linuxdevops.cn\/wp-json\/wp\/v2\/tags?post=586"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}