{"id":1162,"date":"2026-07-02T12:26:51","date_gmt":"2026-07-02T04:26:51","guid":{"rendered":"https:\/\/onehustle.io\/index.php\/2026\/07\/02\/scrape-images-from-website-python\/"},"modified":"2026-07-02T12:26:51","modified_gmt":"2026-07-02T04:26:51","slug":"scrape-images-from-website-python","status":"publish","type":"post","link":"https:\/\/onehustle.io\/index.php\/2026\/07\/02\/scrape-images-from-website-python\/","title":{"rendered":"\u5982\u4f55\u7528 Python \u722c\u53d6\u7f51\u7ad9\u56fe\u7247\uff1a\u722c\u56fe\u6559\u7a0b"},"content":{"rendered":"<p class=\"wp-block-paragraph\">\u56fe\u7247\u722c\u53d6\u662f\u7f51\u9875\u722c\u53d6\u4e2d\u975e\u5e38\u5e38\u89c1\u7684\u9700\u6c42\u2014\u2014\u65e0\u8bba\u662f\u6536\u96c6\u8bad\u7ec3\u6570\u636e\u3001\u6784\u5efa\u56fe\u7247\u5e93\u8fd8\u662f\u7ade\u54c1\u5206\u6790\uff0c\u4f60\u90fd\u4f1a\u9700\u8981\u4ece\u7f51\u7ad9\u4e0a\u6279\u91cf\u4e0b\u8f7d\u56fe\u7247\u3002\u672c\u6587\u5c06\u6559\u4f60<strong>\u5982\u4f55\u4f7f\u7528 Python \u722c\u53d6\u7f51\u7ad9\u4e0a\u7684\u56fe\u7247<\/strong>\u3002<\/p>\n<h2 class=\"wp-block-heading\">\u56fe\u7247\u722c\u53d6\u7684\u57fa\u672c\u539f\u7406<\/h2>\n<ol class=\"wp-block-list\"><li>\u53d1\u9001 HTTP \u8bf7\u6c42\u83b7\u53d6\u9875\u9762 HTML<\/li><li>\u4f7f\u7528\u89e3\u6790\u5e93\u63d0\u53d6\u6240\u6709 <code>&lt;img&gt;<\/code> \u6807\u7b7e<\/li><li>\u83b7\u53d6\u6bcf\u4e2a\u56fe\u7247\u7684 <code>src<\/code> \u5c5e\u6027<\/li><li>\u4e0b\u8f7d\u56fe\u7247\u5230\u672c\u5730<\/li><\/ol>\n<h2 class=\"wp-block-heading\">Python \u56fe\u7247\u722c\u53d6\u793a\u4f8b\u4ee3\u7801<\/h2>\n<pre class=\"wp-block-code\"><code>import requests\nfrom bs4 import BeautifulSoup\nimport os\n\ndef scrape_images(url, save_dir='images'):\n    os.makedirs(save_dir, exist_ok=True)\n    r = requests.get(url, headers={'User-Agent': 'Mozilla\/5.0...'})\n    soup = BeautifulSoup(r.content, 'html.parser')\n    images = soup.find_all('img')\n    for i, img in enumerate(images):\n        src = 
img.get('src')\n        if src:\n            if not src.startswith('http'):\n                src = url + src\n            img_data = requests.get(src).content\n            with open(f'{save_dir}\/img_{i}.jpg', 'wb') as f:\n                f.write(img_data)\n    print(f'Downloaded {len(images)} images')<\/code><\/pre>\n<h2 class=\"wp-block-heading\">\u56fe\u7247\u722c\u53d6\u6ce8\u610f\u4e8b\u9879<\/h2>\n<ul class=\"wp-block-list\"><li><strong>\u5904\u7406\u76f8\u5bf9\u8def\u5f84<\/strong>\uff1a\u6709\u4e9b\u56fe\u7247 src \u662f\u76f8\u5bf9\u8def\u5f84\uff0c\u9700\u8981\u62fc\u63a5\u5b8c\u6574 URL<\/li><li><strong>\u4f7f\u7528\u4ee3\u7406<\/strong>\uff1a\u5927\u89c4\u6a21\u722c\u53d6\u9700\u8981\u4ee3\u7406\u6765\u907f\u514d\u88ab\u5c01\u9501<\/li><li><strong>\u5c0a\u91cd robots.txt<\/strong>\uff1a\u68c0\u67e5\u7f51\u7ad9\u662f\u5426\u5141\u8bb8\u722c\u53d6<\/li><li><strong>\u7248\u6743\u95ee\u9898<\/strong>\uff1a\u6ce8\u610f\u56fe\u7247\u7248\u6743\uff0c\u672a\u7ecf\u6388\u6743\u4e0d\u8981\u7528\u4e8e\u5546\u4e1a\u7528\u9014<\/li><li><strong>\u8bf7\u6c42\u95f4\u9694<\/strong>\uff1a\u8bbe\u7f6e\u5408\u7406\u5ef6\u8fdf\uff0c\u907f\u514d\u538b\u57ae\u76ee\u6807\u670d\u52a1\u5668<\/li><li><strong>\u5904\u7406\u61d2\u52a0\u8f7d<\/strong>\uff1a\u73b0\u4ee3\u7f51\u7ad9\u5927\u91cf\u4f7f\u7528\u61d2\u52a0\u8f7d\uff0c\u771f\u5b9e src \u53ef\u80fd\u5b58\u50a8\u5728 <code>data-src<\/code> \u5c5e\u6027\u4e2d<\/li><\/ul>\n<h2 class=\"wp-block-heading\">\u63a8\u8350\u5de5\u5177\u548c\u5e93<\/h2>\n<ul class=\"wp-block-list\"><li><strong><a href=\"https:\/\/www.crummy.com\/software\/BeautifulSoup\/bs4\/doc\/\">BeautifulSoup<\/a><\/strong>\uff1aHTML \u89e3\u6790<\/li><li><strong><a href=\"https:\/\/scrapy.org\/\">Scrapy<\/a><\/strong>\uff1a\u5b8c\u6574\u7684\u722c\u53d6\u6846\u67b6\uff0c\u5185\u7f6e\u56fe\u7247\u7ba1\u9053<\/li><li><strong><a href=\"https:\/\/www.selenium.dev\/\">Selenium<\/a><\/strong>\uff1a\u5904\u7406 JS \u6e32\u67d3\u7684\u56fe\u7247<\/li><\/ul>\n<h2 
class=\"wp-block-heading\">\u603b\u7ed3<\/h2>\n<p class=\"wp-block-paragraph\">\u56fe\u7247\u722c\u53d6\u5728\u6280\u672f\u5c42\u9762\u76f8\u5bf9\u76f4\u63a5\uff0c\u96be\u70b9\u5728\u4e8e\u89c4\u6a21\u5316\u2014\u2014\u9700\u8981\u5904\u7406\u61d2\u52a0\u8f7d\u3001\u53cd\u722c\u673a\u5236\u3001CDN \u9632\u62a4\u548c\u4ee3\u7406\u7ba1\u7406\u3002\u5efa\u8bae\u5c0f\u6279\u91cf\u5148\u7528 BeautifulSoup\uff0c\u5927\u6279\u91cf\u4f7f\u7528 Scrapy \u7684\u56fe\u7247\u7ba1\u9053\u3002\u53c2\u8003\uff1a<a href=\"https:\/\/onehustle.io\/index.php\/2026\/07\/02\/web-scraping-practices-guide\/\">\u7f51\u9875\u722c\u866b\u6700\u4f73\u5b9e\u8df5<\/a>\u3002<\/p>","protected":false},"excerpt":{"rendered":"<p>\u56fe\u7247\u722c\u53d6\u662f\u7f51\u9875\u722c\u53d6\u4e2d\u975e\u5e38\u5e38\u89c1\u7684\u9700\u6c42\u2014\u2014\u65e0\u8bba\u662f\u6536\u96c6\u8bad\u7ec3\u6570\u636e\u3001\u6784\u5efa\u56fe\u7247\u5e93\u8fd8\u662f\u7ade\u54c1\u5206\u6790\uff0c\u4f60\u90fd\u4f1a\u9700\u8981\u4ece\u7f51\u7ad9\u4e0a\u6279\u91cf\u4e0b\u8f7d\u56fe\u7247\u3002\u672c\u6587\u5c06&#46;&#46;&#46;<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[],"tags":[235,237,217],"class_list":["post-1162","post","type-post","status-publish","format-standard","hentry","tag-proxy-en","tag--en","tag-217"],"_links":{"self":[{"href":"https:\/\/onehustle.io\/index.php\/wp-json\/wp\/v2\/posts\/1162","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/onehustle.io\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/onehustle.io\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/onehustle.io\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/onehustle.io\/index.php\/wp-json\/wp\/v2\/comments?post=1162"}],"version-history":[{"count":0,"href":"https:\/\/onehustle.io\/index.php\/wp
-json\/wp\/v2\/posts\/1162\/revisions"}],"wp:attachment":[{"href":"https:\/\/onehustle.io\/index.php\/wp-json\/wp\/v2\/media?parent=1162"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/onehustle.io\/index.php\/wp-json\/wp\/v2\/categories?post=1162"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/onehustle.io\/index.php\/wp-json\/wp\/v2\/tags?post=1162"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}