Python实现批量下载文件 - 哈喽比特

914次阅读 | 发布于6年以前

Python实现批量下载文件


    #!/usr/bin/env python
    # -*- coding:utf-8 -*-

    from gevent import monkey
    monkey.patch_all()
    from gevent.pool import Pool
    import requests
    import sys
    import os

    def download(url):
        """Download *url* into the current directory and report success.

        The local file name is the text after the last '/' in the URL. The
        body is streamed to disk in 1 KiB chunks so that large files are not
        held in memory.

        Args:
            url: Address of the file to fetch; surrounding whitespace is
                ignored.

        Raises:
            requests.exceptions.HTTPError: The server answered with a 4xx/5xx
                status. Raised before anything is written, so an error page
                is never saved under the target file name.
        """
        # Adjacent string literals inside parentheses are joined by the
        # parser; no '+' or backslash continuation is needed.
        chrome = ('Mozilla/5.0 (X11; Linux i86_64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/41.0.2272.101 Safari/537.36')
        headers = {'User-Agent': chrome}
        url = url.strip()
        filename = url.split('/')[-1].strip()
        r = requests.get(url, headers=headers, stream=True)
        try:
            r.raise_for_status()
            with open(filename, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)
        finally:
            # With stream=True the connection is held until the body is fully
            # consumed or the response is closed; close it on every path.
            r.close()
        print('%s is ok' % filename)

    def removeLine(key, filename):
        """Delete every line of *filename* that contains *key*.

        *key* is matched as a literal substring. No shell or regular
        expression is involved, so characters such as '&', '?', '.', ';' or
        spaces (all of which can occur in URLs) are handled safely.

        Args:
            key: Text identifying the lines to drop. An empty key matches
                nothing and leaves the file untouched.
            filename: Path of the text file to rewrite in place.
        """
        if not key:
            return
        with open(filename, 'r') as src:
            lines = src.readlines()
        kept = [line for line in lines if key not in line]
        # Skip the rewrite when nothing matched.
        if len(kept) != len(lines):
            with open(filename, 'w') as dst:
                dst.writelines(kept)

    def _download_and_remove(url, listfile):
        """Download *url*, then drop its entry from the URL list *listfile*."""
        download(url)
        # Only reached when download() returned without raising, so a URL
        # whose transfer failed stays in the list and can be retried later.
        removeLine(url.split('/')[-1].strip(), listfile)


    # Script entry point: read one URL per line from the file named on the
    # command line and fetch them with at most four concurrent greenlets.
    if __name__ == "__main__":
        if len(sys.argv) == 2:
            filename = sys.argv[1]
            # Read the whole list up front; the file is rewritten by
            # removeLine() while the downloads are running.
            with open(filename, "r") as f:
                urls = [line.strip() for line in f]
            p = Pool(4)
            for url in urls:
                # Blank lines would otherwise spawn a download of ''.
                if url:
                    p.spawn(_download_and_remove, url, filename)
            p.join()
        else:
            print('Usage: python %s urls.txt' % sys.argv[0])

其他网友的方法：


    import shutil
    import urllib2
    from os.path import basename
    from urlparse import urlsplit
    def url2name(url):
        """Return the last segment of the path component of *url*.

        The query string and fragment are not part of the path component and
        therefore never appear in the result. A path that is empty or ends in
        '/' yields an empty string.
        """
        path = urlsplit(url).path
        return basename(path)

    def download(url, localFileName=None):
        """Fetch *url* and save the response body to a local file.

        The file name is chosen in this order:

        1. *localFileName*, when given;
        2. the ``filename=`` parameter of a Content-Disposition response
           header, reduced to its final path component;
        3. the last path segment of the URL the response finally came from
           (this differs from *url* only after a redirect).

        Args:
            url: Address of the resource to fetch.
            localFileName: Optional name that overrides the automatically
                chosen one.
        """
        r = urllib2.urlopen(urllib2.Request(url))
        try:
            localName = localFileName
            if not localName:
                headers = r.info()
                if 'Content-Disposition' in headers:
                    # Typical value: 'attachment; filename="report.pdf"; size=1'
                    _, found, value = headers['Content-Disposition'].partition('filename=')
                    if found:
                        value = value.split(';')[0].strip().strip('"\'')
                        # Keep only the final component so a server-supplied
                        # name cannot point outside the current directory.
                        localName = basename(value.replace('\\', '/'))
            if not localName:
                # No usable header name: fall back to the final URL.
                localName = url2name(r.url)
            with open(localName, 'wb') as f:
                # Copy in chunks rather than reading the whole body at once.
                shutil.copyfileobj(r, f)
        finally:
            r.close()

    # Example invocation: replace the placeholder string below with the URL
    # of the file to download.
    download(r'你要下载的python文件的url地址')

以上便是本文给大家分享的全部内容了，小伙伴们可以测试下哪种方法效率更高呢。