Python实现的下载8000首儿歌的代码分享

1002次阅读  |  发布于5年以前

下载8000首儿歌的python的代码:

复制代码 代码如下:

# -*- coding: UTF-8 -*-

from pyquery import PyQuery as py
from lxml import etree
import urllib
import re
import os
import sys
import logging

def format(filename):
    """Return *filename* with spaces and apostrophes replaced by "_".

    Used to turn a scraped song title into a safer file name.

    Args:
        filename: The raw file name (text).

    Returns:
        The same text with every " " and "'" replaced by an underscore.
    """
    # NOTE(review): the original character tuple had three entries, but the
    # middle one was garbled into an unterminated quote sequence (a syntax
    # error). It was presumably an HTML-escaped apostrophe -- TODO confirm
    # against the upstream script before adding it back.
    # str.replace is already a no-op when the character is absent, so no
    # separate find() check is needed.
    for unwanted in (" ", "'"):
        filename = filename.replace(unwanted, "_")
    return filename

def download_mp3(mp3_url, filename, dir, logger=None):
    """Download *mp3_url* into the file *filename* inside directory *dir*.

    A target file that already exists is left untouched. Download and write
    failures are logged rather than raised, so a caller looping over many
    songs keeps going.

    Args:
        mp3_url: URL of the MP3 to fetch.
        filename: Name of the file to create inside *dir*.
        dir: Existing directory that receives the file.
        logger: Optional logger; defaults to the "simple" logger, which is
            the one the script's ``__main__`` section configures.

    Returns:
        None.
    """
    # urlopen lives in different modules on Python 2 and Python 3.
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    if logger is None:
        logger = logging.getLogger("simple")

    # os.path.join instead of a hard-coded "\\" so the path is also valid
    # outside Windows.
    f = os.path.join(dir, filename)
    if os.path.exists(f):
        logger.debug(f + " is existed.")
        return

    try:
        # Fetch the whole payload BEFORE creating the file: otherwise a
        # failed request leaves an empty file behind, and the exists-check
        # above would make every later run skip this song.
        response = urlopen(mp3_url)
        try:
            data = response.read()
        finally:
            response.close()
        with open(f, 'wb') as out:
            out.write(data)
        logger.debug(filename + ' is downloaded.')
    except Exception:
        # Exception (not a bare except) so Ctrl-C still stops the program.
        # Drop a partially written file so a later run retries it.
        if os.path.exists(f):
            try:
                os.remove(f)
            except OSError:
                pass
        logger.debug(filename + ' is not downloaded.', exc_info=True)

# Group 1: the text between <h1> and </h1> (used as the song title).
# Group 2: a double-quoted value inside an <a tag that follows the text
# "downloadboxlist" (presumably the href of the download link -- verify
# against the live page markup).
# Compiled once here instead of on every loop iteration.
_SONG_PATTERN = re.compile(
    u'.*<h1>(.*)</h1>.*downloadboxlist.*?<a.*?"(.*?)"',
    re.UNICODE | re.S)


def download_all_mp3(start, end, dir, logger):
    """Download the songs of pages ``start`` .. ``end - 1`` into *dir*.

    For every id ``x`` in ``range(start, end)`` the page
    ``http://www.youban.com/mp3-d<x>.html`` is loaded, its
    ``.mp3downloadbox`` element is searched for a title and a link, and the
    link is passed to ``download_mp3`` under the name ``<x>_<title>.mp3``
    (cleaned by ``format``). A page that is missing, unparsable or failing
    is logged and skipped; it never stops the rest of the range.

    Args:
        start: First page id (inclusive).
        end: Last page id (exclusive).
        dir: Directory that receives the MP3 files.
        logger: Logger used for progress and error messages.

    Returns:
        None.
    """
    # Text type used to serialise the selected element: ``unicode`` on
    # Python 2 (as the original did), ``str`` on Python 3.
    try:
        to_text = unicode
    except NameError:
        to_text = str

    for x in range(start, end):
        # Built outside the try so the except handler can always name it.
        url = "http://www.youban.com/mp3-d" + str(x) + ".html"
        try:
            logger.debug(str(x) + ": " + url)
            doc = py(url=url)
            e = doc('.mp3downloadbox')
            # A PyQuery selection is a list of matched elements, so an
            # empty match is falsy; the old ``e is None or e == ''`` test
            # could never be true.
            if not e:
                logger.debug(url + " is not existed.")
                # continue, not return: one missing id must not abort the
                # remaining downloads.
                continue

            m = _SONG_PATTERN.search(to_text(e))
            if m is None:
                continue

            title = m.group(1).strip()
            link = m.group(2)
            if link == '' or title == '':
                logger.debug(url + " is not useful")
                continue

            title2 = format(str(x) + "_" + title + ".mp3")
            logger.debug(str(x) + ": " + link)
            download_mp3(link, title2, dir)
        except Exception:
            # Exception (not a bare except) so Ctrl-C can interrupt the
            # loop; the traceback is logged to make failures diagnosable.
            logger.debug(url + " met exception.", exc_info=True)

if __name__ == "__main__":
    # Usage: script.py [start end [dir_root]]
    #   start, end -- page id range handed to download_all_mp3
    #                 (default 1 and 8000)
    #   dir_root   -- parent directory for the downloads (default e:\song)
    args = sys.argv[1:]

    # Index into argv only after checking its length, so the defaults below
    # are actually reachable when arguments are omitted.
    dir_root = "e:\\song"
    if len(args) >= 3 and args[2] != '':
        dir_root = args[2]

    start, end = 1, 8000
    if len(args) >= 2:
        # Convert before comparing: the raw argv entries are strings.
        try:
            first, last = int(args[0]), int(args[1])
        except ValueError:
            sys.exit("start and end must be integers")
        if first >= 0 and last >= 0:
            start, end = first, last
            print("Download from %s to %s.\n" % (start, end))

    # One sub-directory per requested range, e.g. <dir_root>/1-8000.
    target_dir = os.path.join(dir_root, str(start) + "-" + str(end))
    if not os.path.exists(target_dir):
        # makedirs also creates dir_root itself when it does not exist yet.
        os.makedirs(target_dir)
    print("Download to " + target_dir + ".\n")

    # Log every message both to the console and to download.log inside the
    # target directory. ``logger`` stays a module-level name because
    # download_mp3 may look it up as a global.
    logger = logging.getLogger("simple")
    logger.setLevel(logging.DEBUG)
    fh = logging.FileHandler(os.path.join(target_dir, "download.log"))
    ch = logging.StreamHandler()
    formatter = logging.Formatter("%(message)s")
    ch.setFormatter(formatter)
    fh.setFormatter(formatter)
    logger.addHandler(ch)
    logger.addHandler(fh)

    download_all_mp3(start, end, target_dir, logger)

Copyright© 2013-2020

All Rights Reserved 京ICP备2023019179号-8