# Kevin Kien: Python

from urllib2 import urlopen

import urllib2

import re

import sys

import string

from datetime import datetime

import os

def _content_length(net_stream):
    """Return the Content-Length announced by ``net_stream`` as an int.

    Returns None when the header is missing or is not a valid integer, so
    the caller can treat the size as unknown instead of crashing.
    """
    raw_length = net_stream.info().get('content-length')
    try:
        return int(raw_length)
    except (TypeError, ValueError):
        return None


def httpDownloader(out_file, req=None, url=None):
    """Download a resource to ``out_file`` unless a complete copy exists.

    The source is ``req`` (anything ``urlopen`` accepts, e.g. a
    ``urllib2.Request``) or, when ``req`` is not given, ``url``.

    A local file is considered complete when its size equals the
    Content-Length reported by the server; in that case nothing is
    downloaded.  If the server reports no usable Content-Length the file is
    always downloaded again, because completeness cannot be verified.

    Returns:
        1 if the file was downloaded, 0 if nothing was downloaded (no source
        given, file already complete, or the transfer failed part-way).

    Raises:
        Whatever ``urlopen`` raises when the connection cannot be opened and
        whatever ``open`` raises when ``out_file`` cannot be created.
        KeyboardInterrupt is deliberately not swallowed so the caller can
        stop a batch of downloads with Ctrl-C.
    """
    source = req or url
    if not source:
        return 0

    block_size = 512 * 8
    net_stream = urlopen(source)
    # The network stream is closed on every exit path, including the early
    # "already exists" return and errors raised while opening out_file.
    try:
        file_len = _content_length(net_stream)

        if os.path.isfile(out_file) and file_len is not None:
            if os.path.getsize(out_file) == file_len:
                print("%s exists!" % out_file)
                return 0
            print("%s exists but was not fully downloaded!" % out_file)

        print("Downloading %s..." % out_file)
        with open(out_file, 'wb') as out_stream:
            try:
                while True:
                    block = net_stream.read(block_size)
                    if not block:
                        break
                    out_stream.write(block)
            except Exception as err:
                # A failed transfer leaves a partial file behind; the size
                # check above makes the next run download it again.
                print("Failed to download %s: %r" % (out_file, err))
                return 0
        return 1
    finally:
        net_stream.close()

#########################

#Main program

#########################

if __name__ == '__main__':
    # Base destination directory ("~" is expanded below, so a value such as
    # "~/Music" works as well).  A per-month sub-folder is created inside it.
    dest_dir = "/media/Data/Music"

    # Album page that is scanned for mp3 download links.
    url = "http://mp3.zing.vn/mp3/nghe-album/album-hot/nhac-viet.html"

    # Browser-like User-Agent sent with every request.  Built from adjacent
    # literals so that no line-continuation whitespace ends up inside the
    # header value.
    hdrs = {'User-Agent': 'Mozilla/5.0 (X11; U; Linux i686; en-US) '
                          'AppleWebKit/533.2 (KHTML, like Gecko) '
                          'Chrome/5.0.342.7 Safari/533.2'}

    # Sub-folder is named after the current month number, e.g. "Thang5".
    sub_dir = "Thang%d" % datetime.today().month

    # Absolute destination path; the trailing '' keeps the final separator
    # so the path is printed the same way as before.
    abs_dir = os.path.join(os.path.expanduser(dest_dir), sub_dir, '')

    if not os.path.isdir(abs_dir):
        try:
            # makedirs also creates missing parent directories.
            os.makedirs(abs_dir)
        except OSError:
            print("Error, Can not create the directory %s" % abs_dir)
            # Non-zero status: the script did not do its job.
            sys.exit(1)

    # Fetch the album page.
    fpage = urlopen(urllib2.Request(url, headers=hdrs))

    # fline: an http:// URL containing "dl.mp3" and ending in '.mp3"'.
    # _rem:  the '?filename=' separator and the closing quote; both are
    #        replaced by a space so the match splits into URL and file name.
    fline = re.compile(r'http://.*dl\.mp3.*\.mp3\"')
    _rem = re.compile(r'\?filename=|\"')

    links_list = []
    try:
        while True:
            lnk = fpage.readline()
            if not lnk:
                break
            rlink = fline.search(lnk)
            if not rlink:
                continue
            parts = _rem.sub(' ', rlink.group()).split()
            if len(parts) < 2:
                # Link without a "?filename=" part: no name to save it under.
                continue
            # The name comes from the remote page; keep only its last path
            # component so it cannot escape the destination directory.
            file_name = os.path.basename(parts[1])
            if not file_name:
                continue
            links_list.append((parts[0], file_name))
    finally:
        fpage.close()

    count = 0
    try:
        for link, file_name in links_list:
            link_req = urllib2.Request(link, headers=hdrs)
            out_file = os.path.join(abs_dir, file_name)
            count += httpDownloader(out_file, req=link_req)
    except KeyboardInterrupt:
        print("User pressed Ctrl-C! Quitting...")
        sys.exit(1)
    except Exception as err:
        print("Something was wrong with links, please check the links!")
        print("Reason: %r" % (err,))

    print("==========================================================")
    print("%d file(s) have/s been downloaded to %s" % (count, abs_dir))
    print("==========================================================")
# Python - tai nhac
#
# Blog Archive
#
# Labels