脚本下载谷歌高质量的音乐

谷歌的音乐一上线,就有人写出了下载脚本,不过他的脚本是Python 3.0的,我不太喜欢Python 3.0,就把它改成了Python 2.5的版本;而且我喜欢下载整张专辑,所以改成了直接把专辑地址传给脚本,省去搜索这一步。

#!/usr/bin/python
import sys, os, re, urllib
from HTMLParser import HTMLParser
import httplib
from subprocess import Popen
import time
 
def decode(string):
	"""Expand five-digit decimal character references in *string*.

	Every ``&#NNNNN;`` sequence (exactly five digits) is replaced by the
	character with that code point; all other text is returned unchanged.
	"""
	def entity_to_char(match):
		# Drop the leading "&#" and the trailing ";" to get the number.
		return unichr(int(match.group(0)[2:-1]))

	# NOTE(review): as written this maps a space to a space; presumably it
	# replaced "&nbsp;" before the listing was rendered as HTML -- confirm.
	spaced = re.sub(' ', ' ', string)
	return re.sub('&#[0-9]{5};', entity_to_char, spaced)
 
def get_attr(attrs, attr):
	"""Return the value of the first ``(key, value)`` pair named *attr*.

	attrs -- iterable of ``(key, value)`` pairs, as handed to
	         ``handle_starttag`` by HTMLParser.
	attr  -- attribute name to look up.

	Returns None when no pair has that key.
	"""
	matching = [value for key, value in attrs if key == attr]
	if matching:
		return matching[0]
	return None
 
class SongParser(HTMLParser):
	"""Collect song ids from an album page.

	Every start tag whose ``class`` attribute is exactly
	"SongItem BottomBorder" contributes its ``id`` attribute, minus the
	first three characters, to ``song_list``.
	"""
	# Kept on the class so ``SongParser.song_list`` still resolves; each
	# instance replaces it with its own list in __init__ so that parsers
	# no longer share (and pollute) one list.
	song_list = []

	def __init__(self):
		HTMLParser.__init__(self)
		self.song_list = []

	def parse(self, htm):
		"""Feed the UTF-8 page read from *htm* and return the ids found.

		htm -- file-like object whose ``read()`` returns the page bytes.

		Returns this instance's ``song_list``; repeated calls on the same
		instance keep appending to it.
		"""
		self.feed(htm.read().decode("utf-8"))
		return self.song_list

	def handle_starttag(self, tag, attrs):
		if get_attr(attrs, "class") != "SongItem BottomBorder":
			return
		item_id = get_attr(attrs, "id")
		# A matching element without an id has nothing to record; slicing
		# None would raise TypeError.
		if item_id is not None:
			self.song_list.append(item_id[3:])
 
class SongInfoParser(HTMLParser):
	"""Extract one song's metadata from its download page.

	``parse`` returns a dict that may contain the keys "name", "singer",
	"size" and "format" (taken from elements whose class is listed in
	``tags``) and "address" (the absolute URL built from an ``<a>`` whose
	href starts with "/music/").
	"""
	# Maps the CSS class of a metadata cell to the key stored in ``song``.
	tags = {
			"td-song-name":"name",
			"td-singer":"singer",
			"td-size":"size",
			"td-format":"format"
	}
	# Class-level defaults kept for backward compatibility; instances get
	# their own state in __init__ so parsers do not share one dict.
	song = {}
	# None until the metadata table (or any end tag) has been seen, ''
	# while waiting for a known cell, otherwise the key the next text
	# chunk is stored under.
	attr = None

	def __init__(self):
		HTMLParser.__init__(self)
		self.song = {}
		self.attr = None

	def parse(self, htm):
		"""Parse the page read from *htm* and return a copy of the result.

		htm -- file-like object whose ``read()`` returns the page.

		State is cleared first, so values from a previously parsed page
		never leak into this one.
		"""
		self.reset()
		self.song = {}
		self.attr = None
		self.feed(decode(htm.read()))
		return self.song.copy()

	def handle_starttag(self, tag, attrs):
		if tag == "a":
			tag_href = get_attr(attrs, "href")
			# Anchors without an href yield None; skip them instead of
			# failing on the slice.
			if tag_href is not None and tag_href[0:7] == "/music/":
				self.song["address"] = "http://www.google.cn"+tag_href
		elif get_attr(attrs, "class") == "song-meta-data-table":
			self.attr = ''
		elif self.attr == '':
			tag_class = get_attr(attrs, "class")
			if tag_class in self.tags:
				self.attr = self.tags[tag_class]

	def handle_data(self, data):
		# Only text that follows a recognised metadata cell is stored.
		if self.attr:
			self.song[self.attr] = data

	def handle_endtag(self, tag):
		# Any closing tag ends the current cell.
		self.attr = ''
 
conn = httplib.HTTPConnection("www.google.cn")
 
if len(sys.argv) == 1:
	exit(0)
 
"""
query = ' '.join(sys.argv[1:])
print "send query: %s" %query
qeuery = urllib.quote(query)
conn.request("GET", "http://www.google.cn/music/search?q="+query)
result = conn.getresponse()
"""
result = urllib.urlopen(sys.argv[1])
 
#print result.read().decode("utf-8")
 
print "Get song list..."
 
p = SongParser()
song_ids = p.parse(result)
songs = []
parser = SongInfoParser()
 
for i, id in enumerate(song_ids):
	conn.request("GET", "/music/top100/musicdownload?id="+id)
	response = conn.getresponse()
	song = parser.parse(response)
	name = song["name"]
	time.sleep(1)
	print "%d\t%s\t%s\t%s\t%s" %(i, name, song["singer"],song["format"], song["size"])
	songs.append(song)
 
choice = range(len(songs))
 
for i in choice:
	song = songs[ int(i) ]
	if isinstance(song, dict):
		print "download: \t\t%s" %(song["address"])
		Popen(['wget', '-c', '-P', song["singer"], song["address"]]).wait()
conn.close()

如果你想用,可以在这里下载。

下载完了以后用mutagen自带工具mid3iconv可以转换mp3的tag编码,不过这里有个小问题,就是对于谷歌高质量音乐有一点不爽:谷歌音乐都是自带歌词的,而这个工具不能转换歌词。我就改了改,让它也可以转换歌词的编码,这样iTunes等只支持unicode的软件就没问题了,ipod touch/iphone中也可以正常看到歌词了。其实只改了很小一点:原来mid3iconv只修改T开头的tag,我加上了对歌词所在的“USLT”tag的处理就OK了,代码如下:

#!/usr/bin/python
 
import os
import sys
import locale
 
from optparse import OptionParser
 
# Version of this script as a tuple of ints; joined with "." to build the
# text shown by the option parser's --version output.
VERSION = (0, 1)
 
def isascii(string):
    """Return True if every character of *string* is 7-bit ASCII.

    An empty (or otherwise false) *string* counts as ASCII.

    The previous test compared ``min(string)`` with ``'\\x127'``, which is
    the two-character string ``'\\x12'`` + ``'7'`` rather than code 127, so
    plain text such as "abc" was reported as non-ASCII.
    """
    if not string:
        return True
    # Compare code points, not characters, so byte strings and unicode
    # strings are handled alike.
    return all(ord(char) < 128 for char in string)
 
class ID3OptionParser(OptionParser):
    """OptionParser preset with mid3iconv's version, usage and help text."""

    def __init__(self):
        # Both versions are tuples; render each as dotted text.
        script_version = ".".join(str(part) for part in VERSION)
        library_version = ".".join(str(part) for part in mutagen.version)
        banner = "mid3iconv %s\nUses Mutagen %s" % (
            script_version, library_version)
        summary = ("Mutagen-based replacement the id3iconv utility, "
                   "which converts ID3 tags from legacy encodings "
                   "to Unicode and stores them using the ID3v2 format.")
        OptionParser.__init__(
            self,
            version=banner,
            usage="%prog [OPTION] [FILE]...",
            description=summary)

    def format_help(self, *args, **kwargs):
        """Return the standard help text followed by an in-place warning."""
        standard_help = OptionParser.format_help(self, *args, **kwargs)
        warning = "\nFiles are updated in-place, so use --dry-run first.\n"
        return standard_help + warning
 
def update(options, filenames):
    encoding = options.encoding or locale.getpreferredencoding()
    verbose = options.verbose
    noupdate = options.noupdate
    force_v1 = options.force_v1
    remove_v1 = options.remove_v1
 
    def conv(uni):
        return uni.encode('iso-8859-1').decode(encoding)
 
    for filename in filenames:
        if verbose != "quiet":
            print "Updating", filename
 
        if has_id3v1(filename) and not noupdate and force_v1:
            mutagen.id3.delete(filename, False, True)
 
        try: id3 = mutagen.id3.ID3(filename)
        except mutagen.id3.ID3NoHeaderError:
            if verbose != "quiet":
                print "No ID3 header found; skipping..."
            continue
        except Exception, err:
            if verbose != "quiet":
                print str(err)
            continue
 
        for tag in filter(lambda t: t.startswith("T"), id3):
            frame = id3[tag]
            if isinstance(frame, mutagen.id3.TimeStampTextFrame): # non-unicode fields
                continue
 
            try:
                text = map(conv, frame.text)
            except (UnicodeError, LookupError):
                continue
            else:
                frame.text = text
                if min(map(isascii, text)):
                    frame.encoding = 3
                else:
                    frame.encoding = 1
 
        print "Process USLT"
        for tag in filter(lambda t: t.startswith("U"), id3):
            frame = id3[tag]
            if isinstance(frame, mutagen.id3.TimeStampTextFrame): # non-unicode fields
                continue
            if frame.encoding == 1:
				continue
 
            text=frame.text
            text = text.encode('iso-8859-1').decode(encoding)
 
            frame.text = text
            if min(map(isascii, text)):
                frame.encoding = 3
            else:
                frame.encoding = 1
 
        enc = locale.getpreferredencoding()
        if verbose == "debug":
            print id3.pprint().encode(enc, "replace")
 
        if not noupdate:
            if remove_v1: id3.save(filename, v1=False)
            else: id3.save(filename)
 
def has_id3v1(filename):
    """Return True if *filename* has the "TAG" marker of an ID3v1 tag.

    The marker is looked for 128 bytes before the end of the file.  Files
    too short to seek that far back yield False.

    Raises IOError if the file cannot be opened.
    """
    # Read-only access is enough for the check; the previous 'rb+' mode
    # failed on files that are readable but not writable.
    f = open(filename, 'rb')
    try:
        try:
            f.seek(-128, 2)
        except IOError:
            return False
        # The file is read in binary mode, so compare against bytes.
        return f.read(3) == "TAG".encode("ascii")
    finally:
        # Always release the handle; it used to be leaked.
        f.close()
 
def main(argv):
    """Parse *argv* and convert the tags of every file named in it.

    argv -- full argument list including the program name at index 0.

    Prints the help text when no file is given.
    """
    parser = ID3OptionParser()
    parser.add_option(
        "-e", "--encoding", metavar="ENCODING", action="store",
        type="string", dest="encoding",
        help=("Specify original tag encoding (default is %s)" %(
        locale.getpreferredencoding())))
    parser.add_option(
        "-p", "--dry-run", action="store_true", dest="noupdate",
        help="Do not actually modify files")
    parser.add_option(
        "--force-v1", action="store_true", dest="force_v1",
        help="Use an ID3v1 tag even if an ID3v2 tag is present")
    parser.add_option(
        "--remove-v1", action="store_true", dest="remove_v1",
        help="Remove v1 tag after processing the files")
    parser.add_option(
        "-q", "--quiet", action="store_const", dest="verbose",
        const="quiet", help="Only output errors")
    parser.add_option(
        "-d", "--debug", action="store_const", dest="verbose",
        const="debug", help="Output updated tags")

    # Map the single-dash spellings onto the real long options (presumably
    # kept for compatibility with id3iconv -- confirm).  The rewrite works
    # on a copy of the list that is actually parsed; it used to patch the
    # global sys.argv, which had no effect when main() was given any
    # other list.
    legacy_flags = {"-v1": "--force-v1", "-removev1": "--remove-v1"}
    argv = [legacy_flags.get(arg, arg) for arg in argv]

    (options, args) = parser.parse_args(argv[1:])

    if args:
        update(options, args)
    else:
        parser.print_help()
 
if __name__ == "__main__":
    # mutagen is imported here rather than at the top of the file so that
    # a copy in the parent directory can be picked up as a fallback.
    try:
        import mutagen
        import mutagen.id3
    except ImportError:
        # Run out of tools/
        parent_dir = os.path.abspath("../")
        sys.path.append(parent_dir)
        import mutagen
        import mutagen.id3
    main(sys.argv)

可以到这里下载。

5 Comments

Post a Comment

Your email is never shared. Required fields are marked *