#!/usr/bin/python
"""Download the songs listed on a www.google.cn music page.

Usage: pass the URL of a song-list page as the first argument.  Each song
id found on that page is looked up on www.google.cn, a summary row is
printed for it, and every song with a download address is then fetched
with ``wget`` into a directory named after its singer.

NOTE: the original script also carried a disabled keyword-search mode
(GET /music/search?q=...); it was dead code and is not implemented here.
"""
import sys
import os
import re
import urllib
from subprocess import Popen
import time

try:  # Python 2 module names, as the script was originally written
    from HTMLParser import HTMLParser
    import httplib
    from urllib import urlopen
except ImportError:  # Python 3 homes of the same APIs
    from html.parser import HTMLParser
    import http.client as httplib
    from urllib.request import urlopen

try:
    unichr
except NameError:  # Python 3: chr() already returns text
    unichr = chr

HOST = "www.google.cn"
DOWNLOAD_PAGE = "/music/top100/musicdownload?id="

# Only five-digit numeric references are expanded.  Shorter ones (which
# include markup-significant characters such as ``&#60;``) are left alone so
# that expanding them cannot change how the page is tokenized afterwards.
_NUMERIC_REF = re.compile(r"&#([0-9]{5});")


def _expand_numeric_ref(match):
    """Return the character named by a numeric reference match."""
    try:
        return unichr(int(match.group(1)))
    except ValueError:
        # Code point not representable on this interpreter (narrow
        # Python 2 build): keep the reference text unchanged.
        return match.group(0)


def decode(string):
    """Expand the HTML entities this script cares about in *string*.

    ``&nbsp;`` becomes a plain space and every five-digit numeric character
    reference (``&#NNNNN;``) becomes the corresponding character.  All other
    text is returned unchanged.
    """
    # NOTE(review): the original pattern here was a literal space replaced
    # by a space (a no-op), most likely an "&nbsp;" entity that was rendered
    # away when the file was copied -- confirm against the upstream script.
    string = string.replace("&nbsp;", " ")
    return _NUMERIC_REF.sub(_expand_numeric_ref, string)


def get_attr(attrs, attr):
    """Return the value of attribute *attr* in *attrs*, or None if absent.

    *attrs* is the list of ``(name, value)`` pairs HTMLParser passes to
    ``handle_starttag``; the first pair whose name matches wins.
    """
    for key, value in attrs:
        if key == attr:
            return value
    return None


def _read_text(response, encoding="utf-8"):
    """Read *response* fully and return its body as text.

    Byte payloads are decoded with *encoding*; undecodable bytes are
    replaced rather than aborting the whole run.
    """
    raw = response.read()
    if isinstance(raw, bytes):
        return raw.decode(encoding, "replace")
    return raw


class SongParser(HTMLParser):
    """Collect song ids from a song-list page.

    An element counts as a song entry when its ``class`` attribute is
    exactly ``"SongItem BottomBorder"``; the song id is that element's
    ``id`` attribute with its first three characters removed.
    """

    def reset(self):
        """Reset parser state and start a fresh, per-instance id list."""
        # HTMLParser.__init__ calls reset(), so this also initializes the
        # instance.  (Explicit base call: HTMLParser is an old-style class
        # on Python 2, so super() is not available there.)
        HTMLParser.reset(self)
        self.song_list = []

    def parse(self, htm):
        """Parse the file-like object *htm* and return its list of song ids."""
        self.reset()
        self.feed(_read_text(htm))
        return self.song_list

    def handle_starttag(self, tag, attrs):
        if get_attr(attrs, "class") != "SongItem BottomBorder":
            return
        element_id = get_attr(attrs, "id")
        # A song entry without an id attribute cannot be looked up; skip it
        # instead of failing on None[3:].
        if element_id is not None:
            self.song_list.append(element_id[3:])


class SongInfoParser(HTMLParser):
    """Extract one song's metadata from its download page.

    The result is a dict that may contain the keys ``name``, ``singer``,
    ``size`` and ``format`` (text of the cells whose class is listed in
    ``tags``) and ``address`` (absolute URL built from the last ``<a>``
    whose href starts with ``/music/``).
    """

    # Maps the CSS class of a metadata cell to the key used in the song dict.
    tags = {
        "td-song-name": "name",
        "td-singer": "singer",
        "td-size": "size",
        "td-format": "format",
    }
    # Capture state: None until the metadata table or any end tag has been
    # seen, '' while waiting for a metadata cell, otherwise the song-dict
    # key that the next text chunk is stored under.
    attr = None

    def reset(self):
        """Reset parser state so every page starts from an empty song."""
        HTMLParser.reset(self)
        self.song = {}
        self.attr = None

    def parse(self, htm):
        """Parse the file-like object *htm* and return the song dict.

        State is cleared first, so fields of a previously parsed song never
        leak into the result for this one.
        """
        self.reset()
        self.feed(decode(_read_text(htm)))
        return dict(self.song)

    def handle_starttag(self, tag, attrs):
        if tag == "a":
            tag_href = get_attr(attrs, "href")
            # Anchors without an href yield None; ignore them.
            if tag_href is not None and tag_href.startswith("/music/"):
                self.song["address"] = "http://www.google.cn" + tag_href
        elif get_attr(attrs, "class") == "song-meta-data-table":
            self.attr = ''
        elif self.attr == '':
            tag_class = get_attr(attrs, "class")
            if tag_class in self.tags:
                self.attr = self.tags[tag_class]

    def handle_data(self, data):
        # Each text chunk overwrites the previous one for the current key.
        if self.attr:
            self.song[self.attr] = data

    def handle_endtag(self, tag):
        # Any closing tag ends the capture of the current field.
        self.attr = ''


def _fetch_song_infos(conn, song_ids):
    """Fetch and print the metadata of every id in *song_ids* via *conn*."""
    parser = SongInfoParser()
    songs = []
    for index, song_id in enumerate(song_ids):
        conn.request("GET", DOWNLOAD_PAGE + song_id)
        song = parser.parse(conn.getresponse())
        # Pause between requests, as the original script did.
        time.sleep(1)
        print("%d\t%s\t%s\t%s\t%s" % (
            index,
            song.get("name", "?"),
            song.get("singer", "?"),
            song.get("format", "?"),
            song.get("size", "?"),
        ))
        songs.append(song)
    return songs


def _download_songs(songs):
    """Run ``wget`` for every song in *songs* that has a download address."""
    for song in songs:
        address = song.get("address")
        if not address:
            sys.stderr.write(
                "skip (no download address): %s\n" % song.get("name", "?"))
            continue
        print("download: \t\t%s" % (address))
        # -c resumes partial downloads; -P selects the target directory
        # (the singer's name, or the current directory when unknown).
        target_dir = song.get("singer") or "."
        status = Popen(['wget', '-c', '-P', target_dir, address]).wait()
        if status != 0:
            sys.stderr.write(
                "wget exited with status %d for %s\n" % (status, address))


def main(argv=None):
    """Run the downloader; *argv* defaults to ``sys.argv``.

    Returns the process exit status.  Being called without a URL is not
    treated as an error (status 0), matching the original script.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        program = argv[0] if argv else "script"
        sys.stderr.write("usage: %s <song-list-url>\n" % program)
        return 0

    conn = httplib.HTTPConnection(HOST)
    try:
        listing = urlopen(argv[1])
        try:
            print("Get song list...")
            song_ids = SongParser().parse(listing)
        finally:
            listing.close()
        _download_songs(_fetch_song_infos(conn, song_ids))
    finally:
        # Release the connection even when a request or parse step fails.
        conn.close()
    return 0


if __name__ == "__main__":
    sys.exit(main())