Linux 下使用 wget/aria2 進行離線迅雷批量下載

←手機掃碼閱讀火星人 @ 2014-03-12 , reply:0

在 Linux 下資源下載的速度長期受限，ed2k、torrent 什麼的都沒有速度，坑爹呀。自從購買了迅雷 VIP 的服務，就可以直接以 HTTP 形式從迅雷伺服器下載自己託迅雷下載的東西；而且如果這個資源別人已經下載過，你就不用再次下載了，迅雷會馬上提示你這個任務已經完成。–#roowe 撰寫

至於其他的，用過的人都知道了，也不再細說。如果windows平台配合迅雷客戶端用迅雷VIP的話，這個腳本也沒有啥意義了（因為客戶端更人性化^_^，當然佔用資源也不少，嘿嘿），所以前提是你的OS要是Linux，然後使用迅雷離線的web界面。

用 Firefox 下載迅雷離線的東西存在這樣幾個問題：文件名中文亂碼，要自己改（暗罵編碼ing）；不支持斷點續傳（我掛過幾次，只能無奈地重新下載 T_T）；在迅雷頁面點擊下載的時候響應慢死了，好久才跳出窗口。

出於這幾個原因，我就去研究了一下 PT 醬的那個離線下載腳本，然後根據自己的需要重新寫了一份，如下：（也可以在這裡下載腳本）

  1  2  3  4  5  6  7  8  9  10  11  12  13  14  15  16  17  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90  91  92  93  94  95  96  97  98  99  100  101  102  103  104  105  106  107  108  109  110  111  112  113  114  115  116  117  118  119  120  121  122  123  124  125  126  127  128  129  130  131  132  133  134  135  136  137  138  139  140  141  142  143  144  145  146  147  148  149  150  151  152  153  154  155  156

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#Time-stamp: <2011-10-25 21:36:28 Tuesday by roowe>
#File Name: thuner_xl_with_wget.py
#Author: bestluoliwe@gmail.com
#My Blog: www.iroowe.com
"""Batch-download Xunlei offline ("li xian") tasks with wget or aria2c.

The script parses a saved copy of the Xunlei offline task page, extracts the
(file name, HTTP URL) pairs it contains, asks the user which ones to fetch and
then runs wget (default) or aria2c for each selected entry, passing the
exported browser cookies file so that the server accepts the request.

The code runs on both Python 2.7 and Python 3.
"""

import re
import time
import os
import logging
import sys
import subprocess
from collections import deque

try:  # Python 3
    from html.entities import entitydefs
except ImportError:  # Python 2
    from htmlentitydefs import entitydefs

try:  # Python 2
    _read_line = raw_input
except NameError:  # Python 3
    _read_line = input

LOG_FILE = "/tmp/thuner_with_wget.log"

# Root logger.  log_init() attaches handlers to this very object, so the
# functions below are usable (silently) even when the module is imported
# without log_init() having been called.
log = logging.getLogger()

# Exit statuses after which a retry is pointless.
# wget: 3 = file I/O error, 8 = server issued an error response.
WGET_FATAL_CODES = (3, 8)
# aria2c: 13 = file already existed.  Must be a tuple: "(13)" is just an int.
ARIA2C_FATAL_CODES = (13,)

# Default number of times a failed download is queued again.
MAX_RETRIES = 5
# Pause between two downloader invocations, in seconds.
DOWNLOAD_PAUSE_SECONDS = 2

# A named character reference such as "&amp;".  The name is restricted to
# word characters so that a bare "&" inside a query string (e.g. "&x=1")
# cannot swallow a real entity that follows it.
_ENTITY_RE = re.compile(r"&(\w+);")
_BT_LINK_RE = re.compile(
    r"<a.+?name=['\"]bturls['\"] title=['\"](.+?)['\"].+?href=['\"](http.+?)['\"]>")
_TASK_INPUT_RE = re.compile(
    r'<input id=[\'"]durl(\w+?)[\'"].+title=[\'"](.+?)[\'"].+')


def log_init(log_file, quiet=False):
    """Configure the root logger and return it.

    Messages of level DEBUG and above are written to *log_file*; unless
    *quiet* is true they are also echoed to the console (stderr).
    """
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)
    formatter = logging.Formatter("%(asctime)s %(levelname)s %(message)s")

    hdlr = logging.FileHandler(log_file)
    hdlr.setFormatter(formatter)
    logger.addHandler(hdlr)

    if not quiet:
        hdlr = logging.StreamHandler()
        hdlr.setFormatter(formatter)
        logger.addHandler(hdlr)
    return logger


def handle_entitydef(matchobj):
    """re.sub() callback: replace a named HTML entity by its character.

    Group 1 of *matchobj* must be the entity name (without "&" and ";").
    Unknown names are left untouched, i.e. the whole match is returned.
    """
    return entitydefs.get(matchobj.group(1), matchobj.group(0))


def _unescape(text):
    """Return *text* with the known named HTML entities decoded."""
    return _ENTITY_RE.sub(handle_entitydef, text)


def collect_urls(html, only_bturls=False):
    """Extract download links from the saved task page.

    Two kinds of markup are recognised:

    * ``<a name="bturls" title=NAME ... href=URL>`` anchors (BT task files);
    * ``<input id="durl<ID>" ... title=NAME ...>`` elements, whose URL is
      taken from the ``value`` of the matching ``<input id="dl_url<ID>">``.
      These are skipped when *only_bturls* is true.

    Returns a list of ``(name, url)`` tuples with HTML entities decoded.
    """
    urls = []
    for name, url in _BT_LINK_RE.findall(html):
        urls.append((_unescape(name), _unescape(url)))

    if not only_bturls:
        for task_id, name in _TASK_INPUT_RE.findall(html):
            result = re.search(
                r'<input id=[\'"]dl_url%s[\'"].+value=[\'"](http.*?)[\'"]'
                % re.escape(task_id), html)
            if result:
                urls.append((_unescape(name), _unescape(result.group(1))))

    log.info("Filter get %d links", len(urls))
    return urls


def choose_download(urls):
    """Interactively ask, for every entry of *urls*, whether to download it.

    An empty answer counts as "yes".  Returns a dict mapping each name to
    True/False.  Entries sharing a name share one dict slot; the last answer
    given for that name wins.
    """
    download_list = {}
    for name, _url in urls:
        while True:
            ans = _read_line("Download %s?[Y/n](default: Y) " % name)
            ans = ans.strip().lower()
            if ans in ("", "y"):
                download_list[name] = True
                break
            if ans == "n":
                download_list[name] = False
                break
            sys.stdout.write("please enter y or n!\n")
    return download_list


def _run_downloads(urls, build_cmd, tool, fatal_codes, give_up_msg, retry_msg,
                   quiet, max_retries):
    """Shared download loop used by the wget and aria2c front ends.

    *build_cmd(name, url)* returns the argv list for one download.  A
    non-zero exit status listed in *fatal_codes* abandons the entry; any
    other failure puts the entry back at the end of the queue, at most
    *max_retries* times.  *give_up_msg* and *retry_msg* are %-templates
    filled from a dict with the keys ``name`` and ``ret``.

    The caller's *urls* list is never modified.
    """
    download_list = choose_download(urls)
    # Each queue item carries the number of retries already spent on it.
    queue = deque((name, url, 0) for name, url in urls)
    while queue:
        name, url, retries = queue.popleft()
        if not url:
            log.debug("Empty Link, Name: " + name)
            continue
        if not download_list[name]:
            continue

        cmd = build_cmd(name, url)
        if quiet:
            cmd.insert(1, "-q")
        log.info("%s cmd: '%s'", tool, ' '.join(cmd))

        ret = subprocess.call(cmd)
        if ret == 0:
            log.info("Finished %s", name)
        else:
            log.debug("%s returned %d.", tool, ret)
            values = {"name": name, "ret": ret}
            if ret in fatal_codes:
                log.error(give_up_msg % values)
            elif retries < max_retries:
                queue.append((name, url, retries + 1))
                log.error(retry_msg % values)
            else:
                log.error("Give up '%s' after %d retries.", name, max_retries)
        # Pause after every attempt, failed ones included, so that retries
        # do not hammer the server.
        time.sleep(DOWNLOAD_PAUSE_SECONDS)


def thuner_xl_with_wget(urls, output_dir, cookies_file, quiet=False,
                        max_retries=MAX_RETRIES):
    """Download the selected entries of *urls* with wget.

    Each file is saved as ``output_dir/name``; wget resumes partial files
    (``-c``) and tries each request up to 5 times (``-t 5``).  Entries whose
    wget run fails with a status other than 3 or 8 are queued again, at most
    *max_retries* times.
    """
    def build_cmd(name, url):
        return ["wget", "--load-cookies", cookies_file, "-c", "-t", "5",
                "-O", os.path.join(output_dir, name), url]

    _run_downloads(
        urls, build_cmd, "wget", WGET_FATAL_CODES,
        "Give up '%(name)s', may be already finished download, "
        "or something wrong with disk.",
        "will retry for %(name)s later.",
        quiet, max_retries)


def thuner_xl_with_aria2c(urls, output_dir, cookies_file, quiet=False,
                          max_retries=MAX_RETRIES):
    """Download the selected entries of *urls* with aria2c.

    Files go to *output_dir* under their task name; aria2c resumes partial
    files (``-c``), tries up to 5 times (``-m 5``) and uses 5 connections
    (``-s 5``).  Exit status 13 (file already existed) abandons the entry;
    other failures are queued again, at most *max_retries* times.
    """
    def build_cmd(name, url):
        return ["aria2c", "--load-cookies", cookies_file, "-d", output_dir,
                "-c", "-m", "5", "-s", "5", "-o", name, url]

    _run_downloads(
        urls, build_cmd, "aria2c", ARIA2C_FATAL_CODES,
        "Give up '%(name)s', file already existed.",
        "the exit status number is %(ret)d, "
        "and then will retry for %(name)s later.",
        quiet, max_retries)


def main(argv=None):
    """Command-line entry point; returns the process exit status."""
    import argparse
    parser = argparse.ArgumentParser(
        description='Thuner li xian with wget',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # No nargs='?': a bare "-p" used to yield None and crash in expanduser().
    parser.add_argument('-p', default="~/user_task.htm", help="load page file")
    parser.add_argument('-c', default="~/cookies.txt", help="load cookie file")
    parser.add_argument('-o', default="~/Downloads", help="output dir")
    parser.add_argument('-b', action='store_true', default=False,
                        help="bt files only")
    parser.add_argument('-q', action="store_true", default=False,
                        help="quiet, only log to file.")
    parser.add_argument('-a', action="store_true", default=False,
                        help="download with aria2c")
    args = parser.parse_args(argv)

    only_bturls, quiet = args.b, args.q
    page_file = os.path.expanduser(args.p)
    cookies_file = os.path.realpath(os.path.expanduser(args.c))
    output_dir = os.path.expanduser(args.o)

    log_init(LOG_FILE, quiet=quiet)

    # Report missing inputs as errors and with a non-zero exit status.
    if not os.path.exists(cookies_file):
        log.error("please export cookies file")
        return 1
    if not os.path.isdir(output_dir):
        log.error("No such %s", output_dir)
        return 1
    if not os.path.isfile(page_file):
        log.error("No such %s", page_file)
        return 1

    with open(page_file) as f:
        page_html = f.read()
    urls = collect_urls(page_html, only_bturls)

    if args.a:
        thuner_xl_with_aria2c(urls, output_dir, cookies_file, quiet)
    else:
        thuner_xl_with_wget(urls, output_dir, cookies_file, quiet)
    return 0


if __name__ == "__main__":
    sys.exit(main())

好文,頂一下

(3)

100%

文章真差,踩一下

(0)

------分隔線----------------------------

上一篇：想知道如何將你的Linux伺服器清空空嗎？
下一篇：使用Squid配置反向代理伺服器

我要評論!
收藏
挑錯
推薦
列印

Tags:

[火星人 ] Linux 下使用 wget/aria2 進行離線迅雷批量下載已經有666次圍觀

本文地址：http://coctec.com/docs/linux/show-post-65345.html

Linux 下使用 wget/aria2 進行離線迅雷批量下載

熱門文章

最新文章