# Source code for downloader

# Copyright (c) 2015-2016 Junpei Kawamoto
# This software is released under the MIT License.
""" Download objects from several services.
"""
import contextlib
import gzip
import io
import logging
import os
import re
import shutil
import subprocess
import sys
import urllib2
import urlparse

LOGGER = logging.getLogger(__name__)

def _open_url(url):
    """ Open a URL by urllib2 via gzip encoding.

    The request carries an ``Accept-encoding: gzip`` header. The response
    is returned untouched; nothing is decompressed here.

    Args:
      url: URL string.

    Returns:
      Response object returned by urllib2.urlopen.
    """
    req = urllib2.Request(url)
    req.add_header("Accept-encoding", "gzip")
    return urllib2.urlopen(req)

def _copy_response(res, dest):
    """ Copy a response opened by urllib2 to a destination.

    If the response headers declare ``Content-Encoding: gzip``, the body is
    decompressed before it is written; otherwise it is copied verbatim.

    Args:
      res: Response made by urllib2.urlopen.
      dest: Destination path.
    """
    if res.info().get("Content-Encoding") == "gzip":
        # The whole compressed body is read into memory first so that
        # GzipFile is handed an ordinary in-memory file object.
        res = gzip.GzipFile(fileobj=io.BytesIO(res.read()))

    with open(dest, "wb") as fp:
        shutil.copyfileobj(res, fp)

def curl(url, dest):
    """ Download an object using curl.

    Despite the name, no external curl process is started: the URL is
    fetched in-process through ``_open_url`` and written to ``dest`` by
    ``_copy_response``.

    Args:
      url: Parsed URL specifying an object.
      dest: Destination path.

    Returns:
      Path for the downloaded file.
    """
    with contextlib.closing(_open_url(urlparse.urlunparse(url))) as res:
        _copy_response(res, dest)
    return dest
def gsutil(url, dest):
    """ Download an object using gsutil.

    Runs ``gsutil cp <url> <dest>`` as a child process, forwarding its
    output to this process's stdout/stderr, and waits for it to finish.
    The exit status of gsutil is not inspected.

    Args:
      url: Parsed URL specifying an object.
      dest: Destination path.

    Returns:
      Path for the downloaded file.
    """
    p = subprocess.Popen(
        ["gsutil", "cp", urlparse.urlunparse(url), dest],
        stdout=sys.stdout, stderr=sys.stderr)
    p.wait()
    return dest
def dropbox(url, dest):
    """ Download an object from dropbox.

    The object is requested from ``https://<host><path>?dl=1``. When the
    Content-Disposition header of the response names a file ending in
    ``.zip``, ``.zip`` is appended to ``dest`` before the body is written.

    Args:
      url: Parsed URL specifying an object.
      dest: Destination path.

    Returns:
      Downloaded filename.
    """
    new_url = "https://{host}{path}?dl=1".format(
        host=url.netloc, path=url.path)

    with contextlib.closing(_open_url(new_url)) as res:
        # The header may be absent; fall back to an empty string so that
        # re.search is never handed None.
        disposition = res.info().get("content-disposition") or ""
        match = re.search("filename=\"(.*)\";", disposition)
        if match and match.group(1).endswith(".zip"):
            dest += ".zip"
        _copy_response(res, dest)

    return dest
def download(url, unzip=True):
    """ Download an object specified by a url.

    Url can have a destination path. The format is

    - scheme://host/path
    - scheme://host/path:dest

    where dest is the destination path. Scheme is one of http, https, gs,
    dropbox.

    Note that the destination is detected by the presence of a second
    ``:`` in the url, so everything after the last ``:`` is taken as dest
    (this includes an explicit port such as ``http://host:8080/path``).

    Args:
      url: An extended url specifying the url of an object and an
        destination path.
      unzip: If True and the object specified by the url is zipped, unzip
        them. Downloaded ``.tar.gz`` and ``.tar`` files are unpacked
        regardless of this flag.
    """
    # Check the url contains a destination path.
    dest = "."
    if url.find(":") != url.rfind(":"):
        dest = url[url.rfind(":") + 1:]
        url = url[:url.rfind(":")]

    # Parse the URL.
    purl = urlparse.urlparse(url)

    # If the destination path is a directory (it exists as one, or it is
    # written with a trailing slash), use filename as same as URL.
    if os.path.isdir(dest) or dest.endswith("/"):
        dest = os.path.join(dest, purl.path[purl.path.rfind("/") + 1:])

    # Choose downloader.
    downloader = curl
    if purl.scheme == "gs":
        downloader = gsutil
    elif purl.scheme == "dropbox":
        downloader = dropbox

    LOGGER.info("Downloading %s to %s", url, dest)
    res = downloader(purl, dest)

    # dirname is "" for a bare filename; an empty string is not a usable
    # working directory, so fall back to the current directory.
    workdir = os.path.dirname(res) or "."

    if unzip and res.endswith(".zip"):
        # If downloaded file is a zip, unzip and remove it.
        LOGGER.info("Unzipping %s", res)
        p = subprocess.Popen(
            ["unzip", "-o", "-d", workdir, res],
            stdout=sys.stdout, stderr=sys.stderr)
        p.communicate()
        os.remove(res)

    elif res.endswith(".tar.gz"):
        LOGGER.info("Unpacking %s", res)
        # tar runs inside workdir, so hand it the archive's absolute path.
        p = subprocess.Popen(
            ["tar", "-zxvf", os.path.abspath(res)],
            cwd=workdir, stdout=sys.stdout, stderr=sys.stderr)
        p.communicate()
        os.remove(res)

    elif res.endswith(".tar"):
        LOGGER.info("Unpacking %s", res)
        # tar runs inside workdir, so hand it the archive's absolute path.
        p = subprocess.Popen(
            ["tar", "-xvf", os.path.abspath(res)],
            cwd=workdir, stdout=sys.stdout, stderr=sys.stderr)
        p.communicate()
        os.remove(res)