Source code for genometools.misc.download

# Copyright (c) 2015-2017 Florian Wagner
#
# This file is part of GenomeTools.
#
# GenomeTools is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License, Version 3,
# as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""HTTP and FTP download functions."""

from __future__ import (absolute_import, division,
                        print_function, unicode_literals)
_oldstr = str
from builtins import *

import logging
import os
import shutil
import ftplib

import six
if six.PY3:
    from urllib import parse as urlparse
else:
    import urlparse

import requests

_logger = logging.getLogger(__name__)


[docs]def http_download(url, download_file, overwrite=False, raise_http_exception=True): """Download a file over HTTP(S). See: http://stackoverflow.com/a/13137873/5651021 Parameters ---------- url : str The URL. download_file : str The path of the local file to write to. overwrite : bool, optional Whether to overwrite an existing file (if present). [False] raise_http_exception : bool, optional Whether to raise an exception if there is an HTTP error. [True] Raises ------ OSError If the file already exists and overwrite is set to False. `requests.HTTPError` If an HTTP error occurred and `raise_http_exception` was set to `True`. """ assert isinstance(url, (str, _oldstr)) assert isinstance(download_file, (str, _oldstr)) assert isinstance(overwrite, bool) assert isinstance(raise_http_exception, bool) u = urlparse.urlparse(url) assert u.scheme in ['http', 'https'] if os.path.isfile(download_file) and not overwrite: raise OSError('File "%s" already exists!' % download_file) r = requests.get(url, stream=True) if raise_http_exception: r.raise_for_status() if r.status_code == 200: with open(download_file, 'wb') as fh: r.raw.decode_content = True shutil.copyfileobj(r.raw, fh) _logger.info('Downloaded file "%s".', download_file) else: _logger.error('Failed to download url "%s": HTTP status %d/%s', url, r.status_code, r.reason)
[docs]def ftp_download(url, download_file, if_exists='error', user_name='anonymous', password='', blocksize=4194304): """Downloads a file from an FTP server. Parameters ---------- url : str The URL of the file to download. download_file : str The path of the local file to download to. if_exists : str, optional Desired behavior when the download file already exists. One of: 'error' - Raise an OSError 'skip' - Do nothing, only report a warning. 'overwrite' - Overwrite the file. reporting a warning. Default: 'error'. user_name : str, optional The user name to use for logging into the FTP server. ['anonymous'] password : str, optional The password to use for logging into the FTP server. [''] blocksize : int, optional The blocksize (in bytes) to use for downloading. [4194304] Returns ------- None """ assert isinstance(url, (str, _oldstr)) assert isinstance(download_file, (str, _oldstr)) assert isinstance(if_exists, (str, _oldstr)) assert isinstance(user_name, (str, _oldstr)) assert isinstance(password, (str, _oldstr)) u = urlparse.urlparse(url) assert u.scheme == 'ftp' if if_exists not in ['error', 'skip', 'overwrite']: raise ValueError('"if_exists" must be "error", "skip", or "overwrite" ' '(was: "%s").', str(if_exists)) if os.path.isfile(download_file): if if_exists == 'error': raise OSError('File "%s" already exists.' % download_file) elif if_exists == 'skip': _logger.warning('File "%s" already exists. Skipping...', download_file) return else: _logger.warning('Overwriting file "%s"...', download_file) ftp_server = u.netloc ftp_path = u.path if six.PY3: with ftplib.FTP(ftp_server) as ftp: ftp.login(user_name, password) with open(download_file, 'wb') as ofh: ftp.retrbinary('RETR %s' % ftp_path, callback=ofh.write, blocksize=blocksize) else: ftp = ftplib.FTP(ftp_server) ftp.login(user_name, password) with open(download_file, 'wb') as ofh: ftp.retrbinary('RETR %s' % ftp_path, callback=ofh.write, blocksize=blocksize) ftp.close() _logger.info('Downloaded file "%s" over FTP.', download_file)