From d8ee5e2a4a0e425320204a56d2470d56e228acda Mon Sep 17 00:00:00 2001 From: Robey Pointer Date: Mon, 18 Jul 2005 05:43:44 +0000 Subject: [PATCH] [project @ Arch-1:robey@lag.net--2005-master-shake%paramiko--dev--1--patch-46] add SFTPFile.check and server support (and test) -- it's an sftp extension that allows a client to retrieve the hash of part or all of a file without downloading it. we're probably the only ones who implement it yet --- README | 5 ++- paramiko/sftp.py | 7 +++- paramiko/sftp_file.py | 54 ++++++++++++++++++++++++++++++ paramiko/sftp_server.py | 73 +++++++++++++++++++++++++++++++++++++++++ tests/test_sftp.py | 23 ++++++++++++- 5 files changed, 157 insertions(+), 5 deletions(-) diff --git a/README b/README index b3ebc27..b2cf76e 100644 --- a/README +++ b/README @@ -238,12 +238,11 @@ v0.9 FEAROW * ctr forms of ciphers are missing (blowfish-ctr, aes128-ctr, aes256-ctr) -* could channels have a __del__ if i used weak pointers? - -* cool sftp extension: retreive MD5 or SHA1 of section of a file * SFTPClient.from_url('sftp://robey@arch.lag.net/folder/filename', 'r+') keep cache of opened sftp clients by (host, port, username) how to auth? how to check host key? + ... maybe just a util function that parses the url and returns components +* sftp protocol 6 support (ugh....) -- once it settles down more * why are big files so slow to transfer? profiling needed... * what is psyco? 
diff --git a/paramiko/sftp.py b/paramiko/sftp.py index 917d0ee..78c204b 100644 --- a/paramiko/sftp.py +++ b/paramiko/sftp.py @@ -110,7 +110,12 @@ class BaseSFTP (object): return version def _send_server_version(self): - self._send_packet(CMD_VERSION, struct.pack('>I', _VERSION)) + # advertise that we support "check-file" + extension_pairs = [ 'check-file', 'md5,sha1' ] + msg = Message() + msg.add_int(_VERSION) + msg.add(*extension_pairs) + self._send_packet(CMD_VERSION, str(msg)) t, data = self._read_packet() if t != CMD_INIT: raise SFTPError('Incompatible sftp protocol') diff --git a/paramiko/sftp_file.py b/paramiko/sftp_file.py index a359510..6b8e935 100644 --- a/paramiko/sftp_file.py +++ b/paramiko/sftp_file.py @@ -130,6 +130,60 @@ class SFTPFile (BufferedFile): if t != CMD_ATTRS: raise SFTPError('Expected attributes') return SFTPAttributes._from_msg(msg) + + def check(self, hash_algorithm, offset=0, length=0, block_size=0): + """ + Ask the server for a hash of a section of this file. This can be used + to verify a successful upload or download, or for various rsync-like + operations. + + The file is hashed from C{offset}, for C{length} bytes. If C{length} + is 0, the remainder of the file is hashed. Thus, if both C{offset} + and C{length} are zero, the entire file is hashed. + + Normally, C{block_size} will be 0 (the default), and this method will + return a byte string representing the requested hash (for example, a + string of length 16 for MD5, or 20 for SHA-1). If a non-zero + C{block_size} is given, each chunk of the file (from C{offset} to + C{offset + length}) of C{block_size} bytes is computed as a separate + hash. The hash results are all concatenated and returned as a single + string. + + For example, C{check('sha1', 0, 1024, 512)} will return a string of + length 40. The first 20 bytes will be the SHA-1 of the first 512 bytes + of the file, and the last 20 bytes will be the SHA-1 of the next 512 + bytes. 
+ + @param hash_algorithm: the name of the hash algorithm to use (normally + C{"sha1"} or C{"md5"}) + @type hash_algorithm: str + @param offset: offset into the file to begin hashing (0 means to start + from the beginning) + @type offset: int or long + @param length: number of bytes to hash (0 means continue to the end of + the file) + @type length: int or long + @param block_size: number of bytes to hash per result (must not be less + than 256; 0 means to compute only one hash of the entire segment) + @type block_size: int + @return: string of bytes representing the hash of each block, + concatenated together + @rtype: str + + @note: Many (most?) servers don't support this extension yet. + + @raise IOError: if the server doesn't support the "check-file" + extension, or possibly doesn't support the hash algorithm + requested + + @since: 1.4 + """ + t, msg = self.sftp._request(CMD_EXTENDED, 'check-file', self.handle, + hash_algorithm, long(offset), long(length), block_size) + ext = msg.get_string() + alg = msg.get_string() + data = msg.get_remainder() + return data ### internals... diff --git a/paramiko/sftp_server.py b/paramiko/sftp_server.py index 94a9e6c..13e05c0 100644 --- a/paramiko/sftp_server.py +++ b/paramiko/sftp_server.py @@ -23,6 +23,7 @@ Server-mode SFTP support. """ import os, errno +from Crypto.Hash import MD5, SHA from common import * from server import SubsystemHandler from sftp import * @@ -30,6 +31,13 @@ from sftp_si import * from sftp_attr import * +# known hash algorithms for the "check-file" extension +_hash_class = { + 'sha1': SHA, + 'md5': MD5, +} + + class SFTPServer (BaseSFTP, SubsystemHandler): """ Server-side SFTP subsystem support. 
Since this is a L{SubsystemHandler}, @@ -204,6 +212,65 @@ class SFTPServer (BaseSFTP, SubsystemHandler): attr._pack(msg) self._send_packet(CMD_NAME, str(msg)) + def _check_file(self, request_number, msg): + # this extension actually comes from v6 protocol, but since it's an + # extension, i feel like we can reasonably support it backported. + # it's very useful for verifying uploaded files or checking for + # rsync-like differences between local and remote files. + handle = msg.get_string() + alg_list = msg.get_list() + start = msg.get_int64() + length = msg.get_int64() + block_size = msg.get_int() + if not self.file_table.has_key(handle): + self._send_status(request_number, SFTP_BAD_MESSAGE, 'Invalid handle') + return + f = self.file_table[handle] + for x in alg_list: + if x in _hash_class: + algname = x + alg = _hash_class[x] + break + else: + self._send_status(request_number, SFTP_FAILURE, 'No supported hash types found') + return + if length == 0: + st = f.stat() + if not issubclass(type(st), SFTPAttributes): + self._send_status(request_number, st, 'Unable to stat file') + return + length = st.st_size - start + if block_size == 0: + block_size = length + elif block_size < 256: + self._send_status(request_number, SFTP_FAILURE, 'Block size too small') + return + + sum = '' + offset = start + while offset < start + length: + blocklen = min(block_size, start + length - offset) + # read at most about 64KB per call, and never past the end of this block + chunklen = min(blocklen, 65536) + count = 0 + hash = alg.new() + while count < blocklen: + data = f.read(offset, min(chunklen, blocklen - count)) + if not type(data) is str: + self._send_status(request_number, data, 'Unable to hash file') + return + hash.update(data) + count += len(data) + offset += len(data) + sum += hash.digest() + + msg = Message() + msg.add_int(request_number) + msg.add_string('check-file') + msg.add_string(algname) + msg.add_bytes(sum) + self._send_packet(CMD_EXTENDED_REPLY, str(msg)) + def _convert_pflags(self, pflags): "convert SFTP-style
open() flags to python's os.open() flags" if (pflags & SFTP_FLAG_READ) and (pflags & SFTP_FLAG_WRITE): @@ -340,6 +407,12 @@ class SFTPServer (BaseSFTP, SubsystemHandler): path = msg.get_string() rpath = self.server.canonicalize(path) self._response(request_number, CMD_NAME, 1, rpath, '', SFTPAttributes()) + elif t == CMD_EXTENDED: + tag = msg.get_string() + if tag == 'check-file': + self._check_file(request_number, msg) + else: + self._send_status(request_number, SFTP_OP_UNSUPPORTED) else: self._send_status(request_number, SFTP_OP_UNSUPPORTED) diff --git a/tests/test_sftp.py b/tests/test_sftp.py index 992c9dc..d5de063 100755 --- a/tests/test_sftp.py +++ b/tests/test_sftp.py @@ -565,7 +565,28 @@ class SFTPTest (unittest.TestCase): f = open(localname, 'r') self.assertEquals(text, f.read(128)) f.close() - + os.unlink(localname) sftp.unlink(FOLDER + '/bunny.txt') + + def test_K_check(self): + """ + verify that file.check() works against our own server. + (it's an sftp extension that we support, and may be the only ones who + support it.) + """ + f = sftp.open(FOLDER + '/kitty.txt', 'w') + f.write('here kitty kitty' * 64) + f.close() + try: + f = sftp.open(FOLDER + '/kitty.txt', 'r') + sum = f.check('sha1') + self.assertEquals('91059CFC6615941378D413CB5ADAF4C5EB293402', paramiko.util.hexify(sum)) + sum = f.check('md5', 0, 512) + self.assertEquals('93DE4788FCA28D471516963A1FE3856A', paramiko.util.hexify(sum)) + sum = f.check('md5', 0, 0, 510) + self.assertEquals('EB3B45B8CD55A0707D99B177544A319F373183D241432BB2157AB9E46358C4AC90370B5CADE5D90336FC1716F90B36D6', + paramiko.util.hexify(sum)) + finally: + sftp.unlink(FOLDER + '/kitty.txt')