[project @ Arch-1:robey@lag.net--2005-master-shake%paramiko--dev--1--patch-46]

add SFTPFile.check and server support (and test) -- it's an sftp extension that allows a client to retrieve the hash of part or all of a file without downloading it.  we're probably the only ones who implement it yet
This commit is contained in:
Robey Pointer 2005-07-18 05:43:44 +00:00
parent e9ccd7ea20
commit d8ee5e2a4a
5 changed files with 157 additions and 5 deletions

5
README
View File

@ -238,12 +238,11 @@ v0.9 FEAROW
* ctr forms of ciphers are missing (blowfish-ctr, aes128-ctr, aes256-ctr) * ctr forms of ciphers are missing (blowfish-ctr, aes128-ctr, aes256-ctr)
* could channels have a __del__ if i used weak pointers?
* cool sftp extension: retrieve MD5 or SHA1 of a section of a file
* SFTPClient.from_url('sftp://robey@arch.lag.net/folder/filename', 'r+') * SFTPClient.from_url('sftp://robey@arch.lag.net/folder/filename', 'r+')
keep cache of opened sftp clients by (host, port, username) keep cache of opened sftp clients by (host, port, username)
how to auth? how to check host key? how to auth? how to check host key?
... maybe just a util function that parses the url and returns components
* sftp protocol 6 support (ugh....) -- once it settles down more
* why are big files so slow to transfer? profiling needed... * why are big files so slow to transfer? profiling needed...
* what is psyco? * what is psyco?

View File

@ -110,7 +110,12 @@ class BaseSFTP (object):
return version return version
def _send_server_version(self): def _send_server_version(self):
self._send_packet(CMD_VERSION, struct.pack('>I', _VERSION)) # advertise that we support "check-file"
extension_pairs = [ 'check-file', 'md5,sha1' ]
msg = Message()
msg.add_int(_VERSION)
msg.add(*extension_pairs)
self._send_packet(CMD_VERSION, str(msg))
t, data = self._read_packet() t, data = self._read_packet()
if t != CMD_INIT: if t != CMD_INIT:
raise SFTPError('Incompatible sftp protocol') raise SFTPError('Incompatible sftp protocol')

View File

@ -130,6 +130,60 @@ class SFTPFile (BufferedFile):
if t != CMD_ATTRS: if t != CMD_ATTRS:
raise SFTPError('Expected attributes') raise SFTPError('Expected attributes')
return SFTPAttributes._from_msg(msg) return SFTPAttributes._from_msg(msg)
def check(self, hash_algorithm, offset=0, length=0, block_size=0):
    """
    Have the server compute a hash over part (or all) of this file and
    return it, so the contents can be compared without transferring them.
    Handy for confirming that an upload or download succeeded, and for
    rsync-style comparisons.

    Hashing covers C{length} bytes beginning at C{offset}.  A C{length} of
    0 means "through the end of the file", so leaving both C{offset} and
    C{length} at zero hashes the whole file.

    With the default C{block_size} of 0, the result is one hash of the
    whole segment: a byte string (for example, 16 bytes for MD5 or 20 for
    SHA-1).  With a non-zero C{block_size}, the segment from C{offset} to
    C{offset + length} is split into pieces of C{block_size} bytes, every
    piece is hashed separately, and the hashes are returned joined
    end-to-end in one string.

    As an illustration, C{check('sha1', 0, 1024, 512)} yields a 40-byte
    string: bytes 0-19 are the SHA-1 of the file's first 512 bytes, and
    bytes 20-39 are the SHA-1 of the following 512 bytes.

    @param hash_algorithm: name of the hash algorithm to request (usually
        C{"sha1"} or C{"md5"})
    @type hash_algorithm: str
    @param offset: position in the file where hashing starts (0 is the
        beginning of the file)
    @type offset: int or long
    @param length: how many bytes to hash (0 means keep going to the end
        of the file)
    @type length: int or long
    @param block_size: how many bytes each returned hash covers (must not
        be less than 256; 0 means a single hash of the entire segment)
    @type block_size: int
    @return: the hash of each block, as raw bytes, concatenated together
    @rtype: str

    @note: Many (most?) servers don't support this extension yet.

    @raise IOError: if the server doesn't support the "check-file"
        extension, or possibly doesn't support the requested hash
        algorithm

    @since: 1.4
    """
    _, reply = self.sftp._request(CMD_EXTENDED, 'check-file', self.handle,
                                  hash_algorithm, long(offset), long(length),
                                  block_size)
    # two strings precede the hash data in the reply; they are read only
    # to move past them, and their values are not used here
    reply.get_string()
    reply.get_string()
    return reply.get_remainder()
### internals... ### internals...

View File

@ -23,6 +23,7 @@ Server-mode SFTP support.
""" """
import os, errno import os, errno
from Crypto.Hash import MD5, SHA
from common import * from common import *
from server import SubsystemHandler from server import SubsystemHandler
from sftp import * from sftp import *
@ -30,6 +31,13 @@ from sftp_si import *
from sftp_attr import * from sftp_attr import *
# hash modules available to the "check-file" extension, looked up by the
# algorithm name that appears in a request's algorithm list
_hash_class = dict(sha1=SHA, md5=MD5)
class SFTPServer (BaseSFTP, SubsystemHandler): class SFTPServer (BaseSFTP, SubsystemHandler):
""" """
Server-side SFTP subsystem support. Since this is a L{SubsystemHandler}, Server-side SFTP subsystem support. Since this is a L{SubsystemHandler},
@ -204,6 +212,65 @@ class SFTPServer (BaseSFTP, SubsystemHandler):
attr._pack(msg) attr._pack(msg)
self._send_packet(CMD_NAME, str(msg)) self._send_packet(CMD_NAME, str(msg))
def _check_file(self, request_number, msg):
    """
    Answer a "check-file" extended request: hash a range of an open file
    and send the hash(es) back in a C{CMD_EXTENDED_REPLY} packet.

    The remainder of C{msg} is read as: file handle (string), list of
    acceptable hash algorithm names, start offset (int64), length (int64),
    and block size (int).  The first listed algorithm found in
    C{_hash_class} is used.  A length of 0 means "to the end of the file"
    (the size is taken from the handle's C{stat()}).  A block size of 0
    means exactly one hash over the whole range; otherwise one hash is
    produced per C{block_size} bytes and the hashes are concatenated.

    On any problem a status reply is sent instead of the hash reply:
    unknown handle, no supported algorithm, a failing C{stat()} or
    C{read()} (their error code is passed through), a start offset past
    the end of the file, or a non-zero block size below 256.

    @param request_number: id of the request being answered
    @param msg: the request message, positioned just after the extension
        name
    """
    # this extension actually comes from v6 protocol, but since it's an
    # extension, i feel like we can reasonably support it backported.
    # it's very useful for verifying uploaded files or checking for
    # rsync-like differences between local and remote files.
    handle = msg.get_string()
    alg_list = msg.get_list()
    start = msg.get_int64()
    length = msg.get_int64()
    block_size = msg.get_int()
    if handle not in self.file_table:
        self._send_status(request_number, SFTP_BAD_MESSAGE, 'Invalid handle')
        return
    f = self.file_table[handle]
    for x in alg_list:
        if x in _hash_class:
            algname = x
            alg = _hash_class[x]
            break
    else:
        self._send_status(request_number, SFTP_FAILURE, 'No supported hash types found')
        return
    if length == 0:
        st = f.stat()
        if not isinstance(st, SFTPAttributes):
            # stat() handed back an error code rather than attributes
            self._send_status(request_number, st, 'Unable to stat file')
            return
        length = st.st_size - start
        if length < 0:
            self._send_status(request_number, SFTP_FAILURE, 'Offset is past end of file')
            return
    # the 256-byte minimum applies only to a block size the client asked
    # for; "0" (one hash of everything) must also work on small files
    whole_range = (block_size == 0)
    if whole_range:
        block_size = length
    elif block_size < 256:
        self._send_status(request_number, SFTP_FAILURE, 'Block size too small')
        return

    digests = []
    offset = start
    end = start + length
    # the second condition guarantees one hash for a whole-range request
    # even when the range is empty
    while (offset < end) or (whole_range and not digests):
        blocklen = min(block_size, end - offset)
        hasher = alg.new()
        count = 0
        while count < blocklen:
            # don't try to read more than about 64KB at a time, and never
            # read past the end of the current block
            chunklen = min(blocklen - count, 65536)
            data = f.read(offset + count, chunklen)
            if not isinstance(data, str):
                # read() handed back an error code rather than data
                self._send_status(request_number, data, 'Unable to hash file')
                return
            if len(data) == 0:
                # end of file reached before the requested range ended
                break
            hasher.update(data)
            count += len(data)
        if (count > 0) or whole_range:
            digests.append(hasher.digest())
        if count < blocklen:
            # file ended early; there is nothing further to hash
            break
        offset += blocklen

    reply = Message()
    reply.add_int(request_number)
    reply.add_string('check-file')
    reply.add_string(algname)
    reply.add_bytes(''.join(digests))
    self._send_packet(CMD_EXTENDED_REPLY, str(reply))
def _convert_pflags(self, pflags): def _convert_pflags(self, pflags):
"convert SFTP-style open() flags to python's os.open() flags" "convert SFTP-style open() flags to python's os.open() flags"
if (pflags & SFTP_FLAG_READ) and (pflags & SFTP_FLAG_WRITE): if (pflags & SFTP_FLAG_READ) and (pflags & SFTP_FLAG_WRITE):
@ -340,6 +407,12 @@ class SFTPServer (BaseSFTP, SubsystemHandler):
path = msg.get_string() path = msg.get_string()
rpath = self.server.canonicalize(path) rpath = self.server.canonicalize(path)
self._response(request_number, CMD_NAME, 1, rpath, '', SFTPAttributes()) self._response(request_number, CMD_NAME, 1, rpath, '', SFTPAttributes())
elif t == CMD_EXTENDED:
tag = msg.get_string()
if tag == 'check-file':
self._check_file(request_number, msg)
else:
send._send_status(request_number, SFTP_OP_UNSUPPORTED)
else: else:
self._send_status(request_number, SFTP_OP_UNSUPPORTED) self._send_status(request_number, SFTP_OP_UNSUPPORTED)

View File

@ -565,7 +565,28 @@ class SFTPTest (unittest.TestCase):
f = open(localname, 'r') f = open(localname, 'r')
self.assertEquals(text, f.read(128)) self.assertEquals(text, f.read(128))
f.close() f.close()
os.unlink(localname) os.unlink(localname)
sftp.unlink(FOLDER + '/bunny.txt') sftp.unlink(FOLDER + '/bunny.txt')
def test_K_check(self):
    """
    verify that file.check() works against our own server.
    (it's an sftp extension that we support, and may be the only ones who
    support it.)
    """
    f = sftp.open(FOLDER + '/kitty.txt', 'w')
    f.write('here kitty kitty' * 64)
    f.close()

    try:
        f = sftp.open(FOLDER + '/kitty.txt', 'r')
        try:
            # one hash over the whole 1024-byte file
            digest = f.check('sha1')
            self.assertEquals('91059CFC6615941378D413CB5ADAF4C5EB293402', paramiko.util.hexify(digest))
            # one hash over just the first 512 bytes
            digest = f.check('md5', 0, 512)
            self.assertEquals('93DE4788FCA28D471516963A1FE3856A', paramiko.util.hexify(digest))
            # whole file in 510-byte blocks: three MD5 hashes back to back
            digest = f.check('md5', 0, 0, 510)
            self.assertEquals('EB3B45B8CD55A0707D99B177544A319F373183D241432BB2157AB9E46358C4AC90370B5CADE5D90336FC1716F90B36D6',
                              paramiko.util.hexify(digest))
        finally:
            # close the read handle before the file is removed
            f.close()
    finally:
        sftp.unlink(FOLDER + '/kitty.txt')