[project @ Arch-1:robey@lag.net--2005-master-shake%paramiko--dev--1--patch-46]

Add SFTPFile.check and server support (and a test). It's an SFTP extension that allows a client to retrieve the hash of part or all of a file without downloading it. We're probably the only ones who implement it so far.
This commit is contained in:
Robey Pointer 2005-07-18 05:43:44 +00:00
parent e9ccd7ea20
commit d8ee5e2a4a
5 changed files with 157 additions and 5 deletions

5
README
View File

@ -238,12 +238,11 @@ v0.9 FEAROW
* ctr forms of ciphers are missing (blowfish-ctr, aes128-ctr, aes256-ctr)
* could channels have a __del__ if i used weak pointers?
* cool sftp extension: retrieve MD5 or SHA1 of a section of a file
* SFTPClient.from_url('sftp://robey@arch.lag.net/folder/filename', 'r+')
keep cache of opened sftp clients by (host, port, username)
how to auth? how to check host key?
... maybe just a util function that parses the url and returns components
* sftp protocol 6 support (ugh....) -- once it settles down more
* why are big files so slow to transfer? profiling needed...
* what is psyco?

View File

@ -110,7 +110,12 @@ class BaseSFTP (object):
return version
def _send_server_version(self):
self._send_packet(CMD_VERSION, struct.pack('>I', _VERSION))
# advertise that we support "check-file"
extension_pairs = [ 'check-file', 'md5,sha1' ]
msg = Message()
msg.add_int(_VERSION)
msg.add(*extension_pairs)
self._send_packet(CMD_VERSION, str(msg))
t, data = self._read_packet()
if t != CMD_INIT:
raise SFTPError('Incompatible sftp protocol')

View File

@ -130,6 +130,60 @@ class SFTPFile (BufferedFile):
if t != CMD_ATTRS:
raise SFTPError('Expected attributes')
return SFTPAttributes._from_msg(msg)
def check(self, hash_algorithm, offset=0, length=0, block_size=0):
    """
    Have the server compute a hash over some or all of this file and hand
    it back, so the data itself never has to be downloaded.  This is handy
    for confirming that an upload or download succeeded, or for rsync-style
    comparisons.

    Hashing begins at C{offset} and covers C{length} bytes.  A C{length}
    of 0 means "through the end of the file", so leaving both C{offset}
    and C{length} at 0 hashes the whole file.

    With the default C{block_size} of 0, the result is a single raw digest
    (a 16-byte string for MD5, 20 bytes for SHA-1).  With a non-zero
    C{block_size}, the range from C{offset} to C{offset + length} is cut
    into consecutive chunks of C{block_size} bytes, each chunk is hashed
    on its own, and the digests are returned concatenated in order.

    For instance, C{check('sha1', 0, 1024, 512)} yields a 40-byte string:
    the SHA-1 of the first 512 bytes of the file followed by the SHA-1 of
    the next 512 bytes.

    @param hash_algorithm: name of the hash algorithm to request (usually
        C{"sha1"} or C{"md5"})
    @type hash_algorithm: str
    @param offset: position in the file at which hashing starts (0 is the
        beginning of the file)
    @type offset: int or long
    @param length: how many bytes to hash (0 means keep going to the end
        of the file)
    @type length: int or long
    @param block_size: how many bytes each returned digest covers (must
        not be less than 256; 0 means a single digest for the whole
        segment)
    @type block_size: int
    @return: the raw digest of every block, concatenated into one string
    @rtype: str

    @note: Many (most?) servers don't support this extension yet.

    @raise IOError: if the server doesn't support the "check-file"
        extension, or possibly doesn't support the requested hash
        algorithm

    @since: 1.4
    """
    request_args = (self.handle, hash_algorithm, long(offset), long(length),
                    block_size)
    t, msg = self.sftp._request(CMD_EXTENDED, 'check-file', *request_args)
    # the reply carries two strings (named "ext" and "alg" by the original
    # code) ahead of the digest bytes; read past them to reach the payload
    msg.get_string()
    msg.get_string()
    return msg.get_remainder()
### internals...

View File

@ -23,6 +23,7 @@ Server-mode SFTP support.
"""
import os, errno
from Crypto.Hash import MD5, SHA
from common import *
from server import SubsystemHandler
from sftp import *
@ -30,6 +31,13 @@ from sftp_si import *
from sftp_attr import *
# Hash names a client may ask for through the "check-file" extension, each
# mapped to the Crypto.Hash module that implements it.
_hash_class = dict(md5=MD5, sha1=SHA)
class SFTPServer (BaseSFTP, SubsystemHandler):
"""
Server-side SFTP subsystem support. Since this is a L{SubsystemHandler},
@ -204,6 +212,65 @@ class SFTPServer (BaseSFTP, SubsystemHandler):
attr._pack(msg)
self._send_packet(CMD_NAME, str(msg))
def _check_file(self, request_number, msg):
    """
    Handle a "check-file" extended request: hash a region of an open file
    and send the digest(s) back to the client.

    The request payload is read from C{msg} in this order: file handle,
    list of acceptable hash algorithm names, 64-bit start offset, 64-bit
    length (0 means "to the end of the file"), and block size (0 means
    "one hash over the whole region").  The first algorithm in the
    client's list that appears in C{_hash_class} is used.

    On success a C{CMD_EXTENDED_REPLY} packet is sent containing the
    request number, the string C{'check-file'}, the chosen algorithm name
    and the concatenated raw digests.  On failure a status packet is sent
    instead.  If the region extends past the end of the file, hashing
    stops at end-of-file.

    @param request_number: request id to echo back in the response
    @param msg: the remainder of the request message, positioned just
        after the extension name
    """
    # this extension actually comes from v6 protocol, but since it's an
    # extension, i feel like we can reasonably support it backported.
    # it's very useful for verifying uploaded files or checking for
    # rsync-like differences between local and remote files.
    handle = msg.get_string()
    alg_list = msg.get_list()
    start = msg.get_int64()
    length = msg.get_int64()
    block_size = msg.get_int()
    if handle not in self.file_table:
        self._send_status(request_number, SFTP_BAD_MESSAGE, 'Invalid handle')
        return
    f = self.file_table[handle]
    for x in alg_list:
        if x in _hash_class:
            algname = x
            alg = _hash_class[x]
            break
    else:
        self._send_status(request_number, SFTP_FAILURE, 'No supported hash types found')
        return
    if length == 0:
        st = f.stat()
        if not issubclass(type(st), SFTPAttributes):
            # stat() handed back an error code instead of attributes
            self._send_status(request_number, st, 'Unable to stat file')
            return
        length = st.st_size - start
    if block_size == 0:
        # one hash over the whole region; the 256-byte minimum only
        # applies to a block size the client explicitly asked for
        block_size = length
    elif block_size < 256:
        self._send_status(request_number, SFTP_FAILURE, 'Block size too small')
        return

    digests = []
    offset = start
    end = start + length
    hit_eof = False
    while (offset < end) and not hit_eof:
        blocklen = min(block_size, end - offset)
        count = 0
        hasher = alg.new()
        while count < blocklen:
            # don't try to read more than about 64KB at a time, and never
            # read past the end of the current block
            data = f.read(offset, min(blocklen - count, 65536))
            if not isinstance(data, str):
                # read() handed back an error code instead of data
                self._send_status(request_number, data, 'Unable to hash file')
                return
            if len(data) == 0:
                # end of file reached before the requested region ended;
                # without this check the loop would never terminate
                hit_eof = True
                break
            hasher.update(data)
            count += len(data)
            # advance per chunk so each read continues where the last ended
            offset += len(data)
        if hit_eof and (count == 0):
            # nothing was read for this block, so there is nothing to report
            break
        digests.append(hasher.digest())

    reply = Message()
    reply.add_int(request_number)
    reply.add_string('check-file')
    reply.add_string(algname)
    reply.add_bytes(''.join(digests))
    self._send_packet(CMD_EXTENDED_REPLY, str(reply))
def _convert_pflags(self, pflags):
"convert SFTP-style open() flags to python's os.open() flags"
if (pflags & SFTP_FLAG_READ) and (pflags & SFTP_FLAG_WRITE):
@ -340,6 +407,12 @@ class SFTPServer (BaseSFTP, SubsystemHandler):
path = msg.get_string()
rpath = self.server.canonicalize(path)
self._response(request_number, CMD_NAME, 1, rpath, '', SFTPAttributes())
elif t == CMD_EXTENDED:
tag = msg.get_string()
if tag == 'check-file':
self._check_file(request_number, msg)
else:
send._send_status(request_number, SFTP_OP_UNSUPPORTED)
else:
self._send_status(request_number, SFTP_OP_UNSUPPORTED)

View File

@ -565,7 +565,28 @@ class SFTPTest (unittest.TestCase):
f = open(localname, 'r')
self.assertEquals(text, f.read(128))
f.close()
os.unlink(localname)
sftp.unlink(FOLDER + '/bunny.txt')
def test_K_check(self):
    """
    verify that file.check() works against our own server.
    (it's an sftp extension that we support, and we may be the only ones
    who support it.)
    """
    f = sftp.open(FOLDER + '/kitty.txt', 'w')
    f.write('here kitty kitty' * 64)
    f.close()

    try:
        f = sftp.open(FOLDER + '/kitty.txt', 'r')
        try:
            # whole file, single digest
            digest = f.check('sha1')
            self.assertEquals('91059CFC6615941378D413CB5ADAF4C5EB293402', paramiko.util.hexify(digest))
            # first 512 bytes only
            digest = f.check('md5', 0, 512)
            self.assertEquals('93DE4788FCA28D471516963A1FE3856A', paramiko.util.hexify(digest))
            # whole file in 510-byte blocks: three md5 digests back to back
            digest = f.check('md5', 0, 0, 510)
            self.assertEquals('EB3B45B8CD55A0707D99B177544A319F373183D241432BB2157AB9E46358C4AC90370B5CADE5D90336FC1716F90B36D6',
                              paramiko.util.hexify(digest))
        finally:
            # release the remote handle before the file is removed
            f.close()
    finally:
        sftp.unlink(FOLDER + '/kitty.txt')