allow prefetch + readv to occur at the same time (even though it will be really inefficient). instead of a moving pointer, use the prefetched buffers as an indication of what we've downloaded so far. break up large readv requests into the max packet size. add 2 more unit tests to test this stuff.
parent 787b0b461d
commit c24db3e38c
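From the caller's side, the behavior this commit allows looks like the following sketch (Python 2, matching the era of this code; the Transport setup, host, credentials, and file name are illustrative assumptions, not part of the commit):

    import paramiko

    # illustrative connection setup -- any established SFTPClient works
    t = paramiko.Transport(('example.com', 22))
    t.connect(username='user', password='secret')
    sftp = paramiko.SFTPClient.from_transport(t)

    f = sftp.open('hongry.txt', 'r')
    f.prefetch()                  # kick off a background prefetch of the file
    head = f.read(1024)           # satisfied out of the prefetch buffers
    # readv() may now be issued while the prefetch is still in flight; it is
    # inefficient (overlapping downloads) but no longer breaks the reader.
    for chunk in f.readv([(512 * 1024, 793), (512 * 1024 + 793, 793)]):
        print len(chunk)
    f.close()
    t.close()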
paramiko/sftp_file.py
@@ -46,7 +46,6 @@ class SFTPFile (BufferedFile):
         self.pipelined = False
         self._prefetching = False
         self._prefetch_done = False
-        self._prefetch_so_far = 0
         self._prefetch_data = {}
         self._prefetch_reads = []
         self._saved_exception = None
@@ -84,37 +83,45 @@ class SFTPFile (BufferedFile):
             # may have outlived the Transport connection
             pass

+    def _data_in_prefetch_buffers(self, offset):
+        """
+        if a block of data is present in the prefetch buffers, at the given
+        offset, return the offset of the relevant prefetch buffer.  otherwise,
+        return None.  this guarantees nothing about the number of bytes
+        collected in the prefetch buffer so far.
+        """
+        k = [i for i in self._prefetch_data.keys() if i <= offset]
+        if len(k) == 0:
+            return None
+        index = max(k)
+        buf_offset = offset - index
+        if buf_offset >= len(self._prefetch_data[index]):
+            # it's not here
+            return None
+        return index
+
     def _read_prefetch(self, size):
         """
         read data out of the prefetch buffer, if possible.  if the data isn't
         in the buffer, return None.  otherwise, behaves like a normal read.
         """
         # while not closed, and haven't fetched past the current position, and haven't reached EOF...
-        while (self._prefetch_so_far <= self._realpos) and not self._closed:
-            if self._prefetch_done:
-                return None
+        while not self._prefetch_done and not self._closed:
+            offset = self._data_in_prefetch_buffers(self._realpos)
+            if offset is not None:
+                break
             self.sftp._read_response()
             self._check_exception()
-        k = self._prefetch_data.keys()
-        if len(k) == 0:
+        if offset is None:
             self._prefetching = False
-            return ''
-
-        # find largest offset < realpos
-        pos_list = [i for i in k if i <= self._realpos]
-        if len(pos_list) == 0:
             return None
-        index = max(pos_list)
-        prefetch = self._prefetch_data[index]
-        del self._prefetch_data[index]
+        prefetch = self._prefetch_data[offset]
+        del self._prefetch_data[offset]

-        buf_offset = self._realpos - index
+        buf_offset = self._realpos - offset
         if buf_offset > 0:
-            self._prefetch_data[index] = prefetch[:buf_offset]
+            self._prefetch_data[offset] = prefetch[:buf_offset]
             prefetch = prefetch[buf_offset:]
-        if buf_offset >= len(prefetch):
-            # it's not here.
-            return None
         if size < len(prefetch):
             self._prefetch_data[self._realpos + size] = prefetch[size:]
             prefetch = prefetch[:size]
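The core of the change: instead of a single high-water-mark pointer, each read response is filed in a dict keyed by file offset, and lookups scan that dict. A minimal standalone sketch of the same lookup logic (hypothetical names, outside the class):

    def data_in_buffers(buffers, offset):
        # buffers maps file-offset -> string of downloaded bytes
        k = [i for i in buffers.keys() if i <= offset]
        if len(k) == 0:
            return None      # nothing downloaded at or before this offset
        index = max(k)       # the only buffer that could cover offset
        if offset - index >= len(buffers[index]):
            return None      # the closest buffer ends before our offset
        return index

    buffers = {0: 'a' * 100, 200: 'b' * 50}
    assert data_in_buffers(buffers, 42) == 0       # inside the first buffer
    assert data_in_buffers(buffers, 150) is None   # falls in the gap
    assert data_in_buffers(buffers, 210) == 200    # inside the second buffer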
@@ -384,10 +391,22 @@ class SFTPFile (BufferedFile):
         # put the offsets in order, since we depend on that for determining
         # when the reads have finished.
         self.sftp._log(DEBUG, 'readv(%s, %r)' % (util.hexify(self.handle), chunks))
-        # FIXME: if prefetch() was already called (not readv), don't prefetch.
         ordered_chunks = list(chunks)
         ordered_chunks.sort(lambda x, y: cmp(x[0], y[0]))
-        self._start_prefetch(ordered_chunks)
+
+        # break up anything larger than the max read size
+        if len([size for offset, size in ordered_chunks if size > self.MAX_REQUEST_SIZE]) > 0:
+            read_chunks = []
+            for offset, size in ordered_chunks:
+                while size > 0:
+                    chunk_size = min(size, self.MAX_REQUEST_SIZE)
+                    read_chunks.append((offset, chunk_size))
+                    offset += chunk_size
+                    size -= chunk_size
+        else:
+            read_chunks = ordered_chunks
+
+        self._start_prefetch(read_chunks)
         # now we can just devolve to a bunch of read()s :)
         for x in chunks:
             self.seek(x[0])
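The splitting loop above is self-contained enough to trace by hand. A sketch of the same decomposition, assuming the class's 32 KB request cap, shows a single oversized chunk becoming several max-sized requests:

    MAX_REQUEST_SIZE = 32768   # the cap assumed here; mirrors SFTPFile's value

    def split_chunks(ordered_chunks, max_size=MAX_REQUEST_SIZE):
        read_chunks = []
        for offset, size in ordered_chunks:
            while size > 0:
                chunk_size = min(size, max_size)
                read_chunks.append((offset, chunk_size))
                offset += chunk_size
                size -= chunk_size
        return read_chunks

    # the 128KB request from test_8_large_readv becomes four 32KB requests:
    print split_chunks([(23 * 1024, 128 * 1024)])
    # [(23552, 32768), (56320, 32768), (89088, 32768), (121856, 32768)]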
@@ -406,8 +425,6 @@ class SFTPFile (BufferedFile):
     def _start_prefetch(self, chunks):
         self._prefetching = True
         self._prefetch_done = False
-        self._prefetch_so_far = chunks[0][0]
-        self._prefetch_data = {}
         self._prefetch_reads.extend(chunks)

         t = threading.Thread(target=self._prefetch_thread, args=(chunks,))
@@ -434,7 +451,6 @@ class SFTPFile (BufferedFile):
             offset, length = self._prefetch_reads.pop(0)
             assert length == len(data)
             self._prefetch_data[offset] = data
-            self._prefetch_so_far = offset + length
             if len(self._prefetch_reads) == 0:
                 self._prefetch_done = True

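With _prefetch_so_far gone, the only state shared between the prefetch thread and readers is the pending-reads list, the buffer dict, and the done flag, which is why a second batch of reads can be queued mid-prefetch. A toy model of that bookkeeping (hypothetical names, not paramiko's API):

    class PrefetchState(object):
        def __init__(self):
            self.reads = []     # (offset, length) still in flight, request order
            self.buffers = {}   # offset -> received data
            self.done = False

        def start(self, chunks):
            self.done = False
            self.reads.extend(chunks)   # a second start() just queues more reads

        def deliver(self, data):
            # called once per SFTP response, in the order requests were sent
            offset, length = self.reads.pop(0)
            assert length == len(data)
            self.buffers[offset] = data
            if len(self.reads) == 0:
                self.done = True

    state = PrefetchState()
    state.start([(0, 4), (4, 4)])    # prefetch()
    state.start([(100, 2)])          # an overlapping readv(): just more reads
    for blob in ['aaaa', 'bbbb', 'cc']:
        state.deliver(blob)
    assert state.done and sorted(state.buffers.keys()) == [0, 4, 100]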
tests/test_sftp_big.py
@@ -272,8 +272,76 @@ class BigSFTPTest (unittest.TestCase):
             sys.stderr.write(' ')
         finally:
             sftp.remove('%s/hongry.txt' % FOLDER)

-    def test_7_big_file_big_buffer(self):
+    def test_7_prefetch_readv(self):
+        """
+        verify that prefetch and readv don't conflict with each other.
+        """
+        sftp = get_sftp()
+        kblob = ''.join([struct.pack('>H', n) for n in xrange(512)])
+        try:
+            f = sftp.open('%s/hongry.txt' % FOLDER, 'w')
+            f.set_pipelined(True)
+            for n in range(1024):
+                f.write(kblob)
+                if n % 128 == 0:
+                    sys.stderr.write('.')
+            f.close()
+            sys.stderr.write(' ')
+
+            self.assertEqual(sftp.stat('%s/hongry.txt' % FOLDER).st_size, 1024 * 1024)
+
+            f = sftp.open('%s/hongry.txt' % FOLDER, 'r')
+            f.prefetch()
+            data = f.read(1024)
+            self.assertEqual(data, kblob)
+
+            chunk_size = 793
+            base_offset = 512 * 1024
+            k2blob = kblob + kblob
+            chunks = [(base_offset + (chunk_size * i), chunk_size) for i in range(20)]
+            for data in f.readv(chunks):
+                offset = base_offset % 1024
+                self.assertEqual(chunk_size, len(data))
+                self.assertEqual(k2blob[offset:offset + chunk_size], data)
+                base_offset += chunk_size
+
+            f.close()
+            sys.stderr.write(' ')
+        finally:
+            sftp.remove('%s/hongry.txt' % FOLDER)
+
+    def test_8_large_readv(self):
+        """
+        verify that a very large readv is broken up correctly and still
+        returned as a single blob.
+        """
+        sftp = get_sftp()
+        kblob = ''.join([struct.pack('>H', n) for n in xrange(512)])
+        try:
+            f = sftp.open('%s/hongry.txt' % FOLDER, 'w')
+            f.set_pipelined(True)
+            for n in range(1024):
+                f.write(kblob)
+                if n % 128 == 0:
+                    sys.stderr.write('.')
+            f.close()
+            sys.stderr.write(' ')
+
+            self.assertEqual(sftp.stat('%s/hongry.txt' % FOLDER).st_size, 1024 * 1024)
+
+            f = sftp.open('%s/hongry.txt' % FOLDER, 'r')
+            data = list(f.readv([(23 * 1024, 128 * 1024)]))
+            self.assertEqual(1, len(data))
+            data = data[0]
+            self.assertEqual(128 * 1024, len(data))
+
+            f.close()
+            sys.stderr.write(' ')
+        finally:
+            sftp.remove('%s/hongry.txt' % FOLDER)
+
+    def test_9_big_file_big_buffer(self):
         """
         write a 1MB file, with no linefeeds, and a big buffer.
         """
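test_7_prefetch_readv's expected-data check leans on a small identity: the file is 1024 repeats of a 1 KB pattern, so the bytes at any absolute offset equal a slice of two concatenated copies of the pattern starting at offset % 1024, with the doubled copy absorbing wraparound. A quick standalone verification of that identity (Python 2, matching the test's idiom):

    import struct

    kblob = ''.join([struct.pack('>H', n) for n in xrange(512)])   # 1KB pattern
    filedata = kblob * 1024             # the whole 1MB file, as written
    k2blob = kblob + kblob              # doubled copy absorbs wraparound

    chunk_size = 793
    base_offset = 512 * 1024
    for i in range(20):
        pos = base_offset + chunk_size * i
        expected = k2blob[pos % 1024:pos % 1024 + chunk_size]
        assert filedata[pos:pos + chunk_size] == expected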
@@ -288,7 +356,7 @@ class BigSFTPTest (unittest.TestCase):
         finally:
             sftp.remove('%s/hongry.txt' % FOLDER)

-    def test_8_big_file_renegotiate(self):
+    def test_A_big_file_renegotiate(self):
         """
         write a 1MB file, forcing key renegotiation in the middle.
         """