From 3fce8abf68f386d18f2fad9f086e0d436af57b3a Mon Sep 17 00:00:00 2001 From: Antoine Brenner Date: Wed, 16 Apr 2014 21:58:03 +0200 Subject: [PATCH 1/4] BufferedFile.read() now returns byte strings instead of text strings It is the right thing to do since we have no idea what encoding the file is in, or even if the file is text data. BufferedFile.readline() is unchanged and returns text strings assuming the file is utf-8 encoded. This should fix the following issue: http://comments.gmane.org/gmane.comp.sysutils.backup.obnam/252 Antoine Brenner Conflicts: sites/www/changelog.rst --- paramiko/file.py | 21 +++++++++++++------ sites/www/changelog.rst | 7 +++++++ tests/test_file.py | 45 +++++++++++++++++++++++------------------ tests/test_sftp.py | 10 ++++----- tests/test_sftp_big.py | 4 ++-- 5 files changed, 54 insertions(+), 33 deletions(-) diff --git a/paramiko/file.py b/paramiko/file.py index f57aa79..725ca5f 100644 --- a/paramiko/file.py +++ b/paramiko/file.py @@ -124,10 +124,15 @@ class BufferedFile (object): file first). If the ``size`` argument is negative or omitted, read all the remaining data in the file. + `'b' mode flag is ignored (self.FLAG_BINARY in self._flags), because + SSH treats all files as binary, since we have no idea what encoding + the file is in, or even if the file is text data. + + :param int size: maximum number of bytes to read :return: - data read from the file (as a `str`), or an empty string if EOF was - encountered immediately + data read from the file (as bytes ), or an empty string + if EOF was encountered immediately """ if self._closed: raise IOError('File is closed') @@ -148,12 +153,12 @@ class BufferedFile (object): result += new_data self._realpos += len(new_data) self._pos += len(new_data) - return result if self._flags & self.FLAG_BINARY else u(result) + return result if size <= len(self._rbuffer): result = self._rbuffer[:size] self._rbuffer = self._rbuffer[size:] self._pos += len(result) - return result if self._flags & self.FLAG_BINARY else u(result) + return result while len(self._rbuffer) < size: read_size = size - len(self._rbuffer) if self._flags & self.FLAG_BUFFERED: @@ -169,7 +174,7 @@ class BufferedFile (object): result = self._rbuffer[:size] self._rbuffer = self._rbuffer[size:] self._pos += len(result) - return result if self._flags & self.FLAG_BINARY else u(result) + return result def readline(self, size=None): """ @@ -186,8 +191,12 @@ class BufferedFile (object): :param int size: maximum length of returned string. :return: - next line of the file (`str`), or an empty string if the end of the + next line of the file, or an empty string if the end of the file has been reached. + If the file was opened in binary 'b' mode: bytes are returned + Else: the encoding of the file is assumed to be utf-8 (default + encoding used by paramiko.py3compat.u) and character strings + (`str`) are returned """ # it's almost silly how complex this function is. if self._closed: diff --git a/sites/www/changelog.rst b/sites/www/changelog.rst index 21ba6e1..67c4f82 100644 --- a/sites/www/changelog.rst +++ b/sites/www/changelog.rst @@ -2,6 +2,13 @@ Changelog ========= +* :bug: BufferedFile.read() now returns byte strings instead of text strings + It is the right thing to do since we have no idea what encoding the file + is in, or even if the file is text data. BufferedFile.readline() is + unchanged and returns text strings assuming the file is utf-8 encoded. + This should fix the following issue: + http://comments.gmane.org/gmane.comp.sysutils.backup.obnam/252 + Thanks Antoine Brenner * :bug:`-` Added self.args for exception classes. Used for unpickling. Related to (`Fabric #986 `_, `Fabric #714 `_). Thanks to Alex diff --git a/tests/test_file.py b/tests/test_file.py index e11d7fd..c6edd7a 100755 --- a/tests/test_file.py +++ b/tests/test_file.py @@ -53,7 +53,7 @@ class BufferedFileTest (unittest.TestCase): def test_1_simple(self): f = LoopbackFile('r') try: - f.write('hi') + f.write(b'hi') self.assertTrue(False, 'no exception on write to read-only file') except: pass @@ -69,7 +69,7 @@ class BufferedFileTest (unittest.TestCase): def test_2_readline(self): f = LoopbackFile('r+U') - f.write('First line.\nSecond line.\r\nThird line.\nFinal line non-terminated.') + f.write(b'First line.\nSecond line.\r\nThird line.\nFinal line non-terminated.') self.assertEqual(f.readline(), 'First line.\n') # universal newline mode should convert this linefeed: self.assertEqual(f.readline(), 'Second line.\n') @@ -93,9 +93,9 @@ class BufferedFileTest (unittest.TestCase): try to trick the linefeed detector. """ f = LoopbackFile('r+U') - f.write('First line.\r') + f.write(b'First line.\r') self.assertEqual(f.readline(), 'First line.\n') - f.write('\nSecond.\r\n') + f.write(b'\nSecond.\r\n') self.assertEqual(f.readline(), 'Second.\n') f.close() self.assertEqual(f.newlines, crlf) @@ -105,7 +105,7 @@ class BufferedFileTest (unittest.TestCase): verify that write buffering is on. """ f = LoopbackFile('r+', 1) - f.write('Complete line.\nIncomplete line.') + f.write(b'Complete line.\nIncomplete line.') self.assertEqual(f.readline(), 'Complete line.\n') self.assertEqual(f.readline(), '') f.write('..\n') @@ -118,12 +118,12 @@ class BufferedFileTest (unittest.TestCase): """ f = LoopbackFile('r+', 512) f.write('Not\nquite\n512 bytes.\n') - self.assertEqual(f.read(1), '') + self.assertEqual(f.read(1), b'') f.flush() - self.assertEqual(f.read(5), 'Not\nq') - self.assertEqual(f.read(10), 'uite\n512 b') - self.assertEqual(f.read(9), 'ytes.\n') - self.assertEqual(f.read(3), '') + self.assertEqual(f.read(5), b'Not\nq') + self.assertEqual(f.read(10), b'uite\n512 b') + self.assertEqual(f.read(9), b'ytes.\n') + self.assertEqual(f.read(3), b'') f.close() def test_6_buffering(self): @@ -131,12 +131,12 @@ class BufferedFileTest (unittest.TestCase): verify that flushing happens automatically on buffer crossing. """ f = LoopbackFile('r+', 16) - f.write('Too small.') - self.assertEqual(f.read(4), '') - f.write(' ') - self.assertEqual(f.read(4), '') - f.write('Enough.') - self.assertEqual(f.read(20), 'Too small. Enough.') + f.write(b'Too small.') + self.assertEqual(f.read(4), b'') + f.write(b' ') + self.assertEqual(f.read(4), b'') + f.write(b'Enough.') + self.assertEqual(f.read(20), b'Too small. Enough.') f.close() def test_7_read_all(self): @@ -144,9 +144,14 @@ class BufferedFileTest (unittest.TestCase): verify that read(-1) returns everything left in the file. """ f = LoopbackFile('r+', 16) - f.write('The first thing you need to do is open your eyes. ') - f.write('Then, you need to close them again.\n') + f.write(b'The first thing you need to do is open your eyes. ') + f.write(b'Then, you need to close them again.\n') s = f.read(-1) - self.assertEqual(s, 'The first thing you need to do is open your eyes. Then, you ' + - 'need to close them again.\n') + self.assertEqual(s, b'The first thing you need to do is open your eyes. Then, you ' + + b'need to close them again.\n') f.close() + +if __name__ == '__main__': + from unittest import main + main() + diff --git a/tests/test_sftp.py b/tests/test_sftp.py index e0534eb..720b821 100755 --- a/tests/test_sftp.py +++ b/tests/test_sftp.py @@ -405,7 +405,7 @@ class SFTPTest (unittest.TestCase): self.assertEqual(sftp.stat(FOLDER + '/testing.txt').st_size, 13) with sftp.open(FOLDER + '/testing.txt', 'r') as f: data = f.read(20) - self.assertEqual(data, 'hello kiddy.\n') + self.assertEqual(data, b'hello kiddy.\n') finally: sftp.remove(FOLDER + '/testing.txt') @@ -466,8 +466,8 @@ class SFTPTest (unittest.TestCase): f.write('?\n') with sftp.open(FOLDER + '/happy.txt', 'r') as f: - self.assertEqual(f.readline(), 'full line?\n') - self.assertEqual(f.read(7), 'partial') + self.assertEqual(f.readline(), u'full line?\n') + self.assertEqual(f.read(7), b'partial') finally: try: sftp.remove(FOLDER + '/happy.txt') @@ -662,8 +662,8 @@ class SFTPTest (unittest.TestCase): fd, localname = mkstemp() os.close(fd) - text = 'All I wanted was a plastic bunny rabbit.\n' - with open(localname, 'w') as f: + text = b'All I wanted was a plastic bunny rabbit.\n' + with open(localname, 'wb') as f: f.write(text) saved_progress = [] diff --git a/tests/test_sftp_big.py b/tests/test_sftp_big.py index 521fbdc..abed27b 100644 --- a/tests/test_sftp_big.py +++ b/tests/test_sftp_big.py @@ -85,7 +85,7 @@ class BigSFTPTest (unittest.TestCase): write a 1MB file with no buffering. """ sftp = get_sftp() - kblob = (1024 * 'x') + kblob = (1024 * b'x') start = time.time() try: with sftp.open('%s/hongry.txt' % FOLDER, 'w') as f: @@ -231,7 +231,7 @@ class BigSFTPTest (unittest.TestCase): without using it, to verify that paramiko doesn't get confused. """ sftp = get_sftp() - kblob = (1024 * 'x') + kblob = (1024 * b'x') try: with sftp.open('%s/hongry.txt' % FOLDER, 'w') as f: f.set_pipelined(True) From 5636381591aa26a9f31efab450b6bfdd6659cbb3 Mon Sep 17 00:00:00 2001 From: Jeff Forcier Date: Thu, 24 Apr 2014 09:33:38 -0700 Subject: [PATCH 2/4] Reword docs/changelog re #315 --- paramiko/file.py | 19 +++++++++---------- sites/www/changelog.rst | 20 +++++++++++++------- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/paramiko/file.py b/paramiko/file.py index 725ca5f..70243e4 100644 --- a/paramiko/file.py +++ b/paramiko/file.py @@ -124,15 +124,14 @@ class BufferedFile (object): file first). If the ``size`` argument is negative or omitted, read all the remaining data in the file. - `'b' mode flag is ignored (self.FLAG_BINARY in self._flags), because - SSH treats all files as binary, since we have no idea what encoding - the file is in, or even if the file is text data. - + ``'b'`` mode flag is ignored (``self.FLAG_BINARY`` in ``self._flags``), + because SSH treats all files as binary, since we have no idea what + encoding the file is in, or even if the file is text data. :param int size: maximum number of bytes to read :return: - data read from the file (as bytes ), or an empty string - if EOF was encountered immediately + data read from the file (as bytes), or an empty string if EOF was + encountered immediately """ if self._closed: raise IOError('File is closed') @@ -193,10 +192,10 @@ class BufferedFile (object): :return: next line of the file, or an empty string if the end of the file has been reached. - If the file was opened in binary 'b' mode: bytes are returned - Else: the encoding of the file is assumed to be utf-8 (default - encoding used by paramiko.py3compat.u) and character strings - (`str`) are returned + + If the file was opened in binary (``'b'``) mode: bytes are returned + Else: the encoding of the file is assumed to be UTF-8 and character + strings (`str`) are returned """ # it's almost silly how complex this function is. if self._closed: diff --git a/sites/www/changelog.rst b/sites/www/changelog.rst index 67c4f82..8a4b0f5 100644 --- a/sites/www/changelog.rst +++ b/sites/www/changelog.rst @@ -2,13 +2,19 @@ Changelog ========= -* :bug: BufferedFile.read() now returns byte strings instead of text strings - It is the right thing to do since we have no idea what encoding the file - is in, or even if the file is text data. BufferedFile.readline() is - unchanged and returns text strings assuming the file is utf-8 encoded. - This should fix the following issue: - http://comments.gmane.org/gmane.comp.sysutils.backup.obnam/252 - Thanks Antoine Brenner +* :bug:`-` `paramiko.file.BufferedFile.read` incorrectly returned text strings + after the Python 3 migration, despite bytes being more appropriate for file + contents (which may be binary or of an unknown encoding.) This has been + addressed. + + .. note:: + `paramiko.file.BufferedFile.readline` continues to return strings, not + bytes, as "lines" only make sense for textual data. It assumes UTF-8 by + default. + + This should fix `this issue raised on the Obnam mailing list + `_. Thanks + to Antoine Brenner for the patch. * :bug:`-` Added self.args for exception classes. Used for unpickling. Related to (`Fabric #986 `_, `Fabric #714 `_). Thanks to Alex From a759a8e8df0127e624d69f0e2c02704a95e0cc0c Mon Sep 17 00:00:00 2001 From: Jeff Forcier Date: Thu, 24 Apr 2014 09:33:48 -0700 Subject: [PATCH 3/4] Fix incorrect monospacing --- sites/www/installing.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sites/www/installing.rst b/sites/www/installing.rst index 3587949..0ca9b15 100644 --- a/sites/www/installing.rst +++ b/sites/www/installing.rst @@ -18,8 +18,8 @@ We currently support **Python 2.5/2.6/2.7**, with support for Python 3 coming soon. Users on Python 2.4 or older are urged to upgrade. Paramiko *may* work on Python 2.4 still, but there is no longer any support guarantee. -Paramiko has two dependencies: the pure-Python ECDSA module `ecdsa`, and the -PyCrypto C extension. `ecdsa` is easily installable from wherever you +Paramiko has two dependencies: the pure-Python ECDSA module ``ecdsa``, and the +PyCrypto C extension. ``ecdsa`` is easily installable from wherever you obtained Paramiko's package; PyCrypto may require more work. Read on for details. From ac4c75471821fa5686c1f15e1b8613fe70639355 Mon Sep 17 00:00:00 2001 From: Jeff Forcier Date: Thu, 24 Apr 2014 09:33:48 -0700 Subject: [PATCH 4/4] Fix incorrect monospacing --- sites/www/installing.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sites/www/installing.rst b/sites/www/installing.rst index 3587949..0ca9b15 100644 --- a/sites/www/installing.rst +++ b/sites/www/installing.rst @@ -18,8 +18,8 @@ We currently support **Python 2.5/2.6/2.7**, with support for Python 3 coming soon. Users on Python 2.4 or older are urged to upgrade. Paramiko *may* work on Python 2.4 still, but there is no longer any support guarantee. -Paramiko has two dependencies: the pure-Python ECDSA module `ecdsa`, and the -PyCrypto C extension. `ecdsa` is easily installable from wherever you +Paramiko has two dependencies: the pure-Python ECDSA module ``ecdsa``, and the +PyCrypto C extension. ``ecdsa`` is easily installable from wherever you obtained Paramiko's package; PyCrypto may require more work. Read on for details.