BufferedFile.read() now returns byte strings instead of text strings

It is the right thing to do since we have no idea what encoding the file
is in, or even if the file is text data. BufferedFile.readline() is
unchanged and returns text strings assuming the file is utf-8 encoded.
This should fix the following issue:
http://comments.gmane.org/gmane.comp.sysutils.backup.obnam/252

Antoine Brenner

Conflicts:
	sites/www/changelog.rst
This commit is contained in:
Antoine Brenner 2014-04-16 21:58:03 +02:00 committed by Jeff Forcier
parent 9b2388cad5
commit 3fce8abf68
5 changed files with 54 additions and 33 deletions

View File

@ -124,10 +124,15 @@ class BufferedFile (object):
file first). If the ``size`` argument is negative or omitted, read all file first). If the ``size`` argument is negative or omitted, read all
the remaining data in the file. the remaining data in the file.
`'b' mode flag is ignored (self.FLAG_BINARY in self._flags), because
SSH treats all files as binary, since we have no idea what encoding
the file is in, or even if the file is text data.
:param int size: maximum number of bytes to read :param int size: maximum number of bytes to read
:return: :return:
data read from the file (as a `str`), or an empty string if EOF was data read from the file (as bytes ), or an empty string
encountered immediately if EOF was encountered immediately
""" """
if self._closed: if self._closed:
raise IOError('File is closed') raise IOError('File is closed')
@ -148,12 +153,12 @@ class BufferedFile (object):
result += new_data result += new_data
self._realpos += len(new_data) self._realpos += len(new_data)
self._pos += len(new_data) self._pos += len(new_data)
return result if self._flags & self.FLAG_BINARY else u(result) return result
if size <= len(self._rbuffer): if size <= len(self._rbuffer):
result = self._rbuffer[:size] result = self._rbuffer[:size]
self._rbuffer = self._rbuffer[size:] self._rbuffer = self._rbuffer[size:]
self._pos += len(result) self._pos += len(result)
return result if self._flags & self.FLAG_BINARY else u(result) return result
while len(self._rbuffer) < size: while len(self._rbuffer) < size:
read_size = size - len(self._rbuffer) read_size = size - len(self._rbuffer)
if self._flags & self.FLAG_BUFFERED: if self._flags & self.FLAG_BUFFERED:
@ -169,7 +174,7 @@ class BufferedFile (object):
result = self._rbuffer[:size] result = self._rbuffer[:size]
self._rbuffer = self._rbuffer[size:] self._rbuffer = self._rbuffer[size:]
self._pos += len(result) self._pos += len(result)
return result if self._flags & self.FLAG_BINARY else u(result) return result
def readline(self, size=None): def readline(self, size=None):
""" """
@ -186,8 +191,12 @@ class BufferedFile (object):
:param int size: maximum length of returned string. :param int size: maximum length of returned string.
:return: :return:
next line of the file (`str`), or an empty string if the end of the next line of the file, or an empty string if the end of the
file has been reached. file has been reached.
If the file was opened in binary 'b' mode: bytes are returned
Else: the encoding of the file is assumed to be utf-8 (default
encoding used by paramiko.py3compat.u) and character strings
(`str`) are returned
""" """
# it's almost silly how complex this function is. # it's almost silly how complex this function is.
if self._closed: if self._closed:

View File

@ -2,6 +2,13 @@
Changelog Changelog
========= =========
* :bug: BufferedFile.read() now returns byte strings instead of text strings
It is the right thing to do since we have no idea what encoding the file
is in, or even if the file is text data. BufferedFile.readline() is
unchanged and returns text strings assuming the file is utf-8 encoded.
This should fix the following issue:
http://comments.gmane.org/gmane.comp.sysutils.backup.obnam/252
Thanks Antoine Brenner
* :bug:`-` Added self.args for exception classes. Used for unpickling. Related * :bug:`-` Added self.args for exception classes. Used for unpickling. Related
to (`Fabric #986 <https://github.com/fabric/fabric/issues/986>`_, `Fabric to (`Fabric #986 <https://github.com/fabric/fabric/issues/986>`_, `Fabric
#714 <https://github.com/fabric/fabric/issues/714>`_). Thanks to Alex #714 <https://github.com/fabric/fabric/issues/714>`_). Thanks to Alex

View File

@ -53,7 +53,7 @@ class BufferedFileTest (unittest.TestCase):
def test_1_simple(self): def test_1_simple(self):
f = LoopbackFile('r') f = LoopbackFile('r')
try: try:
f.write('hi') f.write(b'hi')
self.assertTrue(False, 'no exception on write to read-only file') self.assertTrue(False, 'no exception on write to read-only file')
except: except:
pass pass
@ -69,7 +69,7 @@ class BufferedFileTest (unittest.TestCase):
def test_2_readline(self): def test_2_readline(self):
f = LoopbackFile('r+U') f = LoopbackFile('r+U')
f.write('First line.\nSecond line.\r\nThird line.\nFinal line non-terminated.') f.write(b'First line.\nSecond line.\r\nThird line.\nFinal line non-terminated.')
self.assertEqual(f.readline(), 'First line.\n') self.assertEqual(f.readline(), 'First line.\n')
# universal newline mode should convert this linefeed: # universal newline mode should convert this linefeed:
self.assertEqual(f.readline(), 'Second line.\n') self.assertEqual(f.readline(), 'Second line.\n')
@ -93,9 +93,9 @@ class BufferedFileTest (unittest.TestCase):
try to trick the linefeed detector. try to trick the linefeed detector.
""" """
f = LoopbackFile('r+U') f = LoopbackFile('r+U')
f.write('First line.\r') f.write(b'First line.\r')
self.assertEqual(f.readline(), 'First line.\n') self.assertEqual(f.readline(), 'First line.\n')
f.write('\nSecond.\r\n') f.write(b'\nSecond.\r\n')
self.assertEqual(f.readline(), 'Second.\n') self.assertEqual(f.readline(), 'Second.\n')
f.close() f.close()
self.assertEqual(f.newlines, crlf) self.assertEqual(f.newlines, crlf)
@ -105,7 +105,7 @@ class BufferedFileTest (unittest.TestCase):
verify that write buffering is on. verify that write buffering is on.
""" """
f = LoopbackFile('r+', 1) f = LoopbackFile('r+', 1)
f.write('Complete line.\nIncomplete line.') f.write(b'Complete line.\nIncomplete line.')
self.assertEqual(f.readline(), 'Complete line.\n') self.assertEqual(f.readline(), 'Complete line.\n')
self.assertEqual(f.readline(), '') self.assertEqual(f.readline(), '')
f.write('..\n') f.write('..\n')
@ -118,12 +118,12 @@ class BufferedFileTest (unittest.TestCase):
""" """
f = LoopbackFile('r+', 512) f = LoopbackFile('r+', 512)
f.write('Not\nquite\n512 bytes.\n') f.write('Not\nquite\n512 bytes.\n')
self.assertEqual(f.read(1), '') self.assertEqual(f.read(1), b'')
f.flush() f.flush()
self.assertEqual(f.read(5), 'Not\nq') self.assertEqual(f.read(5), b'Not\nq')
self.assertEqual(f.read(10), 'uite\n512 b') self.assertEqual(f.read(10), b'uite\n512 b')
self.assertEqual(f.read(9), 'ytes.\n') self.assertEqual(f.read(9), b'ytes.\n')
self.assertEqual(f.read(3), '') self.assertEqual(f.read(3), b'')
f.close() f.close()
def test_6_buffering(self): def test_6_buffering(self):
@ -131,12 +131,12 @@ class BufferedFileTest (unittest.TestCase):
verify that flushing happens automatically on buffer crossing. verify that flushing happens automatically on buffer crossing.
""" """
f = LoopbackFile('r+', 16) f = LoopbackFile('r+', 16)
f.write('Too small.') f.write(b'Too small.')
self.assertEqual(f.read(4), '') self.assertEqual(f.read(4), b'')
f.write(' ') f.write(b' ')
self.assertEqual(f.read(4), '') self.assertEqual(f.read(4), b'')
f.write('Enough.') f.write(b'Enough.')
self.assertEqual(f.read(20), 'Too small. Enough.') self.assertEqual(f.read(20), b'Too small. Enough.')
f.close() f.close()
def test_7_read_all(self): def test_7_read_all(self):
@ -144,9 +144,14 @@ class BufferedFileTest (unittest.TestCase):
verify that read(-1) returns everything left in the file. verify that read(-1) returns everything left in the file.
""" """
f = LoopbackFile('r+', 16) f = LoopbackFile('r+', 16)
f.write('The first thing you need to do is open your eyes. ') f.write(b'The first thing you need to do is open your eyes. ')
f.write('Then, you need to close them again.\n') f.write(b'Then, you need to close them again.\n')
s = f.read(-1) s = f.read(-1)
self.assertEqual(s, 'The first thing you need to do is open your eyes. Then, you ' + self.assertEqual(s, b'The first thing you need to do is open your eyes. Then, you ' +
'need to close them again.\n') b'need to close them again.\n')
f.close() f.close()
if __name__ == '__main__':
from unittest import main
main()

View File

@ -405,7 +405,7 @@ class SFTPTest (unittest.TestCase):
self.assertEqual(sftp.stat(FOLDER + '/testing.txt').st_size, 13) self.assertEqual(sftp.stat(FOLDER + '/testing.txt').st_size, 13)
with sftp.open(FOLDER + '/testing.txt', 'r') as f: with sftp.open(FOLDER + '/testing.txt', 'r') as f:
data = f.read(20) data = f.read(20)
self.assertEqual(data, 'hello kiddy.\n') self.assertEqual(data, b'hello kiddy.\n')
finally: finally:
sftp.remove(FOLDER + '/testing.txt') sftp.remove(FOLDER + '/testing.txt')
@ -466,8 +466,8 @@ class SFTPTest (unittest.TestCase):
f.write('?\n') f.write('?\n')
with sftp.open(FOLDER + '/happy.txt', 'r') as f: with sftp.open(FOLDER + '/happy.txt', 'r') as f:
self.assertEqual(f.readline(), 'full line?\n') self.assertEqual(f.readline(), u'full line?\n')
self.assertEqual(f.read(7), 'partial') self.assertEqual(f.read(7), b'partial')
finally: finally:
try: try:
sftp.remove(FOLDER + '/happy.txt') sftp.remove(FOLDER + '/happy.txt')
@ -662,8 +662,8 @@ class SFTPTest (unittest.TestCase):
fd, localname = mkstemp() fd, localname = mkstemp()
os.close(fd) os.close(fd)
text = 'All I wanted was a plastic bunny rabbit.\n' text = b'All I wanted was a plastic bunny rabbit.\n'
with open(localname, 'w') as f: with open(localname, 'wb') as f:
f.write(text) f.write(text)
saved_progress = [] saved_progress = []

View File

@ -85,7 +85,7 @@ class BigSFTPTest (unittest.TestCase):
write a 1MB file with no buffering. write a 1MB file with no buffering.
""" """
sftp = get_sftp() sftp = get_sftp()
kblob = (1024 * 'x') kblob = (1024 * b'x')
start = time.time() start = time.time()
try: try:
with sftp.open('%s/hongry.txt' % FOLDER, 'w') as f: with sftp.open('%s/hongry.txt' % FOLDER, 'w') as f:
@ -231,7 +231,7 @@ class BigSFTPTest (unittest.TestCase):
without using it, to verify that paramiko doesn't get confused. without using it, to verify that paramiko doesn't get confused.
""" """
sftp = get_sftp() sftp = get_sftp()
kblob = (1024 * 'x') kblob = (1024 * b'x')
try: try:
with sftp.open('%s/hongry.txt' % FOLDER, 'w') as f: with sftp.open('%s/hongry.txt' % FOLDER, 'w') as f:
f.set_pipelined(True) f.set_pipelined(True)