From 6f4c159b052eae52def9fde0ddcbb7c864ef6592 Mon Sep 17 00:00:00 2001 From: Jeff Forcier Date: Thu, 24 Apr 2014 10:25:37 -0700 Subject: [PATCH 1/3] Merge updated a01e449 from al-tonio --- paramiko/file.py | 8 +++++--- tests/test_sftp.py | 31 ++++++++++++++++++++++++++++++- 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/paramiko/file.py b/paramiko/file.py index 70243e4..856cc10 100644 --- a/paramiko/file.py +++ b/paramiko/file.py @@ -124,9 +124,11 @@ class BufferedFile (object): file first). If the ``size`` argument is negative or omitted, read all the remaining data in the file. - ``'b'`` mode flag is ignored (``self.FLAG_BINARY`` in ``self._flags``), - because SSH treats all files as binary, since we have no idea what - encoding the file is in, or even if the file is text data. + .. note:: + ``'b'`` mode flag is ignored (``self.FLAG_BINARY`` in + ``self._flags``), because SSH treats all files as binary, since we + have no idea what encoding the file is in, or even if the file is + text data. :param int size: maximum number of bytes to read :return: diff --git a/tests/test_sftp.py b/tests/test_sftp.py index 720b821..c70d0cd 100755 --- a/tests/test_sftp.py +++ b/tests/test_sftp.py @@ -67,6 +67,18 @@ liver insulin receptors. Their sensitivity to insulin is, however, similarly decreased compared with chicken. ''' + +# Here is how unicode characters are encoded over 1 to 6 bytes in utf-8 +# U-00000000 - U-0000007F: 0xxxxxxx +# U-00000080 - U-000007FF: 110xxxxx 10xxxxxx +# U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx +# U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx +# U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx +# U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx +# Note that: hex(int('11000011',2)) == '0xc3' +# Thus, the following 2-bytes sequence is not valid utf8: "invalid continuation byte" +NON_UTF8_DATA = b'\xC3\xC3' + FOLDER = os.environ.get('TEST_FOLDER', 'temp-testing000') sftp = None @@ -466,7 +478,7 @@ class SFTPTest (unittest.TestCase): f.write('?\n') with sftp.open(FOLDER + '/happy.txt', 'r') as f: - self.assertEqual(f.readline(), u'full line?\n') + self.assertEqual(f.readline(), u('full line?\n')) self.assertEqual(f.read(7), b'partial') finally: try: @@ -747,6 +759,23 @@ class SFTPTest (unittest.TestCase): sftp.remove(FOLDER + '/test%file') + def test_O_non_utf8_data(self): + """Test write() and read() of non utf8 data""" + try: + with sftp.open('%s/nonutf8data' % FOLDER, 'w') as f: + f.write(NON_UTF8_DATA) + with sftp.open('%s/nonutf8data' % FOLDER, 'r') as f: + data = f.read() + self.assertEqual(data, NON_UTF8_DATA) + with sftp.open('%s/nonutf8data' % FOLDER, 'wb') as f: + f.write(NON_UTF8_DATA) + with sftp.open('%s/nonutf8data' % FOLDER, 'rb') as f: + data = f.read() + self.assertEqual(data, NON_UTF8_DATA) + finally: + sftp.remove('%s/nonutf8data' % FOLDER) + + if __name__ == '__main__': SFTPTest.init_loopback() # logging is required by test_N_file_with_percent From c7c1a24e3023a45cf6713e553c176e42a71a6d3d Mon Sep 17 00:00:00 2001 From: Jeff Forcier Date: Thu, 24 Apr 2014 10:26:33 -0700 Subject: [PATCH 2/3] Fix some trailing whitespace --- paramiko/file.py | 2 +- tests/test_sftp.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/paramiko/file.py b/paramiko/file.py index 856cc10..3ebcfa3 100644 --- a/paramiko/file.py +++ b/paramiko/file.py @@ -124,7 +124,7 @@ class BufferedFile (object): file first). If the ``size`` argument is negative or omitted, read all the remaining data in the file. - .. note:: + .. note:: ``'b'`` mode flag is ignored (``self.FLAG_BINARY`` in ``self._flags``), because SSH treats all files as binary, since we have no idea what encoding the file is in, or even if the file is diff --git a/tests/test_sftp.py b/tests/test_sftp.py index c70d0cd..2b6aa3b 100755 --- a/tests/test_sftp.py +++ b/tests/test_sftp.py @@ -69,8 +69,8 @@ decreased compared with chicken. # Here is how unicode characters are encoded over 1 to 6 bytes in utf-8 -# U-00000000 - U-0000007F: 0xxxxxxx -# U-00000080 - U-000007FF: 110xxxxx 10xxxxxx +# U-00000000 - U-0000007F: 0xxxxxxx +# U-00000080 - U-000007FF: 110xxxxx 10xxxxxx # U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx # U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx # U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx From 5837b7c21a10b4c0664412cae1f9604580ba5934 Mon Sep 17 00:00:00 2001 From: Jeff Forcier Date: Thu, 24 Apr 2014 10:26:46 -0700 Subject: [PATCH 3/3] Formatting --- paramiko/file.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/paramiko/file.py b/paramiko/file.py index 3ebcfa3..2238f0b 100644 --- a/paramiko/file.py +++ b/paramiko/file.py @@ -287,7 +287,8 @@ class BufferedFile (object): Set the file's current position, like stdio's ``fseek``. Not all file objects support seeking. - .. note:: If a file is opened in append mode (``'a'`` or ``'a+'``), any seek + .. note:: + If a file is opened in append mode (``'a'`` or ``'a+'``), any seek operations will be undone at the next write (as the file position will move back to the end of the file).