Преглед на файлове

saferepr: Handle unicode errors in bytes repr on Python 3. Closes #3610

Ask Solem преди 8 години
родител
ревизия
5ceb74ac50
променени са 4 файла, в които са добавени 42 реда и са изтрити 22 реда
  1. 19 5
      celery/utils/saferepr.py
  2. 0 7
      celery/utils/text.py
  3. 23 0
      t/unit/utils/test_saferepr.py
  4. 0 10
      t/unit/utils/test_text.py

+ 19 - 5
celery/utils/saferepr.py

@@ -21,11 +21,9 @@ from itertools import chain
 from numbers import Number
 from pprint import _recursion
 
-from kombu.utils.encoding import bytes_to_str
-
 from celery.five import items, text_t
 
-from .text import truncate, truncate_bytes
+from .text import truncate
 
 __all__ = ['saferepr', 'reprstream']
 
@@ -110,11 +108,27 @@ def _safetext(val):
     return val
 
 
+def _format_binary_bytes(val, maxlen, ellipsis='...'):
+    if maxlen and len(val) > maxlen:
+        # we don't want to copy all the data, just take what we need.
+        chunk = memoryview(val)[:maxlen].tobytes()
+        return "b'{0}{1}'".format(_repr_binary_bytes(chunk), ellipsis)
+    return "b'{0}'".format(_repr_binary_bytes(val))
+
+
+def _repr_binary_bytes(val):
+    try:
+        return val.decode('utf-8')
+    except UnicodeDecodeError:
+        # possibly not unicode, but binary data so format as hex.
+        return val.hex()
+
+
 def _format_chars(val, maxlen):
     if IS_PY3 and isinstance(val, bytes):  # pragma: no cover
-        return "b'%s'" % (bytes_to_str(truncate_bytes(val, maxlen)),)
+        return _format_binary_bytes(val, maxlen)
     else:
-        return "'%s'" % (truncate(val, maxlen),)
+        return "'{0}'".format(truncate(val, maxlen))
 
 
 def _saferepr(o, maxlen=None, maxlevels=3, seen=None):

+ 0 - 7
celery/utils/text.py

@@ -101,13 +101,6 @@ def truncate(s, maxlen=128, suffix='...'):
     return s
 
 
-def truncate_bytes(s, maxlen=128, suffix=b'...'):
-    # type: (bytes, int, bytes) -> bytes
-    if maxlen and len(s) >= maxlen:
-        return s[:maxlen].rsplit(b' ', 1)[0] + suffix
-    return s
-
-
 def pluralize(n, text, suffix='s'):
     # type: (int, str, str) -> str
     """Pluralize term when n is greater than one."""

+ 23 - 0
t/unit/utils/test_saferepr.py

@@ -2,6 +2,7 @@
 from __future__ import absolute_import, unicode_literals
 import pytest
 import re
+import struct
 from case import skip
 from decimal import Decimal
 from pprint import pprint
@@ -193,3 +194,25 @@ class test_saferepr:
         val = X()
         assert repr(val)
         assert saferepr(val)
+
+    @skip.unless_python3()
+    def test_unicode_bytes(self):
+        val = 'øystein'.encode('utf-8')
+        assert saferepr(val) == "b'øystein'"
+
+    @skip.unless_python3()
+    def test_unicode_bytes__long(self):
+        val = 'øystein'.encode('utf-8') * 1024
+        assert saferepr(val, maxlen=128).endswith("...'")
+
+    @skip.unless_python3()
+    def test_binary_bytes(self):
+        val = struct.pack('>QQQ', 12223, 1234, 3123)
+        assert '2fbf' in saferepr(val, maxlen=128)
+
+    @skip.unless_python3()
+    def test_binary_bytes__long(self):
+        val = struct.pack('>QQQ', 12223, 1234, 3123) * 1024
+        result = saferepr(val, maxlen=128)
+        assert '2fbf' in result
+        assert result.endswith("...'")

+ 0 - 10
t/unit/utils/test_text.py

@@ -7,7 +7,6 @@ from celery.utils.text import (
     indent,
     pretty,
     truncate,
-    truncate_bytes,
 )
 
 RANDTEXT = """\
@@ -66,15 +65,6 @@ def test_truncate_text(s, maxsize, expected):
     assert truncate(s, maxsize) == expected
 
 
-@pytest.mark.parametrize('s,maxsize,expected', [
-    (b'ABCDEFGHI', 3, b'ABC...'),
-    (b'ABCDEFGHI', 10, b'ABCDEFGHI'),
-
-])
-def test_truncate_bytes(s, maxsize, expected):
-    assert truncate_bytes(s, maxsize) == expected
-
-
 @pytest.mark.parametrize('args,expected', [
     ((None, 3), '???'),
     (('ABCDEFGHI', 6), 'ABC...'),