Browse Source

Attempt to fix saferepr with __repr__ returning bytes with non-ascii chars. Closes #3600

Ask Solem 8 years ago
parent
commit
5a4d1ef24a
2 changed files with 34 additions and 6 deletions
  1. 20 6
      celery/utils/saferepr.py
  2. 14 0
      t/unit/utils/test_saferepr.py

+ 20 - 6
celery/utils/saferepr.py

@@ -99,6 +99,24 @@ def _repr_empty_set(s):
     return '%s()' % (type(s).__name__,)
 
 
+def _safetext(val):
+    """Return ``val`` in a form that is safe to embed in a repr string.
+
+    Values that are not bytes, and bytes containing only ASCII, are
+    returned unchanged.  Bytes containing non-ASCII characters are
+    decoded as UTF-8 into a unicode string; bytes that are not valid
+    UTF-8 are escaped (or replaced, on interpreters where escaping is
+    unavailable) instead of raising.
+    """
+    if isinstance(val, bytes):
+        try:
+            # Probe only, the result is discarded.  ``decode('ascii')``
+            # is used instead of ``encode('utf-8')`` because bytes has
+            # no ``encode`` method on Python 3, while on Python 2 it
+            # performs the same ASCII check that ``encode`` did
+            # implicitly.
+            val.decode('ascii')
+        except UnicodeDecodeError:
+            # is bytes with unrepresentable characters, attempt
+            # to convert back to unicode
+            try:
+                return val.decode('utf-8', errors='backslashreplace')
+            except TypeError:
+                # 'backslashreplace' only handles decoding errors on
+                # Python 3.5+; older interpreters raise TypeError from
+                # the error callback, so fall back to 'replace'.
+                return val.decode('utf-8', errors='replace')
+    return val
+
+
+def _format_chars(val, maxlen):
+    """Return ``val`` wrapped in quotes after truncation with ``maxlen``.
+
+    On Python 3 a bytes value goes through ``truncate_bytes`` and
+    ``bytes_to_str`` and is rendered as ``b'...'``; every other value
+    goes through ``truncate`` and is rendered as ``'...'``.
+    """
+    if IS_PY3 and isinstance(val, bytes):  # pragma: no cover
+        shortened = truncate_bytes(val, maxlen)
+        return "b'%s'" % (bytes_to_str(shortened),)
+    return "'%s'" % (truncate(val, maxlen),)
+
+
 def _saferepr(o, maxlen=None, maxlevels=3, seen=None):
     stack = deque([iter([o])])
     for token, it in reprstream(stack, seen=seen, maxlevels=maxlevels):
@@ -113,13 +131,9 @@ def _saferepr(o, maxlen=None, maxlevels=3, seen=None):
         elif isinstance(token, _key):
             val = saferepr(token.value, maxlen, maxlevels)
         elif isinstance(token, _quoted):
-            val = token.value
-            if IS_PY3 and isinstance(val, bytes):  # pragma: no cover
-                val = "b'%s'" % (bytes_to_str(truncate_bytes(val, maxlen)),)
-            else:
-                val = "'%s'" % (truncate(val, maxlen),)
+            val = _format_chars(token.value, maxlen)
         else:
-            val = truncate(token, maxlen)
+            val = _safetext(truncate(token, maxlen))
         yield val
         if maxlen is not None:
             maxlen -= len(val)

+ 14 - 0
t/unit/utils/test_saferepr.py

@@ -1,6 +1,8 @@
+# -*- coding: utf-8 -*-
 from __future__ import absolute_import, unicode_literals
 import pytest
 import re
+from case import skip
 from decimal import Decimal
 from pprint import pprint
 from celery.five import (
@@ -179,3 +181,15 @@ class test_saferepr:
         # aren't tested here.
         native = old_repr(value)
         assert saferepr(value) == native
+
+    @skip.if_python3()
+    def test_bytes_with_unicode(self):
+        """saferepr copes with a __repr__ that returns non-ASCII bytes."""
+        class X(object):
+
+            def __repr__(self):
+                # The handler name must be spelled 'backslashreplace';
+                # 'backslash replace' (with a space) is not a registered
+                # error handler.
+                return 'æ e i a æ å'.encode(
+                    'utf-8', errors='backslashreplace')
+
+        val = X()
+        # Both calls only need to succeed and produce a non-empty result.
+        assert repr(val)
+        assert saferepr(val)