"""Contains methods for building Thrift structures for interacting with IDE The methods from this file are used for Python console interaction. Please note that the debugger still uses XML structures with the similar methods contained in `pydevd_xml.py` file. """ import sys import traceback from _pydev_bundle import pydev_log from _pydevd_bundle import pydevd_extension_utils from _pydevd_bundle import pydevd_resolver from _pydevd_bundle.pydevd_constants import dict_iter_items, dict_keys, IS_PY3K, \ MAXIMUM_VARIABLE_REPRESENTATION_SIZE, RETURN_VALUES_DICT, LOAD_VALUES_POLICY, DEFAULT_VALUES_DICT, NUMPY_NUMERIC_TYPES, \ GET_FRAME_RETURN_GROUP from _pydevd_bundle.pydevd_extension_api import TypeResolveProvider, StrPresentationProvider from _pydevd_bundle.pydevd_user_type_renderers_utils import try_get_type_renderer_for_var from _pydevd_bundle.pydevd_utils import is_string, should_evaluate_full_value, should_evaluate_shape from _pydevd_bundle.pydevd_vars import get_label, array_default_format, is_able_to_format_number, MAXIMUM_ARRAY_SIZE, \ get_column_formatter_by_type, get_formatted_row_elements, IAtPolarsAccessor, DEFAULT_DF_FORMAT, DATAFRAME_HEADER_LOAD_MAX_SIZE from pydev_console.pydev_protocol import DebugValue, GetArrayResponse, ArrayData, ArrayHeaders, ColHeader, RowHeader, \ UnsupportedArrayTypeException, ExceedingArrayDimensionsException from _pydevd_bundle.pydevd_xml import ExceptionOnEvaluate from _pydevd_bundle.pydevd_frame_type_handler import get_vars_handler, DO_NOT_PROCESS_VARS, THRIFT_COMMUNICATION_VARS_HANDLER from _pydevd_bundle.pydevd_repr_utils import get_value_repr try: import types frame_type = types.FrameType except: frame_type = None _IS_JYTHON = sys.platform.startswith("java") def _create_default_type_map(): if not _IS_JYTHON: default_type_map = [ # None means that it should not be treated as a compound variable # isintance does not accept a tuple on some versions of python, so, we must declare it expanded (type(None), None,), (int, None), (float, None), (complex, None), (str, None), (tuple, pydevd_resolver.tupleResolver), (list, pydevd_resolver.tupleResolver), (dict, pydevd_resolver.dictResolver), ] try: default_type_map.append((long, None)) # @UndefinedVariable except: pass # not available on all python versions try: default_type_map.append((unicode, None)) # @UndefinedVariable except: pass # not available on all python versions try: default_type_map.append((set, pydevd_resolver.setResolver)) except: pass # not available on all python versions try: default_type_map.append((frozenset, pydevd_resolver.setResolver)) except: pass # not available on all python versions try: from django.utils.datastructures import MultiValueDict default_type_map.insert(0, (MultiValueDict, pydevd_resolver.multiValueDictResolver)) # we should put it before dict except: pass # django may not be installed try: from django.forms import BaseForm default_type_map.insert(0, (BaseForm, pydevd_resolver.djangoFormResolver)) # we should put it before instance resolver except: pass # django may not be installed try: from collections import deque default_type_map.append((deque, pydevd_resolver.dequeResolver)) except: pass if frame_type is not None: default_type_map.append((frame_type, pydevd_resolver.frameResolver)) else: from org.python import core # @UnresolvedImport default_type_map = [ (core.PyNone, None), (core.PyInteger, None), (core.PyLong, None), (core.PyFloat, None), (core.PyComplex, None), (core.PyString, None), (core.PyTuple, pydevd_resolver.tupleResolver), (core.PyList, pydevd_resolver.tupleResolver), (core.PyDictionary, pydevd_resolver.dictResolver), (core.PyStringMap, pydevd_resolver.dictResolver), ] if hasattr(core, 'PyJavaInstance'): # Jython 2.5b3 removed it. default_type_map.append((core.PyJavaInstance, pydevd_resolver.instanceResolver)) return default_type_map class TypeResolveHandler(object): NO_PROVIDER = [] # Sentinel value (any mutable object to be used as a constant would be valid). def __init__(self): # Note: don't initialize with the types we already know about so that the extensions can override # the default resolvers that are already available if they want. self._type_to_resolver_cache = {} self._type_to_str_provider_cache = {} self._initialized = False def _initialize(self): self._default_type_map = _create_default_type_map() self._resolve_providers = pydevd_extension_utils.extensions_of_type(TypeResolveProvider) self._str_providers = pydevd_extension_utils.extensions_of_type(StrPresentationProvider) self._initialized = True def get_type(self, o): try: try: # Faster than type(o) as we don't need the function call. type_object = o.__class__ except: # Not all objects have __class__ (i.e.: there are bad bindings around). type_object = type(o) type_name = type_object.__name__ except: # This happens for org.python.core.InitModule return 'Unable to get Type', 'Unable to get Type', None return self._get_type(o, type_object, type_name) def _get_type(self, o, type_object, type_name): resolver = self._type_to_resolver_cache.get(type_object) if resolver is not None: return type_object, type_name, resolver if not self._initialized: self._initialize() try: for resolver in self._resolve_providers: if resolver.can_provide(type_object, type_name): # Cache it self._type_to_resolver_cache[type_object] = resolver return type_object, type_name, resolver for t in self._default_type_map: if isinstance(o, t[0]): # Cache it resolver = t[1] self._type_to_resolver_cache[type_object] = resolver return (type_object, type_name, resolver) except: traceback.print_exc() # No match return default (and cache it). resolver = pydevd_resolver.defaultResolver self._type_to_resolver_cache[type_object] = resolver return type_object, type_name, resolver if _IS_JYTHON: _base_get_type = _get_type def _get_type(self, o, type_object, type_name): if type_name == 'org.python.core.PyJavaInstance': return type_object, type_name, pydevd_resolver.instanceResolver if type_name == 'org.python.core.PyArray': return type_object, type_name, pydevd_resolver.jyArrayResolver return self._base_get_type(o, type_name, type_name) def str_from_providers(self, o, type_object, type_name, do_trim=True): provider = self._type_to_str_provider_cache.get(type_object) if provider is self.NO_PROVIDER: return None if provider is not None: try: return provider.get_str(o, do_trim) except TypeError: return provider.get_str(o) if not self._initialized: self._initialize() for provider in self._str_providers: if provider.can_provide(type_object, type_name): self._type_to_str_provider_cache[type_object] = provider try: return provider.get_str(o, do_trim) except TypeError: return provider.get_str(o) self._type_to_str_provider_cache[type_object] = self.NO_PROVIDER return None _TYPE_RESOLVE_HANDLER = TypeResolveHandler() """ def get_type(o): Receives object and returns a triple (typeObject, typeString, resolver). resolver != None means that variable is a container, and should be displayed as a hierarchy. Use the resolver to get its attributes. All container objects should have a resolver. """ get_type = _TYPE_RESOLVE_HANDLER.get_type _str_from_providers = _TYPE_RESOLVE_HANDLER.str_from_providers def get_sorted_keys(frame_f_locals): keys = dict_keys(frame_f_locals) if hasattr(keys, 'sort'): keys.sort() # Python 3.0 does not have it else: keys = sorted(keys) # Jython 2.1 does not have it return keys def frame_vars_to_struct(frame_f_locals, group_type, hidden_ns=None, user_type_renderers={}): """Returns frame variables as the list of `DebugValue` structures """ keys = get_sorted_keys(frame_f_locals) type_handler = get_vars_handler(var_to_struct, handler_type=THRIFT_COMMUNICATION_VARS_HANDLER, group_type=group_type) for k in keys: try: v = frame_f_locals[k] eval_full_val = should_evaluate_full_value(v, group_type) type_handler.handle(k, v, hidden_ns, eval_full_val, user_type_renderers=user_type_renderers) except Exception: traceback.print_exc() pydev_log.error("Unexpected error, recovered safely.\n") # Show return values as the first entry. return type_handler.get_list() def _get_default_var_string_representation(v, _type, typeName, format, do_trim=True): str_from_provider = _str_from_providers(v, _type, typeName, do_trim) if str_from_provider is not None: return str_from_provider return get_value_repr(v, do_trim, format) def var_to_struct(val, name, format='%s', do_trim=True, evaluate_full_value=True, user_type_renderers=None): """ single variable or dictionary to Thrift struct representation """ debug_value = DebugValue() if name in DO_NOT_PROCESS_VARS: debug_value.name = name debug_value.value = val return debug_value try: # This should be faster than isinstance (but we have to protect against not having a '__class__' attribute). is_exception_on_eval = val.__class__ == ExceptionOnEvaluate except: is_exception_on_eval = False if is_exception_on_eval: v = val.result else: v = val _type, typeName, resolver = get_type(v) # type qualifier to struct type_qualifier = getattr(_type, "__module__", "") if type_qualifier: debug_value.qualifier = type_qualifier # type renderer to struct type_renderer = None if user_type_renderers is not None: type_renderer = try_get_type_renderer_for_var(v, user_type_renderers) if type_renderer is not None: debug_value.typeRendererId = type_renderer.to_type # name and type to struct debug_value.name = name debug_value.type = typeName # value to struct value = None if not evaluate_full_value: value = DEFAULT_VALUES_DICT[LOAD_VALUES_POLICY] elif type_renderer is not None: value = type_renderer.evaluate_var_string_repr(v) if value is None: value = _get_default_var_string_representation(v, _type, typeName, format, do_trim) # fix to work with unicode values try: if not IS_PY3K: if value.__class__ == unicode: # @UndefinedVariable value = value.encode('utf-8') else: if value.__class__ == bytes: value = value.encode('utf-8') except TypeError: # in java, unicode is a function pass debug_value.value = value # shape to struct try: if should_evaluate_shape(): if hasattr(v, 'shape') and not callable(v.shape): debug_value.shape = str(tuple(v.shape)) elif hasattr(v, '__len__') and not is_string(v): debug_value.shape = str(len(v)) except: pass # data type info to xml (for arrays and tensors) debug_value.arrayElementType = '' try: if hasattr(v, 'dtype') and hasattr(v.dtype, 'name'): debug_value.arrayElementType = v.dtype.name except: pass # additional info to struct if is_exception_on_eval: debug_value.isErrorOnEval = True else: if resolver is not None: debug_value.isContainer = True else: pass return debug_value def var_to_str(val, format, do_trim=True, evaluate_full_value=True): struct = var_to_struct(val, '', format, do_trim, evaluate_full_value) value = struct.value return value if value is not None else '' # from pydevd_vars.py def array_to_thrift_struct(array, name, roffset, coffset, rows, cols, format): """ """ array, array_chunk, r, c, f = array_to_meta_thrift_struct(array, name, format) format = '%' + f if rows == -1 and cols == -1: rows = r cols = c rows = min(rows, MAXIMUM_ARRAY_SIZE) cols = min(cols, MAXIMUM_ARRAY_SIZE) if rows == 0 and cols == 0: array_chunk.data = array_data_to_thrift_struct(rows, cols, lambda r: (get_value(r, c) for c in range(cols)), format) return array_chunk # there is no obvious rule for slicing (at least 5 choices) if len(array) == 1 and (rows > 1 or cols > 1): array = array[0] if array.size > len(array): array = array[roffset:, coffset:] rows = min(rows, len(array)) cols = min(cols, len(array[0])) if len(array) == 1: array = array[0] elif array.size == len(array): if roffset == 0 and rows == 1: array = array[coffset:] cols = min(cols, len(array)) elif coffset == 0 and cols == 1: array = array[roffset:] rows = min(rows, len(array)) def get_value(row, col): value = array if rows == 1 or cols == 1: if rows == 1 and cols == 1: value = array[0] else: value = array[(col if rows == 1 else row)] if "ndarray" in str(type(value)): value = value[0] else: value = array[row][col] return value array_chunk.data = array_data_to_thrift_struct(rows, cols, lambda r: (get_value(r, c) for c in range(cols)), format) return array_chunk def tf_to_thrift_struct(tensor, name, roffset, coffset, rows, cols, format): try: return array_to_thrift_struct(tensor.numpy(), name, roffset, coffset, rows, cols, format) except TypeError: return array_to_thrift_struct(tensor.to_dense().numpy(), name, roffset, coffset, rows, cols, format) def torch_to_thrift_struct(tensor, name, roffset, coffset, rows, cols, format): try: if tensor.requires_grad: tensor = tensor.detach() return array_to_thrift_struct(tensor.numpy(), name, roffset, coffset, rows, cols, format) except TypeError: return array_to_thrift_struct(tensor.to_dense().numpy(), name, roffset, coffset, rows, cols, format) def tf_sparse_to_thrift_struct(tensor, name, roffset, coffset, rows, cols, format): try: import tensorflow as tf return tf_to_thrift_struct(tf.sparse.to_dense(tf.sparse.reorder(tensor)), name, roffset, coffset, rows, cols, format) except ImportError: pass def dataset_to_thrift_struct(dataset, name, roffset, coffset, rows, cols, format): return dataframe_to_thrift_struct(dataset.to_pandas(), name, roffset, coffset, rows, cols, format) def array_to_meta_thrift_struct(array, name, format): type = array.dtype.kind slice = name l = len(array.shape) # initial load, compute slice if format == '%': if l > 2: slice += '[0]' * (l - 2) for r in range(l - 2): array = array[0] if type == 'f': format = '.5f' elif type == 'i' or type == 'u': format = 'd' else: format = 's' else: format = format.replace('%', '') l = len(array.shape) reslice = "" if l > 2: raise ExceedingArrayDimensionsException elif l == 0: rows = 0 cols = 0 elif l == 1: # special case with 1D arrays arr[i, :] - row, but arr[:, i] - column with equal shape and ndim # http://stackoverflow.com/questions/16837946/numpy-a-2-rows-1-column-file-loadtxt-returns-1row-2-columns # explanation: http://stackoverflow.com/questions/15165170/how-do-i-maintain-row-column-orientation-of-vectors-in-numpy?rq=1 # we use kind of a hack - get information about memory from C_CONTIGUOUS cols = 1 rows = len(array) if rows < len(array): reslice = '[0:%s]' % (rows) array = array[0:rows] elif l == 2: rows = array.shape[-2] cols = array.shape[-1] if cols < array.shape[-1] or rows < array.shape[-2]: reslice = '[0:%s, 0:%s]' % (rows, cols) array = array[0:rows, 0:cols] # avoid slice duplication if not slice.endswith(reslice): slice += reslice bounds = (0, 0) if type in NUMPY_NUMERIC_TYPES and array.size != 0: bounds = (array.min(), array.max()) array_chunk = GetArrayResponse() array_chunk.slice = slice array_chunk.rows = rows array_chunk.cols = cols array_chunk.format = "%" + format array_chunk.type = type array_chunk.max = "%s" % bounds[1] array_chunk.min = "%s" % bounds[0] return array, array_chunk, rows, cols, format def dataframe_to_thrift_struct(df, name, roffset, coffset, rows, cols, format): """ :type df: pandas.core.frame.DataFrame :type name: str :type coffset: int :type roffset: int :type rows: int :type cols: int :type format: str """ original_df = df dim = len(df.axes) if hasattr(df, 'axes') else -1 num_rows = df.shape[0] num_cols = df.shape[1] if dim > 1 else 1 array_chunk = GetArrayResponse() array_chunk.slice = name array_chunk.rows = num_rows array_chunk.cols = num_cols array_chunk.type = "" array_chunk.max = "0" array_chunk.min = "0" format = format.replace("%", "") if not format: if num_rows > 0 and num_cols == 1: # series or data frame with one column try: kind = df.dtype.kind except AttributeError: try: kind = df.dtypes[0].kind except (IndexError, KeyError, AttributeError): kind = "O" format = array_default_format(kind) else: format = array_default_format(DEFAULT_DF_FORMAT) array_chunk.format = "%" + format if (rows, cols) == (-1, -1): rows, cols = num_rows, num_cols elif (rows, cols) == (0, 0): # return header only r = min(num_rows, DATAFRAME_HEADER_LOAD_MAX_SIZE) c = min(num_cols, DATAFRAME_HEADER_LOAD_MAX_SIZE) array_chunk.headers = header_data_to_thrift_struct(r, c, [""] * num_cols, [(0, 0)] * num_cols, lambda x: DEFAULT_DF_FORMAT, original_df, dim) array_chunk.data = array_data_to_thrift_struct(rows, cols, None, '%' + format) return array_chunk rows = min(rows, MAXIMUM_ARRAY_SIZE) cols = min(cols, MAXIMUM_ARRAY_SIZE, num_cols) # need to precompute column bounds here before slicing! col_bounds = [None] * cols dtypes = [None] * cols if dim > 1: for col in range(cols): dtype = df.dtypes.iloc[coffset + col].kind dtypes[col] = dtype if dtype in NUMPY_NUMERIC_TYPES and df.size != 0: cvalues = df.iloc[:, coffset + col] bounds = (cvalues.min(), cvalues.max()) else: bounds = (0, 0) col_bounds[col] = bounds elif dim == -1: dtype = '0' dtypes[0] = dtype col_bounds[0] = (df.min(), df.max()) if dtype in NUMPY_NUMERIC_TYPES and df.size != 0 else (0, 0) else: dtype = df.dtype.kind dtypes[0] = dtype col_bounds[0] = (df.min(), df.max()) if dtype in NUMPY_NUMERIC_TYPES and df.size != 0 else (0, 0) if dim > 1: df = df.iloc[roffset: roffset + rows, coffset: coffset + cols] elif dim == -1: df = df[roffset: roffset + rows] else: df = df.iloc[roffset: roffset + rows] rows = df.shape[0] cols = df.shape[1] if dim > 1 else 1 def col_to_format(c): return get_column_formatter_by_type(format, dtypes[c]) if dim == -1: iat = IAtPolarsAccessor(df) elif dim == 1 or len(df.columns.unique()) == len(df.columns): iat = df.iat else: iat = df.iloc def formatted_row_elements(row): return get_formatted_row_elements(row, iat, dim, cols, format, dtypes) array_chunk.headers = header_data_to_thrift_struct(rows, cols, dtypes, col_bounds, col_to_format, df, dim) # we already have here formatted_row_elements, so we pass here %s as a default format array_chunk.data = array_data_to_thrift_struct(rows, cols, formatted_row_elements, format='%s') return array_chunk def array_data_to_thrift_struct(rows, cols, get_row, format): array_data = ArrayData() array_data.rows = rows array_data.cols = cols # `ArrayData.data` data = [] for row in range(rows): data.append([var_to_str(value, format) for value in get_row(row)]) array_data.data = data return array_data def header_data_to_thrift_struct(rows, cols, dtypes, col_bounds, col_to_format, df, dim): array_headers = ArrayHeaders() col_headers = [] for col in range(cols): col_label = get_label(df.axes[1].values[col]) if dim > 1 else str(col) bounds = col_bounds[col] col_format = "%" + col_to_format(col) col_header = ColHeader() # col_header.index = col col_header.label = col_label col_header.type = dtypes[col] col_header.format = col_to_format(col) col_header.max = col_format % bounds[1] col_header.min = col_format % bounds[0] col_headers.append(col_header) row_headers = [] for row in range(rows): row_header = RowHeader() row_header.index = row row_header.label = get_label(df.axes[0].values[row] if dim != -1 else str(row)) row_headers.append(row_header) array_headers.colHeaders = col_headers array_headers.rowHeaders = row_headers return array_headers TYPE_TO_THRIFT_STRUCT_CONVERTERS = { "ndarray": array_to_thrift_struct, "recarray": array_to_thrift_struct, "EagerTensor": tf_to_thrift_struct, "ResourceVariable": tf_to_thrift_struct, "SparseTensor": tf_sparse_to_thrift_struct, "Tensor": torch_to_thrift_struct, "DataFrame": dataframe_to_thrift_struct, "Series": dataframe_to_thrift_struct, "Dataset": dataset_to_thrift_struct, "GeoDataFrame": dataframe_to_thrift_struct, "GeoSeries": dataframe_to_thrift_struct } def table_like_struct_to_thrift_struct(array, name, roffset, coffset, rows, cols, format): """Returns `GetArrayResponse` structure for table-like structure The `array` might be either `numpy.ndarray`, `pandas.DataFrame` or `pandas.Series`. """ _, type_name, _ = get_type(array) format = format if is_able_to_format_number(format) else '%' if type_name in TYPE_TO_THRIFT_STRUCT_CONVERTERS: return TYPE_TO_THRIFT_STRUCT_CONVERTERS[type_name](array, name, roffset, coffset, rows, cols, format) else: raise UnsupportedArrayTypeException(type_name)