mirror of
https://gitflic.ru/project/openide/openide.git
synced 2025-12-16 22:51:17 +07:00
[pycharm] PY-71967 Add impl (except sorting in outputs -- to fix)
GitOrigin-RevId: ac079eefe71d7b32daa5e95eaef9c986ab842294
This commit is contained in:
committed by
intellij-monorepo-bot
parent
58af8101d9
commit
2fc8ea1630
@@ -21,6 +21,7 @@ enum class NotebookOutputKeyType {
|
||||
POLARS_SERIES,
|
||||
PYSPARK_TABLE,
|
||||
R_MARKDOWN,
|
||||
HF_DATASET,
|
||||
SVG,
|
||||
SWING_COMPONENT,
|
||||
TEST,
|
||||
|
||||
@@ -84,5 +84,7 @@ def __get_table_provider(output):
|
||||
type_qualified_name.endswith('DataFrame')
|
||||
or type_qualified_name.endswith('Series')):
|
||||
import _pydevd_bundle.tables.pydevd_polars as table_provider
|
||||
elif type_qualified_name == 'datasets.arrow_dataset.Dataset':
|
||||
import _pydevd_bundle.tables.pydevd_dataset as table_provider
|
||||
|
||||
return table_provider
|
||||
|
||||
@@ -420,6 +420,10 @@ def sparse_tensor_to_thrift_struct(tensor, name, roffset, coffset, rows, cols, f
|
||||
pass
|
||||
|
||||
|
||||
def dataset_to_thrift_struct(dataset, name, roffset, coffset, rows, cols, format):
    """Convert a datasets.Dataset to a thrift struct by first materializing it as pandas."""
    as_frame = dataset.to_pandas()
    return dataframe_to_thrift_struct(as_frame, name, roffset, coffset, rows, cols, format)
|
||||
|
||||
|
||||
def array_to_meta_thrift_struct(array, name, format):
|
||||
type = array.dtype.kind
|
||||
slice = name
|
||||
@@ -621,6 +625,7 @@ TYPE_TO_THRIFT_STRUCT_CONVERTERS = {
|
||||
"Tensor": tensor_to_thrift_struct,
|
||||
"DataFrame": dataframe_to_thrift_struct,
|
||||
"Series": dataframe_to_thrift_struct,
|
||||
"Dataset": dataset_to_thrift_struct,
|
||||
"GeoDataFrame": dataframe_to_thrift_struct,
|
||||
"GeoSeries": dataframe_to_thrift_struct
|
||||
}
|
||||
|
||||
@@ -774,6 +774,9 @@ def dataframe_to_xml(df, name, roffset, coffset, rows, cols, format):
|
||||
xml += array_data_to_xml(rows, cols, formatted_row_elements, format)
|
||||
return xml
|
||||
|
||||
def dataset_to_xml(dataset, name, roffset, coffset, rows, cols, format):
    """Serialize a datasets.Dataset to the debugger XML format via a pandas conversion."""
    as_frame = dataset.to_pandas()
    return dataframe_to_xml(as_frame, name, roffset, coffset, rows, cols, format)
|
||||
|
||||
|
||||
def array_data_to_xml(rows, cols, get_row, format):
|
||||
xml = "<arraydata rows=\"%s\" cols=\"%s\"/>\n" % (rows, cols)
|
||||
@@ -820,7 +823,8 @@ TYPE_TO_XML_CONVERTERS = {
|
||||
"EagerTensor": tensor_to_xml,
|
||||
"ResourceVariable": tensor_to_xml,
|
||||
"SparseTensor": sparse_tensor_to_xml,
|
||||
"Tensor": tensor_to_xml
|
||||
"Tensor": tensor_to_xml,
|
||||
"Dataset": dataset_to_xml
|
||||
}
|
||||
|
||||
|
||||
|
||||
102
python/helpers/pydev/_pydevd_bundle/tables/pydevd_dataset.py
Normal file
102
python/helpers/pydev/_pydevd_bundle/tables/pydevd_dataset.py
Normal file
@@ -0,0 +1,102 @@
|
||||
# Copyright 2000-2023 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import typing
|
||||
|
||||
# Separator token the IDE uses to split the column-type payload string.
TABLE_TYPE_NEXT_VALUE_SEPARATOR = '__pydev_table_column_type_val__'
# On Python 2 pandas' display.max_colwidth accepts only ints, so use a large cap.
MAX_COLWIDTH_PYTHON_2 = 100000
# Row-batch size when streaming a datasets.Dataset into pandas.
BATCH_SIZE = 10000
|
||||
|
||||
|
||||
def get_type(table):
    """Return the table's Python type rendered as a string, e.g. "<class 'list'>"."""
    table_type = type(table)
    return str(table_type)
|
||||
|
||||
|
||||
# noinspection PyUnresolvedReferences
def get_shape(table):
    """Return the table's row count (first element of ``.shape``) as a string."""
    row_count = table.shape[0]
    return str(row_count)
|
||||
|
||||
|
||||
# noinspection PyUnresolvedReferences
def get_head(table):
    """Render the first rows of *table* as a notebook HTML snippet, repr-quoted.

    The repr-quoting lets the debugger transport the HTML as a Python literal.
    """
    frame = pd.concat(list(__convert_to_df(table)), ignore_index=True)
    html = frame.head().to_html(notebook=True, max_cols=None)
    return repr(html)
|
||||
|
||||
|
||||
# noinspection PyUnresolvedReferences
def get_column_types(table):
    """Return the index dtype followed by every column dtype, separator-joined."""
    frame = pd.concat(list(__convert_to_df(table)), ignore_index=True)
    separator = TABLE_TYPE_NEXT_VALUE_SEPARATOR
    column_types = [str(dtype) for dtype in frame.dtypes]
    # The index dtype always comes first; the IDE splits on the separator.
    return str(frame.index.dtype) + separator + separator.join(column_types)
|
||||
|
||||
|
||||
# used by pydevd
# noinspection PyUnresolvedReferences
def get_data(table, start_index=None, end_index=None):
    """Return an HTML rendering (repr-quoted) of *table*, optionally row-sliced."""

    def render_html(data, max_cols):
        # repr-quote so the HTML survives transport as a Python literal
        return repr(data.to_html(notebook=True, max_cols=max_cols))

    return _compute_sliced_data(table, render_html, start_index, end_index)
|
||||
|
||||
|
||||
# used by DSTableCommands
# noinspection PyUnresolvedReferences
def display_data(table, start_index, end_index):
    """Show the [start_index, end_index) slice of *table* inline via IPython."""

    def show_inline(data, max_cols):
        # max_cols is accepted only to satisfy the _compute_sliced_data callback
        # signature; IPython decides the rendering width itself.
        from IPython.display import display
        display(data)

    _compute_sliced_data(table, show_inline, start_index, end_index)
|
||||
|
||||
|
||||
def __get_data_slice(table, start, end):
    """Materialize rows [start, end) of *table* as a pandas DataFrame.

    NOTE(review): relies on the Hugging Face ``Dataset.select``/``to_pandas``
    API — a plain DataFrame would not work here; confirm against callers.
    """
    selected = table.select(range(start, end))
    return selected.to_pandas()
|
||||
|
||||
|
||||
def _compute_sliced_data(table, fun, start_index=None, end_index=None):
    """Run *fun* over a pandas view of *table* under temporary display options.

    :param table: a pandas object or a datasets.Dataset to render
    :param fun: callback taking ``(pandas_data, max_cols)`` and producing the result
    :param start_index: first row of the slice, or None to use the whole table
    :param end_index: one past the last slice row, or None to use the whole table
    :return: whatever *fun* returns
    """
    max_cols, max_colwidth = __get_tables_display_options()

    # Remember the user's pandas options so they can be restored afterwards.
    _jb_max_cols = pd.get_option('display.max_columns')
    _jb_max_colwidth = pd.get_option('display.max_colwidth')

    pd.set_option('display.max_columns', max_cols)
    pd.set_option('display.max_colwidth', max_colwidth)
    try:
        if start_index is not None and end_index is not None:
            table = __get_data_slice(table, start_index, end_index)
        else:
            table = pd.concat(list(__convert_to_df(table)), ignore_index=True)

        data = fun(table, max_cols)
    finally:
        # Restore the options even when fun or the slicing raises, so a failed
        # render never permanently changes the user's pandas configuration.
        pd.set_option('display.max_columns', _jb_max_cols)
        pd.set_option('display.max_colwidth', _jb_max_colwidth)

    return data
|
||||
|
||||
|
||||
# In old versions of pandas max_colwidth accepted only Int-s
def __get_tables_display_options():
    """Return the ``(max_columns, max_colwidth)`` pair to apply while rendering."""
    import sys
    if sys.version_info[0] < 3:
        # Python 2 pandas rejects None for max_colwidth, so pass a huge int cap.
        return None, MAX_COLWIDTH_PYTHON_2
    return None, None
|
||||
|
||||
|
||||
# noinspection PyUnresolvedReferences
def __convert_to_df(table):
    """Return *table* unchanged, or as DataFrame batches for a Hugging Face Dataset."""
    try:
        import datasets
    except ImportError:
        # The datasets package is optional; without it everything passes through.
        return table
    if type(table) is datasets.arrow_dataset.Dataset:
        return __dataset_to_df(table)
    return table
|
||||
|
||||
|
||||
def __dataset_to_df(dataset):
    """Stream a Hugging Face Dataset into pandas as batched DataFrame chunks."""
    try:
        # batched=True makes to_pandas yield DataFrame chunks of batch_size rows,
        # keeping memory bounded for large datasets.
        return dataset.to_pandas(batched=True, batch_size=min(len(dataset), BATCH_SIZE))
    except ImportError as e:
        pass
    # NOTE(review): on ImportError this falls through and implicitly returns None;
    # confirm callers tolerate that (the diff view may be truncated here).
|
||||
@@ -42,7 +42,8 @@ public class PyDebugValue extends XNamedValue {
|
||||
DATA_FRAME, DATA_FRAME,
|
||||
SERIES, SERIES,
|
||||
"GeoDataFrame", DATA_FRAME,
|
||||
"GeoSeries", SERIES
|
||||
"GeoSeries", SERIES,
|
||||
"Dataset", DATA_FRAME
|
||||
);
|
||||
private static final int MAX_ITEMS_TO_HANDLE = 100;
|
||||
public static final int MAX_VALUE = 256;
|
||||
|
||||
@@ -25,6 +25,7 @@ public abstract class DataViewStrategy {
|
||||
ArrayViewStrategy.createInstanceForTensor(),
|
||||
DataFrameViewStrategy.createInstanceForDataFrame(),
|
||||
DataFrameViewStrategy.createInstanceForGeoDataFrame(),
|
||||
DataFrameViewStrategy.createInstanceForDataset(),
|
||||
SeriesViewStrategy.createInstanceForSeries(),
|
||||
SeriesViewStrategy.createInstanceForGeoSeries()
|
||||
);
|
||||
|
||||
@@ -72,7 +72,7 @@ public class PyViewNumericContainerAction extends XDebuggerTreeActionBase {
|
||||
e.getPresentation().setText(PyBundle.message("debugger.numeric.view.as.array"));
|
||||
e.getPresentation().setVisible(true);
|
||||
}
|
||||
else if ("DataFrame".equals(nodeType) || "GeoDataFrame".equals(nodeType)) {
|
||||
else if ("DataFrame".equals(nodeType) || "GeoDataFrame".equals(nodeType) || "Dataset".equals(nodeType)) {
|
||||
e.getPresentation().setText(PyBundle.message("debugger.numeric.view.as.dataframe"));
|
||||
e.getPresentation().setVisible(true);
|
||||
}
|
||||
|
||||
@@ -24,6 +24,10 @@ public class DataFrameViewStrategy extends DataViewStrategy {
|
||||
return new DataFrameViewStrategy("GeoDataFrame");
|
||||
}
|
||||
|
||||
public static @NotNull DataFrameViewStrategy createInstanceForDataset() {
|
||||
return new DataFrameViewStrategy("Dataset");
|
||||
}
|
||||
|
||||
protected DataFrameViewStrategy(final @NotNull String typeName) {
|
||||
this.myTypeName = typeName;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user