[pycharm] PY-72208 Tables(Jupyter, SciView): Add csv mode for py tables

GitOrigin-RevId: cb8d849952c80c882f6025d541efe670764a8cd5
This commit is contained in:
ekaterina.itsenko
2024-09-22 22:20:27 +02:00
committed by intellij-monorepo-bot
parent 608f4a304f
commit 7a743e5c57
5 changed files with 141 additions and 29 deletions

View File

@@ -33,20 +33,38 @@ def get_column_types(table):
# used by pydevd
# noinspection PyUnresolvedReferences
def get_data(table, start_index=None, end_index=None, format=None, conv_mode=False):
    # type: (datasets.arrow_dataset.Dataset, int, int) -> str
    """Render ``table`` through ``_compute_sliced_data`` as HTML or CSV text.

    :param table: the table object handed to ``_compute_sliced_data``.
    :param start_index: forwarded to ``_compute_sliced_data``.
    :param end_index: forwarded to ``_compute_sliced_data``.
    :param format: forwarded to ``_compute_sliced_data``.
    :param conv_mode: when truthy the CSV converter is used, otherwise the
        HTML converter.
    :return: whatever ``_compute_sliced_data`` returns for the chosen converter.
    """
    def convert_data_to_csv(data):
        return repr(data.to_csv())

    def convert_data_to_html(data, max_cols):
        return repr(data.to_html(notebook=True, max_cols=max_cols))

    converter = convert_data_to_csv if conv_mode else convert_data_to_html
    # conv_mode has to reach _compute_sliced_data: it decides whether the
    # converter is invoked as fun(table) or fun(table, max_cols). Without it
    # the one-argument CSV converter is called with two arguments.
    return _compute_sliced_data(table, converter, start_index, end_index, format, conv_mode)
# used by DSTableCommands
# noinspection PyUnresolvedReferences
def display_data_csv(table, start_index, end_index):
    # type: (datasets.arrow_dataset.Dataset, int, int) -> None
    """Show ``table`` via ``IPython.display.display`` through ``_compute_sliced_data``.

    :param table: the table object handed to ``_compute_sliced_data``.
    :param start_index: forwarded to ``_compute_sliced_data``.
    :param end_index: forwarded to ``_compute_sliced_data``.
    """
    def ipython_display(data, max_cols=None):
        # max_cols is accepted and ignored: _compute_sliced_data calls
        # fun(table, max_cols) unless conv_mode is set, so a one-argument
        # callback raises TypeError on this call path.
        from IPython.display import display
        # NOTE(review): this displays the table object itself, not CSV text -
        # confirm that is what the caller of display_data_csv expects.
        display(data)

    _compute_sliced_data(table, ipython_display, start_index, end_index)
# used by DSTableCommands
# noinspection PyUnresolvedReferences
def display_data_html(table, start_index, end_index):
# type: (datasets.arrow_dataset.Dataset, int, int) -> None
def ipython_display(data):
from IPython.display import display
display(data)
@@ -58,7 +76,7 @@ def __get_data_slice(table, start, end):
return __convert_to_df(table).iloc[start:end]
def _compute_sliced_data(table, fun, start_index=None, end_index=None, format=None):
def _compute_sliced_data(table, fun, start_index=None, end_index=None, format=None, conv_mode=False):
# type: (datasets.arrow_dataset.Dataset, function, int, int) -> str
max_cols, max_colwidth, max_rows = __get_tables_display_options()
@@ -81,7 +99,10 @@ def _compute_sliced_data(table, fun, start_index=None, end_index=None, format=No
else:
table = __convert_to_df(table)
data = fun(table, max_cols)
if conv_mode:
data = fun(table)
else:
data = fun(table, max_cols)
pd.set_option('display.max_columns', _jb_max_cols)
pd.set_option('display.max_colwidth', _jb_max_colwidth)

View File

@@ -1,5 +1,6 @@
# Copyright 2000-2023 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
import numpy as np
import io
TABLE_TYPE_NEXT_VALUE_SEPARATOR = '__pydev_table_column_type_val__'
MAX_COLWIDTH = 100000
@@ -46,15 +47,23 @@ def get_column_types(arr):
TABLE_TYPE_NEXT_VALUE_SEPARATOR.join(cols_types)
def get_data(arr, start_index=None, end_index=None, format=None, conv_mode=False):
    # type: (Union[np.ndarray, dict], int, int) -> str
    """Render ``arr`` through ``_compute_data`` as HTML (default) or CSV text.

    The table is built with ``_create_table(data, start_index, end_index, format)``
    and the repr of its ``to_html``/``to_csv`` output is produced by the
    callback selected with ``conv_mode``.
    """
    def build_table(data):
        return _create_table(data, start_index, end_index, format)

    def render_html(data, max_cols):
        markup = build_table(data).to_html(notebook=True, max_cols=max_cols)
        return repr(markup)

    def render_csv(data):
        return repr(build_table(data).to_csv())

    renderer = render_csv if conv_mode else render_html
    return _compute_data(arr, renderer, format, conv_mode)
def display_data(arr, start_index=None, end_index=None):
def display_data_html(arr, start_index=None, end_index=None):
# type: (np.ndarray, int, int) -> None
def ipython_display(data, max_cols):
from IPython.display import display, HTML
@@ -63,6 +72,15 @@ def display_data(arr, start_index=None, end_index=None):
_compute_data(arr, ipython_display)
def display_data_csv(arr, start_index=None, end_index=None):
    # type: (np.ndarray, int, int) -> None
    """Display the CSV text of the table built from ``arr``.

    :param arr: the value handed to ``_compute_data``.
    :param start_index: forwarded to ``_create_table``.
    :param end_index: forwarded to ``_create_table``.
    """
    def ipython_display(data, max_cols=None):
        # max_cols is accepted and ignored: _compute_data calls fun(data, None)
        # unless conv_mode is set, so a one-argument callback raises TypeError
        # on this call path.
        from IPython.display import display
        display(_create_table(data, start_index, end_index).to_csv())

    _compute_data(arr, ipython_display)
class _NpTable:
def __init__(self, np_array, format=None):
self.array = np_array
@@ -153,6 +171,14 @@ class _NpTable:
html.append('</tbody>\n')
return html
def to_csv(self):
    """Return ``self.array`` serialized as comma-delimited text.

    ``np.savetxt`` is first tried with its default float format, so numeric
    output is unchanged. If that raises ``TypeError`` (for example on a
    string array, which the default ``'%.18e'`` specifier cannot format),
    the array is written again with ``fmt='%s'`` instead of failing.
    """
    csv_stream = io.StringIO()
    try:
        np.savetxt(csv_stream, self.array, delimiter=',')
    except TypeError:
        # Start from a fresh buffer so rows written before the failure are
        # not duplicated in the result.
        csv_stream = io.StringIO()
        np.savetxt(csv_stream, self.array, delimiter=',', fmt='%s')
    return csv_stream.getvalue()
def slice(self, start_index=None, end_index=None):
if end_index is not None and start_index is not None:
self.array = self.array[start_index:end_index]
@@ -243,7 +269,7 @@ def _create_table(command, start_index=None, end_index=None, format=None):
return _NpTable(np_array, format=format).sort(sort_keys).slice(start_index, end_index)
def _compute_data(arr, fun, format=None):
def _compute_data(arr, fun, format=None, conv_mode=False):
is_sort_command = type(arr) is dict
data = arr['data'] if is_sort_command else arr
@@ -255,7 +281,10 @@ def _compute_data(arr, fun, format=None):
arr['data'] = data
data = arr
data = fun(data, None)
if conv_mode:
data = fun(data)
else:
data = fun(data, None)
if is_pd:
_reset_pd_options(jb_max_cols, jb_max_colwidth, jb_max_rows, jb_float_options)

View File

@@ -1,5 +1,6 @@
# Copyright 2000-2024 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
import numpy as np
import io
TABLE_TYPE_NEXT_VALUE_SEPARATOR = '__pydev_table_column_type_val__'
MAX_COLWIDTH = 100000
@@ -43,19 +44,36 @@ def get_column_types(arr):
TABLE_TYPE_NEXT_VALUE_SEPARATOR.join(cols_types)
def get_data(arr, start_index=None, end_index=None, format=None, conv_mode=False):
    # type: (Union[np.ndarray, dict], int, int) -> str
    """Render ``arr`` through ``_compute_data`` as HTML (default) or CSV text.

    The table is built with ``_create_table(data, start_index, end_index, format)``
    and the repr of its ``to_html``/``to_csv`` output is produced by the
    callback selected with ``conv_mode``.
    """
    def build_table(data):
        return _create_table(data, start_index, end_index, format)

    def render_html(data, max_cols):
        markup = build_table(data).to_html(notebook=True, max_cols=max_cols)
        return repr(markup)

    def render_csv(data):
        return repr(build_table(data).to_csv())

    renderer = render_csv if conv_mode else render_html
    return _compute_data(arr, renderer, format, conv_mode)
def display_data_html(arr, start_index=None, end_index=None):
    # type: (np.ndarray, int, int) -> None
    """Display the HTML rendering of the table built from ``arr``.

    ``start_index`` and ``end_index`` are forwarded to ``_create_table``;
    ``max_cols`` is supplied by ``_compute_data`` when it invokes the callback.
    """
    def ipython_display(data, max_cols):
        from IPython.display import display, HTML
        table = _create_table(data, start_index, end_index)
        markup = table.to_html(notebook=True, max_cols=max_cols)
        display(HTML(markup))

    _compute_data(arr, ipython_display)
def display_data_csv(arr, start_index=None, end_index=None):
    # type: (np.ndarray, int, int) -> None
    """Display the CSV text of the table built from ``arr``.

    :param arr: the value handed to ``_compute_data``.
    :param start_index: forwarded to ``_create_table``.
    :param end_index: forwarded to ``_create_table``.
    """
    def ipython_display(data, max_cols=None):
        # max_cols is accepted and ignored: _compute_data calls fun(data, None)
        # unless conv_mode is set, so a one-argument callback raises TypeError
        # on this call path.
        from IPython.display import display
        display(_create_table(data, start_index, end_index).to_csv())

    _compute_data(arr, ipython_display)
@@ -96,24 +114,30 @@ class _NpTable:
html.append('<thead>\n'
'<tr style="text-align: right;">\n'
'<th></th>\n')
html += self._collect_cols_names()
html += self._collect_cols_names_html()
html.append('</tr>\n'
'</thead>\n')
# tbody
html += self._collect_values(max_cols)
html += self._collect_values_html(max_cols)
html.append('</table>\n')
return "".join(html)
def _collect_cols_names(self):
def to_csv(self):
    """Return ``self.array`` serialized as comma-delimited text.

    ``np.savetxt`` is first tried with its default float format, so numeric
    output is unchanged. If that raises ``TypeError`` (for example on a
    string array, which the default ``'%.18e'`` specifier cannot format),
    the array is written again with ``fmt='%s'`` instead of failing.
    """
    csv_stream = io.StringIO()
    try:
        np.savetxt(csv_stream, self.array, delimiter=',')
    except TypeError:
        # Start from a fresh buffer so rows written before the failure are
        # not duplicated in the result.
        csv_stream = io.StringIO()
        np.savetxt(csv_stream, self.array, delimiter=',', fmt='%s')
    return csv_stream.getvalue()
def _collect_cols_names_html(self):
    """Return one ``<th>`` header cell per column, labelled by column index.

    A table whose ``type`` is ``ONE_DIM`` gets a single column labelled 0;
    otherwise the column count is the length of the first row of ``self.array``.
    """
    # The conditional keeps self.array[0] untouched for ONE_DIM tables.
    column_count = 1 if self.type == ONE_DIM else len(self.array[0])
    header_cells = []
    for column_index in range(column_count):
        header_cells.append('<th>{}</th>\n'.format(column_index))
    return header_cells
def _collect_values(self, max_cols):
def _collect_values_html(self, max_cols):
html = ['<tbody>\n']
rows = self.array.shape[0]
for row_num in range(rows):
@@ -224,7 +248,7 @@ def _create_table(command, start_index=None, end_index=None, format=None):
return _NpTable(np_array, format=format).sort(sort_keys).slice(start_index, end_index)
def _compute_data(arr, fun, format=None):
def _compute_data(arr, fun, format=None, conv_mode=False):
is_sort_command = type(arr) is dict
data = arr['data'] if is_sort_command else arr
@@ -236,7 +260,10 @@ def _compute_data(arr, fun, format=None):
arr['data'] = data
data = arr
data = fun(data, None)
if conv_mode:
data = fun(data)
else:
data = fun(data, None)
if is_pd:
_reset_pd_options(jb_max_cols, jb_max_colwidth, jb_max_rows, jb_float_options)

View File

@@ -34,20 +34,38 @@ def get_column_types(table):
# used by pydevd
# noinspection PyUnresolvedReferences
def get_data(table, start_index=None, end_index=None, format=None, conv_mode=False):
    # type: (Union[pd.DataFrame, pd.Series], int, int) -> str
    """Render ``table`` through ``_compute_sliced_data`` as HTML or CSV text.

    :param table: the table object handed to ``_compute_sliced_data``.
    :param start_index: forwarded to ``_compute_sliced_data``.
    :param end_index: forwarded to ``_compute_sliced_data``.
    :param format: forwarded to ``_compute_sliced_data``.
    :param conv_mode: when truthy the CSV converter is used, otherwise the
        HTML converter.
    :return: whatever ``_compute_sliced_data`` returns for the chosen converter.
    """
    def convert_data_to_csv(data):
        return repr(__convert_to_df(data).to_csv())

    def convert_data_to_html(data, max_cols):
        return repr(__convert_to_df(data).to_html(notebook=True, max_cols=max_cols))

    converter = convert_data_to_csv if conv_mode else convert_data_to_html
    # conv_mode has to reach _compute_sliced_data: it decides whether the
    # converter is invoked as fun(table) or fun(table, max_cols). Without it
    # the one-argument CSV converter is called with two arguments.
    return _compute_sliced_data(table, converter, start_index, end_index, format, conv_mode)
# used by DSTableCommands
# noinspection PyUnresolvedReferences
def display_data_csv(table, start_index, end_index):
    # type: (Union[pd.DataFrame, pd.Series], int, int) -> None
    """Show ``table`` via ``IPython.display.display`` through ``_compute_sliced_data``.

    :param table: the table object handed to ``_compute_sliced_data``.
    :param start_index: forwarded to ``_compute_sliced_data``.
    :param end_index: forwarded to ``_compute_sliced_data``.
    """
    def ipython_display(data, max_cols=None):
        # max_cols is accepted and ignored: _compute_sliced_data calls
        # fun(table, max_cols) unless conv_mode is set, so a one-argument
        # callback raises TypeError on this call path.
        from IPython.display import display
        # NOTE(review): this displays the converted frame itself, not CSV
        # text - confirm that is what the caller of display_data_csv expects.
        display(__convert_to_df(data))

    _compute_sliced_data(table, ipython_display, start_index, end_index)
# used by DSTableCommands
# noinspection PyUnresolvedReferences
def display_data_html(table, start_index, end_index):
# type: (Union[pd.DataFrame, pd.Series], int, int) -> None
def ipython_display(data):
from IPython.display import display
display(__convert_to_df(data))
@@ -58,7 +76,7 @@ def __get_data_slice(table, start, end):
return __convert_to_df(table).iloc[start:end]
def _compute_sliced_data(table, fun, start_index=None, end_index=None, format=None):
def _compute_sliced_data(table, fun, start_index=None, end_index=None, format=None, conv_mode=False):
# type: (Union[pd.DataFrame, pd.Series], function, int, int) -> str
max_cols, max_colwidth, max_rows = __get_tables_display_options()
@@ -81,7 +99,10 @@ def _compute_sliced_data(table, fun, start_index=None, end_index=None, format=No
if start_index is not None and end_index is not None:
table = __get_data_slice(table, start_index, end_index)
data = fun(table, max_cols)
if conv_mode:
data = fun(table)
else:
data = fun(table, max_cols)
pd.set_option('display.max_columns', _jb_max_cols)
pd.set_option('display.max_colwidth', _jb_max_colwidth)

View File

@@ -33,19 +33,33 @@ def get_column_types(table):
# used by pydevd
def get_data(table, start_index=None, end_index=None, format=None, conv_mode=False):
    # type: (pl.DataFrame, int, int) -> str
    """Return the ``[start_index:end_index]`` slice of ``table`` as text.

    With ``conv_mode`` truthy the slice comes from ``__get_df_slice`` and is
    serialized with ``write_csv()``; otherwise the plain slice's
    ``_repr_html_()`` is returned. Both run inside ``__create_config(format)``.
    """
    with __create_config(format):
        if not conv_mode:
            return table[start_index:end_index]._repr_html_()
        csv_source = __get_df_slice(table, start_index, end_index)
        return csv_source.write_csv()
# used by DSTableCommands
def display_data_html(table, start, end):
    # type: (pl.DataFrame, int, int) -> None
    """Print the ``_repr_html_()`` of ``table[start:end]`` inside ``__create_config()``."""
    with __create_config():
        html_text = table[start:end]._repr_html_()
        print(html_text)
def display_data_csv(table, start, end):
    # type: (pl.DataFrame, int, int) -> None
    """Print the ``write_csv()`` text of the slice from ``__get_df_slice(table, start, end)``."""
    with __create_config():
        csv_text = __get_df_slice(table, start, end).write_csv()
        print(csv_text)
def __get_df_slice(table, start_index, end_index):
    """Return ``table[start_index:end_index]``, as a frame if ``table`` is a Series.

    The Series check is textual: ``to_frame()`` is applied whenever the name
    of ``table``'s type contains ``'Series'``.
    """
    sliced = table[start_index:end_index]
    is_series = 'Series' in str(type(table))
    return sliced.to_frame() if is_series else sliced
def __create_config(format=None):
# type: (Union[str, None]) -> pl.Config
cfg = pl.Config()