[pycharm] PY-72208 Tables(Jupyter, SciView): Add csv mode for py tables

GitOrigin-RevId: cb8d849952c80c882f6025d541efe670764a8cd5
2026-01-08 15:09:39 +07:00 · 2024-09-22 22:20:27 +02:00
parent 608f4a304f
commit 7a743e5c57
5 changed files with 141 additions and 29 deletions
--- a/python/helpers/pydev/_pydevd_bundle/tables/pydevd_dataset.py
+++ b/python/helpers/pydev/_pydevd_bundle/tables/pydevd_dataset.py
@@ -33,20 +33,38 @@ def get_column_types(table):

 # used by pydevd
 # noinspection PyUnresolvedReferences
-def get_data(table, start_index=None, end_index=None, format=None):
+def get_data(table, start_index=None, end_index=None, format=None, conv_mode=False):
     # type: (datasets.arrow_dataset.Dataset, int, int) -> str

+    def convert_data_to_csv(data):
+        return repr(data.to_csv())
+
    def convert_data_to_html(data, max_cols):
        return repr(data.to_html(notebook=True, max_cols=max_cols))

-    return _compute_sliced_data(table, convert_data_to_html, start_index, end_index, format)
+    # The CSV converter takes only the data; the HTML converter also takes max_cols.
+    converter = convert_data_to_csv if conv_mode else convert_data_to_html
+    # conv_mode must be forwarded: _compute_sliced_data defaults to the two-argument
+    # call fun(table, max_cols), which the one-argument CSV converter cannot accept.
+    return _compute_sliced_data(table, converter, start_index, end_index, format, conv_mode)


 # used by DSTableCommands
 # noinspection PyUnresolvedReferences
-def display_data(table, start_index, end_index):
+def display_data_csv(table, start_index, end_index):
     # type: (datasets.arrow_dataset.Dataset, int, int) -> None
+    """Display table through IPython for the given start/end indices (CSV-mode entry point)."""
-    def ipython_display(data, max_cols):
+    def ipython_display(data):
+        from IPython.display import display
+        display(data)  # NOTE(review): shows the data object itself, not data.to_csv() text - confirm intended
+    _compute_sliced_data(table, ipython_display, start_index, end_index, conv_mode=True)  # one-arg callback: without conv_mode it is called as fun(table, max_cols)
+
+
+# used by DSTableCommands
+# noinspection PyUnresolvedReferences
+def display_data_html(table, start_index, end_index):
+    # type: (datasets.arrow_dataset.Dataset, int, int) -> None
+    def ipython_display(data):
        from IPython.display import display
        display(data)

@@ -58,7 +76,7 @@ def __get_data_slice(table, start, end):
    return __convert_to_df(table).iloc[start:end]


-def _compute_sliced_data(table, fun, start_index=None, end_index=None, format=None):
+def _compute_sliced_data(table, fun, start_index=None, end_index=None, format=None, conv_mode=False):
    # type: (datasets.arrow_dataset.Dataset, function, int, int) -> str
    max_cols, max_colwidth, max_rows = __get_tables_display_options()

@@ -81,7 +99,10 @@ def _compute_sliced_data(table, fun, start_index=None, end_index=None, format=No
    else:
        table = __convert_to_df(table)

-    data = fun(table, max_cols)
+    if conv_mode:
+        data = fun(table)
+    else:
+        data = fun(table, max_cols)

    pd.set_option('display.max_columns', _jb_max_cols)
    pd.set_option('display.max_colwidth', _jb_max_colwidth)
--- a/python/helpers/pydev/_pydevd_bundle/tables/pydevd_numpy.py
+++ b/python/helpers/pydev/_pydevd_bundle/tables/pydevd_numpy.py
@@ -1,5 +1,6 @@
 #  Copyright 2000-2023 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
 import numpy as np
+import io

 TABLE_TYPE_NEXT_VALUE_SEPARATOR = '__pydev_table_column_type_val__'
 MAX_COLWIDTH = 100000
@@ -46,15 +47,23 @@ def get_column_types(arr):
        TABLE_TYPE_NEXT_VALUE_SEPARATOR.join(cols_types)


-def get_data(arr, start_index=None, end_index=None, format=None):
+def get_data(arr, start_index=None, end_index=None, format=None, conv_mode=False):
    # type: (Union[np.ndarray, dict], int, int) -> str
    def convert_data_to_html(data, max_cols):
        return repr(_create_table(data, start_index, end_index, format).to_html(notebook=True, max_cols=max_cols))

-    return _compute_data(arr, convert_data_to_html, format)
+
+    def convert_data_to_csv(data):
+        return repr(_create_table(data, start_index, end_index, format).to_csv())
+
+    # CSV mode uses the one-argument converter; otherwise the HTML converter is used.
+    converter = convert_data_to_csv if conv_mode else convert_data_to_html
+    # conv_mode is passed through so _compute_data invokes the converter with
+    # the argument list it expects (fun(data) vs. fun(data, None)).
+    return _compute_data(arr, converter, format, conv_mode)


-def display_data(arr, start_index=None, end_index=None):
+def display_data_html(arr, start_index=None, end_index=None):
    # type: (np.ndarray, int, int) -> None
    def ipython_display(data, max_cols):
        from IPython.display import display, HTML
@@ -63,6 +72,15 @@ def display_data(arr, start_index=None, end_index=None):
    _compute_data(arr, ipython_display)


+def display_data_csv(arr, start_index=None, end_index=None):
+    # type: (np.ndarray, int, int) -> None
+    """Display the start_index:end_index slice of arr as CSV text through IPython."""
+    def ipython_display(data):
+        from IPython.display import display
+        display(_create_table(data, start_index, end_index).to_csv())
+    _compute_data(arr, ipython_display, conv_mode=True)  # one-arg callback: the default conv_mode=False would call fun(data, None)
+
+
 class _NpTable:
    def __init__(self, np_array, format=None):
        self.array = np_array
@@ -153,6 +171,14 @@ class _NpTable:
        html.append('</tbody>\n')
        return html

+
+    def to_csv(self):
+        """Return self.array written by np.savetxt with ',' as the delimiter."""
+        with io.StringIO() as buffer:
+            np.savetxt(buffer, self.array, delimiter=',')
+            return buffer.getvalue()
+
+
    def slice(self, start_index=None, end_index=None):
        if end_index is not None and start_index is not None:
            self.array = self.array[start_index:end_index]
@@ -243,7 +269,7 @@ def _create_table(command, start_index=None, end_index=None, format=None):
    return _NpTable(np_array, format=format).sort(sort_keys).slice(start_index, end_index)


-def _compute_data(arr, fun, format=None):
+def _compute_data(arr, fun, format=None, conv_mode=False):
    is_sort_command = type(arr) is dict
    data = arr['data'] if is_sort_command else arr

@@ -255,7 +281,10 @@ def _compute_data(arr, fun, format=None):
        arr['data'] = data
        data = arr

-    data = fun(data, None)
+    if conv_mode:
+        data = fun(data)
+    else:
+        data = fun(data, None)

    if is_pd:
        _reset_pd_options(jb_max_cols, jb_max_colwidth, jb_max_rows, jb_float_options)
--- a/python/helpers/pydev/_pydevd_bundle/tables/pydevd_numpy_based.py
+++ b/python/helpers/pydev/_pydevd_bundle/tables/pydevd_numpy_based.py
@@ -1,5 +1,6 @@
 #  Copyright 2000-2024 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
 import numpy as np
+import io

 TABLE_TYPE_NEXT_VALUE_SEPARATOR = '__pydev_table_column_type_val__'
 MAX_COLWIDTH = 100000
@@ -43,19 +44,36 @@ def get_column_types(arr):
        TABLE_TYPE_NEXT_VALUE_SEPARATOR.join(cols_types)


-def get_data(arr, start_index=None, end_index=None, format=None):
+def get_data(arr, start_index=None, end_index=None, format=None, conv_mode=False):
    # type: (Union[np.ndarray, dict], int, int) -> str
    def convert_data_to_html(data, max_cols):
        return repr(_create_table(data, start_index, end_index, format).to_html(notebook=True, max_cols=max_cols))

-    return _compute_data(arr, convert_data_to_html, format)
+    def convert_data_to_csv(data):
+        return repr(_create_table(data, start_index, end_index, format).to_csv())
+
+    # CSV mode uses the one-argument converter; otherwise the HTML converter is used.
+    converter = convert_data_to_csv if conv_mode else convert_data_to_html
+    # conv_mode is passed through so _compute_data invokes the converter with
+    # the argument list it expects (fun(data) vs. fun(data, None)).
+    return _compute_data(arr, converter, format, conv_mode)


-def display_data(arr, start_index=None, end_index=None):
+def display_data_html(arr, start_index=None, end_index=None):  # HTML counterpart of display_data_csv
    # type: (np.ndarray, int, int) -> None
    def ipython_display(data, max_cols):
        from IPython.display import display, HTML
-        display(HTML(_create_table(data, start_index, end_index).to_html(notebook=True, max_cols=max_cols)))
+        display(HTML(_create_table(data, start_index, end_index).to_html(notebook=True,
+                                                                         max_cols=max_cols)))
+
+    _compute_data(arr, ipython_display)  # conv_mode stays False, so the callback is invoked as fun(data, None)
+
+
+def display_data_csv(arr, start_index=None, end_index=None):  # displays the start_index:end_index slice as CSV text
+    # type: (np.ndarray, int, int) -> None
+    def ipython_display(data):  # one-argument callback: receives only the data
+        from IPython.display import display
+        display(_create_table(data, start_index, end_index).to_csv())

-    _compute_data(arr, ipython_display)
+    _compute_data(arr, ipython_display, conv_mode=True)  # the default conv_mode=False would call fun(data, None) and fail

@@ -96,24 +114,30 @@ class _NpTable:
        html.append('<thead>\n'
                    '<tr style="text-align: right;">\n'
                    '<th></th>\n')
-        html += self._collect_cols_names()
+        html += self._collect_cols_names_html()
        html.append('</tr>\n'
                    '</thead>\n')

        # tbody
-        html += self._collect_values(max_cols)
+        html += self._collect_values_html(max_cols)

        html.append('</table>\n')

        return "".join(html)

-    def _collect_cols_names(self):
+    def to_csv(self):
+        """Return self.array written by np.savetxt with ',' as the delimiter."""
+        with io.StringIO() as buffer:
+            np.savetxt(buffer, self.array, delimiter=',')
+            return buffer.getvalue()
+
+    def _collect_cols_names_html(self):  # returns the list of '<th>' column-header cells (a single '0' column for ONE_DIM)
        if self.type == ONE_DIM:
            return ['<th>0</th>\n']

        return ['<th>{}</th>\n'.format(i) for i in range(len(self.array[0]))]

-    def _collect_values(self, max_cols):
+    def _collect_values_html(self, max_cols):
        html = ['<tbody>\n']
        rows = self.array.shape[0]
        for row_num in range(rows):
@@ -224,7 +248,7 @@ def _create_table(command, start_index=None, end_index=None, format=None):
    return _NpTable(np_array, format=format).sort(sort_keys).slice(start_index, end_index)


-def _compute_data(arr, fun, format=None):
+def _compute_data(arr, fun, format=None, conv_mode=False):
    is_sort_command = type(arr) is dict
    data = arr['data'] if is_sort_command else arr

@@ -236,7 +260,10 @@ def _compute_data(arr, fun, format=None):
        arr['data'] = data
        data = arr

-    data = fun(data, None)
+    if conv_mode:
+        data = fun(data)
+    else:
+        data = fun(data, None)

    if is_pd:
        _reset_pd_options(jb_max_cols, jb_max_colwidth, jb_max_rows, jb_float_options)
--- a/python/helpers/pydev/_pydevd_bundle/tables/pydevd_pandas.py
+++ b/python/helpers/pydev/_pydevd_bundle/tables/pydevd_pandas.py
@@ -34,20 +34,38 @@ def get_column_types(table):

 # used by pydevd
 # noinspection PyUnresolvedReferences
-def get_data(table, start_index=None, end_index=None, format=None):
+def get_data(table, start_index=None, end_index=None, format=None, conv_mode=False):
    # type: (Union[pd.DataFrame, pd.Series], int, int) -> str

+    def convert_data_to_csv(data):
+        return repr(__convert_to_df(data).to_csv())
+
    def convert_data_to_html(data, max_cols):
        return repr(__convert_to_df(data).to_html(notebook=True, max_cols=max_cols))

-    return _compute_sliced_data(table, convert_data_to_html, start_index, end_index, format)
+    # The CSV converter takes only the data; the HTML converter also takes max_cols.
+    converter = convert_data_to_csv if conv_mode else convert_data_to_html
+    # conv_mode must be forwarded: _compute_sliced_data defaults to the two-argument
+    # call fun(table, max_cols), which the one-argument CSV converter cannot accept.
+    return _compute_sliced_data(table, converter, start_index, end_index, format, conv_mode)


 # used by DSTableCommands
 # noinspection PyUnresolvedReferences
-def display_data(table, start_index, end_index):
+def display_data_csv(table, start_index, end_index):
    # type: (Union[pd.DataFrame, pd.Series], int, int) -> None
+    """Display table through IPython for the given start/end indices (CSV-mode entry point)."""
-    def ipython_display(data, max_cols):
+    def ipython_display(data):
+        from IPython.display import display
+        display(__convert_to_df(data))  # NOTE(review): shows the frame itself, not its to_csv() text - confirm intended
+    _compute_sliced_data(table, ipython_display, start_index, end_index, conv_mode=True)  # one-arg callback: without conv_mode it is called as fun(table, max_cols)
+
+
+# used by DSTableCommands
+# noinspection PyUnresolvedReferences
+def display_data_html(table, start_index, end_index):
+    # type: (Union[pd.DataFrame, pd.Series], int, int) -> None
+    def ipython_display(data):
        from IPython.display import display
        display(__convert_to_df(data))

@@ -58,7 +76,7 @@ def __get_data_slice(table, start, end):
    return __convert_to_df(table).iloc[start:end]


-def _compute_sliced_data(table, fun, start_index=None, end_index=None, format=None):
+def _compute_sliced_data(table, fun, start_index=None, end_index=None, format=None, conv_mode=False):
    # type: (Union[pd.DataFrame, pd.Series], function, int, int) -> str

    max_cols, max_colwidth, max_rows = __get_tables_display_options()
@@ -81,7 +99,10 @@ def _compute_sliced_data(table, fun, start_index=None, end_index=None, format=No
    if start_index is not None and end_index is not None:
        table = __get_data_slice(table, start_index, end_index)

-    data = fun(table, max_cols)
+    if conv_mode:
+        data = fun(table)
+    else:
+        data = fun(table, max_cols)

    pd.set_option('display.max_columns', _jb_max_cols)
    pd.set_option('display.max_colwidth', _jb_max_colwidth)
--- a/python/helpers/pydev/_pydevd_bundle/tables/pydevd_polars.py
+++ b/python/helpers/pydev/_pydevd_bundle/tables/pydevd_polars.py
@@ -33,19 +33,33 @@ def get_column_types(table):


 # used by pydevd
-def get_data(table, start_index=None, end_index=None, format=None):
+def get_data(table, start_index=None, end_index=None, format=None, conv_mode=False):
    # type: (pl.DataFrame, int, int) -> str
    with __create_config(format):
+        if conv_mode:  # CSV text requested instead of the HTML repr
+            return __get_df_slice(table, start_index, end_index).write_csv()  # __get_df_slice turns a Series slice into a frame first
        return table[start_index:end_index]._repr_html_()


 # used by DSTableCommands
-def display_data(table, start, end):
+def display_data_html(table, start, end):  # prints the HTML repr of table[start:end]
    # type: (pl.DataFrame, int, int) -> None
    with __create_config():
        print(table[start:end]._repr_html_())


+def display_data_csv(table, start, end):
+    # type: (pl.DataFrame, int, int) -> None
+    with __create_config():  # called without a format argument
+        print(__get_df_slice(table, start, end).write_csv())  # prints the CSV text of the table[start:end] slice
+
+
+def __get_df_slice(table, start_index, end_index):
+    sliced = table[start_index:end_index]
+    # A Series slice is turned into a frame; presumably so callers can call write_csv() on the result.
+    return sliced.to_frame() if 'Series' in str(type(table)) else sliced
+
+
 def __create_config(format=None):
    # type: (Union[str, None]) -> pl.Config
    cfg = pl.Config()