[PyCharm Tables] PY-76723 Fixed multiple problems in CSV serialization #PY-76723 Fixed

* See the YouTrack issue (PY-76723) for more details. (cherry picked from commit 676a021e221c430c6fd3b600640d7aec1503239e) (cherry picked from commit 5c8a478a2cc3edb148b684b48dfaf85f9d50627f) IJ-CR-147319 GitOrigin-RevId: 3d5caa7d348861bc904335db3ff0b1edef2a17b1
2026-03-22 15:19:59 +07:00 · 2024-10-19 01:23:56 +02:00
parent d3c97e24b0
commit c347819edc
5 changed files with 102 additions and 37 deletions
--- a/python/helpers/pydev/_pydevd_bundle/tables/pydevd_dataset.py
+++ b/python/helpers/pydev/_pydevd_bundle/tables/pydevd_dataset.py
@@ -37,7 +37,7 @@ def get_data(table, use_csv_serialization, start_index=None, end_index=None, for
     # type: (datasets.arrow_dataset.Dataset, int, int) -> str

    def convert_data_to_csv(data):
-        return repr(data.to_csv(na_rep = "NaN"))
+        return repr(data.to_csv(na_rep = "NaN", float_format=format))

    def convert_data_to_html(data):
        return repr(data.to_html(notebook=True))
--- a/python/helpers/pydev/_pydevd_bundle/tables/pydevd_numpy.py
+++ b/python/helpers/pydev/_pydevd_bundle/tables/pydevd_numpy.py
@@ -53,7 +53,7 @@ def get_data(arr, use_csv_serialization, start_index=None, end_index=None, forma
        return repr(_create_table(data, start_index, end_index, format).to_html(notebook=True))

    def convert_data_to_csv(data):
-        return repr(_create_table(data, start_index, end_index, format).to_csv())
+        return repr(_create_table(data, start_index, end_index, format).to_csv(na_rep = "None", float_format=format))

    if use_csv_serialization:
        computed_data = _compute_data(arr, convert_data_to_csv, format)
@@ -74,7 +74,7 @@ def display_data_html(arr, start_index=None, end_index=None):
 def display_data_csv(arr, start_index=None, end_index=None):
    # type: (np.ndarray, int, int) -> None
    def ipython_display(data):
-        print(_create_table(data, start_index, end_index).to_csv())
+        print(_create_table(data, start_index, end_index).to_csv(na_rep = "None"))

    _compute_data(arr, ipython_display)

@@ -151,7 +151,7 @@ class _NpTable:
            html.append('<tr>\n')
            html.append('<th>{}</th>\n'.format(int(self.indexes[row_num])))
            if self.type == ONE_DIM:
-                if self.format is not None:
+                if self.format is not None and self.array[row_num] is not None:
                    value = self.format % self.array[row_num]
                else:
                    value = self.array[row_num]
@@ -160,7 +160,7 @@ class _NpTable:
                cols = len(self.array[0])
                max_cols = cols if max_cols is None else min(max_cols, cols)
                for col_num in range(max_cols):
-                    if self.format is not None:
+                    if self.format is not None and self.array[row_num][col_num] is not None:
                        value = self.format % self.array[row_num][col_num]
                    else:
                        value = self.array[row_num][col_num]
@@ -170,11 +170,36 @@ class _NpTable:
        return html


-    def to_csv(self):
+    def to_csv(self, na_rep = "None", float_format=None):
        csv_stream = io.StringIO()
-        np.savetxt(csv_stream, self.array, delimiter=',')
+        np_array_without_nones = np.where(self.array == None, np.nan, self.array)
+        if float_format is None or float_format == 'null':
+            float_format = "%s"
+
+        np.savetxt(csv_stream, np_array_without_nones, delimiter=',', fmt=float_format)
        csv_string = csv_stream.getvalue()
-        return csv_string
+        csv_rows_with_index = self._insert_index_at_rows_begging_csv(csv_string)
+
+        col_names = self._collect_col_names_csv()
+        return col_names + "\n" + csv_rows_with_index
+
+    def _insert_index_at_rows_begging_csv(self, csv_string):
+        # type: (str) -> str
+        csv_rows = csv_string.split('\n')
+        csv_rows_with_index = []
+        for row_index in range(self.array.shape[0]):
+            csv_rows_with_index.append(str(row_index) + "," + csv_rows[row_index])
+        return "\n".join(csv_rows_with_index)
+
+    def _collect_col_names_csv(self):
+        if self.type == ONE_DIM:
+            return ",0"
+
+        if self.type == WITH_TYPES:
+            return "," + ",".join(['{}'.format(name) for name in self.array.dtype.names])
+
+        # TWO_DIM
+        return "," + ",".join(['{}'.format(i) for i in range(self.array.shape[1])])


    def slice(self, start_index=None, end_index=None):
@@ -259,10 +284,19 @@ def _create_table(command, start_index=None, end_index=None, format=None):
        np_array = command

    if is_pd:
-        sorting_arr = _sort_df(pd.DataFrame(np_array), sort_keys)
+        sorted_df = _sort_df(pd.DataFrame(np_array), sort_keys)
        if start_index is not None and end_index is not None:
-            return sorting_arr.iloc[start_index:end_index]
-        return sorting_arr
+            sorted_df_slice = sorted_df.iloc[start_index:end_index]
+            # to apply "format" we should not have None inside DFs
+            try:
+                import warnings
+                with warnings.catch_warnings():
+                    warnings.simplefilter("ignore")
+                    sorted_df_slice = sorted_df_slice.fillna("None")
+            except Exception as _:
+                pass
+            return sorted_df_slice
+        return sorted_df

    return _NpTable(np_array, format=format).sort(sort_keys).slice(start_index, end_index)

@@ -295,8 +329,8 @@ def __get_tables_display_options():
    try:
        import pandas as pd
        if int(pd.__version__.split('.')[0]) < 1:
-            return None, MAX_COLWIDTH_PYTHON_2, None
-    except ImportError:
+            return None, MAX_COLWIDTH, None
+    except Exception:
        pass
    return None, None, None

--- a/python/helpers/pydev/_pydevd_bundle/tables/pydevd_numpy_based.py
+++ b/python/helpers/pydev/_pydevd_bundle/tables/pydevd_numpy_based.py
@@ -45,12 +45,12 @@ def get_column_types(arr):


 def get_data(arr, use_csv_serialization, start_index=None, end_index=None, format=None):
-    # type: (Union[np.ndarray, dict], int, int) -> str
+    # type: (Union[np.ndarray, dict], bool, Union[int, None], Union[int, None], Union[str, None]) -> str
    def convert_data_to_html(data):
        return repr(_create_table(data, start_index, end_index, format).to_html(notebook=True))

    def convert_data_to_csv(data):
-        return repr(_create_table(data, start_index, end_index, format).to_csv())
+        return repr(_create_table(data, start_index, end_index, format).to_csv(na_rep = "None", float_format=format))

    if use_csv_serialization:
        computed_data = _compute_data(arr, convert_data_to_csv, format)
@@ -71,7 +71,7 @@ def display_data_html(arr, start_index=None, end_index=None):
 def display_data_csv(arr, start_index=None, end_index=None):
    # type: (np.ndarray, int, int) -> None
    def ipython_display(data):
-        print(_create_table(data, start_index, end_index).to_csv())
+        print(_create_table(data, start_index, end_index).to_csv(na_rep = "None"))

    _compute_data(arr, ipython_display)

@@ -84,13 +84,17 @@ class _NpTable:
        self.format = format

    def get_array_type(self):
-        if self.array.ndim > 1:
+        if len(self.array.shape) > 1:
            return TWO_DIM

        return ONE_DIM

    def get_cols_types(self):
-        col_type = self.array.dtype
+        dtype = self.array.dtype
+        if "torch" in str(dtype):
+            col_type = dtype
+        else:
+            col_type = dtype.name

        if self.type == ONE_DIM:
            # [1, 2, 3] -> [int]
@@ -123,12 +127,6 @@ class _NpTable:

        return "".join(html)

-    def to_csv(self):
-        csv_stream = io.StringIO()
-        np.savetxt(csv_stream, self.array, delimiter=',')
-        csv_string = csv_stream.getvalue()
-        return csv_string
-
    def _collect_cols_names_html(self):
        if self.type == ONE_DIM:
            return ['<th>0</th>\n']
@@ -142,7 +140,8 @@ class _NpTable:
            html.append('<tr>\n')
            html.append('<th>{}</th>\n'.format(int(self.indexes[row_num])))
            if self.type == ONE_DIM:
-                if self.format is not None:
+                # None usually is not supported in tensors, but to be totally sure
+                if self.format is not None and self.array[row_num] is not None:
                    value = self.format % self.array[row_num]
                else:
                    value = self.array[row_num]
@@ -151,7 +150,7 @@ class _NpTable:
                cols = len(self.array[0])
                max_cols = cols if max_cols is None else min(max_cols, cols)
                for col_num in range(max_cols):
-                    if self.format is not None:
+                    if self.format is not None and self.array[row_num][col_num]:
                        value = self.format % self.array[row_num][col_num]
                    else:
                        value = self.array[row_num][col_num]
@@ -160,6 +159,33 @@ class _NpTable:
        html.append('</tbody>\n')
        return html

+    def to_csv(self, na_rep = "None", float_format=None):
+        csv_stream = io.StringIO()
+        if float_format is None or float_format == 'null':
+            float_format = "%s"
+
+        np.savetxt(csv_stream, self.array, delimiter=',', fmt=float_format)
+        csv_string = csv_stream.getvalue()
+        csv_rows_with_index = self._insert_index_at_rows_begging_csv(csv_string)
+
+        col_names = self._collect_col_names_csv()
+        return col_names + "\n" + csv_rows_with_index
+
+    def _insert_index_at_rows_begging_csv(self, csv_string):
+        # type: (str) -> str
+        csv_rows = csv_string.split('\n')
+        csv_rows_with_index = []
+        for row_index in range(self.array.shape[0]):
+            csv_rows_with_index.append(str(row_index) + "," + csv_rows[row_index])
+        return "\n".join(csv_rows_with_index)
+
+    def _collect_col_names_csv(self):
+        if self.type == ONE_DIM:
+            return ",0"
+
+        # TWO_DIM
+        return "," + ",".join(['{}'.format(i) for i in range(self.array.shape[1])])
+
    def slice(self, start_index=None, end_index=None):
        if end_index is not None and start_index is not None:
            self.array = self.array[start_index:end_index]
@@ -271,6 +297,12 @@ def __get_tables_display_options():
    import sys
    if sys.version_info < (3, 0):
        return None, MAX_COLWIDTH, None
+    try:
+        import pandas as pd
+        if int(pd.__version__.split('.')[0]) < 1:
+            return None, MAX_COLWIDTH, None
+    except Exception:
+        pass
    return None, None, None


--- a/python/helpers/pydev/_pydevd_bundle/tables/pydevd_pandas.py
+++ b/python/helpers/pydev/_pydevd_bundle/tables/pydevd_pandas.py
@@ -4,7 +4,7 @@ import pandas as pd
 import typing

 TABLE_TYPE_NEXT_VALUE_SEPARATOR = '__pydev_table_column_type_val__'
-MAX_COLWIDTH_PYTHON_2 = 100000
+MAX_COLWIDTH = 100000


 def get_type(table):
@@ -38,7 +38,7 @@ def get_data(table, use_csv_serialization, start_index=None, end_index=None, for
    # type: (Union[pd.DataFrame, pd.Series], int, int) -> str

    def convert_data_to_csv(data):
-        return repr(__convert_to_df(data).to_csv(na_rep = "NaN"))
+        return repr(__convert_to_df(data).to_csv(na_rep = "NaN", float_format=format))

    def convert_data_to_html(data):
        return repr(__convert_to_df(data).to_html(notebook=True))
@@ -295,11 +295,10 @@ def __get_tables_display_options():
    # type: () -> Tuple[None, Union[int, None], None]
    import sys
    if sys.version_info < (3, 0):
-        return None, MAX_COLWIDTH_PYTHON_2, None
+        return None, MAX_COLWIDTH, None
    try:
-        import pandas as pd
        if int(pd.__version__.split('.')[0]) < 1:
-            return None, MAX_COLWIDTH_PYTHON_2, None
+            return None, MAX_COLWIDTH, None
    except ImportError:
        pass
    return None, None, None
--- a/python/helpers/pydev/_pydevd_bundle/tables/pydevd_polars.py
+++ b/python/helpers/pydev/_pydevd_bundle/tables/pydevd_polars.py
@@ -37,7 +37,8 @@ def get_data(table, use_csv_serialization, start_index=None, end_index=None, for
    # type: (pl.DataFrame, int, int) -> str
    with __create_config(format):
        if use_csv_serialization:
-            return __get_df_slice(table, start_index, end_index).write_csv(null_value = "null")
+            float_precision = _get_float_precision(format)
+            return __get_df_slice(table, start_index, end_index).write_csv(null_value = "null", float_precision=float_precision)
        return table[start_index:end_index]._repr_html_()


@@ -66,10 +67,9 @@ def __create_config(format=None):
    cfg.set_tbl_cols(-1)  # Unlimited
    cfg.set_tbl_rows(-1)  # Unlimited
    cfg.set_fmt_str_lengths(MAX_COLWIDTH)  # No option to set unlimited, so it's 100_000
-    if format is not None:
-        float_precision = _get_float_precision(format)
-        if float_precision is not None:
-            cfg.set_float_precision(float_precision)
+    float_precision = _get_float_precision(format)
+    if float_precision is not None:
+        cfg.set_float_precision(float_precision)
    return cfg


@@ -219,7 +219,7 @@ def __get_describe(table):


 def _get_float_precision(format):
-    # type: (str) -> Union[int, None]
+    # type: (Union[str, None]) -> Union[int, None]
    if isinstance(format, str):
        if format.startswith("%") and format.endswith("f"):
            start = format.find('%.') + 2