From c347819edc99288352ea49ef7501db99f7b9c0d8 Mon Sep 17 00:00:00 2001
From: "Natalia.Murycheva" <natalia.murycheva@jetbrains.com>
Date: Sat, 19 Oct 2024 01:23:56 +0200
Subject: [PATCH] [PyCharm Tables] PY-76723 Fixed Multiple problems in CSV
 serialization #PY-76723 Fixed

* see the YouTrack issue PY-76723 for more details

(cherry picked from commit 676a021e221c430c6fd3b600640d7aec1503239e)


(cherry picked from commit 5c8a478a2cc3edb148b684b48dfaf85f9d50627f)

IJ-CR-147319

GitOrigin-RevId: 3d5caa7d348861bc904335db3ff0b1edef2a17b1
---
 .../_pydevd_bundle/tables/pydevd_dataset.py   |  2 +-
 .../_pydevd_bundle/tables/pydevd_numpy.py     | 58 +++++++++++++++----
 .../tables/pydevd_numpy_based.py              | 58 ++++++++++++++-----
 .../_pydevd_bundle/tables/pydevd_pandas.py    |  9 ++-
 .../_pydevd_bundle/tables/pydevd_polars.py    | 12 ++--
 5 files changed, 102 insertions(+), 37 deletions(-)
diff --git a/python/helpers/pydev/_pydevd_bundle/tables/pydevd_dataset.py b/python/helpers/pydev/_pydevd_bundle/tables/pydevd_dataset.py
index c8da839fff96..192bc2f8adea 100644
--- a/python/helpers/pydev/_pydevd_bundle/tables/pydevd_dataset.py
+++ b/python/helpers/pydev/_pydevd_bundle/tables/pydevd_dataset.py
@@ -37,7 +37,7 @@ def get_data(table, use_csv_serialization, start_index=None, end_index=None, for
      # type: (datasets.arrow_dataset.Dataset, int, int) -> str
 
     def convert_data_to_csv(data):
-        return repr(data.to_csv(na_rep = "NaN"))
+        return repr(data.to_csv(na_rep = "NaN", float_format=format))
 
     def convert_data_to_html(data):
         return repr(data.to_html(notebook=True))
diff --git a/python/helpers/pydev/_pydevd_bundle/tables/pydevd_numpy.py b/python/helpers/pydev/_pydevd_bundle/tables/pydevd_numpy.py
index 25ff8933eb18..f396b9c663dc 100644
--- a/python/helpers/pydev/_pydevd_bundle/tables/pydevd_numpy.py
+++ b/python/helpers/pydev/_pydevd_bundle/tables/pydevd_numpy.py
@@ -53,7 +53,7 @@ def get_data(arr, use_csv_serialization, start_index=None, end_index=None, forma
         return repr(_create_table(data, start_index, end_index, format).to_html(notebook=True))
 
     def convert_data_to_csv(data):
-        return repr(_create_table(data, start_index, end_index, format).to_csv())
+        return repr(_create_table(data, start_index, end_index, format).to_csv(na_rep = "None", float_format=format))
 
     if use_csv_serialization:
         computed_data = _compute_data(arr, convert_data_to_csv, format)
@@ -74,7 +74,7 @@ def display_data_html(arr, start_index=None, end_index=None):
 def display_data_csv(arr, start_index=None, end_index=None):
     # type: (np.ndarray, int, int) -> None
     def ipython_display(data):
-        print(_create_table(data, start_index, end_index).to_csv())
+        print(_create_table(data, start_index, end_index).to_csv(na_rep = "None"))
 
     _compute_data(arr, ipython_display)
 
@@ -151,7 +151,7 @@ class _NpTable:
             html.append('<tr>\n')
             html.append('<th>{}</th>\n'.format(int(self.indexes[row_num])))
             if self.type == ONE_DIM:
-                if self.format is not None:
+                if self.format is not None and self.array[row_num] is not None:
                     value = self.format % self.array[row_num]
                 else:
                     value = self.array[row_num]
@@ -160,7 +160,7 @@ class _NpTable:
                 cols = len(self.array[0])
                 max_cols = cols if max_cols is None else min(max_cols, cols)
                 for col_num in range(max_cols):
-                    if self.format is not None:
+                    if self.format is not None and self.array[row_num][col_num] is not None:
                         value = self.format % self.array[row_num][col_num]
                     else:
                         value = self.array[row_num][col_num]
@@ -170,11 +170,36 @@ class _NpTable:
         return html
 
 
-    def to_csv(self):
+    def to_csv(self, na_rep = "None", float_format=None):
         csv_stream = io.StringIO()
-        np.savetxt(csv_stream, self.array, delimiter=',')
+        np_array_without_nones = np.where(self.array == None, np.nan, self.array)
+        if float_format is None or float_format == 'null':
+            float_format = "%s"
+
+        np.savetxt(csv_stream, np_array_without_nones, delimiter=',', fmt=float_format)
         csv_string = csv_stream.getvalue()
-        return csv_string
+        csv_rows_with_index = self._insert_index_at_rows_begging_csv(csv_string)
+
+        col_names = self._collect_col_names_csv()
+        return col_names + "\n" + csv_rows_with_index
+
+    def _insert_index_at_rows_begging_csv(self, csv_string):
+        # type: (str) -> str
+        csv_rows = csv_string.split('\n')
+        csv_rows_with_index = []
+        for row_index in range(self.array.shape[0]):
+            csv_rows_with_index.append(str(row_index) + "," + csv_rows[row_index])
+        return "\n".join(csv_rows_with_index)
+
+    def _collect_col_names_csv(self):
+        if self.type == ONE_DIM:
+            return ",0"
+
+        if self.type == WITH_TYPES:
+            return "," + ",".join(['{}'.format(name) for name in self.array.dtype.names])
+
+        # TWO_DIM
+        return "," + ",".join(['{}'.format(i) for i in range(self.array.shape[1])])
 
 
     def slice(self, start_index=None, end_index=None):
@@ -259,10 +284,19 @@ def _create_table(command, start_index=None, end_index=None, format=None):
         np_array = command
 
     if is_pd:
-        sorting_arr = _sort_df(pd.DataFrame(np_array), sort_keys)
+        sorted_df = _sort_df(pd.DataFrame(np_array), sort_keys)
         if start_index is not None and end_index is not None:
-            return sorting_arr.iloc[start_index:end_index]
-        return sorting_arr
+            sorted_df_slice = sorted_df.iloc[start_index:end_index]
+            # to apply "format" we should not have None inside DFs
+            try:
+                import warnings
+                with warnings.catch_warnings():
+                    warnings.simplefilter("ignore")
+                    sorted_df_slice = sorted_df_slice.fillna("None")
+            except Exception as _:
+                pass
+            return sorted_df_slice
+        return sorted_df
 
     return _NpTable(np_array, format=format).sort(sort_keys).slice(start_index, end_index)
 
@@ -295,8 +329,8 @@ def __get_tables_display_options():
     try:
         import pandas as pd
         if int(pd.__version__.split('.')[0]) < 1:
-            return None, MAX_COLWIDTH_PYTHON_2, None
-    except ImportError:
+            return None, MAX_COLWIDTH, None
+    except Exception:
         pass
     return None, None, None
 
diff --git a/python/helpers/pydev/_pydevd_bundle/tables/pydevd_numpy_based.py b/python/helpers/pydev/_pydevd_bundle/tables/pydevd_numpy_based.py
index 92c107355017..d381d450a8c5 100644
--- a/python/helpers/pydev/_pydevd_bundle/tables/pydevd_numpy_based.py
+++ b/python/helpers/pydev/_pydevd_bundle/tables/pydevd_numpy_based.py
@@ -45,12 +45,12 @@ def get_column_types(arr):
 
 
 def get_data(arr, use_csv_serialization, start_index=None, end_index=None, format=None):
-    # type: (Union[np.ndarray, dict], int, int) -> str
+    # type: (Union[np.ndarray, dict], bool, Union[int, None], Union[int, None], Union[str, None]) -> str
     def convert_data_to_html(data):
         return repr(_create_table(data, start_index, end_index, format).to_html(notebook=True))
 
     def convert_data_to_csv(data):
-        return repr(_create_table(data, start_index, end_index, format).to_csv())
+        return repr(_create_table(data, start_index, end_index, format).to_csv(na_rep = "None", float_format=format))
 
     if use_csv_serialization:
         computed_data = _compute_data(arr, convert_data_to_csv, format)
@@ -71,7 +71,7 @@ def display_data_html(arr, start_index=None, end_index=None):
 def display_data_csv(arr, start_index=None, end_index=None):
     # type: (np.ndarray, int, int) -> None
     def ipython_display(data):
-        print(_create_table(data, start_index, end_index).to_csv())
+        print(_create_table(data, start_index, end_index).to_csv(na_rep = "None"))
 
     _compute_data(arr, ipython_display)
 
@@ -84,13 +84,17 @@ class _NpTable:
         self.format = format
 
     def get_array_type(self):
-        if self.array.ndim > 1:
+        if len(self.array.shape) > 1:
             return TWO_DIM
 
         return ONE_DIM
 
     def get_cols_types(self):
-        col_type = self.array.dtype
+        dtype = self.array.dtype
+        if "torch" in str(dtype):
+            col_type = dtype
+        else:
+            col_type = dtype.name
 
         if self.type == ONE_DIM:
             # [1, 2, 3] -> [int]
@@ -123,12 +127,6 @@ class _NpTable:
 
         return "".join(html)
 
-    def to_csv(self):
-        csv_stream = io.StringIO()
-        np.savetxt(csv_stream, self.array, delimiter=',')
-        csv_string = csv_stream.getvalue()
-        return csv_string
-
     def _collect_cols_names_html(self):
         if self.type == ONE_DIM:
             return ['<th>0</th>\n']
@@ -142,7 +140,8 @@ class _NpTable:
             html.append('<tr>\n')
             html.append('<th>{}</th>\n'.format(int(self.indexes[row_num])))
             if self.type == ONE_DIM:
-                if self.format is not None:
+                # None usually is not supported in tensors, but to be totally sure
+                if self.format is not None and self.array[row_num] is not None:
                     value = self.format % self.array[row_num]
                 else:
                     value = self.array[row_num]
@@ -151,7 +150,7 @@ class _NpTable:
                 cols = len(self.array[0])
                 max_cols = cols if max_cols is None else min(max_cols, cols)
                 for col_num in range(max_cols):
-                    if self.format is not None:
+                    if self.format is not None and self.array[row_num][col_num] is not None:
                         value = self.format % self.array[row_num][col_num]
                     else:
                         value = self.array[row_num][col_num]
@@ -160,6 +159,33 @@ class _NpTable:
         html.append('</tbody>\n')
         return html
 
+    def to_csv(self, na_rep = "None", float_format=None):
+        csv_stream = io.StringIO()
+        if float_format is None or float_format == 'null':
+            float_format = "%s"
+
+        np.savetxt(csv_stream, self.array, delimiter=',', fmt=float_format)
+        csv_string = csv_stream.getvalue()
+        csv_rows_with_index = self._insert_index_at_rows_begging_csv(csv_string)
+
+        col_names = self._collect_col_names_csv()
+        return col_names + "\n" + csv_rows_with_index
+
+    def _insert_index_at_rows_begging_csv(self, csv_string):
+        # type: (str) -> str
+        csv_rows = csv_string.split('\n')
+        csv_rows_with_index = []
+        for row_index in range(self.array.shape[0]):
+            csv_rows_with_index.append(str(row_index) + "," + csv_rows[row_index])
+        return "\n".join(csv_rows_with_index)
+
+    def _collect_col_names_csv(self):
+        if self.type == ONE_DIM:
+            return ",0"
+
+        # TWO_DIM
+        return "," + ",".join(['{}'.format(i) for i in range(self.array.shape[1])])
+
     def slice(self, start_index=None, end_index=None):
         if end_index is not None and start_index is not None:
             self.array = self.array[start_index:end_index]
@@ -271,6 +297,12 @@ def __get_tables_display_options():
     import sys
     if sys.version_info < (3, 0):
         return None, MAX_COLWIDTH, None
+    try:
+        import pandas as pd
+        if int(pd.__version__.split('.')[0]) < 1:
+            return None, MAX_COLWIDTH, None
+    except Exception:
+        pass
     return None, None, None
 
 
diff --git a/python/helpers/pydev/_pydevd_bundle/tables/pydevd_pandas.py b/python/helpers/pydev/_pydevd_bundle/tables/pydevd_pandas.py
index 755ecb9bb17b..03f532935ebc 100644
--- a/python/helpers/pydev/_pydevd_bundle/tables/pydevd_pandas.py
+++ b/python/helpers/pydev/_pydevd_bundle/tables/pydevd_pandas.py
@@ -4,7 +4,7 @@ import pandas as pd
 import typing
 
 TABLE_TYPE_NEXT_VALUE_SEPARATOR = '__pydev_table_column_type_val__'
-MAX_COLWIDTH_PYTHON_2 = 100000
+MAX_COLWIDTH = 100000
 
 
 def get_type(table):
@@ -38,7 +38,7 @@ def get_data(table, use_csv_serialization, start_index=None, end_index=None, for
     # type: (Union[pd.DataFrame, pd.Series], int, int) -> str
 
     def convert_data_to_csv(data):
-        return repr(__convert_to_df(data).to_csv(na_rep = "NaN"))
+        return repr(__convert_to_df(data).to_csv(na_rep = "NaN", float_format=format))
 
     def convert_data_to_html(data):
         return repr(__convert_to_df(data).to_html(notebook=True))
@@ -295,11 +295,10 @@ def __get_tables_display_options():
     # type: () -> Tuple[None, Union[int, None], None]
     import sys
     if sys.version_info < (3, 0):
-        return None, MAX_COLWIDTH_PYTHON_2, None
+        return None, MAX_COLWIDTH, None
     try:
-        import pandas as pd
         if int(pd.__version__.split('.')[0]) < 1:
-            return None, MAX_COLWIDTH_PYTHON_2, None
+            return None, MAX_COLWIDTH, None
-    except ImportError:
+    except Exception:
         pass
     return None, None, None
diff --git a/python/helpers/pydev/_pydevd_bundle/tables/pydevd_polars.py b/python/helpers/pydev/_pydevd_bundle/tables/pydevd_polars.py
index 9d37d67c54ab..01fa7c4671c4 100644
--- a/python/helpers/pydev/_pydevd_bundle/tables/pydevd_polars.py
+++ b/python/helpers/pydev/_pydevd_bundle/tables/pydevd_polars.py
@@ -37,7 +37,8 @@ def get_data(table, use_csv_serialization, start_index=None, end_index=None, for
     # type: (pl.DataFrame, int, int) -> str
     with __create_config(format):
         if use_csv_serialization:
-            return __get_df_slice(table, start_index, end_index).write_csv(null_value = "null")
+            float_precision = _get_float_precision(format)
+            return __get_df_slice(table, start_index, end_index).write_csv(null_value = "null", float_precision=float_precision)
         return table[start_index:end_index]._repr_html_()
 
 
@@ -66,10 +67,9 @@ def __create_config(format=None):
     cfg.set_tbl_cols(-1)  # Unlimited
     cfg.set_tbl_rows(-1)  # Unlimited
     cfg.set_fmt_str_lengths(MAX_COLWIDTH)  # No option to set unlimited, so it's 100_000
-    if format is not None:
-        float_precision = _get_float_precision(format)
-        if float_precision is not None:
-            cfg.set_float_precision(float_precision)
+    float_precision = _get_float_precision(format)
+    if float_precision is not None:
+        cfg.set_float_precision(float_precision)
     return cfg
 
 
@@ -219,7 +219,7 @@ def __get_describe(table):
 
 
 def _get_float_precision(format):
-    # type: (str) -> Union[int, None]
+    # type: (Union[str, None]) -> Union[int, None]
     if isinstance(format, str):
         if format.startswith("%") and format.endswith("f"):
             start = format.find('%.') + 2