mirror of
https://gitflic.ru/project/openide/openide.git
synced 2026-03-22 15:19:59 +07:00
[PyCharm Tables] PY-76723 Fixed Multiple problems in CSV serialization #PY-76723 Fixed
* see the YT issue for more details (cherry picked from commit 676a021e221c430c6fd3b600640d7aec1503239e) (cherry picked from commit 5c8a478a2cc3edb148b684b48dfaf85f9d50627f) IJ-CR-147319 GitOrigin-RevId: 3d5caa7d348861bc904335db3ff0b1edef2a17b1
This commit is contained in:
committed by
intellij-monorepo-bot
parent
d3c97e24b0
commit
c347819edc
@@ -37,7 +37,7 @@ def get_data(table, use_csv_serialization, start_index=None, end_index=None, for
|
||||
# type: (datasets.arrow_dataset.Dataset, int, int) -> str
|
||||
|
||||
def convert_data_to_csv(data):
|
||||
return repr(data.to_csv(na_rep = "NaN"))
|
||||
return repr(data.to_csv(na_rep = "NaN", float_format=format))
|
||||
|
||||
def convert_data_to_html(data):
|
||||
return repr(data.to_html(notebook=True))
|
||||
|
||||
@@ -53,7 +53,7 @@ def get_data(arr, use_csv_serialization, start_index=None, end_index=None, forma
|
||||
return repr(_create_table(data, start_index, end_index, format).to_html(notebook=True))
|
||||
|
||||
def convert_data_to_csv(data):
|
||||
return repr(_create_table(data, start_index, end_index, format).to_csv())
|
||||
return repr(_create_table(data, start_index, end_index, format).to_csv(na_rep = "None", float_format=format))
|
||||
|
||||
if use_csv_serialization:
|
||||
computed_data = _compute_data(arr, convert_data_to_csv, format)
|
||||
@@ -74,7 +74,7 @@ def display_data_html(arr, start_index=None, end_index=None):
|
||||
def display_data_csv(arr, start_index=None, end_index=None):
|
||||
# type: (np.ndarray, int, int) -> None
|
||||
def ipython_display(data):
|
||||
print(_create_table(data, start_index, end_index).to_csv())
|
||||
print(_create_table(data, start_index, end_index).to_csv(na_rep = "None"))
|
||||
|
||||
_compute_data(arr, ipython_display)
|
||||
|
||||
@@ -151,7 +151,7 @@ class _NpTable:
|
||||
html.append('<tr>\n')
|
||||
html.append('<th>{}</th>\n'.format(int(self.indexes[row_num])))
|
||||
if self.type == ONE_DIM:
|
||||
if self.format is not None:
|
||||
if self.format is not None and self.array[row_num] is not None:
|
||||
value = self.format % self.array[row_num]
|
||||
else:
|
||||
value = self.array[row_num]
|
||||
@@ -160,7 +160,7 @@ class _NpTable:
|
||||
cols = len(self.array[0])
|
||||
max_cols = cols if max_cols is None else min(max_cols, cols)
|
||||
for col_num in range(max_cols):
|
||||
if self.format is not None:
|
||||
if self.format is not None and self.array[row_num][col_num] is not None:
|
||||
value = self.format % self.array[row_num][col_num]
|
||||
else:
|
||||
value = self.array[row_num][col_num]
|
||||
@@ -170,11 +170,36 @@ class _NpTable:
|
||||
return html
|
||||
|
||||
|
||||
def to_csv(self):
|
||||
def to_csv(self, na_rep = "None", float_format=None):
|
||||
csv_stream = io.StringIO()
|
||||
np.savetxt(csv_stream, self.array, delimiter=',')
|
||||
np_array_without_nones = np.where(self.array == None, np.nan, self.array)
|
||||
if float_format is None or float_format == 'null':
|
||||
float_format = "%s"
|
||||
|
||||
np.savetxt(csv_stream, np_array_without_nones, delimiter=',', fmt=float_format)
|
||||
csv_string = csv_stream.getvalue()
|
||||
return csv_string
|
||||
csv_rows_with_index = self._insert_index_at_rows_begging_csv(csv_string)
|
||||
|
||||
col_names = self._collect_col_names_csv()
|
||||
return col_names + "\n" + csv_rows_with_index
|
||||
|
||||
def _insert_index_at_rows_begging_csv(self, csv_string):
|
||||
# type: (str) -> str
|
||||
csv_rows = csv_string.split('\n')
|
||||
csv_rows_with_index = []
|
||||
for row_index in range(self.array.shape[0]):
|
||||
csv_rows_with_index.append(str(row_index) + "," + csv_rows[row_index])
|
||||
return "\n".join(csv_rows_with_index)
|
||||
|
||||
def _collect_col_names_csv(self):
|
||||
if self.type == ONE_DIM:
|
||||
return ",0"
|
||||
|
||||
if self.type == WITH_TYPES:
|
||||
return "," + ",".join(['{}'.format(name) for name in self.array.dtype.names])
|
||||
|
||||
# TWO_DIM
|
||||
return "," + ",".join(['{}'.format(i) for i in range(self.array.shape[1])])
|
||||
|
||||
|
||||
def slice(self, start_index=None, end_index=None):
|
||||
@@ -259,10 +284,19 @@ def _create_table(command, start_index=None, end_index=None, format=None):
|
||||
np_array = command
|
||||
|
||||
if is_pd:
|
||||
sorting_arr = _sort_df(pd.DataFrame(np_array), sort_keys)
|
||||
sorted_df = _sort_df(pd.DataFrame(np_array), sort_keys)
|
||||
if start_index is not None and end_index is not None:
|
||||
return sorting_arr.iloc[start_index:end_index]
|
||||
return sorting_arr
|
||||
sorted_df_slice = sorted_df.iloc[start_index:end_index]
|
||||
# to apply "format" we should not have None inside DFs
|
||||
try:
|
||||
import warnings
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("ignore")
|
||||
sorted_df_slice = sorted_df_slice.fillna("None")
|
||||
except Exception as _:
|
||||
pass
|
||||
return sorted_df_slice
|
||||
return sorted_df
|
||||
|
||||
return _NpTable(np_array, format=format).sort(sort_keys).slice(start_index, end_index)
|
||||
|
||||
@@ -295,8 +329,8 @@ def __get_tables_display_options():
|
||||
try:
|
||||
import pandas as pd
|
||||
if int(pd.__version__.split('.')[0]) < 1:
|
||||
return None, MAX_COLWIDTH_PYTHON_2, None
|
||||
except ImportError:
|
||||
return None, MAX_COLWIDTH, None
|
||||
except Exception:
|
||||
pass
|
||||
return None, None, None
|
||||
|
||||
|
||||
@@ -45,12 +45,12 @@ def get_column_types(arr):
|
||||
|
||||
|
||||
def get_data(arr, use_csv_serialization, start_index=None, end_index=None, format=None):
|
||||
# type: (Union[np.ndarray, dict], int, int) -> str
|
||||
# type: (Union[np.ndarray, dict], bool, Union[int, None], Union[int, None], Union[str, None]) -> str
|
||||
def convert_data_to_html(data):
|
||||
return repr(_create_table(data, start_index, end_index, format).to_html(notebook=True))
|
||||
|
||||
def convert_data_to_csv(data):
|
||||
return repr(_create_table(data, start_index, end_index, format).to_csv())
|
||||
return repr(_create_table(data, start_index, end_index, format).to_csv(na_rep = "None", float_format=format))
|
||||
|
||||
if use_csv_serialization:
|
||||
computed_data = _compute_data(arr, convert_data_to_csv, format)
|
||||
@@ -71,7 +71,7 @@ def display_data_html(arr, start_index=None, end_index=None):
|
||||
def display_data_csv(arr, start_index=None, end_index=None):
|
||||
# type: (np.ndarray, int, int) -> None
|
||||
def ipython_display(data):
|
||||
print(_create_table(data, start_index, end_index).to_csv())
|
||||
print(_create_table(data, start_index, end_index).to_csv(na_rep = "None"))
|
||||
|
||||
_compute_data(arr, ipython_display)
|
||||
|
||||
@@ -84,13 +84,17 @@ class _NpTable:
|
||||
self.format = format
|
||||
|
||||
def get_array_type(self):
|
||||
if self.array.ndim > 1:
|
||||
if len(self.array.shape) > 1:
|
||||
return TWO_DIM
|
||||
|
||||
return ONE_DIM
|
||||
|
||||
def get_cols_types(self):
|
||||
col_type = self.array.dtype
|
||||
dtype = self.array.dtype
|
||||
if "torch" in str(dtype):
|
||||
col_type = dtype
|
||||
else:
|
||||
col_type = dtype.name
|
||||
|
||||
if self.type == ONE_DIM:
|
||||
# [1, 2, 3] -> [int]
|
||||
@@ -123,12 +127,6 @@ class _NpTable:
|
||||
|
||||
return "".join(html)
|
||||
|
||||
def to_csv(self):
|
||||
csv_stream = io.StringIO()
|
||||
np.savetxt(csv_stream, self.array, delimiter=',')
|
||||
csv_string = csv_stream.getvalue()
|
||||
return csv_string
|
||||
|
||||
def _collect_cols_names_html(self):
|
||||
if self.type == ONE_DIM:
|
||||
return ['<th>0</th>\n']
|
||||
@@ -142,7 +140,8 @@ class _NpTable:
|
||||
html.append('<tr>\n')
|
||||
html.append('<th>{}</th>\n'.format(int(self.indexes[row_num])))
|
||||
if self.type == ONE_DIM:
|
||||
if self.format is not None:
|
||||
# None usually is not supported in tensors, but to be totally sure
|
||||
if self.format is not None and self.array[row_num] is not None:
|
||||
value = self.format % self.array[row_num]
|
||||
else:
|
||||
value = self.array[row_num]
|
||||
@@ -151,7 +150,7 @@ class _NpTable:
|
||||
cols = len(self.array[0])
|
||||
max_cols = cols if max_cols is None else min(max_cols, cols)
|
||||
for col_num in range(max_cols):
|
||||
if self.format is not None:
|
||||
if self.format is not None and self.array[row_num][col_num]:
|
||||
value = self.format % self.array[row_num][col_num]
|
||||
else:
|
||||
value = self.array[row_num][col_num]
|
||||
@@ -160,6 +159,33 @@ class _NpTable:
|
||||
html.append('</tbody>\n')
|
||||
return html
|
||||
|
||||
def to_csv(self, na_rep = "None", float_format=None):
|
||||
csv_stream = io.StringIO()
|
||||
if float_format is None or float_format == 'null':
|
||||
float_format = "%s"
|
||||
|
||||
np.savetxt(csv_stream, self.array, delimiter=',', fmt=float_format)
|
||||
csv_string = csv_stream.getvalue()
|
||||
csv_rows_with_index = self._insert_index_at_rows_begging_csv(csv_string)
|
||||
|
||||
col_names = self._collect_col_names_csv()
|
||||
return col_names + "\n" + csv_rows_with_index
|
||||
|
||||
def _insert_index_at_rows_begging_csv(self, csv_string):
|
||||
# type: (str) -> str
|
||||
csv_rows = csv_string.split('\n')
|
||||
csv_rows_with_index = []
|
||||
for row_index in range(self.array.shape[0]):
|
||||
csv_rows_with_index.append(str(row_index) + "," + csv_rows[row_index])
|
||||
return "\n".join(csv_rows_with_index)
|
||||
|
||||
def _collect_col_names_csv(self):
|
||||
if self.type == ONE_DIM:
|
||||
return ",0"
|
||||
|
||||
# TWO_DIM
|
||||
return "," + ",".join(['{}'.format(i) for i in range(self.array.shape[1])])
|
||||
|
||||
def slice(self, start_index=None, end_index=None):
|
||||
if end_index is not None and start_index is not None:
|
||||
self.array = self.array[start_index:end_index]
|
||||
@@ -271,6 +297,12 @@ def __get_tables_display_options():
|
||||
import sys
|
||||
if sys.version_info < (3, 0):
|
||||
return None, MAX_COLWIDTH, None
|
||||
try:
|
||||
import pandas as pd
|
||||
if int(pd.__version__.split('.')[0]) < 1:
|
||||
return None, MAX_COLWIDTH, None
|
||||
except Exception:
|
||||
pass
|
||||
return None, None, None
|
||||
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ import pandas as pd
|
||||
import typing
|
||||
|
||||
TABLE_TYPE_NEXT_VALUE_SEPARATOR = '__pydev_table_column_type_val__'
|
||||
MAX_COLWIDTH_PYTHON_2 = 100000
|
||||
MAX_COLWIDTH = 100000
|
||||
|
||||
|
||||
def get_type(table):
|
||||
@@ -38,7 +38,7 @@ def get_data(table, use_csv_serialization, start_index=None, end_index=None, for
|
||||
# type: (Union[pd.DataFrame, pd.Series], int, int) -> str
|
||||
|
||||
def convert_data_to_csv(data):
|
||||
return repr(__convert_to_df(data).to_csv(na_rep = "NaN"))
|
||||
return repr(__convert_to_df(data).to_csv(na_rep = "NaN", float_format=format))
|
||||
|
||||
def convert_data_to_html(data):
|
||||
return repr(__convert_to_df(data).to_html(notebook=True))
|
||||
@@ -295,11 +295,10 @@ def __get_tables_display_options():
|
||||
# type: () -> Tuple[None, Union[int, None], None]
|
||||
import sys
|
||||
if sys.version_info < (3, 0):
|
||||
return None, MAX_COLWIDTH_PYTHON_2, None
|
||||
return None, MAX_COLWIDTH, None
|
||||
try:
|
||||
import pandas as pd
|
||||
if int(pd.__version__.split('.')[0]) < 1:
|
||||
return None, MAX_COLWIDTH_PYTHON_2, None
|
||||
return None, MAX_COLWIDTH, None
|
||||
except ImportError:
|
||||
pass
|
||||
return None, None, None
|
||||
|
||||
@@ -37,7 +37,8 @@ def get_data(table, use_csv_serialization, start_index=None, end_index=None, for
|
||||
# type: (pl.DataFrame, int, int) -> str
|
||||
with __create_config(format):
|
||||
if use_csv_serialization:
|
||||
return __get_df_slice(table, start_index, end_index).write_csv(null_value = "null")
|
||||
float_precision = _get_float_precision(format)
|
||||
return __get_df_slice(table, start_index, end_index).write_csv(null_value = "null", float_precision=float_precision)
|
||||
return table[start_index:end_index]._repr_html_()
|
||||
|
||||
|
||||
@@ -66,10 +67,9 @@ def __create_config(format=None):
|
||||
cfg.set_tbl_cols(-1) # Unlimited
|
||||
cfg.set_tbl_rows(-1) # Unlimited
|
||||
cfg.set_fmt_str_lengths(MAX_COLWIDTH) # No option to set unlimited, so it's 100_000
|
||||
if format is not None:
|
||||
float_precision = _get_float_precision(format)
|
||||
if float_precision is not None:
|
||||
cfg.set_float_precision(float_precision)
|
||||
float_precision = _get_float_precision(format)
|
||||
if float_precision is not None:
|
||||
cfg.set_float_precision(float_precision)
|
||||
return cfg
|
||||
|
||||
|
||||
@@ -219,7 +219,7 @@ def __get_describe(table):
|
||||
|
||||
|
||||
def _get_float_precision(format):
|
||||
# type: (str) -> Union[int, None]
|
||||
# type: (Union[str, None]) -> Union[int, None]
|
||||
if isinstance(format, str):
|
||||
if format.startswith("%") and format.endswith("f"):
|
||||
start = format.find('%.') + 2
|
||||
|
||||
Reference in New Issue
Block a user