Files
openide/python/helpers/pydev/pydev_tests_tables/test_pydev_pandas.py
Natalia.Murycheva 0e20868e92 [PyCharm Tables] PY-80834 Fixed test data and tests
(cherry picked from commit c1c344c53e2ec1ad91e423f59c4f062c25678f39)

IJ-CR-166515

GitOrigin-RevId: 152abd4bbb6df85b9fa3a373f8992898864348e8
2025-06-22 09:32:51 +00:00

723 lines
27 KiB
Python

# Copyright 2000-2023 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
"""
Here we test aux methods for pandas tables handling, namely,
check functions from _pydevd_bundle.tables.pydevd_pandas module.
"""
import pandas as pd
import pytest
import sys
try:
from StringIO import StringIO
except ImportError:
from io import StringIO
from IPython.display import HTML
import _pydevd_bundle.tables.pydevd_pandas as pandas_tables_helpers
from _pydevd_bundle.pydevd_constants import NEXT_VALUE_SEPARATOR
TYPE_BOOL, TYPE_NUMERIC, TYPE_CATEGORICAL = "bool", "numeric", "categorical"
test_data_dir = 'python_' + str(sys.version_info[0]) + '_' + str(sys.version_info[1])
@pytest.fixture
def setup_dataframe():
    """
    Fixture for tests that are related to DataFrames.

    Returns a tuple of auxiliary data:
      rows_number          -- number of rows in the frame
      df                   -- the frame itself, covering many dtypes
      df_html              -- repr of the one-row HTML preview of the frame
      columns_types        -- index dtype followed by every column dtype, as str
      col_name_to_data_type -- expected visualisation type label per column
    """
    rows_number = 4
    df = pd.DataFrame(
        {
            "A": 1.0,
            "B": "foo",
            "C": [None] * rows_number,
            "D": [1 + 20j] * rows_number,
            "E": [1 + 20j] * rows_number,
            "F": [None, "bar", 2., 1 + 10j],
            "G": [None, "bar", 2., 1 + 10j],
            "H": [True, False] * (rows_number // 2),
            "bool_with_nan": [True, False, False, None],
            "I": pd.Timestamp("20130102"),
            "J": pd.Series(1, index=list(range(rows_number)),
                           dtype="float32"),
            "K": pd.Series(range(rows_number),
                           index=list(range(rows_number)),
                           dtype="int32"),
            "L": pd.Categorical(["test", "train"] * (rows_number // 2)),
            "dates": pd.date_range("now", periods=rows_number),
            "datetime64[ns]": pd.Timestamp("20010102"),
            "datetime64[ns, <tz>]": pd.date_range("20130101", periods=rows_number,
                                                  tz="CET"),
            "period": pd.Period('2012-1-1', freq='D'),
            "category": pd.Series(list("ABCD")).astype("category"),
            "interval": pd.interval_range(start=pd.Timestamp("2017-01-01"),
                                          periods=rows_number, freq="W"),
            "lists": [[1, 2], [1, 2], [3, 4], [4, 5]],
            # NOTE(review): the last element is a set literal {4, 5}, not a
            # dict — looks like a typo for {4: 5}. Kept as-is because the
            # golden files under test_data/ were generated with this data.
            "dicts": [{1: 2}, {1: 2}, {3: 4}, {4, 5}],
            "tuples": [(1, 2), (1, 2), (3, 4), (4, 5)],
        }
    )
    df['datetime64[ns]'] = df['datetime64[ns]'].astype("datetime64[ns]")
    df['I'] = df['I'].astype("datetime64[ns]")
    df_html = repr(df.head(1).to_html(notebook=True, max_cols=None))
    columns_types = [str(df.index.dtype)] + [str(t) for t in df.dtypes]
    col_name_to_data_type = {
        "A": TYPE_NUMERIC,
        "B": TYPE_CATEGORICAL,
        # fixed: this entry was "[None] * rows_number" (the column's data,
        # not a type label), so the type-detection test silently skipped
        # column "C"; an all-None column gets object dtype, which is
        # presumably classified as categorical like the other object columns
        "C": TYPE_CATEGORICAL,
        "D": TYPE_CATEGORICAL,
        "E": TYPE_CATEGORICAL,
        "F": TYPE_CATEGORICAL,
        "G": TYPE_CATEGORICAL,
        "H": TYPE_BOOL,
        "bool_with_nan": TYPE_CATEGORICAL,
        "I": TYPE_CATEGORICAL,
        "J": TYPE_NUMERIC,
        "K": TYPE_NUMERIC,
        "L": TYPE_CATEGORICAL,
        "dates": TYPE_CATEGORICAL,
        "datetime64[ns]": TYPE_CATEGORICAL,
        "datetime64[ns, <tz>]": TYPE_CATEGORICAL,
        "period": TYPE_CATEGORICAL,
        "category": TYPE_CATEGORICAL,
        "interval": TYPE_CATEGORICAL,
        "lists": TYPE_CATEGORICAL,
        "dicts": TYPE_CATEGORICAL,
        "tuples": TYPE_CATEGORICAL,
    }
    return rows_number, df, df_html, columns_types, col_name_to_data_type
@pytest.fixture
def setup_series_no_names():
    """
    Fixture for tests that work with a pd.Series that has no name.
    """
    values = [1, 2, 3]
    return pd.Series(values)
@pytest.fixture
def setup_dataframe_many_columns():
    """
    Fixture for DataFrame-related tests.
    Used to verify that no columns get lost for wide dataframes.
    """
    csv_path = 'test_data/pandas/dataframe_many_columns_before.csv'
    return pd.read_csv(csv_path)
@pytest.fixture
def setup_df_with_big_int_values():
    """
    Fixture for a single test.
    The resulting df checks that an OverflowError is caught inside the
    describe functions; the number has to be this big to trigger it.
    """
    huge_number = 555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555
    frame = pd.DataFrame({"BitIntValues": [1, 2]})
    # broadcast the huge value over the whole column
    frame["BitIntValues"] = huge_number
    return frame
@pytest.fixture
def setup_dataframe_with_float_values():
    """
    Fixture with int/float/string/collection/complex columns, used by the
    number-formatting and display tests. Python 2 gets a variant without
    the "dict" column. Column order is fixed: the golden files depend on it.
    """
    if test_data_dir.startswith("python_2"):
        frame = pd.DataFrame({
            "int_col": [1, 2, 3],
            "float_col": [1.0, 2.0, None],
            "strings": ["f", "s", None],
            "list": [[1.1, 2.2], [2.2, 3.3], [4.4, None]],
            "complex": [1.0 + 2j, 2.2 + 3j, 4.4 + 5j]
        })
    else:
        frame = pd.DataFrame({
            "int_col": [1, 2, 3],
            "float_col": [1.0, 2.0, None],
            "strings": ["f", "s", None],
            "dict": [{"age": 30, "height": 5.5},
                     {"age": 25, "height": 6.1},
                     {"age": 35, "height": None}],
            "list": [[1.1, 2.2], [2.2, 3.3], [4.4, None]],
            "complex": [1.0 + 2j, 2.2 + 3j, 4.4 + 5j]
        })
    return frame
# 1
def test_info_command(setup_dataframe):
    """
    Check the correctness of the info command that is invoked via Kotlin.
    :param setup_dataframe: fixture/data for the test
    """
    rows, df, expected_html, cols_types_expected, _ = setup_dataframe
    raw_types = pandas_tables_helpers.get_column_types(df)
    cols_types_actual = raw_types.split(pandas_tables_helpers.TABLE_TYPE_NEXT_VALUE_SEPARATOR)
    assert str(pd.DataFrame) == pandas_tables_helpers.get_type(df)
    assert str(rows) == pandas_tables_helpers.get_shape(df)
    assert expected_html == pandas_tables_helpers.get_head(df)
    assert cols_types_expected == cols_types_actual
# 2
def test_get_data_saves_display_options(setup_dataframe):
    """
    Ensure get_data leaves the user's pandas display options untouched.
    :param setup_dataframe: fixture/data for the test
    """
    _, df, _, _, _ = setup_dataframe
    watched = ('display.max_columns', 'display.max_colwidth', 'display.max_rows')
    before = [pd.get_option(name) for name in watched]
    pandas_tables_helpers.get_data(df, False, format="%.2f")
    after = [pd.get_option(name) for name in watched]
    assert before == after
# 3
def test_display_html_saves_display_options(setup_dataframe):
    """
    Ensure display_data_html leaves the user's pandas display options untouched.
    :param setup_dataframe: fixture/data for the test
    """
    _, df, _, _, _ = setup_dataframe
    watched = ('display.max_columns', 'display.max_colwidth', 'display.max_rows')
    before = [pd.get_option(name) for name in watched]
    pandas_tables_helpers.display_data_html(df, start_index=0, end_index=2)
    after = [pd.get_option(name) for name in watched]
    assert before == after
# 4
def test_display_csv_saves_display_options(setup_dataframe):
    """
    Ensure display_data_csv leaves the user's pandas display options untouched.
    :param setup_dataframe: fixture/data for the test
    """
    _, df, _, _, _ = setup_dataframe
    watched = ('display.max_columns', 'display.max_colwidth', 'display.max_rows')
    before = [pd.get_option(name) for name in watched]
    pandas_tables_helpers.display_data_csv(df, start_index=0, end_index=2)
    after = [pd.get_option(name) for name in watched]
    assert before == after
# 5
def test_convert_to_df_unnamed_series(setup_series_no_names):
    """
    Check the pair __convert_to_df / __get_column_name
    for a pd.Series without a name.
    :param setup_series_no_names: fixture/data for the test
    """
    as_df = pandas_tables_helpers.__convert_to_df(setup_series_no_names)
    assert isinstance(as_df, pd.DataFrame)
    first_column = as_df.columns[0]
    assert first_column == '<unnamed>'
# 6
def test_convert_to_df_common_series(setup_dataframe):
    """
    Check the pair __convert_to_df / __get_column_name
    for ordinary, named pd.Series objects.
    :param setup_dataframe: fixture/data for the test
    """
    _, df, _, _, _ = setup_dataframe
    for name in df.columns:
        as_df = pandas_tables_helpers.__convert_to_df(df[name])
        assert isinstance(as_df, pd.DataFrame)
        assert as_df.columns[0] == name
# 7
@pytest.mark.skipif(sys.version_info < (3, 0),
                    reason="TODO: investigate pd.Categorical/complex cases")
def test_get_info_format(setup_dataframe):
    """
    We have a common format for the result of the dataframe info command.
    The reference for the format is getInfoCommandActions from DSTableCommands:
    print(get_type(initCommandResult))
    print('$NEXT_VALUE_SEPARATOR')
    print(get_shape(initCommandResult))
    print('$NEXT_VALUE_SEPARATOR')
    print(get_head(initCommandResult))
    print('$NEXT_VALUE_SEPARATOR')
    print('$NEXT_VALUE_SEPARATOR')
    print(get_column_types(initCommandResult))
    Here we check that the pandas_tables_helpers methods can compose that result.
    TODO: we also should check this format for pydevd_tables.exec_table_command
    TODO: actually, the format is different: in one case we have \n, in other just ''
    :param setup_dataframe: fixture/data for the test, dataframe
    """
    _, df, _, _, _ = setup_dataframe
    # drop "dates"/"interval": both are generated from the "now" timestamp
    # and therefore differ from run to run
    df = df.drop(columns=['dates', 'interval'])
    parts = [pandas_tables_helpers.get_type(df),
             NEXT_VALUE_SEPARATOR,
             pandas_tables_helpers.get_shape(df),
             NEXT_VALUE_SEPARATOR,
             pandas_tables_helpers.get_head(df),
             NEXT_VALUE_SEPARATOR,
             pandas_tables_helpers.get_column_types(df)]
    __read_expected_from_file_and_compare_with_actual(
        actual='\n'.join(parts),
        expected_file='test_data/pandas/getInfo_result.txt'
    )
# 8
@pytest.mark.skipif(sys.version_info < (3, 0), reason="Different format for Python2")
def test_describe_many_columns_check_html(setup_dataframe_many_columns):
    # column descriptions of a wide frame must match the stored golden file
    frame = setup_dataframe_many_columns
    descriptions = pandas_tables_helpers.get_column_descriptions(frame)
    __read_expected_from_file_and_compare_with_actual(
        actual=descriptions,
        expected_file='test_data/pandas/dataframe_many_columns_describe_after.txt'
    )
# 9
def test_describe_shape_numeric_types(setup_dataframe_many_columns):
    frame = setup_dataframe_many_columns
    described = pandas_tables_helpers.__get_describe(frame)
    # frames whose columns are all numeric get exactly 10 statistics
    assert described.shape[0] == 10
    # and every original column must survive describing
    assert described.shape[1] == frame.shape[1]
# 10
def test_describe_shape_all_types(setup_dataframe):
    _, df, _, _, _ = setup_dataframe
    is_python2 = sys.version_info < (3, 0)
    if is_python2:
        df = df.drop(columns=['lists', 'dicts', 'tuples'])
    described = pandas_tables_helpers.__get_describe(df)
    # frames with mixed column types yield 13 statistics; python2 yields 15
    # because it adds "first" and "last", which we don't use
    expected_stats = 15 if is_python2 else 13
    assert described.shape[0] == expected_stats
    # the number of columns should be the same
    assert described.shape[1] == df.shape[1]
    # no column may be excluded entirely by the describe function
    assert len(described.columns[described.isna().all()].tolist()) == 0
# 11
def test_get_describe_save_columns(setup_dataframe):
    _, df, _, _, _ = setup_dataframe
    if sys.version_info < (3, 0):
        df = df.drop(columns=['lists', 'dicts', 'tuples'])
    described = pandas_tables_helpers.__get_describe(df)
    expected_columns = df.columns.tolist()
    actual_columns = described.columns.tolist()
    # the described frame keeps the same columns, in the same order
    assert len(expected_columns) == len(actual_columns)
    assert actual_columns == expected_columns
# 12
def test_get_describe_returned_types(setup_dataframe):
    _, df, _, _, _ = setup_dataframe
    if sys.version_info < (3, 0):
        df = df.drop(columns=['lists', 'dicts', 'tuples'])
    # a DataFrame describes to a DataFrame, a Series to a Series
    described_frame = pandas_tables_helpers.__get_describe(df)
    described_series = pandas_tables_helpers.__get_describe(df['A'])
    assert type(described_frame) == pd.DataFrame
    assert type(described_series) == pd.Series
# 13
@pytest.mark.skipif(sys.version_info < (3, 0), reason="Different format for Python2")
def test_describe_series(setup_dataframe):
    _, df, _, _, _ = setup_dataframe
    resulted = ""
    for column in df:
        # "dates"/"interval" hold run-dependent data, so they are skipped
        if column in ('dates', 'interval'):
            continue
        described = pandas_tables_helpers.__get_describe(df[column])
        resulted += ("" if described is None else str(described.to_dict())) + "\n"
    __read_expected_from_file_and_compare_with_actual(
        actual=resulted,
        expected_file='test_data/pandas/' + test_data_dir + '/series_describe.txt'
    )
# 14
@pytest.mark.skipif(sys.version_info < (3, 0),
                    reason="The exception will be raised during df creation in Python2")
def test_overflow_error_is_caught(setup_df_with_big_int_values):
    frame = setup_df_with_big_int_values
    described = pandas_tables_helpers.__get_describe(frame)
    assert described is not None
    # only "count" is expected to survive for the overflowing column
    expected = pd.Series(data={"count": 2}, index=["count"], name="BitIntValues")
    assert (described.values == expected.values).all()
# 15
def test_vis_data_detecting_column_type(setup_dataframe):
    """Each column's dtype must be classified as exactly one of bool/numeric/categorical."""
    _, df, _, _, col_name_to_data_type = setup_dataframe
    # expected answers of (__is_boolean, __is_categorical, __is_numeric) per label
    flags_by_label = [
        (TYPE_BOOL, True, False, False),
        (TYPE_NUMERIC, False, False, True),
        (TYPE_CATEGORICAL, False, True, False),
    ]
    for column in df.columns:
        col_type = df[column].dtype
        details = "column is %s, col_type is %s, col_type_kind is %s" % (column, col_type, col_type.kind)
        for label, exp_bool, exp_cat, exp_num in flags_by_label:
            # plain == comparison on purpose: a label may be a non-string value
            if col_name_to_data_type[column] == label:
                assert pandas_tables_helpers.__is_boolean(col_type) == exp_bool, details
                assert pandas_tables_helpers.__is_categorical(df[column], col_type) == exp_cat, details
                assert pandas_tables_helpers.__is_numeric(col_type) == exp_num, details
# 16
def test_vis_data_integer_columns_simple():
    # 20 values over 10 distinct ints
    frame = pd.DataFrame({"ints": list(range(10)) + list(range(10))})
    occurrences = pandas_tables_helpers.get_value_occurrences_count(frame)
    __read_expected_from_file_and_compare_with_actual(
        actual=occurrences,
        expected_file='test_data/pandas/vis_data_integer_simple.txt'
    )
# 17
@pytest.mark.skipif(sys.version_info < (3, 0), reason="")
def test_vis_data_integer_columns_with_bins():
    # 42 values over 21 distinct ints
    frame = pd.DataFrame({"ints": list(range(21)) + list(range(21))})
    occurrences = pandas_tables_helpers.get_value_occurrences_count(frame)
    __read_expected_from_file_and_compare_with_actual(
        actual=occurrences,
        expected_file='test_data/pandas/vis_data_integer_with_bins.txt'
    )
# 18
@pytest.mark.skipif(sys.version_info < (3, 0), reason="")
def test_vis_data_float_columns_simple():
    import numpy as np
    frame = pd.DataFrame({"floats": np.arange(0, 1, 0.1)})
    occurrences = pandas_tables_helpers.get_value_occurrences_count(frame)
    __read_expected_from_file_and_compare_with_actual(
        actual=occurrences,
        expected_file='test_data/pandas/vis_data_float_simple.txt'
    )
# 19
@pytest.mark.skipif(sys.version_info < (3, 0), reason="")
def test_vis_data_float_columns_with_bins():
    import numpy as np
    frame = pd.DataFrame({"floats": np.arange(0, 3, 0.1)})
    occurrences = pandas_tables_helpers.get_value_occurrences_count(frame)
    __read_expected_from_file_and_compare_with_actual(
        actual=occurrences,
        expected_file='test_data/pandas/vis_data_float_with_bins.txt'
    )
# 20
def test_vis_data_bool_column():
    frame = pd.DataFrame({"bools": [True] * 50 + [False] * 25})
    occurrences = pandas_tables_helpers.get_value_occurrences_count(frame)
    __read_expected_from_file_and_compare_with_actual(
        actual=occurrences,
        expected_file='test_data/pandas/vis_data_bool_column.txt'
    )
# 21
def test_vis_data_bool_with_nan_column():
    frame = pd.DataFrame({"bools": [True] * 50 + [False] * 25 + [None] * 10})
    occurrences = pandas_tables_helpers.get_value_occurrences_count(frame)
    # python2 formats this visualisation differently, so it has its own golden file
    if test_data_dir.startswith('python_2'):
        expected_path = 'test_data/pandas/python_2_7/vis_data_bool_with_nan_column.txt'
    else:
        expected_path = 'test_data/pandas/vis_data_bool_with_nan_column.txt'
    __read_expected_from_file_and_compare_with_actual(
        actual=occurrences,
        expected_file=expected_path
    )
# 22
def test_vis_data_categorical_column_percentage():
    frame = pd.DataFrame({"strs": ["First"] * 50 + ["Second"] * 25})
    occurrences = pandas_tables_helpers.get_value_occurrences_count(frame)
    __read_expected_from_file_and_compare_with_actual(
        actual=occurrences,
        expected_file='test_data/pandas/vis_data_categorical_column.txt'
    )
# 23
def test_vis_data_categorical_column_other():
    frame = pd.DataFrame({"strs": ["First"] * 50 + ["Second"] * 25 + ["Third"] * 10 + ["Forth"] * 5})
    occurrences = pandas_tables_helpers.get_value_occurrences_count(frame)
    __read_expected_from_file_and_compare_with_actual(
        actual=occurrences,
        expected_file='test_data/pandas/vis_data_categorical_column_other.txt'
    )
# 24
def test_vis_data_categorical_column_unique():
    # every value distinct -> "unique" visualisation
    frame = pd.DataFrame({"strs": [str(i) for i in range(1000)]})
    occurrences = pandas_tables_helpers.get_value_occurrences_count(frame)
    __read_expected_from_file_and_compare_with_actual(
        actual=occurrences,
        expected_file='test_data/pandas/vis_data_categorical_column_unique.txt'
    )
# 25
def test_vis_data_categorical_column_switch_perc_to_unique():
    # 49% unique values -> still the percentage visualisation
    below_threshold = pd.DataFrame({"str": [str(i) for i in range(49)] + ["48"] * 51})
    vis = pandas_tables_helpers.get_value_occurrences_count(below_threshold)
    assert pandas_tables_helpers.ColumnVisualisationType.PERCENTAGE in vis
    # if the share of unique values exceeds 50%, the "UNIQUE" vis must be shown
    above_threshold = pd.DataFrame({"str": [str(i) for i in range(52)] + ["51"] * 49})
    vis = pandas_tables_helpers.get_value_occurrences_count(above_threshold)
    assert pandas_tables_helpers.ColumnVisualisationType.UNIQUE in vis
# 26
def test_define_format_function():
    # invalid or non-string specs produce no formatter at all
    for bad_spec in (None, 'null', 'garbage', 1):
        assert pandas_tables_helpers.__define_format_function(bad_spec) is None
    # (spec, input float, expected rendering)
    cases = [
        ("%.2f", 1.1, "1.10"),
        ("%.12f", 1.1, "1.100000000000"),
        ("%.2e", 1.1, "1.10e+00"),
        ("%d", 1.1, "1"),
        ("%d garbage", 1.1, "1 garbage"),
    ]
    for format_str, float_value, expected_result in cases:
        formatter = pandas_tables_helpers.__define_format_function(format_str)
        assert formatter is not None
        assert callable(formatter)
        assert formatter(float_value) == expected_result
# 27
def test_get_tables_display_options():
    max_cols, max_colwidth, max_rows = pandas_tables_helpers.__get_tables_display_options()
    assert max_cols is None
    assert max_rows is None
    # python2 and pandas < 1.0 cap the column width; newer versions unset it
    legacy_pandas = sys.version_info < (3, 0) or int(pd.__version__.split('.')[0]) < 1
    if legacy_pandas:
        assert max_colwidth == pandas_tables_helpers.MAX_COLWIDTH
    else:
        assert max_colwidth is None
# 28
def test_get_data_float_values_2f(setup_dataframe_with_float_values):
    frame = setup_dataframe_with_float_values
    rendered = pandas_tables_helpers.get_data(frame, False, 0, 3, format="%.2f")
    __read_expected_from_file_and_compare_with_actual(
        actual=rendered,
        expected_file='test_data/pandas/' + test_data_dir + '/get_data_float_values_2f.txt'
    )
# 29
def test_get_data_float_values_12f(setup_dataframe_with_float_values):
    frame = setup_dataframe_with_float_values
    rendered = pandas_tables_helpers.get_data(frame, False, 0, 3, format="%.12f")
    __read_expected_from_file_and_compare_with_actual(
        actual=rendered,
        expected_file='test_data/pandas/' + test_data_dir + '/get_data_float_values_12f.txt'
    )
# 30
def test_get_data_float_values_2e(setup_dataframe_with_float_values):
    frame = setup_dataframe_with_float_values
    rendered = pandas_tables_helpers.get_data(frame, False, 0, 3, format="%.2e")
    __read_expected_from_file_and_compare_with_actual(
        actual=rendered,
        expected_file='test_data/pandas/' + test_data_dir + '/get_data_float_values_2e.txt'
    )
# 31
@pytest.mark.skipif(sys.version_info < (3, 0), reason="%d doesn't work with np.float('nan')")
def test_get_data_float_values_d(setup_dataframe_with_float_values):
    frame = setup_dataframe_with_float_values
    rendered = pandas_tables_helpers.get_data(frame, False, 0, 3, format="%d")
    __read_expected_from_file_and_compare_with_actual(
        actual=rendered,
        expected_file='test_data/pandas/' + test_data_dir + '/get_data_float_values_d.txt'
    )
# 32
@pytest.mark.skipif(sys.version_info < (3, 0), reason="%d doesn't work with np.float('nan')")
def test_get_data_float_values_d_garbage(setup_dataframe_with_float_values):
    frame = setup_dataframe_with_float_values
    rendered = pandas_tables_helpers.get_data(frame, False, 0, 3, format="%d garbage")
    __read_expected_from_file_and_compare_with_actual(
        actual=rendered,
        expected_file='test_data/pandas/' + test_data_dir + '/get_data_float_values_d_garbage.txt'
    )
# 33
def test_display_data_html_df(mocker, setup_dataframe):
    _, df, _, _, _ = setup_dataframe
    # "dates" holds run-dependent data, drop it before comparing
    df = df.drop(columns=['dates'])
    # intercept what the helper hands to IPython's display()
    mock_display = mocker.patch('IPython.display.display')
    pandas_tables_helpers.display_data_html(df, 0, 16)
    html_argument = mock_display.call_args[0][0]
    __read_expected_from_file_and_compare_with_actual(
        actual=html_argument.data,
        expected_file='test_data/pandas/' + test_data_dir + '/display_data_html_df.txt'
    )
# 34
def test_display_data_csv_df(mocker, setup_dataframe):
    _, df, _, _, _ = setup_dataframe
    # "dates" holds run-dependent data, drop it before comparing
    df = df.drop(columns=['dates'])
    # capture everything the helper prints to stdout
    fake_stdout = mocker.patch('sys.stdout', new_callable=StringIO)
    pandas_tables_helpers.display_data_csv(df, 0, 16)
    __read_expected_from_file_and_compare_with_actual(
        actual=fake_stdout.getvalue(),
        expected_file='test_data/pandas/' + test_data_dir + '/display_data_csv_df.txt'
    )
# 35
def test_display_data_html_df_with_float_values(mocker, setup_dataframe_with_float_values):
    frame = setup_dataframe_with_float_values
    # intercept what the helper hands to IPython's display()
    mock_display = mocker.patch('IPython.display.display')
    pandas_tables_helpers.display_data_html(frame, 0, 3)
    html_argument = mock_display.call_args[0][0]
    __read_expected_from_file_and_compare_with_actual(
        actual=html_argument.data,
        expected_file='test_data/pandas/' + test_data_dir + '/display_data_html_df_with_float_values.txt'
    )
# 36
def test_display_data_csv_df_with_float_values(mocker, setup_dataframe_with_float_values):
    frame = setup_dataframe_with_float_values
    # capture everything the helper prints to stdout
    fake_stdout = mocker.patch('sys.stdout', new_callable=StringIO)
    pandas_tables_helpers.display_data_csv(frame, 0, 3)
    __read_expected_from_file_and_compare_with_actual(
        actual=fake_stdout.getvalue(),
        expected_file='test_data/pandas/' + test_data_dir + '/display_data_csv_df_with_float_values.txt'
    )
def __prepare_describe_result(described_str):
    """
    Normalise whitespace so that comparisons do not depend on the python
    version: different versions indent describe() output differently.
    Only the data is compared, never the indentation.
    """
    # type: (str) -> (str)
    normalized_lines = [" ".join(line.split()) for line in described_str.split("\n")]
    return "\n".join(normalized_lines)
def __read_expected_from_file_and_compare_with_actual(actual, expected_file):
    """
    Compare *actual* against the golden file *expected_file*.

    The comparison is done char by char so that a mismatch reports the exact
    index that differs, then the lengths are compared to catch truncated or
    extended output.

    :param actual: string produced by the helper under test
    :param expected_file: path to the golden file with the expected output
    """
    with open(expected_file, 'r') as in_f:
        expected = in_f.read()
    assert len(expected) > 0, "The expected file is empty"
    # for a more convenient assertion fails messages here we compare string char by char
    for ind, (act, exp) in enumerate(zip(actual, expected)):
        assert act == exp, "\nindex is %s \n\n act part = %s \n\n exp part = %s\n" % (ind, actual, expected)
    # zip() stops at the shorter operand, so without this check an actual that
    # is a strict prefix of the expected file (or vice versa) would pass silently
    assert len(actual) == len(expected), \
        "\nlengths differ: actual is %s chars, expected is %s chars\n\n act = %s\n\n exp = %s\n" % (
            len(actual), len(expected), actual, expected)