# Source code for kdiagram.utils.q_utils

# -*- coding: utf-8 -*-
#   License: Apache 2.0 
#   Author: LKouadio <etanoyau@gmail.com>

"""
Provides utility functions for quantile extraction and validation.
"""
from __future__ import annotations 

import re 
import warnings 
from typing import List, Optional, Union, Tuple 

import pandas as pd 

from ..decorators import SaveFile, check_non_emptiness
from .diagnose_q import validate_quantiles 
from .generic_utils import error_policy 
from .handlers import columns_manager 
from .validator import is_frame, exist_features, check_spatial_columns  

# Public names exported by ``from kdiagram.utils.q_utils import *``.
__all__ =["reshape_quantile_data", "melt_q_data", "pivot_q_data"]


[docs]
@SaveFile
@check_non_emptiness
def reshape_quantile_data(
    df: pd.DataFrame,
    value_prefix: str,
    spatial_cols: Optional[List[str]] = None,
    dt_col: str = 'year',
    error: str = 'warn',
    savefile: Optional[str] = None,
    verbose: int = 0,
) -> pd.DataFrame:
    r"""
    Reshape wide quantile columns into one column per quantile.

    Columns named ``{value_prefix}_{dt_value}_q{quantile}`` are melted,
    tagged with the temporal value and quantile parsed from their name,
    and pivoted so that every distinct quantile becomes a column named
    ``{value_prefix}_q{quantile}``. Rows are keyed by the spatial columns
    (when given) plus ``dt_col``.

    Parameters
    ----------
    df : pd.DataFrame
        Wide-format input. Only columns matching
        ``{value_prefix}_{dt_value}_q{quantile}`` are reshaped, where
        ``dt_value`` is exactly four digits (e.g. a year) and
        ``quantile`` is a number such as ``0.1``.
    value_prefix : str
        Base name of the quantile columns (e.g. ``'subs'``).
    spatial_cols : list of str, optional
        Columns (e.g. ``['lon', 'lat']``) kept as row identifiers. A
        single column name is also accepted. Names absent from ``df``
        are reported according to ``error`` and then dropped; the
        remaining names keep the order in which they were passed. If
        ``None`` or empty, no spatial column is kept and rows are
        identified by ``dt_col`` only.
    dt_col : str, default='year'
        Name of the output column holding the temporal value parsed
        from the column names (stored as ``int``).
    error : {'raise', 'warn', 'ignore'}, default='warn'
        Policy applied when spatial columns are missing, when no column
        starts with ``value_prefix``, or when no column follows the full
        naming pattern:

        - ``'raise'``: raise ``ValueError``.
        - ``'warn'``: emit a warning and continue (missing spatial
          columns) or return an empty DataFrame (no usable columns).
        - ``'ignore'``: same as ``'warn'`` without the warning.
    savefile : str, optional
        Not read by the function body; presumably consumed by the
        ``SaveFile`` decorator to persist the result.
    verbose : int, default=0
        If ``>= 1``, print the number of valid quantile columns found.

    Returns
    -------
    pd.DataFrame
        Columns are the retained spatial columns, ``dt_col`` and one
        ``{value_prefix}_q{quantile}`` column per distinct quantile,
        sorted by ``dt_col``. An empty DataFrame is returned when no
        usable quantile column is found (and ``error`` is not
        ``'raise'``).

    Raises
    ------
    ValueError
        If ``error='raise'`` and one of the conditions listed under
        ``error`` is met.

    Examples
    --------
    >>> from kdiagram.utils.q_utils import reshape_quantile_data
    >>> import pandas as pd
    >>> wide_df = pd.DataFrame({
    ...     'lon': [-118.25, -118.30],
    ...     'lat': [34.05, 34.10],
    ...     'subs_2022_q0.1': [1.2, 1.3],
    ...     'subs_2022_q0.5': [1.5, 1.6],
    ...     'subs_2023_q0.1': [1.7, 1.8]
    ... })
    >>> reshaped_df = reshape_quantile_data(
    ...     wide_df, 'subs', spatial_cols=['lon', 'lat'])
    >>> list(reshaped_df.columns)
    ['lon', 'lat', 'year', 'subs_q0.1', 'subs_q0.5']

    Notes
    -----
    - When several input rows share the same (spatial, temporal) key,
      the first value is kept (``aggfunc='first'``).

    .. math::

        \mathbf{W}_{m \times n} \rightarrow \mathbf{L}_{p \times k}

    Where:
    - :math:`m` = Original row count
    - :math:`n` = Original columns (quantile + spatial + temporal)
    - :math:`p` = :math:`m \times t` (t = unique temporal values)
    - :math:`k` = Spatial cols + 1 temporal + q quantile cols

    See Also
    --------
    pandas.melt : For reshaping DataFrames from wide to long format.
    kdiagram.utils.q_utils.melt_q_data :
        Alternative method for reshaping quantile data.

    References
    ----------
    .. [1] McKinney, W. (2010). "Data Structures for Statistical Computing
           in Python". Proceedings of the 9th Python in Science Conference.
    .. [2] Wickham, H. (2014). "Tidy Data". Journal of Statistical Software,
           59(10), 1-23.
    """
    is_frame(df, df_only=True, objname="Data 'df'")

    def _report(msg: str) -> None:
        # Apply the ``error`` policy; any value other than
        # 'raise'/'warn' is treated as silent.
        if error == 'raise':
            raise ValueError(msg)
        if error == 'warn':
            warnings.warn(msg)

    # Accept a bare column name as a one-element list.
    if isinstance(spatial_cols, str):
        spatial_cols = [spatial_cols]

    id_vars: List[str] = []
    if spatial_cols:
        present = set(df.columns)
        missing_spatial = set(spatial_cols) - present
        if missing_spatial:
            _report(f"Missing spatial columns: {missing_spatial}")
        # Keep the caller's ordering (a set intersection would shuffle
        # the output columns) and drop duplicates.
        id_vars = list(
            dict.fromkeys(c for c in spatial_cols if c in present)
        )

    # Candidate columns: string labels starting with the prefix.
    quant_cols = [
        col for col in df.columns
        if isinstance(col, str) and col.startswith(value_prefix)
    ]
    if not quant_cols:
        _report(f"No columns found with prefix '{value_prefix}'")
        return pd.DataFrame()

    # Parse (temporal value, quantile) out of each candidate name.
    pattern = re.compile(
        rf"{re.escape(value_prefix)}_(\d{{4}})_q([0-9.]+)$"
    )
    meta = []
    for col in quant_cols:
        match = pattern.match(col)
        if match:
            year, quantile = match.groups()
            meta.append((col, int(year), float(quantile)))
    valid_cols = [item[0] for item in meta]

    if verbose >= 1:
        print(f"Found {len(valid_cols)} valid quantile columns")

    if not valid_cols:
        # Prefix matched but the full naming pattern did not: report it
        # instead of silently returning an empty frame.
        _report(
            f"No columns with prefix '{value_prefix}' follow the "
            "pattern {prefix}_{dt_val}_q{quantile}"
        )
        return pd.DataFrame()

    # Wide -> long: one row per (original row, quantile column).
    melt_df = df.melt(
        id_vars=id_vars,
        value_vars=valid_cols,
        var_name='column',
        value_name='value'
    )

    # Attach the parsed temporal value and quantile to every row.
    meta_df = pd.DataFrame(
        meta, columns=['column', dt_col, 'quantile']
    )
    melt_df = melt_df.merge(meta_df, on='column')

    # Long -> one column per quantile, keyed by spatial + temporal.
    pivot_df = melt_df.pivot_table(
        index=id_vars + [dt_col],
        columns='quantile',
        values='value',
        aggfunc='first'
    ).reset_index()

    # Quantile columns are float labels after the pivot; give them
    # their final ``{prefix}_q{quantile}`` names.
    pivot_df.columns = [
        f"{value_prefix}_q{col}" if isinstance(col, float) else col
        for col in pivot_df.columns
    ]

    return pivot_df.sort_values(
        by=dt_col, ascending=True
    ).reset_index(drop=True)




[docs]
@SaveFile
@check_non_emptiness
def melt_q_data(
    df: pd.DataFrame,
    value_prefix: Optional[str]=None,
    dt_name: str = 'dt_col',
    q: Optional[List[Union[float, str]]] = None,
    error: str = 'raise',
    sort_values: Optional[str]=None,
    spatial_cols: Optional[Tuple[str, str]] = None,
    savefile: Optional[str] = None,
    verbose: int = 0
) -> pd.DataFrame:
    r"""
    Reshape wide quantile columns into explicit temporal/quantile form.

    Columns named ``{value_prefix}_{dt_value}_q{quantile}`` are melted,
    tagged with the temporal value and quantile parsed from their name,
    and pivoted so that every distinct quantile becomes a column named
    ``{value_prefix}_q{quantile}``. Rows are keyed by the spatial columns
    (when given) plus ``dt_name``.

    Parameters
    ----------
    df : pd.DataFrame
        Wide-format input. Only columns matching
        ``{value_prefix}_{dt_value}_q{quantile}`` are reshaped, where
        ``dt_value`` is a run of digits and ``quantile`` is a number
        such as ``0.1``.
    value_prefix : str
        Base name of the quantile columns (e.g. ``'subs'``). Required
        despite the ``None`` default in the signature.
    dt_name : str, default='dt_col'
        Name of the output column holding the temporal value parsed
        from the column names. Values are kept as the strings found in
        the column names.
    q : list of float/str, optional
        Quantiles to keep. The list is passed through
        ``validate_quantiles`` and compared, as floats, with the
        quantiles parsed from the column names. ``None`` keeps all
        detected quantiles.
    error : {'raise', 'warn', 'ignore'}, default='raise'
        Policy applied when no column matches the naming pattern or the
        requested quantiles:

        - ``'raise'``: raise ``ValueError``.
        - ``'warn'``: emit a warning and return an empty DataFrame.
        - ``'ignore'``: return an empty DataFrame silently.
    sort_values : str, optional
        Column to sort the final DataFrame by. If the column cannot be
        validated or sorting fails, no extra sort is applied; a message
        is printed when ``verbose >= 2``.
    spatial_cols : tuple of str, optional
        Columns (e.g. ``('lon', 'lat')``) kept as row identifiers. If
        omitted, rows are identified by ``dt_name`` only.
    savefile : str, optional
        Not read by the function body; presumably consumed by the
        ``SaveFile`` decorator to persist the result.
    verbose : int, default=0
        Verbosity of progress messages:

        - ``>= 1``: final shape
        - ``>= 2``: number of matched columns, spatial columns, sort
          fallbacks
        - ``>= 4``: shapes after the melt and pivot steps

    Returns
    -------
    pd.DataFrame
        Columns are the spatial columns (if any), ``dt_name`` and one
        ``{value_prefix}_q{quantile}`` column per distinct quantile.
        An empty DataFrame is returned when nothing matches (and
        ``error`` is not ``'raise'``).

    Raises
    ------
    TypeError
        If ``value_prefix`` is not a string.
    ValueError
        If ``error='raise'`` and no column matches the pattern or the
        requested quantiles.

    Examples
    --------
    >>> from kdiagram.utils.q_utils import melt_q_data
    >>> import pandas as pd
    >>> wide_df = pd.DataFrame({
    ...     'lon': [-118.25, -118.30],
    ...     'lat': [34.05, 34.10],
    ...     'subs_2022_q0.1': [1.2, 1.3],
    ...     'subs_2022_q0.5': [1.5, 1.6],
    ...     'subs_2023_q0.9': [1.7, 1.8]
    ... })
    >>> long_df = melt_q_data(wide_df, 'subs', dt_name='year')
    >>> list(long_df.columns)
    ['year', 'subs_q0.1', 'subs_q0.5', 'subs_q0.9']

    >>> long_df = melt_q_data(wide_df, 'subs', dt_name='year',
    ...                      spatial_cols=('lon', 'lat'))
    >>> list(long_df.columns)
    ['lon', 'lat', 'year', 'subs_q0.1', 'subs_q0.5', 'subs_q0.9']

    Notes
    -----
    - When several input rows share the same (spatial, temporal) key,
      the first value is kept (``aggfunc='first'``).
    - Quantile labels keep the precision found in the column names
      (e.g. ``0.025`` stays ``q0.025``); trailing zeros are trimmed.

    .. math::

        \mathbf{W}_{m \times n} \rightarrow \mathbf{L}_{p \times k}

    Where:
    - :math:`m` = Original row count
    - :math:`n` = Original columns (quantile + spatial + temporal)
    - :math:`p` = :math:`m \times t` (t = unique temporal values)
    - :math:`k` = Spatial cols + 1 temporal + q quantile cols

    See Also
    --------
    pandas.melt : For reshaping DataFrames from wide to long format.
    kdiagram.utils.q_utils.reshape_quantile_data :
        Alternative method for reshaping quantile data.

    References
    ----------
    .. [1] McKinney, W. (2010). "Data Structures for Statistical Computing
           in Python". Proceedings of the 9th Python in Science Conference.
    .. [2] Wickham, H. (2014). "Tidy Data". Journal of Statistical Software,
           59(10), 1-23.
    """
    def _q_label(q_val: float) -> str:
        # Fixed-point with ample precision, then trim trailing zeros and
        # a dangling dot. A 2-decimal format would turn 0.025 / 0.975
        # into rounded (and possibly colliding) labels.
        return f"{q_val:.10f}".rstrip('0').rstrip('.')

    # Normalise the error policy (helper defined elsewhere in the package).
    error = error_policy(
        error,
        base="warn",
        msg="error must be one of 'raise','warn', or 'ignore'"
    )

    is_frame(df, df_only=True, objname="Data 'df'")

    # Fail with a clear message instead of an obscure one from re.escape.
    if not isinstance(value_prefix, str):
        raise TypeError(
            "value_prefix must be a string, got "
            f"{type(value_prefix).__name__}"
        )

    # Matches columns like: {value_prefix}_{dt_val}_q{quantile}
    pattern = re.compile(
        rf"^{re.escape(value_prefix)}_(\d+)_q([0-9.]+)$"
    )

    # (column name, temporal value, quantile) for every matching column.
    meta = []
    for col in df.columns:
        match = pattern.match(col)
        if match:
            dt_val, q_val = match.groups()
            meta.append((col, dt_val, float(q_val)))
    quant_cols = [item[0] for item in meta]

    if verbose >= 2:
        print(
            f"[INFO] Found {len(quant_cols)} quantile columns "
            f"for prefix '{value_prefix}'."
        )

    if not quant_cols:
        msg = (
            f"No columns found with prefix '{value_prefix}' "
            "following the pattern {prefix}_{dt_val}_q{quant}"
        )
        handle_error(msg, error)
        return pd.DataFrame()

    # Keep only the requested quantiles, if any were specified.
    if q is not None:
        valid_q = validate_quantiles(
            q, mode='soft', dtype='float64'
        )
        wanted = {float(x) for x in valid_q}
        meta = [item for item in meta if item[2] in wanted]
        if not meta:
            msg = f"No columns match requested quantiles {q}"
            handle_error(msg, error)
            return pd.DataFrame()
        quant_cols = [item[0] for item in meta]

    # Normalise and validate the spatial columns.
    spatial_cols = columns_manager(spatial_cols, empty_as_none=False)
    if spatial_cols:
        check_spatial_columns(df, spatial_cols)
        if verbose >= 2:
            print(
                "[INFO] Spatial columns detected: "
                f"{spatial_cols}"
            )

    # Wide -> long over the quantile columns only.
    id_vars = list(spatial_cols) if spatial_cols else []
    melt_df = df.melt(
        id_vars=id_vars,
        value_vars=quant_cols,
        var_name='column',
        value_name=value_prefix
    )
    if verbose >= 4:
        print(
            "[DEBUG] After melt, shape: "
            f"{melt_df.shape}"
        )

    # Attach the parsed temporal value and quantile to every row.
    meta_df = pd.DataFrame(
        meta, columns=['column', dt_name, 'quantile']
    )
    merged_df = melt_df.merge(meta_df, on='column', how='left')

    # Long -> one column per quantile, keyed by spatial + temporal.
    pivot_index = id_vars + [dt_name]
    pivot_df = merged_df.pivot_table(
        index=pivot_index,
        columns='quantile',
        values=value_prefix,
        aggfunc='first'
    ).reset_index()

    # Quantile columns are float labels after the pivot; rename them
    # to e.g. subs_q0.1, subs_q0.9.
    pivot_df.columns = [
        f"{value_prefix}_q{_q_label(col)}"
        if isinstance(col, float) else str(col)
        for col in pivot_df.columns
    ]

    # Default ordering: spatial columns, then temporal column.
    pivot_df = pivot_df.sort_values(pivot_index).reset_index(drop=True)

    if verbose >= 4:
        print(
            "[DEBUG] After pivot, shape: "
            f"{pivot_df.shape}"
        )

    if verbose >= 1:
        print(
            f"[INFO] melt_q_data complete. Final shape: "
            f"{pivot_df.shape}"
        )

    # Optional user-requested sort; best effort, never fatal.
    if sort_values is not None:
        try:
            # Verify that `sort_values` columns exist
            exist_features(pivot_df, features=sort_values)
        except Exception as e:
            if verbose >= 2:
                print(
                    f"[WARN] Unable to sort by '{sort_values}'. "
                    f"{str(e)} Fallback to no sorting."
                )
            sort_values = None

        if sort_values is not None:
            try:
                pivot_df = pivot_df.sort_values(by=sort_values)
            except Exception as e:
                if verbose >= 2:
                    print(
                        f"[WARN] Sorting failed: {str(e)}. "
                        "No sort applied."
                    )
    return pivot_df


def handle_error(msg: str, error: str) -> None:
    """Report ``msg`` according to the ``error`` policy.

    Parameters
    ----------
    msg : str
        Message to report.
    error : str
        ``'raise'`` raises ``ValueError(msg)``; ``'warn'`` emits a
        warning with ``msg``; any other value does nothing.

    Raises
    ------
    ValueError
        If ``error`` is ``'raise'``.
    """
    if error == 'warn':
        warnings.warn(msg)
        return
    if error == 'raise':
        raise ValueError(msg)
        


[docs]
@SaveFile
@check_non_emptiness
def pivot_q_data(
    df: pd.DataFrame,
    value_prefix: str,
    dt_col: str = 'dt_col',
    q: Optional[List[Union[float, str]]] = None,
    spatial_cols: Optional[Tuple[str, str]]=None,
    error: str = 'raise',
    verbose: int = 0
) -> pd.DataFrame:
    r"""
    Convert per-quantile columns back to wide temporal-quantile columns.

    Rebuilds columns named ``{value_prefix}_{dt_value}_q{quantile}`` from
    a frame that has one temporal column (``dt_col``) and one
    ``{value_prefix}_q{quantile}`` column per quantile. This reverses
    the layout produced by ``melt_q_data``.

    .. math::

        \mathbf{L}_{p \times k} \rightarrow \mathbf{W}_{m \times n}

    Where:
    - :math:`p` = Long format row count
    - :math:`k` = Spatial cols + temporal + quantile columns
    - :math:`m` = :math:`p / t` (t = unique temporal values)
    - :math:`n` = Spatial cols + :math:`t \times q` quantile columns

    Parameters
    ----------
    df : pd.DataFrame
        Input containing the temporal column ``dt_col``, the quantile
        columns ``{value_prefix}_q{quantile}`` and, optionally, the
        spatial columns.
    value_prefix : str
        Base measurement name used both to detect the quantile columns
        and to build the output column names.
    dt_col : str, default='dt_col'
        Name of the temporal column in ``df``.
    q : list of float/str, optional
        Quantiles to keep. The list is passed through
        ``validate_quantiles`` and compared, as floats, with the
        quantiles parsed from the column names. ``None`` keeps all
        detected quantiles.
    spatial_cols : tuple of str, optional
        Columns (e.g. ``('lon', 'lat')``) used as row keys of the
        output.
        NOTE(review): when omitted the pivot has no row keys and the
        layout is whatever ``pandas.pivot_table`` yields for an empty
        index -- confirm this is intended.
    error : {'raise', 'warn', 'ignore'}, default='raise'
        Policy applied when ``dt_col`` is missing, no quantile column is
        found, the ``q`` filter leaves nothing, or pivoting raises
        ``ValueError``:

        - ``'raise'``: raise ``ValueError``.
        - ``'warn'``: emit a warning and return an empty DataFrame.
        - ``'ignore'``: return an empty DataFrame silently.
    verbose : int, default=0
        If ``>= 1``, print the quantiles detected in the column names.

    Returns
    -------
    pd.DataFrame
        Spatial columns followed by one
        ``{value_prefix}_{dt_value}_q{quantile}`` column per
        (temporal value, quantile) pair, or an empty DataFrame on a
        handled error.

    Raises
    ------
    TypeError
        If ``df`` is not a pandas DataFrame.
    ValueError
        If ``error`` is not one of the accepted values, or if
        ``error='raise'`` and one of the conditions listed under
        ``error`` is met.

    Examples
    --------
    >>> from kdiagram.utils.q_utils import pivot_q_data
    >>> import pandas as pd
    >>> long_df = pd.DataFrame({
    ...     'lon': [-118.25, -118.25, -118.3],
    ...     'lat': [34.05, 34.05, 34.1],
    ...     'year': [2022, 2023, 2022],
    ...     'subs_q0.1': [1.2, 1.7, 1.3],
    ...     'subs_q0.5': [1.5, 1.9, 1.6]
    ... })
    >>> wide_df = pivot_q_data(long_df, 'subs', dt_col='year',
    ...                        spatial_cols=('lon', 'lat'))
    >>> list(wide_df.columns)
    ['lon', 'lat', 'subs_2022_q0.1', 'subs_2022_q0.5', 'subs_2023_q0.1', 'subs_2023_q0.5']

    Notes
    -----
    - Quantile columns must follow the ``{prefix}_q{quantile}`` pattern.
    - When several rows share the same (spatial, temporal) key, the
      first value is kept (``aggfunc='first'``).
    - Quantile labels keep the precision found in the column names
      (e.g. ``0.025`` stays ``q0.025``); trailing zeros are trimmed.

    See Also
    --------
    pandas.pivot_table : Base pandas function for reshaping
    kdiagram.utils.q_utils.melt_q_data : Inverse transformation

    References
    ----------
    .. [1] VanderPlas, J. (2016). "Python Data Science Handbook".
           O'Reilly Media, Inc.
    .. [2] McKinney, W. (2013). "Python for Data Analysis".
           O'Reilly Media, Inc.
    """
    def _fail(msg: str) -> pd.DataFrame:
        # Apply the ``error`` policy; when it does not raise, the
        # caller returns this empty frame.
        if error == 'raise':
            raise ValueError(msg)
        if error == 'warn':
            warnings.warn(msg)
        return pd.DataFrame()

    def _q_label(q_val: float) -> str:
        # Fixed-point with ample precision, then trim trailing zeros and
        # a dangling dot. A 2-decimal format would turn 0.025 / 0.975
        # into rounded (and possibly colliding) labels.
        return f"{q_val:.10f}".rstrip('0').rstrip('.')

    # Validate input parameters
    if not isinstance(df, pd.DataFrame):
        raise TypeError("Input must be a pandas DataFrame")

    if error not in ['raise', 'warn', 'ignore']:
        raise ValueError("error must be 'raise', 'warn', or 'ignore'")

    # Work on a copy so the caller's frame is never touched.
    df = df.copy()
    missing = {dt_col} - set(df.columns)
    if missing:
        return _fail(f"Missing required columns: {missing}")

    # Detect quantile columns and the quantile each one encodes.
    quant_pattern = re.compile(rf"^{re.escape(value_prefix)}_q([0-9.]+)$")
    col_quantile = {}
    for col in df.columns:
        found = quant_pattern.match(col)
        if found:
            col_quantile[col] = float(found.group(1))
    quant_columns = list(col_quantile)

    if not quant_columns:
        return _fail(
            f"No quantile columns found with prefix '{value_prefix}'"
        )

    if verbose >= 1:
        quantiles = sorted(col_quantile.values())
        print(f"Found quantiles: {quantiles}")

    # Keep only the requested quantiles, if any were specified.
    if q is not None:
        valid_q = validate_quantiles(q, mode='soft', dtype='float64')
        wanted = {float(x) for x in valid_q}
        quant_columns = [
            col for col in quant_columns if col_quantile[col] in wanted
        ]
        if not quant_columns:
            return _fail(f"No columns match filtered quantiles {q}")

    # Normalise and validate the spatial columns.
    spatial_cols = columns_manager(spatial_cols, empty_as_none=False)
    if spatial_cols:
        check_spatial_columns(df, spatial_cols )
    row_keys = list(spatial_cols) if spatial_cols else []

    # One row per (original row, quantile column).
    melt_df = df.melt(
        id_vars=row_keys + [dt_col],
        value_vars=quant_columns,
        var_name='quantile',
        value_name='value'
    )

    # Replace the column name by the numeric quantile it encodes.
    melt_df['quantile'] = melt_df['quantile'].str.extract(
        r'q([0-9.]+)$'
    ).astype(float)

    # One output column per (temporal value, quantile) pair.
    try:
        wide_df = melt_df.pivot_table(
            index=row_keys,
            columns=[dt_col, 'quantile'],
            values='value',
            aggfunc='first'  # Handle potential duplicates
        )
    except ValueError as e:
        return _fail(f"Pivoting failed: {str(e)}")

    # Flatten the (dt, quantile) MultiIndex into final column names.
    wide_df.columns = [
        f"{value_prefix}_{dt}_q{_q_label(quantile)}"
        for (dt, quantile) in wide_df.columns
    ]

    return wide_df.reset_index()