Source code for wrapyfi.plugins.pandas_data

"""
Encoder and Decoder for pandas Series/Dataframes Data via Wrapyfi.

This script provides mechanisms to encode and decode pandas data using Wrapyfi.
It utilizes base64 encoding to convert binary data into ASCII strings for "pandas<2.0".
It utilizes base64 encoding and pickle serialization for "pandas>=2.0".

The script contains a class, `PandasData`, registered as a plugin to manage the
conversion of pandas data (if available) between its original and encoded forms.

Requirements:
    - Wrapyfi: Middleware communication wrapper (refer to the Wrapyfi documentation for installation instructions)
    - pandas: A data structures library for data analysis, time series, and statistics (refer to https://pandas.pydata.org/pandas-docs/stable/getting_started/install.html for installation instructions)
        Note: If pandas is not available, HAVE_PANDAS will be set to False and
        the plugin will be registered with no types.
        For pandas>=2.0, the object is pickled (slower but identical on decoding).
        For pandas<2.0, the object is encoded using base64 (faster but potentially non-identical on decoding).

    You can install the necessary packages using pip:
        ``pip install pandas``  # Basic installation of pandas
"""

import io
import pickle
import base64

import numpy as np

from wrapyfi.utils.core_utils import *

try:
    import pandas

    HAVE_PANDAS = True
except ImportError:
    HAVE_PANDAS = False


[docs] @PluginRegistrar.register( types=( None if not HAVE_PANDAS or (HAVE_PANDAS and pandas.__version__ < "2.0") else pandas.DataFrame.__mro__[:-1] + pandas.Series.__mro__[:-1] ) ) class PandasData(Plugin):
[docs] def __init__(self, **kwargs): """ Initialize the PandasData plugin. """ pass
[docs] def encode(self, obj, *args, **kwargs): """ Encode pandas data using pickle and base64. :param obj: Union[pandas.DataFrame, pandas.Series]: The pandas data to encode :param args: tuple: Additional arguments (not used) :param kwargs: dict: Additional keyword arguments (not used) :return: Tuple[bool, dict]: A tuple containing: - bool: Always True, indicating that the encoding was successful - dict: A dictionary containing: - '__wrapyfi__': A tuple containing the class name, pickled data string, and any buffer data """ buffers = [] obj_data = pickle.dumps(obj, protocol=5, buffer_callback=buffers.append) obj_buffers = list( map(lambda x: base64.b64encode(memoryview(x)).decode("ascii"), buffers) ) return True, dict( __wrapyfi__=( str(self.__class__.__name__), obj_data.decode("latin1"), *obj_buffers, ) )
[docs] def decode(self, obj_type, obj_full, *args, **kwargs): """ Decode a pickled and base64 encoded string back into pandas data. :param obj_type: type: The expected type of the decoded object (not used) :param obj_full: tuple: A tuple containing the pickled data string and any buffer data :param args: tuple: Additional arguments (not used) :param kwargs: dict: Additional keyword arguments (not used) :return: Tuple[bool, Union[pandas.DataFrame, pandas.Series]]: A tuple containing: - bool: Always True, indicating that the decoding was successful - Union[pandas.DataFrame, pandas.Series]: The decoded pandas data """ obj_data = obj_full[1].encode("latin1") obj_buffers = list( map(lambda x: base64.b64decode(x.encode("ascii")), obj_full[2:]) ) obj_data = bytearray(obj_data) for buf in obj_buffers: obj_data += buf return True, pickle.loads(obj_data, buffers=obj_buffers)
[docs] @PluginRegistrar.register( types=( None if not HAVE_PANDAS or (HAVE_PANDAS and pandas.__version__ >= "2.0") else pandas.DataFrame.__mro__[:-1] + pandas.Series.__mro__[:-1] ) ) class PandasLegacyData(Plugin):
[docs] def __init__(self, **kwargs): """ Initialize the PandasLegacyData plugin. """ pass
[docs] def encode(self, obj, *args, **kwargs): """ Encode pandas data into a base64 ASCII string. :param obj: Union[pandas.DataFrame, pandas.Series]: The pandas data to encode :param args: tuple: Additional arguments (not used) :param kwargs: dict: Additional keyword arguments (not used) :return: Tuple[bool, dict]: A tuple containing: - bool: Always True, indicating that the encoding was successful - dict: A dictionary containing: - '__wrapyfi__': A tuple containing the class name, encoded data string, and object type """ with io.BytesIO() as memfile: if isinstance(obj, pandas.DataFrame): obj_type = "DataFrame" obj.to_json(memfile, orient="split") elif isinstance(obj, pandas.Series): obj_type = "Series" obj.to_frame().to_json(memfile, orient="split") obj_data = base64.b64encode(memfile.getvalue()).decode("ascii") return True, dict( __wrapyfi__=(str(self.__class__.__name__), obj_data, obj_type) )
[docs] def decode(self, obj_type, obj_full, *args, **kwargs): """ Decode a base64 ASCII string back into pandas data. :param obj_type: type: The expected type of the decoded object (not used) :param obj_full: tuple: A tuple containing the encoded data string and object type :param args: tuple: Additional arguments (not used) :param kwargs: dict: Additional keyword arguments (not used) :return: Tuple[bool, Union[pandas.DataFrame, pandas.Series]]: A tuple containing: - bool: Always True, indicating that the decoding was successful - Union[pandas.DataFrame, pandas.Series]: The decoded pandas data """ with io.BytesIO(base64.b64decode(obj_full[1].encode("ascii"))) as memfile: obj = pandas.read_json(memfile, orient="split") obj_type = obj_full[2] if obj_type == "Series": obj = obj.iloc[:, 0] return True, obj