Source code for geosoft.gxpy.dataframe

"""
Table (records, fields) handling, inherits from Pandas (http://pandas.pydata.org/) base class.

:Classes:
    :`Data_frame`: dataframe that holds a table

.. seealso:: :class:`geosoft.gxapi.GXLTB`

.. note::

    Regression tests provide usage examples:
    `dataframe tests <https://github.com/GeosoftInc/gxpy/blob/master/geosoft/gxpy/tests/test_dataframe.py>`_

"""
import pandas as pd

import geosoft
import geosoft.gxapi as gxapi
from . import utility as gxu

__version__ = geosoft.__version__

def _t(s):
    return geosoft.gxpy.system.translate(s)

[docs]class DfException(geosoft.GXRuntimeError):
    """
    Exceptions from :mod:`geosoft.gxpy.dataframe`.

    .. versionadded:: 9.2
    """
    pass

[docs]def table_record(table, rec):
    """
    Return a dictionary of a single record from a table

    :param table:   table name
    :param rec:     record wanted
    :returns:       dictionary containing record values as strings

    .. versionadded:: 9.2
    """

    t = Data_frame(table, records=rec)
    return t.to_dict(orient='records')[0]

[docs]def table_column(table, col):
    """
    Return a dictionary of a column from a table

    :param table:   table name
    :param col:     column wanted
    :returns:       dictionary containing record values as strings

    .. versionadded:: 9.2
    """

    t = Data_frame(table, columns=col).to_dict(orient='index')
    d = {}
    for rec in t.keys():
        d[rec] = t[rec][col]
    return d

[docs]def Data_frame(initial=None, records=None, columns=None):
    """
    Pandas DataFrame from a Geosoft table.

    :parameters:
        :initial:   Geosoft table name, which is normally an ASCII csv file.  If the table cannot be
                    found in the project folder `user/csv` is searched, then the Geosoft `csv` folder.
        :records:   Record name to include, or a list of records to include.  If not specified all
                    records are included in the dataframe.
        :columns:   Column name to be included, or a list of column names to include.  If not specified all
                    columns are included in the dataframe.

    :raises:
        :DfException:    if no columns.records found in the table.  If only some fields are found the dataframe is
                         created with the found fields.
        :raises geosoft.gxapi.GXError:  if a requested record is not found.

    This returns a Pandas DataFrame instance, which can be accessed and used with standard Pandas
    calls.

    Column names from Geosoft table files are always uppercase, regardless
    of case used in the table file.

    Record/index names from Geosoft table files are case-sensitive.

    Example table file "rockcode.csv":

    .. code::

        / standard Geosoft rock codes
        CODE,LABEL,__DESCRIPTION,PATTERN,PAT_SIZE,PAT_DENSITY,PAT_THICKNESS,COLOR
        bau,BAU,BAUXITE,100,,,,RG49B181
        bif,BIF,"BANDED IRON FM",202,,,,R
        cal,CAL,CALCRETE,315,,,,B
        cbt,CBT,CARBONATITE,305,,,,R128G128B192

    .. code::

        include geosoft.gxpy as gxpy
        with gxpy.GXpy() as gx:
            df = gxpy.dataframe.Data_frame('rockcode')
            print(len(df))
            print(df.loc['bif', 'DESCRIPTION']) # "BANDED IRON FM"
            print(df.loc['bif'][1])             # "BANDED IRON FM"
            print(df.iloc[1,0])                 # "BIF"
            print(df.loc['cal', 'PATTERN'])     # "315"

    .. versionadded:: 9.2
    .. versionchanged:: 9.4

    """
    if not type(initial) is str:
        raise DfException(_t('Only Geosoft tables are supported.'))

    df = pd.DataFrame()

    if initial is None:
        return df

    lst = gxapi.GXLST.create(geosoft.gxpy.MAX_LST)
    sr = gxapi.str_ref()

    if records is None:
        try:
            ltb = gxapi.GXLTB.create(initial, 0, 1, '')
        except geosoft.gxapi.GXError as e:
            raise DfException(str(e))
    else:
        if type(records) is str:
            if not records:
                raise DfException(_t('Empty records string.'))
            try:
                ltb = gxapi.GXLTB.create(initial, 0, 1, records)
            except geosoft.gxapi.GXError as e:
                raise DfException(_t('Invalid table \'{}\' ({})').format(initial, str(e)))
            except geosoft.gxapi.GXAPIError as e:
                raise DfException(_t('Record \'{}\' not in \'{}\' ({})').format(records, initial, str(e)))
            records = None
        else:
            ltb = gxapi.GXLTB.create(initial, 0, 1, '')

    col_indexes = []
    for i in range(1, ltb.fields()):
        ltb.get_field(i, sr)
        if columns is None:
            incl = True
        elif type(columns) is str:
            incl = sr.value == columns
        else:
            incl = sr.value in columns
        if incl:
            df[sr.value] = ()
            col_indexes.append(i)

    if len(col_indexes) == 0:
        raise DfException(_t('Table has no columns or \'{}\' column(s) not found.'.format(columns)))

    if records is None:
        ltb.get_lst(0, lst)
        keys = list(gxu.dict_from_lst(lst, True))
        vlst = list(df.columns)
        for j in range(len(keys)):
            nf = 0
            for i in col_indexes:
                ltb.get_string(j, i, sr)
                vlst[nf] = sr.value
                nf += 1
            df.loc[keys[j]] = vlst

    else:  # selective read
        vlst = list(df.columns)
        for rec in records:
            j = ltb.find_key(rec)
            nf = 0
            for i in col_indexes:
                ltb.get_string(j, i, sr)
                vlst[nf] = sr.value
                nf += 1
            df.loc[rec] = vlst

    return df