Source code for pygmt.src.grdhisteq

"""
grdhisteq - Perform histogram equalization for a grid.
"""
import warnings

import numpy as np
import pandas as pd
from pygmt.clib import Session
from pygmt.exceptions import GMTInvalidInput
from pygmt.helpers import (
    GMTTempFile,
    build_arg_string,
    fmt_docstring,
    kwargs_to_strings,
    use_alias,
)
from pygmt.io import load_dataarray

__doctest_skip__ = ["grdhisteq.*"]


[docs]class grdhisteq:  # pylint: disable=invalid-name
    r"""
    Perform histogram equalization for a grid.

    Two common use cases of :meth:`pygmt.grdhisteq` are to find data values
    that divide a grid into patches of equal area
    (:meth:`pygmt.grdhisteq.compute_bins`) or to write a grid with
    statistics based on some kind of cumulative distribution function
    (:meth:`pygmt.grdhisteq.equalize_grid`).

    Histogram equalization provides a way to highlight data that has most
    values clustered in a small portion of the dynamic range, such as a
    grid of flat topography with a mountain in the middle. Ordinary gray
    shading of this grid (using :meth:`pygmt.Figure.grdimage` or
    :meth:`pygmt.Figure.grdview`) with a linear mapping from topography to
    graytone will result in most of the image being very dark gray, with the
    mountain being almost white. :meth:`pygmt.grdhisteq.compute_bins` can
    provide a list of data values that divide the data range into divisions
    which have an equal area in the image [Default is 16 if ``divisions`` is
    not set]. The :class:`pandas.DataFrame` or ASCII file output can be used to
    make a colormap with :meth:`pygmt.makecpt` and an image with
    :meth:`pygmt.Figure.grdimage` that has all levels of gray occuring
    equally.

    :meth:`pygmt.grdhisteq.equalize_grid` provides a way to write a grid with
    statistics based on a cumulative distribution function. In this
    application, the ``outgrid`` has relative highs and lows in the same
    (x,y) locations as the ``grid``, but the values are changed to reflect
    their place in the cumulative distribution.
    """

    @staticmethod
    @fmt_docstring
    @use_alias(
        C="divisions",
        D="outfile",
        G="outgrid",
        R="region",
        N="gaussian",
        Q="quadratic",
        V="verbose",
        h="header",
    )
    @kwargs_to_strings(R="sequence")
    def _grdhisteq(grid, output_type, **kwargs):
        r"""
        Perform histogram equalization for a grid.

        Must provide ``outfile`` or ``outgrid``.

        Full option list at :gmt-docs:`grdhisteq.html`

        {aliases}

        Parameters
        ----------
        grid : str or xarray.DataArray
            The file name of the input grid or the grid loaded as a DataArray.
        outgrid : str or bool or None
            The name of the output netCDF file with extension .nc to store the
            grid in.
        outfile : str or bool or None
            The name of the output ASCII file to store the results of the
            histogram equalization in.
        output_type: str
            Determines the output type. Use "file", "xarray", "pandas", or
            "numpy".
        divisions : int
            Set the number of divisions of the data range [Default is 16].

        {R}
        {V}
        {h}

        Returns
        -------
        ret: pandas.DataFrame or xarray.DataArray or None
            Return type depends on whether the ``outgrid`` parameter is set:

            - xarray.DataArray if ``output_type`` is "xarray""
            - numpy.ndarray if ``output_type`` is "numpy"
            - pandas.DataFrame if ``output_type`` is "pandas"
            - None if ``output_type`` is "file" (output is stored in
              ``outgrid`` or ``outfile``)

        See Also
        -------
        :meth:`pygmt.grd2cpt`
        """

        with Session() as lib:
            file_context = lib.virtualfile_from_data(check_kind="raster", data=grid)
            with file_context as infile:
                arg_str = " ".join([infile, build_arg_string(kwargs)])
                lib.call_module("grdhisteq", arg_str)

        if output_type == "file":
            return None
        if output_type == "xarray":
            return load_dataarray(kwargs["G"])

        result = pd.read_csv(
            filepath_or_buffer=kwargs["D"],
            sep="\t",
            header=None,
            names=["start", "stop", "bin_id"],
            dtype={
                "start": np.float32,
                "stop": np.float32,
                "bin_id": np.uint32,
            },
        )
        if output_type == "numpy":
            return result.to_numpy()

        return result.set_index("bin_id")

[docs]    @staticmethod
    @fmt_docstring
    def equalize_grid(
        grid,
        *,
        outgrid=None,
        divisions=None,
        region=None,
        gaussian=None,
        quadratic=None,
        verbose=None,
    ):
        r"""
        Perform histogram equalization for a grid.

        :meth:`pygmt.grdhisteq.equalize_grid` provides a way to write a grid
        with statistics based on a cumulative distribution function. The
        ``outgrid`` has relative highs and lows in the same (x,y) locations as
        the ``grid``, but the values are changed to reflect their place in the
        cumulative distribution.

        Full option list at :gmt-docs:`grdhisteq.html`

        Parameters
        ----------
        grid : str or xarray.DataArray
            The file name of the input grid or the grid loaded as a DataArray.
        outgrid : str or None
            The name of the output netCDF file with extension .nc to store the
            grid in.
        divisions : int
            Set the number of divisions of the data range.
        gaussian : bool or int or float
            *norm*.
            Produce an output grid with standard normal scores using
            ``gaussian=True`` or force the scores to fall in the ±\ *norm*
            range.
        quadratic: bool
            Perform quadratic equalization [Default is linear].
        {R}
        {V}

        Returns
        -------
        ret: xarray.DataArray or None
            Return type depends on the ``outgrid`` parameter:

            - xarray.DataArray if ``outgrid`` is None
            - None if ``outgrid`` is a str (grid output is stored in
              ``outgrid``)

        Example
        -------
        >>> import pygmt
        >>> # Load a grid of @earth_relief_30m data, with an x-range of 10 to
        >>> # 30, and a y-range of 15 to 25
        >>> grid = pygmt.datasets.load_earth_relief(
        ...     resolution="30m", region=[10, 30, 15, 25]
        ... )
        >>> # Create a new grid with a Gaussian data distribution
        >>> grid = pygmt.grdhisteq.equalize_grid(grid=grid, gaussian=True)

        See Also
        -------
        :meth:`pygmt.grd2cpt`

        Note
        ----
        This method does a weighted histogram equalization for geographic
        grids to account for node area varying with latitude.
        """
        # Return an xarray.DataArray if ``outgrid`` is not set
        with GMTTempFile(suffix=".nc") as tmpfile:
            if isinstance(outgrid, str):
                output_type = "file"
            elif outgrid is None:
                output_type = "xarray"
                outgrid = tmpfile.name
            else:
                raise GMTInvalidInput("Must specify 'outgrid' as a string or None.")
            return grdhisteq._grdhisteq(
                grid=grid,
                output_type=output_type,
                outgrid=outgrid,
                divisions=divisions,
                region=region,
                gaussian=gaussian,
                quadratic=quadratic,
                verbose=verbose,
            )

[docs]    @staticmethod
    @fmt_docstring
    def compute_bins(
        grid,
        *,
        output_type="pandas",
        outfile=None,
        divisions=None,
        quadratic=None,
        verbose=None,
        region=None,
        header=None,
    ):
        r"""
        Perform histogram equalization for a grid.

        Histogram equalization provides a way to highlight data that has most
        values clustered in a small portion of the dynamic range, such as a
        grid of flat topography with a mountain in the middle. Ordinary gray
        shading of this grid (using :meth:`pygmt.Figure.grdimage` or
        :meth:`pygmt.Figure.grdview`) with a linear mapping from topography to
        graytone will result in most of the image being very dark gray, with
        the mountain being almost white. :meth:`pygmt.grdhisteq.compute_bins`
        can provide a list of data values that divide the data range into
        divisions which have an equal area in the image [Default is 16 if
        ``divisions`` is not set]. The :class:`pandas.DataFrame` or ASCII file
        output can be used to make a colormap with :meth:`pygmt.makecpt` and an
        image with :meth:`pygmt.Figure.grdimage` that has all levels of gray
        occuring equally.

        Full option list at :gmt-docs:`grdhisteq.html`

        Parameters
        ----------
        grid : str or xarray.DataArray
            The file name of the input grid or the grid loaded as a DataArray.
        outfile : str or bool or None
            The name of the output ASCII file to store the results of the
            histogram equalization in.
        output_type : str
            Determine the format the xyz data will be returned in [Default is
            ``pandas``]:

                - ``numpy`` - :class:`numpy.ndarray`
                - ``pandas``- :class:`pandas.DataFrame`
                - ``file`` - ASCII file (requires ``outfile``)
        divisions : int
            Set the number of divisions of the data range.
        quadratic : bool
            Perform quadratic equalization [Default is linear].
        {R}
        {V}
        {h}

        Returns
        -------
        ret : pandas.DataFrame or numpy.ndarray or None
            Return type depends on ``outfile`` and ``output_type``:

            - None if ``outfile`` is set (output will be stored in file set by
              ``outfile``)
            - :class:`pandas.DataFrame` or :class:`numpy.ndarray` if
              ``outfile`` is not set (depends on ``output_type``)

        Example
        -------
        >>> import pygmt
        >>> # Load a grid of @earth_relief_30m data, with an x-range of 10 to
        >>> # 30, and a y-range of 15 to 25
        >>> grid = pygmt.datasets.load_earth_relief(
        ...     resolution="30m", region=[10, 30, 15, 25]
        ... )
        >>> # Find elevation intervals that splits the data range into 5
        >>> # divisions, each of which have an equal area in the original grid.
        >>> bins = pygmt.grdhisteq.compute_bins(grid=grid, divisions=5)
        >>> print(bins)
                start    stop
        bin_id
        0       179.0   397.5
        1       397.5   475.5
        2       475.5   573.5
        3       573.5   710.5
        4       710.5  2103.0

        See Also
        -------
        :meth:`pygmt.grd2cpt`

        Note
        ----
        This method does a weighted histogram equalization for geographic
        grids to account for node area varying with latitude.
        """
        # Return a pandas.DataFrame if ``outfile`` is not set
        if output_type not in ["numpy", "pandas", "file"]:
            raise GMTInvalidInput(
                "Must specify 'output_type' either as 'numpy', 'pandas' or 'file'."
            )

        if header is not None and output_type != "file":
            raise GMTInvalidInput("'header' is only allowed with output_type='file'.")

        if isinstance(outfile, str) and output_type != "file":
            msg = (
                f"Changing 'output_type' from '{output_type}' to 'file' "
                "since 'outfile' parameter is set. Please use output_type='file' "
                "to silence this warning."
            )
            warnings.warn(message=msg, category=RuntimeWarning, stacklevel=2)
            output_type = "file"
        with GMTTempFile(suffix=".txt") as tmpfile:
            if output_type != "file":
                outfile = tmpfile.name
            return grdhisteq._grdhisteq(
                grid,
                output_type=output_type,
                outfile=outfile,
                divisions=divisions,
                quadratic=quadratic,
                verbose=verbose,
                region=region,
                header=header,
            )