"""
grd2xyz - Convert grid to data table
"""
from typing import Literal
import numpy as np
import pandas as pd
import xarray as xr
from pygmt.clib import Session
from pygmt.exceptions import GMTInvalidInput
from pygmt.helpers import (
build_arg_list,
fmt_docstring,
kwargs_to_strings,
use_alias,
validate_output_table_type,
)
__doctest_skip__ = ["grd2xyz"]
[docs]
@fmt_docstring
@use_alias(
C="cstyle",
R="region",
V="verbose",
W="weight",
Z="convention",
b="binary",
d="nodata",
f="coltypes",
h="header",
o="outcols",
s="skiprows",
)
@kwargs_to_strings(R="sequence", o="sequence_comma")
def grd2xyz(
grid,
output_type: Literal["pandas", "numpy", "file"] = "pandas",
outfile: str | None = None,
**kwargs,
) -> pd.DataFrame | np.ndarray | None:
r"""
Convert grid to data table.
Read a grid and output xyz-triplets as a :class:`numpy.ndarray`,
:class:`pandas.DataFrame`, or ASCII file.
Full option list at :gmt-docs:`grd2xyz.html`
{aliases}
Parameters
----------
{grid}
{output_type}
{outfile}
cstyle : str
[**f**\|\ **i**].
Replace the x- and y-coordinates on output with the corresponding
column and row numbers. These start at 0 (C-style counting); append
**f** to start at 1 (Fortran-style counting). Alternatively, append
**i** to write just the two columns *index* and *z*, where *index*
is the 1-D indexing that GMT uses when referring to grid nodes.
{region}
Adding ``region`` will select a subsection of the grid. If this
subsection exceeds the boundaries of the grid, only the common region
will be output.
weight : str
[**a**\ [**+u**\ *unit*]\|\ *weight*].
Write out *x,y,z,w*\ , where *w* is the supplied *weight* (or 1 if not
supplied) [Default writes *x,y,z* only]. Choose **a** to compute
weights equal to the area each node represents. For Cartesian grids
this is simply the product of the *x* and *y* increments (except for
gridline-registered grids at all sides [half] and corners [quarter]).
For geographic grids we default to a length unit of **k**. Change
this by appending **+u**\ *unit*. For such grids, the area
varies with latitude and also sees special cases for
gridline-registered layouts at sides, corners, and poles.
{verbose}
convention : str
[*flags*].
Write a 1-column ASCII [or binary] table. Output will be organized
according to the specified ordering convention contained in *flags*.
If data should be written by rows, make *flags* start with
**T** (op) if first row is y = ymax or
**B** (ottom) if first row is y = ymin. Then,
append **L** or **R** to indicate that first element should start at
left or right end of row. Likewise for column formats: start with
**L** or **R** to position first column, and then append **T** or
**B** to position first element in a row. For gridline registered
grids: If grid is periodic in x but the written data should not
contain the (redundant) column at x = xmax, append **x**. For grid
periodic in y, skip writing the redundant row at y = ymax by
appending **y**. If the byte-order needs to be swapped, append
**w**. Select one of several data types (all binary except **a**):
- **a**: ASCII representation of a single item per record
- **c**: int8_t, signed 1-byte character
- **u**: uint8_t, unsigned 1-byte character
- **h**: int16_t, short 2-byte integer
- **H**: uint16_t, unsigned short 2-byte integer
- **i**: int32_t, 4-byte integer
- **I**: uint32_t, unsigned 4-byte integer
- **l**: int64_t, long (8-byte) integer
- **L**: uint64_t, unsigned long (8-byte) integer
- **f**: 4-byte floating point single precision
- **d**: 8-byte floating point double precision
Default format is scanline orientation of ASCII numbers: **TLa**.
{binary}
{nodata}
{coltypes}
{header}
{outcols}
{skiprows}
Returns
-------
ret
Return type depends on ``outfile`` and ``output_type``:
- ``None`` if ``outfile`` is set (output will be stored in the file set by
``outfile``)
- :class:`pandas.DataFrame` or :class:`numpy.ndarray` if ``outfile`` is not set
(depends on ``output_type``)
Example
-------
>>> import pygmt
>>> # Load a grid of @earth_relief_30m data, with a longitude range of
>>> # 10° E to 30° E, and a latitude range of 15° N to 25° N
>>> grid = pygmt.datasets.load_earth_relief(
... resolution="30m", region=[10, 30, 15, 25]
... )
>>> # Create a pandas.DataFrame with the xyz data from an input grid
>>> xyz_dataframe = pygmt.grd2xyz(grid=grid, output_type="pandas")
>>> xyz_dataframe.head(n=2)
lon lat z
0 10.0 25.0 965.5
1 10.5 25.0 876.5
"""
output_type = validate_output_table_type(output_type, outfile=outfile)
if kwargs.get("o") is not None and output_type == "pandas":
msg = (
"If 'outcols' is specified, 'output_type' must be either 'numpy' "
"or 'file'."
)
raise GMTInvalidInput(msg)
# Set the default column names for the pandas DataFrame header.
column_names: list[str] = ["x", "y", "z"]
# Let output pandas column names match input DataArray dimension names
if output_type == "pandas" and isinstance(grid, xr.DataArray):
# Reverse the dims because it is rows, columns ordered.
column_names = [str(grid.dims[1]), str(grid.dims[0]), str(grid.name)]
with Session() as lib:
with (
lib.virtualfile_in(check_kind="raster", data=grid) as vingrd,
lib.virtualfile_out(kind="dataset", fname=outfile) as vouttbl,
):
lib.call_module(
module="grd2xyz",
args=build_arg_list(kwargs, infile=vingrd, outfile=vouttbl),
)
return lib.virtualfile_to_dataset(
vfname=vouttbl, output_type=output_type, column_names=column_names
)