Aggregate functions
|
approx_count_distinct
|
|
approx_count_distinct(e: Column): Column approx_count_distinct(columnName: String): Column approx_count_distinct(e: Column, rsd: Double): Column approx_count_distinct(columnName: String, rsd: Double): Column |
|
avg
|
|
avg(e: Column): Column avg(columnName: String): Column |
|
collect_list
|
|
collect_list(e: Column): Column collect_list(columnName: String): Column |
|
collect_set
|
|
collect_set(e: Column): Column collect_set(columnName: String): Column |
|
corr
|
|
corr(column1: Column, column2: Column): Column corr(columnName1: String, columnName2: String): Column |
|
count
|
|
count(e: Column): Column count(columnName: String): TypedColumn[Any, Long] |
|
countDistinct
|
|
countDistinct(expr: Column, exprs: Column*): Column countDistinct(columnName: String, columnNames: String*): Column |
|
covar_pop
|
|
covar_pop(column1: Column, column2: Column): Column covar_pop(columnName1: String, columnName2: String): Column |
|
covar_samp
|
|
covar_samp(column1: Column, column2: Column): Column covar_samp(columnName1: String, columnName2: String): Column |
|
first
|
|
first(e: Column): Column first(e: Column, ignoreNulls: Boolean): Column first(columnName: String): Column first(columnName: String, ignoreNulls: Boolean): Column |
Returns the first value in a group. Returns the first non-null value when the ignoreNulls flag is on. If all values are null, then returns null.
|
grouping
|
|
grouping(e: Column): Column grouping(columnName: String): Column |
Indicates whether a given column is aggregated or not
|
grouping_id
|
|
grouping_id(cols: Column*): Column grouping_id(colName: String, colNames: String*): Column |
Computes the level of grouping
|
kurtosis
|
|
kurtosis(e: Column): Column kurtosis(columnName: String): Column |
|
last
|
|
last(e: Column, ignoreNulls: Boolean): Column last(columnName: String, ignoreNulls: Boolean): Column last(e: Column): Column last(columnName: String): Column |
|
max
|
|
max(e: Column): Column max(columnName: String): Column |
|
mean
|
|
mean(e: Column): Column mean(columnName: String): Column |
|
min
|
|
min(e: Column): Column min(columnName: String): Column |
|
skewness
|
|
skewness(e: Column): Column skewness(columnName: String): Column |
|
stddev
|
|
stddev(e: Column): Column stddev(columnName: String): Column |
|
stddev_pop
|
|
stddev_pop(e: Column): Column stddev_pop(columnName: String): Column |
|
stddev_samp
|
|
stddev_samp(e: Column): Column stddev_samp(columnName: String): Column |
|
sum
|
|
sum(e: Column): Column sum(columnName: String): Column |
|
sumDistinct
|
|
sumDistinct(e: Column): Column sumDistinct(columnName: String): Column |
|
variance
|
|
variance(e: Column): Column variance(columnName: String): Column |
|
var_pop
|
|
var_pop(e: Column): Column var_pop(columnName: String): Column |
|
var_samp
|
|
var_samp(e: Column): Column var_samp(columnName: String): Column |
|
Collection functions
|
array_contains
|
|
array_contains(column: Column, value: Any): Column |
|
array_distinct
|
|
array_distinct(e: Column): Column |
|
array_except
|
|
array_except(col1: Column, col2: Column): Column |
|
array_intersect
|
|
array_intersect(col1: Column, col2: Column): Column |
|
array_join
|
|
array_join(column: Column, delimiter: String): Column array_join(column: Column, delimiter: String, nullReplacement: String): Column |
|
array_max
|
|
array_max(e: Column): Column |
|
array_min
|
|
array_min(e: Column): Column |
|
array_position
|
|
array_position(column: Column, value: Any): Column |
|
array_remove
|
|
array_remove(column: Column, element: Any): Column |
|
array_repeat
|
|
array_repeat(e: Column, count: Int): Column array_repeat(left: Column, right: Column): Column |
|
array_sort
|
|
array_sort(e: Column): Column |
|
array_union
|
|
array_union(col1: Column, col2: Column): Column |
|
arrays_zip
|
|
arrays_zip(e: Column*): Column |
|
arrays_overlap
|
|
arrays_overlap(a1: Column, a2: Column): Column |
|
element_at
|
|
element_at(column: Column, value: Any): Column |
|
explode
|
|
explode(e: Column): Column |
|
explode_outer
|
|
explode_outer(e: Column): Column |
Creates a new row for each element in the given array or map column. If the array/map is null or empty then null is produced.
|
flatten
|
|
flatten(e: Column): Column |
|
from_json
|
|
from_json(e: Column, schema: Column): Column (new in 2.4.0) from_json(e: Column, schema: DataType): Column from_json(e: Column, schema: DataType, options: Map[String, String]): Column from_json(e: Column, schema: String, options: Map[String, String]): Column from_json(e: Column, schema: StructType): Column from_json(e: Column, schema: StructType, options: Map[String, String]): Column |
Parses a column with a JSON string into a StructType or ArrayType of StructType elements with the specified schema.
|
map_concat
|
|
map_concat(cols: Column*): Column |
|
map_from_entries
|
|
map_from_entries(e: Column): Column |
|
map_keys
|
|
map_keys(e: Column): Column |
|
map_values
|
|
map_values(e: Column): Column |
|
posexplode
|
|
posexplode(e: Column): Column |
|
posexplode_outer
|
|
posexplode_outer(e: Column): Column |
|
reverse
|
|
reverse(e: Column): Column |
Returns a reversed string or an array with reverse order of elements
Note
|
Support for reversing arrays is new in 2.4.0.
|
|
schema_of_json
|
|
schema_of_json(json: Column): Column schema_of_json(json: String): Column |
|
sequence
|
|
sequence(start: Column, stop: Column): Column sequence(start: Column, stop: Column, step: Column): Column |
|
shuffle
|
|
shuffle(e: Column): Column |
|
size
|
Returns the size of the given array or map. Returns -1 if the input is null.
|
slice
|
|
slice(x: Column, start: Int, length: Int): Column |
|
Date and time functions
|
current_date
|
|
current_timestamp
|
|
current_timestamp(): Column |
|
from_utc_timestamp
|
|
from_utc_timestamp(ts: Column, tz: String): Column from_utc_timestamp(ts: Column, tz: Column): Column (new in 2.4.0) |
|
months_between
|
|
months_between(end: Column, start: Column): Column months_between(end: Column, start: Column, roundOff: Boolean): Column (new in 2.4.0) |
|
to_date
|
|
to_date(e: Column): Column to_date(e: Column, fmt: String): Column |
|
to_timestamp
|
|
to_timestamp(s: Column): Column to_timestamp(s: Column, fmt: String): Column |
|
to_utc_timestamp
|
|
to_utc_timestamp(ts: Column, tz: String): Column to_utc_timestamp(ts: Column, tz: Column): Column (new in 2.4.0) |
|
unix_timestamp
|
Converts current or specified time to Unix timestamp (in seconds)
|
unix_timestamp(): Column unix_timestamp(s: Column): Column unix_timestamp(s: Column, p: String): Column |
|
window
|
Generates time windows: tumbling windows by default, or sliding windows when slideDuration is given
|
window( timeColumn: Column, windowDuration: String): Column window( timeColumn: Column, windowDuration: String, slideDuration: String): Column window( timeColumn: Column, windowDuration: String, slideDuration: String, startTime: String): Column |
|
Math functions
|
bin
|
Converts the value of a long column to binary format
|
Regular functions (Non-aggregate functions)
|
array
|
|
broadcast
|
|
coalesce
|
Gives the first non-null value among the given columns, or null if all of them are null
|
col and column
|
Creating Columns
|
expr
|
|
lit
|
|
map
|
|
monotonically_increasing_id
|
Returns 64-bit integers that are guaranteed to be monotonically increasing and unique, but not consecutive.
|
struct
|
|
typedLit
|
|
when
|
|
String functions
|
split
|
|
upper
|
|
UDF functions
|
udf
|
Creating UDFs
|
callUDF
|
Executing a UDF by name with a variable-length list of columns
|
Window functions
|
cume_dist
|
Computes the cumulative distribution of records across window partitions
|
currentRow
|
|
dense_rank
|
Computes the rank of records per window partition, without gaps in the ranking when there are ties
|
lag
|
|
lag(e: Column, offset: Int): Column lag(columnName: String, offset: Int): Column lag(columnName: String, offset: Int, defaultValue: Any): Column lag(e: Column, offset: Int, defaultValue: Any): Column |
|
lead
|
|
lead(columnName: String, offset: Int): Column lead(e: Column, offset: Int): Column lead(columnName: String, offset: Int, defaultValue: Any): Column lead(e: Column, offset: Int, defaultValue: Any): Column |
|
ntile
|
|
percent_rank
|
Computes the relative rank (i.e. percentile) of records per window partition
|
rank
|
Computes the rank of records per window partition
|
row_number
|
Computes the sequential numbering per window partition
|
unboundedFollowing
|
|
unboundedFollowing(): Column |
|
unboundedPreceding
|
|
unboundedPreceding(): Column |
|