Test dense rank

D Daniel Verlan event 2023-12-06 visibility 51
more_vert

Code description

No description

Code snippet

from pyspark.sql import SparkSession, Window
from datetime import datetime
from pyspark.sql.functions import rank, dense_rank, desc

# Demo script: add rank() and dense_rank() columns to a small transaction
# table, ranking amounts (largest first) within each transaction date.
app_name = "PySpark rank and dense_rank Window Function"
master = "local"

# Obtain (or create) the Spark session configured with the name/master above.
spark = (
    SparkSession.builder
    .appName(app_name)
    .master(master)
    .getOrCreate()
)

# Only show warnings and above from Spark's own logging.
spark.sparkContext.setLogLevel("WARN")

# Sample rows as [account, amount, transaction date]; the date strings are
# parsed into datetime objects in one place.
data = [
    [acct, amt, datetime.strptime(txn_dt, '%Y-%m-%d')]
    for acct, amt, txn_dt in (
        (101, 10.01, '2021-01-01'),
        (101, 102.01, '2021-01-01'),
        (102, 93.0, '2021-01-01'),
        (103, 913.1, '2021-01-02'),
        (101, 900.56, '2021-01-03'),
        (102, 900.56, '2021-01-03'),
        (103, 80.0, '2021-01-03'),
    )
]

df = spark.createDataFrame(data, ['ACCT', 'AMT', 'TXN_DT'])
df.show()

# One window per transaction date, ordered by amount descending, with the
# frame spelled out explicitly as "unbounded preceding .. current row".
window = (
    Window.partitionBy('TXN_DT')
    .orderBy(desc("AMT"))
    .rowsBetween(Window.unboundedPreceding, Window.currentRow)
)

# Compute both ranking columns over the same window so they can be compared
# side by side in the output.
df = df.withColumn('rank', rank().over(window))
df = df.withColumn('dense_rank', dense_rank().over(window))

df.show()
More from Kontext
comment Comments
No comments yet.

Please log in or register to comment.

account_circle Log in person_add Register

Log in with external accounts