Test dense rank
Code description
Ranks transaction rows within each transaction date by amount (descending) using Spark's rank() and dense_rank() window functions, showing how ties are handled by each.
Code snippet
"""Demonstrate Spark's rank() and dense_rank() window functions.

Builds a small DataFrame of account transactions and ranks the rows
within each transaction date (TXN_DT) by amount (AMT), descending.
rank() leaves gaps after ties; dense_rank() does not.
"""
from datetime import datetime

from pyspark.sql import SparkSession, Window
from pyspark.sql.functions import dense_rank, desc, rank

app_name = "PySpark rank and dense_rank Window Function"
master = "local"

spark = SparkSession.builder \
    .appName(app_name) \
    .master(master) \
    .getOrCreate()
spark.sparkContext.setLogLevel("WARN")

# Sample transactions: [account id, amount, transaction date].
data = [
    [101, 10.01, datetime.strptime('2021-01-01', '%Y-%m-%d')],
    [101, 102.01, datetime.strptime('2021-01-01', '%Y-%m-%d')],
    [102, 93.0, datetime.strptime('2021-01-01', '%Y-%m-%d')],
    [103, 913.1, datetime.strptime('2021-01-02', '%Y-%m-%d')],
    [101, 900.56, datetime.strptime('2021-01-03', '%Y-%m-%d')],
    [102, 900.56, datetime.strptime('2021-01-03', '%Y-%m-%d')],
    [103, 80.0, datetime.strptime('2021-01-03', '%Y-%m-%d')],
]

df = spark.createDataFrame(data, ['ACCT', 'AMT', 'TXN_DT'])
df.show()

# NOTE: do NOT add an explicit rowsBetween(...) frame here. Ranking
# functions require the default frame (RANGE BETWEEN UNBOUNDED PRECEDING
# AND CURRENT ROW); forcing a ROWS frame makes Spark raise an
# AnalysisException ("Window Frame ... must match the required frame").
window = Window.partitionBy('TXN_DT').orderBy(desc("AMT"))

df = df.withColumn('rank', rank().over(window)) \
       .withColumn('dense_rank', dense_rank().over(window))
df.show()
copyright
This page is subject to Site terms.
comment Comments
No comments yet.