From a7c152224f10adc0394ea171a64eb2d817dc6de0 Mon Sep 17 00:00:00 2001
From: Reynold Xin
Date: Wed, 17 Jun 2015 23:36:05 -0700
Subject: [PATCH 1/3] [SPARK-8218][SQL] Binary log math function update.

Some minor updates after merging #6725.
---
 python/pyspark/sql/functions.py                       | 11 ++++++++---
 .../apache/spark/sql/catalyst/expressions/math.scala  |  4 ++++
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 177fc196e083..fcf4a38c55d2 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -404,16 +404,21 @@ def when(condition, value):
     return Column(jc)
 
 
-@since(1.4)
-def log(col, base=math.e):
+@since(1.5)
+def log(arg1, arg2=None):
     """Returns the first argument-based logarithm of the second argument.
 
-    >>> df.select(log(df.age, 10.0).alias('ten')).map(lambda l: str(l.ten)[:7]).collect()
+    If there is only one argument, then this takes the natural logarithm of the argument.
+
+    >>> df.select(log(10.0, df.age).alias('ten')).map(lambda l: str(l.ten)[:7]).collect()
     ['0.30102', '0.69897']
 
     >>> df.select(log(df.age).alias('e')).map(lambda l: str(l.e)[:7]).collect()
     ['0.69314', '1.60943']
     """
+    if arg2 is None:
+        arg2 = arg1
+        arg1 = math.e
     sc = SparkContext._active_spark_context
     jc = sc._jvm.functions.log(base, _to_java_column(col))
     return Column(jc)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala
index 67cb0b508ca9..280258d69163 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala
@@ -258,6 +258,10 @@ case class Pow(left: Expression, right: Expression)
 
 case class Logarithm(left: Expression, right: Expression)
   extends BinaryMathExpression((c1, c2) => math.log(c2) / math.log(c1), "LOG") {
+
+  /**
+   * Natural log, i.e. using e as the base.
+   */
   def this(child: Expression) = {
     this(EulerNumber(), child)
   }
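
For reference, a minimal plain-Python sketch (no Spark needed) of the change-of-base identity that the Logarithm expression above evaluates, log_base(x) = ln(x) / ln(base), checked against the doctest output; the ages 2 and 5 are assumed from that output:

    import math

    def binary_log(base, x):
        # Same formula as BinaryMathExpression((c1, c2) => math.log(c2) / math.log(c1), "LOG")
        return math.log(x) / math.log(base)

    ages = [2, 5]  # assumed from the doctest output above
    print([str(binary_log(10.0, a))[:7] for a in ages])  # ['0.30102', '0.69897']
    print([str(math.log(a))[:7] for a in ages])          # ['0.69314', '1.60943']
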
From 76fc8de92260546153ad71efe644c92f083fe63e Mon Sep 17 00:00:00 2001
From: Reynold Xin
Date: Thu, 18 Jun 2015 13:02:04 -0700
Subject: [PATCH 2/3] Fixed arg.

---
 python/pyspark/sql/functions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index fcf4a38c55d2..7ff1abd68b41 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -420,7 +420,7 @@ def log(arg1, arg2=None):
         arg2 = arg1
         arg1 = math.e
     sc = SparkContext._active_spark_context
-    jc = sc._jvm.functions.log(base, _to_java_column(col))
+    jc = sc._jvm.functions.log(arg1, _to_java_column(arg2))
     return Column(jc)

From ab515428ca464f0c66b0335d6c1defb064b5aa52 Mon Sep 17 00:00:00 2001
From: Reynold Xin
Date: Thu, 18 Jun 2015 13:02:58 -0700
Subject: [PATCH 3/3] Use JVM log

---
 python/pyspark/sql/functions.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 7ff1abd68b41..acdb01d3d3f5 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -416,11 +416,11 @@ def log(arg1, arg2=None):
     >>> df.select(log(df.age).alias('e')).map(lambda l: str(l.e)[:7]).collect()
     ['0.69314', '1.60943']
     """
-    if arg2 is None:
-        arg2 = arg1
-        arg1 = math.e
     sc = SparkContext._active_spark_context
-    jc = sc._jvm.functions.log(arg1, _to_java_column(arg2))
+    if arg2 is None:
+        jc = sc._jvm.functions.log(_to_java_column(arg1))
+    else:
+        jc = sc._jvm.functions.log(arg1, _to_java_column(arg2))
     return Column(jc)
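
For context, the final patch dispatches on arity instead of substituting math.e on the Python side, so the natural-log default lives in one place: the JVM's single-argument log overload (backed by the Logarithm auxiliary constructor from the first patch). A minimal sketch of that dispatch pattern, where jvm_log_one and jvm_log_two are hypothetical stand-ins for the two JVM overloads, not real API names:

    def jvm_log_one(col):
        # Stand-in for the single-argument JVM overload: natural logarithm of col.
        return ("log", col)

    def jvm_log_two(base, col):
        # Stand-in for the two-argument JVM overload: logarithm of col in the given base.
        return ("log", base, col)

    def log(arg1, arg2=None):
        # Mirrors the final wrapper: one argument -> natural log via the
        # single-argument overload; two arguments -> log of arg2 in base arg1.
        if arg2 is None:
            return jvm_log_one(arg1)
        return jvm_log_two(arg1, arg2)

    assert log("age") == ("log", "age")
    assert log(10.0, "age") == ("log", 10.0, "age")
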