diff --git a/python/pyspark/sql.py b/python/pyspark/sql.py index fdd8034c98f7..8fdb3591cf22 100644 --- a/python/pyspark/sql.py +++ b/python/pyspark/sql.py @@ -2136,9 +2136,9 @@ def __getitem__(self, item): def __getattr__(self, name): """ Return the column by given name """ - if isinstance(name, basestring): - return Column(self._jdf.apply(name)) - raise AttributeError + if name.startswith("__"): + raise AttributeError(name) + return Column(self._jdf.apply(name)) def alias(self, name): """ Alias the current DataFrame """ diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py index 081a77fbb0be..bec1961f2639 100644 --- a/python/pyspark/tests.py +++ b/python/pyspark/tests.py @@ -23,6 +23,7 @@ from fileinput import input from glob import glob import os +import pydoc import re import shutil import subprocess @@ -1032,6 +1033,15 @@ def test_aggregator(self): from pyspark.sql import Aggregator as Agg # self.assertEqual((0, '100'), tuple(g.agg(Agg.first(df.key), Agg.last(df.value)).first())) + def test_help_command(self): + # Regression test for SPARK-5464 + rdd = self.sc.parallelize(['{"foo":"bar"}', '{"foo":"baz"}']) + df = self.sqlCtx.jsonRDD(rdd) + # render_doc() reproduces the help() exception without printing output + pydoc.render_doc(df) + pydoc.render_doc(df.foo) + pydoc.render_doc(df.take(1)) + class InputFormatTests(ReusedPySparkTestCase):