Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions bigframes/bigquery/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
json_extract,
json_extract_array,
json_extract_string_array,
json_keys,
json_query,
json_query_array,
json_set,
Expand Down Expand Up @@ -138,6 +139,7 @@
"json_extract",
"json_extract_array",
"json_extract_string_array",
"json_keys",
"json_query",
"json_query_array",
"json_set",
Expand Down
29 changes: 29 additions & 0 deletions bigframes/bigquery/_operations/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,35 @@ def json_value_array(
return input._apply_unary_op(ops.JSONValueArray(json_path=json_path))


def json_keys(
    input: series.Series,
    max_depth: Optional[int] = None,
) -> series.Series:
    """Returns the unique keys of each JSON object as an ARRAY of STRINGs.

    By default keys are searched recursively at every level of nesting, and
    nested keys are reported in dotted form (e.g. ``"b.c"`` in the example
    below). Use ``max_depth`` to limit how deep the search goes.

    **Examples:**

    >>> import bigframes.pandas as bpd
    >>> import bigframes.bigquery as bbq

    >>> s = bpd.Series(['{"b": {"c": 2}, "a": 1}'], dtype="json")
    >>> bbq.json_keys(s)
    0 ['a' 'b' 'b.c']
    dtype: list<item: string>[pyarrow]

    Args:
        input (bigframes.series.Series):
            The Series containing JSON data.
        max_depth (int, optional):
            Specifies the maximum depth of nested fields to search for keys. If
            not provided, keys at all levels are searched.

    Returns:
        bigframes.series.Series: A new Series containing arrays of keys from the input JSON.
    """
    return input._apply_unary_op(ops.JSONKeys(max_depth=max_depth))


def to_json(
input: series.Series,
) -> series.Series:
Expand Down
13 changes: 13 additions & 0 deletions bigframes/core/compile/ibis_compiler/scalar_op_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -1234,6 +1234,11 @@ def json_value_array_op_impl(x: ibis_types.Value, op: ops.JSONValueArray):
return json_value_array(json_obj=x, json_path=op.json_path)


@scalar_op_compiler.register_unary_op(ops.JSONKeys, pass_op=True)
def json_keys_op_impl(x: ibis_types.Value, op: ops.JSONKeys):
    """Compile JSONKeys by delegating to the JSON_KEYS builtin binding."""
    # NOTE(review): op.max_depth may be None, which compiles to a NULL argument;
    # confirm BigQuery's JSON_KEYS treats NULL max_depth as "all levels" rather
    # than propagating NULL.
    return json_keys(json_obj=x, max_depth=op.max_depth)


# Blob Ops
@scalar_op_compiler.register_unary_op(ops.obj_fetch_metadata_op)
def obj_fetch_metadata_op_impl(obj_ref: ibis_types.Value):
Expand Down Expand Up @@ -2059,6 +2064,14 @@ def to_json_string(value) -> ibis_dtypes.String: # type: ignore[empty-body]
"""Convert value to JSON-formatted string."""


@ibis_udf.scalar.builtin(name="json_keys")
def json_keys(  # type: ignore[empty-body]
    json_obj: ibis_dtypes.JSON,
    max_depth: ibis_dtypes.Int64,
) -> ibis_dtypes.Array[ibis_dtypes.String]:
    """Extracts unique JSON keys from a JSON expression.

    Binding for the BigQuery ``JSON_KEYS`` builtin; the body is intentionally
    empty because ibis compiles calls directly to the named SQL function.
    """


@ibis_udf.scalar.builtin(name="json_value")
def json_value( # type: ignore[empty-body]
json_obj: ibis_dtypes.JSON, json_path: ibis_dtypes.String
Expand Down
5 changes: 5 additions & 0 deletions bigframes/core/compile/sqlglot/expressions/json_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ def _(expr: TypedExpr, op: ops.JSONExtractStringArray) -> sge.Expression:
return sge.func("JSON_EXTRACT_STRING_ARRAY", expr.expr, sge.convert(op.json_path))


@register_unary_op(ops.JSONKeys, pass_op=True)
def _(expr: TypedExpr, op: ops.JSONKeys) -> sge.Expression:
    # Compiles JSONKeys to BigQuery's JSON_KEYS(json_expr, max_depth).
    # NOTE(review): when op.max_depth is None this emits JSON_KEYS(col, NULL)
    # (see the committed snapshot); confirm BigQuery interprets a NULL
    # max_depth as "search all levels" rather than returning NULL — otherwise
    # the second argument should be omitted when max_depth is None.
    return sge.func("JSON_KEYS", expr.expr, sge.convert(op.max_depth))


@register_unary_op(ops.JSONQuery, pass_op=True)
def _(expr: TypedExpr, op: ops.JSONQuery) -> sge.Expression:
return sge.func("JSON_QUERY", expr.expr, sge.convert(op.json_path))
Expand Down
2 changes: 2 additions & 0 deletions bigframes/operations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@
JSONExtract,
JSONExtractArray,
JSONExtractStringArray,
JSONKeys,
JSONQuery,
JSONQueryArray,
JSONSet,
Expand Down Expand Up @@ -381,6 +382,7 @@
"JSONExtract",
"JSONExtractArray",
"JSONExtractStringArray",
"JSONKeys",
"JSONQuery",
"JSONQueryArray",
"JSONSet",
Expand Down
17 changes: 17 additions & 0 deletions bigframes/operations/json_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,23 @@ def output_type(self, *input_types):
return input_type


@dataclasses.dataclass(frozen=True)
class JSONKeys(base_ops.UnaryOp):
    """Scalar op extracting the unique keys of a JSON value as list<string>."""

    name: typing.ClassVar[str] = "json_keys"
    # Maximum nesting depth to search for keys; None searches every level.
    max_depth: typing.Optional[int] = None

    def output_type(self, *input_types):
        """Return list<string> dtype for JSON input; reject any other dtype."""
        if input_types[0] != dtypes.JSON_DTYPE:
            raise TypeError(
                "Input type must be a valid JSON object or JSON-formatted string type."
                + f" Received type: {input_types[0]}"
            )
        element_type = dtypes.bigframes_dtype_to_arrow_dtype(dtypes.STRING_DTYPE)
        return pd.ArrowDtype(pa.list_(element_type))


@dataclasses.dataclass(frozen=True)
class JSONDecode(base_ops.UnaryOp):
name: typing.ClassVar[str] = "json_decode"
Expand Down
50 changes: 50 additions & 0 deletions tests/system/small/bigquery/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,3 +434,53 @@ def test_to_json_string_from_struct():
)

pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas())


def test_json_keys():
    """json_keys returns the sorted top-level keys of each JSON value."""
    series = bpd.Series(
        [
            '{"name": "Alice", "age": 30}',
            '{"city": "New York", "country": "USA", "active": true}',
            "{}",
            '{"items": [1, 2, 3]}',
        ],
        dtype=dtypes.JSON_DTYPE,
    )

    result = bbq.json_keys(series)

    expected = bpd.Series(
        [
            ["age", "name"],
            ["active", "city", "country"],
            [],
            ["items"],
        ],
        dtype=pd.ArrowDtype(pa.list_(pa.string())),
    )
    pd.testing.assert_series_equal(result.to_pandas(), expected.to_pandas())


def test_json_keys_with_max_depth():
    """max_depth=2 limits key discovery to two levels of nesting."""
    series = bpd.Series(
        [
            '{"user": {"name": "Bob", "details": {"id": 123, "status": "approved"}}}',
            '{"user": {"name": "Charlie"}}',
        ],
        dtype=dtypes.JSON_DTYPE,
    )

    result = bbq.json_keys(series, max_depth=2)

    expected = bpd.Series(
        [
            ["user", "user.details", "user.name"],
            ["user", "user.name"],
        ],
        dtype=pd.ArrowDtype(pa.list_(pa.string())),
    )
    pd.testing.assert_series_equal(result.to_pandas(), expected.to_pandas())


def test_json_keys_from_string_error():
    """json_keys rejects plain-string Series; only the JSON dtype is accepted."""
    string_series = bpd.Series(['{"a": 1, "b": 2}', '{"c": 3}'])

    with pytest.raises(TypeError):
        bbq.json_keys(string_series)
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
WITH `bfcte_0` AS (
SELECT
`json_col`
FROM `bigframes-dev`.`sqlglot_test`.`json_types`
), `bfcte_1` AS (
SELECT
*,
JSON_KEYS(`json_col`, NULL) AS `bfcol_1`,
JSON_KEYS(`json_col`, 2) AS `bfcol_2`
FROM `bfcte_0`
)
SELECT
`bfcol_1` AS `json_keys`,
`bfcol_2` AS `json_keys_w_max_depth`
FROM `bfcte_1`
13 changes: 13 additions & 0 deletions tests/unit/core/compile/sqlglot/expressions/test_json_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,19 @@ def test_json_extract_string_array(json_types_df: bpd.DataFrame, snapshot):
snapshot.assert_match(sql, "out.sql")


def test_json_keys(json_types_df: bpd.DataFrame, snapshot):
    """Snapshot the SQL emitted for JSONKeys with and without max_depth."""
    bf_df = json_types_df[["json_col"]]

    exprs = {
        "json_keys": ops.JSONKeys().as_expr("json_col"),
        "json_keys_w_max_depth": ops.JSONKeys(max_depth=2).as_expr("json_col"),
    }

    sql = utils._apply_ops_to_sql(bf_df, list(exprs.values()), list(exprs.keys()))
    snapshot.assert_match(sql, "out.sql")


def test_json_query(json_types_df: bpd.DataFrame, snapshot):
col_name = "json_col"
bf_df = json_types_df[[col_name]]
Expand Down