Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added linting for DBFS usage #1341

Merged
merged 9 commits into from
Apr 10, 2024
56 changes: 56 additions & 0 deletions src/databricks/labs/ucx/source_code/fsfinder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
from abc import ABC, abstractmethod
jimidle marked this conversation as resolved.
Show resolved Hide resolved
from collections.abc import Iterable
from databricks.labs.ucx.source_code.base import Advice, Linter

import ast

class DetectFSVisitor(ast.NodeVisitor):
    """
    Visitor that detects file system paths in Python code and checks them
    against a list of known deprecated paths.

    Every string literal whose text starts with one of ``fs_prefixes`` is
    recorded as an ``Advice``. Collected findings are read back through
    ``get_advices``.
    """

    def __init__(self):
        # Findings accumulated while walking the tree, in discovery order.
        self.advices: list[Advice] = []
        # A string literal starting with any of these is reported.
        self.fs_prefixes = ["/dbfs/mnt", "dbfs:/", "/mnt/"]

    def visit_Call(self, node):
        """Descend into a call node so that its string arguments are inspected."""
        self.generic_visit(node)

    def visit_Constant(self, node):
        """
        Record an advice when a string literal starts with a deprecated prefix.

        String literals are ``ast.Constant`` nodes. The older ``visit_Str``
        hook and the ``.s`` attribute are deprecated since Python 3.8 and
        removed in 3.12, so relying on them silently finds nothing there.
        """
        value = node.value
        # ast.Constant also carries numbers, bytes, None, ...; only text can be a path.
        # str.startswith accepts a tuple of candidates and returns False for an empty one.
        if isinstance(value, str) and value.startswith(tuple(self.fs_prefixes)):
            self.advices.append(
                Advice(
                    code='fs-finder',
                    message=f"Deprecated file system path: {value}",
                    start_line=node.lineno,
                    start_col=node.col_offset,
                    end_line=node.lineno,
                    # Span is the start column plus the length of the literal's text;
                    # surrounding quote characters are not counted.
                    end_col=node.col_offset + len(value),
                )
            )
        self.generic_visit(node)

    def get_advices(self) -> Iterable[Advice]:
        """Yield the advices collected so far, in the order they were found."""
        yield from self.advices

class FSFinderLinter(Linter):
    """Linter that runs ``DetectFSVisitor`` over Python source code."""

    def __init__(self):
        """Create the linter; no state is set up."""

    def name(self) -> str:
        """
        Return the identifier used for this linter, e.g. in reports.
        """
        linter_name = 'fs-finder-linter'
        return linter_name

    def lint(self, code: str) -> Iterable[Advice]:
        """
        Parse ``code`` and report the deprecated file system paths it contains.

        The source is parsed immediately, so an error raised by ``ast.parse``
        surfaces from this call. The findings are whatever
        ``DetectFSVisitor.get_advices`` returns after the tree has been walked.
        """
        parsed = ast.parse(code)
        finder = DetectFSVisitor()
        finder.visit(parsed)
        findings = finder.get_advices()
        return findings

3 changes: 2 additions & 1 deletion src/databricks/labs/ucx/source_code/languages.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,15 @@
from databricks.labs.ucx.source_code.base import Fixer, Linter, SequentialLinter
from databricks.labs.ucx.source_code.pyspark import SparkSql
from databricks.labs.ucx.source_code.queries import FromTable
from source_code.fsfinder import FSFinderLinter


class Languages:
def __init__(self, index: MigrationIndex):
self._index = index
from_table = FromTable(index)
self._linters = {
Language.PYTHON: SequentialLinter([SparkSql(from_table, index)]),
Language.PYTHON: SequentialLinter([SparkSql(from_table, index), FSFinderLinter()]),
Language.SQL: SequentialLinter([from_table]),
}
self._fixers: dict[Language, list[Fixer]] = {
Expand Down
6 changes: 6 additions & 0 deletions tests/unit/source_code/test_fsfinder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import pytest

from databricks.labs.ucx.source_code.base import Advisory, Deprecation
from databricks.labs.ucx.source_code.fsfinder import
from databricks.labs.ucx.source_code.queries import FromTable