-
Notifications
You must be signed in to change notification settings - Fork 2
/
tree_plus_cli.py
293 lines (264 loc) · 7.4 KB
/
tree_plus_cli.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
# tree_plus_cli.py
from typing import Optional, Union, Tuple
from time import perf_counter
import click
from tree_plus_src import ( # noqa E402
enable_debug,
debug_print,
__version__,
engine as tree_plus,
TreePlus,
DEFAULT_IGNORE,
DEFAULT_REGEX_TIMEOUT,
web,
)
from tree_plus_src.count_tokens_lines import TokenizerName
# Passed to click as context_settings: adds -h and -H as aliases for --help.
CONTEXT_SETTINGS = {"help_option_names": ["--help", "-h", "-H"]}

# NOTE(review): not referenced in the visible part of this file — presumably
# a default search query for a web mode; confirm before removing.
DEFAULT_QUERY = "best tree data structures"
def _resolve_tokenizer_name(
    tiktoken: bool, tokenizer_name: Optional[str]
) -> TokenizerName:
    """Translate --tiktoken / --tokenizer-name into a TokenizerName member.

    An explicit ``tokenizer_name`` always wins; the ``tiktoken`` flag is only
    consulted when no name was given (flag set -> GPT4O, otherwise WC).

    Raises:
        ValueError: if ``tokenizer_name`` is not None, "wc", "gpt4o" or "gpt4".
    """
    # A match statement would be tidier here, but py38 is still in the
    # CI/CD testing matrix, so this stays a plain if-chain.
    if tokenizer_name is None:
        return TokenizerName.GPT4O if tiktoken else TokenizerName.WC
    if tokenizer_name == "wc":
        # Previously only accepted together with --tiktoken; naming the
        # default tokenizer explicitly must not be an error.
        return TokenizerName.WC
    if tokenizer_name == "gpt4o":
        return TokenizerName.GPT4O
    if tokenizer_name == "gpt4":
        return TokenizerName.GPT4
    raise ValueError(f"unsupported {tiktoken=} {tokenizer_name=}")


@click.command(
    context_settings=CONTEXT_SETTINGS,
    epilog=f"""
\b
v({__version__}) --- https://github.com/bionicles/tree_plus/blob/main/README.md
""",
)
@click.option(
    "--ignore",
    "-i",
    "-I",
    multiple=True,
    help='Patterns to ignore, in quotes: -i "*.java"',
)
@click.option(
    "--override",
    "-o",
    "-O",
    is_flag=True,
    default=False,
    help='Override DEFAULT_IGNORE (includes ignored content): -o -i "*.java"',
)
@click.option(
    "--glob",
    "-g",
    "-G",
    multiple=True,
    help='Patterns to find, in quotes: -g "*.rs"',
)
@click.option(
    "--version",
    "-v",
    "-V",
    is_flag=True,
    default=False,
    help="Print the version and exit.",
)
@click.option(
    "--debug",
    "-d",
    "-D",
    is_flag=True,
    default=False,
    help="Enables $DEBUG_TREE_PLUS.",
)
@click.option(
    "--syntax",
    "-s",
    "-S",
    is_flag=True,
    default=False,
    help="Enables Syntax Highlighting (WIP).",
)
@click.option(
    "--concise",
    "-c",
    "-C",
    is_flag=True,
    default=False,
    help="Omit module components. (False)",
)
@click.option(
    "--yc",
    "--hn",
    is_flag=True,
    help="Include ycombinator (False)",
    default=False,
)
@click.option(
    "--number",
    "-n",
    "-N",
    help="number of results (--yc mode only, default 3)",
    default=3,
)
@click.option(
    "--max-depth",
    "-m",
    "-M",
    help="max number of steps (depth / level) from root (--yc mode only, default 3)",
    default=3,
)
@click.option(
    "--links",
    "-l",
    "-L",
    help="include links (web mode only, default False)",
    is_flag=True,
)
@click.option(
    "--tiktoken",
    "-t",
    help="a shorthand for tiktoken with the gpt4o tokenizer",
    is_flag=True,
    default=False,
)
@click.option(
    "--tokenizer-name",
    "-T",
    help="name of the tokenizer to use, for now only 'gpt4o' works",
    default=None,
    type=str,
)
@click.option(
    "--timeout",
    help=f"regex timeout in seconds (optional, default {DEFAULT_REGEX_TIMEOUT})",
    default=None,
    type=float,
)
@click.argument("paths", nargs=-1, type=click.UNPROCESSED)  # Accepts multiple arguments
def main(
    # these are NON-MUTUALLY-EXCLUSIVE OPTIONS
    glob: Optional[Tuple[str, ...]],
    paths: Optional[Union[str, Tuple[str, ...]]],
    ignore: Tuple[str, ...],
    override: bool,
    debug: bool,
    version: bool,
    syntax: bool,
    concise: bool,
    # web_action: Optional[Tuple[str, ...]],
    # query: Optional[Tuple[str, ...]],
    yc: bool,
    number: int,
    max_depth: int,
    links: bool,
    tiktoken: bool,
    tokenizer_name: Optional[str],
    timeout: Optional[float],
) -> None:
    """A `tree` util enhanced with tokens, lines, and components.

    Wrap patterns in quotes: -i "*.py" / -g "*.rs"

    Example Invocations:

    \b
    Show tree_plus_src and tests simultaneously
    > tree_plus tree_plus_src tests

    \b
    Show files matching "*.*s" within tests/more_languages
    > tree_plus -g "*.*s" tests/more_languages

    \b
    Ignore Java files
    > tree_plus -i "*.java" tests

    \b
    Override DEFAULT_IGNORE: Only ignore .ini files.
    > tree_plus -o -i "*.ini" tests/dot_dot

    \b
    Syntax Highlight python files in src and tests
    > tree_plus -s tree_plus_src/*.py tests/*.py

    \b
    Concise Mode (No Parsing)
    > tree_plus -c

    \b
    URL + Tag Categories for a website
    > tree_plus example.com

    \b
    URL + Tag Categories for multiple websites with a link tree
    > tree_plus example.com example.org -l

    \b
    Hacker News Mode (3 articles, max depth 3)
    > tree_plus --yc

    \b
    Hacker News Mode (6 articles, max depth 6, warning, slow!)
    > tree_plus --yc -n 6 -m 6

    \b
    Use the Tiktoken gpt4o Model Tokenizer to tokenize Rust files
    > tree_plus -t -g '*.rs'
    """
    # NOTE: the docstring above is the text click shows for --help, so
    # maintainer notes live in comments instead of in the docstring.
    #
    # Flow: handle --debug / --version, normalise the seeds, optionally add a
    # Hacker News tree, pick the tokenizer, build the tree via
    # tree_plus.from_seeds, render it, then print a one-line summary.
    # Raises ValueError for an unrecognised --tokenizer-name.
    start_time = perf_counter()

    if debug:
        enable_debug()
    if version:
        print(__version__)
        return
    debug_print(f"tree_plus main received {paths=} {ignore=} {glob=}")

    # Normalise `paths` into a tuple of seeds; anything that is neither a
    # str nor a tuple leaves the seed tuple empty.
    _paths: Tuple[Union[str, TreePlus], ...] = ()
    if isinstance(paths, str):
        _paths = (paths,)
    elif isinstance(paths, tuple):
        _paths = paths

    # Invariants on what click hands us for the multiple=True options.
    assert ignore is None or isinstance(
        ignore, tuple
    ), f"{ignore=} must be None or Tuple[str]"
    assert glob is None or isinstance(
        glob, tuple
    ), f"{glob=} must be None or Tuple[str]"

    # Remember what the user actually typed: the summary line reports it
    # rather than the DEFAULT_IGNORE fallback applied below.
    og_ignore = ignore
    if not ignore and not override:
        ignore = DEFAULT_IGNORE

    if yc:
        hacker_news_articles = web.articles_from_hacker_news(
            max_depth=max_depth, n_articles=number
        )
        article_comment_tree = tree_plus.from_hacker_news_articles(hacker_news_articles)
        _paths += (article_comment_tree,)

    _tokenizer_name = _resolve_tokenizer_name(tiktoken, tokenizer_name)

    # Guard against zero or negative timeouts: pass None downstream instead.
    if timeout is not None and timeout <= 0:
        timeout = None

    root = tree_plus.from_seeds(
        _paths,
        maybe_ignore=ignore,
        maybe_globs=glob,
        syntax_highlighting=syntax,
        override_ignore=override,
        concise=concise,
        tokenizer_name=_tokenizer_name,
        regex_timeout=timeout,
    )
    root.render(markup=True, highlight=True)
    if links:
        root.render_hrefs()

    # Summary footer: version, the options in effect, stats and elapsed time.
    yc_part = f" yc={yc} n={number} m={max_depth}" if yc else ""
    line1 = (
        "\n[link=https://github.com/bionicles/tree_plus/blob/main/README.md]"
        f"tree_plus[/link] v({__version__}){yc_part} ignore={og_ignore} globs={glob}"
    )
    line1 += f" {concise=} {paths=}" if concise else f" {syntax=} {paths=}"
    line2 = f"\n{root.stats()} in {perf_counter() - start_time:.02f} second(s)."
    tree_plus.safe_print(
        line1 + line2,
        # style="bold white on black",
        highlight=True,
        markup=True,
    )
# Run the click command only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()
# Reminder to those rewriting this in Rust:
# How many commands are there in the `tree_plus` CLI?
# There are ZERO. You just call `tree_plus` [options] [paths].
# If I were to rewrite this with clap, how many commands would there be? ZERO.
# How many subcommands would there be? Zero!