Skip to content

Commit 38dc316

Browse files
authored
Added format and lint for Python codes (#30)
* Add boilerplate code for python format and lint * format all files
1 parent 946b3d0 commit 38dc316

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

67 files changed

+2434
-2014
lines changed

Diff for: .gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
.vscode

Diff for: Python/.flake8

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
[flake8]
2+
max-line-length = 80
3+
max-complexity = 40
4+
ignore =
5+
E203
6+
W503
7+
F841
8+
E501
9+
exclude =
10+
.eggs
11+
.git
12+
.tox
13+
__pycache__
14+
build
15+
dist
16+
venv

Diff for: Python/.isort.cfg

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Copyright 2023 Google LLC
2+
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
[settings]
16+
sections=FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER
17+
import_heading_stdlib=standard libraries
18+
import_heading_thirdparty=third party libraries
19+
include_trailing_comma=True
20+
indent=' '
21+
known_dfml=src
22+
dedup_headings=True
23+
line_length=80
24+
multi_line_output=3
25+
skip=./venv/,./venv-docs/,./.git/

Diff for: Python/.pre-commit-config.yaml

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Copyright 2023 Google LLC
2+
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
exclude: ^docs/notebooks/
16+
repos:
17+
- repo: https://github.com/ambv/black
18+
rev: 23.11.0
19+
hooks:
20+
- id: black
21+
args: ["--config=Python/pyproject.toml", "--check", "--diff"]
22+
- repo: https://github.com/pycqa/flake8
23+
rev: 6.1.0
24+
hooks:
25+
- id: flake8
26+
args: ["--config=Python/.flake8"]
27+
- repo: https://github.com/timothycrosley/isort
28+
rev: 5.12.0
29+
hooks:
30+
- id: isort

Diff for: Python/CONTRIBUTING.md

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# How to Contribute Python Examples
2+
3+
It is encouraged to install `make` to create your local development environment.
4+
5+
1. Create the local Python environment:
6+
```bash
7+
make init
8+
```
9+
2. Use `source venv/bin/activate` to activate venv
10+
3. Clean up the local enviroment:
11+
```bash
12+
make clean
13+
```
14+
4. Format the Python code:
15+
```bash
16+
make format
17+
```
18+
5. Run the Python code linter:
19+
```bash
20+
make lint
21+
```

Diff for: Python/Makefile

+74
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
# Copyright 2023 Google LLC
2+
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
SILENT:
16+
.PHONY:
17+
.DEFAULT_GOAL := help
18+
19+
define PRINT_HELP_PYSCRIPT
20+
import re, sys # isort:skip
21+
22+
matches = []
23+
for line in sys.stdin:
24+
match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line)
25+
if match:
26+
matches.append(match.groups())
27+
28+
for target, help in sorted(matches):
29+
print(" %-25s %s" % (target, help))
30+
endef
31+
export PRINT_HELP_PYSCRIPT
32+
33+
PYTHON = python$(PYTHON_VERSION)
34+
35+
ifndef TF_MODEL_URI
36+
MODEL_ENV := "TORCH"
37+
else
38+
MODEL_ENV := "TF"
39+
endif
40+
41+
help: ## Print this help
42+
@echo
43+
@echo " make targets:"
44+
@echo
45+
@$(PYTHON) -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST)
46+
47+
init-venv: ## Create virtual environment in venv folder
48+
@$(PYTHON) -m venv venv
49+
50+
init: init-venv ## Init virtual environment
51+
@./venv/bin/python3 -m pip install -U pip
52+
@./venv/bin/python3 -m pip install -r requirements.txt
53+
@./venv/bin/python3 -m pip install -r requirements.dev.txt
54+
@./venv/bin/python3 -m pre_commit install --install-hooks --overwrite
55+
@echo "use 'source venv/bin/activate' to activate venv "
56+
57+
format: ## Run formatter on source code
58+
@./venv/bin/python3 -m black --config=pyproject.toml .
59+
60+
lint: ## Run linter on source code
61+
@./venv/bin/python3 -m black --config=pyproject.toml --check .
62+
@./venv/bin/python3 -m flake8 --config=.flake8 .
63+
64+
clean-lite: ## Remove pycache files, pytest files, etc
65+
@rm -rf build dist .cache .coverage .coverage.* *.egg-info
66+
@find . -name .coverage | xargs rm -rf
67+
@find . -name .pytest_cache | xargs rm -rf
68+
@find . -name .tox | xargs rm -rf
69+
@find . -name __pycache__ | xargs rm -rf
70+
@find . -name *.egg-info | xargs rm -rf
71+
72+
clean: clean-lite ## Remove virtual environment, downloaded models, etc
73+
@rm -rf venv
74+
@echo "run 'make init'"

Diff for: Python/advanced/stateful_dofn.py

+76-64
Original file line numberDiff line numberDiff line change
@@ -12,79 +12,91 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
# standard libraries
1516
import json
1617
import logging
1718

19+
# third party libraries
1820
import apache_beam as beam
19-
from apache_beam import DoFn
20-
from apache_beam import Map
21-
from apache_beam import ParDo
21+
from apache_beam import DoFn, Map, ParDo
2222
from apache_beam.coders.coders import StrUtf8Coder
2323
from apache_beam.io.gcp.pubsub import ReadFromPubSub
2424
from apache_beam.options.pipeline_options import PipelineOptions
25-
from apache_beam.transforms.userstate import BagStateSpec
26-
from apache_beam.transforms.userstate import CombiningValueStateSpec
25+
from apache_beam.transforms.userstate import (
26+
BagStateSpec,
27+
CombiningValueStateSpec,
28+
)
2729

2830

2931
def run(argv=None):
30-
class StatefulDoFn(DoFn):
31-
"""
32-
GroupIntoBatches implements a similar logic.
33-
When using StatefulDoFns be careful of not keeping the state forever and clearing state. This example is OK
34-
because we know the keys are always incoming, but if we have sparse keys, we may keep the buffer up forever
35-
(e.g., we trigger every 100 elements but we only got 99 for that key. See `timer_dofn` for an example that
36-
would fix that)
37-
"""
38-
BUFFER_RIDES = BagStateSpec('rides', StrUtf8Coder())
39-
COUNT_STATE = CombiningValueStateSpec('count', combine_fn=sum)
40-
41-
def __init__(self):
42-
self.status_max_bag = {
43-
"pickup": 100,
44-
"enroute": 10000,
45-
"dropoff": 100
46-
}
47-
48-
def process(self,
49-
element,
50-
count_state=DoFn.StateParam(COUNT_STATE),
51-
ride_state=DoFn.StateParam(BUFFER_RIDES)):
52-
53-
ride_id = element[1]
54-
ride_status = element[0]
55-
56-
# Add ride id to bag
57-
ride_state.add(ride_id)
58-
59-
# Increase counter
60-
count_state.add(1)
61-
count = count_state.read()
62-
63-
max_size = self.status_max_bag[ride_status]
64-
65-
# If counter is over max bag size, release buffer
66-
if count > max_size:
67-
logging.info("Releasing buffer for key %s", element[0])
68-
for ride in ride_state.read():
69-
yield ride
70-
71-
# Clear states
72-
ride_state.clear()
73-
count_state.clear()
74-
75-
options = PipelineOptions(streaming=True)
76-
with beam.Pipeline(options=options) as p:
77-
topic = "projects/pubsub-public-data/topics/taxirides-realtime"
78-
79-
pubsub = (p | "Read Topic" >> ReadFromPubSub(topic=topic)
80-
| "Json Loads" >> Map(json.loads)
81-
# SDFn need KVs as input. They are applied in a Key and Window basis
82-
| "KV" >> Map(lambda x: (x["ride_status"], x["ride_id"]))
83-
)
84-
85-
(pubsub | "StatefulDoFn" >> ParDo(StatefulDoFn())
86-
| "Pass" >> Map(lambda x: x))
32+
class StatefulDoFn(DoFn):
33+
"""
34+
GroupIntoBatches implements a similar logic.
35+
When using StatefulDoFns be careful of not keeping the state forever
36+
and clearing state. This example is OK
37+
because we know the keys are always incoming, but if we have sparse
38+
keys, we may keep the buffer up forever
39+
(e.g., we trigger every 100 elements but we only got 99 for that key.
40+
See `timer_dofn` for an example that would fix that)
41+
"""
42+
43+
BUFFER_RIDES = BagStateSpec("rides", StrUtf8Coder())
44+
COUNT_STATE = CombiningValueStateSpec("count", combine_fn=sum)
45+
46+
def __init__(self):
47+
self.status_max_bag = {
48+
"pickup": 100,
49+
"enroute": 10000,
50+
"dropoff": 100,
51+
}
52+
53+
def process(
54+
self,
55+
element,
56+
count_state=DoFn.StateParam(COUNT_STATE),
57+
ride_state=DoFn.StateParam(BUFFER_RIDES),
58+
):
59+
ride_id = element[1]
60+
ride_status = element[0]
61+
62+
# Add ride id to bag
63+
ride_state.add(ride_id)
64+
65+
# Increase counter
66+
count_state.add(1)
67+
count = count_state.read()
68+
69+
max_size = self.status_max_bag[ride_status]
70+
71+
# If counter is over max bag size, release buffer
72+
if count > max_size:
73+
logging.info("Releasing buffer for key %s", element[0])
74+
for ride in ride_state.read():
75+
yield ride
76+
77+
# Clear states
78+
ride_state.clear()
79+
count_state.clear()
80+
81+
options = PipelineOptions(streaming=True)
82+
with beam.Pipeline(options=options) as p:
83+
topic = "projects/pubsub-public-data/topics/taxirides-realtime"
84+
85+
pubsub = (
86+
p
87+
| "Read Topic" >> ReadFromPubSub(topic=topic)
88+
| "Json Loads" >> Map(json.loads)
89+
# SDFn need KVs as input. They are applied in a Key and Window basis
90+
| "KV" >> Map(lambda x: (x["ride_status"], x["ride_id"]))
91+
)
92+
93+
(
94+
pubsub
95+
| "StatefulDoFn" >> ParDo(StatefulDoFn())
96+
| "Pass" >> Map(lambda x: x)
97+
)
98+
8799

88100
if __name__ == "__main__":
89-
logging.getLogger().setLevel(logging.INFO)
90-
run()
101+
logging.getLogger().setLevel(logging.INFO)
102+
run()

0 commit comments

Comments
 (0)