Skip to content

Commit fb1f614

Browse files
authored
Merge pull request #1079 from openzim/fcc_2.0.0
Add illustration setting to freecodecamp scraper
2 parents 8633243 + fb0157d commit fb1f614

File tree

2 files changed, with 24 additions (+24) and 9 deletions (-9).

dispatcher/backend/src/common/schemas/offliners/freecodecamp.py

+9
Original file line number | Diff line number | Diff line change
@@ -125,3 +125,12 @@ class Meta:
125125
data_key="zim-file",
126126
validate=validate_zim_filename,
127127
)
128+
129+
illustration = fields.Url(
130+
metadata={
131+
"label": "Illustration",
132+
"description": "URL for ZIM illustration. Freecodecamp default logo if "
133+
"missing",
134+
},
135+
required=False,
136+
)

recipesauto/src/recipesauto/shamela.py

+15 -9
Original file line number | Diff line number | Diff line change
@@ -243,7 +243,7 @@ def _get_category_include_regex(category: int):
243243
book = match.group(1)
244244
books.append(book)
245245

246-
return(f"^https:\\/\\/shamela\\.ws\\/(book\\/({"|".join(books)})($|\\/.*)|category\\/{category}|author\\/.+)" )
246+
return f"^https:\\/\\/shamela\\.ws\\/(book\\/({'|'.join(books)})($|\\/.*)|category\\/{category}|author\\/.+)"
247247

248248

249249
def get_expected_recipes() -> list[dict[str, Any]]:
@@ -265,10 +265,12 @@ def get_expected_recipes() -> list[dict[str, Any]]:
265265
"name": check_zim_name(f"shamela.ws_ar_{category_key}"),
266266
"output": "/output",
267267
"publisher": "openZIM",
268-
"scopeIncludeRx": _get_category_include_regex(category_data["number"]),
268+
"scopeIncludeRx": _get_category_include_regex(
269+
category_data["number"]
270+
),
269271
"scopeType": "custom",
270272
"seeds": f"https://shamela.ws/category/{category_data['number']}",
271-
"title": category_data['title'],
273+
"title": category_data["title"],
272274
"workers": "4",
273275
"zim-lang": "ara",
274276
"zimit-progress-file": "/output/task_progress.json",
@@ -281,12 +283,14 @@ def get_expected_recipes() -> list[dict[str, Any]]:
281283
"platform": "shamela",
282284
"resources": {
283285
"cpu": 3,
284-
"disk": category_data.get("disk", 107374182400), 100G by default
286+
"disk": category_data.get("disk", 107374182400), # 100G by default
285287
"memory": 4294967296,
286-
'shm': 1073741824,
288+
"shm": 1073741824,
287289
},
288290
"task_name": "zimit",
289-
"warehouse_path": "/zimit" if category_data.get("in_prod", False) else "/.hidden/dev",
291+
"warehouse_path": (
292+
"/zimit" if category_data.get("in_prod", False) else "/.hidden/dev"
293+
),
290294
},
291295
"enabled": True,
292296
"language": {
@@ -295,7 +299,9 @@ def get_expected_recipes() -> list[dict[str, Any]]:
295299
"name_native": "العربية",
296300
},
297301
"name": f"shamela.ws_ar_{category_key}-{category_data['number']}",
298-
"periodicity": "quarterly" if category_data.get("in_prod", False) else "manually",
302+
"periodicity": (
303+
"quarterly" if category_data.get("in_prod", False) else "manually"
304+
),
299305
"tags": [
300306
"shamela",
301307
],
@@ -307,5 +313,5 @@ def get_expected_recipes() -> list[dict[str, Any]]:
307313

308314
def _is_needed(category_key: Any, category_data: Any) -> bool:
309315
return True
310-
#return category_data["number"] in [1, 2, 3, 4, 5, 6, 34]
311-
#return category_data["number"] == 1
316+
# return category_data["number"] in [1, 2, 3, 4, 5, 6, 34]
317+
# return category_data["number"] == 1

0 commit comments

Comments
 (0)