worldcereal_inference.json
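Before the process definition itself, a minimal usage sketch may help. The following Python snippet is not part of the stored file: the backend URL and the namespace under which the process is published are assumptions, while the job options are taken from the description in the JSON below.

    import openeo

    # Connect to an openEO backend (hypothetical choice of backend).
    connection = openeo.connect("openeo.dataspace.copernicus.eu").authenticate_oidc()

    # Build a datacube from the user-defined process. The namespace URL is a
    # placeholder for wherever this JSON definition is published.
    cube = connection.datacube_from_process(
        "worldcereal_inference",
        namespace="https://example.org/udp/worldcereal_inference.json",  # hypothetical
        spatial_extent={"west": 5.00, "south": 51.20, "east": 5.10, "north": 51.30},  # well under 20x20 km
        temporal_extent=["2020-11-01", "2021-10-31"],  # one year, ending at the end of the growing season
    )

    # Run as a batch job with the job options recommended in the description.
    job = cube.execute_batch(
        out_format="GTiff",
        job_options={
            "driver-memory": "4g",
            "executor-memory": "1g",
            "python-memory": "2g",
            "udf-dependency-archives": [
                "https://artifactory.vgt.vito.be/artifactory/auxdata-public/openeo/onnx_dependencies_1.16.3.zip#onnx_deps"
            ],
        },
    )
    job.get_results().download_files("croptype_output/")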
{
"description": "# WorldCereal croptype classification\n\nThis process computes crop types globally for maize, winter cereals, ...\n\nThe process works up to a maximum area of 20x20 km. \n\n## Inputs\n\n* spatial_extent: a bounding box covering the area of interest, with a maximum size of 20x20 km\n* temporal_extent: a list with two dates, start date should be one year before the end date. The end date is the end of the growing season to consider.\n\n## Output\n\nThe output is a raster in geotif format, following the legend which can be found at...\n\n## Job Options\n\nRecommended job options are:\n\n\"\"\"\n{\n \"driver-memory\": \"4g\",\n \"executor-memory\": \"1g\", \n \"python-memory\": \"2g\", \n \"udf-dependency-archives\": [\"https://artifactory.vgt.vito.be/artifactory/auxdata-public/openeo/onnx_dependencies_1.16.3.zip#onnx_deps\"],\n}\n\"\"\"\n\n## Scientific Background\n\nA peer reviewed publication [is available](https://doi.org/10.5194/essd-2023-184).\n",
"id": "worldcereal_inference",
"links": [
{
"href": "https://esa-worldcereal.org/",
"rel": "about",
"title": "ESA WorldCereal website"
}
],
"parameters": [
{
"description": "Spatial extent specified as a bounding box with 'west', 'south', 'east' and 'north' fields.",
"name": "spatial_extent",
"schema": {
"properties": {
"crs": {
"anyOf": [
{
"minimum": 1000,
"subtype": "epsg-code",
"title": "EPSG Code",
"type": "integer"
},
{
"subtype": "wkt2-definition",
"title": "WKT2 definition",
"type": "string"
}
],
"default": 4326,
"description": "Coordinate reference system of the extent, specified as as [EPSG code](http://www.epsg-registry.org/) or [WKT2 CRS string](http://docs.opengeospatial.org/is/18-010r7/18-010r7.html). Defaults to `4326` (EPSG code 4326) unless the client explicitly requests a different coordinate reference system."
},
"east": {
"description": "East (upper right corner, coordinate axis 1).",
"type": "number"
},
"north": {
"description": "North (upper right corner, coordinate axis 2).",
"type": "number"
},
"south": {
"description": "South (lower left corner, coordinate axis 2).",
"type": "number"
},
"west": {
"description": "West (lower left corner, coordinate axis 1).",
"type": "number"
}
},
"required": [
"west",
"south",
"east",
"north"
],
"subtype": "bounding-box",
"type": "object"
}
},
{
"description": "Temporal extent specified as two-element array with start and end date/date-time.",
"name": "temporal_extent",
"schema": {
"items": {
"anyOf": [
{
"format": "date-time",
"subtype": "date-time",
"type": "string"
},
{
"format": "date",
"subtype": "date",
"type": "string"
},
{
"type": "null"
}
]
},
"maxItems": 2,
"minItems": 2,
"subtype": "temporal-interval",
"type": "array",
"uniqueItems": true
}
}
],
"process_graph": {
"aggregatetemporalperiod1": {
"arguments": {
"data": {
"from_node": "apply1"
},
"dimension": "t",
"period": "month",
"reducer": {
"process_graph": {
"median1": {
"arguments": {
"data": {
"from_parameter": "data"
}
},
"process_id": "median",
"result": true
}
}
}
},
"process_id": "aggregate_temporal_period"
},
"aggregatetemporalperiod2": {
"arguments": {
"data": {
"from_node": "renamelabels3"
},
"dimension": "t",
"period": "month",
"reducer": {
"process_graph": {
"mean1": {
"arguments": {
"data": {
"from_parameter": "data"
}
},
"process_id": "mean",
"result": true
}
}
}
},
"process_id": "aggregate_temporal_period"
},
"apply1": {
"arguments": {
"data": {
"from_node": "renamelabels2"
},
"process": {
"process_graph": {
"linearscalerange1": {
"arguments": {
"inputMax": 65534,
"inputMin": 0,
"outputMax": 65534,
"outputMin": 0,
"x": {
"from_parameter": "x"
}
},
"process_id": "linear_scale_range",
"result": true
}
}
}
},
"process_id": "apply"
},
"apply2": {
"arguments": {
"data": {
"from_node": "aggregatetemporalperiod1"
},
"process": {
"process_graph": {
"linearscalerange2": {
"arguments": {
"inputMax": 65534,
"inputMin": 0,
"outputMax": 65534,
"outputMin": 0,
"x": {
"from_parameter": "x"
}
},
"process_id": "linear_scale_range",
"result": true
}
}
}
},
"process_id": "apply"
},
"apply3": {
"arguments": {
"data": {
"from_node": "applydimension1"
},
"process": {
"process_graph": {
"linearscalerange3": {
"arguments": {
"inputMax": 65534,
"inputMin": 1,
"outputMax": 65534,
"outputMin": 1,
"x": {
"from_parameter": "x"
}
},
"process_id": "linear_scale_range",
"result": true
}
}
}
},
"process_id": "apply"
},
"apply4": {
"arguments": {
"data": {
"from_node": "renamelabels4"
},
"process": {
"process_graph": {
"linearscalerange4": {
"arguments": {
"inputMax": 65534,
"inputMin": 0,
"outputMax": 65534,
"outputMin": 0,
"x": {
"from_parameter": "x"
}
},
"process_id": "linear_scale_range",
"result": true
}
}
}
},
"process_id": "apply"
},
"apply5": {
"arguments": {
"data": {
"from_node": "renamelabels9"
},
"process": {
"process_graph": {
"linearscalerange5": {
"arguments": {
"inputMax": 253,
"inputMin": 0,
"outputMax": 253,
"outputMin": 0,
"x": {
"from_parameter": "x"
}
},
"process_id": "linear_scale_range",
"result": true
}
}
}
},
"process_id": "apply"
},
"apply6": {
"arguments": {
"data": {
"from_node": "reducedimension2"
},
"process": {
"process_graph": {
"eq1": {
"arguments": {
"x": {
"from_parameter": "x"
},
"y": 0
},
"process_id": "eq",
"result": true
}
}
}
},
"process_id": "apply"
},
"apply7": {
"arguments": {
"data": {
"from_node": "mask2"
},
"process": {
"process_graph": {
"linearscalerange6": {
"arguments": {
"inputMax": 65534,
"inputMin": 0,
"outputMax": 65534,
"outputMin": 0,
"x": {
"from_parameter": "x"
}
},
"process_id": "linear_scale_range",
"result": true
}
}
}
},
"process_id": "apply",
"result": true
},
"applydimension1": {
"arguments": {
"data": {
"from_node": "aggregatetemporalperiod2"
},
"dimension": "bands",
"process": {
"process_graph": {
"add1": {
"arguments": {
"x": {
"from_node": "multiply1"
},
"y": 83
},
"process_id": "add"
},
"add2": {
"arguments": {
"x": {
"from_node": "multiply2"
},
"y": 83
},
"process_id": "add"
},
"arraycreate1": {
"arguments": {
"data": [
{
"from_node": "if1"
},
{
"from_node": "if2"
}
]
},
"process_id": "array_create",
"result": true
},
"arrayelement1": {
"arguments": {
"data": {
"from_parameter": "data"
},
"index": 0
},
"process_id": "array_element"
},
"arrayelement2": {
"arguments": {
"data": {
"from_parameter": "data"
},
"index": 0
},
"process_id": "array_element"
},
"arrayelement3": {
"arguments": {
"data": {
"from_parameter": "data"
},
"index": 1
},
"process_id": "array_element"
},
"arrayelement4": {
"arguments": {
"data": {
"from_parameter": "data"
},
"index": 1
},
"process_id": "array_element"
},
"divide1": {
"arguments": {
"x": {
"from_node": "add1"
},
"y": 20
},
"process_id": "divide"
},
"divide2": {
"arguments": {
"x": {
"from_node": "add2"
},
"y": 20
},
"process_id": "divide"
},
"if1": {
"arguments": {
"accept": 1,
"reject": {
"from_node": "power1"
},
"value": {
"from_node": "isnodata1"
}
},
"process_id": "if"
},
"if2": {
"arguments": {
"accept": 1,
"reject": {
"from_node": "power2"
},
"value": {
"from_node": "isnodata2"
}
},
"process_id": "if"
},
"isnodata1": {
"arguments": {
"x": {
"from_node": "arrayelement2"
}
},
"process_id": "is_nodata"
},
"isnodata2": {
"arguments": {
"x": {
"from_node": "arrayelement4"
}
},
"process_id": "is_nodata"
},
"log1": {
"arguments": {
"base": 10,
"x": {
"from_node": "arrayelement1"
}
},
"process_id": "log"
},
"log2": {
"arguments": {
"base": 10,
"x": {
"from_node": "arrayelement3"
}
},
"process_id": "log"
},
"multiply1": {
"arguments": {
"x": 10,
"y": {
"from_node": "log1"
}
},
"process_id": "multiply"
},
"multiply2": {
"arguments": {
"x": 10,
"y": {
"from_node": "log2"
}
},
"process_id": "multiply"
},
"power1": {
"arguments": {
"base": 10,
"p": {
"from_node": "divide1"
}
},
"process_id": "power"
},
"power2": {
"arguments": {
"base": 10,
"p": {
"from_node": "divide2"
}
},
"process_id": "power"
}
}
}
},
"process_id": "apply_dimension"
},
"applyneighborhood1": {
"arguments": {
"data": {
"from_node": "filterbbox1"
},
"overlap": [
{
"dimension": "x",
"unit": "px",
"value": 0
},
{
"dimension": "y",
"unit": "px",
"value": 0
}
],
"process": {
"process_graph": {
"runudf1": {
"arguments": {
"context": {
"presto_model_url": "https://artifactory.vgt.vito.be/artifactory/auxdata-public/worldcereal/models/PhaseII/presto-ss-wc-ft-ct-30D_test.pt",
"rescale_s1": false
},
"data": {
"from_parameter": "data"
},
"runtime": "Python",
"udf": "# /// script\n# dependencies = [\n# ]\n# ///\n\nimport functools\nimport inspect\nimport logging\nimport re\nimport shutil\nimport urllib.request\nfrom abc import ABC, abstractmethod\nfrom pathlib import Path\nimport numpy as np\nimport openeo\nimport xarray as xr\nfrom openeo.udf import XarrayDataCube\nfrom openeo.udf.udf_data import UdfData\nfrom pyproj import Transformer\nfrom pyproj.crs import CRS\n\nLAT_HARMONIZED_NAME = \"GEO-LAT\"\nLON_HARMONIZED_NAME = \"GEO-LON\"\nEPSG_HARMONIZED_NAME = \"GEO-EPSG\"\n\nclass FeatureExtractor(ABC):\n \"\"\"Base class for all feature extractor UDFs. It provides some common\n methods and attributes to be used by other feature extractor.\n\n The inherited classes are supposed to take care of VectorDataCubes for\n point based extraction or dense Cubes for tile/polygon based extraction.\n \"\"\"\n\n def __init__(self) -> None:\n self._epsg = None\n\n logging.basicConfig(level=logging.INFO)\n self.logger = logging.getLogger(self.__class__.__name__)\n\n @classmethod\n @functools.lru_cache(maxsize=6)\n def extract_dependencies(cls, base_url: str, dependency_name: str) -> str:\n \"\"\"Extract the dependencies from the given URL. Unpacking a zip\n file in the current working directory and return the path to the\n unpacked directory.\n\n Parameters:\n - base_url: The base public URL where the dependencies are stored.\n - dependency_name: The name of the dependency file to download. This\n parameter is added to `base_url` as a download path to the .zip\n archive\n Returns:\n - The absolute path to the extracted dependencies directory, to be added\n to the python path with the `sys.path.append` method.\n \"\"\"\n #self.logger.info(\"Unzipping dependencies\")\n # Generate absolute path for the dependencies folder\n dependencies_dir = Path.cwd() / \"dependencies\"\n\n # Create the directory if it doesn't exist\n dependencies_dir.mkdir(exist_ok=True, parents=True)\n\n # Download and extract the model file\n modelfile_url = f\"{base_url}/{dependency_name}\"\n modelfile, _ = urllib.request.urlretrieve(\n modelfile_url, filename=dependencies_dir / Path(modelfile_url).name\n )\n shutil.unpack_archive(modelfile, extract_dir=dependencies_dir)\n\n # Add the model directory to system path if it's not already there\n abs_path = str(\n dependencies_dir / Path(modelfile_url).name.split(\".zip\")[0]\n ) # NOQA\n\n return abs_path\n\n def _common_preparations(\n self, inarr: xr.DataArray, parameters: dict\n ) -> xr.DataArray:\n \"\"\"Common preparations to be executed before the feature extractor is\n executed. This method should be called by the `_execute` method of the\n feature extractor.\n \"\"\"\n self._epsg = parameters.pop(EPSG_HARMONIZED_NAME)\n self._parameters = parameters\n return inarr\n\n @property\n def epsg(self) -> int:\n \"\"\"Returns the EPSG code of the datacube.\"\"\"\n return self._epsg\n\n @epsg.setter\n def epsg(self, value: int):\n self._epsg = value\n\n def dependencies(self) -> list:\n \"\"\"Returns the additional dependencies such as wheels or zip files.\n Dependencies should be returned as a list of string, which will set-up at the top of the\n generated UDF. More information can be found at:\n https://open-eo.github.io/openeo-python-client/udf.html#standard-for-declaring-python-udf-dependencies\n \"\"\"\n if(self.logger is not None):\n\n self.logger.warning(\n \"No additional dependencies are defined. 
If you wish to add \"\n \"dependencies to your feature extractor, override the \"\n \"`dependencies` method in your class.\"\n )\n return []\n\n @abstractmethod\n def output_labels(self) -> list:\n \"\"\"Returns a list of output labels to be assigned on the output bands,\n needs to be overriden by the user.\"\"\"\n raise NotImplementedError(\n \"FeatureExtractor is a base abstract class, please implement the \"\n \"output_labels property.\"\n )\n\n @abstractmethod\n def _execute(self, cube: XarrayDataCube, parameters: dict) -> XarrayDataCube:\n raise NotImplementedError(\n \"FeatureExtractor is a base abstract class, please implement the \"\n \"_execute method.\"\n )\n\n\nclass PatchFeatureExtractor(FeatureExtractor):\n \"\"\"Base class for all the tile/polygon based feature extractors. An user\n implementing a feature extractor should take care of\n \"\"\"\n\n def get_latlons(self, inarr: xr.DataArray) -> xr.DataArray:\n \"\"\"Returns the latitude and longitude coordinates of the given array in\n a dataarray. Returns a dataarray with the same width/height of the input\n array, but with two bands, one for latitude and one for longitude. The\n metadata coordinates of the output array are the same as the input\n array, as the array wasn't reprojected but instead new features were\n computed.\n\n The latitude and longitude band names are standardized to the names\n `LAT_HARMONIZED_NAME` and `LON_HARMONIZED_NAME` respectively.\n \"\"\"\n\n lon = inarr.coords[\"x\"]\n lat = inarr.coords[\"y\"]\n lon, lat = np.meshgrid(lon, lat)\n\n if self.epsg is None:\n raise Exception(\n \"EPSG code was not defined, cannot extract lat/lon array \"\n \"as the CRS is unknown.\"\n )\n\n # If the coordiantes are not in EPSG:4326, we need to reproject them\n if self.epsg != 4326:\n # Initializes a pyproj reprojection object\n transformer = Transformer.from_crs(\n crs_from=CRS.from_epsg(self.epsg),\n crs_to=CRS.from_epsg(4326),\n always_xy=True,\n )\n lon, lat = transformer.transform(xx=lon, yy=lat)\n\n # Create a two channel numpy array of the lat and lons together by stacking\n latlon = np.stack([lat, lon])\n\n # Repack in a dataarray\n return xr.DataArray(\n latlon,\n dims=[\"bands\", \"y\", \"x\"],\n coords={\n \"bands\": [LAT_HARMONIZED_NAME, LON_HARMONIZED_NAME],\n \"y\": inarr.coords[\"y\"],\n \"x\": inarr.coords[\"x\"],\n },\n )\n\n def _rescale_s1_backscatter(self, arr: xr.DataArray) -> xr.DataArray:\n \"\"\"Rescales the input array from uint16 to float32 decibel values.\n The input array should be in uint16 format, as this optimizes memory usage in Open-EO\n processes. This function is called automatically on the bands of the input array, except\n if the parameter `rescale_s1` is set to False.\n \"\"\"\n s1_bands = [\"S1-SIGMA0-VV\", \"S1-SIGMA0-VH\", \"S1-SIGMA0-HV\", \"S1-SIGMA0-HH\"]\n s1_bands_to_select = list(set(arr.bands.values) & set(s1_bands))\n\n if len(s1_bands_to_select) == 0:\n return arr\n\n data_to_rescale = arr.sel(bands=s1_bands_to_select).astype(np.float32).data\n\n # Assert that the values are set between 1 and 65535\n if data_to_rescale.min().item() < 1 or data_to_rescale.max().item() > 65535:\n raise ValueError(\n \"The input array should be in uint16 format, with values between 1 and 65535. \"\n \"This restriction assures that the data was processed according to the S1 fetcher \"\n \"preprocessor. 
The user can disable this scaling manually by setting the \"\n \"`rescale_s1` parameter to False in the feature extractor.\"\n )\n\n # Converting back to power values\n data_to_rescale = 20.0 * np.log10(data_to_rescale) - 83.0\n data_to_rescale = np.power(10, data_to_rescale / 10.0)\n data_to_rescale[~np.isfinite(data_to_rescale)] = np.nan\n\n # Converting power values to decibels\n data_to_rescale = 10.0 * np.log10(data_to_rescale)\n\n # Change the bands within the array\n arr.loc[dict(bands=s1_bands_to_select)] = data_to_rescale\n return arr\n\n # TODO to remove the fixed transpose as it contributes to unclear code.\n def _execute(self, cube: XarrayDataCube, parameters: dict) -> XarrayDataCube:\n arr = cube.get_array().transpose(\"bands\", \"t\", \"y\", \"x\")\n arr = self._common_preparations(arr, parameters)\n if self._parameters.get(\"rescale_s1\", True):\n arr = self._rescale_s1_backscatter(arr)\n\n arr = self.execute(arr).transpose(\"bands\", \"y\", \"x\")\n return XarrayDataCube(arr)\n\n @abstractmethod\n def execute(self, inarr: xr.DataArray) -> xr.DataArray:\n pass\n\n\nclass PointFeatureExtractor(FeatureExtractor):\n def __init__(self):\n raise NotImplementedError(\n \"Point based feature extraction on Vector Cubes is not supported yet.\"\n )\n\n def _execute(self, cube: XarrayDataCube, parameters: dict) -> XarrayDataCube:\n arr = cube.get_array().transpose(\"bands\", \"t\")\n\n arr = self._common_preparations(arr, parameters)\n\n outarr = self.execute(cube.to_array()).transpose(\"bands\", \"t\")\n return XarrayDataCube(outarr)\n\n @abstractmethod\n def execute(self, inarr: xr.DataArray) -> xr.DataArray:\n pass\n\n\nclass PrestoFeatureExtractor(PatchFeatureExtractor):\n \"\"\"Feature extractor to use Presto model to compute per-pixel embeddings.\n This will generate a datacube with 128 bands, each band representing a\n feature from the Presto model.\n\n Interesting UDF parameters:\n - presto_url: A public URL to the Presto model file. A default Presto\n version is provided if the parameter is left undefined.\n - rescale_s1: Is specifically disabled by default, as the presto\n dependencies already take care of the backscatter decompression. 
If\n specified, should be set as `False`.\n \"\"\"\n\n import functools\n\n PRESTO_MODEL_URL = \"https://artifactory.vgt.vito.be/artifactory/auxdata-public/worldcereal-minimal-inference/presto.pt\" # NOQA\n PRESTO_WHL_URL = \"https://artifactory.vgt.vito.be/artifactory/auxdata-public/worldcereal/dependencies/presto_worldcereal-0.1.2-py3-none-any.whl\"\n BASE_URL = \"https://s3.waw3-1.cloudferro.com/swift/v1/project_dependencies\" # NOQA\n DEPENDENCY_NAME = \"worldcereal_deps.zip\"\n\n GFMAP_BAND_MAPPING = {\n \"S2-L2A-B02\": \"B02\",\n \"S2-L2A-B03\": \"B03\",\n \"S2-L2A-B04\": \"B04\",\n \"S2-L2A-B05\": \"B05\",\n \"S2-L2A-B06\": \"B06\",\n \"S2-L2A-B07\": \"B07\",\n \"S2-L2A-B08\": \"B08\",\n \"S2-L2A-B8A\": \"B8A\",\n \"S2-L2A-B11\": \"B11\",\n \"S2-L2A-B12\": \"B12\",\n \"S1-SIGMA0-VH\": \"VH\",\n \"S1-SIGMA0-VV\": \"VV\",\n \"COP-DEM\": \"DEM\",\n \"AGERA5-TMEAN\": \"temperature-mean\",\n \"AGERA5-PRECIP\": \"precipitation-flux\",\n }\n\n @functools.lru_cache(maxsize=6)\n def unpack_presto_wheel(self, wheel_url: str, destination_dir: str) -> list:\n import urllib.request\n import zipfile\n from pathlib import Path\n self.logger.info(\"Unpacking presto wheel\")\n\n # Downloads the wheel file\n modelfile, _ = urllib.request.urlretrieve(\n wheel_url, filename=Path.cwd() / Path(wheel_url).name\n )\n with zipfile.ZipFile(modelfile, \"r\") as zip_ref:\n zip_ref.extractall(destination_dir)\n return destination_dir\n\n def output_labels(self) -> list:\n \"\"\"Returns the output labels from this UDF, which is the output labels\n of the presto embeddings\"\"\"\n return [f\"presto_ft_{i}\" for i in range(128)]\n\n def execute(self, inarr: xr.DataArray) -> xr.DataArray:\n import sys\n\n if self.epsg is None:\n raise ValueError(\n \"EPSG code is required for Presto feature extraction, but was \"\n \"not correctly initialized.\"\n )\n presto_model_url = self._parameters.get(\n \"presto_model_url\", self.PRESTO_MODEL_URL\n )\n presto_wheel_url = self._parameters.get(\"presto_wheel_url\", self.PRESTO_WHL_URL)\n\n ignore_dependencies = self._parameters.get(\"ignore_dependencies\", False)\n if ignore_dependencies:\n self.logger.info(\n \"`ignore_dependencies` flag is set to True. 
Make sure that \"\n \"Presto and its dependencies are available on the runtime \"\n \"environment\"\n )\n\n # The below is required to avoid flipping of the result\n # when running on OpenEO backend!\n inarr = inarr.transpose(\"bands\", \"t\", \"x\", \"y\")\n\n # Change the band names\n new_band_names = [\n self.GFMAP_BAND_MAPPING.get(b.item(), b.item()) for b in inarr.bands\n ]\n inarr = inarr.assign_coords(bands=new_band_names)\n\n # Handle NaN values in Presto compatible way\n inarr = inarr.fillna(65535)\n\n # Unzip de dependencies on the backend\n if not ignore_dependencies:\n self.logger.info(\"Unzipping dependencies\")\n deps_dir = self.extract_dependencies(self.BASE_URL, self.DEPENDENCY_NAME)\n self.logger.info(\"Unpacking presto wheel\")\n deps_dir = self.unpack_presto_wheel(presto_wheel_url, deps_dir)\n\n self.logger.info(\"Appending dependencies\")\n sys.path.append(str(deps_dir))\n\n from presto.inference import ( # pylint: disable=import-outside-toplevel\n get_presto_features,\n )\n\n batch_size = self._parameters.get(\"batch_size\", 256)\n\n self.logger.info(\"Extracting presto features\")\n features = get_presto_features(\n inarr, presto_model_url, self.epsg, batch_size=batch_size\n )\n return features\n\n def _execute(self, cube: XarrayDataCube, parameters: dict) -> XarrayDataCube:\n # Disable S1 rescaling (decompression) by default\n if parameters.get(\"rescale_s1\", None) is None:\n parameters.update({\"rescale_s1\": False})\n return super()._execute(cube, parameters)\n\n\ndef apply_udf_data(udf_data: UdfData) -> XarrayDataCube:\n feature_extractor_class = PrestoFeatureExtractor\n\n # User-defined, feature extractor class initialized here\n feature_extractor = feature_extractor_class()\n\n is_pixel_based = issubclass(feature_extractor_class, PointFeatureExtractor)\n\n if not is_pixel_based:\n assert (\n len(udf_data.datacube_list) == 1\n ), \"OpenEO GFMAP Feature extractor pipeline only supports single input cubes for the tile.\"\n\n cube = udf_data.datacube_list[0]\n parameters = udf_data.user_context\n\n proj = udf_data.proj\n if proj is not None:\n proj = proj[\"EPSG\"]\n\n parameters[EPSG_HARMONIZED_NAME] = proj\n\n cube = feature_extractor._execute(cube, parameters=parameters)\n\n udf_data.datacube_list = [cube]\n\n return udf_data\n"
},
"process_id": "run_udf",
"result": true
}
}
},
"size": [
{
"dimension": "x",
"unit": "px",
"value": 100
},
{
"dimension": "y",
"unit": "px",
"value": 100
}
]
},
"process_id": "apply_neighborhood"
},
"applyneighborhood2": {
"arguments": {
"data": {
"from_node": "renamelabels6"
},
"overlap": [
{
"dimension": "x",
"unit": "px",
"value": 0
},
{
"dimension": "y",
"unit": "px",
"value": 0
}
],
"process": {
"process_graph": {
"runudf2": {
"arguments": {
"context": {
"classifier_url": "https://artifactory.vgt.vito.be/artifactory/auxdata-public/worldcereal/models/PhaseII/presto-ss-wc-ft-ct-30D_test_CROPTYPE9.onnx"
},
"data": {
"from_parameter": "data"
},
"runtime": "Python",
"udf": "# /// script\n# dependencies = []\n# ///\n\nimport functools\nimport inspect\nimport logging\nimport re\nimport shutil\nimport sys\nimport urllib.request\nfrom abc import ABC, abstractmethod\nfrom pathlib import Path\nimport numpy as np\nimport openeo\nimport requests\nimport xarray as xr\nfrom openeo.udf import XarrayDataCube\nfrom openeo.udf import inspect as udf_inspect\nfrom openeo.udf.udf_data import UdfData\nsys.path.insert(0, \"onnx_deps\")\nimport onnxruntime as ort # noqa: E402\n\nEPSG_HARMONIZED_NAME = \"GEO-EPSG\"\n\nclass ModelInference(ABC):\n \"\"\"Base class for all model inference UDFs. It provides some common\n methods and attributes to be used by other model inference classes.\n \"\"\"\n\n def __init__(self) -> None:\n \"\"\"\n Initializes the PrestoFeatureExtractor object, starting a logger.\n \"\"\"\n logging.basicConfig(level=logging.INFO)\n self.logger = logging.getLogger(self.__class__.__name__)\n\n @classmethod\n @functools.lru_cache(maxsize=6)\n def extract_dependencies(cls, base_url: str, dependency_name: str) -> str:\n \"\"\"Extract the dependencies from the given URL. Unpacking a zip\n file in the current working directory and return the path to the\n unpacked directory.\n\n Parameters:\n - base_url: The base public URL where the dependencies are stored.\n - dependency_name: The name of the dependency file to download. This\n parameter is added to `base_url` as a download path to the .zip\n archive\n Returns:\n - The absolute path to the extracted dependencies directory, to be added\n to the python path with the `sys.path.append` method.\n \"\"\"\n\n # Generate absolute path for the dependencies folder\n dependencies_dir = Path.cwd() / \"dependencies\"\n\n # Create the directory if it doesn't exist\n dependencies_dir.mkdir(exist_ok=True, parents=True)\n\n # Download and extract the model file\n modelfile_url = f\"{base_url}/{dependency_name}\"\n modelfile, _ = urllib.request.urlretrieve(\n modelfile_url, filename=dependencies_dir / Path(modelfile_url).name\n )\n shutil.unpack_archive(modelfile, extract_dir=dependencies_dir)\n\n # Add the model directory to system path if it's not already there\n abs_path = str(\n dependencies_dir / Path(modelfile_url).name.split(\".zip\")[0]\n ) # NOQA\n\n return abs_path\n\n @classmethod\n @functools.lru_cache(maxsize=6)\n def load_ort_session(cls, model_url: str):\n \"\"\"Loads an onnx session from a publicly available URL. The URL must be a direct\n download link to the ONNX session file.\n The `lru_cache` decorator avoids loading multiple time the model within the same worker.\n \"\"\"\n # Two minutes timeout to download the model\n response = requests.get(model_url, timeout=120)\n model = response.content\n\n return ort.InferenceSession(model)\n\n def apply_ml(\n self, tensor: np.ndarray, session: ort.InferenceSession, input_name: str\n ) -> np.ndarray:\n \"\"\"Applies the machine learning model to the input data as a tensor.\n\n Parameters\n ----------\n tensor: np.ndarray\n The input data with shape (bands, instance). If the input data is a tile (bands, y, x),\n then the y, x dimension must be flattened before being applied in this function.\n session: ort.InferenceSession\n The ONNX Session object, loaded from the `load_ort_session` class method.\n input_name: str\n The name of the input tensor in the ONNX session. Depends on how is the ONNX serialized\n model generated. 
For example, CatBoost models have their input tensor named as\n features: https://catboost.ai/en/docs/concepts/apply-onnx-ml\n \"\"\"\n return session.run(None, {input_name: tensor})[0]\n\n def _common_preparations(\n self, inarr: xr.DataArray, parameters: dict\n ) -> xr.DataArray:\n \"\"\"Common preparations for all inference models. This method will be\n executed at the very beginning of the process.\n \"\"\"\n self._epsg = parameters.pop(EPSG_HARMONIZED_NAME)\n self._parameters = parameters\n return inarr\n\n def _execute(self, cube: XarrayDataCube, parameters: dict) -> XarrayDataCube:\n arr = cube.get_array().transpose(\"bands\", \"y\", \"x\")\n arr = self._common_preparations(arr, parameters)\n arr = self.execute(arr).transpose(\"bands\", \"y\", \"x\")\n return XarrayDataCube(arr)\n\n @property\n def epsg(self) -> int:\n \"\"\"EPSG code of the input data.\"\"\"\n return self._epsg\n\n def dependencies(self) -> list:\n \"\"\"Returns the additional dependencies such as wheels or zip files.\n Dependencies should be returned as a list of string, which will set-up at the top of the\n generated UDF. More information can be found at:\n https://open-eo.github.io/openeo-python-client/udf.html#standard-for-declaring-python-udf-dependencies\n \"\"\"\n self.logger.warning(\n \"Only onnx is defined as dependency. If you wish to add \"\n \"dependencies to your model inference, override the \"\n \"`dependencies` method in your class.\"\n )\n return [\"onnxruntime\"]\n\n @abstractmethod\n def output_labels(self) -> list:\n \"\"\"Returns the labels of the output data.\"\"\"\n raise NotImplementedError(\n \"ModelInference is a base abstract class, please implement the \"\n \"output_labels property.\"\n )\n\n @abstractmethod\n def execute(self, inarr: xr.DataArray) -> xr.DataArray:\n \"\"\"Executes the model inference.\"\"\"\n raise NotImplementedError(\n \"ModelInference is a base abstract class, please implement the \"\n \"execute method.\"\n )\n\n\nclass CroptypeClassifier(ModelInference):\n \"\"\"Multi-class crop classifier using ONNX to load a catboost model.\n\n The classifier use the embeddings computed from the Presto Feature\n Extractor.\n\n Interesting UDF parameters:\n - classifier_url: A public URL to the ONNX classification model. Default is\n the public Presto model.\n \"\"\"\n\n import numpy as np\n\n CATBOOST_PATH = \"https://artifactory.vgt.vito.be/artifactory/auxdata-public/worldcereal/models/PhaseII/presto-ss-wc-ft-ct-30D_test_CROPTYPE9.onnx\" # NOQA\n\n def __init__(self):\n super().__init__()\n\n self.onnx_session = None\n\n def dependencies(self) -> list:\n return [] # Disable the dependencies from PIP install\n\n def output_labels(self) -> list:\n return [\"classification\", \"probability\"]\n\n def predict(self, features: np.ndarray) -> np.ndarray:\n \"\"\"\n Predicts labels using the provided features array.\n \"\"\"\n import numpy as np\n\n if self.onnx_session is None:\n raise ValueError(\"Model has not been loaded. 
Please load a model first.\")\n\n # Prepare input data for ONNX model\n outputs = self.onnx_session.run(None, {\"features\": features})\n\n # Get info on classes from the model\n class_params = eval(\n self.onnx_session.get_modelmeta().custom_metadata_map[\"class_params\"]\n )\n\n # Get classes LUT\n LUT = dict(zip(class_params[\"class_names\"], class_params[\"class_to_label\"]))\n\n # Extract classes as INTs and probability of winning class values\n labels = np.zeros((len(outputs[0]),), dtype=np.uint16)\n probabilities = np.zeros((len(outputs[0]),), dtype=np.uint8)\n for i, (label, prob) in enumerate(zip(outputs[0], outputs[1])):\n labels[i] = LUT[label]\n probabilities[i] = int(prob[label] * 100)\n\n return np.stack([labels, probabilities], axis=0)\n\n def execute(self, inarr: xr.DataArray) -> xr.DataArray:\n classifier_url = self._parameters.get(\"classifier_url\", self.CATBOOST_PATH)\n\n # shape and indices for output (\"xy\", \"bands\")\n x_coords, y_coords = inarr.x.values, inarr.y.values\n inarr = inarr.transpose(\"bands\", \"x\", \"y\").stack(xy=[\"x\", \"y\"]).transpose()\n\n self.onnx_session = self.load_ort_session(classifier_url)\n\n # Run catboost classification\n self.logger.info(\"Catboost classification with input shape: %s\", inarr.shape)\n classification = self.predict(inarr.values)\n self.logger.info(\"Classification done with shape: %s\", inarr.shape)\n\n classification = xr.DataArray(\n classification.reshape((2, len(x_coords), len(y_coords))),\n dims=[\"bands\", \"x\", \"y\"],\n coords={\n \"bands\": [\"classification\", \"probability\"],\n \"x\": x_coords,\n \"y\": y_coords,\n },\n )\n\n return classification\n\n\ndef apply_udf_data(udf_data: UdfData) -> XarrayDataCube:\n model_inference_class = CroptypeClassifier\n\n model_inference = model_inference_class()\n\n # User-defined, model inference class initialized here\n cube = udf_data.datacube_list[0]\n parameters = udf_data.user_context\n\n proj = udf_data.proj\n if proj is not None:\n proj = proj.get(\"EPSG\")\n\n parameters[EPSG_HARMONIZED_NAME] = proj\n\n cube = model_inference._execute(cube, parameters=parameters)\n\n udf_data.datacube_list = [cube]\n\n return udf_data\n"
},
"process_id": "run_udf",
"result": true
}
}
},
"size": [
{
"dimension": "x",
"unit": "px",
"value": 100
},
{
"dimension": "y",
"unit": "px",
"value": 100
},
{
"dimension": "t",
"value": "P1D"
}
]
},
"process_id": "apply_neighborhood"
},
"applyneighborhood3": {
"arguments": {
"data": {
"from_node": "filterbbox1"
},
"overlap": [
{
"dimension": "x",
"unit": "px",
"value": 0
},
{
"dimension": "y",
"unit": "px",
"value": 0
}
],
"process": {
"process_graph": {
"runudf3": {
"arguments": {
"context": {
"presto_model_url": "https://artifactory.vgt.vito.be/artifactory/auxdata-public/worldcereal-minimal-inference/presto.pt",
"rescale_s1": false
},
"data": {
"from_parameter": "data"
},
"runtime": "Python",
"udf": "# /// script\n# dependencies = [\n# ]\n# ///\n\nimport functools\nimport inspect\nimport logging\nimport re\nimport shutil\nimport urllib.request\nfrom abc import ABC, abstractmethod\nfrom pathlib import Path\nimport numpy as np\nimport openeo\nimport xarray as xr\nfrom openeo.udf import XarrayDataCube\nfrom openeo.udf.udf_data import UdfData\nfrom pyproj import Transformer\nfrom pyproj.crs import CRS\n\nLAT_HARMONIZED_NAME = \"GEO-LAT\"\nLON_HARMONIZED_NAME = \"GEO-LON\"\nEPSG_HARMONIZED_NAME = \"GEO-EPSG\"\n\nclass FeatureExtractor(ABC):\n \"\"\"Base class for all feature extractor UDFs. It provides some common\n methods and attributes to be used by other feature extractor.\n\n The inherited classes are supposed to take care of VectorDataCubes for\n point based extraction or dense Cubes for tile/polygon based extraction.\n \"\"\"\n\n def __init__(self) -> None:\n self._epsg = None\n\n logging.basicConfig(level=logging.INFO)\n self.logger = logging.getLogger(self.__class__.__name__)\n\n @classmethod\n @functools.lru_cache(maxsize=6)\n def extract_dependencies(cls, base_url: str, dependency_name: str) -> str:\n \"\"\"Extract the dependencies from the given URL. Unpacking a zip\n file in the current working directory and return the path to the\n unpacked directory.\n\n Parameters:\n - base_url: The base public URL where the dependencies are stored.\n - dependency_name: The name of the dependency file to download. This\n parameter is added to `base_url` as a download path to the .zip\n archive\n Returns:\n - The absolute path to the extracted dependencies directory, to be added\n to the python path with the `sys.path.append` method.\n \"\"\"\n #self.logger.info(\"Unzipping dependencies\")\n # Generate absolute path for the dependencies folder\n dependencies_dir = Path.cwd() / \"dependencies\"\n\n # Create the directory if it doesn't exist\n dependencies_dir.mkdir(exist_ok=True, parents=True)\n\n # Download and extract the model file\n modelfile_url = f\"{base_url}/{dependency_name}\"\n modelfile, _ = urllib.request.urlretrieve(\n modelfile_url, filename=dependencies_dir / Path(modelfile_url).name\n )\n shutil.unpack_archive(modelfile, extract_dir=dependencies_dir)\n\n # Add the model directory to system path if it's not already there\n abs_path = str(\n dependencies_dir / Path(modelfile_url).name.split(\".zip\")[0]\n ) # NOQA\n\n return abs_path\n\n def _common_preparations(\n self, inarr: xr.DataArray, parameters: dict\n ) -> xr.DataArray:\n \"\"\"Common preparations to be executed before the feature extractor is\n executed. This method should be called by the `_execute` method of the\n feature extractor.\n \"\"\"\n self._epsg = parameters.pop(EPSG_HARMONIZED_NAME)\n self._parameters = parameters\n return inarr\n\n @property\n def epsg(self) -> int:\n \"\"\"Returns the EPSG code of the datacube.\"\"\"\n return self._epsg\n\n @epsg.setter\n def epsg(self, value: int):\n self._epsg = value\n\n def dependencies(self) -> list:\n \"\"\"Returns the additional dependencies such as wheels or zip files.\n Dependencies should be returned as a list of string, which will set-up at the top of the\n generated UDF. More information can be found at:\n https://open-eo.github.io/openeo-python-client/udf.html#standard-for-declaring-python-udf-dependencies\n \"\"\"\n if(self.logger is not None):\n\n self.logger.warning(\n \"No additional dependencies are defined. 
If you wish to add \"\n \"dependencies to your feature extractor, override the \"\n \"`dependencies` method in your class.\"\n )\n return []\n\n @abstractmethod\n def output_labels(self) -> list:\n \"\"\"Returns a list of output labels to be assigned on the output bands,\n needs to be overriden by the user.\"\"\"\n raise NotImplementedError(\n \"FeatureExtractor is a base abstract class, please implement the \"\n \"output_labels property.\"\n )\n\n @abstractmethod\n def _execute(self, cube: XarrayDataCube, parameters: dict) -> XarrayDataCube:\n raise NotImplementedError(\n \"FeatureExtractor is a base abstract class, please implement the \"\n \"_execute method.\"\n )\n\n\nclass PatchFeatureExtractor(FeatureExtractor):\n \"\"\"Base class for all the tile/polygon based feature extractors. An user\n implementing a feature extractor should take care of\n \"\"\"\n\n def get_latlons(self, inarr: xr.DataArray) -> xr.DataArray:\n \"\"\"Returns the latitude and longitude coordinates of the given array in\n a dataarray. Returns a dataarray with the same width/height of the input\n array, but with two bands, one for latitude and one for longitude. The\n metadata coordinates of the output array are the same as the input\n array, as the array wasn't reprojected but instead new features were\n computed.\n\n The latitude and longitude band names are standardized to the names\n `LAT_HARMONIZED_NAME` and `LON_HARMONIZED_NAME` respectively.\n \"\"\"\n\n lon = inarr.coords[\"x\"]\n lat = inarr.coords[\"y\"]\n lon, lat = np.meshgrid(lon, lat)\n\n if self.epsg is None:\n raise Exception(\n \"EPSG code was not defined, cannot extract lat/lon array \"\n \"as the CRS is unknown.\"\n )\n\n # If the coordiantes are not in EPSG:4326, we need to reproject them\n if self.epsg != 4326:\n # Initializes a pyproj reprojection object\n transformer = Transformer.from_crs(\n crs_from=CRS.from_epsg(self.epsg),\n crs_to=CRS.from_epsg(4326),\n always_xy=True,\n )\n lon, lat = transformer.transform(xx=lon, yy=lat)\n\n # Create a two channel numpy array of the lat and lons together by stacking\n latlon = np.stack([lat, lon])\n\n # Repack in a dataarray\n return xr.DataArray(\n latlon,\n dims=[\"bands\", \"y\", \"x\"],\n coords={\n \"bands\": [LAT_HARMONIZED_NAME, LON_HARMONIZED_NAME],\n \"y\": inarr.coords[\"y\"],\n \"x\": inarr.coords[\"x\"],\n },\n )\n\n def _rescale_s1_backscatter(self, arr: xr.DataArray) -> xr.DataArray:\n \"\"\"Rescales the input array from uint16 to float32 decibel values.\n The input array should be in uint16 format, as this optimizes memory usage in Open-EO\n processes. This function is called automatically on the bands of the input array, except\n if the parameter `rescale_s1` is set to False.\n \"\"\"\n s1_bands = [\"S1-SIGMA0-VV\", \"S1-SIGMA0-VH\", \"S1-SIGMA0-HV\", \"S1-SIGMA0-HH\"]\n s1_bands_to_select = list(set(arr.bands.values) & set(s1_bands))\n\n if len(s1_bands_to_select) == 0:\n return arr\n\n data_to_rescale = arr.sel(bands=s1_bands_to_select).astype(np.float32).data\n\n # Assert that the values are set between 1 and 65535\n if data_to_rescale.min().item() < 1 or data_to_rescale.max().item() > 65535:\n raise ValueError(\n \"The input array should be in uint16 format, with values between 1 and 65535. \"\n \"This restriction assures that the data was processed according to the S1 fetcher \"\n \"preprocessor. 
The user can disable this scaling manually by setting the \"\n \"`rescale_s1` parameter to False in the feature extractor.\"\n )\n\n # Converting back to power values\n data_to_rescale = 20.0 * np.log10(data_to_rescale) - 83.0\n data_to_rescale = np.power(10, data_to_rescale / 10.0)\n data_to_rescale[~np.isfinite(data_to_rescale)] = np.nan\n\n # Converting power values to decibels\n data_to_rescale = 10.0 * np.log10(data_to_rescale)\n\n # Change the bands within the array\n arr.loc[dict(bands=s1_bands_to_select)] = data_to_rescale\n return arr\n\n # TODO to remove the fixed transpose as it contributes to unclear code.\n def _execute(self, cube: XarrayDataCube, parameters: dict) -> XarrayDataCube:\n arr = cube.get_array().transpose(\"bands\", \"t\", \"y\", \"x\")\n arr = self._common_preparations(arr, parameters)\n if self._parameters.get(\"rescale_s1\", True):\n arr = self._rescale_s1_backscatter(arr)\n\n arr = self.execute(arr).transpose(\"bands\", \"y\", \"x\")\n return XarrayDataCube(arr)\n\n @abstractmethod\n def execute(self, inarr: xr.DataArray) -> xr.DataArray:\n pass\n\n\nclass PointFeatureExtractor(FeatureExtractor):\n def __init__(self):\n raise NotImplementedError(\n \"Point based feature extraction on Vector Cubes is not supported yet.\"\n )\n\n def _execute(self, cube: XarrayDataCube, parameters: dict) -> XarrayDataCube:\n arr = cube.get_array().transpose(\"bands\", \"t\")\n\n arr = self._common_preparations(arr, parameters)\n\n outarr = self.execute(cube.to_array()).transpose(\"bands\", \"t\")\n return XarrayDataCube(outarr)\n\n @abstractmethod\n def execute(self, inarr: xr.DataArray) -> xr.DataArray:\n pass\n\n\nclass PrestoFeatureExtractor(PatchFeatureExtractor):\n \"\"\"Feature extractor to use Presto model to compute per-pixel embeddings.\n This will generate a datacube with 128 bands, each band representing a\n feature from the Presto model.\n\n Interesting UDF parameters:\n - presto_url: A public URL to the Presto model file. A default Presto\n version is provided if the parameter is left undefined.\n - rescale_s1: Is specifically disabled by default, as the presto\n dependencies already take care of the backscatter decompression. 
If\n specified, should be set as `False`.\n \"\"\"\n\n import functools\n\n PRESTO_MODEL_URL = \"https://artifactory.vgt.vito.be/artifactory/auxdata-public/worldcereal-minimal-inference/presto.pt\" # NOQA\n PRESTO_WHL_URL = \"https://artifactory.vgt.vito.be/artifactory/auxdata-public/worldcereal/dependencies/presto_worldcereal-0.1.2-py3-none-any.whl\"\n BASE_URL = \"https://s3.waw3-1.cloudferro.com/swift/v1/project_dependencies\" # NOQA\n DEPENDENCY_NAME = \"worldcereal_deps.zip\"\n\n GFMAP_BAND_MAPPING = {\n \"S2-L2A-B02\": \"B02\",\n \"S2-L2A-B03\": \"B03\",\n \"S2-L2A-B04\": \"B04\",\n \"S2-L2A-B05\": \"B05\",\n \"S2-L2A-B06\": \"B06\",\n \"S2-L2A-B07\": \"B07\",\n \"S2-L2A-B08\": \"B08\",\n \"S2-L2A-B8A\": \"B8A\",\n \"S2-L2A-B11\": \"B11\",\n \"S2-L2A-B12\": \"B12\",\n \"S1-SIGMA0-VH\": \"VH\",\n \"S1-SIGMA0-VV\": \"VV\",\n \"COP-DEM\": \"DEM\",\n \"AGERA5-TMEAN\": \"temperature-mean\",\n \"AGERA5-PRECIP\": \"precipitation-flux\",\n }\n\n @functools.lru_cache(maxsize=6)\n def unpack_presto_wheel(self, wheel_url: str, destination_dir: str) -> list:\n import urllib.request\n import zipfile\n from pathlib import Path\n self.logger.info(\"Unpacking presto wheel\")\n\n # Downloads the wheel file\n modelfile, _ = urllib.request.urlretrieve(\n wheel_url, filename=Path.cwd() / Path(wheel_url).name\n )\n with zipfile.ZipFile(modelfile, \"r\") as zip_ref:\n zip_ref.extractall(destination_dir)\n return destination_dir\n\n def output_labels(self) -> list:\n \"\"\"Returns the output labels from this UDF, which is the output labels\n of the presto embeddings\"\"\"\n return [f\"presto_ft_{i}\" for i in range(128)]\n\n def execute(self, inarr: xr.DataArray) -> xr.DataArray:\n import sys\n\n if self.epsg is None:\n raise ValueError(\n \"EPSG code is required for Presto feature extraction, but was \"\n \"not correctly initialized.\"\n )\n presto_model_url = self._parameters.get(\n \"presto_model_url\", self.PRESTO_MODEL_URL\n )\n presto_wheel_url = self._parameters.get(\"presto_wheel_url\", self.PRESTO_WHL_URL)\n\n ignore_dependencies = self._parameters.get(\"ignore_dependencies\", False)\n if ignore_dependencies:\n self.logger.info(\n \"`ignore_dependencies` flag is set to True. 
Make sure that \"\n \"Presto and its dependencies are available on the runtime \"\n \"environment\"\n )\n\n # The below is required to avoid flipping of the result\n # when running on OpenEO backend!\n inarr = inarr.transpose(\"bands\", \"t\", \"x\", \"y\")\n\n # Change the band names\n new_band_names = [\n self.GFMAP_BAND_MAPPING.get(b.item(), b.item()) for b in inarr.bands\n ]\n inarr = inarr.assign_coords(bands=new_band_names)\n\n # Handle NaN values in Presto compatible way\n inarr = inarr.fillna(65535)\n\n # Unzip de dependencies on the backend\n if not ignore_dependencies:\n self.logger.info(\"Unzipping dependencies\")\n deps_dir = self.extract_dependencies(self.BASE_URL, self.DEPENDENCY_NAME)\n self.logger.info(\"Unpacking presto wheel\")\n deps_dir = self.unpack_presto_wheel(presto_wheel_url, deps_dir)\n\n self.logger.info(\"Appending dependencies\")\n sys.path.append(str(deps_dir))\n\n from presto.inference import ( # pylint: disable=import-outside-toplevel\n get_presto_features,\n )\n\n batch_size = self._parameters.get(\"batch_size\", 256)\n\n self.logger.info(\"Extracting presto features\")\n features = get_presto_features(\n inarr, presto_model_url, self.epsg, batch_size=batch_size\n )\n return features\n\n def _execute(self, cube: XarrayDataCube, parameters: dict) -> XarrayDataCube:\n # Disable S1 rescaling (decompression) by default\n if parameters.get(\"rescale_s1\", None) is None:\n parameters.update({\"rescale_s1\": False})\n return super()._execute(cube, parameters)\n\n\ndef apply_udf_data(udf_data: UdfData) -> XarrayDataCube:\n feature_extractor_class = PrestoFeatureExtractor\n\n # User-defined, feature extractor class initialized here\n feature_extractor = feature_extractor_class()\n\n is_pixel_based = issubclass(feature_extractor_class, PointFeatureExtractor)\n\n if not is_pixel_based:\n assert (\n len(udf_data.datacube_list) == 1\n ), \"OpenEO GFMAP Feature extractor pipeline only supports single input cubes for the tile.\"\n\n cube = udf_data.datacube_list[0]\n parameters = udf_data.user_context\n\n proj = udf_data.proj\n if proj is not None:\n proj = proj[\"EPSG\"]\n\n parameters[EPSG_HARMONIZED_NAME] = proj\n\n cube = feature_extractor._execute(cube, parameters=parameters)\n\n udf_data.datacube_list = [cube]\n\n return udf_data\n"
},
"process_id": "run_udf",
"result": true
}
}
},
"size": [
{
"dimension": "x",
"unit": "px",
"value": 100
},
{
"dimension": "y",
"unit": "px",
"value": 100
}
]
},
"process_id": "apply_neighborhood"
},
"applyneighborhood4": {
"arguments": {
"data": {
"from_node": "renamelabels8"
},
"overlap": [
{
"dimension": "x",
"unit": "px",
"value": 0
},
{
"dimension": "y",
"unit": "px",
"value": 0
}
],
"process": {
"process_graph": {
"runudf4": {
"arguments": {
"context": {
"classifier_url": "https://artifactory.vgt.vito.be/artifactory/auxdata-public/worldcereal-minimal-inference/wc_catboost.onnx"
},
"data": {
"from_parameter": "data"
},
"runtime": "Python",
"udf": "# /// script\n# dependencies = []\n# ///\n\nimport functools\nimport inspect\nimport logging\nimport re\nimport shutil\nimport sys\nimport urllib.request\nfrom abc import ABC, abstractmethod\nfrom pathlib import Path\nimport numpy as np\nimport openeo\nimport requests\nimport xarray as xr\nfrom openeo.udf import XarrayDataCube\nfrom openeo.udf import inspect as udf_inspect\nfrom openeo.udf.udf_data import UdfData\nsys.path.insert(0, \"onnx_deps\")\nimport onnxruntime as ort # noqa: E402\n\nEPSG_HARMONIZED_NAME = \"GEO-EPSG\"\n\nclass ModelInference(ABC):\n \"\"\"Base class for all model inference UDFs. It provides some common\n methods and attributes to be used by other model inference classes.\n \"\"\"\n\n def __init__(self) -> None:\n \"\"\"\n Initializes the PrestoFeatureExtractor object, starting a logger.\n \"\"\"\n logging.basicConfig(level=logging.INFO)\n self.logger = logging.getLogger(self.__class__.__name__)\n\n @classmethod\n @functools.lru_cache(maxsize=6)\n def extract_dependencies(cls, base_url: str, dependency_name: str) -> str:\n \"\"\"Extract the dependencies from the given URL. Unpacking a zip\n file in the current working directory and return the path to the\n unpacked directory.\n\n Parameters:\n - base_url: The base public URL where the dependencies are stored.\n - dependency_name: The name of the dependency file to download. This\n parameter is added to `base_url` as a download path to the .zip\n archive\n Returns:\n - The absolute path to the extracted dependencies directory, to be added\n to the python path with the `sys.path.append` method.\n \"\"\"\n\n # Generate absolute path for the dependencies folder\n dependencies_dir = Path.cwd() / \"dependencies\"\n\n # Create the directory if it doesn't exist\n dependencies_dir.mkdir(exist_ok=True, parents=True)\n\n # Download and extract the model file\n modelfile_url = f\"{base_url}/{dependency_name}\"\n modelfile, _ = urllib.request.urlretrieve(\n modelfile_url, filename=dependencies_dir / Path(modelfile_url).name\n )\n shutil.unpack_archive(modelfile, extract_dir=dependencies_dir)\n\n # Add the model directory to system path if it's not already there\n abs_path = str(\n dependencies_dir / Path(modelfile_url).name.split(\".zip\")[0]\n ) # NOQA\n\n return abs_path\n\n @classmethod\n @functools.lru_cache(maxsize=6)\n def load_ort_session(cls, model_url: str):\n \"\"\"Loads an onnx session from a publicly available URL. The URL must be a direct\n download link to the ONNX session file.\n The `lru_cache` decorator avoids loading multiple time the model within the same worker.\n \"\"\"\n # Two minutes timeout to download the model\n response = requests.get(model_url, timeout=120)\n model = response.content\n\n return ort.InferenceSession(model)\n\n def apply_ml(\n self, tensor: np.ndarray, session: ort.InferenceSession, input_name: str\n ) -> np.ndarray:\n \"\"\"Applies the machine learning model to the input data as a tensor.\n\n Parameters\n ----------\n tensor: np.ndarray\n The input data with shape (bands, instance). If the input data is a tile (bands, y, x),\n then the y, x dimension must be flattened before being applied in this function.\n session: ort.InferenceSession\n The ONNX Session object, loaded from the `load_ort_session` class method.\n input_name: str\n The name of the input tensor in the ONNX session. Depends on how is the ONNX serialized\n model generated. 
For example, CatBoost models have their input tensor named as\n features: https://catboost.ai/en/docs/concepts/apply-onnx-ml\n \"\"\"\n return session.run(None, {input_name: tensor})[0]\n\n def _common_preparations(\n self, inarr: xr.DataArray, parameters: dict\n ) -> xr.DataArray:\n \"\"\"Common preparations for all inference models. This method will be\n executed at the very beginning of the process.\n \"\"\"\n self._epsg = parameters.pop(EPSG_HARMONIZED_NAME)\n self._parameters = parameters\n return inarr\n\n def _execute(self, cube: XarrayDataCube, parameters: dict) -> XarrayDataCube:\n arr = cube.get_array().transpose(\"bands\", \"y\", \"x\")\n arr = self._common_preparations(arr, parameters)\n arr = self.execute(arr).transpose(\"bands\", \"y\", \"x\")\n return XarrayDataCube(arr)\n\n @property\n def epsg(self) -> int:\n \"\"\"EPSG code of the input data.\"\"\"\n return self._epsg\n\n def dependencies(self) -> list:\n \"\"\"Returns the additional dependencies such as wheels or zip files.\n Dependencies should be returned as a list of string, which will set-up at the top of the\n generated UDF. More information can be found at:\n https://open-eo.github.io/openeo-python-client/udf.html#standard-for-declaring-python-udf-dependencies\n \"\"\"\n self.logger.warning(\n \"Only onnx is defined as dependency. If you wish to add \"\n \"dependencies to your model inference, override the \"\n \"`dependencies` method in your class.\"\n )\n return [\"onnxruntime\"]\n\n @abstractmethod\n def output_labels(self) -> list:\n \"\"\"Returns the labels of the output data.\"\"\"\n raise NotImplementedError(\n \"ModelInference is a base abstract class, please implement the \"\n \"output_labels property.\"\n )\n\n @abstractmethod\n def execute(self, inarr: xr.DataArray) -> xr.DataArray:\n \"\"\"Executes the model inference.\"\"\"\n raise NotImplementedError(\n \"ModelInference is a base abstract class, please implement the \"\n \"execute method.\"\n )\n\n\nclass CroplandClassifier(ModelInference):\n \"\"\"Binary crop-land classifier using ONNX to load a catboost model.\n\n The classifier use the embeddings computed from the Presto Feature\n Extractor.\n\n Interesting UDF parameters:\n - classifier_url: A public URL to the ONNX classification model. Default is\n the public Presto model.\n \"\"\"\n\n import numpy as np\n\n CATBOOST_PATH = \"https://artifactory.vgt.vito.be/artifactory/auxdata-public/worldcereal-minimal-inference/wc_catboost.onnx\" # NOQA\n\n def __init__(self):\n super().__init__()\n\n self.onnx_session = None\n\n def dependencies(self) -> list:\n return [] # Disable the dependencies from PIP install\n\n def output_labels(self) -> list:\n return [\"classification\", \"probability\"]\n\n def predict(self, features: np.ndarray) -> np.ndarray:\n \"\"\"\n Predicts labels using the provided features array.\n \"\"\"\n import numpy as np\n\n if self.onnx_session is None:\n raise ValueError(\"Model has not been loaded. 
Please load a model first.\")\n\n # Prepare input data for ONNX model\n outputs = self.onnx_session.run(None, {\"features\": features})\n\n # Threshold for binary conversion\n threshold = 0.5\n\n # Extract all prediction values and convert them to binary labels\n prediction_values = [sublist[\"True\"] for sublist in outputs[1]]\n binary_labels = np.array(prediction_values) >= threshold\n binary_labels = binary_labels.astype(\"uint8\")\n\n prediction_values = np.array(prediction_values) * 100.0\n prediction_values = np.round(prediction_values).astype(\"uint8\")\n\n return np.stack([binary_labels, prediction_values], axis=0)\n\n def execute(self, inarr: xr.DataArray) -> xr.DataArray:\n classifier_url = self._parameters.get(\"classifier_url\", self.CATBOOST_PATH)\n\n # shape and indices for output (\"xy\", \"bands\")\n x_coords, y_coords = inarr.x.values, inarr.y.values\n inarr = inarr.transpose(\"bands\", \"x\", \"y\").stack(xy=[\"x\", \"y\"]).transpose()\n\n self.onnx_session = self.load_ort_session(classifier_url)\n\n # Run catboost classification\n self.logger.info(\"Catboost classification with input shape: %s\", inarr.shape)\n classification = self.predict(inarr.values)\n self.logger.info(\"Classification done with shape: %s\", inarr.shape)\n\n classification = xr.DataArray(\n classification.reshape((2, len(x_coords), len(y_coords))),\n dims=[\"bands\", \"x\", \"y\"],\n coords={\n \"bands\": [\"classification\", \"probability\"],\n \"x\": x_coords,\n \"y\": y_coords,\n },\n )\n\n return classification\n\n\ndef apply_udf_data(udf_data: UdfData) -> XarrayDataCube:\n model_inference_class = CroplandClassifier\n\n model_inference = model_inference_class()\n\n # User-defined, model inference class initialized here\n cube = udf_data.datacube_list[0]\n parameters = udf_data.user_context\n\n proj = udf_data.proj\n if proj is not None:\n proj = proj.get(\"EPSG\")\n\n parameters[EPSG_HARMONIZED_NAME] = proj\n\n cube = model_inference._execute(cube, parameters=parameters)\n\n udf_data.datacube_list = [cube]\n\n return udf_data\n"
},
"process_id": "run_udf",
"result": true
}
}
},
"size": [
{
"dimension": "x",
"unit": "px",
"value": 100
},
{
"dimension": "y",
"unit": "px",
"value": 100
},
{
"dimension": "t",
"value": "P1D"
}
]
},
"process_id": "apply_neighborhood"
},
"filterbands1": {
"arguments": {
"bands": [
"classification"
],
"data": {
"from_node": "apply5"
}
},
"process_id": "filter_bands"
},
"filterbbox1": {
"arguments": {
"data": {
"from_node": "mergecubes3"
},
"extent": {
"from_parameter": "spatial_extent"
}
},
"process_id": "filter_bbox"
},
"loadcollection1": {
"arguments": {
"bands": [
"B02",
"B03",
"B04",
"B05",
"B06",
"B07",
"B08",
"B11",
"B12"
],
"featureflags": {
"tilesize": 128
},
"id": "SENTINEL2_L2A",
"properties": {
"eo:cloud_cover": {
"process_graph": {
"lte1": {
"arguments": {
"x": {
"from_parameter": "value"
},
"y": 95
},
"process_id": "lte",
"result": true
}
}
}
},
"spatial_extent": {
"from_parameter": "spatial_extent"
},
"temporal_extent": {
"from_parameter": "temporal_extent"
}
},
"process_id": "load_collection"
},
"loadcollection2": {
"arguments": {
"bands": [
"SCL"
],
"id": "SENTINEL2_L2A",
"properties": {
"eo:cloud_cover": {
"process_graph": {
"lte2": {
"arguments": {
"x": {
"from_parameter": "value"
},
"y": 95
},
"process_id": "lte",
"result": true
}
}
}
},
"spatial_extent": {
"from_parameter": "spatial_extent"
},
"temporal_extent": {
"from_parameter": "temporal_extent"
}
},
"process_id": "load_collection"
},
"loadcollection3": {
"arguments": {
"bands": [
"VH",
"VV"
],
"id": "SENTINEL1_GRD",
"spatial_extent": {
"from_parameter": "spatial_extent"
},
"temporal_extent": {
"from_parameter": "temporal_extent"
}
},
"process_id": "load_collection"
},
"loadcollection4": {
"arguments": {
"bands": [
"DEM"
],
"id": "COPERNICUS_30",
"spatial_extent": {
"from_parameter": "spatial_extent"
},
"temporal_extent": null
},
"process_id": "load_collection"
},
"loadstac1": {
"arguments": {
"bands": [
"precipitation-flux",
"temperature-mean"
],
"featureflags": {
"tilesize": 1
},
"spatial_extent": {
"from_parameter": "spatial_extent"
},
"temporal_extent": {
"from_parameter": "temporal_extent"
},
"url": "https://s3.waw3-1.cloudferro.com/swift/v1/agera/stac/collection.json"
},
"process_id": "load_stac"
},
"mask1": {
"arguments": {
"data": {
"from_node": "loadcollection1"
},
"mask": {
"from_node": "renamelabels1"
}
},
"process_id": "mask"
},
"mask2": {
"arguments": {
"data": {
"from_node": "renamelabels7"
},
"mask": {
"from_node": "apply6"
},
"replacement": 254
},
"process_id": "mask"
},
"mergecubes1": {
"arguments": {
"cube1": {
"from_node": "apply2"
},
"cube2": {
"from_node": "apply3"
}
},
"process_id": "merge_cubes"
},
"mergecubes2": {
"arguments": {
"cube1": {
"from_node": "mergecubes1"
},
"cube2": {
"from_node": "apply4"
}
},
"process_id": "merge_cubes"
},
"mergecubes3": {
"arguments": {
"cube1": {
"from_node": "mergecubes2"
},
"cube2": {
"from_node": "renamelabels5"
}
},
"process_id": "merge_cubes"
},
"reducedimension1": {
"arguments": {
"data": {
"from_node": "loadcollection4"
},
"dimension": "t",
"reducer": {
"process_graph": {
"min1": {
"arguments": {
"data": {
"from_parameter": "data"
}
},
"process_id": "min",
"result": true
}
}
}
},
"process_id": "reduce_dimension"
},
"reducedimension2": {
"arguments": {
"data": {
"from_node": "filterbands1"
},
"dimension": "t",
"reducer": {
"process_graph": {
"mean2": {
"arguments": {
"data": {
"from_parameter": "data"
}
},
"process_id": "mean",
"result": true
}
}
}
},
"process_id": "reduce_dimension"
},
"renamelabels1": {
"arguments": {
"data": {
"from_node": "toscldilationmask1"
},
"dimension": "bands",
"target": [
"S2-L2A-SCL_DILATED_MASK"
]
},
"process_id": "rename_labels"
},
"renamelabels2": {
"arguments": {
"data": {
"from_node": "mask1"
},
"dimension": "bands",
"source": [
"B02",
"B03",
"B04",
"B05",
"B06",
"B07",
"B08",
"B11",
"B12"
],
"target": [
"S2-L2A-B02",
"S2-L2A-B03",
"S2-L2A-B04",
"S2-L2A-B05",
"S2-L2A-B06",
"S2-L2A-B07",
"S2-L2A-B08",
"S2-L2A-B11",
"S2-L2A-B12"
]
},
"process_id": "rename_labels"
},
"renamelabels3": {
"arguments": {