-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathufser.h
7161 lines (6726 loc) · 461 KB
/
ufser.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/** This file is part of the uFser project which is released under the MIT license.
* See file COPYING for full license details.
* Copyright 2024 Ericsson AB
*/
#pragma once
/// @file ufser.h Serialization library.
#include <memory>
#include <chrono>
#include <array>
#include <tuple>
#include <functional>
#include <unordered_map>
#include <optional>
#include <variant>
#include <map>
#include <cstring>
#include <cassert>
#include <string>
#include <vector>
#include <variant>
#include <numeric>
#include <sstream>
#ifdef HAVE_BOOST_PFR
#include <boost/pfr.hpp>
#endif
// Compiler-specific shorthands. When __GNUC__ is defined, ATTR_PURE__ and ATTR_NOINLINE__
// expand to the corresponding __attribute__; otherwise they expand to nothing.
#ifdef __GNUC__
#define ATTR_PURE__ __attribute__((pure))
#define ATTR_NOINLINE__ __attribute__((noinline))
#else
#define ATTR_PURE__
#define ATTR_NOINLINE__
// NOTE(review): this branch is taken for every compiler that does not define __GNUC__,
// yet it includes winsock2.h and links Ws2_32.lib, so it presumably assumes MSVC on
// Windows -- confirm before building with another non-GNU toolchain.
#include <winsock2.h>
#pragma comment(lib, "Ws2_32.lib")
// Map the htobe32/htobe64/be32toh/be64toh names onto htonl/htonll/ntohl/ntohll.
#define htobe32 htonl
#define htobe64 htonll
#define be32toh ntohl
#define be64toh ntohll
// Drop any min/max macros that are defined at this point.
// NOTE(review): presumably the ones introduced by the Windows headers -- confirm.
#undef min
#undef max
#endif
/** @defgroup serialization Serialization library
* @brief Serialization without a schema language for C++ and Python
* Its primary goal is to ease the programmer's work and allow compatibility between
* these languages.
* It is not as complete as boost serialization, for example, only
* a subset of types can be serialized. All types are required to be simple value types.
* Types that you want to put in containers must be default constructible (preferably cheaply).
*
* Basic Types
* ===========
*
* Each type can be represented with a "typestring", where each character represents
* a primitive or compound type. These also show what types can and cannot be serialized.
* Typestrings are prefix codes, that is, you can always tell where they end.
* We have these primitive types:
* - i: integer and unsigned (transmitted as 32-bit),
* - I: uint64 and int64 are transmitted as a separate type (8 bytes)
* - d: floating-point number (transmitted as double)
* - s: string (not null terminated, a byte-array really, convention is UTF-8, if you actually pass a string.)
* - c: bytes and chars as a byte
* - b: `bool` in 1 byte.
* - a: `uf::any` - a special type that can hold any serializable type. Internally its typestring and its
* serialized value is stored.
* - e: `uf::error_value` - the error type used in expected values, see below.
*
* Any 16-bit integer is converted to and from a 32-bit one. For any integer
* signed and unsigned are silently converted to each other. In case of conversion
* 32-bit to 64 (and back), we assume signed.
* All lengths and enums are serialized as 32 bit integers.
* Note that "lc" is serialized byte-wise exactly as an "s". Nevertheless, we keep
* string a separate type, since python for example has a very different notion
* of a list of chars than a string.
* Also, "b" is serialized the same as "c", but we keep the distinction.
* Finally, mXY (map) serializes to the same bytes as lt2XY (a list of pairs), but we
* keep the distinction, so that in Python (and javascript) we can deserialize into a
* dict (object) and not to a list (array).
*
* Compound types
* ==============
*
* - xT: `uf::expected<T>`. This either holds a `T` or a `uf::error_value` in the spirit of the C++23(?)
* `std::expected<T,E>`. `T` must be default constructible.
* - X: `uf::expected<void>`
* - oT: `std::optional<T>`: Either a T or nothing. Smart and raw pointers and `std::optional` serialize to this in C++.
* On deserialization such values can either deserialize back to a smart pointer
* or convert to a variable of T in C++. If a missing value is attempted to deserialize
* to a variable of type T, we get a value_error. In Python empty optionals deserialize
* to None, while in Go we simply panic. T must not be void and must be default constructible.
* - tNT1T2...TN: tuples (fixed number of heterogeneous values), where N is a decimal element count,
* followed by the typestrings of the elements. E.g., t2ii is a pair of two ints.
* t0 is illegal. t1T is always encoded as simply T.
* std::array is serialized as a tuple of same elements (length known at compile time)
* C arrays also (except for `char[]` and `const char[]`, but true for wide chars)
* - lT: lists (variable number of homogeneous values). `std::vector`, `std::list`, `std::set` all can serialize to and from
* a list. Note also that in order to deserialize into `std::vector`/`list`/etc`<T>` `T` shall be a
* default constructible type.
* - mT1T2: maps. For saving more of the type info for dynamic languages, we also
* have a _map_ type, which is encoded the same as list of pairs.
* Note that the key type (`T1`) cannot be a void-like type nor a type that only
* has `uf::expected<void>` in it (like `uf::expected<void>` itself or, for example,
* `std::list<uf::expected<void>>` and similar compound types having no other primitive type).
* Note also that in order to deserialize into `std::map<T1, T2>` both `T1` and `T2` shall be a
* default constructible type.
*
* Empty Python (or JS) lists/arrays or dicts/objects are serialized to type <la> and <maa>,
* since we cannot determine the element type. These empty lists/dicts will deserialize
* into any list/map type in C++ (if appropriate conversion flags enable it).
* However, if you serialize an empty list<int> in C++ you will get a <li>, not <la>.
* On serialization we maintain the order of the
* container (sorted for set and map), as is for vector and list and random
* for unsorted maps and sets. In case of multimaps and sets, values with the
* same keys are serialized.
* On deserialization values are inserted to the container. For lists and
* vectors in the order received, for other containers via `insert()`. If the
* key already exists, insert may not do anything.
*
* Please, please do not override the unary `operator &` for types that you use with the serialization lib.
*
* Void-like types
* ===============
*
* For the type of `void` (and the Python `None` value or JSON's 'null') the type string and serialized
* value bytes are both of zero len. A few other C++ types behave like this, we call them void-like:
* - Empty tuples. Or tuples containing only void-like types.
* - Zero-length `std::array`s (or C arrays) or `std::array`s (or C arrays) of void-like types.
* - Lists of void-like types.
* - Maps with both a void-like key type and mapped type.
*
* If these appear as members of a tuple, they are simply omitted both from the type and the
* serialized value (which is natural as they are of zero length in both). If this makes the tuple
* have just one non-void member, it is encoded as simply that member. If a tuple has only void
* members, it becomes void itself: zero length type and serialized value.
* Deserializing a `void` value is valid into any void type.
* Note that `uf::expected<void>` is not a void type as it may carry some info (the error).
*
* Deserializing views
* ===================
*
* You can deserialize a "view". This has impact only for strings or any. If the type
* you deserialize into contains `std::string_view`/`uf::any_view` objects, those will take values
* pointing to the raw memory you deserialize from. Thus if the raw memory is
* freed your "view" (specifically the string_views in it) will become invalid.
* But OTOH you save a lot of string copies, potentially. All other types
* are copied. `uf::is_deserializable_view_v<T>` is true for types that have
* `std::string_view`s or `uf::any_view`s in it.
* Regular deserialization does not match to types that contain string_views,
* you need to have actual strings that can hold a value. To test if a type
* can be deserialized into you can use `uf::is_deserializable_v<T>` to test.
*
* Pointers
* ========
*
* As a convenience you can serialize from/to smart pointers: `std::unique_ptr`
* and `std::shared_ptr`. These serialize into an optional of the pointed to object.
* Values of `T` also deserialize into smart pointers of the same or compatible type.
* You can also serialize from a raw pointer (as optional), but cannot deserialize into it.
* Two exceptions:
* - a `void*` is not possible to serialize from.
* - `const char *` (and `char*`) will be serialized as a null-terminated string and not
* as a single `optional<char>`.
*
* Serialization of containers
* ===========================
*
* Any type `T` that is not a basic type, not an expected, optional, pointer or tuple and has
* no 'tuple_for_serialization()' free or member function (see structs below) will be
* attempted to be treated as a container.
* The type is considered a map (and get an "m" typestring), if it has `begin()`, `end()`, `size()`
* member functions and `value_type`, `key_type`, `mapped_type` member typedefs.
*
* For serialization of non-map containers (which get the "l" typestring), we seek `begin(const T&)`
* and `end(const T&)` free functions (think ADL), including potentially matching `std::begin()` and
* `std::end()`, which will call the corresponding member function, if it exists.
* (Of course, the type of the element of the container must be serializable itself. We deduce the
* value type from `*begin()`.)
* If we find these, we will iterate over the container's elements and serialize them one-by-one.
* The size of the container is queried before the serialization using the `T::size()` const member if it
* exists, else we use `std::distance(begin(), end())`. So you can pass in a range of forward iterators,
* if you will (but not input iterators, since we will need to compute the size).
*
* On deserialization, we seek `begin(T&)` and `end(T&)` free functions, plus `T::clear()` and
* `T::push_back(value_type&&)` or `T::insert(value_type&&)` or `T::insert(iterator, value_type&&)`
* member functions (probed in this order). We deduce the value type from `*begin()`.
* Naturally the value_type must be deserializable itself - and it also has to be default
* constructible, because there is no mechanism as of yet to construct an object from serialized data.
* If these are found, we call `clear()` at the beginning of deserialization; issue a reserve() if exists
* and then for each element we default construct a `T::value_type`, deserialize it and then move
* `insert()` or move `push_back()` it to the container.
*
* Serialization of structs
* ========================
*
* There is limited support to serialize structs as tuples.
*
* Auto serialization
* ------------------
*
* Simple Aggregate structures are automatically serializable to/from without any user code.
* Specifically, the following conditions must be met for this to work:
* - The struct must be an aggregate, that is
* - No constructors/destructors
* - No virtual functions
* - Only public non-static members
* - No virtual or non-public base classes
* - Only empty base classes (or empty struct with only one non-empty base class)
* This practically means no base classes.
* - No const members.
* - No reference or C-array members
* - Each member must be copy constructible (or move constructible and move assignable)
* - Empty structs are not auto serialized, by design.
* - Structs that look like containers (see "Serialization of containers" above) are
* serialized as containers not as auto serialized structures.
* - Structs that have (any) tuple_for_serialization(void) member/free functions use
* those (see below how) and are not auto serialized. This is to turn auto serialization
* off when doing manual serialization.
*
* In short, you can have simple, public collection of values. You can also have:
* - default initializers
* - static data members
* - member functions
*
* Of course, the members of the structs must also be serializable/deserializable.
*
* If you don't want a structure to be auto serializable (e.g., because you want it to be
* serializable/deserializable only with tags), you can disable it in two ways
* - Add 'using auto_serialization = void;' to the struct; and/or
* - Specialize 'allow_auto_serialization' for your type to false:
* 'template <> constexpr bool allow_auto_serialization<MyType> = false;'
*
* With auto serialize you cannot
* - omit certain members, all will be serialized/deserialized;
* - take actions before and after serialization (like maintaining class invariants or
* locking);
* - specify tags (see below). (But member types having tags will work.)
*
* If you want these, read on to manual serialization below.
*
* Here are a few notes that are not making sense right now, so please read on to the
* next subsections for more details.
* - If you specify only one of the const or non-const tuple_for_serialization()
* function for a struct then auto serialization will be turned off both for serialization
* and deserialization.
* - If you specify tuple_for_serialization(tag) with a tag argument,
* auto serialization will still be turned on, making this struct serializable/deserializable
* without tags. Disable auto serialization if you don't want this.
* - If you specify a before/after_serialization() or after_serialization_xxx() member
* or free function for a struct that is using auto serialization, the above functions
* will NOT be used.
*
* Manual serialization
* --------------------
* To have more control over serialization or to serialize non-aggregate types, add a function
* `auto tuple_for_serialization() const noexcept {return std::tie(member1, member2,..);}`
* to the struct, then the listed members will automatically serialize,
* even if part of a tuple or list (serialize_len and serialize_type will
* also work). You don't need to list all members, just the ones to serialize.
* Note, that you can also use a free `tuple_for_serialization(const T&)`
* function. (Put that into the namespace where `T` is defined, so that ADL find it.)
* If you add a non-const `tuple_for_serialization()` function (or a free variant) then
* deserialization will also work.
* NOTE: The return type of the const and non-const `tuple_for_serialization()` functions
* must not be *exactly* the same. This normally happens automatically if you return
* references to members as one will contain const refs, another non-const refs.
* It is good practice to make these noexcept, but not mandatory.
* \code
* struct mystruct {
* mystruct(std::string text); //non-aggregate type: auto serialization doesn't work
* int a, b, c;
* std::string s;
* other_struct o;
* std::vector<double> vd;
* auto tuple_for_serialization() const noexcept {return std::tie(a,b,c,s,o,vd);}
* auto tuple_for_serialization() noexcept {return std::tie(a,b,c,s,o,vd);}
* };
* \endcode
*
* Never return void from tuple_for_serialization. If you don't want to serialize
* or deserialize anything, return 'std::monostate'. That makes your type void-like.
*
* Avoid circular return types, so do not return anything that will resolve to
* the same type. So implementing a list of ints like
* \code
* struct ilist {
* int i;
* ilist *next;
* auto tuple_for_serialization() const { return std::tie(i, next); }
* };
* \endcode
* will not work as its typestring would be infinitely long. Also avoid re-using
* the type in a member container, such as `std::vector<ilist>` in 'mystruct' above.
*
* Any free function for `tuple_for_serialization()` will be accepted if it is callable
* with a const reference of your type (or non-const lvalue reference for deserialization).
* So instead of the member functions above you can also write
* \code
* auto tuple_for_serialization(const mystruct &m) noexcept {return std::tie(m.a,m.b,m.c,m.s,m.o,m.vd);}
* auto tuple_for_serialization( mystruct &m) noexcept {return std::tie(m.a,m.b,m.c,m.s,m.o,m.vd);}
* \endcode
* This is useful if you use a 3rd party type which you cannot modify, but have access
* to its members. (If you have access to members only through getters/setters read on.)
* Note, that when we search for a `tuple_for_serialization()` we always check matching
* free functions first. So in the above situation adding a `mystruct::tuple_for_serialization()`
* member function will have no effect as the free function takes precedence. We don't warn on
* this condition, so be aware.
* (Same rules apply to before/after_serialization/deserialization below.)
*
* The free function feature may have unintended side effects.
* For example, if you inherit your type B from a type A for which there is a free
* `tuple_for_serialization(A&)`, it will be called for your type 'B'. In such a case, either
* this is what you want (because serialization for B is exactly as for A), or you should
* define a `tuple_for_serialization(B&)` specifically for your type. You can also say
* `tuple_for_serialization(B&)=delete;` to prevent serialization of your type B (or
* to promote a B::tuple_for_serialization() member function).
*
* FYI, to re-iterate some name matching rules of C++, if you have type A, which defines
* both a const and non-const `tuple_for_serialization()` member function
* and in a descendant type B you define only one of them, then the other one will not be
* found for B, unless you say 'using A::tuple_for_serialization;' in the definition of B.
*
* Note that the typestrings of the const and non-const versions may differ
* for serialization and deserialization - these two aspects of a type are
* completely separate in the serialization lib. This has few uses, IMO.
* You may also omit either the const or non-const version, so your type may be only
* serializable from, but not deserializable into (use another type to receive
* serialized versions) or vice versa. However, in most cases
* you probably want the typestring of the two tuples returned from the const
* and non-const version to be the same, so your type is serializable from
* and into the same way. Use the `uf::is_ser_deser_ok_v<T>` type trait
* to check (for any type) if it is both serializable and deserializable
* (as owning) and the two typestrings are the same.
* `static_assert(uf::is_ser_deser_ok_v<mystruct>);`
* You can also use `uf::is_ser_deser_view_ok_v<T>` to test view types.
*
* You may return anything from `tuple_for_serialization()` not just
* a tuple of references, as `std::tie()` would do. This is useful for serializing
* types with non-directly-serializable members, see below.
*
* Note again, however, that the return type of const and non-const tuple_for_serialization()
* functions must be different for deserialization to work. Otherwise it is not possible to
* selectively detect if the const or non-const version exists (and have ADL, that is).
* This normally works automatically if you return tuples to refs to members (they will have
* const or non-const refs).
* Usually a problem when returning by value from both (and using after_deserialization()).
* If you really want to return the same 'Type' from both, use std::tuple<Type> for the const
* version to make the two types technically different.
*
* Simple helpers for non-supported members
* ----------------------------------------
*
* The serialization library provides support for a few cases when a member of a tuple
* is not natively supported.
* - Since in `tuple_for_serialization()` you may return anything, not just a tuple of references
* as std::tie does, a simple helper is available to mix references to members (that
* serialize as-is) and other, non lvalue-reference types.
* Using `uf::tie()`, you can also list computed values, not just members.
* E.g., if you have an `std::filesystem::path file_path` member you can simply write
* \code
* auto tuple_for_serialization() const {return uf::tie(...., file_path.native(), ....);}
* \endcode
* which will insert an std::string
* representation of the path into the returned tuple by value. Then this value will be
* serialized as if it were a member (and use the "s" typestring). This is an easy way to
* do a quick conversion of a member that is otherwise not serializable.
* - To deserialize such string values back into a proper member of non-string type you can
* use `uf::string_deserializer<Func>(Func f)` class. `f` must be a lambda taking a string_view
* parameter and carrying out the deserialization. So to deserialize the above
* `std::filesystem::path file_path` member, just return in the non-const `tuple_for_serialization()`
* \code
* auto tuple_for_serialization() {
* return uf::tie(..., uf::string_deserializer([this](std::string_view s){file_path=s;}), ....);
* }
* \endcode
* If you want to have a view type, use `uf::string_deserializer_view<Func>` instead. Returning this
* as part of the tuple in the non-const tuple_for_serialization(), then the type will be
* deserializable only as a view. (It cannot be used in `uf::deserialize()`, `uf::any::get()`,
* `uf::any::get_as()` calls, only in
* `uf::deserialize_view()` and the `get_view()` or `get_view_as()` variants.)
* - For deserialization back to a C array use
* `uf::array_inserter<T>(T*p, int max_size, int*size=nullptr, bool throv=true)`.
* Specify the location and how many elements the space can accommodate. You can also specify
* a third parameter, where the deserialization will store the number of elements in the input list.
* If the fourth parameter is true, we throw `uf::value_mismatch_error` if the input does not fit into
* max_size. If false, we drop the ones not fitting and size will contain the number of elements in the
* input data, not how many have been stored. These classes can also be returned from
* `tuple_for_serialization()`, making serialization for static arrays composable, see below.
* \code
* struct S {
* static constexpr unsigned MAX = 200;
* int len;
* double d[MAX];
* auto tuple_for_serialization() const {return std::span(d, len);}
* auto tuple_for_serialization() {return uf::array_inserter(d, MAX, &len);}
* };
* \endcode
* More complex examples soon below.
*
* Thread-safe serialization
* -------------------------
*
* Serialization is essentially a two-step process: we query the length and reserve the space
* and then do the actual serialization. Thus, we call `tuple_for_serialization()` twice. (And
* discard its result after use in both cases, so this better be a cheap function.)
* If you need to maintain class integrity in-between these two calls with a lock, you can
* also supply a `before_serialization(void)` const member (or `before_serialization(const T&)`
* free function), which will be called before these two steps. Also `after_serialization(bool)`
* will be called after, allowing you to lock and unlock the mutex, respectively.
* It will be called with a boolean indicating success of the entire serialization operation
* (of a type this type may only be a part of). This allows you to unambiguously keep or pass
* ownership of some resource from the object to its serialized variant.
* (Note that if you provide any of these functions for members of a struct - i.e., what
* `tuple_for_serialization()` returns (or constituent types of a member have them),
* `tuple_for_serialization()` will be called more than twice - to get to call before or
* `after_serialization()` for the members.)
*
* It is guaranteed that for any object we have called `before_serialization()` for,
* `after_serialization(bool)` will also be called (also in case of exceptions, even if
* `before_serialization()` was the function that has thrown).
* The only case when we cannot guarantee such pairing is when `tuple_for_serialization()` of a
* component type does not throw when called as part of the before_serialization pass neither
* when doing length calculation nor when doing the actual serialization, but only when doing
* the after_serialization pass. (Note that all of these are calls to the same const object,
* so different behaviour in terms of throwing has to be very weird, but may happen via
* mutable members (e.g., unlocking a mutable mutex member twice) or via globals, such as
* a logging system throwing.)
* In that case there is no way we can call after_serialization for the result of
* `tuple_for_serialization()`, so we give up calling them for this struct.
* So if you want to throw in `tuple_for_serialization()` do it consistently.
* All in all, best to make it cheap and noexcept.
*
* Thread safe deserialization and maintaining class invariants
* ------------------------------------------------------------
*
* Deserialization is a one-step process, so the non-const `tuple_for_serialization()` is
* called only once. You can also provide either a `after_deserialization_simple()` or an
* `after_deserialization(U&&)` member (or corresponding free functions taking a (first) argument
* of non-const ref to your struct), which will be called after the deserialization into
* what was returned by tuple_for_serialization(). Here are the deserialization steps taken for a struct.
* 1. Call `&&x = tuple_for_serialization();` Note that tuple_for_serialization() may return a reference.
* 2. deserialize the bytes into 'x'.
* 3. If an exception was thrown by step \#1 or \#2, call `after_deserialization_error()`
* if exists (which must be noexcept). Re-throw the exception from step \#1 or \#2.
* 4. Else if `after_deserialization_simple()` exists, call it.
* 5. Else if `after_deserialization(std::move(x))` is callable, call it.
* 6. If steps \#4 or \#5 throw an exception, let it percolate up.
*
* Note that it is easy to mistype some part (name, type of U, constness, etc.) of
* after_deserialization(U&&) and you get no error, a badly formatted after_deserialization() will
* simply not be called. So it is good practice to add
* `static_assert(uf::has_after_deserialization_tag_v<T>);` after you defined these functions.
* Note that you have all the following traits (all of them true if either a member or a free
* function variant exists (with the latter taking precedence over the former if both exist)).
* - uf::has_tuple_for_serialization_tag_v<bool, T>
* - uf::has_before_serialization_tag_v<T>
* - uf::has_after_serialization_tag_v<T>
* - uf::has_after_deserialization_tag_v<T>
* - uf::has_after_deserialization_simple_tag_v<T>
* - uf::has_after_deserialization_error_tag_v<T>
*
* (The term 'tag' in the name of these traits is explained below.)
* Note that these return true even if your struct has
* - no tuple_for_serialization() member with this tag nor without any tags, so these
* functions can never be invoked;
* - no tuple_for_serialization() member at all and is using auto serialization,
* which, again means that these functions are not invoked at all.
*
* Below is a list of typical situations and the best practice.
* - If you only serialize a subset of your member variables and you want to maintain class
* invariants, like filling in the rest or checking, etc.: Provide a `tuple_for_serialization()`
* that `std::tie`s the members to serialize and an `after_deserialization_simple()`, in which you
* can ensure the class invariants after the members listed in `tuple_for_serialization()` were
* deserialized into.
* - If you need to lock the structure for the duration of the deserialization: Add a mutex to
* the data structure; lock it at the beginning of `tuple_for_serialization()`
* (as there is no `before_deserialization()` function looked up); unlock it in both
* `after_deserialization_error()` and `after_deserialization_simple()`/`after_deserialization(U&&)`.
* - If you have members that cannot be serialized directly, but only a transformed version
* of them: See below.
*
* Non-serializable members
* ------------------------
*
* If you have members that cannot be serialized directly, but only a transformed version
* of them, do the following.
* - Serialization: in 'tuple_for_serialization() const' return the transformed version by value.
* - Deserialization: in 'tuple_for_serialization()' return a placeholder of the transformed
* by value; in after_deserialization(U&&) take the transformed version and create the original
* in the member.
* For example, assuming we have an atomic_int (which is not natively serializable, but has a
* trivial conversion to-from int -- you can model more complex cases with this) as a member
* variable.
* \code
* struct S {
* double d;
* std::atomic_int i;
* auto tuple_for_serialization() const noexcept { return uf::tie(d, int(i)); }
* auto tuple_for_serialization() noexcept { return uf::tie(d, int(0)); } //placeholder
* void after_deserialization(std::tuple<double&, int> &&t) {i = std::get<1>(t);}
* };
* \endcode
*
* Tags: Selection of helper functions and providing context
* ---------------------------------------------------------
*
* It is sometimes desirable to have more control over serialization/deserialization
* 1) You may want a type to be serializable in multiple different ways or want to perform
* different side effects at serialization/deserialization; or
* 2) You may want to provide some context at serialization/deserialization, e.g., to have
* a code book that needs to be looked up at both serialization/deserialization.
*
* Both these cases can be solved via tags. Whenever you define any of the 7 helper functions
* (tuple_for_serialization (const/non-const), before/after_serialization and
* after_deserialization(_simple/_error)), you can also provide an argument called
* a tag. This is true for both member and free helper functions. For free functions
* declare the tag argument after the reference to the object serialized, like
* 'tuple_for_serialization(const T&, Tagtype tag)'. For after_deserialization(T&, U&&)
* you should specify it after U&&.
* This way you can specify several of each helper function with different tag types.
* This may even lead to different typestrings, if the return value of
* 'tuple_for_serialization()' depends on the tags. For example, the below struct
* can be both serialized/deserialized as a double or as a string. The use of a
* zero-length 'as_string'/'as_double' structs represents zero runtime overhead,
* they just select the helper function variant.
* \code
* struct S {
* struct as_string {};
* struct as_double {};
* double d;
* auto tuple_for_serialization(as_double) const noexcept { return uf::tie(d); }
* auto tuple_for_serialization(as_double) noexcept { return uf::tie(d); }
* auto tuple_for_serialization(as_string) const { return std::to_string(d); }
* auto tuple_for_serialization(as_string) noexcept { return std::string(); }
* void after_deserialization(std::string &&s, as_string) noexcept { d = std::atof(s.c_str()); }
* };
* \endcode
*
* Then, when invoking any serialization/deserialization operation, you can specify a list of tags.
* (The list of tags must be preceded by uf::use_tags to avoid misunderstanding the function
* arguments.)
* As a result, whenever a helper function needs to be called, the one with the tag type on
* the list will be called. If more than one matches a type on the tag list, tags earlier in the
* list have precedence. If the type does not have a helper function with any of the tags, the
* version of the helper functions without a tag is invoked. If that does not exist, the type
* is not serializable/deserializable with this tag list.
* Thus, for the above type
*\code
* struct S s{42.42};
* uf::serialize_type<S>() //error: no tags and no 'tuple_for_serialization() const' (without a tag)
* uf::serialize_type<S, int>() //error: no tags and no 'tuple_for_serialization() const' or 'tuple_for_serialization(int)' const
* uf::serialize_type<S, S::as_string>() //good: yields "s"
* uf::serialize_type<S, S::as_double>() //good: yields "d"
* uf::serialize(s) //error: not serializable without tags
* uf::serialize(s, uf::use_tags, S::as_string()) //good, returns a serialized string
* uf::serialize(s, uf::use_tags, S::as_string(), int()) //good, returns a serialized string, the tag 'int' is unused
* uf::serialize(s, uf::use_tags, S::as_string(), S::as_double()) //good, returns a serialized string, the tag 'S::as_double' has lower precedence
* uf::any a1(s); //error: cannot serialize 's' without tags
* uf::any a2(s, uf::use_tags, S::as_string()); //good, 'a2' now contains a string
* a2.get_as<std::string>(); //good, we can get it out.
* uf::any a3(3.14); //a3 now contains a double
* a3.get(s); //error: cannot get any value into 's' without a tag
* a3.get(s, uf::allow_converting_all, uf::use_tags, S::as_string()); //error: 's' with a tag 'S::as_string' expects a string. This throws a type_mismatch_error 'd'->'s'
* a3.get(s, uf::allow_converting_all, uf::use_tags, S::as_double()); //good: 's' with a tag 'S::as_double' expects a double
* a3.get_as<S>(uf::allow_converting_all, uf::use_tags, S::as_double());//good, too
* uf::any a4(42); //a4 now contains an int
* a4.get(s); //error: cannot get any value into 's' without a tag
* a4.get(s, uf::allow_converting_all, uf::use_tags, S::as_string()); //error: 's' with a tag 'S::as_string' expects a string. This throws a type_mismatch_error 'i'->'s'
* a4.get(s, uf::allow_converting_double, uf::use_tags, S::as_double());//good: 's' with a tag 'S::as_double' expects a double and we can convert an int
* a4.get(s, uf::allow_converting_none, uf::use_tags, S::as_double()); //error: 's' with a tag 'S::as_double' expects a double and we can NOT convert an int
*\endcode
*
* Note that helper functions are selected individually. Assume you provide 2 versions of
* 'tuple_for_serialization()' one with tag 'Tag' and one with no tags and only one version
* of 'after_deserialization_simple()' with 'Tag' to maintain a class invariant.
* In this case the type will be deserializable with any set of tags due to the existence of
* the tag-less 'tuple_for_serialization()'. However, after_deserialization_simple() will only be
* called when the tag list includes 'Tag'.
* NOTE: As an exception to the above rule an 'after_deserialization(U&&, tag)' function will only be
* detected if there is a 'tuple_for_serialization(tag)' with the same tag AND the latter returns 'U'.
* Thus, specifying a tagless 'U tuple_for_serialization()' and 'after_deserialization(U&&, tag)'
* will never call the latter even if the tag is specified. Use uf::has_after_deserialization_tag_v<T, tag>
* to check. It is true only if 'U tuple_for_serialization(tag)' and 'after_deserialization(U&&, tag)'
* are defined (either as member or free functions).
*
* In all the examples above tags were zero-length structs. However, tags can actually carry a value.
* This is useful if you want to provide some context to the serialization/deserialization
* process. But be aware that internally these tags are always passed by value, thus they have
* to be copy constructable, preferably cheaply. If you want to pass a larger const value around,
* use a 'const Context*' as the tag type. If you also want to update the context, pass a non-const
* pointer around. Note that you do not need smart pointers for this, as during the serialization/
* deserialization process, ownership will not change.
* (The decision of not to pass the tags around by (any kind of) reference was made
* so that zero-length tags remain overhead-free. As a result you have to use pointers for any larger
* context.)
*
* Note well: Try avoiding tags that are convertible to each other - it will likely trigger unwanted
* functions (like int and double) and it will be hard to debug.
* Note, as well: If the tag set provided allows an 'after_deserialization()' function (either because
* there is an 'after_deserialization()' with one of the tags or there is a tagless one), then
* 'after_deserialization_simple()' functions are not considered at all - even if there is one
* with a tag that is on the tag list.
*
* Debugging
* =========
*
* When you find that serialization of one of your types doesn't work, use the following type traits
* to see where is the problem. These type traits are always true (thus they shall be used in
* static_assert()s), but compiling them will trigger other static assertions explaining what is the
* cause of the problem.
* @code
* uf::is_ser_deser_ok_v<T, tags...> //Type is both serializable and deserializable and the two typestrings are the same
* uf::is_ser_deser_view_ok_v<T, tags...> //Type is both serializable and deserializable as a view and the two typestrings are the same
* uf::is_ser_ok_v<T, tags...> //Type is serializable
* uf::is_deser_view_ok_v<T, tags...> //Type is deserializable as a view
* uf::is_deser_ok_v<T, tags...> //Type is deserializable as owning
* @endcode
*
* When decoding the error messages you get, consider the following example.
* @code
* struct test_bad {
* test tuple_for_serialization() { return 5; }
* };
* static_assert(uf::is_ser_deser_ok_v<std::vector<test_bad>>);
* @endcode
* This gives the following error messages
* @code{.unparsed}
* 1. ufserialize.h: In instantiation of 'constexpr bool uf::impl::is_serializable_f() [with T = const test_bad; bool emit_error = true; tags = {}]':
* 2. ufserialize.h:736:108: required from 'constexpr bool uf::impl::is_ser_deser_ok_f() [with T = std::vector<test_bad>; bool as_view = false; bool emit_error = true; tags = {}]'
* 3. ufserialize.h:6978:95: required from 'constexpr const bool uf::is_ser_deser_ok_v<std::vector<test_bad> >'
* 4. example_code.cc:4:23: required from here
* 5. ufserialize.h:1857:55: in 'constexpr' expansion of 'uf::impl::is_serializable_f<std::vector<test_bad>, true>()'
* 6. ufserialize.h:777:35: error: static assertion failed: Structure has no tuple_for_serialization() const member/free function, nor seem to be a container.
* @endcode
* - Line #4 (the last 'required from' line) shows the location of the check.
* - Then we see in line #3 that the type in question is 'std::vector<test_bad>'. This may be a structure of many components,
* walking up the 'required_from' list we get deeper into the type towards the problem.
* - Then the first line shows the type of the actual problem type: 'T = const test_bad'.
* - The last line then tells what is the problem: "Structure has no tuple_for_serialization() const member/free function". Because we only
* have a non-const member and for serialization we need a const version.
*
* Default values
* ==============
*
* Each type has a default value (usually zero and empty). You can create a serialized value for a
* type representing its default value.
* - uf::any::create_default(typestring)
* - std::string default_serialized_value(typestring)
* Default values are:
* - b: false
* - c, i, I, d: the zero value (-0.0 for double, to be precise).
* - s, l, m, o: Empty string, list, map or optional (of whatever type).
* - a: an any containing void.
* - x: An expected containing a default value for its type.
* - e: Empty type, id and message and a void any.
*
* Type conversion at deserialization
* ==================================
*
* Type conversion can be requested during deserialization if the typestring of the value to
* deserialize does not match the type to deserialize into.
* Note optionals always convert to their carried value and vice versa, but not to an expected.
* The uf::serpolicy enum can be used to govern what conversions are allowed. See its documentation for
* details.
*
* Note on void-like tuple members. Sometimes a tuple member may become void-like during
* conversion. E.g., t2Xi will happily convert to 'i' if the 'X' holds a value (void).
* (If the policy includes 'allow_converting_expected'.)
* The pathological case of t2Xli will therefore happily convert to 'li' with 'allow_converting_expected',
* but also to 'la' with 'allow_converting_expected' and 'allow_converting_any' - by wrapping the
* integers in the list to an uf::any.
* On the other hand, if you include 'allow_converting_tuple_list', then t2Xli will be converted to a
* list member-by-member. This results in a list that has always the same number of elements as many
* members the tuple had. That is, it will include an uf::any holding a uf::expected<void> and a second
* uf::any holding a std::vector<int>. (Note that std::tuple<std::monostate,int,int> counts as two
* members, since std::monostate is known to be void-like already during compilation, so its type is
* 't2ii' and will convert to a list of two elements.) The above also means that 't2Xli' will not
* convert to 'li' if 'allow_converting_tuple_list' is specified ('allow_converting_all' will also
* include it), because member-by-member conversion fails as none of the tuple's members can be
* converted to an integer.
*
* Error handling
* ==============
*
* Serialization related errors are thrown as exceptions. All of the below are
* descendants of uf::value_error, which can be used to catch all of them.
* - uf::value_mismatch_error: Thrown when a serialized value does not match a typestring
* (or a C++ type). Thrown in deserialization, serialize_scan, printing.
* - uf::typestring_error: When a typestring is invalid (invalid character, no number after 't', etc.)
* - uf::type_mismatch_error: Thrown when conversion is not possible
* - uf::expected_with_error: When we need to convert an expected value to their holding type
* (xT->T, such as xi->i), but the expected contains an error. This exception holds the errors,
* (all of them if there was more than one such occurrence).
* - uf::not_serializable_error: Thrown when serializing invalid expected:s
* or when a non-serializable Python object is serialized.
*
* In these errors (the first 3), we display the typestring with an asterisk marking where the
* error happened. E.g, on conversion, we say: could not convert <t2s*i> -> <t2s*s>, showing that
* it was the second element of the tuple failed conversion. Note that during conversion void-like
* values may simply decay, so it is possible to convert a 3-element tuple to a 2-element one.
* So converting t3as*a->t2is* does not fail upfront, since it may succeed if the first any holds
* an integer and the second holds a void and we allow any packing/unpacking during conversion
* (uf::allow_converting_any). In some cases we need to parse the content of 'any's, and some
* type error may happen there, this is indicated with a parenthesis showing the type inside
* the any. In the above example if the first any contained a string, we would get conversion
* error: t3a(*s)sa -> t2*is, showing that the string inside the any cannot be converted to an int.
* Note that if you do not allow any unpacking, the above would result in error t3*asa->t2*is
* (even if the any contains an int), but we would also indicate that uf::allow_converting_any
* policy would allow conversion (of this particular bit).
*
* API levels
* ==========
*
* The API has three levels.
* - Level 1: The lowest level, recursively callable serialization, deserialization, print and parse
* functions, with a lot of technical parameters, coded to be easy to optimize and
* do not fully honour before* and after* functions. Should not be used from user functions.
* These are in namespace uf::impl:: This level is not centrally documented.
* - Level 2: functions to serialize, deserialize, give the typestring, parse or print serialized values.
* These no longer lend themselves to recursive calling, but are honouring before* after*
* helper functions fully and are safe to use from user functions. These are in namespace uf::
* This level is documented below.
* - Level 3: uf::any and uf::any_view, which package the above operations conveniently into a class
* with little loss of performance compared to level 2. This level is documented at `uf::any` and
* `uf::any_view`;
*
* The Level 2 API is as follows:
* - `string default_serialized_value(typestring)`
* Creates a string representing the serialized version of the default value for `typestring`.
* - `string_view serialize_type(const T&)`
* - `serialize_type<T>()`
* - `string_view serialize_type(const T&, uf::use_tags, tags...)`
* - `serialize_type<T, tags...>()`
* Produces a typestring of what this type will serialize into using these tags.
* - You also have the same variants for deserialize_type(), which gives you what typestring
* can be deserialized into this variable.
* - `string serialize(const T&t)`
* - `string serialize(const T&t, use_tags, tags...)`
* Allocates memory and encodes `t` (using the given tags).
* - `void serialize(Alloc alloc, const T&t)`
* - `void serialize(Alloc alloc, const T&t, uf::use_tags, tags...)`
* Lets the user allocate memory and encodes `t`. `alloc` is a char*(size_t) function
* taking the length and returning a char pointer where the serialized data has to be placed.
* - `string_view deserialize_as<T>(string_view s, bool allow_longer=false)`
* - `string_view deserialize_as<T>(string_view s, bool allow_longer, uf::use_tags, tags...)`
* - `string_view deserialize_view_as<T>(string_view s, bool allow_longer=false)`
* - `string_view deserialize_view_as<T>(string_view s, bool allow_longer, uf::use_tags, tags...)`
* Deserializes s into type `T` using the tags assuming s is a serialized form of
* deserialize_type<T, tags...>. Else a value_mismatch_error is thrown.
* If `allow_longer` is true, we accept if data remains after the deserialization.
* This can be used to deserialize only the beginning of a tuple.
* No conversions applied, the types are expected to match completely.
* The `view` variants allow `T` to be a view type and contain any_view or string_view members
* that don't own the data. Deserializing as a view is cheaper, as no memory needs allocation
* but the original data must outlive the deserialized variable.
* - `T deserialize_convert_as<T>(string_view s, string_view from_type,
* serpolicy policy=all, bool allow_longer=false)`
* - `T deserialize_convert_as<T>(string_view s, string_view from_type,
* serpolicy policy=all, bool allow_longer, uf::use_tags, tags...)`
* - `T deserialize_view_convert_as<T>(string_view s, string_view from_type,
* serpolicy policy=all, bool allow_longer=false)`
* - `T deserialize_view_convert_as<T>(string_view s, string_view from_type,
* serpolicy policy=all, bool allow_longer, uf::use_tags, tags...)`
* Deserialize either as owning or as a view from `s` as serialized value (assuming it
* is of type `from_type`) using `policy`. If the deserialize type of `T` (with `tags`) is
* not exactly the same as `from_type`, we apply conversion. This is slower a bit.
* - `string_view deserialize(string_view s, T&v, bool allow_longer=false)`
* - `string_view deserialize(string_view s, T&v, bool allow_longer, uf::use_tags, tags...)`
* - `string_view deserialize_view(string_view s, T&v, bool allow_longer=false)`
* - `string_view deserialize_view(string_view s, T&v, bool allow_longer, uf::use_tags, tags...)`
* - `string_view deserialize_convert(string_view s, string_view from_type, T&v,
* serpolicy policy=all, bool allow_longer=false)`
* - `string_view deserialize_convert(string_view s, string_view from_type, T&v,
* serpolicy policy=all, bool allow_longer, uf::use_tags, tags...)`
* - `string_view deserialize_view_convert(string_view s, string_view from_type, T&v,
* serpolicy policy=all, bool allow_longer=false)`
* - `string_view deserialize_view_convert(string_view s, string_view from_type, T&v,
* serpolicy policy=all, bool allow_longer, uf::use_tags, tags...)`
* These variants deserialize into an existing lvalue as opposed to returning the
* deserialized value. This also means that you do not have to specify the type
* explicitly as a template parameter. We return the serialized data remaining after
* deserialization. If allow_longer==false, this is empty. Else it is the back of `s`.
* - `std::optional<type_mismatch_error>
* cant_convert(string_view from_type, string_view to_type, serpolicy policy);`
* Tells if one type is convertible to another using `policy`. It cannot check one thing:
* If we allow any to be converted to other types, we cannot check if the any will actually
* contain a type compatible with its target. So we assume so here. To check if the value is
* also known use
* - `std::optional<std::variant<type_mismatch_error, expected_with_error>>
* cant_convert(string_view from_type, string_view to_type, serpolicy policy, string_view from_value);`
* If conversion CANNOT happen, it returns the exception that would be thrown (but does not throw).
* Best used like `if (auto err = cant_convert()) {handle_error(*err)};`
* Note that a bad typestring always throws a `uf::typestring_error` and mismatching `from_type` and
* `from_value` always throws a `uf::value_mismatch_error`.
* - `std::pair<std::string, bool>
* convert(string_view from_type, string_view to_type, serpolicy policy, string_view from_value);`
* Converts a serialized value of type `from_type` to `to_type`. Throws if the conversion is not possible with the
* policy specified, if expecteds to be converted to their contained type contain errors, if one of the typestrings
* is invalid or if the value does not fit the type. Returns the empty string and a true, if the converted value is the same as `from_value`.
* - `string serialize_print(T&t, json_like = false, max_len = 0, chars = {}, escape_char=`%`)
* - `string serialize_print(T&t, json_like = false, max_len = 0, chars = {}, escape_char=`%`,
* uf::use_tags, tags...)`
* Prints the value of `t` for humans.
* - `string serialize_print_by_type(string_view type, string_view serialized, json_like = false, max_len = 0, chars = {}, escape_char=`%`)`
* Prints a serialized type for humans. You can specify a maximum length and characters to escape.
* - `size_t parse_type(string_view type)`: Parses a type string to its end.
* Since type strings are prefix codes, we can tell accurately where it ends.
* If not a valid typestring, returns 0 (~it could only read the void type)
* - `is_serializable_v<T, tags...>`
* - `is_deserializable_v<T, tags...>`
* - `is_deserializable_view_v<T, tags...>`
* Tells us if a type is possible to serialize or to deserialize into (using these tags).
* - `has_tuple_for_serialization_tag_v<deser, T, tags...>` //True if T has tuple_for_serialization() (deser=false: const; deser=true: non-const)
* - `has_before_serialization_tag_v<T, tags...>` //True if T has before_serialization()
* - `has_after_serialization_tag_v<T, tags...>` //True if T has after_serialization(bool, tags...)
* - `has_after_deserialization_tag_v<T, tags...>` //True if T has after_deserialization(U&&) (where U= return type of tuple_for_serialization())
* - `has_after_deserialization_simple_tag_v<T, tags...>` //True if T has after_deserialization()
* - `has_after_deserialization_error_tag_v<T, tags...>` //True if T has after_deserialization_error()
* Verify that user supplied member or free functions actually exist. The `tag` is part of the name
* to remind you that these apply only to a particular set of tags. If tags... is empty then the
* non tagged version.
* - `is_ser_deser_ok_v<T, tags...>` //Type is both serializable and deserializable and the two typestrings are the same
* - `is_ser_deser_view_ok_v<T, tags...>` //Type is both serializable and deserializable as a view and the two typestrings are the same
* - `is_ser_ok_v<T, tags...>` //Type is serializable
* - `is_deser_view_ok_v<T, tags...>` //Type is deserializable as a view
* - `is_deser_ok_v<T, tags...>` //Type is deserializable as owning
* These always return true, but static_assert()ing these will give detailed errors on problems with a type.
*
* Use of `std::tie` helps a lot. Simply serializing a string and an int
* is easy: `str = serialize(tie(s,i));` Then getting them back is also easy:
* `deserialize(str, tie(s,i));`
*/
namespace uf
{
/** @defgroup tools Tools
* @{*/
// Builds one std::string by streaming every argument, in order, through operator<<.
// This is slow + incomplete: we are waiting for std::fmt
template<typename... TT>
auto concat(TT const&... tt)
{
    std::ostringstream stream;
    //One insertion per argument, left to right; with no arguments nothing is written.
    (static_cast<void>(stream << tt), ...);
    return stream.str();
}
/** Base class for all ufser errors and exceptions. */
struct error : public std::logic_error { using logic_error::logic_error; };
/** Serialization or type mismatch exceptions.
 * It can take 2 types with positions. Descendants may use one or both,
 * - value_mismatch_error uses one or none.
 * - typestring_error uses one.
 * - type_mismatch_error and expected_with_error uses two.
 * - not_serializable_error uses none.
 * It has a mechanism to store more than one position in each type. This is used to mark
 * problematic expecteds in expected_with_error.
 * Positions are stored as uint16_t. The constructor clamps larger values (including
 * std::string::npos, i.e., 'no position') to 65535; format() draws no caret for a
 * position that lies beyond the end of the type.*/
struct value_error : public error {
    /** A typestring together with the character positions to mark in it. */
    struct type_pos {
        /** Largest storable position; also what 'no position' is stored as. */
        static constexpr uint16_t no_pos = std::numeric_limits<uint16_t>::max();
        std::string type;                ///<The typestring (empty means void).
        // NOTE(review): recent libc++ releases dropped the generic std::char_traits
        // template, so basic_string<uint16_t> may not build there - confirm for your toolchain.
        std::basic_string<uint16_t> pos; //use basic_string for SSO
        /** Returns the type (or "void" if empty) with a '*' inserted at each position
         * that is not beyond its end.
         * @param [in] front_caret If false, positions equal to zero get no '*'.*/
        [[nodiscard]] std::string format(bool front_caret) const
        {
            std::string ret{type.length() ? type : "void"};
            //Walk the positions backwards: an insertion then does not shift the
            //(presumably smaller) positions still to be handled.
            for (auto p = pos.rbegin(); p != pos.rend(); ++p)
                if ((front_caret || *p > 0) && *p <= ret.length())
                    ret.insert(ret.begin() + *p, '*');
            return ret;
        }
        /** True if we have no type, no position or only a single position at
         * the front or way larger than the type length. */
        [[nodiscard]] bool front_only() const noexcept { return type.empty() || pos.size() == 0 || (pos.size() == 1 && (pos.front() == 0 || pos.front()>type.size())); }
        /** Moves position 'p' towards the back by 'by' characters, saturating at 'no_pos'.
         * A plain addition would wrap around: the 'no position' marker (65535) would
         * turn into a small, valid looking position and produce a bogus caret.*/
        static void shift(uint16_t &p, size_t by) noexcept
        {
            p = by >= size_t(no_pos - p) ? no_pos : uint16_t(p + by);
        }
        /** Inserts 'c' at the front of the type and moves all positions by one. */
        void prepend(char c) {
            type.insert(type.begin(), c);
            for (auto &p : pos) shift(p, 1);
        }
        /** Inserts 's' at the front of the type and moves all positions by its length. */
        void prepend(std::string_view s) {
            type.insert(0, s);
            for (auto &p : pos) shift(p, s.size());
        }
    };
    std::string my_what;            ///<what() returns a pointer into this
    std::string msg;                ///<Informational message before types.
    std::array<type_pos, 2> types;  ///<The two types with position of error
    [[nodiscard]] value_error(const value_error &) = default;
    [[nodiscard]] value_error(value_error &&) noexcept = default;
    value_error &operator=(const value_error &) = default;
    value_error &operator=(value_error &&) noexcept = default;
    const char *what() const noexcept override { return my_what.c_str(); }
    /** Regenerates the text returned by what() from 'msg' and 'types'.
     * %1 will be replaced to formatted type1
     * %2 will be replaced to formatted type2
     * @param [in] format If non-empty, it replaces 'msg'. If empty and we have a first
     *   type that 'msg' does not mention via %1, a " (<%1>)" or " (<%1> -> <%2>)"
     *   suffix is appended to 'msg'.
     * The placeholders are expanded in a single pass over 'msg'; the inserted type
     * text is never re-scanned. (Types may hold arbitrary, even invalid, text, so a
     * type containing "%1" or "%2" must neither loop forever nor get expanded again.)*/
    virtual void regenerate_what(std::string_view format ={})
    {
        if (format.length()) msg = format;
        else if (types[0].type.length() && msg.find("%1") == std::string::npos) {
            //Add typestring if we have type(s) but not part of message.
            if (types[1].type.length() && msg.find("%2") == std::string::npos)
                msg.append(" (<%1> -> <%2>)");
            else
                msg.append(" (<%1>)");
        }
        const bool front_caret = types[0].front_only() && types[1].front_only();//dont show caret at the beginning of both
        const std::array<std::string, 2> shown = {types[0].format(!front_caret),
                                                  types[1].format(!front_caret)};
        my_what.clear();
        my_what.reserve(msg.size() + shown[0].size() + shown[1].size());
        for (size_t i = 0; i < msg.size(); ++i) {
            const bool placeholder = msg[i] == '%' && i + 1 < msg.size()
                                     && (msg[i + 1] == '1' || msg[i + 1] == '2');
            if (placeholder) {
                my_what += shown[msg[i + 1] - '1'];
                ++i; //skip the digit, too
            } else
                my_what += msg[i];
        }
    }
    /** Encapsulates and extends the type in type[0] assuming encapsulation in an 'any'.
     * Useful for operations (scanning, printing), which report only the remaining type in their errors
     * at the lowest level recursive functions. This is perhaps
     * best explained via an example. Assume we are working on a 't2ai' type (scanning its value,
     * converting or printing it) where the 'a' contains a map: 'mad' and there is one element in the
     * map, where the 'any' contains an invalid typestring: "@". Once that is detected and a value_error
     * (a typestring_error to be specific) is formed at the bottom layer this function will be called
     * from the place processing the map as follows
     * => type[0] = "@" (pos=0), original_inner_type="@", remaining_inner_type="", remaining_outer_type="d"
     * and it will update type[0] to be "(@)d" (pos=1).
     * Then, in the place, where we process the outer tuple, it will be called again
     * => type[0] = "(@)d" (pos=3), original_inner_type="mad", remaining_inner_type="d", remaining_outer_type="i"
     * and it will update type[0] to be "(ma(@)d)i" (pos=4).
     * Then the top level function will prepend the processed part of the outer type: t2a, resulting in the
     * type in the error message: t2a(ma(*@)d)i.
     * Mechanically: the part of 'original_inner_type' that precedes 'remaining_inner_type'
     * and a '(' are prepended to types[0], then a ')' and 'remaining_outer_type' are appended
     * and what() is regenerated.
     * @returns *this to allow chaining.*/
    value_error &encaps(std::string_view original_inner_type, std::string_view remaining_inner_type,
                        std::string_view remaining_outer_type) {
        const size_t consumed = original_inner_type.length() - remaining_inner_type.length();
        types[0].prepend(original_inner_type.substr(0, consumed)); //also updates 'pos'
        types[0].prepend('(');
        types[0].type.push_back(')');
        types[0].type.append(remaining_outer_type);
        regenerate_what();
        return *this;
    }
    /** Prepend the prefix of the 'original_type' not part of 'remaining_type'.
     * Assumes 'original_type' ends in 'remaining_type'.
     * @returns *this to allow chaining.*/
    value_error &prepend_type0(std::string_view original_type, std::string_view remaining_type) {
        const size_t consumed = original_type.length() - remaining_type.length();
        types[0].prepend(original_type.substr(0, consumed));
        regenerate_what();
        return *this;
    }
    /** Throws this error; never returns. Implemented by each concrete error type.*/
    [[noreturn]] virtual void throw_me() const = 0;
    /** Appends 'message' to the informational message and regenerates what().
     * @returns *this to allow chaining.*/
    value_error &append_msg(std::string_view message) {
        msg.append(message);
        regenerate_what();
        return *this;
    }
protected:
    /** Only descendants can be created.
     * @param [in] _format The message; may contain %1 and %2, see regenerate_what().
     * @param [in] t1 The first type (may be empty).
     * @param [in] t2 The second type (may be empty).
     * @param [in] pos_t1 Position of the error in 't1'. Clamped to 65535.
     * @param [in] pos_t2 Position of the error in 't2'. Clamped to 65535.*/
    value_error(std::string_view _format,
                std::string_view t1, std::string_view t2,
                size_t pos_t1 = std::string::npos, size_t pos_t2 = std::string::npos) :
        error(std::string{}), msg(_format)
    {
        constexpr size_t max_pos = type_pos::no_pos;
        types ={type_pos{ std::string(t1), {static_cast<uint16_t>(std::min(max_pos, pos_t1))} },
                type_pos{ std::string(t2), {static_cast<uint16_t>(std::min(max_pos, pos_t2))} }};
        regenerate_what();
    }
};
/** Type mismatch exception, the value_error kind that uses both type slots.*/
struct type_mismatch_error : public value_error {
    /** Forwards everything unchanged to the value_error constructor.
     * @param [in] _format The message of the error.
     * @param [in] t1 The first type.
     * @param [in] t2 The second type.
     * @param [in] pos_t1 Position of the error in 't1' (npos by default).
     * @param [in] pos_t2 Position of the error in 't2' (npos by default).*/
    [[nodiscard]] type_mismatch_error(std::string_view _format,
                                      std::string_view t1, std::string_view t2,
                                      size_t pos_t1 = std::string::npos,
                                      size_t pos_t2 = std::string::npos)
        : value_error(_format, t1, t2, pos_t1, pos_t2)
    {
    }
    [[nodiscard]] type_mismatch_error(type_mismatch_error&&) noexcept = default;
    [[nodiscard]] type_mismatch_error(const type_mismatch_error&) = default;
    type_mismatch_error& operator=(type_mismatch_error&&) noexcept = default;
    type_mismatch_error& operator=(const type_mismatch_error&) = default;
    /** Throws a copy of this object as a type_mismatch_error; never returns.*/
    [[noreturn]] void throw_me() const override
    {
        throw *this;
    }
};
/** Error for an invalid typestring. Only the first type slot of value_error is filled.*/
struct typestring_error : public value_error {
    /** Forwards to the value_error constructor with an empty second type.
     * @param [in] _msg The message of the error.
     * @param [in] type The typestring the error refers to.
     * @param [in] pos_type Position of the error in 'type' (npos by default).*/
    [[nodiscard]] explicit typestring_error(std::string_view _msg, std::string_view type,
                                            size_t pos_type = std::string::npos)
        : value_error(_msg, type, std::string_view{}, pos_type)
    {
    }
    [[nodiscard]] typestring_error(typestring_error&&) noexcept = default;
    [[nodiscard]] typestring_error(const typestring_error&) = default;
    typestring_error& operator=(typestring_error&&) noexcept = default;
    typestring_error& operator=(const typestring_error&) = default;
    /** Throws a copy of this object as a typestring_error; never returns.*/
    [[noreturn]] void throw_me() const override
    {
        throw *this;
    }
};
/** When a value does not match its type string.
* We may construct it with or without a type string.*/
struct value_mismatch_error : public value_error {
[[nodiscard]] value_mismatch_error(std::string_view _msg) : value_error(_msg, {}, {}) {}
[[nodiscard]] value_mismatch_error(std::string_view _msg, std::string_view type, size_t pos_type = std::string::npos)
: value_error(_msg, type, {}, pos_type) {}
[[nodiscard]] value_mismatch_error(const value_mismatch_error&) = default;
[[nodiscard]] value_mismatch_error(value_mismatch_error&&) noexcept = default;
value_mismatch_error& operator=(const value_mismatch_error&) = default;
value_mismatch_error& operator=(value_mismatch_error&&) noexcept = default;
[[noreturn]] void throw_me() const override { throw *this; };