22
22
#include "../perf.h"
23
23
#include "session.h"
24
24
#include "machine.h"
25
+ #include "sort.h"
25
26
#include "tool.h"
26
27
#include "event.h"
27
28
#include "evlist.h"
@@ -115,6 +116,9 @@ struct intel_pt_queue {
115
116
void * decoder ;
116
117
const struct intel_pt_state * state ;
117
118
struct ip_callchain * chain ;
119
+ struct branch_stack * last_branch ;
120
+ struct branch_stack * last_branch_rb ;
121
+ size_t last_branch_pos ;
118
122
union perf_event * event_buf ;
119
123
bool on_heap ;
120
124
bool stop ;
@@ -675,6 +679,19 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
675
679
goto out_free ;
676
680
}
677
681
682
+ if (pt -> synth_opts .last_branch ) {
683
+ size_t sz = sizeof (struct branch_stack );
684
+
685
+ sz += pt -> synth_opts .last_branch_sz *
686
+ sizeof (struct branch_entry );
687
+ ptq -> last_branch = zalloc (sz );
688
+ if (!ptq -> last_branch )
689
+ goto out_free ;
690
+ ptq -> last_branch_rb = zalloc (sz );
691
+ if (!ptq -> last_branch_rb )
692
+ goto out_free ;
693
+ }
694
+
678
695
ptq -> event_buf = malloc (PERF_SAMPLE_MAX_SIZE );
679
696
if (!ptq -> event_buf )
680
697
goto out_free ;
@@ -732,6 +749,8 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
732
749
733
750
out_free :
734
751
zfree (& ptq -> event_buf );
752
+ zfree (& ptq -> last_branch );
753
+ zfree (& ptq -> last_branch_rb );
735
754
zfree (& ptq -> chain );
736
755
free (ptq );
737
756
return NULL ;
@@ -746,6 +765,8 @@ static void intel_pt_free_queue(void *priv)
746
765
thread__zput (ptq -> thread );
747
766
intel_pt_decoder_free (ptq -> decoder );
748
767
zfree (& ptq -> event_buf );
768
+ zfree (& ptq -> last_branch );
769
+ zfree (& ptq -> last_branch_rb );
749
770
zfree (& ptq -> chain );
750
771
free (ptq );
751
772
}
@@ -876,6 +897,57 @@ static int intel_pt_setup_queues(struct intel_pt *pt)
876
897
return 0 ;
877
898
}
878
899
900
+ static inline void intel_pt_copy_last_branch_rb (struct intel_pt_queue * ptq )
901
+ {
902
+ struct branch_stack * bs_src = ptq -> last_branch_rb ;
903
+ struct branch_stack * bs_dst = ptq -> last_branch ;
904
+ size_t nr = 0 ;
905
+
906
+ bs_dst -> nr = bs_src -> nr ;
907
+
908
+ if (!bs_src -> nr )
909
+ return ;
910
+
911
+ nr = ptq -> pt -> synth_opts .last_branch_sz - ptq -> last_branch_pos ;
912
+ memcpy (& bs_dst -> entries [0 ],
913
+ & bs_src -> entries [ptq -> last_branch_pos ],
914
+ sizeof (struct branch_entry ) * nr );
915
+
916
+ if (bs_src -> nr >= ptq -> pt -> synth_opts .last_branch_sz ) {
917
+ memcpy (& bs_dst -> entries [nr ],
918
+ & bs_src -> entries [0 ],
919
+ sizeof (struct branch_entry ) * ptq -> last_branch_pos );
920
+ }
921
+ }
922
+
923
+ static inline void intel_pt_reset_last_branch_rb (struct intel_pt_queue * ptq )
924
+ {
925
+ ptq -> last_branch_pos = 0 ;
926
+ ptq -> last_branch_rb -> nr = 0 ;
927
+ }
928
+
929
+ static void intel_pt_update_last_branch_rb (struct intel_pt_queue * ptq )
930
+ {
931
+ const struct intel_pt_state * state = ptq -> state ;
932
+ struct branch_stack * bs = ptq -> last_branch_rb ;
933
+ struct branch_entry * be ;
934
+
935
+ if (!ptq -> last_branch_pos )
936
+ ptq -> last_branch_pos = ptq -> pt -> synth_opts .last_branch_sz ;
937
+
938
+ ptq -> last_branch_pos -= 1 ;
939
+
940
+ be = & bs -> entries [ptq -> last_branch_pos ];
941
+ be -> from = state -> from_ip ;
942
+ be -> to = state -> to_ip ;
943
+ be -> flags .abort = !!(state -> flags & INTEL_PT_ABORT_TX );
944
+ be -> flags .in_tx = !!(state -> flags & INTEL_PT_IN_TX );
945
+ /* No support for mispredict */
946
+
947
+ if (bs -> nr < ptq -> pt -> synth_opts .last_branch_sz )
948
+ bs -> nr += 1 ;
949
+ }
950
+
879
951
static int intel_pt_inject_event (union perf_event * event ,
880
952
struct perf_sample * sample , u64 type ,
881
953
bool swapped )
@@ -890,6 +962,10 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
890
962
struct intel_pt * pt = ptq -> pt ;
891
963
union perf_event * event = ptq -> event_buf ;
892
964
struct perf_sample sample = { .ip = 0 , };
965
+ struct dummy_branch_stack {
966
+ u64 nr ;
967
+ struct branch_entry entries ;
968
+ } dummy_bs ;
893
969
894
970
if (pt -> branches_filter && !(pt -> branches_filter & ptq -> flags ))
895
971
return 0 ;
@@ -912,6 +988,21 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
912
988
sample .flags = ptq -> flags ;
913
989
sample .insn_len = ptq -> insn_len ;
914
990
991
+ /*
992
+ * perf report cannot handle events without a branch stack when using
993
+ * SORT_MODE__BRANCH so make a dummy one.
994
+ */
995
+ if (pt -> synth_opts .last_branch && sort__mode == SORT_MODE__BRANCH ) {
996
+ dummy_bs = (struct dummy_branch_stack ){
997
+ .nr = 1 ,
998
+ .entries = {
999
+ .from = sample .ip ,
1000
+ .to = sample .addr ,
1001
+ },
1002
+ };
1003
+ sample .branch_stack = (struct branch_stack * )& dummy_bs ;
1004
+ }
1005
+
915
1006
if (pt -> synth_opts .inject ) {
916
1007
ret = intel_pt_inject_event (event , & sample ,
917
1008
pt -> branches_sample_type ,
@@ -961,6 +1052,11 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
961
1052
sample .callchain = ptq -> chain ;
962
1053
}
963
1054
1055
+ if (pt -> synth_opts .last_branch ) {
1056
+ intel_pt_copy_last_branch_rb (ptq );
1057
+ sample .branch_stack = ptq -> last_branch ;
1058
+ }
1059
+
964
1060
if (pt -> synth_opts .inject ) {
965
1061
ret = intel_pt_inject_event (event , & sample ,
966
1062
pt -> instructions_sample_type ,
@@ -974,6 +1070,9 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
974
1070
pr_err ("Intel Processor Trace: failed to deliver instruction event, error %d\n" ,
975
1071
ret );
976
1072
1073
+ if (pt -> synth_opts .last_branch )
1074
+ intel_pt_reset_last_branch_rb (ptq );
1075
+
977
1076
return ret ;
978
1077
}
979
1078
@@ -1008,6 +1107,11 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
1008
1107
sample .callchain = ptq -> chain ;
1009
1108
}
1010
1109
1110
+ if (pt -> synth_opts .last_branch ) {
1111
+ intel_pt_copy_last_branch_rb (ptq );
1112
+ sample .branch_stack = ptq -> last_branch ;
1113
+ }
1114
+
1011
1115
if (pt -> synth_opts .inject ) {
1012
1116
ret = intel_pt_inject_event (event , & sample ,
1013
1117
pt -> transactions_sample_type ,
@@ -1021,6 +1125,9 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
1021
1125
pr_err ("Intel Processor Trace: failed to deliver transaction event, error %d\n" ,
1022
1126
ret );
1023
1127
1128
+ if (pt -> synth_opts .callchain )
1129
+ intel_pt_reset_last_branch_rb (ptq );
1130
+
1024
1131
return ret ;
1025
1132
}
1026
1133
@@ -1116,6 +1223,9 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
1116
1223
return err ;
1117
1224
}
1118
1225
1226
+ if (pt -> synth_opts .last_branch )
1227
+ intel_pt_update_last_branch_rb (ptq );
1228
+
1119
1229
if (!pt -> sync_switch )
1120
1230
return 0 ;
1121
1231
@@ -1763,6 +1873,8 @@ static int intel_pt_synth_events(struct intel_pt *pt,
1763
1873
pt -> instructions_sample_period = attr .sample_period ;
1764
1874
if (pt -> synth_opts .callchain )
1765
1875
attr .sample_type |= PERF_SAMPLE_CALLCHAIN ;
1876
+ if (pt -> synth_opts .last_branch )
1877
+ attr .sample_type |= PERF_SAMPLE_BRANCH_STACK ;
1766
1878
pr_debug ("Synthesizing 'instructions' event with id %" PRIu64 " sample type %#" PRIx64 "\n" ,
1767
1879
id , (u64 )attr .sample_type );
1768
1880
err = intel_pt_synth_event (session , & attr , id );
@@ -1782,6 +1894,8 @@ static int intel_pt_synth_events(struct intel_pt *pt,
1782
1894
attr .sample_period = 1 ;
1783
1895
if (pt -> synth_opts .callchain )
1784
1896
attr .sample_type |= PERF_SAMPLE_CALLCHAIN ;
1897
+ if (pt -> synth_opts .last_branch )
1898
+ attr .sample_type |= PERF_SAMPLE_BRANCH_STACK ;
1785
1899
pr_debug ("Synthesizing 'transactions' event with id %" PRIu64 " sample type %#" PRIx64 "\n" ,
1786
1900
id , (u64 )attr .sample_type );
1787
1901
err = intel_pt_synth_event (session , & attr , id );
@@ -1808,6 +1922,7 @@ static int intel_pt_synth_events(struct intel_pt *pt,
1808
1922
attr .sample_period = 1 ;
1809
1923
attr .sample_type |= PERF_SAMPLE_ADDR ;
1810
1924
attr .sample_type &= ~(u64 )PERF_SAMPLE_CALLCHAIN ;
1925
+ attr .sample_type &= ~(u64 )PERF_SAMPLE_BRANCH_STACK ;
1811
1926
pr_debug ("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n" ,
1812
1927
id , (u64 )attr .sample_type );
1813
1928
err = intel_pt_synth_event (session , & attr , id );
0 commit comments