22 * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
33 * University Research and Technology
44 * Corporation. All rights reserved.
5- * Copyright (c) 2004-2005 The University of Tennessee and The University
5+ * Copyright (c) 2004-2014 The University of Tennessee and The University
66 * of Tennessee Research Foundation. All rights
77 * reserved.
88 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
2020
2121#include "opal_config.h"
2222
23- #include <mach/mach_time.h>
24-
2523#include "opal/mca/timer/timer.h"
2624#include "opal/mca/timer/darwin/timer_darwin.h"
2725#include "opal/constants.h"
2826
2927opal_timer_t opal_timer_darwin_freq ;
28+ mach_timebase_info_data_t opal_timer_darwin_info = {.denom = 0 };
29+ opal_timer_t opal_timer_darwin_bias ;
3030
3131static int opal_timer_darwin_open (void );
3232
33-
3433const opal_timer_base_component_2_0_0_t mca_timer_darwin_component = {
3534 /* First, the mca_component_t struct containing meta information
3635 about the component itself */
@@ -53,55 +52,48 @@ const opal_timer_base_component_2_0_0_t mca_timer_darwin_component = {
5352 },
5453};
5554
56-
55+ /* mach_timebase_info() returns a fraction that can be multiplied
56+ by the difference between two calls to mach_absolute_time() to
57+ get the number of nanoseconds that passed between the two
58+ calls.
59+
60+ On PPC, mach_timebase_info returns numer = 1000000000 and denom
61+ = 33333335 (or possibly 25000000, depending on the machine).
62+ mach_absolute_time() returns a cycle count from the global
63+ clock, which runs at 25 - 33MHz, so dividing the cycle count by
64+ the frequency gives you seconds between the interval, then
65+ multiplying by 1000000000 gives you nanoseconds. Of course,
66+ you should do the multiply first, then the divide to reduce
67+ arithmetic errors due to integer math. But since we want the
68+ least amount of math in the critical path as possible and
69+ mach_absolute_time is already a cycle counter, we claim we have
70+ native cycle count support and set the frequency to be the
71+ frequency of the global clock, which is denom *
72+ (1000000000 / numer), which is denom * (1 / 1), or
73+ denom.
74+
75+ On Intel, mach_timebase_info returns numer = 1 and denom = 1,
76+ meaning that mach_absolute_time() returns some global clock
77+ time in nanoseconds. Because PPC returns a frequency and
78+ returning a time in microseconds would still require math in
79+ the critical path (a divide, at that), we pretend that the
80+ nanosecond timer is instead a cycle counter for a 1GHz clock
81+ and that we're returning a cycle count natively. So denom
82+ * (1000000000 / numer) gives us 1 * (1000000000 / 1), or
83+ 1000000000, meaning we have a 1GHz clock.
84+
85+ More generally, since mach_timebase_info() gives the "keys" to
86+ transition the return from mach_absolute_time() into
87+ nanoseconds, taking the reciprocal of that and multiplying by
88+ 1000000000 will give you a frequency in cycles / second if you
89+ think of mach_absolute_time() always returning a cycle count.
90+ */
5791int opal_timer_darwin_open (void )
5892{
59- mach_timebase_info_data_t sTBI ;
60-
61- mach_timebase_info (& sTBI );
62-
63- /* mach_timebase_info() returns a fraction that can be multiplied
64- by the difference between two calls to mach_absolute_time() to
65- get the number of nanoseconds that passed between the two
66- calls.
67-
68- On PPC, mach_timebase_info returns numer = 1000000000 and denom
69- = 33333335 (or possibly 25000000, depending on the machine).
70- mach_absolute_time() returns a cycle count from the global
71- clock, which runs at 25 - 33MHz, so dividing the cycle count by
72- the frequency gives you seconds between the interval, then
73- multiplying by 1000000000 gives you nanoseconds. Of course,
74- you should do the multiply first, then the divide to reduce
75- arithmetic errors due to integer math. But since we want the
76- least amount of math in the critical path as possible and
77- mach_absolute_time is already a cycle counter, we claim we have
78- native cycle count support and set the frequencey to be the
79- frequencey of the global clock, which is sTBI.denom *
80- (1000000000 / sTBI.numer), which is sTBI.denom * (1 / 1), or
81- sTBI.denom.
82-
83- On Intel, mach_timebase_info returns numer = 1 nd denom = 1,
84- meaning that mach_absolute_time() returns some global clock
85- time in nanoseconds. Because PPC returns a frequency and
86- returning a time in microseconds would still require math in
87- the critical path (a divide, at that), we pretend that the
88- nanosecond timer is instead a cycle counter for a 1GHz clock
89- and that we're returning a cycle count natively. so sTBI.denom
90- * (1000000000 / sTBI.numer) gives us 1 * (1000000000 / 1), or
91- 1000000000, meaning we have a 1GHz clock.
92-
93- More generally, since mach_timebase_info() gives the "keys" to
94- transition the return from mach_absolute_time() into
95- nanoseconds, taking the reverse of that and multipling by
96- 1000000000 will give you a frequency in cycles / second if you
97- think of mach_absolute_time() always returning a cycle count.
93+ /* Call opal_timer_base_get_cycles() once to start the engine */
94+ (void )opal_timer_base_get_cycles ();
9895
99- By the way, it's interesting to note that because these are
100- library functions and because of how rosetta works, a PPC
101- binary running under rosetta on an Intel Mac will behave
102- exactly like an Intel binary running on an Intel Mac.
103- */
104- opal_timer_darwin_freq = sTBI .denom * (1000000000 / sTBI .numer );
96+ opal_timer_darwin_freq = opal_timer_darwin_info .denom * (1000000000 / opal_timer_darwin_info .numer );
10597
10698 return OPAL_SUCCESS ;
10799}
0 commit comments