Home ⌂Doc Index ◂Up ▴
Intel(R) Threading Building Blocks Doxygen Documentation  version 4.2.3
mac_ppc.h
Go to the documentation of this file.
1 /*
2  Copyright (c) 2005-2020 Intel Corporation
3 
4  Licensed under the Apache License, Version 2.0 (the "License");
5  you may not use this file except in compliance with the License.
6  You may obtain a copy of the License at
7 
8  http://www.apache.org/licenses/LICENSE-2.0
9 
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15 */
16 
// Inclusion guard: compilation stops with #error unless __TBB_machine_H is already
// defined and this header's own marker, __TBB_machine_gcc_power_H, is not yet defined
// (i.e. on direct inclusion or on a second inclusion). The marker is defined right after.
17 #if !defined(__TBB_machine_H) || defined(__TBB_machine_gcc_power_H)
18 #error Do not #include this internal file directly; use public TBB headers instead.
19 #endif
20 
21 #define __TBB_machine_gcc_power_H
22 
23 #include <stdint.h>
24 #include <unistd.h>
25 
26 // TODO: rename to gcc_power.h?
27 // This file is for Power Architecture with compilers supporting GNU inline-assembler syntax (currently GNU g++ and IBM XL).
28 // Note that XL V9.0 (sometimes?) has trouble dealing with empty input and/or clobber lists, so they should be avoided.
29 
// __TBB_WORDSIZE is 8 when one of the 64-bit PowerPC predefined macros is set and 4 otherwise.
// Later sections of this header test it to choose between 32-bit and 64-bit instruction forms.
30 #if __powerpc64__ || __ppc64__
31  // IBM XL documents __powerpc64__ (and __PPC64__).
32  // Apple documents __ppc64__ (with __ppc__ only on 32-bit).
33  #define __TBB_WORDSIZE 8
34 #else
35  #define __TBB_WORDSIZE 4
36 #endif
37 
38 // Traditionally Power Architecture is big-endian.
39 // Little-endian could be just an address manipulation (compatibility with TBB not verified),
40 // or normal little-endian (on more recent systems). Embedded PowerPC systems may support
41 // page-specific endianness, but then one endianness must be hidden from TBB so that it still sees only one.
// Choose __TBB_ENDIANNESS from compiler-predefined macros:
//   big-endian macros set              -> __TBB_ENDIAN_BIG
//   little-endian macros set           -> __TBB_ENDIAN_LITTLE
//   __BYTE_ORDER__ defined but neither -> __TBB_ENDIAN_UNSUPPORTED
//   nothing to go on                   -> __TBB_ENDIAN_DETECT
// The __TBB_ENDIAN_* constants are not defined in this file.
42 #if __BIG_ENDIAN__ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__)
43  #define __TBB_ENDIANNESS __TBB_ENDIAN_BIG
44 #elif __LITTLE_ENDIAN__ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__)
45  #define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
46 #elif defined(__BYTE_ORDER__)
47  #define __TBB_ENDIANNESS __TBB_ENDIAN_UNSUPPORTED
48 #else
49  #define __TBB_ENDIANNESS __TBB_ENDIAN_DETECT
50 #endif
51 
52 // On Power Architecture, (lock-free) 64-bit atomics require 64-bit hardware:
// Summary of the three cases below: __TBB_64BIT_ATOMICS is forced to 1 in 64-bit builds,
// forced to 0 when __bgp__ is set, and otherwise defaults to 0 unless the macro was
// already defined before this point (the #ifndef keeps a user-supplied value).
53 #if __TBB_WORDSIZE==8
54  // Do not change the following definition, because TBB itself will use 64-bit atomics in 64-bit builds.
55  #define __TBB_64BIT_ATOMICS 1
56 #elif __bgp__
57  // Do not change the following definition, because this is known 32-bit hardware.
58  #define __TBB_64BIT_ATOMICS 0
59 #else
60  // To enable 64-bit atomics in 32-bit builds, set the value below to 1 instead of 0.
61  // You must make certain that the program will only use them on actual 64-bit hardware
62  // (which typically means that the entire program is only executed on such hardware),
63  // because their implementation involves machine instructions that are illegal elsewhere.
64  // The setting can be chosen independently per compilation unit,
65  // which also means that TBB itself does not need to be rebuilt.
66  // Alternatively (but only for the current architecture and TBB version),
67  // override the default as a predefined macro when invoking the compiler.
68  #ifndef __TBB_64BIT_ATOMICS
69  #define __TBB_64BIT_ATOMICS 0
70  #endif
71 #endif
72 
// Atomic compare-and-swap on the 32-bit word at ptr, built from a lwarx/stwcx. retry loop.
//   ptr       - address of the 32-bit word to operate on
//   value     - stored into *ptr when the loaded word equals comparand
//   comparand - value the loaded word is compared against
// Returns the word loaded by the last lwarx. It equals comparand exactly when the store
// was carried out; any other value means *ptr was left unmodified by this call.
// The loop is preceded by a "sync" and followed by an "isync" instruction, and the
// "memory" clobber additionally stops the compiler from moving memory accesses across it.
73 inline int32_t __TBB_machine_cmpswp4 (volatile void *ptr, int32_t value, int32_t comparand )
74 {
75  int32_t result;
76 
77  __asm__ __volatile__("sync\n"
78  "0:\n\t"
79  "lwarx %[res],0,%[ptr]\n\t" /* load w/ reservation */
80  "cmpw %[res],%[cmp]\n\t" /* compare against comparand */
81  "bne- 1f\n\t" /* exit if not same */
82  "stwcx. %[val],0,%[ptr]\n\t" /* store new value */
83  "bne- 0b\n" /* retry if reservation lost */
84  "1:\n\t" /* the exit */
85  "isync"
86  : [res]"=&r"(result)
87  , "+m"(* (int32_t*) ptr) /* redundant with "memory" */
88  : [ptr]"r"(ptr)
89  , [val]"r"(value)
90  , [cmp]"r"(comparand)
91  : "memory" /* compiler full fence */
92  , "cr0" /* clobbered by cmp and/or stwcx. */
93  );
94  return result;
95 }
96 
97 #if __TBB_WORDSIZE==8
98 
// Atomic compare-and-swap on the 64-bit doubleword at ptr (variant for __TBB_WORDSIZE==8),
// built from a ldarx/stdcx. retry loop with all operands held in registers.
//   ptr       - address of the 64-bit doubleword to operate on
//   value     - stored into *ptr when the loaded doubleword equals comparand
//   comparand - value the loaded doubleword is compared against
// Returns the doubleword loaded by the last ldarx. It equals comparand exactly when the
// store was carried out.
// As in the 32-bit version, the loop sits between "sync" and "isync", with a "memory" clobber.
99 inline int64_t __TBB_machine_cmpswp8 (volatile void *ptr, int64_t value, int64_t comparand )
100 {
101  int64_t result;
102  __asm__ __volatile__("sync\n"
103  "0:\n\t"
104  "ldarx %[res],0,%[ptr]\n\t" /* load w/ reservation */
105  "cmpd %[res],%[cmp]\n\t" /* compare against comparand */
106  "bne- 1f\n\t" /* exit if not same */
107  "stdcx. %[val],0,%[ptr]\n\t" /* store new value */
108  "bne- 0b\n" /* retry if reservation lost */
109  "1:\n\t" /* the exit */
110  "isync"
111  : [res]"=&r"(result)
112  , "+m"(* (int64_t*) ptr) /* redundant with "memory" */
113  : [ptr]"r"(ptr)
114  , [val]"r"(value)
115  , [cmp]"r"(comparand)
116  : "memory" /* compiler full fence */
117  , "cr0" /* clobbered by cmpd and/or stdcx. */
118  );
119  return result;
120 }
121 
122 #elif __TBB_64BIT_ATOMICS /* && __TBB_WORDSIZE==4 */
123 
// Atomic compare-and-swap on the 64-bit doubleword at ptr, variant used in 32-bit builds
// when __TBB_64BIT_ATOMICS is enabled.
//   ptr       - address of the 64-bit doubleword to operate on
//   value     - stored into *ptr when the loaded doubleword equals comparand
//   comparand - value the loaded doubleword is compared against
// Returns the doubleword loaded by the last ldarx. It equals comparand exactly when the
// store was carried out.
// Unlike the 64-bit-build variant, value and comparand are handed to the asm as memory
// operands and pulled into registers with "ld" inside the asm, and the result is written
// back to the local variable with "std" before being returned.
// NOTE(review): presumably this is because a 64-bit value cannot be passed in a single
// "r" operand in a 32-bit build - confirm against the compiler's constraint documentation.
124 inline int64_t __TBB_machine_cmpswp8 (volatile void *ptr, int64_t value, int64_t comparand )
125 {
126  int64_t result;
127  int64_t value_register, comparand_register, result_register; // dummy variables to allocate registers
128  __asm__ __volatile__("sync\n\t"
129  "ld %[val],%[valm]\n\t"
130  "ld %[cmp],%[cmpm]\n"
131  "0:\n\t"
132  "ldarx %[res],0,%[ptr]\n\t" /* load w/ reservation */
133  "cmpd %[res],%[cmp]\n\t" /* compare against comparand */
134  "bne- 1f\n\t" /* exit if not same */
135  "stdcx. %[val],0,%[ptr]\n\t" /* store new value */
136  "bne- 0b\n" /* retry if reservation lost */
137  "1:\n\t" /* the exit */
138  "std %[res],%[resm]\n\t"
139  "isync"
140  : [resm]"=m"(result)
141  , [res] "=&r"( result_register)
142  , [val] "=&r"( value_register)
143  , [cmp] "=&r"(comparand_register)
144  , "+m"(* (int64_t*) ptr) /* redundant with "memory" */
145  : [ptr] "r"(ptr)
146  , [valm]"m"(value)
147  , [cmpm]"m"(comparand)
148  : "memory" /* compiler full fence */
149  , "cr0" /* clobbered by cmpd and/or stdcx. */
150  );
151  return result;
152 }
153 
154 #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
155 
// Generates two partial specializations for operand size S:
// machine_load_store<T,S> and machine_load_store_relaxed<T,S>.
//   S    - second template argument of the specializations (instantiated below with 1, 2, 4, 8)
//   ldx  - load mnemonic (string literal) spliced into the asm
//   stx  - store mnemonic (string literal) spliced into the asm
//   cmpx - compare mnemonic (string literal) used only by load_with_acquire
// Generated members:
//   load_with_acquire  - load, compare the loaded register with itself, conditional branch,
//                        then isync; carries a "memory" clobber.
//                        NOTE(review): presumably the usual Power load-acquire idiom
//                        (dependent branch + isync) - confirm against the Power ISA.
//   store_with_release - lwsync followed by the store; carries a "memory" clobber.
//   load / store       - the bare load / store instruction with no "memory" clobber.
// The address is passed with the "b" constraint in every case because register 0 cannot
// be used as the base register in the 0(%[ptr]) addressing form (see inline comments).
156 #define __TBB_MACHINE_DEFINE_LOAD_STORE(S,ldx,stx,cmpx) \
157  template <typename T> \
158  struct machine_load_store<T,S> { \
159  static inline T load_with_acquire(const volatile T& location) { \
160  T result; \
161  __asm__ __volatile__(ldx " %[res],0(%[ptr])\n" \
162  "0:\n\t" \
163  cmpx " %[res],%[res]\n\t" \
164  "bne- 0b\n\t" \
165  "isync" \
166  : [res]"=r"(result) \
167  : [ptr]"b"(&location) /* cannot use register 0 here */ \
168  , "m"(location) /* redundant with "memory" */ \
169  : "memory" /* compiler acquire fence */ \
170  , "cr0" /* clobbered by cmpw/cmpd */); \
171  return result; \
172  } \
173  static inline void store_with_release(volatile T &location, T value) { \
174  __asm__ __volatile__("lwsync\n\t" \
175  stx " %[val],0(%[ptr])" \
176  : "=m"(location) /* redundant with "memory" */ \
177  : [ptr]"b"(&location) /* cannot use register 0 here */ \
178  , [val]"r"(value) \
179  : "memory"/*compiler release fence*/ /*(cr0 not affected)*/); \
180  } \
181  }; \
182  \
183  template <typename T> \
184  struct machine_load_store_relaxed<T,S> { \
185  static inline T load (const __TBB_atomic T& location) { \
186  T result; \
187  __asm__ __volatile__(ldx " %[res],0(%[ptr])" \
188  : [res]"=r"(result) \
189  : [ptr]"b"(&location) /* cannot use register 0 here */ \
190  , "m"(location) \
191  ); /*(no compiler fence)*/ /*(cr0 not affected)*/ \
192  return result; \
193  } \
194  static inline void store (__TBB_atomic T &location, T value) { \
195  __asm__ __volatile__(stx " %[val],0(%[ptr])" \
196  : "=m"(location) \
197  : [ptr]"b"(&location) /* cannot use register 0 here */ \
198  , [val]"r"(value) \
199  ); /*(no compiler fence)*/ /*(cr0 not affected)*/ \
200  } \
201  };
202 
203 namespace tbb {
204 namespace internal {
// Instantiate the load/store specializations for sizes 1, 2 and 4 with the byte, halfword
// and word load/store mnemonics; all three use the 32-bit compare (cmpw) in load_with_acquire.
205  __TBB_MACHINE_DEFINE_LOAD_STORE(1,"lbz","stb","cmpw")
206  __TBB_MACHINE_DEFINE_LOAD_STORE(2,"lhz","sth","cmpw")
207  __TBB_MACHINE_DEFINE_LOAD_STORE(4,"lwz","stw","cmpw")
208 
209 #if __TBB_WORDSIZE==8
210 
211  __TBB_MACHINE_DEFINE_LOAD_STORE(8,"ld" ,"std","cmpd")
212 
213 #elif __TBB_64BIT_ATOMICS /* && __TBB_WORDSIZE==4 */
214 
// machine_load_store<T,8> for 32-bit builds with __TBB_64BIT_ATOMICS enabled.
// The 64-bit value travels through a scratch register inside the asm and is exchanged
// with C++ variables via memory operands ([resm] / [valm]) instead of "r" operands.
//   load_with_acquire  - ld into the scratch register, std to the local result, then the
//                        compare-with-itself / branch / isync sequence; "memory" clobber.
//   store_with_release - lwsync, ld of the value from memory into the scratch register,
//                        std to the target location; "memory" clobber.
215  template <typename T>
216  struct machine_load_store<T,8> {
217  static inline T load_with_acquire(const volatile T& location) {
218  T result;
219  T result_register; // dummy variable to allocate a register
220  __asm__ __volatile__("ld %[res],0(%[ptr])\n\t"
221  "std %[res],%[resm]\n"
222  "0:\n\t"
223  "cmpd %[res],%[res]\n\t"
224  "bne- 0b\n\t"
225  "isync"
226  : [resm]"=m"(result)
227  , [res]"=&r"(result_register)
228  : [ptr]"b"(&location) /* cannot use register 0 here */
229  , "m"(location) /* redundant with "memory" */
230  : "memory" /* compiler acquire fence */
231  , "cr0" /* clobbered by cmpd */);
232  return result;
233  }
234 
235  static inline void store_with_release(volatile T &location, T value) {
236  T value_register; // dummy variable to allocate a register
237  __asm__ __volatile__("lwsync\n\t"
238  "ld %[val],%[valm]\n\t"
239  "std %[val],0(%[ptr])"
240  : "=m"(location) /* redundant with "memory" */
241  , [val]"=&r"(value_register)
242  : [ptr]"b"(&location) /* cannot use register 0 here */
243  , [valm]"m"(value)
244  : "memory"/*compiler release fence*/ /*(cr0 not affected)*/);
245  }
246  };
247 
248  struct machine_load_store_relaxed<T,8> {
249  static inline T load (const volatile T& location) {
250  T result;
251  T result_register; // dummy variable to allocate a register
252  __asm__ __volatile__("ld %[res],0(%[ptr])\n\t"
253  "std %[res],%[resm]"
254  : [resm]"=m"(result)
255  , [res]"=&r"(result_register)
256  : [ptr]"b"(&location) /* cannot use register 0 here */
257  , "m"(location)
258  ); /*(no compiler fence)*/ /*(cr0 not affected)*/
259  return result;
260  }
261 
262  static inline void store (volatile T &location, T value) {
263  T value_register; // dummy variable to allocate a register
264  __asm__ __volatile__("ld %[val],%[valm]\n\t"
265  "std %[val],0(%[ptr])"
266  : "=m"(location)
267  , [val]"=&r"(value_register)
268  : [ptr]"b"(&location) /* cannot use register 0 here */
269  , [valm]"m"(value)
270  ); /*(no compiler fence)*/ /*(cr0 not affected)*/
271  }
272  };
273  #define __TBB_machine_load_store_relaxed_8
274 
275 #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
276 
277 }} // namespaces internal, tbb
278 
// The generator macro is only needed for the instantiations above, so it is removed here.
279 #undef __TBB_MACHINE_DEFINE_LOAD_STORE
280 
// NOTE(review): the four switches below presumably ask the including TBB machine header to
// supply generic implementations of these operations; their consumers are not in this file.
281 #define __TBB_USE_GENERIC_PART_WORD_CAS 1
282 #define __TBB_USE_GENERIC_FETCH_ADD 1
283 #define __TBB_USE_GENERIC_FETCH_STORE 1
284 #define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
285 
// Fence helpers: each emits one instruction ("isync" / "sync") with a "memory" clobber
// and no operands.
286 #define __TBB_control_consistency_helper() __asm__ __volatile__("isync": : :"memory")
287 #define __TBB_full_memory_fence() __asm__ __volatile__( "sync": : :"memory")
288 
289 static inline intptr_t __TBB_machine_lg( uintptr_t x ) {
290  __TBB_ASSERT(x, "__TBB_Log2(0) undefined");
291  // cntlzd/cntlzw starts counting at 2^63/2^31 (ignoring any higher-order bits), and does not affect cr0
292 #if __TBB_WORDSIZE==8
293  __asm__ __volatile__ ("cntlzd %0,%0" : "+r"(x));
294  return 63-static_cast<intptr_t>(x);
295 #else
296  __asm__ __volatile__ ("cntlzw %0,%0" : "+r"(x));
297  return 31-static_cast<intptr_t>(x);
298 #endif
299 }
300 #define __TBB_Log2(V) __TBB_machine_lg(V)
301 
// Flag type for the try-lock primitive in this header, which updates it with the 4-byte
// compare-and-swap; hence a 32-bit type rather than a single byte.
302 // Assumes implicit alignment for any 32-bit value
303 typedef uint32_t __TBB_Flag;
// NOTE(review): the same-named macro presumably lets other headers detect with #ifdef
// that this port already supplied __TBB_Flag - confirm against the including header.
304 #define __TBB_Flag __TBB_Flag
305 
307  return __TBB_machine_cmpswp4(&flag,1,0)==0;
308 }
309 #define __TBB_TryLockByte(P) __TBB_machine_trylockbyte(P)
static intptr_t __TBB_machine_lg(uintptr_t x)
Definition: mac_ppc.h:289
int32_t __TBB_machine_cmpswp4(volatile void *ptr, int32_t value, int32_t comparand)
Definition: mac_ppc.h:73
static void store(T &location, T value)
Definition: icc_generic.h:108
static T load(const T &location)
Definition: icc_generic.h:105
bool __TBB_machine_trylockbyte(__TBB_atomic __TBB_Flag &flag)
Definition: mac_ppc.h:306
#define __TBB_atomic
Definition: tbb_stddef.h:237
#define __TBB_machine_cmpswp8
Definition: ibm_aix51.h:42
#define __TBB_Flag
Definition: mac_ppc.h:304
static void store_with_release(volatile T &location, T value)
Definition: icc_generic.h:98
static T load_with_acquire(const volatile T &location)
Definition: icc_generic.h:95
unsigned char __TBB_Flag
Definition: gcc_generic.h:105
#define __TBB_ASSERT(predicate, comment)
No-op version of __TBB_ASSERT.
Definition: tbb_stddef.h:165
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long value
The graph class.
#define __TBB_MACHINE_DEFINE_LOAD_STORE(S, ldx, stx, cmpx)
Definition: mac_ppc.h:156

Copyright © 2005-2020 Intel Corporation. All Rights Reserved.

Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are registered trademarks or trademarks of Intel Corporation or its subsidiaries in the United States and other countries.

* Other names and brands may be claimed as the property of others.