Intel(R) Threading Building Blocks Doxygen Documentation, version 4.2.3
gcc_arm.h
/*
    Copyright (c) 2005-2020 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

/*
    Platform isolation layer for the ARMv7-a architecture.
*/

#ifndef __TBB_machine_H
#error Do not include this file directly; include tbb_machine.h instead
#endif

#if __ARM_ARCH_7A__

#include <sys/param.h>
#include <unistd.h>

#define __TBB_WORDSIZE 4

// Traditionally ARM is little-endian.
// Note that, since only the layout of aligned 32-bit words is of interest,
// any apparent PDP-endianness of 32-bit words at half-word alignment or
// any little-endian ordering of big-endian 32-bit words in 64-bit quantities
// may be disregarded for this setting.
#if __BIG_ENDIAN__ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__)
    #define __TBB_ENDIANNESS __TBB_ENDIAN_BIG
#elif __LITTLE_ENDIAN__ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__)
    #define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
#elif defined(__BYTE_ORDER__)
    #define __TBB_ENDIANNESS __TBB_ENDIAN_UNSUPPORTED
#else
    #define __TBB_ENDIANNESS __TBB_ENDIAN_DETECT
#endif
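
// Illustrative sketch, not part of the original header: a runtime probe,
// with a hypothetical helper name, confirming that the endianness selected
// above matches the actual layout of an aligned 32-bit word. It assumes the
// <stdint.h> types are available here, as int32_t already is via tbb_machine.h.
static inline int example_is_little_endian(void) {
    union { uint32_t word; uint8_t byte[4]; } probe;
    probe.word = 0x01020304u;
    return probe.byte[0] == 0x04;   // least significant byte stored first => little-endian
}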

#define __TBB_compiler_fence() __asm__ __volatile__("": : :"memory")
#define __TBB_full_memory_fence() __asm__ __volatile__("dmb ish": : :"memory")
#define __TBB_control_consistency_helper() __TBB_full_memory_fence()
#define __TBB_acquire_consistency_helper() __TBB_full_memory_fence()
#define __TBB_release_consistency_helper() __TBB_full_memory_fence()

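// Illustrative sketch, not part of the original header: the usual pairing of
// the helpers above. A producer writes its payload and then sets a flag with
// release semantics; a consumer sees the flag and then reads the payload with
// acquire semantics. All names below are hypothetical.
static int example_payload;
static volatile int example_ready;

static inline void example_publish(int v) {
    example_payload = v;
    __TBB_release_consistency_helper();   // dmb ish: payload becomes visible before the flag
    example_ready = 1;
}

static inline int example_consume(void) {
    while (!example_ready) {}             // spin until the producer's flag is observed
    __TBB_acquire_consistency_helper();   // dmb ish: flag is read before the payload
    return example_payload;
}
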
//--------------------------------------------------
// Compare and swap
//--------------------------------------------------

/**
 * Atomic CAS operation involving memory fence
 * int32_t compare_and_swap(int32_t *ptr, int32_t value, int32_t comparand)
 * @param ptr pointer to value in memory to be swapped with value if *ptr==comparand
 * @param value value to assign *ptr to if *ptr==comparand
 * @param comparand value to compare with *ptr
 * @return value originally in memory at ptr, regardless of success
 */
static inline int32_t __TBB_machine_cmpswp4(volatile void *ptr, int32_t value, int32_t comparand )
{
    int32_t oldval, res;

    __TBB_full_memory_fence();

    do {
        __asm__ __volatile__(
            "ldrex      %1, [%3]\n"
            "mov        %0, #0\n"
            "cmp        %1, %4\n"
            "it         eq\n"
            "strexeq    %0, %5, [%3]\n"
            : "=&r" (res), "=&r" (oldval), "+Qo" (*(volatile int32_t*)ptr)
            : "r" ((volatile int32_t *)ptr), "Ir" (comparand), "r" (value)
            : "cc");
    } while (res);

    __TBB_full_memory_fence();

    return oldval;
}
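
// Illustrative sketch, not part of the original header: the canonical CAS
// retry loop built on __TBB_machine_cmpswp4, shown here as an atomic
// fetch-or. The helper name is hypothetical.
static inline int32_t example_fetch_or4(volatile void *ptr, int32_t mask) {
    int32_t old;
    do {
        old = *(volatile int32_t*)ptr;    // snapshot the current value
        // retry if another thread changed *ptr between the snapshot and the CAS
    } while (__TBB_machine_cmpswp4(ptr, old | mask, old) != old);
    return old;                           // value observed before the OR
}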

/**
 * Atomic CAS operation involving memory fence
 * int64_t compare_and_swap( volatile void *ptr, int64_t value, int64_t comparand )
 * @param ptr pointer to value in memory to be swapped with value if *ptr==comparand
 * @param value value to assign *ptr to if *ptr==comparand
 * @param comparand value to compare with *ptr
 * @return value originally in memory at ptr, regardless of success
 */
static inline int64_t __TBB_machine_cmpswp8(volatile void *ptr, int64_t value, int64_t comparand )
{
    int64_t oldval;
    int32_t res;

    __TBB_full_memory_fence();

    do {
        __asm__ __volatile__(
            "mov        %0, #0\n"
            "ldrexd     %1, %H1, [%3]\n"
            "cmp        %1, %4\n"
            "it         eq\n"
            "cmpeq      %H1, %H4\n"
            "it         eq\n"
            "strexdeq   %0, %5, %H5, [%3]"
            : "=&r" (res), "=&r" (oldval), "+Qo" (*(volatile int64_t*)ptr)
            : "r" ((volatile int64_t *)ptr), "r" (comparand), "r" (value)
            : "cc");
    } while (res);

    __TBB_full_memory_fence();

    return oldval;
}
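
// Illustrative sketch, not part of the original header: a 64-bit atomic store
// written as a CAS loop over __TBB_machine_cmpswp8, which is essentially what
// the generic double-word load/store machinery selected at the end of this
// branch does. Note that ldrexd/strexd require 8-byte-aligned operands.
// The helper name is hypothetical.
static inline void example_store8(volatile void *ptr, int64_t value) {
    int64_t expected = *(volatile int64_t*)ptr;   // a torn read is harmless: the CAS validates it
    for (;;) {
        int64_t seen = __TBB_machine_cmpswp8(ptr, value, expected);
        if (seen == expected) break;              // swap succeeded
        expected = seen;                          // lost the race; retry against the value seen
    }
}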

static inline int32_t __TBB_machine_fetchadd4(volatile void* ptr, int32_t addend)
{
    unsigned long tmp;
    int32_t result, tmp2;

    __TBB_full_memory_fence();

    __asm__ __volatile__(
"1:     ldrex   %0, [%4]\n"
"       add     %3, %0, %5\n"
"       strex   %1, %3, [%4]\n"
"       cmp     %1, #0\n"
"       bne     1b\n"
    : "=&r" (result), "=&r" (tmp), "+Qo" (*(volatile int32_t*)ptr), "=&r"(tmp2)
    : "r" ((volatile int32_t *)ptr), "Ir" (addend)
    : "cc");

    __TBB_full_memory_fence();

    return result;
}

static inline int64_t __TBB_machine_fetchadd8(volatile void *ptr, int64_t addend)
{
    unsigned long tmp;
    int64_t result, tmp2;

    __TBB_full_memory_fence();

    __asm__ __volatile__(
"1:     ldrexd  %0, %H0, [%4]\n"
"       adds    %3, %0, %5\n"
"       adc     %H3, %H0, %H5\n"
"       strexd  %1, %3, %H3, [%4]\n"
"       cmp     %1, #0\n"
"       bne     1b"
    : "=&r" (result), "=&r" (tmp), "+Qo" (*(volatile int64_t*)ptr), "=&r"(tmp2)
    : "r" ((volatile int64_t *)ptr), "r" (addend)
    : "cc");

    __TBB_full_memory_fence();

    return result;
}
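
// Illustrative sketch, not part of the original header: the fetch-add
// primitives above used as ticket counters. Both return the value held
// *before* the addition, so each caller gets a unique ticket. Names are
// hypothetical.
static volatile int64_t example_tickets;

static inline int64_t example_next_ticket(void) {
    return __TBB_machine_fetchadd8(&example_tickets, 1);
}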

namespace tbb {
namespace internal {
    template <typename T, size_t S>
    struct machine_load_store_relaxed {
        static inline T load ( const volatile T& location ) {
            const T value = location;

            /*
            * An extra memory barrier is required for errata #761319
            * Please see http://infocenter.arm.com/help/topic/com.arm.doc.uan0004a
            */
            __TBB_acquire_consistency_helper();
            return value;
        }

        static inline void store ( volatile T& location, T value ) {
            location = value;
        }
    };
}} // namespaces internal, tbb
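
// Illustrative sketch, not part of the original header: the template above
// maps relaxed loads and stores to plain accesses, so they are atomic only
// for naturally aligned types up to the 4-byte word size; 8-byte accesses go
// through the generic CAS-based path selected below. A hypothetical spin-wait
// using a relaxed load:
static inline void example_wait_until_set(const volatile int32_t& flag) {
    while (tbb::internal::machine_load_store_relaxed<int32_t, 4>::load(flag) == 0) {}
    __TBB_control_consistency_helper();   // order dependent reads after the spin exit
}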

// Machine specific atomic operations

#define __TBB_CompareAndSwap4(P,V,C) __TBB_machine_cmpswp4(P,V,C)
#define __TBB_CompareAndSwap8(P,V,C) __TBB_machine_cmpswp8(P,V,C)

// Use generics for some things
#define __TBB_USE_GENERIC_PART_WORD_CAS 1
#define __TBB_USE_GENERIC_PART_WORD_FETCH_ADD 1
#define __TBB_USE_GENERIC_PART_WORD_FETCH_STORE 1
#define __TBB_USE_GENERIC_FETCH_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_DWORD_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
#elif defined __aarch64__
// Generic gcc implementations are fine for ARMv8-a except __TBB_PAUSE.
#include "gcc_generic.h"
#else
#error compilation requires an ARMv7-a or ARMv8-a architecture.
#endif // __ARM_ARCH_7A__

inline void __TBB_machine_pause (int32_t delay)
{
    while (delay > 0)
    {
        __asm__ __volatile__("yield" ::: "memory");
        delay--;
    }
}
#define __TBB_Pause(V) __TBB_machine_pause(V)
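
// Illustrative sketch, not part of the original header: __TBB_Pause in an
// exponential-backoff spin lock, the pattern TBB's own backoff helpers
// follow. Names are hypothetical; 0 means free, 1 means held.
static inline void example_spin_acquire(volatile int32_t *lock) {
    int32_t delay = 1;
    while (__TBB_machine_cmpswp4(lock, 1, 0) != 0) {   // try to take the lock
        __TBB_Pause(delay);                            // yield between attempts
        if (delay < 16) delay *= 2;                    // cap the backoff
    }
}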
