Emilio G. Cota 37b995f6e7 target-i386: remove helper_lock()
It's been superseded by the atomic helpers.

The use of the atomic helpers provides a significant performance and scalability
improvement. Below is the result of running the atomic_add-test microbenchmark with:
 $ x86_64-linux-user/qemu-x86_64 tests/atomic_add-bench -o 5000000 -r $r -n $n
, where $n is the number of threads and $r is the allowed range for the additions.

The scenarios measured are:
- atomic: implements x86' ADDL with the atomic_add helper (i.e. this patchset)
- cmpxchg: implement x86' ADDL with a TCG loop using the cmpxchg helper
- master: before this patchset

Results sorted in ascending range, i.e. descending degree of contention.
Y axis is Throughput in Mops/s. Tests are run on an AMD machine with 64
Opteron 6376 cores.

                atomic_add-bench: 5000000 ops/thread, [0,1] range

  25 ++---------+----------+---------+----------+----------+----------+---++
     + atomic +-E--+       +         +          +          +          +    |
     |cmpxchg +-H--+                                                       |
  20 +Emaster +-N--+                                                      ++
     ||                                                                    |
     |++                                                                   |
     ||                                                                    |
  15 +++                                                                  ++
     |N|                                                                   |
     |+|                                                                   |
  10 ++|                                                                  ++
     |+|+                                                                  |
     | |    -+E+------        +++  ---+E+------+E+------+E+-----+E+------+E|
     |+E+E+- +++     +E+------+E+--                                        |
   5 ++|+                                                                 ++
     |+N+H+---                                 +++                         |
     ++++N+--+H++----+++   +  +++  --++H+------+H+------+H++----+H+---+--- |
   0 ++---------+-----H----+---H-----+----------+----------+----------+---H+
     0          10         20        30         40         50         60
                                Number of threads

                atomic_add-bench: 5000000 ops/thread, [0,2] range

  25 ++---------+----------+---------+----------+----------+----------+---++
     ++atomic +-E--+       +         +          +          +          +    |
     |cmpxchg +-H--+                                                       |
  20 ++master +-N--+                                                      ++
     |E|                                                                   |
     |++                                                                   |
     ||E                                                                   |
  15 ++|                                                                  ++
     |N||                                                                  |
     |+||                                   ---+E+------+E+-----+E+------+E|
  10 ++| |        ---+E+------+E+-----+E+---                    +++      +++
     ||H+E+--+E+--                                                         |
     |+++++                                                                |
     | ||                                                                  |
   5 ++|+H+--                                  +++                        ++
     |+N+    -                              ---+H+------+H+------          |
     +  +N+--+H++----+H+---+--+H+----++H+---    +          +    +H+---+--+H|
   0 ++---------+----------+---------+----------+----------+----------+---++
     0          10         20        30         40         50         60
                                Number of threads

                atomic_add-bench: 5000000 ops/thread, [0,8] range

  40 ++---------+----------+---------+----------+----------+----------+---++
     ++atomic +-E--+       +         +          +          +          +    |
  35 +cmpxchg +-H--+                                                      ++
     | master +-N--+               ---+E+------+E+------+E+-----+E+------+E|
  30 ++|                   ---+E+--   +++                                 ++
     | |            -+E+---                                                |
  25 ++E        ---- +++                                                  ++
     |+++++ -+E+                                                           |
  20 +E+ E-- +++                                                          ++
     |H|+++                                                                |
     |+|                                       +H+-------                  |
  15 ++H+                                   ---+++      +H+------         ++
     |N++H+--                         +++---                    +H+------++|
  10 ++ +++  -       +++           ---+H+                       +++      +H+
     | |     +H+-----+H+------+H+--                                        |
   5 ++|                      +++                                         ++
     ++N+N+--+N++          +         +          +          +          +    |
   0 ++---------+----------+---------+----------+----------+----------+---++
     0          10         20        30         40         50         60
                                Number of threads

               atomic_add-bench: 5000000 ops/thread, [0,128] range

  160 ++---------+---------+----------+---------+----------+----------+---++
      + atomic +-E--+      +          +         +          +          +    |
  140 +cmpxchg +-H--+                          +++      +++               ++
      | master +-N--+                           E--------E------+E+------++|
  120 ++                                      --|        |      +++       E+
      |                                     -- +++      +++              ++|
  100 ++                                   -                              ++
      |                                +++-                     +++      ++|
   80 ++                              -+E+    -+H+------+H+------H--------++
      |                           ----    ----                  +++       H|
      |            ---+E+-----+E+-  ---+H+                               ++|
   60 ++     +E+---   +++  ---+H+---                                      ++
      |    --+++   ---+H+--                                                |
   40 ++ +E+-+H+---                                                       ++
      |  +H+                                                               |
   20 +EE+                                                                ++
      +N+        +         +          +         +          +          +    |
    0 ++N-N---N--+---------+----------+---------+----------+----------+---++
      0          10        20         30        40         50         60
                                Number of threads

              atomic_add-bench: 5000000 ops/thread, [0,1024] range

  350 ++---------+---------+----------+---------+----------+----------+---++
      + atomic +-E--+      +          +         +          +          +    |
  300 +cmpxchg +-H--+                                                    +++
      | master +-N--+                                           +++       ||
      |                                                 +++      |    ----E|
  250 ++                                                 |   ----E----    ++
      |                                              ----E---    |    ---+H|
  200 ++                                      -+E+---   +++  ---+H+---    ++
      |                                   ----         -+H+--              |
      |                                +E+     +++ ---- +++                |
  150 ++                            ---+++  ---+H+-                       ++
      |                          ---  -+H+--                               |
  100 ++                   ---+E+ ---- +++                                ++
      |      +++   ---+E+-----+H+-                                         |
      |     -+E+------+H+--                                                |
   50 ++ +E+                                                              ++
      +EE+       +         +          +         +          +          +    |
    0 ++N-N---N--+---------+----------+---------+----------+----------+---++
      0          10        20         30        40         50         60
                                Number of threads

  hi-res: http://imgur.com/a/fMRmq

For master I stopped measuring master after 8 threads, because there is little
point in measuring the well-known performance collapse of a contended lock.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <1467054136-10430-21-git-send-email-cota@braap.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-10-26 08:29:01 -07:00

231 lines
8.1 KiB
C

DEF_HELPER_FLAGS_4(cc_compute_all, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl, int)
DEF_HELPER_FLAGS_4(cc_compute_c, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl, int)
DEF_HELPER_3(write_eflags, void, env, tl, i32)
DEF_HELPER_1(read_eflags, tl, env)
DEF_HELPER_2(divb_AL, void, env, tl)
DEF_HELPER_2(idivb_AL, void, env, tl)
DEF_HELPER_2(divw_AX, void, env, tl)
DEF_HELPER_2(idivw_AX, void, env, tl)
DEF_HELPER_2(divl_EAX, void, env, tl)
DEF_HELPER_2(idivl_EAX, void, env, tl)
#ifdef TARGET_X86_64
DEF_HELPER_2(divq_EAX, void, env, tl)
DEF_HELPER_2(idivq_EAX, void, env, tl)
#endif
DEF_HELPER_FLAGS_2(cr4_testbit, TCG_CALL_NO_WG, void, env, i32)
DEF_HELPER_FLAGS_2(bndck, TCG_CALL_NO_WG, void, env, i32)
DEF_HELPER_FLAGS_3(bndldx32, TCG_CALL_NO_WG, i64, env, tl, tl)
DEF_HELPER_FLAGS_3(bndldx64, TCG_CALL_NO_WG, i64, env, tl, tl)
DEF_HELPER_FLAGS_5(bndstx32, TCG_CALL_NO_WG, void, env, tl, tl, i64, i64)
DEF_HELPER_FLAGS_5(bndstx64, TCG_CALL_NO_WG, void, env, tl, tl, i64, i64)
DEF_HELPER_1(bnd_jmp, void, env)
DEF_HELPER_2(aam, void, env, int)
DEF_HELPER_2(aad, void, env, int)
DEF_HELPER_1(aaa, void, env)
DEF_HELPER_1(aas, void, env)
DEF_HELPER_1(daa, void, env)
DEF_HELPER_1(das, void, env)
DEF_HELPER_2(lsl, tl, env, tl)
DEF_HELPER_2(lar, tl, env, tl)
DEF_HELPER_2(verr, void, env, tl)
DEF_HELPER_2(verw, void, env, tl)
DEF_HELPER_2(lldt, void, env, int)
DEF_HELPER_2(ltr, void, env, int)
DEF_HELPER_3(load_seg, void, env, int, int)
DEF_HELPER_4(ljmp_protected, void, env, int, tl, tl)
DEF_HELPER_5(lcall_real, void, env, int, tl, int, int)
DEF_HELPER_5(lcall_protected, void, env, int, tl, int, tl)
DEF_HELPER_2(iret_real, void, env, int)
DEF_HELPER_3(iret_protected, void, env, int, int)
DEF_HELPER_3(lret_protected, void, env, int, int)
DEF_HELPER_2(read_crN, tl, env, int)
DEF_HELPER_3(write_crN, void, env, int, tl)
DEF_HELPER_2(lmsw, void, env, tl)
DEF_HELPER_1(clts, void, env)
DEF_HELPER_FLAGS_3(set_dr, TCG_CALL_NO_WG, void, env, int, tl)
DEF_HELPER_FLAGS_2(get_dr, TCG_CALL_NO_WG, tl, env, int)
DEF_HELPER_2(invlpg, void, env, tl)
DEF_HELPER_1(sysenter, void, env)
DEF_HELPER_2(sysexit, void, env, int)
#ifdef TARGET_X86_64
DEF_HELPER_2(syscall, void, env, int)
DEF_HELPER_2(sysret, void, env, int)
#endif
DEF_HELPER_2(hlt, void, env, int)
DEF_HELPER_2(monitor, void, env, tl)
DEF_HELPER_2(mwait, void, env, int)
DEF_HELPER_2(pause, void, env, int)
DEF_HELPER_1(debug, void, env)
DEF_HELPER_1(reset_rf, void, env)
DEF_HELPER_3(raise_interrupt, void, env, int, int)
DEF_HELPER_2(raise_exception, void, env, int)
DEF_HELPER_1(cli, void, env)
DEF_HELPER_1(sti, void, env)
DEF_HELPER_1(clac, void, env)
DEF_HELPER_1(stac, void, env)
DEF_HELPER_3(boundw, void, env, tl, int)
DEF_HELPER_3(boundl, void, env, tl, int)
DEF_HELPER_1(rsm, void, env)
DEF_HELPER_2(into, void, env, int)
DEF_HELPER_2(cmpxchg8b_unlocked, void, env, tl)
DEF_HELPER_2(cmpxchg8b, void, env, tl)
#ifdef TARGET_X86_64
DEF_HELPER_2(cmpxchg16b_unlocked, void, env, tl)
DEF_HELPER_2(cmpxchg16b, void, env, tl)
#endif
DEF_HELPER_1(single_step, void, env)
DEF_HELPER_1(cpuid, void, env)
DEF_HELPER_1(rdtsc, void, env)
DEF_HELPER_1(rdtscp, void, env)
DEF_HELPER_1(rdpmc, void, env)
DEF_HELPER_1(rdmsr, void, env)
DEF_HELPER_1(wrmsr, void, env)
DEF_HELPER_2(check_iob, void, env, i32)
DEF_HELPER_2(check_iow, void, env, i32)
DEF_HELPER_2(check_iol, void, env, i32)
DEF_HELPER_3(outb, void, env, i32, i32)
DEF_HELPER_2(inb, tl, env, i32)
DEF_HELPER_3(outw, void, env, i32, i32)
DEF_HELPER_2(inw, tl, env, i32)
DEF_HELPER_3(outl, void, env, i32, i32)
DEF_HELPER_2(inl, tl, env, i32)
DEF_HELPER_FLAGS_4(bpt_io, TCG_CALL_NO_WG, void, env, i32, i32, tl)
DEF_HELPER_3(svm_check_intercept_param, void, env, i32, i64)
DEF_HELPER_3(vmexit, void, env, i32, i64)
DEF_HELPER_4(svm_check_io, void, env, i32, i32, i32)
DEF_HELPER_3(vmrun, void, env, int, int)
DEF_HELPER_1(vmmcall, void, env)
DEF_HELPER_2(vmload, void, env, int)
DEF_HELPER_2(vmsave, void, env, int)
DEF_HELPER_1(stgi, void, env)
DEF_HELPER_1(clgi, void, env)
DEF_HELPER_1(skinit, void, env)
DEF_HELPER_2(invlpga, void, env, int)
/* x86 FPU */
DEF_HELPER_2(flds_FT0, void, env, i32)
DEF_HELPER_2(fldl_FT0, void, env, i64)
DEF_HELPER_2(fildl_FT0, void, env, s32)
DEF_HELPER_2(flds_ST0, void, env, i32)
DEF_HELPER_2(fldl_ST0, void, env, i64)
DEF_HELPER_2(fildl_ST0, void, env, s32)
DEF_HELPER_2(fildll_ST0, void, env, s64)
DEF_HELPER_1(fsts_ST0, i32, env)
DEF_HELPER_1(fstl_ST0, i64, env)
DEF_HELPER_1(fist_ST0, s32, env)
DEF_HELPER_1(fistl_ST0, s32, env)
DEF_HELPER_1(fistll_ST0, s64, env)
DEF_HELPER_1(fistt_ST0, s32, env)
DEF_HELPER_1(fisttl_ST0, s32, env)
DEF_HELPER_1(fisttll_ST0, s64, env)
DEF_HELPER_2(fldt_ST0, void, env, tl)
DEF_HELPER_2(fstt_ST0, void, env, tl)
DEF_HELPER_1(fpush, void, env)
DEF_HELPER_1(fpop, void, env)
DEF_HELPER_1(fdecstp, void, env)
DEF_HELPER_1(fincstp, void, env)
DEF_HELPER_2(ffree_STN, void, env, int)
DEF_HELPER_1(fmov_ST0_FT0, void, env)
DEF_HELPER_2(fmov_FT0_STN, void, env, int)
DEF_HELPER_2(fmov_ST0_STN, void, env, int)
DEF_HELPER_2(fmov_STN_ST0, void, env, int)
DEF_HELPER_2(fxchg_ST0_STN, void, env, int)
DEF_HELPER_1(fcom_ST0_FT0, void, env)
DEF_HELPER_1(fucom_ST0_FT0, void, env)
DEF_HELPER_1(fcomi_ST0_FT0, void, env)
DEF_HELPER_1(fucomi_ST0_FT0, void, env)
DEF_HELPER_1(fadd_ST0_FT0, void, env)
DEF_HELPER_1(fmul_ST0_FT0, void, env)
DEF_HELPER_1(fsub_ST0_FT0, void, env)
DEF_HELPER_1(fsubr_ST0_FT0, void, env)
DEF_HELPER_1(fdiv_ST0_FT0, void, env)
DEF_HELPER_1(fdivr_ST0_FT0, void, env)
DEF_HELPER_2(fadd_STN_ST0, void, env, int)
DEF_HELPER_2(fmul_STN_ST0, void, env, int)
DEF_HELPER_2(fsub_STN_ST0, void, env, int)
DEF_HELPER_2(fsubr_STN_ST0, void, env, int)
DEF_HELPER_2(fdiv_STN_ST0, void, env, int)
DEF_HELPER_2(fdivr_STN_ST0, void, env, int)
DEF_HELPER_1(fchs_ST0, void, env)
DEF_HELPER_1(fabs_ST0, void, env)
DEF_HELPER_1(fxam_ST0, void, env)
DEF_HELPER_1(fld1_ST0, void, env)
DEF_HELPER_1(fldl2t_ST0, void, env)
DEF_HELPER_1(fldl2e_ST0, void, env)
DEF_HELPER_1(fldpi_ST0, void, env)
DEF_HELPER_1(fldlg2_ST0, void, env)
DEF_HELPER_1(fldln2_ST0, void, env)
DEF_HELPER_1(fldz_ST0, void, env)
DEF_HELPER_1(fldz_FT0, void, env)
DEF_HELPER_1(fnstsw, i32, env)
DEF_HELPER_1(fnstcw, i32, env)
DEF_HELPER_2(fldcw, void, env, i32)
DEF_HELPER_1(fclex, void, env)
DEF_HELPER_1(fwait, void, env)
DEF_HELPER_1(fninit, void, env)
DEF_HELPER_2(fbld_ST0, void, env, tl)
DEF_HELPER_2(fbst_ST0, void, env, tl)
DEF_HELPER_1(f2xm1, void, env)
DEF_HELPER_1(fyl2x, void, env)
DEF_HELPER_1(fptan, void, env)
DEF_HELPER_1(fpatan, void, env)
DEF_HELPER_1(fxtract, void, env)
DEF_HELPER_1(fprem1, void, env)
DEF_HELPER_1(fprem, void, env)
DEF_HELPER_1(fyl2xp1, void, env)
DEF_HELPER_1(fsqrt, void, env)
DEF_HELPER_1(fsincos, void, env)
DEF_HELPER_1(frndint, void, env)
DEF_HELPER_1(fscale, void, env)
DEF_HELPER_1(fsin, void, env)
DEF_HELPER_1(fcos, void, env)
DEF_HELPER_3(fstenv, void, env, tl, int)
DEF_HELPER_3(fldenv, void, env, tl, int)
DEF_HELPER_3(fsave, void, env, tl, int)
DEF_HELPER_3(frstor, void, env, tl, int)
DEF_HELPER_FLAGS_2(fxsave, TCG_CALL_NO_WG, void, env, tl)
DEF_HELPER_FLAGS_2(fxrstor, TCG_CALL_NO_WG, void, env, tl)
DEF_HELPER_FLAGS_3(xsave, TCG_CALL_NO_WG, void, env, tl, i64)
DEF_HELPER_FLAGS_3(xsaveopt, TCG_CALL_NO_WG, void, env, tl, i64)
DEF_HELPER_FLAGS_3(xrstor, TCG_CALL_NO_WG, void, env, tl, i64)
DEF_HELPER_FLAGS_2(xgetbv, TCG_CALL_NO_WG, i64, env, i32)
DEF_HELPER_FLAGS_3(xsetbv, TCG_CALL_NO_WG, void, env, i32, i64)
DEF_HELPER_FLAGS_2(rdpkru, TCG_CALL_NO_WG, i64, env, i32)
DEF_HELPER_FLAGS_3(wrpkru, TCG_CALL_NO_WG, void, env, i32, i64)
DEF_HELPER_FLAGS_1(clz, TCG_CALL_NO_RWG_SE, tl, tl)
DEF_HELPER_FLAGS_1(ctz, TCG_CALL_NO_RWG_SE, tl, tl)
DEF_HELPER_FLAGS_2(pdep, TCG_CALL_NO_RWG_SE, tl, tl, tl)
DEF_HELPER_FLAGS_2(pext, TCG_CALL_NO_RWG_SE, tl, tl, tl)
/* MMX/SSE */
DEF_HELPER_2(ldmxcsr, void, env, i32)
DEF_HELPER_1(enter_mmx, void, env)
DEF_HELPER_1(emms, void, env)
DEF_HELPER_3(movq, void, env, ptr, ptr)
#define SHIFT 0
#include "ops_sse_header.h"
#define SHIFT 1
#include "ops_sse_header.h"
DEF_HELPER_3(rclb, tl, env, tl, tl)
DEF_HELPER_3(rclw, tl, env, tl, tl)
DEF_HELPER_3(rcll, tl, env, tl, tl)
DEF_HELPER_3(rcrb, tl, env, tl, tl)
DEF_HELPER_3(rcrw, tl, env, tl, tl)
DEF_HELPER_3(rcrl, tl, env, tl, tl)
#ifdef TARGET_X86_64
DEF_HELPER_3(rclq, tl, env, tl, tl)
DEF_HELPER_3(rcrq, tl, env, tl, tl)
#endif