target/aarch64: optimize indirect branches

Measurements:

[The baseline is the performance before applying both this commit and the previous one.]

-                                    NBench, aarch64-softmmu. Host: Intel i7-4790K @ 4.00GHz

 1.7x +-+--------------------------------------------------------------------------------------------------------------+-+
      |                                                                                                                  |
      |   cross                                                                                                          |
 1.6x +cross+jr.................................................####...................................................+-+
      |                                                         #++#                                                     |
      |                                                         #  #                                                     |
 1.5x +-+...................................................*****..#...................................................+-+
      |                                                     *+++*  #                                                     |
      |                                                     *   *  #                                                     |
 1.4x +-+...................................................*...*..#...................................................+-+
      |                                                     *   *  #                                                     |
      |                                     #####           *   *  #                                                     |
 1.3x +-+................................****+++#...........*...*..#...................................................+-+
      |                                  *++*   #           *   *  #                                                     |
      |                                  *  *   #           *   *  #                                                     |
 1.2x +-+................................*..*...#...........*...*..#...................................................+-+
      |                                  *  *   #           *   *  #                                                     |
      |                            ####  *  *   #           *   *  #                                                     |
 1.1x +-+.......................+++#..#..*..*...#...........*...*..#...................................................+-+
      |                         ****  #  *  *   #           *   *  #                                        ****####     |
      |                         *  *  #  *  *   #           *   *  #  ****###   +++####            ****###  *  *   #     |
   1x +-++-++++++-++++****###++-*++*++#++*++*+-+#++****+++++*+++*++#++*++*-+#++*****++#++****###-++*++*-+#++*+-*+++#+-++-+
      |     *****###  *  *  #   *  *  #  *  *   #  *++*###  *   *  #  *  *  #  *   *  #  *  *++#   *  *  #  *  *   #     |
      |     *   *++#  *  *  #   *  *  #  *  *   #  *  *  #  *   *  #  *  *  #  *   *  #  *  *  #   *  *  #  *  *   #     |
 0.9x +-+---*****###--****###---****###--****####--****###--*****###--****###--*****###--****###---****###--****####---+-+
      ASSIGNMENT BITFIELD FOURIER FP EMULATION HUFFMAN IDEA LU DECOMPOSITION NEURAL NET NUMERIC SORT STRING SORT hmean
  png: http://imgur.com/qO9ubtk
NB: in the legend, "cross" denotes the previous commit alone; "cross+jr" denotes the previous commit plus this one.

-                            SPECint06 (test set), aarch64-linux-user. Host: Intel i7-4790K @ 4.00GHz

 1.5x +-+--------------------------------------------------------------------------------------------------------------+-+
      |                                                                       *****                                      |
      |                                                                       *+++*                           jr         |
      |                                                                       *   *                                      |
 1.4x +-+.....................................................................*...*.....................+++............+-+
      |                                                                       *   *                      |               |
      |                                      *****                            *   *                      |               |
      |                                      *   *                            *   *                    *****             |
 1.3x +-+....................................*...*............................*...*....................*.|.*...........+-+
      |                       +++            *   *                            *   *                    * | *             |
      |                      *****           *   *                            *   *                    *+++*             |
      |                      *   *           *   *                            *   *                    *   *             |
 1.2x +-+....................*...*...........*...*............................*...*...........*****....*...*...........+-+
      |     *****            *   *           *   *                            *   *           *   *    *   *    +++      |
      |     *   *            *   *           *   *                            *   *           *   *    *   *   *****     |
      |     *   *            *   *   *****   *   *                            *   *           *   *    *   *   *   *     |
 1.1x +-+...*...*............*...*...*...*...*...*............................*...*....+++....*...*....*...*...*...*...+-+
      |     *   *            *   *   *   *   *   *                            *   *   *****   *   *    *   *   *   *     |
      |     *   *            *   *   *   *   *   *   *****                    *   *   *   *   *   *    *   *   *   *     |
      |     *   *   *****    *   *   *   *   *   *   *   *   ******           *   *   *   *   *   *    *   *   *   *     |
   1x +-++-+*+++*-++*+++*++++*+-+*+++*-++*+++*-++*+++*+++*++-*++++*-++*****+++*++-*+++*++-*+++*+-+*++++*+++*++-*+++*+-++-+
      |     *   *   *   *    *   *   *   *   *   *   *   *   *    *   *+++*   *   *   *   *   *   *    *   *   *   *     |
      |     *   *   *   *    *   *   *   *   *   *   *   *   *    *   *   *   *   *   *   *   *   *    *   *   *   *     |
      |     *   *   *   *    *   *   *   *   *   *   *   *   *    *   *   *   *   *   *   *   *   *    *   *   *   *     |
 0.9x +-+---*****---*****----*****---*****---*****---*****---******---*****---*****---*****---*****----*****---*****---+-+
         astar   bzip2      gcc   gobmk h264ref  hmmer libquantum    mcf omnetpp perlbench sjeng xalancbmk  hmean
  png: http://imgur.com/3Dp4vvq

-                           SPECint06 (train set), aarch64-linux-user. Host: Intel i7-4790K @ 4.00GHz

 1.7x +-+--------------------------------------------------------------------------------------------------------------+-+
      |                                                                                                                  |
      |                                                                                                       jr         |
 1.6x +-+...............................................................................................+++............+-+
      |                                                                                                *****             |
      |                                                                                                *+++*             |
      |                                                                                                *   *             |
 1.5x +-+..............................................................................................*...*...........+-+
      |                                                                        +++                     *   *             |
      |                                                                       *****                    *   *             |
 1.4x +-+.....................................................................*+++*....................*...*...........+-+
      |                                                                       *   *                    *   *             |
      |                                      *****                            *   *                    *   *             |
      |                                      *   *                            *   *   *****            *   *             |
 1.3x +-+....................................*...*............................*...*...*...*............*...*...........+-+
      |                       +++            *   *                            *   *   *   *            *   *             |
      |                      *****           *   *                            *   *   *   *   *****    *   *             |
 1.2x +-+....................*...*...........*...*............................*...*...*...*...*+++*....*...*...*****...+-+
      |                      *   *           *   *                            *   *   *   *   *   *    *   *   *+++*     |
      |     *****            *   *   *****   *   *                            *   *   *   *   *   *    *   *   *   *     |
      |     *   *            *   *   *+++*   *   *                            *   *   *   *   *   *    *   *   *   *     |
 1.1x +-+...*...*............*...*...*...*...*...*............................*...*...*...*...*...*....*...*...*...*...+-+
      |     *   *   *****    *   *   *   *   *   *                    *****   *   *   *   *   *   *    *   *   *   *     |
      |     *   *   *   *    *   *   *   *   *   *    +++    ******   *+++*   *   *   *   *   *   *    *   *   *   *     |
   1x +-+---*****---*****----*****---*****---*****---*****---******---*****---*****---*****---*****----*****---*****---+-+
         astar   bzip2      gcc   gobmk h264ref  hmmer libquantum    mcf omnetpp perlbench sjeng xalancbmk  hmean
  png: http://imgur.com/vRrdc9j

Signed-off-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
This commit is contained in:
Emilio G. Cota 2017-04-28 14:59:23 -04:00 committed by Richard Henderson
parent e78722368c
commit e75449a346

View File

@@ -11367,8 +11367,7 @@ void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb)
 gen_a64_set_pc_im(dc->pc);
 /* fall through */
 case DISAS_JUMP:
-/* indicate that the hash table must be used to find the next TB */
-tcg_gen_exit_tb(0);
+tcg_gen_lookup_and_goto_ptr(cpu_pc);
 break;
 case DISAS_TB_JUMP:
 case DISAS_EXC: