From 554fd8c5195424bdbcabf5de30fdc183aba391bd Mon Sep 17 00:00:00 2001 From: upstream source tree Date: Sun, 15 Mar 2015 20:14:05 -0400 Subject: obtained gcc-4.6.4.tar.bz2 from upstream website; verified gcc-4.6.4.tar.bz2.sig; imported gcc-4.6.4 source tree from verified upstream tarball. downloading a git-generated archive based on the 'upstream' tag should provide you with a source tree that is binary identical to the one extracted from the above tarball. if you have obtained the source via the command 'git clone', however, do note that line-endings of files in your working directory might differ from line-endings of the respective files in the upstream repository. --- gcc/sel-sched.c | 7760 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 7760 insertions(+) create mode 100644 gcc/sel-sched.c (limited to 'gcc/sel-sched.c') diff --git a/gcc/sel-sched.c b/gcc/sel-sched.c new file mode 100644 index 000000000..bc390d581 --- /dev/null +++ b/gcc/sel-sched.c @@ -0,0 +1,7760 @@ +/* Instruction scheduling pass. Selective scheduler and pipeliner. + Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 + Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl-error.h" +#include "tm_p.h" +#include "hard-reg-set.h" +#include "regs.h" +#include "function.h" +#include "flags.h" +#include "insn-config.h" +#include "insn-attr.h" +#include "except.h" +#include "recog.h" +#include "params.h" +#include "target.h" +#include "output.h" +#include "timevar.h" +#include "tree-pass.h" +#include "sched-int.h" +#include "ggc.h" +#include "tree.h" +#include "vec.h" +#include "langhooks.h" +#include "rtlhooks-def.h" +#include "output.h" +#include "emit-rtl.h" +#include "ira.h" + +#ifdef INSN_SCHEDULING +#include "sel-sched-ir.h" +#include "sel-sched-dump.h" +#include "sel-sched.h" +#include "dbgcnt.h" + +/* Implementation of selective scheduling approach. + The below implementation follows the original approach with the following + changes: + + o the scheduler works after register allocation (but can be also tuned + to work before RA); + o some instructions are not copied or register renamed; + o conditional jumps are not moved with code duplication; + o several jumps in one parallel group are not supported; + o when pipelining outer loops, code motion through inner loops + is not supported; + o control and data speculation are supported; + o some improvements for better compile time/performance were made. + + Terminology + =========== + + A vinsn, or virtual insn, is an insn with additional data characterizing + insn pattern, such as LHS, RHS, register sets used/set/clobbered, etc. + Vinsns also act as smart pointers to save memory by reusing them in + different expressions. A vinsn is described by vinsn_t type. 
+
+   An expression is a vinsn with additional data characterizing its properties
+   at some point in the control flow graph.  The data may be its usefulness,
+   priority, speculative status, whether it was renamed/substituted, etc.
+   An expression is described by expr_t type.
+
+   Availability set (av_set) is a set of expressions at a given control flow
+   point.  It is represented as av_set_t.  The expressions in av sets are kept
+   sorted in terms of the expr_greater_p function.  This allows truncating
+   the set while keeping only the best expressions.
+
+   A fence is a point through which code motion is prohibited.  On each step,
+   we gather a parallel group of insns at a fence.  It is possible to have
+   multiple fences.  A fence is represented via fence_t.
+
+   A boundary is the border between the fence group and the rest of the code.
+   Currently, we never have more than one boundary per fence, as we finalize
+   the fence group when a jump is scheduled.  A boundary is represented
+   via bnd_t.
+
+   High-level overview
+   ===================
+
+   The scheduler finds regions to schedule, schedules each one, and finalizes.
+   The regions are formed starting from innermost loops, so that when the
+   inner loop is pipelined, its prologue can be scheduled together with the
+   yet unprocessed outer loop.  The rest of the acyclic regions are found
+   using extend_rgns: the blocks that are not yet allocated to any region are
+   traversed in top-down order, and a block is added to the region to which
+   all its predecessors belong; otherwise, the block starts its own region.
+
+   The main scheduling loop (sel_sched_region_2) consists of just
+   scheduling on each fence and updating fences.  For each fence,
+   we fill a parallel group of insns (fill_insns) as long as insns can still
+   be added.  First, we compute the available exprs (av-set) at the boundary
+   of the current group.  Second, we choose the best expression from it.  If a
+   stall is required to schedule any of the expressions, we advance the
+   current cycle appropriately.  So, the final group does not exactly
+   correspond to a VLIW word.  Third, we move the chosen expression to the
+   boundary (move_op) and update the intermediate av sets and liveness sets.
+   We quit fill_insns when either no insns are left for scheduling or we have
+   scheduled enough insns that we feel like advancing the scheduling point.
+
+   Computing available expressions
+   ===============================
+
+   The computation (compute_av_set) is a bottom-up traversal.  At each insn,
+   we're moving the union of its successors' sets through it via
+   moveup_expr_set.  The dependent expressions are removed.  Local
+   transformations (substitution, speculation) are applied to move more
+   exprs.  Then the expr corresponding to the current insn is added.
+   The result is saved on each basic block header.
+
+   When traversing the CFG, we're moving down for no more than max_ws insns.
+   Also, we do not move down to ineligible successors (is_ineligible_successor),
+   which include moving along a back-edge, moving to already scheduled code,
+   and moving to another fence.  The first two restrictions are lifted during
+   pipelining, which allows us to move insns along a back-edge.  We always have
+   an acyclic region for scheduling because we forbid motion through fences.
+
+   Choosing the best expression
+   ============================
+
+   We sort the final availability set via sel_rank_for_schedule, then we
+   remove expressions which are not yet ready (tick_check_p) or whose
+   destination registers cannot be used.
+   For some of them, we choose another register via find_best_reg.  To do
+   this, we run find_used_regs to calculate the set of registers which cannot
+   be used.  The find_used_regs function performs a traversal of the code
+   motion paths for an expr.  For renaming we consider only registers that
+   are in the same register class as the original one and whose use does not
+   interfere with any live ranges.  Finally, we convert the resulting set to
+   the ready list format and use the max_issue and reorder* hooks similarly
+   to the Haifa scheduler.
+
+   Scheduling the best expression
+   ==============================
+
+   We run the move_op routine to perform the same type of code motion path
+   traversal as in find_used_regs.  (Both work via the same driver,
+   code_motion_path_driver.)  When moving down the CFG, we look for the
+   original instruction that gave birth to the chosen expression, undoing
+   the transformations performed on the expression via the history saved in
+   it.  When found, we remove the instruction or leave a reg-reg copy or a
+   speculation check if needed.  On the way up, we insert bookkeeping copies
+   at each join point.  If a copy is not needed, it will be removed later
+   during this traversal.  We also update the saved av sets and liveness
+   sets on the way up.
+
+   Finalizing the schedule
+   =======================
+
+   When pipelining, we reschedule the blocks from which insns were pipelined
+   to get a tighter schedule.  On Itanium, we also perform bundling via
+   the same routine from ia64.c.
+
+   Dependence analysis changes
+   ===========================
+
+   We augmented sched-deps.c with hooks that get called when a particular
+   dependence is found in a particular part of an insn.  Using these hooks, we
+   can do several things: determine whether an insn can be moved through
+   another (has_dependence_p, moveup_expr); find out whether an insn can be
+   scheduled on the current cycle (tick_check_p); find out the registers that
+   are set/used/clobbered by an insn and everything else that restricts its
+   movement, like SCHED_GROUP_P or CANT_MOVE (done in
+   init_global_and_expr_for_insn).
+
+   Initialization changes
+   ======================
+
+   There are parts of haifa-sched.c, sched-deps.c, and sched-rgn.c that are
+   reused in all of the schedulers.  We have split the initialization of data
+   for such parts into separate functions, prefixed with the scheduler type
+   and postfixed with the type of data initialized:
+   {,sel_,haifa_}sched_{init,finish}, sched_rgn_init/finish,
+   sched_deps_init/finish, sched_init_{luids/bbs}, etc.  The same split is
+   applied to the current_sched_info structure: the dependence-related part
+   is in sched_deps_info, the common part is in common_sched_info, and the
+   haifa/sel/etc. part is in current_sched_info.
+
+   Target contexts
+   ===============
+
+   As we now schedule at multiple points, this would not work with backends
+   that save some of the scheduler state to use it in the target hooks.
+   For this purpose, we introduce the concept of target contexts, which
+   encapsulate such information.  The backend should implement simple routines
+   for allocating/freeing/setting such a context.  The scheduler calls these
+   as target hooks and handles the target context as an opaque pointer (similar
+   to the DFA state type, state_t).
+
+   Various speedups
+   ================
+
+   As the correct data dependence graph is not supported during scheduling
+   (which is to be changed in the mid-term), we cache as much of the
+   dependence analysis results as possible to avoid reanalyzing.
This includes: bitmap caches on + each insn in stream of the region saying yes/no for a query with a pair of + UIDs; hashtables with the previously done transformations on each insn in + stream; a vector keeping a history of transformations on each expr. + + Also, we try to minimize the dependence context used on each fence to check + whether the given expression is ready for scheduling by removing from it + insns that are definitely completed the execution. The results of + tick_check_p checks are also cached in a vector on each fence. + + We keep a valid liveness set on each insn in a region to avoid the high + cost of recomputation on large basic blocks. + + Finally, we try to minimize the number of needed updates to the availability + sets. The updates happen in two cases: when fill_insns terminates, + we advance all fences and increase the stage number to show that the region + has changed and the sets are to be recomputed; and when the next iteration + of a loop in fill_insns happens (but this one reuses the saved av sets + on bb headers.) Thus, we try to break the fill_insns loop only when + "significant" number of insns from the current scheduling window was + scheduled. This should be made a target param. + + + TODO: correctly support the data dependence graph at all stages and get rid + of all caches. This should speed up the scheduler. + TODO: implement moving cond jumps with bookkeeping copies on both targets. + TODO: tune the scheduler before RA so it does not create too much pseudos. + + + References: + S.-M. Moon and K. Ebcioglu. Parallelizing nonnumerical code with + selective scheduling and software pipelining. + ACM TOPLAS, Vol 19, No. 6, pages 853--898, Nov. 1997. + + Andrey Belevantsev, Maxim Kuvyrkov, Vladimir Makarov, Dmitry Melnik, + and Dmitry Zhurikhin. An interblock VLIW-targeted instruction scheduler + for GCC. In Proceedings of GCC Developers' Summit 2006. + + Arutyun Avetisyan, Andrey Belevantsev, and Dmitry Melnik. GCC Instruction + Scheduler and Software Pipeliner on the Itanium Platform. EPIC-7 Workshop. + http://rogue.colorado.edu/EPIC7/. + +*/ + +/* True when pipelining is enabled. */ +bool pipelining_p; + +/* True if bookkeeping is enabled. */ +bool bookkeeping_p; + +/* Maximum number of insns that are eligible for renaming. */ +int max_insns_to_rename; + + +/* Definitions of local types and macros. */ + +/* Represents possible outcomes of moving an expression through an insn. */ +enum MOVEUP_EXPR_CODE + { + /* The expression is not changed. */ + MOVEUP_EXPR_SAME, + + /* Not changed, but requires a new destination register. */ + MOVEUP_EXPR_AS_RHS, + + /* Cannot be moved. */ + MOVEUP_EXPR_NULL, + + /* Changed (substituted or speculated). */ + MOVEUP_EXPR_CHANGED + }; + +/* The container to be passed into rtx search & replace functions. */ +struct rtx_search_arg +{ + /* What we are searching for. */ + rtx x; + + /* The occurence counter. */ + int n; +}; + +typedef struct rtx_search_arg *rtx_search_arg_p; + +/* This struct contains precomputed hard reg sets that are needed when + computing registers available for renaming. */ +struct hard_regs_data +{ + /* For every mode, this stores registers available for use with + that mode. */ + HARD_REG_SET regs_for_mode[NUM_MACHINE_MODES]; + + /* True when regs_for_mode[mode] is initialized. */ + bool regs_for_mode_ok[NUM_MACHINE_MODES]; + + /* For every register, it has regs that are ok to rename into it. + The register in question is always set. If not, this means + that the whole set is not computed yet. 
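+
+     For example, a minimal sketch of how this lazily computed row is
+     meant to be queried (illustrative only; the actual check lives in
+     sel_hard_regno_rename_ok below):
+
+       if (!TEST_HARD_REG_BIT (sel_hrd.regs_for_rename[regno], regno))
+         init_hard_regno_rename (regno);
+       ok = TEST_HARD_REG_BIT (sel_hrd.regs_for_rename[regno], to_regno);
+
+     where `ok' and `to_regno' are placeholder names used only for this
+     sketch.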
*/ + HARD_REG_SET regs_for_rename[FIRST_PSEUDO_REGISTER]; + + /* For every mode, this stores registers not available due to + call clobbering. */ + HARD_REG_SET regs_for_call_clobbered[NUM_MACHINE_MODES]; + + /* All registers that are used or call used. */ + HARD_REG_SET regs_ever_used; + +#ifdef STACK_REGS + /* Stack registers. */ + HARD_REG_SET stack_regs; +#endif +}; + +/* Holds the results of computation of available for renaming and + unavailable hard registers. */ +struct reg_rename +{ + /* These are unavailable due to calls crossing, globalness, etc. */ + HARD_REG_SET unavailable_hard_regs; + + /* These are *available* for renaming. */ + HARD_REG_SET available_for_renaming; + + /* Whether this code motion path crosses a call. */ + bool crosses_call; +}; + +/* A global structure that contains the needed information about harg + regs. */ +static struct hard_regs_data sel_hrd; + + +/* This structure holds local data used in code_motion_path_driver hooks on + the same or adjacent levels of recursion. Here we keep those parameters + that are not used in code_motion_path_driver routine itself, but only in + its hooks. Moreover, all parameters that can be modified in hooks are + in this structure, so all other parameters passed explicitly to hooks are + read-only. */ +struct cmpd_local_params +{ + /* Local params used in move_op_* functions. */ + + /* Edges for bookkeeping generation. */ + edge e1, e2; + + /* C_EXPR merged from all successors and locally allocated temporary C_EXPR. */ + expr_t c_expr_merged, c_expr_local; + + /* Local params used in fur_* functions. */ + /* Copy of the ORIGINAL_INSN list, stores the original insns already + found before entering the current level of code_motion_path_driver. */ + def_list_t old_original_insns; + + /* Local params used in move_op_* functions. */ + /* True when we have removed last insn in the block which was + also a boundary. Do not update anything or create bookkeeping copies. */ + BOOL_BITFIELD removed_last_insn : 1; +}; + +/* Stores the static parameters for move_op_* calls. */ +struct moveop_static_params +{ + /* Destination register. */ + rtx dest; + + /* Current C_EXPR. */ + expr_t c_expr; + + /* An UID of expr_vliw which is to be moved up. If we find other exprs, + they are to be removed. */ + int uid; + +#ifdef ENABLE_CHECKING + /* This is initialized to the insn on which the driver stopped its traversal. */ + insn_t failed_insn; +#endif + + /* True if we scheduled an insn with different register. */ + bool was_renamed; +}; + +/* Stores the static parameters for fur_* calls. */ +struct fur_static_params +{ + /* Set of registers unavailable on the code motion path. */ + regset used_regs; + + /* Pointer to the list of original insns definitions. */ + def_list_t *original_insns; + + /* True if a code motion path contains a CALL insn. */ + bool crosses_call; +}; + +typedef struct fur_static_params *fur_static_params_p; +typedef struct cmpd_local_params *cmpd_local_params_p; +typedef struct moveop_static_params *moveop_static_params_p; + +/* Set of hooks and parameters that determine behaviour specific to + move_op or find_used_regs functions. */ +struct code_motion_path_driver_info_def +{ + /* Called on enter to the basic block. */ + int (*on_enter) (insn_t, cmpd_local_params_p, void *, bool); + + /* Called when original expr is found. */ + void (*orig_expr_found) (insn_t, expr_t, cmpd_local_params_p, void *); + + /* Called while descending current basic block if current insn is not + the original EXPR we're searching for. 
*/ + bool (*orig_expr_not_found) (insn_t, av_set_t, void *); + + /* Function to merge C_EXPRes from different successors. */ + void (*merge_succs) (insn_t, insn_t, int, cmpd_local_params_p, void *); + + /* Function to finalize merge from different successors and possibly + deallocate temporary data structures used for merging. */ + void (*after_merge_succs) (cmpd_local_params_p, void *); + + /* Called on the backward stage of recursion to do moveup_expr. + Used only with move_op_*. */ + void (*ascend) (insn_t, void *); + + /* Called on the ascending pass, before returning from the current basic + block or from the whole traversal. */ + void (*at_first_insn) (insn_t, cmpd_local_params_p, void *); + + /* When processing successors in move_op we need only descend into + SUCCS_NORMAL successors, while in find_used_regs we need SUCCS_ALL. */ + int succ_flags; + + /* The routine name to print in dumps ("move_op" of "find_used_regs"). */ + const char *routine_name; +}; + +/* Global pointer to current hooks, either points to MOVE_OP_HOOKS or + FUR_HOOKS. */ +struct code_motion_path_driver_info_def *code_motion_path_driver_info; + +/* Set of hooks for performing move_op and find_used_regs routines with + code_motion_path_driver. */ +extern struct code_motion_path_driver_info_def move_op_hooks, fur_hooks; + +/* True if/when we want to emulate Haifa scheduler in the common code. + This is used in sched_rgn_local_init and in various places in + sched-deps.c. */ +int sched_emulate_haifa_p; + +/* GLOBAL_LEVEL is used to discard information stored in basic block headers + av_sets. Av_set of bb header is valid if its (bb header's) level is equal + to GLOBAL_LEVEL. And invalid if lesser. This is primarily used to advance + scheduling window. */ +int global_level; + +/* Current fences. */ +flist_t fences; + +/* True when separable insns should be scheduled as RHSes. */ +static bool enable_schedule_as_rhs_p; + +/* Used in verify_target_availability to assert that target reg is reported + unavailabile by both TARGET_UNAVAILABLE and find_used_regs only if + we haven't scheduled anything on the previous fence. + if scheduled_something_on_previous_fence is true, TARGET_UNAVAILABLE can + have more conservative value than the one returned by the + find_used_regs, thus we shouldn't assert that these values are equal. */ +static bool scheduled_something_on_previous_fence; + +/* All newly emitted insns will have their uids greater than this value. */ +static int first_emitted_uid; + +/* Set of basic blocks that are forced to start new ebbs. This is a subset + of all the ebb heads. */ +static bitmap_head _forced_ebb_heads; +bitmap_head *forced_ebb_heads = &_forced_ebb_heads; + +/* Blocks that need to be rescheduled after pipelining. */ +bitmap blocks_to_reschedule = NULL; + +/* True when the first lv set should be ignored when updating liveness. */ +static bool ignore_first = false; + +/* Number of insns max_issue has initialized data structures for. */ +static int max_issue_size = 0; + +/* Whether we can issue more instructions. */ +static int can_issue_more; + +/* Maximum software lookahead window size, reduced when rescheduling after + pipelining. */ +static int max_ws; + +/* Number of insns scheduled in current region. */ +static int num_insns_scheduled; + +/* A vector of expressions is used to be able to sort them. */ +DEF_VEC_P(expr_t); +DEF_VEC_ALLOC_P(expr_t,heap); +static VEC(expr_t, heap) *vec_av_set = NULL; + +/* A vector of vinsns is used to hold temporary lists of vinsns. 
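+   (A usage sketch in the usual vec.h style, illustrative rather than a
+   quote from this file: elements are appended with
+     VEC_safe_push (vinsn_t, heap, vec, vinsn);
+   walked with VEC_iterate (vinsn_t, vec, i, vinsn), and released with
+   VEC_free (vinsn_t, heap, vec), where `vec', `i' and `vinsn' are
+   placeholder names.)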
*/ +DEF_VEC_P(vinsn_t); +DEF_VEC_ALLOC_P(vinsn_t,heap); +typedef VEC(vinsn_t, heap) *vinsn_vec_t; + +/* This vector has the exprs which may still present in av_sets, but actually + can't be moved up due to bookkeeping created during code motion to another + fence. See comment near the call to update_and_record_unavailable_insns + for the detailed explanations. */ +static vinsn_vec_t vec_bookkeeping_blocked_vinsns = NULL; + +/* This vector has vinsns which are scheduled with renaming on the first fence + and then seen on the second. For expressions with such vinsns, target + availability information may be wrong. */ +static vinsn_vec_t vec_target_unavailable_vinsns = NULL; + +/* Vector to store temporary nops inserted in move_op to prevent removal + of empty bbs. */ +DEF_VEC_P(insn_t); +DEF_VEC_ALLOC_P(insn_t,heap); +static VEC(insn_t, heap) *vec_temp_moveop_nops = NULL; + +/* These bitmaps record original instructions scheduled on the current + iteration and bookkeeping copies created by them. */ +static bitmap current_originators = NULL; +static bitmap current_copies = NULL; + +/* This bitmap marks the blocks visited by code_motion_path_driver so we don't + visit them afterwards. */ +static bitmap code_motion_visited_blocks = NULL; + +/* Variables to accumulate different statistics. */ + +/* The number of bookkeeping copies created. */ +static int stat_bookkeeping_copies; + +/* The number of insns that required bookkeeiping for their scheduling. */ +static int stat_insns_needed_bookkeeping; + +/* The number of insns that got renamed. */ +static int stat_renamed_scheduled; + +/* The number of substitutions made during scheduling. */ +static int stat_substitutions_total; + + +/* Forward declarations of static functions. */ +static bool rtx_ok_for_substitution_p (rtx, rtx); +static int sel_rank_for_schedule (const void *, const void *); +static av_set_t find_sequential_best_exprs (bnd_t, expr_t, bool); +static basic_block find_block_for_bookkeeping (edge e1, edge e2, bool lax); + +static rtx get_dest_from_orig_ops (av_set_t); +static basic_block generate_bookkeeping_insn (expr_t, edge, edge); +static bool find_used_regs (insn_t, av_set_t, regset, struct reg_rename *, + def_list_t *); +static bool move_op (insn_t, av_set_t, expr_t, rtx, expr_t, bool*); +static int code_motion_path_driver (insn_t, av_set_t, ilist_t, + cmpd_local_params_p, void *); +static void sel_sched_region_1 (void); +static void sel_sched_region_2 (int); +static av_set_t compute_av_set_inside_bb (insn_t, ilist_t, int, bool); + +static void debug_state (state_t); + + +/* Functions that work with fences. */ + +/* Advance one cycle on FENCE. */ +static void +advance_one_cycle (fence_t fence) +{ + unsigned i; + int cycle; + rtx insn; + + advance_state (FENCE_STATE (fence)); + cycle = ++FENCE_CYCLE (fence); + FENCE_ISSUED_INSNS (fence) = 0; + FENCE_STARTS_CYCLE_P (fence) = 1; + can_issue_more = issue_rate; + FENCE_ISSUE_MORE (fence) = can_issue_more; + + for (i = 0; VEC_iterate (rtx, FENCE_EXECUTING_INSNS (fence), i, insn); ) + { + if (INSN_READY_CYCLE (insn) < cycle) + { + remove_from_deps (FENCE_DC (fence), insn); + VEC_unordered_remove (rtx, FENCE_EXECUTING_INSNS (fence), i); + continue; + } + i++; + } + if (sched_verbose >= 2) + { + sel_print ("Finished a cycle. Current cycle = %d\n", FENCE_CYCLE (fence)); + debug_state (FENCE_STATE (fence)); + } +} + +/* Returns true when SUCC in a fallthru bb of INSN, possibly + skipping empty basic blocks. 
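+   For example (illustrative CFG):
+
+     bb1:  ... INSN            ; INSN ends bb1
+     bb2:  <empty>             ; fallthru destination of bb1
+     bb3:  SUCC ...
+
+   Here the empty bb2 is skipped, so SUCC is considered to be in the
+   fallthru bb of INSN and the function returns true.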
*/ +static bool +in_fallthru_bb_p (rtx insn, rtx succ) +{ + basic_block bb = BLOCK_FOR_INSN (insn); + edge e; + + if (bb == BLOCK_FOR_INSN (succ)) + return true; + + e = find_fallthru_edge_from (bb); + if (e) + bb = e->dest; + else + return false; + + while (sel_bb_empty_p (bb)) + bb = bb->next_bb; + + return bb == BLOCK_FOR_INSN (succ); +} + +/* Construct successor fences from OLD_FENCEs and put them in NEW_FENCES. + When a successor will continue a ebb, transfer all parameters of a fence + to the new fence. ORIG_MAX_SEQNO is the maximal seqno before this round + of scheduling helping to distinguish between the old and the new code. */ +static void +extract_new_fences_from (flist_t old_fences, flist_tail_t new_fences, + int orig_max_seqno) +{ + bool was_here_p = false; + insn_t insn = NULL_RTX; + insn_t succ; + succ_iterator si; + ilist_iterator ii; + fence_t fence = FLIST_FENCE (old_fences); + basic_block bb; + + /* Get the only element of FENCE_BNDS (fence). */ + FOR_EACH_INSN (insn, ii, FENCE_BNDS (fence)) + { + gcc_assert (!was_here_p); + was_here_p = true; + } + gcc_assert (was_here_p && insn != NULL_RTX); + + /* When in the "middle" of the block, just move this fence + to the new list. */ + bb = BLOCK_FOR_INSN (insn); + if (! sel_bb_end_p (insn) + || (single_succ_p (bb) + && single_pred_p (single_succ (bb)))) + { + insn_t succ; + + succ = (sel_bb_end_p (insn) + ? sel_bb_head (single_succ (bb)) + : NEXT_INSN (insn)); + + if (INSN_SEQNO (succ) > 0 + && INSN_SEQNO (succ) <= orig_max_seqno + && INSN_SCHED_TIMES (succ) <= 0) + { + FENCE_INSN (fence) = succ; + move_fence_to_fences (old_fences, new_fences); + + if (sched_verbose >= 1) + sel_print ("Fence %d continues as %d[%d] (state continue)\n", + INSN_UID (insn), INSN_UID (succ), BLOCK_NUM (succ)); + } + return; + } + + /* Otherwise copy fence's structures to (possibly) multiple successors. */ + FOR_EACH_SUCC_1 (succ, si, insn, SUCCS_NORMAL | SUCCS_SKIP_TO_LOOP_EXITS) + { + int seqno = INSN_SEQNO (succ); + + if (0 < seqno && seqno <= orig_max_seqno + && (pipelining_p || INSN_SCHED_TIMES (succ) <= 0)) + { + bool b = (in_same_ebb_p (insn, succ) + || in_fallthru_bb_p (insn, succ)); + + if (sched_verbose >= 1) + sel_print ("Fence %d continues as %d[%d] (state %s)\n", + INSN_UID (insn), INSN_UID (succ), + BLOCK_NUM (succ), b ? "continue" : "reset"); + + if (b) + add_dirty_fence_to_fences (new_fences, succ, fence); + else + { + /* Mark block of the SUCC as head of the new ebb. */ + bitmap_set_bit (forced_ebb_heads, BLOCK_NUM (succ)); + add_clean_fence_to_fences (new_fences, succ, fence); + } + } + } +} + + +/* Functions to support substitution. */ + +/* Returns whether INSN with dependence status DS is eligible for + substitution, i.e. it's a copy operation x := y, and RHS that is + moved up through this insn should be substituted. */ +static bool +can_substitute_through_p (insn_t insn, ds_t ds) +{ + /* We can substitute only true dependencies. */ + if ((ds & DEP_OUTPUT) + || (ds & DEP_ANTI) + || ! INSN_RHS (insn) + || ! INSN_LHS (insn)) + return false; + + /* Now we just need to make sure the INSN_RHS consists of only one + simple REG rtx. */ + if (REG_P (INSN_LHS (insn)) + && REG_P (INSN_RHS (insn))) + return true; + return false; +} + +/* Substitute all occurences of INSN's destination in EXPR' vinsn with INSN's + source (if INSN is eligible for substitution). Returns TRUE if + substitution was actually performed, FALSE otherwise. 
Substitution might + be not performed because it's either EXPR' vinsn doesn't contain INSN's + destination or the resulting insn is invalid for the target machine. + When UNDO is true, perform unsubstitution instead (the difference is in + the part of rtx on which validate_replace_rtx is called). */ +static bool +substitute_reg_in_expr (expr_t expr, insn_t insn, bool undo) +{ + rtx *where; + bool new_insn_valid; + vinsn_t *vi = &EXPR_VINSN (expr); + bool has_rhs = VINSN_RHS (*vi) != NULL; + rtx old, new_rtx; + + /* Do not try to replace in SET_DEST. Although we'll choose new + register for the RHS, we don't want to change RHS' original reg. + If the insn is not SET, we may still be able to substitute something + in it, and if we're here (don't have deps), it doesn't write INSN's + dest. */ + where = (has_rhs + ? &VINSN_RHS (*vi) + : &PATTERN (VINSN_INSN_RTX (*vi))); + old = undo ? INSN_RHS (insn) : INSN_LHS (insn); + + /* Substitute if INSN has a form of x:=y and LHS(INSN) occurs in *VI. */ + if (rtx_ok_for_substitution_p (old, *where)) + { + rtx new_insn; + rtx *where_replace; + + /* We should copy these rtxes before substitution. */ + new_rtx = copy_rtx (undo ? INSN_LHS (insn) : INSN_RHS (insn)); + new_insn = create_copy_of_insn_rtx (VINSN_INSN_RTX (*vi)); + + /* Where we'll replace. + WHERE_REPLACE should point inside NEW_INSN, so INSN_RHS couldn't be + used instead of SET_SRC. */ + where_replace = (has_rhs + ? &SET_SRC (PATTERN (new_insn)) + : &PATTERN (new_insn)); + + new_insn_valid + = validate_replace_rtx_part_nosimplify (old, new_rtx, where_replace, + new_insn); + + /* ??? Actually, constrain_operands result depends upon choice of + destination register. E.g. if we allow single register to be an rhs, + and if we try to move dx=ax(as rhs) through ax=dx, we'll result + in invalid insn dx=dx, so we'll loose this rhs here. + Just can't come up with significant testcase for this, so just + leaving it for now. */ + if (new_insn_valid) + { + change_vinsn_in_expr (expr, + create_vinsn_from_insn_rtx (new_insn, false)); + + /* Do not allow clobbering the address register of speculative + insns. */ + if ((EXPR_SPEC_DONE_DS (expr) & SPECULATIVE) + && bitmap_bit_p (VINSN_REG_USES (EXPR_VINSN (expr)), + expr_dest_regno (expr))) + EXPR_TARGET_AVAILABLE (expr) = false; + + return true; + } + else + return false; + } + else + return false; +} + +/* Helper function for count_occurences_equiv. */ +static int +count_occurrences_1 (rtx *cur_rtx, void *arg) +{ + rtx_search_arg_p p = (rtx_search_arg_p) arg; + + /* The last param FOR_GCSE is true, because otherwise it performs excessive + substitutions like + r8 = r33 + r16 = r33 + for the last insn it presumes r33 equivalent to r8, so it changes it to + r33. Actually, there's no change, but it spoils debugging. */ + if (exp_equiv_p (*cur_rtx, p->x, 0, true)) + { + /* Bail out if we occupy more than one register. */ + if (REG_P (*cur_rtx) + && HARD_REGISTER_P (*cur_rtx) + && hard_regno_nregs[REGNO(*cur_rtx)][GET_MODE (*cur_rtx)] > 1) + { + p->n = 0; + return 1; + } + + p->n++; + + /* Do not traverse subexprs. */ + return -1; + } + + if (GET_CODE (*cur_rtx) == SUBREG + && REG_P (p->x) + && (!REG_P (SUBREG_REG (*cur_rtx)) + || REGNO (SUBREG_REG (*cur_rtx)) == REGNO (p->x))) + { + /* ??? Do not support substituting regs inside subregs. In that case, + simplify_subreg will be called by validate_replace_rtx, and + unsubstitution will fail later. */ + p->n = 0; + return 1; + } + + /* Continue search. 
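+     (for_each_rtx callback convention: returning 0 continues the walk,
+     -1 skips the sub-rtxes of the current rtx, and any other nonzero
+     value terminates the traversal.)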
*/ + return 0; +} + +/* Return the number of places WHAT appears within WHERE. + Bail out when we found a reference occupying several hard registers. */ +static int +count_occurrences_equiv (rtx what, rtx where) +{ + struct rtx_search_arg arg; + + arg.x = what; + arg.n = 0; + + for_each_rtx (&where, &count_occurrences_1, (void *) &arg); + + return arg.n; +} + +/* Returns TRUE if WHAT is found in WHERE rtx tree. */ +static bool +rtx_ok_for_substitution_p (rtx what, rtx where) +{ + return (count_occurrences_equiv (what, where) > 0); +} + + +/* Functions to support register renaming. */ + +/* Substitute VI's set source with REGNO. Returns newly created pattern + that has REGNO as its source. */ +static rtx +create_insn_rtx_with_rhs (vinsn_t vi, rtx rhs_rtx) +{ + rtx lhs_rtx; + rtx pattern; + rtx insn_rtx; + + lhs_rtx = copy_rtx (VINSN_LHS (vi)); + + pattern = gen_rtx_SET (VOIDmode, lhs_rtx, rhs_rtx); + insn_rtx = create_insn_rtx_from_pattern (pattern, NULL_RTX); + + return insn_rtx; +} + +/* Returns whether INSN's src can be replaced with register number + NEW_SRC_REG. E.g. the following insn is valid for i386: + + (insn:HI 2205 6585 2207 727 ../../gcc/libiberty/regex.c:3337 + (set (mem/s:QI (plus:SI (plus:SI (reg/f:SI 7 sp) + (reg:SI 0 ax [orig:770 c1 ] [770])) + (const_int 288 [0x120])) [0 str S1 A8]) + (const_int 0 [0x0])) 43 {*movqi_1} (nil) + (nil)) + + But if we change (const_int 0 [0x0]) to (reg:QI 4 si), it will be invalid + because of operand constraints: + + (define_insn "*movqi_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=q,q ,q ,r,r ,?r,m") + (match_operand:QI 1 "general_operand" " q,qn,qm,q,rn,qm,qn") + )] + + So do constrain_operands here, before choosing NEW_SRC_REG as best + reg for rhs. */ + +static bool +replace_src_with_reg_ok_p (insn_t insn, rtx new_src_reg) +{ + vinsn_t vi = INSN_VINSN (insn); + enum machine_mode mode; + rtx dst_loc; + bool res; + + gcc_assert (VINSN_SEPARABLE_P (vi)); + + get_dest_and_mode (insn, &dst_loc, &mode); + gcc_assert (mode == GET_MODE (new_src_reg)); + + if (REG_P (dst_loc) && REGNO (new_src_reg) == REGNO (dst_loc)) + return true; + + /* See whether SET_SRC can be replaced with this register. */ + validate_change (insn, &SET_SRC (PATTERN (insn)), new_src_reg, 1); + res = verify_changes (0); + cancel_changes (0); + + return res; +} + +/* Returns whether INSN still be valid after replacing it's DEST with + register NEW_REG. */ +static bool +replace_dest_with_reg_ok_p (insn_t insn, rtx new_reg) +{ + vinsn_t vi = INSN_VINSN (insn); + bool res; + + /* We should deal here only with separable insns. */ + gcc_assert (VINSN_SEPARABLE_P (vi)); + gcc_assert (GET_MODE (VINSN_LHS (vi)) == GET_MODE (new_reg)); + + /* See whether SET_DEST can be replaced with this register. */ + validate_change (insn, &SET_DEST (PATTERN (insn)), new_reg, 1); + res = verify_changes (0); + cancel_changes (0); + + return res; +} + +/* Create a pattern with rhs of VI and lhs of LHS_RTX. */ +static rtx +create_insn_rtx_with_lhs (vinsn_t vi, rtx lhs_rtx) +{ + rtx rhs_rtx; + rtx pattern; + rtx insn_rtx; + + rhs_rtx = copy_rtx (VINSN_RHS (vi)); + + pattern = gen_rtx_SET (VOIDmode, lhs_rtx, rhs_rtx); + insn_rtx = create_insn_rtx_from_pattern (pattern, NULL_RTX); + + return insn_rtx; +} + +/* Substitute lhs in the given expression EXPR for the register with number + NEW_REGNO. SET_DEST may be arbitrary rtx, not only register. 
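+   Note that, despite the wording above, the replacement register is
+   passed as the rtx NEW_REG rather than as a register number.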
*/ +static void +replace_dest_with_reg_in_expr (expr_t expr, rtx new_reg) +{ + rtx insn_rtx; + vinsn_t vinsn; + + insn_rtx = create_insn_rtx_with_lhs (EXPR_VINSN (expr), new_reg); + vinsn = create_vinsn_from_insn_rtx (insn_rtx, false); + + change_vinsn_in_expr (expr, vinsn); + EXPR_WAS_RENAMED (expr) = 1; + EXPR_TARGET_AVAILABLE (expr) = 1; +} + +/* Returns whether VI writes either one of the USED_REGS registers or, + if a register is a hard one, one of the UNAVAILABLE_HARD_REGS registers. */ +static bool +vinsn_writes_one_of_regs_p (vinsn_t vi, regset used_regs, + HARD_REG_SET unavailable_hard_regs) +{ + unsigned regno; + reg_set_iterator rsi; + + EXECUTE_IF_SET_IN_REG_SET (VINSN_REG_SETS (vi), 0, regno, rsi) + { + if (REGNO_REG_SET_P (used_regs, regno)) + return true; + if (HARD_REGISTER_NUM_P (regno) + && TEST_HARD_REG_BIT (unavailable_hard_regs, regno)) + return true; + } + + EXECUTE_IF_SET_IN_REG_SET (VINSN_REG_CLOBBERS (vi), 0, regno, rsi) + { + if (REGNO_REG_SET_P (used_regs, regno)) + return true; + if (HARD_REGISTER_NUM_P (regno) + && TEST_HARD_REG_BIT (unavailable_hard_regs, regno)) + return true; + } + + return false; +} + +/* Returns register class of the output register in INSN. + Returns NO_REGS for call insns because some targets have constraints on + destination register of a call insn. + + Code adopted from regrename.c::build_def_use. */ +static enum reg_class +get_reg_class (rtx insn) +{ + int alt, i, n_ops; + + extract_insn (insn); + if (! constrain_operands (1)) + fatal_insn_not_found (insn); + preprocess_constraints (); + alt = which_alternative; + n_ops = recog_data.n_operands; + + for (i = 0; i < n_ops; ++i) + { + int matches = recog_op_alt[i][alt].matches; + if (matches >= 0) + recog_op_alt[i][alt].cl = recog_op_alt[matches][alt].cl; + } + + if (asm_noperands (PATTERN (insn)) > 0) + { + for (i = 0; i < n_ops; i++) + if (recog_data.operand_type[i] == OP_OUT) + { + rtx *loc = recog_data.operand_loc[i]; + rtx op = *loc; + enum reg_class cl = recog_op_alt[i][alt].cl; + + if (REG_P (op) + && REGNO (op) == ORIGINAL_REGNO (op)) + continue; + + return cl; + } + } + else if (!CALL_P (insn)) + { + for (i = 0; i < n_ops + recog_data.n_dups; i++) + { + int opn = i < n_ops ? i : recog_data.dup_num[i - n_ops]; + enum reg_class cl = recog_op_alt[opn][alt].cl; + + if (recog_data.operand_type[opn] == OP_OUT || + recog_data.operand_type[opn] == OP_INOUT) + return cl; + } + } + +/* Insns like + (insn (set (reg:CCZ 17 flags) (compare:CCZ ...))) + may result in returning NO_REGS, cause flags is written implicitly through + CMP insn, which has no OP_OUT | OP_INOUT operands. */ + return NO_REGS; +} + +#ifdef HARD_REGNO_RENAME_OK +/* Calculate HARD_REGNO_RENAME_OK data for REGNO. */ +static void +init_hard_regno_rename (int regno) +{ + int cur_reg; + + SET_HARD_REG_BIT (sel_hrd.regs_for_rename[regno], regno); + + for (cur_reg = 0; cur_reg < FIRST_PSEUDO_REGISTER; cur_reg++) + { + /* We are not interested in renaming in other regs. */ + if (!TEST_HARD_REG_BIT (sel_hrd.regs_ever_used, cur_reg)) + continue; + + if (HARD_REGNO_RENAME_OK (regno, cur_reg)) + SET_HARD_REG_BIT (sel_hrd.regs_for_rename[regno], cur_reg); + } +} +#endif + +/* A wrapper around HARD_REGNO_RENAME_OK that will look into the hard regs + data first. */ +static inline bool +sel_hard_regno_rename_ok (int from ATTRIBUTE_UNUSED, int to ATTRIBUTE_UNUSED) +{ +#ifdef HARD_REGNO_RENAME_OK + /* Check whether this is all calculated. 
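+     The convention is that FROM's own bit in regs_for_rename[FROM] is set
+     once the row has been computed; if it is still clear, the row is
+     filled lazily by init_hard_regno_rename below.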
*/ + if (TEST_HARD_REG_BIT (sel_hrd.regs_for_rename[from], from)) + return TEST_HARD_REG_BIT (sel_hrd.regs_for_rename[from], to); + + init_hard_regno_rename (from); + + return TEST_HARD_REG_BIT (sel_hrd.regs_for_rename[from], to); +#else + return true; +#endif +} + +/* Calculate set of registers that are capable of holding MODE. */ +static void +init_regs_for_mode (enum machine_mode mode) +{ + int cur_reg; + + CLEAR_HARD_REG_SET (sel_hrd.regs_for_mode[mode]); + CLEAR_HARD_REG_SET (sel_hrd.regs_for_call_clobbered[mode]); + + for (cur_reg = 0; cur_reg < FIRST_PSEUDO_REGISTER; cur_reg++) + { + int nregs = hard_regno_nregs[cur_reg][mode]; + int i; + + for (i = nregs - 1; i >= 0; --i) + if (fixed_regs[cur_reg + i] + || global_regs[cur_reg + i] + /* Can't use regs which aren't saved by + the prologue. */ + || !TEST_HARD_REG_BIT (sel_hrd.regs_ever_used, cur_reg + i) + /* Can't use regs with non-null REG_BASE_VALUE, because adjusting + it affects aliasing globally and invalidates all AV sets. */ + || get_reg_base_value (cur_reg + i) +#ifdef LEAF_REGISTERS + /* We can't use a non-leaf register if we're in a + leaf function. */ + || (current_function_is_leaf + && !LEAF_REGISTERS[cur_reg + i]) +#endif + ) + break; + + if (i >= 0) + continue; + + /* See whether it accepts all modes that occur in + original insns. */ + if (! HARD_REGNO_MODE_OK (cur_reg, mode)) + continue; + + if (HARD_REGNO_CALL_PART_CLOBBERED (cur_reg, mode)) + SET_HARD_REG_BIT (sel_hrd.regs_for_call_clobbered[mode], + cur_reg); + + /* If the CUR_REG passed all the checks above, + then it's ok. */ + SET_HARD_REG_BIT (sel_hrd.regs_for_mode[mode], cur_reg); + } + + sel_hrd.regs_for_mode_ok[mode] = true; +} + +/* Init all register sets gathered in HRD. */ +static void +init_hard_regs_data (void) +{ + int cur_reg = 0; + int cur_mode = 0; + + CLEAR_HARD_REG_SET (sel_hrd.regs_ever_used); + for (cur_reg = 0; cur_reg < FIRST_PSEUDO_REGISTER; cur_reg++) + if (df_regs_ever_live_p (cur_reg) || call_used_regs[cur_reg]) + SET_HARD_REG_BIT (sel_hrd.regs_ever_used, cur_reg); + + /* Initialize registers that are valid based on mode when this is + really needed. */ + for (cur_mode = 0; cur_mode < NUM_MACHINE_MODES; cur_mode++) + sel_hrd.regs_for_mode_ok[cur_mode] = false; + + /* Mark that all HARD_REGNO_RENAME_OK is not calculated. */ + for (cur_reg = 0; cur_reg < FIRST_PSEUDO_REGISTER; cur_reg++) + CLEAR_HARD_REG_SET (sel_hrd.regs_for_rename[cur_reg]); + +#ifdef STACK_REGS + CLEAR_HARD_REG_SET (sel_hrd.stack_regs); + + for (cur_reg = FIRST_STACK_REG; cur_reg <= LAST_STACK_REG; cur_reg++) + SET_HARD_REG_BIT (sel_hrd.stack_regs, cur_reg); +#endif +} + +/* Mark hardware regs in REG_RENAME_P that are not suitable + for renaming rhs in INSN due to hardware restrictions (register class, + modes compatibility etc). This doesn't affect original insn's dest reg, + if it isn't in USED_REGS. DEF is a definition insn of rhs for which the + destination register is sought. LHS (DEF->ORIG_INSN) may be REG or MEM. + Registers that are in used_regs are always marked in + unavailable_hard_regs as well. 
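+
+   In outline (a summary of the code below): the function gives up early
+   for memory destinations and, before reload, for pseudo destinations;
+   it marks every register unavailable when the destination is a fixed,
+   global or (needed) frame pointer register, or when its class cannot be
+   determined; it additionally excludes frame pointer registers, stack
+   registers (STACK_REGS), and call-used registers on call-crossing paths;
+   and, after reload, it intersects the remaining registers with those
+   suitable for the destination's class and mode and allowed by
+   HARD_REGNO_RENAME_OK.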
*/ + +static void +mark_unavailable_hard_regs (def_t def, struct reg_rename *reg_rename_p, + regset used_regs ATTRIBUTE_UNUSED) +{ + enum machine_mode mode; + enum reg_class cl = NO_REGS; + rtx orig_dest; + unsigned cur_reg, regno; + hard_reg_set_iterator hrsi; + + gcc_assert (GET_CODE (PATTERN (def->orig_insn)) == SET); + gcc_assert (reg_rename_p); + + orig_dest = SET_DEST (PATTERN (def->orig_insn)); + + /* We have decided not to rename 'mem = something;' insns, as 'something' + is usually a register. */ + if (!REG_P (orig_dest)) + return; + + regno = REGNO (orig_dest); + + /* If before reload, don't try to work with pseudos. */ + if (!reload_completed && !HARD_REGISTER_NUM_P (regno)) + return; + + if (reload_completed) + cl = get_reg_class (def->orig_insn); + + /* Stop if the original register is one of the fixed_regs, global_regs or + frame pointer, or we could not discover its class. */ + if (fixed_regs[regno] + || global_regs[regno] +#if !HARD_FRAME_POINTER_IS_FRAME_POINTER + || (frame_pointer_needed && regno == HARD_FRAME_POINTER_REGNUM) +#else + || (frame_pointer_needed && regno == FRAME_POINTER_REGNUM) +#endif + || (reload_completed && cl == NO_REGS)) + { + SET_HARD_REG_SET (reg_rename_p->unavailable_hard_regs); + + /* Give a chance for original register, if it isn't in used_regs. */ + if (!def->crosses_call) + CLEAR_HARD_REG_BIT (reg_rename_p->unavailable_hard_regs, regno); + + return; + } + + /* If something allocated on stack in this function, mark frame pointer + register unavailable, considering also modes. + FIXME: it is enough to do this once per all original defs. */ + if (frame_pointer_needed) + { + int i; + + for (i = hard_regno_nregs[FRAME_POINTER_REGNUM][Pmode]; i--;) + SET_HARD_REG_BIT (reg_rename_p->unavailable_hard_regs, + FRAME_POINTER_REGNUM + i); + +#if !HARD_FRAME_POINTER_IS_FRAME_POINTER + for (i = hard_regno_nregs[HARD_FRAME_POINTER_REGNUM][Pmode]; i--;) + SET_HARD_REG_BIT (reg_rename_p->unavailable_hard_regs, + HARD_FRAME_POINTER_REGNUM + i); +#endif + } + +#ifdef STACK_REGS + /* For the stack registers the presence of FIRST_STACK_REG in USED_REGS + is equivalent to as if all stack regs were in this set. + I.e. no stack register can be renamed, and even if it's an original + register here we make sure it won't be lifted over it's previous def + (it's previous def will appear as if it's a FIRST_STACK_REG def. + The HARD_REGNO_RENAME_OK covers other cases in condition below. */ + if (IN_RANGE (REGNO (orig_dest), FIRST_STACK_REG, LAST_STACK_REG) + && REGNO_REG_SET_P (used_regs, FIRST_STACK_REG)) + IOR_HARD_REG_SET (reg_rename_p->unavailable_hard_regs, + sel_hrd.stack_regs); +#endif + + /* If there's a call on this path, make regs from call_used_reg_set + unavailable. */ + if (def->crosses_call) + IOR_HARD_REG_SET (reg_rename_p->unavailable_hard_regs, + call_used_reg_set); + + /* Stop here before reload: we need FRAME_REGS, STACK_REGS, and crosses_call, + but not register classes. */ + if (!reload_completed) + return; + + /* Leave regs as 'available' only from the current + register class. */ + COPY_HARD_REG_SET (reg_rename_p->available_for_renaming, + reg_class_contents[cl]); + + mode = GET_MODE (orig_dest); + + /* Leave only registers available for this mode. */ + if (!sel_hrd.regs_for_mode_ok[mode]) + init_regs_for_mode (mode); + AND_HARD_REG_SET (reg_rename_p->available_for_renaming, + sel_hrd.regs_for_mode[mode]); + + /* Exclude registers that are partially call clobbered. */ + if (def->crosses_call + && ! 
HARD_REGNO_CALL_PART_CLOBBERED (regno, mode)) + AND_COMPL_HARD_REG_SET (reg_rename_p->available_for_renaming, + sel_hrd.regs_for_call_clobbered[mode]); + + /* Leave only those that are ok to rename. */ + EXECUTE_IF_SET_IN_HARD_REG_SET (reg_rename_p->available_for_renaming, + 0, cur_reg, hrsi) + { + int nregs; + int i; + + nregs = hard_regno_nregs[cur_reg][mode]; + gcc_assert (nregs > 0); + + for (i = nregs - 1; i >= 0; --i) + if (! sel_hard_regno_rename_ok (regno + i, cur_reg + i)) + break; + + if (i >= 0) + CLEAR_HARD_REG_BIT (reg_rename_p->available_for_renaming, + cur_reg); + } + + AND_COMPL_HARD_REG_SET (reg_rename_p->available_for_renaming, + reg_rename_p->unavailable_hard_regs); + + /* Regno is always ok from the renaming part of view, but it really + could be in *unavailable_hard_regs already, so set it here instead + of there. */ + SET_HARD_REG_BIT (reg_rename_p->available_for_renaming, regno); +} + +/* reg_rename_tick[REG1] > reg_rename_tick[REG2] if REG1 was chosen as the + best register more recently than REG2. */ +static int reg_rename_tick[FIRST_PSEUDO_REGISTER]; + +/* Indicates the number of times renaming happened before the current one. */ +static int reg_rename_this_tick; + +/* Choose the register among free, that is suitable for storing + the rhs value. + + ORIGINAL_INSNS is the list of insns where the operation (rhs) + originally appears. There could be multiple original operations + for single rhs since we moving it up and merging along different + paths. + + Some code is adapted from regrename.c (regrename_optimize). + If original register is available, function returns it. + Otherwise it performs the checks, so the new register should + comply with the following: + - it should not violate any live ranges (such registers are in + REG_RENAME_P->available_for_renaming set); + - it should not be in the HARD_REGS_USED regset; + - it should be in the class compatible with original uses; + - it should not be clobbered through reference with different mode; + - if we're in the leaf function, then the new register should + not be in the LEAF_REGISTERS; + - etc. + + If several registers meet the conditions, the register with smallest + tick is returned to achieve more even register allocation. + + If original register seems to be ok, we set *IS_ORIG_REG_P_PTR to true. + + If no register satisfies the above conditions, NULL_RTX is returned. */ +static rtx +choose_best_reg_1 (HARD_REG_SET hard_regs_used, + struct reg_rename *reg_rename_p, + def_list_t original_insns, bool *is_orig_reg_p_ptr) +{ + int best_new_reg; + unsigned cur_reg; + enum machine_mode mode = VOIDmode; + unsigned regno, i, n; + hard_reg_set_iterator hrsi; + def_list_iterator di; + def_t def; + + /* If original register is available, return it. */ + *is_orig_reg_p_ptr = true; + + FOR_EACH_DEF (def, di, original_insns) + { + rtx orig_dest = SET_DEST (PATTERN (def->orig_insn)); + + gcc_assert (REG_P (orig_dest)); + + /* Check that all original operations have the same mode. + This is done for the next loop; if we'd return from this + loop, we'd check only part of them, but in this case + it doesn't matter. */ + if (mode == VOIDmode) + mode = GET_MODE (orig_dest); + gcc_assert (mode == GET_MODE (orig_dest)); + + regno = REGNO (orig_dest); + for (i = 0, n = hard_regno_nregs[regno][mode]; i < n; i++) + if (TEST_HARD_REG_BIT (hard_regs_used, regno + i)) + break; + + /* All hard registers are available. */ + if (i == n) + { + gcc_assert (mode != VOIDmode); + + /* Hard registers should not be shared. 
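+             That is, a fresh REG rtx of the right mode is generated for
+             REGNO here rather than handing back ORIG_DEST itself.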
*/ + return gen_rtx_REG (mode, regno); + } + } + + *is_orig_reg_p_ptr = false; + best_new_reg = -1; + + /* Among all available regs choose the register that was + allocated earliest. */ + EXECUTE_IF_SET_IN_HARD_REG_SET (reg_rename_p->available_for_renaming, + 0, cur_reg, hrsi) + if (! TEST_HARD_REG_BIT (hard_regs_used, cur_reg)) + { + /* Check that all hard regs for mode are available. */ + for (i = 1, n = hard_regno_nregs[cur_reg][mode]; i < n; i++) + if (TEST_HARD_REG_BIT (hard_regs_used, cur_reg + i) + || !TEST_HARD_REG_BIT (reg_rename_p->available_for_renaming, + cur_reg + i)) + break; + + if (i < n) + continue; + + /* All hard registers are available. */ + if (best_new_reg < 0 + || reg_rename_tick[cur_reg] < reg_rename_tick[best_new_reg]) + { + best_new_reg = cur_reg; + + /* Return immediately when we know there's no better reg. */ + if (! reg_rename_tick[best_new_reg]) + break; + } + } + + if (best_new_reg >= 0) + { + /* Use the check from the above loop. */ + gcc_assert (mode != VOIDmode); + return gen_rtx_REG (mode, best_new_reg); + } + + return NULL_RTX; +} + +/* A wrapper around choose_best_reg_1 () to verify that we make correct + assumptions about available registers in the function. */ +static rtx +choose_best_reg (HARD_REG_SET hard_regs_used, struct reg_rename *reg_rename_p, + def_list_t original_insns, bool *is_orig_reg_p_ptr) +{ + rtx best_reg = choose_best_reg_1 (hard_regs_used, reg_rename_p, + original_insns, is_orig_reg_p_ptr); + + /* FIXME loop over hard_regno_nregs here. */ + gcc_assert (best_reg == NULL_RTX + || TEST_HARD_REG_BIT (sel_hrd.regs_ever_used, REGNO (best_reg))); + + return best_reg; +} + +/* Choose the pseudo register for storing rhs value. As this is supposed + to work before reload, we return either the original register or make + the new one. The parameters are the same that in choose_nest_reg_1 + functions, except that USED_REGS may contain pseudos. + If we work with hard regs, check also REG_RENAME_P->UNAVAILABLE_HARD_REGS. + + TODO: take into account register pressure while doing this. Up to this + moment, this function would never return NULL for pseudos, but we should + not rely on this. */ +static rtx +choose_best_pseudo_reg (regset used_regs, + struct reg_rename *reg_rename_p, + def_list_t original_insns, bool *is_orig_reg_p_ptr) +{ + def_list_iterator i; + def_t def; + enum machine_mode mode = VOIDmode; + bool bad_hard_regs = false; + + /* We should not use this after reload. */ + gcc_assert (!reload_completed); + + /* If original register is available, return it. */ + *is_orig_reg_p_ptr = true; + + FOR_EACH_DEF (def, i, original_insns) + { + rtx dest = SET_DEST (PATTERN (def->orig_insn)); + int orig_regno; + + gcc_assert (REG_P (dest)); + + /* Check that all original operations have the same mode. */ + if (mode == VOIDmode) + mode = GET_MODE (dest); + else + gcc_assert (mode == GET_MODE (dest)); + orig_regno = REGNO (dest); + + if (!REGNO_REG_SET_P (used_regs, orig_regno)) + { + if (orig_regno < FIRST_PSEUDO_REGISTER) + { + gcc_assert (df_regs_ever_live_p (orig_regno)); + + /* For hard registers, we have to check hardware imposed + limitations (frame/stack registers, calls crossed). */ + if (!TEST_HARD_REG_BIT (reg_rename_p->unavailable_hard_regs, + orig_regno)) + { + /* Don't let register cross a call if it doesn't already + cross one. This condition is written in accordance with + that in sched-deps.c sched_analyze_reg(). 
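+                     In other words, ORIG_REGNO is reused only when the
+                     motion path does not cross a call, or when the
+                     register was already crossing calls before scheduling,
+                     so that no new call-crossing is introduced.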
*/ + if (!reg_rename_p->crosses_call + || REG_N_CALLS_CROSSED (orig_regno) > 0) + return gen_rtx_REG (mode, orig_regno); + } + + bad_hard_regs = true; + } + else + return dest; + } + } + + *is_orig_reg_p_ptr = false; + + /* We had some original hard registers that couldn't be used. + Those were likely special. Don't try to create a pseudo. */ + if (bad_hard_regs) + return NULL_RTX; + + /* We haven't found a register from original operations. Get a new one. + FIXME: control register pressure somehow. */ + { + rtx new_reg = gen_reg_rtx (mode); + + gcc_assert (mode != VOIDmode); + + max_regno = max_reg_num (); + maybe_extend_reg_info_p (); + REG_N_CALLS_CROSSED (REGNO (new_reg)) = reg_rename_p->crosses_call ? 1 : 0; + + return new_reg; + } +} + +/* True when target of EXPR is available due to EXPR_TARGET_AVAILABLE, + USED_REGS and REG_RENAME_P->UNAVAILABLE_HARD_REGS. */ +static void +verify_target_availability (expr_t expr, regset used_regs, + struct reg_rename *reg_rename_p) +{ + unsigned n, i, regno; + enum machine_mode mode; + bool target_available, live_available, hard_available; + + if (!REG_P (EXPR_LHS (expr)) || EXPR_TARGET_AVAILABLE (expr) < 0) + return; + + regno = expr_dest_regno (expr); + mode = GET_MODE (EXPR_LHS (expr)); + target_available = EXPR_TARGET_AVAILABLE (expr) == 1; + n = reload_completed ? hard_regno_nregs[regno][mode] : 1; + + live_available = hard_available = true; + for (i = 0; i < n; i++) + { + if (bitmap_bit_p (used_regs, regno + i)) + live_available = false; + if (TEST_HARD_REG_BIT (reg_rename_p->unavailable_hard_regs, regno + i)) + hard_available = false; + } + + /* When target is not available, it may be due to hard register + restrictions, e.g. crosses calls, so we check hard_available too. */ + if (target_available) + gcc_assert (live_available); + else + /* Check only if we haven't scheduled something on the previous fence, + cause due to MAX_SOFTWARE_LOOKAHEAD_WINDOW_SIZE issues + and having more than one fence, we may end having targ_un in a block + in which successors target register is actually available. + + The last condition handles the case when a dependence from a call insn + was created in sched-deps.c for insns with destination registers that + never crossed a call before, but do cross one after our code motion. + + FIXME: in the latter case, we just uselessly called find_used_regs, + because we can't move this expression with any other register + as well. */ + gcc_assert (scheduled_something_on_previous_fence || !live_available + || !hard_available + || (!reload_completed && reg_rename_p->crosses_call + && REG_N_CALLS_CROSSED (regno) == 0)); +} + +/* Collect unavailable registers due to liveness for EXPR from BNDS + into USED_REGS. Save additional information about available + registers and unavailable due to hardware restriction registers + into REG_RENAME_P structure. Save original insns into ORIGINAL_INSNS + list. */ +static void +collect_unavailable_regs_from_bnds (expr_t expr, blist_t bnds, regset used_regs, + struct reg_rename *reg_rename_p, + def_list_t *original_insns) +{ + for (; bnds; bnds = BLIST_NEXT (bnds)) + { + bool res; + av_set_t orig_ops = NULL; + bnd_t bnd = BLIST_BND (bnds); + + /* If the chosen best expr doesn't belong to current boundary, + skip it. */ + if (!av_set_is_in_p (BND_AV1 (bnd), EXPR_VINSN (expr))) + continue; + + /* Put in ORIG_OPS all exprs from this boundary that became + RES on top. */ + orig_ops = find_sequential_best_exprs (bnd, expr, false); + + /* Compute used regs and OR it into the USED_REGS. 
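+         find_used_regs walks the code motion paths from the boundary down
+         to the original insns of ORIG_OPS, accumulating the registers that
+         are live or set on those paths into USED_REGS, the hard register
+         restrictions into REG_RENAME_P, and the original insns themselves
+         into ORIGINAL_INSNS.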
*/ + res = find_used_regs (BND_TO (bnd), orig_ops, used_regs, + reg_rename_p, original_insns); + + /* FIXME: the assert is true until we'd have several boundaries. */ + gcc_assert (res); + av_set_clear (&orig_ops); + } +} + +/* Return TRUE if it is possible to replace LHSes of ORIG_INSNS with BEST_REG. + If BEST_REG is valid, replace LHS of EXPR with it. */ +static bool +try_replace_dest_reg (ilist_t orig_insns, rtx best_reg, expr_t expr) +{ + /* Try whether we'll be able to generate the insn + 'dest := best_reg' at the place of the original operation. */ + for (; orig_insns; orig_insns = ILIST_NEXT (orig_insns)) + { + insn_t orig_insn = DEF_LIST_DEF (orig_insns)->orig_insn; + + gcc_assert (EXPR_SEPARABLE_P (INSN_EXPR (orig_insn))); + + if (REGNO (best_reg) != REGNO (INSN_LHS (orig_insn)) + && (! replace_src_with_reg_ok_p (orig_insn, best_reg) + || ! replace_dest_with_reg_ok_p (orig_insn, best_reg))) + return false; + } + + /* Make sure that EXPR has the right destination + register. */ + if (expr_dest_regno (expr) != REGNO (best_reg)) + replace_dest_with_reg_in_expr (expr, best_reg); + else + EXPR_TARGET_AVAILABLE (expr) = 1; + + return true; +} + +/* Select and assign best register to EXPR searching from BNDS. + Set *IS_ORIG_REG_P to TRUE if original register was selected. + Return FALSE if no register can be chosen, which could happen when: + * EXPR_SEPARABLE_P is true but we were unable to find suitable register; + * EXPR_SEPARABLE_P is false but the insn sets/clobbers one of the registers + that are used on the moving path. */ +static bool +find_best_reg_for_expr (expr_t expr, blist_t bnds, bool *is_orig_reg_p) +{ + static struct reg_rename reg_rename_data; + + regset used_regs; + def_list_t original_insns = NULL; + bool reg_ok; + + *is_orig_reg_p = false; + + /* Don't bother to do anything if this insn doesn't set any registers. */ + if (bitmap_empty_p (VINSN_REG_SETS (EXPR_VINSN (expr))) + && bitmap_empty_p (VINSN_REG_CLOBBERS (EXPR_VINSN (expr)))) + return true; + + used_regs = get_clear_regset_from_pool (); + CLEAR_HARD_REG_SET (reg_rename_data.unavailable_hard_regs); + + collect_unavailable_regs_from_bnds (expr, bnds, used_regs, ®_rename_data, + &original_insns); + +#ifdef ENABLE_CHECKING + /* If after reload, make sure we're working with hard regs here. */ + if (reload_completed) + { + reg_set_iterator rsi; + unsigned i; + + EXECUTE_IF_SET_IN_REG_SET (used_regs, FIRST_PSEUDO_REGISTER, i, rsi) + gcc_unreachable (); + } +#endif + + if (EXPR_SEPARABLE_P (expr)) + { + rtx best_reg = NULL_RTX; + /* Check that we have computed availability of a target register + correctly. */ + verify_target_availability (expr, used_regs, ®_rename_data); + + /* Turn everything in hard regs after reload. */ + if (reload_completed) + { + HARD_REG_SET hard_regs_used; + REG_SET_TO_HARD_REG_SET (hard_regs_used, used_regs); + + /* Join hard registers unavailable due to register class + restrictions and live range intersection. */ + IOR_HARD_REG_SET (hard_regs_used, + reg_rename_data.unavailable_hard_regs); + + best_reg = choose_best_reg (hard_regs_used, ®_rename_data, + original_insns, is_orig_reg_p); + } + else + best_reg = choose_best_pseudo_reg (used_regs, ®_rename_data, + original_insns, is_orig_reg_p); + + if (!best_reg) + reg_ok = false; + else if (*is_orig_reg_p) + { + /* In case of unification BEST_REG may be different from EXPR's LHS + when EXPR's LHS is unavailable, and there is another LHS among + ORIGINAL_INSNS. 
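+            Even so, the other original insns (those whose LHS differs from
+            BEST_REG) still have to pass the replace_*_ok_p checks, and
+            EXPR's own LHS may need to be rewritten, which is why
+            try_replace_dest_reg is called here as well.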
*/ + reg_ok = try_replace_dest_reg (original_insns, best_reg, expr); + } + else + { + /* Forbid renaming of low-cost insns. */ + if (sel_vinsn_cost (EXPR_VINSN (expr)) < 2) + reg_ok = false; + else + reg_ok = try_replace_dest_reg (original_insns, best_reg, expr); + } + } + else + { + /* If !EXPR_SCHEDULE_AS_RHS (EXPR), just make sure INSN doesn't set + any of the HARD_REGS_USED set. */ + if (vinsn_writes_one_of_regs_p (EXPR_VINSN (expr), used_regs, + reg_rename_data.unavailable_hard_regs)) + { + reg_ok = false; + gcc_assert (EXPR_TARGET_AVAILABLE (expr) <= 0); + } + else + { + reg_ok = true; + gcc_assert (EXPR_TARGET_AVAILABLE (expr) != 0); + } + } + + ilist_clear (&original_insns); + return_regset_to_pool (used_regs); + + return reg_ok; +} + + +/* Return true if dependence described by DS can be overcomed. */ +static bool +can_speculate_dep_p (ds_t ds) +{ + if (spec_info == NULL) + return false; + + /* Leave only speculative data. */ + ds &= SPECULATIVE; + + if (ds == 0) + return false; + + { + /* FIXME: make sched-deps.c produce only those non-hard dependencies, + that we can overcome. */ + ds_t spec_mask = spec_info->mask; + + if ((ds & spec_mask) != ds) + return false; + } + + if (ds_weak (ds) < spec_info->data_weakness_cutoff) + return false; + + return true; +} + +/* Get a speculation check instruction. + C_EXPR is a speculative expression, + CHECK_DS describes speculations that should be checked, + ORIG_INSN is the original non-speculative insn in the stream. */ +static insn_t +create_speculation_check (expr_t c_expr, ds_t check_ds, insn_t orig_insn) +{ + rtx check_pattern; + rtx insn_rtx; + insn_t insn; + basic_block recovery_block; + rtx label; + + /* Create a recovery block if target is going to emit branchy check, or if + ORIG_INSN was speculative already. */ + if (targetm.sched.needs_block_p (check_ds) + || EXPR_SPEC_DONE_DS (INSN_EXPR (orig_insn)) != 0) + { + recovery_block = sel_create_recovery_block (orig_insn); + label = BB_HEAD (recovery_block); + } + else + { + recovery_block = NULL; + label = NULL_RTX; + } + + /* Get pattern of the check. */ + check_pattern = targetm.sched.gen_spec_check (EXPR_INSN_RTX (c_expr), label, + check_ds); + + gcc_assert (check_pattern != NULL); + + /* Emit check. */ + insn_rtx = create_insn_rtx_from_pattern (check_pattern, label); + + insn = sel_gen_insn_from_rtx_after (insn_rtx, INSN_EXPR (orig_insn), + INSN_SEQNO (orig_insn), orig_insn); + + /* Make check to be non-speculative. */ + EXPR_SPEC_DONE_DS (INSN_EXPR (insn)) = 0; + INSN_SPEC_CHECKED_DS (insn) = check_ds; + + /* Decrease priority of check by difference of load/check instruction + latencies. */ + EXPR_PRIORITY (INSN_EXPR (insn)) -= (sel_vinsn_cost (INSN_VINSN (orig_insn)) + - sel_vinsn_cost (INSN_VINSN (insn))); + + /* Emit copy of original insn (though with replaced target register, + if needed) to the recovery block. */ + if (recovery_block != NULL) + { + rtx twin_rtx; + + twin_rtx = copy_rtx (PATTERN (EXPR_INSN_RTX (c_expr))); + twin_rtx = create_insn_rtx_from_pattern (twin_rtx, NULL_RTX); + sel_gen_recovery_insn_from_rtx_after (twin_rtx, + INSN_EXPR (orig_insn), + INSN_SEQNO (insn), + bb_note (recovery_block)); + } + + /* If we've generated a data speculation check, make sure + that all the bookkeeping instruction we'll create during + this move_op () will allocate an ALAT entry so that the + check won't fail. 
+ In case of control speculation we must convert C_EXPR to control + speculative mode, because failing to do so will bring us an exception + thrown by the non-control-speculative load. */ + check_ds = ds_get_max_dep_weak (check_ds); + speculate_expr (c_expr, check_ds); + + return insn; +} + +/* True when INSN is a "regN = regN" copy. */ +static bool +identical_copy_p (rtx insn) +{ + rtx lhs, rhs, pat; + + pat = PATTERN (insn); + + if (GET_CODE (pat) != SET) + return false; + + lhs = SET_DEST (pat); + if (!REG_P (lhs)) + return false; + + rhs = SET_SRC (pat); + if (!REG_P (rhs)) + return false; + + return REGNO (lhs) == REGNO (rhs); +} + +/* Undo all transformations on *AV_PTR that were done when + moving through INSN. */ +static void +undo_transformations (av_set_t *av_ptr, rtx insn) +{ + av_set_iterator av_iter; + expr_t expr; + av_set_t new_set = NULL; + + /* First, kill any EXPR that uses registers set by an insn. This is + required for correctness. */ + FOR_EACH_EXPR_1 (expr, av_iter, av_ptr) + if (!sched_insns_conditions_mutex_p (insn, EXPR_INSN_RTX (expr)) + && bitmap_intersect_p (INSN_REG_SETS (insn), + VINSN_REG_USES (EXPR_VINSN (expr))) + /* When an insn looks like 'r1 = r1', we could substitute through + it, but the above condition will still hold. This happened with + gcc.c-torture/execute/961125-1.c. */ + && !identical_copy_p (insn)) + { + if (sched_verbose >= 6) + sel_print ("Expr %d removed due to use/set conflict\n", + INSN_UID (EXPR_INSN_RTX (expr))); + av_set_iter_remove (&av_iter); + } + + /* Undo transformations looking at the history vector. */ + FOR_EACH_EXPR (expr, av_iter, *av_ptr) + { + int index = find_in_history_vect (EXPR_HISTORY_OF_CHANGES (expr), + insn, EXPR_VINSN (expr), true); + + if (index >= 0) + { + expr_history_def *phist; + + phist = VEC_index (expr_history_def, + EXPR_HISTORY_OF_CHANGES (expr), + index); + + switch (phist->type) + { + case TRANS_SPECULATION: + { + ds_t old_ds, new_ds; + + /* Compute the difference between old and new speculative + statuses: that's what we need to check. + Earlier we used to assert that the status will really + change. This no longer works because only the probability + bits in the status may have changed during compute_av_set, + and in the case of merging different probabilities of the + same speculative status along different paths we do not + record this in the history vector. */ + old_ds = phist->spec_ds; + new_ds = EXPR_SPEC_DONE_DS (expr); + + old_ds &= SPECULATIVE; + new_ds &= SPECULATIVE; + new_ds &= ~old_ds; + + EXPR_SPEC_TO_CHECK_DS (expr) |= new_ds; + break; + } + case TRANS_SUBSTITUTION: + { + expr_def _tmp_expr, *tmp_expr = &_tmp_expr; + vinsn_t new_vi; + bool add = true; + + new_vi = phist->old_expr_vinsn; + + gcc_assert (VINSN_SEPARABLE_P (new_vi) + == EXPR_SEPARABLE_P (expr)); + copy_expr (tmp_expr, expr); + + if (vinsn_equal_p (phist->new_expr_vinsn, + EXPR_VINSN (tmp_expr))) + change_vinsn_in_expr (tmp_expr, new_vi); + else + /* This happens when we're unsubstituting on a bookkeeping + copy, which was in turn substituted. The history is wrong + in this case. Do it the hard way. */ + add = substitute_reg_in_expr (tmp_expr, insn, true); + if (add) + av_set_add (&new_set, tmp_expr); + clear_expr (tmp_expr); + break; + } + default: + gcc_unreachable (); + } + } + + } + + av_set_union_and_clear (av_ptr, &new_set, NULL); +} + + +/* Moveup_* helpers for code motion and computing av sets. */ + +/* Propagates EXPR inside an insn group through THROUGH_INSN. 
+ The difference from the below function is that only substitution is + performed. */ +static enum MOVEUP_EXPR_CODE +moveup_expr_inside_insn_group (expr_t expr, insn_t through_insn) +{ + vinsn_t vi = EXPR_VINSN (expr); + ds_t *has_dep_p; + ds_t full_ds; + + /* Do this only inside insn group. */ + gcc_assert (INSN_SCHED_CYCLE (through_insn) > 0); + + full_ds = has_dependence_p (expr, through_insn, &has_dep_p); + if (full_ds == 0) + return MOVEUP_EXPR_SAME; + + /* Substitution is the possible choice in this case. */ + if (has_dep_p[DEPS_IN_RHS]) + { + /* Can't substitute UNIQUE VINSNs. */ + gcc_assert (!VINSN_UNIQUE_P (vi)); + + if (can_substitute_through_p (through_insn, + has_dep_p[DEPS_IN_RHS]) + && substitute_reg_in_expr (expr, through_insn, false)) + { + EXPR_WAS_SUBSTITUTED (expr) = true; + return MOVEUP_EXPR_CHANGED; + } + + /* Don't care about this, as even true dependencies may be allowed + in an insn group. */ + return MOVEUP_EXPR_SAME; + } + + /* This can catch output dependencies in COND_EXECs. */ + if (has_dep_p[DEPS_IN_INSN]) + return MOVEUP_EXPR_NULL; + + /* This is either an output or an anti dependence, which usually have + a zero latency. Allow this here, if we'd be wrong, tick_check_p + will fix this. */ + gcc_assert (has_dep_p[DEPS_IN_LHS]); + return MOVEUP_EXPR_AS_RHS; +} + +/* True when a trapping EXPR cannot be moved through THROUGH_INSN. */ +#define CANT_MOVE_TRAPPING(expr, through_insn) \ + (VINSN_MAY_TRAP_P (EXPR_VINSN (expr)) \ + && !sel_insn_has_single_succ_p ((through_insn), SUCCS_ALL) \ + && !sel_insn_is_speculation_check (through_insn)) + +/* True when a conflict on a target register was found during moveup_expr. */ +static bool was_target_conflict = false; + +/* Return true when moving a debug INSN across THROUGH_INSN will + create a bookkeeping block. We don't want to create such blocks, + for they would cause codegen differences between compilations with + and without debug info. */ + +static bool +moving_insn_creates_bookkeeping_block_p (insn_t insn, + insn_t through_insn) +{ + basic_block bbi, bbt; + edge e1, e2; + edge_iterator ei1, ei2; + + if (!bookkeeping_can_be_created_if_moved_through_p (through_insn)) + { + if (sched_verbose >= 9) + sel_print ("no bookkeeping required: "); + return FALSE; + } + + bbi = BLOCK_FOR_INSN (insn); + + if (EDGE_COUNT (bbi->preds) == 1) + { + if (sched_verbose >= 9) + sel_print ("only one pred edge: "); + return TRUE; + } + + bbt = BLOCK_FOR_INSN (through_insn); + + FOR_EACH_EDGE (e1, ei1, bbt->succs) + { + FOR_EACH_EDGE (e2, ei2, bbi->preds) + { + if (find_block_for_bookkeeping (e1, e2, TRUE)) + { + if (sched_verbose >= 9) + sel_print ("found existing block: "); + return FALSE; + } + } + } + + if (sched_verbose >= 9) + sel_print ("would create bookkeeping block: "); + + return TRUE; +} + +/* Return true when the conflict with newly created implicit clobbers + between EXPR and THROUGH_INSN is found because of renaming. */ +static bool +implicit_clobber_conflict_p (insn_t through_insn, expr_t expr) +{ + HARD_REG_SET temp; + rtx insn, reg, rhs, pat; + hard_reg_set_iterator hrsi; + unsigned regno; + bool valid; + + /* Make a new pseudo register. */ + reg = gen_reg_rtx (GET_MODE (EXPR_LHS (expr))); + max_regno = max_reg_num (); + maybe_extend_reg_info_p (); + + /* Validate a change and bail out early. 
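+     If even renaming the destination to a fresh pseudo is not a valid
+     change, conservatively report a conflict.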
*/ + insn = EXPR_INSN_RTX (expr); + validate_change (insn, &SET_DEST (PATTERN (insn)), reg, true); + valid = verify_changes (0); + cancel_changes (0); + if (!valid) + { + if (sched_verbose >= 6) + sel_print ("implicit clobbers failed validation, "); + return true; + } + + /* Make a new insn with it. */ + rhs = copy_rtx (VINSN_RHS (EXPR_VINSN (expr))); + pat = gen_rtx_SET (VOIDmode, reg, rhs); + start_sequence (); + insn = emit_insn (pat); + end_sequence (); + + /* Calculate implicit clobbers. */ + extract_insn (insn); + preprocess_constraints (); + ira_implicitly_set_insn_hard_regs (&temp); + AND_COMPL_HARD_REG_SET (temp, ira_no_alloc_regs); + + /* If any implicit clobber registers intersect with regular ones in + through_insn, we have a dependency and thus bail out. */ + EXECUTE_IF_SET_IN_HARD_REG_SET (temp, 0, regno, hrsi) + { + vinsn_t vi = INSN_VINSN (through_insn); + if (bitmap_bit_p (VINSN_REG_SETS (vi), regno) + || bitmap_bit_p (VINSN_REG_CLOBBERS (vi), regno) + || bitmap_bit_p (VINSN_REG_USES (vi), regno)) + return true; + } + + return false; +} + +/* Modifies EXPR so it can be moved through the THROUGH_INSN, + performing necessary transformations. Record the type of transformation + made in PTRANS_TYPE, when it is not NULL. When INSIDE_INSN_GROUP, + permit all dependencies except true ones, and try to remove those + too via forward substitution. All cases when a non-eliminable + non-zero cost dependency exists inside an insn group will be fixed + in tick_check_p instead. */ +static enum MOVEUP_EXPR_CODE +moveup_expr (expr_t expr, insn_t through_insn, bool inside_insn_group, + enum local_trans_type *ptrans_type) +{ + vinsn_t vi = EXPR_VINSN (expr); + insn_t insn = VINSN_INSN_RTX (vi); + bool was_changed = false; + bool as_rhs = false; + ds_t *has_dep_p; + ds_t full_ds; + + /* ??? We use dependencies of non-debug insns on debug insns to + indicate that the debug insns need to be reset if the non-debug + insn is pulled ahead of it. It's hard to figure out how to + introduce such a notion in sel-sched, but it already fails to + support debug insns in other ways, so we just go ahead and + let the deug insns go corrupt for now. */ + if (DEBUG_INSN_P (through_insn) && !DEBUG_INSN_P (insn)) + return MOVEUP_EXPR_SAME; + + /* When inside_insn_group, delegate to the helper. */ + if (inside_insn_group) + return moveup_expr_inside_insn_group (expr, through_insn); + + /* Deal with unique insns and control dependencies. */ + if (VINSN_UNIQUE_P (vi)) + { + /* We can move jumps without side-effects or jumps that are + mutually exclusive with instruction THROUGH_INSN (all in cases + dependencies allow to do so and jump is not speculative). */ + if (control_flow_insn_p (insn)) + { + basic_block fallthru_bb; + + /* Do not move checks and do not move jumps through other + jumps. */ + if (control_flow_insn_p (through_insn) + || sel_insn_is_speculation_check (insn)) + return MOVEUP_EXPR_NULL; + + /* Don't move jumps through CFG joins. */ + if (bookkeeping_can_be_created_if_moved_through_p (through_insn)) + return MOVEUP_EXPR_NULL; + + /* The jump should have a clear fallthru block, and + this block should be in the current region. */ + if ((fallthru_bb = fallthru_bb_of_jump (insn)) == NULL + || ! in_current_region_p (fallthru_bb)) + return MOVEUP_EXPR_NULL; + + /* And it should be mutually exclusive with through_insn. */ + if (! sched_insns_conditions_mutex_p (insn, through_insn) + && ! DEBUG_INSN_P (through_insn)) + return MOVEUP_EXPR_NULL; + } + + /* Don't move what we can't move. 
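+         Scheduling such an expr within its own block is still allowed,
+         hence the block comparison below.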
*/ + if (EXPR_CANT_MOVE (expr) + && BLOCK_FOR_INSN (through_insn) != BLOCK_FOR_INSN (insn)) + return MOVEUP_EXPR_NULL; + + /* Don't move SCHED_GROUP instruction through anything. + If we don't force this, then it will be possible to start + scheduling a sched_group before all its dependencies are + resolved. + ??? Haifa deals with this issue by delaying the SCHED_GROUP + as late as possible through rank_for_schedule. */ + if (SCHED_GROUP_P (insn)) + return MOVEUP_EXPR_NULL; + } + else + gcc_assert (!control_flow_insn_p (insn)); + + /* Don't move debug insns if this would require bookkeeping. */ + if (DEBUG_INSN_P (insn) + && BLOCK_FOR_INSN (through_insn) != BLOCK_FOR_INSN (insn) + && moving_insn_creates_bookkeeping_block_p (insn, through_insn)) + return MOVEUP_EXPR_NULL; + + /* Deal with data dependencies. */ + was_target_conflict = false; + full_ds = has_dependence_p (expr, through_insn, &has_dep_p); + if (full_ds == 0) + { + if (!CANT_MOVE_TRAPPING (expr, through_insn)) + return MOVEUP_EXPR_SAME; + } + else + { + /* We can move UNIQUE insn up only as a whole and unchanged, + so it shouldn't have any dependencies. */ + if (VINSN_UNIQUE_P (vi)) + return MOVEUP_EXPR_NULL; + } + + if (full_ds != 0 && can_speculate_dep_p (full_ds)) + { + int res; + + res = speculate_expr (expr, full_ds); + if (res >= 0) + { + /* Speculation was successful. */ + full_ds = 0; + was_changed = (res > 0); + if (res == 2) + was_target_conflict = true; + if (ptrans_type) + *ptrans_type = TRANS_SPECULATION; + sel_clear_has_dependence (); + } + } + + if (has_dep_p[DEPS_IN_INSN]) + /* We have some dependency that cannot be discarded. */ + return MOVEUP_EXPR_NULL; + + if (has_dep_p[DEPS_IN_LHS]) + { + /* Only separable insns can be moved up with the new register. + Anyways, we should mark that the original register is + unavailable. */ + if (!enable_schedule_as_rhs_p || !EXPR_SEPARABLE_P (expr)) + return MOVEUP_EXPR_NULL; + + /* When renaming a hard register to a pseudo before reload, extra + dependencies can occur from the implicit clobbers of the insn. + Filter out such cases here. */ + if (!reload_completed && REG_P (EXPR_LHS (expr)) + && HARD_REGISTER_P (EXPR_LHS (expr)) + && implicit_clobber_conflict_p (through_insn, expr)) + { + if (sched_verbose >= 6) + sel_print ("implicit clobbers conflict detected, "); + return MOVEUP_EXPR_NULL; + } + EXPR_TARGET_AVAILABLE (expr) = false; + was_target_conflict = true; + as_rhs = true; + } + + /* At this point we have either separable insns, that will be lifted + up only as RHSes, or non-separable insns with no dependency in lhs. + If dependency is in RHS, then try to perform substitution and move up + substituted RHS: + + Ex. 1: Ex.2 + y = x; y = x; + z = y*2; y = y*2; + + In Ex.1 y*2 can be substituted for x*2 and the whole operation can be + moved above y=x assignment as z=x*2. + + In Ex.2 y*2 also can be substituted for x*2, but only the right hand + side can be moved because of the output dependency. The operation was + cropped to its rhs above. */ + if (has_dep_p[DEPS_IN_RHS]) + { + ds_t *rhs_dsp = &has_dep_p[DEPS_IN_RHS]; + + /* Can't substitute UNIQUE VINSNs. */ + gcc_assert (!VINSN_UNIQUE_P (vi)); + + if (can_speculate_dep_p (*rhs_dsp)) + { + int res; + + res = speculate_expr (expr, *rhs_dsp); + if (res >= 0) + { + /* Speculation was successful. 
*/ + *rhs_dsp = 0; + was_changed = (res > 0); + if (res == 2) + was_target_conflict = true; + if (ptrans_type) + *ptrans_type = TRANS_SPECULATION; + } + else + return MOVEUP_EXPR_NULL; + } + else if (can_substitute_through_p (through_insn, + *rhs_dsp) + && substitute_reg_in_expr (expr, through_insn, false)) + { + /* ??? We cannot perform substitution AND speculation on the same + insn. */ + gcc_assert (!was_changed); + was_changed = true; + if (ptrans_type) + *ptrans_type = TRANS_SUBSTITUTION; + EXPR_WAS_SUBSTITUTED (expr) = true; + } + else + return MOVEUP_EXPR_NULL; + } + + /* Don't move trapping insns through jumps. + This check should be at the end to give a chance to control speculation + to perform its duties. */ + if (CANT_MOVE_TRAPPING (expr, through_insn)) + return MOVEUP_EXPR_NULL; + + return (was_changed + ? MOVEUP_EXPR_CHANGED + : (as_rhs + ? MOVEUP_EXPR_AS_RHS + : MOVEUP_EXPR_SAME)); +} + +/* Try to look at bitmap caches for EXPR and INSN pair, return true + if successful. When INSIDE_INSN_GROUP, also try ignore dependencies + that can exist within a parallel group. Write to RES the resulting + code for moveup_expr. */ +static bool +try_bitmap_cache (expr_t expr, insn_t insn, + bool inside_insn_group, + enum MOVEUP_EXPR_CODE *res) +{ + int expr_uid = INSN_UID (EXPR_INSN_RTX (expr)); + + /* First check whether we've analyzed this situation already. */ + if (bitmap_bit_p (INSN_ANALYZED_DEPS (insn), expr_uid)) + { + if (bitmap_bit_p (INSN_FOUND_DEPS (insn), expr_uid)) + { + if (sched_verbose >= 6) + sel_print ("removed (cached)\n"); + *res = MOVEUP_EXPR_NULL; + return true; + } + else + { + if (sched_verbose >= 6) + sel_print ("unchanged (cached)\n"); + *res = MOVEUP_EXPR_SAME; + return true; + } + } + else if (bitmap_bit_p (INSN_FOUND_DEPS (insn), expr_uid)) + { + if (inside_insn_group) + { + if (sched_verbose >= 6) + sel_print ("unchanged (as RHS, cached, inside insn group)\n"); + *res = MOVEUP_EXPR_SAME; + return true; + + } + else + EXPR_TARGET_AVAILABLE (expr) = false; + + /* This is the only case when propagation result can change over time, + as we can dynamically switch off scheduling as RHS. In this case, + just check the flag to reach the correct decision. */ + if (enable_schedule_as_rhs_p) + { + if (sched_verbose >= 6) + sel_print ("unchanged (as RHS, cached)\n"); + *res = MOVEUP_EXPR_AS_RHS; + return true; + } + else + { + if (sched_verbose >= 6) + sel_print ("removed (cached as RHS, but renaming" + " is now disabled)\n"); + *res = MOVEUP_EXPR_NULL; + return true; + } + } + + return false; +} + +/* Try to look at bitmap caches for EXPR and INSN pair, return true + if successful. Write to RES the resulting code for moveup_expr. */ +static bool +try_transformation_cache (expr_t expr, insn_t insn, + enum MOVEUP_EXPR_CODE *res) +{ + struct transformed_insns *pti + = (struct transformed_insns *) + htab_find_with_hash (INSN_TRANSFORMED_INSNS (insn), + &EXPR_VINSN (expr), + VINSN_HASH_RTX (EXPR_VINSN (expr))); + if (pti) + { + /* This EXPR was already moved through this insn and was + changed as a result. Fetch the proper data from + the hashtable. 
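+         The data includes the replacement vinsn, the speculation status
+         and the target conflict flag saved by update_transformation_cache.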
*/ + insert_in_history_vect (&EXPR_HISTORY_OF_CHANGES (expr), + INSN_UID (insn), pti->type, + pti->vinsn_old, pti->vinsn_new, + EXPR_SPEC_DONE_DS (expr)); + + if (INSN_IN_STREAM_P (VINSN_INSN_RTX (pti->vinsn_new))) + pti->vinsn_new = vinsn_copy (pti->vinsn_new, true); + change_vinsn_in_expr (expr, pti->vinsn_new); + if (pti->was_target_conflict) + EXPR_TARGET_AVAILABLE (expr) = false; + if (pti->type == TRANS_SPECULATION) + { + EXPR_SPEC_DONE_DS (expr) = pti->ds; + EXPR_NEEDS_SPEC_CHECK_P (expr) |= pti->needs_check; + } + + if (sched_verbose >= 6) + { + sel_print ("changed (cached): "); + dump_expr (expr); + sel_print ("\n"); + } + + *res = MOVEUP_EXPR_CHANGED; + return true; + } + + return false; +} + +/* Update bitmap caches on INSN with result RES of propagating EXPR. */ +static void +update_bitmap_cache (expr_t expr, insn_t insn, bool inside_insn_group, + enum MOVEUP_EXPR_CODE res) +{ + int expr_uid = INSN_UID (EXPR_INSN_RTX (expr)); + + /* Do not cache result of propagating jumps through an insn group, + as it is always true, which is not useful outside the group. */ + if (inside_insn_group) + return; + + if (res == MOVEUP_EXPR_NULL) + { + bitmap_set_bit (INSN_ANALYZED_DEPS (insn), expr_uid); + bitmap_set_bit (INSN_FOUND_DEPS (insn), expr_uid); + } + else if (res == MOVEUP_EXPR_SAME) + { + bitmap_set_bit (INSN_ANALYZED_DEPS (insn), expr_uid); + bitmap_clear_bit (INSN_FOUND_DEPS (insn), expr_uid); + } + else if (res == MOVEUP_EXPR_AS_RHS) + { + bitmap_clear_bit (INSN_ANALYZED_DEPS (insn), expr_uid); + bitmap_set_bit (INSN_FOUND_DEPS (insn), expr_uid); + } + else + gcc_unreachable (); +} + +/* Update hashtable on INSN with changed EXPR, old EXPR_OLD_VINSN + and transformation type TRANS_TYPE. */ +static void +update_transformation_cache (expr_t expr, insn_t insn, + bool inside_insn_group, + enum local_trans_type trans_type, + vinsn_t expr_old_vinsn) +{ + struct transformed_insns *pti; + + if (inside_insn_group) + return; + + pti = XNEW (struct transformed_insns); + pti->vinsn_old = expr_old_vinsn; + pti->vinsn_new = EXPR_VINSN (expr); + pti->type = trans_type; + pti->was_target_conflict = was_target_conflict; + pti->ds = EXPR_SPEC_DONE_DS (expr); + pti->needs_check = EXPR_NEEDS_SPEC_CHECK_P (expr); + vinsn_attach (pti->vinsn_old); + vinsn_attach (pti->vinsn_new); + *((struct transformed_insns **) + htab_find_slot_with_hash (INSN_TRANSFORMED_INSNS (insn), + pti, VINSN_HASH_RTX (expr_old_vinsn), + INSERT)) = pti; +} + +/* Same as moveup_expr, but first looks up the result of + transformation in caches. */ +static enum MOVEUP_EXPR_CODE +moveup_expr_cached (expr_t expr, insn_t insn, bool inside_insn_group) +{ + enum MOVEUP_EXPR_CODE res; + bool got_answer = false; + + if (sched_verbose >= 6) + { + sel_print ("Moving "); + dump_expr (expr); + sel_print (" through %d: ", INSN_UID (insn)); + } + + if (DEBUG_INSN_P (EXPR_INSN_RTX (expr)) + && (sel_bb_head (BLOCK_FOR_INSN (EXPR_INSN_RTX (expr))) + == EXPR_INSN_RTX (expr))) + /* Don't use cached information for debug insns that are heads of + basic blocks. */; + else if (try_bitmap_cache (expr, insn, inside_insn_group, &res)) + /* When inside insn group, we do not want remove stores conflicting + with previosly issued loads. */ + got_answer = ! inside_insn_group || res != MOVEUP_EXPR_NULL; + else if (try_transformation_cache (expr, insn, &res)) + got_answer = true; + + if (! got_answer) + { + /* Invoke moveup_expr and record the results. 
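+         NULL/SAME/AS_RHS answers go to the per-insn bitmap caches,
+         while CHANGED results are saved in the transformation hashtable
+         together with a history vector entry.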
*/ + vinsn_t expr_old_vinsn = EXPR_VINSN (expr); + ds_t expr_old_spec_ds = EXPR_SPEC_DONE_DS (expr); + int expr_uid = INSN_UID (VINSN_INSN_RTX (expr_old_vinsn)); + bool unique_p = VINSN_UNIQUE_P (expr_old_vinsn); + enum local_trans_type trans_type = TRANS_SUBSTITUTION; + + /* ??? Invent something better than this. We can't allow old_vinsn + to go, we need it for the history vector. */ + vinsn_attach (expr_old_vinsn); + + res = moveup_expr (expr, insn, inside_insn_group, + &trans_type); + switch (res) + { + case MOVEUP_EXPR_NULL: + update_bitmap_cache (expr, insn, inside_insn_group, res); + if (sched_verbose >= 6) + sel_print ("removed\n"); + break; + + case MOVEUP_EXPR_SAME: + update_bitmap_cache (expr, insn, inside_insn_group, res); + if (sched_verbose >= 6) + sel_print ("unchanged\n"); + break; + + case MOVEUP_EXPR_AS_RHS: + gcc_assert (!unique_p || inside_insn_group); + update_bitmap_cache (expr, insn, inside_insn_group, res); + if (sched_verbose >= 6) + sel_print ("unchanged (as RHS)\n"); + break; + + case MOVEUP_EXPR_CHANGED: + gcc_assert (INSN_UID (EXPR_INSN_RTX (expr)) != expr_uid + || EXPR_SPEC_DONE_DS (expr) != expr_old_spec_ds); + insert_in_history_vect (&EXPR_HISTORY_OF_CHANGES (expr), + INSN_UID (insn), trans_type, + expr_old_vinsn, EXPR_VINSN (expr), + expr_old_spec_ds); + update_transformation_cache (expr, insn, inside_insn_group, + trans_type, expr_old_vinsn); + if (sched_verbose >= 6) + { + sel_print ("changed: "); + dump_expr (expr); + sel_print ("\n"); + } + break; + default: + gcc_unreachable (); + } + + vinsn_detach (expr_old_vinsn); + } + + return res; +} + +/* Moves an av set AVP up through INSN, performing necessary + transformations. */ +static void +moveup_set_expr (av_set_t *avp, insn_t insn, bool inside_insn_group) +{ + av_set_iterator i; + expr_t expr; + + FOR_EACH_EXPR_1 (expr, i, avp) + { + + switch (moveup_expr_cached (expr, insn, inside_insn_group)) + { + case MOVEUP_EXPR_SAME: + case MOVEUP_EXPR_AS_RHS: + break; + + case MOVEUP_EXPR_NULL: + av_set_iter_remove (&i); + break; + + case MOVEUP_EXPR_CHANGED: + expr = merge_with_other_exprs (avp, &i, expr); + break; + + default: + gcc_unreachable (); + } + } +} + +/* Moves AVP set along PATH. */ +static void +moveup_set_inside_insn_group (av_set_t *avp, ilist_t path) +{ + int last_cycle; + + if (sched_verbose >= 6) + sel_print ("Moving expressions up in the insn group...\n"); + if (! path) + return; + last_cycle = INSN_SCHED_CYCLE (ILIST_INSN (path)); + while (path + && INSN_SCHED_CYCLE (ILIST_INSN (path)) == last_cycle) + { + moveup_set_expr (avp, ILIST_INSN (path), true); + path = ILIST_NEXT (path); + } +} + +/* Returns true if after moving EXPR along PATH it equals to EXPR_VLIW. */ +static bool +equal_after_moveup_path_p (expr_t expr, ilist_t path, expr_t expr_vliw) +{ + expr_def _tmp, *tmp = &_tmp; + int last_cycle; + bool res = true; + + copy_expr_onside (tmp, expr); + last_cycle = path ? INSN_SCHED_CYCLE (ILIST_INSN (path)) : 0; + while (path + && res + && INSN_SCHED_CYCLE (ILIST_INSN (path)) == last_cycle) + { + res = (moveup_expr_cached (tmp, ILIST_INSN (path), true) + != MOVEUP_EXPR_NULL); + path = ILIST_NEXT (path); + } + + if (res) + { + vinsn_t tmp_vinsn = EXPR_VINSN (tmp); + vinsn_t expr_vliw_vinsn = EXPR_VINSN (expr_vliw); + + if (tmp_vinsn != expr_vliw_vinsn) + res = vinsn_equal_p (tmp_vinsn, expr_vliw_vinsn); + } + + clear_expr (tmp); + return res; +} + + +/* Functions that compute av and lv sets. 
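+   Av sets are computed bottom-up: compute_av_set_at_bb_end merges the
+   sets of a block's successors, and compute_av_set_inside_bb drags the
+   result up through the block's insns.  Liveness sets are propagated
+   backwards from block ends by compute_live and propagate_lv_set.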
*/ + +/* Returns true if INSN is not a downward continuation of the given path P in + the current stage. */ +static bool +is_ineligible_successor (insn_t insn, ilist_t p) +{ + insn_t prev_insn; + + /* Check if insn is not deleted. */ + if (PREV_INSN (insn) && NEXT_INSN (PREV_INSN (insn)) != insn) + gcc_unreachable (); + else if (NEXT_INSN (insn) && PREV_INSN (NEXT_INSN (insn)) != insn) + gcc_unreachable (); + + /* If it's the first insn visited, then the successor is ok. */ + if (!p) + return false; + + prev_insn = ILIST_INSN (p); + + if (/* a backward edge. */ + INSN_SEQNO (insn) < INSN_SEQNO (prev_insn) + /* is already visited. */ + || (INSN_SEQNO (insn) == INSN_SEQNO (prev_insn) + && (ilist_is_in_p (p, insn) + /* We can reach another fence here and still seqno of insn + would be equal to seqno of prev_insn. This is possible + when prev_insn is a previously created bookkeeping copy. + In that case it'd get a seqno of insn. Thus, check here + whether insn is in current fence too. */ + || IN_CURRENT_FENCE_P (insn))) + /* Was already scheduled on this round. */ + || (INSN_SEQNO (insn) > INSN_SEQNO (prev_insn) + && IN_CURRENT_FENCE_P (insn)) + /* An insn from another fence could also be + scheduled earlier even if this insn is not in + a fence list right now. Check INSN_SCHED_CYCLE instead. */ + || (!pipelining_p + && INSN_SCHED_TIMES (insn) > 0)) + return true; + else + return false; +} + +/* Computes the av_set below the last bb insn INSN, doing all the 'dirty work' + of handling multiple successors and properly merging its av_sets. P is + the current path traversed. WS is the size of lookahead window. + Return the av set computed. */ +static av_set_t +compute_av_set_at_bb_end (insn_t insn, ilist_t p, int ws) +{ + struct succs_info *sinfo; + av_set_t expr_in_all_succ_branches = NULL; + int is; + insn_t succ, zero_succ = NULL; + av_set_t av1 = NULL; + + gcc_assert (sel_bb_end_p (insn)); + + /* Find different kind of successors needed for correct computing of + SPEC and TARGET_AVAILABLE attributes. */ + sinfo = compute_succs_info (insn, SUCCS_NORMAL); + + /* Debug output. */ + if (sched_verbose >= 6) + { + sel_print ("successors of bb end (%d): ", INSN_UID (insn)); + dump_insn_vector (sinfo->succs_ok); + sel_print ("\n"); + if (sinfo->succs_ok_n != sinfo->all_succs_n) + sel_print ("real successors num: %d\n", sinfo->all_succs_n); + } + + /* Add insn to to the tail of current path. */ + ilist_add (&p, insn); + + FOR_EACH_VEC_ELT (rtx, sinfo->succs_ok, is, succ) + { + av_set_t succ_set; + + /* We will edit SUCC_SET and EXPR_SPEC field of its elements. */ + succ_set = compute_av_set_inside_bb (succ, p, ws, true); + + av_set_split_usefulness (succ_set, + VEC_index (int, sinfo->probs_ok, is), + sinfo->all_prob); + + if (sinfo->all_succs_n > 1) + { + /* Find EXPR'es that came from *all* successors and save them + into expr_in_all_succ_branches. This set will be used later + for calculating speculation attributes of EXPR'es. */ + if (is == 0) + { + expr_in_all_succ_branches = av_set_copy (succ_set); + + /* Remember the first successor for later. */ + zero_succ = succ; + } + else + { + av_set_iterator i; + expr_t expr; + + FOR_EACH_EXPR_1 (expr, i, &expr_in_all_succ_branches) + if (!av_set_is_in_p (succ_set, EXPR_VINSN (expr))) + av_set_iter_remove (&i); + } + } + + /* Union the av_sets. Check liveness restrictions on target registers + in special case of two successors. 
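+         With exactly two successors av_set_union_and_live does this
+         during the union; with more successors the checks are done
+         afterwards via mark_unavailable_targets.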
*/ + if (sinfo->succs_ok_n == 2 && is == 1) + { + basic_block bb0 = BLOCK_FOR_INSN (zero_succ); + basic_block bb1 = BLOCK_FOR_INSN (succ); + + gcc_assert (BB_LV_SET_VALID_P (bb0) && BB_LV_SET_VALID_P (bb1)); + av_set_union_and_live (&av1, &succ_set, + BB_LV_SET (bb0), + BB_LV_SET (bb1), + insn); + } + else + av_set_union_and_clear (&av1, &succ_set, insn); + } + + /* Check liveness restrictions via hard way when there are more than + two successors. */ + if (sinfo->succs_ok_n > 2) + FOR_EACH_VEC_ELT (rtx, sinfo->succs_ok, is, succ) + { + basic_block succ_bb = BLOCK_FOR_INSN (succ); + + gcc_assert (BB_LV_SET_VALID_P (succ_bb)); + mark_unavailable_targets (av1, BB_AV_SET (succ_bb), + BB_LV_SET (succ_bb)); + } + + /* Finally, check liveness restrictions on paths leaving the region. */ + if (sinfo->all_succs_n > sinfo->succs_ok_n) + FOR_EACH_VEC_ELT (rtx, sinfo->succs_other, is, succ) + mark_unavailable_targets + (av1, NULL, BB_LV_SET (BLOCK_FOR_INSN (succ))); + + if (sinfo->all_succs_n > 1) + { + av_set_iterator i; + expr_t expr; + + /* Increase the spec attribute of all EXPR'es that didn't come + from all successors. */ + FOR_EACH_EXPR (expr, i, av1) + if (!av_set_is_in_p (expr_in_all_succ_branches, EXPR_VINSN (expr))) + EXPR_SPEC (expr)++; + + av_set_clear (&expr_in_all_succ_branches); + + /* Do not move conditional branches through other + conditional branches. So, remove all conditional + branches from av_set if current operator is a conditional + branch. */ + av_set_substract_cond_branches (&av1); + } + + ilist_remove (&p); + free_succs_info (sinfo); + + if (sched_verbose >= 6) + { + sel_print ("av_succs (%d): ", INSN_UID (insn)); + dump_av_set (av1); + sel_print ("\n"); + } + + return av1; +} + +/* This function computes av_set for the FIRST_INSN by dragging valid + av_set through all basic block insns either from the end of basic block + (computed using compute_av_set_at_bb_end) or from the insn on which + MAX_WS was exceeded. It uses compute_av_set_at_bb_end to compute av_set + below the basic block and handling conditional branches. + FIRST_INSN - the basic block head, P - path consisting of the insns + traversed on the way to the FIRST_INSN (the path is sparse, only bb heads + and bb ends are added to the path), WS - current window size, + NEED_COPY_P - true if we'll make a copy of av_set before returning it. */ +static av_set_t +compute_av_set_inside_bb (insn_t first_insn, ilist_t p, int ws, + bool need_copy_p) +{ + insn_t cur_insn; + int end_ws = ws; + insn_t bb_end = sel_bb_end (BLOCK_FOR_INSN (first_insn)); + insn_t after_bb_end = NEXT_INSN (bb_end); + insn_t last_insn; + av_set_t av = NULL; + basic_block cur_bb = BLOCK_FOR_INSN (first_insn); + + /* Return NULL if insn is not on the legitimate downward path. */ + if (is_ineligible_successor (first_insn, p)) + { + if (sched_verbose >= 6) + sel_print ("Insn %d is ineligible_successor\n", INSN_UID (first_insn)); + + return NULL; + } + + /* If insn already has valid av(insn) computed, just return it. */ + if (AV_SET_VALID_P (first_insn)) + { + av_set_t av_set; + + if (sel_bb_head_p (first_insn)) + av_set = BB_AV_SET (BLOCK_FOR_INSN (first_insn)); + else + av_set = NULL; + + if (sched_verbose >= 6) + { + sel_print ("Insn %d has a valid av set: ", INSN_UID (first_insn)); + dump_av_set (av_set); + sel_print ("\n"); + } + + return need_copy_p ? 
av_set_copy (av_set) : av_set; + } + + ilist_add (&p, first_insn); + + /* As the result after this loop have completed, in LAST_INSN we'll + have the insn which has valid av_set to start backward computation + from: it either will be NULL because on it the window size was exceeded + or other valid av_set as returned by compute_av_set for the last insn + of the basic block. */ + for (last_insn = first_insn; last_insn != after_bb_end; + last_insn = NEXT_INSN (last_insn)) + { + /* We may encounter valid av_set not only on bb_head, but also on + those insns on which previously MAX_WS was exceeded. */ + if (AV_SET_VALID_P (last_insn)) + { + if (sched_verbose >= 6) + sel_print ("Insn %d has a valid empty av set\n", INSN_UID (last_insn)); + break; + } + + /* The special case: the last insn of the BB may be an + ineligible_successor due to its SEQ_NO that was set on + it as a bookkeeping. */ + if (last_insn != first_insn + && is_ineligible_successor (last_insn, p)) + { + if (sched_verbose >= 6) + sel_print ("Insn %d is ineligible_successor\n", INSN_UID (last_insn)); + break; + } + + if (DEBUG_INSN_P (last_insn)) + continue; + + if (end_ws > max_ws) + { + /* We can reach max lookahead size at bb_header, so clean av_set + first. */ + INSN_WS_LEVEL (last_insn) = global_level; + + if (sched_verbose >= 6) + sel_print ("Insn %d is beyond the software lookahead window size\n", + INSN_UID (last_insn)); + break; + } + + end_ws++; + } + + /* Get the valid av_set into AV above the LAST_INSN to start backward + computation from. It either will be empty av_set or av_set computed from + the successors on the last insn of the current bb. */ + if (last_insn != after_bb_end) + { + av = NULL; + + /* This is needed only to obtain av_sets that are identical to + those computed by the old compute_av_set version. */ + if (last_insn == first_insn && !INSN_NOP_P (last_insn)) + av_set_add (&av, INSN_EXPR (last_insn)); + } + else + /* END_WS is always already increased by 1 if LAST_INSN == AFTER_BB_END. */ + av = compute_av_set_at_bb_end (bb_end, p, end_ws); + + /* Compute av_set in AV starting from below the LAST_INSN up to + location above the FIRST_INSN. */ + for (cur_insn = PREV_INSN (last_insn); cur_insn != PREV_INSN (first_insn); + cur_insn = PREV_INSN (cur_insn)) + if (!INSN_NOP_P (cur_insn)) + { + expr_t expr; + + moveup_set_expr (&av, cur_insn, false); + + /* If the expression for CUR_INSN is already in the set, + replace it by the new one. */ + expr = av_set_lookup (av, INSN_VINSN (cur_insn)); + if (expr != NULL) + { + clear_expr (expr); + copy_expr (expr, INSN_EXPR (cur_insn)); + } + else + av_set_add (&av, INSN_EXPR (cur_insn)); + } + + /* Clear stale bb_av_set. */ + if (sel_bb_head_p (first_insn)) + { + av_set_clear (&BB_AV_SET (cur_bb)); + BB_AV_SET (cur_bb) = need_copy_p ? av_set_copy (av) : av; + BB_AV_LEVEL (cur_bb) = global_level; + } + + if (sched_verbose >= 6) + { + sel_print ("Computed av set for insn %d: ", INSN_UID (first_insn)); + dump_av_set (av); + sel_print ("\n"); + } + + ilist_remove (&p); + return av; +} + +/* Compute av set before INSN. + INSN - the current operation (actual rtx INSN) + P - the current path, which is list of insns visited so far + WS - software lookahead window size. + UNIQUE_P - TRUE, if returned av_set will be changed, hence + if we want to save computed av_set in s_i_d, we should make a copy of it. + + In the resulting set we will have only expressions that don't have delay + stalls and nonsubstitutable dependences. 
*/ +static av_set_t +compute_av_set (insn_t insn, ilist_t p, int ws, bool unique_p) +{ + return compute_av_set_inside_bb (insn, p, ws, unique_p); +} + +/* Propagate a liveness set LV through INSN. */ +static void +propagate_lv_set (regset lv, insn_t insn) +{ + gcc_assert (INSN_P (insn)); + + if (INSN_NOP_P (insn)) + return; + + df_simulate_one_insn_backwards (BLOCK_FOR_INSN (insn), insn, lv); +} + +/* Return livness set at the end of BB. */ +static regset +compute_live_after_bb (basic_block bb) +{ + edge e; + edge_iterator ei; + regset lv = get_clear_regset_from_pool (); + + gcc_assert (!ignore_first); + + FOR_EACH_EDGE (e, ei, bb->succs) + if (sel_bb_empty_p (e->dest)) + { + if (! BB_LV_SET_VALID_P (e->dest)) + { + gcc_unreachable (); + gcc_assert (BB_LV_SET (e->dest) == NULL); + BB_LV_SET (e->dest) = compute_live_after_bb (e->dest); + BB_LV_SET_VALID_P (e->dest) = true; + } + IOR_REG_SET (lv, BB_LV_SET (e->dest)); + } + else + IOR_REG_SET (lv, compute_live (sel_bb_head (e->dest))); + + return lv; +} + +/* Compute the set of all live registers at the point before INSN and save + it at INSN if INSN is bb header. */ +regset +compute_live (insn_t insn) +{ + basic_block bb = BLOCK_FOR_INSN (insn); + insn_t final, temp; + regset lv; + + /* Return the valid set if we're already on it. */ + if (!ignore_first) + { + regset src = NULL; + + if (sel_bb_head_p (insn) && BB_LV_SET_VALID_P (bb)) + src = BB_LV_SET (bb); + else + { + gcc_assert (in_current_region_p (bb)); + if (INSN_LIVE_VALID_P (insn)) + src = INSN_LIVE (insn); + } + + if (src) + { + lv = get_regset_from_pool (); + COPY_REG_SET (lv, src); + + if (sel_bb_head_p (insn) && ! BB_LV_SET_VALID_P (bb)) + { + COPY_REG_SET (BB_LV_SET (bb), lv); + BB_LV_SET_VALID_P (bb) = true; + } + + return_regset_to_pool (lv); + return lv; + } + } + + /* We've skipped the wrong lv_set. Don't skip the right one. */ + ignore_first = false; + gcc_assert (in_current_region_p (bb)); + + /* Find a valid LV set in this block or below, if needed. + Start searching from the next insn: either ignore_first is true, or + INSN doesn't have a correct live set. */ + temp = NEXT_INSN (insn); + final = NEXT_INSN (BB_END (bb)); + while (temp != final && ! INSN_LIVE_VALID_P (temp)) + temp = NEXT_INSN (temp); + if (temp == final) + { + lv = compute_live_after_bb (bb); + temp = PREV_INSN (temp); + } + else + { + lv = get_regset_from_pool (); + COPY_REG_SET (lv, INSN_LIVE (temp)); + } + + /* Put correct lv sets on the insns which have bad sets. */ + final = PREV_INSN (insn); + while (temp != final) + { + propagate_lv_set (lv, temp); + COPY_REG_SET (INSN_LIVE (temp), lv); + INSN_LIVE_VALID_P (temp) = true; + temp = PREV_INSN (temp); + } + + /* Also put it in a BB. */ + if (sel_bb_head_p (insn)) + { + basic_block bb = BLOCK_FOR_INSN (insn); + + COPY_REG_SET (BB_LV_SET (bb), lv); + BB_LV_SET_VALID_P (bb) = true; + } + + /* We return LV to the pool, but will not clear it there. Thus we can + legimatelly use LV till the next use of regset_pool_get (). */ + return_regset_to_pool (lv); + return lv; +} + +/* Update liveness sets for INSN. */ +static inline void +update_liveness_on_insn (rtx insn) +{ + ignore_first = true; + compute_live (insn); +} + +/* Compute liveness below INSN and write it into REGS. */ +static inline void +compute_live_below_insn (rtx insn, regset regs) +{ + rtx succ; + succ_iterator si; + + FOR_EACH_SUCC_1 (succ, si, insn, SUCCS_ALL) + IOR_REG_SET (regs, compute_live (succ)); +} + +/* Update the data gathered in av and lv sets starting from INSN. 
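+   The block's av level is invalidated so that compute_av_set rebuilds
+   the set instead of reusing a cached one.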
*/ +static void +update_data_sets (rtx insn) +{ + update_liveness_on_insn (insn); + if (sel_bb_head_p (insn)) + { + gcc_assert (AV_LEVEL (insn) != 0); + BB_AV_LEVEL (BLOCK_FOR_INSN (insn)) = -1; + compute_av_set (insn, NULL, 0, 0); + } +} + + +/* Helper for move_op () and find_used_regs (). + Return speculation type for which a check should be created on the place + of INSN. EXPR is one of the original ops we are searching for. */ +static ds_t +get_spec_check_type_for_insn (insn_t insn, expr_t expr) +{ + ds_t to_check_ds; + ds_t already_checked_ds = EXPR_SPEC_DONE_DS (INSN_EXPR (insn)); + + to_check_ds = EXPR_SPEC_TO_CHECK_DS (expr); + + if (targetm.sched.get_insn_checked_ds) + already_checked_ds |= targetm.sched.get_insn_checked_ds (insn); + + if (spec_info != NULL + && (spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL)) + already_checked_ds |= BEGIN_CONTROL; + + already_checked_ds = ds_get_speculation_types (already_checked_ds); + + to_check_ds &= ~already_checked_ds; + + return to_check_ds; +} + +/* Find the set of registers that are unavailable for storing expres + while moving ORIG_OPS up on the path starting from INSN due to + liveness (USED_REGS) or hardware restrictions (REG_RENAME_P). + + All the original operations found during the traversal are saved in the + ORIGINAL_INSNS list. + + REG_RENAME_P denotes the set of hardware registers that + can not be used with renaming due to the register class restrictions, + mode restrictions and other (the register we'll choose should be + compatible class with the original uses, shouldn't be in call_used_regs, + should be HARD_REGNO_RENAME_OK etc). + + Returns TRUE if we've found all original insns, FALSE otherwise. + + This function utilizes code_motion_path_driver (formerly find_used_regs_1) + to traverse the code motion paths. This helper function finds registers + that are not available for storing expres while moving ORIG_OPS up on the + path starting from INSN. A register considered as used on the moving path, + if one of the following conditions is not satisfied: + + (1) a register not set or read on any path from xi to an instance of + the original operation, + (2) not among the live registers of the point immediately following the + first original operation on a given downward path, except for the + original target register of the operation, + (3) not live on the other path of any conditional branch that is passed + by the operation, in case original operations are not present on + both paths of the conditional branch. + + All the original operations found during the traversal are saved in the + ORIGINAL_INSNS list. + + REG_RENAME_P->CROSSES_CALL is true, if there is a call insn on the path + from INSN to original insn. In this case CALL_USED_REG_SET will be added + to unavailable hard regs at the point original operation is found. */ + +static bool +find_used_regs (insn_t insn, av_set_t orig_ops, regset used_regs, + struct reg_rename *reg_rename_p, def_list_t *original_insns) +{ + def_list_iterator i; + def_t def; + int res; + bool needs_spec_check_p = false; + expr_t expr; + av_set_iterator expr_iter; + struct fur_static_params sparams; + struct cmpd_local_params lparams; + + /* We haven't visited any blocks yet. */ + bitmap_clear (code_motion_visited_blocks); + + /* Init parameters for code_motion_path_driver. */ + sparams.crosses_call = false; + sparams.original_insns = original_insns; + sparams.used_regs = used_regs; + + /* Set the appropriate hooks and data. 
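+     code_motion_path_driver is reused here with the find_used_regs
+     hooks, so it collects used registers and original insns instead
+     of moving code.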
*/ + code_motion_path_driver_info = &fur_hooks; + + res = code_motion_path_driver (insn, orig_ops, NULL, &lparams, &sparams); + + reg_rename_p->crosses_call |= sparams.crosses_call; + + gcc_assert (res == 1); + gcc_assert (original_insns && *original_insns); + + /* ??? We calculate whether an expression needs a check when computing + av sets. This information is not as precise as it could be due to + merging this bit in merge_expr. We can do better in find_used_regs, + but we want to avoid multiple traversals of the same code motion + paths. */ + FOR_EACH_EXPR (expr, expr_iter, orig_ops) + needs_spec_check_p |= EXPR_NEEDS_SPEC_CHECK_P (expr); + + /* Mark hardware regs in REG_RENAME_P that are not suitable + for renaming expr in INSN due to hardware restrictions (register class, + modes compatibility etc). */ + FOR_EACH_DEF (def, i, *original_insns) + { + vinsn_t vinsn = INSN_VINSN (def->orig_insn); + + if (VINSN_SEPARABLE_P (vinsn)) + mark_unavailable_hard_regs (def, reg_rename_p, used_regs); + + /* Do not allow clobbering of ld.[sa] address in case some of the + original operations need a check. */ + if (needs_spec_check_p) + IOR_REG_SET (used_regs, VINSN_REG_USES (vinsn)); + } + + return true; +} + + +/* Functions to choose the best insn from available ones. */ + +/* Adjusts the priority for EXPR using the backend *_adjust_priority hook. */ +static int +sel_target_adjust_priority (expr_t expr) +{ + int priority = EXPR_PRIORITY (expr); + int new_priority; + + if (targetm.sched.adjust_priority) + new_priority = targetm.sched.adjust_priority (EXPR_INSN_RTX (expr), priority); + else + new_priority = priority; + + /* If the priority has changed, adjust EXPR_PRIORITY_ADJ accordingly. */ + EXPR_PRIORITY_ADJ (expr) = new_priority - EXPR_PRIORITY (expr); + + gcc_assert (EXPR_PRIORITY_ADJ (expr) >= 0); + + if (sched_verbose >= 4) + sel_print ("sel_target_adjust_priority: insn %d, %d+%d = %d.\n", + INSN_UID (EXPR_INSN_RTX (expr)), EXPR_PRIORITY (expr), + EXPR_PRIORITY_ADJ (expr), new_priority); + + return new_priority; +} + +/* Rank two available exprs for schedule. Never return 0 here. */ +static int +sel_rank_for_schedule (const void *x, const void *y) +{ + expr_t tmp = *(const expr_t *) y; + expr_t tmp2 = *(const expr_t *) x; + insn_t tmp_insn, tmp2_insn; + vinsn_t tmp_vinsn, tmp2_vinsn; + int val; + + tmp_vinsn = EXPR_VINSN (tmp); + tmp2_vinsn = EXPR_VINSN (tmp2); + tmp_insn = EXPR_INSN_RTX (tmp); + tmp2_insn = EXPR_INSN_RTX (tmp2); + + /* Schedule debug insns as early as possible. */ + if (DEBUG_INSN_P (tmp_insn) && !DEBUG_INSN_P (tmp2_insn)) + return -1; + else if (DEBUG_INSN_P (tmp2_insn)) + return 1; + + /* Prefer SCHED_GROUP_P insns to any others. */ + if (SCHED_GROUP_P (tmp_insn) != SCHED_GROUP_P (tmp2_insn)) + { + if (VINSN_UNIQUE_P (tmp_vinsn) && VINSN_UNIQUE_P (tmp2_vinsn)) + return SCHED_GROUP_P (tmp2_insn) ? 1 : -1; + + /* Now uniqueness means SCHED_GROUP_P is set, because schedule groups + cannot be cloned. */ + if (VINSN_UNIQUE_P (tmp2_vinsn)) + return 1; + return -1; + } + + /* Discourage scheduling of speculative checks. */ + val = (sel_insn_is_speculation_check (tmp_insn) + - sel_insn_is_speculation_check (tmp2_insn)); + if (val) + return val; + + /* Prefer not scheduled insn over scheduled one. */ + if (EXPR_SCHED_TIMES (tmp) > 0 || EXPR_SCHED_TIMES (tmp2) > 0) + { + val = EXPR_SCHED_TIMES (tmp) - EXPR_SCHED_TIMES (tmp2); + if (val) + return val; + } + + /* Prefer jump over non-jump instruction. 
*/ + if (control_flow_insn_p (tmp_insn) && !control_flow_insn_p (tmp2_insn)) + return -1; + else if (control_flow_insn_p (tmp2_insn) && !control_flow_insn_p (tmp_insn)) + return 1; + + /* Prefer an expr with greater priority. */ + if (EXPR_USEFULNESS (tmp) != 0 && EXPR_USEFULNESS (tmp2) != 0) + { + int p2 = EXPR_PRIORITY (tmp2) + EXPR_PRIORITY_ADJ (tmp2), + p1 = EXPR_PRIORITY (tmp) + EXPR_PRIORITY_ADJ (tmp); + + val = p2 * EXPR_USEFULNESS (tmp2) - p1 * EXPR_USEFULNESS (tmp); + } + else + val = EXPR_PRIORITY (tmp2) - EXPR_PRIORITY (tmp) + + EXPR_PRIORITY_ADJ (tmp2) - EXPR_PRIORITY_ADJ (tmp); + if (val) + return val; + + if (spec_info != NULL && spec_info->mask != 0) + /* This code was taken from haifa-sched.c: rank_for_schedule (). */ + { + ds_t ds1, ds2; + dw_t dw1, dw2; + int dw; + + ds1 = EXPR_SPEC_DONE_DS (tmp); + if (ds1) + dw1 = ds_weak (ds1); + else + dw1 = NO_DEP_WEAK; + + ds2 = EXPR_SPEC_DONE_DS (tmp2); + if (ds2) + dw2 = ds_weak (ds2); + else + dw2 = NO_DEP_WEAK; + + dw = dw2 - dw1; + if (dw > (NO_DEP_WEAK / 8) || dw < -(NO_DEP_WEAK / 8)) + return dw; + } + + /* Prefer an old insn to a bookkeeping insn. */ + if (INSN_UID (tmp_insn) < first_emitted_uid + && INSN_UID (tmp2_insn) >= first_emitted_uid) + return -1; + if (INSN_UID (tmp_insn) >= first_emitted_uid + && INSN_UID (tmp2_insn) < first_emitted_uid) + return 1; + + /* Prefer an insn with smaller UID, as a last resort. + We can't safely use INSN_LUID as it is defined only for those insns + that are in the stream. */ + return INSN_UID (tmp_insn) - INSN_UID (tmp2_insn); +} + +/* Filter out expressions from av set pointed to by AV_PTR + that are pipelined too many times. */ +static void +process_pipelined_exprs (av_set_t *av_ptr) +{ + expr_t expr; + av_set_iterator si; + + /* Don't pipeline already pipelined code as that would increase + number of unnecessary register moves. */ + FOR_EACH_EXPR_1 (expr, si, av_ptr) + { + if (EXPR_SCHED_TIMES (expr) + >= PARAM_VALUE (PARAM_SELSCHED_MAX_SCHED_TIMES)) + av_set_iter_remove (&si); + } +} + +/* Filter speculative insns from AV_PTR if we don't want them. */ +static void +process_spec_exprs (av_set_t *av_ptr) +{ + bool try_data_p = true; + bool try_control_p = true; + expr_t expr; + av_set_iterator si; + + if (spec_info == NULL) + return; + + /* Scan *AV_PTR to find out if we want to consider speculative + instructions for scheduling. */ + FOR_EACH_EXPR_1 (expr, si, av_ptr) + { + ds_t ds; + + ds = EXPR_SPEC_DONE_DS (expr); + + /* The probability of a success is too low - don't speculate. */ + if ((ds & SPECULATIVE) + && (ds_weak (ds) < spec_info->data_weakness_cutoff + || EXPR_USEFULNESS (expr) < spec_info->control_weakness_cutoff + || (pipelining_p && false + && (ds & DATA_SPEC) + && (ds & CONTROL_SPEC)))) + { + av_set_iter_remove (&si); + continue; + } + + if ((spec_info->flags & PREFER_NON_DATA_SPEC) + && !(ds & BEGIN_DATA)) + try_data_p = false; + + if ((spec_info->flags & PREFER_NON_CONTROL_SPEC) + && !(ds & BEGIN_CONTROL)) + try_control_p = false; + } + + FOR_EACH_EXPR_1 (expr, si, av_ptr) + { + ds_t ds; + + ds = EXPR_SPEC_DONE_DS (expr); + + if (ds & SPECULATIVE) + { + if ((ds & BEGIN_DATA) && !try_data_p) + /* We don't want any data speculative instructions right + now. */ + av_set_iter_remove (&si); + + if ((ds & BEGIN_CONTROL) && !try_control_p) + /* We don't want any control speculative instructions right + now. */ + av_set_iter_remove (&si); + } + } +} + +/* Search for any use-like insns in AV_PTR and decide on scheduling + them. Return one when found, and NULL otherwise. 
+ Note that we check here whether a USE could be scheduled to avoid + an infinite loop later. */ +static expr_t +process_use_exprs (av_set_t *av_ptr) +{ + expr_t expr; + av_set_iterator si; + bool uses_present_p = false; + bool try_uses_p = true; + + FOR_EACH_EXPR_1 (expr, si, av_ptr) + { + /* This will also initialize INSN_CODE for later use. */ + if (recog_memoized (EXPR_INSN_RTX (expr)) < 0) + { + /* If we have a USE in *AV_PTR that was not scheduled yet, + do so because it will do good only. */ + if (EXPR_SCHED_TIMES (expr) <= 0) + { + if (EXPR_TARGET_AVAILABLE (expr) == 1) + return expr; + + av_set_iter_remove (&si); + } + else + { + gcc_assert (pipelining_p); + + uses_present_p = true; + } + } + else + try_uses_p = false; + } + + if (uses_present_p) + { + /* If we don't want to schedule any USEs right now and we have some + in *AV_PTR, remove them, else just return the first one found. */ + if (!try_uses_p) + { + FOR_EACH_EXPR_1 (expr, si, av_ptr) + if (INSN_CODE (EXPR_INSN_RTX (expr)) < 0) + av_set_iter_remove (&si); + } + else + { + FOR_EACH_EXPR_1 (expr, si, av_ptr) + { + gcc_assert (INSN_CODE (EXPR_INSN_RTX (expr)) < 0); + + if (EXPR_TARGET_AVAILABLE (expr) == 1) + return expr; + + av_set_iter_remove (&si); + } + } + } + + return NULL; +} + +/* Lookup EXPR in VINSN_VEC and return TRUE if found. Also check patterns from + EXPR's history of changes. */ +static bool +vinsn_vec_has_expr_p (vinsn_vec_t vinsn_vec, expr_t expr) +{ + vinsn_t vinsn, expr_vinsn; + int n; + unsigned i; + + /* Start with checking expr itself and then proceed with all the old forms + of expr taken from its history vector. */ + for (i = 0, expr_vinsn = EXPR_VINSN (expr); + expr_vinsn; + expr_vinsn = (i < VEC_length (expr_history_def, + EXPR_HISTORY_OF_CHANGES (expr)) + ? VEC_index (expr_history_def, + EXPR_HISTORY_OF_CHANGES (expr), + i++)->old_expr_vinsn + : NULL)) + FOR_EACH_VEC_ELT (vinsn_t, vinsn_vec, n, vinsn) + if (VINSN_SEPARABLE_P (vinsn)) + { + if (vinsn_equal_p (vinsn, expr_vinsn)) + return true; + } + else + { + /* For non-separable instructions, the blocking insn can have + another pattern due to substitution, and we can't choose + different register as in the above case. Check all registers + being written instead. */ + if (bitmap_intersect_p (VINSN_REG_SETS (vinsn), + VINSN_REG_SETS (expr_vinsn))) + return true; + } + + return false; +} + +#ifdef ENABLE_CHECKING +/* Return true if either of expressions from ORIG_OPS can be blocked + by previously created bookkeeping code. STATIC_PARAMS points to static + parameters of move_op. */ +static bool +av_set_could_be_blocked_by_bookkeeping_p (av_set_t orig_ops, void *static_params) +{ + expr_t expr; + av_set_iterator iter; + moveop_static_params_p sparams; + + /* This checks that expressions in ORIG_OPS are not blocked by bookkeeping + created while scheduling on another fence. */ + FOR_EACH_EXPR (expr, iter, orig_ops) + if (vinsn_vec_has_expr_p (vec_bookkeeping_blocked_vinsns, expr)) + return true; + + gcc_assert (code_motion_path_driver_info == &move_op_hooks); + sparams = (moveop_static_params_p) static_params; + + /* Expressions can be also blocked by bookkeeping created during current + move_op. */ + if (bitmap_bit_p (current_copies, INSN_UID (sparams->failed_insn))) + FOR_EACH_EXPR (expr, iter, orig_ops) + if (moveup_expr_cached (expr, sparams->failed_insn, false) != MOVEUP_EXPR_NULL) + return true; + + /* Expressions in ORIG_OPS may have wrong destination register due to + renaming. Check with the right register instead. 
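+     SPARAMS->dest is the register actually chosen for the move, so the
+     failed insn is compared against that register.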
*/ + if (sparams->dest && REG_P (sparams->dest)) + { + unsigned regno = REGNO (sparams->dest); + vinsn_t failed_vinsn = INSN_VINSN (sparams->failed_insn); + + if (bitmap_bit_p (VINSN_REG_SETS (failed_vinsn), regno) + || bitmap_bit_p (VINSN_REG_USES (failed_vinsn), regno) + || bitmap_bit_p (VINSN_REG_CLOBBERS (failed_vinsn), regno)) + return true; + } + + return false; +} +#endif + +/* Clear VINSN_VEC and detach vinsns. */ +static void +vinsn_vec_clear (vinsn_vec_t *vinsn_vec) +{ + unsigned len = VEC_length (vinsn_t, *vinsn_vec); + if (len > 0) + { + vinsn_t vinsn; + int n; + + FOR_EACH_VEC_ELT (vinsn_t, *vinsn_vec, n, vinsn) + vinsn_detach (vinsn); + VEC_block_remove (vinsn_t, *vinsn_vec, 0, len); + } +} + +/* Add the vinsn of EXPR to the VINSN_VEC. */ +static void +vinsn_vec_add (vinsn_vec_t *vinsn_vec, expr_t expr) +{ + vinsn_attach (EXPR_VINSN (expr)); + VEC_safe_push (vinsn_t, heap, *vinsn_vec, EXPR_VINSN (expr)); +} + +/* Free the vector representing blocked expressions. */ +static void +vinsn_vec_free (vinsn_vec_t *vinsn_vec) +{ + if (*vinsn_vec) + VEC_free (vinsn_t, heap, *vinsn_vec); +} + +/* Increase EXPR_PRIORITY_ADJ for INSN by AMOUNT. */ + +void sel_add_to_insn_priority (rtx insn, int amount) +{ + EXPR_PRIORITY_ADJ (INSN_EXPR (insn)) += amount; + + if (sched_verbose >= 2) + sel_print ("sel_add_to_insn_priority: insn %d, by %d (now %d+%d).\n", + INSN_UID (insn), amount, EXPR_PRIORITY (INSN_EXPR (insn)), + EXPR_PRIORITY_ADJ (INSN_EXPR (insn))); +} + +/* Turn AV into a vector, filter inappropriate insns and sort it. Return + true if there is something to schedule. BNDS and FENCE are current + boundaries and fence, respectively. If we need to stall for some cycles + before an expr from AV would become available, write this number to + *PNEED_STALL. */ +static bool +fill_vec_av_set (av_set_t av, blist_t bnds, fence_t fence, + int *pneed_stall) +{ + av_set_iterator si; + expr_t expr; + int sched_next_worked = 0, stalled, n; + static int av_max_prio, est_ticks_till_branch; + int min_need_stall = -1; + deps_t dc = BND_DC (BLIST_BND (bnds)); + + /* Bail out early when the ready list contained only USEs/CLOBBERs that are + already scheduled. */ + if (av == NULL) + return false; + + /* Empty vector from the previous stuff. */ + if (VEC_length (expr_t, vec_av_set) > 0) + VEC_block_remove (expr_t, vec_av_set, 0, VEC_length (expr_t, vec_av_set)); + + /* Turn the set into a vector for sorting and call sel_target_adjust_priority + for each insn. */ + gcc_assert (VEC_empty (expr_t, vec_av_set)); + FOR_EACH_EXPR (expr, si, av) + { + VEC_safe_push (expr_t, heap, vec_av_set, expr); + + gcc_assert (EXPR_PRIORITY_ADJ (expr) == 0 || *pneed_stall); + + /* Adjust priority using target backend hook. */ + sel_target_adjust_priority (expr); + } + + /* Sort the vector. */ + VEC_qsort (expr_t, vec_av_set, sel_rank_for_schedule); + + /* We record maximal priority of insns in av set for current instruction + group. */ + if (FENCE_STARTS_CYCLE_P (fence)) + av_max_prio = est_ticks_till_branch = INT_MIN; + + /* Filter out inappropriate expressions. Loop's direction is reversed to + visit "best" instructions first. We assume that VEC_unordered_remove + moves last element in place of one being deleted. 
*/ + for (n = VEC_length (expr_t, vec_av_set) - 1, stalled = 0; n >= 0; n--) + { + expr_t expr = VEC_index (expr_t, vec_av_set, n); + insn_t insn = EXPR_INSN_RTX (expr); + signed char target_available; + bool is_orig_reg_p = true; + int need_cycles, new_prio; + + /* Don't allow any insns other than from SCHED_GROUP if we have one. */ + if (FENCE_SCHED_NEXT (fence) && insn != FENCE_SCHED_NEXT (fence)) + { + VEC_unordered_remove (expr_t, vec_av_set, n); + continue; + } + + /* Set number of sched_next insns (just in case there + could be several). */ + if (FENCE_SCHED_NEXT (fence)) + sched_next_worked++; + + /* Check all liveness requirements and try renaming. + FIXME: try to minimize calls to this. */ + target_available = EXPR_TARGET_AVAILABLE (expr); + + /* If insn was already scheduled on the current fence, + set TARGET_AVAILABLE to -1 no matter what expr's attribute says. */ + if (vinsn_vec_has_expr_p (vec_target_unavailable_vinsns, expr)) + target_available = -1; + + /* If the availability of the EXPR is invalidated by the insertion of + bookkeeping earlier, make sure that we won't choose this expr for + scheduling if it's not separable, and if it is separable, then + we have to recompute the set of available registers for it. */ + if (vinsn_vec_has_expr_p (vec_bookkeeping_blocked_vinsns, expr)) + { + VEC_unordered_remove (expr_t, vec_av_set, n); + if (sched_verbose >= 4) + sel_print ("Expr %d is blocked by bookkeeping inserted earlier\n", + INSN_UID (insn)); + continue; + } + + if (target_available == true) + { + /* Do nothing -- we can use an existing register. */ + is_orig_reg_p = EXPR_SEPARABLE_P (expr); + } + else if (/* Non-separable instruction will never + get another register. */ + (target_available == false + && !EXPR_SEPARABLE_P (expr)) + /* Don't try to find a register for low-priority expression. */ + || (int) VEC_length (expr_t, vec_av_set) - 1 - n >= max_insns_to_rename + /* ??? FIXME: Don't try to rename data speculation. */ + || (EXPR_SPEC_DONE_DS (expr) & BEGIN_DATA) + || ! find_best_reg_for_expr (expr, bnds, &is_orig_reg_p)) + { + VEC_unordered_remove (expr_t, vec_av_set, n); + if (sched_verbose >= 4) + sel_print ("Expr %d has no suitable target register\n", + INSN_UID (insn)); + continue; + } + + /* Filter expressions that need to be renamed or speculated when + pipelining, because compensating register copies or speculation + checks are likely to be placed near the beginning of the loop, + causing a stall. */ + if (pipelining_p && EXPR_ORIG_SCHED_CYCLE (expr) > 0 + && (!is_orig_reg_p || EXPR_SPEC_DONE_DS (expr) != 0)) + { + /* Estimation of number of cycles until loop branch for + renaming/speculation to be successful. */ + int need_n_ticks_till_branch = sel_vinsn_cost (EXPR_VINSN (expr)); + + if ((int) current_loop_nest->ninsns < 9) + { + VEC_unordered_remove (expr_t, vec_av_set, n); + if (sched_verbose >= 4) + sel_print ("Pipelining expr %d will likely cause stall\n", + INSN_UID (insn)); + continue; + } + + if ((int) current_loop_nest->ninsns - num_insns_scheduled + < need_n_ticks_till_branch * issue_rate / 2 + && est_ticks_till_branch < need_n_ticks_till_branch) + { + VEC_unordered_remove (expr_t, vec_av_set, n); + if (sched_verbose >= 4) + sel_print ("Pipelining expr %d will likely cause stall\n", + INSN_UID (insn)); + continue; + } + } + + /* We want to schedule speculation checks as late as possible. Discard + them from av set if there are instructions with higher priority. 
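+         A check removed here still counts towards STALLED and caps
+         MIN_NEED_STALL at one cycle, so an av set consisting only of
+         delayed checks produces a short stall request rather than an
+         empty ready list with no stall at all.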
*/ + if (sel_insn_is_speculation_check (insn) + && EXPR_PRIORITY (expr) < av_max_prio) + { + stalled++; + min_need_stall = min_need_stall < 0 ? 1 : MIN (min_need_stall, 1); + VEC_unordered_remove (expr_t, vec_av_set, n); + if (sched_verbose >= 4) + sel_print ("Delaying speculation check %d until its first use\n", + INSN_UID (insn)); + continue; + } + + /* Ignore EXPRs available from pipelining to update AV_MAX_PRIO. */ + if (EXPR_ORIG_SCHED_CYCLE (expr) <= 0) + av_max_prio = MAX (av_max_prio, EXPR_PRIORITY (expr)); + + /* Don't allow any insns whose data is not yet ready. + Check first whether we've already tried them and failed. */ + if (INSN_UID (insn) < FENCE_READY_TICKS_SIZE (fence)) + { + need_cycles = (FENCE_READY_TICKS (fence)[INSN_UID (insn)] + - FENCE_CYCLE (fence)); + if (EXPR_ORIG_SCHED_CYCLE (expr) <= 0) + est_ticks_till_branch = MAX (est_ticks_till_branch, + EXPR_PRIORITY (expr) + need_cycles); + + if (need_cycles > 0) + { + stalled++; + min_need_stall = (min_need_stall < 0 + ? need_cycles + : MIN (min_need_stall, need_cycles)); + VEC_unordered_remove (expr_t, vec_av_set, n); + + if (sched_verbose >= 4) + sel_print ("Expr %d is not ready until cycle %d (cached)\n", + INSN_UID (insn), + FENCE_READY_TICKS (fence)[INSN_UID (insn)]); + continue; + } + } + + /* Now resort to dependence analysis to find whether EXPR might be + stalled due to dependencies from FENCE's context. */ + need_cycles = tick_check_p (expr, dc, fence); + new_prio = EXPR_PRIORITY (expr) + EXPR_PRIORITY_ADJ (expr) + need_cycles; + + if (EXPR_ORIG_SCHED_CYCLE (expr) <= 0) + est_ticks_till_branch = MAX (est_ticks_till_branch, + new_prio); + + if (need_cycles > 0) + { + if (INSN_UID (insn) >= FENCE_READY_TICKS_SIZE (fence)) + { + int new_size = INSN_UID (insn) * 3 / 2; + + FENCE_READY_TICKS (fence) + = (int *) xrecalloc (FENCE_READY_TICKS (fence), + new_size, FENCE_READY_TICKS_SIZE (fence), + sizeof (int)); + } + FENCE_READY_TICKS (fence)[INSN_UID (insn)] + = FENCE_CYCLE (fence) + need_cycles; + + stalled++; + min_need_stall = (min_need_stall < 0 + ? need_cycles + : MIN (min_need_stall, need_cycles)); + + VEC_unordered_remove (expr_t, vec_av_set, n); + + if (sched_verbose >= 4) + sel_print ("Expr %d is not ready yet until cycle %d\n", + INSN_UID (insn), + FENCE_READY_TICKS (fence)[INSN_UID (insn)]); + continue; + } + + if (sched_verbose >= 4) + sel_print ("Expr %d is ok\n", INSN_UID (insn)); + min_need_stall = 0; + } + + /* Clear SCHED_NEXT. */ + if (FENCE_SCHED_NEXT (fence)) + { + gcc_assert (sched_next_worked == 1); + FENCE_SCHED_NEXT (fence) = NULL_RTX; + } + + /* No need to stall if this variable was not initialized. */ + if (min_need_stall < 0) + min_need_stall = 0; + + if (VEC_empty (expr_t, vec_av_set)) + { + /* We need to set *pneed_stall here, because later we skip this code + when ready list is empty. */ + *pneed_stall = min_need_stall; + return false; + } + else + gcc_assert (min_need_stall == 0); + + /* Sort the vector. */ + VEC_qsort (expr_t, vec_av_set, sel_rank_for_schedule); + + if (sched_verbose >= 4) + { + sel_print ("Total ready exprs: %d, stalled: %d\n", + VEC_length (expr_t, vec_av_set), stalled); + sel_print ("Sorted av set (%d): ", VEC_length (expr_t, vec_av_set)); + FOR_EACH_VEC_ELT (expr_t, vec_av_set, n, expr) + dump_expr (expr); + sel_print ("\n"); + } + + *pneed_stall = 0; + return true; +} + +/* Convert a vectored and sorted av set to the ready list that + the rest of the backend wants to see. 
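+
+   A minimal sketch of the resulting layout (matching the assertion in
+   find_expr_for_ready () below): element I of READY.VEC and element I of
+   VEC_AV_SET describe the same insn, while the index used by
+   ready_element () counts down from READY.FIRST:
+
+     ready.n_ready == VEC_length (expr_t, vec_av_set)
+     ready.first   == ready.n_ready - 1
+     ready.vec[i]  == EXPR_INSN_RTX (VEC_index (expr_t, vec_av_set, i))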
*/ +static void +convert_vec_av_set_to_ready (void) +{ + int n; + expr_t expr; + + /* Allocate and fill the ready list from the sorted vector. */ + ready.n_ready = VEC_length (expr_t, vec_av_set); + ready.first = ready.n_ready - 1; + + gcc_assert (ready.n_ready > 0); + + if (ready.n_ready > max_issue_size) + { + max_issue_size = ready.n_ready; + sched_extend_ready_list (ready.n_ready); + } + + FOR_EACH_VEC_ELT (expr_t, vec_av_set, n, expr) + { + vinsn_t vi = EXPR_VINSN (expr); + insn_t insn = VINSN_INSN_RTX (vi); + + ready_try[n] = 0; + ready.vec[n] = insn; + } +} + +/* Initialize ready list from *AV_PTR for the max_issue () call. + If any unrecognizable insn found in *AV_PTR, return it (and skip + max_issue). BND and FENCE are current boundary and fence, + respectively. If we need to stall for some cycles before an expr + from *AV_PTR would become available, write this number to *PNEED_STALL. */ +static expr_t +fill_ready_list (av_set_t *av_ptr, blist_t bnds, fence_t fence, + int *pneed_stall) +{ + expr_t expr; + + /* We do not support multiple boundaries per fence. */ + gcc_assert (BLIST_NEXT (bnds) == NULL); + + /* Process expressions required special handling, i.e. pipelined, + speculative and recog() < 0 expressions first. */ + process_pipelined_exprs (av_ptr); + process_spec_exprs (av_ptr); + + /* A USE could be scheduled immediately. */ + expr = process_use_exprs (av_ptr); + if (expr) + { + *pneed_stall = 0; + return expr; + } + + /* Turn the av set to a vector for sorting. */ + if (! fill_vec_av_set (*av_ptr, bnds, fence, pneed_stall)) + { + ready.n_ready = 0; + return NULL; + } + + /* Build the final ready list. */ + convert_vec_av_set_to_ready (); + return NULL; +} + +/* Wrapper for dfa_new_cycle (). Returns TRUE if cycle was advanced. */ +static bool +sel_dfa_new_cycle (insn_t insn, fence_t fence) +{ + int last_scheduled_cycle = FENCE_LAST_SCHEDULED_INSN (fence) + ? INSN_SCHED_CYCLE (FENCE_LAST_SCHEDULED_INSN (fence)) + : FENCE_CYCLE (fence) - 1; + bool res = false; + int sort_p = 0; + + if (!targetm.sched.dfa_new_cycle) + return false; + + memcpy (curr_state, FENCE_STATE (fence), dfa_state_size); + + while (!sort_p && targetm.sched.dfa_new_cycle (sched_dump, sched_verbose, + insn, last_scheduled_cycle, + FENCE_CYCLE (fence), &sort_p)) + { + memcpy (FENCE_STATE (fence), curr_state, dfa_state_size); + advance_one_cycle (fence); + memcpy (curr_state, FENCE_STATE (fence), dfa_state_size); + res = true; + } + + return res; +} + +/* Invoke reorder* target hooks on the ready list. Return the number of insns + we can issue. FENCE is the current fence. */ +static int +invoke_reorder_hooks (fence_t fence) +{ + int issue_more; + bool ran_hook = false; + + /* Call the reorder hook at the beginning of the cycle, and call + the reorder2 hook in the middle of the cycle. */ + if (FENCE_ISSUED_INSNS (fence) == 0) + { + if (targetm.sched.reorder + && !SCHED_GROUP_P (ready_element (&ready, 0)) + && ready.n_ready > 1) + { + /* Don't give reorder the most prioritized insn as it can break + pipelining. */ + if (pipelining_p) + --ready.n_ready; + + issue_more + = targetm.sched.reorder (sched_dump, sched_verbose, + ready_lastpos (&ready), + &ready.n_ready, FENCE_CYCLE (fence)); + + if (pipelining_p) + ++ready.n_ready; + + ran_hook = true; + } + else + /* Initialize can_issue_more for variable_issue. 
*/ + issue_more = issue_rate; + } + else if (targetm.sched.reorder2 + && !SCHED_GROUP_P (ready_element (&ready, 0))) + { + if (ready.n_ready == 1) + issue_more = + targetm.sched.reorder2 (sched_dump, sched_verbose, + ready_lastpos (&ready), + &ready.n_ready, FENCE_CYCLE (fence)); + else + { + if (pipelining_p) + --ready.n_ready; + + issue_more = + targetm.sched.reorder2 (sched_dump, sched_verbose, + ready.n_ready + ? ready_lastpos (&ready) : NULL, + &ready.n_ready, FENCE_CYCLE (fence)); + + if (pipelining_p) + ++ready.n_ready; + } + + ran_hook = true; + } + else + issue_more = FENCE_ISSUE_MORE (fence); + + /* Ensure that ready list and vec_av_set are in line with each other, + i.e. vec_av_set[i] == ready_element (&ready, i). */ + if (issue_more && ran_hook) + { + int i, j, n; + rtx *arr = ready.vec; + expr_t *vec = VEC_address (expr_t, vec_av_set); + + for (i = 0, n = ready.n_ready; i < n; i++) + if (EXPR_INSN_RTX (vec[i]) != arr[i]) + { + expr_t tmp; + + for (j = i; j < n; j++) + if (EXPR_INSN_RTX (vec[j]) == arr[i]) + break; + gcc_assert (j < n); + + tmp = vec[i]; + vec[i] = vec[j]; + vec[j] = tmp; + } + } + + return issue_more; +} + +/* Return an EXPR correponding to INDEX element of ready list, if + FOLLOW_READY_ELEMENT is true (i.e., an expr of + ready_element (&ready, INDEX) will be returned), and to INDEX element of + ready.vec otherwise. */ +static inline expr_t +find_expr_for_ready (int index, bool follow_ready_element) +{ + expr_t expr; + int real_index; + + real_index = follow_ready_element ? ready.first - index : index; + + expr = VEC_index (expr_t, vec_av_set, real_index); + gcc_assert (ready.vec[real_index] == EXPR_INSN_RTX (expr)); + + return expr; +} + +/* Calculate insns worth trying via lookahead_guard hook. Return a number + of such insns found. */ +static int +invoke_dfa_lookahead_guard (void) +{ + int i, n; + bool have_hook + = targetm.sched.first_cycle_multipass_dfa_lookahead_guard != NULL; + + if (sched_verbose >= 2) + sel_print ("ready after reorder: "); + + for (i = 0, n = 0; i < ready.n_ready; i++) + { + expr_t expr; + insn_t insn; + int r; + + /* In this loop insn is Ith element of the ready list given by + ready_element, not Ith element of ready.vec. */ + insn = ready_element (&ready, i); + + if (! have_hook || i == 0) + r = 0; + else + r = !targetm.sched.first_cycle_multipass_dfa_lookahead_guard (insn); + + gcc_assert (INSN_CODE (insn) >= 0); + + /* Only insns with ready_try = 0 can get here + from fill_ready_list. */ + gcc_assert (ready_try [i] == 0); + ready_try[i] = r; + if (!r) + n++; + + expr = find_expr_for_ready (i, true); + + if (sched_verbose >= 2) + { + dump_vinsn (EXPR_VINSN (expr)); + sel_print (":%d; ", ready_try[i]); + } + } + + if (sched_verbose >= 2) + sel_print ("\n"); + return n; +} + +/* Calculate the number of privileged insns and return it. */ +static int +calculate_privileged_insns (void) +{ + expr_t cur_expr, min_spec_expr = NULL; + int privileged_n = 0, i; + + for (i = 0; i < ready.n_ready; i++) + { + if (ready_try[i]) + continue; + + if (! min_spec_expr) + min_spec_expr = find_expr_for_ready (i, true); + + cur_expr = find_expr_for_ready (i, true); + + if (EXPR_SPEC (cur_expr) > EXPR_SPEC (min_spec_expr)) + break; + + ++privileged_n; + } + + if (i == ready.n_ready) + privileged_n = 0; + + if (sched_verbose >= 2) + sel_print ("privileged_n: %d insns with SPEC %d\n", + privileged_n, privileged_n ? EXPR_SPEC (min_spec_expr) : -1); + return privileged_n; +} + +/* Call the rest of the hooks after the choice was made. 
Return + the number of insns that still can be issued given that the current + number is ISSUE_MORE. FENCE and BEST_INSN are the current fence + and the insn chosen for scheduling, respectively. */ +static int +invoke_aftermath_hooks (fence_t fence, rtx best_insn, int issue_more) +{ + gcc_assert (INSN_P (best_insn)); + + /* First, call dfa_new_cycle, and then variable_issue, if available. */ + sel_dfa_new_cycle (best_insn, fence); + + if (targetm.sched.variable_issue) + { + memcpy (curr_state, FENCE_STATE (fence), dfa_state_size); + issue_more = + targetm.sched.variable_issue (sched_dump, sched_verbose, best_insn, + issue_more); + memcpy (FENCE_STATE (fence), curr_state, dfa_state_size); + } + else if (GET_CODE (PATTERN (best_insn)) != USE + && GET_CODE (PATTERN (best_insn)) != CLOBBER) + issue_more--; + + return issue_more; +} + +/* Estimate the cost of issuing INSN on DFA state STATE. */ +static int +estimate_insn_cost (rtx insn, state_t state) +{ + static state_t temp = NULL; + int cost; + + if (!temp) + temp = xmalloc (dfa_state_size); + + memcpy (temp, state, dfa_state_size); + cost = state_transition (temp, insn); + + if (cost < 0) + return 0; + else if (cost == 0) + return 1; + return cost; +} + +/* Return the cost of issuing EXPR on the FENCE as estimated by DFA. + This function properly handles ASMs, USEs etc. */ +static int +get_expr_cost (expr_t expr, fence_t fence) +{ + rtx insn = EXPR_INSN_RTX (expr); + + if (recog_memoized (insn) < 0) + { + if (!FENCE_STARTS_CYCLE_P (fence) + && INSN_ASM_P (insn)) + /* This is asm insn which is tryed to be issued on the + cycle not first. Issue it on the next cycle. */ + return 1; + else + /* A USE insn, or something else we don't need to + understand. We can't pass these directly to + state_transition because it will trigger a + fatal error for unrecognizable insns. */ + return 0; + } + else + return estimate_insn_cost (insn, FENCE_STATE (fence)); +} + +/* Find the best insn for scheduling, either via max_issue or just take + the most prioritized available. */ +static int +choose_best_insn (fence_t fence, int privileged_n, int *index) +{ + int can_issue = 0; + + if (dfa_lookahead > 0) + { + cycle_issued_insns = FENCE_ISSUED_INSNS (fence); + /* TODO: pass equivalent of first_cycle_insn_p to max_issue (). */ + can_issue = max_issue (&ready, privileged_n, + FENCE_STATE (fence), true, index); + if (sched_verbose >= 2) + sel_print ("max_issue: we can issue %d insns, already did %d insns\n", + can_issue, FENCE_ISSUED_INSNS (fence)); + } + else + { + /* We can't use max_issue; just return the first available element. */ + int i; + + for (i = 0; i < ready.n_ready; i++) + { + expr_t expr = find_expr_for_ready (i, true); + + if (get_expr_cost (expr, fence) < 1) + { + can_issue = can_issue_more; + *index = i; + + if (sched_verbose >= 2) + sel_print ("using %dth insn from the ready list\n", i + 1); + + break; + } + } + + if (i == ready.n_ready) + { + can_issue = 0; + *index = -1; + } + } + + return can_issue; +} + +/* Choose the best expr from *AV_VLIW_PTR and a suitable register for it. + BNDS and FENCE are current boundaries and scheduling fence respectively. + Return the expr found and NULL if nothing can be issued atm. + Write to PNEED_STALL the number of cycles to stall if no expr was found. 
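+
+   An illustrative caller-side sketch (fill_insns () below does this, with
+   an additional bound on how long it is willing to stall):
+
+     expr_vliw = find_best_expr (&av_vliw, bnds, fence, &need_stall);
+     if (!expr_vliw && need_stall)
+       stall_for_cycles (fence, need_stall);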
*/ +static expr_t +find_best_expr (av_set_t *av_vliw_ptr, blist_t bnds, fence_t fence, + int *pneed_stall) +{ + expr_t best; + + /* Choose the best insn for scheduling via: + 1) sorting the ready list based on priority; + 2) calling the reorder hook; + 3) calling max_issue. */ + best = fill_ready_list (av_vliw_ptr, bnds, fence, pneed_stall); + if (best == NULL && ready.n_ready > 0) + { + int privileged_n, index; + + can_issue_more = invoke_reorder_hooks (fence); + if (can_issue_more > 0) + { + /* Try choosing the best insn until we find one that is could be + scheduled due to liveness restrictions on its destination register. + In the future, we'd like to choose once and then just probe insns + in the order of their priority. */ + invoke_dfa_lookahead_guard (); + privileged_n = calculate_privileged_insns (); + can_issue_more = choose_best_insn (fence, privileged_n, &index); + if (can_issue_more) + best = find_expr_for_ready (index, true); + } + /* We had some available insns, so if we can't issue them, + we have a stall. */ + if (can_issue_more == 0) + { + best = NULL; + *pneed_stall = 1; + } + } + + if (best != NULL) + { + can_issue_more = invoke_aftermath_hooks (fence, EXPR_INSN_RTX (best), + can_issue_more); + if (targetm.sched.variable_issue + && can_issue_more == 0) + *pneed_stall = 1; + } + + if (sched_verbose >= 2) + { + if (best != NULL) + { + sel_print ("Best expression (vliw form): "); + dump_expr (best); + sel_print ("; cycle %d\n", FENCE_CYCLE (fence)); + } + else + sel_print ("No best expr found!\n"); + } + + return best; +} + + +/* Functions that implement the core of the scheduler. */ + + +/* Emit an instruction from EXPR with SEQNO and VINSN after + PLACE_TO_INSERT. */ +static insn_t +emit_insn_from_expr_after (expr_t expr, vinsn_t vinsn, int seqno, + insn_t place_to_insert) +{ + /* This assert fails when we have identical instructions + one of which dominates the other. In this case move_op () + finds the first instruction and doesn't search for second one. + The solution would be to compute av_set after the first found + insn and, if insn present in that set, continue searching. + For now we workaround this issue in move_op. */ + gcc_assert (!INSN_IN_STREAM_P (EXPR_INSN_RTX (expr))); + + if (EXPR_WAS_RENAMED (expr)) + { + unsigned regno = expr_dest_regno (expr); + + if (HARD_REGISTER_NUM_P (regno)) + { + df_set_regs_ever_live (regno, true); + reg_rename_tick[regno] = ++reg_rename_this_tick; + } + } + + return sel_gen_insn_from_expr_after (expr, vinsn, seqno, + place_to_insert); +} + +/* Return TRUE if BB can hold bookkeeping code. */ +static bool +block_valid_for_bookkeeping_p (basic_block bb) +{ + insn_t bb_end = BB_END (bb); + + if (!in_current_region_p (bb) || EDGE_COUNT (bb->succs) > 1) + return false; + + if (INSN_P (bb_end)) + { + if (INSN_SCHED_TIMES (bb_end) > 0) + return false; + } + else + gcc_assert (NOTE_INSN_BASIC_BLOCK_P (bb_end)); + + return true; +} + +/* Attempt to find a block that can hold bookkeeping code for path(s) incoming + into E2->dest, except from E1->src (there may be a sequence of empty basic + blocks between E1->src and E2->dest). Return found block, or NULL if new + one must be created. If LAX holds, don't assume there is a simple path + from E1->src to E2->dest. */ +static basic_block +find_block_for_bookkeeping (edge e1, edge e2, bool lax) +{ + basic_block candidate_block = NULL; + edge e; + + /* Loop over edges from E1 to E2, inclusive. 
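+     Whenever the destination of the current edge has exactly two
+     predecessors, the source of the predecessor edge that is not on the
+     E1->E2 path becomes the candidate; a second such block, or a block
+     with more than two predecessors, means no existing block can host
+     the bookkeeping and NULL is returned.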
*/ + for (e = e1; !lax || e->dest != EXIT_BLOCK_PTR; e = EDGE_SUCC (e->dest, 0)) + { + if (EDGE_COUNT (e->dest->preds) == 2) + { + if (candidate_block == NULL) + candidate_block = (EDGE_PRED (e->dest, 0) == e + ? EDGE_PRED (e->dest, 1)->src + : EDGE_PRED (e->dest, 0)->src); + else + /* Found additional edge leading to path from e1 to e2 + from aside. */ + return NULL; + } + else if (EDGE_COUNT (e->dest->preds) > 2) + /* Several edges leading to path from e1 to e2 from aside. */ + return NULL; + + if (e == e2) + return ((!lax || candidate_block) + && block_valid_for_bookkeeping_p (candidate_block) + ? candidate_block + : NULL); + + if (lax && EDGE_COUNT (e->dest->succs) != 1) + return NULL; + } + + if (lax) + return NULL; + + gcc_unreachable (); +} + +/* Create new basic block for bookkeeping code for path(s) incoming into + E2->dest, except from E1->src. Return created block. */ +static basic_block +create_block_for_bookkeeping (edge e1, edge e2) +{ + basic_block new_bb, bb = e2->dest; + + /* Check that we don't spoil the loop structure. */ + if (current_loop_nest) + { + basic_block latch = current_loop_nest->latch; + + /* We do not split header. */ + gcc_assert (e2->dest != current_loop_nest->header); + + /* We do not redirect the only edge to the latch block. */ + gcc_assert (e1->dest != latch + || !single_pred_p (latch) + || e1 != single_pred_edge (latch)); + } + + /* Split BB to insert BOOK_INSN there. */ + new_bb = sched_split_block (bb, NULL); + + /* Move note_list from the upper bb. */ + gcc_assert (BB_NOTE_LIST (new_bb) == NULL_RTX); + BB_NOTE_LIST (new_bb) = BB_NOTE_LIST (bb); + BB_NOTE_LIST (bb) = NULL_RTX; + + gcc_assert (e2->dest == bb); + + /* Skip block for bookkeeping copy when leaving E1->src. */ + if (e1->flags & EDGE_FALLTHRU) + sel_redirect_edge_and_branch_force (e1, new_bb); + else + sel_redirect_edge_and_branch (e1, new_bb); + + gcc_assert (e1->dest == new_bb); + gcc_assert (sel_bb_empty_p (bb)); + + /* To keep basic block numbers in sync between debug and non-debug + compilations, we have to rotate blocks here. Consider that we + started from (a,b)->d, (c,d)->e, and d contained only debug + insns. It would have been removed before if the debug insns + weren't there, so we'd have split e rather than d. So what we do + now is to swap the block numbers of new_bb and + single_succ(new_bb) == e, so that the insns that were in e before + get the new block number. 
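+
+     Concretely, the code below exchanges the block indices together with
+     the data keyed on them (the sel global and region bb info, the
+     BLOCK_TO_BB / CONTAINING_RGN / BB_TO_BLOCK mappings and the
+     CODE_LABEL_NUMBERs of the block heads), refreshes EXPR_ORIG_BB_INDEX
+     of every insn to its block's new index, and carries the
+     code_motion_visited_blocks bit over accordingly.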
*/ + + if (MAY_HAVE_DEBUG_INSNS) + { + basic_block succ; + insn_t insn = sel_bb_head (new_bb); + insn_t last; + + if (DEBUG_INSN_P (insn) + && single_succ_p (new_bb) + && (succ = single_succ (new_bb)) + && succ != EXIT_BLOCK_PTR + && DEBUG_INSN_P ((last = sel_bb_end (new_bb)))) + { + while (insn != last && (DEBUG_INSN_P (insn) || NOTE_P (insn))) + insn = NEXT_INSN (insn); + + if (insn == last) + { + sel_global_bb_info_def gbi; + sel_region_bb_info_def rbi; + int i; + + if (sched_verbose >= 2) + sel_print ("Swapping block ids %i and %i\n", + new_bb->index, succ->index); + + i = new_bb->index; + new_bb->index = succ->index; + succ->index = i; + + SET_BASIC_BLOCK (new_bb->index, new_bb); + SET_BASIC_BLOCK (succ->index, succ); + + memcpy (&gbi, SEL_GLOBAL_BB_INFO (new_bb), sizeof (gbi)); + memcpy (SEL_GLOBAL_BB_INFO (new_bb), SEL_GLOBAL_BB_INFO (succ), + sizeof (gbi)); + memcpy (SEL_GLOBAL_BB_INFO (succ), &gbi, sizeof (gbi)); + + memcpy (&rbi, SEL_REGION_BB_INFO (new_bb), sizeof (rbi)); + memcpy (SEL_REGION_BB_INFO (new_bb), SEL_REGION_BB_INFO (succ), + sizeof (rbi)); + memcpy (SEL_REGION_BB_INFO (succ), &rbi, sizeof (rbi)); + + i = BLOCK_TO_BB (new_bb->index); + BLOCK_TO_BB (new_bb->index) = BLOCK_TO_BB (succ->index); + BLOCK_TO_BB (succ->index) = i; + + i = CONTAINING_RGN (new_bb->index); + CONTAINING_RGN (new_bb->index) = CONTAINING_RGN (succ->index); + CONTAINING_RGN (succ->index) = i; + + for (i = 0; i < current_nr_blocks; i++) + if (BB_TO_BLOCK (i) == succ->index) + BB_TO_BLOCK (i) = new_bb->index; + else if (BB_TO_BLOCK (i) == new_bb->index) + BB_TO_BLOCK (i) = succ->index; + + FOR_BB_INSNS (new_bb, insn) + if (INSN_P (insn)) + EXPR_ORIG_BB_INDEX (INSN_EXPR (insn)) = new_bb->index; + + FOR_BB_INSNS (succ, insn) + if (INSN_P (insn)) + EXPR_ORIG_BB_INDEX (INSN_EXPR (insn)) = succ->index; + + if (bitmap_clear_bit (code_motion_visited_blocks, new_bb->index)) + bitmap_set_bit (code_motion_visited_blocks, succ->index); + + gcc_assert (LABEL_P (BB_HEAD (new_bb)) + && LABEL_P (BB_HEAD (succ))); + + if (sched_verbose >= 4) + sel_print ("Swapping code labels %i and %i\n", + CODE_LABEL_NUMBER (BB_HEAD (new_bb)), + CODE_LABEL_NUMBER (BB_HEAD (succ))); + + i = CODE_LABEL_NUMBER (BB_HEAD (new_bb)); + CODE_LABEL_NUMBER (BB_HEAD (new_bb)) + = CODE_LABEL_NUMBER (BB_HEAD (succ)); + CODE_LABEL_NUMBER (BB_HEAD (succ)) = i; + } + } + } + + return bb; +} + +/* Return insn after which we must insert bookkeeping code for path(s) incoming + into E2->dest, except from E1->src. */ +static insn_t +find_place_for_bookkeeping (edge e1, edge e2) +{ + insn_t place_to_insert; + /* Find a basic block that can hold bookkeeping. If it can be found, do not + create new basic block, but insert bookkeeping there. */ + basic_block book_block = find_block_for_bookkeeping (e1, e2, FALSE); + + if (book_block) + { + place_to_insert = BB_END (book_block); + + /* Don't use a block containing only debug insns for + bookkeeping, this causes scheduling differences between debug + and non-debug compilations, for the block would have been + removed already. 
*/ + if (DEBUG_INSN_P (place_to_insert)) + { + rtx insn = sel_bb_head (book_block); + + while (insn != place_to_insert && + (DEBUG_INSN_P (insn) || NOTE_P (insn))) + insn = NEXT_INSN (insn); + + if (insn == place_to_insert) + book_block = NULL; + } + } + + if (!book_block) + { + book_block = create_block_for_bookkeeping (e1, e2); + place_to_insert = BB_END (book_block); + if (sched_verbose >= 9) + sel_print ("New block is %i, split from bookkeeping block %i\n", + EDGE_SUCC (book_block, 0)->dest->index, book_block->index); + } + else + { + if (sched_verbose >= 9) + sel_print ("Pre-existing bookkeeping block is %i\n", book_block->index); + } + + /* If basic block ends with a jump, insert bookkeeping code right before it. */ + if (INSN_P (place_to_insert) && control_flow_insn_p (place_to_insert)) + place_to_insert = PREV_INSN (place_to_insert); + + return place_to_insert; +} + +/* Find a proper seqno for bookkeeing insn inserted at PLACE_TO_INSERT + for JOIN_POINT. */ +static int +find_seqno_for_bookkeeping (insn_t place_to_insert, insn_t join_point) +{ + int seqno; + rtx next; + + /* Check if we are about to insert bookkeeping copy before a jump, and use + jump's seqno for the copy; otherwise, use JOIN_POINT's seqno. */ + next = NEXT_INSN (place_to_insert); + if (INSN_P (next) + && JUMP_P (next) + && BLOCK_FOR_INSN (next) == BLOCK_FOR_INSN (place_to_insert)) + { + gcc_assert (INSN_SCHED_TIMES (next) == 0); + seqno = INSN_SEQNO (next); + } + else if (INSN_SEQNO (join_point) > 0) + seqno = INSN_SEQNO (join_point); + else + { + seqno = get_seqno_by_preds (place_to_insert); + + /* Sometimes the fences can move in such a way that there will be + no instructions with positive seqno around this bookkeeping. + This means that there will be no way to get to it by a regular + fence movement. Never mind because we pick up such pieces for + rescheduling anyways, so any positive value will do for now. */ + if (seqno < 0) + { + gcc_assert (pipelining_p); + seqno = 1; + } + } + + gcc_assert (seqno > 0); + return seqno; +} + +/* Insert bookkeeping copy of C_EXPS's insn after PLACE_TO_INSERT, assigning + NEW_SEQNO to it. Return created insn. */ +static insn_t +emit_bookkeeping_insn (insn_t place_to_insert, expr_t c_expr, int new_seqno) +{ + rtx new_insn_rtx = create_copy_of_insn_rtx (EXPR_INSN_RTX (c_expr)); + + vinsn_t new_vinsn + = create_vinsn_from_insn_rtx (new_insn_rtx, + VINSN_UNIQUE_P (EXPR_VINSN (c_expr))); + + insn_t new_insn = emit_insn_from_expr_after (c_expr, new_vinsn, new_seqno, + place_to_insert); + + INSN_SCHED_TIMES (new_insn) = 0; + bitmap_set_bit (current_copies, INSN_UID (new_insn)); + + return new_insn; +} + +/* Generate a bookkeeping copy of C_EXPR's insn for path(s) incoming into to + E2->dest, except from E1->src (there may be a sequence of empty blocks + between E1->src and E2->dest). Return block containing the copy. + All scheduler data is initialized for the newly created insn. 
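+
+   In outline, ignoring the early return and the data set exchange, the
+   body below performs:
+
+     place_to_insert = find_place_for_bookkeeping (e1, e2);
+     new_seqno = find_seqno_for_bookkeeping (place_to_insert, join_point);
+     new_insn = emit_bookkeeping_insn (place_to_insert, c_expr, new_seqno);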
*/ +static basic_block +generate_bookkeeping_insn (expr_t c_expr, edge e1, edge e2) +{ + insn_t join_point, place_to_insert, new_insn; + int new_seqno; + bool need_to_exchange_data_sets; + + if (sched_verbose >= 4) + sel_print ("Generating bookkeeping insn (%d->%d)\n", e1->src->index, + e2->dest->index); + + join_point = sel_bb_head (e2->dest); + place_to_insert = find_place_for_bookkeeping (e1, e2); + if (!place_to_insert) + return NULL; + new_seqno = find_seqno_for_bookkeeping (place_to_insert, join_point); + need_to_exchange_data_sets + = sel_bb_empty_p (BLOCK_FOR_INSN (place_to_insert)); + + new_insn = emit_bookkeeping_insn (place_to_insert, c_expr, new_seqno); + + /* When inserting bookkeeping insn in new block, av sets should be + following: old basic block (that now holds bookkeeping) data sets are + the same as was before generation of bookkeeping, and new basic block + (that now hold all other insns of old basic block) data sets are + invalid. So exchange data sets for these basic blocks as sel_split_block + mistakenly exchanges them in this case. Cannot do it earlier because + when single instruction is added to new basic block it should hold NULL + lv_set. */ + if (need_to_exchange_data_sets) + exchange_data_sets (BLOCK_FOR_INSN (new_insn), + BLOCK_FOR_INSN (join_point)); + + stat_bookkeeping_copies++; + return BLOCK_FOR_INSN (new_insn); +} + +/* Remove from AV_PTR all insns that may need bookkeeping when scheduling + on FENCE, but we are unable to copy them. */ +static void +remove_insns_that_need_bookkeeping (fence_t fence, av_set_t *av_ptr) +{ + expr_t expr; + av_set_iterator i; + + /* An expression does not need bookkeeping if it is available on all paths + from current block to original block and current block dominates + original block. We check availability on all paths by examining + EXPR_SPEC; this is not equivalent, because it may be positive even + if expr is available on all paths (but if expr is not available on + any path, EXPR_SPEC will be positive). */ + + FOR_EACH_EXPR_1 (expr, i, av_ptr) + { + if (!control_flow_insn_p (EXPR_INSN_RTX (expr)) + && (!bookkeeping_p || VINSN_UNIQUE_P (EXPR_VINSN (expr))) + && (EXPR_SPEC (expr) + || !EXPR_ORIG_BB_INDEX (expr) + || !dominated_by_p (CDI_DOMINATORS, + BASIC_BLOCK (EXPR_ORIG_BB_INDEX (expr)), + BLOCK_FOR_INSN (FENCE_INSN (fence))))) + { + if (sched_verbose >= 4) + sel_print ("Expr %d removed because it would need bookkeeping, which " + "cannot be created\n", INSN_UID (EXPR_INSN_RTX (expr))); + av_set_iter_remove (&i); + } + } +} + +/* Moving conditional jump through some instructions. + + Consider example: + + ... <- current scheduling point + NOTE BASIC BLOCK: <- bb header + (p8) add r14=r14+0x9;; + (p8) mov [r14]=r23 + (!p8) jump L1;; + NOTE BASIC BLOCK: + ... + + We can schedule jump one cycle earlier, than mov, because they cannot be + executed together as their predicates are mutually exclusive. + + This is done in this way: first, new fallthrough basic block is created + after jump (it is always can be done, because there already should be a + fallthrough block, where control flow goes in case of predicate being true - + in our example; otherwise there should be a dependence between those + instructions and jump and we cannot schedule jump right now); + next, all instructions between jump and current scheduling point are moved + to this new block. 
And the result is this: + + NOTE BASIC BLOCK: + (!p8) jump L1 <- current scheduling point + NOTE BASIC BLOCK: <- bb header + (p8) add r14=r14+0x9;; + (p8) mov [r14]=r23 + NOTE BASIC BLOCK: + ... +*/ +static void +move_cond_jump (rtx insn, bnd_t bnd) +{ + edge ft_edge; + basic_block block_from, block_next, block_new, block_bnd, bb; + rtx next, prev, link, head; + + block_from = BLOCK_FOR_INSN (insn); + block_bnd = BLOCK_FOR_INSN (BND_TO (bnd)); + prev = BND_TO (bnd); + +#ifdef ENABLE_CHECKING + /* Moving of jump should not cross any other jumps or beginnings of new + basic blocks. The only exception is when we move a jump through + mutually exclusive insns along fallthru edges. */ + if (block_from != block_bnd) + { + bb = block_from; + for (link = PREV_INSN (insn); link != PREV_INSN (prev); + link = PREV_INSN (link)) + { + if (INSN_P (link)) + gcc_assert (sched_insns_conditions_mutex_p (insn, link)); + if (BLOCK_FOR_INSN (link) && BLOCK_FOR_INSN (link) != bb) + { + gcc_assert (single_pred (bb) == BLOCK_FOR_INSN (link)); + bb = BLOCK_FOR_INSN (link); + } + } + } +#endif + + /* Jump is moved to the boundary. */ + next = PREV_INSN (insn); + BND_TO (bnd) = insn; + + ft_edge = find_fallthru_edge_from (block_from); + block_next = ft_edge->dest; + /* There must be a fallthrough block (or where should go + control flow in case of false jump predicate otherwise?). */ + gcc_assert (block_next); + + /* Create new empty basic block after source block. */ + block_new = sel_split_edge (ft_edge); + gcc_assert (block_new->next_bb == block_next + && block_from->next_bb == block_new); + + /* Move all instructions except INSN to BLOCK_NEW. */ + bb = block_bnd; + head = BB_HEAD (block_new); + while (bb != block_from->next_bb) + { + rtx from, to; + from = bb == block_bnd ? prev : sel_bb_head (bb); + to = bb == block_from ? next : sel_bb_end (bb); + + /* The jump being moved can be the first insn in the block. + In this case we don't have to move anything in this block. */ + if (NEXT_INSN (to) != from) + { + reorder_insns (from, to, head); + + for (link = to; link != head; link = PREV_INSN (link)) + EXPR_ORIG_BB_INDEX (INSN_EXPR (link)) = block_new->index; + head = to; + } + + /* Cleanup possibly empty blocks left. */ + block_next = bb->next_bb; + if (bb != block_from) + tidy_control_flow (bb, false); + bb = block_next; + } + + /* Assert there is no jump to BLOCK_NEW, only fallthrough edge. */ + gcc_assert (NOTE_INSN_BASIC_BLOCK_P (BB_HEAD (block_new))); + + gcc_assert (!sel_bb_empty_p (block_from) + && !sel_bb_empty_p (block_new)); + + /* Update data sets for BLOCK_NEW to represent that INSN and + instructions from the other branch of INSN is no longer + available at BLOCK_NEW. */ + BB_AV_LEVEL (block_new) = global_level; + gcc_assert (BB_LV_SET (block_new) == NULL); + BB_LV_SET (block_new) = get_clear_regset_from_pool (); + update_data_sets (sel_bb_head (block_new)); + + /* INSN is a new basic block header - so prepare its data + structures and update availability and liveness sets. */ + update_data_sets (insn); + + if (sched_verbose >= 4) + sel_print ("Moving jump %d\n", INSN_UID (insn)); +} + +/* Remove nops generated during move_op for preventing removal of empty + basic blocks. */ +static void +remove_temp_moveop_nops (bool full_tidying) +{ + int i; + insn_t insn; + + FOR_EACH_VEC_ELT (insn_t, vec_temp_moveop_nops, i, insn) + { + gcc_assert (INSN_NOP_P (insn)); + return_nop_to_pool (insn, full_tidying); + } + + /* Empty the vector. 
*/ + if (VEC_length (insn_t, vec_temp_moveop_nops) > 0) + VEC_block_remove (insn_t, vec_temp_moveop_nops, 0, + VEC_length (insn_t, vec_temp_moveop_nops)); +} + +/* Records the maximal UID before moving up an instruction. Used for + distinguishing between bookkeeping copies and original insns. */ +static int max_uid_before_move_op = 0; + +/* Remove from AV_VLIW_P all instructions but next when debug counter + tells us so. Next instruction is fetched from BNDS. */ +static void +remove_insns_for_debug (blist_t bnds, av_set_t *av_vliw_p) +{ + if (! dbg_cnt (sel_sched_insn_cnt)) + /* Leave only the next insn in av_vliw. */ + { + av_set_iterator av_it; + expr_t expr; + bnd_t bnd = BLIST_BND (bnds); + insn_t next = BND_TO (bnd); + + gcc_assert (BLIST_NEXT (bnds) == NULL); + + FOR_EACH_EXPR_1 (expr, av_it, av_vliw_p) + if (EXPR_INSN_RTX (expr) != next) + av_set_iter_remove (&av_it); + } +} + +/* Compute available instructions on BNDS. FENCE is the current fence. Write + the computed set to *AV_VLIW_P. */ +static void +compute_av_set_on_boundaries (fence_t fence, blist_t bnds, av_set_t *av_vliw_p) +{ + if (sched_verbose >= 2) + { + sel_print ("Boundaries: "); + dump_blist (bnds); + sel_print ("\n"); + } + + for (; bnds; bnds = BLIST_NEXT (bnds)) + { + bnd_t bnd = BLIST_BND (bnds); + av_set_t av1_copy; + insn_t bnd_to = BND_TO (bnd); + + /* Rewind BND->TO to the basic block header in case some bookkeeping + instructions were inserted before BND->TO and it needs to be + adjusted. */ + if (sel_bb_head_p (bnd_to)) + gcc_assert (INSN_SCHED_TIMES (bnd_to) == 0); + else + while (INSN_SCHED_TIMES (PREV_INSN (bnd_to)) == 0) + { + bnd_to = PREV_INSN (bnd_to); + if (sel_bb_head_p (bnd_to)) + break; + } + + if (BND_TO (bnd) != bnd_to) + { + gcc_assert (FENCE_INSN (fence) == BND_TO (bnd)); + FENCE_INSN (fence) = bnd_to; + BND_TO (bnd) = bnd_to; + } + + av_set_clear (&BND_AV (bnd)); + BND_AV (bnd) = compute_av_set (BND_TO (bnd), NULL, 0, true); + + av_set_clear (&BND_AV1 (bnd)); + BND_AV1 (bnd) = av_set_copy (BND_AV (bnd)); + + moveup_set_inside_insn_group (&BND_AV1 (bnd), NULL); + + av1_copy = av_set_copy (BND_AV1 (bnd)); + av_set_union_and_clear (av_vliw_p, &av1_copy, NULL); + } + + if (sched_verbose >= 2) + { + sel_print ("Available exprs (vliw form): "); + dump_av_set (*av_vliw_p); + sel_print ("\n"); + } +} + +/* Calculate the sequential av set on BND corresponding to the EXPR_VLIW + expression. When FOR_MOVEOP is true, also replace the register of + expressions found with the register from EXPR_VLIW. */ +static av_set_t +find_sequential_best_exprs (bnd_t bnd, expr_t expr_vliw, bool for_moveop) +{ + av_set_t expr_seq = NULL; + expr_t expr; + av_set_iterator i; + + FOR_EACH_EXPR (expr, i, BND_AV (bnd)) + { + if (equal_after_moveup_path_p (expr, NULL, expr_vliw)) + { + if (for_moveop) + { + /* The sequential expression has the right form to pass + to move_op except when renaming happened. Put the + correct register in EXPR then. */ + if (EXPR_SEPARABLE_P (expr) && REG_P (EXPR_LHS (expr))) + { + if (expr_dest_regno (expr) != expr_dest_regno (expr_vliw)) + { + replace_dest_with_reg_in_expr (expr, EXPR_LHS (expr_vliw)); + stat_renamed_scheduled++; + } + /* Also put the correct TARGET_AVAILABLE bit on the expr. + This is needed when renaming came up with original + register. 
*/ + else if (EXPR_TARGET_AVAILABLE (expr) + != EXPR_TARGET_AVAILABLE (expr_vliw)) + { + gcc_assert (EXPR_TARGET_AVAILABLE (expr_vliw) == 1); + EXPR_TARGET_AVAILABLE (expr) = 1; + } + } + if (EXPR_WAS_SUBSTITUTED (expr)) + stat_substitutions_total++; + } + + av_set_add (&expr_seq, expr); + + /* With substitution inside insn group, it is possible + that more than one expression in expr_seq will correspond + to expr_vliw. In this case, choose one as the attempt to + move both leads to miscompiles. */ + break; + } + } + + if (for_moveop && sched_verbose >= 2) + { + sel_print ("Best expression(s) (sequential form): "); + dump_av_set (expr_seq); + sel_print ("\n"); + } + + return expr_seq; +} + + +/* Move nop to previous block. */ +static void ATTRIBUTE_UNUSED +move_nop_to_previous_block (insn_t nop, basic_block prev_bb) +{ + insn_t prev_insn, next_insn, note; + + gcc_assert (sel_bb_head_p (nop) + && prev_bb == BLOCK_FOR_INSN (nop)->prev_bb); + note = bb_note (BLOCK_FOR_INSN (nop)); + prev_insn = sel_bb_end (prev_bb); + next_insn = NEXT_INSN (nop); + gcc_assert (prev_insn != NULL_RTX + && PREV_INSN (note) == prev_insn); + + NEXT_INSN (prev_insn) = nop; + PREV_INSN (nop) = prev_insn; + + PREV_INSN (note) = nop; + NEXT_INSN (note) = next_insn; + + NEXT_INSN (nop) = note; + PREV_INSN (next_insn) = note; + + BB_END (prev_bb) = nop; + BLOCK_FOR_INSN (nop) = prev_bb; +} + +/* Prepare a place to insert the chosen expression on BND. */ +static insn_t +prepare_place_to_insert (bnd_t bnd) +{ + insn_t place_to_insert; + + /* Init place_to_insert before calling move_op, as the later + can possibly remove BND_TO (bnd). */ + if (/* If this is not the first insn scheduled. */ + BND_PTR (bnd)) + { + /* Add it after last scheduled. */ + place_to_insert = ILIST_INSN (BND_PTR (bnd)); + if (DEBUG_INSN_P (place_to_insert)) + { + ilist_t l = BND_PTR (bnd); + while ((l = ILIST_NEXT (l)) && + DEBUG_INSN_P (ILIST_INSN (l))) + ; + if (!l) + place_to_insert = NULL; + } + } + else + place_to_insert = NULL; + + if (!place_to_insert) + { + /* Add it before BND_TO. The difference is in the + basic block, where INSN will be added. */ + place_to_insert = get_nop_from_pool (BND_TO (bnd)); + gcc_assert (BLOCK_FOR_INSN (place_to_insert) + == BLOCK_FOR_INSN (BND_TO (bnd))); + } + + return place_to_insert; +} + +/* Find original instructions for EXPR_SEQ and move it to BND boundary. + Return the expression to emit in C_EXPR. */ +static bool +move_exprs_to_boundary (bnd_t bnd, expr_t expr_vliw, + av_set_t expr_seq, expr_t c_expr) +{ + bool b, should_move; + unsigned book_uid; + bitmap_iterator bi; + int n_bookkeeping_copies_before_moveop; + + /* Make a move. This call will remove the original operation, + insert all necessary bookkeeping instructions and update the + data sets. After that all we have to do is add the operation + at before BND_TO (BND). */ + n_bookkeeping_copies_before_moveop = stat_bookkeeping_copies; + max_uid_before_move_op = get_max_uid (); + bitmap_clear (current_copies); + bitmap_clear (current_originators); + + b = move_op (BND_TO (bnd), expr_seq, expr_vliw, + get_dest_from_orig_ops (expr_seq), c_expr, &should_move); + + /* We should be able to find the expression we've chosen for + scheduling. */ + gcc_assert (b); + + if (stat_bookkeeping_copies > n_bookkeeping_copies_before_moveop) + stat_insns_needed_bookkeeping++; + + EXECUTE_IF_SET_IN_BITMAP (current_copies, 0, book_uid, bi) + { + unsigned uid; + bitmap_iterator bi; + + /* We allocate these bitmaps lazily. */ + if (! 
INSN_ORIGINATORS_BY_UID (book_uid)) + INSN_ORIGINATORS_BY_UID (book_uid) = BITMAP_ALLOC (NULL); + + bitmap_copy (INSN_ORIGINATORS_BY_UID (book_uid), + current_originators); + + /* Transitively add all originators' originators. */ + EXECUTE_IF_SET_IN_BITMAP (current_originators, 0, uid, bi) + if (INSN_ORIGINATORS_BY_UID (uid)) + bitmap_ior_into (INSN_ORIGINATORS_BY_UID (book_uid), + INSN_ORIGINATORS_BY_UID (uid)); + } + + return should_move; +} + + +/* Debug a DFA state as an array of bytes. */ +static void +debug_state (state_t state) +{ + unsigned char *p; + unsigned int i, size = dfa_state_size; + + sel_print ("state (%u):", size); + for (i = 0, p = (unsigned char *) state; i < size; i++) + sel_print (" %d", p[i]); + sel_print ("\n"); +} + +/* Advance state on FENCE with INSN. Return true if INSN is + an ASM, and we should advance state once more. */ +static bool +advance_state_on_fence (fence_t fence, insn_t insn) +{ + bool asm_p; + + if (recog_memoized (insn) >= 0) + { + int res; + state_t temp_state = alloca (dfa_state_size); + + gcc_assert (!INSN_ASM_P (insn)); + asm_p = false; + + memcpy (temp_state, FENCE_STATE (fence), dfa_state_size); + res = state_transition (FENCE_STATE (fence), insn); + gcc_assert (res < 0); + + if (memcmp (temp_state, FENCE_STATE (fence), dfa_state_size)) + { + FENCE_ISSUED_INSNS (fence)++; + + /* We should never issue more than issue_rate insns. */ + if (FENCE_ISSUED_INSNS (fence) > issue_rate) + gcc_unreachable (); + } + } + else + { + /* This could be an ASM insn which we'd like to schedule + on the next cycle. */ + asm_p = INSN_ASM_P (insn); + if (!FENCE_STARTS_CYCLE_P (fence) && asm_p) + advance_one_cycle (fence); + } + + if (sched_verbose >= 2) + debug_state (FENCE_STATE (fence)); + if (!DEBUG_INSN_P (insn)) + FENCE_STARTS_CYCLE_P (fence) = 0; + FENCE_ISSUE_MORE (fence) = can_issue_more; + return asm_p; +} + +/* Update FENCE on which INSN was scheduled and this INSN, too. NEED_STALL + is nonzero if we need to stall after issuing INSN. */ +static void +update_fence_and_insn (fence_t fence, insn_t insn, int need_stall) +{ + bool asm_p; + + /* First, reflect that something is scheduled on this fence. */ + asm_p = advance_state_on_fence (fence, insn); + FENCE_LAST_SCHEDULED_INSN (fence) = insn; + VEC_safe_push (rtx, gc, FENCE_EXECUTING_INSNS (fence), insn); + if (SCHED_GROUP_P (insn)) + { + FENCE_SCHED_NEXT (fence) = INSN_SCHED_NEXT (insn); + SCHED_GROUP_P (insn) = 0; + } + else + FENCE_SCHED_NEXT (fence) = NULL_RTX; + if (INSN_UID (insn) < FENCE_READY_TICKS_SIZE (fence)) + FENCE_READY_TICKS (fence) [INSN_UID (insn)] = 0; + + /* Set instruction scheduling info. This will be used in bundling, + pipelining, tick computations etc. */ + ++INSN_SCHED_TIMES (insn); + EXPR_TARGET_AVAILABLE (INSN_EXPR (insn)) = true; + EXPR_ORIG_SCHED_CYCLE (INSN_EXPR (insn)) = FENCE_CYCLE (fence); + INSN_AFTER_STALL_P (insn) = FENCE_AFTER_STALL_P (fence); + INSN_SCHED_CYCLE (insn) = FENCE_CYCLE (fence); + + /* This does not account for adjust_cost hooks, just add the biggest + constant the hook may add to the latency. TODO: make this + a target dependent constant. */ + INSN_READY_CYCLE (insn) + = INSN_SCHED_CYCLE (insn) + (INSN_CODE (insn) < 0 + ? 1 + : maximal_insn_latency (insn) + 1); + + /* Change these fields last, as they're used above. */ + FENCE_AFTER_STALL_P (fence) = 0; + if (asm_p || need_stall) + advance_one_cycle (fence); + + /* Indicate that we've scheduled something on this fence. 
*/ + FENCE_SCHEDULED_P (fence) = true; + scheduled_something_on_previous_fence = true; + + /* Print debug information when insn's fields are updated. */ + if (sched_verbose >= 2) + { + sel_print ("Scheduling insn: "); + dump_insn_1 (insn, 1); + sel_print ("\n"); + } +} + +/* Update boundary BND (and, if needed, FENCE) with INSN, remove the + old boundary from BNDSP, add new boundaries to BNDS_TAIL_P and + return it. */ +static blist_t * +update_boundaries (fence_t fence, bnd_t bnd, insn_t insn, blist_t *bndsp, + blist_t *bnds_tailp) +{ + succ_iterator si; + insn_t succ; + + advance_deps_context (BND_DC (bnd), insn); + FOR_EACH_SUCC_1 (succ, si, insn, + SUCCS_NORMAL | SUCCS_SKIP_TO_LOOP_EXITS) + { + ilist_t ptr = ilist_copy (BND_PTR (bnd)); + + ilist_add (&ptr, insn); + + if (DEBUG_INSN_P (insn) && sel_bb_end_p (insn) + && is_ineligible_successor (succ, ptr)) + { + ilist_clear (&ptr); + continue; + } + + if (FENCE_INSN (fence) == insn && !sel_bb_end_p (insn)) + { + if (sched_verbose >= 9) + sel_print ("Updating fence insn from %i to %i\n", + INSN_UID (insn), INSN_UID (succ)); + FENCE_INSN (fence) = succ; + } + blist_add (bnds_tailp, succ, ptr, BND_DC (bnd)); + bnds_tailp = &BLIST_NEXT (*bnds_tailp); + } + + blist_remove (bndsp); + return bnds_tailp; +} + +/* Schedule EXPR_VLIW on BND. Return the insn emitted. */ +static insn_t +schedule_expr_on_boundary (bnd_t bnd, expr_t expr_vliw, int seqno) +{ + av_set_t expr_seq; + expr_t c_expr = XALLOCA (expr_def); + insn_t place_to_insert; + insn_t insn; + bool should_move; + + expr_seq = find_sequential_best_exprs (bnd, expr_vliw, true); + + /* In case of scheduling a jump skipping some other instructions, + prepare CFG. After this, jump is at the boundary and can be + scheduled as usual insn by MOVE_OP. */ + if (vinsn_cond_branch_p (EXPR_VINSN (expr_vliw))) + { + insn = EXPR_INSN_RTX (expr_vliw); + + /* Speculative jumps are not handled. */ + if (insn != BND_TO (bnd) + && !sel_insn_is_speculation_check (insn)) + move_cond_jump (insn, bnd); + } + + /* Find a place for C_EXPR to schedule. */ + place_to_insert = prepare_place_to_insert (bnd); + should_move = move_exprs_to_boundary (bnd, expr_vliw, expr_seq, c_expr); + clear_expr (c_expr); + + /* Add the instruction. The corner case to care about is when + the expr_seq set has more than one expr, and we chose the one that + is not equal to expr_vliw. Then expr_vliw may be insn in stream, and + we can't use it. Generate the new vinsn. */ + if (INSN_IN_STREAM_P (EXPR_INSN_RTX (expr_vliw))) + { + vinsn_t vinsn_new; + + vinsn_new = vinsn_copy (EXPR_VINSN (expr_vliw), false); + change_vinsn_in_expr (expr_vliw, vinsn_new); + should_move = false; + } + if (should_move) + insn = sel_move_insn (expr_vliw, seqno, place_to_insert); + else + insn = emit_insn_from_expr_after (expr_vliw, NULL, seqno, + place_to_insert); + + /* Return the nops generated for preserving of data sets back + into pool. */ + if (INSN_NOP_P (place_to_insert)) + return_nop_to_pool (place_to_insert, !DEBUG_INSN_P (insn)); + remove_temp_moveop_nops (!DEBUG_INSN_P (insn)); + + av_set_clear (&expr_seq); + + /* Save the expression scheduled so to reset target availability if we'll + meet it later on the same fence. */ + if (EXPR_WAS_RENAMED (expr_vliw)) + vinsn_vec_add (&vec_target_unavailable_vinsns, INSN_EXPR (insn)); + + /* Check that the recent movement didn't destroyed loop + structure. 
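+     When pipelining, this amounts to the current loop, if any, still
+     having a latch edge, which is what the assertion below verifies.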
*/ + gcc_assert (!pipelining_p + || current_loop_nest == NULL + || loop_latch_edge (current_loop_nest)); + return insn; +} + +/* Stall for N cycles on FENCE. */ +static void +stall_for_cycles (fence_t fence, int n) +{ + int could_more; + + could_more = n > 1 || FENCE_ISSUED_INSNS (fence) < issue_rate; + while (n--) + advance_one_cycle (fence); + if (could_more) + FENCE_AFTER_STALL_P (fence) = 1; +} + +/* Gather a parallel group of insns at FENCE and assign their seqno + to SEQNO. All scheduled insns are gathered in SCHEDULED_INSNS_TAILPP + list for later recalculation of seqnos. */ +static void +fill_insns (fence_t fence, int seqno, ilist_t **scheduled_insns_tailpp) +{ + blist_t bnds = NULL, *bnds_tailp; + av_set_t av_vliw = NULL; + insn_t insn = FENCE_INSN (fence); + + if (sched_verbose >= 2) + sel_print ("Starting fill_insns for insn %d, cycle %d\n", + INSN_UID (insn), FENCE_CYCLE (fence)); + + blist_add (&bnds, insn, NULL, FENCE_DC (fence)); + bnds_tailp = &BLIST_NEXT (bnds); + set_target_context (FENCE_TC (fence)); + can_issue_more = FENCE_ISSUE_MORE (fence); + target_bb = INSN_BB (insn); + + /* Do while we can add any operation to the current group. */ + do + { + blist_t *bnds_tailp1, *bndsp; + expr_t expr_vliw; + int need_stall; + int was_stall = 0, scheduled_insns = 0; + int max_insns = pipelining_p ? issue_rate : 2 * issue_rate; + int max_stall = pipelining_p ? 1 : 3; + bool last_insn_was_debug = false; + bool was_debug_bb_end_p = false; + + compute_av_set_on_boundaries (fence, bnds, &av_vliw); + remove_insns_that_need_bookkeeping (fence, &av_vliw); + remove_insns_for_debug (bnds, &av_vliw); + + /* Return early if we have nothing to schedule. */ + if (av_vliw == NULL) + break; + + /* Choose the best expression and, if needed, destination register + for it. */ + do + { + expr_vliw = find_best_expr (&av_vliw, bnds, fence, &need_stall); + if (! expr_vliw && need_stall) + { + /* All expressions required a stall. Do not recompute av sets + as we'll get the same answer (modulo the insns between + the fence and its boundary, which will not be available for + pipelining). + If we are going to stall for too long, break to recompute av + sets and bring more insns for pipelining. */ + was_stall++; + if (need_stall <= 3) + stall_for_cycles (fence, need_stall); + else + { + stall_for_cycles (fence, 1); + break; + } + } + } + while (! expr_vliw && need_stall); + + /* Now either we've selected expr_vliw or we have nothing to schedule. */ + if (!expr_vliw) + { + av_set_clear (&av_vliw); + break; + } + + bndsp = &bnds; + bnds_tailp1 = bnds_tailp; + + do + /* This code will be executed only once until we'd have several + boundaries per fence. */ + { + bnd_t bnd = BLIST_BND (*bndsp); + + if (!av_set_is_in_p (BND_AV1 (bnd), EXPR_VINSN (expr_vliw))) + { + bndsp = &BLIST_NEXT (*bndsp); + continue; + } + + insn = schedule_expr_on_boundary (bnd, expr_vliw, seqno); + last_insn_was_debug = DEBUG_INSN_P (insn); + if (last_insn_was_debug) + was_debug_bb_end_p = (insn == BND_TO (bnd) && sel_bb_end_p (insn)); + update_fence_and_insn (fence, insn, need_stall); + bnds_tailp = update_boundaries (fence, bnd, insn, bndsp, bnds_tailp); + + /* Add insn to the list of scheduled on this cycle instructions. 
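+	     SCHEDULED_INSNS_TAILPP addresses the tail link of that list, so
+	     the append below is O(1): add the new cell through the tail
+	     pointer, then advance the pointer to the new cell's next field.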
*/ + ilist_add (*scheduled_insns_tailpp, insn); + *scheduled_insns_tailpp = &ILIST_NEXT (**scheduled_insns_tailpp); + } + while (*bndsp != *bnds_tailp1); + + av_set_clear (&av_vliw); + if (!last_insn_was_debug) + scheduled_insns++; + + /* We currently support information about candidate blocks only for + one 'target_bb' block. Hence we can't schedule after jump insn, + as this will bring two boundaries and, hence, necessity to handle + information for two or more blocks concurrently. */ + if ((last_insn_was_debug ? was_debug_bb_end_p : sel_bb_end_p (insn)) + || (was_stall + && (was_stall >= max_stall + || scheduled_insns >= max_insns))) + break; + } + while (bnds); + + gcc_assert (!FENCE_BNDS (fence)); + + /* Update boundaries of the FENCE. */ + while (bnds) + { + ilist_t ptr = BND_PTR (BLIST_BND (bnds)); + + if (ptr) + { + insn = ILIST_INSN (ptr); + + if (!ilist_is_in_p (FENCE_BNDS (fence), insn)) + ilist_add (&FENCE_BNDS (fence), insn); + } + + blist_remove (&bnds); + } + + /* Update target context on the fence. */ + reset_target_context (FENCE_TC (fence), false); +} + +/* All exprs in ORIG_OPS must have the same destination register or memory. + Return that destination. */ +static rtx +get_dest_from_orig_ops (av_set_t orig_ops) +{ + rtx dest = NULL_RTX; + av_set_iterator av_it; + expr_t expr; + bool first_p = true; + + FOR_EACH_EXPR (expr, av_it, orig_ops) + { + rtx x = EXPR_LHS (expr); + + if (first_p) + { + first_p = false; + dest = x; + } + else + gcc_assert (dest == x + || (dest != NULL_RTX && x != NULL_RTX + && rtx_equal_p (dest, x))); + } + + return dest; +} + +/* Update data sets for the bookkeeping block and record those expressions + which become no longer available after inserting this bookkeeping. */ +static void +update_and_record_unavailable_insns (basic_block book_block) +{ + av_set_iterator i; + av_set_t old_av_set = NULL; + expr_t cur_expr; + rtx bb_end = sel_bb_end (book_block); + + /* First, get correct liveness in the bookkeeping block. The problem is + the range between the bookeeping insn and the end of block. */ + update_liveness_on_insn (bb_end); + if (control_flow_insn_p (bb_end)) + update_liveness_on_insn (PREV_INSN (bb_end)); + + /* If there's valid av_set on BOOK_BLOCK, then there might exist another + fence above, where we may choose to schedule an insn which is + actually blocked from moving up with the bookkeeping we create here. */ + if (AV_SET_VALID_P (sel_bb_head (book_block))) + { + old_av_set = av_set_copy (BB_AV_SET (book_block)); + update_data_sets (sel_bb_head (book_block)); + + /* Traverse all the expressions in the old av_set and check whether + CUR_EXPR is in new AV_SET. */ + FOR_EACH_EXPR (cur_expr, i, old_av_set) + { + expr_t new_expr = av_set_lookup (BB_AV_SET (book_block), + EXPR_VINSN (cur_expr)); + + if (! new_expr + /* In this case, we can just turn off the E_T_A bit, but we can't + represent this information with the current vector. */ + || EXPR_TARGET_AVAILABLE (new_expr) + != EXPR_TARGET_AVAILABLE (cur_expr)) + /* Unfortunately, the below code could be also fired up on + separable insns, e.g. when moving insns through the new + speculation check as in PR 53701. */ + vinsn_vec_add (&vec_bookkeeping_blocked_vinsns, cur_expr); + } + + av_set_clear (&old_av_set); + } +} + +/* The main effect of this function is that sparams->c_expr is merged + with (or copied to) lparams->c_expr_merged. If there's only one successor, + we avoid merging anything by copying sparams->c_expr to lparams->c_expr_merged. 
+ lparams->c_expr_merged is copied back to sparams->c_expr after all + successors has been traversed. lparams->c_expr_local is an expr allocated + on stack in the caller function, and is used if there is more than one + successor. + + SUCC is one of the SUCCS_NORMAL successors of INSN, + MOVEOP_DRV_CALL_RES is the result of call code_motion_path_driver on succ, + LPARAMS and STATIC_PARAMS contain the parameters described above. */ +static void +move_op_merge_succs (insn_t insn ATTRIBUTE_UNUSED, + insn_t succ ATTRIBUTE_UNUSED, + int moveop_drv_call_res, + cmpd_local_params_p lparams, void *static_params) +{ + moveop_static_params_p sparams = (moveop_static_params_p) static_params; + + /* Nothing to do, if original expr wasn't found below. */ + if (moveop_drv_call_res != 1) + return; + + /* If this is a first successor. */ + if (!lparams->c_expr_merged) + { + lparams->c_expr_merged = sparams->c_expr; + sparams->c_expr = lparams->c_expr_local; + } + else + { + /* We must merge all found expressions to get reasonable + EXPR_SPEC_DONE_DS for the resulting insn. If we don't + do so then we can first find the expr with epsilon + speculation success probability and only then with the + good probability. As a result the insn will get epsilon + probability and will never be scheduled because of + weakness_cutoff in find_best_expr. + + We call merge_expr_data here instead of merge_expr + because due to speculation C_EXPR and X may have the + same insns with different speculation types. And as of + now such insns are considered non-equal. + + However, EXPR_SCHED_TIMES is different -- we must get + SCHED_TIMES from a real insn, not a bookkeeping copy. + We force this here. Instead, we may consider merging + SCHED_TIMES to the maximum instead of minimum in the + below function. */ + int old_times = EXPR_SCHED_TIMES (lparams->c_expr_merged); + + merge_expr_data (lparams->c_expr_merged, sparams->c_expr, NULL); + if (EXPR_SCHED_TIMES (sparams->c_expr) == 0) + EXPR_SCHED_TIMES (lparams->c_expr_merged) = old_times; + + clear_expr (sparams->c_expr); + } +} + +/* Add used regs for the successor SUCC into SPARAMS->USED_REGS. + + SUCC is one of the SUCCS_NORMAL successors of INSN, + MOVEOP_DRV_CALL_RES is the result of call code_motion_path_driver on succ or 0, + if SUCC is one of SUCCS_BACK or SUCCS_OUT. + STATIC_PARAMS contain USED_REGS set. */ +static void +fur_merge_succs (insn_t insn ATTRIBUTE_UNUSED, insn_t succ, + int moveop_drv_call_res, + cmpd_local_params_p lparams ATTRIBUTE_UNUSED, + void *static_params) +{ + regset succ_live; + fur_static_params_p sparams = (fur_static_params_p) static_params; + + /* Here we compute live regsets only for branches that do not lie + on the code motion paths. These branches correspond to value + MOVEOP_DRV_CALL_RES==0 and include SUCCS_BACK and SUCCS_OUT, though + for such branches code_motion_path_driver is not called. */ + if (moveop_drv_call_res != 0) + return; + + /* Mark all registers that do not meet the following condition: + (3) not live on the other path of any conditional branch + that is passed by the operation, in case original + operations are not present on both paths of the + conditional branch. */ + succ_live = compute_live (succ); + IOR_REG_SET (sparams->used_regs, succ_live); +} + +/* This function is called after the last successor. Copies LP->C_EXPR_MERGED + into SP->CEXPR. 
*/ +static void +move_op_after_merge_succs (cmpd_local_params_p lp, void *sparams) +{ + moveop_static_params_p sp = (moveop_static_params_p) sparams; + + sp->c_expr = lp->c_expr_merged; +} + +/* Track bookkeeping copies created, insns scheduled, and blocks for + rescheduling when INSN is found by move_op. */ +static void +track_scheduled_insns_and_blocks (rtx insn) +{ + /* Even if this insn can be a copy that will be removed during current move_op, + we still need to count it as an originator. */ + bitmap_set_bit (current_originators, INSN_UID (insn)); + + if (!bitmap_clear_bit (current_copies, INSN_UID (insn))) + { + /* Note that original block needs to be rescheduled, as we pulled an + instruction out of it. */ + if (INSN_SCHED_TIMES (insn) > 0) + bitmap_set_bit (blocks_to_reschedule, BLOCK_FOR_INSN (insn)->index); + else if (INSN_UID (insn) < first_emitted_uid && !DEBUG_INSN_P (insn)) + num_insns_scheduled++; + } + + /* For instructions we must immediately remove insn from the + stream, so subsequent update_data_sets () won't include this + insn into av_set. + For expr we must make insn look like "INSN_REG (insn) := c_expr". */ + if (INSN_UID (insn) > max_uid_before_move_op) + stat_bookkeeping_copies--; +} + +/* Emit a register-register copy for INSN if needed. Return true if + emitted one. PARAMS is the move_op static parameters. */ +static bool +maybe_emit_renaming_copy (rtx insn, + moveop_static_params_p params) +{ + bool insn_emitted = false; + rtx cur_reg; + + /* Bail out early when expression can not be renamed at all. */ + if (!EXPR_SEPARABLE_P (params->c_expr)) + return false; + + cur_reg = expr_dest_reg (params->c_expr); + gcc_assert (cur_reg && params->dest && REG_P (params->dest)); + + /* If original operation has expr and the register chosen for + that expr is not original operation's dest reg, substitute + operation's right hand side with the register chosen. */ + if (REGNO (params->dest) != REGNO (cur_reg)) + { + insn_t reg_move_insn, reg_move_insn_rtx; + + reg_move_insn_rtx = create_insn_rtx_with_rhs (INSN_VINSN (insn), + params->dest); + reg_move_insn = sel_gen_insn_from_rtx_after (reg_move_insn_rtx, + INSN_EXPR (insn), + INSN_SEQNO (insn), + insn); + EXPR_SPEC_DONE_DS (INSN_EXPR (reg_move_insn)) = 0; + replace_dest_with_reg_in_expr (params->c_expr, params->dest); + + insn_emitted = true; + params->was_renamed = true; + } + + return insn_emitted; +} + +/* Emit a speculative check for INSN speculated as EXPR if needed. + Return true if we've emitted one. PARAMS is the move_op static + parameters. */ +static bool +maybe_emit_speculative_check (rtx insn, expr_t expr, + moveop_static_params_p params) +{ + bool insn_emitted = false; + insn_t x; + ds_t check_ds; + + check_ds = get_spec_check_type_for_insn (insn, expr); + if (check_ds != 0) + { + /* A speculation check should be inserted. */ + x = create_speculation_check (params->c_expr, check_ds, insn); + insn_emitted = true; + } + else + { + EXPR_SPEC_DONE_DS (INSN_EXPR (insn)) = 0; + x = insn; + } + + gcc_assert (EXPR_SPEC_DONE_DS (INSN_EXPR (x)) == 0 + && EXPR_SPEC_TO_CHECK_DS (INSN_EXPR (x)) == 0); + return insn_emitted; +} + +/* Handle transformations that leave an insn in place of original + insn such as renaming/speculation. Return true if one of such + transformations actually happened, and we have emitted this insn. 
*/ +static bool +handle_emitting_transformations (rtx insn, expr_t expr, + moveop_static_params_p params) +{ + bool insn_emitted = false; + + insn_emitted = maybe_emit_renaming_copy (insn, params); + insn_emitted |= maybe_emit_speculative_check (insn, expr, params); + + return insn_emitted; +} + +/* If INSN is the only insn in the basic block (not counting JUMP, + which may be a jump to next insn, and DEBUG_INSNs), we want to + leave a NOP there till the return to fill_insns. */ + +static bool +need_nop_to_preserve_insn_bb (rtx insn) +{ + insn_t bb_head, bb_end, bb_next, in_next; + basic_block bb = BLOCK_FOR_INSN (insn); + + bb_head = sel_bb_head (bb); + bb_end = sel_bb_end (bb); + + if (bb_head == bb_end) + return true; + + while (bb_head != bb_end && DEBUG_INSN_P (bb_head)) + bb_head = NEXT_INSN (bb_head); + + if (bb_head == bb_end) + return true; + + while (bb_head != bb_end && DEBUG_INSN_P (bb_end)) + bb_end = PREV_INSN (bb_end); + + if (bb_head == bb_end) + return true; + + bb_next = NEXT_INSN (bb_head); + while (bb_next != bb_end && DEBUG_INSN_P (bb_next)) + bb_next = NEXT_INSN (bb_next); + + if (bb_next == bb_end && JUMP_P (bb_end)) + return true; + + in_next = NEXT_INSN (insn); + while (DEBUG_INSN_P (in_next)) + in_next = NEXT_INSN (in_next); + + if (IN_CURRENT_FENCE_P (in_next)) + return true; + + return false; +} + +/* Remove INSN from stream. When ONLY_DISCONNECT is true, its data + is not removed but reused when INSN is re-emitted. */ +static void +remove_insn_from_stream (rtx insn, bool only_disconnect) +{ + /* If there's only one insn in the BB, make sure that a nop is + inserted into it, so the basic block won't disappear when we'll + delete INSN below with sel_remove_insn. It should also survive + till the return to fill_insns. */ + if (need_nop_to_preserve_insn_bb (insn)) + { + insn_t nop = get_nop_from_pool (insn); + gcc_assert (INSN_NOP_P (nop)); + VEC_safe_push (insn_t, heap, vec_temp_moveop_nops, nop); + } + + sel_remove_insn (insn, only_disconnect, false); +} + +/* This function is called when original expr is found. + INSN - current insn traversed, EXPR - the corresponding expr found. + LPARAMS is the local parameters of code modion driver, STATIC_PARAMS + is static parameters of move_op. */ +static void +move_op_orig_expr_found (insn_t insn, expr_t expr, + cmpd_local_params_p lparams ATTRIBUTE_UNUSED, + void *static_params) +{ + bool only_disconnect, insn_emitted; + moveop_static_params_p params = (moveop_static_params_p) static_params; + + copy_expr_onside (params->c_expr, INSN_EXPR (insn)); + track_scheduled_insns_and_blocks (insn); + insn_emitted = handle_emitting_transformations (insn, expr, params); + only_disconnect = (params->uid == INSN_UID (insn) + && ! insn_emitted && ! EXPR_WAS_CHANGED (expr)); + + /* Mark that we've disconnected an insn. */ + if (only_disconnect) + params->uid = -1; + remove_insn_from_stream (insn, only_disconnect); +} + +/* The function is called when original expr is found. + INSN - current insn traversed, EXPR - the corresponding expr found, + crosses_call and original_insns in STATIC_PARAMS are updated. 
*/ +static void +fur_orig_expr_found (insn_t insn, expr_t expr ATTRIBUTE_UNUSED, + cmpd_local_params_p lparams ATTRIBUTE_UNUSED, + void *static_params) +{ + fur_static_params_p params = (fur_static_params_p) static_params; + regset tmp; + + if (CALL_P (insn)) + params->crosses_call = true; + + def_list_add (params->original_insns, insn, params->crosses_call); + + /* Mark the registers that do not meet the following condition: + (2) not among the live registers of the point + immediately following the first original operation on + a given downward path, except for the original target + register of the operation. */ + tmp = get_clear_regset_from_pool (); + compute_live_below_insn (insn, tmp); + AND_COMPL_REG_SET (tmp, INSN_REG_SETS (insn)); + AND_COMPL_REG_SET (tmp, INSN_REG_CLOBBERS (insn)); + IOR_REG_SET (params->used_regs, tmp); + return_regset_to_pool (tmp); + + /* (*1) We need to add to USED_REGS registers that are read by + INSN's lhs. This may lead to choosing wrong src register. + E.g. (scheduling const expr enabled): + + 429: ax=0x0 <- Can't use AX for this expr (0x0) + 433: dx=[bp-0x18] + 427: [ax+dx+0x1]=ax + REG_DEAD: ax + 168: di=dx + REG_DEAD: dx + */ + /* FIXME: see comment above and enable MEM_P + in vinsn_separable_p. */ + gcc_assert (!VINSN_SEPARABLE_P (INSN_VINSN (insn)) + || !MEM_P (INSN_LHS (insn))); +} + +/* This function is called on the ascending pass, before returning from + current basic block. */ +static void +move_op_at_first_insn (insn_t insn, cmpd_local_params_p lparams, + void *static_params) +{ + moveop_static_params_p sparams = (moveop_static_params_p) static_params; + basic_block book_block = NULL; + + /* When we have removed the boundary insn for scheduling, which also + happened to be the end insn in its bb, we don't need to update sets. */ + if (!lparams->removed_last_insn + && lparams->e1 + && sel_bb_head_p (insn)) + { + /* We should generate bookkeeping code only if we are not at the + top level of the move_op. */ + if (sel_num_cfg_preds_gt_1 (insn)) + book_block = generate_bookkeeping_insn (sparams->c_expr, + lparams->e1, lparams->e2); + /* Update data sets for the current insn. */ + update_data_sets (insn); + } + + /* If bookkeeping code was inserted, we need to update av sets of basic + block that received bookkeeping. After generation of bookkeeping insn, + bookkeeping block does not contain valid av set because we are not following + the original algorithm in every detail with regards to e.g. renaming + simple reg-reg copies. Consider example: + + bookkeeping block scheduling fence + \ / + \ join / + ---------- + | | + ---------- + / \ + / \ + r1 := r2 r1 := r3 + + We try to schedule insn "r1 := r3" on the current + scheduling fence. Also, note that av set of bookkeeping block + contain both insns "r1 := r2" and "r1 := r3". When the insn has + been scheduled, the CFG is as follows: + + r1 := r3 r1 := r3 + bookkeeping block scheduling fence + \ / + \ join / + ---------- + | | + ---------- + / \ + / \ + r1 := r2 + + Here, insn "r1 := r3" was scheduled at the current scheduling point + and bookkeeping code was generated at the bookeeping block. This + way insn "r1 := r2" is no longer available as a whole instruction + (but only as expr) ahead of insn "r1 := r3" in bookkeeping block. + This situation is handled by calling update_data_sets. 
+ + Since update_data_sets is called only on the bookkeeping block, and + it also may have predecessors with av_sets, containing instructions that + are no longer available, we save all such expressions that become + unavailable during data sets update on the bookkeeping block in + VEC_BOOKKEEPING_BLOCKED_VINSNS. Later we avoid selecting such + expressions for scheduling. This allows us to avoid recomputation of + av_sets outside the code motion path. */ + + if (book_block) + update_and_record_unavailable_insns (book_block); + + /* If INSN was previously marked for deletion, it's time to do it. */ + if (lparams->removed_last_insn) + insn = PREV_INSN (insn); + + /* Do not tidy control flow at the topmost moveop, as we can erroneously + kill a block with a single nop in which the insn should be emitted. */ + if (lparams->e1) + tidy_control_flow (BLOCK_FOR_INSN (insn), true); +} + +/* This function is called on the ascending pass, before returning from the + current basic block. */ +static void +fur_at_first_insn (insn_t insn, + cmpd_local_params_p lparams ATTRIBUTE_UNUSED, + void *static_params ATTRIBUTE_UNUSED) +{ + gcc_assert (!sel_bb_head_p (insn) || AV_SET_VALID_P (insn) + || AV_LEVEL (insn) == -1); +} + +/* Called on the backward stage of recursion to call moveup_expr for insn + and sparams->c_expr. */ +static void +move_op_ascend (insn_t insn, void *static_params) +{ + enum MOVEUP_EXPR_CODE res; + moveop_static_params_p sparams = (moveop_static_params_p) static_params; + + if (! INSN_NOP_P (insn)) + { + res = moveup_expr_cached (sparams->c_expr, insn, false); + gcc_assert (res != MOVEUP_EXPR_NULL); + } + + /* Update liveness for this insn as it was invalidated. */ + update_liveness_on_insn (insn); +} + +/* This function is called on enter to the basic block. + Returns TRUE if this block already have been visited and + code_motion_path_driver should return 1, FALSE otherwise. */ +static int +fur_on_enter (insn_t insn ATTRIBUTE_UNUSED, cmpd_local_params_p local_params, + void *static_params, bool visited_p) +{ + fur_static_params_p sparams = (fur_static_params_p) static_params; + + if (visited_p) + { + /* If we have found something below this block, there should be at + least one insn in ORIGINAL_INSNS. */ + gcc_assert (*sparams->original_insns); + + /* Adjust CROSSES_CALL, since we may have come to this block along + different path. */ + DEF_LIST_DEF (*sparams->original_insns)->crosses_call + |= sparams->crosses_call; + } + else + local_params->old_original_insns = *sparams->original_insns; + + return 1; +} + +/* Same as above but for move_op. */ +static int +move_op_on_enter (insn_t insn ATTRIBUTE_UNUSED, + cmpd_local_params_p local_params ATTRIBUTE_UNUSED, + void *static_params ATTRIBUTE_UNUSED, bool visited_p) +{ + if (visited_p) + return -1; + return 1; +} + +/* This function is called while descending current basic block if current + insn is not the original EXPR we're searching for. + + Return value: FALSE, if code_motion_path_driver should perform a local + cleanup and return 0 itself; + TRUE, if code_motion_path_driver should continue. */ +static bool +move_op_orig_expr_not_found (insn_t insn, av_set_t orig_ops ATTRIBUTE_UNUSED, + void *static_params) +{ + moveop_static_params_p sparams = (moveop_static_params_p) static_params; + +#ifdef ENABLE_CHECKING + sparams->failed_insn = insn; +#endif + + /* If we're scheduling separate expr, in order to generate correct code + we need to stop the search at bookkeeping code generated with the + same destination register or memory. 
*/ + if (lhs_of_insn_equals_to_dest_p (insn, sparams->dest)) + return false; + return true; +} + +/* This function is called while descending current basic block if current + insn is not the original EXPR we're searching for. + + Return value: TRUE (code_motion_path_driver should continue). */ +static bool +fur_orig_expr_not_found (insn_t insn, av_set_t orig_ops, void *static_params) +{ + bool mutexed; + expr_t r; + av_set_iterator avi; + fur_static_params_p sparams = (fur_static_params_p) static_params; + + if (CALL_P (insn)) + sparams->crosses_call = true; + else if (DEBUG_INSN_P (insn)) + return true; + + /* If current insn we are looking at cannot be executed together + with original insn, then we can skip it safely. + + Example: ORIG_OPS = { (p6) r14 = sign_extend (r15); } + INSN = (!p6) r14 = r14 + 1; + + Here we can schedule ORIG_OP with lhs = r14, though only + looking at the set of used and set registers of INSN we must + forbid it. So, add set/used in INSN registers to the + untouchable set only if there is an insn in ORIG_OPS that can + affect INSN. */ + mutexed = true; + FOR_EACH_EXPR (r, avi, orig_ops) + if (!sched_insns_conditions_mutex_p (insn, EXPR_INSN_RTX (r))) + { + mutexed = false; + break; + } + + /* Mark all registers that do not meet the following condition: + (1) Not set or read on any path from xi to an instance of the + original operation. */ + if (!mutexed) + { + IOR_REG_SET (sparams->used_regs, INSN_REG_SETS (insn)); + IOR_REG_SET (sparams->used_regs, INSN_REG_USES (insn)); + IOR_REG_SET (sparams->used_regs, INSN_REG_CLOBBERS (insn)); + } + + return true; +} + +/* Hooks and data to perform move_op operations with code_motion_path_driver. */ +struct code_motion_path_driver_info_def move_op_hooks = { + move_op_on_enter, + move_op_orig_expr_found, + move_op_orig_expr_not_found, + move_op_merge_succs, + move_op_after_merge_succs, + move_op_ascend, + move_op_at_first_insn, + SUCCS_NORMAL, + "move_op" +}; + +/* Hooks and data to perform find_used_regs operations + with code_motion_path_driver. */ +struct code_motion_path_driver_info_def fur_hooks = { + fur_on_enter, + fur_orig_expr_found, + fur_orig_expr_not_found, + fur_merge_succs, + NULL, /* fur_after_merge_succs */ + NULL, /* fur_ascend */ + fur_at_first_insn, + SUCCS_ALL, + "find_used_regs" +}; + +/* Traverse all successors of INSN. For each successor that is SUCCS_NORMAL + code_motion_path_driver is called recursively. Original operation + was found at least on one path that is starting with one of INSN's + successors (this fact is asserted). ORIG_OPS is expressions we're looking + for, PATH is the path we've traversed, STATIC_PARAMS is the parameters + of either move_op or find_used_regs depending on the caller. + + Return 0 if we haven't found expression, 1 if we found it, -1 if we don't + know for sure at this point. */ +static int +code_motion_process_successors (insn_t insn, av_set_t orig_ops, + ilist_t path, void *static_params) +{ + int res = 0; + succ_iterator succ_i; + rtx succ; + basic_block bb; + int old_index; + unsigned old_succs; + + struct cmpd_local_params lparams; + expr_def _x; + + lparams.c_expr_local = &_x; + lparams.c_expr_merged = NULL; + + /* We need to process only NORMAL succs for move_op, and collect live + registers from ALL branches (including those leading out of the + region) for find_used_regs. + + In move_op, there can be a case when insn's bb number has changed + due to created bookkeeping. 
This happens very rare, as we need to + move expression from the beginning to the end of the same block. + Rescan successors in this case. */ + + rescan: + bb = BLOCK_FOR_INSN (insn); + old_index = bb->index; + old_succs = EDGE_COUNT (bb->succs); + + FOR_EACH_SUCC_1 (succ, succ_i, insn, code_motion_path_driver_info->succ_flags) + { + int b; + + lparams.e1 = succ_i.e1; + lparams.e2 = succ_i.e2; + + /* Go deep into recursion only for NORMAL edges (non-backedges within the + current region). */ + if (succ_i.current_flags == SUCCS_NORMAL) + b = code_motion_path_driver (succ, orig_ops, path, &lparams, + static_params); + else + b = 0; + + /* Merge c_expres found or unify live register sets from different + successors. */ + code_motion_path_driver_info->merge_succs (insn, succ, b, &lparams, + static_params); + if (b == 1) + res = b; + else if (b == -1 && res != 1) + res = b; + + /* We have simplified the control flow below this point. In this case, + the iterator becomes invalid. We need to try again. */ + if (BLOCK_FOR_INSN (insn)->index != old_index + || EDGE_COUNT (bb->succs) != old_succs) + goto rescan; + } + +#ifdef ENABLE_CHECKING + /* Here, RES==1 if original expr was found at least for one of the + successors. After the loop, RES may happen to have zero value + only if at some point the expr searched is present in av_set, but is + not found below. In most cases, this situation is an error. + The exception is when the original operation is blocked by + bookkeeping generated for another fence or for another path in current + move_op. */ + gcc_assert (res == 1 + || (res == 0 + && av_set_could_be_blocked_by_bookkeeping_p (orig_ops, + static_params)) + || res == -1); +#endif + + /* Merge data, clean up, etc. */ + if (res != -1 && code_motion_path_driver_info->after_merge_succs) + code_motion_path_driver_info->after_merge_succs (&lparams, static_params); + + return res; +} + + +/* Perform a cleanup when the driver is about to terminate. ORIG_OPS_P + is the pointer to the av set with expressions we were looking for, + PATH_P is the pointer to the traversed path. */ +static inline void +code_motion_path_driver_cleanup (av_set_t *orig_ops_p, ilist_t *path_p) +{ + ilist_remove (path_p); + av_set_clear (orig_ops_p); +} + +/* The driver function that implements move_op or find_used_regs + functionality dependent whether code_motion_path_driver_INFO is set to + &MOVE_OP_HOOKS or &FUR_HOOKS. This function implements the common parts + of code (CFG traversal etc) that are shared among both functions. INSN + is the insn we're starting the search from, ORIG_OPS are the expressions + we're searching for, PATH is traversed path, LOCAL_PARAMS_IN are local + parameters of the driver, and STATIC_PARAMS are static parameters of + the caller. + + Returns whether original instructions were found. Note that top-level + code_motion_path_driver always returns true. */ +static int +code_motion_path_driver (insn_t insn, av_set_t orig_ops, ilist_t path, + cmpd_local_params_p local_params_in, + void *static_params) +{ + expr_t expr = NULL; + basic_block bb = BLOCK_FOR_INSN (insn); + insn_t first_insn, bb_tail, before_first; + bool removed_last_insn = false; + + if (sched_verbose >= 6) + { + sel_print ("%s (", code_motion_path_driver_info->routine_name); + dump_insn (insn); + sel_print (","); + dump_av_set (orig_ops); + sel_print (")\n"); + } + + gcc_assert (orig_ops); + + /* If no original operations exist below this insn, return immediately. 
*/ + if (is_ineligible_successor (insn, path)) + { + if (sched_verbose >= 6) + sel_print ("Insn %d is ineligible successor\n", INSN_UID (insn)); + return false; + } + + /* The block can have invalid av set, in which case it was created earlier + during move_op. Return immediately. */ + if (sel_bb_head_p (insn)) + { + if (! AV_SET_VALID_P (insn)) + { + if (sched_verbose >= 6) + sel_print ("Returned from block %d as it had invalid av set\n", + bb->index); + return false; + } + + if (bitmap_bit_p (code_motion_visited_blocks, bb->index)) + { + /* We have already found an original operation on this branch, do not + go any further and just return TRUE here. If we don't stop here, + function can have exponential behaviour even on the small code + with many different paths (e.g. with data speculation and + recovery blocks). */ + if (sched_verbose >= 6) + sel_print ("Block %d already visited in this traversal\n", bb->index); + if (code_motion_path_driver_info->on_enter) + return code_motion_path_driver_info->on_enter (insn, + local_params_in, + static_params, + true); + } + } + + if (code_motion_path_driver_info->on_enter) + code_motion_path_driver_info->on_enter (insn, local_params_in, + static_params, false); + orig_ops = av_set_copy (orig_ops); + + /* Filter the orig_ops set. */ + if (AV_SET_VALID_P (insn)) + av_set_code_motion_filter (&orig_ops, AV_SET (insn)); + + /* If no more original ops, return immediately. */ + if (!orig_ops) + { + if (sched_verbose >= 6) + sel_print ("No intersection with av set of block %d\n", bb->index); + return false; + } + + /* For non-speculative insns we have to leave only one form of the + original operation, because if we don't, we may end up with + different C_EXPRes and, consequently, with bookkeepings for different + expression forms along the same code motion path. That may lead to + generation of incorrect code. So for each code motion we stick to + the single form of the instruction, except for speculative insns + which we need to keep in different forms with all speculation + types. */ + av_set_leave_one_nonspec (&orig_ops); + + /* It is not possible that all ORIG_OPS are filtered out. */ + gcc_assert (orig_ops); + + /* It is enough to place only heads and tails of visited basic blocks into + the PATH. */ + ilist_add (&path, insn); + first_insn = insn; + bb_tail = sel_bb_end (bb); + + /* Descend the basic block in search of the original expr; this part + corresponds to the part of the original move_op procedure executed + before the recursive call. */ + for (;;) + { + /* Look at the insn and decide if it could be an ancestor of currently + scheduling operation. If it is so, then the insn "dest = op" could + either be replaced with "dest = reg", because REG now holds the result + of OP, or just removed, if we've scheduled the insn as a whole. + + If this insn doesn't contain currently scheduling OP, then proceed + with searching and look at its successors. Operations we're searching + for could have changed when moving up through this insn via + substituting. In this case, perform unsubstitution on them first. + + When traversing the DAG below this insn is finished, insert + bookkeeping code, if the insn is a joint point, and remove + leftovers. */ + + expr = av_set_lookup (orig_ops, INSN_VINSN (insn)); + if (expr) + { + insn_t last_insn = PREV_INSN (insn); + + /* We have found the original operation. 
*/ + if (sched_verbose >= 6) + sel_print ("Found original operation at insn %d\n", INSN_UID (insn)); + + code_motion_path_driver_info->orig_expr_found + (insn, expr, local_params_in, static_params); + + /* Step back, so on the way back we'll start traversing from the + previous insn (or we'll see that it's bb_note and skip that + loop). */ + if (insn == first_insn) + { + first_insn = NEXT_INSN (last_insn); + removed_last_insn = sel_bb_end_p (last_insn); + } + insn = last_insn; + break; + } + else + { + /* We haven't found the original expr, continue descending the basic + block. */ + if (code_motion_path_driver_info->orig_expr_not_found + (insn, orig_ops, static_params)) + { + /* Av set ops could have been changed when moving through this + insn. To find them below it, we have to un-substitute them. */ + undo_transformations (&orig_ops, insn); + } + else + { + /* Clean up and return, if the hook tells us to do so. It may + happen if we've encountered the previously created + bookkeeping. */ + code_motion_path_driver_cleanup (&orig_ops, &path); + return -1; + } + + gcc_assert (orig_ops); + } + + /* Stop at insn if we got to the end of BB. */ + if (insn == bb_tail) + break; + + insn = NEXT_INSN (insn); + } + + /* Here INSN either points to the insn before the original insn (may be + bb_note, if original insn was a bb_head) or to the bb_end. */ + if (!expr) + { + int res; + + gcc_assert (insn == sel_bb_end (bb)); + + /* Add bb tail to PATH (but it doesn't make any sense if it's a bb_head - + it's already in PATH then). */ + if (insn != first_insn) + ilist_add (&path, insn); + + /* Process_successors should be able to find at least one + successor for which code_motion_path_driver returns TRUE. */ + res = code_motion_process_successors (insn, orig_ops, + path, static_params); + + /* Remove bb tail from path. */ + if (insn != first_insn) + ilist_remove (&path); + + if (res != 1) + { + /* This is the case when one of the original expr is no longer available + due to bookkeeping created on this branch with the same register. + In the original algorithm, which doesn't have update_data_sets call + on a bookkeeping block, it would simply result in returning + FALSE when we've encountered a previously generated bookkeeping + insn in moveop_orig_expr_not_found. */ + code_motion_path_driver_cleanup (&orig_ops, &path); + return res; + } + } + + /* Don't need it any more. */ + av_set_clear (&orig_ops); + + /* Backward pass: now, when we have C_EXPR computed, we'll drag it to + the beginning of the basic block. */ + before_first = PREV_INSN (first_insn); + while (insn != before_first) + { + if (code_motion_path_driver_info->ascend) + code_motion_path_driver_info->ascend (insn, static_params); + + insn = PREV_INSN (insn); + } + + /* Now we're at the bb head. */ + insn = first_insn; + ilist_remove (&path); + local_params_in->removed_last_insn = removed_last_insn; + code_motion_path_driver_info->at_first_insn (insn, local_params_in, static_params); + + /* This should be the very last operation as at bb head we could change + the numbering by creating bookkeeping blocks. */ + if (removed_last_insn) + insn = PREV_INSN (insn); + bitmap_set_bit (code_motion_visited_blocks, BLOCK_FOR_INSN (insn)->index); + return true; +} + +/* Move up the operations from ORIG_OPS set traversing the dag starting + from INSN. PATH represents the edges traversed so far. + DEST is the register chosen for scheduling the current expr. Insert + bookkeeping code in the join points. 
EXPR_VLIW is the chosen expression, + C_EXPR is how it looks like at the given cfg point. + Set *SHOULD_MOVE to indicate whether we have only disconnected + one of the insns found. + + Returns whether original instructions were found, which is asserted + to be true in the caller. */ +static bool +move_op (insn_t insn, av_set_t orig_ops, expr_t expr_vliw, + rtx dest, expr_t c_expr, bool *should_move) +{ + struct moveop_static_params sparams; + struct cmpd_local_params lparams; + bool res; + + /* Init params for code_motion_path_driver. */ + sparams.dest = dest; + sparams.c_expr = c_expr; + sparams.uid = INSN_UID (EXPR_INSN_RTX (expr_vliw)); +#ifdef ENABLE_CHECKING + sparams.failed_insn = NULL; +#endif + sparams.was_renamed = false; + lparams.e1 = NULL; + + /* We haven't visited any blocks yet. */ + bitmap_clear (code_motion_visited_blocks); + + /* Set appropriate hooks and data. */ + code_motion_path_driver_info = &move_op_hooks; + res = code_motion_path_driver (insn, orig_ops, NULL, &lparams, &sparams); + + if (sparams.was_renamed) + EXPR_WAS_RENAMED (expr_vliw) = true; + + *should_move = (sparams.uid == -1); + + return res; +} + + +/* Functions that work with regions. */ + +/* Current number of seqno used in init_seqno and init_seqno_1. */ +static int cur_seqno; + +/* A helper for init_seqno. Traverse the region starting from BB and + compute seqnos for visited insns, marking visited bbs in VISITED_BBS. + Clear visited blocks from BLOCKS_TO_RESCHEDULE. */ +static void +init_seqno_1 (basic_block bb, sbitmap visited_bbs, bitmap blocks_to_reschedule) +{ + int bbi = BLOCK_TO_BB (bb->index); + insn_t insn, note = bb_note (bb); + insn_t succ_insn; + succ_iterator si; + + SET_BIT (visited_bbs, bbi); + if (blocks_to_reschedule) + bitmap_clear_bit (blocks_to_reschedule, bb->index); + + FOR_EACH_SUCC_1 (succ_insn, si, BB_END (bb), + SUCCS_NORMAL | SUCCS_SKIP_TO_LOOP_EXITS) + { + basic_block succ = BLOCK_FOR_INSN (succ_insn); + int succ_bbi = BLOCK_TO_BB (succ->index); + + gcc_assert (in_current_region_p (succ)); + + if (!TEST_BIT (visited_bbs, succ_bbi)) + { + gcc_assert (succ_bbi > bbi); + + init_seqno_1 (succ, visited_bbs, blocks_to_reschedule); + } + else if (blocks_to_reschedule) + bitmap_set_bit (forced_ebb_heads, succ->index); + } + + for (insn = BB_END (bb); insn != note; insn = PREV_INSN (insn)) + INSN_SEQNO (insn) = cur_seqno--; +} + +/* Initialize seqnos for the current region. NUMBER_OF_INSNS is the number + of instructions in the region, BLOCKS_TO_RESCHEDULE contains blocks on + which we're rescheduling when pipelining, FROM is the block where + traversing region begins (it may not be the head of the region when + pipelining, but the head of the loop instead). + + Returns the maximal seqno found. */ +static int +init_seqno (int number_of_insns, bitmap blocks_to_reschedule, basic_block from) +{ + sbitmap visited_bbs; + bitmap_iterator bi; + unsigned bbi; + + visited_bbs = sbitmap_alloc (current_nr_blocks); + + if (blocks_to_reschedule) + { + sbitmap_ones (visited_bbs); + EXECUTE_IF_SET_IN_BITMAP (blocks_to_reschedule, 0, bbi, bi) + { + gcc_assert (BLOCK_TO_BB (bbi) < current_nr_blocks); + RESET_BIT (visited_bbs, BLOCK_TO_BB (bbi)); + } + } + else + { + sbitmap_zero (visited_bbs); + from = EBB_FIRST_BB (0); + } + + cur_seqno = number_of_insns > 0 ? 
number_of_insns : sched_max_luid - 1; + init_seqno_1 (from, visited_bbs, blocks_to_reschedule); + gcc_assert (cur_seqno == 0 || number_of_insns == 0); + + sbitmap_free (visited_bbs); + return sched_max_luid - 1; +} + +/* Initialize scheduling parameters for current region. */ +static void +sel_setup_region_sched_flags (void) +{ + enable_schedule_as_rhs_p = 1; + bookkeeping_p = 1; + pipelining_p = (bookkeeping_p + && (flag_sel_sched_pipelining != 0) + && current_loop_nest != NULL + && loop_has_exit_edges (current_loop_nest)); + max_insns_to_rename = PARAM_VALUE (PARAM_SELSCHED_INSNS_TO_RENAME); + max_ws = MAX_WS; +} + +/* Return true if all basic blocks of current region are empty. */ +static bool +current_region_empty_p (void) +{ + int i; + for (i = 0; i < current_nr_blocks; i++) + if (! sel_bb_empty_p (BASIC_BLOCK (BB_TO_BLOCK (i)))) + return false; + + return true; +} + +/* Prepare and verify loop nest for pipelining. */ +static void +setup_current_loop_nest (int rgn) +{ + current_loop_nest = get_loop_nest_for_rgn (rgn); + + if (!current_loop_nest) + return; + + /* If this loop has any saved loop preheaders from nested loops, + add these basic blocks to the current region. */ + sel_add_loop_preheaders (); + + /* Check that we're starting with a valid information. */ + gcc_assert (loop_latch_edge (current_loop_nest)); + gcc_assert (LOOP_MARKED_FOR_PIPELINING_P (current_loop_nest)); +} + +/* Compute instruction priorities for current region. */ +static void +sel_compute_priorities (int rgn) +{ + sched_rgn_compute_dependencies (rgn); + + /* Compute insn priorities in haifa style. Then free haifa style + dependencies that we've calculated for this. */ + compute_priorities (); + + if (sched_verbose >= 5) + debug_rgn_dependencies (0); + + free_rgn_deps (); +} + +/* Init scheduling data for RGN. Returns true when this region should not + be scheduled. */ +static bool +sel_region_init (int rgn) +{ + int i; + bb_vec_t bbs; + + rgn_setup_region (rgn); + + /* Even if sched_is_disabled_for_current_region_p() is true, we still + do region initialization here so the region can be bundled correctly, + but we'll skip the scheduling in sel_sched_region (). */ + if (current_region_empty_p ()) + return true; + + if (flag_sel_sched_pipelining) + setup_current_loop_nest (rgn); + + sel_setup_region_sched_flags (); + + bbs = VEC_alloc (basic_block, heap, current_nr_blocks); + + for (i = 0; i < current_nr_blocks; i++) + VEC_quick_push (basic_block, bbs, BASIC_BLOCK (BB_TO_BLOCK (i))); + + sel_init_bbs (bbs, NULL); + + /* Initialize luids and dependence analysis which both sel-sched and haifa + need. */ + sched_init_luids (bbs, NULL, NULL, NULL); + sched_deps_init (false); + + /* Initialize haifa data. */ + rgn_setup_sched_infos (); + sel_set_sched_flags (); + haifa_init_h_i_d (bbs, NULL, NULL, NULL); + + sel_compute_priorities (rgn); + init_deps_global (); + + /* Main initialization. */ + sel_setup_sched_infos (); + sel_init_global_and_expr (bbs); + + VEC_free (basic_block, heap, bbs); + + blocks_to_reschedule = BITMAP_ALLOC (NULL); + + /* Init correct liveness sets on each instruction of a single-block loop. + This is the only situation when we can't update liveness when calling + compute_live for the first insn of the loop. */ + if (current_loop_nest) + { + int header = (sel_is_loop_preheader_p (BASIC_BLOCK (BB_TO_BLOCK (0))) + ? 
1 + : 0); + + if (current_nr_blocks == header + 1) + update_liveness_on_insn + (sel_bb_head (BASIC_BLOCK (BB_TO_BLOCK (header)))); + } + + /* Set hooks so that no newly generated insn will go out unnoticed. */ + sel_register_cfg_hooks (); + + /* !!! We call target.sched.init () for the whole region, but we invoke + targetm.sched.finish () for every ebb. */ + if (targetm.sched.init) + /* None of the arguments are actually used in any target. */ + targetm.sched.init (sched_dump, sched_verbose, -1); + + first_emitted_uid = get_max_uid () + 1; + preheader_removed = false; + + /* Reset register allocation ticks array. */ + memset (reg_rename_tick, 0, sizeof reg_rename_tick); + reg_rename_this_tick = 0; + + bitmap_initialize (forced_ebb_heads, 0); + bitmap_clear (forced_ebb_heads); + + setup_nop_vinsn (); + current_copies = BITMAP_ALLOC (NULL); + current_originators = BITMAP_ALLOC (NULL); + code_motion_visited_blocks = BITMAP_ALLOC (NULL); + + return false; +} + +/* Simplify insns after the scheduling. */ +static void +simplify_changed_insns (void) +{ + int i; + + for (i = 0; i < current_nr_blocks; i++) + { + basic_block bb = BASIC_BLOCK (BB_TO_BLOCK (i)); + rtx insn; + + FOR_BB_INSNS (bb, insn) + if (INSN_P (insn)) + { + expr_t expr = INSN_EXPR (insn); + + if (EXPR_WAS_SUBSTITUTED (expr)) + validate_simplify_insn (insn); + } + } +} + +/* Find boundaries of the EBB starting from basic block BB, marking blocks of + this EBB in SCHEDULED_BLOCKS and appropriately filling in HEAD, TAIL, + PREV_HEAD, and NEXT_TAIL fields of CURRENT_SCHED_INFO structure. */ +static void +find_ebb_boundaries (basic_block bb, bitmap scheduled_blocks) +{ + insn_t head, tail; + basic_block bb1 = bb; + if (sched_verbose >= 2) + sel_print ("Finishing schedule in bbs: "); + + do + { + bitmap_set_bit (scheduled_blocks, BLOCK_TO_BB (bb1->index)); + + if (sched_verbose >= 2) + sel_print ("%d; ", bb1->index); + } + while (!bb_ends_ebb_p (bb1) && (bb1 = bb_next_bb (bb1))); + + if (sched_verbose >= 2) + sel_print ("\n"); + + get_ebb_head_tail (bb, bb1, &head, &tail); + + current_sched_info->head = head; + current_sched_info->tail = tail; + current_sched_info->prev_head = PREV_INSN (head); + current_sched_info->next_tail = NEXT_INSN (tail); +} + +/* Regenerate INSN_SCHED_CYCLEs for insns of current EBB. */ +static void +reset_sched_cycles_in_current_ebb (void) +{ + int last_clock = 0; + int haifa_last_clock = -1; + int haifa_clock = 0; + int issued_insns = 0; + insn_t insn; + + if (targetm.sched.init) + { + /* None of the arguments are actually used in any target. + NB: We should have md_reset () hook for cases like this. */ + targetm.sched.init (sched_dump, sched_verbose, -1); + } + + state_reset (curr_state); + advance_state (curr_state); + + for (insn = current_sched_info->head; + insn != current_sched_info->next_tail; + insn = NEXT_INSN (insn)) + { + int cost, haifa_cost; + int sort_p; + bool asm_p, real_insn, after_stall, all_issued; + int clock; + + if (!INSN_P (insn)) + continue; + + asm_p = false; + real_insn = recog_memoized (insn) >= 0; + clock = INSN_SCHED_CYCLE (insn); + + cost = clock - last_clock; + + /* Initialize HAIFA_COST. */ + if (! real_insn) + { + asm_p = INSN_ASM_P (insn); + + if (asm_p) + /* This is asm insn which *had* to be scheduled first + on the cycle. */ + haifa_cost = 1; + else + /* This is a use/clobber insn. It should not change + cost. */ + haifa_cost = 0; + } + else + haifa_cost = estimate_insn_cost (insn, curr_state); + + /* Stall for whatever cycles we've stalled before. 
*/ + after_stall = 0; + if (INSN_AFTER_STALL_P (insn) && cost > haifa_cost) + { + haifa_cost = cost; + after_stall = 1; + } + all_issued = issued_insns == issue_rate; + if (haifa_cost == 0 && all_issued) + haifa_cost = 1; + if (haifa_cost > 0) + { + int i = 0; + + while (haifa_cost--) + { + advance_state (curr_state); + issued_insns = 0; + i++; + + if (sched_verbose >= 2) + { + sel_print ("advance_state (state_transition)\n"); + debug_state (curr_state); + } + + /* The DFA may report that e.g. insn requires 2 cycles to be + issued, but on the next cycle it says that insn is ready + to go. Check this here. */ + if (!after_stall + && real_insn + && haifa_cost > 0 + && estimate_insn_cost (insn, curr_state) == 0) + break; + + /* When the data dependency stall is longer than the DFA stall, + and when we have issued exactly issue_rate insns and stalled, + it could be that after this longer stall the insn will again + become unavailable to the DFA restrictions. Looks strange + but happens e.g. on x86-64. So recheck DFA on the last + iteration. */ + if ((after_stall || all_issued) + && real_insn + && haifa_cost == 0) + haifa_cost = estimate_insn_cost (insn, curr_state); + } + + haifa_clock += i; + if (sched_verbose >= 2) + sel_print ("haifa clock: %d\n", haifa_clock); + } + else + gcc_assert (haifa_cost == 0); + + if (sched_verbose >= 2) + sel_print ("Haifa cost for insn %d: %d\n", INSN_UID (insn), haifa_cost); + + if (targetm.sched.dfa_new_cycle) + while (targetm.sched.dfa_new_cycle (sched_dump, sched_verbose, insn, + haifa_last_clock, haifa_clock, + &sort_p)) + { + advance_state (curr_state); + issued_insns = 0; + haifa_clock++; + if (sched_verbose >= 2) + { + sel_print ("advance_state (dfa_new_cycle)\n"); + debug_state (curr_state); + sel_print ("haifa clock: %d\n", haifa_clock + 1); + } + } + + if (real_insn) + { + cost = state_transition (curr_state, insn); + issued_insns++; + + if (sched_verbose >= 2) + { + sel_print ("scheduled insn %d, clock %d\n", INSN_UID (insn), + haifa_clock + 1); + debug_state (curr_state); + } + gcc_assert (cost < 0); + } + + if (targetm.sched.variable_issue) + targetm.sched.variable_issue (sched_dump, sched_verbose, insn, 0); + + INSN_SCHED_CYCLE (insn) = haifa_clock; + + last_clock = clock; + haifa_last_clock = haifa_clock; + } +} + +/* Put TImode markers on insns starting a new issue group. */ +static void +put_TImodes (void) +{ + int last_clock = -1; + insn_t insn; + + for (insn = current_sched_info->head; insn != current_sched_info->next_tail; + insn = NEXT_INSN (insn)) + { + int cost, clock; + + if (!INSN_P (insn)) + continue; + + clock = INSN_SCHED_CYCLE (insn); + cost = (last_clock == -1) ? 1 : clock - last_clock; + + gcc_assert (cost >= 0); + + if (issue_rate > 1 + && GET_CODE (PATTERN (insn)) != USE + && GET_CODE (PATTERN (insn)) != CLOBBER) + { + if (reload_completed && cost > 0) + PUT_MODE (insn, TImode); + + last_clock = clock; + } + + if (sched_verbose >= 2) + sel_print ("Cost for insn %d is %d\n", INSN_UID (insn), cost); + } +} + +/* Perform MD_FINISH on EBBs comprising current region. When + RESET_SCHED_CYCLES_P is true, run a pass emulating the scheduler + to produce correct sched cycles on insns. */ +static void +sel_region_target_finish (bool reset_sched_cycles_p) +{ + int i; + bitmap scheduled_blocks = BITMAP_ALLOC (NULL); + + for (i = 0; i < current_nr_blocks; i++) + { + if (bitmap_bit_p (scheduled_blocks, i)) + continue; + + /* While pipelining outer loops, skip bundling for loop + preheaders. Those will be rescheduled in the outer loop. 
*/ + if (sel_is_loop_preheader_p (EBB_FIRST_BB (i))) + continue; + + find_ebb_boundaries (EBB_FIRST_BB (i), scheduled_blocks); + + if (no_real_insns_p (current_sched_info->head, current_sched_info->tail)) + continue; + + if (reset_sched_cycles_p) + reset_sched_cycles_in_current_ebb (); + + if (targetm.sched.init) + targetm.sched.init (sched_dump, sched_verbose, -1); + + put_TImodes (); + + if (targetm.sched.finish) + { + targetm.sched.finish (sched_dump, sched_verbose); + + /* Extend luids so that insns generated by the target will + get zero luid. */ + sched_init_luids (NULL, NULL, NULL, NULL); + } + } + + BITMAP_FREE (scheduled_blocks); +} + +/* Free the scheduling data for the current region. When RESET_SCHED_CYCLES_P + is true, make an additional pass emulating scheduler to get correct insn + cycles for md_finish calls. */ +static void +sel_region_finish (bool reset_sched_cycles_p) +{ + simplify_changed_insns (); + sched_finish_ready_list (); + free_nop_pool (); + + /* Free the vectors. */ + if (vec_av_set) + VEC_free (expr_t, heap, vec_av_set); + BITMAP_FREE (current_copies); + BITMAP_FREE (current_originators); + BITMAP_FREE (code_motion_visited_blocks); + vinsn_vec_free (&vec_bookkeeping_blocked_vinsns); + vinsn_vec_free (&vec_target_unavailable_vinsns); + + /* If LV_SET of the region head should be updated, do it now because + there will be no other chance. */ + { + succ_iterator si; + insn_t insn; + + FOR_EACH_SUCC_1 (insn, si, bb_note (EBB_FIRST_BB (0)), + SUCCS_NORMAL | SUCCS_SKIP_TO_LOOP_EXITS) + { + basic_block bb = BLOCK_FOR_INSN (insn); + + if (!BB_LV_SET_VALID_P (bb)) + compute_live (insn); + } + } + + /* Emulate the Haifa scheduler for bundling. */ + if (reload_completed) + sel_region_target_finish (reset_sched_cycles_p); + + sel_finish_global_and_expr (); + + bitmap_clear (forced_ebb_heads); + + free_nop_vinsn (); + + finish_deps_global (); + sched_finish_luids (); + + sel_finish_bbs (); + BITMAP_FREE (blocks_to_reschedule); + + sel_unregister_cfg_hooks (); + + max_issue_size = 0; +} + + +/* Functions that implement the scheduler driver. */ + +/* Schedule a parallel instruction group on each of FENCES. MAX_SEQNO + is the current maximum seqno. SCHEDULED_INSNS_TAILPP is the list + of insns scheduled -- these would be postprocessed later. */ +static void +schedule_on_fences (flist_t fences, int max_seqno, + ilist_t **scheduled_insns_tailpp) +{ + flist_t old_fences = fences; + + if (sched_verbose >= 1) + { + sel_print ("\nScheduling on fences: "); + dump_flist (fences); + sel_print ("\n"); + } + + scheduled_something_on_previous_fence = false; + for (; fences; fences = FLIST_NEXT (fences)) + { + fence_t fence = NULL; + int seqno = 0; + flist_t fences2; + bool first_p = true; + + /* Choose the next fence group to schedule. + The fact that insn can be scheduled only once + on the cycle is guaranteed by two properties: + 1. seqnos of parallel groups decrease with each iteration. + 2. If is_ineligible_successor () sees the larger seqno, it + checks if candidate insn is_in_current_fence_p (). */ + for (fences2 = old_fences; fences2; fences2 = FLIST_NEXT (fences2)) + { + fence_t f = FLIST_FENCE (fences2); + + if (!FENCE_PROCESSED_P (f)) + { + int i = INSN_SEQNO (FENCE_INSN (f)); + + if (first_p || i > seqno) + { + seqno = i; + fence = f; + first_p = false; + } + else + /* ??? Seqnos of different groups should be different. */ + gcc_assert (1 || i != seqno); + } + } + + gcc_assert (fence); + + /* As FENCE is nonnull, SEQNO is initialized. 
*/ + seqno -= max_seqno + 1; + fill_insns (fence, seqno, scheduled_insns_tailpp); + FENCE_PROCESSED_P (fence) = true; + } + + /* All av_sets are invalidated by GLOBAL_LEVEL increase, thus we + don't need to keep bookkeeping-invalidated and target-unavailable + vinsns any more. */ + vinsn_vec_clear (&vec_bookkeeping_blocked_vinsns); + vinsn_vec_clear (&vec_target_unavailable_vinsns); +} + +/* Calculate MIN_SEQNO and MAX_SEQNO. */ +static void +find_min_max_seqno (flist_t fences, int *min_seqno, int *max_seqno) +{ + *min_seqno = *max_seqno = INSN_SEQNO (FENCE_INSN (FLIST_FENCE (fences))); + + /* The first element is already processed. */ + while ((fences = FLIST_NEXT (fences))) + { + int seqno = INSN_SEQNO (FENCE_INSN (FLIST_FENCE (fences))); + + if (*min_seqno > seqno) + *min_seqno = seqno; + else if (*max_seqno < seqno) + *max_seqno = seqno; + } +} + +/* Calculate new fences from FENCES. */ +static flist_t +calculate_new_fences (flist_t fences, int orig_max_seqno) +{ + flist_t old_fences = fences; + struct flist_tail_def _new_fences, *new_fences = &_new_fences; + + flist_tail_init (new_fences); + for (; fences; fences = FLIST_NEXT (fences)) + { + fence_t fence = FLIST_FENCE (fences); + insn_t insn; + + if (!FENCE_BNDS (fence)) + { + /* This fence doesn't have any successors. */ + if (!FENCE_SCHEDULED_P (fence)) + { + /* Nothing was scheduled on this fence. */ + int seqno; + + insn = FENCE_INSN (fence); + seqno = INSN_SEQNO (insn); + gcc_assert (seqno > 0 && seqno <= orig_max_seqno); + + if (sched_verbose >= 1) + sel_print ("Fence %d[%d] has not changed\n", + INSN_UID (insn), + BLOCK_NUM (insn)); + move_fence_to_fences (fences, new_fences); + } + } + else + extract_new_fences_from (fences, new_fences, orig_max_seqno); + } + + flist_clear (&old_fences); + return FLIST_TAIL_HEAD (new_fences); +} + +/* Update seqnos of insns given by PSCHEDULED_INSNS. MIN_SEQNO and MAX_SEQNO + are the miminum and maximum seqnos of the group, HIGHEST_SEQNO_IN_USE is + the highest seqno used in a region. Return the updated highest seqno. */ +static int +update_seqnos_and_stage (int min_seqno, int max_seqno, + int highest_seqno_in_use, + ilist_t *pscheduled_insns) +{ + int new_hs; + ilist_iterator ii; + insn_t insn; + + /* Actually, new_hs is the seqno of the instruction, that was + scheduled first (i.e. it is the first one in SCHEDULED_INSNS). */ + if (*pscheduled_insns) + { + new_hs = (INSN_SEQNO (ILIST_INSN (*pscheduled_insns)) + + highest_seqno_in_use + max_seqno - min_seqno + 2); + gcc_assert (new_hs > highest_seqno_in_use); + } + else + new_hs = highest_seqno_in_use; + + FOR_EACH_INSN (insn, ii, *pscheduled_insns) + { + gcc_assert (INSN_SEQNO (insn) < 0); + INSN_SEQNO (insn) += highest_seqno_in_use + max_seqno - min_seqno + 2; + gcc_assert (INSN_SEQNO (insn) <= new_hs); + + /* When not pipelining, purge unneeded insn info on the scheduled insns. + For example, having reg_last array of INSN_DEPS_CONTEXT in memory may + require > 1GB of memory e.g. on limit-fnargs.c. */ + if (! pipelining_p) + free_data_for_scheduled_insn (insn); + } + + ilist_clear (pscheduled_insns); + global_level++; + + return new_hs; +} + +/* The main driver for scheduling a region. This function is responsible + for correct propagation of fences (i.e. scheduling points) and creating + a group of parallel insns at each of them. It also supports + pipelining. ORIG_MAX_SEQNO is the maximal seqno before this pass + of scheduling. 
*/ +static void +sel_sched_region_2 (int orig_max_seqno) +{ + int highest_seqno_in_use = orig_max_seqno; + + stat_bookkeeping_copies = 0; + stat_insns_needed_bookkeeping = 0; + stat_renamed_scheduled = 0; + stat_substitutions_total = 0; + num_insns_scheduled = 0; + + while (fences) + { + int min_seqno, max_seqno; + ilist_t scheduled_insns = NULL; + ilist_t *scheduled_insns_tailp = &scheduled_insns; + + find_min_max_seqno (fences, &min_seqno, &max_seqno); + schedule_on_fences (fences, max_seqno, &scheduled_insns_tailp); + fences = calculate_new_fences (fences, orig_max_seqno); + highest_seqno_in_use = update_seqnos_and_stage (min_seqno, max_seqno, + highest_seqno_in_use, + &scheduled_insns); + } + + if (sched_verbose >= 1) + sel_print ("Scheduled %d bookkeeping copies, %d insns needed " + "bookkeeping, %d insns renamed, %d insns substituted\n", + stat_bookkeeping_copies, + stat_insns_needed_bookkeeping, + stat_renamed_scheduled, + stat_substitutions_total); +} + +/* Schedule a region. When pipelining, search for possibly never scheduled + bookkeeping code and schedule it. Reschedule pipelined code without + pipelining after. */ +static void +sel_sched_region_1 (void) +{ + int number_of_insns; + int orig_max_seqno; + + /* Remove empty blocks that might be in the region from the beginning. + We need to do save sched_max_luid before that, as it actually shows + the number of insns in the region, and purge_empty_blocks can + alter it. */ + number_of_insns = sched_max_luid - 1; + purge_empty_blocks (); + + orig_max_seqno = init_seqno (number_of_insns, NULL, NULL); + gcc_assert (orig_max_seqno >= 1); + + /* When pipelining outer loops, create fences on the loop header, + not preheader. */ + fences = NULL; + if (current_loop_nest) + init_fences (BB_END (EBB_FIRST_BB (0))); + else + init_fences (bb_note (EBB_FIRST_BB (0))); + global_level = 1; + + sel_sched_region_2 (orig_max_seqno); + + gcc_assert (fences == NULL); + + if (pipelining_p) + { + int i; + basic_block bb; + struct flist_tail_def _new_fences; + flist_tail_t new_fences = &_new_fences; + bool do_p = true; + + pipelining_p = false; + max_ws = MIN (max_ws, issue_rate * 3 / 2); + bookkeeping_p = false; + enable_schedule_as_rhs_p = false; + + /* Schedule newly created code, that has not been scheduled yet. */ + do_p = true; + + while (do_p) + { + do_p = false; + + for (i = 0; i < current_nr_blocks; i++) + { + basic_block bb = EBB_FIRST_BB (i); + + if (bitmap_bit_p (blocks_to_reschedule, bb->index)) + { + if (! bb_ends_ebb_p (bb)) + bitmap_set_bit (blocks_to_reschedule, bb_next_bb (bb)->index); + if (sel_bb_empty_p (bb)) + { + bitmap_clear_bit (blocks_to_reschedule, bb->index); + continue; + } + clear_outdated_rtx_info (bb); + if (sel_insn_is_speculation_check (BB_END (bb)) + && JUMP_P (BB_END (bb))) + bitmap_set_bit (blocks_to_reschedule, + BRANCH_EDGE (bb)->dest->index); + } + else if (! sel_bb_empty_p (bb) + && INSN_SCHED_TIMES (sel_bb_head (bb)) <= 0) + bitmap_set_bit (blocks_to_reschedule, bb->index); + } + + for (i = 0; i < current_nr_blocks; i++) + { + bb = EBB_FIRST_BB (i); + + /* While pipelining outer loops, skip bundling for loop + preheaders. Those will be rescheduled in the outer + loop. */ + if (sel_is_loop_preheader_p (bb)) + { + clear_outdated_rtx_info (bb); + continue; + } + + if (bitmap_bit_p (blocks_to_reschedule, bb->index)) + { + flist_tail_init (new_fences); + + orig_max_seqno = init_seqno (0, blocks_to_reschedule, bb); + + /* Mark BB as head of the new ebb. 
*/ + bitmap_set_bit (forced_ebb_heads, bb->index); + + gcc_assert (fences == NULL); + + init_fences (bb_note (bb)); + + sel_sched_region_2 (orig_max_seqno); + + do_p = true; + break; + } + } + } + } +} + +/* Schedule the RGN region. */ +void +sel_sched_region (int rgn) +{ + bool schedule_p; + bool reset_sched_cycles_p; + + if (sel_region_init (rgn)) + return; + + if (sched_verbose >= 1) + sel_print ("Scheduling region %d\n", rgn); + + schedule_p = (!sched_is_disabled_for_current_region_p () + && dbg_cnt (sel_sched_region_cnt)); + reset_sched_cycles_p = pipelining_p; + if (schedule_p) + sel_sched_region_1 (); + else + /* Force initialization of INSN_SCHED_CYCLEs for correct bundling. */ + reset_sched_cycles_p = true; + + sel_region_finish (reset_sched_cycles_p); +} + +/* Perform global init for the scheduler. */ +static void +sel_global_init (void) +{ + calculate_dominance_info (CDI_DOMINATORS); + alloc_sched_pools (); + + /* Setup the infos for sched_init. */ + sel_setup_sched_infos (); + setup_sched_dump (); + + sched_rgn_init (false); + sched_init (); + + sched_init_bbs (); + /* Reset AFTER_RECOVERY if it has been set by the 1st scheduler pass. */ + after_recovery = 0; + can_issue_more = issue_rate; + + sched_extend_target (); + sched_deps_init (true); + setup_nop_and_exit_insns (); + sel_extend_global_bb_info (); + init_lv_sets (); + init_hard_regs_data (); +} + +/* Free the global data of the scheduler. */ +static void +sel_global_finish (void) +{ + free_bb_note_pool (); + free_lv_sets (); + sel_finish_global_bb_info (); + + free_regset_pool (); + free_nop_and_exit_insns (); + + sched_rgn_finish (); + sched_deps_finish (); + sched_finish (); + + if (current_loops) + sel_finish_pipelining (); + + free_sched_pools (); + free_dominance_info (CDI_DOMINATORS); +} + +/* Return true when we need to skip selective scheduling. Used for debugging. */ +bool +maybe_skip_selective_scheduling (void) +{ + return ! dbg_cnt (sel_sched_cnt); +} + +/* The entry point. */ +void +run_selective_scheduling (void) +{ + int rgn; + + if (n_basic_blocks == NUM_FIXED_BLOCKS) + return; + + sel_global_init (); + + for (rgn = 0; rgn < nr_regions; rgn++) + sel_sched_region (rgn); + + sel_global_finish (); +} + +#endif -- cgit v1.2.3