/* Copyright (C) 2000, 2001 Silicon Graphics, Inc. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of version 2 of the GNU General Public License as published by the Free Software Foundation. This program is distributed in the hope that it would be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. Further, this software is distributed without any warranty that it is free of the rightful claim of any third person regarding infringement or the like. Any license provided herein, whether implied or otherwise, applies only to this software file. Patent licenses, if any, provided herein do not apply to combinations of this program with other software, or any other product whatsoever. You should have received a copy of the GNU General Public License along with this program; if not, write the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pky, Mountain View, CA 94043, or: http://www.sgi.com For further information regarding this notice, see: http://oss.sgi.com/projects/GenInfo/NoticeExplan */ /** *** *** Description: *** *** The cache model, which chooses the best tiling and block size, given *** which loops we may place innermost and which we can tile. *** *** Reserved Prefix: *** *** None, but this reserves the right to use the SNL prefix, since it's *** really modelling SNLs. E.g. SNL_MODEL, if anything. *** *** CACHE_MODEL_INFO: *** *** Description: *** *** The return value of Cache_Model. *** *** Fields: *** *** double Hidable_Cycles_Per_Iter; *** double Non_Hidable_Cycles_Per_Iter; *** *** Depending on the cache model, the latency of certain *** references are hidable. Return cycles per iter of latency *** that are hidable and not. *** *** Exported functions *** *** void Cache_Model( *** ARRAY_REF* arl, *** INT depth, *** INT required_inner, *** const INT* inners, *** INT ninners, *** const INT* tiles, *** INT ntiles, *** const INT* unrolls, *** const DOLOOP_STACK* stack, *** BOOL blocking_disabled, *** const INT* required_blocksize, *** double model_cost, *** INT num_refs, *** *** -- return values: *** *** INT* new_order, *** INT* nstrips, // return val *** INT* stripdepth, // return val *** INT* iloop, // return array *** INT* stripsz, // return array *** INT* striplevel, // return array *** double* hidable_cycles_per_iter, *** double* non_hidable_cycles_per_iter, *** double* doverhead_best, *** double* thidable_cycles_per_iter, *** double* tnon_hidable_cycles_per_iter, *** double* tdoverhead_best *** ); *** *** arl: From the model. *** depth: The depth of the innermost loop. Outermost is zero. *** So the number of loops in the nest is depth+1. *** required_inner: Which loop will be innermost, because other *** parts of the compiler are modelling this case. *** inners: Which loops in [0..depth] may be placed inner. *** Presumably, depth is one of them. *** ninners: How many legal inner loops there are. *** Presumably, at least one. *** tiles: Which loops in [0..depth] may participate in the *** tile, but cannot be innermost. No loops from here *** will go into a tile unless all loops from *** legal_inners go into a tile. This is how we avoid *** imperfect loop interchange -- loops outside of *** imperfect code go in legal_tiles but not legal_inners. *** ntiles: How many legal inner loops there are. May be zero. *** unrolls: For each loop in [0..depth], how much that loop has *** been unrolled. *** stack: the do loop stack. *** blocking_disabled: don't block. Just evaluate this required inner. *** required_blocksize[loopno*MHD_MAX_LEVESL+mhd_level]: *** If this value is >= 0, then pragmas request that this *** loop at this level have this blocksize (0=in the block, *** but not stripped). This suggestion is taken lightly. *** model_cost: how much the model thinks the loop would cost *** without cache effects. *** num_refs: the number of different references in the *** innermost loop. E.g. do i a(i,j),a(i+1,j), a(i,j+1) *** has two, because the first two CSE in the i loop. *** *** new_order: The permutation to apply. e.g. new_order[0] tells *** which loop the new outer loop comes from. *** nstrips: *nstrips will hold the number of strips produced *** by tiling. *** stripdepth: *stripdepth will hold the depth of the loop *** (after reordering) where the strips will be placed. *** iloop: iloop[s] with hold the depth (after reordering) *** of the s'th stripped loop. *** stripsz: stripsz[s] with hold the blocksize *** of the s'th stripped loop. *** striplevel:striplevel[s] with hold the cache/memory hierarcy *** level this is stripped for, plus 1 (mhd_level 0 is *** striplevel 1). *** *** cycles_per_iter: how many cycles per iteration for *** memory and overhead. *** do_overhead: cycles per iter for overhead only. Therefore, *** cycles_per_iter - do_overhead is memory cost. **/ /** $Revision: 1.2 $ *** $Date: 2003/03/04 06:15:59 $ *** $Author: wychen $ *** $Source: bitbucket.org:berkeleylab/upc-translator.git/open64/osprey1.0/be/lno/cache_model.h $ **/ #ifndef cache_model_INCLUDED #define cache_model_INCLUDED "cache_model.h" #ifdef _KEEP_RCS_ID static char *cache_model_rcs_id = cache_model_INCLUDED "$Revision: 1.2 $"; #endif /* _KEEP_RCS_ID */ #ifndef defs_INCLUDED #include "defs.h" #endif class ARRAY_REF; // class DOLOOP_STACK; /** *** Class FORMULA *** *** Description: *** *** Builds up a tree expressing an arithmetic formula. The arithmetic *** is double precision floating point. There are numbered variables, *** constants, and the binary operators +, -, *, /. The function *** Eval() is passed the value of each variable, and a double value is *** returned. *** *** FORMULA_VAR returns the value of the given variable, the index *** into the passed in array. But also, temporary values can be *** stored into registers and reused later in the same eval. For *** example, to compute partial result, store it into scratch *** register 0 (there are dozens of scratch registers) and use it *** twice later, one would generate code like *** ((r0=...),(... r0 ... r0) *** where the assignment is created with FORMULA::Set(0, tree) *** and that is the left child of a comma operator and the *** values of r0 are obtained with FORMULA::Use(0). *** *** Exported Types: *** *** FORMULA_OP *** *** an enumeration, with values *** FORMULA_ADD *** FORMULA_SUB *** FORMULA_MUL *** FORMULA_DIV *** FORMULA_MAX *** FORMULA_MIN *** FORMULA_GE *** FORMULA_GT *** FORMULA_LE *** FORMULA_LT *** FORMULA_AND return x&&y, short circuit *** FORMULA_OR return x||y, short circuit *** FORMULA_COND *** FORMULA_FCONST *** FORMULA_VAR *** FORMULA_SET *** FORMULA_USE *** FORMULA_COMMA *** *** Exported Functions: *** *** FORMULA::FORMULA(BOOL TRUE, double constant) *** *** Create a formula consisting of a constant value *** *** FORMULA::FORMULA(BOOL FALSE, int variable) *** *** Create a formula consisting of a single variable. *** *** FORMULA::FORMULA(FORMULA_OP fop, FORMULA* left, FORMULA* right) *** *** Create a formula consisting of the fop (+,-,*,/) and the *** two operands. *** *** double FORMULA::Eval(INT nvars, const double* vars) const *** double FORMULA::Eval(INT nvars, const mINT64* vars) const *** double FORMULA::Eval(INT nvars, const mINT32* vars) const *** *** Compute the value of the function. An array of variable *** values is passed in. E.g. vars[0] contains the value of *** of the 0th variable. The INT* version is supplied for *** convenience -- the values are cast to doubles before they *** are used. vars may be NULL, in which case nvars must *** be zero. *** *** void FORMULA::Print(FILE* f) const *** *** Print in infix notation, so easy to read. *** *** Creating a formula: *** *** Start with "FORMULA::Fpool = &LNO_local_pool;" or likewise *** so we know from where to allocate nodes. Then call the static *** constructor-like formulas *** *** FORMULA* FORMULA::Var(INT* k) *** return a formula that will evaluate to the *** k-th user variable. *** FORMULA* FORMULA::Const(double d) *** a formula that will evaluate to the constant d. *** FORMULA* FORMULA::Set(INT r, FORMULA* f) *** a formula that will set scratch register r to have the *** value of f, and return that value. *** FORMULA* FORMULA::Set0(INT r, FORMULA* f) *** same as Set(), but f == NULL is okay, in which case use *** a constant value of zero. *** FORMULA* FORMULA::Use(INT r, FORMULA* f) *** a formula that will return the value of scratch register r *** FORMULA* FORMULA::{Add,Sub,Mul,Div,Comma,Ge,Gt,Le,Lt,Max,Min) *** (FORMULA* left, FORMULA* right) *** a formula that returns left op right. *** FORMULA* FORMULA::Cond(FORMULA* cond, FORMULA* l, FORMULA* r) *** a formula that returns cond!=0?l:r, where only one *** side is evaluated. **/ enum FORMULA_OP { FORMULA_BAD = 666, FORMULA_ADD = 1001, // to keep random stuff from working FORMULA_SUB, FORMULA_MUL, FORMULA_DIV, FORMULA_MAX, FORMULA_MIN, FORMULA_GE, FORMULA_GT, FORMULA_LE, FORMULA_LT, FORMULA_AND, FORMULA_OR, FORMULA_COND, // execute LHS if Cond != 0, RHS otherwise FORMULA_FCONST, FORMULA_VAR, // read from 'vars' parameter FORMULA_SET, // set _scratch register FORMULA_USE, // use _scratch register FORMULA_COMMA // left child probably a set }; // Set Fpool, the pool to use. Call New to get new elements. // Elements are freed by freeing the pool -- there is no need for a // destructor. // Note that you can preset valued in the array v, but you can also // have the formula itself set a value with Set. class FORMULA { public: void Print(FILE* f, FORMULA_OP parent) const; enum {SCRATCH_REGISTERS = 10*LNO_MAX_DO_LOOP_DEPTH}; double Eval(INT nvars, const double* vars) const; double Eval(INT nvars, const mINT64* vars) const; double Eval(INT nvars, const mINT32* vars) const; FORMULA* Add_To_Variable(INT var, INT val); FORMULA* Duplicate() const; void Print(FILE* f) const {Print(f, FORMULA_BAD);} static MEM_POOL* Fpool; inline static FORMULA* Var(INT v) { FmtAssert(Fpool, ("Fpool uninitialized")); return CXX_NEW(FORMULA(v, FALSE), Fpool); } inline static FORMULA* Use(INT v) { FmtAssert(Fpool, ("Fpool uninitialized")); return CXX_NEW(FORMULA(v, TRUE), Fpool); } inline static FORMULA* Set(INT v, FORMULA* f) { FmtAssert(Fpool, ("Fpool uninitialized")); return CXX_NEW(FORMULA(v, f), Fpool); } inline static FORMULA* Set0(INT v, FORMULA* f) { FmtAssert(Fpool, ("Fpool uninitialized")); return Set(v,f?f:Const(0.0)); } inline static FORMULA* Const(double v) { FmtAssert(Fpool, ("Fpool uninitialized")); return CXX_NEW(FORMULA(v), Fpool); } inline static FORMULA* Cond(FORMULA* cond, FORMULA* left, FORMULA* right) { FmtAssert(Fpool, ("Fpool uninitialized")); return CXX_NEW(FORMULA(cond, left, right), Fpool); } inline static FORMULA* Comma(FORMULA* left, FORMULA* right) { FmtAssert(Fpool, ("Fpool uninitialized")); return CXX_NEW(FORMULA(FORMULA_COMMA, left, right), Fpool); } inline static FORMULA* Comma3(FORMULA* c1, FORMULA* c2, FORMULA* c3) { FmtAssert(Fpool, ("Fpool uninitialized")); return Comma(c1, Comma(c2, c3)); } inline static FORMULA* Comma4(FORMULA* c1, FORMULA* c2, FORMULA* c3, FORMULA* c4) { FmtAssert(Fpool, ("Fpool uninitialized")); return Comma(c1, Comma3(c2, c3, c4)); } inline static FORMULA* Comma5(FORMULA* c1, FORMULA* c2, FORMULA* c3, FORMULA* c4, FORMULA* c5) { FmtAssert(Fpool, ("Fpool uninitialized")); return Comma(c1, Comma4(c2, c3, c4, c5)); } inline static FORMULA* Add(FORMULA* left, FORMULA* right) { FmtAssert(Fpool, ("Fpool uninitialized")); return CXX_NEW(FORMULA(FORMULA_ADD, left, right), Fpool); } inline static FORMULA* Sub(FORMULA* left, FORMULA* right) { FmtAssert(Fpool, ("Fpool uninitialized")); return CXX_NEW(FORMULA(FORMULA_SUB, left, right), Fpool); } inline static FORMULA* Mul(FORMULA* left, FORMULA* right) { FmtAssert(Fpool, ("Fpool uninitialized")); return CXX_NEW(FORMULA(FORMULA_MUL, left, right), Fpool); } inline static FORMULA* Div(FORMULA* left, FORMULA* right) { FmtAssert(Fpool, ("Fpool uninitialized")); return CXX_NEW(FORMULA(FORMULA_DIV, left, right), Fpool); } inline static FORMULA* Max(FORMULA* left, FORMULA* right) { FmtAssert(Fpool, ("Fpool uninitialized")); return CXX_NEW(FORMULA(FORMULA_MAX, left, right), Fpool); } inline static FORMULA* Min(FORMULA* left, FORMULA* right) { FmtAssert(Fpool, ("Fpool uninitialized")); return CXX_NEW(FORMULA(FORMULA_MIN, left, right), Fpool); } inline static FORMULA* Ge(FORMULA* left, FORMULA* right) { FmtAssert(Fpool, ("Fpool uninitialized")); return CXX_NEW(FORMULA(FORMULA_GE, left, right), Fpool); } inline static FORMULA* Gt(FORMULA* left, FORMULA* right) { FmtAssert(Fpool, ("Fpool uninitialized")); return CXX_NEW(FORMULA(FORMULA_GT, left, right), Fpool); } inline static FORMULA* Le(FORMULA* left, FORMULA* right) { FmtAssert(Fpool, ("Fpool uninitialized")); return CXX_NEW(FORMULA(FORMULA_LE, left, right), Fpool); } inline static FORMULA* Lt(FORMULA* left, FORMULA* right) { FmtAssert(Fpool, ("Fpool uninitialized")); return CXX_NEW(FORMULA(FORMULA_LT, left, right), Fpool); } inline static FORMULA* And(FORMULA* left, FORMULA* right) { FmtAssert(Fpool, ("Fpool uninitialized")); return CXX_NEW(FORMULA(FORMULA_OR, left, right), Fpool); } inline static FORMULA* Or(FORMULA* left, FORMULA* right) { FmtAssert(Fpool, ("Fpool uninitialized")); return CXX_NEW(FORMULA(FORMULA_OR, left, right), Fpool); } private: FORMULA(double v) { _fop = FORMULA_FCONST; _fconst = v; } FORMULA(INT v, BOOL use_reg) { _fop = use_reg ? FORMULA_USE : FORMULA_VAR; _var = v; } FORMULA(INT v, FORMULA* f) { _fop = FORMULA_SET; _var = v; _kids.Left = f; } FORMULA(FORMULA_OP fop, FORMULA* left, FORMULA* right) { Is_True(left && right, ("Missing child for FORMULA::FORMULA()")); Is_True(fop == FORMULA_ADD || fop == FORMULA_SUB || fop == FORMULA_MUL || fop == FORMULA_DIV || fop == FORMULA_MAX || fop == FORMULA_MIN || fop == FORMULA_GE || fop == FORMULA_GT || fop == FORMULA_LE || fop == FORMULA_LT || fop == FORMULA_AND || fop == FORMULA_OR || fop == FORMULA_COMMA, ("Bad call to formula constructor: fop=%d", fop)); _fop = fop; _kids.Left = left; _kids.Right = right; } FORMULA(FORMULA* cond, FORMULA* left, FORMULA* right) { Is_True(left && right && cond, ("Missing child for FORMULA::FORMULA()")); _fop = FORMULA_COND; _kids.Cond = cond; _kids.Left = left; _kids.Right = right; } // debugging aid ~FORMULA() { _fop = FORMULA_BAD; } double Eval(const double* vars) const; double Eval_Inlined(const double* vars) const { if (_fop == FORMULA_FCONST) return _fconst; else if (_fop == FORMULA_VAR) return vars[_var]; else return Eval(vars); } FORMULA_OP _fop; union { double _fconst; INT _var; }; // wasting some space, since Cond and Right could go in here too. struct { FORMULA* Cond; FORMULA* Left; FORMULA* Right; } _kids; static double _scratch[SCRATCH_REGISTERS]; }; class COMPUTE_FOOTPRINT_RVAL { FORMULA* _formula; // sum of Wformula and Rformula public: FORMULA* RFormula; // formula for reads FORMULA* WFormula; // formula for writes FORMULA* AllFormula(); // total ... use only after // RFormula and Wformula are final INT D; COMPUTE_FOOTPRINT_RVAL() : WFormula(NULL), RFormula(NULL), _formula(NULL), D(0) {} void Print(FILE* f) const; }; extern void Set_Cache_Model_Statics(INT mhd_level); extern COMPUTE_FOOTPRINT_RVAL Compute_Footprint( const ARRAY_REF* arl, INT nloops, // to be tiled const INT* loops, // to be tiled const INT* arl_stripsz, const INT64* est_iters, const INT64* max_iters, const INT* unrolls, INT depth, INT stripdepth, const INT* permute_order, INT v_first, INT64 outersz, BOOL using_tlb, INT middle_loop_no); void Cache_Model(ARRAY_REF* arl, INT depth, INT required_inner, const INT* legal_inners, INT nlegal_inners, const INT* legal_tiles, INT nlegal_tiles, const INT* unrolls, const DOLOOP_STACK* stack, BOOL blocking_disabled, const INT *required_blocksize, double model_cost, INT num_refs, INT* new_order, // array INT* nstrips, // return val INT* stripdepth, // return val INT* iloop, // return array INT* stripsz, // return array INT* striplevel, // return array double* cycles_per_iter, // scalar double* doverhead_best); // scalar #endif