/*

  Copyright (C) 2000, 2001 Silicon Graphics, Inc.  All Rights Reserved.

  This program is free software; you can redistribute it and/or modify it
  under the terms of version 2 of the GNU General Public License as
  published by the Free Software Foundation.

  This program is distributed in the hope that it would be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  

  Further, this software is distributed without any warranty that it is
  free of the rightful claim of any third person regarding infringement 
  or the like.  Any license provided herein, whether implied or 
  otherwise, applies only to this software file.  Patent licenses, if 
  any, provided herein do not apply to combinations of this program with 
  other software, or any other product whatsoever.  

  You should have received a copy of the GNU General Public License along
  with this program; if not, write the Free Software Foundation, Inc., 59
  Temple Place - Suite 330, Boston MA 02111-1307, USA.

  Contact information:  Silicon Graphics, Inc., 1600 Amphitheatre Pky,
  Mountain View, CA 94043, or:

  http://www.sgi.com

  For further information regarding this notice, see:

  http://oss.sgi.com/projects/GenInfo/NoticeExplan

*/


/* This may look like C code, but it is really -*- C++ -*-
 *
 * ====================================================================
 * ====================================================================
 *
 * Module: config_cache.h
 * $Revision: 1.2 $
 * $Date: 2004/08/31 07:39:20 $
 * $Author: wychen $
 * $Source: bitbucket.org:berkeleylab/upc-translator.git/open64/osprey1.0/common/com/config_cache.h $
 *
 * Revision history:
 *  14-Nov-96 - Original Version, duplicated from cache_parameters.h.
 *
 * Description:
 *
 * This is the description of the memory hierarchy.  It is only used
 * to determine how to SNL transform and for prefetching.  There's no
 * need to model memory hierarchy levels that are to be ignored during
 * transformation.  E.g. typically, there's no need to model main
 * memory.
 *
 * For instructions on adding new memory hierarchy configuration
 * parameters, see the header for config_lno.h.
 *
 * ====================================================================
 * ====================================================================
 */

#ifndef config_cache_INCLUDED
#define config_cache_INCLUDED

#ifdef _KEEP_RCS_ID
/*
 * RCS identification string for this header.  It is only compiled in when
 * _KEEP_RCS_ID is defined.  The REFERENCED marker below is presumably there
 * to keep lint-style tools from reporting the variable as unused -- confirm
 * against the project's tooling before removing it.
 */
/*REFERENCED*/
static char *config_cache_rcs_id = "$Source: bitbucket.org:berkeleylab/upc-translator.git/open64/osprey1.0/common/com/config_cache.h $ $Revision: 1.2 $";
#endif /* _KEEP_RCS_ID */

/*
 * NOTE: this file is included by lnodriver.c and therefore needs to be
 * compilable in C.  This is accomplished by declaring everything as structs
 * with public fields, and then having member functions be protected by
 * conditional compilation (conditional on C++).
 */

/**
*** Description:
***
***    Unique prefix
***
***        MHD_        (meaning "memory hierarchy description")
***
***    MHD_TYPE
***
***        MHD_TYPE_NONE    (not modelling anything)
***        MHD_TYPE_CACHE   (cache: misses to further out cache or main memory)
***        MHD_TYPE_MEM     (main memory: misses to a disk)
***
***    MHD_LEVEL
***
***        The parameters that describe this level of the memory hierarchy.
***
***        MHD_TYPE  Type
***
***             Are we modelling cache, main memory, or nothing
***
***        INT64     Size
***
***             Number of bytes the cache or main memory holds.
***
***        INT64     Effective_Size
***
***             How many bytes the memory hierarchy level can effectively hold
***
***        INT32     Line_Size
***
***             Number of bytes in a cache line or page.
***
***        INT32     Clean_Miss_Penalty
***
***             Processor cycles to next level of memory hierarchy to replace
***             a clean line.
***
***        INT32     Dirty_Miss_Penalty
***
***             Processor cycles to next level of memory hierarchy to replace
***             a dirty line.
***
***        INT32     Miss_Penalty
***
***             Processor cycles to next level of memory hierarchy to replace
***             a clean or dirty line.  Used only to set the distinct
***		clean/dirty variables with a single option -- not to be
***		used outside option processing.
***
***        INT32     Associativity
***
***             Only for MHD_TYPE_CACHE: 1 for direct mapped, etc.
***
***        INT32     TLB_Entries
***
***             -1 if no TLB; otherwise, the number of entries
***
***        INT32     Page_Size
***
***             How many bytes are mapped by one TLB entry
***
***        INT32     TLB_Clean_Miss_Penalty
***        INT32     TLB_Dirty_Miss_Penalty
***        INT32     TLB_Miss_Penalty
***
***		How many cycles a TLB miss costs.
***		The third is used only during option processing to
***		allow setting the first two with a single option --
***		not to be used outside option processing.
***
***        double    Typical_Outstanding
***
***             How many loads may be outstanding at the same time typically.
***             1 means no overlap.  If a cache's max is 4, then 3 might be
***             a reasonable number to use here.  Don't use anything less
***             than 1.
***
***        double    Load_Op_Overlap_1
***        double    Load_Op_Overlap_2
***
***             How much of a load may be overlapped by machine ops.
***             0.9 is reasonable for a non-blocking cache, and 0.0
***             for a blocking cache.  If these are different, then
***             _1 is how much of the first miss cycle is overlapped, and
***             _2 is how much of the last miss cycle that's overlappable
***             is overlapped.  Thus 1.0 >= _1 >= _2 >= 0.  It's linear
***             in between those.  Thus if the cache miss cycles are half
***             the estimated machine cycles m, then the actual cycles lost
***             to cache miss would have to be
***                       (1/2 m) [(3/4)(1-_1) + (1/4)(1-_2)]
***
***        INT32   Pct_Excess_Writes_Nonhidable
***
***             If a dirty line costs more than a clean line, let's call
***             the additional cost the excess.  E.g. clean miss=10 cycles,
***             dirty miss=18 cycles.  Excess 8 cycles.  If this parameter
***             is 0, then all 8 additional cycles are treated just as
***             the first 10: it can be hidden.  Realistically, this is not
***             the case.  The processor has to wait longer, and might run
***             out of work.  Dirty misses also take up extra cache resource.
***             If this value is greater than 0, the part that isn't treated
***             as hidable is added directly to the cost (after division
***             by Typical_Outstanding).  A perfectly reasonable value is 100.
***             Because of the extra resources used, one can even imagine
***             a number >100.  We actually allow up to 1000 in the options
***             processing (for testing purposes), but that doesn't
***             make sense.
***
***        BOOL     Prefetch_Level
***
***             Prefetching desired at this memory level?
***
***	   The following are used only for option processing, and
***	   should not be referenced elsewhere:
***
***        INT32     Miss_Penalty
***        INT32     TLB_Miss_Penalty
***
***		Described above.
***
***        char *   CS_string
***
***             Cache size in string form for option processing.
***
***        BOOL	    <various>_Set
***
***             The relevant option was set from the command line.
***
***        Several of the values are specified by the constructor, which
***        from the cache size, line size, and associativity computes
***        the effective cache size.  Other field values are initialized
***        to -1, NULL, or FALSE as appropriate.  If any are
***	   inappropriately 0 or -1 after initialization, then the cache
***        specification is invalid, and Valid() returns false.
***
***        MHD_LEVEL()
***
***             Initialize fields to -1, NULL, or FALSE.
***
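***        MHD_LEVEL(MHD_TYPE type, INT64 cs, INT32 ls, INT32 cmp, INT32 dmp,
***                  INT32 assoc, INT32 entries, INT32 pagesz,
***                  INT32 tlb_cmp, INT32 tlb_dmp,
***                  double outstanding, double op_overlap_1,
***                  double op_overlap_2, INT32 pct_excess_writes_nonhidable)
***
***             Initialize fields and compute Effective_Size.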
***
***        ~MHD_LEVEL()
***        void          operator = (const MHD_LEVEL&);
***        void          Print(FILE* f) const;
***
***             destruct, assign, print
***            
***        void          Merge_Options(const MHD_LEVEL& o)
***
***             Alter specification by merging in defined values from o.
***
***        BOOL          Valid() const;
***
***             Returns true if and only if all cache fields >= 1 and type
***             not MHD_TYPE_NONE.
***
***        BOOL          TLB_Valid() const;
***
***             TLB_Entries, Page_Size and TLB_Miss_Penalty all >=1 and Valid()
***
***    MHD_MAX_LEVELS
***
***        The maximum number of memory hierarchy levels we will model.
***        Currently, 4.  If this changes, change lnodriver.c as well.
***
***    MHD
***
***        The memory hierarchy description, which contains MHD_LEVEL for
***        each level of the memory hierarchy to be modeled by the compiler,
***        and other system information useful for cache modelling.
***
***        BOOL           Non_Blocking_Loads
***
***             TRUE if processor continues after a cache miss.
***
***        INT32          Loop_Overhead_Base
***        INT32          Loop_Overhead_Memref
***
***             The loop overhead, in processor cycles, is 
***                  Loop_Overhead_Base + memrefs*Loop_Overhead_Memref
***             where memrefs is the number of non-cse'd memrefs in the
***             inner loop.  For example, do i a(i,j), a(i,j), a(i+1,j)
***             all are cse'd into one memref in the inner loop, but
***             do i a(i,j) a(i,j+1) have memref=2.
***             The base is lower for T5 than for others because pipeline
***             startup/winddown should be less when the fp pipelines are
***             shorter.  Also, multiple int/cycle.  The memref is the cost
***             to load an address into a register.
***
***        INT32          TLB_Trustworthiness
***
***             0 to ignore tlb, 100 to fully trust it.  Even if it's fully
***             trusted, when there is no blocking and a low cache miss rate,
***             we still reduce the TLB penalty if TLB_NoBlocking_Model is set.
***
***        BOOL           TLB_NoBlocking_Model
***
***             If we are not blocking and there is a low cache miss rate,
***             should we back off on the TLB penalty?  If true
***             it favors not blocking.  It's there because the TLB model
***             is inaccurate and especially can overestimate the TLB miss
***             rate when we are not blocking.
***
***        MHD_LEVEL      L[MHD_MAX_LEVELS]
***
***             E.g. L[0] might be the primary cache.
***
***        INT            First()
***
***             The first valid level.  Returns -1 if none.
***
***        INT            Next(INT l)
***
***             The valid level after l.  Returns -1 when done.
***
***        void           Merge_Options(const MHD& o);
***
***            Alter specification by merging in defined values from o.
***
***        void           Initialize()
***
***            Use information about the target to set default values for
***            these cache parameters.
***
***        void           Print(FILE*) const
***        MHD()
***        ~MHD()
***
*** Exported Variables:
***
***    MHD Mhd;
***
***        The memory hierarchy description of the target we are compiling for
***
***    MHD Mhd_Options;
***
***        User specified options
**/

/*
 * MHD_TYPE -- what a given memory hierarchy level models.
 *
 * The numbering starts at 222 rather than 0; the values are spelled out
 * here so that the mapping is visible at a glance.
 */
typedef enum MHD_TYPE {
  MHD_TYPE_NONE  = 222,  /* not modelling anything */
  MHD_TYPE_CACHE = 223,  /* cache: misses go to a further-out cache or main memory */
  MHD_TYPE_MEM   = 224   /* main memory: misses go to a disk */
} MHD_TYPE;

/*
 * MHD_LEVEL -- the parameters that describe one level of the memory
 * hierarchy.
 *
 * The data members are plain public fields so that this header also
 * compiles as C; the member functions are only declared when
 * _LANGUAGE_C_PLUS_PLUS is defined.
 *
 * The default constructor lists its mem-initializers in field declaration
 * order, which is the order in which C++ performs the initialization.
 * Keep the two in step when adding a field.
 */
typedef struct MHD_LEVEL {
  MHD_TYPE  Type;                 /* cache, main memory, or nothing */
  INT64   Size;                   /* bytes the cache or main memory holds */
  INT64   Effective_Size;         /* bytes this level can effectively hold */
  INT32   Line_Size;              /* bytes in a cache line or page */
  INT32   Clean_Miss_Penalty;     /* cycles to replace a clean line */
  INT32   Dirty_Miss_Penalty;     /* cycles to replace a dirty line */
  INT32   Associativity;          /* caches only: 1 for direct mapped, etc. */
  INT32   TLB_Entries;            /* -1 if no TLB, else number of entries */
  INT32   Page_Size;              /* bytes mapped by one TLB entry */
  BOOL    Prefetch_Level;         /* prefetching desired at this level?
                                   * (default constructor stores -1) */
  INT32   TLB_Clean_Miss_Penalty; /* cycles a clean TLB miss costs */
  INT32   TLB_Dirty_Miss_Penalty; /* cycles a dirty TLB miss costs */

  double  Typical_Outstanding;    /* loads typically outstanding at once;
                                   * 1 means no overlap */
  double  Load_Op_Overlap_1;      /* fraction of the first miss cycle that
                                   * machine ops can overlap */
  double  Load_Op_Overlap_2;      /* fraction of the last overlappable miss
                                   * cycle that machine ops can overlap */
  INT32   Pct_Excess_Writes_Nonhidable; /* percentage of the dirty-over-clean
                                         * excess cost that cannot be hidden */

  /* Used just for option setting: */
  char *  CS_string;              /* cache size in string form */
  BOOL    CMP_Set;                /* Clean_Miss_Penalty was set explicitly */
  BOOL    DMP_Set;                /* Dirty_Miss_Penalty was set explicitly */
  BOOL    Is_Mem_Level;           /* NOTE(review): not described in the
                                   * header documentation; presumably marks
                                   * a main-memory level -- confirm
                                   * (default constructor stores -1) */
  BOOL    Is_Mem_Level_Set;       /* Is_Mem_Level was set explicitly */
  INT32   Miss_Penalty;           /* sets clean and dirty penalty with a
                                   * single option; option processing only */
  BOOL    Miss_Penalty_Set;       /* Miss_Penalty was set explicitly */
  BOOL    TLB_CMP_Set;            /* TLB_Clean_Miss_Penalty set explicitly */
  BOOL    TLB_DMP_Set;            /* TLB_Dirty_Miss_Penalty set explicitly */
  INT32   TLB_Miss_Penalty;       /* sets both TLB penalties with a single
                                   * option; option processing only */
  BOOL    TLB_MP_Set;             /* TLB_Miss_Penalty was set explicitly */

#if defined(_LANGUAGE_C_PLUS_PLUS)
  /* Assign, print, and merge in the defined values from another level. */
  void      operator = (const MHD_LEVEL&);
  void      Print(FILE* f) const;
  void      Merge_Options(const MHD_LEVEL& o);

  /* Validity checks for the cache fields and for the TLB fields. */
  BOOL      Valid() const;
  BOOL      TLB_Valid() const;

  /*
   * Default constructor: the type is MHD_TYPE_NONE and every other field
   * is set to -1, NULL, or FALSE.
   */
  MHD_LEVEL() : Type(MHD_TYPE_NONE),
                Size(-1),
                Effective_Size(-1),
                Line_Size(-1),
                Clean_Miss_Penalty(-1),
                Dirty_Miss_Penalty(-1),
                Associativity(-1),
                TLB_Entries(-1),
                Page_Size(-1),
                Prefetch_Level(-1),
                TLB_Clean_Miss_Penalty(-1),
                TLB_Dirty_Miss_Penalty(-1),
                Typical_Outstanding(-1.0),
                Load_Op_Overlap_1(-1.0),
                Load_Op_Overlap_2(-1.0),
                Pct_Excess_Writes_Nonhidable(-1),
                CS_string(NULL),
                CMP_Set(FALSE),
                DMP_Set(FALSE),
                Is_Mem_Level(-1),
                Is_Mem_Level_Set(FALSE),
                Miss_Penalty(-1),
                Miss_Penalty_Set(FALSE),
                TLB_CMP_Set(FALSE),
                TLB_DMP_Set(FALSE),
                TLB_Miss_Penalty(-1),
                TLB_MP_Set(FALSE) {}

  /* Fully specified constructor; defined outside this header. */
  MHD_LEVEL(MHD_TYPE type, INT64 cs, INT32 ls, INT32 cmp, INT32 dmp,
            INT32 assoc, INT32 entries, INT32 pagesz,
            INT32 tlb_cmp, INT32 tlb_dmp,
            double outstanding, double op_overlap_1, double op_overlap_2,
            INT32 pct_excess_writes_nonhidable);
  ~MHD_LEVEL() {}

 private:

  //MHD_LEVEL(const MHD_LEVEL&); /* undefined */
  void      Compute_Effective_Size();
#endif
} MHD_LEVEL;

/*
 * Maximum number of memory hierarchy levels that can be described; it is
 * the length of the L[] array in struct MHD.
 *
 * Don't forget to modify config_lno.c as well.
 * NOTE(review): the description block near the top of this header names
 * lnodriver.c instead -- confirm which file(s) must be kept in sync.
 */
#define MHD_MAX_LEVELS 4

/*
 * MHD -- the memory hierarchy description: one MHD_LEVEL per modelled
 * level plus system-wide parameters used for cache modelling.
 *
 * As with MHD_LEVEL, the data members are plain fields so the header
 * compiles as C, and the member functions are C++ only.  The constructor
 * lists its mem-initializers in field declaration order, which is the
 * order in which C++ performs the initialization.
 */
typedef struct MHD {
  INT32     Loop_Overhead_Base;    /* loop overhead in cycles is
                                    * Loop_Overhead_Base +
                                    * memrefs * Loop_Overhead_Memref */
  INT32     Loop_Overhead_Memref;  /* per-memref part of the loop overhead */
  BOOL      Non_Blocking_Loads;    /* TRUE if the processor continues after
                                    * a cache miss */
  INT32     TLB_Trustworthiness;   /* 0 to ignore the TLB model, 100 to
                                    * fully trust it */
  BOOL      TLB_NoBlocking_Model;  /* back off on the TLB penalty when not
                                    * blocking with a low cache miss rate */
  MHD_LEVEL L[MHD_MAX_LEVELS];     /* e.g. L[0] might be the primary cache */

#if defined(_LANGUAGE_C_PLUS_PLUS)
  /* Iterate over the valid levels; both return -1 when there is none. */
  INT       First();
  INT       Next(INT);

  /* Merge in the defined values from another description. */
  void      Merge_Options(const MHD&);

  /* Set default values from information about the target. */
  void      Initialize();
  void      Print(FILE*) const;

  /*
   * Every scalar field starts at -1, including the BOOL ones.  The L[]
   * elements are not named here, so each one is default-constructed.
   */
  MHD() : Loop_Overhead_Base(-1),
          Loop_Overhead_Memref(-1),
          Non_Blocking_Loads(-1),
          TLB_Trustworthiness(-1),
          TLB_NoBlocking_Model(-1) {}
  ~MHD() {}

 private:

  /* Declared private and left undefined so that an MHD cannot be copied. */
  MHD(const MHD&);                /* undefined */
#endif
} MHD;

/* The memory hierarchy description of the target being compiled for. */
extern MHD Mhd;
/* The memory hierarchy options specified by the user. */
extern MHD Mhd_Options;

#endif /* config_cache_INCLUDED */