Fixed MTP to work with TWRP

This commit is contained in:
awab228 2018-06-19 23:16:04 +02:00
commit f6dfaef42e
50820 changed files with 20846062 additions and 0 deletions

View file

@ -0,0 +1,5 @@
ccflags-$(CONFIG_SGI_GRU_DEBUG) := -DDEBUG
obj-$(CONFIG_SGI_GRU) := gru.o
gru-y := grufile.o grumain.o grufault.o grutlbpurge.o gruprocfs.o grukservices.o gruhandles.o grukdump.o

View file

@ -0,0 +1,78 @@
/*
* Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation; either version 2.1 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef __GRU_H__
#define __GRU_H__
/*
* GRU architectural definitions
*/
#define GRU_CACHE_LINE_BYTES 64
#define GRU_HANDLE_STRIDE 256
#define GRU_CB_BASE 0
#define GRU_DS_BASE 0x20000
/*
* Size used to map GRU GSeg
*/
#if defined(CONFIG_IA64)
#define GRU_GSEG_PAGESIZE (256 * 1024UL)
#elif defined(CONFIG_X86_64)
#define GRU_GSEG_PAGESIZE (256 * 1024UL) /* ZZZ 2MB ??? */
#else
#error "Unsupported architecture"
#endif
/*
 * Structure for obtaining GRU resource information.
 * One instance describes a single GRU chiplet; all fields are plain ints.
 * NOTE(review): field meanings below are inferred from the names only -
 * confirm against the code that fills this structure in.
 */
struct gru_chiplet_info {
int node;
int chiplet;
int blade;
int total_dsr_bytes;
int total_cbr;
int total_user_dsr_bytes;
int total_user_cbr;
int free_user_dsr_bytes;
int free_user_cbr;
};
/*
 * Statistics kept for each context.
 * The reserved[] words pad the structure to 14 unsigned longs.
 */
struct gru_gseg_statistics {
unsigned long fmm_tlbmiss;
unsigned long upm_tlbmiss;
unsigned long tlbdropin;
unsigned long context_stolen;
unsigned long reserved[10];
};
/* Flags for GRU options on the gru_create_context() call */
/* Select one of the following 4 options to specify how TLB misses are handled */
#define GRU_OPT_MISS_DEFAULT 0x0000 /* Use default mode */
#define GRU_OPT_MISS_USER_POLL 0x0001 /* User will poll CB for faults */
#define GRU_OPT_MISS_FMM_INTR 0x0002 /* Send interrupt to cpu to
handle fault */
#define GRU_OPT_MISS_FMM_POLL 0x0003 /* Use system polling thread */
#define GRU_OPT_MISS_MASK 0x0003 /* Mask for TLB MISS option */
#endif /* __GRU_H__ */

View file

@ -0,0 +1,736 @@
/*
* Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation; either version 2.1 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef __GRU_INSTRUCTIONS_H__
#define __GRU_INSTRUCTIONS_H__
extern int gru_check_status_proc(void *cb);
extern int gru_wait_proc(void *cb);
extern void gru_wait_abort_proc(void *cb);
/*
 * Architecture dependent functions
 *
 *   __flush_cache(p)              - flush the cache line containing p
 *   gru_ordered_store_ulong(p, v) - store the unsigned long v at address p
 *                                   after a compiler barrier, so the
 *                                   compiler cannot move earlier accesses
 *                                   past the store
 *
 * Macro arguments are fully parenthesized so that expression arguments
 * (e.g. "ptr + 1" or "a ? b : c") expand as intended.
 */
#if defined(CONFIG_IA64)
#include <linux/compiler.h>
#include <asm/intrinsics.h>
#define __flush_cache(p)		ia64_fc((unsigned long)(p))
/* Use volatile on IA64 to ensure ordering via st4.rel */
#define gru_ordered_store_ulong(p, v)					\
		do {							\
			barrier();					\
			*((volatile unsigned long *)(p)) = (v); /* force st.rel */ \
		} while (0)
#elif defined(CONFIG_X86_64)
#include <asm/cacheflush.h>
#define __flush_cache(p)		clflush(p)
#define gru_ordered_store_ulong(p, v)					\
		do {							\
			barrier();					\
			*(unsigned long *)(p) = (v);			\
		} while (0)
#else
#error "Unsupported architecture"
#endif
/*
* Control block status and exception codes
*/
#define CBS_IDLE 0
#define CBS_EXCEPTION 1
#define CBS_ACTIVE 2
#define CBS_CALL_OS 3
/* CB substatus bitmasks */
#define CBSS_MSG_QUEUE_MASK 7
#define CBSS_IMPLICIT_ABORT_ACTIVE_MASK 8
/* CB substatus message queue values (low 3 bits of substatus) */
#define CBSS_NO_ERROR 0
#define CBSS_LB_OVERFLOWED 1
#define CBSS_QLIMIT_REACHED 2
#define CBSS_PAGE_OVERFLOW 3
#define CBSS_AMO_NACKED 4
#define CBSS_PUT_NACKED 5
/*
 * Structure used to fetch exception detail for CBs that terminate with
 * CBS_EXCEPTION. Filled in by gru_get_cb_exception_detail().
 * The ecause field is a bitmask of CBE_CAUSE_* values and cbrexecstatus
 * a bitmask of CBR_EXS_* values (see the definitions in this header).
 */
struct control_block_extended_exc_detail {
unsigned long cb;
int opc;
int ecause;
int exopc;
long exceptdet0;
int exceptdet1;
int cbrstate;
int cbrexecstatus;
};
/*
* Instruction formats
*/
/*
 * Generic instruction format.
 * This definition has precise bit field definitions.
 *
 * The bit fields of DW 0 add up to 32 bits for the low half and 32 bits
 * for the high half. DW 1 - DW 7 are each one unsigned long (64 bits on
 * the architectures this header supports).
 */
struct gru_instruction_bits {
/* DW 0 - low */
unsigned int icmd: 1;
unsigned char ima: 3; /* CB_DelRep, unmapped mode */
unsigned char reserved0: 4;
unsigned int xtype: 3;
unsigned int iaa0: 2;
unsigned int iaa1: 2;
unsigned char reserved1: 1;
unsigned char opc: 8; /* opcode */
unsigned char exopc: 8; /* extended opcode */
/* DW 0 - high */
unsigned int idef2: 22; /* TRi0 */
unsigned char reserved2: 2;
unsigned char istatus: 2; /* one of the CBS_* status codes */
unsigned char isubstatus:4; /* see the CBSS_* masks and values */
unsigned char reserved3: 1;
unsigned char tlb_fault_color: 1;
/* DW 1 */
unsigned long idef4; /* 42 bits: TRi1, BufSize */
/* DW 2-6 */
unsigned long idef1; /* BAddr0 */
unsigned long idef5; /* Nelem */
unsigned long idef6; /* Stride, Operand1 */
unsigned long idef3; /* BAddr1, Value, Operand2 */
unsigned long reserved4;
/* DW 7 */
unsigned long avalue; /* AValue */
};
/*
 * Generic instruction with friendlier names. This format is used
 * for inline instructions.
 *
 * DW 0 can be accessed either as one 64-bit word (op64) or as two 32-bit
 * halves (op32, tri0) through the anonymous union.
 */
struct gru_instruction {
/* DW 0 */
union {
unsigned long op64; /* icmd,xtype,iaa0,ima,opc,tri0 */
struct {
unsigned int op32;
unsigned int tri0;
};
};
unsigned long tri1_bufsize; /* DW 1 */
unsigned long baddr0; /* DW 2 */
unsigned long nelem; /* DW 3 */
unsigned long op1_stride; /* DW 4 */
unsigned long op2_value_baddr1; /* DW 5 */
unsigned long reserved0; /* DW 6 */
unsigned long avalue; /* DW 7 */
};
/* Some shifts and masks for the low 64 bits of a GRU command */
#define GRU_CB_ICMD_SHFT 0
#define GRU_CB_ICMD_MASK 0x1
#define GRU_CB_XTYPE_SHFT 8
#define GRU_CB_XTYPE_MASK 0x7
#define GRU_CB_IAA0_SHFT 11
#define GRU_CB_IAA0_MASK 0x3
#define GRU_CB_IAA1_SHFT 13
#define GRU_CB_IAA1_MASK 0x3
#define GRU_CB_IMA_SHFT 1
#define GRU_CB_IMA_MASK 0x3
#define GRU_CB_OPC_SHFT 16
#define GRU_CB_OPC_MASK 0xff
#define GRU_CB_EXOPC_SHFT 24
#define GRU_CB_EXOPC_MASK 0xff
#define GRU_IDEF2_SHFT 32
#define GRU_IDEF2_MASK 0x3ffff
#define GRU_ISTATUS_SHFT 56
#define GRU_ISTATUS_MASK 0x3
/* GRU instruction opcodes (opc field) */
#define OP_NOP 0x00
#define OP_BCOPY 0x01
#define OP_VLOAD 0x02
#define OP_IVLOAD 0x03
#define OP_VSTORE 0x04
#define OP_IVSTORE 0x05
#define OP_VSET 0x06
#define OP_IVSET 0x07
#define OP_MESQ 0x08
#define OP_GAMXR 0x09
#define OP_GAMIR 0x0a
#define OP_GAMIRR 0x0b
#define OP_GAMER 0x0c
#define OP_GAMERR 0x0d
#define OP_BSTORE 0x0e
#define OP_VFLUSH 0x0f
/* Extended opcodes values (exopc field) */
/* GAMIR - AMOs with implicit operands */
#define EOP_IR_FETCH 0x01 /* Plain fetch of memory */
#define EOP_IR_CLR 0x02 /* Fetch and clear */
#define EOP_IR_INC 0x05 /* Fetch and increment */
#define EOP_IR_DEC 0x07 /* Fetch and decrement */
#define EOP_IR_QCHK1 0x0d /* Queue check, 64 byte msg */
#define EOP_IR_QCHK2 0x0e /* Queue check, 128 byte msg */
/* GAMIRR - Registered AMOs with implicit operands */
#define EOP_IRR_FETCH 0x01 /* Registered fetch of memory */
#define EOP_IRR_CLR 0x02 /* Registered fetch and clear */
#define EOP_IRR_INC 0x05 /* Registered fetch and increment */
#define EOP_IRR_DEC 0x07 /* Registered fetch and decrement */
#define EOP_IRR_DECZ 0x0f /* Registered fetch and decrement, update on zero*/
/* GAMER - AMOs with explicit operands */
#define EOP_ER_SWAP 0x00 /* Exchange argument and memory */
#define EOP_ER_OR 0x01 /* Logical OR with memory */
#define EOP_ER_AND 0x02 /* Logical AND with memory */
#define EOP_ER_XOR 0x03 /* Logical XOR with memory */
#define EOP_ER_ADD 0x04 /* Add value to memory */
#define EOP_ER_CSWAP 0x08 /* Compare with operand2, write operand1 if match*/
#define EOP_ER_CADD 0x0c /* Queue check, operand1*64 byte msg */
/* GAMERR - Registered AMOs with explicit operands */
#define EOP_ERR_SWAP 0x00 /* Exchange argument and memory */
#define EOP_ERR_OR 0x01 /* Logical OR with memory */
#define EOP_ERR_AND 0x02 /* Logical AND with memory */
#define EOP_ERR_XOR 0x03 /* Logical XOR with memory */
#define EOP_ERR_ADD 0x04 /* Add value to memory */
#define EOP_ERR_CSWAP 0x08 /* Compare with operand2, write operand1 if match*/
#define EOP_ERR_EPOLL 0x09 /* Poll for equality */
#define EOP_ERR_NPOLL 0x0a /* Poll for inequality */
/* GAMXR - SGI Arithmetic unit */
#define EOP_XR_CSWAP 0x0b /* Masked compare exchange */
/* Transfer types (xtype field) */
#define XTYPE_B 0x0 /* byte */
#define XTYPE_S 0x1 /* short (2-byte) */
#define XTYPE_W 0x2 /* word (4-byte) */
#define XTYPE_DW 0x3 /* doubleword (8-byte) */
#define XTYPE_CL 0x6 /* cacheline (64-byte) */
/* Instruction access attributes (iaa0, iaa1 fields) */
#define IAA_RAM 0x0 /* normal cached RAM access */
#define IAA_NCRAM 0x2 /* noncoherent RAM access */
#define IAA_MMIO 0x1 /* noncoherent memory-mapped I/O space */
#define IAA_REGISTER 0x3 /* memory-mapped registers, etc. */
/* Instruction mode attributes (ima field) */
#define IMA_MAPPED 0x0 /* Virtual mode */
#define IMA_CB_DELAY 0x1 /* hold read responses until status changes */
#define IMA_UNMAPPED 0x2 /* bypass the TLBs (OS only) */
#define IMA_INTERRUPT 0x4 /* Interrupt when instruction completes */
/* CBE ecause bits */
#define CBE_CAUSE_RI (1 << 0)
#define CBE_CAUSE_INVALID_INSTRUCTION (1 << 1)
#define CBE_CAUSE_UNMAPPED_MODE_FORBIDDEN (1 << 2)
#define CBE_CAUSE_PE_CHECK_DATA_ERROR (1 << 3)
#define CBE_CAUSE_IAA_GAA_MISMATCH (1 << 4)
#define CBE_CAUSE_DATA_SEGMENT_LIMIT_EXCEPTION (1 << 5)
#define CBE_CAUSE_OS_FATAL_TLB_FAULT (1 << 6)
#define CBE_CAUSE_EXECUTION_HW_ERROR (1 << 7)
#define CBE_CAUSE_TLBHW_ERROR (1 << 8)
#define CBE_CAUSE_RA_REQUEST_TIMEOUT (1 << 9)
#define CBE_CAUSE_HA_REQUEST_TIMEOUT (1 << 10)
#define CBE_CAUSE_RA_RESPONSE_FATAL (1 << 11)
#define CBE_CAUSE_RA_RESPONSE_NON_FATAL (1 << 12)
#define CBE_CAUSE_HA_RESPONSE_FATAL (1 << 13)
#define CBE_CAUSE_HA_RESPONSE_NON_FATAL (1 << 14)
#define CBE_CAUSE_ADDRESS_SPACE_DECODE_ERROR (1 << 15)
#define CBE_CAUSE_PROTOCOL_STATE_DATA_ERROR (1 << 16)
#define CBE_CAUSE_RA_RESPONSE_DATA_ERROR (1 << 17)
#define CBE_CAUSE_HA_RESPONSE_DATA_ERROR (1 << 18)
#define CBE_CAUSE_FORCED_ERROR (1 << 19)
/* CBE cbrexecstatus bits */
#define CBR_EXS_ABORT_OCC_BIT 0
#define CBR_EXS_INT_OCC_BIT 1
#define CBR_EXS_PENDING_BIT 2
#define CBR_EXS_QUEUED_BIT 3
#define CBR_EXS_TLB_INVAL_BIT 4
#define CBR_EXS_EXCEPTION_BIT 5
#define CBR_EXS_CB_INT_PENDING_BIT 6
#define CBR_EXS_ABORT_OCC (1 << CBR_EXS_ABORT_OCC_BIT)
#define CBR_EXS_INT_OCC (1 << CBR_EXS_INT_OCC_BIT)
#define CBR_EXS_PENDING (1 << CBR_EXS_PENDING_BIT)
#define CBR_EXS_QUEUED (1 << CBR_EXS_QUEUED_BIT)
#define CBR_EXS_TLB_INVAL (1 << CBR_EXS_TLB_INVAL_BIT)
#define CBR_EXS_EXCEPTION (1 << CBR_EXS_EXCEPTION_BIT)
#define CBR_EXS_CB_INT_PENDING (1 << CBR_EXS_CB_INT_PENDING_BIT)
/*
* Exceptions are retried for the following cases. If any OTHER bits are set
* in ecause, the exception is not retryable.
*/
#define EXCEPTION_RETRY_BITS (CBE_CAUSE_EXECUTION_HW_ERROR | \
CBE_CAUSE_TLBHW_ERROR | \
CBE_CAUSE_RA_REQUEST_TIMEOUT | \
CBE_CAUSE_RA_RESPONSE_NON_FATAL | \
CBE_CAUSE_HA_RESPONSE_NON_FATAL | \
CBE_CAUSE_RA_RESPONSE_DATA_ERROR | \
CBE_CAUSE_HA_RESPONSE_DATA_ERROR \
)
/*
 * Message queue head structure.
 * The head and limit values can be accessed individually or together
 * as the single word "val".
 */
union gru_mesqhead {
unsigned long val;
struct {
unsigned int head;
unsigned int limit;
};
};
/*
 * Generate the low word (DW 0) of a GRU instruction.
 *
 * The result always has the ICMD bit set and istatus = CBS_ACTIVE; the
 * remaining arguments are placed at their GRU_CB_*_SHFT / GRU_IDEF2_SHFT
 * positions. No masking is applied - callers must pass in-range values.
 *
 * Every operand is widened to unsigned long before it is shifted. The
 * unsigned char arguments would otherwise be promoted to (signed) int, so
 * that e.g. an exopc with bit 7 set shifted left by 24 overflows int and
 * sign-extends into the upper 32 bits (idef2/istatus) of the result.
 */
static inline unsigned long
__opdword(unsigned char opcode, unsigned char exopc, unsigned char xtype,
	  unsigned char iaa0, unsigned char iaa1,
	  unsigned long idef2, unsigned char ima)
{
	return (1UL << GRU_CB_ICMD_SHFT) |
		((unsigned long)CBS_ACTIVE << GRU_ISTATUS_SHFT) |
		(idef2 << GRU_IDEF2_SHFT) |
		((unsigned long)iaa0 << GRU_CB_IAA0_SHFT) |
		((unsigned long)iaa1 << GRU_CB_IAA1_SHFT) |
		((unsigned long)ima << GRU_CB_IMA_SHFT) |
		((unsigned long)xtype << GRU_CB_XTYPE_SHFT) |
		((unsigned long)opcode << GRU_CB_OPC_SHFT) |
		((unsigned long)exopc << GRU_CB_EXOPC_SHFT);
}
/*
 * Architecture specific intrinsics
 *
 * gru_flush_cache() flushes the cache line containing p using the
 * architecture's __flush_cache() primitive.
 */
static inline void gru_flush_cache(void *p)
{
__flush_cache(p);
}
/*
 * Store the lower 64 bits of the command including the "start" bit. Then
 * start the instruction executing.
 *
 * ins  - instruction (control block) whose other fields are already set
 * op64 - DW 0 of the instruction, normally built with __opdword()
 *
 * The order of the three steps matters: DW 0 is written last (after a
 * compiler barrier), then a full memory barrier is issued and the cache
 * line holding the instruction is flushed.
 */
static inline void gru_start_instruction(struct gru_instruction *ins, unsigned long op64)
{
gru_ordered_store_ulong(ins, op64);
mb();
gru_flush_cache(ins);
}
/* Convert "hints" to IMA */
#define CB_IMA(h) ((h) | IMA_UNMAPPED)
/* Convert data segment cache line index into TRI0 / TRI1 value */
#define GRU_DINDEX(i) ((i) * GRU_CACHE_LINE_BYTES)
/* Inline functions for GRU instructions.
* Note:
* - nelem and stride are in elements
* - tri0/tri1 is in bytes for the beginning of the data segment.
*/
/*
 * Start an OP_VLOAD of a single doubleword (XTYPE_DW, nelem 1, stride 1)
 * from address "gpa". The access attribute "iaa" is placed both in the
 * top two bits of baddr0 and in the iaa0 field of the opcode word.
 */
static inline void gru_vload_phys(void *cb, unsigned long gpa,
		unsigned int tri0, int iaa, unsigned long hints)
{
	struct gru_instruction *insn = cb;
	unsigned long opword;

	opword = __opdword(OP_VLOAD, 0, XTYPE_DW, iaa, 0,
			   (unsigned long)tri0, CB_IMA(hints));

	insn->baddr0 = (long)gpa | ((unsigned long)iaa << 62);
	insn->nelem = 1;
	insn->op1_stride = 1;
	gru_start_instruction(insn, opword);
}
/*
 * Start an OP_VSTORE of a single doubleword (XTYPE_DW, nelem 1, stride 1)
 * to address "gpa". The access attribute "iaa" is placed both in the
 * top two bits of baddr0 and in the iaa0 field of the opcode word.
 */
static inline void gru_vstore_phys(void *cb, unsigned long gpa,
		unsigned int tri0, int iaa, unsigned long hints)
{
	struct gru_instruction *insn = cb;
	unsigned long opword;

	opword = __opdword(OP_VSTORE, 0, XTYPE_DW, iaa, 0,
			   (unsigned long)tri0, CB_IMA(hints));

	insn->baddr0 = (long)gpa | ((unsigned long)iaa << 62);
	insn->nelem = 1;
	insn->op1_stride = 1;
	gru_start_instruction(insn, opword);
}
/*
 * Start an OP_VLOAD of "nelem" elements of type "xtype", "stride"
 * elements apart, from mem_addr. tri0 goes into the opcode word.
 */
static inline void gru_vload(void *cb, unsigned long mem_addr,
		unsigned int tri0, unsigned char xtype, unsigned long nelem,
		unsigned long stride, unsigned long hints)
{
	struct gru_instruction *insn = cb;
	unsigned long opword;

	opword = __opdword(OP_VLOAD, 0, xtype, IAA_RAM, 0,
			   (unsigned long)tri0, CB_IMA(hints));

	insn->baddr0 = (long)mem_addr;
	insn->nelem = nelem;
	insn->op1_stride = stride;
	gru_start_instruction(insn, opword);
}
/*
 * Start an OP_VSTORE of "nelem" elements of type "xtype", "stride"
 * elements apart, to mem_addr. tri0 goes into the opcode word.
 */
static inline void gru_vstore(void *cb, unsigned long mem_addr,
		unsigned int tri0, unsigned char xtype, unsigned long nelem,
		unsigned long stride, unsigned long hints)
{
	struct gru_instruction *insn = cb;
	unsigned long opword;

	opword = __opdword(OP_VSTORE, 0, xtype, IAA_RAM, 0,
			   tri0, CB_IMA(hints));

	insn->baddr0 = (long)mem_addr;
	insn->nelem = nelem;
	insn->op1_stride = stride;
	gru_start_instruction(insn, opword);
}
/*
 * Start an OP_IVLOAD of "nelem" elements of type "xtype" from mem_addr.
 * tri0 goes into the opcode word, tri1 into the tri1_bufsize word.
 */
static inline void gru_ivload(void *cb, unsigned long mem_addr,
		unsigned int tri0, unsigned int tri1, unsigned char xtype,
		unsigned long nelem, unsigned long hints)
{
	struct gru_instruction *insn = cb;
	unsigned long opword;

	opword = __opdword(OP_IVLOAD, 0, xtype, IAA_RAM, 0,
			   tri0, CB_IMA(hints));

	insn->baddr0 = (long)mem_addr;
	insn->nelem = nelem;
	insn->tri1_bufsize = tri1;
	gru_start_instruction(insn, opword);
}
/*
 * Start an OP_IVSTORE of "nelem" elements of type "xtype" to mem_addr.
 * tri0 goes into the opcode word, tri1 into the tri1_bufsize word.
 */
static inline void gru_ivstore(void *cb, unsigned long mem_addr,
		unsigned int tri0, unsigned int tri1,
		unsigned char xtype, unsigned long nelem, unsigned long hints)
{
	struct gru_instruction *insn = cb;
	unsigned long opword;

	opword = __opdword(OP_IVSTORE, 0, xtype, IAA_RAM, 0,
			   tri0, CB_IMA(hints));

	insn->baddr0 = (long)mem_addr;
	insn->nelem = nelem;
	insn->tri1_bufsize = tri1;
	gru_start_instruction(insn, opword);
}
/*
 * Start an OP_VSET: "value" is the operand for "nelem" elements of type
 * "xtype", "stride" elements apart, starting at mem_addr.
 */
static inline void gru_vset(void *cb, unsigned long mem_addr,
		unsigned long value, unsigned char xtype, unsigned long nelem,
		unsigned long stride, unsigned long hints)
{
	struct gru_instruction *insn = cb;
	unsigned long opword;

	opword = __opdword(OP_VSET, 0, xtype, IAA_RAM, 0,
			   0, CB_IMA(hints));

	insn->baddr0 = (long)mem_addr;
	insn->op2_value_baddr1 = value;
	insn->nelem = nelem;
	insn->op1_stride = stride;
	gru_start_instruction(insn, opword);
}
/*
 * Start an OP_IVSET: "value" is the operand for "nelem" elements of type
 * "xtype" at mem_addr; tri1 goes into the tri1_bufsize word.
 */
static inline void gru_ivset(void *cb, unsigned long mem_addr,
		unsigned int tri1, unsigned long value, unsigned char xtype,
		unsigned long nelem, unsigned long hints)
{
	struct gru_instruction *insn = cb;
	unsigned long opword;

	opword = __opdword(OP_IVSET, 0, xtype, IAA_RAM, 0,
			   0, CB_IMA(hints));

	insn->baddr0 = (long)mem_addr;
	insn->op2_value_baddr1 = value;
	insn->nelem = nelem;
	insn->tri1_bufsize = tri1;
	gru_start_instruction(insn, opword);
}
/*
 * Start an OP_VFLUSH covering "nelem" elements of type "xtype",
 * "stride" elements apart, starting at mem_addr.
 */
static inline void gru_vflush(void *cb, unsigned long mem_addr,
		unsigned long nelem, unsigned char xtype, unsigned long stride,
		unsigned long hints)
{
	struct gru_instruction *insn = cb;
	unsigned long opword;

	opword = __opdword(OP_VFLUSH, 0, xtype, IAA_RAM, 0,
			   0, CB_IMA(hints));

	insn->baddr0 = (long)mem_addr;
	insn->op1_stride = stride;
	insn->nelem = nelem;
	gru_start_instruction(insn, opword);
}
/*
 * Start an OP_NOP instruction; only the opcode word is written.
 */
static inline void gru_nop(void *cb, int hints)
{
	struct gru_instruction *insn = cb;
	unsigned long opword = __opdword(OP_NOP, 0, 0, 0, 0, 0, CB_IMA(hints));

	gru_start_instruction(insn, opword);
}
/*
 * Start an OP_BCOPY of "nelem" elements of type "xtype" from src to dest.
 * Both addresses use the IAA_RAM access attribute; bufsize goes into the
 * tri1_bufsize word and tri0 into the opcode word.
 */
static inline void gru_bcopy(void *cb, const unsigned long src,
		unsigned long dest,
		unsigned int tri0, unsigned int xtype, unsigned long nelem,
		unsigned int bufsize, unsigned long hints)
{
	struct gru_instruction *insn = cb;
	unsigned long opword;

	opword = __opdword(OP_BCOPY, 0, xtype, IAA_RAM,
			   IAA_RAM, tri0, CB_IMA(hints));

	insn->baddr0 = (long)src;
	insn->op2_value_baddr1 = (long)dest;
	insn->nelem = nelem;
	insn->tri1_bufsize = bufsize;
	gru_start_instruction(insn, opword);
}
/*
 * Start an OP_BSTORE of "nelem" elements of type "xtype" from src to
 * dest. iaa0 is passed as 0 and iaa1 as IAA_RAM; tri0 goes into the
 * opcode word.
 */
static inline void gru_bstore(void *cb, const unsigned long src,
		unsigned long dest, unsigned int tri0, unsigned int xtype,
		unsigned long nelem, unsigned long hints)
{
	struct gru_instruction *insn = cb;
	unsigned long opword;

	opword = __opdword(OP_BSTORE, 0, xtype, 0, IAA_RAM,
			   tri0, CB_IMA(hints));

	insn->baddr0 = (long)src;
	insn->op2_value_baddr1 = (long)dest;
	insn->nelem = nelem;
	gru_start_instruction(insn, opword);
}
/*
 * Start an OP_GAMIR instruction on address src.
 * exopc selects the operation (see the EOP_IR_* values).
 */
static inline void gru_gamir(void *cb, int exopc, unsigned long src,
		unsigned int xtype, unsigned long hints)
{
	struct gru_instruction *insn = cb;
	unsigned long opword;

	opword = __opdword(OP_GAMIR, exopc, xtype, IAA_RAM, 0,
			   0, CB_IMA(hints));

	insn->baddr0 = (long)src;
	gru_start_instruction(insn, opword);
}
/*
 * Start an OP_GAMIRR instruction on address src.
 * exopc selects the operation (see the EOP_IRR_* values).
 */
static inline void gru_gamirr(void *cb, int exopc, unsigned long src,
		unsigned int xtype, unsigned long hints)
{
	struct gru_instruction *insn = cb;
	unsigned long opword;

	opword = __opdword(OP_GAMIRR, exopc, xtype, IAA_RAM, 0,
			   0, CB_IMA(hints));

	insn->baddr0 = (long)src;
	gru_start_instruction(insn, opword);
}
/*
 * Start an OP_GAMER instruction on address src with explicit operands.
 * exopc selects the operation (see the EOP_ER_* values); operand1 and
 * operand2 are stored in op1_stride and op2_value_baddr1.
 */
static inline void gru_gamer(void *cb, int exopc, unsigned long src,
		unsigned int xtype,
		unsigned long operand1, unsigned long operand2,
		unsigned long hints)
{
	struct gru_instruction *insn = cb;
	unsigned long opword;

	opword = __opdword(OP_GAMER, exopc, xtype, IAA_RAM, 0,
			   0, CB_IMA(hints));

	insn->baddr0 = (long)src;
	insn->op1_stride = operand1;
	insn->op2_value_baddr1 = operand2;
	gru_start_instruction(insn, opword);
}
/*
 * Start an OP_GAMERR instruction on address src with explicit operands.
 * exopc selects the operation (see the EOP_ERR_* values); operand1 and
 * operand2 are stored in op1_stride and op2_value_baddr1.
 */
static inline void gru_gamerr(void *cb, int exopc, unsigned long src,
		unsigned int xtype, unsigned long operand1,
		unsigned long operand2, unsigned long hints)
{
	struct gru_instruction *insn = cb;
	unsigned long opword;

	opword = __opdword(OP_GAMERR, exopc, xtype, IAA_RAM, 0,
			   0, CB_IMA(hints));

	insn->baddr0 = (long)src;
	insn->op1_stride = operand1;
	insn->op2_value_baddr1 = operand2;
	gru_start_instruction(insn, opword);
}
/*
 * Start an OP_GAMXR / EOP_XR_CSWAP instruction on address src, with
 * XTYPE_DW elements and nelem fixed at 4.
 *
 * NOTE(review): the tri0 argument is accepted but never used - the opcode
 * word is built with idef2 == 0. Confirm whether that is intentional.
 */
static inline void gru_gamxr(void *cb, unsigned long src,
		unsigned int tri0, unsigned long hints)
{
	struct gru_instruction *insn = cb;
	unsigned long opword;

	opword = __opdword(OP_GAMXR, EOP_XR_CSWAP, XTYPE_DW,
			   IAA_RAM, 0, 0, CB_IMA(hints));

	insn->baddr0 = (long)src;
	insn->nelem = 4;
	gru_start_instruction(insn, opword);
}
/*
 * Start an OP_MESQ instruction: "nelem" cacheline-sized (XTYPE_CL)
 * elements, with baddr0 set to the queue address and tri0 placed in the
 * opcode word.
 */
static inline void gru_mesq(void *cb, unsigned long queue,
		unsigned long tri0, unsigned long nelem,
		unsigned long hints)
{
	struct gru_instruction *insn = cb;
	unsigned long opword;

	opword = __opdword(OP_MESQ, 0, XTYPE_CL, IAA_RAM, 0,
			   tri0, CB_IMA(hints));

	insn->baddr0 = (long)queue;
	insn->nelem = nelem;
	gru_start_instruction(insn, opword);
}
/* Return the full 64-bit AValue word (DW 7) of the control block */
static inline unsigned long gru_get_amo_value(void *cb)
{
	return ((struct gru_instruction *)cb)->avalue;
}
/* Return the low 32 bits of the AValue word of the control block */
static inline int gru_get_amo_value_head(void *cb)
{
	return ((struct gru_instruction *)cb)->avalue & 0xffffffff;
}
/* Return the high 32 bits of the AValue word of the control block */
static inline int gru_get_amo_value_limit(void *cb)
{
	return ((struct gru_instruction *)cb)->avalue >> 32;
}
static inline union gru_mesqhead gru_mesq_head(int head, int limit)
{
union gru_mesqhead mqh;
mqh.head = head;
mqh.limit = limit;
return mqh;
}
/*
* Get struct control_block_extended_exc_detail for CB.
*/
extern int gru_get_cb_exception_detail(void *cb,
struct control_block_extended_exc_detail *excdet);
#define GRU_EXC_STR_SIZE 256
/*
 * Control block definition for checking status.
 *
 * Overlay for DW 0 of a control block that names only the fields needed
 * for status checks; the bit fields add up to 64 bits.
 */
struct gru_control_block_status {
unsigned int icmd :1;
unsigned int ima :3;
unsigned int reserved0 :4;
unsigned int unused1 :24;
unsigned int unused2 :24;
unsigned int istatus :2; /* one of the CBS_* status codes */
unsigned int isubstatus :4; /* see the CBSS_* masks and values */
unsigned int unused3 :2;
};
/* Get CB status: the raw istatus field of the control block */
static inline int gru_get_cb_status(void *cb)
{
	return ((struct gru_control_block_status *)cb)->istatus;
}
/* Get CB message queue substatus */
static inline int gru_get_cb_message_queue_substatus(void *cb)
{
struct gru_control_block_status *cbs = (void *)cb;
return cbs->isubstatus & CBSS_MSG_QUEUE_MASK;
}
/* Get CB substatus: the raw isubstatus field of the control block */
static inline int gru_get_cb_substatus(void *cb)
{
	return ((struct gru_control_block_status *)cb)->isubstatus;
}
/*
 * User interface to check an instruction status. UPM and exceptions
 * are handled automatically. However, this function does NOT wait
 * for an active instruction to complete.
 *
 * Returns CBS_ACTIVE straight away while the instruction is still active;
 * for any other status the result of gru_check_status_proc() is returned.
 */
static inline int gru_check_status(void *cb)
{
	struct gru_control_block_status *status = cb;
	int istatus = status->istatus;

	if (istatus == CBS_ACTIVE)
		return istatus;
	return gru_check_status_proc(cb);
}
/*
 * User interface (via inline function) to wait for an instruction
 * to complete. Completion status (IDLE or EXCEPTION) is returned
 * to the user. Exceptions due to hardware errors are automatically
 * retried before returning an exception.
 *
 * This is a thin wrapper around gru_wait_proc().
 */
static inline int gru_wait(void *cb)
{
return gru_wait_proc(cb);
}
/*
 * Wait for CB to complete. Aborts program if error. (Note: error does NOT
 * mean TLB miss - only fatal errors such as memory parity error or user
 * bugs will cause termination.)
 *
 * This is a thin wrapper around gru_wait_abort_proc().
 */
static inline void gru_wait_abort(void *cb)
{
gru_wait_abort_proc(cb);
}
/*
 * Get a pointer to the start of a gseg
 *	p	- Any valid pointer within the gseg
 *
 * The address is rounded down to a GRU_GSEG_PAGESIZE boundary.
 */
static inline void *gru_get_gseg_pointer (void *p)
{
	unsigned long addr = (unsigned long)p;
	unsigned long offset_mask = GRU_GSEG_PAGESIZE - 1;

	return (void *)(addr & ~offset_mask);
}
/*
 * Get a pointer to a control block
 *	gseg	- GSeg address returned from gru_get_thread_gru_segment()
 *	index	- index of desired CB
 *
 * Control blocks start at offset GRU_CB_BASE within the gseg and are
 * GRU_HANDLE_STRIDE bytes apart.
 */
static inline void *gru_get_cb_pointer(void *gseg,
						      int index)
{
	void *cb_base = gseg + GRU_CB_BASE;

	return cb_base + index * GRU_HANDLE_STRIDE;
}
/*
 * Get a pointer to a cacheline in the data segment portion of a GSeg
 *	gseg	- GSeg address returned from gru_get_thread_gru_segment()
 *	index	- index of desired cache line
 *
 * The data segment starts at offset GRU_DS_BASE within the gseg and is
 * indexed in units of GRU_CACHE_LINE_BYTES.
 */
static inline void *gru_get_data_pointer(void *gseg, int index)
{
	void *ds_base = gseg + GRU_DS_BASE;

	return ds_base + index * GRU_CACHE_LINE_BYTES;
}
/*
 * Convert a vaddr into the tri index within the GSEG
 *	vaddr	- virtual address within the gseg
 *
 * The result is the byte offset of vaddr from the start of the data
 * segment (offset within the gseg minus GRU_DS_BASE).
 */
static inline int gru_get_tri(void *vaddr)
{
	unsigned long gseg_offset;

	gseg_offset = (unsigned long)vaddr & (GRU_GSEG_PAGESIZE - 1);
	return gseg_offset - GRU_DS_BASE;
}
#endif /* __GRU_INSTRUCTIONS_H__ */

View file

@ -0,0 +1,903 @@
/*
* SN Platform GRU Driver
*
* FAULT HANDLER FOR GRU DETECTED TLB MISSES
*
* This file contains code that handles TLB misses within the GRU.
* These misses are reported either via interrupts or user polling of
* the user CB.
*
* Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/spinlock.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/device.h>
#include <linux/io.h>
#include <linux/uaccess.h>
#include <linux/security.h>
#include <linux/prefetch.h>
#include <asm/pgtable.h>
#include "gru.h"
#include "grutables.h"
#include "grulib.h"
#include "gru_instructions.h"
#include <asm/uv/uv_hub.h>
/* Return codes for vtop functions */
#define VTOP_SUCCESS 0
#define VTOP_INVALID -1
#define VTOP_RETRY -2
/*
 * Test if a physical address is a valid GRU GSEG address, i.e. lies in
 * the half-open range [gru_start_paddr, gru_end_paddr).
 */
static inline int is_gru_paddr(unsigned long paddr)
{
	if (paddr < gru_start_paddr)
		return 0;
	return paddr < gru_end_paddr;
}
/*
 * Find the vma of a GRU segment. Caller must hold mmap_sem.
 *
 * Returns the vma of the current task that contains vaddr, provided it
 * is a GRU mapping (its vm_ops is gru_vm_ops); otherwise NULL.
 */
struct vm_area_struct *gru_find_vma(unsigned long vaddr)
{
	struct vm_area_struct *candidate = find_vma(current->mm, vaddr);

	if (!candidate)
		return NULL;
	/* find_vma() may return a vma that starts above vaddr */
	if (candidate->vm_start > vaddr)
		return NULL;
	if (candidate->vm_ops != &gru_vm_ops)
		return NULL;
	return candidate;
}
/*
 * Find and lock the gts that contains the specified user vaddr.
 *
 * Returns:
 *	- *gts with the mmap_sem locked for read and the GTS locked.
 *	- NULL if vaddr invalid OR is not a valid GSEG vaddr. In this case
 *	  mmap_sem has already been released again.
 */
static struct gru_thread_state *gru_find_lock_gts(unsigned long vaddr)
{
	struct mm_struct *mm = current->mm;
	struct gru_thread_state *gts;
	struct vm_area_struct *vma;

	down_read(&mm->mmap_sem);
	vma = gru_find_vma(vaddr);
	gts = vma ? gru_find_thread_state(vma, TSID(vaddr, vma)) : NULL;
	if (!gts) {
		up_read(&mm->mmap_sem);
		return NULL;
	}

	mutex_lock(&gts->ts_ctxlock);
	return gts;
}
/*
 * Allocate (via gru_alloc_thread_state()) and lock the gts for the
 * specified user vaddr.
 *
 * Returns:
 *	- *gts with the GTS locked and mmap_sem held for read (it is taken
 *	  for write during the allocation and then downgraded).
 *	- ERR_PTR(-EINVAL) if vaddr is not in a GRU vma, or the error
 *	  pointer from gru_alloc_thread_state(). mmap_sem is not held.
 */
static struct gru_thread_state *gru_alloc_locked_gts(unsigned long vaddr)
{
	struct mm_struct *mm = current->mm;
	struct gru_thread_state *gts;
	struct vm_area_struct *vma;

	down_write(&mm->mmap_sem);
	vma = gru_find_vma(vaddr);
	if (!vma) {
		up_write(&mm->mmap_sem);
		return ERR_PTR(-EINVAL);
	}

	gts = gru_alloc_thread_state(vma, TSID(vaddr, vma));
	if (IS_ERR(gts)) {
		up_write(&mm->mmap_sem);
		return gts;
	}

	mutex_lock(&gts->ts_ctxlock);
	downgrade_write(&mm->mmap_sem);
	return gts;
}
/*
 * Unlock a GTS that was previously locked with gru_find_lock_gts().
 *
 * Releases the GTS context lock first and then the read lock on the
 * current task's mmap_sem.
 */
static void gru_unlock_gts(struct gru_thread_state *gts)
{
mutex_unlock(&gts->ts_ctxlock);
up_read(&current->mm->mmap_sem);
}
/*
 * Set a CB.istatus to active using a user virtual address. This must be done
 * just prior to a TFH RESTART. The new cb.istatus is an in-cache status ONLY.
 * If the line is evicted, the status may be lost. The in-cache update
 * is necessary to prevent the user from seeing a stale cb.istatus that will
 * change as soon as the TFH restart is complete. Races may cause an
 * occasional failure to clear the cb.istatus, but that is ok.
 *
 * A NULL cbk is accepted and ignored.
 */
static void gru_cb_set_istatus_active(struct gru_instruction_bits *cbk)
{
	if (!cbk)
		return;

	cbk->istatus = CBS_ACTIVE;
}
/*
 * Read & clear a TFM
 *
 * The GRU has an array of fault maps. A map is private to a cpu
 * Only one cpu will be accessing a cpu's fault map.
 *
 * This function scans the cpu-private fault map & clears all bits that
 * are set. The bits that were cleared are passed back through the two
 * output maps: imap receives the cleared fault_bits and dmap (in its
 * fault_bits array) receives the cleared done_bits. Note that since the
 * maps may be updated asynchronously by the GRU, atomic operations must
 * be used to clear bits.
 */
static void get_clear_fault_map(struct gru_state *gru,
struct gru_tlb_fault_map *imap,
struct gru_tlb_fault_map *dmap)
{
unsigned long i, k;
struct gru_tlb_fault_map *tfm;
tfm = get_tfm_for_cpu(gru, gru_cpu_fault_map_id());
prefetchw(tfm); /* Helps on hardware, required for emulator */
for (i = 0; i < BITS_TO_LONGS(GRU_NUM_CBE); i++) {
k = tfm->fault_bits[i];
/* Only do the atomic exchange when there is something to clear */
if (k)
k = xchg(&tfm->fault_bits[i], 0UL);
imap->fault_bits[i] = k;
k = tfm->done_bits[i];
if (k)
k = xchg(&tfm->done_bits[i], 0UL);
dmap->fault_bits[i] = k;
}
/*
 * Not functionally required but helps performance. (Required
 * on emulator)
 */
gru_flush_cache(tfm);
}
/*
 * Atomic (interrupt context) & non-atomic (user context) functions to
 * convert a vaddr into a physical address. The size of the page
 * is returned in pageshift.
 * 	returns:
 * 		  0 - successful
 * 		< 0 - error code
 * 		  1 - (atomic only) try again in non-atomic context
 *
 * The non-atomic variant below pins the page with get_user_pages(),
 * records its physical address and drops the reference again. It
 * returns -EFAULT if the page could not be obtained.
 */
static int non_atomic_pte_lookup(struct vm_area_struct *vma,
				 unsigned long vaddr, int write,
				 unsigned long *paddr, int *pageshift)
{
	struct page *page;

#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		*pageshift = HPAGE_SHIFT;
	else
		*pageshift = PAGE_SHIFT;
#else
	*pageshift = PAGE_SHIFT;
#endif

	if (get_user_pages(current, current->mm, vaddr, 1, write, 0,
			   &page, NULL) <= 0)
		return -EFAULT;

	*paddr = page_to_phys(page);
	put_page(page);
	return 0;
}
/*
 * atomic_pte_lookup
 *
 * Convert a user virtual address to a physical address
 * Only supports Intel large pages (2MB only) on x86_64.
 *	ZZZ - hugepage support is incomplete
 *
 * NOTE: mmap_sem is already held on entry to this function. This
 * guarantees existence of the page tables.
 *
 * Returns 0 with *paddr and *pageshift set on success, or 1 when the
 * lookup cannot be completed here: a page table level is missing, the
 * PTE is not present, or a write is requested and the PTE is not both
 * writable and dirty.
 */
static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr,
int write, unsigned long *paddr, int *pageshift)
{
pgd_t *pgdp;
pmd_t *pmdp;
pud_t *pudp;
pte_t pte;
/* Walk pgd -> pud -> pmd; give up if any level is empty */
pgdp = pgd_offset(vma->vm_mm, vaddr);
if (unlikely(pgd_none(*pgdp)))
goto err;
pudp = pud_offset(pgdp, vaddr);
if (unlikely(pud_none(*pudp)))
goto err;
pmdp = pmd_offset(pudp, vaddr);
if (unlikely(pmd_none(*pmdp)))
goto err;
#ifdef CONFIG_X86_64
/* A large pmd is itself the mapping entry: treat it as the pte */
if (unlikely(pmd_large(*pmdp)))
pte = *(pte_t *) pmdp;
else
#endif
pte = *pte_offset_kernel(pmdp, vaddr);
if (unlikely(!pte_present(pte) ||
(write && (!pte_write(pte) || !pte_dirty(pte)))))
return 1;
*paddr = pte_pfn(pte) << PAGE_SHIFT;
/*
 * NOTE(review): pageshift is derived from the vma type, not from whether
 * a large pmd was found above - confirm the two always agree.
 */
#ifdef CONFIG_HUGETLB_PAGE
*pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT;
#else
*pageshift = PAGE_SHIFT;
#endif
return 0;
err:
return 1;
}
/*
 * Translate a user virtual address of the gts' mm into a GRU global
 * physical address.
 *
 *	write	  - nonzero if the access is a write
 *	atomic	  - nonzero if only the atomic lookup may be used
 *	gpa	  - out: page-aligned global physical address
 *	pageshift - out: shift of the page size backing vaddr
 *
 * Returns:
 *	VTOP_SUCCESS - *gpa and *pageshift are valid
 *	VTOP_INVALID - no vma, lookup failed, or the address maps GRU space
 *	VTOP_RETRY   - atomic lookup failed; retry in non-atomic context
 */
static int gru_vtop(struct gru_thread_state *gts, unsigned long vaddr,
		    int write, int atomic, unsigned long *gpa, int *pageshift)
{
	struct vm_area_struct *vma;
	unsigned long phys;
	int shift;

	vma = find_vma(gts->ts_mm, vaddr);
	if (!vma)
		return VTOP_INVALID;

	/*
	 * Atomic lookup is faster & usually works even if called in non-atomic
	 * context.
	 */
	rmb();	/* Must check ms_range_active before loading PTEs */
	if (atomic_pte_lookup(vma, vaddr, write, &phys, &shift)) {
		if (atomic)
			return VTOP_RETRY;
		if (non_atomic_pte_lookup(vma, vaddr, write, &phys, &shift))
			return VTOP_INVALID;
	}

	if (is_gru_paddr(phys))
		return VTOP_INVALID;

	/* Align down to the start of the page before converting */
	phys &= ~((1UL << shift) - 1);
	*gpa = uv_soc_phys_ram_to_gpa(phys);
	*pageshift = shift;
	return VTOP_SUCCESS;
}
/*
 * Flush a CBE from cache. The CBE is clean in the cache. Dirty the
 * CBE cacheline so that the line will be written back to home agent.
 * Otherwise the line may be silently dropped. This has no impact
 * except on performance.
 *
 * A NULL cbe (the expected common case) is ignored.
 */
static void gru_flush_cache_cbe(struct gru_control_block_extended *cbe)
{
	if (likely(!cbe))
		return;

	cbe->cbrexecstatus = 0;		/* make CL dirty */
	gru_flush_cache(cbe);
}
/*
 * Preload the TLB with entries that may be required. Currently, preloading
 * is implemented only for BCOPY. Preload <tlb_preload_count> pages OR to
 * the end of the bcopy transfer, whichever is smaller.
 *
 * Pages are preloaded from the highest address downwards, stopping just
 * above the page that contains fault_vaddr. The function returns early on
 * the first page that cannot be translated or written into the TFH.
 */
static void gru_preload_tlb(struct gru_state *gru,
struct gru_thread_state *gts, int atomic,
unsigned long fault_vaddr, int asid, int write,
unsigned char tlb_preload_count,
struct gru_tlb_fault_handle *tfh,
struct gru_control_block_extended *cbe)
{
unsigned long vaddr = 0, gpa;
int ret, pageshift;
if (cbe->opccpy != OP_BCOPY)
return;
/*
 * Compute the last byte of the transfer for whichever operand faulted.
 * If the fault matches neither base address, vaddr stays 0 and the
 * loop below does nothing.
 */
if (fault_vaddr == cbe->cbe_baddr0)
vaddr = fault_vaddr + GRU_CACHE_LINE_BYTES * cbe->cbe_src_cl - 1;
else if (fault_vaddr == cbe->cbe_baddr1)
vaddr = fault_vaddr + (1 << cbe->xtypecpy) * cbe->cbe_nelemcur - 1;
fault_vaddr &= PAGE_MASK;
vaddr &= PAGE_MASK;
/* Never preload more than tlb_preload_count pages past the fault */
vaddr = min(vaddr, fault_vaddr + tlb_preload_count * PAGE_SIZE);
while (vaddr > fault_vaddr) {
ret = gru_vtop(gts, vaddr, write, atomic, &gpa, &pageshift);
if (ret || tfh_write_only(tfh, gpa, GAA_RAM, vaddr, asid, write,
GRU_PAGESIZE(pageshift)))
return;
gru_dbg(grudev,
"%s: gid %d, gts 0x%p, tfh 0x%p, vaddr 0x%lx, asid 0x%x, rw %d, ps %d, gpa 0x%lx\n",
atomic ? "atomic" : "non-atomic", gru->gs_gid, gts, tfh,
vaddr, asid, write, pageshift, gpa);
vaddr -= PAGE_SIZE;
STAT(tlb_preload_page);
}
}
/*
 * Drop a TLB entry into the GRU. The fault is described by info in a TFH.
 * Input:
 *	gru	GRU state; used here only for the debug message
 *	gts	thread state of the context that took the miss
 *	tfh	TLB fault handle describing the miss
 *	cbk	Address of user CBR. Null if not running in user context.
 *		A NULL cbk selects the atomic translation path.
 * Return:
 *	  0 = dropin done, or nothing to do (no exception pending, TFH idle,
 *	      or TFH in FMM state on a user call)
 *	  1 = range invalidate active, or CBR switched to user polling mode
 *	< 0 = error code (-EAGAIN: miss has no asid, -EFAULT: address could
 *	      not be translated and the CBR was put in exception state)
 *
 */
static int gru_try_dropin(struct gru_state *gru,
struct gru_thread_state *gts,
struct gru_tlb_fault_handle *tfh,
struct gru_instruction_bits *cbk)
{
struct gru_control_block_extended *cbe = NULL;
unsigned char tlb_preload_count = gts->ts_tlb_preload_count;
int pageshift = 0, asid, write, ret, atomic = !cbk, indexway;
unsigned long gpa = 0, vaddr = 0;
/*
* NOTE: The GRU contains magic hardware that eliminates races between
* TLB invalidates and TLB dropins. If an invalidate occurs
* in the window between reading the TFH and the subsequent TLB dropin,
* the dropin is ignored. This eliminates the need for additional locks.
*/
/*
* Prefetch the CBE if doing TLB preloading. cbe stays NULL otherwise,
* which turns every gru_flush_cache_cbe(cbe) below into a no-op.
*/
if (unlikely(tlb_preload_count)) {
cbe = gru_tfh_to_cbe(tfh);
prefetchw(cbe);
}
/*
* Error if TFH state is IDLE or FMM mode & the user issuing a UPM call.
* Might be a hardware race OR a stupid user. Ignore FMM because FMM
* is a transient state.
*/
if (tfh->status != TFHSTATUS_EXCEPTION) {
/* The cached TFH may be stale: flush it and look once more */
gru_flush_cache(tfh);
sync_core();
if (tfh->status != TFHSTATUS_EXCEPTION)
goto failnoexception;
STAT(tfh_stale_on_fault);
}
if (tfh->state == TFHSTATE_IDLE)
goto failidle;
if (tfh->state == TFHSTATE_MISS_FMM && cbk)
goto failfmm;
/* Capture the description of the miss from the TFH */
write = (tfh->cause & TFHCAUSE_TLB_MOD) != 0;
vaddr = tfh->missvaddr;
asid = tfh->missasid;
indexway = tfh->indexway;
if (asid == 0)
goto failnoasid;
rmb(); /* TFH must be cache resident before reading ms_range_active */
/*
* TFH is cache resident - at least briefly. Fail the dropin
* if a range invalidate is active.
*/
if (atomic_read(&gts->ts_gms->ms_range_active))
goto failactive;
ret = gru_vtop(gts, vaddr, write, atomic, &gpa, &pageshift);
if (ret == VTOP_INVALID)
goto failinval;
if (ret == VTOP_RETRY)
goto failupm;
/*
* First use of this page size by the context: record it and update
* the CCH. If that cannot be done here (atomic context, or
* gru_update_cch() returned 0), request a CCH reload and fall back
* to user polling mode.
*/
if (!(gts->ts_sizeavail & GRU_SIZEAVAIL(pageshift))) {
gts->ts_sizeavail |= GRU_SIZEAVAIL(pageshift);
if (atomic || !gru_update_cch(gts)) {
gts->ts_force_cch_reload = 1;
goto failupm;
}
}
/* Preloading is attempted only for mappings of the base page size */
if (unlikely(cbe) && pageshift == PAGE_SHIFT) {
gru_preload_tlb(gru, gts, atomic, vaddr, asid, write, tlb_preload_count, tfh, cbe);
gru_flush_cache_cbe(cbe);
}
gru_cb_set_istatus_active(cbk);
gts->ustats.tlbdropin++;
tfh_write_restart(tfh, gpa, GAA_RAM, vaddr, asid, write,
GRU_PAGESIZE(pageshift));
gru_dbg(grudev,
"%s: gid %d, gts 0x%p, tfh 0x%p, vaddr 0x%lx, asid 0x%x, indexway 0x%x,"
" rw %d, ps %d, gpa 0x%lx\n",
atomic ? "atomic" : "non-atomic", gru->gs_gid, gts, tfh, vaddr, asid,
indexway, write, pageshift, gpa);
STAT(tlb_dropin);
return 0;
failnoasid:
/* No asid (delayed unload). */
STAT(tlb_dropin_fail_no_asid);
gru_dbg(grudev, "FAILED no_asid tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
if (!cbk)
tfh_user_polling_mode(tfh);
else
gru_flush_cache(tfh);
gru_flush_cache_cbe(cbe);
return -EAGAIN;
failupm:
/* Atomic failure switch CBR to UPM */
tfh_user_polling_mode(tfh);
gru_flush_cache_cbe(cbe);
STAT(tlb_dropin_fail_upm);
gru_dbg(grudev, "FAILED upm tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
return 1;
failfmm:
/* FMM state on UPM call */
gru_flush_cache(tfh);
gru_flush_cache_cbe(cbe);
STAT(tlb_dropin_fail_fmm);
gru_dbg(grudev, "FAILED fmm tfh: 0x%p, state %d\n", tfh, tfh->state);
return 0;
failnoexception:
/* TFH status did not show exception pending */
gru_flush_cache(tfh);
gru_flush_cache_cbe(cbe);
if (cbk)
gru_flush_cache(cbk);
STAT(tlb_dropin_fail_no_exception);
gru_dbg(grudev, "FAILED non-exception tfh: 0x%p, status %d, state %d\n",
tfh, tfh->status, tfh->state);
return 0;
failidle:
/* TFH state was idle - no miss pending */
gru_flush_cache(tfh);
gru_flush_cache_cbe(cbe);
if (cbk)
gru_flush_cache(cbk);
STAT(tlb_dropin_fail_idle);
gru_dbg(grudev, "FAILED idle tfh: 0x%p, state %d\n", tfh, tfh->state);
return 0;
failinval:
/* All errors (atomic & non-atomic) switch CBR to EXCEPTION state */
tfh_exception(tfh);
gru_flush_cache_cbe(cbe);
STAT(tlb_dropin_fail_invalid);
gru_dbg(grudev, "FAILED inval tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
return -EFAULT;
failactive:
/* Range invalidate active. Switch to UPM iff atomic */
if (!cbk)
tfh_user_polling_mode(tfh);
else
gru_flush_cache(tfh);
gru_flush_cache_cbe(cbe);
STAT(tlb_dropin_fail_range_active);
gru_dbg(grudev, "FAILED range active: tfh 0x%p, vaddr 0x%lx\n",
tfh, vaddr);
return 1;
}
/*
* Process an external interrupt from the GRU. This interrupt is
* caused by a TLB miss.
* Note that this is the interrupt handler that is registered with linux
* interrupt handlers.
*/
static irqreturn_t gru_intr(int chiplet, int blade)
{
struct gru_state *gru;
struct gru_tlb_fault_map imap, dmap;
struct gru_thread_state *gts;
struct gru_tlb_fault_handle *tfh = NULL;
struct completion *cmp;
int cbrnum, ctxnum;
STAT(intr);
gru = &gru_base[blade]->bs_grus[chiplet];
if (!gru) {
dev_err(grudev, "GRU: invalid interrupt: cpu %d, chiplet %d\n",
raw_smp_processor_id(), chiplet);
return IRQ_NONE;
}
get_clear_fault_map(gru, &imap, &dmap);
gru_dbg(grudev,
"cpu %d, chiplet %d, gid %d, imap %016lx %016lx, dmap %016lx %016lx\n",
smp_processor_id(), chiplet, gru->gs_gid,
imap.fault_bits[0], imap.fault_bits[1],
dmap.fault_bits[0], dmap.fault_bits[1]);
for_each_cbr_in_tfm(cbrnum, dmap.fault_bits) {
STAT(intr_cbr);
cmp = gru->gs_blade->bs_async_wq;
if (cmp)
complete(cmp);
gru_dbg(grudev, "gid %d, cbr_done %d, done %d\n",
gru->gs_gid, cbrnum, cmp ? cmp->done : -1);
}
for_each_cbr_in_tfm(cbrnum, imap.fault_bits) {
STAT(intr_tfh);
tfh = get_tfh_by_index(gru, cbrnum);
prefetchw(tfh); /* Helps on hdw, required for emulator */
/*
* When hardware sets a bit in the faultmap, it implicitly
* locks the GRU context so that it cannot be unloaded.
* The gts cannot change until a TFH start/writestart command
* is issued.
*/
ctxnum = tfh->ctxnum;
gts = gru->gs_gts[ctxnum];
/* Spurious interrupts can cause this. Ignore. */
if (!gts) {
STAT(intr_spurious);
continue;
}
/*
* This is running in interrupt context. Trylock the mmap_sem.
* If it fails, retry the fault in user context.
*/
gts->ustats.fmm_tlbmiss++;
if (!gts->ts_force_cch_reload &&
down_read_trylock(&gts->ts_mm->mmap_sem)) {
gru_try_dropin(gru, gts, tfh, NULL);
up_read(&gts->ts_mm->mmap_sem);
} else {
tfh_user_polling_mode(tfh);
STAT(intr_mm_lock_failed);
}
}
return IRQ_HANDLED;
}
/* TLB miss interrupt handler for chiplet 0 of the blade we are running on */
irqreturn_t gru0_intr(int irq, void *dev_id)
{
	int blade = uv_numa_blade_id();

	return gru_intr(0, blade);
}
/* TLB miss interrupt handler for chiplet 1 of the blade we are running on */
irqreturn_t gru1_intr(int irq, void *dev_id)
{
	int blade = uv_numa_blade_id();

	return gru_intr(1, blade);
}
/*
 * Interrupt handler for blades that have no CPUs of their own: service
 * both chiplets of every possible blade reporting zero possible CPUs.
 * Always returns IRQ_HANDLED.
 */
irqreturn_t gru_intr_mblade(int irq, void *dev_id)
{
	int bid;

	for_each_possible_blade(bid) {
		if (!uv_blade_nr_possible_cpus(bid)) {
			gru_intr(0, bid);
			gru_intr(1, bid);
		}
	}
	return IRQ_HANDLED;
}
/*
 * Handle a TLB miss from user context.  Sleeps until no range invalidate
 * is active, then attempts the dropin; repeats for as long as
 * gru_try_dropin() returns a positive value.  Returns the first
 * result that is <= 0.
 */
static int gru_user_dropin(struct gru_thread_state *gts,
			   struct gru_tlb_fault_handle *tfh,
			   void *cb)
{
	struct gru_mm_struct *gms = gts->ts_gms;
	int rc;

	gts->ustats.upm_tlbmiss++;
	do {
		wait_event(gms->ms_wait_queue,
			   atomic_read(&gms->ms_range_active) == 0);
		prefetchw(tfh);	/* Helps on hdw, required for emulator */
		rc = gru_try_dropin(gts->ts_gru, gts, tfh, cb);
		if (rc > 0)
			STAT(call_os_wait_queue);
	} while (rc > 0);

	return rc;
}
/*
 * Entry point used when user code sees the "call OS" bit set in one of its
 * CBs, which normally means a TLB fault has occurred.
 *	cb - user virtual address of the CB
 *
 * Returns -EINVAL for a misaligned or out-of-range CB or when no context
 * is found, -EAGAIN when the context is not loaded, otherwise the result
 * of gru_user_dropin().
 */
int gru_handle_user_call_os(unsigned long cb)
{
	struct gru_tlb_fault_handle *tfh;
	struct gru_thread_state *gts;
	void *cbk;
	int ucbnum, cbrnum;
	int ret = -EINVAL;

	STAT(call_os);

	/* sanity check the cb pointer */
	ucbnum = get_cb_number((void *)cb);
	if ((cb & (GRU_HANDLE_STRIDE - 1)) || ucbnum >= GRU_NUM_CB)
		return -EINVAL;

	gts = gru_find_lock_gts(cb);
	if (!gts)
		return -EINVAL;
	gru_dbg(grudev, "address 0x%lx, gid %d, gts 0x%p\n", cb, gts->ts_gru ? gts->ts_gru->gs_gid : -1, gts);

	/* Only CBs inside the range allocated to this context are served */
	if (ucbnum < gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) {
		gru_check_context_placement(gts);

		/*
		 * CCH may contain stale data if ts_force_cch_reload is set.
		 */
		if (gts->ts_gru && gts->ts_force_cch_reload) {
			gts->ts_force_cch_reload = 0;
			gru_update_cch(gts);
		}

		ret = -EAGAIN;
		cbrnum = thread_cbr_number(gts, ucbnum);
		if (gts->ts_gru) {
			tfh = get_tfh_by_index(gts->ts_gru, cbrnum);
			cbk = get_gseg_base_address_cb(gts->ts_gru->gs_gru_base_vaddr,
						       gts->ts_ctxnum, ucbnum);
			ret = gru_user_dropin(gts, tfh, cbk);
		}
	}

	gru_unlock_gts(gts);
	return ret;
}
/*
* Fetch the exception detail information for a CB that terminated with
* an exception.
*/
int gru_get_exception_detail(unsigned long arg)
{
struct control_block_extended_exc_detail excdet;
struct gru_control_block_extended *cbe;
struct gru_thread_state *gts;
int ucbnum, cbrnum, ret;
STAT(user_exception);
if (copy_from_user(&excdet, (void __user *)arg, sizeof(excdet)))
return -EFAULT;
gts = gru_find_lock_gts(excdet.cb);
if (!gts)
return -EINVAL;
gru_dbg(grudev, "address 0x%lx, gid %d, gts 0x%p\n", excdet.cb, gts->ts_gru ? gts->ts_gru->gs_gid : -1, gts);
ucbnum = get_cb_number((void *)excdet.cb);
if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) {
ret = -EINVAL;
} else if (gts->ts_gru) {
cbrnum = thread_cbr_number(gts, ucbnum);
cbe = get_cbe_by_index(gts->ts_gru, cbrnum);
gru_flush_cache(cbe); /* CBE not coherent */
sync_core(); /* make sure we are have current data */
excdet.opc = cbe->opccpy;
excdet.exopc = cbe->exopccpy;
excdet.ecause = cbe->ecause;
excdet.exceptdet0 = cbe->idef1upd;
excdet.exceptdet1 = cbe->idef3upd;
excdet.cbrstate = cbe->cbrstate;
excdet.cbrexecstatus = cbe->cbrexecstatus;
gru_flush_cache_cbe(cbe);
ret = 0;
} else {
ret = -EAGAIN;
}
gru_unlock_gts(gts);
gru_dbg(grudev,
"cb 0x%lx, op %d, exopc %d, cbrstate %d, cbrexecstatus 0x%x, ecause 0x%x, "
"exdet0 0x%lx, exdet1 0x%x\n",
excdet.cb, excdet.opc, excdet.exopc, excdet.cbrstate, excdet.cbrexecstatus,
excdet.ecause, excdet.exceptdet0, excdet.exceptdet1);
if (!ret && copy_to_user((void __user *)arg, &excdet, sizeof(excdet)))
ret = -EFAULT;
return ret;
}
/*
 * User request to unload every loaded context on every GRU.  Context
 * contents are saved for a possible reload.  Requires CAP_SYS_ADMIN
 * (-EPERM otherwise).  Contexts whose ts_ctxlock cannot be taken without
 * blocking are skipped.
 */
static int gru_unload_all_contexts(void)
{
	struct gru_thread_state *gts;
	struct gru_state *gru;
	int gid, ctxnum;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	foreach_gid(gid) {
		gru = GID_TO_GRU(gid);
		spin_lock(&gru->gs_lock);
		for (ctxnum = 0; ctxnum < GRU_NUM_CCH; ctxnum++) {
			gts = gru->gs_gts[ctxnum];
			if (!gts || !mutex_trylock(&gts->ts_ctxlock))
				continue;

			/* gs_lock is dropped around the unload itself */
			spin_unlock(&gru->gs_lock);
			gru_unload_context(gts, 1);
			mutex_unlock(&gts->ts_ctxlock);
			spin_lock(&gru->gs_lock);
		}
		spin_unlock(&gru->gs_lock);
	}
	return 0;
}
int gru_user_unload_context(unsigned long arg)
{
struct gru_thread_state *gts;
struct gru_unload_context_req req;
STAT(user_unload_context);
if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
return -EFAULT;
gru_dbg(grudev, "gseg 0x%lx\n", req.gseg);
if (!req.gseg)
return gru_unload_all_contexts();
gts = gru_find_lock_gts(req.gseg);
if (!gts)
return -EINVAL;
if (gts->ts_gru)
gru_unload_context(gts, 1);
gru_unlock_gts(gts);
return 0;
}
/*
* User request to flush a range of virtual addresses from the GRU TLB
* (Mainly for testing).
*/
int gru_user_flush_tlb(unsigned long arg)
{
struct gru_thread_state *gts;
struct gru_flush_tlb_req req;
struct gru_mm_struct *gms;
STAT(user_flush_tlb);
if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
return -EFAULT;
gru_dbg(grudev, "gseg 0x%lx, vaddr 0x%lx, len 0x%lx\n", req.gseg,
req.vaddr, req.len);
gts = gru_find_lock_gts(req.gseg);
if (!gts)
return -EINVAL;
gms = gts->ts_gms;
gru_unlock_gts(gts);
gru_flush_tlb_range(gms, req.vaddr, req.len);
return 0;
}
/*
* Fetch GSEG statisticss
*/
long gru_get_gseg_statistics(unsigned long arg)
{
struct gru_thread_state *gts;
struct gru_get_gseg_statistics_req req;
if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
return -EFAULT;
/*
* The library creates arrays of contexts for threaded programs.
* If no gts exists in the array, the context has never been used & all
* statistics are implicitly 0.
*/
gts = gru_find_lock_gts(req.gseg);
if (gts) {
memcpy(&req.stats, &gts->ustats, sizeof(gts->ustats));
gru_unlock_gts(gts);
} else {
memset(&req.stats, 0, sizeof(gts->ustats));
}
if (copy_to_user((void __user *)arg, &req, sizeof(req)))
return -EFAULT;
return 0;
}
/*
 * Set a per-context option.  arg is a user pointer to a
 * struct gru_set_context_option_req; req.op selects the option:
 *	sco_blade_chiplet	pin the context to blade val1 / chiplet val0
 *				(-1 is accepted for either value)
 *	sco_gseg_owner		register the current task as the GSEG owner
 *				(needed for TLB fault interrupt targeting)
 *	sco_cch_req_slice	set the CCH slice option from val1
 * A gts is allocated for the gseg if none exists yet.
 *
 * Returns 0, -EFAULT if the request cannot be read, -EINVAL for an unknown
 * op or out-of-range values, or the error from gru_alloc_locked_gts().
 */
int gru_set_context_option(unsigned long arg)
{
	struct gru_set_context_option_req req;
	struct gru_thread_state *gts;
	int ret = 0;

	STAT(set_context_option);
	if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
		return -EFAULT;
	gru_dbg(grudev, "op %d, gseg 0x%lx, value1 0x%lx\n", req.op, req.gseg, req.val1);

	gts = gru_find_lock_gts(req.gseg);
	if (!gts) {
		gts = gru_alloc_locked_gts(req.gseg);
		if (IS_ERR(gts))
			return PTR_ERR(gts);
	}

	switch (req.op) {
	case sco_blade_chiplet:
		/* Select blade/chiplet for GRU context */
		if (req.val0 < -1 || req.val0 >= GRU_CHIPLETS_PER_HUB ||
		    req.val1 < -1 || req.val1 >= GRU_MAX_BLADES ||
		    (req.val1 >= 0 && !gru_base[req.val1])) {
			ret = -EINVAL;
			break;
		}
		gts->ts_user_blade_id = req.val1;
		gts->ts_user_chiplet_id = req.val0;
		gru_check_context_placement(gts);
		break;
	case sco_gseg_owner:
		/* Register the current task as the GSEG owner */
		gts->ts_tgid_owner = current->tgid;
		break;
	case sco_cch_req_slice:
		/* Set the CCH slice option */
		gts->ts_cch_req_slice = req.val1 & 3;
		break;
	default:
		ret = -EINVAL;
		break;
	}

	gru_unlock_gts(gts);
	return ret;
}

View file

@ -0,0 +1,623 @@
/*
* SN Platform GRU Driver
*
* FILE OPERATIONS & DRIVER INITIALIZATION
*
* This file supports the user system call for file open, close, mmap, etc.
* This also includes the driver initialization code.
*
* Copyright (c) 2008-2014 Silicon Graphics, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/io.h>
#include <linux/spinlock.h>
#include <linux/device.h>
#include <linux/miscdevice.h>
#include <linux/interrupt.h>
#include <linux/proc_fs.h>
#include <linux/uaccess.h>
#ifdef CONFIG_X86_64
#include <asm/uv/uv_irq.h>
#endif
#include <asm/uv/uv.h>
#include "gru.h"
#include "grulib.h"
#include "grutables.h"
#include <asm/uv/uv_hub.h>
#include <asm/uv/uv_mmrs.h>
/* Per-blade GRU state, indexed by blade id; populated by gru_init_tables() */
struct gru_blade_state *gru_base[GRU_MAX_BLADES] __read_mostly;
/* Start of GRU space: physical address and its kernel mapping (set in gru_init()) */
unsigned long gru_start_paddr __read_mostly;
void *gru_start_vaddr __read_mostly;
/* End of GRU space: gru_start_paddr + GRU_MAX_BLADES * GRU_SIZE (set in gru_init()) */
unsigned long gru_end_paddr __read_mostly;
/* One more than the highest gid seen so far; raised by gru_init_chiplet() */
unsigned int gru_max_gids __read_mostly;
struct gru_stats_s gru_stats;
/* Guaranteed user available resources on each node */
static int max_user_cbrs, max_user_dsr_bytes;
/* Forward declaration; the definition follows the file operations below */
static struct miscdevice gru_miscdev;
/*
 * Returns non-zero when running on a UV system whose hub revision is below
 * UV3_HUB_REVISION_BASE, 0 otherwise.
 */
static int gru_supported(void)
{
	if (!is_uv_system())
		return 0;

	return uv_hub_info->hub_revision < UV3_HUB_REVISION_BASE;
}
/*
* gru_vma_close
*
* Called when unmapping a device mapping. Frees all gru resources
* and tables belonging to the vma.
*/
static void gru_vma_close(struct vm_area_struct *vma)
{
struct gru_vma_data *vdata;
struct gru_thread_state *gts;
struct list_head *entry, *next;
if (!vma->vm_private_data)
return;
vdata = vma->vm_private_data;
vma->vm_private_data = NULL;
gru_dbg(grudev, "vma %p, file %p, vdata %p\n", vma, vma->vm_file,
vdata);
list_for_each_safe(entry, next, &vdata->vd_head) {
gts =
list_entry(entry, struct gru_thread_state, ts_next);
list_del(&gts->ts_next);
mutex_lock(&gts->ts_ctxlock);
if (gts->ts_gru)
gru_unload_context(gts, 0);
mutex_unlock(&gts->ts_ctxlock);
gts_drop(gts);
}
kfree(vdata);
STAT(vdata_free);
}
/*
* gru_file_mmap
*
* Called when mmapping the device. Initializes the vma with a fault handler
* and private data structure necessary to allocate, track, and free the
* underlying pages.
*/
static int gru_file_mmap(struct file *file, struct vm_area_struct *vma)
{
if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) != (VM_SHARED | VM_WRITE))
return -EPERM;
if (vma->vm_start & (GRU_GSEG_PAGESIZE - 1) ||
vma->vm_end & (GRU_GSEG_PAGESIZE - 1))
return -EINVAL;
vma->vm_flags |= VM_IO | VM_PFNMAP | VM_LOCKED |
VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP;
vma->vm_page_prot = PAGE_SHARED;
vma->vm_ops = &gru_vm_ops;
vma->vm_private_data = gru_alloc_vma_data(vma, 0);
if (!vma->vm_private_data)
return -ENOMEM;
gru_dbg(grudev, "file %p, vaddr 0x%lx, vma %p, vdata %p\n",
file, vma->vm_start, vma, vma->vm_private_data);
return 0;
}
/*
 * Create a new GRU context: record the requested resources and options in
 * the private data of the vma that maps req.gseg.  arg is a user pointer to
 * a struct gru_create_context_req.
 *
 * Returns 0 on success, -EFAULT if the request cannot be read, -EINVAL if
 * the request exceeds the per-node limits, asks for zero threads, or no GRU
 * vma is found for the gseg.
 */
static int gru_create_new_context(unsigned long arg)
{
	struct gru_create_context_req req;
	struct vm_area_struct *vma;
	struct gru_vma_data *vdata;
	int ret;

	if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
		return -EFAULT;

	if (req.data_segment_bytes > max_user_dsr_bytes ||
	    req.control_blocks > max_user_cbrs ||
	    !req.maximum_thread_count)
		return -EINVAL;

	/* Default to FMM interrupt mode when no miss mode was requested */
	if ((req.options & GRU_OPT_MISS_MASK) == 0)
		req.options |= GRU_OPT_MISS_FMM_INTR;

	down_write(&current->mm->mmap_sem);
	vma = gru_find_vma(req.gseg);
	if (!vma) {
		ret = -EINVAL;
	} else {
		vdata = vma->vm_private_data;
		vdata->vd_user_options = req.options;
		vdata->vd_dsr_au_count =
			GRU_DS_BYTES_TO_AU(req.data_segment_bytes);
		vdata->vd_cbr_au_count = GRU_CB_COUNT_TO_AU(req.control_blocks);
		vdata->vd_tlb_preload_count = req.tlb_preload_count;
		ret = 0;
	}
	up_write(&current->mm->mmap_sem);

	return ret;
}
/*
 * Get GRU configuration info (temp - for emulator testing).
 * Fills a struct gru_config_info with the online cpu/node counts and the
 * derived blade and chiplet counts, and copies it to the user pointer arg.
 * Returns 0, or -EFAULT if the copy fails.
 */
static long gru_get_config_info(unsigned long arg)
{
	struct gru_config_info info;
	int nodesperblade = 1;

	/* Two nodes per blade if nodes 0 and 1 map to the same blade */
	if (num_online_nodes() > 1 &&
	    uv_node_to_blade_id(1) == uv_node_to_blade_id(0))
		nodesperblade = 2;

	memset(&info, 0, sizeof(info));
	info.cpus = num_online_cpus();
	info.nodes = num_online_nodes();
	info.blades = info.nodes / nodesperblade;
	info.chiplets = GRU_CHIPLETS_PER_BLADE * info.blades;

	return copy_to_user((void __user *)arg, &info, sizeof(info)) ?
		-EFAULT : 0;
}
/*
 * gru_file_unlocked_ioctl
 *
 * ioctl entry point of the GRU device: dispatches each request code to its
 * handler and returns the handler's result.  Unknown request codes yield
 * -EBADRQC.
 */
static long gru_file_unlocked_ioctl(struct file *file, unsigned int req,
				    unsigned long arg)
{
	gru_dbg(grudev, "file %p, req 0x%x, 0x%lx\n", file, req, arg);

	switch (req) {
	case GRU_CREATE_CONTEXT:
		return gru_create_new_context(arg);
	case GRU_SET_CONTEXT_OPTION:
		return gru_set_context_option(arg);
	case GRU_USER_GET_EXCEPTION_DETAIL:
		return gru_get_exception_detail(arg);
	case GRU_USER_UNLOAD_CONTEXT:
		return gru_user_unload_context(arg);
	case GRU_USER_FLUSH_TLB:
		return gru_user_flush_tlb(arg);
	case GRU_USER_CALL_OS:
		return gru_handle_user_call_os(arg);
	case GRU_GET_GSEG_STATISTICS:
		return gru_get_gseg_statistics(arg);
	case GRU_KTEST:
		return gru_ktest(arg);
	case GRU_GET_CONFIG_INFO:
		return gru_get_config_info(arg);
	case GRU_DUMP_CHIPLET_STATE:
		return gru_dump_chiplet_request(arg);
	default:
		return -EBADRQC;
	}
}
/*
 * Initialize the state of one GRU chiplet. Called at init time from
 * gru_init_tables() for every chiplet of every blade.
 *	gru		chiplet state to initialize
 *	paddr/vaddr	physical and virtual base address of the chiplet
 *	blade_id	blade the chiplet belongs to
 *	chiplet_id	index of the chiplet within the blade
 * Also raises gru_max_gids if this chiplet's gid is not yet covered.
 */
static void gru_init_chiplet(struct gru_state *gru, unsigned long paddr,
void *vaddr, int blade_id, int chiplet_id)
{
spin_lock_init(&gru->gs_lock);
spin_lock_init(&gru->gs_asid_lock);
gru->gs_gru_base_paddr = paddr;
gru->gs_gru_base_vaddr = vaddr;
/* gids number the chiplets consecutively across blades */
gru->gs_gid = blade_id * GRU_CHIPLETS_PER_BLADE + chiplet_id;
gru->gs_blade = gru_base[blade_id];
gru->gs_blade_id = blade_id;
gru->gs_chiplet_id = chiplet_id;
/*
* Resource maps start with the low GRU_CBR_AU / GRU_DSR_AU bits set.
* The CBR case avoids an undefined 64-bit shift when GRU_CBR_AU is 64.
* NOTE(review): presumably one bit per free allocation unit - confirm.
*/
gru->gs_cbr_map = (GRU_CBR_AU == 64) ? ~0 : (1UL << GRU_CBR_AU) - 1;
gru->gs_dsr_map = (1UL << GRU_DSR_AU) - 1;
gru->gs_asid_limit = MAX_ASID;
gru_tgh_flush_init(gru);
if (gru->gs_gid >= gru_max_gids)
gru_max_gids = gru->gs_gid + 1;
gru_dbg(grudev, "bid %d, gid %d, vaddr %p (0x%lx)\n",
blade_id, gru->gs_gid, gru->gs_gru_base_vaddr,
gru->gs_gru_base_paddr);
}
/*
 * Build the per-blade tables for all possible blades.  For each blade a
 * zeroed struct gru_blade_state is allocated (on the blade's memory node),
 * its chiplets are initialized, and the user resource limits
 * max_user_cbrs / max_user_dsr_bytes are lowered to the smallest per-blade
 * maximum seen.
 *
 * Returns 0, or -ENOMEM after freeing the blades already allocated.
 */
static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr)
{
	const int order = get_order(sizeof(struct gru_blade_state));
	struct gru_blade_state *bs;
	struct gru_state *gru;
	struct page *page;
	unsigned long paddr;
	void *vaddr;
	int pnode, nid, bid, chip;
	int blade_cbrs, blade_dsrbytes, n;

	max_user_cbrs = GRU_NUM_CB;
	max_user_dsr_bytes = GRU_NUM_DSR_BYTES;

	for_each_possible_blade(bid) {
		pnode = uv_blade_to_pnode(bid);
		nid = uv_blade_to_memory_nid(bid);/* -1 if no memory on blade */
		page = alloc_pages_node(nid, GFP_KERNEL, order);
		if (!page)
			goto unwind;

		bs = page_address(page);
		gru_base[bid] = bs;
		memset(bs, 0, sizeof(struct gru_blade_state));
		bs->bs_lru_gru = &bs->bs_grus[0];
		spin_lock_init(&gru_base[bid]->bs_lock);
		init_rwsem(&gru_base[bid]->bs_kgts_sema);

		/* Largest CBR / DSR capacity among this blade's chiplets */
		blade_cbrs = 0;
		blade_dsrbytes = 0;
		for (chip = 0; chip < GRU_CHIPLETS_PER_BLADE; chip++) {
			gru = &bs->bs_grus[chip];
			paddr = gru_chiplet_paddr(gru_base_paddr, pnode, chip);
			vaddr = gru_chiplet_vaddr(gru_base_vaddr, pnode, chip);
			gru_init_chiplet(gru, paddr, vaddr, bid, chip);

			n = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE;
			blade_cbrs = max(blade_cbrs, n);
			n = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES;
			blade_dsrbytes = max(blade_dsrbytes, n);
		}
		max_user_cbrs = min(max_user_cbrs, blade_cbrs);
		max_user_dsr_bytes = min(max_user_dsr_bytes, blade_dsrbytes);
	}
	return 0;

unwind:
	/* Release the blades that were set up before the failure */
	while (--bid >= 0)
		free_pages((unsigned long)gru_base[bid], order);
	return -ENOMEM;
}
static void gru_free_tables(void)
{
int bid;
int order = get_order(sizeof(struct gru_state) *
GRU_CHIPLETS_PER_BLADE);
for (bid = 0; bid < GRU_MAX_BLADES; bid++)
free_pages((unsigned long)gru_base[bid], order);
}
/*
 * Map a (chiplet, cpu) pair to the address of the TLB interrupt config MMR
 * that targets the cpu's core.
 *
 * Returns 0 (and leaves *corep untouched) when the cpu is a secondary
 * hyperthread or its core number is not below GRU_NUM_TFM; otherwise
 * returns the MMR address and stores the blade-wide core number in *corep.
 * Any chiplet other than 0 or 1 is a BUG().
 */
static unsigned long gru_chiplet_cpu_to_mmr(int chiplet, int cpu, int *corep)
{
	unsigned long mmr = 0;
	int core;

	/*
	 * We target the cores of a blade and not the hyperthreads themselves.
	 * There is a max of 8 cores per socket and 2 sockets per blade,
	 * making for a max total of 16 cores (i.e., 16 CPUs without
	 * hyperthreading and 32 CPUs with hyperthreading).
	 */
	core = uv_cpu_core_number(cpu) + UV_MAX_INT_CORES * uv_cpu_socket_number(cpu);
	if (core >= GRU_NUM_TFM || uv_cpu_ht_number(cpu))
		return 0;

	/* The per-core config registers are evenly spaced from INT0 */
	switch (chiplet) {
	case 0:
		mmr = UVH_GR0_TLB_INT0_CONFIG +
		      core * (UVH_GR0_TLB_INT1_CONFIG - UVH_GR0_TLB_INT0_CONFIG);
		break;
	case 1:
		mmr = UVH_GR1_TLB_INT0_CONFIG +
		      core * (UVH_GR1_TLB_INT1_CONFIG - UVH_GR1_TLB_INT0_CONFIG);
		break;
	default:
		BUG();
	}

	*corep = core;
	return mmr;
}
#ifdef CONFIG_IA64
/* Number of successful TLB irq setups per chiplet; the irq is shared */
static int gru_irq_count[GRU_CHIPLETS_PER_BLADE];

/* irq_chip callback that intentionally does nothing */
static void gru_noop(struct irq_data *d)
{
}

/*
 * One irq_chip per chiplet; mask, unmask and ack are all no-ops.
 * The range designator uses the standard "[a ... b] = value" form; the
 * variant without '=' is an obsolete GNU spelling.
 */
static struct irq_chip gru_chip[GRU_CHIPLETS_PER_BLADE] = {
	[0 ... GRU_CHIPLETS_PER_BLADE - 1] = {
		.irq_mask	= gru_noop,
		.irq_unmask	= gru_noop,
		.irq_ack	= gru_noop
	}
};
/*
 * IA64: set up the TLB miss interrupt of a chiplet for the given cpu.
 * The irq (IRQ_GRU + chiplet) is shared by all cores, so the irq chip is
 * installed and the handler requested only on the first use; later calls
 * just bump gru_irq_count[].  cpus that gru_chiplet_cpu_to_mmr() rejects
 * are skipped with a return of 0.
 *
 * Returns 0 on success or the error from irq_set_chip()/request_irq().
 */
static int gru_chiplet_setup_tlb_irq(int chiplet, char *irq_name,
			irq_handler_t irq_handler, int cpu, int blade)
{
	const int irq = IRQ_GRU + chiplet;
	int core, err;

	if (!gru_chiplet_cpu_to_mmr(chiplet, cpu, &core))
		return 0;

	if (gru_irq_count[chiplet] == 0) {
		gru_chip[chiplet].name = irq_name;
		err = irq_set_chip(irq, &gru_chip[chiplet]);
		if (err) {
			printk(KERN_ERR "%s: set_irq_chip failed, errno=%d\n",
			       GRU_DRIVER_ID_STR, -err);
			return err;
		}

		err = request_irq(irq, irq_handler, 0, irq_name, NULL);
		if (err) {
			printk(KERN_ERR "%s: request_irq failed, errno=%d\n",
			       GRU_DRIVER_ID_STR, -err);
			return err;
		}
	}

	gru_irq_count[chiplet]++;
	return 0;
}
/*
 * IA64: undo one gru_chiplet_setup_tlb_irq() for the given cpu.  The shared
 * irq is freed when the last user of the chiplet goes away.  Does nothing
 * if the chiplet has no users or the cpu is one that setup skipped.
 */
static void gru_chiplet_teardown_tlb_irq(int chiplet, int cpu, int blade)
{
	const int irq = IRQ_GRU + chiplet;
	int core;

	if (gru_irq_count[chiplet] == 0)
		return;

	if (!gru_chiplet_cpu_to_mmr(chiplet, cpu, &core))
		return;

	gru_irq_count[chiplet]--;
	if (gru_irq_count[chiplet] == 0)
		free_irq(irq, NULL);
}
#elif defined CONFIG_X86_64
/*
 * X86_64: set up the TLB miss interrupt of a chiplet for the core of the
 * given cpu.  An irq is created with uv_setup_irq() for the core's config
 * MMR, the handler is requested on it, and the irq number is remembered in
 * the chiplet's gs_irq[core].  cpus that gru_chiplet_cpu_to_mmr() rejects
 * are skipped with a return of 0.
 *
 * Returns 0 on success or the negative error from uv_setup_irq() or
 * request_irq().
 */
static int gru_chiplet_setup_tlb_irq(int chiplet, char *irq_name,
			irq_handler_t irq_handler, int cpu, int blade)
{
	unsigned long mmr;
	int core, irq, err;

	mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core);
	if (!mmr)
		return 0;

	irq = uv_setup_irq(irq_name, cpu, blade, mmr, UV_AFFINITY_CPU);
	if (irq < 0) {
		printk(KERN_ERR "%s: uv_setup_irq failed, errno=%d\n",
		       GRU_DRIVER_ID_STR, -irq);
		return irq;
	}

	err = request_irq(irq, irq_handler, 0, irq_name, NULL);
	if (err) {
		/* Give the irq back before reporting the failure */
		uv_teardown_irq(irq);
		printk(KERN_ERR "%s: request_irq failed, errno=%d\n",
		       GRU_DRIVER_ID_STR, -err);
		return err;
	}

	gru_base[blade]->bs_grus[chiplet].gs_irq[core] = irq;
	return 0;
}
/*
 * X86_64: release the TLB miss interrupt recorded in gs_irq[] for the core
 * of the given cpu.  Does nothing for cpus that setup skipped or when no
 * irq was recorded for the core.
 */
static void gru_chiplet_teardown_tlb_irq(int chiplet, int cpu, int blade)
{
	int core, irq;

	if (!gru_chiplet_cpu_to_mmr(chiplet, cpu, &core))
		return;

	irq = gru_base[blade]->bs_grus[chiplet].gs_irq[core];
	if (!irq)
		return;

	free_irq(irq, NULL);
	uv_teardown_irq(irq);
}
#endif
/*
 * Tear down the TLB interrupts of both chiplets: first for every online
 * cpu on its own blade, then (using cpu 0) for every possible blade that
 * has no CPUs.
 */
static void gru_teardown_tlb_irqs(void)
{
	int cpu, blade;

	for_each_online_cpu(cpu) {
		blade = uv_cpu_to_blade_id(cpu);
		gru_chiplet_teardown_tlb_irq(0, cpu, blade);
		gru_chiplet_teardown_tlb_irq(1, cpu, blade);
	}

	for_each_possible_blade(blade) {
		if (!uv_blade_nr_possible_cpus(blade)) {
			gru_chiplet_teardown_tlb_irq(0, 0, blade);
			gru_chiplet_teardown_tlb_irq(1, 0, blade);
		}
	}
}
/*
 * Set up the TLB interrupts of both chiplets: for every online cpu on its
 * own blade (handlers gru0_intr/gru1_intr), then for every possible blade
 * without CPUs (handler gru_intr_mblade, targeted at cpu 0).
 *
 * On the first failure everything is torn down again with
 * gru_teardown_tlb_irqs() and the error is returned; returns 0 on success.
 */
static int gru_setup_tlb_irqs(void)
{
	int cpu, blade, ret;

	for_each_online_cpu(cpu) {
		blade = uv_cpu_to_blade_id(cpu);
		ret = gru_chiplet_setup_tlb_irq(0, "GRU0_TLB", gru0_intr, cpu, blade);
		if (!ret)
			ret = gru_chiplet_setup_tlb_irq(1, "GRU1_TLB", gru1_intr, cpu, blade);
		if (ret)
			goto undo;
	}

	for_each_possible_blade(blade) {
		if (uv_blade_nr_possible_cpus(blade))
			continue;
		ret = gru_chiplet_setup_tlb_irq(0, "GRU0_TLB", gru_intr_mblade, 0, blade);
		if (!ret)
			ret = gru_chiplet_setup_tlb_irq(1, "GRU1_TLB", gru_intr_mblade, 0, blade);
		if (ret)
			goto undo;
	}
	return 0;

undo:
	gru_teardown_tlb_irqs();
	return ret;
}
/*
 * gru_init
 *
 * Driver initialization, run at boot or module load time.  On systems
 * without GRU support this is a successful no-op.  Otherwise it locates
 * the GRU address space, registers the misc device, creates the proc
 * entries, builds the per-blade tables, hooks up the TLB interrupts and
 * starts the kernel services.  A failing step unwinds the earlier ones in
 * reverse order and its error code is returned.
 */
static int __init gru_init(void)
{
	int err;

	if (!gru_supported())
		return 0;

#if defined CONFIG_IA64
	gru_start_paddr = 0xd000000000UL; /* ZZZZZZZZZZZZZZZZZZZ fixme */
#else
	gru_start_paddr = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR) &
				0x7fffffffffffUL;
#endif
	gru_start_vaddr = __va(gru_start_paddr);
	gru_end_paddr = gru_start_paddr + GRU_MAX_BLADES * GRU_SIZE;
	printk(KERN_INFO "GRU space: 0x%lx - 0x%lx\n",
	       gru_start_paddr, gru_end_paddr);

	err = misc_register(&gru_miscdev);
	if (err) {
		printk(KERN_ERR "%s: misc_register failed\n",
		       GRU_DRIVER_ID_STR);
		return err;
	}

	err = gru_proc_init();
	if (err) {
		printk(KERN_ERR "%s: proc init failed\n", GRU_DRIVER_ID_STR);
		goto out_misc;
	}

	err = gru_init_tables(gru_start_paddr, gru_start_vaddr);
	if (err) {
		printk(KERN_ERR "%s: init tables failed\n", GRU_DRIVER_ID_STR);
		goto out_proc;
	}

	err = gru_setup_tlb_irqs();
	if (err)
		goto out_tables;

	gru_kservices_init();

	printk(KERN_INFO "%s: v%s\n", GRU_DRIVER_ID_STR,
	       GRU_DRIVER_VERSION_STR);
	return 0;

out_tables:
	gru_free_tables();
out_proc:
	gru_proc_exit();
out_misc:
	misc_deregister(&gru_miscdev);
	return err;
}
/*
 * Module unload: undo what gru_init() set up.  Nothing to do on systems
 * where gru_init() did not initialize the driver.
 */
static void __exit gru_exit(void)
{
	if (gru_supported()) {
		gru_teardown_tlb_irqs();
		gru_kservices_exit();
		gru_free_tables();
		misc_deregister(&gru_miscdev);
		gru_proc_exit();
	}
}
/* File operations of the GRU device: ioctl and mmap only; llseek is a no-op */
static const struct file_operations gru_fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = gru_file_unlocked_ioctl,
.mmap = gru_file_mmap,
.llseek = noop_llseek,
};
/* Misc device named "gru" with a dynamically assigned minor number */
static struct miscdevice gru_miscdev = {
.minor = MISC_DYNAMIC_MINOR,
.name = "gru",
.fops = &gru_fops,
};
/* vm operations installed on every GRU mapping by gru_file_mmap() */
const struct vm_operations_struct gru_vm_ops = {
.close = gru_vma_close,
.fault = gru_fault,
};
/*
 * When built into the kernel gru_init() runs as an fs_initcall; when built
 * as a module it is the regular module init function.
 */
#ifndef MODULE
fs_initcall(gru_init);
#else
module_init(gru_init);
#endif
module_exit(gru_exit);
/* gru_options is exposed as a module parameter with mode 0644 */
module_param(gru_options, ulong, 0644);
MODULE_PARM_DESC(gru_options, "Various debug options");
MODULE_AUTHOR("Silicon Graphics, Inc.");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION(GRU_DRIVER_ID_STR GRU_DRIVER_VERSION_STR);
MODULE_VERSION(GRU_DRIVER_VERSION_STR);

View file

@ -0,0 +1,216 @@
/*
* GRU KERNEL MCS INSTRUCTIONS
*
* Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/kernel.h>
#include "gru.h"
#include "grulib.h"
#include "grutables.h"
/*
 * GRU_OPERATION_TIMEOUT: how long wait_instruction_complete() waits for a
 * handle instruction before reporting a malfunction, in CPU cycles (10 sec).
 * CLKS2NSEC(c): convert a cycle count to nanoseconds.
 * NOTE(review): assumes itc_freq is in Hz and tsc_khz in kHz - confirm.
 */
#ifdef CONFIG_IA64
#include <asm/processor.h>
#define GRU_OPERATION_TIMEOUT (((cycles_t) local_cpu_data->itc_freq)*10)
#define CLKS2NSEC(c) ((c) *1000000000 / local_cpu_data->itc_freq)
#else
#include <asm/tsc.h>
#define GRU_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
#define CLKS2NSEC(c) ((c) * 1000000 / tsc_khz)
#endif
/* Extract the status field (bits 16-17 of the first word) from a kernel handle */
#define GET_MSEG_HANDLE_STATUS(h) (((*(unsigned long *)(h)) >> 16) & 3)
/* Per-operation statistics, indexed by enum mcs_op; see update_mcs_stats() */
struct mcs_op_statistic mcs_op_statistics[mcsop_last];
static void update_mcs_stats(enum mcs_op op, unsigned long clks)
{
unsigned long nsec;
nsec = CLKS2NSEC(clks);
atomic_long_inc(&mcs_op_statistics[op].count);
atomic_long_add(nsec, &mcs_op_statistics[op].total);
if (mcs_op_statistics[op].max < nsec)
mcs_op_statistics[op].max = nsec;
}
/*
 * Kick off the instruction that has been set up in handle h: OR 0x20001
 * into the first word of the handle and flush the handle from the cache.
 * The barrier makes sure all earlier stores to the handle are ordered
 * before the CMD/STATUS bits are set.
 */
static void start_instruction(void *h)
{
	unsigned long *word0 = h;

	wmb();		/* setting CMD/STATUS bits must be last */
	*word0 |= 0x20001;
	gru_flush_cache(h);
}
/*
 * Called when a handle instruction did not complete within
 * GRU_OPERATION_TIMEOUT.  Identifies the kind of handle (CCH, TGH or TFH,
 * "???" if none matches) from its offset and panics.
 *
 * The message is passed to panic() without a KERN_* prefix: a log-level
 * prefix is only meaningful at the start of a printk() format, and inside
 * the panic format it would end up as stray bytes in the panic message.
 */
static void report_instruction_timeout(void *h)
{
	unsigned long goff = GSEGPOFF((unsigned long)h);
	const char *id = "???";

	if (TYPE_IS(CCH, goff))
		id = "CCH";
	else if (TYPE_IS(TGH, goff))
		id = "TGH";
	else if (TYPE_IS(TFH, goff))
		id = "TFH";

	panic("GRU %p (%s) is malfunctioning\n", h, id);
}
/*
 * Spin until the status field of handle h is no longer CCHSTATUS_ACTIVE
 * and return that status.  If more than GRU_OPERATION_TIMEOUT cycles pass,
 * the timeout is reported and the timer is restarted.  When OPT_STATS is
 * set in gru_options, the elapsed cycles are accounted to operation opc.
 */
static int wait_instruction_complete(void *h, enum mcs_op opc)
{
	unsigned long t0 = get_cycles();
	int status;

	for (;;) {
		cpu_relax();
		status = GET_MSEG_HANDLE_STATUS(h);
		if (status != CCHSTATUS_ACTIVE)
			break;
		if ((get_cycles() - t0) > GRU_OPERATION_TIMEOUT) {
			report_instruction_timeout(h);
			t0 = get_cycles();
		}
	}

	if (gru_options & OPT_STATS)
		update_mcs_stats(opc, get_cycles() - t0);
	return status;
}
/*
 * Issue a CCH ALLOCATE instruction and wait for it to finish.
 * Returns the completion status of the handle.
 */
int cch_allocate(struct gru_context_configuration_handle *cch)
{
	int status;

	cch->opc = CCHOP_ALLOCATE;
	start_instruction(cch);
	status = wait_instruction_complete(cch, cchop_allocate);

	/*
	 * The GSEG memory does not exist until the ALLOCATE completes, so
	 * stop speculation into the GSEG that it has just mapped.
	 */
	sync_core();
	return status;
}
/*
 * Issue a CCH START instruction and wait for it to finish.
 * Returns the completion status of the handle.
 */
int cch_start(struct gru_context_configuration_handle *cch)
{
	int status;

	cch->opc = CCHOP_START;
	start_instruction(cch);
	status = wait_instruction_complete(cch, cchop_start);
	return status;
}
/*
 * Issue CCHOP_INTERRUPT on the context configuration handle and wait for it
 * to finish. Returns the handle status reported on completion.
 */
int cch_interrupt(struct gru_context_configuration_handle *cch)
{
	int status;

	cch->opc = CCHOP_INTERRUPT;
	start_instruction(cch);
	status = wait_instruction_complete(cch, cchop_interrupt);

	return status;
}
/*
 * Issue CCHOP_DEALLOCATE on the context configuration handle and wait for it
 * to finish. Returns the handle status reported on completion.
 */
int cch_deallocate(struct gru_context_configuration_handle *cch)
{
	int status;

	cch->opc = CCHOP_DEALLOCATE;
	start_instruction(cch);
	status = wait_instruction_complete(cch, cchop_deallocate);

	/*
	 * Stop speculation into the GSEG being unmapped by the previous
	 * DEALLOCATE.
	 */
	sync_core();

	return status;
}
/*
 * Issue CCHOP_INTERRUPT_SYNC on the context configuration handle and wait
 * for it to finish. Returns the handle status reported on completion.
 */
int cch_interrupt_sync(struct gru_context_configuration_handle *cch)
{
	int status;

	cch->opc = CCHOP_INTERRUPT_SYNC;
	start_instruction(cch);
	status = wait_instruction_complete(cch, cchop_interrupt_sync);

	return status;
}
/*
 * Fill in the TLB global handle with the invalidate parameters, issue
 * TGHOP_TLBINV and wait for completion.
 *
 * Every parameter is stored into the handle field of the same name before
 * the instruction is started. Returns the handle status reported by
 * wait_instruction_complete().
 */
int tgh_invalidate(struct gru_tlb_global_handle *tgh,
unsigned long vaddr, unsigned long vaddrmask,
int asid, int pagesize, int global, int n,
unsigned short ctxbitmap)
{
tgh->vaddr = vaddr;
tgh->asid = asid;
tgh->pagesize = pagesize;
tgh->n = n;
tgh->global = global;
tgh->vaddrmask = vaddrmask;
tgh->ctxbitmap = ctxbitmap;
/* opcode is written last, just before the instruction is started */
tgh->opc = TGHOP_TLBINV;
start_instruction(tgh);
return wait_instruction_complete(tgh, tghop_invalidate);
}
/*
 * Load a translation into the TLB fault handle, issue TFHOP_WRITE_ONLY and
 * wait for completion.
 *
 * 'paddr' is stored as a page frame number (paddr >> GRU_PADDR_SHIFT); the
 * remaining parameters are stored into the corresponding handle fields
 * (asid -> fillasid, vaddr -> fillvaddr). Returns the handle status reported
 * by wait_instruction_complete().
 */
int tfh_write_only(struct gru_tlb_fault_handle *tfh,
unsigned long paddr, int gaa,
unsigned long vaddr, int asid, int dirty,
int pagesize)
{
tfh->fillasid = asid;
tfh->fillvaddr = vaddr;
tfh->pfn = paddr >> GRU_PADDR_SHIFT;
tfh->gaa = gaa;
tfh->dirty = dirty;
tfh->pagesize = pagesize;
/* opcode is written last, just before the instruction is started */
tfh->opc = TFHOP_WRITE_ONLY;
start_instruction(tfh);
return wait_instruction_complete(tfh, tfhop_write_only);
}
/*
 * Load a translation into the TLB fault handle and issue
 * TFHOP_WRITE_RESTART.
 *
 * Takes the same parameters as tfh_write_only() but, unlike that function,
 * does not wait for the instruction to complete and returns nothing.
 */
void tfh_write_restart(struct gru_tlb_fault_handle *tfh,
unsigned long paddr, int gaa,
unsigned long vaddr, int asid, int dirty,
int pagesize)
{
tfh->fillasid = asid;
tfh->fillvaddr = vaddr;
tfh->pfn = paddr >> GRU_PADDR_SHIFT;
tfh->gaa = gaa;
tfh->dirty = dirty;
tfh->pagesize = pagesize;
/* opcode is written last, just before the instruction is started */
tfh->opc = TFHOP_WRITE_RESTART;
start_instruction(tfh);
}
/* Issue TFHOP_RESTART on the TLB fault handle; does not wait for completion. */
void tfh_restart(struct gru_tlb_fault_handle *tfh)
{
	struct gru_tlb_fault_handle *handle = tfh;

	handle->opc = TFHOP_RESTART;
	start_instruction(handle);
}
/*
 * Issue TFHOP_USER_POLLING_MODE on the TLB fault handle; does not wait for
 * completion.
 */
void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh)
{
	struct gru_tlb_fault_handle *handle = tfh;

	handle->opc = TFHOP_USER_POLLING_MODE;
	start_instruction(handle);
}
/* Issue TFHOP_EXCEPTION on the TLB fault handle; does not wait for completion. */
void tfh_exception(struct gru_tlb_fault_handle *tfh)
{
	struct gru_tlb_fault_handle *handle = tfh;

	handle->opc = TFHOP_EXCEPTION;
	start_instruction(handle);
}

View file

@ -0,0 +1,531 @@
/*
* SN Platform GRU Driver
*
* GRU HANDLE DEFINITION
*
* Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef __GRUHANDLES_H__
#define __GRUHANDLES_H__
#include "gru_instructions.h"
/*
* Manifest constants for GRU Memory Map
*
* All *_BASE values below are byte offsets from the start of one GRU
* chiplet's address range, which is GRU_SIZE bytes long.
*/
#define GRU_GSEG0_BASE 0
#define GRU_MCS_BASE (64 * 1024 * 1024)
#define GRU_SIZE (128UL * 1024 * 1024)
/* Handle & resource counts */
#define GRU_NUM_CB 128
#define GRU_NUM_DSR_BYTES (32 * 1024)
#define GRU_NUM_TFM 16
#define GRU_NUM_TGH 24
#define GRU_NUM_CBE 128
#define GRU_NUM_TFH 128
#define GRU_NUM_CCH 16
/* Maximum resource counts that can be reserved by user programs */
#define GRU_NUM_USER_CBR GRU_NUM_CBE
#define GRU_NUM_USER_DSR_BYTES GRU_NUM_DSR_BYTES
/* Bytes per handle & handle stride. Code assumes all cb, tfh, cbe handles
* are the same */
#define GRU_HANDLE_BYTES 64
#define GRU_HANDLE_STRIDE 256
/* Base addresses of handles (each handle type gets a 0x8000-byte window) */
#define GRU_TFM_BASE (GRU_MCS_BASE + 0x00000)
#define GRU_TGH_BASE (GRU_MCS_BASE + 0x08000)
#define GRU_CBE_BASE (GRU_MCS_BASE + 0x10000)
#define GRU_TFH_BASE (GRU_MCS_BASE + 0x18000)
#define GRU_CCH_BASE (GRU_MCS_BASE + 0x20000)
/* User gseg constants */
#define GRU_GSEG_STRIDE (4 * 1024 * 1024)
/* Round an address down to the start of its GSEG page */
#define GSEG_BASE(a) ((a) & ~(GRU_GSEG_PAGESIZE - 1))
/* Data segment constants (AU = allocation unit, CL = cache line) */
#define GRU_DSR_AU_BYTES 1024
#define GRU_DSR_CL (GRU_NUM_DSR_BYTES / GRU_CACHE_LINE_BYTES)
#define GRU_DSR_AU_CL (GRU_DSR_AU_BYTES / GRU_CACHE_LINE_BYTES)
#define GRU_DSR_AU (GRU_NUM_DSR_BYTES / GRU_DSR_AU_BYTES)
/* Control block constants */
#define GRU_CBR_AU_SIZE 2
#define GRU_CBR_AU (GRU_NUM_CBE / GRU_CBR_AU_SIZE)
/* Convert resource counts to the number of AU */
#define GRU_DS_BYTES_TO_AU(n) DIV_ROUND_UP(n, GRU_DSR_AU_BYTES)
#define GRU_CB_COUNT_TO_AU(n) DIV_ROUND_UP(n, GRU_CBR_AU_SIZE)
/* UV limits */
#define GRU_CHIPLETS_PER_HUB 2
#define GRU_HUBS_PER_BLADE 1
#define GRU_CHIPLETS_PER_BLADE (GRU_HUBS_PER_BLADE * GRU_CHIPLETS_PER_HUB)
/* User GRU Gseg offsets */
#define GRU_CB_BASE 0
#define GRU_CB_LIMIT (GRU_CB_BASE + GRU_HANDLE_STRIDE * GRU_NUM_CBE)
#define GRU_DS_BASE 0x20000
#define GRU_DS_LIMIT (GRU_DS_BASE + GRU_NUM_DSR_BYTES)
/* Convert a GRU physical address to the chiplet offset */
#define GSEGPOFF(h) ((h) & (GRU_SIZE - 1))
/* Convert an arbitrary handle address to the beginning of the GRU segment */
#define GRUBASE(h) ((void *)((unsigned long)(h) & ~(GRU_SIZE - 1)))
/*
* Test a valid handle address to determine the type.
* 'hn' is the handle-type token (e.g. CCH, TGH, TFH) and 'h' is the chiplet
* offset of the handle. True when 'h' lies inside the GRU_<hn>_BASE window of
* GRU_NUM_<hn> handles and is aligned to GRU_HANDLE_STRIDE.
* Note: 'h' is evaluated more than once.
*/
#define TYPE_IS(hn, h) ((h) >= GRU_##hn##_BASE && (h) < \
GRU_##hn##_BASE + GRU_NUM_##hn * GRU_HANDLE_STRIDE && \
(((h) & (GRU_HANDLE_STRIDE - 1)) == 0))
/* General addressing helpers. */

/* Address of the GSEG belonging to context 'ctxnum' of the GRU mapped at 'base'. */
static inline void *get_gseg_base_address(void *base, int ctxnum)
{
	void *gseg0 = base + GRU_GSEG0_BASE;

	return gseg0 + GRU_GSEG_STRIDE * ctxnum;
}
/* Address of control block number 'line' inside the GSEG of context 'ctxnum'. */
static inline void *get_gseg_base_address_cb(void *base, int ctxnum, int line)
{
	void *gseg = get_gseg_base_address(base, ctxnum);

	return gseg + GRU_CB_BASE + GRU_HANDLE_STRIDE * line;
}
/* Address of data-segment cache line 'line' inside the GSEG of context 'ctxnum'. */
static inline void *get_gseg_base_address_ds(void *base, int ctxnum, int line)
{
	void *gseg = get_gseg_base_address(base, ctxnum);

	return gseg + GRU_DS_BASE + GRU_CACHE_LINE_BYTES * line;
}
/* Address of TLB fault map number 'ctxnum' of the GRU mapped at 'base'. */
static inline struct gru_tlb_fault_map *get_tfm(void *base, int ctxnum)
{
	void *tfm = base + GRU_TFM_BASE + ctxnum * GRU_HANDLE_STRIDE;

	return tfm;
}
/* Address of TLB global handle number 'ctxnum' of the GRU mapped at 'base'. */
static inline struct gru_tlb_global_handle *get_tgh(void *base, int ctxnum)
{
	void *tgh = base + GRU_TGH_BASE + ctxnum * GRU_HANDLE_STRIDE;

	return tgh;
}
/* Address of extended control block number 'ctxnum' of the GRU mapped at 'base'. */
static inline struct gru_control_block_extended *get_cbe(void *base, int ctxnum)
{
	void *cbe = base + GRU_CBE_BASE + ctxnum * GRU_HANDLE_STRIDE;

	return cbe;
}
/* Address of TLB fault handle number 'ctxnum' of the GRU mapped at 'base'. */
static inline struct gru_tlb_fault_handle *get_tfh(void *base, int ctxnum)
{
	void *tfh = base + GRU_TFH_BASE + ctxnum * GRU_HANDLE_STRIDE;

	return tfh;
}
/* Address of the context configuration handle for context 'ctxnum'. */
static inline struct gru_context_configuration_handle *get_cch(void *base,
					int ctxnum)
{
	void *cch = base + GRU_CCH_BASE + ctxnum * GRU_HANDLE_STRIDE;

	return cch;
}
/*
 * Index of control block 'cb' within its GSEG: the byte offset of 'cb'
 * inside the GSEG page divided by the handle stride.
 */
static inline unsigned long get_cb_number(void *cb)
{
	unsigned long gseg_offset =
		((unsigned long)cb - GRU_CB_BASE) % GRU_GSEG_PAGESIZE;

	return gseg_offset / GRU_HANDLE_STRIDE;
}
/*
 * Physical address of a specific GRU chiplet: 'paddr' advanced by GRU_SIZE
 * for every chiplet that precedes it (two chiplets per pnode; 'chiplet' is
 * 0 or 1).
 */
static inline unsigned long gru_chiplet_paddr(unsigned long paddr, int pnode,
					int chiplet)
{
	unsigned long chiplet_offset = GRU_SIZE * (2 * pnode + chiplet);

	return paddr + chiplet_offset;
}
/*
 * Virtual address of a specific GRU chiplet: 'vaddr' advanced by GRU_SIZE
 * for every chiplet that precedes it (two chiplets per pnode).
 */
static inline void *gru_chiplet_vaddr(void *vaddr, int pnode, int chiplet)
{
	unsigned long chiplet_offset = GRU_SIZE * (2 * pnode + chiplet);

	return vaddr + chiplet_offset;
}
/*
 * Map a TLB fault handle to the extended control block with the same index
 * by rebasing its address from the TFH window to the CBE window.
 */
static inline struct gru_control_block_extended *gru_tfh_to_cbe(
					struct gru_tlb_fault_handle *tfh)
{
	unsigned long addr = (unsigned long)tfh;

	return (struct gru_control_block_extended *)
		(addr - GRU_TFH_BASE + GRU_CBE_BASE);
}
/*
* Global TLB Fault Map
* Bitmap of outstanding TLB misses needing interrupt/polling service.
*
* Both bitmaps hold one bit per CBE (GRU_NUM_CBE bits); each is followed by
* two longs of padding.
*/
struct gru_tlb_fault_map {
unsigned long fault_bits[BITS_TO_LONGS(GRU_NUM_CBE)];
unsigned long fill0[2];
unsigned long done_bits[BITS_TO_LONGS(GRU_NUM_CBE)];
unsigned long fill1[2];
};
/*
* TGH - TLB Global Handle
* Used for TLB flushing.
*
* Bit-field layout of the handle; "DW n" marks the start of 64-bit
* doubleword n. fillN members are padding and are not named otherwise.
* The status field sits at bits 16-17 of DW 0.
*/
struct gru_tlb_global_handle {
unsigned int cmd:1; /* DW 0 */
unsigned int delresp:1;
unsigned int opc:1;
unsigned int fill1:5;
unsigned int fill2:8;
unsigned int status:2;
unsigned long fill3:2;
unsigned int state:3;
unsigned long fill4:1;
unsigned int cause:3;
unsigned long fill5:37;
unsigned long vaddr:64; /* DW 1 */
unsigned int asid:24; /* DW 2 */
unsigned int fill6:8;
unsigned int pagesize:5;
unsigned int fill7:11;
unsigned int global:1;
unsigned int fill8:15;
unsigned long vaddrmask:39; /* DW 3 */
unsigned int fill9:9;
unsigned int n:10;
unsigned int fill10:6;
unsigned int ctxbitmap:16; /* DW4 */
unsigned long fill11[3];
};
/* Values for the TGH 'cmd' field */
enum gru_tgh_cmd {
	TGHCMD_START = 0,
};

/* Values for the TGH 'opc' field */
enum gru_tgh_opc {
	TGHOP_TLBNOP = 0,
	TGHOP_TLBINV = 1,
};

/* Values for the TGH 'status' field */
enum gru_tgh_status {
	TGHSTATUS_IDLE      = 0,
	TGHSTATUS_EXCEPTION = 1,
	TGHSTATUS_ACTIVE    = 2,
};

/* Values for the TGH 'state' field */
enum gru_tgh_state {
	TGHSTATE_IDLE            = 0,
	TGHSTATE_PE_INVAL        = 1,
	TGHSTATE_INTERRUPT_INVAL = 2,
	TGHSTATE_WAITDONE        = 3,
	TGHSTATE_RESTART_CTX     = 4,
};

/* Values for the TGH 'cause' field */
enum gru_tgh_cause {
	TGHCAUSE_RR_ECC   = 0,
	TGHCAUSE_TLB_ECC  = 1,
	TGHCAUSE_LRU_ECC  = 2,
	TGHCAUSE_PS_ECC   = 3,
	TGHCAUSE_MUL_ERR  = 4,
	TGHCAUSE_DATA_ERR = 5,
	TGHCAUSE_SW_FORCE = 6,
};
/*
* TFH - TLB Fault Handle
* Used for TLB dropins into the GRU TLB.
*
* Bit-field layout of the handle; "DW n" marks the start of 64-bit
* doubleword n. fillN members are padding. The status field sits at
* bits 16-17 of DW 0.
*/
struct gru_tlb_fault_handle {
unsigned int cmd:1; /* DW 0 - low 32*/
unsigned int delresp:1;
unsigned int fill0:2;
unsigned int opc:3;
unsigned int fill1:9;
unsigned int status:2;
unsigned int fill2:2;
unsigned int state:3;
unsigned int fill3:1;
unsigned int cause:6;
unsigned int cb_int:1;
unsigned int fill4:1;
unsigned int indexway:12; /* DW 0 - high 32 */
unsigned int fill5:4;
unsigned int ctxnum:4;
unsigned int fill6:12;
unsigned long missvaddr:64; /* DW 1 */
unsigned int missasid:24; /* DW 2 */
unsigned int fill7:8;
unsigned int fillasid:24;
unsigned int dirty:1;
unsigned int gaa:2;
unsigned long fill8:5;
unsigned long pfn:41; /* DW 3 */
unsigned int fill9:7;
unsigned int pagesize:5;
unsigned int fill10:11;
unsigned long fillvaddr:64; /* DW 4 */
unsigned long fill11[3];
};
/* Values for the TFH 'opc' field (5 and 6 are not defined here) */
enum gru_tfh_opc {
	TFHOP_NOOP              = 0,
	TFHOP_RESTART           = 1,
	TFHOP_WRITE_ONLY        = 2,
	TFHOP_WRITE_RESTART     = 3,
	TFHOP_EXCEPTION         = 4,
	TFHOP_USER_POLLING_MODE = 7,
};

/* Values for the TFH 'status' field */
enum tfh_status {
	TFHSTATUS_IDLE      = 0,
	TFHSTATUS_EXCEPTION = 1,
	TFHSTATUS_ACTIVE    = 2,
};

/* Values for the TFH 'state' field */
enum tfh_state {
	TFHSTATE_INACTIVE    = 0,
	TFHSTATE_IDLE        = 1,
	TFHSTATE_MISS_UPM    = 2,
	TFHSTATE_MISS_FMM    = 3,
	TFHSTATE_HW_ERR      = 4,
	TFHSTATE_WRITE_TLB   = 5,
	TFHSTATE_RESTART_CBR = 6,
};

/* TFH cause bits */
enum tfh_cause {
	TFHCAUSE_NONE                  = 0,
	TFHCAUSE_TLB_MISS              = 1,
	TFHCAUSE_TLB_MOD               = 2,
	TFHCAUSE_HW_ERROR_RR           = 3,
	TFHCAUSE_HW_ERROR_MAIN_ARRAY   = 4,
	TFHCAUSE_HW_ERROR_VALID        = 5,
	TFHCAUSE_HW_ERROR_PAGESIZE     = 6,
	TFHCAUSE_INSTRUCTION_EXCEPTION = 7,
	TFHCAUSE_UNCORRECTIBLE_ERROR   = 8,
};
/* GAA values (stored in the 2-bit 'gaa' field of the TFH) */
#define GAA_RAM		0x0
#define GAA_MMIO	0x1
#define GAA_NCRAM	0x2
#define GAA_REGISTER	0x3

/* GRU paddr shift for pfn. (NOTE: shift is NOT by actual pagesize) */
#define GRU_PADDR_SHIFT	12
/*
* Context Configuration handle
* Used to allocate resources to a GSEG context.
*
* The structure is packed; "DW n" marks 64-bit doubleword n. The status
* field sits at bits 16-17 of DW 0.
*/
struct gru_context_configuration_handle {
unsigned int cmd:1; /* DW0 */
unsigned int delresp:1;
unsigned int opc:3;
unsigned int unmap_enable:1;
unsigned int req_slice_set_enable:1;
unsigned int req_slice:2;
unsigned int cb_int_enable:1;
unsigned int tlb_int_enable:1;
unsigned int tfm_fault_bit_enable:1;
unsigned int tlb_int_select:4;
unsigned int status:2;
unsigned int state:2;
unsigned int reserved2:4;
unsigned int cause:4;
unsigned int tfm_done_bit_enable:1;
unsigned int unused:3;
/* allocation bitmaps; their population counts are used when dumping a context */
unsigned int dsr_allocation_map;
unsigned long cbr_allocation_map; /* DW1 */
unsigned int asid[8]; /* DW 2 - 5 */
unsigned short sizeavail[8]; /* DW 6 - 7 */
} __attribute__ ((packed));
/* Values for the CCH 'opc' field (numbering starts at 1) */
enum gru_cch_opc {
	CCHOP_START          = 1,
	CCHOP_ALLOCATE       = 2,
	CCHOP_INTERRUPT      = 3,
	CCHOP_DEALLOCATE     = 4,
	CCHOP_INTERRUPT_SYNC = 5,
};

/* Values for the CCH 'status' field */
enum gru_cch_status {
	CCHSTATUS_IDLE      = 0,
	CCHSTATUS_EXCEPTION = 1,
	CCHSTATUS_ACTIVE    = 2,
};

/* Values for the CCH 'state' field */
enum gru_cch_state {
	CCHSTATE_INACTIVE    = 0,
	CCHSTATE_MAPPED      = 1,
	CCHSTATE_ACTIVE      = 2,
	CCHSTATE_INTERRUPTED = 3,
};

/* CCH Exception cause */
enum gru_cch_cause {
	CCHCAUSE_REGION_REGISTER_WRITE_ERROR = 1,
	CCHCAUSE_ILLEGAL_OPCODE,		/* 2 */
	CCHCAUSE_INVALID_START_REQUEST,		/* 3 */
	CCHCAUSE_INVALID_ALLOCATION_REQUEST,	/* 4 */
	CCHCAUSE_INVALID_DEALLOCATION_REQUEST,	/* 5 */
	CCHCAUSE_INVALID_INTERRUPT_REQUEST,	/* 6 */
	CCHCAUSE_CCH_BUSY,			/* 7 */
	CCHCAUSE_NO_CBRS_TO_ALLOCATE,		/* 8 */
	CCHCAUSE_BAD_TFM_CONFIG,		/* 9 */
	CCHCAUSE_CBR_RESOURCES_OVERSUBSCRIPED,	/* 10 */
	CCHCAUSE_DSR_RESOURCES_OVERSUBSCRIPED,	/* 11 */
	CCHCAUSE_CBR_DEALLOCATION_ERROR,	/* 12 */
};
/*
* CBE - Control Block Extended
* Maintains internal GRU state for active CBs.
*
* Bit-field layout; "DW n" marks the start of 64-bit doubleword n.
* reservedN members are padding.
*/
struct gru_control_block_extended {
unsigned int reserved0:1; /* DW 0 - low */
unsigned int imacpy:3;
unsigned int reserved1:4;
unsigned int xtypecpy:3;
unsigned int iaa0cpy:2;
unsigned int iaa1cpy:2;
unsigned int reserved2:1;
unsigned int opccpy:8;
unsigned int exopccpy:8;
unsigned int idef2cpy:22; /* DW 0 - high */
unsigned int reserved3:10;
unsigned int idef4cpy:22; /* DW 1 */
unsigned int reserved4:10;
unsigned int idef4upd:22;
unsigned int reserved5:10;
unsigned long idef1upd:64; /* DW 2 */
unsigned long idef5cpy:64; /* DW 3 */
unsigned long idef6cpy:64; /* DW 4 */
unsigned long idef3upd:64; /* DW 5 */
unsigned long idef5upd:64; /* DW 6 */
unsigned int idef2upd:22; /* DW 7 */
unsigned int reserved6:10;
unsigned int ecause:20;
unsigned int cbrstate:4;
unsigned int cbrexecstatus:8;
};
/* CBE fields for active BCOPY instructions (aliases for the generic idef fields) */
#define cbe_baddr0 idef1upd
#define cbe_baddr1 idef3upd
#define cbe_src_cl idef6cpy
#define cbe_nelemcur idef5upd
/* Values for the CBE 'cbrstate' field */
enum gru_cbr_state {
	CBRSTATE_INACTIVE                  = 0,
	CBRSTATE_IDLE                      = 1,
	CBRSTATE_PE_CHECK                  = 2,
	CBRSTATE_QUEUED                    = 3,
	CBRSTATE_WAIT_RESPONSE             = 4,
	CBRSTATE_INTERRUPTED               = 5,
	CBRSTATE_INTERRUPTED_MISS_FMM      = 6,
	CBRSTATE_BUSY_INTERRUPT_MISS_FMM   = 7,
	CBRSTATE_INTERRUPTED_MISS_UPM      = 8,
	CBRSTATE_BUSY_INTERRUPTED_MISS_UPM = 9,
	CBRSTATE_REQUEST_ISSUE             = 10,
	CBRSTATE_BUSY_INTERRUPT            = 11,
};
/* CBE cbrexecstatus bits - defined in gru_instructions.h*/
/* CBE ecause bits - defined in gru_instructions.h */
/*
 * Convert a processor pagesize into the encoded pagesize used by the GRU.
 * The processor pagesize is given as log2 of the bytes per page (i.e.
 * PAGE_SHIFT). Shifts above 20 are first biased by 2, then the result is
 * halved and 6 is subtracted:
 *
 *	pagesize	log pagesize	grupagesize
 *	  4k			12	0
 *	 16k			14	1
 *	 64k			16	2
 *	256k			18	3
 *	  1m			20	4
 *	  2m			21	5
 *	  4m			22	6
 *	 16m			24	7
 *	 64m			26	8
 *	...
 *
 * Note: 'sh' is evaluated more than once.
 */
#define GRU_PAGESIZE(sh)	((((sh) + ((sh) > 20 ? 2 : 0)) >> 1) - 6)

/* Bit mask with only the bit for the given page shift's GRU pagesize set */
#define GRU_SIZEAVAIL(sh)	(1UL << GRU_PAGESIZE(sh))

/* minimum TLB purge count to ensure a full purge */
#define GRUMAXINVAL		1024UL
int cch_allocate(struct gru_context_configuration_handle *cch);
int cch_start(struct gru_context_configuration_handle *cch);
int cch_interrupt(struct gru_context_configuration_handle *cch);
int cch_deallocate(struct gru_context_configuration_handle *cch);
int cch_interrupt_sync(struct gru_context_configuration_handle *cch);
int tgh_invalidate(struct gru_tlb_global_handle *tgh, unsigned long vaddr,
unsigned long vaddrmask, int asid, int pagesize, int global, int n,
unsigned short ctxbitmap);
int tfh_write_only(struct gru_tlb_fault_handle *tfh, unsigned long paddr,
int gaa, unsigned long vaddr, int asid, int dirty, int pagesize);
void tfh_write_restart(struct gru_tlb_fault_handle *tfh, unsigned long paddr,
int gaa, unsigned long vaddr, int asid, int dirty, int pagesize);
void tfh_restart(struct gru_tlb_fault_handle *tfh);
void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh);
void tfh_exception(struct gru_tlb_fault_handle *tfh);
#endif /* __GRUHANDLES_H__ */

View file

@ -0,0 +1,234 @@
/*
* SN Platform GRU Driver
*
* Dump GRU State
*
* Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/uaccess.h>
#include <linux/delay.h>
#include <linux/bitops.h>
#include <asm/uv/uv_hub.h>
#include "gru.h"
#include "grutables.h"
#include "gruhandles.h"
#include "grulib.h"
/* Number of 1 ms-spaced attempts made to lock a CCH before dumping it */
#define CCH_LOCK_ATTEMPTS	10

/*
 * Copy one handle (GRU_HANDLE_BYTES bytes) from kernel address 's' to the
 * user buffer cursor '*dp' and advance the cursor past it.
 *
 * Returns 0 on success, -1 if the copy to user space failed (the cursor is
 * left untouched in that case).
 */
static int gru_user_copy_handle(void __user **dp, void *s)
{
	void __user *dst = *dp;

	if (copy_to_user(dst, s, GRU_HANDLE_BYTES) != 0)
		return -1;

	*dp = dst + GRU_HANDLE_BYTES;
	return 0;
}
/*
 * Dump the per-context handles and (optionally) data segment to user space.
 *
 * For every CBR selected by the CCH's cbr_allocation_map three handles are
 * written back to back: the CB itself (taken sequentially from the start of
 * the context's GSEG), then the TFH and the CBE at index 'i'. When 'dsrcnt'
 * is non-zero the data segment is appended.
 *
 * @grubase:    kernel virtual base of the GRU chiplet
 * @cch:        context configuration handle of the context being dumped
 * @ubuf:       user-space destination
 * @ctxnum:     context number (selects the GSEG)
 * @dsrcnt:     number of data-segment units to dump, 0 for none
 * @flush_cbrs: when non-zero, flush each CB from the cache before copying
 *
 * Returns 0 on success or -EFAULT if any copy to user space faults.
 */
static int gru_dump_context_data(void *grubase,
			struct gru_context_configuration_handle *cch,
			void __user *ubuf, int ctxnum, int dsrcnt,
			int flush_cbrs)
{
	void *cb, *cbe, *tfh, *gseg;
	int i, scr;

	gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
	cb = gseg + GRU_CB_BASE;
	cbe = grubase + GRU_CBE_BASE;
	tfh = grubase + GRU_TFH_BASE;

	for_each_cbr_in_allocation_map(i, &cch->cbr_allocation_map, scr) {
		if (flush_cbrs)
			gru_flush_cache(cb);
		if (gru_user_copy_handle(&ubuf, cb))
			goto fail;
		if (gru_user_copy_handle(&ubuf, tfh + i * GRU_HANDLE_STRIDE))
			goto fail;
		if (gru_user_copy_handle(&ubuf, cbe + i * GRU_HANDLE_STRIDE))
			goto fail;
		cb += GRU_HANDLE_STRIDE;
	}

	/*
	 * 'ubuf' is a user-space pointer: it must be written with
	 * copy_to_user(), never with a plain memcpy(), so that a bad or
	 * unmapped address produces -EFAULT instead of a kernel fault.
	 *
	 * NOTE(review): the length uses GRU_HANDLE_STRIDE per unit while the
	 * caller sizes the buffer with GRU_CACHE_LINE_BYTES per unit; the
	 * length is kept as it was - confirm which is intended.
	 */
	if (dsrcnt && copy_to_user(ubuf, gseg + GRU_DS_BASE,
				   dsrcnt * GRU_HANDLE_STRIDE))
		goto fail;
	return 0;

fail:
	return -EFAULT;
}
/*
 * Dump all GRU_NUM_TFM TLB fault maps of 'gru' to the user buffer.
 *
 * @gru:     GRU chiplet state
 * @ubuf:    start of the remaining user buffer
 * @ubufend: end of the user buffer
 *
 * Returns the number of bytes written, -EFBIG if the remaining buffer is
 * too small, or -EFAULT if a copy to user space faults.
 */
static int gru_dump_tfm(struct gru_state *gru,
		void __user *ubuf, void __user *ubufend)
{
	struct gru_tlb_fault_map *tfm;
	int i, bytes;

	bytes = GRU_NUM_TFM * GRU_CACHE_LINE_BYTES;
	/* The size check must actually reject the request, not just note it. */
	if (bytes > ubufend - ubuf)
		return -EFBIG;

	for (i = 0; i < GRU_NUM_TFM; i++) {
		tfm = get_tfm(gru->gs_gru_base_vaddr, i);
		if (gru_user_copy_handle(&ubuf, tfm))
			goto fail;
	}
	return bytes;

fail:
	return -EFAULT;
}
/*
 * Dump all GRU_NUM_TGH TLB global handles of 'gru' to the user buffer.
 *
 * @gru:     GRU chiplet state
 * @ubuf:    start of the remaining user buffer
 * @ubufend: end of the user buffer
 *
 * Returns the number of bytes written, -EFBIG if the remaining buffer is
 * too small, or -EFAULT if a copy to user space faults.
 */
static int gru_dump_tgh(struct gru_state *gru,
		void __user *ubuf, void __user *ubufend)
{
	struct gru_tlb_global_handle *tgh;
	int i, bytes;

	bytes = GRU_NUM_TGH * GRU_CACHE_LINE_BYTES;
	/* The size check must actually reject the request, not just note it. */
	if (bytes > ubufend - ubuf)
		return -EFBIG;

	for (i = 0; i < GRU_NUM_TGH; i++) {
		tgh = get_tgh(gru->gs_gru_base_vaddr, i);
		if (gru_user_copy_handle(&ubuf, tgh))
			goto fail;
	}
	return bytes;

fail:
	return -EFAULT;
}
/*
 * Dump one GRU context to the user buffer.
 *
 * Layout written at 'ubuf': a struct gru_dump_context_header, the CCH, and
 * then (if the CCH could be locked, or locking was not requested) the
 * context data produced by gru_dump_context_data().
 *
 * @gru:        GRU chiplet state
 * @ctxnum:     context number to dump
 * @ubuf:       start of the remaining user buffer
 * @ubufend:    end of the user buffer
 * @data_opt:   non-zero to include the data segment
 * @lock_cch:   non-zero if context data may only be dumped with the CCH locked
 * @flush_cbrs: passed through to gru_dump_context_data()
 *
 * Returns the number of bytes consumed in the user buffer, -EFBIG if the
 * buffer is too small for the context data, or -EFAULT on a user-copy fault.
 */
static int gru_dump_context(struct gru_state *gru, int ctxnum,
		void __user *ubuf, void __user *ubufend, char data_opt,
		char lock_cch, char flush_cbrs)
{
	struct gru_dump_context_header hdr;
	struct gru_dump_context_header __user *uhdr = ubuf;
	struct gru_context_configuration_handle *cch;
	/* Kernel-side snapshot of the CCH, always GRU_HANDLE_BYTES long */
	union {
		struct gru_context_configuration_handle cch;
		char raw[GRU_HANDLE_BYTES];
	} snap;
	struct gru_thread_state *gts;
	int try, cch_locked = 0, cbrcnt = 0, dsrcnt = 0, bytes = 0, ret = 0;
	void *grubase;

	memset(&hdr, 0, sizeof(hdr));
	grubase = gru->gs_gru_base_vaddr;
	cch = get_cch(grubase, ctxnum);
	for (try = 0; try < CCH_LOCK_ATTEMPTS; try++) {
		cch_locked = trylock_cch_handle(cch);
		if (cch_locked)
			break;
		msleep(1);
	}

	ubuf += sizeof(hdr);

	/*
	 * The dumped copy of the CCH has delresp cleared when we hold the
	 * lock (presumably because taking the lock sets that bit - verify
	 * against trylock_cch_handle()). Patch a kernel-side snapshot and
	 * copy that out: user memory must never be written by dereferencing
	 * the user pointer directly.
	 */
	memcpy(snap.raw, cch, GRU_HANDLE_BYTES);
	if (cch_locked)
		snap.cch.delresp = 0;
	if (gru_user_copy_handle(&ubuf, snap.raw)) {
		if (cch_locked)
			unlock_cch_handle(cch);
		return -EFAULT;
	}
	bytes = sizeof(hdr) + GRU_CACHE_LINE_BYTES;

	if (cch_locked || !lock_cch) {
		gts = gru->gs_gts[ctxnum];
		if (gts && gts->ts_vma) {
			hdr.pid = gts->ts_tgid_owner;
			hdr.vaddr = gts->ts_vma->vm_start;
		}
		if (cch->state != CCHSTATE_INACTIVE) {
			cbrcnt = hweight64(cch->cbr_allocation_map) *
						GRU_CBR_AU_SIZE;
			dsrcnt = data_opt ? hweight32(cch->dsr_allocation_map) *
						GRU_DSR_AU_CL : 0;
		}
		/* three handles (CB, TFH, CBE) per CBR plus the data segment */
		bytes += (3 * cbrcnt + dsrcnt) * GRU_CACHE_LINE_BYTES;
		if (bytes > ubufend - ubuf)
			ret = -EFBIG;
		else
			ret = gru_dump_context_data(grubase, cch, ubuf, ctxnum,
							dsrcnt, flush_cbrs);
	}
	if (cch_locked)
		unlock_cch_handle(cch);
	if (ret)
		return ret;

	/* The header is written last, once the counts are known. */
	hdr.magic = GRU_DUMP_MAGIC;
	hdr.gid = gru->gs_gid;
	hdr.ctxnum = ctxnum;
	hdr.cbrcnt = cbrcnt;
	hdr.dsrcnt = dsrcnt;
	hdr.cch_locked = cch_locked;
	if (copy_to_user(uhdr, &hdr, sizeof(hdr)))
		return -EFAULT;

	return bytes;
}
/*
 * Handle a dump-chiplet-state request.
 *
 * 'arg' is the user address of a struct gru_dump_chiplet_state_req. The
 * TFMs, the TGHs and then the selected context (or every context when
 * req.ctxnum is negative) are written to req.buf in that order.
 *
 * Returns the number of contexts dumped, or a negative errno: -EFAULT on a
 * user-copy fault, -EINVAL for an out-of-range gid, or the error returned by
 * one of the dump helpers.
 *
 * NOTE(review): the request is copied back to user space unchanged; no
 * field of 'req' (including num_contexts) is written here.
 */
int gru_dump_chiplet_request(unsigned long arg)
{
	struct gru_dump_chiplet_state_req req;
	struct gru_state *gru;
	void __user *ubuf;
	void __user *ubufend;
	int ctxnum, ret, dumped = 0;

	if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
		return -EFAULT;

	/* Currently, only dump by gid is implemented */
	if (req.gid >= gru_max_gids || req.gid < 0)
		return -EINVAL;

	gru = GID_TO_GRU(req.gid);
	ubuf = req.buf;
	ubufend = req.buf + req.buflen;

	ret = gru_dump_tfm(gru, ubuf, ubufend);
	if (ret < 0)
		return ret;
	ubuf += ret;

	ret = gru_dump_tgh(gru, ubuf, ubufend);
	if (ret < 0)
		return ret;
	ubuf += ret;

	for (ctxnum = 0; ctxnum < GRU_NUM_CCH; ctxnum++) {
		/* skip contexts that were not asked for */
		if (req.ctxnum != ctxnum && req.ctxnum >= 0)
			continue;

		ret = gru_dump_context(gru, ctxnum, ubuf, ubufend,
				       req.data_opt, req.lock_cch,
				       req.flush_cbrs);
		if (ret < 0)
			return ret;
		ubuf += ret;
		dumped++;
	}

	if (copy_to_user((void __user *)arg, &req, sizeof(req)))
		return -EFAULT;

	return dumped;
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,214 @@
/*
* Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef __GRU_KSERVICES_H_
#define __GRU_KSERVICES_H_
/*
* Message queues using the GRU to send/receive messages.
*
* These functions allow the user to create a message queue for
* sending/receiving 1 or 2 cacheline messages using the GRU.
*
* Processes SENDING messages will use a kernel CBR/DSR to send
* the message. This is transparent to the caller.
*
* The receiver does not use any GRU resources.
*
* The functions support:
* - single receiver
* - multiple senders
* - cross partition message
*
* Missing features ZZZ:
* - user options for dealing with timeouts, queue full, etc.
* - gru_create_message_queue() needs interrupt vector info
*/
/*
 * Descriptor for a GRU message queue. It is passed to the
 * gru_create_message_queue(), gru_send_message_gpa(), gru_get_next_message()
 * and gru_free_message() calls declared below.
 */
struct gru_message_queue_desc {
void *mq; /* message queue vaddress */
unsigned long mq_gpa; /* global address of mq */
int qlines; /* queue size in CL */
int interrupt_vector; /* interrupt vector */
int interrupt_pnode; /* pnode for interrupt */
int interrupt_apicid; /* lapicid for interrupt */
};
/*
* Initialize a user allocated chunk of memory to be used as
* a message queue. The caller must ensure that the queue is
* in contiguous physical memory and is cacheline aligned.
*
* Message queue size is the total number of bytes allocated
* to the queue including a 2 cacheline header that is used
* to manage the queue.
*
* Input:
* mqd pointer to message queue descriptor
* p pointer to user allocated mesq memory.
* bytes size of message queue in bytes
* vector interrupt vector (zero if no interrupts)
* nasid nasid of blade where interrupt is delivered
* apicid apicid of cpu for interrupt
*
* Errors:
* 0 OK
* >0 error
*/
extern int gru_create_message_queue(struct gru_message_queue_desc *mqd,
void *p, unsigned int bytes, int nasid, int vector, int apicid);
/*
* Send a message to a message queue.
*
* Note: The message queue transport mechanism uses the first 32
* bits of the message. Users should avoid using these bits.
*
*
* Input:
* mqd pointer to message queue descriptor
* mesg pointer to message. Must be 64-bit aligned
* bytes size of message in bytes
*
* Output:
* 0 message sent
* >0 Send failure - see error codes below
*
*/
extern int gru_send_message_gpa(struct gru_message_queue_desc *mqd,
void *mesg, unsigned int bytes);
/* Status values for gru_send_message_gpa() */
#define MQE_OK 0 /* message sent successfully */
#define MQE_CONGESTION 1 /* temporary congestion, try again */
#define MQE_QUEUE_FULL 2 /* queue is full */
#define MQE_UNEXPECTED_CB_ERR 3 /* unexpected CB error */
#define MQE_PAGE_OVERFLOW 10 /* BUG - queue overflowed a page */
#define MQE_BUG_NO_RESOURCES 11 /* BUG - could not alloc GRU cb/dsr */
/*
* Advance the receive pointer for the message queue to the next message.
* Note: current API requires messages to be gotten & freed in order. Future
* API extensions may allow for out-of-order freeing.
*
* Input
* mqd pointer to message queue descriptor
* mesq message being freed
*/
extern void gru_free_message(struct gru_message_queue_desc *mqd,
void *mesq);
/*
* Get next message from message queue. Returns pointer to
* message OR NULL if no message present.
* User must call gru_free_message() after message is processed
* in order to move the queue pointers to next message.
*
* Input
* mqd pointer to message queue descriptor
*
* Output:
* p pointer to message
* NULL no message available
*/
extern void *gru_get_next_message(struct gru_message_queue_desc *mqd);
/*
* Read a GRU global GPA. Source can be located in a remote partition.
*
* Input:
* value memory address where MMR value is returned
* gpa source numalink physical address of GPA
*
* Output:
* 0 OK
* >0 error
*/
int gru_read_gpa(unsigned long *value, unsigned long gpa);
/*
* Copy data using the GRU. Source or destination can be located in a remote
* partition.
*
* Input:
* dest_gpa destination global physical address
* src_gpa source global physical address
* bytes number of bytes to copy
*
* Output:
* 0 OK
* >0 error
*/
extern int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa,
unsigned int bytes);
/*
* Reserve GRU resources to be used asynchronously.
*
* input:
* blade_id - blade on which resources should be reserved
* cbrs - number of CBRs
* dsr_bytes - number of DSR bytes needed
* cmp - completion structure for waiting for
* async completions
* output:
* handle to identify resource
* (0 = no resources)
*/
extern unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes,
struct completion *cmp);
/*
* Release async resources previously reserved.
*
* input:
* han - handle to identify resources
*/
extern void gru_release_async_resources(unsigned long han);
/*
* Wait for async GRU instructions to complete.
*
* input:
* han - handle to identify resources
*/
extern void gru_wait_async_cbr(unsigned long han);
/*
* Lock previously reserved async GRU resources
*
* input:
* han - handle to identify resources
* output:
* cb - pointer to first CBR
* dsr - pointer to first DSR
*/
extern void gru_lock_async_resource(unsigned long han, void **cb, void **dsr);
/*
* Unlock previously reserved async GRU resources
*
* input:
* han - handle to identify resources
*/
extern void gru_unlock_async_resource(unsigned long han);
#endif /* __GRU_KSERVICES_H_ */

View file

@ -0,0 +1,153 @@
/*
* Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation; either version 2.1 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef __GRULIB_H__
#define __GRULIB_H__
#define GRU_BASENAME "gru"
#define GRU_FULLNAME "/dev/gru"
#define GRU_IOCTL_NUM 'G'
/*
* Maximum number of GRU segments that a user can have open
* ZZZ temp - set high for testing. Revisit.
*/
#define GRU_MAX_OPEN_CONTEXTS 32
/* Set Number of Request Blocks */
#define GRU_CREATE_CONTEXT _IOWR(GRU_IOCTL_NUM, 1, void *)
/* Set Context Options */
#define GRU_SET_CONTEXT_OPTION _IOWR(GRU_IOCTL_NUM, 4, void *)
/* Fetch exception detail */
#define GRU_USER_GET_EXCEPTION_DETAIL _IOWR(GRU_IOCTL_NUM, 6, void *)
/* For user call_os handling - normally a TLB fault */
#define GRU_USER_CALL_OS _IOWR(GRU_IOCTL_NUM, 8, void *)
/* For user unload context */
#define GRU_USER_UNLOAD_CONTEXT _IOWR(GRU_IOCTL_NUM, 9, void *)
/* For dumpping GRU chiplet state */
#define GRU_DUMP_CHIPLET_STATE _IOWR(GRU_IOCTL_NUM, 11, void *)
/* For getting gseg statistics */
#define GRU_GET_GSEG_STATISTICS _IOWR(GRU_IOCTL_NUM, 12, void *)
/* For user TLB flushing (primarily for tests) */
#define GRU_USER_FLUSH_TLB _IOWR(GRU_IOCTL_NUM, 50, void *)
/* Get some config options (primarily for tests & emulator) */
#define GRU_GET_CONFIG_INFO _IOWR(GRU_IOCTL_NUM, 51, void *)
/* Various kernel self-tests */
#define GRU_KTEST _IOWR(GRU_IOCTL_NUM, 52, void *)
/* Total bytes of GSEG window needed for 'th' threads (one GSEG page each) */
#define CONTEXT_WINDOW_BYTES(th)	(GRU_GSEG_PAGESIZE * (th))

/*
 * Address of thread 'th's GSEG page within the window starting at 'p'.
 * 'p' is parenthesized so that an expression argument (e.g. a conditional)
 * is offset as a whole rather than being split by operator precedence.
 */
#define THREAD_POINTER(p, th)		((p) + GRU_GSEG_PAGESIZE * (th))

/* Round a control block address down to the start of its GSEG page */
#define GSEG_START(cb)	((void *)((unsigned long)(cb) & ~(GRU_GSEG_PAGESIZE - 1)))
/*
 * Structure used to fetch the statistics of a gseg
 * (argument of the GRU_GET_GSEG_STATISTICS ioctl - TODO confirm)
 */
struct gru_get_gseg_statistics_req {
unsigned long gseg;
struct gru_gseg_statistics stats;
};
/*
* Structure used to pass context creation parameters to the driver
*/
struct gru_create_context_req {
unsigned long gseg;
unsigned int data_segment_bytes;
unsigned int control_blocks;
unsigned int maximum_thread_count;
unsigned int options;
unsigned char tlb_preload_count;
};
/*
* Structure used to pass unload context parameters to the driver.
* 'gseg' identifies the context to unload.
*/
struct gru_unload_context_req {
unsigned long gseg;
};
/*
* Structure used to set context options.
* 'op' presumably takes one of the sco_* values below; the meaning of
* val0/val1 is not visible here - verify against the ioctl handler.
*/
enum {sco_gseg_owner, sco_cch_req_slice, sco_blade_chiplet};
struct gru_set_context_option_req {
unsigned long gseg;
int op;
int val0;
long val1;
};
/*
* Structure used to pass TLB flush parameters to the driver:
* the gseg, plus a virtual address and length.
*/
struct gru_flush_tlb_req {
unsigned long gseg;
unsigned long vaddr;
size_t len;
};
/*
* Structure used to pass dump chiplet state parameters to the driver
*/
enum {dcs_pid, dcs_gid};
struct gru_dump_chiplet_state_req {
unsigned int op;
unsigned int gid; /* GRU to dump; must be below gru_max_gids */
int ctxnum; /* context to dump, or negative for all contexts */
char data_opt; /* non-zero: include data segment contents */
char lock_cch; /* non-zero: only dump context data if the CCH was locked */
char flush_cbrs; /* non-zero: flush each CB from the cache before copying */
char fill[10];
pid_t pid;
void *buf; /* user buffer receiving the dump */
size_t buflen; /* size of buf in bytes */
/* ---- output --- */
unsigned int num_contexts;
};
/* Magic number stored in every dumped context header */
#define GRU_DUMP_MAGIC 0x3474ab6c

/*
 * Header written in front of each dumped context. The variable-length dump
 * data follows the header; 'data' is a C99 flexible array member (the
 * standard replacement for a zero-length array) and adds nothing to
 * sizeof(struct gru_dump_context_header).
 */
struct gru_dump_context_header {
	unsigned int magic;		/* GRU_DUMP_MAGIC */
	unsigned int gid;
	unsigned char ctxnum;
	unsigned char cbrcnt;
	unsigned char dsrcnt;
	pid_t pid;
	unsigned long vaddr;
	int cch_locked;			/* non-zero if the CCH was locked for the dump */
	unsigned long data[];
};
/*
* GRU configuration info (temp - for testing)
* Counts of cpus, blades, nodes and chiplets, followed by padding.
*/
struct gru_config_info {
int cpus;
int blades;
int nodes;
int chiplets;
int fill[16];
};
#endif /* __GRULIB_H__ */

View file

@ -0,0 +1,973 @@
/*
* SN Platform GRU Driver
*
* DRIVER TABLE MANAGER + GRU CONTEXT LOAD/UNLOAD
*
* Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/sched.h>
#include <linux/device.h>
#include <linux/list.h>
#include <linux/err.h>
#include <linux/prefetch.h>
#include <asm/uv/uv_hub.h>
#include "gru.h"
#include "grutables.h"
#include "gruhandles.h"
/* Driver option bits (e.g. OPT_STATS is tested against this) */
unsigned long gru_options __read_mostly;
/* Driver/device pair; 'grudev' is the device handle passed to gru_dbg() */
static struct device_driver gru_driver = {
.name = "gru"
};
static struct device gru_device = {
.init_name = "",
.driver = &gru_driver,
};
struct device *grudev = &gru_device;
/*
 * Pick the gru fault map that the current cpu should use. Several cpus
 * may end up sharing the same map.
 * ZZZ should be inline but did not work on emulator
 */
int gru_cpu_fault_map_id(void)
{
#ifdef CONFIG_IA64
	return uv_blade_processor_id() % GRU_NUM_TFM;
#else
	const int this_cpu = smp_processor_id();
	const int core = uv_cpu_core_number(this_cpu);

	/* core number, offset by UV_MAX_INT_CORES for every socket */
	return core + UV_MAX_INT_CORES * uv_cpu_socket_number(this_cpu);
#endif
}
/*--------- ASID Management -------------------------------------------
*
* Initially, assign asids sequentially from MIN_ASID .. MAX_ASID.
* Once MAX is reached, flush the TLB & start over. However,
* some asids may still be in use. There won't be many (percentage wise) still
* in use. Search active contexts & determine the value of the first
* asid in use ("x"s below). Set "limit" to this value.
* This defines a block of assignable asids.
*
* When "limit" is reached, search forward from limit+1 and determine the
* next block of assignable asids.
*
* Repeat until MAX_ASID is reached, then start over again.
*
* Each time MAX_ASID is reached, increment the asid generation. Since
* the search for in-use asids only checks contexts with GRUs currently
* assigned, asids in some contexts will be missed. Prior to loading
* a context, the asid generation of the GTS asid is rechecked. If it
* doesn't match the current generation, a new asid will be assigned.
*
* 0---------------x------------x---------------------x----|
* ^-next ^-limit ^-MAX_ASID
*
* All asid manipulation & context loading/unloading is protected by the
* gs_lock.
*/
/*
 * The asid space of this GRU is used up. Begin a new asid generation
 * and hand back the first assignable asid (MIN_ASID).
 */
static int gru_wrap_asid(struct gru_state *gru)
{
	gru_dbg(grudev, "gid %d\n", gru->gs_gid);
	STAT(asid_wrap);

	gru->gs_asid_gen += 1;

	return MIN_ASID;
}
/*
* Find the next chunk of unused asids.
*
* Starting at "asid", scan the user contexts currently loaded on this GRU
* (gs_gts[]) and choose a range [asid, limit) that none of them uses. The
* GRU TLB is flushed with gru_flush_all_tlb() before the scan. The chosen
* range is stored in gs_asid / gs_asid_limit.
*
* Returns the first asid of the new range.
*/
static int gru_reset_asid_limit(struct gru_state *gru, int asid)
{
int i, gid, inuse_asid, limit;
gru_dbg(grudev, "gid %d, asid 0x%x\n", gru->gs_gid, asid);
STAT(asid_next);
limit = MAX_ASID;
/* Ran past the end of the asid space: wrap (this also bumps gs_asid_gen) */
if (asid >= limit)
asid = gru_wrap_asid(gru);
gru_flush_all_tlb(gru);
gid = gru->gs_gid;
again:
for (i = 0; i < GRU_NUM_CCH; i++) {
/* Empty slots and kernel contexts are not considered */
if (!gru->gs_gts[i] || is_kernel_context(gru->gs_gts[i]))
continue;
inuse_asid = gru->gs_gts[i]->ts_gms->ms_asids[gid].mt_asid;
gru_dbg(grudev, "gid %d, gts %p, gms %p, inuse 0x%x, cxt %d\n",
gru->gs_gid, gru->gs_gts[i], gru->gs_gts[i]->ts_gms,
inuse_asid, i);
/* The candidate asid is taken: move on to the next one */
if (inuse_asid == asid) {
asid += ASID_INC;
if (asid >= limit) {
/*
* empty range: reset the range limit and
* start over
*/
limit = MAX_ASID;
if (asid >= MAX_ASID)
asid = gru_wrap_asid(gru);
goto again;
}
}
/* Pull the limit down to the closest in-use asid above the candidate */
if ((inuse_asid > asid) && (inuse_asid < limit))
limit = inuse_asid;
}
gru->gs_asid_limit = limit;
gru->gs_asid = asid;
gru_dbg(grudev, "gid %d, new asid 0x%x, new_limit 0x%x\n", gru->gs_gid,
asid, limit);
return asid;
}
/*
 * Hand out the next asid of this GRU. When the current block of
 * assignable asids is exhausted a new block is searched for first.
 */
static int gru_assign_asid(struct gru_state *gru)
{
	int next;

	gru->gs_asid += ASID_INC;
	next = gru->gs_asid;

	/* Reached the end of the current block: look for a new one */
	if (next >= gru->gs_asid_limit)
		next = gru_reset_asid_limit(gru, next);

	gru_dbg(grudev, "gid %d, asid 0x%x\n", gru->gs_gid, next);
	return next;
}
/*
 * Take the n lowest set bits out of the bitmap word *p.
 *
 * Returns a word with exactly the removed bits set. If idx is not NULL
 * the bit numbers that were taken are stored there, one char per bit.
 * Running out of set bits below mmax is fatal (BUG).
 */
static unsigned long reserve_resources(unsigned long *p, int n, int mmax,
				       char *idx)
{
	unsigned long taken = 0;
	int bit;

	for (; n; n--) {
		bit = find_first_bit(p, mmax);
		if (bit == mmax)
			BUG();

		__clear_bit(bit, p);
		__set_bit(bit, &taken);

		if (idx) {
			*idx = bit;
			idx++;
		}
	}

	return taken;
}
/*
 * Reserve cbr_au_count CBR allocation units from the GRU's free map.
 * Returns the map of reserved units; cbmap (may be NULL) receives their
 * bit numbers.
 */
unsigned long gru_reserve_cb_resources(struct gru_state *gru, int cbr_au_count,
				       char *cbmap)
{
	unsigned long reserved;

	reserved = reserve_resources(&gru->gs_cbr_map, cbr_au_count,
				     GRU_CBR_AU, cbmap);
	return reserved;
}
/*
 * Reserve dsr_au_count DSR allocation units from the GRU's free map.
 * Returns the map of reserved units; dsmap (may be NULL) receives their
 * bit numbers.
 */
unsigned long gru_reserve_ds_resources(struct gru_state *gru, int dsr_au_count,
				       char *dsmap)
{
	unsigned long reserved;

	reserved = reserve_resources(&gru->gs_dsr_map, dsr_au_count,
				     GRU_DSR_AU, dsmap);
	return reserved;
}
/*
 * Account for one more active context on the GRU and reserve the CBR and
 * DSR allocation units the thread state asks for.
 */
static void reserve_gru_resources(struct gru_state *gru,
				  struct gru_thread_state *gts)
{
	gru->gs_active_contexts++;

	/* CBRs: also record which units were handed out */
	gts->ts_cbr_map = gru_reserve_cb_resources(gru, gts->ts_cbr_au_count,
						   gts->ts_cbr_idx);

	/* DSRs: only the map is needed */
	gts->ts_dsr_map = gru_reserve_ds_resources(gru, gts->ts_dsr_au_count,
						   NULL);
}
/*
 * Return the CBR and DSR allocation units held by the thread state to the
 * GRU's free maps and drop the active context count.
 */
static void free_gru_resources(struct gru_state *gru,
			       struct gru_thread_state *gts)
{
	gru->gs_active_contexts--;

	gru->gs_cbr_map = gru->gs_cbr_map | gts->ts_cbr_map;
	gru->gs_dsr_map = gru->gs_dsr_map | gts->ts_dsr_map;
}
/*
 * Test whether a GRU can currently satisfy an allocation request: enough
 * free CBR units, enough free DSR units and fewer than max_active_contexts
 * contexts in use.
 *
 * The GRU locks may or may not be held by the caller. If they are not,
 * the result must be rechecked once the appropriate locks are held.
 *
 * Returns 1 if sufficient resources, 0 if not
 */
static int check_gru_resources(struct gru_state *gru, int cbr_au_count,
			       int dsr_au_count, int max_active_contexts)
{
	if (hweight64(gru->gs_cbr_map) < cbr_au_count)
		return 0;

	if (hweight64(gru->gs_dsr_map) < dsr_au_count)
		return 0;

	return gru->gs_active_contexts < max_active_contexts;
}
/*
* TLB management requires tracking all GRU chiplets that have loaded a GSEG
* context.
*
* Record in the mm tracker of gts->ts_gms that context gts->ts_ctxnum of
* this GRU is now loaded. A new asid is assigned when the tracker has none
* (asid 0), or when no context is using the tracker and its asid generation
* differs from the GRU's current generation; otherwise the existing asid
* is reused.
*
* Returns the asid to use for the context.
*/
static int gru_load_mm_tracker(struct gru_state *gru,
struct gru_thread_state *gts)
{
struct gru_mm_struct *gms = gts->ts_gms;
struct gru_mm_tracker *asids = &gms->ms_asids[gru->gs_gid];
unsigned short ctxbitmap = (1 << gts->ts_ctxnum);
int asid;
/* Lock order: ms_asid_lock first, then gs_asid_lock nested inside it */
spin_lock(&gms->ms_asid_lock);
asid = asids->mt_asid;
spin_lock(&gru->gs_asid_lock);
if (asid == 0 || (asids->mt_ctxbitmap == 0 && asids->mt_asid_gen !=
gru->gs_asid_gen)) {
asid = gru_assign_asid(gru);
asids->mt_asid = asid;
asids->mt_asid_gen = gru->gs_asid_gen;
STAT(asid_new);
} else {
STAT(asid_reuse);
}
spin_unlock(&gru->gs_asid_lock);
/* The context must not already be marked as loaded */
BUG_ON(asids->mt_ctxbitmap & ctxbitmap);
asids->mt_ctxbitmap |= ctxbitmap;
/* Note in the mm's asidmap that this GRU holds a context of the mm */
if (!test_bit(gru->gs_gid, gms->ms_asidmap))
__set_bit(gru->gs_gid, gms->ms_asidmap);
spin_unlock(&gms->ms_asid_lock);
gru_dbg(grudev,
"gid %d, gts %p, gms %p, ctxnum %d, asid 0x%x, asidmap 0x%lx\n",
gru->gs_gid, gts, gms, gts->ts_ctxnum, asid,
gms->ms_asidmap[0]);
return asid;
}
/*
 * Remove context gts->ts_ctxnum of this GRU from the mm tracker of
 * gts->ts_gms. The context must currently be marked as loaded (BUG
 * otherwise). The asid itself is left in the tracker.
 */
static void gru_unload_mm_tracker(struct gru_state *gru,
				  struct gru_thread_state *gts)
{
	struct gru_mm_struct *gms = gts->ts_gms;
	struct gru_mm_tracker *asids;
	unsigned short ctxbitmap;

	asids = &gms->ms_asids[gru->gs_gid];
	ctxbitmap = (1 << gts->ts_ctxnum);

	/* Same lock order as gru_load_mm_tracker(): ms_asid_lock, gs_asid_lock */
	spin_lock(&gms->ms_asid_lock);
	spin_lock(&gru->gs_asid_lock);
	BUG_ON((asids->mt_ctxbitmap & ctxbitmap) != ctxbitmap);
	asids->mt_ctxbitmap ^= ctxbitmap;
	/*
	 * ctxnum is printed in decimal; the former "0x%d" format put a hex
	 * prefix in front of a decimal number.
	 */
	gru_dbg(grudev, "gid %d, gts %p, gms %p, ctxnum %d, asidmap 0x%lx\n",
		gru->gs_gid, gts, gms, gts->ts_ctxnum, gms->ms_asidmap[0]);
	spin_unlock(&gru->gs_asid_lock);
	spin_unlock(&gms->ms_asid_lock);
}
/*
 * Release one reference on a GTS structure (NULL is accepted and ignored).
 * When the last reference goes away the mmu notifier reference is dropped,
 * if there is one, and the structure is freed.
 */
void gts_drop(struct gru_thread_state *gts)
{
	if (!gts)
		return;

	/* Other references remain: nothing more to do */
	if (atomic_dec_return(&gts->ts_refcnt) != 0)
		return;

	if (gts->ts_gms)
		gru_drop_mmu_notifier(gts->ts_gms);
	kfree(gts);
	STAT(gts_free);
}
/*
* Locate the GTS structure for the current thread.
*/
static struct gru_thread_state *gru_find_current_gts_nolock(struct gru_vma_data
*vdata, int tsid)
{
struct gru_thread_state *gts;
list_for_each_entry(gts, &vdata->vd_head, ts_next)
if (gts->ts_tsid == tsid)
return gts;
return NULL;
}
/*
* Allocate a thread state structure.
*/
struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
int cbr_au_count, int dsr_au_count,
unsigned char tlb_preload_count, int options, int tsid)
{
struct gru_thread_state *gts;
struct gru_mm_struct *gms;
int bytes;
bytes = DSR_BYTES(dsr_au_count) + CBR_BYTES(cbr_au_count);
bytes += sizeof(struct gru_thread_state);
gts = kmalloc(bytes, GFP_KERNEL);
if (!gts)
return ERR_PTR(-ENOMEM);
STAT(gts_alloc);
memset(gts, 0, sizeof(struct gru_thread_state)); /* zero out header */
atomic_set(&gts->ts_refcnt, 1);
mutex_init(&gts->ts_ctxlock);
gts->ts_cbr_au_count = cbr_au_count;
gts->ts_dsr_au_count = dsr_au_count;
gts->ts_tlb_preload_count = tlb_preload_count;
gts->ts_user_options = options;
gts->ts_user_blade_id = -1;
gts->ts_user_chiplet_id = -1;
gts->ts_tsid = tsid;
gts->ts_ctxnum = NULLCTX;
gts->ts_tlb_int_select = -1;
gts->ts_cch_req_slice = -1;
gts->ts_sizeavail = GRU_SIZEAVAIL(PAGE_SHIFT);
if (vma) {
gts->ts_mm = current->mm;
gts->ts_vma = vma;
gms = gru_register_mmu_notifier();
if (IS_ERR(gms))
goto err;
gts->ts_gms = gms;
}
gru_dbg(grudev, "alloc gts %p\n", gts);
return gts;
err:
gts_drop(gts);
return ERR_CAST(gms);
}
/*
 * Allocate the private data structure of a GRU vma and set up its GTS
 * list and lock. The remaining fields are left uninitialized.
 * Returns NULL if the allocation fails.
 */
struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma, int tsid)
{
	struct gru_vma_data *vdata = kmalloc(sizeof(*vdata), GFP_KERNEL);

	if (vdata) {
		STAT(vdata_alloc);
		INIT_LIST_HEAD(&vdata->vd_head);
		spin_lock_init(&vdata->vd_lock);
		gru_dbg(grudev, "alloc vdata %p\n", vdata);
	}

	return vdata;
}
/*
 * Locked lookup of the thread state structure with id "tsid" in the vma.
 * Returns NULL when the vma has no such thread state.
 */
struct gru_thread_state *gru_find_thread_state(struct vm_area_struct *vma,
					int tsid)
{
	struct gru_vma_data *vdata = vma->vm_private_data;
	struct gru_thread_state *found;

	spin_lock(&vdata->vd_lock);
	found = gru_find_current_gts_nolock(vdata, tsid);
	spin_unlock(&vdata->vd_lock);

	gru_dbg(grudev, "vma %p, gts %p\n", vma, found);
	return found;
}
/*
 * Create the thread state for slot "tsid" of a GSEG vma. Another thread
 * may create the same GTS concurrently; in that case the freshly
 * allocated one is dropped and the existing one is returned.
 *
 * Returns the GTS or an ERR_PTR() value if allocation failed.
 */
struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct *vma,
					int tsid)
{
	struct gru_vma_data *vdata = vma->vm_private_data;
	struct gru_thread_state *gts, *existing;

	gts = gru_alloc_gts(vma, vdata->vd_cbr_au_count,
			    vdata->vd_dsr_au_count,
			    vdata->vd_tlb_preload_count,
			    vdata->vd_user_options, tsid);
	if (IS_ERR(gts))
		return gts;

	spin_lock(&vdata->vd_lock);
	existing = gru_find_current_gts_nolock(vdata, tsid);
	if (!existing) {
		list_add(&gts->ts_next, &vdata->vd_head);
	} else {
		/* Lost the race: use the GTS that is already on the list */
		gts_drop(gts);
		gts = existing;
		STAT(gts_double_allocate);
	}
	spin_unlock(&vdata->vd_lock);

	gru_dbg(grudev, "vma %p, gts %p\n", vma, gts);
	return gts;
}
/*
 * Detach the thread state from the GRU context it occupies: clear the
 * context slot, give the CBR/DSR units back and drop the reference the
 * slot held on the GTS.
 */
static void gru_free_gru_context(struct gru_thread_state *gts)
{
	struct gru_state *gru = gts->ts_gru;

	gru_dbg(grudev, "gts %p, gid %d\n", gts, gru->gs_gid);

	spin_lock(&gru->gs_lock);
	gru->gs_gts[gts->ts_ctxnum] = NULL;
	free_gru_resources(gru, gts);

	/* The context number must be marked as in use */
	BUG_ON(!test_bit(gts->ts_ctxnum, &gru->gs_context_map));
	__clear_bit(gts->ts_ctxnum, &gru->gs_context_map);

	gts->ts_ctxnum = NULLCTX;
	gts->ts_gru = NULL;
	gts->ts_blade = -1;
	spin_unlock(&gru->gs_lock);

	gts_drop(gts);
	STAT(free_context);
}
/*
 * Issue a write prefetch for "num" locations starting at p, "stride"
 * bytes apart. Prefetching cachelines helps hardware performance; it is
 * strictly a performance enhancement and not functionally required.
 */
static void prefetch_data(void *p, int num, int stride)
{
	for (; num > 0; num--) {
		prefetchw(p);
		p += stride;
	}
}
/*
 * Copy one handle (GRU_HANDLE_BYTES bytes) from s to d.
 * Returns the number of bytes copied so callers can advance a cursor.
 */
static inline long gru_copy_handle(void *d, void *s)
{
	memcpy(d, s, GRU_HANDLE_BYTES);

	return GRU_HANDLE_BYTES;
}
/*
 * Prefetch the parts of a context that are about to be copied: "length"
 * bytes of the data segment, and one cacheline of the CB and of the CBE
 * for every CBR in the allocation map.
 */
static void gru_prefetch_context(void *gseg, void *cb, void *cbe,
				 unsigned long cbrmap, unsigned long length)
{
	int i, scr;

	/* Data segment, one cacheline at a time */
	prefetch_data(gseg + GRU_DS_BASE, length / GRU_CACHE_LINE_BYTES,
		      GRU_CACHE_LINE_BYTES);

	for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
		void *this_cbe = cbe + i * GRU_HANDLE_STRIDE;

		prefetch_data(cb, 1, GRU_CACHE_LINE_BYTES);
		prefetch_data(this_cbe, 1, GRU_CACHE_LINE_BYTES);
		cb += GRU_HANDLE_STRIDE;
	}
}
/*
* Load the CBs, CBEs and data segment of one context into the GRU.
*
* save - saved context image; only read when data_valid is set
* grubase - base address of the GRU
* ctxnum - context number; selects the GSEG at
* grubase + ctxnum * GRU_GSEG_STRIDE
* cbrmap - CBR allocation map of the context
* dsrmap - DSR allocation map; its bit count determines how many
* bytes of data segment are loaded
* data_valid - nonzero: restore from "save"; zero: clear instead
*
* The image in "save" is read as, for each allocated CBR, the CB handle
* followed by the CBE handle, and after all of those the DSR data.
*/
static void gru_load_context_data(void *save, void *grubase, int ctxnum,
unsigned long cbrmap, unsigned long dsrmap,
int data_valid)
{
void *gseg, *cb, *cbe;
unsigned long length;
int i, scr;
gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
cb = gseg + GRU_CB_BASE;
cbe = grubase + GRU_CBE_BASE;
length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
gru_prefetch_context(gseg, cb, cbe, cbrmap, length);
for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
if (data_valid) {
save += gru_copy_handle(cb, save);
save += gru_copy_handle(cbe + i * GRU_HANDLE_STRIDE,
save);
} else {
/* No saved state: clear the first cacheline of the CB and of the CBE */
memset(cb, 0, GRU_CACHE_LINE_BYTES);
memset(cbe + i * GRU_HANDLE_STRIDE, 0,
GRU_CACHE_LINE_BYTES);
}
/* Flush CBE to hide race in context restart */
mb();
gru_flush_cache(cbe + i * GRU_HANDLE_STRIDE);
cb += GRU_HANDLE_STRIDE;
}
/* "save" now points past the handles, at the saved DSR data */
if (data_valid)
memcpy(gseg + GRU_DS_BASE, save, length);
else
memset(gseg + GRU_DS_BASE, 0, length);
}
/*
* Copy the CBs, CBEs and data segment of one context out of the GRU.
*
* save - destination buffer for the context image
* grubase - base address of the GRU
* ctxnum - context number; selects the GSEG at
* grubase + ctxnum * GRU_GSEG_STRIDE
* cbrmap - CBR allocation map of the context
* dsrmap - DSR allocation map; its bit count determines how many
* bytes of data segment are saved
*
* The image is written as, for each allocated CBR, the CB handle followed
* by the CBE handle, and after all of those the DSR data. This is the
* layout gru_load_context_data() reads back.
*/
static void gru_unload_context_data(void *save, void *grubase, int ctxnum,
unsigned long cbrmap, unsigned long dsrmap)
{
void *gseg, *cb, *cbe;
unsigned long length;
int i, scr;
gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
cb = gseg + GRU_CB_BASE;
cbe = grubase + GRU_CBE_BASE;
length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
/* CBEs may not be coherent. Flush them from cache */
for_each_cbr_in_allocation_map(i, &cbrmap, scr)
gru_flush_cache(cbe + i * GRU_HANDLE_STRIDE);
mb(); /* Let the CL flush complete */
gru_prefetch_context(gseg, cb, cbe, cbrmap, length);
for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
save += gru_copy_handle(save, cb);
save += gru_copy_handle(save, cbe + i * GRU_HANDLE_STRIDE);
cb += GRU_HANDLE_STRIDE;
}
/* DSR data follows the handles in the image */
memcpy(save, gseg + GRU_DS_BASE, length);
}
/*
* Unload a GRU context from the GRU it is loaded on.
*
* For a user context the GSEG mapping is zapped from the vma and the
* context is removed from the mm tracker. With the CCH handle locked,
* cch_interrupt_sync() is issued, the context data is copied to
* gts->ts_gdata when savestate is nonzero (marking ts_data_valid), and
* the CCH is deallocated. Finally the context slot and its resources are
* released with gru_free_gru_context().
*
* A failure of cch_interrupt_sync() or cch_deallocate() is fatal (BUG).
*/
void gru_unload_context(struct gru_thread_state *gts, int savestate)
{
struct gru_state *gru = gts->ts_gru;
struct gru_context_configuration_handle *cch;
int ctxnum = gts->ts_ctxnum;
if (!is_kernel_context(gts))
zap_vma_ptes(gts->ts_vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE);
cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
gru_dbg(grudev, "gts %p, cbrmap 0x%lx, dsrmap 0x%lx\n",
gts, gts->ts_cbr_map, gts->ts_dsr_map);
lock_cch_handle(cch);
if (cch_interrupt_sync(cch))
BUG();
if (!is_kernel_context(gts))
gru_unload_mm_tracker(gru, gts);
if (savestate) {
gru_unload_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr,
ctxnum, gts->ts_cbr_map,
gts->ts_dsr_map);
gts->ts_data_valid = 1;
}
if (cch_deallocate(cch))
BUG();
unlock_cch_handle(cch);
/* ctxnum was sampled above because this call resets gts->ts_ctxnum */
gru_free_gru_context(gts);
}
/*
* Load a GRU context by copying it from the thread data structure in memory
* to the GRU.
*
* The context (gts->ts_gru / gts->ts_ctxnum) must already be assigned.
* With the CCH handle locked, the CCH is programmed from the thread state,
* allocated, filled with the context data (restored from ts_gdata when
* ts_data_valid is set, cleared otherwise) and started. A failure of
* cch_allocate() or cch_start() is fatal (BUG).
*/
void gru_load_context(struct gru_thread_state *gts)
{
struct gru_state *gru = gts->ts_gru;
struct gru_context_configuration_handle *cch;
int i, err, asid, ctxnum = gts->ts_ctxnum;
cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
lock_cch_handle(cch);
/* TLB miss handling is selected by the user options */
cch->tfm_fault_bit_enable =
(gts->ts_user_options == GRU_OPT_MISS_FMM_POLL
|| gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
cch->tlb_int_enable = (gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
if (cch->tlb_int_enable) {
gts->ts_tlb_int_select = gru_cpu_fault_map_id();
cch->tlb_int_select = gts->ts_tlb_int_select;
}
/* A negative ts_cch_req_slice means no request slice was chosen */
if (gts->ts_cch_req_slice >= 0) {
cch->req_slice_set_enable = 1;
cch->req_slice = gts->ts_cch_req_slice;
} else {
cch->req_slice_set_enable =0;
}
cch->tfm_done_bit_enable = 0;
cch->dsr_allocation_map = gts->ts_dsr_map;
cch->cbr_allocation_map = gts->ts_cbr_map;
if (is_kernel_context(gts)) {
cch->unmap_enable = 1;
cch->tfm_done_bit_enable = 1;
cch->cb_int_enable = 1;
cch->tlb_int_select = 0; /* For now, ints go to cpu 0 */
} else {
cch->unmap_enable = 0;
cch->tfm_done_bit_enable = 0;
cch->cb_int_enable = 0;
/* User context: register with the mm tracker and program 8 consecutive asids */
asid = gru_load_mm_tracker(gru, gts);
for (i = 0; i < 8; i++) {
cch->asid[i] = asid + i;
cch->sizeavail[i] = gts->ts_sizeavail;
}
}
err = cch_allocate(cch);
if (err) {
gru_dbg(grudev,
"err %d: cch %p, gts %p, cbr 0x%lx, dsr 0x%lx\n",
err, cch, gts, gts->ts_cbr_map, gts->ts_dsr_map);
BUG();
}
gru_load_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr, ctxnum,
gts->ts_cbr_map, gts->ts_dsr_map, gts->ts_data_valid);
if (cch_start(cch))
BUG();
unlock_cch_handle(cch);
gru_dbg(grudev, "gid %d, gts %p, cbrmap 0x%lx, dsrmap 0x%lx, tie %d, tis %d\n",
gts->ts_gru->gs_gid, gts, gts->ts_cbr_map, gts->ts_dsr_map,
(gts->ts_user_options == GRU_OPT_MISS_FMM_INTR), gts->ts_tlb_int_select);
}
/*
* Update fields in an active CCH:
* - retarget interrupts on local blade
* - update sizeavail mask
*
* Nothing is changed unless the CCH is in state CCHSTATE_ACTIVE and the
* context slot still belongs to this gts.
*
* Returns 1 if the CCH was updated, 0 otherwise.
*/
int gru_update_cch(struct gru_thread_state *gts)
{
struct gru_context_configuration_handle *cch;
struct gru_state *gru = gts->ts_gru;
int i, ctxnum = gts->ts_ctxnum, ret = 0;
cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
lock_cch_handle(cch);
if (cch->state == CCHSTATE_ACTIVE) {
/* The slot is now used by a different gts: leave it alone */
if (gru->gs_gts[gts->ts_ctxnum] != gts)
goto exit;
/* The CCH is interrupted before it is modified and restarted afterwards */
if (cch_interrupt(cch))
BUG();
for (i = 0; i < 8; i++)
cch->sizeavail[i] = gts->ts_sizeavail;
gts->ts_tlb_int_select = gru_cpu_fault_map_id();
cch->tlb_int_select = gru_cpu_fault_map_id();
cch->tfm_fault_bit_enable =
(gts->ts_user_options == GRU_OPT_MISS_FMM_POLL
|| gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
if (cch_start(cch))
BUG();
ret = 1;
}
exit:
unlock_cch_handle(cch);
return ret;
}
/*
 * Update the CCH tlb interrupt select. This is needed when all of the
 * following hold:
 *	- the task's GRU context is loaded into a GRU
 *	- the task uses interrupt notification for TLB faults
 *	- the task has migrated to a different cpu on the blade where it
 *	  was previously running
 *
 * Returns 0 when nothing had to be done, otherwise the result of
 * gru_update_cch().
 */
static int gru_retarget_intr(struct gru_thread_state *gts)
{
	/* No interrupt target has been selected for this context */
	if (gts->ts_tlb_int_select < 0)
		return 0;

	/* Already targeting the fault map of the current cpu */
	if (gts->ts_tlb_int_select == gru_cpu_fault_map_id())
		return 0;

	gru_dbg(grudev, "retarget from %d to %d\n", gts->ts_tlb_int_select,
		gru_cpu_fault_map_id());
	return gru_update_cch(gts);
}
/*
 * Decide whether a GRU context may be placed on a given chiplet. Without
 * user overrides any chiplet on the local blade qualifies; the user can
 * pin the context to a specific blade and/or chiplet.
 *
 * Returns 1 if assignment allowed, 0 otherwise
 */
static int gru_check_chiplet_assignment(struct gru_state *gru,
					struct gru_thread_state *gts)
{
	int want_blade = gts->ts_user_blade_id;
	int want_chiplet;

	/* No blade requested: use the blade we are running on */
	if (want_blade < 0)
		want_blade = uv_numa_blade_id();

	want_chiplet = gts->ts_user_chiplet_id;

	if (gru->gs_blade_id != want_blade)
		return 0;

	/* A negative chiplet id accepts any chiplet on the blade */
	return want_chiplet < 0 || want_chiplet == gru->gs_chiplet_id;
}
/*
 * Make sure a loaded context sits on an acceptable blade/chiplet and
 * unload it (saving its state) if it does not. A context can become
 * misplaced when the process migrates to another blade or when the user
 * changes the selected blade/chiplet. A correctly placed context gets
 * its TLB interrupt retargeted if necessary.
 */
void gru_check_context_placement(struct gru_thread_state *gts)
{
	struct gru_state *gru = gts->ts_gru;

	/*
	 * Placement is only verified when the current task owns the
	 * context. Non-owner references are skipped; pthread apps use
	 * non-owner references to the CBRs.
	 */
	if (!gru || gts->ts_tgid_owner != current->tgid)
		return;

	if (!gru_check_chiplet_assignment(gru, gts)) {
		STAT(check_context_unload);
		gru_unload_context(gts, 1);
		return;
	}

	if (gru_retarget_intr(gts))
		STAT(check_context_retarget_intr);
}
/*
* Insufficient GRU resources available on the local blade. Steal a context from
* a process. This is a hack until a _real_ resource scheduler is written....
*/
/*
* Context number following n: n + 1 while n < GRU_NUM_CCH - 2, else 0.
* NOTE(review): as written this never yields GRU_NUM_CCH - 1; confirm
* that skipping the last context is intended.
*/
#define next_ctxnum(n) ((n) < GRU_NUM_CCH - 2 ? (n) + 1 : 0)
/* GRU following g in blade b's bs_grus[] array, wrapping to the first one */
#define next_gru(b, g) (((g) < &(b)->bs_grus[GRU_CHIPLETS_PER_BLADE - 1]) ? \
((g)+1) : &(b)->bs_grus[0])
/*
 * Try to take the lock that protects a context so it can be stolen:
 * the blade's kgts semaphore (write) for a kernel context, the context
 * mutex otherwise. Never blocks.
 *
 * Returns nonzero if the lock was obtained.
 */
static int is_gts_stealable(struct gru_thread_state *gts,
			    struct gru_blade_state *bs)
{
	return is_kernel_context(gts) ?
		down_write_trylock(&bs->bs_kgts_sema) :
		mutex_trylock(&gts->ts_ctxlock);
}
/*
 * Counterpart of is_gts_stealable(): release the lock taken there and
 * count the steal.
 */
static void gts_stolen(struct gru_thread_state *gts,
		       struct gru_blade_state *bs)
{
	if (!is_kernel_context(gts)) {
		mutex_unlock(&gts->ts_ctxlock);
		STAT(steal_user_context);
		return;
	}

	up_write(&bs->bs_kgts_sema);
	STAT(steal_kernel_context);
}
/*
* Try to free up GRU resources for gts by unloading some other context on
* the blade.
*
* The scan starts after the blade's last-used position (bs_lru_gru /
* bs_lru_ctxnum) and walks the contexts of every GRU that gts is allowed
* to use. It stops early if a GRU already has enough free resources, or
* when a context is found whose lock can be taken without blocking. A
* context found that way is unloaded; user contexts have their state
* saved, kernel contexts do not.
*/
void gru_steal_context(struct gru_thread_state *gts)
{
struct gru_blade_state *blade;
struct gru_state *gru, *gru0;
struct gru_thread_state *ngts = NULL;
int ctxnum, ctxnum0, flag = 0, cbr, dsr;
int blade_id;
blade_id = gts->ts_user_blade_id;
if (blade_id < 0)
blade_id = uv_numa_blade_id();
cbr = gts->ts_cbr_au_count;
dsr = gts->ts_dsr_au_count;
blade = gru_base[blade_id];
spin_lock(&blade->bs_lock);
/* Advance the blade's scan position and remember where this scan began */
ctxnum = next_ctxnum(blade->bs_lru_ctxnum);
gru = blade->bs_lru_gru;
if (ctxnum == 0)
gru = next_gru(blade, gru);
blade->bs_lru_gru = gru;
blade->bs_lru_ctxnum = ctxnum;
ctxnum0 = ctxnum;
gru0 = gru;
/* "flag" is set once the scan has moved past the starting GRU */
while (1) {
if (gru_check_chiplet_assignment(gru, gts)) {
/* Enough resources are free already: nothing needs to be stolen */
if (check_gru_resources(gru, cbr, dsr, GRU_NUM_CCH))
break;
spin_lock(&gru->gs_lock);
for (; ctxnum < GRU_NUM_CCH; ctxnum++) {
if (flag && gru == gru0 && ctxnum == ctxnum0)
break;
ngts = gru->gs_gts[ctxnum];
/*
* We are grabbing locks out of order, so trylock is
* needed. GTSs are usually not locked, so the odds of
* success are high. If trylock fails, try to steal a
* different GSEG.
*/
if (ngts && is_gts_stealable(ngts, blade))
break;
ngts = NULL;
}
spin_unlock(&gru->gs_lock);
if (ngts || (flag && gru == gru0 && ctxnum == ctxnum0))
break;
}
if (flag && gru == gru0)
break;
flag = 1;
ctxnum = 0;
gru = next_gru(blade, gru);
}
spin_unlock(&blade->bs_lock);
if (ngts) {
gts->ustats.context_stolen++;
ngts->ts_steal_jiffies = jiffies;
gru_unload_context(ngts, is_kernel_context(ngts) ? 0 : 1);
/* Releases the lock taken by is_gts_stealable() */
gts_stolen(ngts, blade);
} else {
STAT(steal_context_failed);
}
gru_dbg(grudev,
"stole gid %d, ctxnum %d from gts %p. Need cb %d, ds %d;"
" avail cb %ld, ds %ld\n",
gru->gs_gid, ctxnum, ngts, cbr, dsr, hweight64(gru->gs_cbr_map),
hweight64(gru->gs_dsr_map));
}
/*
 * Claim the lowest free context number of the GRU and mark it in use in
 * gs_context_map. Returns the context number.
 */
static int gru_assign_context_number(struct gru_state *gru)
{
	const int ctxnum = find_first_zero_bit(&gru->gs_context_map,
					       GRU_NUM_CCH);

	__set_bit(ctxnum, &gru->gs_context_map);

	return ctxnum;
}
/*
* Scan the GRUs on the local blade & assign a GRU context.
*
* Among the GRUs that gts may use and that have enough free resources,
* the one with the fewest active contexts is preferred. The choice is
* made without holding the GRU lock, so the resources are rechecked under
* gs_lock and the scan is repeated if they are gone.
*
* On success the resources and a context number are reserved, the context
* slot takes a reference on the gts, and the GRU is returned. Returns NULL
* if no GRU can hold the context.
*/
struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts)
{
struct gru_state *gru, *grux;
int i, max_active_contexts;
int blade_id = gts->ts_user_blade_id;
if (blade_id < 0)
blade_id = uv_numa_blade_id();
again:
gru = NULL;
max_active_contexts = GRU_NUM_CCH;
for_each_gru_on_blade(grux, blade_id, i) {
if (!gru_check_chiplet_assignment(grux, gts))
continue;
/* Only accept a GRU with fewer active contexts than the best so far */
if (check_gru_resources(grux, gts->ts_cbr_au_count,
gts->ts_dsr_au_count,
max_active_contexts)) {
gru = grux;
max_active_contexts = grux->gs_active_contexts;
/* An idle GRU cannot be beaten */
if (max_active_contexts == 0)
break;
}
}
if (gru) {
spin_lock(&gru->gs_lock);
/* Recheck now that the lock is held */
if (!check_gru_resources(gru, gts->ts_cbr_au_count,
gts->ts_dsr_au_count, GRU_NUM_CCH)) {
spin_unlock(&gru->gs_lock);
goto again;
}
reserve_gru_resources(gru, gts);
gts->ts_gru = gru;
gts->ts_blade = gru->gs_blade_id;
gts->ts_ctxnum = gru_assign_context_number(gru);
/* Reference held by gs_gts[]; dropped in gru_free_gru_context() */
atomic_inc(&gts->ts_refcnt);
gru->gs_gts[gts->ts_ctxnum] = gts;
spin_unlock(&gru->gs_lock);
STAT(assign_context);
gru_dbg(grudev,
"gseg %p, gts %p, gid %d, ctx %d, cbr %d, dsr %d\n",
gseg_virtual_address(gts->ts_gru, gts->ts_ctxnum), gts,
gts->ts_gru->gs_gid, gts->ts_ctxnum,
gts->ts_cbr_au_count, gts->ts_dsr_au_count);
} else {
gru_dbg(grudev, "failed to allocate a GTS %s\n", "");
STAT(assign_context_failed);
}
return gru;
}
/*
 * gru_fault
 *
 * Map the user's GRU segment
 *
 * Note: gru segments are always mmapped on GRU_GSEG_PAGESIZE boundaries.
 *
 * If the thread state has no GRU context loaded, one is assigned, loaded
 * and mapped at the faulting GSEG. While no context can be assigned the
 * function sleeps for GRU_ASSIGN_DELAY and retries; once GRU_STEAL_DELAY
 * has passed since this context was last stolen it also tries to steal a
 * context from another user.
 *
 * Returns VM_FAULT_SIGBUS if the address has no thread state, otherwise
 * VM_FAULT_NOPAGE.
 */
int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct gru_thread_state *gts;
	unsigned long paddr, vaddr;
	unsigned long expires;

	vaddr = (unsigned long)vmf->virtual_address;
	gru_dbg(grudev, "vma %p, vaddr 0x%lx (0x%lx)\n",
		vma, vaddr, GSEG_BASE(vaddr));
	STAT(nopfn);

	/* The following check ensures vaddr is a valid address in the VMA */
	gts = gru_find_thread_state(vma, TSID(vaddr, vma));
	if (!gts)
		return VM_FAULT_SIGBUS;

again:
	mutex_lock(&gts->ts_ctxlock);
	preempt_disable();

	gru_check_context_placement(gts);

	if (!gts->ts_gru) {
		STAT(load_user_context);
		if (!gru_assign_gru_context(gts)) {
			preempt_enable();
			mutex_unlock(&gts->ts_ctxlock);
			set_current_state(TASK_INTERRUPTIBLE);
			schedule_timeout(GRU_ASSIGN_DELAY);  /* true hack ZZZ */
			/*
			 * Compare via the signed difference so the test stays
			 * correct when jiffies wraps around; a plain "<" on
			 * the raw counter values does not. This is the same
			 * test as time_before(expires, jiffies).
			 */
			expires = gts->ts_steal_jiffies + GRU_STEAL_DELAY;
			if ((long)(expires - jiffies) < 0)
				gru_steal_context(gts);
			goto again;
		}
		gru_load_context(gts);
		paddr = gseg_physical_address(gts->ts_gru, gts->ts_ctxnum);
		remap_pfn_range(vma, vaddr & ~(GRU_GSEG_PAGESIZE - 1),
				paddr >> PAGE_SHIFT, GRU_GSEG_PAGESIZE,
				vma->vm_page_prot);
	}

	preempt_enable();
	mutex_unlock(&gts->ts_ctxlock);

	return VM_FAULT_NOPAGE;
}

View file

@ -0,0 +1,377 @@
/*
* SN Platform GRU Driver
*
* PROC INTERFACES
*
* This file supports the /proc interfaces for the GRU driver
*
* Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/proc_fs.h>
#include <linux/device.h>
#include <linux/seq_file.h>
#include <linux/uaccess.h>
#include "gru.h"
#include "grulib.h"
#include "grutables.h"
/* Print the gru_stats counter named f, labelled with its own name */
#define printstat(s, f) printstat_val(s, &gru_stats.f, #f)

/* Emit one "<value> <name>" line for a statistics counter */
static void printstat_val(struct seq_file *s, atomic_long_t *v, char *id)
{
	seq_printf(s, "%16lu %s\n", (unsigned long)atomic_long_read(v), id);
}
/*
* seq_file show routine for the "statistics" entry: prints every counter
* of gru_stats, one "<value> <name>" line each. Always returns 0.
*/
static int statistics_show(struct seq_file *s, void *p)
{
printstat(s, vdata_alloc);
printstat(s, vdata_free);
printstat(s, gts_alloc);
printstat(s, gts_free);
printstat(s, gms_alloc);
printstat(s, gms_free);
printstat(s, gts_double_allocate);
printstat(s, assign_context);
printstat(s, assign_context_failed);
printstat(s, free_context);
printstat(s, load_user_context);
printstat(s, load_kernel_context);
printstat(s, lock_kernel_context);
printstat(s, unlock_kernel_context);
printstat(s, steal_user_context);
printstat(s, steal_kernel_context);
printstat(s, steal_context_failed);
printstat(s, nopfn);
printstat(s, asid_new);
printstat(s, asid_next);
printstat(s, asid_wrap);
printstat(s, asid_reuse);
printstat(s, intr);
printstat(s, intr_cbr);
printstat(s, intr_tfh);
printstat(s, intr_spurious);
printstat(s, intr_mm_lock_failed);
printstat(s, call_os);
printstat(s, call_os_wait_queue);
printstat(s, user_flush_tlb);
printstat(s, user_unload_context);
printstat(s, user_exception);
printstat(s, set_context_option);
printstat(s, check_context_retarget_intr);
printstat(s, check_context_unload);
printstat(s, tlb_dropin);
printstat(s, tlb_preload_page);
printstat(s, tlb_dropin_fail_no_asid);
printstat(s, tlb_dropin_fail_upm);
printstat(s, tlb_dropin_fail_invalid);
printstat(s, tlb_dropin_fail_range_active);
printstat(s, tlb_dropin_fail_idle);
printstat(s, tlb_dropin_fail_fmm);
printstat(s, tlb_dropin_fail_no_exception);
printstat(s, tfh_stale_on_fault);
printstat(s, mmu_invalidate_range);
printstat(s, mmu_invalidate_page);
printstat(s, flush_tlb);
printstat(s, flush_tlb_gru);
printstat(s, flush_tlb_gru_tgh);
printstat(s, flush_tlb_gru_zero_asid);
printstat(s, copy_gpa);
printstat(s, read_gpa);
printstat(s, mesq_receive);
printstat(s, mesq_receive_none);
printstat(s, mesq_send);
printstat(s, mesq_send_failed);
printstat(s, mesq_noop);
printstat(s, mesq_send_unexpected_error);
printstat(s, mesq_send_lb_overflow);
printstat(s, mesq_send_qlimit_reached);
printstat(s, mesq_send_amo_nacked);
printstat(s, mesq_send_put_nacked);
printstat(s, mesq_qf_locked);
printstat(s, mesq_qf_noop_not_full);
printstat(s, mesq_qf_switch_head_failed);
printstat(s, mesq_qf_unexpected_error);
printstat(s, mesq_noop_unexpected_error);
printstat(s, mesq_noop_lb_overflow);
printstat(s, mesq_noop_qlimit_reached);
printstat(s, mesq_noop_amo_nacked);
printstat(s, mesq_noop_put_nacked);
printstat(s, mesq_noop_page_overflow);
return 0;
}
/*
* Write handler for the "statistics" entry. Any write clears all gru_stats
* counters; the written data itself is not examined. Returns count so the
* whole write is reported as consumed.
*/
static ssize_t statistics_write(struct file *file, const char __user *userbuf,
size_t count, loff_t *data)
{
memset(&gru_stats, 0, sizeof(gru_stats));
return count;
}
/*
 * seq_file show routine for the "mcs_statistics" entry. For every MCS
 * operation it prints the number of calls, the average clocks per call
 * and the maximum clocks seen. Always returns 0.
 */
static int mcs_statistics_show(struct seq_file *s, void *p)
{
	int op;
	unsigned long total, count, max;
	/* Row labels, indexed by operation number */
	static const char * const id[] = {
		"cch_allocate", "cch_start", "cch_interrupt",
		"cch_interrupt_sync", "cch_deallocate", "tfh_write_only",
		"tfh_write_restart", "tgh_invalidate"};

	seq_printf(s, "%-20s%12s%12s%12s\n", "#id", "count", "aver-clks", "max-clks");
	for (op = 0; op < mcsop_last; op++) {
		count = atomic_long_read(&mcs_op_statistics[op].count);
		total = atomic_long_read(&mcs_op_statistics[op].total);
		max = mcs_op_statistics[op].max;
		/* All three values are unsigned long, hence %lu rather than %ld */
		seq_printf(s, "%-20s%12lu%12lu%12lu\n", id[op], count,
			   count ? total / count : 0, max);
	}
	return 0;
}
/*
* Write handler for the "mcs_statistics" entry. Any write clears the whole
* mcs_op_statistics table; the written data itself is not examined.
* Returns count so the whole write is reported as consumed.
*/
static ssize_t mcs_statistics_write(struct file *file,
const char __user *userbuf, size_t count, loff_t *data)
{
memset(mcs_op_statistics, 0, sizeof(mcs_op_statistics));
return count;
}
/*
 * seq_file show routine for the "debug_options" entry: a legend line
 * followed by the current gru_options value in hex. Always returns 0.
 */
static int options_show(struct seq_file *s, void *p)
{
	const unsigned long current_options = gru_options;

	seq_printf(s, "#bitmask: 1=trace, 2=statistics\n");
	seq_printf(s, "0x%lx\n", current_options);

	return 0;
}
/*
 * Write handler for the "debug_options" entry: parse the user supplied
 * number (base detected from its prefix) into gru_options.
 *
 * Returns count on success or the error from kstrtoul_from_user().
 */
static ssize_t options_write(struct file *file, const char __user *userbuf,
			     size_t count, loff_t *data)
{
	int err = kstrtoul_from_user(userbuf, count, 0, &gru_options);

	if (err != 0)
		return err;

	return count;
}
/*
 * seq_file show routine for the "cch_status" entry. Called once per gid;
 * prints one line for every context loaded on that GRU (gid, blade, context
 * number, asid, owner pid, number of CBRs, DSR bytes and TLB miss mode).
 * The column header is printed in front of gid 0. Always returns 0.
 */
static int cch_seq_show(struct seq_file *file, void *data)
{
	long gid = *(long *)data;
	int i;
	struct gru_state *gru = GID_TO_GRU(gid);
	struct gru_thread_state *ts;
	const char *mode[] = { "??", "UPM", "INTR", "OS_POLL" };

	if (gid == 0)
		seq_printf(file, "#%5s%5s%6s%7s%9s%6s%8s%8s\n", "gid", "bid",
			   "ctx#", "asid", "pid", "cbrs", "dsbytes", "mode");
	if (!gru)
		return 0;

	for (i = 0; i < GRU_NUM_CCH; i++) {
		ts = gru->gs_gts[i];
		if (!ts)
			continue;
		/*
		 * Kernel contexts have no asid and no owning process, so 0 is
		 * shown for both. The "dsbytes" column is derived from the DSR
		 * allocation count (it used to be computed from the CBR count).
		 */
		seq_printf(file, " %5d%5d%6d%7d%9d%6d%8d%8s\n",
			   gru->gs_gid, gru->gs_blade_id, i,
			   is_kernel_context(ts) ? 0 : ts->ts_gms->ms_asids[gid].mt_asid,
			   is_kernel_context(ts) ? 0 : ts->ts_tgid_owner,
			   ts->ts_cbr_au_count * GRU_CBR_AU_SIZE,
			   ts->ts_dsr_au_count * GRU_DSR_AU_BYTES,
			   mode[ts->ts_user_options &
				GRU_OPT_MISS_MASK]);
	}
	return 0;
}
/*
 * seq_file show routine for the "gru_status" entry. Called once per gid;
 * prints the busy and free amounts of contexts, CBRs and DSR bytes of that
 * GRU. The two header lines are printed in front of gid 0.
 * Always returns 0.
 */
static int gru_seq_show(struct seq_file *file, void *data)
{
	long gid = *(long *)data;
	long ctxfree, cbrfree, dsrfree;
	struct gru_state *gru = GID_TO_GRU(gid);

	if (gid == 0) {
		seq_printf(file, "#%5s%5s%7s%6s%6s%8s%6s%6s\n", "gid", "nid",
			   "ctx", "cbr", "dsr", "ctx", "cbr", "dsr");
		seq_printf(file, "#%5s%5s%7s%6s%6s%8s%6s%6s\n", "", "", "busy",
			   "busy", "busy", "free", "free", "free");
	}

	/* No GRU behind this gid: print nothing for it */
	if (!gru)
		return 0;

	ctxfree = GRU_NUM_CCH - gru->gs_active_contexts;
	cbrfree = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE;
	dsrfree = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES;

	/* busy = total - free for each resource */
	seq_printf(file, " %5d%5d%7ld%6ld%6ld%8ld%6ld%6ld\n",
		   gru->gs_gid, gru->gs_blade_id, GRU_NUM_CCH - ctxfree,
		   GRU_NUM_CBE - cbrfree, GRU_NUM_DSR_BYTES - dsrfree,
		   ctxfree, cbrfree, dsrfree);
	return 0;
}
/* seq_file stop callback: seq_start()/seq_next() acquire nothing, so there is nothing to release */
static void seq_stop(struct seq_file *file, void *data)
{
}
/*
 * seq_file start callback. The file position is used directly as the gid;
 * the iterator handed to the show routine is a pointer to that position.
 * Returns NULL once the position is past the last gid.
 */
static void *seq_start(struct seq_file *file, loff_t *gid)
{
	if (*gid >= gru_max_gids)
		return NULL;

	return gid;
}
/*
 * seq_file next callback: advance the position to the following gid.
 * Returns NULL once the position is past the last gid.
 */
static void *seq_next(struct seq_file *file, void *data, loff_t *gid)
{
	*gid += 1;

	if (*gid >= gru_max_gids)
		return NULL;

	return gid;
}
/* Iterator for the "cch_status" entry: walks all gids, showing each with cch_seq_show() */
static const struct seq_operations cch_seq_ops = {
.start = seq_start,
.next = seq_next,
.stop = seq_stop,
.show = cch_seq_show
};
/* Iterator for the "gru_status" entry: walks all gids, showing each with gru_seq_show() */
static const struct seq_operations gru_seq_ops = {
.start = seq_start,
.next = seq_next,
.stop = seq_stop,
.show = gru_seq_show
};
/* Open handler for "statistics": single-record seq_file backed by statistics_show() */
static int statistics_open(struct inode *inode, struct file *file)
{
return single_open(file, statistics_show, NULL);
}
/* Open handler for "mcs_statistics": single-record seq_file backed by mcs_statistics_show() */
static int mcs_statistics_open(struct inode *inode, struct file *file)
{
return single_open(file, mcs_statistics_show, NULL);
}
/* Open handler for "debug_options": single-record seq_file backed by options_show() */
static int options_open(struct inode *inode, struct file *file)
{
return single_open(file, options_show, NULL);
}
/* Open handler for "cch_status": seq_file iterating with cch_seq_ops */
static int cch_open(struct inode *inode, struct file *file)
{
return seq_open(file, &cch_seq_ops);
}
/* Open handler for "gru_status": seq_file iterating with gru_seq_ops */
static int gru_open(struct inode *inode, struct file *file)
{
return seq_open(file, &gru_seq_ops);
}
/* *INDENT-OFF* */
/* "statistics": readable through seq_file; a write clears the counters */
static const struct file_operations statistics_fops = {
.open = statistics_open,
.read = seq_read,
.write = statistics_write,
.llseek = seq_lseek,
.release = single_release,
};
/* "mcs_statistics": readable through seq_file; a write clears the table */
static const struct file_operations mcs_statistics_fops = {
.open = mcs_statistics_open,
.read = seq_read,
.write = mcs_statistics_write,
.llseek = seq_lseek,
.release = single_release,
};
/* "debug_options": read shows gru_options, write sets it */
static const struct file_operations options_fops = {
.open = options_open,
.read = seq_read,
.write = options_write,
.llseek = seq_lseek,
.release = single_release,
};
/* "cch_status": read-only, opened with seq_open() and therefore released with seq_release() */
static const struct file_operations cch_fops = {
.open = cch_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
};
/* "gru_status": read-only, opened with seq_open() and therefore released with seq_release() */
static const struct file_operations gru_fops = {
.open = gru_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
};
/*
 * Table of proc files created by gru_proc_init() inside the proc_gru
 * directory. The list is terminated by an entry whose name is NULL.
 *
 *   name  - file name relative to proc_gru
 *   mode  - permission bits (0644 for files with a .write handler,
 *           0444 for the read-only status files)
 *   fops  - file_operations table for the file
 *   entry - filled in by create_proc_file(); non-NULL once the file exists
 */
static struct proc_entry {
char *name;
umode_t mode;
const struct file_operations *fops;
struct proc_dir_entry *entry;
} proc_files[] = {
{"statistics", 0644, &statistics_fops},
{"mcs_statistics", 0644, &mcs_statistics_fops},
{"debug_options", 0644, &options_fops},
{"cch_status", 0444, &cch_fops},
{"gru_status", 0444, &gru_fops},
{NULL}
};
/* *INDENT-ON* */
/* The "sgi_uv/gru" proc directory; set by gru_proc_init(). */
static struct proc_dir_entry *proc_gru __read_mostly;
/*
 * Create one proc file described by @p under proc_gru and remember the
 * resulting entry in p->entry.
 *
 * Returns 0 on success, -1 if proc_create() failed (p->entry is then NULL).
 */
static int create_proc_file(struct proc_entry *p)
{
	struct proc_dir_entry *created;

	created = proc_create(p->name, p->mode, proc_gru, p->fops);
	p->entry = created;
	return created ? 0 : -1;
}
/*
 * Tear down everything gru_proc_init() created: every file in proc_files[]
 * that has a recorded entry, then the proc_gru directory itself.
 * Does nothing if the directory was never created.
 */
static void delete_proc_files(void)
{
	struct proc_entry *pf = proc_files;

	if (!proc_gru)
		return;

	while (pf->name) {
		if (pf->entry)
			remove_proc_entry(pf->name, proc_gru);
		pf++;
	}
	proc_remove(proc_gru);
}
/*
 * Create the "sgi_uv/gru" proc directory and populate it with the files
 * listed in proc_files[].
 *
 * Returns 0 on success, -1 on failure. On failure nothing is left behind:
 * files created so far and the directory are removed again.
 */
int gru_proc_init(void)
{
	struct proc_entry *p;

	proc_gru = proc_mkdir("sgi_uv/gru", NULL);
	/*
	 * Bail out if the directory could not be created. Passing a NULL
	 * parent to proc_create() would place the files in the /proc root,
	 * and delete_proc_files() skips cleanup when proc_gru is NULL, so
	 * those stray files would never be removed.
	 */
	if (!proc_gru)
		return -1;

	for (p = proc_files; p->name; p++) {
		if (create_proc_file(p))
			goto err;
	}
	return 0;

err:
	delete_proc_files();
	return -1;
}
/* Remove all GRU proc files and the proc_gru directory. */
void gru_proc_exit(void)
{
delete_proc_files();
}

View file

@ -0,0 +1,678 @@
/*
* SN Platform GRU Driver
*
* GRU DRIVER TABLES, MACROS, externs, etc
*
* Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef __GRUTABLES_H__
#define __GRUTABLES_H__
/*
* GRU Chiplet:
* The GRU is a user addressable memory accelerator. It provides
* several forms of load, store, memset, bcopy instructions. In addition, it
* contains special instructions for AMOs, sending messages to message
* queues, etc.
*
* The GRU is an integral part of the node controller. It connects
* directly to the cpu socket. In its current implementation, there are 2
* GRU chiplets in the node controller on each blade (~node).
*
* The entire GRU memory space is fully coherent and cacheable by the cpus.
*
* Each GRU chiplet has a physical memory map that looks like the following:
*
* +-----------------+
* |/////////////////|
* |/////////////////|
* |/////////////////|
* |/////////////////|
* |/////////////////|
* |/////////////////|
* |/////////////////|
* |/////////////////|
* +-----------------+
* | system control |
* +-----------------+ _______ +-------------+
* |/////////////////| / | |
* |/////////////////| / | |
* |/////////////////| / | instructions|
* |/////////////////| / | |
* |/////////////////| / | |
* |/////////////////| / |-------------|
* |/////////////////| / | |
* +-----------------+ | |
* | context 15 | | data |
* +-----------------+ | |
* | ...... | \ | |
* +-----------------+ \____________ +-------------+
* | context 1 |
* +-----------------+
* | context 0 |
* +-----------------+
*
* Each of the "contexts" is a chunk of memory that can be mmaped into user
* space. The context consists of 2 parts:
*
* - an instruction space that can be directly accessed by the user
* to issue GRU instructions and to check instruction status.
*
* - a data area that acts as normal RAM.
*
* User instructions contain virtual addresses of data to be accessed by the
* GRU. The GRU contains a TLB that is used to convert these user virtual
* addresses to physical addresses.
*
* The "system control" area of the GRU chiplet is used by the kernel driver
* to manage user contexts and to perform functions such as TLB dropin and
* purging.
*
* One context may be reserved for the kernel and used for cross-partition
* communication. The GRU will also be used to asynchronously zero out
* large blocks of memory (not currently implemented).
*
*
* Tables:
*
* VDATA-VMA Data - Holds a few parameters. Head of linked list of
* GTS tables for threads using the GSEG
* GTS - Gru Thread State - contains info for managing a GSEG context. A
* GTS is allocated for each thread accessing a
* GSEG.
* GTD - GRU Thread Data - contains shadow copy of GRU data when GSEG is
* not loaded into a GRU
* GMS - GRU Memory Struct - Used to manage TLB shootdowns. Tracks GRUs
* where a GSEG has been loaded. Similar to
* an mm_struct but for GRU.
*
* GS - GRU State - Used to manage the state of a GRU chiplet
* BS - Blade State - Used to manage state of all GRU chiplets
* on a blade
*
*
* Normal task tables for task using GRU.
* - 2 threads in process
* - 2 GSEGs open in process
* - GSEG1 is being used by both threads
* - GSEG2 is used only by thread 2
*
* task -->|
* task ---+---> mm ->------ (notifier) -------+-> gms
* | |
* |--> vma -> vdata ---> gts--->| GSEG1 (thread1)
* | | |
* | +-> gts--->| GSEG1 (thread2)
* | |
* |--> vma -> vdata ---> gts--->| GSEG2 (thread2)
* .
* .
*
* GSEGs are marked DONTCOPY on fork
*
* At open
* file.private_data -> NULL
*
* At mmap,
* vma -> vdata
*
* After gseg reference
* vma -> vdata ->gts
*
* After fork
* parent
* vma -> vdata -> gts
* child
* (vma is not copied)
*
*/
#include <linux/rmap.h>
#include <linux/interrupt.h>
#include <linux/mutex.h>
#include <linux/wait.h>
#include <linux/mmu_notifier.h>
#include "gru.h"
#include "grulib.h"
#include "gruhandles.h"
extern struct gru_stats_s gru_stats;
extern struct gru_blade_state *gru_base[];
extern unsigned long gru_start_paddr, gru_end_paddr;
extern void *gru_start_vaddr;
extern unsigned int gru_max_gids;
/* Upper bound on the number of blades, sized by the kernel's MAX_NUMNODES. */
#define GRU_MAX_BLADES MAX_NUMNODES
/* Upper bound on GRU chiplets in the system (sizes per-GRU arrays/bitmaps). */
#define GRU_MAX_GRUS (GRU_MAX_BLADES * GRU_CHIPLETS_PER_BLADE)
/* Driver identification strings. */
#define GRU_DRIVER_ID_STR "SGI GRU Device Driver"
#define GRU_DRIVER_VERSION_STR "0.85"
/*
 * GRU statistics.
 *
 * Every member is an atomic_long_t event counter. Counters are bumped
 * through the STAT(id) macro, which increments gru_stats.<id> only when
 * OPT_STATS is set in gru_options. The member name is therefore also the
 * argument passed to STAT().
 */
struct gru_stats_s {
atomic_long_t vdata_alloc;
atomic_long_t vdata_free;
atomic_long_t gts_alloc;
atomic_long_t gts_free;
atomic_long_t gms_alloc;
atomic_long_t gms_free;
atomic_long_t gts_double_allocate;
atomic_long_t assign_context;
atomic_long_t assign_context_failed;
atomic_long_t free_context;
atomic_long_t load_user_context;
atomic_long_t load_kernel_context;
atomic_long_t lock_kernel_context;
atomic_long_t unlock_kernel_context;
atomic_long_t steal_user_context;
atomic_long_t steal_kernel_context;
atomic_long_t steal_context_failed;
atomic_long_t nopfn;
atomic_long_t asid_new;
atomic_long_t asid_next;
atomic_long_t asid_wrap;
atomic_long_t asid_reuse;
atomic_long_t intr;
atomic_long_t intr_cbr;
atomic_long_t intr_tfh;
atomic_long_t intr_spurious;
atomic_long_t intr_mm_lock_failed;
atomic_long_t call_os;
atomic_long_t call_os_wait_queue;
atomic_long_t user_flush_tlb;
atomic_long_t user_unload_context;
atomic_long_t user_exception;
atomic_long_t set_context_option;
atomic_long_t check_context_retarget_intr;
atomic_long_t check_context_unload;
atomic_long_t tlb_dropin;
atomic_long_t tlb_preload_page;
atomic_long_t tlb_dropin_fail_no_asid;
atomic_long_t tlb_dropin_fail_upm;
atomic_long_t tlb_dropin_fail_invalid;
atomic_long_t tlb_dropin_fail_range_active;
atomic_long_t tlb_dropin_fail_idle;
atomic_long_t tlb_dropin_fail_fmm;
atomic_long_t tlb_dropin_fail_no_exception;
atomic_long_t tfh_stale_on_fault;
atomic_long_t mmu_invalidate_range;
atomic_long_t mmu_invalidate_page;
atomic_long_t flush_tlb;
atomic_long_t flush_tlb_gru;
atomic_long_t flush_tlb_gru_tgh;
atomic_long_t flush_tlb_gru_zero_asid;
atomic_long_t copy_gpa;
atomic_long_t read_gpa;
atomic_long_t mesq_receive;
atomic_long_t mesq_receive_none;
atomic_long_t mesq_send;
atomic_long_t mesq_send_failed;
atomic_long_t mesq_noop;
atomic_long_t mesq_send_unexpected_error;
atomic_long_t mesq_send_lb_overflow;
atomic_long_t mesq_send_qlimit_reached;
atomic_long_t mesq_send_amo_nacked;
atomic_long_t mesq_send_put_nacked;
atomic_long_t mesq_page_overflow;
atomic_long_t mesq_qf_locked;
atomic_long_t mesq_qf_noop_not_full;
atomic_long_t mesq_qf_switch_head_failed;
atomic_long_t mesq_qf_unexpected_error;
atomic_long_t mesq_noop_unexpected_error;
atomic_long_t mesq_noop_lb_overflow;
atomic_long_t mesq_noop_qlimit_reached;
atomic_long_t mesq_noop_amo_nacked;
atomic_long_t mesq_noop_put_nacked;
atomic_long_t mesq_noop_page_overflow;
};
/*
 * Identifiers for the operations tracked in mcs_op_statistics[].
 * mcsop_last is not an operation; it is the number of entries and sizes
 * the statistics array.
 */
enum mcs_op {cchop_allocate, cchop_start, cchop_interrupt, cchop_interrupt_sync,
cchop_deallocate, tfhop_write_only, tfhop_write_restart,
tghop_invalidate, mcsop_last};
/*
 * Per-operation statistics record, one per enum mcs_op value.
 * NOTE(review): presumably count = number of operations, total = accumulated
 * duration and max = longest single duration; the code that updates these
 * is not in this header - confirm against the users.
 */
struct mcs_op_statistic {
atomic_long_t count;
atomic_long_t total;
unsigned long max;
};
extern struct mcs_op_statistic mcs_op_statistics[mcsop_last];
/* Bits in gru_options. */
#define OPT_DPRINT 1 /* enable gru_dbg() output (debug builds only) */
#define OPT_STATS 2 /* enable STAT() counter updates */
#define IRQ_GRU 110 /* Starting IRQ number for interrupts */
/* Delay in jiffies between attempts to assign a GRU context (20 msec) */
#define GRU_ASSIGN_DELAY ((HZ * 20) / 1000)
/*
 * If a process has its context stolen, min delay in jiffies (200 msec) before
 * trying to steal a context from another process.
 */
#define GRU_STEAL_DELAY ((HZ * 200) / 1000)
/*
 * Increment the gru_stats counter named 'id'. A no-op unless OPT_STATS is
 * set in gru_options.
 */
#define STAT(id) do { \
if (gru_options & OPT_STATS) \
atomic_long_inc(&gru_stats.id); \
} while (0)
/*
 * Debug print, prefixed with the current cpu and calling function.
 *
 * With CONFIG_SGI_GRU_DEBUG the message is printed at KERN_DEBUG level when
 * OPT_DPRINT is set in gru_options; the 'dev' argument is accepted but not
 * used by the expansion. Without CONFIG_SGI_GRU_DEBUG the macro expands to
 * nothing, so its arguments are never evaluated - do not rely on side
 * effects in them.
 */
#ifdef CONFIG_SGI_GRU_DEBUG
#define gru_dbg(dev, fmt, x...) \
do { \
if (gru_options & OPT_DPRINT) \
printk(KERN_DEBUG "GRU:%d %s: " fmt, smp_processor_id(), __func__, x);\
} while (0)
#else
#define gru_dbg(x...)
#endif
/*-----------------------------------------------------------------------------
 * ASID management
 */
#define MAX_ASID 0xfffff0
#define MIN_ASID 8
#define ASID_INC 8 /* number of regions */
/* Generate a GRU asid value from a GRU base asid & a virtual address. */
#define VADDR_HI_BIT 64
/*
 * Region number of a virtual address: the address is shifted right by
 * VADDR_HI_BIT - 3 (= 61) and the result is masked with 3.
 * NOTE(review): the shift leaves the top three address bits but the mask
 * keeps only the low two of them, giving values 0..3 although ASID_INC is
 * documented as "8 regions" - confirm this is intended.
 */
#define GRUREGION(addr) ((addr) >> (VADDR_HI_BIT - 3) & 3)
/* ASID actually used for 'addr': the base asid plus the address region. */
#define GRUASID(asid, addr) ((asid) + GRUREGION(addr))
/*------------------------------------------------------------------------------
* File & VMS Tables
*/
struct gru_state;
/*
 * This structure is pointed to from the mmstruct via the notifier pointer.
 * There is one of these per address space.
 * (This describes struct gru_mm_struct, which embeds one gru_mm_tracker per
 * possible GRU in its ms_asids[] array.)
 *
 * struct gru_mm_tracker is the per-GRU ASID record for one address space.
 * The three bitfields add up to 64 bits and the struct is packed.
 */
struct gru_mm_tracker { /* pack to reduce size */
unsigned int mt_asid_gen:24; /* ASID wrap count */
unsigned int mt_asid:24; /* current base ASID for gru */
unsigned short mt_ctxbitmap:16;/* bitmap of contexts using
asid */
} __attribute__ ((packed));
/*
 * GMS - per address space GRU state, reached from the mm through the embedded
 * mmu_notifier (see container_of() uses in the notifier callbacks).
 *
 *   ms_notifier     - mmu notifier registered on the mm
 *   ms_refcnt       - reference count; taken in gru_register_mmu_notifier(),
 *                     dropped in gru_drop_mmu_notifier(), which frees the
 *                     structure when it reaches zero
 *   ms_released     - set by the notifier .release callback; when set,
 *                     gru_drop_mmu_notifier() skips the unregister call
 *   ms_wait_queue   - woken when a range invalidate ends
 *   ms_asidmap      - bitmap, indexed by gid, of GRUs with an entry in use
 *                     in ms_asids[]
 *   ms_asids        - per-GRU ASID tracking records, indexed by gid
 */
struct gru_mm_struct {
struct mmu_notifier ms_notifier;
atomic_t ms_refcnt;
spinlock_t ms_asid_lock; /* protects ASID assignment */
atomic_t ms_range_active;/* num range_invals active */
char ms_released;
wait_queue_head_t ms_wait_queue;
DECLARE_BITMAP(ms_asidmap, GRU_MAX_GRUS);
struct gru_mm_tracker ms_asids[GRU_MAX_GRUS];
};
/*
 * One of these structures is allocated when a GSEG is mmaped. The
 * structure is pointed to by the vma->vm_private_data field in the vma struct.
 *
 * NOTE(review): vd_cbr_au_count / vd_dsr_au_count / vd_tlb_preload_count look
 * like the per-GSEG defaults that are copied into each gru_thread_state
 * (ts_cbr_au_count etc.) - confirm against gru_alloc_thread_state().
 */
struct gru_vma_data {
spinlock_t vd_lock; /* Serialize access to vma */
struct list_head vd_head; /* head of linked list of gts */
long vd_user_options;/* misc user option flags */
int vd_cbr_au_count;
int vd_dsr_au_count;
unsigned char vd_tlb_preload_count;
};
/*
 * One of these is allocated for each thread accessing a mmaped GRU. A linked
 * list of these structures is hung off the struct gru_vma_data in the
 * mm_struct.
 *
 * The structure ends in a zero-length array (ts_gdata), so it is allocated
 * with extra trailing space used as the save area for the context data.
 */
struct gru_thread_state {
struct list_head ts_next; /* list - head at vma-private */
struct mutex ts_ctxlock; /* load/unload CTX lock */
struct mm_struct *ts_mm; /* mm currently mapped to
context */
struct vm_area_struct *ts_vma; /* vma of GRU context */
struct gru_state *ts_gru; /* GRU where the context is
loaded */
struct gru_mm_struct *ts_gms; /* asid & ioproc struct */
unsigned char ts_tlb_preload_count; /* TLB preload pages */
unsigned long ts_cbr_map; /* map of allocated CBRs */
unsigned long ts_dsr_map; /* map of allocated DATA
resources */
unsigned long ts_steal_jiffies;/* jiffies when context last
stolen */
long ts_user_options;/* misc user option flags */
pid_t ts_tgid_owner; /* task that is using the
context - for migration */
short ts_user_blade_id;/* user selected blade */
char ts_user_chiplet_id;/* user selected chiplet */
unsigned short ts_sizeavail; /* Pagesizes in use */
int ts_tsid; /* thread that owns the
structure */
int ts_tlb_int_select;/* target cpu if interrupts
enabled */
int ts_ctxnum; /* context number where the
context is loaded */
atomic_t ts_refcnt; /* reference count GTS */
unsigned char ts_dsr_au_count;/* Number of DSR resources
required for context */
unsigned char ts_cbr_au_count;/* Number of CBR resources
required for context */
char ts_cch_req_slice;/* CCH packet slice */
char ts_blade; /* If >= 0, migrate context if
ref from different blade */
char ts_force_cch_reload;
char ts_cbr_idx[GRU_CBR_AU];/* CBR numbers of each
allocated CB */
int ts_data_valid; /* Indicates if ts_gdata has
valid data */
struct gru_gseg_statistics ustats; /* User statistics */
unsigned long ts_gdata[0]; /* save area for GRU data (CB,
DS, CBE) */
};
/*
 * Threaded programs actually allocate an array of GSEGs when a context is
 * created. Each thread uses a separate GSEG. TSID is the index into the GSEG
 * array.
 */
/* Index of the GSEG containing user address 'a' within vma 'v'. */
#define TSID(a, v) (((a) - (v)->vm_start) / GRU_GSEG_PAGESIZE)
/* Inverse of TSID: user virtual address of the GSEG owned by 'gts'. */
#define UGRUADDR(gts) ((gts)->ts_vma->vm_start + \
(gts)->ts_tsid * GRU_GSEG_PAGESIZE)
#define NULLCTX (-1) /* if context not loaded into GRU */
/*-----------------------------------------------------------------------------
* GRU State Tables
*/
/*
 * One of these exists for each GRU chiplet.
 *
 * Two spinlocks guard disjoint groups of fields, as marked below:
 * gs_asid_lock for the ASID allocator state and gs_lock for the context and
 * resource bitmaps.
 */
struct gru_state {
struct gru_blade_state *gs_blade; /* GRU state for entire
blade */
unsigned long gs_gru_base_paddr; /* Physical address of
gru segments (64) */
void *gs_gru_base_vaddr; /* Virtual address of
gru segments (64) */
unsigned short gs_gid; /* unique GRU number */
unsigned short gs_blade_id; /* blade of GRU */
unsigned char gs_chiplet_id; /* blade chiplet of GRU */
unsigned char gs_tgh_local_shift; /* used to pick TGH for
local flush */
unsigned char gs_tgh_first_remote; /* starting TGH# for
remote flush */
spinlock_t gs_asid_lock; /* lock used for
assigning asids */
spinlock_t gs_lock; /* lock used for
assigning contexts */
/* -- the following are protected by the gs_asid_lock spinlock ---- */
unsigned int gs_asid; /* Next available ASID */
unsigned int gs_asid_limit; /* Limit of available
ASIDs */
unsigned int gs_asid_gen; /* asid generation.
Inc on wrap */
/* --- the following fields are protected by the gs_lock spinlock --- */
unsigned long gs_context_map; /* bitmap to manage
contexts in use */
unsigned long gs_cbr_map; /* bitmap to manage CB
resources */
unsigned long gs_dsr_map; /* bitmap used to manage
DATA resources */
unsigned int gs_reserved_cbrs; /* Number of kernel-
reserved cbrs */
unsigned int gs_reserved_dsr_bytes; /* Bytes of kernel-
reserved dsrs */
unsigned short gs_active_contexts; /* number of contexts
in use */
struct gru_thread_state *gs_gts[GRU_NUM_CCH]; /* GTS currently using
the context */
int gs_irq[GRU_NUM_TFM]; /* Interrupt irqs */
};
/*
 * This structure contains the GRU state for all the GRUs on a blade.
 *
 * The per-chiplet gru_state structures are embedded at the end (bs_grus[]);
 * gru_base[] holds one pointer to this structure per blade.
 */
struct gru_blade_state {
void *kernel_cb; /* First kernel
reserved cb */
void *kernel_dsr; /* First kernel
reserved DSR */
struct rw_semaphore bs_kgts_sema; /* lock for kgts */
struct gru_thread_state *bs_kgts; /* GTS for kernel use */
/* ---- the following are used for managing kernel async GRU CBRs --- */
int bs_async_dsr_bytes; /* DSRs for async */
int bs_async_cbrs; /* CBRs AU for async */
struct completion *bs_async_wq;
/* ---- the following are protected by the bs_lock spinlock ---- */
spinlock_t bs_lock; /* lock used for
stealing contexts */
int bs_lru_ctxnum; /* STEAL - last context
stolen */
struct gru_state *bs_lru_gru; /* STEAL - last gru
stolen */
struct gru_state bs_grus[GRU_CHIPLETS_PER_BLADE];
};
/*-----------------------------------------------------------------------------
 * Address Primitives
 *
 * Typed accessors for the handles of GRU 'g'. Each wraps the untyped
 * get_tfm()/get_tfh()/get_tgh()/get_cbe() helper, passing the chiplet's
 * kernel virtual base address and the cpu or index, and casts the result to
 * the matching handle structure type.
 */
#define get_tfm_for_cpu(g, c) \
((struct gru_tlb_fault_map *)get_tfm((g)->gs_gru_base_vaddr, (c)))
#define get_tfh_by_index(g, i) \
((struct gru_tlb_fault_handle *)get_tfh((g)->gs_gru_base_vaddr, (i)))
#define get_tgh_by_index(g, i) \
((struct gru_tlb_global_handle *)get_tgh((g)->gs_gru_base_vaddr, (i)))
#define get_cbe_by_index(g, i) \
((struct gru_control_block_extended *)get_cbe((g)->gs_gru_base_vaddr,\
(i)))
/*-----------------------------------------------------------------------------
 * Useful Macros
 */
/*
 * Given a blade# & chiplet#, get a pointer to the GRU.
 * Unlike GID_TO_GRU(), this does not check gru_base[b] for NULL.
 */
#define get_gru(b, c) (&gru_base[b]->bs_grus[c])
/* Number of bytes to save/restore when unloading/loading GRU contexts */
#define DSR_BYTES(dsr) ((dsr) * GRU_DSR_AU_BYTES)
#define CBR_BYTES(cbr) ((cbr) * GRU_HANDLE_BYTES * GRU_CBR_AU_SIZE * 2)
/* Convert a user CB number to the actual CBRNUM */
#define thread_cbr_number(gts, n) ((gts)->ts_cbr_idx[(n) / GRU_CBR_AU_SIZE] \
* GRU_CBR_AU_SIZE + (n) % GRU_CBR_AU_SIZE)
/*
 * Convert a gid to a pointer to the GRU.
 * Evaluates to NULL when the blade owning the gid has no gru_base[] entry.
 */
#define GID_TO_GRU(gid) \
(gru_base[(gid) / GRU_CHIPLETS_PER_BLADE] ? \
(&gru_base[(gid) / GRU_CHIPLETS_PER_BLADE]-> \
bs_grus[(gid) % GRU_CHIPLETS_PER_BLADE]) : \
NULL)
/* Scan all active GRUs in a GRU bitmap */
#define for_each_gru_in_bitmap(gid, map) \
for_each_set_bit((gid), (map), GRU_MAX_GRUS)
/*
 * Scan all active GRUs on a specific blade.
 * gru_base[nid] is dereferenced without a NULL check.
 */
#define for_each_gru_on_blade(gru, nid, i) \
for ((gru) = gru_base[nid]->bs_grus, (i) = 0; \
(i) < GRU_CHIPLETS_PER_BLADE; \
(i)++, (gru)++)
/* Scan all GRUs */
#define foreach_gid(gid) \
for ((gid) = 0; (gid) < gru_max_gids; (gid)++)
/*
 * Scan all active GTSs on a gru. Note: must hold gs_lock to use this macro
 * (gs_gts[] is one of the gru_state fields protected by gs_lock).
 * The expansion ends in an unbraced "if", so an "else" written directly
 * after the loop body would bind to that hidden "if".
 */
#define for_each_gts_on_gru(gts, gru, ctxnum) \
for ((ctxnum) = 0; (ctxnum) < GRU_NUM_CCH; (ctxnum)++) \
if (((gts) = (gru)->gs_gts[ctxnum]))
/* Scan each CBR whose bit is set in a TFM (or copy of) */
#define for_each_cbr_in_tfm(i, map) \
for_each_set_bit((i), (map), GRU_NUM_CBE)
/* Scan each CBR in a CBR bitmap. Note: multiple CBRs in an allocation unit */
#define for_each_cbr_in_allocation_map(i, map, k) \
for_each_set_bit((k), (map), GRU_CBR_AU) \
for ((i) = (k)*GRU_CBR_AU_SIZE; \
(i) < ((k) + 1) * GRU_CBR_AU_SIZE; (i)++)
/* Scan each DSR in a DSR bitmap. Note: multiple DSRs in an allocation unit */
#define for_each_dsr_in_allocation_map(i, map, k) \
for_each_set_bit((k), (const unsigned long *)(map), GRU_DSR_AU) \
for ((i) = (k) * GRU_DSR_AU_CL; \
(i) < ((k) + 1) * GRU_DSR_AU_CL; (i)++)
/*
 * Physical / kernel virtual address of the GSEG for context 'ctxnum' on
 * 'gru': the chiplet base address plus 'ctxnum' GSEG strides.
 *
 * 'ctxnum' is parenthesized so that an expression argument (for example
 * "base + i") is multiplied by the stride as a whole.
 */
#define gseg_physical_address(gru, ctxnum)				\
	((gru)->gs_gru_base_paddr + (ctxnum) * GRU_GSEG_STRIDE)
#define gseg_virtual_address(gru, ctxnum)				\
	((gru)->gs_gru_base_vaddr + (ctxnum) * GRU_GSEG_STRIDE)
/*-----------------------------------------------------------------------------
* Lock / Unlock GRU handles
* Use the "delresp" bit in the handle as a "lock" bit.
*/
/* Lock hierarchy checking enabled only in emulator */
/*
 * Try once to take the handle lock (bit 1 of the handle).
 * Returns 1 if the lock was acquired, 0 if it was already held.
 */
static inline int __trylock_handle(void *h)
{
	int was_locked = test_and_set_bit(1, h);

	return !was_locked;
}
/*
 * Take the handle lock (bit 1 of the handle), spinning with cpu_relax()
 * until the bit is successfully set by this caller.
 */
static inline void __lock_handle(void *h)
{
	for (;;) {
		if (!test_and_set_bit(1, h))
			return;
		cpu_relax();
	}
}
/* Release the handle lock by clearing bit 1 of the handle. */
static inline void __unlock_handle(void *h)
{
clear_bit(1, h);
}
/*
 * Typed wrappers around the generic handle lock primitives. They add no
 * behavior; they only restrict the argument to the intended handle type.
 */

/* Try to lock a CCH; returns 1 if locked, 0 if the lock was already held. */
static inline int trylock_cch_handle(struct gru_context_configuration_handle *cch)
{
return __trylock_handle(cch);
}
/* Lock a CCH, spinning until the lock is acquired. */
static inline void lock_cch_handle(struct gru_context_configuration_handle *cch)
{
__lock_handle(cch);
}
/* Unlock a CCH. */
static inline void unlock_cch_handle(struct gru_context_configuration_handle
*cch)
{
__unlock_handle(cch);
}
/* Lock a TGH, spinning until the lock is acquired. */
static inline void lock_tgh_handle(struct gru_tlb_global_handle *tgh)
{
__lock_handle(tgh);
}
/* Unlock a TGH. */
static inline void unlock_tgh_handle(struct gru_tlb_global_handle *tgh)
{
__unlock_handle(tgh);
}
static inline int is_kernel_context(struct gru_thread_state *gts)
{
return !gts->ts_mm;
}
/*
 * The following are for Nehalem-EX. A more general scheme is needed for
 * future processors.
 *
 * All three decode fields of cpu_physical_id(p):
 *   socket - bit 5
 *   ht     - bit 0
 *   core   - bit 4 (as core bit 2) combined with bits 2:1 (as core bits 1:0)
 */
#define UV_MAX_INT_CORES 8
#define uv_cpu_socket_number(p) ((cpu_physical_id(p) >> 5) & 1)
#define uv_cpu_ht_number(p) (cpu_physical_id(p) & 1)
#define uv_cpu_core_number(p) (((cpu_physical_id(p) >> 2) & 4) | \
((cpu_physical_id(p) >> 1) & 3))
/*-----------------------------------------------------------------------------
* Function prototypes & externs
*/
struct gru_unload_context_req;
extern const struct vm_operations_struct gru_vm_ops;
extern struct device *grudev;
extern struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma,
int tsid);
extern struct gru_thread_state *gru_find_thread_state(struct vm_area_struct
*vma, int tsid);
extern struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct
*vma, int tsid);
extern struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts);
extern void gru_load_context(struct gru_thread_state *gts);
extern void gru_steal_context(struct gru_thread_state *gts);
extern void gru_unload_context(struct gru_thread_state *gts, int savestate);
extern int gru_update_cch(struct gru_thread_state *gts);
extern void gts_drop(struct gru_thread_state *gts);
extern void gru_tgh_flush_init(struct gru_state *gru);
extern int gru_kservices_init(void);
extern void gru_kservices_exit(void);
extern irqreturn_t gru0_intr(int irq, void *dev_id);
extern irqreturn_t gru1_intr(int irq, void *dev_id);
extern irqreturn_t gru_intr_mblade(int irq, void *dev_id);
extern int gru_dump_chiplet_request(unsigned long arg);
extern long gru_get_gseg_statistics(unsigned long arg);
extern int gru_handle_user_call_os(unsigned long address);
extern int gru_user_flush_tlb(unsigned long arg);
extern int gru_user_unload_context(unsigned long arg);
extern int gru_get_exception_detail(unsigned long arg);
extern int gru_set_context_option(unsigned long address);
extern void gru_check_context_placement(struct gru_thread_state *gts);
extern int gru_cpu_fault_map_id(void);
extern struct vm_area_struct *gru_find_vma(unsigned long vaddr);
extern void gru_flush_all_tlb(struct gru_state *gru);
extern int gru_proc_init(void);
extern void gru_proc_exit(void);
extern struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
int cbr_au_count, int dsr_au_count,
unsigned char tlb_preload_count, int options, int tsid);
extern unsigned long gru_reserve_cb_resources(struct gru_state *gru,
int cbr_au_count, char *cbmap);
extern unsigned long gru_reserve_ds_resources(struct gru_state *gru,
int dsr_au_count, char *dsmap);
extern int gru_fault(struct vm_area_struct *, struct vm_fault *vmf);
extern struct gru_mm_struct *gru_register_mmu_notifier(void);
extern void gru_drop_mmu_notifier(struct gru_mm_struct *gms);
extern int gru_ktest(unsigned long arg);
extern void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start,
unsigned long len);
extern unsigned long gru_options;
#endif /* __GRUTABLES_H__ */

View file

@ -0,0 +1,377 @@
/*
* SN Platform GRU Driver
*
* MMUOPS callbacks + TLB flushing
*
* This file handles mmu notifier callbacks from the core kernel. The callbacks
* are used to update the TLB in the GRU as a result of changes in the
* state of a process address space. This file also handles TLB invalidates
* from the GRU driver.
*
* Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/device.h>
#include <linux/hugetlb.h>
#include <linux/delay.h>
#include <linux/timex.h>
#include <linux/srcu.h>
#include <asm/processor.h>
#include "gru.h"
#include "grutables.h"
#include <asm/uv/uv_hub.h>
/*
 * Cheap source of varying values: the cpu cycle counter. Used to spread
 * off-blade TGH selection; it is not a real random number generator.
 */
#define gru_random() get_cycles()
/* ---------------------------------- TLB Invalidation functions --------
* get_tgh_handle
*
* Find a TGH to use for issuing a TLB invalidate. For GRUs that are on the
* local blade, use a fixed TGH that is a function of the blade-local cpu
* number. Normally, this TGH is private to the cpu & no contention occurs for
* the TGH. For offblade GRUs, select a random TGH in the range above the
* private TGHs. A spinlock is required to access this TGH & the lock must be
* released when the invalidate completes. This sucks, but it is the best we
* can do.
*
* Note that the spinlock is IN the TGH handle so locking does not involve
* additional cache lines.
*
*/
/*
 * Pick a TGH index for a GRU on another blade: a pseudo-random index in the
 * shared range [gs_tgh_first_remote, GRU_NUM_TGH).
 */
static inline int get_off_blade_tgh(struct gru_state *gru)
{
	int first_remote = gru->gs_tgh_first_remote;
	int span = GRU_NUM_TGH - first_remote;
	int pick = gru_random() % span;

	return first_remote + pick;
}
/*
 * Pick the TGH index for a GRU on the local blade: the blade-local cpu
 * number scaled down by gs_tgh_local_shift.
 */
static inline int get_on_blade_tgh(struct gru_state *gru)
{
return uv_blade_processor_id() >> gru->gs_tgh_local_shift;
}
/*
 * Select and lock a TGH on @gru for issuing a TLB invalidate.
 *
 * Preemption is disabled before the cpu/blade is sampled and stays disabled
 * on return; the caller must release with get_unlock_tgh_handle(), which
 * unlocks the handle and re-enables preemption.
 */
static struct gru_tlb_global_handle *get_lock_tgh_handle(struct gru_state
							 *gru)
{
	struct gru_tlb_global_handle *tgh;
	int idx;

	preempt_disable();
	idx = (uv_numa_blade_id() == gru->gs_blade_id) ?
		get_on_blade_tgh(gru) : get_off_blade_tgh(gru);
	tgh = get_tgh_by_index(gru, idx);
	lock_tgh_handle(tgh);
	return tgh;
}
/*
 * Counterpart of get_lock_tgh_handle(): unlock the TGH first, then re-enable
 * the preemption that get_lock_tgh_handle() disabled.
 */
static void get_unlock_tgh_handle(struct gru_tlb_global_handle *tgh)
{
unlock_tgh_handle(tgh);
preempt_enable();
}
/*
* gru_flush_tlb_range
*
* General purpose TLB invalidation function. This function scans every GRU in
* the ENTIRE system (partition) looking for GRUs where the specified MM has
* been accessed by the GRU. For each GRU found, the TLB must be invalidated OR
* the ASID invalidated. Invalidating an ASID causes a new ASID to be assigned
* on the next fault. This effectively flushes the ENTIRE TLB for the MM at the
* cost of (possibly) a large number of future TLBmisses.
*
* The current algorithm is optimized based on the following (somewhat true)
* assumptions:
* - GRU contexts are not loaded into a GRU unless a reference is made to
* the data segment or control block (this is true, not an assumption).
* If a DS/CB is referenced, the user will also issue instructions that
* cause TLBmisses. It is not necessary to optimize for the case where
* contexts are loaded but no instructions cause TLB misses. (I know
* this will happen but I'm not optimizing for it).
* - GRU instructions to invalidate TLB entries are SLOOOOWWW - normally
* a few usec but in unusual cases, it could be longer. Avoid if
* possible.
* - intrablade process migration between cpus is not frequent but is
* common.
* - a GRU context is not typically migrated to a different GRU on the
* blade because of intrablade migration
* - interblade migration is rare. Processes migrate their GRU context to
* the new blade.
* - if interblade migration occurs, migration back to the original blade
* is very very rare (ie., no optimization for this case)
* - most GRU instruction operate on a subset of the user REGIONS. Code
* & shared library regions are not likely targets of GRU instructions.
*
* To help improve the efficiency of TLB invalidation, the GMS data
* structure is maintained for EACH address space (MM struct). The GMS is
* also the structure that contains the pointer to the mmu callout
* functions. This structure is linked to the mm_struct for the address space
* using the mmu "register" function. The mmu interfaces are used to
* provide the callbacks for TLB invalidation. The GMS contains:
*
* - asid[maxgrus] array. ASIDs are assigned to a GRU when a context is
* loaded into the GRU.
* - asidmap[maxgrus]. bitmap to make it easier to find non-zero asids in
* the above array
* - ctxbitmap[maxgrus]. Indicates the contexts that are currently active
* in the GRU for the address space. This bitmap must be passed to the
* GRU to do an invalidate.
*
* The current algorithm for invalidating TLBs is:
* - scan the asidmap for GRUs where the context has been loaded, ie,
* asid is non-zero.
* - for each gru found:
* - if the ctxtmap is non-zero, there are active contexts in the
* GRU. TLB invalidate instructions must be issued to the GRU.
* - if the ctxtmap is zero, no context is active. Set the ASID to
* zero to force a full TLB invalidation. This is fast but will
* cause a lot of TLB misses if the context is reloaded onto the
* GRU
*
*/
/*
 * Invalidate GRU TLB entries for [start, start + len) in the address space
 * tracked by @gms.
 *
 * Walks every GRU whose bit is set in ms_asidmap while holding ms_asid_lock.
 * A GRU with both a non-zero asid and active contexts gets an explicit TGH
 * invalidate (page count capped at GRUMAXINVAL); otherwise its asid is
 * dropped and its asidmap bit cleared.
 */
void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start,
			 unsigned long len)
{
	struct gru_state *gru;
	struct gru_mm_tracker *asids;
	struct gru_tlb_global_handle *tgh;
	unsigned long num;
	int grupagesize, pagesize, pageshift, gid, asid;

	/* ZZZ TODO - handle huge pages */
	pageshift = PAGE_SHIFT;
	pagesize = (1UL << pageshift);
	grupagesize = GRU_PAGESIZE(pageshift);
	num = min(((len + pagesize - 1) >> pageshift), GRUMAXINVAL);

	STAT(flush_tlb);
	gru_dbg(grudev, "gms %p, start 0x%lx, len 0x%lx, asidmap 0x%lx\n", gms,
		start, len, gms->ms_asidmap[0]);

	spin_lock(&gms->ms_asid_lock);
	for_each_gru_in_bitmap(gid, gms->ms_asidmap) {
		STAT(flush_tlb_gru);
		gru = GID_TO_GRU(gid);
		asids = &gms->ms_asids[gid];
		asid = asids->mt_asid;

		if (!asids->mt_ctxbitmap || !asid) {
			/* No active context: just forget the asid. */
			STAT(flush_tlb_gru_zero_asid);
			asids->mt_asid = 0;
			__clear_bit(gru->gs_gid, gms->ms_asidmap);
			gru_dbg(grudev,
	"  CLEARASID gruid %d, asid 0x%x, cbtmap 0x%x, asidmap 0x%lx\n",
				gid, asid, asids->mt_ctxbitmap,
				gms->ms_asidmap[0]);
			continue;
		}

		/* Active contexts: issue an invalidate through a TGH. */
		STAT(flush_tlb_gru_tgh);
		asid = GRUASID(asid, start);
		gru_dbg(grudev,
	"  FLUSH gruid %d, asid 0x%x, vaddr 0x%lx, vamask 0x%x, num %ld, cbmap 0x%x\n",
			gid, asid, start, grupagesize, num, asids->mt_ctxbitmap);
		tgh = get_lock_tgh_handle(gru);
		tgh_invalidate(tgh, start, ~0, asid, grupagesize, 0,
			       num - 1, asids->mt_ctxbitmap);
		get_unlock_tgh_handle(tgh);
	}
	spin_unlock(&gms->ms_asid_lock);
}
/*
 * Flush the entire TLB on a chiplet.
 *
 * Takes a TGH on @gru, issues one tgh_invalidate() with address 0, asid 0,
 * the maximum count (GRUMAXINVAL - 1) and a context bitmap of 0xffff, then
 * releases the TGH.
 * NOTE(review): the sixth argument (1 here, 0 in gru_flush_tlb_range) is
 * presumably the "global" flag - confirm against tgh_invalidate().
 */
void gru_flush_all_tlb(struct gru_state *gru)
{
struct gru_tlb_global_handle *tgh;
gru_dbg(grudev, "gid %d\n", gru->gs_gid);
tgh = get_lock_tgh_handle(gru);
tgh_invalidate(tgh, 0, ~0, 0, 1, 1, GRUMAXINVAL - 1, 0xffff);
get_unlock_tgh_handle(tgh);
}
/*
 * MMUOPS notifier callout functions
 */
/*
 * .invalidate_range_start: mark a range invalidate as in progress by
 * incrementing ms_range_active, then flush the GRU TLBs for [start, end).
 * The matching decrement is done in gru_invalidate_range_end().
 */
static void gru_invalidate_range_start(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start, unsigned long end)
{
struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
ms_notifier);
STAT(mmu_invalidate_range);
atomic_inc(&gms->ms_range_active);
gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx, act %d\n", gms,
start, end, atomic_read(&gms->ms_range_active));
gru_flush_tlb_range(gms, start, end - start);
}
/*
 * .invalidate_range_end: drop the in-progress count taken by
 * gru_invalidate_range_start() and wake everything sleeping on
 * ms_wait_queue. The wake-up is issued unconditionally, whether or not the
 * count reached zero.
 */
static void gru_invalidate_range_end(struct mmu_notifier *mn,
struct mm_struct *mm, unsigned long start,
unsigned long end)
{
struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
ms_notifier);
/* ..._and_test() provides needed barrier */
(void)atomic_dec_and_test(&gms->ms_range_active);
wake_up_all(&gms->ms_wait_queue);
gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx\n", gms, start, end);
}
/*
 * .invalidate_page: flush the GRU TLBs for the single page (PAGE_SIZE bytes)
 * starting at @address.
 */
static void gru_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm,
unsigned long address)
{
struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
ms_notifier);
STAT(mmu_invalidate_page);
gru_flush_tlb_range(gms, address, PAGE_SIZE);
gru_dbg(grudev, "gms %p, address 0x%lx\n", gms, address);
}
/*
 * .release: record that the notifier has been released. The flag is read by
 * gru_drop_mmu_notifier(), which then skips mmu_notifier_unregister().
 * The gms itself is not freed here.
 */
static void gru_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
ms_notifier);
gms->ms_released = 1;
gru_dbg(grudev, "gms %p\n", gms);
}
/*
 * mmu notifier callbacks installed on every mm that uses the GRU. The
 * address of this table also identifies GRU notifiers in mmu_find_ops().
 */
static const struct mmu_notifier_ops gru_mmuops = {
.invalidate_page = gru_invalidate_page,
.invalidate_range_start = gru_invalidate_range_start,
.invalidate_range_end = gru_invalidate_range_end,
.release = gru_release,
};
/* Move this to the basic mmu_notifier file. But for now... */
static struct mmu_notifier *mmu_find_ops(struct mm_struct *mm,
const struct mmu_notifier_ops *ops)
{
struct mmu_notifier *mn, *gru_mn = NULL;
if (mm->mmu_notifier_mm) {
rcu_read_lock();
hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list,
hlist)
if (mn->ops == ops) {
gru_mn = mn;
break;
}
rcu_read_unlock();
}
return gru_mn;
}
struct gru_mm_struct *gru_register_mmu_notifier(void)
{
struct gru_mm_struct *gms;
struct mmu_notifier *mn;
int err;
mn = mmu_find_ops(current->mm, &gru_mmuops);
if (mn) {
gms = container_of(mn, struct gru_mm_struct, ms_notifier);
atomic_inc(&gms->ms_refcnt);
} else {
gms = kzalloc(sizeof(*gms), GFP_KERNEL);
if (gms) {
STAT(gms_alloc);
spin_lock_init(&gms->ms_asid_lock);
gms->ms_notifier.ops = &gru_mmuops;
atomic_set(&gms->ms_refcnt, 1);
init_waitqueue_head(&gms->ms_wait_queue);
err = __mmu_notifier_register(&gms->ms_notifier, current->mm);
if (err)
goto error;
}
}
gru_dbg(grudev, "gms %p, refcnt %d\n", gms,
atomic_read(&gms->ms_refcnt));
return gms;
error:
kfree(gms);
return ERR_PTR(err);
}
/*
 * Drop one reference on gms.
 *
 * When the count reaches zero the notifier is unregistered from current->mm
 * (unless the release callout already set ms_released) and the structure is
 * freed.
 */
void gru_drop_mmu_notifier(struct gru_mm_struct *gms)
{
	gru_dbg(grudev, "gms %p, refcnt %d, released %d\n", gms,
		atomic_read(&gms->ms_refcnt), gms->ms_released);

	/* Other references remain; nothing to tear down yet. */
	if (atomic_dec_return(&gms->ms_refcnt) != 0)
		return;

	if (!gms->ms_released)
		mmu_notifier_unregister(&gms->ms_notifier, current->mm);

	kfree(gms);
	STAT(gms_free);
}
/*
* Setup TGH parameters. There are:
* - 24 TGH handles per GRU chiplet
* - a portion (MAX_LOCAL_TGH) of the handles are reserved for
* use by blade-local cpus
* - the rest are used by off-blade cpus. This usage is
* less frequent than blade-local usage.
*
* For now, use 16 handles for local flushes, 8 for remote flushes. If the blade
* has less than or equal to 16 cpus, each cpu has a unique handle that it can
* use.
*/
#define MAX_LOCAL_TGH 16
/*
 * Compute the per-GRU TGH partitioning parameters.
 *
 * gs_tgh_local_shift is the right-shift applied to a blade-local cpu number
 * to obtain its TGH index: 0 while the blade's possible-cpu count (rounded
 * up to a power of 2) fits in MAX_LOCAL_TGH, 1 for up to 2 * MAX_LOCAL_TGH,
 * and so on. gs_tgh_first_remote is the first TGH index used for remote
 * purges, i.e. the cpu count divided by 2^shift, rounded up.
 */
void gru_tgh_flush_init(struct gru_state *gru)
{
	int nr_cpus = uv_blade_nr_possible_cpus(gru->gs_blade_id);
	int tgh_shift = 0;

	if (nr_cpus != 0) {
		/* cpu count rounded up to the next power of 2 */
		int pow2_cpus = 1 << fls(nr_cpus - 1);
		/* how many bits pow2_cpus exceeds MAX_LOCAL_TGH by */
		int extra_bits = fls(pow2_cpus - 1) - fls(MAX_LOCAL_TGH - 1);

		if (extra_bits > 0)
			tgh_shift = extra_bits;
	}

	gru->gs_tgh_local_shift = tgh_shift;

	/* first starting TGH index to use for remote purges */
	gru->gs_tgh_first_remote =
		(nr_cpus + (1 << tgh_shift) - 1) >> tgh_shift;
}