Newer
Older
/*
* SGI UltraViolet TLB flush routines.
*
* (c) 2008-2012 Cliff Wickman <cpw@sgi.com>, SGI.
*
* This code is released under the GNU General Public License version 2 or
* later.
*/
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <asm/mmu_context.h>
#include <asm/uv/uv_mmrs.h>
#include <asm/uv/uv_bau.h>
#include <asm/tsc.h>
#include <asm/irq_vectors.h>
#include <asm/timer.h>
/*
 * Timeout base values, in nanoseconds.
 * The table is indexed by the urgency7 field (bits 30:28) of
 * UVH_AGING_PRESCALE_SEL.
 */
static int timeout_base_ns[] = {
	[0] = 20,
	[1] = 160,
	[2] = 1280,
	[3] = 10240,
	[4] = 81920,
	[5] = 655360,
	[6] = 5242880,
	[7] = 167772160
};

Cliff Wickman
committed
/*
 * When set, set_bau_on() refuses to enable the BAU and reports that it
 * was never initialized.
 */
static int nobau_perm;

/*
 * Threshold, in cycles, that record_send_stats() compares a send's elapsed
 * time (and the period's average time) against.
 */
static cycles_t congested_cycles;

/*
 * Current values of the tunables, each initialized to its compile-time
 * default.  Every variable below except max_concurr_const has an entry
 * in the tunables[] table.
 */
static int max_concurr		= MAX_BAU_CONCURRENT;
static int max_concurr_const	= MAX_BAU_CONCURRENT;
static int plugged_delay	= PLUGGED_DELAY;
static int plugsb4reset		= PLUGSB4RESET;
static int giveup_limit		= GIVEUP_LIMIT;
static int timeoutsb4reset	= TIMEOUTSB4RESET;
static int ipi_reset_limit	= IPI_RESET_LIMIT;
static int complete_threshold	= COMPLETE_THRESHOLD;
static int congested_respns_us	= CONGESTED_RESPONSE_US;
static int congested_reps	= CONGESTED_REPS;
static int disabled_period	= DISABLED_PERIOD;
/*
 * Table of tunables: each entry pairs the address of a tunable variable
 * with its compile-time default value.
 * The max_concurr entry must remain the first element.
 */
static struct tunables tunables[] = {
	{&max_concurr,		MAX_BAU_CONCURRENT}, /* must be [0] */
	{&plugged_delay,	PLUGGED_DELAY},
	{&plugsb4reset,		PLUGSB4RESET},
	{&timeoutsb4reset,	TIMEOUTSB4RESET},
	{&ipi_reset_limit,	IPI_RESET_LIMIT},
	{&complete_threshold,	COMPLETE_THRESHOLD},
	{&congested_respns_us,	CONGESTED_RESPONSE_US},
	{&congested_reps,	CONGESTED_REPS},
	{&disabled_period,	DISABLED_PERIOD},
	{&giveup_limit,		GIVEUP_LIMIT}
};
/*
 * Directory entries for the tunables directory and the tunables file.
 * NOTE(review): presumably debugfs entries; their creation is not visible
 * in this part of the file -- confirm.
 */
static struct dentry *tunables_dir;
static struct dentry *tunables_file;
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
/*
 * One-line descriptions of the statistics, in "<name>: <meaning>" form.
 * These correspond to the statistics printed by ptc_seq_show().
 */
static char *stat_description[] = {
	"sent: number of shootdown messages sent",
	"stime: time spent sending messages",
	"numuvhubs: number of hubs targeted with shootdown",
	"numuvhubs16: number times 16 or more hubs targeted",
	"numuvhubs8: number times 8 or more hubs targeted",
	"numuvhubs4: number times 4 or more hubs targeted",
	"numuvhubs2: number times 2 or more hubs targeted",
	"numuvhubs1: number times 1 hub targeted",
	"numcpus: number of cpus targeted with shootdown",
	"dto: number of destination timeouts",
	"retries: destination timeout retries sent",
	/* the stray second ':' that followed the name has been removed */
	"rok: destination timeouts successfully retried",
	"resetp: ipi-style resource resets for plugs",
	"resett: ipi-style resource resets for timeouts",
	"giveup: fall-backs to ipi-style shootdowns",
	"sto: number of source timeouts",
	"bz: number of stay-busy's",
	"throt: number times spun in throttle",
	"swack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE",
	"recv: shootdown messages received",
	"rtime: time spent processing messages",
	"all: shootdown all-tlb messages",
	"one: shootdown one-tlb messages",
	"mult: interrupts that found multiple messages",
	"none: interrupts that found no messages",
	"retry: number of retry messages processed",
	"canc: number messages canceled by retries",
	"nocan: number retries that found nothing to cancel",
	"reset: number of ipi-style reset requests processed",
	"rcan: number messages canceled by reset requests",
	"disable: number times use of the BAU was disabled",
	"enable: number times use of the BAU was re-enabled"
};
/*
 * Handler for the "nobau" early boot parameter.
 * Sets the nobau flag; the parameter's argument string is not examined.
 * Always returns 0.
 */
static int __init setup_nobau(char *arg)
{
	nobau = 1;

	return 0;
}
early_param("nobau", setup_nobau);
/* base pnode in this partition */
/* per-cpu statistics structure (struct ptc_stats) */
static DEFINE_PER_CPU(struct ptc_stats, ptcstats);
/* per-cpu BAU control structure; looked up with per_cpu(bau_control, cpu) */
static DEFINE_PER_CPU(struct bau_control, bau_control);
/* per-cpu cpumask variable; its users are not visible in this part of the file */
static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask);

Cliff Wickman
committed
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
static void
set_bau_on(void)
{
int cpu;
struct bau_control *bcp;
if (nobau_perm) {
pr_info("BAU not initialized; cannot be turned on\n");
return;
}
nobau = 0;
for_each_present_cpu(cpu) {
bcp = &per_cpu(bau_control, cpu);
bcp->nobau = 0;
}
pr_info("BAU turned on\n");
return;
}
static void
set_bau_off(void)
{
int cpu;
struct bau_control *bcp;
nobau = 1;
for_each_present_cpu(cpu) {
bcp = &per_cpu(bau_control, cpu);
bcp->nobau = 1;
}
pr_info("BAU turned off\n");
return;
}
/*
 * Determine the first node on a uvhub. 'Nodes' are used for kernel
 * memory allocation.
 */
static int __init uvhub_to_first_node(int uvhub)
{
int node, b;
for_each_online_node(node) {
b = uv_node_to_blade_id(node);
* Determine the apicid of the first cpu on a uvhub.
static int __init uvhub_to_first_apicid(int uvhub)
{
int cpu;
for_each_present_cpu(cpu)
if (uvhub == uv_cpu_to_blade_id(cpu))
return per_cpu(x86_cpu_to_apicid, cpu);
return -1;
}
/*
* Free a software acknowledge hardware resource by clearing its Pending
* bit. This will return a reply to the sender.
* If the message has timed out, a reply has already been sent by the
* hardware but the resource has not been released. In that case our
* clear of the Timeout bit (as well) will free the resource. No reply will
* be sent (the hardware will only do one reply per message).
*/
static void reply_to_message(struct msg_desc *mdp, struct bau_control *bcp,
int do_acknowledge)
unsigned long dw;
dw = (msg->swack_vec << UV_SW_ACK_NPENDING) | msg->swack_vec;
write_mmr_sw_ack(dw);
msg->replied_to = 1;
* Process the receipt of a RETRY message
static void bau_process_retry_msg(struct msg_desc *mdp,
struct bau_control *bcp)
int i;
int cancel_count = 0;
unsigned long msg_res;
unsigned long mmr = 0;
struct bau_pq_entry *msg = mdp->msg;
struct bau_pq_entry *msg2;
struct ptc_stats *stat = bcp->statp;
stat->d_retries++;
/*
* cancel any message from msg+1 to the retry itself
*/
for (msg2 = msg+1, i = 0; i < DEST_Q_SIZE; msg2++, i++) {
if (msg2 > mdp->queue_last)
msg2 = mdp->queue_first;
if (msg2 == msg)
break;
/* same conditions for cancellation as do_reset */
if ((msg2->replied_to == 0) && (msg2->canceled == 0) &&
(msg2->swack_vec) && ((msg2->swack_vec &
msg->swack_vec) == 0) &&
(msg2->sending_cpu == msg->sending_cpu) &&
(msg2->msg_type != MSG_NOOP)) {
mmr = read_mmr_sw_ack();
msg_res = msg2->swack_vec;
/*
* This is a message retry; clear the resources held
* by the previous message only if they timed out.
* If it has not timed out we have an unexpected
* situation to report.
*/
if (mmr & (msg_res << UV_SW_ACK_NPENDING)) {
* Is the resource timed out?
* Make everyone ignore the cancelled message.
*/
msg2->canceled = 1;
stat->d_canceled++;
cancel_count++;
mr = (msg_res << UV_SW_ACK_NPENDING) | msg_res;
write_mmr_sw_ack(mr);
}
}
if (!cancel_count)
stat->d_nocanceled++;
}
/*
* Do all the things a cpu should do for a TLB shootdown message.
* Other cpu's may come here at the same time for this message.
*/
static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp,
int do_acknowledge)
{
short socket_ack_count = 0;
short *sp;
struct atomic_short *asp;
struct ptc_stats *stat = bcp->statp;
struct bau_pq_entry *msg = mdp->msg;
struct bau_control *smaster = bcp->socket_master;
/*
* This must be a normal message, or retry of a normal message
*/
if (msg->address == TLB_FLUSH_ALL) {
local_flush_tlb();
} else {
__flush_tlb_one(msg->address);
stat->d_requestee++;
/*
* One cpu on each uvhub has the additional job on a RETRY
* of releasing the resource held by the message that is
* being retried. That message is identified by sending
* cpu number.
*/
if (msg->msg_type == MSG_RETRY && bcp == bcp->uvhub_master)
* This is a swack message, so we have to reply to it.
* Count each responding cpu on the socket. This avoids
* pinging the count's cache line back and forth between
* the sockets.
*/
sp = &smaster->socket_acknowledge_count[mdp->msg_slot];
asp = (struct atomic_short *)sp;
socket_ack_count = atom_asr(1, asp);
if (socket_ack_count == bcp->cpus_in_socket) {
/*
* Both sockets dump their completed count total into
* the message's count.
*/
asp = (struct atomic_short *)&msg->acknowledge_count;
msg_ack_count = atom_asr(socket_ack_count, asp);
if (msg_ack_count == bcp->cpus_in_uvhub) {
/*
* All cpus in uvhub saw it; reply
reply_to_message(mdp, bcp, do_acknowledge);
static int pnode_to_first_cpu(int pnode, struct bau_control *smaster)
struct hub_and_pnode *hpp;
for_each_present_cpu(cpu) {
hpp = &smaster->thp[cpu];
if (pnode == hpp->pnode)
return -1;
}
/*
* Last resort when we get a large number of destination timeouts is
* to clear resources held by a given cpu.
* Do this with IPI so that all messages in the BAU message queue
* can be identified by their nonzero swack_vec field.
* This is entered for a single cpu on the uvhub.
 * The sender wants this uvhub to free a specific message's
 * swack resources.
 */
struct bau_control *bcp = &per_cpu(bau_control, smp_processor_id());
struct reset_args *rap = (struct reset_args *)ptr;
struct bau_pq_entry *msg;
struct ptc_stats *stat = bcp->statp;
stat->d_resets++;
/*
* We're looking for the given sender, and
* If all cpu's finally responded after the timeout, its
* message 'replied_to' was set.
*/
for (msg = bcp->queue_first, i = 0; i < DEST_Q_SIZE; msg++, i++) {
unsigned long msg_res;
/* do_reset: same conditions for cancellation as
bau_process_retry_msg() */
if ((msg->replied_to == 0) &&
(msg->canceled == 0) &&
(msg->sending_cpu == rap->sender) &&
(msg->msg_type != MSG_NOOP)) {
/*
* make everyone else ignore this message
*/
msg->canceled = 1;
/*
* only reset the resource if it is still pending
*/
mmr = read_mmr_sw_ack();
msg_res = msg->swack_vec;
mr = (msg_res << UV_SW_ACK_NPENDING) | msg_res;
if (mmr & msg_res) {
stat->d_rcanceled++;
/*
 * Use IPI to get all target uvhubs to release resources held by
 * a given sending cpu number.
 */
static void reset_with_ipi(struct pnmask *distribution, struct bau_control *bcp)
cpumask_t *mask = bcp->uvhub_master->cpumask;
struct bau_control *smaster = bcp->socket_master;
struct reset_args reset_args;
reset_args.sender = sender;
/* find a single cpu for each uvhub in this distribution mask */
maskbits = sizeof(struct pnmask) * BITSPERBYTE;
/* each bit is a pnode relative to the partition base pnode */
for (pnode = 0; pnode < maskbits; pnode++) {
if (!bau_uvhub_isset(pnode, distribution))
apnode = pnode + bcp->partition_base_pnode;
cpu = pnode_to_first_cpu(apnode, smaster);
/* IPI all cpus; preemption is already disabled */
smp_call_function_many(mask, do_reset, (void *)&reset_args, 1);
/*
* Not to be confused with cycles_2_ns() from tsc.c; this gives a relative
* number, not an absolute. It converts a duration in cycles to a duration in
* ns.
*/
static inline unsigned long long cycles_2_ns(unsigned long long cyc)
struct cyc2ns_data *data = cyc2ns_read_begin();
unsigned long long ns;
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
ns = mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift);
cyc2ns_read_end(data);
return ns;
}
/*
 * Inverse of cycles_2_ns(): convert a duration in ns to a duration in
 * cycles by shifting left by cyc2ns_shift and dividing by cyc2ns_mul.
 */
static inline unsigned long long ns_2_cycles(unsigned long long ns)
{
	struct cyc2ns_data *conv = cyc2ns_read_begin();
	unsigned long long scaled = ns << conv->cyc2ns_shift;
	unsigned long long cycles = scaled / conv->cyc2ns_mul;

	cyc2ns_read_end(conv);

	return cycles;
}
/* Convert a duration in cycles to a duration in microseconds. */
static inline unsigned long cycles_2_us(unsigned long long cyc)
{
	unsigned long long ns = cycles_2_ns(cyc);

	return ns / NSEC_PER_USEC;
}
/* Convert a duration in seconds to a duration in cycles. */
static inline cycles_t sec_2_cycles(unsigned long sec)
{
	unsigned long long cycles = ns_2_cycles(sec * NSEC_PER_SEC);

	return cycles;
}
/* Convert a duration in microseconds to a duration in cycles. */
static inline unsigned long long usec_2_cycles(unsigned long usec)
{
	return ns_2_cycles(usec * NSEC_PER_USEC);
}
* wait for all cpus on this hub to finish their sends and go quiet
* leaves uvhub_quiesce set so that no new broadcasts are started by
* bau_flush_send_and_wait()
*/
static inline void quiesce_local_uvhub(struct bau_control *hmaster)
atom_asr(1, (struct atomic_short *)&hmaster->uvhub_quiesce);
}
/*
* mark this quiet-requestor as done
*/
static inline void end_uvhub_quiesce(struct bau_control *hmaster)
atom_asr(-1, (struct atomic_short *)&hmaster->uvhub_quiesce);
}
/*
 * Read the local MMR at 'mmr_offset' and extract one descriptor's status:
 * the value is shifted right by 'right_shift' and masked with
 * UV_ACT_STATUS_MASK.
 */
static unsigned long uv1_read_status(unsigned long mmr_offset, int right_shift)
{
	unsigned long status = uv_read_local_mmr(mmr_offset);

	status = (status >> right_shift) & UV_ACT_STATUS_MASK;
	return status;
}
/*
 * Wait for completion of a broadcast software ack message
 * return COMPLETE, RETRY(PLUGGED or TIMEOUT) or GIVEUP
 */
static int uv1_wait_completion(struct bau_desc *bau_desc,
unsigned long mmr_offset, int right_shift,
struct bau_control *bcp, long try)
{
unsigned long descriptor_status;
struct ptc_stats *stat = bcp->statp;
descriptor_status = uv1_read_status(mmr_offset, right_shift);
/* spin on the status MMR, waiting for it to go idle */
* Our software ack messages may be blocked because
* there are no swack resources available. As long
* as none of them has timed out hardware will NACK
* our message and its state will stay IDLE.
if (descriptor_status == DS_SOURCE_TIMEOUT) {
stat->s_stimeout++;
return FLUSH_GIVEUP;
} else if (descriptor_status == DS_DESTINATION_TIMEOUT) {
/*
* Our retries may be blocked by all destination
* swack resources being consumed, and a timeout
* pending. In that case hardware returns the
* ERROR that looks like a destination timeout.
*/
if (cycles_2_us(ttm - bcp->send_message) < timeout_us) {
bcp->conseccompletes = 0;
return FLUSH_RETRY_PLUGGED;
}
bcp->conseccompletes = 0;
return FLUSH_RETRY_TIMEOUT;
} else {
/*
* descriptor_status is still BUSY
*/
cpu_relax();
}
descriptor_status = uv1_read_status(mmr_offset, right_shift);
}
bcp->conseccompletes++;
return FLUSH_COMPLETE;
}
/*
 * UV2 could have an extra bit of status in the ACTIVATION_STATUS_2 register.
 * But not currently used.
 */
static unsigned long uv2_read_status(unsigned long offset, int rshft, int desc)
{
unsigned long descriptor_status;
descriptor_status =
((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK) << 1;
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
/*
 * Report whether the descriptor normally used by this cpu (the one indexed
 * by its hub-relative cpu number, bcp->uvhub_cpu) is busy.
 *
 * The status of the original 32 descriptors is always reflected in the 64
 * bits of UVH_LB_BAU_SB_ACTIVATION_STATUS_0, so only that register is read.
 * The bit provided by the activation_status_2 register is irrelevant when
 * the status is only being tested for busy or not busy.
 *
 * Returns nonzero when the status field, shifted left by one, equals
 * UV2H_DESC_BUSY; zero otherwise.
 */
int normal_busy(struct bau_control *bcp)
{
	int mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
	int right_shift = bcp->uvhub_cpu * UV_ACT_STATUS_SIZE;

	return ((((read_lmmr(mmr_offset) >> right_shift) &
				UV_ACT_STATUS_MASK) << 1) == UV2H_DESC_BUSY);
}
/*
 * Entered when a bau descriptor has gone into a permanent busy wait because
 * of a hardware bug.
 * Workaround the bug: count the occurrence in s_uv2_wars, mark this cpu's
 * control structure busy, and tell the caller to give up on the BAU send.
 *
 * Always returns FLUSH_GIVEUP.
 */
int handle_uv2_busy(struct bau_control *bcp)
{
	struct ptc_stats *stat = bcp->statp;

	stat->s_uv2_wars++;
	bcp->busy = 1;
	return FLUSH_GIVEUP;
}
static int uv2_wait_completion(struct bau_desc *bau_desc,
unsigned long mmr_offset, int right_shift,
struct bau_control *bcp, long try)
{
unsigned long descriptor_stat;
cycles_t ttm;
struct ptc_stats *stat = bcp->statp;
descriptor_stat = uv2_read_status(mmr_offset, right_shift, desc);
/* spin on the status MMR, waiting for it to go idle */
if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT)) {
/*
* A h/w bug on the destination side may
* have prevented the message being marked
* pending, thus it doesn't get replied to
* and gets continually nacked until it times
* out with a SOURCE_TIMEOUT.
*/
stat->s_stimeout++;
return FLUSH_GIVEUP;
} else if (descriptor_stat == UV2H_DESC_DEST_TIMEOUT) {
ttm = get_cycles();
/*
* Our retries may be blocked by all destination
* swack resources being consumed, and a timeout
* pending. In that case hardware returns the
* ERROR that looks like a destination timeout.
* Without using the extended status we have to
* deduce from the short time that this was a
* strong nack.
*/
if (cycles_2_us(ttm - bcp->send_message) < timeout_us) {
bcp->conseccompletes = 0;
stat->s_plugged++;
/* FLUSH_RETRY_PLUGGED causes hang on boot */
return FLUSH_GIVEUP;
}
stat->s_dtimeout++;
bcp->conseccompletes = 0;
/* FLUSH_RETRY_TIMEOUT causes hang on boot */
return FLUSH_GIVEUP;
busy_reps++;
if (busy_reps > 1000000) {
/* not to hammer on the clock */
busy_reps = 0;
ttm = get_cycles();
if ((ttm - bcp->send_message) >
descriptor_stat = uv2_read_status(mmr_offset, right_shift,
desc);
bcp->conseccompletes++;
return FLUSH_COMPLETE;
}
/*
* There are 2 status registers; each an array[32] of 2 bits. Set up for
* which register to read and position in that register based on cpu in
* current hub.
*/
static int wait_completion(struct bau_desc *bau_desc,
struct bau_control *bcp, long try)
int right_shift;
unsigned long mmr_offset;
mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
} else {
mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
right_shift = ((desc - UV_CPUS_PER_AS) * UV_ACT_STATUS_SIZE);
if (bcp->uvhub_version == 1)
return uv1_wait_completion(bau_desc, mmr_offset, right_shift,
else
return uv2_wait_completion(bau_desc, mmr_offset, right_shift,
* Our retries are blocked by all destination sw ack resources being
* in use, and a timeout is pending. In that case hardware immediately
* returns the ERROR that looks like a destination timeout.
*/
static void destination_plugged(struct bau_desc *bau_desc,
struct bau_control *bcp,
struct bau_control *hmaster, struct ptc_stats *stat)
{
udelay(bcp->plugged_delay);
bcp->plugged_tries++;
if (bcp->plugged_tries >= bcp->plugsb4reset) {
bcp->plugged_tries = 0;
reset_with_ipi(&bau_desc->distribution, bcp);
spin_unlock(&hmaster->queue_lock);
bcp->ipi_attempts++;
stat->s_resets_plug++;
}
}
static void destination_timeout(struct bau_desc *bau_desc,
struct bau_control *bcp, struct bau_control *hmaster,
struct ptc_stats *stat)
bcp->timeout_tries++;
if (bcp->timeout_tries >= bcp->timeoutsb4reset) {
bcp->timeout_tries = 0;
reset_with_ipi(&bau_desc->distribution, bcp);
spin_unlock(&hmaster->queue_lock);
bcp->ipi_attempts++;
stat->s_resets_timeout++;
}
}
/*
 * Stop all cpus on a uvhub from using the BAU for a period of time.
 * This is reversed by check_enable.
 */
static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat)
int tcpu;
struct bau_control *tbcp;
struct bau_control *hmaster;
cycles_t tm1;
hmaster = bcp->uvhub_master;
spin_lock(&hmaster->disable_lock);
if (!bcp->baudisabled) {
for_each_present_cpu(tcpu) {
tbcp = &per_cpu(bau_control, tcpu);
if (tbcp->uvhub_master == hmaster) {
tbcp->baudisabled = 1;
tbcp->set_bau_on_time =
tm1 + bcp->disabled_period;
}
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
/*
 * Bookkeeping after a send has finished with completion status 'stat'.
 *
 * The plugged and timeout try counters are always cleared.  The uvhub's
 * max_concurr is raised by one only when the send completed, this cpu has
 * more than complete_threshold consecutive completions, and max_concurr is
 * still below max_concurr_const.
 */
static void count_max_concurr(int stat, struct bau_control *bcp,
				struct bau_control *hmaster)
{
	bcp->plugged_tries = 0;
	bcp->timeout_tries = 0;

	if (stat == FLUSH_COMPLETE &&
	    bcp->conseccompletes > bcp->complete_threshold &&
	    hmaster->max_concurr < hmaster->max_concurr_const)
		hmaster->max_concurr++;
}
/*
 * Record statistics for a finished send and decide whether the BAU should
 * be disabled for a period.
 *
 * time1/time2:        cycle counts taken before and after the send
 * bcp:                this cpu's BAU control structure
 * stat:               this cpu's statistics
 * completion_status:  final FLUSH_* status of the send
 * try:                number of tries the send took
 *
 * When time2 is later than time1 the elapsed cycles are added to s_time.
 * For a send that completed on its first try, the elapsed time also feeds
 * the per-period totals; the BAU is disabled via disable_for_period() when
 * this send and the period average both exceed congested_cycles and more
 * than cong_reps requests have been counted.  When time2 is not later than
 * time1, s_requestor is decremented instead.
 *
 * Independently, a completion after more than one try counts as a
 * successful retry, and a give-up is counted against giveup_limit within a
 * window of disabled_period cycles; exceeding the limit also disables the
 * BAU.
 */
static void record_send_stats(cycles_t time1, cycles_t time2,
		struct bau_control *bcp, struct ptc_stats *stat,
		int completion_status, int try)
{
	cycles_t elapsed;

	if (time2 > time1) {
		elapsed = time2 - time1;
		stat->s_time += elapsed;

		if ((completion_status == FLUSH_COMPLETE) && (try == 1)) {
			bcp->period_requests++;
			bcp->period_time += elapsed;
			if ((elapsed > congested_cycles) &&
			    (bcp->period_requests > bcp->cong_reps) &&
			    ((bcp->period_time / bcp->period_requests) >
							congested_cycles)) {
				stat->s_congested++;
				disable_for_period(bcp, stat);
			}
		}
	} else {
		stat->s_requestor--;
	}

	if (completion_status == FLUSH_COMPLETE && try > 1) {
		stat->s_retriesok++;
	} else if (completion_status == FLUSH_GIVEUP) {
		/* start a fresh give-up window once the previous one expired */
		if (get_cycles() > bcp->period_end)
			bcp->period_giveups = 0;
		bcp->period_giveups++;
		/* the first give-up of a window fixes the window's end */
		if (bcp->period_giveups == 1)
			bcp->period_end = get_cycles() + bcp->disabled_period;
		if (bcp->period_giveups > bcp->giveup_limit) {
			disable_for_period(bcp, stat);
			stat->s_giveuplimit++;
		}
	}
}
/*
 * Because of a uv1 hardware bug only a limited number of concurrent
 * requests can be made.
 *
 * Claim a slot in the uvhub's active_descriptor_count, bounded by
 * hmaster->max_concurr.  If the first attempt fails, count one throttle
 * and spin until a slot is obtained.
 */
static void uv1_throttle(struct bau_control *hmaster, struct ptc_stats *stat)
{
	spinlock_t *lock = &hmaster->uvhub_lock;
	atomic_t *count = &hmaster->active_descriptor_count;

	if (atomic_inc_unless_ge(lock, count, hmaster->max_concurr))
		return;

	stat->s_throttles++;
	do {
		cpu_relax();
	} while (!atomic_inc_unless_ge(lock, count, hmaster->max_concurr));
}
/*
 * Handle the completion status of a message send.
 * FLUSH_RETRY_PLUGGED is passed to destination_plugged() and
 * FLUSH_RETRY_TIMEOUT to destination_timeout(); any other status needs no
 * action here.
 */
static void handle_cmplt(int completion_status, struct bau_desc *bau_desc,
			struct bau_control *bcp, struct bau_control *hmaster,
			struct ptc_stats *stat)
{
	if (completion_status == FLUSH_RETRY_PLUGGED) {
		destination_plugged(bau_desc, bcp, hmaster, stat);
		return;
	}

	if (completion_status == FLUSH_RETRY_TIMEOUT)
		destination_timeout(bau_desc, bcp, hmaster, stat);
}
/*
* Send a broadcast and wait for it to complete.
* The flush_mask contains the cpus the broadcast is to be sent to including
* cpus that are on the local uvhub.
* Returns 0 if all flushing represented in the mask was done.
* Returns 1 if it gives up entirely and the original cpu mask is to be
* returned to the kernel.
int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp,
struct bau_desc *bau_desc)
int uv1 = 0;
cycles_t time1;
cycles_t time2;
struct ptc_stats *stat = bcp->statp;
struct bau_control *hmaster = bcp->uvhub_master;
struct uv1_bau_msg_header *uv1_hdr = NULL;
struct uv2_bau_msg_header *uv2_hdr = NULL;
if (bcp->uvhub_version == 1) {
uv1 = 1;
while (hmaster->uvhub_quiesce)
cpu_relax();
time1 = get_cycles();
if (uv1)
uv1_hdr = &bau_desc->header.uv1_hdr;
else
uv2_hdr = &bau_desc->header.uv2_hdr;
if (uv1)
uv1_hdr->msg_type = MSG_REGULAR;
else
uv2_hdr->msg_type = MSG_REGULAR;
seq_number = bcp->message_number++;
} else {
if (uv1)
uv1_hdr->msg_type = MSG_RETRY;
else
uv2_hdr->msg_type = MSG_RETRY;
stat->s_retry_messages++;
}
if (uv1)
uv1_hdr->sequence = seq_number;
else
uv2_hdr->sequence = seq_number;
index = (1UL << AS_PUSH_SHIFT) | bcp->uvhub_cpu;
bcp->send_message = get_cycles();
completion_stat = wait_completion(bau_desc, bcp, try);
handle_cmplt(completion_stat, bau_desc, bcp, hmaster, stat);
if (bcp->ipi_attempts >= bcp->ipi_reset_limit) {
bcp->ipi_attempts = 0;
break;
}
cpu_relax();
} while ((completion_stat == FLUSH_RETRY_PLUGGED) ||
(completion_stat == FLUSH_RETRY_TIMEOUT));
time2 = get_cycles();
count_max_concurr(completion_stat, bcp, hmaster);
while (hmaster->uvhub_quiesce)
cpu_relax();
atomic_dec(&hmaster->active_descriptor_count);
record_send_stats(time1, time2, bcp, stat, completion_stat, try);
if (completion_stat == FLUSH_GIVEUP)
/* FLUSH_GIVEUP will fall back to using IPI's for tlb flush */
* The BAU is disabled for this uvhub. When the disabled time period has
* expired re-enable it.
* Return 0 if it is re-enabled for all cpus on this uvhub.
*/
static int check_enable(struct bau_control *bcp, struct ptc_stats *stat)
{
int tcpu;
struct bau_control *tbcp;
hmaster = bcp->uvhub_master;
spin_lock(&hmaster->disable_lock);
if (bcp->baudisabled && (get_cycles() >= bcp->set_bau_on_time)) {
stat->s_bau_reenabled++;
for_each_present_cpu(tcpu) {
tbcp = &per_cpu(bau_control, tcpu);
if (tbcp->uvhub_master == hmaster) {
tbcp->baudisabled = 0;
tbcp->period_requests = 0;
tbcp->period_time = 0;
spin_unlock(&hmaster->disable_lock);
return 0;
return -1;
}
static void record_send_statistics(struct ptc_stats *stat, int locals, int hubs,
int remotes, struct bau_desc *bau_desc)
{
stat->s_requestor++;
stat->s_ntargcpu += remotes + locals;
stat->s_ntargremotes += remotes;
stat->s_ntarglocals += locals;