blob: 877118c9ed718cacd08160f2814f252ae8bf4ee8 [file] [log] [blame]
/*
* bakery_lock.c - Lamport's bakery algorithm
*
* Copyright (C) 2015 ARM Limited. All rights reserved.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE.txt file.
*
*
* Simplest implementation of Lamport's bakery lock [1]. Applies only to device
* memory with attributes non-gathering and non-reordering.
*
* This algorithm's strength resides in the fact that it doesn't rely on
* hardware synchronisation mechanisms and as such, doesn't require normal
* cacheable memory on ARMv8. CPUs write only to their own memory locations,
* and read from all other CPUs' ones, in order to decide whose turn it is to
* have the lock.
*
* The algorithm correctness is based on the following assumptions:
*
* 1) Accesses to choosing[k] (here tickets[k].choosing) are done atomically.
* In other words, simultaneous read and write to choosing[k] do not occur.
* In this implementation, it is guaranteed by single-copy atomicity, for
* accesses of type Device with non-gathering attributes. The algorithm
* doesn't require accesses to number[k] to be atomic, even though this
* implementation guarantees that as well.
*
* 2) Storage of number[k] allows it to become large enough for practical use of
* the lock. Indeed, if the lock is contended all of the time, the value of
* max(number[1..N]) will keep increasing, and this algorithm doesn't handle
* wrapping of the ticket number. In this implementation, we assume that we
* will never reach 32766 (0x7fff) overlapping calls to bakery_lock.
*
* [1] Lamport, L. "A New Solution of Dijkstra's Concurrent Programming Problem"
*/
#include <bakery_lock.h>
#include <cpu.h>
/*
* Return the result of (number_a, cpu_a) < (number_b, cpu_b)
*/
static unsigned int less_than(unsigned long cpu_a, unsigned long number_a,
unsigned long cpu_b, unsigned long number_b)
{
if (number_a == number_b)
return cpu_a < cpu_b;
return number_a < number_b;
}
static unsigned int choose_number(bakery_ticket_t *tickets, unsigned self)
{
int cpu;
unsigned int max_number = 0;
bakery_ticket_t ticket;
for (cpu = 0; cpu < NR_CPUS; cpu++) {
if (cpu == self)
continue;
ticket = read_ticket_once(tickets[cpu]);
if (max_number < ticket.number)
max_number = ticket.number;
}
return 1 + max_number;
}
/**
* Wait for our turn to enter a critical section
*
* @tickets: array of size NR_CPUS, indexed by logical IDs.
* @self: logical ID of the current CPU
*
* Note: since this implementation assumes that all loads and stores to tickets
* are of Device type with non-gathering and non-reordering attributes, we
* expect all of them to be performed, in program order. As a result, the
* following function is pretty relaxed in terms of barriers: we only
* synchronize before sev(), and introduce system-wide memory barriers around
* the critical section.
*/
void bakery_lock(bakery_ticket_t *tickets, unsigned self)
{
int cpu, number_self;
bakery_ticket_t ticket;
/* Doorway */
write_ticket_once(tickets[self], 1, 0);
number_self = choose_number(tickets, self);
write_ticket_once(tickets[self], 0, number_self);
dsb(st);
sev();
/* Bakery */
for (cpu = 0; cpu < NR_CPUS; cpu++) {
uint16_t number_cpu;
if (cpu == self)
continue;
ticket = read_ticket_once(tickets[cpu]);
while (ticket.choosing) {
wfe();
ticket = read_ticket_once(tickets[cpu]);
}
number_cpu = ticket.number;
/*
* Wait until that CPU updates its ticket. We only need to do
* the comparison once, since any update to tickets[cpu].number
* will be to a value greater than ours, or zero.
*/
if (number_cpu != 0 && less_than(cpu, number_cpu,
self, number_self)) {
do {
wfe();
ticket = read_ticket_once(tickets[cpu]);
} while (number_cpu == ticket.number);
}
}
dmb(sy);
}
void bakery_unlock(bakery_ticket_t *tickets, unsigned self)
{
dmb(sy);
write_ticket_once(tickets[self], 0, 0);
dsb(st);
sev();
}