bakery_lock.c - pub/scm/linux/kernel/git/maz/boot-wrapper-aarch64 - Git at Google

 /*
  * bakery_lock.c - Lamport's bakery algorithm
  *
  * Copyright (C) 2015 ARM Limited. All rights reserved.
  *
  * Use of this source code is governed by a BSD-style license that can be
  * found in the LICENSE.txt file.
  *
  *
  * Simplest implementation of Lamport's bakery lock [1]. Applies only to device
  * memory with attributes non-gathering and non-reordering.
  *
  * This algorithm's strength resides in the fact that it doesn't rely on
  * hardware synchronisation mechanisms and as such, doesn't require normal
  * cacheable memory on ARMv8. CPUs write only to their own memory locations,
  * and read from all other CPUs' ones, in order to decide whose turn it is to
  * have the lock.
  *
  * The algorithm correctness is based on the following assumptions:
  *
  * 1) Accesses to choosing[k] (here tickets[k].choosing) are done atomically.
  *    In other words, simultaneous read and write to choosing[k] do not occur.
  *    In this implementation, it is guaranteed by single-copy atomicity, for
  *    accesses of type Device with non-gathering attributes. The algorithm
  *    doesn't require accesses to number[k] to be atomic, even though this
  *    implementation guarantees that as well.
  *
  * 2) Storage of number[k] allows it to become large enough for practical use of
  *    the lock. Indeed, if the lock is contended all of the time, the value of
  *    max(number[1..N]) will keep increasing, and this algorithm doesn't handle
  *    wrapping of the ticket number. In this implementation, we assume that we
  *    will never reach 32767 (0x7fff) overlapping calls to bakery_lock.
  *
  * [1] Lamport, L. "A New Solution of Dijkstra's Concurrent Programming Problem"
  */

 #include <bakery_lock.h>
 #include <cpu.h>

 /*
  * Lexicographic "less than" on (number, cpu) pairs.
  *
  * Returns non-zero when (number_a, cpu_a) < (number_b, cpu_b), zero
  * otherwise. Ticket numbers are compared first; the CPU IDs only break a tie
  * between equal numbers.
  */
 static unsigned int less_than(unsigned long cpu_a, unsigned long number_a,
 			      unsigned long cpu_b, unsigned long number_b)
 {
 	return (number_a == number_b) ? (cpu_a < cpu_b)
 				      : (number_a < number_b);
 }

 /*
  * Pick a ticket number for the calling CPU.
  *
  * Scans the ticket of every CPU except @self and returns one more than the
  * largest number seen (so 1 when every other ticket number is zero).
  */
 static unsigned int choose_number(bakery_ticket_t *tickets, unsigned self)
 {
 	unsigned int highest = 0;
 	int other;

 	for (other = 0; other < NR_CPUS; other++) {
 		bakery_ticket_t seen;

 		if (other != self) {
 			/* Single read of the other CPU's ticket */
 			seen = read_ticket_once(tickets[other]);

 			if (seen.number > highest)
 				highest = seen.number;
 		}
 	}

 	return highest + 1;
 }

 /**
  * Wait for our turn to enter a critical section
  *
  * @tickets: array of size NR_CPUS, indexed by logical IDs.
  * @self:    logical ID of the current CPU
  *
  * Works in two phases: the doorway, where the calling CPU takes a ticket
  * number, and the bakery, where it visits every other CPU in turn and, if
  * that CPU holds a lower (number, cpu) pair, waits for its ticket number to
  * change.
  *
  * Note: since this implementation assumes that all loads and stores to tickets
  * are of Device type with non-gathering and non-reordering attributes, we
  * expect all of them to be performed, in program order. As a result, the
  * following function is pretty relaxed in terms of barriers: we only
  * synchronize before sev(), and introduce system-wide memory barriers around
  * the critical section.
  */
 void bakery_lock(bakery_ticket_t *tickets, unsigned self)
 {
 	int cpu, number_self;
 	bakery_ticket_t ticket;

 	/*
 	 * Doorway: mark our ticket as "choosing" (with number 0), pick a
 	 * number above every other CPU's current one, then publish it with
 	 * the choosing flag cleared.
 	 * NOTE(review): write_ticket_once() is defined outside this file; its
 	 * argument order is assumed to be (ticket, choosing, number).
 	 */
 	write_ticket_once(tickets[self], 1, 0);
 	number_self = choose_number(tickets, self);
 	write_ticket_once(tickets[self], 0, number_self);

 	/*
 	 * Synchronize the ticket stores, then send an event so that CPUs
 	 * sitting in wfe() below re-read our ticket.
 	 */
 	dsb(st);
 	sev();

 	/* Bakery */
 	for (cpu = 0; cpu < NR_CPUS; cpu++) {
 		uint16_t number_cpu;

 		if (cpu == self)
 			continue;

 		/* Let that CPU finish choosing before looking at its number */
 		ticket = read_ticket_once(tickets[cpu]);
 		while (ticket.choosing) {
 			wfe();
 			ticket = read_ticket_once(tickets[cpu]);
 		}

 		number_cpu = ticket.number;

 		/*
 		 * Wait until that CPU updates its ticket. We only need to do
 		 * the comparison once, since any update to tickets[cpu].number
 		 * will be to a value greater than ours, or zero.
 		 *
 		 * A number of zero means that CPU holds no ticket, so there is
 		 * nothing to wait for.
 		 */
 		if (number_cpu != 0 && less_than(cpu,  number_cpu,
 						 self, number_self)) {
 			do {
 				wfe();
 				ticket = read_ticket_once(tickets[cpu]);
 			} while (number_cpu == ticket.number);
 		}
 	}

 	/* System-wide barrier between taking the lock and the critical section */
 	dmb(sy);
 }

 /**
  * Leave the critical section and give up our ticket
  *
  * @tickets: array of tickets, indexed by logical IDs (same array as the one
  *           passed to bakery_lock).
  * @self:    logical ID of the current CPU
  */
 void bakery_unlock(bakery_ticket_t *tickets, unsigned self)
 {
 	/* System-wide barrier between the critical section and the release */
 	dmb(sy);

 	/* Reset our ticket: both fields written as zero */
 	write_ticket_once(tickets[self], 0, 0);

 	/*
 	 * Synchronize the ticket store, then send an event so that CPUs
 	 * waiting in wfe() inside bakery_lock re-read our ticket.
 	 */
 	dsb(st);
 	sev();
 }
	/*
	* bakery_lock.c - Lamport's bakery algorithm
	*
	* Copyright (C) 2015 ARM Limited. All rights reserved.
	*
	* Use of this source code is governed by a BSD-style license that can be
	* found in the LICENSE.txt file.
	*
	*
	* Simplest implementation of Lamport's bakery lock [1]. Applies only to device
	* memory with attributes non-gathering and non-reordering.
	*
	* This algorithm's strength resides in the fact that it doesn't rely on
	* hardware synchronisation mechanisms and as such, doesn't require normal
	* cacheable memory on ARMv8. CPUs write only to their own memory locations,
	* and read from all other CPUs' ones, in order to decide whose turn it is to
	* have the lock.
	*
	* The algorithm correctness is based on the following assumptions:
	*
	* 1) Accesses to choosing[k] (here tickets[k].choosing) are done atomically.
	* In other words, simultaneous read and write to choosing[k] do not occur.
	* In this implementation, it is guaranteed by single-copy atomicity, for
	* accesses of type Device with non-gathering attributes. The algorithm
	* doesn't require accesses to number[k] to be atomic, even though this
	* implementation guarantees that as well.
	*
	* 2) Storage of number[k] allows it to become large enough for practical use of
	* the lock. Indeed, if the lock is contended all of the time, the value of
	* max(number[1..N]) will keep increasing, and this algorithm doesn't handle
	* wrapping of the ticket number. In this implementation, we assume that we
	* will never reach 32767 (0x7fff) overlapping calls to bakery_lock.
	*
	* [1] Lamport, L. "A New Solution of Dijkstra's Concurrent Programming Problem"
	*/

	#include <bakery_lock.h>
	#include <cpu.h>

	/*
	 * Lexicographic "less than" on (number, cpu) pairs.
	 *
	 * Returns non-zero when (number_a, cpu_a) < (number_b, cpu_b), zero
	 * otherwise. Ticket numbers are compared first; the CPU IDs only break a
	 * tie between equal numbers.
	 */
	static unsigned int less_than(unsigned long cpu_a, unsigned long number_a,
				      unsigned long cpu_b, unsigned long number_b)
	{
		return (number_a == number_b) ? (cpu_a < cpu_b)
					      : (number_a < number_b);
	}

	/*
	 * Pick a ticket number for the calling CPU.
	 *
	 * Scans the ticket of every CPU except @self and returns one more than the
	 * largest number seen (so 1 when every other ticket number is zero).
	 */
	static unsigned int choose_number(bakery_ticket_t *tickets, unsigned self)
	{
		unsigned int highest = 0;
		int other;

		for (other = 0; other < NR_CPUS; other++) {
			bakery_ticket_t seen;

			if (other != self) {
				/* Single read of the other CPU's ticket */
				seen = read_ticket_once(tickets[other]);

				if (seen.number > highest)
					highest = seen.number;
			}
		}

		return highest + 1;
	}

	/**
	* Wait for our turn to enter a critical section
	*
	* @tickets: array of size NR_CPUS, indexed by logical IDs.
	* @self: logical ID of the current CPU
	*
	* Works in two phases: the doorway, where the calling CPU takes a ticket
	* number, and the bakery, where it visits every other CPU in turn and, if
	* that CPU holds a lower (number, cpu) pair, waits for its ticket number to
	* change.
	*
	* Note: since this implementation assumes that all loads and stores to tickets
	* are of Device type with non-gathering and non-reordering attributes, we
	* expect all of them to be performed, in program order. As a result, the
	* following function is pretty relaxed in terms of barriers: we only
	* synchronize before sev(), and introduce system-wide memory barriers around
	* the critical section.
	*/
	void bakery_lock(bakery_ticket_t *tickets, unsigned self)
	{
	int cpu, number_self;
	bakery_ticket_t ticket;

	/*
	* Doorway: mark our ticket as "choosing" (with number 0), pick a
	* number above every other CPU's current one, then publish it with
	* the choosing flag cleared.
	* NOTE(review): write_ticket_once() is defined outside this file; its
	* argument order is assumed to be (ticket, choosing, number).
	*/
	write_ticket_once(tickets[self], 1, 0);
	number_self = choose_number(tickets, self);
	write_ticket_once(tickets[self], 0, number_self);

	/*
	* Synchronize the ticket stores, then send an event so that CPUs
	* sitting in wfe() below re-read our ticket.
	*/
	dsb(st);
	sev();

	/* Bakery */
	for (cpu = 0; cpu < NR_CPUS; cpu++) {
	uint16_t number_cpu;

	if (cpu == self)
	continue;

	/* Let that CPU finish choosing before looking at its number */
	ticket = read_ticket_once(tickets[cpu]);
	while (ticket.choosing) {
	wfe();
	ticket = read_ticket_once(tickets[cpu]);
	}

	number_cpu = ticket.number;

	/*
	* Wait until that CPU updates its ticket. We only need to do
	* the comparison once, since any update to tickets[cpu].number
	* will be to a value greater than ours, or zero.
	*
	* A number of zero means that CPU holds no ticket, so there is
	* nothing to wait for.
	*/
	if (number_cpu != 0 && less_than(cpu, number_cpu,
	self, number_self)) {
	do {
	wfe();
	ticket = read_ticket_once(tickets[cpu]);
	} while (number_cpu == ticket.number);
	}
	}

	/* System-wide barrier between taking the lock and the critical section */
	dmb(sy);
	}

	/**
	* Leave the critical section and give up our ticket
	*
	* @tickets: array of tickets, indexed by logical IDs (same array as the one
	* passed to bakery_lock).
	* @self: logical ID of the current CPU
	*/
	void bakery_unlock(bakery_ticket_t *tickets, unsigned self)
	{
	/* System-wide barrier between the critical section and the release */
	dmb(sy);

	/* Reset our ticket: both fields written as zero */
	write_ticket_once(tickets[self], 0, 0);

	/*
	* Synchronize the ticket store, then send an event so that CPUs
	* waiting in wfe() inside bakery_lock re-read our ticket.
	*/
	dsb(st);
	sev();
	}