Lesson 02 - Sorting Algorithms

Introduction

Sorting is a much more in-depth topic than you might expect. There are dozens of algorithms, each with different pros, cons, and trade-offs. We'll go over five of the most important here, analyzing their performance characteristics and why you might or might not want to use them.

The first four sorts are comparison sorts, meaning they operate based on comparing elements to each other. The theoretical minimum (worst-case) time complexity for this class of algorithms is O(n*log(n)). The fifth algorithm is not a comparison sort. This is because it makes some assumptions about the data - that it consists of positive integers.

Bubble Sort

Animation

void bubbleSort(vector<int>& data) {
	bool swapped;										// O(1)
	do {												// O(n)
		swapped = false;									// O(1)
		for(unsigned int i = 1; i < data.size(); i++) {				// O(n)
			if(data[i-1] > data[i]) {							// O(1)
				int temp = data[i];							// O(1)
				data[i] = data[i-1];							// O(1)
				data[i-1] = temp;							// O(1)
				swapped = true;							// O(1)
			}
		}
	} while(swapped);										// O(1)

	// Result : O(n^2) | Ω(n)
}

Insertion Sort

Animation

void insertionSort(vector<int>& data) {
	for(unsigned int i = 1; i < data.size(); i++) {		// O(n)
		int j = i;									// O(1)
		while(j > 0) {								// O(n)
			if (data[j - 1] > data[j]) {						// O(1)					
				int temp = data[j - 1];					// O(1)
				data[j - 1] = data[j];						// O(1)
				data[j] = temp;							// O(1)
				j--;									// O(1)
			}
			else {
				j = 0;								// O(1)
			}
		}
	}

	// Result : O(n^2) | Ω(n)
}

Quicksort

Animation

void quickSort(vector<int>& data, int first, int last) {
	if(last > first + 1) {						// O(1)
		int index = partition(data, first, last);	// O(n)
		quickSort(data, first, index);			// O(?)
		quickSort(data, index + 1, last);		// O(?)
	}

	// Result : because the recursive calls divide-and-conquer, the function is called log(n) times, and each iteration costs n, so O(n*log(n)). 
	// However, that's assuming a good partition - theoretically, the partition could take one element at a time, so the real worst case - where
	// the function is called n times, is O(n^2). However, the average case is still Θ(n*log(n)).
}

int partition(vector<int>& data, int first, int last) {
	int pivotIndex = first;							// O(1)
	for (int index = first; index < last; index++) {			// O(n)
		if (data[index] < data[pivotIndex]) {					// O(1)
			data.insert(data.begin() + first, data[index]);		// O(1)
			data.erase(data.begin() + index + 1);			// O(1)
			pivotIndex++;								// O(1)
		}
		else if(data[index] > data[pivotIndex]) {				// O(1)
			data.insert(data.begin() + last, data[index]);		// O(1)
			data.erase(data.begin() + index);				// O(1)
			last--;									// O(1)
			index--;									// O(1)
		}
	}
	return pivotIndex;

	// Result : O(n)
}

Merge Sort

Animation

vector<int> mergeSort(const vector<int>& data) {
	if(data.size() > 1) {
		vector<int> one = mergeSort(vector<int>(data.begin(), data.begin() + data.size() / 2), result);	// O(?)
		vector<int> two = mergeSort(vector<int>(data.begin() + data.size() / 2, data.end()), result);		// O(?)
		return merge(one, two, result);														// O(n)
	} else {
		return data;																		// O(1)
	}

	// Result : because the recursive calls are divide-and-conquer (every time), the function is called log(n) times, giving O(n*log(n))
	// However, because we are creating a new vector to return the answer in, memory usage is O(n) instead of O(1) like the others.
}

vector<int> merge(vector<int>& one, vector<int>& two) {
	vector<int> ret;
	while(one.size() || two.size()) {						// O(n)
		result.swaps++;
		if(!one.size()) {										// O(1)
			ret.insert(ret.end(), two.begin(), two.end());			// O(1)
			return ret;									// O(1)
		} else if(!two.size()) {								// O(1)
			ret.insert(ret.end(), one.begin(), one.end());			// O(1)
			return ret;									// O(1)
		} else if(one[0] <= two[0]) {							// O(1)
			ret.push_back(one[0]);							// O(1)
			one.erase(one.begin());							// O(1)
		} else {												
			ret.push_back(two[0]);							// O(1)
			two.erase(two.begin());							// O(1)
		}													
		result.comparisons++;								// O(1)
	}
	return ret;

	// Result : O(n)
}

Radix Sort

Animation (Click 'radix' - also includes other sorts.)

void radixSort(vector<int>& data) {
	// 10 buckets for base 10 numbers
	vector<vector<int>> buckets(10);
	int digits = getDigits(data);								// O(n)

	for(int i = 0; i < digits; i++) {								// O(digits) ~ O(1)
		for(int num : data) {										// O(n)
			buckets[(num / (int)pow(base,i)) % base].push_back(num);		// O(1)
		}

		data.clear();											// O(1)

		for(vector<int>& bucket : buckets) {						// O(base) = O(10) = O(1)
			data.insert(data.end(), bucket.begin(), bucket.end());			// O(1)
			bucket.clear();											// O(1)
		}
	}

	// Result : O(n)
}

int getDigits(const vector<int>& data) {
	int max = data[0], digits = 0;		// O(1)
	for(int i : data) {				// O(n)
		if(i > max) max = i;				// O(1)
	}								// O(1)
	for(; max != 0; max /= 10) {		// O(digits) ~ O(1)
		digits++;						// O(1)
	}
	return digits;

	// Result : O(n)
}