A review of searching and sorting algorithms

1, Binary search algorithm

1. Non-recursive code

// Returns the index of val if found, -1 otherwise
int binary_search(const vector<int>& vec, int val) {
	int left = 0;
	int right = vec.size() - 1;
	while (left <= right) {
		int mid = (left + right) >> 1;
		if (vec[mid] == val) {
			return mid;
		}
		else if (vec[mid] < val) {
			left = mid + 1;
		}
		else {
			right = mid - 1;
		}
	}
	return -1;
}

2. Recursive code

Characteristics of a recursive solution:

  1. The way the problem is solved is the same no matter how large the data is
  2. There is a recurrence formula

Notes for writing a recursive function:

  1. Be clear about what the recursive function means: its parameters, its return value, and what job it completes
  2. The recursion must have a termination condition
  3. For each problem size, write out the relationship to the smaller size, i.e. the recurrence formula

Think about a recursive problem "horizontally" (the recurrence relation between problem sizes); the execution unfolds "vertically" (down and back up the call stack).
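As a minimal illustration of these points (the factorial example is an addition, not from the original text): a clear meaning, a termination condition, and a recurrence formula.

// Meaning: returns n! for n >= 0 (illustrative example)
long long factorial(int n) {
	// Termination condition
	if (n <= 1) {
		return 1;
	}
	// Recurrence formula: n! = n * (n - 1)!
	return n * factorial(n - 1);
}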

// Searches for val in the interval [left, right]; returns its index if found, otherwise -1
int recur_binary_search(const vector<int>& vec, int left, int right, int val) {
	// Recursive end condition
	if (left > right) {
		return -1;
	}
	int mid = (left + right) >> 1;
	if (vec[mid] == val) {
		return mid; // Return index found
	}
	else if (vec[mid] > val) {
		return recur_binary_search(vec, left, mid - 1, val); // Search for val in [left, mid - 1]; return its index if found, otherwise -1
	}
	else {
		return recur_binary_search(vec, mid + 1, right, val); // Search for val in [mid + 1, right]; return its index if found, otherwise -1
	}
}
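A quick usage sketch of both versions; the array must already be sorted in ascending order, and main and the sample data here are illustrative additions.

#include <iostream>
#include <vector>
using namespace std;

int main() {
	vector<int> vec = {1, 3, 5, 7, 9, 11};  // must already be sorted
	cout << binary_search(vec, 7) << endl;                           // prints 3
	cout << recur_binary_search(vec, 0, vec.size() - 1, 7) << endl;  // prints 3
	cout << binary_search(vec, 4) << endl;                           // prints -1
	return 0;
}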

2, Bubble sort

void bubble_sort(vector<int>& vec) {
	if (vec.size() < 2) {
		return;
	}
	// The outer loop controls the number of passes; n elements need at most n - 1 passes
	// (the last remaining element is already in place)
	for (int i = 0; i < vec.size() - 1; i++) {
		// Assume no elements are swapped during this pass
		bool is_swap = false;
		// Each pass places one more element at the end, so subtract i;
		// subtract 1 because vec[j] is compared with vec[j + 1]
		for (int j = 0; j < vec.size() - 1 - i; j++) {
			if (vec[j] > vec[j + 1]) {
				int tmp = vec[j];
				vec[j] = vec[j + 1];
				vec[j + 1] = tmp;
				is_swap = true;
			}
		}
		// After this pass, check whether any elements were swapped; if not, the array is already sorted
		if (!is_swap) {
			break;
		}
	}
}

Bubble sort is inefficient because it performs many element swaps, but it can terminate early once a full pass makes no swap.

3, Selection sort

void choice_sort(vector<int>& vec) {
	if (vec.size() < 2) {
		return;
	}
	// The outer loop controls the number of passes
	for (int i = 0; i < vec.size() - 1; i++) {
		int min_val = vec[i]; // The current minimum value
		int min_i = i;        // The position of the current minimum value
		// Find the smallest element in the unsorted part and record it for the swap
		for (int j = i+1; j < vec.size(); j++) {
			if (vec[j] < min_val) {
				min_val = vec[j];
				min_i = j;
			}
		}
		if (min_i != i) {
			int tmp = vec[i];
			vec[i] = vec[min_i];
			vec[min_i] = tmp;
		}
	}	
}

Selection sort is slightly more efficient than bubble sort because it does far fewer assignments (at most one swap per pass), but it still does many comparisons, and unlike bubble sort it cannot terminate early.

Selection sort is unstable: for example, for the sequence 5, 5, 3, the first 5 is swapped with 3, giving 3, 5, 5, so the two 5s have changed their relative order.

4, Insertion sort

If the data is already nearly sorted, insertion sort is the most efficient of all these sorting algorithms: not only are there no swaps, there are also very few comparisons.

Algorithm idea: the prefix of the array is kept sorted. Take the first element of the unsorted part and insert it into the sorted part, shifting elements to the right while searching; stop at the first element that is less than or equal to it (to preserve stability) and place it just after that element.

void insert_sort(vector<int>& vec) {
	if (vec.size() < 2) {
		return;
	}
	for (int i = 1; i < vec.size(); i++) {
		int val = vec[i];
		int j;
		// Find, in the sorted part, the first element that is less than or equal to the current element
		for (j = i - 1; j >= 0; j--) {
			// Note: compare with val, not vec[i]; vec[i] may already have been overwritten by the shifting
			if (vec[j] <= val) {
				break;
			}
			else {
				vec[j + 1] = vec[j];
			}
		}
		vec[j + 1] = val;
	}
}

5, Shell sort

Insertion sort is most efficient when the data is nearly sorted. Shell sort first moves the data toward order from a global point of view (with large gaps), then exploits this property of insertion sort in the final pass (gap = 1).

void shell_sort(vector<int>& vec) {
	if (vec.size() < 2) {
		return;
	}
	for (int gap = vec.size() >> 1; gap > 0; gap >>= 1) {
		for (int i = gap; i < vec.size(); i++) {
			int val = vec[i];
			int j;
			// Find, in this gap subsequence, the first element that is less than or equal to the current element
			for (j = i - gap; j >= 0; j -= gap) {
				// Note: compare with val, not vec[i]; vec[i] may already have been overwritten
				if (vec[j] <= val) {
					break;
				}
				else {
					vec[j + gap] = vec[j];
				}
			}
			vec[j + gap] = val;
		}
	}
}

6, Quick sort

It can be seen that quick sort's recursion forms a binary tree: the more balanced this tree, the fewer levels it has. When the data is uniformly distributed and in random order, the tree is more balanced and quick sort is more efficient; if the tree is unbalanced, quick sort becomes inefficient.

int partition(vector<int>& vec, int l, int r) {
	int val = vec[l];
	while (l < r) {
		while (l < r && vec[r] > val) {
			r--;
		}
		if (l < r) {
			vec[l] = vec[r];
			l++;
		}
		while (l < r && vec[l] < val) {
			l++;
		}
		if (l < r) {
			vec[r] = vec[l];
			r--;
		}
	}
	// Here l == r, the loop exits
	vec[l] = val;
	return l;
}

// This is a recursive function. Its job is to sort the data in the interval [begin, end]
void quick_sort(vector<int>& vec, int begin, int end) {
	// Write the termination condition first
	// An interval with fewer than two elements needs no sorting
	if (begin >= end) {
		return;
	}
	// The partition lets the first element of [begin, end] find its final sorted position pos
	// Divide first: place the pivot element at its correct position
	int pos = partition(vec, begin, end);
	// Then recurse on the two sub-intervals
	quick_sort(vec, begin, pos-1);
	quick_sort(vec, pos+1, end);
}
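A possible driver for the functions above; the whole-vector wrapper below is an illustrative addition, not part of the original.

// Convenience overload: sorts the entire vector
void quick_sort(vector<int>& vec) {
	if (vec.size() < 2) {
		return;
	}
	quick_sort(vec, 0, vec.size() - 1);
}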

Average time complexity: O(nlogn)
Worst time complexity: when the sequence is already ordered, every node of the recursion tree except the leaves has only one child, giving O(n^2)
Space complexity: mainly the stack frames created during recursion, O(logn)
Worst space complexity: likewise, O(n)
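These bounds follow from standard recurrences (a derivation added here, not spelled out in the original): with balanced partitions, T(n) = 2T(n/2) + cn, which solves to O(nlogn); with an already-sorted input and a first-element pivot, one side of every partition is empty, so T(n) = T(n-1) + cn = c(n + (n-1) + ... + 1) = O(n^2).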

Optimizations of the quick sort algorithm

Optimization 1: as quick sort recurses, the working intervals become smaller and the data becomes more and more ordered. Using insertion sort on these small intervals improves efficiency (an interval version of insert_sort is sketched after the code below).

void quick_sort(vector<int>& vec, int begin, int end) {
	if (begin >= end) {
		return;
	}
	// COUNT is the total data size; hand small intervals over to insertion sort
	if (end - begin < COUNT / 10) {
		insert_sort(vec, begin, end);
		return;
	}
	int pos = partition(vec, begin, end);
	quick_sort(vec, begin, pos-1);
	quick_sort(vec, pos+1, end);
}
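The call above assumes an interval version of insert_sort (the version in section 4 sorts the whole vector); a minimal sketch of such a helper, added here for completeness:

// Insertion sort on the closed interval [begin, end] (illustrative helper)
void insert_sort(vector<int>& vec, int begin, int end) {
	for (int i = begin + 1; i <= end; i++) {
		int val = vec[i];
		int j;
		for (j = i - 1; j >= begin; j--) {
			if (vec[j] <= val) {
				break;
			}
			vec[j + 1] = vec[j];
		}
		vec[j + 1] = val;
	}
}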

Optimization 2: median-of-three pivot selection

void right_order(const vector<int>& vec, int& small, int& mid, int& big) {
	int tmp;
	if (vec[small] > vec[mid]) { 
		tmp = small; 
		small = mid;
		mid = tmp; 
	}

	if (vec[small] > vec[big]) {
		tmp = small;
		small = big;
		big = tmp; 
	}

	if (vec[mid] > vec[big]) {
		tmp = mid; 
		mid = big; 
		big = tmp; 
	}
}

int partition(vector<int>& vec, int l, int r) {
	int small = l;
	int mid = (l + r) >> 1;
	int big = r;

	// After this call, vec[small] <= vec[mid] <= vec[big]
	right_order(vec, small, mid, big); 

	int val = vec[mid];
	if (l != mid) {
		int tmp = vec[l];
		vec[l] = vec[mid];
		vec[mid] = tmp;
	}

	while (l < r) {
		while (l < r && vec[r] > val) {
			r--;
		}
		if (l < r) {
			vec[l] = vec[r];
			l++;
		}
		while (l < r && vec[l] < val) {
			l++;
		}
		if (l < r) {
			vec[r] = vec[l];
			r--;
		}
	}
	// Here l == r, the loop exits
	vec[l] = val;
	return l;
}

// This is a recursive function. Its job is to sort the data in the interval [begin, end]
void quick_sort(vector<int>& vec, int begin, int end) {
	// Write end condition first
	if (begin >= end) {
		return;
	}
	// The partition lets the pivot element of [begin, end] find its final sorted position pos
	// Divide first
	int pos = partition(vec, begin, end);
	quick_sort(vec, begin, pos-1);
	quick_sort(vec, pos+1, end);
}

On the way down the recursion the interval is partitioned; nothing is done on the way back up.

7, Merge sort


// Merge the sorted intervals [l, mid] and [mid+1, r] of vec into one sorted interval
void merge(vector<int>& vec, int l, int mid, int r) {
	int* tmp = new int[r - l + 1]();
	int i = l;        // Index of left subsequence
	int j = mid + 1;  // Index of right subsequence
	int idx = 0;      // Index of the merged sequence
	while (i <= mid && j <= r) {
		if (vec[i] <= vec[j]) {
			tmp[idx++] = vec[i++];
		}
		else {
			tmp[idx++] = vec[j++];
		}
	}
	while (i <= mid) {
		tmp[idx++] = vec[i++];
	}
	while (j <= r) {
		tmp[idx++] = vec[j++];
	}
	for (i = l, j = 0; i <= r; i++, j++) {
		vec[i] = tmp[j];
	}
	delete[] tmp;
}

// Make the elements in the vec interval [begin, end] orderly
void merge_sort(vector<int>& vec, int begin, int end) {
	if (begin >= end) {
		return;
	}
	int mid = (begin + end) >> 1;
	
	// First recursively sort [begin, mid] and [mid+1, end]
	// The way down only divides; no sorting happens yet
	merge_sort(vec, begin, mid);
	merge_sort(vec, mid+1, end);

	// Merging process
	merge(vec, begin, mid, end);
}
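A possible driver; the whole-vector wrapper is an illustrative addition, not part of the original.

// Convenience overload: sorts the entire vector
void merge_sort(vector<int>& vec) {
	if (vec.size() < 2) {
		return;
	}
	merge_sort(vec, 0, vec.size() - 1);
}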

Best and worst time complexity: the recursion tree is a perfectly balanced binary tree of depth O(logn), and each level does O(n) work, so the total is O(nlogn)
Space complexity: the recursion stack frames take O(logn) and the temporary array used during merging takes O(n); taking the larger of the two, it is O(n)

Nothing is done on the way down the recursion (only dividing); the merging work happens on the way back up.

8, Heap

1. The binary heap


The index of the last non-leaf (internal) node is (n - 1) / 2, where n is the index of the last node.
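For an array-stored heap, the index relations used throughout this section are the following (the helper names are illustrative):

// For a node stored at index i of the array:
int parent(int i)      { return (i - 1) / 2; } // index of the parent
int left_child(int i)  { return 2 * i + 1; }   // index of the left child
int right_child(int i) { return 2 * i + 2; }   // index of the right child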

2. Heap adjustment

Push: a new element can only be inserted at the bottom of the heap, after which the heap is adjusted (sift up)

Pop: only the top element of the heap can be removed, after which the heap is adjusted (sift down)

After removing the top element, move the bottom (last) element to the top and sift it down: keep promoting the larger child and sinking the element until it has no children (i.e. the current index is greater than (n - 1) / 2)

The time complexity of one heap adjustment is proportional to the height of the tree: O(logn)

3. Implement priority queue with heap

#include <iostream>
#include <functional>
#include <cstring>
#include <cstdlib>
#include <ctime>
using namespace std;

class PriorityQueue {
public:
	// Define function object type
	using Comp = function<bool(int, int)>;
	PriorityQueue(int cap = 20, Comp comp = greater<int>())
		:_size(0)
		, _cap(cap)
		, _comp(comp) {
		_queue = new int[_cap];
	}
	PriorityQueue(Comp comp)
		:_size(0)
		, _cap(20)
		, _comp(comp) {
		_queue = new int[_cap];
	}
	~PriorityQueue(){
		delete[] _queue;
	}

public:
	bool empty() const{
		return _size == 0;
	}

	int top() const {
		if (empty()) {
			throw "queue is empty!";
		}
		return _queue[0];
	}

	int size() const {
		return _size;
	}

	// push pop top size
	void push(int val) {
		if (_size == _cap) {
			expand();
		}
		if (_size == 0) {
			_queue[_size] = val;
		}
		else {
			sift_up(_size, val); // Sift up from the bottom of the heap
		}
		_size++;
	}

	void pop() {
		if (empty()) {
			throw "queue is empty!";
		}
		_size--;
		// If the heap had only one element, nothing needs to be adjusted after popping
		// Otherwise elements remain and the heap must be re-adjusted
		if (_size > 0) {
			sift_down(0, _queue[_size]); // Put the last node in position 0, then adjust it.
		}
	}

private:
	void expand() {
		const int time = 2;
		int* tmp = new int[_cap * time];
		memcpy(tmp, _queue, _cap * sizeof(int));  // Byte-wise copy; fine for plain int, but not for objects that require deep copies
		delete[] _queue;
		_queue = tmp;
		_cap *= time;
	}
	
	// Sift up: place the value val, starting from position cur, by moving it up toward the root
	void sift_up(int cur, int val) {
		while (cur > 0) {
			int father = (cur - 1) >> 1;
			if (_comp(val , _queue[father])) {
				_queue[cur] = _queue[father];   // The parent node is pulled down
				cur = father;                   // Update the location at this time
			}
			else {
				break;
			}
		}
		_queue[cur] = val;
	}

	// Sift down: place the value val, starting from position cur, by moving it down toward the leaves
	void sift_down(int cur, int val) {
		int n = _size - 1; // Index of the last valid node
		// Continue while cur still has at least a left child
		while (2 * cur + 1 <= n) {
			int max_child = 2 * cur + 1; // Assume the left child is the larger one
			// If a right child exists and compares larger, pick it instead
			if (2 * cur + 2 <= n && _comp(_queue[2 * cur + 2], _queue[max_child])) {
				max_child = 2 * cur + 2;
			}
			// If the larger child compares larger than val, pull it up and keep sinking
			if (_comp(_queue[max_child], val)) {
				_queue[cur] = _queue[max_child];
				cur = max_child;
			}
			else {
				// The heap property already holds here
				break;
			}
		}
		_queue[cur] = val;
	}

private:
	int* _queue;
	int _size;
	int _cap;
	Comp _comp;
};

int main(){
	const int COUNT = 10;
	srand(time(nullptr));
	
	PriorityQueue q([](int a, int b)->bool {return a < b; });
	for (int i = 0; i < COUNT; i++) {
		int val = rand() % 100;
		q.push(val);
		cout << val << " ";
	}
	cout << endl;

	while (!q.empty()) {
		cout << q.top() << " ";
		q.pop();
	}
	cout << endl;
	
	return 0;
}

Writing 1:

PriorityQueue q(less<int>()); // Not treated as constructing an object; the compiler parses it as a function declaration (the "most vexing parse")
cout<<typeid(q).name();  // class PriorityQueue __cdecl(struct std::less<int> (__cdecl*)(void))

PriorityQueue fun(int a);
cout << typeid(fun).name(); // class PriorityQueue __cdecl(int)

PriorityQueue q1(std::move(less<int>())); // Forcing the argument to be an rvalue expression makes this unambiguously a constructor call, and the program runs normally

Writing an explicitly constructed less<int> temporary directly as the argument makes the whole statement parse as the declaration of a function whose parameter is a function-pointer type, rather than as the construction of an object; this is C++'s "most vexing parse".

One might expect the temporary to be treated as an rvalue argument, but it is never treated as an argument at all, because the statement is not parsed as a constructor call.

Note: it is best not to write an explicitly constructed temporary directly as a constructor argument in this form.
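Two further ways to force the object interpretation (added here for reference, not in the original):

PriorityQueue q2((less<int>()));   // Extra parentheses around the argument rule out a function declaration
PriorityQueue q3{less<int>()};     // Brace initialization (C++11) is also unambiguous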

Writing 2:

function<bool(int, int)> comp = less<int>();
PriorityQueue q(comp); // No error: comp is a named object, so there is no parsing ambiguity

Writing 3:

PriorityQueue q([](int a, int b){return a < b;}); // No error: a lambda expression cannot be parsed as part of a function declaration

4. Heap sort

Build the heap by adjusting from the last internal node back to node 0, so that every node from index 0 up to the last internal node (n - 1) / 2 satisfies the heap property, where n is the index of the last node

Swapping the heap top with the last element puts one element into its final sorted position


Each subsequent pass considers one fewer element

// Make the node at index cur satisfy the heap property; size is the number of elements still being sorted in the container
void sift_down(vector<int>& vec, int cur, int size) {
	int val = vec[cur];
	int n = size - 1;   // Subscript of the last node
	while (cur <= (n - 1) >> 1) {
		// Find the older child first
		int max_child = 2 * cur + 1;
		if (2 * cur + 2 <= n && vec[2 * cur + 2] > vec[max_child]) {
			max_child = 2 * cur + 2;
		}
		if (vec[max_child] > val) {
			vec[cur] = vec[max_child];
			cur = max_child;
		}
		else {
			// The heap property already holds; stop sinking
			break;
		}
	}
	vec[cur] = val;
}

void heap_sort(vector<int>& vec) {
	// Build the heap first: sift down every internal node, starting from the last internal node
	int n = vec.size() - 1;  // Subscript of the last node
	for (int i = (n - 1) >> 1; i >= 0; i--) {
		sift_down(vec, i, vec.size()); // O(logn)
	}
	// After building the heap, repeatedly swap the top element with the last element, then sift_down the new top so the heap property is restored
	// i is the number of elements still to be sorted in the container
	for (int i = vec.size(); i > 1; i--) { // n - 1 passes; each sift_down is O(logn)
		// Exchange with the last element
		int tmp = vec[0];
		vec[0] = vec[i-1];
		vec[i-1] = tmp;
		sift_down(vec, 0, i-1);  // Only the new top (index 0) needs to be sifted down; the sorted tail is excluded
	}
}

Heap sort is unstable. Stability is only possible when two equal elements are actually compared with each other so that a fixed relative order can be kept;
in heap sort, equal values that end up in different subtrees are never compared with each other, so their relative order can change


Summary:

When the amount of data is large and uniformly distributed: quick sort > merge sort > Shell sort > heap sort

  1. Merge sort is slower than quick sort because the merged data must first be placed in a temporary array and then copied back into the original array
  2. Both quick sort and merge sort traverse the array sequentially, which is friendly to the CPU cache (the locality principle); heap sort does not access memory sequentially, so it does not benefit from locality and is slower
  3. In each sinking adjustment of heap sort, the element at the bottom of the heap is swapped to the top; because the bottom element is relatively small, many sift-down steps are needed before the heap property is restored, which costs time

9, sort in STL

Partial source code

template <class _RanIt, class _Pr>
_CONSTEXPR20 void sort(const _RanIt _First, const _RanIt _Last, _Pr _Pred) { // order [_First, _Last)
    _Adl_verify_range(_First, _Last);
    const auto _UFirst = _Get_unwrapped(_First);
    const auto _ULast  = _Get_unwrapped(_Last);
    _Sort_unchecked(_UFirst, _ULast, _ULast - _UFirst, _Pass_fn(_Pred));
}
template <class _RanIt, class _Pr>
_CONSTEXPR20 void _Sort_unchecked(_RanIt _First, _RanIt _Last, _Iter_diff_t<_RanIt> _Ideal, _Pr _Pred) {
    // order [_First, _Last)
    for (;;) {
        // As the quick sort proceeds, the elements tend toward order
        // If the interval size is not greater than _ISORT_MAX = 32, switch to insertion sort
        if (_Last - _First <= _ISORT_MAX) { // small
            _Insertion_sort_unchecked(_First, _Last, _Pred);
            return;
        }
        // _Ideal is the number of elements in the current interval
        // Each recursion shrinks _Ideal
        // To avoid recursing too deep, when _Ideal <= 0 the recursion stops and heap sort, whose time complexity is a stable O(nlog2n), is used instead
        if (_Ideal <= 0) { // heap sort if too many divisions
            _Make_heap_unchecked(_First, _Last, _Pred);
            _Sort_heap_unchecked(_First, _Last, _Pred);
            return;
        }

        // divide and conquer by quicksort
        auto _Mid = _Partition_by_median_guess_unchecked(_First, _Last, _Pred);

        _Ideal = (_Ideal >> 1) + (_Ideal >> 2); // allow 1.5 log2(N) divisions

        if (_Mid.first - _First < _Last - _Mid.second) { // loop on second half
            _Sort_unchecked(_First, _Mid.first, _Ideal, _Pred);
            _First = _Mid.second;
        } else { // loop on first half
            _Sort_unchecked(_Mid.second, _Last, _Ideal, _Pred);
            _Last = _Mid.first;
        }
    }
}
  1. Quick sort's O(nlog2n) is not guaranteed; in the worst case it degrades to O(n^2). The degradation can be mitigated by switching to insertion sort or by median-of-three pivot selection
  2. In the STL sort source, the variable _Ideal controls the recursion depth
  3. Recursing too deep causes excessive function-call overhead and may even overflow the stack and crash the program
  4. If the recursion depth budget is used up before sorting finishes, sort falls back to heap sort, whose time complexity is a stable O(nlog2n)
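A simplified sketch of the same idea (introsort: depth-limited quick sort that uses insertion sort for small intervals and falls back to heap sort when the depth budget runs out). The threshold, the helper name, and the depth budget below are illustrative; partition is the function from the quick sort section and insert_sort is the interval version sketched there.

#include <algorithm> // make_heap, sort_heap
#include <cmath>     // log2

const int ISORT_MAX = 32; // small-interval threshold, mirroring _ISORT_MAX

void intro_sort(vector<int>& vec, int begin, int end, int depth_budget) {
	if (begin >= end) {
		return;
	}
	// Small interval: insertion sort
	if (end - begin + 1 <= ISORT_MAX) {
		insert_sort(vec, begin, end);
		return;
	}
	// Depth budget exhausted: fall back to heap sort on this interval
	if (depth_budget <= 0) {
		make_heap(vec.begin() + begin, vec.begin() + end + 1);
		sort_heap(vec.begin() + begin, vec.begin() + end + 1);
		return;
	}
	int pos = partition(vec, begin, end);
	intro_sort(vec, begin, pos - 1, depth_budget - 1);
	intro_sort(vec, pos + 1, end, depth_budget - 1);
}

// A typical call gives a depth budget proportional to log2(n):
// intro_sort(vec, 0, vec.size() - 1, 2 * (int)log2(vec.size()));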


Bubble sort performs too many comparisons and swaps, and selection sort performs too many comparisons. When the data is close to reverse order, the time complexity of insertion sort and quick sort degrades to O(n^2). If you know nothing about the characteristics of the sequence, choose heap sort with its stable O(nlog2n).


Considering space complexity: merge sort needs O(n) extra space, and if that extra space has to come from disk IO it becomes very inefficient; the recursion of quick sort needs only O(log2n) space.


External sorting algorithm process (a sketch of the merge step follows the list):

  1. Create 11 files and write 1024M / 11 of the data into each, so that each file holds less than 100M of data
  2. Read each of the 11 files into memory in turn, sort it, and write it back to disk; now the data within each file is ordered
  3. Read one number from each of the 11 files, pick the smallest of them (the idea of merging) and write it to the final output file, then read the next number from the file the written number came from; repeat until all the data has been sorted
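A minimal in-memory sketch of step 3's k-way merge (an addition, not in the original): sorted vectors stand in for the 11 sorted files, and a min-heap picks the smallest current value.

#include <queue>      // priority_queue
#include <tuple>
#include <vector>
#include <functional> // greater
using namespace std;

// Merge k sorted runs into one sorted output; each run stands in for a sorted file
vector<int> k_way_merge(const vector<vector<int>>& runs) {
	// (value, run index, index within the run); greater<Item> makes the heap a min-heap on value
	using Item = tuple<int, int, int>;
	priority_queue<Item, vector<Item>, greater<Item>> heap;
	for (int r = 0; r < (int)runs.size(); r++) {
		if (!runs[r].empty()) {
			heap.push({runs[r][0], r, 0});
		}
	}
	vector<int> out;
	while (!heap.empty()) {
		auto [val, r, idx] = heap.top(); // structured bindings, C++17
		heap.pop();
		out.push_back(val);
		// Read the next number from the run the written number came from
		if (idx + 1 < (int)runs[r].size()) {
			heap.push({runs[r][idx + 1], r, idx + 1});
		}
	}
	return out;
}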

10, Radix sort


void radix_sort(vector<int>& vec) {
	if (vec.size() < 2) {
		return;
	}
	vector<vector<int>> buckets(10);
	// int max_num = *max_element(vec.begin(), vec.end());
	int max_num = vec[0];
	for (int i = 1; i < vec.size(); i++) {
		if (vec[i] > max_num) {
			max_num = vec[i];
		}
	}

	int len = to_string(max_num).size();
	int mod = 10; // value % mod keeps the low digits, up to and including the current one
	int dev = 1;  // dividing by dev then isolates the current digit: digit = value % mod / dev

	for (int i = 0; i < len; i++, mod *= 10, dev *= 10) {
		for (int j = 0; j < vec.size(); j++) {
			// Put the element into the bucket that matches its current digit
			int index = vec[j] % mod / dev;
			buckets[index].push_back(vec[j]);
		}
		// Take the elements from each bucket, put them back into the original array, and empty the bucket
		int cur = 0;
		for (int j = 0; j < 10; j++) {
			for (int v : buckets[j]) {
				vec[cur++] = v;
			}
			buckets[j].clear();
		}
	}
}

Limitation: negative numbers cannot be handled, because the extracted digit is used directly as the bucket index

void radix_sort(vector<int>& vec) {
	if (vec.size() < 2) {
		return;
	}
	// Use 20 buckets: the signed digit (-9 to 9) is offset by 10, so digits of negative numbers land in the lower buckets and digits of non-negative numbers in buckets 10 to 19
	int bucket_num = 20;
	vector<vector<int>> buckets(bucket_num);
	int longest_num = abs(vec[0]);
	for (int i = 1; i < vec.size(); i++) {
		if (abs(vec[i]) > longest_num) {
			longest_num = abs(vec[i]);
		}
	}

	int len = to_string(longest_num).size();
	int mod = 10; // value % mod keeps the low digits, up to and including the current one
	int dev = 1;  // dividing by dev then isolates the current digit: digit = value % mod / dev

	for (int i = 0; i < len; i++, mod *= 10, dev *= 10) {
		for (int j = 0; j < vec.size(); j++) {
			// Put the element into the bucket that matches its current signed digit (offset by 10)
			int index = vec[j] % mod / dev + 10;
			buckets[index].push_back(vec[j]);
		}
		// Take the elements from each bucket, put them back into the original array, and empty the bucket
		int cur = 0;
		for (int j = 0; j < bucket_num; j++) {
			for (int v : buckets[j]) {
				vec[cur++] = v;
			}
			buckets[j].clear();
		}
	}
}
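A quick usage sketch of the signed version (the sample data and main are illustrative additions; the includes and using directive from the earlier examples are assumed):

int main() {
	vector<int> vec = {170, -45, 75, -90, 2, 24, -2, 66};
	radix_sort(vec);
	for (int v : vec) {
		cout << v << " ";  // prints: -90 -45 -2 2 24 66 75 170
	}
	cout << endl;
	return 0;
}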


Here d denotes the number of digits in the data; the extra space used is mainly the buckets
