通过比较元素的大小来决定它们的顺序,时间复杂度通常下限为 $O(nlogn)$。
通过重复交换相邻的逆序元素,逐步将最大元素“冒泡”到末尾。
时间复杂度 $O(n^2)$
1
2
3
4
5
6
7
|
def bubble_sort(arr):
n = len(arr)
for i in range(n):
for j in range(0, n-i-1):
if arr[j] > arr[j+1]:
arr[j], arr[j+1] = arr[j+1], arr[j]
return arr
|
每次从未排序部分选择最小(或最大)元素放到已排序部分的末尾。
时间复杂度:$O(n^2)$
将未排序元素逐个插入到已排序部分的正确位置,适合小规模或部分有序数据。
时间复杂度:$O(n^2)$(最优情况:$O(n)$)
采用分治法,选择一个基准(pivot)将数组分为两部分,递归排序。
时间复杂度:$O(nlogn)$(最坏情况:$O(n^2)$)
1
2
3
4
5
6
7
8
|
def quick_sort(arr):
if len(arr) <= 1:
return arr
pivot = arr[len(arr)//2]
left = [x for x in arr if x < pivot]
middle = [x for x in arr if x == pivot]
right = [x for x in arr if x > pivot]
return quick_sort(left) + middle + quick_sort(right)
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
|
def merge_sort(arr):
if len(arr) <= 1:
return arr
mid = len(arr) // 2
left = merge_sort(arr[:mid])
right = merge_sort(arr[mid:])
return merge(left, right)
def merge(left, right):
result = []
i = j = 0
while i < len(left) and j < len(right):
if left[i] < right[j]:
result.append(left[i])
i += 1
else:
result.append(right[j])
j += 1
result.extend(left[i:])
result.extend(right[j:])
return result
|
利用二叉堆(通常是大顶堆)的性质,每次取出堆顶元素。
时间复杂度:$O(nlogn)$,原地排序但不稳定。
插入排序的改进版,通过分组间隔逐步缩小实现部分有序。
时间复杂度:取决于间隔序列(通常优于$O(n^2)$)
冒泡排序的改进版,通过较大间隔的比较减少逆序对。
1
2
3
4
5
6
7
8
9
10
11
12
|
def counting_sort(arr):
max_val = max(arr)
count = [0] * (max_val + 1)
for num in arr:
count[num] += 1
sorted_arr = []
for i in range(len(count)):
sorted_arr.extend([i] * count[i])
return sorted_arr
|
将数据分到有限数量的桶中,对每个桶单独排序后合并。
时间复杂度:$O(n+k)$(桶数量合理时接近线性)。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
|
def radix_sort(arr):
max_num = max(arr)
exp = 1
while max_num // exp > 0:
counting_sort_by_digit(arr, exp)
exp *= 10
return arr
def counting_sort_by_digit(arr, exp):
n = len(arr)
output = [0] * n
count = [0] * 10
for i in range(n):
index = arr[i] // exp
count[index % 10] += 1
for i in range(1, 10):
count[i] += count[i-1]
i = n - 1
while i >= 0:
index = arr[i] // exp
output[count[index % 10] - 1] = arr[i]
count[index % 10] -= 1
i -= 1
for i in range(n):
arr[i] = output[i]
|
1
2
3
4
5
|
def linear_search(arr, target):
for i in range(len(arr)):
if arr[i] == target:
return i
return -1
|
1
2
3
4
5
6
7
8
9
10
11
|
def binary_search(arr, target):
low, high = 0, len(arr) - 1
while low <= high:
mid = (low + high) // 2
if arr[mid] == target:
return mid
elif arr[mid] < target:
low = mid + 1
else:
high = mid - 1
return -1
|
Prim算法
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
|
import heapq
def prim(graph, start):
mst = []
visited = set([start])
edges = [
(cost, start, to)
for to, cost in graph[start].items()
]
heapq.heapify(edges)
while edges:
cost, frm, to = heapq.heappop(edges)
if to not in visited:
visited.add(to)
mst.append((frm, to, cost))
for to_next, cost in graph[to].items():
if to_next not in visited:
heapq.heappush(edges, (cost, to, to_next))
return mst
|
Dijkstra算法
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
|
import heapq
def dijkstra(graph, start):
distances = {vertex: float('infinity') for vertex in graph}
distances[start] = 0
pq = [(0, start)]
while pq:
current_distance, current_vertex = heapq.heappop(pq)
if current_distance > distances[current_vertex]:
continue
for neighbor, weight in graph[current_vertex].items():
distance = current_distance + weight
if distance < distances[neighbor]:
distances[neighbor] = distance
heapq.heappush(pq, (distance, neighbor))
return distances
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
|
def kmp_search(pattern, text):
def compute_lps(pattern):
lps = [0] * len(pattern)
length = 0
i = 1
while i < len(pattern):
if pattern[i] == pattern[length]:
length += 1
lps[i] = length
i += 1
else:
if length != 0:
length = lps[length-1]
else:
lps[i] = 0
i += 1
return lps
lps = compute_lps(pattern)
i = j = 0
while i < len(text):
if pattern[j] == text[i]:
i += 1
j += 1
if j == len(pattern):
return i - j
else:
if j != 0:
j = lps[j-1]
else:
i += 1
return -1
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
|
def rabin_karp(text, pattern):
d = 256 # 字符集大小
q = 101 # 质数
m = len(pattern)
n = len(text)
p = t = 0
h = 1
for i in range(m-1):
h = (h * d) % q
for i in range(m):
p = (d * p + ord(pattern[i])) % q
t = (d * t + ord(text[i])) % q
for i in range(n - m + 1):
if p == t:
if text[i:i+m] == pattern:
return i
if i < n - m:
t = (d * (t - ord(text[i]) * h) + ord(text[i + m])) % q
if t < 0:
t += q
return -1
|
...
...
...
...
...
1
2
3
4
5
6
7
8
9
10
11
12
|
def knapsack(weights, values, capacity):
n = len(weights)
dp = [[0] * (capacity + 1) for _ in range(n + 1)]
for i in range(1, n + 1):
for w in range(1, capacity + 1):
if weights[i-1] <= w:
dp[i][w] = max(values[i-1] + dp[i-1][w-weights[i-1]], dp[i-1][w])
else:
dp[i][w] = dp[i-1][w]
return dp[n][capacity]
|
...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
|
from collections import Counter
import math
def knn(data, query, k):
distances = []
for index, (features, label) in enumerate(data):
distance = 0
for i in range(len(features)):
distance += (features[i] - query[i]) ** 2
distance = math.sqrt(distance)
distances.append((distance, index))
distances = sorted(distances)
k_nearest = distances[:k]
k_labels = [data[i][1] for (_, i) in k_nearest]
return Counter(k_labels).most_common(1)[0][0]
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
|
import numpy as np
class LinearRegression:
def __init__(self, learning_rate=0.01, n_iterations=1000):
self.learning_rate = learning_rate
self.n_iterations = n_iterations
self.weights = None
self.bias = None
def fit(self, X, y):
n_samples, n_features = X.shape
self.weights = np.zeros(n_features)
self.bias = 0
for _ in range(self.n_iterations):
y_pred = np.dot(X, self.weights) + self.bias
dw = (1/n_samples) * np.dot(X.T, (y_pred - y))
db = (1/n_samples) * np.sum(y_pred - y)
self.weights -= self.learning_rate * dw
self.bias -= self.learning_rate * db
def predict(self, X):
return np.dot(X, self.weights) + self.bias
|