-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathstarter_code.py
More file actions
290 lines (214 loc) · 10.4 KB
/
starter_code.py
File metadata and controls
290 lines (214 loc) · 10.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
"""
Search Assignment Starter Code
Implement three search algorithms and benchmark their performance.
"""
import json
import time
import random
# ============================================================================
# PART 1: Linear Search
# ============================================================================
def linear_search(data, target):
"""
Search for target in data using linear search.
Linear search checks each element sequentially until finding the target
or reaching the end of the list.
Args:
data (list): List to search (can be sorted or unsorted)
target: Item to find
Returns:
int: Index of target if found, -1 if not found
Time Complexity: O(n) - must check up to n elements
Space Complexity: O(1) - uses constant extra space
Example:
linear_search([5, 2, 8, 1, 9], 8) returns 2
linear_search([5, 2, 8, 1, 9], 7) returns -1
"""
# TODO: Implement linear search that loops through each element and returns its index if found and -1 if not found.
pass # Delete pass and write your code here
# ============================================================================
# PART 2: Binary Search (Iterative)
# ============================================================================
def binary_search_iterative(data, target):
"""
Search for target in SORTED data using iterative binary search.
Binary search repeatedly divides the search space in half by comparing
the target to the middle element.
Args:
data (list): SORTED list to search
target: Item to find
Returns:
int: Index of target if found, -1 if not found
Time Complexity: O(log n) - divides search space in half each iteration
Space Complexity: O(1) - uses constant extra space
IMPORTANT: This only works on SORTED data!
Example:
binary_search_iterative([1, 2, 5, 8, 9], 8) returns 3
binary_search_iterative([1, 2, 5, 8, 9], 7) returns -1
"""
# TODO: Implement iterative binary search that uses iteration to find the target. Return the index if found and -1 if not found.
pass # Delete pass and write your code here
# ============================================================================
# PART 3: Binary Search (Recursive)
# ============================================================================
def binary_search_recursive(data, target, left=None, right=None):
"""
Search for target in SORTED data using recursive binary search.
This is the recursive version of binary search, which naturally expresses
the divide-and-conquer approach.
Args:
data (list): SORTED list to search
target: Item to find
left (int): Left boundary of search space (defaults to 0)
right (int): Right boundary of search space (defaults to len(data)-1)
Returns:
int: Index of target if found, -1 if not found
Time Complexity: O(log n)
Space Complexity: O(log n) - recursion call stack
Example:
binary_search_recursive([1, 2, 5, 8, 9], 8) returns 3
"""
# Handle default parameters on first call
if left is None:
left = 0
if right is None:
right = len(data) - 1
# TODO: Implement recursive binary search that uses recursion to find the target. Return the index if found and -1 if not found. Note that default parameters are already handled above.
pass # Delete pass and write your code here
# ============================================================================
# BENCHMARKING & TESTING
# ============================================================================
def load_dataset(filename):
"""Load a dataset from JSON file."""
with open(f"datasets/{filename}", "r") as f:
return json.load(f)
def load_test_cases():
"""Load test cases for validation."""
with open("datasets/test_cases.json", "r") as f:
return json.load(f)
def test_search_correctness():
"""Test that search functions work correctly."""
print("="*70)
print("TESTING SEARCH CORRECTNESS")
print("="*70 + "\n")
# Simple test data
sorted_data = [1, 3, 5, 7, 9, 11, 13, 15]
unsorted_data = [7, 2, 9, 1, 5, 13, 3, 11]
print("Test 1: Linear search on unsorted data")
result = linear_search(unsorted_data, 9)
print(f" linear_search({unsorted_data}, 9) = {result}")
print(f" Expected: 2, Got: {result}, {'✓ PASS' if result == 2 else '✗ FAIL'}")
print("\nTest 2: Linear search - item not found")
result = linear_search(unsorted_data, 99)
print(f" linear_search({unsorted_data}, 99) = {result}")
print(f" Expected: -1, Got: {result}, {'✓ PASS' if result == -1 else '✗ FAIL'}")
print("\nTest 3: Binary search iterative on sorted data")
result = binary_search_iterative(sorted_data, 9)
print(f" binary_search_iterative({sorted_data}, 9) = {result}")
print(f" Expected: 4, Got: {result}, {'✓ PASS' if result == 4 else '✗ FAIL'}")
print("\nTest 4: Binary search iterative - item not found")
result = binary_search_iterative(sorted_data, 10)
print(f" binary_search_iterative({sorted_data}, 10) = {result}")
print(f" Expected: -1, Got: {result}, {'✓ PASS' if result == -1 else '✗ FAIL'}")
print("\nTest 5: Binary search recursive on sorted data")
result = binary_search_recursive(sorted_data, 13)
print(f" binary_search_recursive({sorted_data}, 13) = {result}")
print(f" Expected: 6, Got: {result}, {'✓ PASS' if result == 6 else '✗ FAIL'}")
print("\nTest 6: Binary search recursive - item not found")
result = binary_search_recursive(sorted_data, 8)
print(f" binary_search_recursive({sorted_data}, 8) = {result}")
print(f" Expected: -1, Got: {result}, {'✓ PASS' if result == -1 else '✗ FAIL'}")
def benchmark_algorithm(search_func, data, targets):
"""
Benchmark a search algorithm on given data with multiple targets.
Args:
search_func: The search function to test
data: The dataset to search
targets: List of items to search for
Returns:
float: Average time per search in seconds
"""
start = time.time()
for target in targets:
search_func(data, target)
end = time.time()
return (end - start) / len(targets)
def benchmark_all_datasets():
"""Benchmark all search algorithms on all datasets."""
print("\n" + "="*70)
print("BENCHMARKING SEARCH ALGORITHMS")
print("="*70 + "\n")
datasets = {
"customer_ids.json": "Unsorted Customer IDs (100K)",
"product_catalog.json": "Pre-sorted Product Catalog (50K)",
"config_settings.json": "Small Config Settings (500)",
"dictionary_words.json": "Dictionary Words (10K)"
}
test_cases = load_test_cases()
for filename, description in datasets.items():
print(f"Dataset: {description}")
print("-" * 70)
data = load_dataset(filename)
dataset_key = filename.replace(".json", "")
# Get targets to search for (mix of present and absent)
targets = test_cases[dataset_key]["present"][:50] + test_cases[dataset_key]["absent"][:50]
random.shuffle(targets)
# Benchmark linear search
linear_time = benchmark_algorithm(linear_search, data, targets)
print(f" Linear Search: {linear_time*1000:.4f} ms per search")
# For unsorted data, sort it first for binary search
if "unsorted" in description.lower() or "small config" in description.lower():
sorted_data = sorted(data)
sort_start = time.time()
sorted(data)
sort_time = time.time() - sort_start
print(f" Time to sort data: {sort_time*1000:.2f} ms (one-time cost)")
else:
sorted_data = data
sort_time = 0
# Benchmark binary search iterative
binary_iter_time = benchmark_algorithm(binary_search_iterative, sorted_data, targets)
print(f" Binary Search (Iterative): {binary_iter_time*1000:.4f} ms per search")
# Benchmark binary search recursive
binary_rec_time = benchmark_algorithm(binary_search_recursive, sorted_data, targets)
print(f" Binary Search (Recursive): {binary_rec_time*1000:.4f} ms per search")
# Calculate speedup
if binary_iter_time > 0:
speedup = linear_time / binary_iter_time
print(f" Binary speedup: {speedup:.2f}x faster than linear")
print()
def analyze_preprocessing_costs():
"""Analyze when sorting overhead is worth it."""
print("="*70)
print("PREPROCESSING COST ANALYSIS")
print("="*70 + "\n")
# Use customer IDs dataset (unsorted, large)
data = load_dataset("customer_ids.json")
test_cases = load_test_cases()
targets = test_cases["customer_ids"]["present"][:100]
# Measure sort time
sort_start = time.time()
sorted_data = sorted(data)
sort_time = time.time() - sort_start
# Measure search times
linear_time = benchmark_algorithm(linear_search, data, targets[:10])
binary_time = benchmark_algorithm(binary_search_iterative, sorted_data, targets[:10])
print(f"Dataset: Customer IDs (100,000 unsorted entries)")
print(f"One-time sort cost: {sort_time*1000:.2f} ms")
print(f"Linear search time: {linear_time*1000:.4f} ms per search")
print(f"Binary search time: {binary_time*1000:.4f} ms per search")
print(f"Time saved per search: {(linear_time - binary_time)*1000:.4f} ms\n")
# Calculate break-even point
time_saved_per_search = linear_time - binary_time
if time_saved_per_search > 0:
searches_to_break_even = sort_time / time_saved_per_search
print(f"Break-even point: {int(searches_to_break_even)} searches")
print(f"After {int(searches_to_break_even)} searches, sorting + binary search becomes faster\n")
if __name__ == "__main__":
print("SEARCH ASSIGNMENT - STARTER CODE")
print("Implement the search functions above, then run tests.\n")
# Uncomment these as you complete each part:
# test_search_correctness()
# benchmark_all_datasets()
# analyze_preprocessing_costs()
print("\n⚠ Uncomment the test functions in the main block to run benchmarks!")