Introduction
Process pools create separate Python processes, bypassing the GIL for true parallelism. ProcessPoolExecutor from concurrent.futures provides this capability.
ProcessPoolExecutor Basics
from concurrent.futures import ProcessPoolExecutor
import math
def calculate_prime(n):
    """Check whether ``n`` is prime by trial division.

    Args:
        n: Integer to test.

    Returns:
        A ``(n, is_prime)`` tuple, so each result carries the input it
        belongs to.
    """
    # Nothing below 2 is prime; the guard also keeps negative input away
    # from the square-root computation.
    if n < 2:
        return n, False
    # math.isqrt is exact for arbitrarily large ints, whereas
    # int(math.sqrt(n)) goes through a float and can be off by one.
    limit = math.isqrt(n)
    is_prime = all(n % divisor for divisor in range(2, limit + 1))
    return n, is_prime
numbers = [1000003, 1000033, 1000037, 1000039, 1000081]

# Worker processes may re-import this module (the "spawn" start method does),
# so the pool must only be created when the file runs as the main script.
if __name__ == "__main__":
    with ProcessPoolExecutor(max_workers=4) as executor:
        # list() drains the map iterator while the pool is still open.
        results = list(executor.map(calculate_prime, numbers))
    print(results)
Shared Memory
import multiprocessing
from multiprocessing import Value, Array, shared_memory
# Use Value/Array for shared data.
# counter is a shared integer (typecode "i") that starts at 0; increment()
# updates it while holding the lock returned by counter.get_lock().
counter = Value("i", 0)
def increment(counter):
    """Add one to ``counter.value`` while holding the counter's own lock."""
    lock = counter.get_lock()
    with lock:
        # Read-modify-write happens entirely inside the lock.
        counter.value = counter.value + 1
def _init_worker(shared_counter):
    """Pool initializer: keep the counter handed over at worker start-up."""
    global _worker_counter
    _worker_counter = shared_counter


def _increment_shared(_):
    """Pool task: bump the counter this worker received from its initializer."""
    increment(_worker_counter)


if __name__ == "__main__":
    # A synchronized Value cannot be pickled as a task argument:
    # pool.map(increment, [counter] * 10) raises RuntimeError. It has to be
    # passed to each worker when the worker process is created instead.
    with multiprocessing.Pool(
        4, initializer=_init_worker, initargs=(counter,)
    ) as pool:
        pool.map(_increment_shared, range(10))
    print(counter.value)
Array and Shared Memory
import multiprocessing
def process_array(arr):
    """Return the sum of the items in ``arr``."""
    total = sum(arr)
    return total
# Worker processes may re-import this module, so the array and the pool are
# only created when the file runs as the main script.
if __name__ == "__main__":
    # Shared array of C ints, reached through the multiprocessing module that
    # this snippet imports itself.
    shared_arr = multiprocessing.Array("i", [1, 2, 3, 4, 5])
    chunk_size = 3
    # Slicing yields plain lists, so each task receives an ordinary picklable
    # chunk rather than the shared array object.
    chunks = [
        shared_arr[start:start + chunk_size]
        for start in range(0, len(shared_arr), chunk_size)
    ]
    with multiprocessing.Pool(2) as pool:
        results = pool.map(process_array, chunks)
    print(results)
ProcessPoolExecutor vs ThreadPoolExecutor
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
import time
def cpu_bound(n):
    """Return the sum of squares of the integers below one million.

    ``n`` is accepted so the function can be driven by ``map`` but it does
    not influence the amount of work or the result.
    """
    upper = 10 ** 6
    total = sum(value * value for value in range(upper))
    return total
def _time_executor(executor_cls, label):
    """Run four ``cpu_bound`` tasks on ``executor_cls`` and print the time taken.

    Args:
        executor_cls: Executor class to instantiate with four workers.
        label: Text printed in front of the elapsed time.
    """
    with executor_cls(4) as executor:
        # perf_counter is monotonic, so the measurement cannot be skewed by
        # wall-clock adjustments the way time.time() can.
        start = time.perf_counter()
        list(executor.map(cpu_bound, range(4)))
        elapsed = time.perf_counter() - start
    print(f"{label}: {elapsed:.2f}s")


# Worker processes may re-import this module, so the benchmark only runs when
# the file is executed as the main script.
if __name__ == "__main__":
    # ThreadPoolExecutor - limited by GIL
    _time_executor(ThreadPoolExecutor, "Threads")
    # ProcessPoolExecutor - bypasses GIL
    _time_executor(ProcessPoolExecutor, "Processes")
Passing Data to Processes
from concurrent.futures import ProcessPoolExecutor
import pickle
class LargeObject:
    """Empty placeholder class used as the object handed to pool workers."""
def worker(args):
    """Unpack an ``(obj, data)`` pair and return ``process(data)``.

    The object half of the pair is unpacked but not used here.

    NOTE(review): ``process`` is not defined in the visible source; it is
    presumably provided elsewhere - confirm before running.
    """
    _obj, payload = args
    return process(payload)
# Worker processes may re-import this module, so the pool is only created
# when the file runs as the main script.
if __name__ == "__main__":
    obj = LargeObject()
    # Objects are pickled when passed: every task tuple is serialized on its
    # way to a worker, so obj is pickled once per task.
    tasks = [(obj, data) for data in range(10)]
    with ProcessPoolExecutor() as executor:
        # Consume the iterator; exceptions raised inside workers only surface
        # when their results are retrieved.
        results = list(executor.map(worker, tasks))
Practice Problems
- Use ProcessPoolExecutor for CPU-bound tasks
- Share memory between processes
- Compare with ThreadPoolExecutor
- Handle large objects in pool
- Implement parallel data processing