123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282 |
- # Hungarian algorithm (Kuhn-Munkres) for solving the linear sum assignment
- # problem. Taken from scikit-learn. Based on original code by Brian Clapper,
- # adapted to NumPy by Gael Varoquaux.
- # Further improvements by Ben Root, Vlad Niculae and Lars Buitinck.
- #
- # Copyright (c) 2008 Brian M. Clapper <bmc@clapper.org>, Gael Varoquaux
- # Author: Brian M. Clapper, Gael Varoquaux
- # License: 3-clause BSD
- import numpy as np
- def linear_sum_assignment(cost_matrix):
- """Solve the linear sum assignment problem.
- The linear sum assignment problem is also known as minimum weight matching
- in bipartite graphs. A problem instance is described by a matrix C, where
- each C[i,j] is the cost of matching vertex i of the first partite set
- (a "worker") and vertex j of the second set (a "job"). The goal is to find
- a complete assignment of workers to jobs of minimal cost.
- Formally, let X be a boolean matrix where :math:`X[i,j] = 1` iff row i is
- assigned to column j. Then the optimal assignment has cost
- .. math::
- \\min \\sum_i \\sum_j C_{i,j} X_{i,j}
- s.t. each row is assignment to at most one column, and each column to at
- most one row.
- This function can also solve a generalization of the classic assignment
- problem where the cost matrix is rectangular. If it has more rows than
- columns, then not every row needs to be assigned to a column, and vice
- versa.
- The method used is the Hungarian algorithm, also known as the Munkres or
- Kuhn-Munkres algorithm.
- Parameters
- ----------
- cost_matrix : array
- The cost matrix of the bipartite graph.
- Returns
- -------
- row_ind, col_ind : array
- An array of row indices and one of corresponding column indices giving
- the optimal assignment. The cost of the assignment can be computed
- as ``cost_matrix[row_ind, col_ind].sum()``. The row indices will be
- sorted; in the case of a square cost matrix they will be equal to
- ``numpy.arange(cost_matrix.shape[0])``.
- Notes
- -----
- .. versionadded:: 0.17.0
- Examples
- --------
- >>> cost = np.array([[4, 1, 3], [2, 0, 5], [3, 2, 2]])
- >>> from scipy.optimize import linear_sum_assignment
- >>> row_ind, col_ind = linear_sum_assignment(cost)
- >>> col_ind
- array([1, 0, 2])
- >>> cost[row_ind, col_ind].sum()
- 5
- References
- ----------
- 1. http://csclab.murraystate.edu/bob.pilgrim/445/munkres.html
- 2. Harold W. Kuhn. The Hungarian Method for the assignment problem.
- *Naval Research Logistics Quarterly*, 2:83-97, 1955.
- 3. Harold W. Kuhn. Variants of the Hungarian method for assignment
- problems. *Naval Research Logistics Quarterly*, 3: 253-258, 1956.
- 4. Munkres, J. Algorithms for the Assignment and Transportation Problems.
- *J. SIAM*, 5(1):32-38, March, 1957.
- 5. https://en.wikipedia.org/wiki/Hungarian_algorithm
- """
- cost_matrix = np.asarray(cost_matrix)
- if len(cost_matrix.shape) != 2:
- raise ValueError("expected a matrix (2-d array), got a %r array"
- % (cost_matrix.shape,))
- if not (np.issubdtype(cost_matrix.dtype, np.number) or
- cost_matrix.dtype == np.dtype(np.bool)):
- raise ValueError("expected a matrix containing numerical entries, got %s"
- % (cost_matrix.dtype,))
- if np.any(np.isinf(cost_matrix) | np.isnan(cost_matrix)):
- raise ValueError("matrix contains invalid numeric entries")
- if cost_matrix.dtype == np.dtype(np.bool):
- cost_matrix = cost_matrix.astype(np.int)
- # The algorithm expects more columns than rows in the cost matrix.
- if cost_matrix.shape[1] < cost_matrix.shape[0]:
- cost_matrix = cost_matrix.T
- transposed = True
- else:
- transposed = False
- state = _Hungary(cost_matrix)
- # No need to bother with assignments if one of the dimensions
- # of the cost matrix is zero-length.
- step = None if 0 in cost_matrix.shape else _step1
- while step is not None:
- step = step(state)
- if transposed:
- marked = state.marked.T
- else:
- marked = state.marked
- return np.nonzero(marked == 1)
- class _Hungary(object):
- """State of the Hungarian algorithm.
- Parameters
- ----------
- cost_matrix : 2D matrix
- The cost matrix. Must have shape[1] >= shape[0].
- """
- def __init__(self, cost_matrix):
- self.C = cost_matrix.copy()
- n, m = self.C.shape
- self.row_uncovered = np.ones(n, dtype=bool)
- self.col_uncovered = np.ones(m, dtype=bool)
- self.Z0_r = 0
- self.Z0_c = 0
- self.path = np.zeros((n + m, 2), dtype=int)
- self.marked = np.zeros((n, m), dtype=int)
- def _clear_covers(self):
- """Clear all covered matrix cells"""
- self.row_uncovered[:] = True
- self.col_uncovered[:] = True
- # Individual steps of the algorithm follow, as a state machine: they return
- # the next step to be taken (function to be called), if any.
- def _step1(state):
- """Steps 1 and 2 in the Wikipedia page."""
- # Step 1: For each row of the matrix, find the smallest element and
- # subtract it from every element in its row.
- state.C -= state.C.min(axis=1)[:, np.newaxis]
- # Step 2: Find a zero (Z) in the resulting matrix. If there is no
- # starred zero in its row or column, star Z. Repeat for each element
- # in the matrix.
- for i, j in zip(*np.nonzero(state.C == 0)):
- if state.col_uncovered[j] and state.row_uncovered[i]:
- state.marked[i, j] = 1
- state.col_uncovered[j] = False
- state.row_uncovered[i] = False
- state._clear_covers()
- return _step3
- def _step3(state):
- """
- Cover each column containing a starred zero. If n columns are covered,
- the starred zeros describe a complete set of unique assignments.
- In this case, Go to DONE, otherwise, Go to Step 4.
- """
- marked = (state.marked == 1)
- state.col_uncovered[np.any(marked, axis=0)] = False
- if marked.sum() < state.C.shape[0]:
- return _step4
- def _step4(state):
- """
- Find a noncovered zero and prime it. If there is no starred zero
- in the row containing this primed zero, Go to Step 5. Otherwise,
- cover this row and uncover the column containing the starred
- zero. Continue in this manner until there are no uncovered zeros
- left. Save the smallest uncovered value and Go to Step 6.
- """
- # We convert to int as numpy operations are faster on int
- C = (state.C == 0).astype(int)
- covered_C = C * state.row_uncovered[:, np.newaxis]
- covered_C *= np.asarray(state.col_uncovered, dtype=int)
- n = state.C.shape[0]
- m = state.C.shape[1]
- while True:
- # Find an uncovered zero
- row, col = np.unravel_index(np.argmax(covered_C), (n, m))
- if covered_C[row, col] == 0:
- return _step6
- else:
- state.marked[row, col] = 2
- # Find the first starred element in the row
- star_col = np.argmax(state.marked[row] == 1)
- if state.marked[row, star_col] != 1:
- # Could not find one
- state.Z0_r = row
- state.Z0_c = col
- return _step5
- else:
- col = star_col
- state.row_uncovered[row] = False
- state.col_uncovered[col] = True
- covered_C[:, col] = C[:, col] * (
- np.asarray(state.row_uncovered, dtype=int))
- covered_C[row] = 0
- def _step5(state):
- """
- Construct a series of alternating primed and starred zeros as follows.
- Let Z0 represent the uncovered primed zero found in Step 4.
- Let Z1 denote the starred zero in the column of Z0 (if any).
- Let Z2 denote the primed zero in the row of Z1 (there will always be one).
- Continue until the series terminates at a primed zero that has no starred
- zero in its column. Unstar each starred zero of the series, star each
- primed zero of the series, erase all primes and uncover every line in the
- matrix. Return to Step 3
- """
- count = 0
- path = state.path
- path[count, 0] = state.Z0_r
- path[count, 1] = state.Z0_c
- while True:
- # Find the first starred element in the col defined by
- # the path.
- row = np.argmax(state.marked[:, path[count, 1]] == 1)
- if state.marked[row, path[count, 1]] != 1:
- # Could not find one
- break
- else:
- count += 1
- path[count, 0] = row
- path[count, 1] = path[count - 1, 1]
- # Find the first prime element in the row defined by the
- # first path step
- col = np.argmax(state.marked[path[count, 0]] == 2)
- if state.marked[row, col] != 2:
- col = -1
- count += 1
- path[count, 0] = path[count - 1, 0]
- path[count, 1] = col
- # Convert paths
- for i in range(count + 1):
- if state.marked[path[i, 0], path[i, 1]] == 1:
- state.marked[path[i, 0], path[i, 1]] = 0
- else:
- state.marked[path[i, 0], path[i, 1]] = 1
- state._clear_covers()
- # Erase all prime markings
- state.marked[state.marked == 2] = 0
- return _step3
- def _step6(state):
- """
- Add the value found in Step 4 to every element of each covered row,
- and subtract it from every element of each uncovered column.
- Return to Step 4 without altering any stars, primes, or covered lines.
- """
- # the smallest uncovered value in the matrix
- if np.any(state.row_uncovered) and np.any(state.col_uncovered):
- minval = np.min(state.C[state.row_uncovered], axis=0)
- minval = np.min(minval[state.col_uncovered])
- state.C[~state.row_uncovered] += minval
- state.C[:, state.col_uncovered] -= minval
- return _step4
|