# test_sketches.py
  1. """Tests for _sketches.py."""
  2. from __future__ import division, print_function, absolute_import
  3. import numpy as np
  4. from scipy.linalg import clarkson_woodruff_transform
  5. from numpy.testing import assert_
  6. def make_random_dense_gaussian_matrix(n_rows, n_columns, mu=0, sigma=0.01):
  7. """
  8. Make some random data with Gaussian distributed values
  9. """
  10. np.random.seed(142352345)
  11. res = np.random.normal(mu, sigma, n_rows*n_columns)
  12. return np.reshape(res, (n_rows, n_columns))
  13. class TestClarksonWoodruffTransform(object):
  14. """
  15. Testing the Clarkson Woodruff Transform
  16. """
  17. # Big dense matrix dimensions
  18. n_matrix_rows = 2000
  19. n_matrix_columns = 100
  20. # Sketch matrix dimensions
  21. n_sketch_rows = 100
  22. # Error threshold
  23. threshold = 0.1
  24. dense_big_matrix = make_random_dense_gaussian_matrix(n_matrix_rows,
  25. n_matrix_columns)
  26. def test_sketch_dimensions(self):
  27. sketch = clarkson_woodruff_transform(self.dense_big_matrix,
  28. self.n_sketch_rows)
  29. assert_(sketch.shape == (self.n_sketch_rows,
  30. self.dense_big_matrix.shape[1]))
  31. def test_sketch_rows_norm(self):
  32. # Given the probabilistic nature of the sketches
  33. # we run the 'test' multiple times and check that
  34. # we pass all/almost all the tries
  35. n_errors = 0
  36. seeds = [1755490010, 934377150, 1391612830, 1752708722, 2008891431,
  37. 1302443994, 1521083269, 1501189312, 1126232505, 1533465685]
  38. for seed_ in seeds:
  39. sketch = clarkson_woodruff_transform(self.dense_big_matrix,
  40. self.n_sketch_rows, seed_)
  41. # We could use other norms (like L2)
  42. err = np.linalg.norm(self.dense_big_matrix) - np.linalg.norm(sketch)
  43. if err > self.threshold:
  44. n_errors += 1
  45. assert_(n_errors == 0)