12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092 |
- #
- # Author: Damian Eads
- # Date: April 17, 2008
- #
- # Copyright (C) 2008 Damian Eads
- #
- # Redistribution and use in source and binary forms, with or without
- # modification, are permitted provided that the following conditions
- # are met:
- #
- # 1. Redistributions of source code must retain the above copyright
- # notice, this list of conditions and the following disclaimer.
- #
- # 2. Redistributions in binary form must reproduce the above
- # copyright notice, this list of conditions and the following
- # disclaimer in the documentation and/or other materials provided
- # with the distribution.
- #
- # 3. The name of the author may not be used to endorse or promote
- # products derived from this software without specific prior
- # written permission.
- #
- # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
- # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
- # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
- # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- from __future__ import division, print_function, absolute_import
- import os.path
- from functools import wraps, partial
- from scipy._lib.six import xrange, u
- import numpy as np
- import warnings
- from numpy.linalg import norm
- from numpy.testing import (verbose, assert_,
- assert_array_equal, assert_equal,
- assert_almost_equal, assert_allclose)
- import pytest
- from pytest import raises as assert_raises
- from scipy._lib._numpy_compat import suppress_warnings
- from scipy.spatial.distance import (squareform, pdist, cdist, num_obs_y,
- num_obs_dm, is_valid_dm, is_valid_y,
- _validate_vector, _METRICS_NAMES)
- # these were missing: chebyshev cityblock kulsinski
- from scipy.spatial.distance import (braycurtis, canberra, chebyshev, cityblock,
- correlation, cosine, dice, euclidean,
- hamming, jaccard, jensenshannon,
- kulsinski, mahalanobis, matching,
- minkowski, rogerstanimoto, russellrao,
- seuclidean, sokalmichener, sokalsneath,
- sqeuclidean, yule)
- from scipy.spatial.distance import wminkowski as old_wminkowski
# Names of the reference fixture files; they are read from the ``data``
# directory that sits next to this module.
_filenames = [
    "cdist-X1.txt",
    "cdist-X2.txt",
    "iris.txt",
    "pdist-boolean-inp.txt",
    "pdist-chebyshev-ml-iris.txt",
    "pdist-chebyshev-ml.txt",
    "pdist-cityblock-ml-iris.txt",
    "pdist-cityblock-ml.txt",
    "pdist-correlation-ml-iris.txt",
    "pdist-correlation-ml.txt",
    "pdist-cosine-ml-iris.txt",
    "pdist-cosine-ml.txt",
    "pdist-double-inp.txt",
    "pdist-euclidean-ml-iris.txt",
    "pdist-euclidean-ml.txt",
    "pdist-hamming-ml.txt",
    "pdist-jaccard-ml.txt",
    "pdist-jensenshannon-ml-iris.txt",
    "pdist-jensenshannon-ml.txt",
    "pdist-minkowski-3.2-ml-iris.txt",
    "pdist-minkowski-3.2-ml.txt",
    "pdist-minkowski-5.8-ml-iris.txt",
    "pdist-seuclidean-ml-iris.txt",
    "pdist-seuclidean-ml.txt",
    "pdist-spearman-ml.txt",
    "random-bool-data.txt",
    "random-double-data.txt",
    "random-int-data.txt",
    "random-uint-data.txt",
]

# A small symmetric 6x6 distance matrix with a zero diagonal ...
_tdist = np.array(
    [
        [0, 662, 877, 255, 412, 996],
        [662, 0, 295, 468, 268, 400],
        [877, 295, 0, 754, 564, 138],
        [255, 468, 754, 0, 219, 869],
        [412, 268, 564, 219, 0, 669],
        [996, 400, 138, 869, 669, 0],
    ],
    dtype='double',
)

# ... and the same distances after conversion by ``squareform``.
_ytdist = squareform(_tdist)

# Lookup table of test inputs and expected outputs, keyed by fixture name.
# It starts empty and is filled from the text files before any test runs;
# each test reads its arrays from here.
eo = dict()
def load_testing_files():
    """Fill the module-level ``eo`` dictionary from the fixture files.

    Every name in ``_filenames`` is read with ``np.loadtxt`` from the
    ``data`` directory located beside this module.  The dictionary key is
    the file name with ``.txt`` and every ``-ml`` removed.

    After loading, the boolean, int and uint entries are converted to the
    dtype their key names, and ``random-float32-data`` is created as a
    float32 copy of ``random-double-data``.
    """
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    for fn in _filenames:
        name = fn.replace(".txt", "").replace("-ml", "")
        # The context manager closes the handle even when parsing raises,
        # which the manual open()/close() pair did not guarantee.
        with open(os.path.join(data_dir, fn)) as fp:
            eo[name] = np.loadtxt(fp)

    # np.loadtxt yields floating point data; restore the intended dtypes.
    eo['pdist-boolean-inp'] = np.bool_(eo['pdist-boolean-inp'])
    eo['random-bool-data'] = np.bool_(eo['random-bool-data'])
    eo['random-float32-data'] = np.float32(eo['random-double-data'])
    eo['random-int-data'] = np.int_(eo['random-int-data'])
    eo['random-uint-data'] = np.uint(eo['random-uint-data'])


# Populate ``eo`` once, at import time, so every test can rely on it.
load_testing_files()
def _chk_asarrays(arrays, axis=None):
    """Convert the inputs to arrays and resolve ``axis`` to a non-negative index.

    Parameters
    ----------
    arrays : iterable of array_like
        Converted with ``np.asanyarray`` (subclasses are kept) and promoted
        to at least one dimension.
    axis : int or None
        ``None`` flattens every input that is not already 1-D and selects
        axis 0.  A negative value is resolved against the number of
        dimensions of the first array.

    Returns
    -------
    tuple
        The converted arrays followed by the resolved axis as last element.

    Raises
    ------
    ValueError
        If ``axis`` is negative and the arrays differ in ``ndim``.
    """
    flatten = axis is None
    converted = []
    for item in arrays:
        arr = np.asanyarray(item)
        # Only ravel when needed: on np < 1.10 ravel drops array subclasses.
        if flatten and arr.ndim != 1:
            arr = np.ravel(arr)
        converted.append(np.atleast_1d(arr))
    converted = tuple(converted)

    if flatten:
        axis = 0

    if axis < 0:
        ref_ndim = converted[0].ndim
        for arr in converted:
            if arr.ndim != ref_ndim:
                raise ValueError("array ndim must be the same for neg axis")
        # Indexing a range turns e.g. -1 into ndim - 1.
        axis = range(ref_ndim)[axis]

    return converted + (axis,)
def _chk_weights(arrays, weights=None, axis=None,
                 force_weights=False, simplify_weights=True,
                 pos_only=False, neg_check=False,
                 nan_screen=False, mask_screen=False,
                 ddof=None):
    """Normalise data arrays together with their observation weights.

    Parameters
    ----------
    arrays : iterable of array_like
        Data arrays; they are passed through ``_chk_asarrays``.
    weights : array_like or None
        One weight per observation along ``axis``.
    axis : int or None
        Observation axis, resolved by ``_chk_asarrays``.
    force_weights : bool
        Create an all-ones weight vector when ``weights`` is None and never
        simplify the weights back to None.
    simplify_weights : bool
        Return None instead of a weight vector that is all ones.
    pos_only : bool
        Drop observations whose weight is not strictly positive.
    neg_check : bool
        Raise if any weight is negative.
    nan_screen : bool
        Mask invalid entries of arrays containing NaN and zero the weights
        of the affected observations.
    mask_screen : bool
        Zero the weights of observations that are masked in any array.
    ddof : object
        When truthy, the weights must be integral and are converted with
        ``_freq_weights``.

    Returns
    -------
    tuple
        The arrays followed by ``weights`` (possibly None) and ``axis``.

    Raises
    ------
    ValueError
        On a weight/array shape mismatch, or on negative weights when
        ``neg_check`` is set.
    """
    normalized = _chk_asarrays(arrays, axis=axis)
    arrays = normalized[:-1]
    axis = normalized[-1]

    if force_weights:
        # Explicitly requested weights are never collapsed back to None.
        simplify_weights = False
    elif mask_screen:
        # Masked input needs weights so the masked rows can be zeroed.
        force_weights = any(np.ma.getmask(a) is not np.ma.nomask for a in arrays)

    if nan_screen:
        nan_flags = [np.isnan(np.sum(a)) for a in arrays]
        if any(nan_flags):
            mask_screen = True
            force_weights = True
            arrays = tuple(np.ma.masked_invalid(a) if flagged else a
                           for a, flagged in zip(arrays, nan_flags))

    if weights is None:
        if not force_weights:
            # Nothing to validate: hand back the unweighted form.
            return arrays + (weights, axis)
        weights = np.ones(arrays[0].shape[axis])
    else:
        weights = np.asanyarray(weights)

    if ddof:
        weights = _freq_weights(weights)

    if mask_screen:
        weights = _weight_masked(arrays, weights, axis)

    shapes_agree = all(weights.shape == (a.shape[axis],) for a in arrays)
    if not shapes_agree:
        raise ValueError("weights shape must match arrays along axis")
    if neg_check and (weights < 0).any():
        raise ValueError("weights cannot be negative")

    if pos_only:
        keep = np.nonzero(weights > 0)[0]
        if keep.size < weights.size:
            arrays = tuple(np.take(a, keep, axis=axis) for a in arrays)
            weights = weights[keep]

    if simplify_weights and (weights == 1).all():
        weights = None
    return arrays + (weights, axis)
def _freq_weights(weights):
    """Return ``weights`` converted to integers, or None if it is None.

    Raises
    ------
    ValueError
        If any weight changes value when cast to ``int``.
    """
    if weights is None:
        return None
    counts = weights.astype(int)
    changed_by_cast = weights != counts
    if changed_by_cast.any():
        raise ValueError("frequency (integer count-type) weights required %s" % weights)
    return counts
def _weight_masked(arrays, weights, axis):
    """Return weights with masked observations set to zero.

    Parameters
    ----------
    arrays : iterable of ndarray or MaskedArray
        Data arrays; arrays without a mask are skipped.
    weights : array_like
        One weight per observation along ``axis``.
    axis : int or None
        Observation axis; ``None`` is treated as 0.

    Returns
    -------
    ndarray
        ``weights`` multiplied by 0 for every observation that has a masked
        entry in any of the arrays, and by 1 otherwise.  The caller's
        ``weights`` object is never modified.
    """
    if axis is None:
        axis = 0
    weights = np.asanyarray(weights)
    for a in arrays:
        axis_mask = np.ma.getmask(a)
        if axis_mask is np.ma.nomask:
            continue
        if a.ndim > 1:
            # Collapse every other axis: an observation counts as masked
            # when any of its entries is masked.
            not_axes = tuple(i for i in range(a.ndim) if i != axis)
            axis_mask = axis_mask.any(axis=not_axes)
        # Multiply out of place: ``np.asanyarray`` does not copy, so an
        # in-place ``*=`` would overwrite the array owned by the caller.
        weights = weights * (1 - axis_mask.astype(int))
    return weights
def within_tol(a, b, tol):
    """Tell whether the largest absolute difference of ``a`` and ``b`` is strictly below ``tol``."""
    largest_gap = np.abs(a - b).max()
    return largest_gap < tol
def _assert_within_tol(a, b, atol=0, rtol=0, verbose_=False):
    """Assert that ``a`` and ``b`` agree within the given tolerances.

    The comparison is delegated to ``assert_allclose``.  When ``verbose_``
    is true, the largest absolute difference is printed first.
    """
    if verbose_:
        largest_gap = np.abs(a - b).max()
        print(largest_gap)
    assert_allclose(a, b, atol=atol, rtol=rtol)
def _rand_split(arrays, weights, axis, split_per, seed=None):
    """Randomly split observations into two that share the original weight.

    This is the inverse operation of stats.collapse_weights.  It performs
    ``int(split_per) * n_obs`` rounds.  Each round picks one observation
    (among the original and the already appended ones), keeps a random
    fraction ``q`` of its weight in place and appends a copy of the
    observation carrying the remaining ``1 - q`` share.

    Parameters
    ----------
    arrays : sequence of ndarray
        Data arrays with equal length along ``axis``.
    weights : array_like
        Observation weights; copied, never modified.
    axis : int
        Observation axis.
    split_per : number
        Splits per observation; truncated with ``int``.
    seed : optional
        Seed handed to ``np.random.RandomState``.

    Returns
    -------
    (arrays, weights)
        The extended arrays (the input object itself when no round ran)
        and the extended float64 weight vector.
    """
    # Entries are overwritten while splitting, so work on a float copy.
    weights = np.array(weights, dtype=np.float64)
    rng = np.random.RandomState(seed)

    def _single_record(source, index, axis):
        # Take one observation and give it back its sample axis (length 1)
        # so that it can be appended along that axis.
        picked = np.asanyarray(np.take(source, index, axis=axis))
        return picked.reshape([source.shape[i] if i != axis else 1
                               for i in range(source.ndim)])

    n_obs = arrays[0].shape[axis]
    assert all(a.shape[axis] == n_obs for a in arrays), "data must be aligned on sample axis"

    n_rounds = int(split_per) * n_obs
    for step in range(n_rounds):
        # Draw order (index first, fraction second) fixes the seeded output.
        target = rng.randint(n_obs + step)
        original_weight = weights[target]
        fraction = rng.rand()
        weights[target] = fraction * original_weight
        weights = np.append(weights, (1. - fraction) * original_weight)
        grown = []
        for a in arrays:
            duplicate = _single_record(a, target, axis=axis)
            grown.append(np.append(a, duplicate, axis=axis))
        arrays = grown
    return arrays, weights
def _rough_check(a, b, compare_assert=partial(assert_allclose, atol=1e-5),
                 key=lambda x: x, w=None):
    """Assert that ``key(a)`` and ``key(b)`` are equal or approximately equal.

    Strict inequality is evaluated first; ``compare_assert`` is only called
    when the values differ.  If the inequality test raises AttributeError,
    ``compare_assert`` is applied directly.  On TypeError or ValueError the
    two values are walked pairwise and checked recursively (the recursion
    uses the default ``key``).  ``w`` is accepted but not used.
    """
    lhs = key(a)
    rhs = key(b)
    try:
        # Strict comparison first so that e.g. equal strings pass.
        differs = np.array(lhs != rhs)
        if differs.any():
            compare_assert(lhs, rhs)
    except AttributeError:
        # masked array
        compare_assert(lhs, rhs)
    except (TypeError, ValueError):
        # nested data structure
        for lhs_item, rhs_item in zip(lhs, rhs):
            _rough_check(lhs_item, rhs_item, compare_assert=compare_assert)
- # diff from test_stats:
- # n_args=2, weight_arg='w', default_axis=None
- # ma_safe = False, nan_safe = False
def _weight_checked(fn, n_args=2, default_axis=None, key=lambda x: x, weight_arg='w',
                    squeeze=True, silent=False,
                    ones_test=True, const_test=True, dup_test=True,
                    split_test=True, dud_test=True, ma_safe=False, ma_very_safe=False, nan_safe=False,
                    split_per=1.0, seed=0, compare_assert=partial(assert_allclose, atol=1e-5)):
    """Wrap ``fn`` so that each call also cross-checks its weight handling.

    The wrapper calls ``fn`` once as requested, then calls it again with
    equivalent re-weightings of the same data and asserts (through
    ``_rough_check``) that every result matches the first one.  It finally
    returns the first result, i.e. what ``fn`` would have returned anyway.

    Parameters
    ----------
    fn : callable
        Function under test.
    n_args : int
        Number of leading positional arguments that are data arrays.
    default_axis : int or None
        Observation axis used when the call has no ``axis`` keyword.
    key : callable
        Applied to both results before they are compared.
    weight_arg : str
        Name of the keyword through which weights are passed to ``fn``.
    squeeze : bool
        Squeeze the data arrays (keeping at least 1-D) before re-calling.
    silent : bool
        Do not warn when ``fn`` raises NotImplementedError for weights.
    ones_test, const_test, dup_test, split_test, dud_test : bool
        Enable the individual checks: explicit weights, constant rescaling
        of the weights, duplicated data at half weight, random splitting,
        and extra zero-weighted observations.
    ma_safe, ma_very_safe, nan_safe : bool
        Extra variants of the zero-weight check: masked NaN rows, masked
        NaN rows without weights (only evaluated together with
        ``ma_safe``), and NaN rows when the call has ``nan_policy="omit"``.
    split_per, seed
        Forwarded to ``_rand_split``.
    compare_assert : callable
        Approximate comparison forwarded to ``_rough_check``.

    Returns
    -------
    callable
        The checking wrapper around ``fn``.
    """
    @wraps(fn)
    def wrapped(*args, **kwargs):
        result = fn(*args, **kwargs)

        def _check(candidate):
            # Forward ``compare_assert`` so that a comparator supplied to
            # ``_weight_checked`` is honoured instead of being ignored.
            _rough_check(result, candidate,
                         compare_assert=compare_assert, key=key)

        arrays = args[:n_args]
        rest = args[n_args:]
        weights = kwargs.get(weight_arg, None)
        axis = kwargs.get('axis', default_axis)

        chked = _chk_weights(arrays, weights=weights, axis=axis,
                             force_weights=True, mask_screen=True)
        arrays, weights, axis = chked[:-2], chked[-2], chked[-1]
        if squeeze:
            arrays = [np.atleast_1d(a.squeeze()) for a in arrays]

        try:
            # WEIGHTS CHECK 1: EQUALLY WEIGHTED OBSERVATIONS
            args = tuple(arrays) + rest
            if ones_test:
                kwargs[weight_arg] = weights
                _check(fn(*args, **kwargs))
            if const_test:
                kwargs[weight_arg] = weights * 101.0
                _check(fn(*args, **kwargs))
                kwargs[weight_arg] = weights * 0.101
                try:
                    _check(fn(*args, **kwargs))
                except Exception as e:
                    # Re-raise with the offending inputs attached.
                    raise type(e)((e, arrays, weights))

            # WEIGHTS CHECK 2: ADDITIONAL 0-WEIGHTED OBSERVATIONS
            if dud_test:
                # add randomly resampled rows, weighted at 0
                dud_arrays, dud_weights = _rand_split(
                    arrays, weights, axis, split_per=split_per, seed=seed)
                dud_weights[:weights.size] = weights  # not exactly 1 because of masked arrays
                dud_weights[weights.size:] = 0
                dud_args = tuple(dud_arrays) + rest
                kwargs[weight_arg] = dud_weights
                _check(fn(*dud_args, **kwargs))
                # increase the value of those 0-weighted rows
                for a in dud_arrays:
                    indexer = [slice(None)] * a.ndim
                    indexer[axis] = slice(weights.size, None)
                    indexer = tuple(indexer)
                    a[indexer] = a[indexer] * 101
                dud_args = tuple(dud_arrays) + rest
                _check(fn(*dud_args, **kwargs))
                # set those 0-weighted rows to NaNs
                for a in dud_arrays:
                    indexer = [slice(None)] * a.ndim
                    indexer[axis] = slice(weights.size, None)
                    indexer = tuple(indexer)
                    a[indexer] = a[indexer] * np.nan
                if kwargs.get("nan_policy", None) == "omit" and nan_safe:
                    dud_args = tuple(dud_arrays) + rest
                    _check(fn(*dud_args, **kwargs))
                # mask out those nan values
                if ma_safe:
                    dud_arrays = [np.ma.masked_invalid(a) for a in dud_arrays]
                    dud_args = tuple(dud_arrays) + rest
                    _check(fn(*dud_args, **kwargs))
                    if ma_very_safe:
                        kwargs[weight_arg] = None
                        _check(fn(*dud_args, **kwargs))
                del dud_arrays, dud_args, dud_weights

            # WEIGHTS CHECK 3: DUPLICATE DATA (DUMB SPLITTING)
            if dup_test:
                dup_arrays = [np.append(a, a, axis=axis) for a in arrays]
                dup_weights = np.append(weights, weights) / 2.0
                dup_args = tuple(dup_arrays) + rest
                kwargs[weight_arg] = dup_weights
                _check(fn(*dup_args, **kwargs))
                del dup_args, dup_arrays, dup_weights

            # WEIGHTS CHECK 4: RANDOM SPLITTING
            if split_test and split_per > 0:
                split_arrays, split_weights = _rand_split(
                    arrays, weights, axis, split_per=split_per, seed=seed)
                split_args = tuple(split_arrays) + rest
                kwargs[weight_arg] = split_weights
                _check(fn(*split_args, **kwargs))
        except NotImplementedError as e:
            # when some combination of arguments makes weighting impossible,
            # this is the desired response
            if not silent:
                warnings.warn("%s NotImplemented weights: %s" % (fn.__name__, e))
        return result
    return wrapped
# Weight-checked variants of cdist/pdist.  The ``*_no_const`` flavours skip
# the check that rescales all weights by a constant.
wcdist = _weight_checked(cdist, default_axis=1, squeeze=False)
wcdist_no_const = _weight_checked(
    cdist, default_axis=1, squeeze=False, const_test=False)
wpdist = _weight_checked(pdist, default_axis=1, squeeze=False, n_args=1)
wpdist_no_const = _weight_checked(
    pdist, default_axis=1, squeeze=False, const_test=False, n_args=1)

# Weight-checked variants of the individual distance functions.
wrogerstanimoto = _weight_checked(rogerstanimoto)
# ``wmatching`` is the very same wrapper object as ``whamming``.
whamming = _weight_checked(hamming, dud_test=False)
wmatching = whamming
wyule = _weight_checked(yule)
wdice = _weight_checked(dice)
wcityblock = _weight_checked(cityblock)
wchebyshev = _weight_checked(chebyshev)
wcosine = _weight_checked(cosine)
wcorrelation = _weight_checked(correlation)
wkulsinski = _weight_checked(kulsinski)
wminkowski = _weight_checked(minkowski, const_test=False)
wjaccard = _weight_checked(jaccard)
weuclidean = _weight_checked(euclidean, const_test=False)
wsqeuclidean = _weight_checked(sqeuclidean, const_test=False)
wbraycurtis = _weight_checked(braycurtis)
wcanberra = _weight_checked(canberra, const_test=False)
wsokalsneath = _weight_checked(sokalsneath)
wsokalmichener = _weight_checked(sokalmichener)
wrussellrao = _weight_checked(russellrao)
class TestCdist(object):
    """Tests for ``cdist``.

    Whenever the verbosity flag is handed to ``_assert_within_tol`` it is
    passed as ``verbose_=...``.  Passed positionally it would land in the
    ``rtol`` parameter and turn a high verbosity into a relative tolerance
    of 1.0.
    """

    def setup_method(self):
        """Prepare the fixture names and the dtype up-casts used by the tests."""
        self.rnd_eo_names = ['random-float32-data', 'random-int-data',
                             'random-uint-data', 'random-double-data',
                             'random-bool-data']
        self.valid_upcasts = {'bool': [np.uint, np.int_, np.float32, np.double],
                              'uint': [np.int_, np.float32, np.double],
                              'int': [np.float32, np.double],
                              'float32': [np.double]}

    def test_cdist_extra_args(self):
        # Tests that args and kwargs are correctly handled
        def _my_metric(x, y, arg, kwarg=1, kwarg2=2):
            return arg + kwarg + kwarg2

        X1 = [[1., 2., 3.], [1.2, 2.3, 3.4], [2.2, 2.3, 4.4]]
        X2 = [[7., 5., 8.], [7.5, 5.8, 8.4], [5.5, 5.8, 4.4]]
        kwargs = {'N0tV4l1D_p4raM': 3.14, "w": np.arange(3)}
        args = [3.14] * 200

        with suppress_warnings() as w:
            w.filter(DeprecationWarning)
            for metric in _METRICS_NAMES:
                # Each metric is addressed by name, by the callable that
                # the name resolves to in this module, and by its
                # "test_"-prefixed name.
                assert_raises(TypeError, cdist, X1, X2,
                              metric=metric, **kwargs)
                assert_raises(TypeError, cdist, X1, X2,
                              metric=eval(metric), **kwargs)
                assert_raises(TypeError, cdist, X1, X2,
                              metric="test_" + metric, **kwargs)
                assert_raises(TypeError, cdist, X1, X2,
                              metric=metric, *args)
                assert_raises(TypeError, cdist, X1, X2,
                              metric=eval(metric), *args)
                assert_raises(TypeError, cdist, X1, X2,
                              metric="test_" + metric, *args)

        assert_raises(TypeError, cdist, X1, X2, _my_metric)
        assert_raises(TypeError, cdist, X1, X2, _my_metric, *args)
        assert_raises(TypeError, cdist, X1, X2, _my_metric, **kwargs)
        assert_raises(TypeError, cdist, X1, X2, _my_metric,
                      kwarg=2.2, kwarg2=3.3)
        assert_raises(TypeError, cdist, X1, X2, _my_metric, 1, 2, kwarg=2.2)
        assert_raises(TypeError, cdist, X1, X2, _my_metric, 1.1, 2.2, 3.3)
        assert_raises(TypeError, cdist, X1, X2, _my_metric, 1.1, 2.2)
        assert_raises(TypeError, cdist, X1, X2, _my_metric, 1.1)
        assert_raises(TypeError, cdist, X1, X2, _my_metric, 1.1,
                      kwarg=2.2, kwarg2=3.3)

        # this should work
        assert_allclose(cdist(X1, X2, metric=_my_metric,
                              arg=1.1, kwarg2=3.3), 5.4)

    def test_cdist_euclidean_random_unicode(self):
        """'euclidean' and 'test_euclidean' agree when named via ``u()`` strings."""
        eps = 1e-07
        X1 = eo['cdist-X1']
        X2 = eo['cdist-X2']
        Y1 = wcdist_no_const(X1, X2, u('euclidean'))
        Y2 = wcdist_no_const(X1, X2, u('test_euclidean'))
        _assert_within_tol(Y1, Y2, eps, verbose_=verbose > 2)

    def test_cdist_minkowski_random_p3d8(self):
        """'minkowski' and 'test_minkowski' agree for p=3.8."""
        eps = 1e-07
        X1 = eo['cdist-X1']
        X2 = eo['cdist-X2']
        Y1 = wcdist_no_const(X1, X2, 'minkowski', p=3.8)
        Y2 = wcdist_no_const(X1, X2, 'test_minkowski', p=3.8)
        _assert_within_tol(Y1, Y2, eps, verbose_=verbose > 2)

    def test_cdist_minkowski_random_p4d6(self):
        """'minkowski' and 'test_minkowski' agree for p=4.6."""
        eps = 1e-07
        X1 = eo['cdist-X1']
        X2 = eo['cdist-X2']
        Y1 = wcdist_no_const(X1, X2, 'minkowski', p=4.6)
        Y2 = wcdist_no_const(X1, X2, 'test_minkowski', p=4.6)
        _assert_within_tol(Y1, Y2, eps, verbose_=verbose > 2)

    def test_cdist_minkowski_random_p1d23(self):
        """'minkowski' and 'test_minkowski' agree for p=1.23."""
        eps = 1e-07
        X1 = eo['cdist-X1']
        X2 = eo['cdist-X2']
        Y1 = wcdist_no_const(X1, X2, 'minkowski', p=1.23)
        Y2 = wcdist_no_const(X1, X2, 'test_minkowski', p=1.23)
        _assert_within_tol(Y1, Y2, eps, verbose_=verbose > 2)

    def test_cdist_cosine_random(self):
        """'cosine' matches a direct numpy computation."""
        eps = 1e-07
        X1 = eo['cdist-X1']
        X2 = eo['cdist-X2']
        Y1 = wcdist(X1, X2, 'cosine')

        # Naive implementation
        def norms(X):
            return np.linalg.norm(X, axis=1).reshape(-1, 1)

        Y2 = 1 - np.dot((X1 / norms(X1)), (X2 / norms(X2)).T)

        _assert_within_tol(Y1, Y2, eps, verbose_=verbose > 2)

    def test_cdist_mahalanobis(self):
        """'mahalanobis' on tiny hand-checked inputs, plus the too-few-rows error."""
        # 1-dimensional observations
        x1 = np.array([[2], [3]])
        x2 = np.array([[2], [5]])
        dist = cdist(x1, x2, metric='mahalanobis')
        assert_allclose(dist, [[0.0, np.sqrt(4.5)], [np.sqrt(0.5), np.sqrt(2)]])

        # 2-dimensional observations
        x1 = np.array([[0, 0], [-1, 0]])
        x2 = np.array([[0, 2], [1, 0], [0, -2]])
        dist = cdist(x1, x2, metric='mahalanobis')
        rt2 = np.sqrt(2)
        assert_allclose(dist, [[rt2, rt2, rt2], [2, 2 * rt2, 2]])

        # Too few observations
        assert_raises(ValueError,
                      cdist, [[0, 1]], [[2, 3]], metric='mahalanobis')

    def test_cdist_custom_notdouble(self):
        """A callable metric receives object-dtype data without conversion."""
        class myclass(object):
            pass

        def _my_metric(x, y):
            if not isinstance(x[0], myclass) or not isinstance(y[0], myclass):
                raise ValueError("Type has been changed")
            return 1.123
        data = np.array([[myclass()]], dtype=object)
        cdist_y = cdist(data, data, metric=_my_metric)
        right_y = 1.123
        assert_equal(cdist_y, right_y, verbose=verbose > 2)

    def _check_calling_conventions(self, X1, X2, metric, eps=1e-07, **kwargs):
        # helper function for test_cdist_calling_conventions
        try:
            y1 = cdist(X1, X2, metric=metric, **kwargs)
            y2 = cdist(X1, X2, metric=eval(metric), **kwargs)
            y3 = cdist(X1, X2, metric="test_" + metric, **kwargs)
        except Exception as e:
            # If one spelling fails, all three must fail the same way.
            e_cls = e.__class__
            if verbose > 2:
                print(e_cls.__name__)
                print(e)
            assert_raises(e_cls, cdist, X1, X2, metric=metric, **kwargs)
            assert_raises(e_cls, cdist, X1, X2, metric=eval(metric), **kwargs)
            assert_raises(e_cls, cdist, X1, X2, metric="test_" + metric, **kwargs)
        else:
            _assert_within_tol(y1, y2, rtol=eps, verbose_=verbose > 2)
            _assert_within_tol(y1, y3, rtol=eps, verbose_=verbose > 2)

    def test_cdist_calling_conventions(self):
        # Ensures that specifying the metric with a str or scipy function
        # gives the same behaviour (i.e. same result or same exception).
        # NOTE: The correctness should be checked within each metric tests.
        for eo_name in self.rnd_eo_names:
            # subsampling input data to speed-up tests
            # NOTE: num samples needs to be > than dimensions for mahalanobis
            X1 = eo[eo_name][::5, ::-2]
            X2 = eo[eo_name][1::5, ::2]
            for metric in _METRICS_NAMES:
                if verbose > 2:
                    print("testing: ", metric, " with: ", eo_name)
                if metric == 'wminkowski':
                    continue
                if metric in {'dice', 'yule', 'kulsinski', 'matching',
                              'rogerstanimoto', 'russellrao', 'sokalmichener',
                              'sokalsneath'} and 'bool' not in eo_name:
                    # python version permits non-bools e.g. for fuzzy logic
                    continue
                self._check_calling_conventions(X1, X2, metric)

                # Testing built-in metrics with extra args
                if metric == "seuclidean":
                    X12 = np.vstack([X1, X2]).astype(np.double)
                    V = np.var(X12, axis=0, ddof=1)
                    self._check_calling_conventions(X1, X2, metric, V=V)
                elif metric == "mahalanobis":
                    X12 = np.vstack([X1, X2]).astype(np.double)
                    V = np.atleast_2d(np.cov(X12.T))
                    VI = np.array(np.linalg.inv(V).T)
                    self._check_calling_conventions(X1, X2, metric, VI=VI)

    def test_cdist_dtype_equivalence(self):
        # Tests that the result is not affected by type up-casting
        eps = 1e-07
        tests = [(eo['random-bool-data'], self.valid_upcasts['bool']),
                 (eo['random-uint-data'], self.valid_upcasts['uint']),
                 (eo['random-int-data'], self.valid_upcasts['int']),
                 (eo['random-float32-data'], self.valid_upcasts['float32'])]
        for metric in _METRICS_NAMES:
            for test in tests:
                X1 = test[0][::5, ::-2]
                X2 = test[0][1::5, ::2]
                try:
                    y1 = cdist(X1, X2, metric=metric)
                except Exception as e:
                    # The up-cast inputs must fail with the same exception.
                    e_cls = e.__class__
                    if verbose > 2:
                        print(e_cls.__name__)
                        print(e)
                    for new_type in test[1]:
                        X1new = new_type(X1)
                        X2new = new_type(X2)
                        assert_raises(e_cls, cdist, X1new, X2new, metric=metric)
                else:
                    for new_type in test[1]:
                        y2 = cdist(new_type(X1), new_type(X2), metric=metric)
                        _assert_within_tol(y1, y2, eps, verbose_=verbose > 2)

    def test_cdist_out(self):
        # Test that out parameter works properly
        eps = 1e-07
        X1 = eo['cdist-X1']
        X2 = eo['cdist-X2']
        out_r, out_c = X1.shape[0], X2.shape[0]
        for metric in _METRICS_NAMES:
            kwargs = dict()
            if metric in ['minkowski', 'wminkowski']:
                kwargs['p'] = 1.23
                if metric == 'wminkowski':
                    kwargs['w'] = 1.0 / X1.std(axis=0)
            out1 = np.empty((out_r, out_c), dtype=np.double)
            Y1 = cdist(X1, X2, metric, **kwargs)
            Y2 = cdist(X1, X2, metric, out=out1, **kwargs)
            # test that output is numerically equivalent
            _assert_within_tol(Y1, Y2, eps, verbose_=verbose > 2)
            # test that Y2 and out1 are the same object
            assert_(Y2 is out1)
            # test for incorrect shape
            out2 = np.empty((out_r-1, out_c+1), dtype=np.double)
            assert_raises(ValueError, cdist, X1, X2, metric, out=out2, **kwargs)
            # test for C-contiguous order
            out3 = np.empty((2 * out_r, 2 * out_c), dtype=np.double)[::2, ::2]
            out4 = np.empty((out_r, out_c), dtype=np.double, order='F')
            assert_raises(ValueError, cdist, X1, X2, metric, out=out3, **kwargs)
            assert_raises(ValueError, cdist, X1, X2, metric, out=out4, **kwargs)
            # test for incorrect dtype
            out5 = np.empty((out_r, out_c), dtype=np.int64)
            assert_raises(ValueError, cdist, X1, X2, metric, out=out5, **kwargs)

    def test_striding(self):
        # test that striding is handled correct with calls to
        # _copy_array_if_base_present
        eps = 1e-07
        X1 = eo['cdist-X1'][::2, ::2]
        X2 = eo['cdist-X2'][::2, ::2]
        X1_copy = X1.copy()
        X2_copy = X2.copy()

        # confirm equivalence
        assert_equal(X1, X1_copy)
        assert_equal(X2, X2_copy)
        # confirm contiguity
        assert_(not X1.flags.c_contiguous)
        assert_(not X2.flags.c_contiguous)
        assert_(X1_copy.flags.c_contiguous)
        assert_(X2_copy.flags.c_contiguous)

        for metric in _METRICS_NAMES:
            kwargs = dict()
            if metric in ['minkowski', 'wminkowski']:
                kwargs['p'] = 1.23
                if metric == 'wminkowski':
                    kwargs['w'] = 1.0 / X1.std(axis=0)
            Y1 = cdist(X1, X2, metric, **kwargs)
            Y2 = cdist(X1_copy, X2_copy, metric, **kwargs)
            # test that output is numerically equivalent
            _assert_within_tol(Y1, Y2, eps, verbose_=verbose > 2)
- class TestPdist(object):
def setup_method(self):
    """Prepare the fixture names and the dtype up-casts used by the tests."""
    self.rnd_eo_names = [
        'random-float32-data',
        'random-int-data',
        'random-uint-data',
        'random-double-data',
        'random-bool-data',
    ]
    # Each dtype may be up-cast to every type that follows it in this
    # chain, so the per-dtype lists are successive tails of it.
    widening = [np.uint, np.int_, np.float32, np.double]
    self.valid_upcasts = {
        'bool': widening[0:],
        'uint': widening[1:],
        'int': widening[2:],
        'float32': widening[3:],
    }
def test_pdist_extra_args(self):
    """pdist must reject unknown or surplus arguments with TypeError."""
    def _my_metric(x, y, arg, kwarg=1, kwarg2=2):
        return arg + kwarg + kwarg2

    X1 = [[1., 2.], [1.2, 2.3], [2.2, 2.3]]
    bad_kwargs = {'N0tV4l1D_p4raM': 3.14, "w": np.arange(2)}
    surplus_args = [3.14] * 200

    with suppress_warnings() as sup:
        sup.filter(DeprecationWarning)
        for metric in _METRICS_NAMES:
            # The metric by name, as the callable the name resolves to in
            # this module, and by its "test_"-prefixed name.
            spellings = (metric, eval(metric), "test_" + metric)
            for spelling in spellings:
                assert_raises(TypeError, pdist, X1,
                              metric=spelling, **bad_kwargs)
            for spelling in spellings:
                assert_raises(TypeError, pdist, X1,
                              metric=spelling, *surplus_args)

    # (positional, keyword) extras that do not fit ``_my_metric``.
    rejected_calls = [
        ((), {}),
        (tuple(surplus_args), {}),
        ((), bad_kwargs),
        ((), {'kwarg': 2.2, 'kwarg2': 3.3}),
        ((1, 2), {'kwarg': 2.2}),
        ((1.1, 2.2, 3.3), {}),
        ((1.1, 2.2), {}),
        ((1.1,), {}),
        ((1.1,), {'kwarg': 2.2, 'kwarg2': 3.3}),
    ]
    for extra_args, extra_kwargs in rejected_calls:
        assert_raises(TypeError, pdist, X1, _my_metric,
                      *extra_args, **extra_kwargs)

    # these should work
    accepted = pdist(X1, metric=_my_metric, arg=1.1, kwarg2=3.3)
    assert_allclose(accepted, 5.4)
def test_pdist_euclidean_random(self):
    """'euclidean' on the random double input matches the stored reference."""
    expected = eo['pdist-euclidean']
    computed = wpdist_no_const(eo['pdist-double-inp'], 'euclidean')
    _assert_within_tol(computed, expected, 1e-07)
- def test_pdist_euclidean_random_u(self):
- eps = 1e-07
- X = eo['pdist-double-inp']
- Y_right = eo['pdist-euclidean']
- Y_test1 = wpdist_no_const(X, u('euclidean'))
- _assert_within_tol(Y_test1, Y_right, eps)
- def test_pdist_euclidean_random_float32(self):
- eps = 1e-07
- X = np.float32(eo['pdist-double-inp'])
- Y_right = eo['pdist-euclidean']
- Y_test1 = wpdist_no_const(X, 'euclidean')
- _assert_within_tol(Y_test1, Y_right, eps)
- def test_pdist_euclidean_random_nonC(self):
- eps = 1e-07
- X = eo['pdist-double-inp']
- Y_right = eo['pdist-euclidean']
- Y_test2 = wpdist_no_const(X, 'test_euclidean')
- _assert_within_tol(Y_test2, Y_right, eps)
- @pytest.mark.slow
- def test_pdist_euclidean_iris_double(self):
- eps = 1e-07
- X = eo['iris']
- Y_right = eo['pdist-euclidean-iris']
- Y_test1 = wpdist_no_const(X, 'euclidean')
- _assert_within_tol(Y_test1, Y_right, eps)
- @pytest.mark.slow
- def test_pdist_euclidean_iris_float32(self):
- eps = 1e-06
- X = np.float32(eo['iris'])
- Y_right = eo['pdist-euclidean-iris']
- Y_test1 = wpdist_no_const(X, 'euclidean')
- _assert_within_tol(Y_test1, Y_right, eps, verbose > 2)
- @pytest.mark.slow
- def test_pdist_euclidean_iris_nonC(self):
- # Test pdist(X, 'test_euclidean') [the non-C implementation] on the
- # Iris data set.
- eps = 1e-07
- X = eo['iris']
- Y_right = eo['pdist-euclidean-iris']
- Y_test2 = wpdist_no_const(X, 'test_euclidean')
- _assert_within_tol(Y_test2, Y_right, eps)
- def test_pdist_seuclidean_random(self):
- eps = 1e-05
- X = eo['pdist-double-inp']
- Y_right = eo['pdist-seuclidean']
- Y_test1 = pdist(X, 'seuclidean')
- _assert_within_tol(Y_test1, Y_right, eps)
- def test_pdist_seuclidean_random_float32(self):
- eps = 1e-05
- X = np.float32(eo['pdist-double-inp'])
- Y_right = eo['pdist-seuclidean']
- Y_test1 = pdist(X, 'seuclidean')
- _assert_within_tol(Y_test1, Y_right, eps)
- def test_pdist_seuclidean_random_nonC(self):
- # Test pdist(X, 'test_sqeuclidean') [the non-C implementation]
- eps = 1e-05
- X = eo['pdist-double-inp']
- Y_right = eo['pdist-seuclidean']
- Y_test2 = pdist(X, 'test_seuclidean')
- _assert_within_tol(Y_test2, Y_right, eps)
- def test_pdist_seuclidean_iris(self):
- eps = 1e-05
- X = eo['iris']
- Y_right = eo['pdist-seuclidean-iris']
- Y_test1 = pdist(X, 'seuclidean')
- _assert_within_tol(Y_test1, Y_right, eps)
- def test_pdist_seuclidean_iris_float32(self):
- # Tests pdist(X, 'seuclidean') on the Iris data set (float32).
- eps = 1e-05
- X = np.float32(eo['iris'])
- Y_right = eo['pdist-seuclidean-iris']
- Y_test1 = pdist(X, 'seuclidean')
- _assert_within_tol(Y_test1, Y_right, eps)
- def test_pdist_seuclidean_iris_nonC(self):
- # Test pdist(X, 'test_seuclidean') [the non-C implementation] on the
- # Iris data set.
- eps = 1e-05
- X = eo['iris']
- Y_right = eo['pdist-seuclidean-iris']
- Y_test2 = pdist(X, 'test_seuclidean')
- _assert_within_tol(Y_test2, Y_right, eps)
- def test_pdist_cosine_random(self):
- eps = 1e-08
- X = eo['pdist-double-inp']
- Y_right = eo['pdist-cosine']
- Y_test1 = wpdist(X, 'cosine')
- _assert_within_tol(Y_test1, Y_right, eps)
- def test_pdist_cosine_random_float32(self):
- eps = 1e-08
- X = np.float32(eo['pdist-double-inp'])
- Y_right = eo['pdist-cosine']
- Y_test1 = wpdist(X, 'cosine')
- _assert_within_tol(Y_test1, Y_right, eps)
- def test_pdist_cosine_random_nonC(self):
- # Test pdist(X, 'test_cosine') [the non-C implementation]
- eps = 1e-08
- X = eo['pdist-double-inp']
- Y_right = eo['pdist-cosine']
- Y_test2 = wpdist(X, 'test_cosine')
- _assert_within_tol(Y_test2, Y_right, eps)
- @pytest.mark.slow
- def test_pdist_cosine_iris(self):
- eps = 1e-08
- X = eo['iris']
- Y_right = eo['pdist-cosine-iris']
- Y_test1 = wpdist(X, 'cosine')
- _assert_within_tol(Y_test1, Y_right, eps)
- @pytest.mark.slow
- def test_pdist_cosine_iris_float32(self):
- eps = 1e-07
- X = np.float32(eo['iris'])
- Y_right = eo['pdist-cosine-iris']
- Y_test1 = wpdist(X, 'cosine')
- _assert_within_tol(Y_test1, Y_right, eps, verbose > 2)
- @pytest.mark.slow
- def test_pdist_cosine_iris_nonC(self):
- eps = 1e-08
- X = eo['iris']
- Y_right = eo['pdist-cosine-iris']
- Y_test2 = wpdist(X, 'test_cosine')
- _assert_within_tol(Y_test2, Y_right, eps)
- def test_pdist_cosine_bounds(self):
- # Test adapted from @joernhees's example at gh-5208: case where
- # cosine distance used to be negative. XXX: very sensitive to the
- # specific norm computation.
- x = np.abs(np.random.RandomState(1337).rand(91))
- X = np.vstack([x, x])
- assert_(wpdist(X, 'cosine')[0] >= 0,
- msg='cosine distance should be non-negative')
- def test_pdist_cityblock_random(self):
- eps = 1e-06
- X = eo['pdist-double-inp']
- Y_right = eo['pdist-cityblock']
- Y_test1 = wpdist_no_const(X, 'cityblock')
- _assert_within_tol(Y_test1, Y_right, eps)
- def test_pdist_cityblock_random_float32(self):
- eps = 1e-06
- X = np.float32(eo['pdist-double-inp'])
- Y_right = eo['pdist-cityblock']
- Y_test1 = wpdist_no_const(X, 'cityblock')
- _assert_within_tol(Y_test1, Y_right, eps)
- def test_pdist_cityblock_random_nonC(self):
- eps = 1e-06
- X = eo['pdist-double-inp']
- Y_right = eo['pdist-cityblock']
- Y_test2 = wpdist_no_const(X, 'test_cityblock')
- _assert_within_tol(Y_test2, Y_right, eps)
- @pytest.mark.slow
- def test_pdist_cityblock_iris(self):
- eps = 1e-14
- X = eo['iris']
- Y_right = eo['pdist-cityblock-iris']
- Y_test1 = wpdist_no_const(X, 'cityblock')
- _assert_within_tol(Y_test1, Y_right, eps)
- @pytest.mark.slow
- def test_pdist_cityblock_iris_float32(self):
- eps = 1e-06
- X = np.float32(eo['iris'])
- Y_right = eo['pdist-cityblock-iris']
- Y_test1 = wpdist_no_const(X, 'cityblock')
- _assert_within_tol(Y_test1, Y_right, eps, verbose > 2)
- @pytest.mark.slow
- def test_pdist_cityblock_iris_nonC(self):
- # Test pdist(X, 'test_cityblock') [the non-C implementation] on the
- # Iris data set.
- eps = 1e-14
- X = eo['iris']
- Y_right = eo['pdist-cityblock-iris']
- Y_test2 = wpdist_no_const(X, 'test_cityblock')
- _assert_within_tol(Y_test2, Y_right, eps)
- def test_pdist_correlation_random(self):
- eps = 1e-07
- X = eo['pdist-double-inp']
- Y_right = eo['pdist-correlation']
- Y_test1 = wpdist(X, 'correlation')
- _assert_within_tol(Y_test1, Y_right, eps)
- def test_pdist_correlation_random_float32(self):
- eps = 1e-07
- X = np.float32(eo['pdist-double-inp'])
- Y_right = eo['pdist-correlation']
- Y_test1 = wpdist(X, 'correlation')
- _assert_within_tol(Y_test1, Y_right, eps)
- def test_pdist_correlation_random_nonC(self):
- eps = 1e-07
- X = eo['pdist-double-inp']
- Y_right = eo['pdist-correlation']
- Y_test2 = wpdist(X, 'test_correlation')
- _assert_within_tol(Y_test2, Y_right, eps)
- @pytest.mark.slow
- def test_pdist_correlation_iris(self):
- eps = 1e-08
- X = eo['iris']
- Y_right = eo['pdist-correlation-iris']
- Y_test1 = wpdist(X, 'correlation')
- _assert_within_tol(Y_test1, Y_right, eps)
- @pytest.mark.slow
- def test_pdist_correlation_iris_float32(self):
- eps = 1e-07
- X = eo['iris']
- Y_right = np.float32(eo['pdist-correlation-iris'])
- Y_test1 = wpdist(X, 'correlation')
- _assert_within_tol(Y_test1, Y_right, eps, verbose > 2)
- @pytest.mark.slow
- def test_pdist_correlation_iris_nonC(self):
- eps = 1e-08
- X = eo['iris']
- Y_right = eo['pdist-correlation-iris']
- Y_test2 = wpdist(X, 'test_correlation')
- _assert_within_tol(Y_test2, Y_right, eps)
- def test_pdist_minkowski_random(self):
- eps = 1e-05
- X = eo['pdist-double-inp']
- Y_right = eo['pdist-minkowski-3.2']
- Y_test1 = wpdist_no_const(X, 'minkowski', p=3.2)
- _assert_within_tol(Y_test1, Y_right, eps)
- def test_pdist_minkowski_random_float32(self):
- eps = 1e-05
- X = np.float32(eo['pdist-double-inp'])
- Y_right = eo['pdist-minkowski-3.2']
- Y_test1 = wpdist_no_const(X, 'minkowski', p=3.2)
- _assert_within_tol(Y_test1, Y_right, eps)
- def test_pdist_minkowski_random_nonC(self):
- eps = 1e-05
- X = eo['pdist-double-inp']
- Y_right = eo['pdist-minkowski-3.2']
- Y_test2 = wpdist_no_const(X, 'test_minkowski', p=3.2)
- _assert_within_tol(Y_test2, Y_right, eps)
- @pytest.mark.slow
- def test_pdist_minkowski_3_2_iris(self):
- eps = 1e-07
- X = eo['iris']
- Y_right = eo['pdist-minkowski-3.2-iris']
- Y_test1 = wpdist_no_const(X, 'minkowski', p=3.2)
- _assert_within_tol(Y_test1, Y_right, eps)
- @pytest.mark.slow
- def test_pdist_minkowski_3_2_iris_float32(self):
- eps = 1e-06
- X = np.float32(eo['iris'])
- Y_right = eo['pdist-minkowski-3.2-iris']
- Y_test1 = wpdist_no_const(X, 'minkowski', p=3.2)
- _assert_within_tol(Y_test1, Y_right, eps)
- @pytest.mark.slow
- def test_pdist_minkowski_3_2_iris_nonC(self):
- eps = 1e-07
- X = eo['iris']
- Y_right = eo['pdist-minkowski-3.2-iris']
- Y_test2 = wpdist_no_const(X, 'test_minkowski', p=3.2)
- _assert_within_tol(Y_test2, Y_right, eps)
- @pytest.mark.slow
- def test_pdist_minkowski_5_8_iris(self):
- eps = 1e-07
- X = eo['iris']
- Y_right = eo['pdist-minkowski-5.8-iris']
- Y_test1 = wpdist_no_const(X, 'minkowski', p=5.8)
- _assert_within_tol(Y_test1, Y_right, eps)
- @pytest.mark.slow
- def test_pdist_minkowski_5_8_iris_float32(self):
- eps = 1e-06
- X = np.float32(eo['iris'])
- Y_right = eo['pdist-minkowski-5.8-iris']
- Y_test1 = wpdist_no_const(X, 'minkowski', p=5.8)
- _assert_within_tol(Y_test1, Y_right, eps, verbose > 2)
- @pytest.mark.slow
- def test_pdist_minkowski_5_8_iris_nonC(self):
- eps = 1e-07
- X = eo['iris']
- Y_right = eo['pdist-minkowski-5.8-iris']
- Y_test2 = wpdist_no_const(X, 'test_minkowski', p=5.8)
- _assert_within_tol(Y_test2, Y_right, eps)
- def test_pdist_mahalanobis(self):
- # 1-dimensional observations
- x = np.array([2.0, 2.0, 3.0, 5.0]).reshape(-1, 1)
- dist = pdist(x, metric='mahalanobis')
- assert_allclose(dist, [0.0, np.sqrt(0.5), np.sqrt(4.5),
- np.sqrt(0.5), np.sqrt(4.5), np.sqrt(2.0)])
- # 2-dimensional observations
- x = np.array([[0, 0], [-1, 0], [0, 2], [1, 0], [0, -2]])
- dist = pdist(x, metric='mahalanobis')
- rt2 = np.sqrt(2)
- assert_allclose(dist, [rt2, rt2, rt2, rt2, 2, 2 * rt2, 2, 2, 2 * rt2, 2])
- # Too few observations
- assert_raises(ValueError,
- wpdist, [[0, 1], [2, 3]], metric='mahalanobis')
- def test_pdist_hamming_random(self):
- eps = 1e-07
- X = eo['pdist-boolean-inp']
- Y_right = eo['pdist-hamming']
- Y_test1 = wpdist(X, 'hamming')
- _assert_within_tol(Y_test1, Y_right, eps)
- def test_pdist_hamming_random_float32(self):
- eps = 1e-07
- X = np.float32(eo['pdist-boolean-inp'])
- Y_right = eo['pdist-hamming']
- Y_test1 = wpdist(X, 'hamming')
- _assert_within_tol(Y_test1, Y_right, eps)
- def test_pdist_hamming_random_nonC(self):
- eps = 1e-07
- X = eo['pdist-boolean-inp']
- Y_right = eo['pdist-hamming']
- Y_test2 = wpdist(X, 'test_hamming')
- _assert_within_tol(Y_test2, Y_right, eps)
- def test_pdist_dhamming_random(self):
- eps = 1e-07
- X = np.float64(eo['pdist-boolean-inp'])
- Y_right = eo['pdist-hamming']
- Y_test1 = wpdist(X, 'hamming')
- _assert_within_tol(Y_test1, Y_right, eps)
- def test_pdist_dhamming_random_float32(self):
- eps = 1e-07
- X = np.float32(eo['pdist-boolean-inp'])
- Y_right = eo['pdist-hamming']
- Y_test1 = wpdist(X, 'hamming')
- _assert_within_tol(Y_test1, Y_right, eps)
- def test_pdist_dhamming_random_nonC(self):
- eps = 1e-07
- X = np.float64(eo['pdist-boolean-inp'])
- Y_right = eo['pdist-hamming']
- Y_test2 = wpdist(X, 'test_hamming')
- _assert_within_tol(Y_test2, Y_right, eps)
- def test_pdist_jaccard_random(self):
- eps = 1e-08
- X = eo['pdist-boolean-inp']
- Y_right = eo['pdist-jaccard']
- Y_test1 = wpdist(X, 'jaccard')
- _assert_within_tol(Y_test1, Y_right, eps)
- def test_pdist_jaccard_random_float32(self):
- eps = 1e-08
- X = np.float32(eo['pdist-boolean-inp'])
- Y_right = eo['pdist-jaccard']
- Y_test1 = wpdist(X, 'jaccard')
- _assert_within_tol(Y_test1, Y_right, eps)
- def test_pdist_jaccard_random_nonC(self):
- eps = 1e-08
- X = eo['pdist-boolean-inp']
- Y_right = eo['pdist-jaccard']
- Y_test2 = wpdist(X, 'test_jaccard')
- _assert_within_tol(Y_test2, Y_right, eps)
- def test_pdist_djaccard_random(self):
- eps = 1e-08
- X = np.float64(eo['pdist-boolean-inp'])
- Y_right = eo['pdist-jaccard']
- Y_test1 = wpdist(X, 'jaccard')
- _assert_within_tol(Y_test1, Y_right, eps)
- def test_pdist_djaccard_random_float32(self):
- eps = 1e-08
- X = np.float32(eo['pdist-boolean-inp'])
- Y_right = eo['pdist-jaccard']
- Y_test1 = wpdist(X, 'jaccard')
- _assert_within_tol(Y_test1, Y_right, eps)
- def test_pdist_djaccard_allzeros(self):
- eps = 1e-08
- Y = pdist(np.zeros((5, 3)), 'jaccard')
- _assert_within_tol(np.zeros(10), Y, eps)
- def test_pdist_djaccard_random_nonC(self):
- eps = 1e-08
- X = np.float64(eo['pdist-boolean-inp'])
- Y_right = eo['pdist-jaccard']
- Y_test2 = wpdist(X, 'test_jaccard')
- _assert_within_tol(Y_test2, Y_right, eps)
- def test_pdist_jensenshannon_random(self):
- eps = 1e-08
- X = eo['pdist-double-inp']
- Y_right = eo['pdist-jensenshannon']
- Y_test1 = pdist(X, 'jensenshannon')
- _assert_within_tol(Y_test1, Y_right, eps)
- def test_pdist_jensenshannon_random_float32(self):
- eps = 1e-07
- X = np.float32(eo['pdist-double-inp'])
- Y_right = eo['pdist-jensenshannon']
- Y_test1 = pdist(X, 'jensenshannon')
- _assert_within_tol(Y_test1, Y_right, eps, verbose > 2)
- def test_pdist_jensenshannon_random_nonC(self):
- eps = 1e-08
- X = eo['pdist-double-inp']
- Y_right = eo['pdist-jensenshannon']
- Y_test2 = pdist(X, 'test_jensenshannon')
- _assert_within_tol(Y_test2, Y_right, eps)
- def test_pdist_jensenshannon_iris(self):
- eps = 1e-12
- X = eo['iris']
- Y_right = eo['pdist-jensenshannon-iris']
- Y_test1 = pdist(X, 'jensenshannon')
- _assert_within_tol(Y_test1, Y_right, eps)
- def test_pdist_jensenshannon_iris_float32(self):
- eps = 1e-06
- X = np.float32(eo['iris'])
- Y_right = eo['pdist-jensenshannon-iris']
- Y_test1 = pdist(X, 'jensenshannon')
- _assert_within_tol(Y_test1, Y_right, eps, verbose > 2)
- def test_pdist_jensenshannon_iris_nonC(self):
- eps = 5e-12
- X = eo['iris']
- Y_right = eo['pdist-jensenshannon-iris']
- Y_test2 = pdist(X, 'test_jensenshannon')
- _assert_within_tol(Y_test2, Y_right, eps)
- def test_pdist_djaccard_allzeros_nonC(self):
- eps = 1e-08
- Y = pdist(np.zeros((5, 3)), 'test_jaccard')
- _assert_within_tol(np.zeros(10), Y, eps)
- def test_pdist_chebyshev_random(self):
- eps = 1e-08
- X = eo['pdist-double-inp']
- Y_right = eo['pdist-chebyshev']
- Y_test1 = pdist(X, 'chebyshev')
- _assert_within_tol(Y_test1, Y_right, eps)
- def test_pdist_chebyshev_random_float32(self):
- eps = 1e-07
- X = np.float32(eo['pdist-double-inp'])
- Y_right = eo['pdist-chebyshev']
- Y_test1 = pdist(X, 'chebyshev')
- _assert_within_tol(Y_test1, Y_right, eps, verbose > 2)
- def test_pdist_chebyshev_random_nonC(self):
- eps = 1e-08
- X = eo['pdist-double-inp']
- Y_right = eo['pdist-chebyshev']
- Y_test2 = pdist(X, 'test_chebyshev')
- _assert_within_tol(Y_test2, Y_right, eps)
- def test_pdist_chebyshev_iris(self):
- eps = 1e-15
- X = eo['iris']
- Y_right = eo['pdist-chebyshev-iris']
- Y_test1 = pdist(X, 'chebyshev')
- _assert_within_tol(Y_test1, Y_right, eps)
- def test_pdist_chebyshev_iris_float32(self):
- eps = 1e-06
- X = np.float32(eo['iris'])
- Y_right = eo['pdist-chebyshev-iris']
- Y_test1 = pdist(X, 'chebyshev')
- _assert_within_tol(Y_test1, Y_right, eps, verbose > 2)
- def test_pdist_chebyshev_iris_nonC(self):
- eps = 1e-15
- X = eo['iris']
- Y_right = eo['pdist-chebyshev-iris']
- Y_test2 = pdist(X, 'test_chebyshev')
- _assert_within_tol(Y_test2, Y_right, eps)
- def test_pdist_matching_mtica1(self):
- # Test matching(*,*) with mtica example #1 (nums).
- m = wmatching(np.array([1, 0, 1, 1, 0]),
- np.array([1, 1, 0, 1, 1]))
- m2 = wmatching(np.array([1, 0, 1, 1, 0], dtype=bool),
- np.array([1, 1, 0, 1, 1], dtype=bool))
- assert_allclose(m, 0.6, rtol=0, atol=1e-10)
- assert_allclose(m2, 0.6, rtol=0, atol=1e-10)
- def test_pdist_matching_mtica2(self):
- # Test matching(*,*) with mtica example #2.
- m = wmatching(np.array([1, 0, 1]),
- np.array([1, 1, 0]))
- m2 = wmatching(np.array([1, 0, 1], dtype=bool),
- np.array([1, 1, 0], dtype=bool))
- assert_allclose(m, 2 / 3, rtol=0, atol=1e-10)
- assert_allclose(m2, 2 / 3, rtol=0, atol=1e-10)
- def test_pdist_jaccard_mtica1(self):
- m = wjaccard(np.array([1, 0, 1, 1, 0]),
- np.array([1, 1, 0, 1, 1]))
- m2 = wjaccard(np.array([1, 0, 1, 1, 0], dtype=bool),
- np.array([1, 1, 0, 1, 1], dtype=bool))
- assert_allclose(m, 0.6, rtol=0, atol=1e-10)
- assert_allclose(m2, 0.6, rtol=0, atol=1e-10)
- def test_pdist_jaccard_mtica2(self):
- m = wjaccard(np.array([1, 0, 1]),
- np.array([1, 1, 0]))
- m2 = wjaccard(np.array([1, 0, 1], dtype=bool),
- np.array([1, 1, 0], dtype=bool))
- assert_allclose(m, 2 / 3, rtol=0, atol=1e-10)
- assert_allclose(m2, 2 / 3, rtol=0, atol=1e-10)
- def test_pdist_yule_mtica1(self):
- m = wyule(np.array([1, 0, 1, 1, 0]),
- np.array([1, 1, 0, 1, 1]))
- m2 = wyule(np.array([1, 0, 1, 1, 0], dtype=bool),
- np.array([1, 1, 0, 1, 1], dtype=bool))
- if verbose > 2:
- print(m)
- assert_allclose(m, 2, rtol=0, atol=1e-10)
- assert_allclose(m2, 2, rtol=0, atol=1e-10)
- def test_pdist_yule_mtica2(self):
- m = wyule(np.array([1, 0, 1]),
- np.array([1, 1, 0]))
- m2 = wyule(np.array([1, 0, 1], dtype=bool),
- np.array([1, 1, 0], dtype=bool))
- if verbose > 2:
- print(m)
- assert_allclose(m, 2, rtol=0, atol=1e-10)
- assert_allclose(m2, 2, rtol=0, atol=1e-10)
- def test_pdist_dice_mtica1(self):
- m = wdice(np.array([1, 0, 1, 1, 0]),
- np.array([1, 1, 0, 1, 1]))
- m2 = wdice(np.array([1, 0, 1, 1, 0], dtype=bool),
- np.array([1, 1, 0, 1, 1], dtype=bool))
- if verbose > 2:
- print(m)
- assert_allclose(m, 3 / 7, rtol=0, atol=1e-10)
- assert_allclose(m2, 3 / 7, rtol=0, atol=1e-10)
- def test_pdist_dice_mtica2(self):
- m = wdice(np.array([1, 0, 1]),
- np.array([1, 1, 0]))
- m2 = wdice(np.array([1, 0, 1], dtype=bool),
- np.array([1, 1, 0], dtype=bool))
- if verbose > 2:
- print(m)
- assert_allclose(m, 0.5, rtol=0, atol=1e-10)
- assert_allclose(m2, 0.5, rtol=0, atol=1e-10)
- def test_pdist_sokalsneath_mtica1(self):
- m = sokalsneath(np.array([1, 0, 1, 1, 0]),
- np.array([1, 1, 0, 1, 1]))
- m2 = sokalsneath(np.array([1, 0, 1, 1, 0], dtype=bool),
- np.array([1, 1, 0, 1, 1], dtype=bool))
- if verbose > 2:
- print(m)
- assert_allclose(m, 3 / 4, rtol=0, atol=1e-10)
- assert_allclose(m2, 3 / 4, rtol=0, atol=1e-10)
- def test_pdist_sokalsneath_mtica2(self):
- m = wsokalsneath(np.array([1, 0, 1]),
- np.array([1, 1, 0]))
- m2 = wsokalsneath(np.array([1, 0, 1], dtype=bool),
- np.array([1, 1, 0], dtype=bool))
- if verbose > 2:
- print(m)
- assert_allclose(m, 4 / 5, rtol=0, atol=1e-10)
- assert_allclose(m2, 4 / 5, rtol=0, atol=1e-10)
- def test_pdist_rogerstanimoto_mtica1(self):
- m = wrogerstanimoto(np.array([1, 0, 1, 1, 0]),
- np.array([1, 1, 0, 1, 1]))
- m2 = wrogerstanimoto(np.array([1, 0, 1, 1, 0], dtype=bool),
- np.array([1, 1, 0, 1, 1], dtype=bool))
- if verbose > 2:
- print(m)
- assert_allclose(m, 3 / 4, rtol=0, atol=1e-10)
- assert_allclose(m2, 3 / 4, rtol=0, atol=1e-10)
- def test_pdist_rogerstanimoto_mtica2(self):
- m = wrogerstanimoto(np.array([1, 0, 1]),
- np.array([1, 1, 0]))
- m2 = wrogerstanimoto(np.array([1, 0, 1], dtype=bool),
- np.array([1, 1, 0], dtype=bool))
- if verbose > 2:
- print(m)
- assert_allclose(m, 4 / 5, rtol=0, atol=1e-10)
- assert_allclose(m2, 4 / 5, rtol=0, atol=1e-10)
- def test_pdist_russellrao_mtica1(self):
- m = wrussellrao(np.array([1, 0, 1, 1, 0]),
- np.array([1, 1, 0, 1, 1]))
- m2 = wrussellrao(np.array([1, 0, 1, 1, 0], dtype=bool),
- np.array([1, 1, 0, 1, 1], dtype=bool))
- if verbose > 2:
- print(m)
- assert_allclose(m, 3 / 5, rtol=0, atol=1e-10)
- assert_allclose(m2, 3 / 5, rtol=0, atol=1e-10)
- def test_pdist_russellrao_mtica2(self):
- m = wrussellrao(np.array([1, 0, 1]),
- np.array([1, 1, 0]))
- m2 = wrussellrao(np.array([1, 0, 1], dtype=bool),
- np.array([1, 1, 0], dtype=bool))
- if verbose > 2:
- print(m)
- assert_allclose(m, 2 / 3, rtol=0, atol=1e-10)
- assert_allclose(m2, 2 / 3, rtol=0, atol=1e-10)
- @pytest.mark.slow
- def test_pdist_canberra_match(self):
- D = eo['iris']
- if verbose > 2:
- print(D.shape, D.dtype)
- eps = 1e-10
- y1 = wpdist_no_const(D, "canberra")
- y2 = wpdist_no_const(D, "test_canberra")
- _assert_within_tol(y1, y2, eps, verbose > 2)
- def test_pdist_canberra_ticket_711(self):
- # Test pdist(X, 'canberra') to see if Canberra gives the right result
- # as reported on gh-1238.
- eps = 1e-8
- pdist_y = wpdist_no_const(([3.3], [3.4]), "canberra")
- right_y = 0.01492537
- _assert_within_tol(pdist_y, right_y, eps, verbose > 2)
- def test_pdist_custom_notdouble(self):
- # tests that when using a custom metric the data type is not altered
- class myclass(object):
- pass
- def _my_metric(x, y):
- if not isinstance(x[0], myclass) or not isinstance(y[0], myclass):
- raise ValueError("Type has been changed")
- return 1.123
- data = np.array([[myclass()], [myclass()]], dtype=object)
- pdist_y = pdist(data, metric=_my_metric)
- right_y = 1.123
- assert_equal(pdist_y, right_y, verbose=verbose > 2)
- def _check_calling_conventions(self, X, metric, eps=1e-07, **kwargs):
- # helper function for test_pdist_calling_conventions
- try:
- y1 = pdist(X, metric=metric, **kwargs)
- y2 = pdist(X, metric=eval(metric), **kwargs)
- y3 = pdist(X, metric="test_" + metric, **kwargs)
- except Exception as e:
- e_cls = e.__class__
- if verbose > 2:
- print(e_cls.__name__)
- print(e)
- assert_raises(e_cls, pdist, X, metric=metric, **kwargs)
- assert_raises(e_cls, pdist, X, metric=eval(metric), **kwargs)
- assert_raises(e_cls, pdist, X, metric="test_" + metric, **kwargs)
- else:
- _assert_within_tol(y1, y2, rtol=eps, verbose_=verbose > 2)
- _assert_within_tol(y1, y3, rtol=eps, verbose_=verbose > 2)
- def test_pdist_calling_conventions(self):
- # Ensures that specifying the metric with a str or scipy function
- # gives the same behaviour (i.e. same result or same exception).
- # NOTE: The correctness should be checked within each metric tests.
- # NOTE: Extra args should be checked with a dedicated test
- eps = 1e-07
- for eo_name in self.rnd_eo_names:
- # subsampling input data to speed-up tests
- # NOTE: num samples needs to be > than dimensions for mahalanobis
- X = eo[eo_name][::5, ::2]
- for metric in _METRICS_NAMES:
- if metric == 'wminkowski':
- continue
- if verbose > 2:
- print("testing: ", metric, " with: ", eo_name)
- if metric in {'dice', 'yule', 'kulsinski', 'matching',
- 'rogerstanimoto', 'russellrao', 'sokalmichener',
- 'sokalsneath'} and 'bool' not in eo_name:
- # python version permits non-bools e.g. for fuzzy logic
- continue
- self._check_calling_conventions(X, metric)
- # Testing built-in metrics with extra args
- if metric == "seuclidean":
- V = np.var(X.astype(np.double), axis=0, ddof=1)
- self._check_calling_conventions(X, metric, V=V)
- elif metric == "mahalanobis":
- V = np.atleast_2d(np.cov(X.astype(np.double).T))
- VI = np.array(np.linalg.inv(V).T)
- self._check_calling_conventions(X, metric, VI=VI)
- def test_pdist_dtype_equivalence(self):
- # Tests that the result is not affected by type up-casting
- eps = 1e-07
- tests = [(eo['random-bool-data'], self.valid_upcasts['bool']),
- (eo['random-uint-data'], self.valid_upcasts['uint']),
- (eo['random-int-data'], self.valid_upcasts['int']),
- (eo['random-float32-data'], self.valid_upcasts['float32'])]
- for metric in _METRICS_NAMES:
- for test in tests:
- X1 = test[0][::5, ::2]
- try:
- y1 = pdist(X1, metric=metric)
- except Exception as e:
- e_cls = e.__class__
- if verbose > 2:
- print(e_cls.__name__)
- print(e)
- for new_type in test[1]:
- X2 = new_type(X1)
- assert_raises(e_cls, pdist, X2, metric=metric)
- else:
- for new_type in test[1]:
- y2 = pdist(new_type(X1), metric=metric)
- _assert_within_tol(y1, y2, eps, verbose > 2)
- def test_pdist_out(self):
- # Test that out parameter works properly
- eps = 1e-07
- X = eo['random-float32-data'][::5, ::2]
- out_size = int((X.shape[0] * (X.shape[0] - 1)) / 2)
- for metric in _METRICS_NAMES:
- kwargs = dict()
- if metric in ['minkowski', 'wminkowski']:
- kwargs['p'] = 1.23
- if metric == 'wminkowski':
- kwargs['w'] = 1.0 / X.std(axis=0)
- out1 = np.empty(out_size, dtype=np.double)
- Y_right = pdist(X, metric, **kwargs)
- Y_test1 = pdist(X, metric, out=out1, **kwargs)
- # test that output is numerically equivalent
- _assert_within_tol(Y_test1, Y_right, eps)
- # test that Y_test1 and out1 are the same object
- assert_(Y_test1 is out1)
- # test for incorrect shape
- out2 = np.empty(out_size + 3, dtype=np.double)
- assert_raises(ValueError, pdist, X, metric, out=out2, **kwargs)
- # test for (C-)contiguous output
- out3 = np.empty(2 * out_size, dtype=np.double)[::2]
- assert_raises(ValueError, pdist, X, metric, out=out3, **kwargs)
- # test for incorrect dtype
- out5 = np.empty(out_size, dtype=np.int64)
- assert_raises(ValueError, pdist, X, metric, out=out5, **kwargs)
- def test_striding(self):
- # test that striding is handled correct with calls to
- # _copy_array_if_base_present
- eps = 1e-07
- X = eo['random-float32-data'][::5, ::2]
- X_copy = X.copy()
- # confirm contiguity
- assert_(not X.flags.c_contiguous)
- assert_(X_copy.flags.c_contiguous)
- for metric in _METRICS_NAMES:
- kwargs = dict()
- if metric in ['minkowski', 'wminkowski']:
- kwargs['p'] = 1.23
- if metric == 'wminkowski':
- kwargs['w'] = 1.0 / X.std(axis=0)
- Y1 = pdist(X, metric, **kwargs)
- Y2 = pdist(X_copy, metric, **kwargs)
- # test that output is numerically equivalent
- _assert_within_tol(Y1, Y2, eps, verbose > 2)
class TestSomeDistanceFunctions(object):
    # Checks a handful of vector distance functions on one fixed pair of
    # vectors, presented in three layouts (1D, 3x1 and 1x3).

    def setup_method(self):
        # 1D arrays
        x = np.array([1.0, 2.0, 3.0])
        y = np.array([1.0, 1.0, 5.0])
        # 3x1 arrays
        x31 = x[:, np.newaxis]
        y31 = y[:, np.newaxis]
        # 1x3 arrays
        x13 = x31.T
        y13 = y31.T

        self.cases = [(x, y), (x31, y31), (x13, y13)]

    def test_minkowski(self):
        with suppress_warnings() as w:
            w.filter(message="`wminkowski` is deprecated")
            for x, y in self.cases:
                dist1 = wminkowski(x, y, p=1)
                assert_almost_equal(dist1, 3.0)
                dist1p5 = wminkowski(x, y, p=1.5)
                assert_almost_equal(dist1p5, (1.0 + 2.0**1.5)**(2. / 3))
                dist2 = wminkowski(x, y, p=2)
                # |x - y| = (0, 1, 2), so the p=2 distance is sqrt(5); this
                # is the same value test_euclidean expects for these cases.
                assert_almost_equal(dist2, np.sqrt(5))

    def test_old_wminkowski(self):
        with suppress_warnings() as wrn:
            wrn.filter(message="`wminkowski` is deprecated")
            w = np.array([1.0, 2.0, 0.5])
            for x, y in self.cases:
                dist1 = old_wminkowski(x, y, p=1, w=w)
                assert_almost_equal(dist1, 3.0)
                dist1p5 = old_wminkowski(x, y, p=1.5, w=w)
                assert_almost_equal(dist1p5, (2.0**1.5+1.0)**(2./3))
                dist2 = old_wminkowski(x, y, p=2, w=w)
                assert_almost_equal(dist2, np.sqrt(5))

            # test weights Issue #7893
            arr = np.arange(4)
            w = np.full_like(arr, 4)
            assert_almost_equal(old_wminkowski(arr, arr + 1, p=2, w=w), 8.0)
            assert_almost_equal(wminkowski(arr, arr + 1, p=2, w=w), 4.0)

    def test_euclidean(self):
        for x, y in self.cases:
            dist = weuclidean(x, y)
            assert_almost_equal(dist, np.sqrt(5))

    def test_sqeuclidean(self):
        for x, y in self.cases:
            dist = wsqeuclidean(x, y)
            assert_almost_equal(dist, 5.0)

    def test_cosine(self):
        # x.y = 18, |x| = sqrt(14), |y| = sqrt(27)
        for x, y in self.cases:
            dist = wcosine(x, y)
            assert_almost_equal(dist, 1.0 - 18.0 / (np.sqrt(14) * np.sqrt(27)))

    def test_correlation(self):
        # xm and ym are x and y with their means (2 and 7/3) subtracted.
        xm = np.array([-1.0, 0, 1.0])
        ym = np.array([-4.0 / 3, -4.0 / 3, 5.0 - 7.0 / 3])
        for x, y in self.cases:
            dist = wcorrelation(x, y)
            assert_almost_equal(dist, 1.0 - np.dot(xm, ym) / (norm(xm) * norm(ym)))

    def test_mahalanobis(self):
        # The vectors come from self.cases; only the inverse covariance
        # matrix is local to this test.
        vi = np.array([[2.0, 1.0, 0.0], [1.0, 2.0, 1.0], [0.0, 1.0, 2.0]])
        for x, y in self.cases:
            dist = mahalanobis(x, y, vi)
            assert_almost_equal(dist, np.sqrt(6.0))
- class TestSquareForm(object):
- checked_dtypes = [np.float64, np.float32, np.int32, np.int8, bool]
- def test_squareform_matrix(self):
- for dtype in self.checked_dtypes:
- self.check_squareform_matrix(dtype)
- def test_squareform_vector(self):
- for dtype in self.checked_dtypes:
- self.check_squareform_vector(dtype)
- def check_squareform_matrix(self, dtype):
- A = np.zeros((0, 0), dtype=dtype)
- rA = squareform(A)
- assert_equal(rA.shape, (0,))
- assert_equal(rA.dtype, dtype)
- A = np.zeros((1, 1), dtype=dtype)
- rA = squareform(A)
- assert_equal(rA.shape, (0,))
- assert_equal(rA.dtype, dtype)
- A = np.array([[0, 4.2], [4.2, 0]], dtype=dtype)
- rA = squareform(A)
- assert_equal(rA.shape, (1,))
- assert_equal(rA.dtype, dtype)
- assert_array_equal(rA, np.array([4.2], dtype=dtype))
- def check_squareform_vector(self, dtype):
- v = np.zeros((0,), dtype=dtype)
- rv = squareform(v)
- assert_equal(rv.shape, (1, 1))
- assert_equal(rv.dtype, dtype)
- assert_array_equal(rv, [[0]])
- v = np.array([8.3], dtype=dtype)
- rv = squareform(v)
- assert_equal(rv.shape, (2, 2))
- assert_equal(rv.dtype, dtype)
- assert_array_equal(rv, np.array([[0, 8.3], [8.3, 0]], dtype=dtype))
- def test_squareform_multi_matrix(self):
- for n in xrange(2, 5):
- self.check_squareform_multi_matrix(n)
- def check_squareform_multi_matrix(self, n):
- X = np.random.rand(n, 4)
- Y = wpdist_no_const(X)
- assert_equal(len(Y.shape), 1)
- A = squareform(Y)
- Yr = squareform(A)
- s = A.shape
- k = 0
- if verbose >= 3:
- print(A.shape, Y.shape, Yr.shape)
- assert_equal(len(s), 2)
- assert_equal(len(Yr.shape), 1)
- assert_equal(s[0], s[1])
- for i in xrange(0, s[0]):
- for j in xrange(i + 1, s[1]):
- if i != j:
- assert_equal(A[i, j], Y[k])
- k += 1
- else:
- assert_equal(A[i, j], 0)
- class TestNumObsY(object):
- def test_num_obs_y_multi_matrix(self):
- for n in xrange(2, 10):
- X = np.random.rand(n, 4)
- Y = wpdist_no_const(X)
- assert_equal(num_obs_y(Y), n)
- def test_num_obs_y_1(self):
- # Tests num_obs_y(y) on a condensed distance matrix over 1
- # observations. Expecting exception.
- assert_raises(ValueError, self.check_y, 1)
- def test_num_obs_y_2(self):
- # Tests num_obs_y(y) on a condensed distance matrix over 2
- # observations.
- assert_(self.check_y(2))
- def test_num_obs_y_3(self):
- assert_(self.check_y(3))
- def test_num_obs_y_4(self):
- assert_(self.check_y(4))
- def test_num_obs_y_5_10(self):
- for i in xrange(5, 16):
- self.minit(i)
- def test_num_obs_y_2_100(self):
- # Tests num_obs_y(y) on 100 improper condensed distance matrices.
- # Expecting exception.
- a = set([])
- for n in xrange(2, 16):
- a.add(n * (n - 1) / 2)
- for i in xrange(5, 105):
- if i not in a:
- assert_raises(ValueError, self.bad_y, i)
- def minit(self, n):
- assert_(self.check_y(n))
- def bad_y(self, n):
- y = np.random.rand(n)
- return num_obs_y(y)
- def check_y(self, n):
- return num_obs_y(self.make_y(n)) == n
- def make_y(self, n):
- return np.random.rand((n * (n - 1)) // 2)
- class TestNumObsDM(object):
- def test_num_obs_dm_multi_matrix(self):
- for n in xrange(1, 10):
- X = np.random.rand(n, 4)
- Y = wpdist_no_const(X)
- A = squareform(Y)
- if verbose >= 3:
- print(A.shape, Y.shape)
- assert_equal(num_obs_dm(A), n)
- def test_num_obs_dm_0(self):
- # Tests num_obs_dm(D) on a 0x0 distance matrix. Expecting exception.
- assert_(self.check_D(0))
- def test_num_obs_dm_1(self):
- # Tests num_obs_dm(D) on a 1x1 distance matrix.
- assert_(self.check_D(1))
- def test_num_obs_dm_2(self):
- assert_(self.check_D(2))
- def test_num_obs_dm_3(self):
- assert_(self.check_D(2))
- def test_num_obs_dm_4(self):
- assert_(self.check_D(4))
- def check_D(self, n):
- return num_obs_dm(self.make_D(n)) == n
- def make_D(self, n):
- return np.random.rand(n, n)
def is_valid_dm_throw(D):
    # Run `is_valid_dm` on D with `throw=True` and hand back its result, so
    # the call can be passed to `assert_raises` as a one-argument callable.
    outcome = is_valid_dm(D, throw=True)
    return outcome
- class TestIsValidDM(object):
- def test_is_valid_dm_improper_shape_1D_E(self):
- D = np.zeros((5,), dtype=np.double)
- assert_raises(ValueError, is_valid_dm_throw, (D))
- def test_is_valid_dm_improper_shape_1D_F(self):
- D = np.zeros((5,), dtype=np.double)
- assert_equal(is_valid_dm(D), False)
- def test_is_valid_dm_improper_shape_3D_E(self):
- D = np.zeros((3, 3, 3), dtype=np.double)
- assert_raises(ValueError, is_valid_dm_throw, (D))
- def test_is_valid_dm_improper_shape_3D_F(self):
- D = np.zeros((3, 3, 3), dtype=np.double)
- assert_equal(is_valid_dm(D), False)
- def test_is_valid_dm_nonzero_diagonal_E(self):
- y = np.random.rand(10)
- D = squareform(y)
- for i in xrange(0, 5):
- D[i, i] = 2.0
- assert_raises(ValueError, is_valid_dm_throw, (D))
- def test_is_valid_dm_nonzero_diagonal_F(self):
- y = np.random.rand(10)
- D = squareform(y)
- for i in xrange(0, 5):
- D[i, i] = 2.0
- assert_equal(is_valid_dm(D), False)
- def test_is_valid_dm_asymmetric_E(self):
- y = np.random.rand(10)
- D = squareform(y)
- D[1, 3] = D[3, 1] + 1
- assert_raises(ValueError, is_valid_dm_throw, (D))
- def test_is_valid_dm_asymmetric_F(self):
- y = np.random.rand(10)
- D = squareform(y)
- D[1, 3] = D[3, 1] + 1
- assert_equal(is_valid_dm(D), False)
- def test_is_valid_dm_correct_1_by_1(self):
- D = np.zeros((1, 1), dtype=np.double)
- assert_equal(is_valid_dm(D), True)
- def test_is_valid_dm_correct_2_by_2(self):
- y = np.random.rand(1)
- D = squareform(y)
- assert_equal(is_valid_dm(D), True)
- def test_is_valid_dm_correct_3_by_3(self):
- y = np.random.rand(3)
- D = squareform(y)
- assert_equal(is_valid_dm(D), True)
- def test_is_valid_dm_correct_4_by_4(self):
- y = np.random.rand(6)
- D = squareform(y)
- assert_equal(is_valid_dm(D), True)
- def test_is_valid_dm_correct_5_by_5(self):
- y = np.random.rand(10)
- D = squareform(y)
- assert_equal(is_valid_dm(D), True)
def is_valid_y_throw(y):
    """Call ``is_valid_y`` on ``y`` with ``throw=True`` and return its result."""
    verdict = is_valid_y(y, throw=True)
    return verdict
- class TestIsValidY(object):
- # If test case name ends on "_E" then an exception is expected for the
- # given input, if it ends in "_F" then False is expected for the is_valid_y
- # check. Otherwise the input is expected to be valid.
- def test_is_valid_y_improper_shape_2D_E(self):
- y = np.zeros((3, 3,), dtype=np.double)
- assert_raises(ValueError, is_valid_y_throw, (y))
- def test_is_valid_y_improper_shape_2D_F(self):
- y = np.zeros((3, 3,), dtype=np.double)
- assert_equal(is_valid_y(y), False)
- def test_is_valid_y_improper_shape_3D_E(self):
- y = np.zeros((3, 3, 3), dtype=np.double)
- assert_raises(ValueError, is_valid_y_throw, (y))
- def test_is_valid_y_improper_shape_3D_F(self):
- y = np.zeros((3, 3, 3), dtype=np.double)
- assert_equal(is_valid_y(y), False)
- def test_is_valid_y_correct_2_by_2(self):
- y = self.correct_n_by_n(2)
- assert_equal(is_valid_y(y), True)
- def test_is_valid_y_correct_3_by_3(self):
- y = self.correct_n_by_n(3)
- assert_equal(is_valid_y(y), True)
- def test_is_valid_y_correct_4_by_4(self):
- y = self.correct_n_by_n(4)
- assert_equal(is_valid_y(y), True)
- def test_is_valid_y_correct_5_by_5(self):
- y = self.correct_n_by_n(5)
- assert_equal(is_valid_y(y), True)
- def test_is_valid_y_2_100(self):
- a = set([])
- for n in xrange(2, 16):
- a.add(n * (n - 1) / 2)
- for i in xrange(5, 105):
- if i not in a:
- assert_raises(ValueError, self.bad_y, i)
- def bad_y(self, n):
- y = np.random.rand(n)
- return is_valid_y(y, throw=True)
- def correct_n_by_n(self, n):
- y = np.random.rand((n * (n - 1)) // 2)
- return y
def test_bad_p():
    # wminkowski must raise ValueError when called with p = 0.5, both
    # without and with an explicit weight vector.
    p = 0.5
    u, v = [1, 2], [3, 4]
    with suppress_warnings() as sup:
        sup.filter(message="`wminkowski` is deprecated")
        for extra_args in ((), ([1, 1],)):
            assert_raises(ValueError, wminkowski, u, v, p, *extra_args)
def test_sokalsneath_all_false():
    # Regression test for ticket #876: two all-False vectors must make
    # sokalsneath raise ValueError.
    with assert_raises(ValueError):
        sokalsneath([False, False, False], [False, False, False])
def test_canberra():
    # Regression test for ticket #1430.
    cases = [
        ([1, 2, 3], [2, 4, 6], 1),
        ([1, 1, 0, 0], [1, 0, 1, 0], 2),
    ]
    for u, v, expected in cases:
        assert_equal(wcanberra(u, v), expected)
def test_braycurtis():
    # Regression test for ticket #1430.
    cases = [
        ([1, 2, 3], [2, 4, 6], 1. / 3),
        ([1, 1, 0, 0], [1, 0, 1, 0], 0.5),
    ]
    for u, v, expected in cases:
        assert_almost_equal(wbraycurtis(u, v), expected, decimal=15)
def test_euclideans():
    # Regression test for ticket #1328.
    ones = np.array([1, 1, 1])
    zeros = np.array([0, 0, 0])

    # Plain 1-D inputs: squared distance 3, distance sqrt(3).
    assert_almost_equal(wsqeuclidean(ones, zeros), 3.0, decimal=14)
    assert_almost_equal(weuclidean(ones, zeros), np.sqrt(3), decimal=14)

    # (1, N) and (N, 1) inputs must give the same values as the 1-D ones.
    row_u, row_v = ones[np.newaxis, :], zeros[np.newaxis, :]
    col_u, col_v = ones[:, np.newaxis], zeros[:, np.newaxis]
    assert_almost_equal(weuclidean(row_u, row_v), np.sqrt(3), decimal=14)
    assert_almost_equal(wsqeuclidean(row_u, row_v), 3.0, decimal=14)
    assert_almost_equal(wsqeuclidean(col_u, col_v), 3.0, decimal=14)

    # A (2, 2) input is not a vector and must be rejected by both metrics.
    square = np.arange(4).reshape(2, 2)
    for metric in (weuclidean, wsqeuclidean):
        assert_raises(ValueError, metric, square, square)

    # Seeded random data: the squared Euclidean distance must equal the
    # square of the Euclidean distance.
    rng = np.random.RandomState(1234567890)
    a = rng.rand(10)
    b = rng.rand(10)
    dist = weuclidean(a, b)
    sq_dist = wsqeuclidean(a, b)
    assert_almost_equal(dist**2, sq_dist, decimal=14)
def test_hamming_unequal_length():
    # Regression test for gh-4290: inputs of length 3 and 4 must produce a
    # ValueError (this used to surface as an AttributeError from
    # ndarray.mean called on bool).
    shorter = [0, 0, 1]
    longer = [1, 0, 1, 0]
    with assert_raises(ValueError):
        whamming(shorter, longer)
def test_hamming_string_array():
    # https://github.com/scikit-learn/scikit-learn/issues/4014
    # Two 20-element byte-string arrays that differ in 9 positions, so the
    # expected result is 9 / 20 = 0.45.
    left = np.array(
        ['eggs', 'spam', 'spam', 'eggs', 'spam',
         'spam', 'spam', 'spam', 'spam', 'spam',
         'spam', 'eggs', 'eggs', 'spam', 'eggs',
         'eggs', 'eggs', 'eggs', 'eggs', 'spam'],
        dtype='|S4')
    right = np.array(
        ['eggs', 'spam', 'spam', 'eggs', 'eggs',
         'spam', 'spam', 'spam', 'spam', 'eggs',
         'spam', 'eggs', 'spam', 'eggs', 'spam',
         'spam', 'eggs', 'spam', 'spam', 'eggs'],
        dtype='|S4')
    assert_allclose(whamming(left, right), 0.45)
def test_minkowski_w():
    # Regression test for gh-8142: passing w=None explicitly must give the
    # same pdist/cdist results as omitting w.
    row = [83.33333333, 100., 83.33333333, 100., 36.,
           60., 90., 150., 24., 48.]
    arr_in = np.array([row, row])

    with_none_p = pdist(arr_in, metric='minkowski', p=1, w=None)
    with_none_c = cdist(arr_in, arr_in, metric='minkowski', p=1, w=None)
    omitted_p = pdist(arr_in, metric='minkowski', p=1)
    omitted_c = cdist(arr_in, arr_in, metric='minkowski', p=1)

    assert_allclose(with_none_p, omitted_p, rtol=1e-15)
    assert_allclose(with_none_c, omitted_c, rtol=1e-15)
def test_sqeuclidean_dtypes():
    # Assert that sqeuclidean returns the right types of values.
    # Integer types should be converted to floating for stability.
    # Floating point types should be the same as the input.
    x = [1, 2, 3]
    y = [4, 5, 6]

    for dtype in [np.int8, np.int16, np.int32, np.int64]:
        d = wsqeuclidean(np.asarray(x, dtype=dtype), np.asarray(y, dtype=dtype))
        assert_(np.issubdtype(d.dtype, np.floating))

    for dtype in [np.uint8, np.uint16, np.uint32, np.uint64]:
        # Build the largest representable value explicitly instead of
        # casting -1: converting an out-of-range Python int to an unsigned
        # dtype is deprecated/rejected by newer NumPy releases.
        umax = np.iinfo(dtype).max
        d1 = wsqeuclidean([0], np.asarray([umax], dtype=dtype))
        d2 = wsqeuclidean(np.asarray([umax], dtype=dtype), [0])

        assert_equal(d1, d2)
        assert_equal(d1, np.float64(umax)**2)

    dtypes = [np.float32, np.float64, np.complex64, np.complex128]
    for dtype in ['float16', 'float128']:
        # These aren't present in older numpy versions; float128 may also not
        # be present on all platforms.
        if hasattr(np, dtype):
            dtypes.append(getattr(np, dtype))

    for dtype in dtypes:
        d = wsqeuclidean(np.asarray(x, dtype=dtype), np.asarray(y, dtype=dtype))
        assert_equal(d.dtype, dtype)
def test_sokalmichener():
    # sokalmichener must give exactly the same value for boolean vectors
    # and for the same vectors converted element-wise to int.
    bool_u = [True, True, False]
    bool_v = [True, False, True]
    int_u = list(map(int, bool_u))
    int_v = list(map(int, bool_v))

    from_bools = sokalmichener(bool_u, bool_v)
    from_ints = sokalmichener(int_u, int_v)

    # Exact equality, not approximate.
    assert_equal(from_bools, from_ints)
def test_modifies_input():
    # cdist and pdist must leave their input array untouched for every
    # metric name in _METRICS_NAMES.
    X1 = np.asarray([[1., 2., 3.],
                     [1.2, 2.3, 3.4],
                     [2.2, 2.3, 4.4],
                     [22.2, 23.3, 44.4]])
    pristine = X1.copy()
    with suppress_warnings() as sup:
        sup.filter(message="`wminkowski` is deprecated")
        for metric in _METRICS_NAMES:
            # Only "wminkowski" is given a weight vector here.
            if metric == "wminkowski":
                kwargs = {"w": 1.0 / X1.std(axis=0)}
            else:
                kwargs = {}
            cdist(X1, X1, metric, **kwargs)
            pdist(X1, metric, **kwargs)
            assert_array_equal(X1, pristine)
def test_Xdist_deprecated_args():
    # Checks the warnings cdist and pdist emit for (a) a metric parameter
    # passed positionally and (b) a keyword the metric does not accept.
    # Each scenario calls cdist once and pdist once and requires exactly
    # two recorded warnings.
    X1 = np.asarray([[1., 2., 3.],
                     [1.2, 2.3, 3.4],
                     [2.2, 2.3, 4.4],
                     [22.2, 23.3, 44.4]])
    weights = np.arange(3)
    warn_msg_kwargs = "Got unexpected kwarg"
    warn_msg_args = "[0-9]* metric parameters have been passed as positional"

    for metric in _METRICS_NAMES:
        # (a) positional metric parameter.
        if metric == "wminkowski":
            positional_kwargs = {"w": weights}
        else:
            positional_kwargs = dict()
        with suppress_warnings() as sup:
            log = sup.record(message=warn_msg_args)
            sup.filter(message=warn_msg_kwargs)
            sup.filter(message="`wminkowski` is deprecated")
            cdist(X1, X1, metric, 2., **positional_kwargs)
            pdist(X1, metric, 2., **positional_kwargs)
            assert_(len(log) == 2)

        # (b) unexpected keyword argument.
        for arg in ["p", "V", "VI"]:
            bogus_kwargs = {arg: "foo"}

            if metric == "wminkowski":
                if "p" in bogus_kwargs or "w" in bogus_kwargs:
                    continue
                bogus_kwargs["w"] = weights

            # Skip the combinations where the keyword belongs to the metric.
            keyword_is_legitimate = (
                (arg == "V" and metric == "seuclidean") or
                (arg == "VI" and metric == "mahalanobis") or
                (arg == "p" and metric == "minkowski"))
            if keyword_is_legitimate:
                continue

            with suppress_warnings() as sup:
                log = sup.record(message=warn_msg_kwargs)
                sup.filter(message="`wminkowski` is deprecated")
                cdist(X1, X1, metric, **bogus_kwargs)
                pdist(X1, metric, **bogus_kwargs)
                assert_(len(log) == 2)
def test_Xdist_non_negative_weights():
    # pdist and cdist must raise ValueError when the weight vector
    # contains negative entries.
    X = eo['random-float32-data'][::5, ::2]
    w = np.ones(X.shape[1])
    w[::5] = -w[::5]  # flip the sign of every fifth weight

    skipped = ('seuclidean', 'mahalanobis', 'jensenshannon')
    for metric in _METRICS_NAMES:
        if metric not in skipped:
            # Each metric is tried in three spellings: its name, the object
            # that name evaluates to in this module, and the name with a
            # "test_" prefix.
            spellings = (metric, eval(metric), "test_" + metric)
            for m in spellings:
                assert_raises(ValueError, pdist, X, m, w=w)
                assert_raises(ValueError, cdist, X, X, m, w=w)
def test__validate_vector():
    # A flat list keeps its values, with or without an explicit dtype.
    values = [1, 2, 3]
    out = _validate_vector(values)
    assert_array_equal(out, values)

    out = _validate_vector(values, dtype=np.float64)
    assert_array_equal(out, values)
    assert_equal(out.dtype, np.float64)

    # A one-element list stays 1-D.
    values = [1]
    out = _validate_vector(values)
    assert_equal(out.ndim, 1)
    assert_equal(out, values)

    # A bare scalar comes back as a 1-D, one-element result.
    values = 1
    out = _validate_vector(values)
    assert_equal(out.ndim, 1)
    assert_equal(out, [values])

    # A (1, 5, 1) array comes back as its 5 values in 1-D.
    values = np.arange(5).reshape(1, -1, 1)
    out = _validate_vector(values)
    assert_equal(out.ndim, 1)
    assert_array_equal(out, values[0, :, 0])

    # A 2x2 nested list must be rejected.
    with assert_raises(ValueError):
        _validate_vector([[1, 2], [3, 4]])
|