12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835 |
- """
- This module contains a set of functions for vectorized string
- operations and methods.
- .. note::
- The `chararray` class exists for backwards compatibility with
- Numarray, it is not recommended for new development. Starting from numpy
- 1.4, if one needs arrays of strings, it is recommended to use arrays of
- `dtype` `object_`, `string_` or `unicode_`, and use the free functions
- in the `numpy.char` module for fast vectorized string operations.
- Some methods will only be available if the corresponding string method is
- available in your version of Python.
- The preferred alias for `defchararray` is `numpy.char`.
- """
- from __future__ import division, absolute_import, print_function
- import functools
- import sys
- from .numerictypes import string_, unicode_, integer, object_, bool_, character
- from .numeric import ndarray, compare_chararrays
- from .numeric import array as narray
- from numpy.core.multiarray import _vec_string
- from numpy.core.overrides import set_module
- from numpy.core import overrides
- from numpy.compat import asbytes, long
- import numpy
# Public names of this module, in their original export order.
__all__ = [
    # array class
    'chararray',
    # element-wise comparisons
    'equal', 'not_equal', 'greater_equal', 'less_equal', 'greater', 'less',
    # length and operator-style helpers
    'str_len', 'add', 'multiply', 'mod',
    # element-wise wrappers named after string methods
    'capitalize', 'center', 'count', 'decode', 'encode', 'endswith',
    'expandtabs', 'find', 'index', 'isalnum', 'isalpha', 'isdigit',
    'islower', 'isspace', 'istitle', 'isupper', 'join', 'ljust', 'lower',
    'lstrip', 'partition', 'replace', 'rfind', 'rindex', 'rjust',
    'rpartition', 'rsplit', 'rstrip', 'split', 'splitlines', 'startswith',
    'strip', 'swapcase', 'title', 'translate', 'upper', 'zfill',
    'isnumeric', 'isdecimal',
    # constructors
    'array', 'asarray',
]
# NOTE(review): nothing in the visible part of this module reads this value.
_globalvar = 0

# Keep a handle on the builtin ``len`` under a private name.
_len = len

# Aliases for the text and byte string types of the running interpreter.
if sys.version_info[0] < 3:
    _unicode = unicode
    _bytes = str
else:
    _unicode = str
    _bytes = bytes

# Dispatch decorator pre-bound to the public module name 'numpy.char'.
array_function_dispatch = functools.partial(
    overrides.array_function_dispatch,
    module='numpy.char',
)
- def _use_unicode(*args):
- """
- Helper function for determining the output type of some string
- operations.
- For an operation on two ndarrays, if at least one is unicode, the
- result should be unicode.
- """
- for x in args:
- if (isinstance(x, _unicode) or
- issubclass(numpy.asarray(x).dtype.type, unicode_)):
- return unicode_
- return string_
- def _to_string_or_unicode_array(result):
- """
- Helper function to cast a result back into a string or unicode array
- if an object array must be used as an intermediary.
- """
- return numpy.asarray(result.tolist())
- def _clean_args(*args):
- """
- Helper function for delegating arguments to Python string
- functions.
- Many of the Python string operations that have optional arguments
- do not use 'None' to indicate a default value. In these cases,
- we need to remove all `None` arguments, and those following them.
- """
- newargs = []
- for chk in args:
- if chk is None:
- break
- newargs.append(chk)
- return newargs
- def _get_num_chars(a):
- """
- Helper function that returns the number of characters per field in
- a string or unicode array. This is to abstract out the fact that
- for a unicode array this is itemsize / 4.
- """
- if issubclass(a.dtype.type, unicode_):
- return a.itemsize // 4
- return a.itemsize
- def _binary_op_dispatcher(x1, x2):
- return (x1, x2)
@array_function_dispatch(_binary_op_dispatcher)
def equal(x1, x2):
    """
    Test ``x1 == x2`` element by element.

    In contrast to `numpy.equal`, trailing whitespace is stripped from
    the strings before they are compared.  This mirrors numarray and is
    kept for backward compatibility.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray or bool
        Boolean result array, or a single bool when `x1` and `x2` are
        scalars.

    See Also
    --------
    not_equal, greater_equal, less_equal, greater, less
    """
    # Last argument requests the trailing-whitespace stripping.
    rstrip = True
    return compare_chararrays(x1, x2, '==', rstrip)
@array_function_dispatch(_binary_op_dispatcher)
def not_equal(x1, x2):
    """
    Test ``x1 != x2`` element by element.

    In contrast to `numpy.not_equal`, trailing whitespace is stripped
    from the strings before they are compared.  This mirrors numarray
    and is kept for backward compatibility.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray or bool
        Boolean result array, or a single bool when `x1` and `x2` are
        scalars.

    See Also
    --------
    equal, greater_equal, less_equal, greater, less
    """
    # Last argument requests the trailing-whitespace stripping.
    rstrip = True
    return compare_chararrays(x1, x2, '!=', rstrip)
@array_function_dispatch(_binary_op_dispatcher)
def greater_equal(x1, x2):
    """
    Test ``x1 >= x2`` element by element.

    In contrast to `numpy.greater_equal`, trailing whitespace is
    stripped from the strings before they are compared.  This mirrors
    numarray and is kept for backward compatibility.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray or bool
        Boolean result array, or a single bool when `x1` and `x2` are
        scalars.

    See Also
    --------
    equal, not_equal, less_equal, greater, less
    """
    # Last argument requests the trailing-whitespace stripping.
    rstrip = True
    return compare_chararrays(x1, x2, '>=', rstrip)
@array_function_dispatch(_binary_op_dispatcher)
def less_equal(x1, x2):
    """
    Test ``x1 <= x2`` element by element.

    In contrast to `numpy.less_equal`, trailing whitespace is stripped
    from the strings before they are compared.  This mirrors numarray
    and is kept for backward compatibility.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray or bool
        Boolean result array, or a single bool when `x1` and `x2` are
        scalars.

    See Also
    --------
    equal, not_equal, greater_equal, greater, less
    """
    # Last argument requests the trailing-whitespace stripping.
    rstrip = True
    return compare_chararrays(x1, x2, '<=', rstrip)
@array_function_dispatch(_binary_op_dispatcher)
def greater(x1, x2):
    """
    Test ``x1 > x2`` element by element.

    In contrast to `numpy.greater`, trailing whitespace is stripped from
    the strings before they are compared.  This mirrors numarray and is
    kept for backward compatibility.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray or bool
        Boolean result array, or a single bool when `x1` and `x2` are
        scalars.

    See Also
    --------
    equal, not_equal, greater_equal, less_equal, less
    """
    # Last argument requests the trailing-whitespace stripping.
    rstrip = True
    return compare_chararrays(x1, x2, '>', rstrip)
@array_function_dispatch(_binary_op_dispatcher)
def less(x1, x2):
    """
    Return (x1 < x2) element-wise.

    Unlike `numpy.less`, this comparison is performed by first
    stripping whitespace characters from the end of the string.  This
    behavior is provided for backward-compatibility with numarray.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray or bool
        Output array of bools, or a single bool if x1 and x2 are scalars.

    See Also
    --------
    equal, not_equal, greater_equal, less_equal, greater
    """
    # The final True asks compare_chararrays to strip trailing whitespace.
    return compare_chararrays(x1, x2, '<', True)
- def _unary_op_dispatcher(a):
- return (a,)
@array_function_dispatch(_unary_op_dispatcher)
def str_len(a):
    """
    Compute ``len(a)`` for every element.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of integers

    See also
    --------
    __builtin__.len
    """
    length_method = '__len__'
    return _vec_string(a, integer, length_method)
@array_function_dispatch(_binary_op_dispatcher)
def add(x1, x2):
    """
    Concatenate two arrays of str or unicode element by element.

    `x1` and `x2` must have the same shape.

    Parameters
    ----------
    x1 : array_like of str or unicode
        Input array.
    x2 : array_like of str or unicode
        Input array.

    Returns
    -------
    add : ndarray
        Output array of `string_` or `unicode_`, depending on input
        types, of the same shape as `x1` and `x2`.
    """
    left = numpy.asarray(x1)
    right = numpy.asarray(x2)
    # Unicode if either side is unicode; wide enough to hold both fields.
    result_type = _use_unicode(left, right)
    total_chars = _get_num_chars(left) + _get_num_chars(right)
    return _vec_string(left, (result_type, total_chars), '__add__', (right,))
- def _multiply_dispatcher(a, i):
- return (a,)
@array_function_dispatch(_multiply_dispatcher)
def multiply(a, i):
    """
    Repeat each string: ``a * i`` element by element.

    Values in `i` of less than 0 are treated as 0 (which yields an
    empty string).

    Parameters
    ----------
    a : array_like of str or unicode
    i : array_like of ints

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input types

    Raises
    ------
    ValueError
        If `i` does not have an integer dtype.
    """
    strings = numpy.asarray(a)
    repeats = numpy.asarray(i)
    if not issubclass(repeats.dtype.type, integer):
        raise ValueError("Can only multiply by integers")
    # Size the output field for the largest repeat count (never negative).
    chars_per_item = _get_num_chars(strings)
    largest_repeat = max(long(repeats.max()), 0)
    out_size = chars_per_item * largest_repeat
    out_dtype = (strings.dtype.type, out_size)
    return _vec_string(strings, out_dtype, '__mul__', (repeats,))
- def _mod_dispatcher(a, values):
- return (a, values)
@array_function_dispatch(_mod_dispatcher)
def mod(a, values):
    """
    Apply ``a % values`` (pre-Python 2.6 string formatting, i.e.
    interpolation) element by element for a pair of array_likes of str
    or unicode.

    Parameters
    ----------
    a : array_like of str or unicode
    values : array_like of values
        These values will be element-wise interpolated into the string.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input types

    See also
    --------
    str.__mod__
    """
    # Format into an object array first, then convert back to strings.
    formatted = _vec_string(a, object_, '__mod__', (values,))
    return _to_string_or_unicode_array(formatted)
@array_function_dispatch(_unary_op_dispatcher)
def capitalize(a):
    """
    Capitalize only the first character of each element of `a`,
    returning a copy.

    Calls `str.capitalize` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode
        Input array of strings to capitalize.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input
        types

    See also
    --------
    str.capitalize

    Examples
    --------
    >>> c = np.array(['a1b2','1b2a','b2a1','2a1b'],'S4'); c
    array(['a1b2', '1b2a', 'b2a1', '2a1b'],
          dtype='|S4')
    >>> np.char.capitalize(c)
    array(['A1b2', '1b2a', 'B2a1', '2a1b'],
          dtype='|S4')
    """
    strings = numpy.asarray(a)
    # The output keeps the dtype of the input array.
    out_dtype = strings.dtype
    return _vec_string(strings, out_dtype, 'capitalize')
- def _center_dispatcher(a, width, fillchar=None):
- return (a,)
@array_function_dispatch(_center_dispatcher)
def center(a, width, fillchar=' '):
    """
    Center each element of `a` in a string of length `width`, returning
    a copy.

    Calls `str.center` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode
    width : int
        The length of the resulting strings
    fillchar : str or unicode, optional
        The padding character to use (default is space).

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input
        types

    See also
    --------
    str.center
    """
    strings = numpy.asarray(a)
    widths = numpy.asarray(width)
    # The output field must fit the largest requested width.
    out_chars = long(numpy.max(widths.flat))
    # Byte-string input needs a byte-string fill character.
    if numpy.issubdtype(strings.dtype, numpy.string_):
        fillchar = asbytes(fillchar)
    out_dtype = (strings.dtype.type, out_chars)
    return _vec_string(strings, out_dtype, 'center', (widths, fillchar))
- def _count_dispatcher(a, sub, start=None, end=None):
- return (a,)
@array_function_dispatch(_count_dispatcher)
def count(a, sub, start=0, end=None):
    """
    Count the non-overlapping occurrences of substring `sub` in the
    range [`start`, `end`] for each element.

    Calls `str.count` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode
    sub : str or unicode
        The substring to search for.
    start, end : int, optional
        Optional arguments `start` and `end` are interpreted as slice
        notation to specify the range in which to count.

    Returns
    -------
    out : ndarray
        Output array of ints.

    See also
    --------
    str.count

    Examples
    --------
    >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
    >>> c
    array(['aAaAaA', ' aA ', 'abBABba'],
          dtype='|S7')
    >>> np.char.count(c, 'A')
    array([3, 1, 1])
    >>> np.char.count(c, 'aA')
    array([3, 1, 0])
    >>> np.char.count(c, 'A', start=1, end=4)
    array([2, 1, 1])
    >>> np.char.count(c, 'A', start=1, end=3)
    array([1, 0, 0])
    """
    # `end` is forwarded only when given; None is not passed on.
    call_args = [sub, start]
    call_args.extend(_clean_args(end))
    return _vec_string(a, integer, 'count', call_args)
- def _code_dispatcher(a, encoding=None, errors=None):
- return (a,)
@array_function_dispatch(_code_dispatcher)
def decode(a, encoding=None, errors=None):
    """
    Decode each element by calling `str.decode` element-wise.

    The set of available codecs comes from the Python standard library,
    and may be extended at runtime.  For more information, see the
    :mod:`codecs` module.

    Parameters
    ----------
    a : array_like of str or unicode
    encoding : str, optional
        The name of an encoding
    errors : str, optional
        Specifies how to handle encoding errors

    Returns
    -------
    out : ndarray

    See also
    --------
    str.decode

    Notes
    -----
    The type of the result will depend on the encoding specified.

    Examples
    --------
    The example below exercises the companion function `encode`.

    >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
    >>> c
    array(['aAaAaA', ' aA ', 'abBABba'],
          dtype='|S7')
    >>> np.char.encode(c, encoding='cp037')
    array(['\\x81\\xc1\\x81\\xc1\\x81\\xc1', '@@\\x81\\xc1@@',
           '\\x81\\x82\\xc2\\xc1\\xc2\\x82\\x81'],
          dtype='|S7')
    """
    # Arguments left as None are not forwarded to the string method.
    codec_args = _clean_args(encoding, errors)
    decoded = _vec_string(a, object_, 'decode', codec_args)
    return _to_string_or_unicode_array(decoded)
@array_function_dispatch(_code_dispatcher)
def encode(a, encoding=None, errors=None):
    """
    Encode each element by calling `str.encode` element-wise.

    The set of available codecs comes from the Python standard library,
    and may be extended at runtime.  For more information, see the
    codecs module.

    Parameters
    ----------
    a : array_like of str or unicode
    encoding : str, optional
        The name of an encoding
    errors : str, optional
        Specifies how to handle encoding errors

    Returns
    -------
    out : ndarray

    See also
    --------
    str.encode

    Notes
    -----
    The type of the result will depend on the encoding specified.
    """
    # Arguments left as None are not forwarded to the string method.
    codec_args = _clean_args(encoding, errors)
    encoded = _vec_string(a, object_, 'encode', codec_args)
    return _to_string_or_unicode_array(encoded)
- def _endswith_dispatcher(a, suffix, start=None, end=None):
- return (a,)
@array_function_dispatch(_endswith_dispatcher)
def endswith(a, suffix, start=0, end=None):
    """
    Report, for each string element in `a`, whether it ends with
    `suffix` (`True`) or not (`False`).

    Calls `str.endswith` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode
    suffix : str
    start, end : int, optional
        With optional `start`, test beginning at that position.  With
        optional `end`, stop comparing at that position.

    Returns
    -------
    out : ndarray
        Outputs an array of bools.

    See also
    --------
    str.endswith

    Examples
    --------
    >>> s = np.array(['foo', 'bar'])
    >>> s[0] = 'foo'
    >>> s[1] = 'bar'
    >>> s
    array(['foo', 'bar'],
          dtype='|S3')
    >>> np.char.endswith(s, 'ar')
    array([False, True])
    >>> np.char.endswith(s, 'a', start=1, end=2)
    array([False, True])
    """
    # `end` is forwarded only when given; None is not passed on.
    call_args = [suffix, start]
    call_args.extend(_clean_args(end))
    return _vec_string(a, bool_, 'endswith', call_args)
- def _expandtabs_dispatcher(a, tabsize=None):
- return (a,)
@array_function_dispatch(_expandtabs_dispatcher)
def expandtabs(a, tabsize=8):
    """
    Replace every tab character in each string element by one or more
    spaces, returning a copy.

    Calls `str.expandtabs` element-wise.

    The number of spaces depends on the current column and the given
    `tabsize`.  The column number is reset to zero after each newline
    occurring in the string.  This doesn't understand other non-printing
    characters or escape sequences.

    Parameters
    ----------
    a : array_like of str or unicode
        Input array
    tabsize : int, optional
        Replace tabs with `tabsize` number of spaces.  If not given
        defaults to 8 spaces.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See also
    --------
    str.expandtabs
    """
    # Expand into an object array first, then convert back to strings.
    expanded = _vec_string(a, object_, 'expandtabs', (tabsize,))
    return _to_string_or_unicode_array(expanded)
@array_function_dispatch(_count_dispatcher)
def find(a, sub, start=0, end=None):
    """
    Locate, for each element, the lowest index in the string where
    substring `sub` is found.

    Calls `str.find` element-wise.

    The match must lie within the range [`start`, `end`].

    Parameters
    ----------
    a : array_like of str or unicode
    sub : str or unicode
    start, end : int, optional
        Optional arguments `start` and `end` are interpreted as in
        slice notation.

    Returns
    -------
    out : ndarray or int
        Output array of ints.  Returns -1 if `sub` is not found.

    See also
    --------
    str.find
    """
    # `end` is forwarded only when given; None is not passed on.
    call_args = [sub, start]
    call_args.extend(_clean_args(end))
    return _vec_string(a, integer, 'find', call_args)
@array_function_dispatch(_count_dispatcher)
def index(a, sub, start=0, end=None):
    """
    Like `find`, but raises `ValueError` when the substring is not found.

    Calls `str.index` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode
    sub : str or unicode
    start, end : int, optional
        Optional arguments `start` and `end` are interpreted as in
        slice notation.

    Returns
    -------
    out : ndarray
        Output array of ints.

    Raises
    ------
    ValueError
        If `sub` is not found in an element (the behaviour of
        `str.index`); unlike `find`, -1 is never returned.

    See also
    --------
    find, str.index
    """
    # `end` is forwarded only when given; None is not passed on.
    return _vec_string(
        a, integer, 'index', [sub, start] + _clean_args(end))
@array_function_dispatch(_unary_op_dispatcher)
def isalnum(a):
    """
    Returns true for each element if all characters in the string are
    alphanumeric and there is at least one character, false otherwise.

    Calls `str.isalnum` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools

    See also
    --------
    str.isalnum
    """
    # The result dtype is bool_, like the other is* predicates here.
    return _vec_string(a, bool_, 'isalnum')
@array_function_dispatch(_unary_op_dispatcher)
def isalpha(a):
    """
    Test each element: true if every character in the string is
    alphabetic and there is at least one character, false otherwise.

    Calls `str.isalpha` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools

    See also
    --------
    str.isalpha
    """
    predicate = 'isalpha'
    return _vec_string(a, bool_, predicate)
@array_function_dispatch(_unary_op_dispatcher)
def isdigit(a):
    """
    Test each element: true if every character in the string is a digit
    and there is at least one character, false otherwise.

    Calls `str.isdigit` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools

    See also
    --------
    str.isdigit
    """
    predicate = 'isdigit'
    return _vec_string(a, bool_, predicate)
@array_function_dispatch(_unary_op_dispatcher)
def islower(a):
    """
    Test each element: true if all cased characters in the string are
    lowercase and there is at least one cased character, false
    otherwise.

    Calls `str.islower` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools

    See also
    --------
    str.islower
    """
    predicate = 'islower'
    return _vec_string(a, bool_, predicate)
@array_function_dispatch(_unary_op_dispatcher)
def isspace(a):
    """
    Test each element: true if the string contains only whitespace
    characters and there is at least one character, false otherwise.

    Calls `str.isspace` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools

    See also
    --------
    str.isspace
    """
    predicate = 'isspace'
    return _vec_string(a, bool_, predicate)
@array_function_dispatch(_unary_op_dispatcher)
def istitle(a):
    """
    Test each element: true if the element is a titlecased string and
    there is at least one character, false otherwise.

    Calls `str.istitle` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools

    See also
    --------
    str.istitle
    """
    predicate = 'istitle'
    return _vec_string(a, bool_, predicate)
@array_function_dispatch(_unary_op_dispatcher)
def isupper(a):
    """
    Test each element: true if all cased characters in the string are
    uppercase and there is at least one character, false otherwise.

    Calls `str.isupper` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools

    See also
    --------
    str.isupper
    """
    predicate = 'isupper'
    return _vec_string(a, bool_, predicate)
- def _join_dispatcher(sep, seq):
- return (sep, seq)
@array_function_dispatch(_join_dispatcher)
def join(sep, seq):
    """
    Build a string that is the concatenation of the strings in the
    sequence `seq`.

    Calls `str.join` element-wise.

    Parameters
    ----------
    sep : array_like of str or unicode
    seq : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input types

    See also
    --------
    str.join
    """
    # Join into an object array first, then convert back to strings.
    joined = _vec_string(sep, object_, 'join', (seq,))
    return _to_string_or_unicode_array(joined)
def _just_dispatcher(a, width, fillchar=None):
    """Return the arguments of the justify functions used for dispatch.

    Only `a` is reported; `width` and `fillchar` are ignored.
    """
    relevant = (a,)
    return relevant
@array_function_dispatch(_just_dispatcher)
def ljust(a, width, fillchar=' '):
    """
    Left-justify every element of `a` in a string of length `width`.

    Delegates to `str.ljust` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode
        Strings to justify.
    width : int
        The length of the resulting strings
    fillchar : str or unicode, optional
        The character to use for padding

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See also
    --------
    str.ljust
    """
    strings = numpy.asarray(a)
    widths = numpy.asarray(width)
    # The output item size is the largest requested width.
    itemsize = long(numpy.max(widths.flat))
    if numpy.issubdtype(strings.dtype, numpy.string_):
        # Byte-string input: convert the fill character with `asbytes`.
        fillchar = asbytes(fillchar)
    out_dtype = (strings.dtype.type, itemsize)
    return _vec_string(strings, out_dtype, 'ljust', (widths, fillchar))
@array_function_dispatch(_unary_op_dispatcher)
def lower(a):
    """
    Convert every element of `a` to lowercase.

    Delegates to `str.lower` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type

    See also
    --------
    str.lower

    Examples
    --------
    >>> c = np.array(['A1B C', '1BCA', 'BCA1']); c
    array(['A1B C', '1BCA', 'BCA1'],
          dtype='|S5')
    >>> np.char.lower(c)
    array(['a1b c', '1bca', 'bca1'],
          dtype='|S5')
    """
    strings = numpy.asarray(a)
    # The result keeps the dtype of the input.
    out_dtype = strings.dtype
    return _vec_string(strings, out_dtype, 'lower')
def _strip_dispatcher(a, chars=None):
    """Return the arguments of the strip functions used for dispatch.

    Only `a` is reported; `chars` is ignored.
    """
    relevant = (a,)
    return relevant
@array_function_dispatch(_strip_dispatcher)
def lstrip(a, chars=None):
    """
    Return a copy of each element of `a` with leading characters removed.

    Delegates to `str.lstrip` element-wise.

    Parameters
    ----------
    a : array-like, {str, unicode}
        Input array.
    chars : {str, unicode}, optional
        A string naming the set of characters to remove.  When omitted
        or None, whitespace is removed.  `chars` is not treated as a
        prefix; every combination of its characters is stripped.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type

    See also
    --------
    str.lstrip

    Examples
    --------
    >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
    >>> c
    array(['aAaAaA', ' aA ', 'abBABba'],
          dtype='|S7')

    The 'a' in c[1] is not stripped because it is preceded by whitespace.

    >>> np.char.lstrip(c, 'a')
    array(['AaAaA', ' aA ', 'bBABba'],
          dtype='|S7')

    >>> np.char.lstrip(c, 'A') # leaves c unchanged
    array(['aAaAaA', ' aA ', 'abBABba'],
          dtype='|S7')
    >>> (np.char.lstrip(c, ' ') == np.char.lstrip(c, '')).all()
    ... # XXX: is this a regression? this line now returns False
    ... # np.char.lstrip(c,'') does not modify c at all.
    True
    >>> (np.char.lstrip(c, ' ') == np.char.lstrip(c, None)).all()
    True
    """
    strings = numpy.asarray(a)
    # `chars` is forwarded unchanged, including an explicit None.
    call_args = (chars,)
    return _vec_string(strings, strings.dtype, 'lstrip', call_args)
def _partition_dispatcher(a, sep):
    """Return the arguments of the partition functions used for dispatch.

    Only `a` is reported; `sep` is ignored.
    """
    relevant = (a,)
    return relevant
@array_function_dispatch(_partition_dispatcher)
def partition(a, sep):
    """
    Partition each element in `a` around `sep`.

    Delegates to `str.partition` element-wise.

    Each element is split at the first occurrence of `sep` into three
    strings: the part before the separator, the separator itself, and
    the part after it.  When the separator does not occur, the three
    strings are the element itself followed by two empty strings.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array
    sep : {str, unicode}
        Separator to split each string element in `a`.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type.
        The output array will have an extra dimension with 3
        elements per input element.

    See also
    --------
    str.partition
    """
    pieces = _vec_string(a, object_, 'partition', (sep,))
    return _to_string_or_unicode_array(pieces)
def _replace_dispatcher(a, old, new, count=None):
    """Return the arguments of `replace` used for dispatch.

    Only `a` is reported; the remaining arguments are ignored.
    """
    relevant = (a,)
    return relevant
@array_function_dispatch(_replace_dispatcher)
def replace(a, old, new, count=None):
    """
    Return a copy of each element of `a` with occurrences of the
    substring `old` replaced by `new`.

    Delegates to `str.replace` element-wise.

    Parameters
    ----------
    a : array-like of str or unicode
        Strings to process.
    old, new : str or unicode
        Substring to look for and its replacement.
    count : int, optional
        When given, only the first `count` occurrences are replaced.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See also
    --------
    str.replace
    """
    call_args = [old, new] + _clean_args(count)
    replaced = _vec_string(a, object_, 'replace', call_args)
    return _to_string_or_unicode_array(replaced)
@array_function_dispatch(_count_dispatcher)
def rfind(a, sub, start=0, end=None):
    """
    Return, for each element of `a`, the highest index at which the
    substring `sub` is found, such that `sub` is contained within
    [`start`, `end`].

    Delegates to `str.rfind` element-wise.

    Parameters
    ----------
    a : array-like of str or unicode
        Strings to search.
    sub : str or unicode
        Substring to look for.
    start, end : int, optional
        Interpreted as in slice notation.

    Returns
    -------
    out : ndarray
        Output array of ints.  Return -1 on failure.

    See also
    --------
    str.rfind
    """
    call_args = [sub, start] + _clean_args(end)
    return _vec_string(a, integer, 'rfind', call_args)
@array_function_dispatch(_count_dispatcher)
def rindex(a, sub, start=0, end=None):
    """
    Behave like `rfind`, except that `ValueError` is raised when the
    substring `sub` is not found.

    Delegates to `str.rindex` element-wise.

    Parameters
    ----------
    a : array-like of str or unicode
        Strings to search.
    sub : str or unicode
        Substring to look for.
    start, end : int, optional
        Bounds of the search.

    Returns
    -------
    out : ndarray
        Output array of ints.

    See also
    --------
    rfind, str.rindex
    """
    call_args = [sub, start] + _clean_args(end)
    return _vec_string(a, integer, 'rindex', call_args)
@array_function_dispatch(_just_dispatcher)
def rjust(a, width, fillchar=' '):
    """
    Right-justify every element of `a` in a string of length `width`.

    Delegates to `str.rjust` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode
        Strings to justify.
    width : int
        The length of the resulting strings
    fillchar : str or unicode, optional
        The character to use for padding

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See also
    --------
    str.rjust
    """
    strings = numpy.asarray(a)
    widths = numpy.asarray(width)
    # The output item size is the largest requested width.
    itemsize = long(numpy.max(widths.flat))
    if numpy.issubdtype(strings.dtype, numpy.string_):
        # Byte-string input: convert the fill character with `asbytes`.
        fillchar = asbytes(fillchar)
    out_dtype = (strings.dtype.type, itemsize)
    return _vec_string(strings, out_dtype, 'rjust', (widths, fillchar))
@array_function_dispatch(_partition_dispatcher)
def rpartition(a, sep):
    """
    Partition (split) each element around the right-most separator.

    Calls `str.rpartition` element-wise.

    For each element in `a`, split the element at the last occurrence
    of `sep`, and return 3 strings containing the part before the
    separator, the separator itself, and the part after the separator.
    If the separator is not found, return 3 strings containing two
    empty strings, followed by the string itself (this mirrors
    `str.rpartition`, and is the reverse of `partition`).

    Parameters
    ----------
    a : array_like of str or unicode
        Input array
    sep : str or unicode
        Right-most separator to split each element in array.

    Returns
    -------
    out : ndarray
        Output array of string or unicode, depending on input
        type.  The output array will have an extra dimension with
        3 elements per input element.

    See also
    --------
    str.rpartition
    """
    return _to_string_or_unicode_array(
        _vec_string(a, object_, 'rpartition', (sep,)))
def _split_dispatcher(a, sep=None, maxsplit=None):
    """Return the arguments of the split functions used for dispatch.

    Only `a` is reported; `sep` and `maxsplit` are ignored.
    """
    relevant = (a,)
    return relevant
@array_function_dispatch(_split_dispatcher)
def rsplit(a, sep=None, maxsplit=None):
    """
    Return, for each element of `a`, a list of the words in the string,
    using `sep` as the delimiter string.

    Delegates to `str.rsplit` element-wise.

    Apart from splitting from the right, `rsplit` behaves like `split`.

    Parameters
    ----------
    a : array_like of str or unicode
        Strings to split.
    sep : str or unicode, optional
        When not specified or `None`, any whitespace string is a
        separator.
    maxsplit : int, optional
        When given, at most `maxsplit` splits are done, the rightmost
        ones.

    Returns
    -------
    out : ndarray
        Array of list objects

    See also
    --------
    str.rsplit, split
    """
    # The per-element lists can differ in length, so the result stays
    # an object array.
    call_args = [sep] + _clean_args(maxsplit)
    return _vec_string(a, object_, 'rsplit', call_args)
def _strip_dispatcher(a, chars=None):
    """Return the arguments of the strip functions used for dispatch.

    Only `a` is reported; `chars` is ignored.
    """
    # NOTE(review): this repeats the identical `_strip_dispatcher`
    # defined earlier in the module; the later definition wins.
    relevant = (a,)
    return relevant
@array_function_dispatch(_strip_dispatcher)
def rstrip(a, chars=None):
    """
    Return a copy of each element of `a` with trailing characters
    removed.

    Delegates to `str.rstrip` element-wise.

    Parameters
    ----------
    a : array-like of str or unicode
        Strings to process.
    chars : str or unicode, optional
        A string naming the set of characters to remove.  When omitted
        or None, whitespace is removed.  `chars` is not treated as a
        suffix; every combination of its characters is stripped.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See also
    --------
    str.rstrip

    Examples
    --------
    >>> c = np.array(['aAaAaA', 'abBABba'], dtype='S7'); c
    array(['aAaAaA', 'abBABba'],
          dtype='|S7')
    >>> np.char.rstrip(c, 'a')
    array(['aAaAaA', 'abBABb'],
          dtype='|S7')
    >>> np.char.rstrip(c, 'A')
    array(['aAaAa', 'abBABba'],
          dtype='|S7')
    """
    strings = numpy.asarray(a)
    # `chars` is forwarded unchanged, including an explicit None.
    call_args = (chars,)
    return _vec_string(strings, strings.dtype, 'rstrip', call_args)
@array_function_dispatch(_split_dispatcher)
def split(a, sep=None, maxsplit=None):
    """
    Return, for each element of `a`, a list of the words in the string,
    using `sep` as the delimiter string.

    Delegates to `str.split` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode
        Strings to split.
    sep : str or unicode, optional
        When not specified or `None`, any whitespace string is a
        separator.
    maxsplit : int, optional
        When given, at most `maxsplit` splits are done.

    Returns
    -------
    out : ndarray
        Array of list objects

    See also
    --------
    str.split, rsplit
    """
    # The per-element lists can differ in length, so the result stays
    # an object array.
    call_args = [sep] + _clean_args(maxsplit)
    return _vec_string(a, object_, 'split', call_args)
def _splitlines_dispatcher(a, keepends=None):
    """Return the arguments of `splitlines` used for dispatch.

    Only `a` is reported; `keepends` is ignored.
    """
    relevant = (a,)
    return relevant
@array_function_dispatch(_splitlines_dispatcher)
def splitlines(a, keepends=None):
    """
    Return, for each element of `a`, a list of the lines in the
    element, breaking at line boundaries.

    Delegates to `str.splitlines` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode
        Strings to split.
    keepends : bool, optional
        Line breaks are left out of the resulting list unless
        `keepends` is given and true.

    Returns
    -------
    out : ndarray
        Array of list objects

    See also
    --------
    str.splitlines
    """
    call_args = _clean_args(keepends)
    return _vec_string(a, object_, 'splitlines', call_args)
def _startswith_dispatcher(a, prefix, start=None, end=None):
    """Return the arguments of `startswith` used for dispatch.

    Only `a` is reported; the remaining arguments are ignored.
    """
    relevant = (a,)
    return relevant
@array_function_dispatch(_startswith_dispatcher)
def startswith(a, prefix, start=0, end=None):
    """
    Return a boolean array that is `True` where the string element in
    `a` starts with `prefix`, and `False` elsewhere.

    Delegates to `str.startswith` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode
        Strings to test.
    prefix : str
        Prefix to look for.
    start, end : int, optional
        With `start`, testing begins at that position.  With `end`,
        comparison stops at that position.

    Returns
    -------
    out : ndarray
        Array of booleans

    See also
    --------
    str.startswith
    """
    call_args = [prefix, start] + _clean_args(end)
    return _vec_string(a, bool_, 'startswith', call_args)
@array_function_dispatch(_strip_dispatcher)
def strip(a, chars=None):
    """
    Return a copy of each element of `a` with leading and trailing
    characters removed.

    Delegates to `str.strip` element-wise.

    Parameters
    ----------
    a : array-like of str or unicode
        Strings to process.
    chars : str or unicode, optional
        A string naming the set of characters to remove.  When omitted
        or None, whitespace is removed.  `chars` is not treated as a
        prefix or suffix; every combination of its characters is
        stripped.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See also
    --------
    str.strip

    Examples
    --------
    >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
    >>> c
    array(['aAaAaA', ' aA ', 'abBABba'],
          dtype='|S7')
    >>> np.char.strip(c)
    array(['aAaAaA', 'aA', 'abBABba'],
          dtype='|S7')
    >>> np.char.strip(c, 'a') # 'a' unstripped from c[1] because whitespace leads
    array(['AaAaA', ' aA ', 'bBABb'],
          dtype='|S7')
    >>> np.char.strip(c, 'A') # 'A' unstripped from c[1] because (unprinted) ws trails
    array(['aAaAa', ' aA ', 'abBABba'],
          dtype='|S7')
    """
    strings = numpy.asarray(a)
    call_args = _clean_args(chars)
    return _vec_string(strings, strings.dtype, 'strip', call_args)
@array_function_dispatch(_unary_op_dispatcher)
def swapcase(a):
    """
    Return element-wise a copy of each string with uppercase characters
    turned into lowercase and lowercase characters into uppercase.

    Delegates to `str.swapcase` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type

    See also
    --------
    str.swapcase

    Examples
    --------
    >>> c=np.array(['a1B c','1b Ca','b Ca1','cA1b'],'S5'); c
    array(['a1B c', '1b Ca', 'b Ca1', 'cA1b'],
          dtype='|S5')
    >>> np.char.swapcase(c)
    array(['A1b C', '1B cA', 'B cA1', 'Ca1B'],
          dtype='|S5')
    """
    strings = numpy.asarray(a)
    # The result keeps the dtype of the input.
    out_dtype = strings.dtype
    return _vec_string(strings, out_dtype, 'swapcase')
@array_function_dispatch(_unary_op_dispatcher)
def title(a):
    """
    Return an element-wise title-cased version of string or unicode.

    In title case, words begin with an uppercase character and every
    remaining cased character is lowercase.

    Delegates to `str.title` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See also
    --------
    str.title

    Examples
    --------
    >>> c=np.array(['a1b c','1b ca','b ca1','ca1b'],'S5'); c
    array(['a1b c', '1b ca', 'b ca1', 'ca1b'],
          dtype='|S5')
    >>> np.char.title(c)
    array(['A1B C', '1B Ca', 'B Ca1', 'Ca1B'],
          dtype='|S5')
    """
    strings = numpy.asarray(a)
    # The result keeps the dtype of the input.
    out_dtype = strings.dtype
    return _vec_string(strings, out_dtype, 'title')
def _translate_dispatcher(a, table, deletechars=None):
    """Return the arguments of `translate` used for dispatch.

    Only `a` is reported; `table` and `deletechars` are ignored.
    """
    relevant = (a,)
    return relevant
@array_function_dispatch(_translate_dispatcher)
def translate(a, table, deletechars=None):
    """
    Return a copy of each element of `a` in which all characters that
    occur in the optional argument `deletechars` are removed and the
    remaining characters are mapped through the given translation
    table.

    Delegates to `str.translate` element-wise.

    Parameters
    ----------
    a : array-like of str or unicode
        Strings to translate.
    table : str of length 256
        Translation table.
    deletechars : str
        Characters to delete.  Not forwarded when `a` has a unicode
        dtype.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See also
    --------
    str.translate
    """
    strings = numpy.asarray(a)
    if not issubclass(strings.dtype.type, unicode_):
        # Non-unicode input: `deletechars` is passed along when given.
        call_args = [table] + _clean_args(deletechars)
        return _vec_string(strings, strings.dtype, 'translate', call_args)
    # Unicode input: only the table is forwarded.
    return _vec_string(strings, strings.dtype, 'translate', (table,))
@array_function_dispatch(_unary_op_dispatcher)
def upper(a):
    """
    Convert every element of `a` to uppercase.

    Delegates to `str.upper` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type

    See also
    --------
    str.upper

    Examples
    --------
    >>> c = np.array(['a1b c', '1bca', 'bca1']); c
    array(['a1b c', '1bca', 'bca1'],
          dtype='|S5')
    >>> np.char.upper(c)
    array(['A1B C', '1BCA', 'BCA1'],
          dtype='|S5')
    """
    strings = numpy.asarray(a)
    # The result keeps the dtype of the input.
    out_dtype = strings.dtype
    return _vec_string(strings, out_dtype, 'upper')
def _zfill_dispatcher(a, width):
    """Return the arguments of `zfill` used for dispatch.

    Only `a` is reported; `width` is ignored.
    """
    relevant = (a,)
    return relevant
@array_function_dispatch(_zfill_dispatcher)
def zfill(a, width):
    """
    Return the numeric string left-filled with zeros.

    Delegates to `str.zfill` element-wise.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.
    width : int
        Width of string to left-fill elements in `a`.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type

    See also
    --------
    str.zfill
    """
    strings = numpy.asarray(a)
    widths = numpy.asarray(width)
    # The output item size is the largest requested width.
    itemsize = long(numpy.max(widths.flat))
    out_dtype = (strings.dtype.type, itemsize)
    return _vec_string(strings, out_dtype, 'zfill', (widths,))
@array_function_dispatch(_unary_op_dispatcher)
def isnumeric(a):
    """
    Return True for each element that contains only numeric characters.

    Delegates to `unicode.isnumeric` element-wise.

    Numeric characters include digit characters, and all characters
    that have the Unicode numeric value property, e.g. ``U+2155,
    VULGAR FRACTION ONE FIFTH``.

    Parameters
    ----------
    a : array_like, unicode
        Input array.

    Returns
    -------
    out : ndarray, bool
        Array of booleans of same shape as `a`.

    Raises
    ------
    TypeError
        If `_use_unicode(a)` does not report the unicode type.

    See also
    --------
    unicode.isnumeric
    """
    not_unicode = _use_unicode(a) != unicode_
    if not_unicode:
        raise TypeError("isnumeric is only available for Unicode strings and arrays")
    flags = _vec_string(a, bool_, 'isnumeric')
    return flags
@array_function_dispatch(_unary_op_dispatcher)
def isdecimal(a):
    """
    For each element, return True if there are only decimal
    characters in the element.

    Calls `unicode.isdecimal` element-wise.

    Decimal characters include digit characters, and all characters
    that can be used to form decimal-radix numbers,
    e.g. ``U+0660, ARABIC-INDIC DIGIT ZERO``.

    Parameters
    ----------
    a : array_like, unicode
        Input array.

    Returns
    -------
    out : ndarray, bool
        Array of booleans identical in shape to `a`.

    Raises
    ------
    TypeError
        If `_use_unicode(a)` does not report the unicode type.

    See also
    --------
    unicode.isdecimal
    """
    if _use_unicode(a) != unicode_:
        # The message must name this function, not its sibling `isnumeric`.
        raise TypeError("isdecimal is only available for Unicode strings and arrays")
    return _vec_string(a, bool_, 'isdecimal')
- @set_module('numpy')
- class chararray(ndarray):
- """
- chararray(shape, itemsize=1, unicode=False, buffer=None, offset=0,
- strides=None, order=None)
- Provides a convenient view on arrays of string and unicode values.
- .. note::
- The `chararray` class exists for backwards compatibility with
- Numarray, it is not recommended for new development. Starting from numpy
- 1.4, if one needs arrays of strings, it is recommended to use arrays of
- `dtype` `object_`, `string_` or `unicode_`, and use the free functions
- in the `numpy.char` module for fast vectorized string operations.
- Versus a regular NumPy array of type `str` or `unicode`, this
- class adds the following functionality:
- 1) values automatically have whitespace removed from the end
- when indexed
- 2) comparison operators automatically remove whitespace from the
- end when comparing values
- 3) vectorized string operations are provided as methods
- (e.g. `.endswith`) and infix operators (e.g. ``"+", "*", "%"``)
- chararrays should be created using `numpy.char.array` or
- `numpy.char.asarray`, rather than this constructor directly.
- This constructor creates the array, using `buffer` (with `offset`
- and `strides`) if it is not ``None``. If `buffer` is ``None``, then
- constructs a new array with `strides` in "C order", unless both
- ``len(shape) >= 2`` and ``order='Fortran'``, in which case `strides`
- is in "Fortran order".
- Methods
- -------
- astype
- argsort
- copy
- count
- decode
- dump
- dumps
- encode
- endswith
- expandtabs
- fill
- find
- flatten
- getfield
- index
- isalnum
- isalpha
- isdecimal
- isdigit
- islower
- isnumeric
- isspace
- istitle
- isupper
- item
- join
- ljust
- lower
- lstrip
- nonzero
- put
- ravel
- repeat
- replace
- reshape
- resize
- rfind
- rindex
- rjust
- rsplit
- rstrip
- searchsorted
- setfield
- setflags
- sort
- split
- splitlines
- squeeze
- startswith
- strip
- swapaxes
- swapcase
- take
- title
- tofile
- tolist
- tostring
- translate
- transpose
- upper
- view
- zfill
- Parameters
- ----------
- shape : tuple
- Shape of the array.
- itemsize : int, optional
- Length of each array element, in number of characters. Default is 1.
- unicode : bool, optional
- Are the array elements of type unicode (True) or string (False).
- Default is False.
- buffer : int, optional
- Memory address of the start of the array data. Default is None,
- in which case a new array is created.
- offset : int, optional
- Fixed stride displacement from the beginning of an axis?
- Default is 0. Needs to be >=0.
- strides : array_like of ints, optional
- Strides for the array (see `ndarray.strides` for full description).
- Default is None.
- order : {'C', 'F'}, optional
- The order in which the array data is stored in memory: 'C' ->
- "row major" order (the default), 'F' -> "column major"
- (Fortran) order.
- Examples
- --------
- >>> charar = np.chararray((3, 3))
- >>> charar[:] = 'a'
- >>> charar
- chararray([['a', 'a', 'a'],
- ['a', 'a', 'a'],
- ['a', 'a', 'a']],
- dtype='|S1')
- >>> charar = np.chararray(charar.shape, itemsize=5)
- >>> charar[:] = 'abc'
- >>> charar
- chararray([['abc', 'abc', 'abc'],
- ['abc', 'abc', 'abc'],
- ['abc', 'abc', 'abc']],
- dtype='|S5')
- """
def __new__(subtype, shape, itemsize=1, unicode=False, buffer=None,
            offset=0, strides=None, order='C'):
    """
    Allocate a new array of `subtype` holding string or unicode items.

    Parameters
    ----------
    subtype : type
        Class of the array to create.
    shape : tuple
        Shape of the array.
    itemsize : int, optional
        Length of each element; converted with `long` before use.
    unicode : bool, optional
        Select the `unicode_` dtype when true, `string_` otherwise.
    buffer : object, optional
        Forwarded to `ndarray.__new__` when not None.  On Python 3 a
        unicode `buffer` is instead used as a fill value for a freshly
        allocated array.
    offset, strides : optional
        Forwarded to `ndarray.__new__` only when `buffer` is used.
    order : optional
        Forwarded to `ndarray.__new__`.

    Returns
    -------
    self : instance of `subtype`
    """
    global _globalvar

    dtype = unicode_ if unicode else string_

    # force itemsize to be a Python long, since using NumPy integer
    # types results in itemsize.itemsize being used as the size of
    # strings in the new array.
    itemsize = long(itemsize)

    if sys.version_info[0] >= 3 and isinstance(buffer, _unicode):
        # On Py3, unicode objects do not have the buffer interface
        filler = buffer
        buffer = None
    else:
        filler = None

    # While `_globalvar` is set, `__array_finalize__` skips its dtype
    # check.  Reset it in `finally` so a failed construction does not
    # leave that check disabled for later arrays.
    _globalvar = 1
    try:
        if buffer is None:
            self = ndarray.__new__(subtype, shape, (dtype, itemsize),
                                   order=order)
        else:
            self = ndarray.__new__(subtype, shape, (dtype, itemsize),
                                   buffer=buffer,
                                   offset=offset, strides=strides,
                                   order=order)
        if filler is not None:
            self[...] = filler
    finally:
        _globalvar = 0
    return self
def __array_finalize__(self, obj):
    """Reject dtypes other than string-like ones.

    The check is skipped while the module-level `_globalvar` flag is
    set.

    Raises
    ------
    ValueError
        If the flag is clear and `self.dtype.char` is not one of
        'S', 'U', 'b' or 'c'.
    """
    if _globalvar:
        return
    # The b is a special case because it is used for reconstructing.
    if self.dtype.char not in 'SUbc':
        raise ValueError("Can only create a chararray from string data.")
def __getitem__(self, obj):
    """Index the array, stripping trailing whitespace from scalar items.

    Results that are not `character` instances are returned untouched.
    A scalar that is empty after `rstrip` is returned as ``''``.
    """
    item = ndarray.__getitem__(self, obj)
    if not isinstance(item, character):
        return item
    stripped = item.rstrip()
    return stripped if _len(stripped) != 0 else ''
- # IMPLEMENTATION NOTE: Most of the methods of this class are
- # direct delegations to the free functions in this module.
- # However, those that return an array of strings should instead
- # return a chararray, so some extra wrapping is required.
def __eq__(self, other):
    """
    Compare element-wise for equality (self == other).

    See also
    --------
    equal
    """
    outcome = equal(self, other)
    return outcome
def __ne__(self, other):
    """
    Compare element-wise for inequality (self != other).

    See also
    --------
    not_equal
    """
    outcome = not_equal(self, other)
    return outcome
def __ge__(self, other):
    """
    Compare element-wise (self >= other).

    See also
    --------
    greater_equal
    """
    outcome = greater_equal(self, other)
    return outcome
def __le__(self, other):
    """
    Compare element-wise (self <= other).

    See also
    --------
    less_equal
    """
    outcome = less_equal(self, other)
    return outcome
def __gt__(self, other):
    """
    Compare element-wise (self > other).

    See also
    --------
    greater
    """
    outcome = greater(self, other)
    return outcome
def __lt__(self, other):
    """
    Compare element-wise (self < other).

    See also
    --------
    less
    """
    outcome = less(self, other)
    return outcome
def __add__(self, other):
    """
    Concatenate element-wise (self + other) for a pair of array_likes
    of str or unicode.

    The result of `add` is passed through this module's `asarray`.

    See also
    --------
    add
    """
    combined = add(self, other)
    return asarray(combined)
def __radd__(self, other):
    """
    Concatenate element-wise with reflected operands (other + self) for
    a pair of array_likes of `string_` or `unicode_`.

    `other` is first converted with `numpy.asarray`; the result of
    `add` is passed through this module's `asarray`.

    See also
    --------
    add
    """
    left = numpy.asarray(other)
    combined = add(left, self)
    return asarray(combined)
def __mul__(self, i):
    """
    Repeat each string element-wise (self * i), that is string multiple
    concatenation.

    The result of `multiply` is passed through this module's `asarray`.

    See also
    --------
    multiply
    """
    repeated = multiply(self, i)
    return asarray(repeated)
def __rmul__(self, i):
    """
    Repeat each string element-wise for reflected operands (i * self),
    that is string multiple concatenation.

    Computed exactly as `multiply(self, i)`; the result is passed
    through this module's `asarray`.

    See also
    --------
    multiply
    """
    repeated = multiply(self, i)
    return asarray(repeated)
def __mod__(self, i):
    """
    Format element-wise (self % i), that is pre-Python 2.6 string
    formatting (interpolation), for a pair of array_likes of `string_`
    or `unicode_`.

    The result of `mod` is passed through this module's `asarray`.

    See also
    --------
    mod
    """
    formatted = mod(self, i)
    return asarray(formatted)
def __rmod__(self, other):
    """Decline the reflected ``%`` operation by returning NotImplemented."""
    unsupported = NotImplemented
    return unsupported
def argsort(self, axis=-1, kind='quicksort', order=None):
    """
    Return the indices that sort the array lexicographically.

    This is a thin wrapper: the work is done by the ``argsort`` method
    of the object returned by ``self.__array__()``.

    Examples
    --------
    >>> c = np.array(['a1b c', '1b ca', 'b ca1', 'Ca1b'], 'S5')
    >>> c = c.view(np.chararray); c
    chararray(['a1b c', '1b ca', 'b ca1', 'Ca1b'],
          dtype='|S5')
    >>> c[c.argsort()]
    chararray(['1b ca', 'Ca1b', 'a1b c', 'b ca1'],
          dtype='|S5')
    """
    plain = self.__array__()
    return plain.argsort(axis, kind, order)
# The docstring above is replaced at definition time by the one from
# `ndarray.argsort`.
argsort.__doc__ = ndarray.argsort.__doc__
def capitalize(self):
    """
    Return a copy of `self` in which only the first character of each
    element is capitalized.

    The result is passed through this module's `asarray`.

    See also
    --------
    char.capitalize
    """
    converted = capitalize(self)
    return asarray(converted)
def center(self, width, fillchar=' '):
    """
    Return a copy of `self` whose elements are centered in a string of
    length `width`.

    The result is passed through this module's `asarray`.

    See also
    --------
    center
    """
    centered = center(self, width, fillchar)
    return asarray(centered)
def count(self, sub, start=0, end=None):
    """
    Return an array holding the number of non-overlapping occurrences
    of substring `sub` in the range [`start`, `end`].

    See also
    --------
    char.count
    """
    occurrences = count(self, sub, start, end)
    return occurrences
def decode(self, encoding=None, errors=None):
    """
    Decode element-wise by delegating to the module-level `decode`.

    See also
    --------
    char.decode
    """
    decoded = decode(self, encoding, errors)
    return decoded
def encode(self, encoding=None, errors=None):
    """
    Encode element-wise by delegating to the module-level `encode`.

    See also
    --------
    char.encode
    """
    encoded = encode(self, encoding, errors)
    return encoded
def endswith(self, suffix, start=0, end=None):
    """
    Return a boolean array that is `True` where the string element in
    `self` ends with `suffix`, and `False` elsewhere.

    See also
    --------
    char.endswith
    """
    flags = endswith(self, suffix, start, end)
    return flags
def expandtabs(self, tabsize=8):
    """
    Return a copy of each string element in which every tab character
    is replaced by one or more spaces.

    The result is passed through this module's `asarray`.

    See also
    --------
    char.expandtabs
    """
    expanded = expandtabs(self, tabsize)
    return asarray(expanded)
def find(self, sub, start=0, end=None):
    """
    Return, for each element, the lowest index in the string at which
    substring `sub` is found.

    See also
    --------
    char.find
    """
    positions = find(self, sub, start, end)
    return positions
def index(self, sub, start=0, end=None):
    """
    Behave like `find`, except that `ValueError` is raised when the
    substring is not found.

    See also
    --------
    char.index
    """
    positions = index(self, sub, start, end)
    return positions
def isalnum(self):
    """
    Return true for each element whose characters are all alphanumeric
    and which holds at least one character, false otherwise.

    See also
    --------
    char.isalnum
    """
    flags = isalnum(self)
    return flags
def isalpha(self):
    """
    Return true for each element whose characters are all alphabetic
    and which holds at least one character, false otherwise.

    See also
    --------
    char.isalpha
    """
    flags = isalpha(self)
    return flags
def isdigit(self):
    """
    Return true for each element whose characters are all digits and
    which holds at least one character, false otherwise.

    See also
    --------
    char.isdigit
    """
    flags = isdigit(self)
    return flags
def islower(self):
    """
    Return true for each element whose cased characters are all
    lowercase and which holds at least one cased character, false
    otherwise.

    See also
    --------
    char.islower
    """
    flags = islower(self)
    return flags
def isspace(self):
    """
    Return true for each element that consists only of whitespace
    characters and holds at least one character, false otherwise.

    See also
    --------
    char.isspace
    """
    flags = isspace(self)
    return flags
def istitle(self):
    """
    Return true for each element that is a titlecased string holding at
    least one character, false otherwise.

    See also
    --------
    char.istitle
    """
    flags = istitle(self)
    return flags
def isupper(self):
    """
    Return true for each element whose cased characters are all
    uppercase and which holds at least one character, false otherwise.

    See also
    --------
    char.isupper
    """
    flags = isupper(self)
    return flags
def join(self, seq):
    """
    Return a string that is the concatenation of the strings in the
    sequence `seq`, with `self` passed as the separator argument.

    See also
    --------
    char.join
    """
    joined = join(self, seq)
    return joined
def ljust(self, width, fillchar=' '):
    """
    Return an array whose elements are those of `self` left-justified
    in a string of length `width`.

    The result is passed through this module's `asarray`.

    See also
    --------
    char.ljust
    """
    justified = ljust(self, width, fillchar)
    return asarray(justified)
def lower(self):
    """
    Return an array whose elements are those of `self` converted to
    lowercase.

    The result is passed through this module's `asarray`.

    See also
    --------
    char.lower
    """
    lowered = lower(self)
    return asarray(lowered)
def lstrip(self, chars=None):
    """
    Return, for each element in `self`, a copy with the leading
    characters removed.

    The result is passed through this module's `asarray`.

    See also
    --------
    char.lstrip
    """
    stripped = lstrip(self, chars)
    return asarray(stripped)
def partition(self, sep):
    """
    Partition each element in `self` around `sep`.

    The result is passed through this module's `asarray`.

    See also
    --------
    partition
    """
    pieces = partition(self, sep)
    return asarray(pieces)
def replace(self, old, new, count=None):
    """
    Return, for each element in `self`, a copy of the string with
    occurrences of substring `old` replaced by `new`.

    The result is passed through this module's `asarray`.

    See also
    --------
    char.replace
    """
    replaced = replace(self, old, new, count)
    return asarray(replaced)
def rfind(self, sub, start=0, end=None):
    """
    Return, for each element in `self`, the highest index in the string
    at which substring `sub` is found, such that `sub` is contained
    within [`start`, `end`].

    See also
    --------
    char.rfind
    """
    positions = rfind(self, sub, start, end)
    return positions
def rindex(self, sub, start=0, end=None):
    """
    Behave like `rfind`, except that `ValueError` is raised when the
    substring `sub` is not found.

    See also
    --------
    char.rindex
    """
    positions = rindex(self, sub, start, end)
    return positions
def rjust(self, width, fillchar=' '):
    """
    Return an array whose elements are those of `self` right-justified
    in a string of length `width`.

    The result is passed through this module's `asarray`.

    See also
    --------
    char.rjust
    """
    justified = rjust(self, width, fillchar)
    return asarray(justified)
def rpartition(self, sep):
    """
    Partition each element in `self` around the separator `sep` by
    delegating to the module-level `rpartition`.

    The result is passed through this module's `asarray`.

    See also
    --------
    rpartition
    """
    pieces = rpartition(self, sep)
    return asarray(pieces)
def rsplit(self, sep=None, maxsplit=None):
    """
    Return, for each element in `self`, a list of the words in the
    string, using `sep` as the delimiter string.

    See also
    --------
    char.rsplit
    """
    words = rsplit(self, sep, maxsplit)
    return words
def rstrip(self, chars=None):
    """
    Return, for each element in `self`, a copy with the trailing
    characters removed.

    The result is passed through this module's `asarray`.

    See also
    --------
    char.rstrip
    """
    stripped = rstrip(self, chars)
    return asarray(stripped)
def split(self, sep=None, maxsplit=None):
    """
    Break every element of `self` into a list of words, using `sep` as
    the delimiter string.

    The result of the module-level `split` is returned as-is.

    Parameters
    ----------
    sep : optional
        Delimiter, forwarded unchanged. Defaults to None.
    maxsplit : optional
        Forwarded unchanged to `split`. Defaults to None.

    See also
    --------
    char.split

    """
    words = split(self, sep, maxsplit)
    return words
def splitlines(self, keepends=None):
    """
    Break every element of `self` into a list of its lines, splitting
    at line boundaries.

    The result of the module-level `splitlines` is returned as-is.

    Parameters
    ----------
    keepends : optional
        Forwarded unchanged to `splitlines`. Defaults to None.

    See also
    --------
    char.splitlines

    """
    lines = splitlines(self, keepends)
    return lines
def startswith(self, prefix, start=0, end=None):
    """
    Test whether each string element of `self` starts with `prefix`.

    Gives a boolean array that is `True` where the element starts with
    `prefix` and `False` otherwise. The result of the module-level
    `startswith` is returned as-is.

    Parameters
    ----------
    prefix
        Prefix to test for.
    start, end : optional
        Forwarded unchanged to `startswith`. Default to 0 and None.

    See also
    --------
    char.startswith

    """
    matches = startswith(self, prefix, start, end)
    return matches
def strip(self, chars=None):
    """
    Strip leading and trailing characters from every element of
    `self`.

    The work is delegated to the module-level `strip`; its result is
    passed through `asarray` before being returned.

    Parameters
    ----------
    chars : optional
        Forwarded unchanged to `strip`. Defaults to None.

    See also
    --------
    char.strip

    """
    stripped = strip(self, chars)
    return asarray(stripped)
def swapcase(self):
    """
    Swap the case of every element of `self`: uppercase characters
    become lowercase and vice versa, in a copy of each string.

    The work is delegated to the module-level `swapcase`; its result is
    passed through `asarray` before being returned.

    See also
    --------
    char.swapcase

    """
    swapped = swapcase(self)
    return asarray(swapped)
def title(self):
    """
    Titlecase every element of `self`: words start with uppercase
    characters and all remaining cased characters are lowercase.

    The work is delegated to the module-level `title`; its result is
    passed through `asarray` before being returned.

    See also
    --------
    char.title

    """
    titled = title(self)
    return asarray(titled)
def translate(self, table, deletechars=None):
    """
    Translate every element of `self`: characters occurring in the
    optional `deletechars` are removed, and the remaining characters
    are mapped through the given translation table, in a copy of each
    string.

    The work is delegated to the module-level `translate`; its result
    is passed through `asarray` before being returned.

    Parameters
    ----------
    table
        Translation table, forwarded unchanged.
    deletechars : optional
        Forwarded unchanged to `translate`. Defaults to None.

    See also
    --------
    char.translate

    """
    translated = translate(self, table, deletechars)
    return asarray(translated)
def upper(self):
    """
    Convert every element of `self` to uppercase and return the
    resulting array.

    The work is delegated to the module-level `upper`; its result is
    passed through `asarray` before being returned.

    See also
    --------
    char.upper

    """
    uppercased = upper(self)
    return asarray(uppercased)
def zfill(self, width):
    """
    Left-fill the numeric string with zeros in a string of length
    `width`.

    The work is delegated to the module-level `zfill`; its result is
    passed through `asarray` before being returned.

    Parameters
    ----------
    width
        Length of the resulting strings, forwarded unchanged.

    See also
    --------
    char.zfill

    """
    padded = zfill(self, width)
    return asarray(padded)
def isnumeric(self):
    """
    Report, for every element of `self`, whether it contains only
    numeric characters.

    The result of the module-level `isnumeric` is returned as-is.

    See also
    --------
    char.isnumeric

    """
    flags = isnumeric(self)
    return flags
def isdecimal(self):
    """
    Report, for every element of `self`, whether it contains only
    decimal characters.

    The result of the module-level `isdecimal` is returned as-is.

    See also
    --------
    char.isdecimal

    """
    flags = isdecimal(self)
    return flags
def array(obj, itemsize=None, copy=True, unicode=None, order=None):
    """
    Create a `chararray`.

    .. note::
       This class is provided for numarray backward-compatibility.
       New code (not concerned with numarray compatibility) should use
       arrays of type `string_` or `unicode_` and use the free functions
       in :mod:`numpy.char <numpy.core.defchararray>` for fast
       vectorized string operations instead.

    Versus a regular NumPy array of type `str` or `unicode`, this
    class adds the following functionality:

      1) values automatically have whitespace removed from the end
         when indexed

      2) comparison operators automatically remove whitespace from the
         end when comparing values

      3) vectorized string operations are provided as methods
         (e.g. `str.endswith`) and infix operators (e.g. ``+, *, %``)

    Parameters
    ----------
    obj : array of str or unicode-like

    itemsize : int, optional
        `itemsize` is the number of characters per scalar in the
        resulting array.  If `itemsize` is None, and `obj` is an
        object array or a Python list, the `itemsize` will be
        automatically determined.  If `itemsize` is provided and `obj`
        is of type str or unicode, then the `obj` string will be
        chunked into `itemsize` pieces.

    copy : bool, optional
        If true (default), then the object is copied.  Otherwise, a copy
        will only be made if __array__ returns a copy, if obj is a
        nested sequence, or if a copy is needed to satisfy any of the other
        requirements (`itemsize`, unicode, `order`, etc.).

    unicode : bool, optional
        When true, the resulting `chararray` can contain Unicode
        characters, when false only 8-bit characters.  If unicode is
        `None` and `obj` is one of the following:

          - a `chararray`,
          - an ndarray of type `str` or `unicode`
          - a Python str or unicode object,

        then the unicode setting of the output array will be
        automatically determined.

    order : {'C', 'F', 'A'}, optional
        Specify the order of the array.  If order is 'C' (default), then the
        array will be in C-contiguous order (last-index varies the
        fastest).  If order is 'F', then the returned array
        will be in Fortran-contiguous order (first-index varies the
        fastest).  If order is 'A', then the returned array may
        be in any order (either C-, Fortran-contiguous, or even
        discontiguous).

    Returns
    -------
    out : chararray
        A `chararray` built from `obj`.

    """
    # Case 1: a single Python string, used directly as the data buffer and
    # chunked into `itemsize`-character elements.
    if isinstance(obj, (_bytes, _unicode)):
        if unicode is None:
            if isinstance(obj, _unicode):
                unicode = True
            else:
                unicode = False

        if itemsize is None:
            itemsize = _len(obj)
        shape = _len(obj) // itemsize

        if unicode:
            if sys.maxunicode == 0xffff:
                # On a narrow Python build, the buffer for Unicode
                # strings is UCS2, which doesn't match the buffer for
                # NumPy Unicode types, which is ALWAYS UCS4.
                # Therefore, we need to convert the buffer, which is
                # done here with the utf_32 codec.
                obj = obj.encode('utf_32')
            else:
                obj = _unicode(obj)
        else:
            # Let the default Unicode -> string encoding (if any) take
            # precedence.
            obj = _bytes(obj)

        return chararray(shape, itemsize=itemsize, unicode=unicode,
                         buffer=obj, order=order)

    if isinstance(obj, (list, tuple)):
        obj = numpy.asarray(obj)

    # Case 2: an ndarray that already holds character data.
    if isinstance(obj, ndarray) and issubclass(obj.dtype.type, character):
        # If we just have a vanilla chararray, create a chararray
        # view around it.
        if not isinstance(obj, chararray):
            obj = obj.view(chararray)

        if itemsize is None:
            itemsize = obj.itemsize
            # itemsize is in 8-bit chars, so for Unicode, we need
            # to divide by the size of a single Unicode character,
            # which for NumPy is always 4
            if issubclass(obj.dtype.type, unicode_):
                itemsize //= 4

        if unicode is None:
            if issubclass(obj.dtype.type, unicode_):
                unicode = True
            else:
                unicode = False

        if unicode:
            dtype = unicode_
        else:
            dtype = string_

        if order is not None:
            # asanyarray (unlike asarray) passes ndarray subclasses
            # through, so the chararray view created above is kept.
            obj = numpy.asanyarray(obj, order=order)
        # The element type must be read from the array's dtype: `obj` is
        # an ndarray, so an isinstance() test against the scalar types
        # `unicode_` / `string_` can never be true and a required
        # bytes <-> unicode conversion would be skipped when copy is False.
        if (copy or
                (itemsize != obj.itemsize) or
                (not unicode and issubclass(obj.dtype.type, unicode_)) or
                (unicode and issubclass(obj.dtype.type, string_))):
            obj = obj.astype((dtype, long(itemsize)))
        return obj

    if isinstance(obj, ndarray) and issubclass(obj.dtype.type, object):
        if itemsize is None:
            # Since no itemsize was specified, convert the input array to
            # a list so the ndarray constructor will automatically
            # determine the itemsize for us.
            obj = obj.tolist()
        # Fall through to the default case

    # Default case: let the ndarray constructor do the conversion, then
    # view the result as a chararray.
    if unicode:
        dtype = unicode_
    else:
        dtype = string_

    if itemsize is None:
        val = narray(obj, dtype=dtype, order=order, subok=True)
    else:
        val = narray(obj, dtype=(dtype, itemsize), order=order, subok=True)
    return val.view(chararray)
def asarray(obj, itemsize=None, unicode=None, order=None):
    """
    Convert the input to a `chararray`, copying the data only if
    necessary.

    This is `array` called with ``copy=False``; every other argument is
    forwarded unchanged.

    Versus a regular NumPy array of type `str` or `unicode`, this
    class adds the following functionality:

      1) values automatically have whitespace removed from the end
         when indexed

      2) comparison operators automatically remove whitespace from the
         end when comparing values

      3) vectorized string operations are provided as methods
         (e.g. `str.endswith`) and infix operators
         (e.g. ``+``, ``*``, ``%``)

    Parameters
    ----------
    obj : array of str or unicode-like

    itemsize : int, optional
        `itemsize` is the number of characters per scalar in the
        resulting array.  If `itemsize` is None, and `obj` is an
        object array or a Python list, the `itemsize` will be
        automatically determined.  If `itemsize` is provided and `obj`
        is of type str or unicode, then the `obj` string will be
        chunked into `itemsize` pieces.

    unicode : bool, optional
        When true, the resulting `chararray` can contain Unicode
        characters, when false only 8-bit characters.  If unicode is
        `None` and `obj` is one of the following:

          - a `chararray`,
          - an ndarray of type `str` or `unicode`
          - a Python str or unicode object,

        then the unicode setting of the output array will be
        automatically determined.

    order : {'C', 'F'}, optional
        Specify the order of the array.  If order is 'C' (default), then the
        array will be in C-contiguous order (last-index varies the
        fastest).  If order is 'F', then the returned array
        will be in Fortran-contiguous order (first-index varies the
        fastest).

    """
    return array(obj, itemsize=itemsize, copy=False, unicode=unicode,
                 order=order)
|