defchararray.py 69 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835
  1. """
  2. This module contains a set of functions for vectorized string
  3. operations and methods.
  4. .. note::
  5. The `chararray` class exists for backwards compatibility with
  6. Numarray, it is not recommended for new development. Starting from numpy
  7. 1.4, if one needs arrays of strings, it is recommended to use arrays of
  8. `dtype` `object_`, `string_` or `unicode_`, and use the free functions
  9. in the `numpy.char` module for fast vectorized string operations.
  10. Some methods will only be available if the corresponding string method is
  11. available in your version of Python.
  12. The preferred alias for `defchararray` is `numpy.char`.
  13. """
  14. from __future__ import division, absolute_import, print_function
  15. import functools
  16. import sys
  17. from .numerictypes import string_, unicode_, integer, object_, bool_, character
  18. from .numeric import ndarray, compare_chararrays
  19. from .numeric import array as narray
  20. from numpy.core.multiarray import _vec_string
  21. from numpy.core.overrides import set_module
  22. from numpy.core import overrides
  23. from numpy.compat import asbytes, long
  24. import numpy
  25. __all__ = [
  26. 'chararray', 'equal', 'not_equal', 'greater_equal', 'less_equal',
  27. 'greater', 'less', 'str_len', 'add', 'multiply', 'mod', 'capitalize',
  28. 'center', 'count', 'decode', 'encode', 'endswith', 'expandtabs',
  29. 'find', 'index', 'isalnum', 'isalpha', 'isdigit', 'islower', 'isspace',
  30. 'istitle', 'isupper', 'join', 'ljust', 'lower', 'lstrip', 'partition',
  31. 'replace', 'rfind', 'rindex', 'rjust', 'rpartition', 'rsplit',
  32. 'rstrip', 'split', 'splitlines', 'startswith', 'strip', 'swapcase',
  33. 'title', 'translate', 'upper', 'zfill', 'isnumeric', 'isdecimal',
  34. 'array', 'asarray'
  35. ]
  36. _globalvar = 0
  37. if sys.version_info[0] >= 3:
  38. _unicode = str
  39. _bytes = bytes
  40. else:
  41. _unicode = unicode
  42. _bytes = str
  43. _len = len
  44. array_function_dispatch = functools.partial(
  45. overrides.array_function_dispatch, module='numpy.char')
  46. def _use_unicode(*args):
  47. """
  48. Helper function for determining the output type of some string
  49. operations.
  50. For an operation on two ndarrays, if at least one is unicode, the
  51. result should be unicode.
  52. """
  53. for x in args:
  54. if (isinstance(x, _unicode) or
  55. issubclass(numpy.asarray(x).dtype.type, unicode_)):
  56. return unicode_
  57. return string_
  58. def _to_string_or_unicode_array(result):
  59. """
  60. Helper function to cast a result back into a string or unicode array
  61. if an object array must be used as an intermediary.
  62. """
  63. return numpy.asarray(result.tolist())
  64. def _clean_args(*args):
  65. """
  66. Helper function for delegating arguments to Python string
  67. functions.
  68. Many of the Python string operations that have optional arguments
  69. do not use 'None' to indicate a default value. In these cases,
  70. we need to remove all `None` arguments, and those following them.
  71. """
  72. newargs = []
  73. for chk in args:
  74. if chk is None:
  75. break
  76. newargs.append(chk)
  77. return newargs
  78. def _get_num_chars(a):
  79. """
  80. Helper function that returns the number of characters per field in
  81. a string or unicode array. This is to abstract out the fact that
  82. for a unicode array this is itemsize / 4.
  83. """
  84. if issubclass(a.dtype.type, unicode_):
  85. return a.itemsize // 4
  86. return a.itemsize
  87. def _binary_op_dispatcher(x1, x2):
  88. return (x1, x2)
  89. @array_function_dispatch(_binary_op_dispatcher)
  90. def equal(x1, x2):
  91. """
  92. Return (x1 == x2) element-wise.
  93. Unlike `numpy.equal`, this comparison is performed by first
  94. stripping whitespace characters from the end of the string. This
  95. behavior is provided for backward-compatibility with numarray.
  96. Parameters
  97. ----------
  98. x1, x2 : array_like of str or unicode
  99. Input arrays of the same shape.
  100. Returns
  101. -------
  102. out : ndarray or bool
  103. Output array of bools, or a single bool if x1 and x2 are scalars.
  104. See Also
  105. --------
  106. not_equal, greater_equal, less_equal, greater, less
  107. """
  108. return compare_chararrays(x1, x2, '==', True)
  109. @array_function_dispatch(_binary_op_dispatcher)
  110. def not_equal(x1, x2):
  111. """
  112. Return (x1 != x2) element-wise.
  113. Unlike `numpy.not_equal`, this comparison is performed by first
  114. stripping whitespace characters from the end of the string. This
  115. behavior is provided for backward-compatibility with numarray.
  116. Parameters
  117. ----------
  118. x1, x2 : array_like of str or unicode
  119. Input arrays of the same shape.
  120. Returns
  121. -------
  122. out : ndarray or bool
  123. Output array of bools, or a single bool if x1 and x2 are scalars.
  124. See Also
  125. --------
  126. equal, greater_equal, less_equal, greater, less
  127. """
  128. return compare_chararrays(x1, x2, '!=', True)
  129. @array_function_dispatch(_binary_op_dispatcher)
  130. def greater_equal(x1, x2):
  131. """
  132. Return (x1 >= x2) element-wise.
  133. Unlike `numpy.greater_equal`, this comparison is performed by
  134. first stripping whitespace characters from the end of the string.
  135. This behavior is provided for backward-compatibility with
  136. numarray.
  137. Parameters
  138. ----------
  139. x1, x2 : array_like of str or unicode
  140. Input arrays of the same shape.
  141. Returns
  142. -------
  143. out : ndarray or bool
  144. Output array of bools, or a single bool if x1 and x2 are scalars.
  145. See Also
  146. --------
  147. equal, not_equal, less_equal, greater, less
  148. """
  149. return compare_chararrays(x1, x2, '>=', True)
  150. @array_function_dispatch(_binary_op_dispatcher)
  151. def less_equal(x1, x2):
  152. """
  153. Return (x1 <= x2) element-wise.
  154. Unlike `numpy.less_equal`, this comparison is performed by first
  155. stripping whitespace characters from the end of the string. This
  156. behavior is provided for backward-compatibility with numarray.
  157. Parameters
  158. ----------
  159. x1, x2 : array_like of str or unicode
  160. Input arrays of the same shape.
  161. Returns
  162. -------
  163. out : ndarray or bool
  164. Output array of bools, or a single bool if x1 and x2 are scalars.
  165. See Also
  166. --------
  167. equal, not_equal, greater_equal, greater, less
  168. """
  169. return compare_chararrays(x1, x2, '<=', True)
  170. @array_function_dispatch(_binary_op_dispatcher)
  171. def greater(x1, x2):
  172. """
  173. Return (x1 > x2) element-wise.
  174. Unlike `numpy.greater`, this comparison is performed by first
  175. stripping whitespace characters from the end of the string. This
  176. behavior is provided for backward-compatibility with numarray.
  177. Parameters
  178. ----------
  179. x1, x2 : array_like of str or unicode
  180. Input arrays of the same shape.
  181. Returns
  182. -------
  183. out : ndarray or bool
  184. Output array of bools, or a single bool if x1 and x2 are scalars.
  185. See Also
  186. --------
  187. equal, not_equal, greater_equal, less_equal, less
  188. """
  189. return compare_chararrays(x1, x2, '>', True)
  190. @array_function_dispatch(_binary_op_dispatcher)
  191. def less(x1, x2):
  192. """
  193. Return (x1 < x2) element-wise.
  194. Unlike `numpy.greater`, this comparison is performed by first
  195. stripping whitespace characters from the end of the string. This
  196. behavior is provided for backward-compatibility with numarray.
  197. Parameters
  198. ----------
  199. x1, x2 : array_like of str or unicode
  200. Input arrays of the same shape.
  201. Returns
  202. -------
  203. out : ndarray or bool
  204. Output array of bools, or a single bool if x1 and x2 are scalars.
  205. See Also
  206. --------
  207. equal, not_equal, greater_equal, less_equal, greater
  208. """
  209. return compare_chararrays(x1, x2, '<', True)
  210. def _unary_op_dispatcher(a):
  211. return (a,)
  212. @array_function_dispatch(_unary_op_dispatcher)
  213. def str_len(a):
  214. """
  215. Return len(a) element-wise.
  216. Parameters
  217. ----------
  218. a : array_like of str or unicode
  219. Returns
  220. -------
  221. out : ndarray
  222. Output array of integers
  223. See also
  224. --------
  225. __builtin__.len
  226. """
  227. return _vec_string(a, integer, '__len__')
  228. @array_function_dispatch(_binary_op_dispatcher)
  229. def add(x1, x2):
  230. """
  231. Return element-wise string concatenation for two arrays of str or unicode.
  232. Arrays `x1` and `x2` must have the same shape.
  233. Parameters
  234. ----------
  235. x1 : array_like of str or unicode
  236. Input array.
  237. x2 : array_like of str or unicode
  238. Input array.
  239. Returns
  240. -------
  241. add : ndarray
  242. Output array of `string_` or `unicode_`, depending on input types
  243. of the same shape as `x1` and `x2`.
  244. """
  245. arr1 = numpy.asarray(x1)
  246. arr2 = numpy.asarray(x2)
  247. out_size = _get_num_chars(arr1) + _get_num_chars(arr2)
  248. dtype = _use_unicode(arr1, arr2)
  249. return _vec_string(arr1, (dtype, out_size), '__add__', (arr2,))
  250. def _multiply_dispatcher(a, i):
  251. return (a,)
  252. @array_function_dispatch(_multiply_dispatcher)
  253. def multiply(a, i):
  254. """
  255. Return (a * i), that is string multiple concatenation,
  256. element-wise.
  257. Values in `i` of less than 0 are treated as 0 (which yields an
  258. empty string).
  259. Parameters
  260. ----------
  261. a : array_like of str or unicode
  262. i : array_like of ints
  263. Returns
  264. -------
  265. out : ndarray
  266. Output array of str or unicode, depending on input types
  267. """
  268. a_arr = numpy.asarray(a)
  269. i_arr = numpy.asarray(i)
  270. if not issubclass(i_arr.dtype.type, integer):
  271. raise ValueError("Can only multiply by integers")
  272. out_size = _get_num_chars(a_arr) * max(long(i_arr.max()), 0)
  273. return _vec_string(
  274. a_arr, (a_arr.dtype.type, out_size), '__mul__', (i_arr,))
  275. def _mod_dispatcher(a, values):
  276. return (a, values)
  277. @array_function_dispatch(_mod_dispatcher)
  278. def mod(a, values):
  279. """
  280. Return (a % i), that is pre-Python 2.6 string formatting
  281. (iterpolation), element-wise for a pair of array_likes of str
  282. or unicode.
  283. Parameters
  284. ----------
  285. a : array_like of str or unicode
  286. values : array_like of values
  287. These values will be element-wise interpolated into the string.
  288. Returns
  289. -------
  290. out : ndarray
  291. Output array of str or unicode, depending on input types
  292. See also
  293. --------
  294. str.__mod__
  295. """
  296. return _to_string_or_unicode_array(
  297. _vec_string(a, object_, '__mod__', (values,)))
  298. @array_function_dispatch(_unary_op_dispatcher)
  299. def capitalize(a):
  300. """
  301. Return a copy of `a` with only the first character of each element
  302. capitalized.
  303. Calls `str.capitalize` element-wise.
  304. For 8-bit strings, this method is locale-dependent.
  305. Parameters
  306. ----------
  307. a : array_like of str or unicode
  308. Input array of strings to capitalize.
  309. Returns
  310. -------
  311. out : ndarray
  312. Output array of str or unicode, depending on input
  313. types
  314. See also
  315. --------
  316. str.capitalize
  317. Examples
  318. --------
  319. >>> c = np.array(['a1b2','1b2a','b2a1','2a1b'],'S4'); c
  320. array(['a1b2', '1b2a', 'b2a1', '2a1b'],
  321. dtype='|S4')
  322. >>> np.char.capitalize(c)
  323. array(['A1b2', '1b2a', 'B2a1', '2a1b'],
  324. dtype='|S4')
  325. """
  326. a_arr = numpy.asarray(a)
  327. return _vec_string(a_arr, a_arr.dtype, 'capitalize')
  328. def _center_dispatcher(a, width, fillchar=None):
  329. return (a,)
  330. @array_function_dispatch(_center_dispatcher)
  331. def center(a, width, fillchar=' '):
  332. """
  333. Return a copy of `a` with its elements centered in a string of
  334. length `width`.
  335. Calls `str.center` element-wise.
  336. Parameters
  337. ----------
  338. a : array_like of str or unicode
  339. width : int
  340. The length of the resulting strings
  341. fillchar : str or unicode, optional
  342. The padding character to use (default is space).
  343. Returns
  344. -------
  345. out : ndarray
  346. Output array of str or unicode, depending on input
  347. types
  348. See also
  349. --------
  350. str.center
  351. """
  352. a_arr = numpy.asarray(a)
  353. width_arr = numpy.asarray(width)
  354. size = long(numpy.max(width_arr.flat))
  355. if numpy.issubdtype(a_arr.dtype, numpy.string_):
  356. fillchar = asbytes(fillchar)
  357. return _vec_string(
  358. a_arr, (a_arr.dtype.type, size), 'center', (width_arr, fillchar))
  359. def _count_dispatcher(a, sub, start=None, end=None):
  360. return (a,)
  361. @array_function_dispatch(_count_dispatcher)
  362. def count(a, sub, start=0, end=None):
  363. """
  364. Returns an array with the number of non-overlapping occurrences of
  365. substring `sub` in the range [`start`, `end`].
  366. Calls `str.count` element-wise.
  367. Parameters
  368. ----------
  369. a : array_like of str or unicode
  370. sub : str or unicode
  371. The substring to search for.
  372. start, end : int, optional
  373. Optional arguments `start` and `end` are interpreted as slice
  374. notation to specify the range in which to count.
  375. Returns
  376. -------
  377. out : ndarray
  378. Output array of ints.
  379. See also
  380. --------
  381. str.count
  382. Examples
  383. --------
  384. >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
  385. >>> c
  386. array(['aAaAaA', ' aA ', 'abBABba'],
  387. dtype='|S7')
  388. >>> np.char.count(c, 'A')
  389. array([3, 1, 1])
  390. >>> np.char.count(c, 'aA')
  391. array([3, 1, 0])
  392. >>> np.char.count(c, 'A', start=1, end=4)
  393. array([2, 1, 1])
  394. >>> np.char.count(c, 'A', start=1, end=3)
  395. array([1, 0, 0])
  396. """
  397. return _vec_string(a, integer, 'count', [sub, start] + _clean_args(end))
  398. def _code_dispatcher(a, encoding=None, errors=None):
  399. return (a,)
  400. @array_function_dispatch(_code_dispatcher)
  401. def decode(a, encoding=None, errors=None):
  402. """
  403. Calls `str.decode` element-wise.
  404. The set of available codecs comes from the Python standard library,
  405. and may be extended at runtime. For more information, see the
  406. :mod:`codecs` module.
  407. Parameters
  408. ----------
  409. a : array_like of str or unicode
  410. encoding : str, optional
  411. The name of an encoding
  412. errors : str, optional
  413. Specifies how to handle encoding errors
  414. Returns
  415. -------
  416. out : ndarray
  417. See also
  418. --------
  419. str.decode
  420. Notes
  421. -----
  422. The type of the result will depend on the encoding specified.
  423. Examples
  424. --------
  425. >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
  426. >>> c
  427. array(['aAaAaA', ' aA ', 'abBABba'],
  428. dtype='|S7')
  429. >>> np.char.encode(c, encoding='cp037')
  430. array(['\\x81\\xc1\\x81\\xc1\\x81\\xc1', '@@\\x81\\xc1@@',
  431. '\\x81\\x82\\xc2\\xc1\\xc2\\x82\\x81'],
  432. dtype='|S7')
  433. """
  434. return _to_string_or_unicode_array(
  435. _vec_string(a, object_, 'decode', _clean_args(encoding, errors)))
  436. @array_function_dispatch(_code_dispatcher)
  437. def encode(a, encoding=None, errors=None):
  438. """
  439. Calls `str.encode` element-wise.
  440. The set of available codecs comes from the Python standard library,
  441. and may be extended at runtime. For more information, see the codecs
  442. module.
  443. Parameters
  444. ----------
  445. a : array_like of str or unicode
  446. encoding : str, optional
  447. The name of an encoding
  448. errors : str, optional
  449. Specifies how to handle encoding errors
  450. Returns
  451. -------
  452. out : ndarray
  453. See also
  454. --------
  455. str.encode
  456. Notes
  457. -----
  458. The type of the result will depend on the encoding specified.
  459. """
  460. return _to_string_or_unicode_array(
  461. _vec_string(a, object_, 'encode', _clean_args(encoding, errors)))
  462. def _endswith_dispatcher(a, suffix, start=None, end=None):
  463. return (a,)
  464. @array_function_dispatch(_endswith_dispatcher)
  465. def endswith(a, suffix, start=0, end=None):
  466. """
  467. Returns a boolean array which is `True` where the string element
  468. in `a` ends with `suffix`, otherwise `False`.
  469. Calls `str.endswith` element-wise.
  470. Parameters
  471. ----------
  472. a : array_like of str or unicode
  473. suffix : str
  474. start, end : int, optional
  475. With optional `start`, test beginning at that position. With
  476. optional `end`, stop comparing at that position.
  477. Returns
  478. -------
  479. out : ndarray
  480. Outputs an array of bools.
  481. See also
  482. --------
  483. str.endswith
  484. Examples
  485. --------
  486. >>> s = np.array(['foo', 'bar'])
  487. >>> s[0] = 'foo'
  488. >>> s[1] = 'bar'
  489. >>> s
  490. array(['foo', 'bar'],
  491. dtype='|S3')
  492. >>> np.char.endswith(s, 'ar')
  493. array([False, True])
  494. >>> np.char.endswith(s, 'a', start=1, end=2)
  495. array([False, True])
  496. """
  497. return _vec_string(
  498. a, bool_, 'endswith', [suffix, start] + _clean_args(end))
  499. def _expandtabs_dispatcher(a, tabsize=None):
  500. return (a,)
  501. @array_function_dispatch(_expandtabs_dispatcher)
  502. def expandtabs(a, tabsize=8):
  503. """
  504. Return a copy of each string element where all tab characters are
  505. replaced by one or more spaces.
  506. Calls `str.expandtabs` element-wise.
  507. Return a copy of each string element where all tab characters are
  508. replaced by one or more spaces, depending on the current column
  509. and the given `tabsize`. The column number is reset to zero after
  510. each newline occurring in the string. This doesn't understand other
  511. non-printing characters or escape sequences.
  512. Parameters
  513. ----------
  514. a : array_like of str or unicode
  515. Input array
  516. tabsize : int, optional
  517. Replace tabs with `tabsize` number of spaces. If not given defaults
  518. to 8 spaces.
  519. Returns
  520. -------
  521. out : ndarray
  522. Output array of str or unicode, depending on input type
  523. See also
  524. --------
  525. str.expandtabs
  526. """
  527. return _to_string_or_unicode_array(
  528. _vec_string(a, object_, 'expandtabs', (tabsize,)))
  529. @array_function_dispatch(_count_dispatcher)
  530. def find(a, sub, start=0, end=None):
  531. """
  532. For each element, return the lowest index in the string where
  533. substring `sub` is found.
  534. Calls `str.find` element-wise.
  535. For each element, return the lowest index in the string where
  536. substring `sub` is found, such that `sub` is contained in the
  537. range [`start`, `end`].
  538. Parameters
  539. ----------
  540. a : array_like of str or unicode
  541. sub : str or unicode
  542. start, end : int, optional
  543. Optional arguments `start` and `end` are interpreted as in
  544. slice notation.
  545. Returns
  546. -------
  547. out : ndarray or int
  548. Output array of ints. Returns -1 if `sub` is not found.
  549. See also
  550. --------
  551. str.find
  552. """
  553. return _vec_string(
  554. a, integer, 'find', [sub, start] + _clean_args(end))
  555. @array_function_dispatch(_count_dispatcher)
  556. def index(a, sub, start=0, end=None):
  557. """
  558. Like `find`, but raises `ValueError` when the substring is not found.
  559. Calls `str.index` element-wise.
  560. Parameters
  561. ----------
  562. a : array_like of str or unicode
  563. sub : str or unicode
  564. start, end : int, optional
  565. Returns
  566. -------
  567. out : ndarray
  568. Output array of ints. Returns -1 if `sub` is not found.
  569. See also
  570. --------
  571. find, str.find
  572. """
  573. return _vec_string(
  574. a, integer, 'index', [sub, start] + _clean_args(end))
  575. @array_function_dispatch(_unary_op_dispatcher)
  576. def isalnum(a):
  577. """
  578. Returns true for each element if all characters in the string are
  579. alphanumeric and there is at least one character, false otherwise.
  580. Calls `str.isalnum` element-wise.
  581. For 8-bit strings, this method is locale-dependent.
  582. Parameters
  583. ----------
  584. a : array_like of str or unicode
  585. Returns
  586. -------
  587. out : ndarray
  588. Output array of str or unicode, depending on input type
  589. See also
  590. --------
  591. str.isalnum
  592. """
  593. return _vec_string(a, bool_, 'isalnum')
  594. @array_function_dispatch(_unary_op_dispatcher)
  595. def isalpha(a):
  596. """
  597. Returns true for each element if all characters in the string are
  598. alphabetic and there is at least one character, false otherwise.
  599. Calls `str.isalpha` element-wise.
  600. For 8-bit strings, this method is locale-dependent.
  601. Parameters
  602. ----------
  603. a : array_like of str or unicode
  604. Returns
  605. -------
  606. out : ndarray
  607. Output array of bools
  608. See also
  609. --------
  610. str.isalpha
  611. """
  612. return _vec_string(a, bool_, 'isalpha')
  613. @array_function_dispatch(_unary_op_dispatcher)
  614. def isdigit(a):
  615. """
  616. Returns true for each element if all characters in the string are
  617. digits and there is at least one character, false otherwise.
  618. Calls `str.isdigit` element-wise.
  619. For 8-bit strings, this method is locale-dependent.
  620. Parameters
  621. ----------
  622. a : array_like of str or unicode
  623. Returns
  624. -------
  625. out : ndarray
  626. Output array of bools
  627. See also
  628. --------
  629. str.isdigit
  630. """
  631. return _vec_string(a, bool_, 'isdigit')
  632. @array_function_dispatch(_unary_op_dispatcher)
  633. def islower(a):
  634. """
  635. Returns true for each element if all cased characters in the
  636. string are lowercase and there is at least one cased character,
  637. false otherwise.
  638. Calls `str.islower` element-wise.
  639. For 8-bit strings, this method is locale-dependent.
  640. Parameters
  641. ----------
  642. a : array_like of str or unicode
  643. Returns
  644. -------
  645. out : ndarray
  646. Output array of bools
  647. See also
  648. --------
  649. str.islower
  650. """
  651. return _vec_string(a, bool_, 'islower')
  652. @array_function_dispatch(_unary_op_dispatcher)
  653. def isspace(a):
  654. """
  655. Returns true for each element if there are only whitespace
  656. characters in the string and there is at least one character,
  657. false otherwise.
  658. Calls `str.isspace` element-wise.
  659. For 8-bit strings, this method is locale-dependent.
  660. Parameters
  661. ----------
  662. a : array_like of str or unicode
  663. Returns
  664. -------
  665. out : ndarray
  666. Output array of bools
  667. See also
  668. --------
  669. str.isspace
  670. """
  671. return _vec_string(a, bool_, 'isspace')
  672. @array_function_dispatch(_unary_op_dispatcher)
  673. def istitle(a):
  674. """
  675. Returns true for each element if the element is a titlecased
  676. string and there is at least one character, false otherwise.
  677. Call `str.istitle` element-wise.
  678. For 8-bit strings, this method is locale-dependent.
  679. Parameters
  680. ----------
  681. a : array_like of str or unicode
  682. Returns
  683. -------
  684. out : ndarray
  685. Output array of bools
  686. See also
  687. --------
  688. str.istitle
  689. """
  690. return _vec_string(a, bool_, 'istitle')
  691. @array_function_dispatch(_unary_op_dispatcher)
  692. def isupper(a):
  693. """
  694. Returns true for each element if all cased characters in the
  695. string are uppercase and there is at least one character, false
  696. otherwise.
  697. Call `str.isupper` element-wise.
  698. For 8-bit strings, this method is locale-dependent.
  699. Parameters
  700. ----------
  701. a : array_like of str or unicode
  702. Returns
  703. -------
  704. out : ndarray
  705. Output array of bools
  706. See also
  707. --------
  708. str.isupper
  709. """
  710. return _vec_string(a, bool_, 'isupper')
  711. def _join_dispatcher(sep, seq):
  712. return (sep, seq)
  713. @array_function_dispatch(_join_dispatcher)
  714. def join(sep, seq):
  715. """
  716. Return a string which is the concatenation of the strings in the
  717. sequence `seq`.
  718. Calls `str.join` element-wise.
  719. Parameters
  720. ----------
  721. sep : array_like of str or unicode
  722. seq : array_like of str or unicode
  723. Returns
  724. -------
  725. out : ndarray
  726. Output array of str or unicode, depending on input types
  727. See also
  728. --------
  729. str.join
  730. """
  731. return _to_string_or_unicode_array(
  732. _vec_string(sep, object_, 'join', (seq,)))
  733. def _just_dispatcher(a, width, fillchar=None):
  734. return (a,)
  735. @array_function_dispatch(_just_dispatcher)
  736. def ljust(a, width, fillchar=' '):
  737. """
  738. Return an array with the elements of `a` left-justified in a
  739. string of length `width`.
  740. Calls `str.ljust` element-wise.
  741. Parameters
  742. ----------
  743. a : array_like of str or unicode
  744. width : int
  745. The length of the resulting strings
  746. fillchar : str or unicode, optional
  747. The character to use for padding
  748. Returns
  749. -------
  750. out : ndarray
  751. Output array of str or unicode, depending on input type
  752. See also
  753. --------
  754. str.ljust
  755. """
  756. a_arr = numpy.asarray(a)
  757. width_arr = numpy.asarray(width)
  758. size = long(numpy.max(width_arr.flat))
  759. if numpy.issubdtype(a_arr.dtype, numpy.string_):
  760. fillchar = asbytes(fillchar)
  761. return _vec_string(
  762. a_arr, (a_arr.dtype.type, size), 'ljust', (width_arr, fillchar))
  763. @array_function_dispatch(_unary_op_dispatcher)
  764. def lower(a):
  765. """
  766. Return an array with the elements converted to lowercase.
  767. Call `str.lower` element-wise.
  768. For 8-bit strings, this method is locale-dependent.
  769. Parameters
  770. ----------
  771. a : array_like, {str, unicode}
  772. Input array.
  773. Returns
  774. -------
  775. out : ndarray, {str, unicode}
  776. Output array of str or unicode, depending on input type
  777. See also
  778. --------
  779. str.lower
  780. Examples
  781. --------
  782. >>> c = np.array(['A1B C', '1BCA', 'BCA1']); c
  783. array(['A1B C', '1BCA', 'BCA1'],
  784. dtype='|S5')
  785. >>> np.char.lower(c)
  786. array(['a1b c', '1bca', 'bca1'],
  787. dtype='|S5')
  788. """
  789. a_arr = numpy.asarray(a)
  790. return _vec_string(a_arr, a_arr.dtype, 'lower')
  791. def _strip_dispatcher(a, chars=None):
  792. return (a,)
  793. @array_function_dispatch(_strip_dispatcher)
  794. def lstrip(a, chars=None):
  795. """
  796. For each element in `a`, return a copy with the leading characters
  797. removed.
  798. Calls `str.lstrip` element-wise.
  799. Parameters
  800. ----------
  801. a : array-like, {str, unicode}
  802. Input array.
  803. chars : {str, unicode}, optional
  804. The `chars` argument is a string specifying the set of
  805. characters to be removed. If omitted or None, the `chars`
  806. argument defaults to removing whitespace. The `chars` argument
  807. is not a prefix; rather, all combinations of its values are
  808. stripped.
  809. Returns
  810. -------
  811. out : ndarray, {str, unicode}
  812. Output array of str or unicode, depending on input type
  813. See also
  814. --------
  815. str.lstrip
  816. Examples
  817. --------
  818. >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
  819. >>> c
  820. array(['aAaAaA', ' aA ', 'abBABba'],
  821. dtype='|S7')
  822. The 'a' variable is unstripped from c[1] because whitespace leading.
  823. >>> np.char.lstrip(c, 'a')
  824. array(['AaAaA', ' aA ', 'bBABba'],
  825. dtype='|S7')
  826. >>> np.char.lstrip(c, 'A') # leaves c unchanged
  827. array(['aAaAaA', ' aA ', 'abBABba'],
  828. dtype='|S7')
  829. >>> (np.char.lstrip(c, ' ') == np.char.lstrip(c, '')).all()
  830. ... # XXX: is this a regression? this line now returns False
  831. ... # np.char.lstrip(c,'') does not modify c at all.
  832. True
  833. >>> (np.char.lstrip(c, ' ') == np.char.lstrip(c, None)).all()
  834. True
  835. """
  836. a_arr = numpy.asarray(a)
  837. return _vec_string(a_arr, a_arr.dtype, 'lstrip', (chars,))
  838. def _partition_dispatcher(a, sep):
  839. return (a,)
  840. @array_function_dispatch(_partition_dispatcher)
  841. def partition(a, sep):
  842. """
  843. Partition each element in `a` around `sep`.
  844. Calls `str.partition` element-wise.
  845. For each element in `a`, split the element as the first
  846. occurrence of `sep`, and return 3 strings containing the part
  847. before the separator, the separator itself, and the part after
  848. the separator. If the separator is not found, return 3 strings
  849. containing the string itself, followed by two empty strings.
  850. Parameters
  851. ----------
  852. a : array_like, {str, unicode}
  853. Input array
  854. sep : {str, unicode}
  855. Separator to split each string element in `a`.
  856. Returns
  857. -------
  858. out : ndarray, {str, unicode}
  859. Output array of str or unicode, depending on input type.
  860. The output array will have an extra dimension with 3
  861. elements per input element.
  862. See also
  863. --------
  864. str.partition
  865. """
  866. return _to_string_or_unicode_array(
  867. _vec_string(a, object_, 'partition', (sep,)))
  868. def _replace_dispatcher(a, old, new, count=None):
  869. return (a,)
  870. @array_function_dispatch(_replace_dispatcher)
  871. def replace(a, old, new, count=None):
  872. """
  873. For each element in `a`, return a copy of the string with all
  874. occurrences of substring `old` replaced by `new`.
  875. Calls `str.replace` element-wise.
  876. Parameters
  877. ----------
  878. a : array-like of str or unicode
  879. old, new : str or unicode
  880. count : int, optional
  881. If the optional argument `count` is given, only the first
  882. `count` occurrences are replaced.
  883. Returns
  884. -------
  885. out : ndarray
  886. Output array of str or unicode, depending on input type
  887. See also
  888. --------
  889. str.replace
  890. """
  891. return _to_string_or_unicode_array(
  892. _vec_string(
  893. a, object_, 'replace', [old, new] + _clean_args(count)))
  894. @array_function_dispatch(_count_dispatcher)
  895. def rfind(a, sub, start=0, end=None):
  896. """
  897. For each element in `a`, return the highest index in the string
  898. where substring `sub` is found, such that `sub` is contained
  899. within [`start`, `end`].
  900. Calls `str.rfind` element-wise.
  901. Parameters
  902. ----------
  903. a : array-like of str or unicode
  904. sub : str or unicode
  905. start, end : int, optional
  906. Optional arguments `start` and `end` are interpreted as in
  907. slice notation.
  908. Returns
  909. -------
  910. out : ndarray
  911. Output array of ints. Return -1 on failure.
  912. See also
  913. --------
  914. str.rfind
  915. """
  916. return _vec_string(
  917. a, integer, 'rfind', [sub, start] + _clean_args(end))
  918. @array_function_dispatch(_count_dispatcher)
  919. def rindex(a, sub, start=0, end=None):
  920. """
  921. Like `rfind`, but raises `ValueError` when the substring `sub` is
  922. not found.
  923. Calls `str.rindex` element-wise.
  924. Parameters
  925. ----------
  926. a : array-like of str or unicode
  927. sub : str or unicode
  928. start, end : int, optional
  929. Returns
  930. -------
  931. out : ndarray
  932. Output array of ints.
  933. See also
  934. --------
  935. rfind, str.rindex
  936. """
  937. return _vec_string(
  938. a, integer, 'rindex', [sub, start] + _clean_args(end))
  939. @array_function_dispatch(_just_dispatcher)
  940. def rjust(a, width, fillchar=' '):
  941. """
  942. Return an array with the elements of `a` right-justified in a
  943. string of length `width`.
  944. Calls `str.rjust` element-wise.
  945. Parameters
  946. ----------
  947. a : array_like of str or unicode
  948. width : int
  949. The length of the resulting strings
  950. fillchar : str or unicode, optional
  951. The character to use for padding
  952. Returns
  953. -------
  954. out : ndarray
  955. Output array of str or unicode, depending on input type
  956. See also
  957. --------
  958. str.rjust
  959. """
  960. a_arr = numpy.asarray(a)
  961. width_arr = numpy.asarray(width)
  962. size = long(numpy.max(width_arr.flat))
  963. if numpy.issubdtype(a_arr.dtype, numpy.string_):
  964. fillchar = asbytes(fillchar)
  965. return _vec_string(
  966. a_arr, (a_arr.dtype.type, size), 'rjust', (width_arr, fillchar))
  967. @array_function_dispatch(_partition_dispatcher)
  968. def rpartition(a, sep):
  969. """
  970. Partition (split) each element around the right-most separator.
  971. Calls `str.rpartition` element-wise.
  972. For each element in `a`, split the element as the last
  973. occurrence of `sep`, and return 3 strings containing the part
  974. before the separator, the separator itself, and the part after
  975. the separator. If the separator is not found, return 3 strings
  976. containing the string itself, followed by two empty strings.
  977. Parameters
  978. ----------
  979. a : array_like of str or unicode
  980. Input array
  981. sep : str or unicode
  982. Right-most separator to split each element in array.
  983. Returns
  984. -------
  985. out : ndarray
  986. Output array of string or unicode, depending on input
  987. type. The output array will have an extra dimension with
  988. 3 elements per input element.
  989. See also
  990. --------
  991. str.rpartition
  992. """
  993. return _to_string_or_unicode_array(
  994. _vec_string(a, object_, 'rpartition', (sep,)))
  995. def _split_dispatcher(a, sep=None, maxsplit=None):
  996. return (a,)
  997. @array_function_dispatch(_split_dispatcher)
  998. def rsplit(a, sep=None, maxsplit=None):
  999. """
  1000. For each element in `a`, return a list of the words in the
  1001. string, using `sep` as the delimiter string.
  1002. Calls `str.rsplit` element-wise.
  1003. Except for splitting from the right, `rsplit`
  1004. behaves like `split`.
  1005. Parameters
  1006. ----------
  1007. a : array_like of str or unicode
  1008. sep : str or unicode, optional
  1009. If `sep` is not specified or `None`, any whitespace string
  1010. is a separator.
  1011. maxsplit : int, optional
  1012. If `maxsplit` is given, at most `maxsplit` splits are done,
  1013. the rightmost ones.
  1014. Returns
  1015. -------
  1016. out : ndarray
  1017. Array of list objects
  1018. See also
  1019. --------
  1020. str.rsplit, split
  1021. """
  1022. # This will return an array of lists of different sizes, so we
  1023. # leave it as an object array
  1024. return _vec_string(
  1025. a, object_, 'rsplit', [sep] + _clean_args(maxsplit))
  1026. def _strip_dispatcher(a, chars=None):
  1027. return (a,)
  1028. @array_function_dispatch(_strip_dispatcher)
  1029. def rstrip(a, chars=None):
  1030. """
  1031. For each element in `a`, return a copy with the trailing
  1032. characters removed.
  1033. Calls `str.rstrip` element-wise.
  1034. Parameters
  1035. ----------
  1036. a : array-like of str or unicode
  1037. chars : str or unicode, optional
  1038. The `chars` argument is a string specifying the set of
  1039. characters to be removed. If omitted or None, the `chars`
  1040. argument defaults to removing whitespace. The `chars` argument
  1041. is not a suffix; rather, all combinations of its values are
  1042. stripped.
  1043. Returns
  1044. -------
  1045. out : ndarray
  1046. Output array of str or unicode, depending on input type
  1047. See also
  1048. --------
  1049. str.rstrip
  1050. Examples
  1051. --------
  1052. >>> c = np.array(['aAaAaA', 'abBABba'], dtype='S7'); c
  1053. array(['aAaAaA', 'abBABba'],
  1054. dtype='|S7')
  1055. >>> np.char.rstrip(c, 'a')
  1056. array(['aAaAaA', 'abBABb'],
  1057. dtype='|S7')
  1058. >>> np.char.rstrip(c, 'A')
  1059. array(['aAaAa', 'abBABba'],
  1060. dtype='|S7')
  1061. """
  1062. a_arr = numpy.asarray(a)
  1063. return _vec_string(a_arr, a_arr.dtype, 'rstrip', (chars,))
  1064. @array_function_dispatch(_split_dispatcher)
  1065. def split(a, sep=None, maxsplit=None):
  1066. """
  1067. For each element in `a`, return a list of the words in the
  1068. string, using `sep` as the delimiter string.
  1069. Calls `str.split` element-wise.
  1070. Parameters
  1071. ----------
  1072. a : array_like of str or unicode
  1073. sep : str or unicode, optional
  1074. If `sep` is not specified or `None`, any whitespace string is a
  1075. separator.
  1076. maxsplit : int, optional
  1077. If `maxsplit` is given, at most `maxsplit` splits are done.
  1078. Returns
  1079. -------
  1080. out : ndarray
  1081. Array of list objects
  1082. See also
  1083. --------
  1084. str.split, rsplit
  1085. """
  1086. # This will return an array of lists of different sizes, so we
  1087. # leave it as an object array
  1088. return _vec_string(
  1089. a, object_, 'split', [sep] + _clean_args(maxsplit))
  1090. def _splitlines_dispatcher(a, keepends=None):
  1091. return (a,)
  1092. @array_function_dispatch(_splitlines_dispatcher)
  1093. def splitlines(a, keepends=None):
  1094. """
  1095. For each element in `a`, return a list of the lines in the
  1096. element, breaking at line boundaries.
  1097. Calls `str.splitlines` element-wise.
  1098. Parameters
  1099. ----------
  1100. a : array_like of str or unicode
  1101. keepends : bool, optional
  1102. Line breaks are not included in the resulting list unless
  1103. keepends is given and true.
  1104. Returns
  1105. -------
  1106. out : ndarray
  1107. Array of list objects
  1108. See also
  1109. --------
  1110. str.splitlines
  1111. """
  1112. return _vec_string(
  1113. a, object_, 'splitlines', _clean_args(keepends))
  1114. def _startswith_dispatcher(a, prefix, start=None, end=None):
  1115. return (a,)
  1116. @array_function_dispatch(_startswith_dispatcher)
  1117. def startswith(a, prefix, start=0, end=None):
  1118. """
  1119. Returns a boolean array which is `True` where the string element
  1120. in `a` starts with `prefix`, otherwise `False`.
  1121. Calls `str.startswith` element-wise.
  1122. Parameters
  1123. ----------
  1124. a : array_like of str or unicode
  1125. prefix : str
  1126. start, end : int, optional
  1127. With optional `start`, test beginning at that position. With
  1128. optional `end`, stop comparing at that position.
  1129. Returns
  1130. -------
  1131. out : ndarray
  1132. Array of booleans
  1133. See also
  1134. --------
  1135. str.startswith
  1136. """
  1137. return _vec_string(
  1138. a, bool_, 'startswith', [prefix, start] + _clean_args(end))
  1139. @array_function_dispatch(_strip_dispatcher)
  1140. def strip(a, chars=None):
  1141. """
  1142. For each element in `a`, return a copy with the leading and
  1143. trailing characters removed.
  1144. Calls `str.strip` element-wise.
  1145. Parameters
  1146. ----------
  1147. a : array-like of str or unicode
  1148. chars : str or unicode, optional
  1149. The `chars` argument is a string specifying the set of
  1150. characters to be removed. If omitted or None, the `chars`
  1151. argument defaults to removing whitespace. The `chars` argument
  1152. is not a prefix or suffix; rather, all combinations of its
  1153. values are stripped.
  1154. Returns
  1155. -------
  1156. out : ndarray
  1157. Output array of str or unicode, depending on input type
  1158. See also
  1159. --------
  1160. str.strip
  1161. Examples
  1162. --------
  1163. >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
  1164. >>> c
  1165. array(['aAaAaA', ' aA ', 'abBABba'],
  1166. dtype='|S7')
  1167. >>> np.char.strip(c)
  1168. array(['aAaAaA', 'aA', 'abBABba'],
  1169. dtype='|S7')
  1170. >>> np.char.strip(c, 'a') # 'a' unstripped from c[1] because whitespace leads
  1171. array(['AaAaA', ' aA ', 'bBABb'],
  1172. dtype='|S7')
  1173. >>> np.char.strip(c, 'A') # 'A' unstripped from c[1] because (unprinted) ws trails
  1174. array(['aAaAa', ' aA ', 'abBABba'],
  1175. dtype='|S7')
  1176. """
  1177. a_arr = numpy.asarray(a)
  1178. return _vec_string(a_arr, a_arr.dtype, 'strip', _clean_args(chars))
  1179. @array_function_dispatch(_unary_op_dispatcher)
  1180. def swapcase(a):
  1181. """
  1182. Return element-wise a copy of the string with
  1183. uppercase characters converted to lowercase and vice versa.
  1184. Calls `str.swapcase` element-wise.
  1185. For 8-bit strings, this method is locale-dependent.
  1186. Parameters
  1187. ----------
  1188. a : array_like, {str, unicode}
  1189. Input array.
  1190. Returns
  1191. -------
  1192. out : ndarray, {str, unicode}
  1193. Output array of str or unicode, depending on input type
  1194. See also
  1195. --------
  1196. str.swapcase
  1197. Examples
  1198. --------
  1199. >>> c=np.array(['a1B c','1b Ca','b Ca1','cA1b'],'S5'); c
  1200. array(['a1B c', '1b Ca', 'b Ca1', 'cA1b'],
  1201. dtype='|S5')
  1202. >>> np.char.swapcase(c)
  1203. array(['A1b C', '1B cA', 'B cA1', 'Ca1B'],
  1204. dtype='|S5')
  1205. """
  1206. a_arr = numpy.asarray(a)
  1207. return _vec_string(a_arr, a_arr.dtype, 'swapcase')
  1208. @array_function_dispatch(_unary_op_dispatcher)
  1209. def title(a):
  1210. """
  1211. Return element-wise title cased version of string or unicode.
  1212. Title case words start with uppercase characters, all remaining cased
  1213. characters are lowercase.
  1214. Calls `str.title` element-wise.
  1215. For 8-bit strings, this method is locale-dependent.
  1216. Parameters
  1217. ----------
  1218. a : array_like, {str, unicode}
  1219. Input array.
  1220. Returns
  1221. -------
  1222. out : ndarray
  1223. Output array of str or unicode, depending on input type
  1224. See also
  1225. --------
  1226. str.title
  1227. Examples
  1228. --------
  1229. >>> c=np.array(['a1b c','1b ca','b ca1','ca1b'],'S5'); c
  1230. array(['a1b c', '1b ca', 'b ca1', 'ca1b'],
  1231. dtype='|S5')
  1232. >>> np.char.title(c)
  1233. array(['A1B C', '1B Ca', 'B Ca1', 'Ca1B'],
  1234. dtype='|S5')
  1235. """
  1236. a_arr = numpy.asarray(a)
  1237. return _vec_string(a_arr, a_arr.dtype, 'title')
  1238. def _translate_dispatcher(a, table, deletechars=None):
  1239. return (a,)
  1240. @array_function_dispatch(_translate_dispatcher)
  1241. def translate(a, table, deletechars=None):
  1242. """
  1243. For each element in `a`, return a copy of the string where all
  1244. characters occurring in the optional argument `deletechars` are
  1245. removed, and the remaining characters have been mapped through the
  1246. given translation table.
  1247. Calls `str.translate` element-wise.
  1248. Parameters
  1249. ----------
  1250. a : array-like of str or unicode
  1251. table : str of length 256
  1252. deletechars : str
  1253. Returns
  1254. -------
  1255. out : ndarray
  1256. Output array of str or unicode, depending on input type
  1257. See also
  1258. --------
  1259. str.translate
  1260. """
  1261. a_arr = numpy.asarray(a)
  1262. if issubclass(a_arr.dtype.type, unicode_):
  1263. return _vec_string(
  1264. a_arr, a_arr.dtype, 'translate', (table,))
  1265. else:
  1266. return _vec_string(
  1267. a_arr, a_arr.dtype, 'translate', [table] + _clean_args(deletechars))
  1268. @array_function_dispatch(_unary_op_dispatcher)
  1269. def upper(a):
  1270. """
  1271. Return an array with the elements converted to uppercase.
  1272. Calls `str.upper` element-wise.
  1273. For 8-bit strings, this method is locale-dependent.
  1274. Parameters
  1275. ----------
  1276. a : array_like, {str, unicode}
  1277. Input array.
  1278. Returns
  1279. -------
  1280. out : ndarray, {str, unicode}
  1281. Output array of str or unicode, depending on input type
  1282. See also
  1283. --------
  1284. str.upper
  1285. Examples
  1286. --------
  1287. >>> c = np.array(['a1b c', '1bca', 'bca1']); c
  1288. array(['a1b c', '1bca', 'bca1'],
  1289. dtype='|S5')
  1290. >>> np.char.upper(c)
  1291. array(['A1B C', '1BCA', 'BCA1'],
  1292. dtype='|S5')
  1293. """
  1294. a_arr = numpy.asarray(a)
  1295. return _vec_string(a_arr, a_arr.dtype, 'upper')
  1296. def _zfill_dispatcher(a, width):
  1297. return (a,)
  1298. @array_function_dispatch(_zfill_dispatcher)
  1299. def zfill(a, width):
  1300. """
  1301. Return the numeric string left-filled with zeros
  1302. Calls `str.zfill` element-wise.
  1303. Parameters
  1304. ----------
  1305. a : array_like, {str, unicode}
  1306. Input array.
  1307. width : int
  1308. Width of string to left-fill elements in `a`.
  1309. Returns
  1310. -------
  1311. out : ndarray, {str, unicode}
  1312. Output array of str or unicode, depending on input type
  1313. See also
  1314. --------
  1315. str.zfill
  1316. """
  1317. a_arr = numpy.asarray(a)
  1318. width_arr = numpy.asarray(width)
  1319. size = long(numpy.max(width_arr.flat))
  1320. return _vec_string(
  1321. a_arr, (a_arr.dtype.type, size), 'zfill', (width_arr,))
  1322. @array_function_dispatch(_unary_op_dispatcher)
  1323. def isnumeric(a):
  1324. """
  1325. For each element, return True if there are only numeric
  1326. characters in the element.
  1327. Calls `unicode.isnumeric` element-wise.
  1328. Numeric characters include digit characters, and all characters
  1329. that have the Unicode numeric value property, e.g. ``U+2155,
  1330. VULGAR FRACTION ONE FIFTH``.
  1331. Parameters
  1332. ----------
  1333. a : array_like, unicode
  1334. Input array.
  1335. Returns
  1336. -------
  1337. out : ndarray, bool
  1338. Array of booleans of same shape as `a`.
  1339. See also
  1340. --------
  1341. unicode.isnumeric
  1342. """
  1343. if _use_unicode(a) != unicode_:
  1344. raise TypeError("isnumeric is only available for Unicode strings and arrays")
  1345. return _vec_string(a, bool_, 'isnumeric')
  1346. @array_function_dispatch(_unary_op_dispatcher)
  1347. def isdecimal(a):
  1348. """
  1349. For each element, return True if there are only decimal
  1350. characters in the element.
  1351. Calls `unicode.isdecimal` element-wise.
  1352. Decimal characters include digit characters, and all characters
  1353. that that can be used to form decimal-radix numbers,
  1354. e.g. ``U+0660, ARABIC-INDIC DIGIT ZERO``.
  1355. Parameters
  1356. ----------
  1357. a : array_like, unicode
  1358. Input array.
  1359. Returns
  1360. -------
  1361. out : ndarray, bool
  1362. Array of booleans identical in shape to `a`.
  1363. See also
  1364. --------
  1365. unicode.isdecimal
  1366. """
  1367. if _use_unicode(a) != unicode_:
  1368. raise TypeError("isnumeric is only available for Unicode strings and arrays")
  1369. return _vec_string(a, bool_, 'isdecimal')
  1370. @set_module('numpy')
  1371. class chararray(ndarray):
  1372. """
  1373. chararray(shape, itemsize=1, unicode=False, buffer=None, offset=0,
  1374. strides=None, order=None)
  1375. Provides a convenient view on arrays of string and unicode values.
  1376. .. note::
  1377. The `chararray` class exists for backwards compatibility with
  1378. Numarray, it is not recommended for new development. Starting from numpy
  1379. 1.4, if one needs arrays of strings, it is recommended to use arrays of
  1380. `dtype` `object_`, `string_` or `unicode_`, and use the free functions
  1381. in the `numpy.char` module for fast vectorized string operations.
  1382. Versus a regular NumPy array of type `str` or `unicode`, this
  1383. class adds the following functionality:
  1384. 1) values automatically have whitespace removed from the end
  1385. when indexed
  1386. 2) comparison operators automatically remove whitespace from the
  1387. end when comparing values
  1388. 3) vectorized string operations are provided as methods
  1389. (e.g. `.endswith`) and infix operators (e.g. ``"+", "*", "%"``)
  1390. chararrays should be created using `numpy.char.array` or
  1391. `numpy.char.asarray`, rather than this constructor directly.
  1392. This constructor creates the array, using `buffer` (with `offset`
  1393. and `strides`) if it is not ``None``. If `buffer` is ``None``, then
  1394. constructs a new array with `strides` in "C order", unless both
  1395. ``len(shape) >= 2`` and ``order='Fortran'``, in which case `strides`
  1396. is in "Fortran order".
  1397. Methods
  1398. -------
  1399. astype
  1400. argsort
  1401. copy
  1402. count
  1403. decode
  1404. dump
  1405. dumps
  1406. encode
  1407. endswith
  1408. expandtabs
  1409. fill
  1410. find
  1411. flatten
  1412. getfield
  1413. index
  1414. isalnum
  1415. isalpha
  1416. isdecimal
  1417. isdigit
  1418. islower
  1419. isnumeric
  1420. isspace
  1421. istitle
  1422. isupper
  1423. item
  1424. join
  1425. ljust
  1426. lower
  1427. lstrip
  1428. nonzero
  1429. put
  1430. ravel
  1431. repeat
  1432. replace
  1433. reshape
  1434. resize
  1435. rfind
  1436. rindex
  1437. rjust
  1438. rsplit
  1439. rstrip
  1440. searchsorted
  1441. setfield
  1442. setflags
  1443. sort
  1444. split
  1445. splitlines
  1446. squeeze
  1447. startswith
  1448. strip
  1449. swapaxes
  1450. swapcase
  1451. take
  1452. title
  1453. tofile
  1454. tolist
  1455. tostring
  1456. translate
  1457. transpose
  1458. upper
  1459. view
  1460. zfill
  1461. Parameters
  1462. ----------
  1463. shape : tuple
  1464. Shape of the array.
  1465. itemsize : int, optional
  1466. Length of each array element, in number of characters. Default is 1.
  1467. unicode : bool, optional
  1468. Are the array elements of type unicode (True) or string (False).
  1469. Default is False.
  1470. buffer : int, optional
  1471. Memory address of the start of the array data. Default is None,
  1472. in which case a new array is created.
  1473. offset : int, optional
  1474. Fixed stride displacement from the beginning of an axis?
  1475. Default is 0. Needs to be >=0.
  1476. strides : array_like of ints, optional
  1477. Strides for the array (see `ndarray.strides` for full description).
  1478. Default is None.
  1479. order : {'C', 'F'}, optional
  1480. The order in which the array data is stored in memory: 'C' ->
  1481. "row major" order (the default), 'F' -> "column major"
  1482. (Fortran) order.
  1483. Examples
  1484. --------
  1485. >>> charar = np.chararray((3, 3))
  1486. >>> charar[:] = 'a'
  1487. >>> charar
  1488. chararray([['a', 'a', 'a'],
  1489. ['a', 'a', 'a'],
  1490. ['a', 'a', 'a']],
  1491. dtype='|S1')
  1492. >>> charar = np.chararray(charar.shape, itemsize=5)
  1493. >>> charar[:] = 'abc'
  1494. >>> charar
  1495. chararray([['abc', 'abc', 'abc'],
  1496. ['abc', 'abc', 'abc'],
  1497. ['abc', 'abc', 'abc']],
  1498. dtype='|S5')
  1499. """
  1500. def __new__(subtype, shape, itemsize=1, unicode=False, buffer=None,
  1501. offset=0, strides=None, order='C'):
  1502. global _globalvar
  1503. if unicode:
  1504. dtype = unicode_
  1505. else:
  1506. dtype = string_
  1507. # force itemsize to be a Python long, since using NumPy integer
  1508. # types results in itemsize.itemsize being used as the size of
  1509. # strings in the new array.
  1510. itemsize = long(itemsize)
  1511. if sys.version_info[0] >= 3 and isinstance(buffer, _unicode):
  1512. # On Py3, unicode objects do not have the buffer interface
  1513. filler = buffer
  1514. buffer = None
  1515. else:
  1516. filler = None
  1517. _globalvar = 1
  1518. if buffer is None:
  1519. self = ndarray.__new__(subtype, shape, (dtype, itemsize),
  1520. order=order)
  1521. else:
  1522. self = ndarray.__new__(subtype, shape, (dtype, itemsize),
  1523. buffer=buffer,
  1524. offset=offset, strides=strides,
  1525. order=order)
  1526. if filler is not None:
  1527. self[...] = filler
  1528. _globalvar = 0
  1529. return self
  1530. def __array_finalize__(self, obj):
  1531. # The b is a special case because it is used for reconstructing.
  1532. if not _globalvar and self.dtype.char not in 'SUbc':
  1533. raise ValueError("Can only create a chararray from string data.")
  1534. def __getitem__(self, obj):
  1535. val = ndarray.__getitem__(self, obj)
  1536. if isinstance(val, character):
  1537. temp = val.rstrip()
  1538. if _len(temp) == 0:
  1539. val = ''
  1540. else:
  1541. val = temp
  1542. return val
  1543. # IMPLEMENTATION NOTE: Most of the methods of this class are
  1544. # direct delegations to the free functions in this module.
  1545. # However, those that return an array of strings should instead
  1546. # return a chararray, so some extra wrapping is required.
  1547. def __eq__(self, other):
  1548. """
  1549. Return (self == other) element-wise.
  1550. See also
  1551. --------
  1552. equal
  1553. """
  1554. return equal(self, other)
  1555. def __ne__(self, other):
  1556. """
  1557. Return (self != other) element-wise.
  1558. See also
  1559. --------
  1560. not_equal
  1561. """
  1562. return not_equal(self, other)
  def __ge__(self, other):
      """
      Compare ``self >= other`` element by element.

      Delegates to the module-level function `greater_equal`.

      See Also
      --------
      greater_equal
      """
      result = greater_equal(self, other)
      return result
  def __le__(self, other):
      """
      Compare ``self <= other`` element by element.

      Delegates to the module-level function `less_equal`.

      See Also
      --------
      less_equal
      """
      result = less_equal(self, other)
      return result
  def __gt__(self, other):
      """
      Compare ``self > other`` element by element.

      Delegates to the module-level function `greater`.

      See Also
      --------
      greater
      """
      result = greater(self, other)
      return result
  def __lt__(self, other):
      """
      Compare ``self < other`` element by element.

      Delegates to the module-level function `less`.

      See Also
      --------
      less
      """
      result = less(self, other)
      return result
  def __add__(self, other):
      """
      Concatenate ``self + other`` element by element.

      Works on a pair of array_likes of str or unicode.  The output of
      the free function `add` is re-wrapped with this module's `asarray`.

      See Also
      --------
      add
      """
      joined = add(self, other)
      return asarray(joined)
  def __radd__(self, other):
      """
      Concatenate ``other + self`` element by element.

      Works on a pair of array_likes of `string_` or `unicode_`.  `other`
      is first converted with `numpy.asarray`; the output of the free
      function `add` is re-wrapped with this module's `asarray`.

      See Also
      --------
      add
      """
      left_operand = numpy.asarray(other)
      joined = add(left_operand, self)
      return asarray(joined)
  def __mul__(self, i):
      """
      Repeat each element: ``self * i`` is string multiple concatenation,
      applied element by element.

      The output of the free function `multiply` is re-wrapped with this
      module's `asarray`.

      See Also
      --------
      multiply
      """
      repeated = multiply(self, i)
      return asarray(repeated)
  def __rmul__(self, i):
      """
      Reflected form of ``*``: string multiple concatenation, applied
      element by element.

      Evaluated exactly like ``self * i``, i.e. as ``multiply(self, i)``
      re-wrapped with this module's `asarray`.

      See Also
      --------
      multiply
      """
      repeated = multiply(self, i)
      return asarray(repeated)
  def __mod__(self, i):
      """
      Format ``self % i`` element by element.

      This is pre-Python 2.6 string formatting (interpolation) for a pair
      of array_likes of `string_` or `unicode_`.  The output of the free
      function `mod` is re-wrapped with this module's `asarray`.

      See Also
      --------
      mod
      """
      formatted = mod(self, i)
      return asarray(formatted)
  def __rmod__(self, other):
      """
      Reflected ``%`` is not provided; always return `NotImplemented`.
      """
      # Both arguments are ignored on purpose.
      return NotImplemented
  def argsort(self, axis=-1, kind='quicksort', order=None):
      """
      Return the indices that sort the array lexicographically.

      Thin wrapper: the call is forwarded to the ``argsort`` method of the
      object returned by ``self.__array__()``.  Note that the statement
      directly after this method replaces this docstring with the one
      from `ndarray.argsort`.

      Examples
      --------
      >>> c = np.array(['a1b c', '1b ca', 'b ca1', 'Ca1b'], 'S5')
      >>> c = c.view(np.chararray); c
      chararray(['a1b c', '1b ca', 'b ca1', 'Ca1b'],
            dtype='|S5')
      >>> c[c.argsort()]
      chararray(['1b ca', 'Ca1b', 'a1b c', 'b ca1'],
            dtype='|S5')

      """
      underlying = self.__array__()
      return underlying.argsort(axis, kind, order)
  # Expose the full `ndarray.argsort` documentation on this method.
  argsort.__doc__ = ndarray.argsort.__doc__
  def capitalize(self):
      """
      Return a copy of `self` in which only the first character of each
      element is capitalized.

      The output of the free function `capitalize` is re-wrapped with
      this module's `asarray`.

      See Also
      --------
      char.capitalize
      """
      capitalized = capitalize(self)
      return asarray(capitalized)
  def center(self, width, fillchar=' '):
      """
      Return a copy of `self` whose elements are centered in a string of
      length `width`.

      `width` and `fillchar` are forwarded to the free function `center`;
      its output is re-wrapped with this module's `asarray`.

      See Also
      --------
      center
      """
      centered = center(self, width, fillchar)
      return asarray(centered)
  def count(self, sub, start=0, end=None):
      """
      Return an array with the number of non-overlapping occurrences of
      substring `sub` in the range [`start`, `end`].

      The output of the free function `count` is returned unchanged.

      See Also
      --------
      char.count
      """
      occurrences = count(self, sub, start, end)
      return occurrences
  def decode(self, encoding=None, errors=None):
      """
      Call `str.decode` on every element.

      `encoding` and `errors` are forwarded to the free function
      `decode`, whose output is returned unchanged.

      See Also
      --------
      char.decode
      """
      decoded = decode(self, encoding, errors)
      return decoded
  def encode(self, encoding=None, errors=None):
      """
      Call `str.encode` on every element.

      `encoding` and `errors` are forwarded to the free function
      `encode`, whose output is returned unchanged.

      See Also
      --------
      char.encode
      """
      encoded = encode(self, encoding, errors)
      return encoded
  def endswith(self, suffix, start=0, end=None):
      """
      Return a boolean array that is `True` where the string element in
      `self` ends with `suffix` and `False` elsewhere.

      The output of the free function `endswith` is returned unchanged.

      See Also
      --------
      char.endswith
      """
      matches = endswith(self, suffix, start, end)
      return matches
  def expandtabs(self, tabsize=8):
      """
      Return a copy of each string element in which every tab character
      is replaced by one or more spaces.

      The output of the free function `expandtabs` is re-wrapped with
      this module's `asarray`.

      See Also
      --------
      char.expandtabs
      """
      expanded = expandtabs(self, tabsize)
      return asarray(expanded)
  def find(self, sub, start=0, end=None):
      """
      For every element, return the lowest index in the string at which
      substring `sub` is found.

      The output of the free function `find` is returned unchanged.

      See Also
      --------
      char.find
      """
      positions = find(self, sub, start, end)
      return positions
  def index(self, sub, start=0, end=None):
      """
      Same as `find`, except that `ValueError` is raised when the
      substring is not found.

      The output of the free function `index` is returned unchanged.

      See Also
      --------
      char.index
      """
      positions = index(self, sub, start, end)
      return positions
  def isalnum(self):
      """
      Test every element for being alphanumeric.

      The result is true for an element if all of its characters are
      alphanumeric and there is at least one character, false otherwise.
      The output of the free function `isalnum` is returned unchanged.

      See Also
      --------
      char.isalnum
      """
      flags = isalnum(self)
      return flags
  def isalpha(self):
      """
      Test every element for being alphabetic.

      The result is true for an element if all of its characters are
      alphabetic and there is at least one character, false otherwise.
      The output of the free function `isalpha` is returned unchanged.

      See Also
      --------
      char.isalpha
      """
      flags = isalpha(self)
      return flags
  def isdigit(self):
      """
      Test every element for consisting of digits only.

      The result is true for an element if all of its characters are
      digits and there is at least one character, false otherwise.
      The output of the free function `isdigit` is returned unchanged.

      See Also
      --------
      char.isdigit
      """
      flags = isdigit(self)
      return flags
  def islower(self):
      """
      Test every element for being lowercase.

      The result is true for an element if all of its cased characters
      are lowercase and there is at least one cased character, false
      otherwise.  The output of the free function `islower` is returned
      unchanged.

      See Also
      --------
      char.islower
      """
      flags = islower(self)
      return flags
  def isspace(self):
      """
      Test every element for consisting of whitespace only.

      The result is true for an element if it contains only whitespace
      characters and there is at least one character, false otherwise.
      The output of the free function `isspace` is returned unchanged.

      See Also
      --------
      char.isspace
      """
      flags = isspace(self)
      return flags
  def istitle(self):
      """
      Test every element for being titlecased.

      The result is true for an element if it is a titlecased string and
      there is at least one character, false otherwise.  The output of
      the free function `istitle` is returned unchanged.

      See Also
      --------
      char.istitle
      """
      flags = istitle(self)
      return flags
  def isupper(self):
      """
      Test every element for being uppercase.

      The result is true for an element if all of its cased characters
      are uppercase and there is at least one character, false otherwise.
      The output of the free function `isupper` is returned unchanged.

      See Also
      --------
      char.isupper
      """
      flags = isupper(self)
      return flags
  def join(self, seq):
      """
      Return a string which is the concatenation of the strings in the
      sequence `seq`.

      `self` is handed to the free function `join` as its first argument
      and `seq` as its second; the output is returned unchanged.

      See Also
      --------
      char.join
      """
      joined = join(self, seq)
      return joined
  def ljust(self, width, fillchar=' '):
      """
      Return an array in which the elements of `self` are left-justified
      in a string of length `width`.

      `width` and `fillchar` are forwarded to the free function `ljust`;
      its output is re-wrapped with this module's `asarray`.

      See Also
      --------
      char.ljust
      """
      justified = ljust(self, width, fillchar)
      return asarray(justified)
  def lower(self):
      """
      Return an array in which the elements of `self` are converted to
      lowercase.

      The output of the free function `lower` is re-wrapped with this
      module's `asarray`.

      See Also
      --------
      char.lower
      """
      lowered = lower(self)
      return asarray(lowered)
  def lstrip(self, chars=None):
      """
      For every element of `self`, return a copy with the leading
      characters removed.

      `chars` is forwarded to the free function `lstrip`; its output is
      re-wrapped with this module's `asarray`.

      See Also
      --------
      char.lstrip
      """
      stripped = lstrip(self, chars)
      return asarray(stripped)
  def partition(self, sep):
      """
      Partition every element of `self` around `sep`.

      The output of the free function `partition` is re-wrapped with this
      module's `asarray`.

      See Also
      --------
      partition
      """
      pieces = partition(self, sep)
      return asarray(pieces)
  def replace(self, old, new, count=None):
      """
      For every element of `self`, return a copy of the string in which
      all occurrences of substring `old` are replaced by `new`.

      `old`, `new` and `count` are forwarded to the free function
      `replace`; its output is re-wrapped with this module's `asarray`.

      See Also
      --------
      char.replace
      """
      substituted = replace(self, old, new, count)
      return asarray(substituted)
  def rfind(self, sub, start=0, end=None):
      """
      For every element of `self`, return the highest index in the string
      at which substring `sub` is found, such that `sub` is contained
      within [`start`, `end`].

      The output of the free function `rfind` is returned unchanged.

      See Also
      --------
      char.rfind
      """
      positions = rfind(self, sub, start, end)
      return positions
  def rindex(self, sub, start=0, end=None):
      """
      Same as `rfind`, except that `ValueError` is raised when the
      substring `sub` is not found.

      The output of the free function `rindex` is returned unchanged.

      See Also
      --------
      char.rindex
      """
      positions = rindex(self, sub, start, end)
      return positions
  def rjust(self, width, fillchar=' '):
      """
      Return an array in which the elements of `self` are right-justified
      in a string of length `width`.

      `width` and `fillchar` are forwarded to the free function `rjust`;
      its output is re-wrapped with this module's `asarray`.

      See Also
      --------
      char.rjust
      """
      justified = rjust(self, width, fillchar)
      return asarray(justified)
  def rpartition(self, sep):
      """
      Partition every element of `self` around `sep`.

      The output of the free function `rpartition` is re-wrapped with
      this module's `asarray`.

      See Also
      --------
      rpartition
      """
      pieces = rpartition(self, sep)
      return asarray(pieces)
  def rsplit(self, sep=None, maxsplit=None):
      """
      For every element of `self`, return a list of the words in the
      string, using `sep` as the delimiter string.

      `sep` and `maxsplit` are forwarded to the free function `rsplit`,
      whose output is returned unchanged.

      See Also
      --------
      char.rsplit
      """
      words = rsplit(self, sep, maxsplit)
      return words
  def rstrip(self, chars=None):
      """
      For every element of `self`, return a copy with the trailing
      characters removed.

      `chars` is forwarded to the free function `rstrip`; its output is
      re-wrapped with this module's `asarray`.

      See Also
      --------
      char.rstrip
      """
      stripped = rstrip(self, chars)
      return asarray(stripped)
  def split(self, sep=None, maxsplit=None):
      """
      For every element of `self`, return a list of the words in the
      string, using `sep` as the delimiter string.

      `sep` and `maxsplit` are forwarded to the free function `split`,
      whose output is returned unchanged.

      See Also
      --------
      char.split
      """
      words = split(self, sep, maxsplit)
      return words
  def splitlines(self, keepends=None):
      """
      For every element of `self`, return a list of the lines in the
      element, breaking at line boundaries.

      `keepends` is forwarded to the free function `splitlines`, whose
      output is returned unchanged.

      See Also
      --------
      char.splitlines
      """
      lines = splitlines(self, keepends)
      return lines
  def startswith(self, prefix, start=0, end=None):
      """
      Return a boolean array that is `True` where the string element in
      `self` starts with `prefix` and `False` elsewhere.

      The output of the free function `startswith` is returned unchanged.

      See Also
      --------
      char.startswith
      """
      matches = startswith(self, prefix, start, end)
      return matches
  def strip(self, chars=None):
      """
      For every element of `self`, return a copy with the leading and
      trailing characters removed.

      `chars` is forwarded to the free function `strip`; its output is
      re-wrapped with this module's `asarray`.

      See Also
      --------
      char.strip
      """
      stripped = strip(self, chars)
      return asarray(stripped)
  def swapcase(self):
      """
      For every element of `self`, return a copy of the string with
      uppercase characters converted to lowercase and vice versa.

      The output of the free function `swapcase` is re-wrapped with this
      module's `asarray`.

      See Also
      --------
      char.swapcase
      """
      swapped = swapcase(self)
      return asarray(swapped)
  def title(self):
      """
      For every element of `self`, return a titlecased version of the
      string.

      Titlecased means that words start with uppercase characters and all
      remaining cased characters are lowercase.  The output of the free
      function `title` is re-wrapped with this module's `asarray`.

      See Also
      --------
      char.title
      """
      titled = title(self)
      return asarray(titled)
  def translate(self, table, deletechars=None):
      """
      For every element of `self`, return a copy of the string in which
      all characters occurring in the optional argument `deletechars` are
      removed and the remaining characters have been mapped through the
      given translation table.

      `table` and `deletechars` are forwarded to the free function
      `translate`; its output is re-wrapped with this module's `asarray`.

      See Also
      --------
      char.translate
      """
      translated = translate(self, table, deletechars)
      return asarray(translated)
  def upper(self):
      """
      Return an array in which the elements of `self` are converted to
      uppercase.

      The output of the free function `upper` is re-wrapped with this
      module's `asarray`.

      See Also
      --------
      char.upper
      """
      uppered = upper(self)
      return asarray(uppered)
  def zfill(self, width):
      """
      Return the numeric string left-filled with zeros in a string of
      length `width`.

      `width` is forwarded to the free function `zfill`; its output is
      re-wrapped with this module's `asarray`.

      See Also
      --------
      char.zfill
      """
      padded = zfill(self, width)
      return asarray(padded)
  def isnumeric(self):
      """
      For every element of `self`, return True if there are only numeric
      characters in the element.

      The output of the free function `isnumeric` is returned unchanged.

      See Also
      --------
      char.isnumeric
      """
      flags = isnumeric(self)
      return flags
  def isdecimal(self):
      """
      For every element of `self`, return True if there are only decimal
      characters in the element.

      The output of the free function `isdecimal` is returned unchanged.

      See Also
      --------
      char.isdecimal
      """
      flags = isdecimal(self)
      return flags
  def array(obj, itemsize=None, copy=True, unicode=None, order=None):
      """
      Create a `chararray`.

      .. note::
         This class is provided for numarray backward-compatibility.
         New code (not concerned with numarray compatibility) should use
         arrays of type `string_` or `unicode_` and use the free functions
         in :mod:`numpy.char <numpy.core.defchararray>` for fast
         vectorized string operations instead.

      Versus a regular NumPy array of type `str` or `unicode`, this
      class adds the following functionality:

        1) values automatically have whitespace removed from the end
           when indexed

        2) comparison operators automatically remove whitespace from the
           end when comparing values

        3) vectorized string operations are provided as methods
           (e.g. `str.endswith`) and infix operators (e.g. ``+, *, %``)

      Parameters
      ----------
      obj : array of str or unicode-like

      itemsize : int, optional
          `itemsize` is the number of characters per scalar in the
          resulting array.  If `itemsize` is None, and `obj` is an
          object array or a Python list, the `itemsize` will be
          automatically determined.  If `itemsize` is provided and `obj`
          is of type str or unicode, then the `obj` string will be
          chunked into `itemsize` pieces.

      copy : bool, optional
          If true (default), then the object is copied.  Otherwise, a copy
          will only be made if __array__ returns a copy, if obj is a
          nested sequence, or if a copy is needed to satisfy any of the other
          requirements (`itemsize`, unicode, `order`, etc.).

      unicode : bool, optional
          When true, the resulting `chararray` can contain Unicode
          characters, when false only 8-bit characters.  If unicode is
          `None` and `obj` is one of the following:

            - a `chararray`,
            - an ndarray of type `str` or `unicode`
            - a Python str or unicode object,

          then the unicode setting of the output array will be
          automatically determined.

      order : {'C', 'F', 'A'}, optional
          Specify the order of the array.  If order is 'C' (default), then the
          array will be in C-contiguous order (last-index varies the
          fastest).  If order is 'F', then the returned array
          will be in Fortran-contiguous order (first-index varies the
          fastest).  If order is 'A', then the returned array may
          be in any order (either C-, Fortran-contiguous, or even
          discontiguous).

      Returns
      -------
      out : chararray
          Array built from `obj` according to the options above.
      """
      # --- Case 1: a single Python string is chunked into items. ---------
      if isinstance(obj, (_bytes, _unicode)):
          if unicode is None:
              unicode = isinstance(obj, _unicode)

          if itemsize is None:
              itemsize = _len(obj)
          # NOTE(review): an empty `obj` without an explicit `itemsize`
          # gives itemsize == 0 here, so this division raises
          # ZeroDivisionError.
          shape = _len(obj) // itemsize

          if unicode:
              if sys.maxunicode == 0xffff:
                  # On a narrow Python build, the buffer for Unicode
                  # strings is UCS2, which doesn't match the buffer for
                  # NumPy Unicode types, which is ALWAYS UCS4.
                  # Therefore, we need to convert the buffer, which is
                  # done with the utf_32 codec.
                  # NOTE(review): the 'utf_32' codec prepends a byte order
                  # mark; confirm the chararray constructor copes with the
                  # extra leading code unit.
                  obj = obj.encode('utf_32')
              else:
                  obj = _unicode(obj)
          else:
              # Let the default Unicode -> string encoding (if any) take
              # precedence.
              obj = _bytes(obj)

          return chararray(shape, itemsize=itemsize, unicode=unicode,
                           buffer=obj, order=order)

      if isinstance(obj, (list, tuple)):
          obj = numpy.asarray(obj)

      # --- Case 2: an ndarray that already holds character data. ---------
      if isinstance(obj, ndarray) and issubclass(obj.dtype.type, character):
          # If we just have a vanilla ndarray of characters, create a
          # chararray view around it.
          if not isinstance(obj, chararray):
              obj = obj.view(chararray)

          input_is_unicode = issubclass(obj.dtype.type, unicode_)
          # ndarray.itemsize is in 8-bit chars, so for Unicode we need to
          # divide by the size of a single Unicode character, which for
          # NumPy is always 4, to get the length in characters.
          if input_is_unicode:
              input_chars = obj.itemsize // 4
          else:
              input_chars = obj.itemsize

          if itemsize is None:
              itemsize = input_chars

          if unicode is None:
              unicode = input_is_unicode

          if unicode:
              dtype = unicode_
          else:
              dtype = string_

          if order is not None:
              # asanyarray (unlike asarray) lets the chararray subclass
              # pass through, so the result stays a chararray.
              obj = numpy.asanyarray(obj, order=order)

          # Convert when a copy is requested, when the requested length in
          # characters differs from the input's, or when the requested
          # character kind (bytes vs. unicode) differs from the input's.
          # The kind is read from the dtype: the array object itself is
          # never an instance of a scalar type.
          kind_differs = ((unicode and not input_is_unicode) or
                          (not unicode and input_is_unicode))
          if copy or (itemsize != input_chars) or kind_differs:
              obj = obj.astype((dtype, long(itemsize)))
          return obj

      # --- Case 3: object arrays without an explicit itemsize. -----------
      if isinstance(obj, ndarray) and issubclass(obj.dtype.type, object):
          if itemsize is None:
              # Since no itemsize was specified, convert the input array to
              # a list so the ndarray constructor will automatically
              # determine the itemsize for us.
              obj = obj.tolist()
              # Fall through to the default case

      # --- Default: let the ndarray constructor do the conversion. -------
      if unicode:
          dtype = unicode_
      else:
          dtype = string_

      if itemsize is None:
          val = narray(obj, dtype=dtype, order=order, subok=True)
      else:
          val = narray(obj, dtype=(dtype, itemsize), order=order, subok=True)
      return val.view(chararray)
  def asarray(obj, itemsize=None, unicode=None, order=None):
      """
      Convert the input to a `chararray`, copying the data only if
      necessary.

      Equivalent to calling `array` with ``copy=False``.

      Versus a regular NumPy array of type `str` or `unicode`, this
      class adds the following functionality:

        1) values automatically have whitespace removed from the end
           when indexed

        2) comparison operators automatically remove whitespace from the
           end when comparing values

        3) vectorized string operations are provided as methods
           (e.g. `str.endswith`) and infix operators
           (e.g. ``+``, ``*``, ``%``)

      Parameters
      ----------
      obj : array of str or unicode-like

      itemsize : int, optional
          `itemsize` is the number of characters per scalar in the
          resulting array.  If `itemsize` is None, and `obj` is an
          object array or a Python list, the `itemsize` will be
          automatically determined.  If `itemsize` is provided and `obj`
          is of type str or unicode, then the `obj` string will be
          chunked into `itemsize` pieces.

      unicode : bool, optional
          When true, the resulting `chararray` can contain Unicode
          characters, when false only 8-bit characters.  If unicode is
          `None` and `obj` is one of the following:

            - a `chararray`,
            - an ndarray of type `str` or `unicode`
            - a Python str or unicode object,

          then the unicode setting of the output array will be
          automatically determined.

      order : {'C', 'F'}, optional
          Specify the order of the array.  If order is 'C' (default), then the
          array will be in C-contiguous order (last-index varies the
          fastest).  If order is 'F', then the returned array
          will be in Fortran-contiguous order (first-index varies the
          fastest).
      """
      converted = array(obj, itemsize=itemsize, copy=False,
                        unicode=unicode, order=order)
      return converted