series.py 140 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
7427842794280428142824283428442854286428742884289429042914292429342944295429642974298429943004301430243034304430543064307430843094310431143124313431443154316431743184319432043214322432343244325432643274328432943304331433243334334433543364337433843394340434143424343434443454346434743484349435043514352435343544355435643574358435943604361436243634364436543664367436843694370437143724373437443754376437743784379438043814382438343844385438643874388438943904391439243934394
  1. """
  2. Data structure for 1-dimensional cross-sectional and time series data
  3. """
  4. from __future__ import division
  5. from textwrap import dedent
  6. import warnings
  7. import numpy as np
  8. from pandas._libs import iNaT, index as libindex, lib, tslibs
  9. import pandas.compat as compat
  10. from pandas.compat import PY36, OrderedDict, StringIO, u, zip
  11. from pandas.compat.numpy import function as nv
  12. from pandas.util._decorators import Appender, Substitution, deprecate
  13. from pandas.util._validators import validate_bool_kwarg
  14. from pandas.core.dtypes.common import (
  15. _is_unorderable_exception, ensure_platform_int, is_bool,
  16. is_categorical_dtype, is_datetime64_dtype, is_datetimelike, is_dict_like,
  17. is_extension_array_dtype, is_extension_type, is_hashable, is_integer,
  18. is_iterator, is_list_like, is_scalar, is_string_like, is_timedelta64_dtype)
  19. from pandas.core.dtypes.generic import (
  20. ABCDataFrame, ABCDatetimeArray, ABCDatetimeIndex, ABCSeries,
  21. ABCSparseArray, ABCSparseSeries)
  22. from pandas.core.dtypes.missing import (
  23. isna, na_value_for_dtype, notna, remove_na_arraylike)
  24. from pandas.core import algorithms, base, generic, nanops, ops
  25. from pandas.core.accessor import CachedAccessor
  26. from pandas.core.arrays import ExtensionArray, SparseArray
  27. from pandas.core.arrays.categorical import Categorical, CategoricalAccessor
  28. from pandas.core.arrays.sparse import SparseAccessor
  29. import pandas.core.common as com
  30. from pandas.core.config import get_option
  31. from pandas.core.index import (
  32. Float64Index, Index, InvalidIndexError, MultiIndex, ensure_index)
  33. from pandas.core.indexes.accessors import CombinedDatetimelikeProperties
  34. import pandas.core.indexes.base as ibase
  35. from pandas.core.indexes.datetimes import DatetimeIndex
  36. from pandas.core.indexes.period import PeriodIndex
  37. from pandas.core.indexes.timedeltas import TimedeltaIndex
  38. from pandas.core.indexing import check_bool_indexer, maybe_convert_indices
  39. from pandas.core.internals import SingleBlockManager
  40. from pandas.core.internals.construction import sanitize_array
  41. from pandas.core.strings import StringMethods
  42. from pandas.core.tools.datetimes import to_datetime
  43. import pandas.io.formats.format as fmt
  44. from pandas.io.formats.terminal import get_terminal_size
  45. import pandas.plotting._core as gfx
  46. # pylint: disable=E1101,E1103
  47. # pylint: disable=W0703,W0622,W0613,W0201
  48. __all__ = ['Series']
  49. _shared_doc_kwargs = dict(
  50. axes='index', klass='Series', axes_single_arg="{0 or 'index'}",
  51. axis="""axis : {0 or 'index'}
  52. Parameter needed for compatibility with DataFrame.""",
  53. inplace="""inplace : boolean, default False
  54. If True, performs operation inplace and returns None.""",
  55. unique='np.ndarray', duplicated='Series',
  56. optional_by='', optional_mapper='', optional_labels='', optional_axis='',
  57. versionadded_to_excel='\n .. versionadded:: 0.20.0\n')
  58. # see gh-16971
  59. def remove_na(arr):
  60. """
  61. Remove null values from array like structure.
  62. .. deprecated:: 0.21.0
  63. Use s[s.notnull()] instead.
  64. """
  65. warnings.warn("remove_na is deprecated and is a private "
  66. "function. Do not use.", FutureWarning, stacklevel=2)
  67. return remove_na_arraylike(arr)
  68. def _coerce_method(converter):
  69. """
  70. Install the scalar coercion methods.
  71. """
  72. def wrapper(self):
  73. if len(self) == 1:
  74. return converter(self.iloc[0])
  75. raise TypeError("cannot convert the series to "
  76. "{0}".format(str(converter)))
  77. wrapper.__name__ = "__{name}__".format(name=converter.__name__)
  78. return wrapper
# ----------------------------------------------------------------------
# Series class

class Series(base.IndexOpsMixin, generic.NDFrame):
    """
    One-dimensional ndarray with axis labels (including time series).

    Labels need not be unique but must be a hashable type. The object
    supports both integer- and label-based indexing and provides a host of
    methods for performing operations involving the index. Statistical
    methods from ndarray have been overridden to automatically exclude
    missing data (currently represented as NaN).

    Operations between Series (+, -, /, *, **) align values based on their
    associated index values-- they need not be the same length. The result
    index will be the sorted union of the two indexes.

    Parameters
    ----------
    data : array-like, Iterable, dict, or scalar value
        Contains data stored in Series.

        .. versionchanged :: 0.23.0
           If data is a dict, argument order is maintained for Python 3.6
           and later.

    index : array-like or Index (1d)
        Values must be hashable and have the same length as `data`.
        Non-unique index values are allowed. Will default to
        RangeIndex (0, 1, 2, ..., n) if not provided. If both a dict and index
        sequence are used, the index will override the keys found in the
        dict.
    dtype : str, numpy.dtype, or ExtensionDtype, optional
        dtype for the output Series. If not specified, this will be
        inferred from `data`.
        See the :ref:`user guide <basics.dtypes>` for more usages.
    copy : bool, default False
        Copy input data.
    """
    # Attributes propagated onto results of operations (e.g. slicing).
    _metadata = ['name']
    # Accessor namespaces registered on this class (.dt, .cat, .str, .sparse).
    _accessors = {'dt', 'cat', 'str', 'sparse'}
    # tolist is not actually deprecated, just suppressed in the __dir__
    _deprecations = generic.NDFrame._deprecations | frozenset(
        ['asobject', 'reshape', 'get_value', 'set_value',
         'from_csv', 'valid', 'tolist'])

    # Override cache_readonly bc Series is mutable
    hasnans = property(base.IndexOpsMixin.hasnans.func,
                       doc=base.IndexOpsMixin.hasnans.__doc__)

    # ----------------------------------------------------------------------
    # Constructors
    def __init__(self, data=None, index=None, dtype=None, name=None,
                 copy=False, fastpath=False):
        # Construct a Series; dispatches on the type of ``data`` to build
        # a SingleBlockManager, then hands off to NDFrame.__init__.

        # we are called internally, so short-circuit
        if fastpath:

            # data is an ndarray, index is defined
            if not isinstance(data, SingleBlockManager):
                data = SingleBlockManager(data, index, fastpath=True)
            if copy:
                data = data.copy()
            if index is None:
                index = data.index

        else:

            if index is not None:
                index = ensure_index(index)

            if data is None:
                data = {}
            if dtype is not None:
                dtype = self._validate_dtype(dtype)

            if isinstance(data, MultiIndex):
                raise NotImplementedError("initializing a Series from a "
                                          "MultiIndex is not supported")
            elif isinstance(data, Index):
                if name is None:
                    name = data.name

                if dtype is not None:
                    # astype copies
                    data = data.astype(dtype)
                else:
                    # need to copy to avoid aliasing issues
                    data = data._values.copy()
                    if (isinstance(data, ABCDatetimeIndex) and
                            data.tz is not None):
                        # GH#24096 need copy to be deep for datetime64tz case
                        # TODO: See if we can avoid these copies
                        data = data._values.copy(deep=True)
                copy = False
            elif isinstance(data, np.ndarray):
                pass
            elif isinstance(data, (ABCSeries, ABCSparseSeries)):
                if name is None:
                    name = data.name
                if index is None:
                    index = data.index
                else:
                    # align the incoming Series to the requested index
                    data = data.reindex(index, copy=copy)
                data = data._data
            elif isinstance(data, dict):
                data, index = self._init_dict(data, index, dtype)
                # _init_dict already applied dtype/copy semantics
                dtype = None
                copy = False
            elif isinstance(data, SingleBlockManager):
                if index is None:
                    index = data.index
                elif not data.index.equals(index) or copy:
                    # GH#19275 SingleBlockManager input should only be called
                    # internally
                    raise AssertionError('Cannot pass both SingleBlockManager '
                                         '`data` argument and a different '
                                         '`index` argument. `copy` must '
                                         'be False.')
            elif is_extension_array_dtype(data):
                pass
            elif isinstance(data, (set, frozenset)):
                # sets have no stable order, so refuse them explicitly
                raise TypeError("{0!r} type is unordered"
                                "".format(data.__class__.__name__))
            # If data is Iterable but not list-like, consume into list.
            elif (isinstance(data, compat.Iterable)
                  and not isinstance(data, compat.Sized)):
                data = list(data)
            else:
                # handle sparse passed here (and force conversion)
                if isinstance(data, ABCSparseArray):
                    data = data.to_dense()

            if index is None:
                if not is_list_like(data):
                    data = [data]
                index = ibase.default_index(len(data))
            elif is_list_like(data):

                # a scalar numpy array is list-like but doesn't
                # have a proper length
                try:
                    if len(index) != len(data):
                        raise ValueError(
                            'Length of passed values is {val}, '
                            'index implies {ind}'
                            .format(val=len(data), ind=len(index)))
                except TypeError:
                    pass

            # create/copy the manager
            if isinstance(data, SingleBlockManager):
                if dtype is not None:
                    data = data.astype(dtype=dtype, errors='ignore',
                                       copy=copy)
                elif copy:
                    data = data.copy()
            else:
                data = sanitize_array(data, index, dtype, copy,
                                      raise_cast_failure=True)

                data = SingleBlockManager(data, index, fastpath=True)

        generic.NDFrame.__init__(self, data, fastpath=True)
        self.name = name
        self._set_axis(0, index, fastpath=True)
    def _init_dict(self, data, index=None, dtype=None):
        """
        Derive the "_data" and "index" attributes of a new Series from a
        dictionary input.

        Parameters
        ----------
        data : dict or dict-like
            Data used to populate the new Series
        index : Index or index-like, default None
            index for the new Series: if None, use dict keys
        dtype : dtype, default None
            dtype for the new Series: if None, infer from data

        Returns
        -------
        _data : BlockManager for the new Series
        index : index for the new Series
        """
        # Looking for NaN in dict doesn't work ({np.nan : 1}[float('nan')]
        # raises KeyError), so we iterate the entire dict, and align
        if data:
            keys, values = zip(*compat.iteritems(data))
            values = list(values)
        elif index is not None:
            # fastpath for Series(data=None). Just use broadcasting a scalar
            # instead of reindexing.
            values = na_value_for_dtype(dtype)
            keys = index
        else:
            keys, values = [], []

        # Input is now list-like, so rely on "standard" construction:
        s = Series(values, index=keys, dtype=dtype)

        # Now we just make sure the order is respected, if any
        if data and index is not None:
            s = s.reindex(index, copy=False)
        elif not PY36 and not isinstance(data, OrderedDict) and data:
            # Need the `and data` to avoid sorting Series(None, index=[...])
            # since that isn't really dict-like
            # Pre-3.6 dicts are unordered, so sort keys for determinism;
            # TypeError means the keys aren't sortable and order is kept.
            try:
                s = s.sort_index()
            except TypeError:
                pass
        return s._data, s.index
  267. @classmethod
  268. def from_array(cls, arr, index=None, name=None, dtype=None, copy=False,
  269. fastpath=False):
  270. """
  271. Construct Series from array.
  272. .. deprecated :: 0.23.0
  273. Use pd.Series(..) constructor instead.
  274. """
  275. warnings.warn("'from_array' is deprecated and will be removed in a "
  276. "future version. Please use the pd.Series(..) "
  277. "constructor instead.", FutureWarning, stacklevel=2)
  278. if isinstance(arr, ABCSparseArray):
  279. from pandas.core.sparse.series import SparseSeries
  280. cls = SparseSeries
  281. return cls(arr, index=index, name=name, dtype=dtype,
  282. copy=copy, fastpath=fastpath)
    # ----------------------------------------------------------------------

    @property
    def _constructor(self):
        # Class used internally to build new objects of the same type
        # (e.g. results of slicing or arithmetic).
        return Series
    @property
    def _constructor_expanddim(self):
        # Class used when expanding to one more dimension (Series -> frame).
        # Imported lazily to avoid a circular import with pandas.core.frame.
        from pandas.core.frame import DataFrame
        return DataFrame
    # types
    @property
    def _can_hold_na(self):
        # Delegated to the block manager: whether the underlying dtype
        # can represent missing values.
        return self._data._can_hold_na

    # Backing Index for the axis; assigned in _set_axis via object.__setattr__.
    _index = None
    def _set_axis(self, axis, labels, fastpath=False):
        """
        Override generic, we want to set the _typ here.
        """

        if not fastpath:
            labels = ensure_index(labels)

        is_all_dates = labels.is_all_dates
        if is_all_dates:
            # Labels look datelike but aren't yet a datetimelike index:
            # attempt conversion to DatetimeIndex, tolerating failure.
            if not isinstance(labels,
                              (DatetimeIndex, PeriodIndex, TimedeltaIndex)):
                try:
                    labels = DatetimeIndex(labels)
                    # need to set here because we changed the index
                    if fastpath:
                        self._data.set_axis(axis, labels)
                except (tslibs.OutOfBoundsDatetime, ValueError):
                    # labels may exceeds datetime bounds,
                    # or not be a DatetimeIndex
                    pass

        self._set_subtyp(is_all_dates)

        # Bypass NDFrame.__setattr__ so '_index' is set as a plain attribute.
        object.__setattr__(self, '_index', labels)
        if not fastpath:
            self._data.set_axis(axis, labels)
  319. def _set_subtyp(self, is_all_dates):
  320. if is_all_dates:
  321. object.__setattr__(self, '_subtyp', 'time_series')
  322. else:
  323. object.__setattr__(self, '_subtyp', 'series')
    def _update_inplace(self, result, **kwargs):
        # we want to call the generic version and not the IndexOpsMixin
        return generic.NDFrame._update_inplace(self, result, **kwargs)
  327. @property
  328. def name(self):
  329. """
  330. Return name of the Series.
  331. """
  332. return self._name
  333. @name.setter
  334. def name(self, value):
  335. if value is not None and not is_hashable(value):
  336. raise TypeError('Series.name must be a hashable type')
  337. object.__setattr__(self, '_name', value)
    # ndarray compatibility
    @property
    def dtype(self):
        """
        Return the dtype object of the underlying data.
        """
        return self._data.dtype
    @property
    def dtypes(self):
        """
        Return the dtype object of the underlying data.
        """
        # Plural alias of ``dtype`` for DataFrame API compatibility.
        return self._data.dtype
    @property
    def ftype(self):
        """
        Return if the data is sparse|dense.
        """
        return self._data.ftype
    @property
    def ftypes(self):
        """
        Return if the data is sparse|dense.
        """
        # Plural alias of ``ftype`` for DataFrame API compatibility.
        return self._data.ftype
    @property
    def values(self):
        """
        Return Series as ndarray or ndarray-like depending on the dtype.

        .. warning::

           We recommend using :attr:`Series.array` or
           :meth:`Series.to_numpy`, depending on whether you need
           a reference to the underlying data or a NumPy array.

        Returns
        -------
        arr : numpy.ndarray or ndarray-like

        See Also
        --------
        Series.array : Reference to the underlying data.
        Series.to_numpy : A NumPy array representing the underlying data.

        Examples
        --------
        >>> pd.Series([1, 2, 3]).values
        array([1, 2, 3])

        >>> pd.Series(list('aabc')).values
        array(['a', 'a', 'b', 'c'], dtype=object)

        >>> pd.Series(list('aabc')).astype('category').values
        [a, a, b, c]
        Categories (3, object): [a, b, c]

        Timezone aware datetime data is converted to UTC:

        >>> pd.Series(pd.date_range('20130101', periods=3,
        ...                         tz='US/Eastern')).values
        array(['2013-01-01T05:00:00.000000000',
               '2013-01-02T05:00:00.000000000',
               '2013-01-03T05:00:00.000000000'], dtype='datetime64[ns]')
        """
        return self._data.external_values()
  395. @property
  396. def _values(self):
  397. """
  398. Return the internal repr of this data.
  399. """
  400. return self._data.internal_values()
  401. def _formatting_values(self):
  402. """
  403. Return the values that can be formatted (used by SeriesFormatter
  404. and DataFrameFormatter).
  405. """
  406. return self._data.formatting_values()
  407. def get_values(self):
  408. """
  409. Same as values (but handles sparseness conversions); is a view.
  410. """
  411. return self._data.get_values()
  412. @property
  413. def asobject(self):
  414. """
  415. Return object Series which contains boxed values.
  416. .. deprecated :: 0.23.0
  417. Use ``astype(object)`` instead.
  418. *this is an internal non-public method*
  419. """
  420. warnings.warn("'asobject' is deprecated. Use 'astype(object)'"
  421. " instead", FutureWarning, stacklevel=2)
  422. return self.astype(object).values
  423. # ops
  424. def ravel(self, order='C'):
  425. """
  426. Return the flattened underlying data as an ndarray.
  427. See Also
  428. --------
  429. numpy.ndarray.ravel
  430. """
  431. return self._values.ravel(order=order)
  432. def compress(self, condition, *args, **kwargs):
  433. """
  434. Return selected slices of an array along given axis as a Series.
  435. .. deprecated:: 0.24.0
  436. See Also
  437. --------
  438. numpy.ndarray.compress
  439. """
  440. msg = ("Series.compress(condition) is deprecated. "
  441. "Use 'Series[condition]' or "
  442. "'np.asarray(series).compress(condition)' instead.")
  443. warnings.warn(msg, FutureWarning, stacklevel=2)
  444. nv.validate_compress(args, kwargs)
  445. return self[condition]
  446. def nonzero(self):
  447. """
  448. Return the *integer* indices of the elements that are non-zero.
  449. .. deprecated:: 0.24.0
  450. Please use .to_numpy().nonzero() as a replacement.
  451. This method is equivalent to calling `numpy.nonzero` on the
  452. series data. For compatibility with NumPy, the return value is
  453. the same (a tuple with an array of indices for each dimension),
  454. but it will always be a one-item tuple because series only have
  455. one dimension.
  456. See Also
  457. --------
  458. numpy.nonzero
  459. Examples
  460. --------
  461. >>> s = pd.Series([0, 3, 0, 4])
  462. >>> s.nonzero()
  463. (array([1, 3]),)
  464. >>> s.iloc[s.nonzero()[0]]
  465. 1 3
  466. 3 4
  467. dtype: int64
  468. >>> s = pd.Series([0, 3, 0, 4], index=['a', 'b', 'c', 'd'])
  469. # same return although index of s is different
  470. >>> s.nonzero()
  471. (array([1, 3]),)
  472. >>> s.iloc[s.nonzero()[0]]
  473. b 3
  474. d 4
  475. dtype: int64
  476. """
  477. msg = ("Series.nonzero() is deprecated "
  478. "and will be removed in a future version."
  479. "Use Series.to_numpy().nonzero() instead")
  480. warnings.warn(msg, FutureWarning, stacklevel=2)
  481. return self._values.nonzero()
  482. def put(self, *args, **kwargs):
  483. """
  484. Applies the `put` method to its `values` attribute if it has one.
  485. See Also
  486. --------
  487. numpy.ndarray.put
  488. """
  489. self._values.put(*args, **kwargs)
  490. def __len__(self):
  491. """
  492. Return the length of the Series.
  493. """
  494. return len(self._data)
def view(self, dtype=None):
    """
    Create a new view of the Series.

    This function will return a new Series with a view of the same
    underlying values in memory, optionally reinterpreted with a new data
    type. The new data type must preserve the same size in bytes as to not
    cause index misalignment.

    Parameters
    ----------
    dtype : data type
        Data type object or one of their string representations.

    Returns
    -------
    Series
        A new Series object as a view of the same data in memory.

    See Also
    --------
    numpy.ndarray.view : Equivalent numpy function to create a new view of
        the same data in memory.

    Notes
    -----
    Series are instantiated with ``dtype=float64`` by default. While
    ``numpy.ndarray.view()`` will return a view with the same data type as
    the original array, ``Series.view()`` (without specified dtype)
    will try using ``float64`` and may fail if the original data type size
    in bytes is not the same.

    Examples
    --------
    >>> s = pd.Series([-2, -1, 0, 1, 2], dtype='int8')
    >>> s
    0   -2
    1   -1
    2    0
    3    1
    4    2
    dtype: int8

    The 8 bit signed integer representation of `-1` is `0b11111111`, but
    the same bytes represent 255 if read as an 8 bit unsigned integer:

    >>> us = s.view('uint8')
    >>> us
    0    254
    1    255
    2      0
    3      1
    4      2
    dtype: uint8

    The views share the same underlying values:

    >>> us[0] = 128
    >>> s
    0   -128
    1     -1
    2      0
    3      1
    4      2
    dtype: int8
    """
    # Re-box the ndarray view with the same index; __finalize__ carries
    # over metadata (e.g. the name) from self.
    return self._constructor(self._values.view(dtype),
                             index=self.index).__finalize__(self)
# ----------------------------------------------------------------------
# NDArray Compat
def __array__(self, dtype=None):
    """
    Return the values as a NumPy array.

    Users should not call this directly. Rather, it is invoked by
    :func:`numpy.array` and :func:`numpy.asarray`.

    Parameters
    ----------
    dtype : str or numpy.dtype, optional
        The dtype to use for the resulting NumPy array. By default,
        the dtype is inferred from the data.

    Returns
    -------
    numpy.ndarray
        The values in the series converted to a :class:`numpy.ndarray`
        with the specified `dtype`.

    See Also
    --------
    pandas.array : Create a new array from data.
    Series.array : Zero-copy view to the array backing the Series.
    Series.to_numpy : Series method for similar behavior.

    Examples
    --------
    >>> ser = pd.Series([1, 2, 3])
    >>> np.asarray(ser)
    array([1, 2, 3])

    For timezone-aware data, the timezones may be retained with
    ``dtype='object'``

    >>> tzser = pd.Series(pd.date_range('2000', periods=2, tz="CET"))
    >>> np.asarray(tzser, dtype="object")
    array([Timestamp('2000-01-01 00:00:00+0100', tz='CET', freq='D'),
           Timestamp('2000-01-02 00:00:00+0100', tz='CET', freq='D')],
          dtype=object)

    Or the values may be localized to UTC and the tzinfo discarded with
    ``dtype='datetime64[ns]'``

    >>> np.asarray(tzser, dtype="datetime64[ns]")  # doctest: +ELLIPSIS
    array(['1999-12-31T23:00:00.000000000', ...],
          dtype='datetime64[ns]')
    """
    # With the default dtype, tz-aware datetime data is converted to a
    # tz-naive datetime64[ns] ndarray; warn that the default will change
    # to object dtype in a future version.
    if (dtype is None and isinstance(self.array, ABCDatetimeArray)
            and getattr(self.dtype, 'tz', None)):
        msg = (
            "Converting timezone-aware DatetimeArray to timezone-naive "
            "ndarray with 'datetime64[ns]' dtype. In the future, this "
            "will return an ndarray with 'object' dtype where each "
            "element is a 'pandas.Timestamp' with the correct 'tz'.\n\t"
            "To accept the future behavior, pass 'dtype=object'.\n\t"
            "To keep the old behavior, pass 'dtype=\"datetime64[ns]\"'."
        )
        warnings.warn(msg, FutureWarning, stacklevel=3)
        dtype = 'M8[ns]'
    return np.asarray(self.array, dtype)
  606. def __array_wrap__(self, result, context=None):
  607. """
  608. Gets called after a ufunc.
  609. """
  610. return self._constructor(result, index=self.index,
  611. copy=False).__finalize__(self)
def __array_prepare__(self, result, context=None):
    """
    Gets called prior to a ufunc.
    """
    # nice error message for non-ufunc types
    # context follows numpy's protocol: (ufunc, input args, domain).
    if (context is not None and
            (not isinstance(self._values, (np.ndarray, ExtensionArray))
             or isinstance(self._values, Categorical))):
        # Report the first ufunc input's type/dtype in the error.
        obj = context[1][0]
        raise TypeError("{obj} with dtype {dtype} cannot perform "
                        "the numpy op {op}".format(
                            obj=type(obj).__name__,
                            dtype=getattr(obj, 'dtype', None),
                            op=context[0].__name__))
    return result
  627. # ----------------------------------------------------------------------
  628. # Unary Methods
  629. @property
  630. def real(self):
  631. """
  632. Return the real value of vector.
  633. """
  634. return self.values.real
  635. @real.setter
  636. def real(self, v):
  637. self.values.real = v
  638. @property
  639. def imag(self):
  640. """
  641. Return imag value of vector.
  642. """
  643. return self.values.imag
  644. @imag.setter
  645. def imag(self, v):
  646. self.values.imag = v
# coercion
# Scalar-conversion dunders generated by _coerce_method; presumably it
# converts a single-element Series to the given scalar type -- confirm in
# _coerce_method's definition (not visible here).  __long__ is the Python 2
# spelling and deliberately maps to int, same as __int__.
__float__ = _coerce_method(float)
__long__ = _coerce_method(int)
__int__ = _coerce_method(int)
# ----------------------------------------------------------------------
def _unpickle_series_compat(self, state):
    """
    Restore this Series from pickle `state` written by an older pandas.

    Accepts either a dict state (newer legacy format) or a 2-tuple of
    (ndarray state, own state) from pre-0.12 pickles; anything else raises.
    """
    if isinstance(state, dict):
        self._data = state['_data']
        self.name = state['name']
        self.index = self._data.index
    elif isinstance(state, tuple):
        # < 0.12 series pickle
        nd_state, own_state = state
        # recreate the ndarray
        data = np.empty(nd_state[1], dtype=nd_state[2])
        np.ndarray.__setstate__(data, nd_state)
        # backwards compat
        # own_state is (index,) or (index, name)
        index, name = own_state[0], None
        if len(own_state) > 1:
            name = own_state[1]
        # recreate
        self._data = SingleBlockManager(data, index, fastpath=True)
        self._index = index
        self.name = name
    else:
        raise Exception("cannot unpickle legacy formats -> [%s]" % state)
  673. # indexers
  674. @property
  675. def axes(self):
  676. """
  677. Return a list of the row axis labels.
  678. """
  679. return [self.index]
def _ixs(self, i, axis=0):
    """
    Return the i-th value or values in the Series by location.

    Parameters
    ----------
    i : int, slice, or sequence of integers

    Returns
    -------
    value : scalar (int) or Series (slice, sequence)
    """
    try:
        # dispatch to the values if we need
        values = self._values
        if isinstance(values, np.ndarray):
            return libindex.get_value_at(values, i)
        else:
            return values[i]
    except IndexError:
        # genuinely out of bounds -- propagate
        raise
    except Exception:
        # NOTE(review): broad fallback; any other failure is retried via
        # positional slicing or label-based lookup below.
        if isinstance(i, slice):
            indexer = self.index._convert_slice_indexer(i, kind='iloc')
            return self._get_values(indexer)
        else:
            label = self.index[i]
            if isinstance(label, Index):
                # `i` selected multiple positions -> take them all
                return self.take(i, axis=axis, convert=True)
            else:
                return libindex.get_value_at(self, i)
  709. @property
  710. def _is_mixed_type(self):
  711. return False
  712. def _slice(self, slobj, axis=0, kind=None):
  713. slobj = self.index._convert_slice_indexer(slobj,
  714. kind=kind or 'getitem')
  715. return self._get_values(slobj)
def __getitem__(self, key):
    """
    Return the value(s) for `key`, trying a direct index lookup first and
    falling back to coercion / fancy indexing via _get_with.
    """
    key = com.apply_if_callable(key, self)
    try:
        result = self.index.get_value(self, key)

        if not is_scalar(result):
            if is_list_like(result) and not isinstance(result, Series):

                # we need to box if loc of the key isn't scalar here
                # otherwise have inline ndarray/lists
                try:
                    if not is_scalar(self.index.get_loc(key)):
                        result = self._constructor(
                            result, index=[key] * len(result),
                            dtype=self.dtype).__finalize__(self)
                except KeyError:
                    pass
        return result
    except InvalidIndexError:
        # not a scalar-lookup key; fall through to _get_with below
        pass
    except (KeyError, ValueError):
        if isinstance(key, tuple) and isinstance(self.index, MultiIndex):
            # kludge
            pass
        elif key is Ellipsis:
            return self
        elif com.is_bool_indexer(key):
            pass
        else:

            # we can try to coerce the indexer (or this will raise)
            new_key = self.index._convert_scalar_indexer(key,
                                                         kind='getitem')
            if type(new_key) != type(key):
                return self.__getitem__(new_key)
            raise

    except Exception:
        raise

    # iterators must be materialized before boolean-mask handling
    if is_iterator(key):
        key = list(key)

    if com.is_bool_indexer(key):
        key = check_bool_indexer(self.index, key)

    return self._get_with(key)
def _get_with(self, key):
    """
    Fallback lookup for non-scalar keys: slices, tuples, boolean masks,
    integer arrays and label lists.
    """
    # other: fancy integer or otherwise
    if isinstance(key, slice):
        indexer = self.index._convert_slice_indexer(key, kind='getitem')
        return self._get_values(indexer)
    elif isinstance(key, ABCDataFrame):
        raise TypeError('Indexing a Series with DataFrame is not '
                        'supported, use the appropriate DataFrame column')
    elif isinstance(key, tuple):
        try:
            return self._get_values_tuple(key)
        except Exception:
            # a 1-tuple wrapping a slice is unwrapped and retried
            if len(key) == 1:
                key = key[0]
                if isinstance(key, slice):
                    return self._get_values(key)
            raise

    # pragma: no cover
    if not isinstance(key, (list, np.ndarray, Series, Index)):
        key = list(key)

    if isinstance(key, Index):
        key_type = key.inferred_type
    else:
        key_type = lib.infer_dtype(key, skipna=False)

    if key_type == 'integer':
        # integer keys are positional unless the index itself is numeric
        if self.index.is_integer() or self.index.is_floating():
            return self.loc[key]
        else:
            return self._get_values(key)
    elif key_type == 'boolean':
        return self._get_values(key)

    try:
        # handle the dup indexing case (GH 4246)
        if isinstance(key, (list, tuple)):
            return self.loc[key]

        return self.reindex(key)
    except Exception:
        # [slice(0, 5, None)] will break if you convert to ndarray,
        # e.g. as requested by np.median
        # hack
        if isinstance(key[0], slice):
            return self._get_values(key)
        raise
  799. def _get_values_tuple(self, key):
  800. # mpl hackaround
  801. if com._any_none(*key):
  802. return self._get_values(key)
  803. if not isinstance(self.index, MultiIndex):
  804. raise ValueError('Can only tuple-index with a MultiIndex')
  805. # If key is contained, would have returned by now
  806. indexer, new_index = self.index.get_loc_level(key)
  807. return self._constructor(self._values[indexer],
  808. index=new_index).__finalize__(self)
def _get_values(self, indexer):
    """
    Positionally select values, re-boxed as a Series when possible.
    """
    # Fast path: slice the block manager directly; if that fails for any
    # reason, fall back to raw positional indexing on the values.
    try:
        return self._constructor(self._data.get_slice(indexer),
                                 fastpath=True).__finalize__(self)
    except Exception:
        return self._values[indexer]
def __setitem__(self, key, value):
    """
    Set `value` at `key`, trying the fast index-engine path first and
    falling back through positional, label and boolean-mask assignment.
    """
    key = com.apply_if_callable(key, self)

    def setitem(key, value):
        try:
            self._set_with_engine(key, value)
            return
        except com.SettingWithCopyError:
            # chained-assignment guard must propagate
            raise
        except (KeyError, ValueError):
            values = self._values
            if (is_integer(key) and
                    not self.index.inferred_type == 'integer'):
                # positional set on a non-integer index
                values[key] = value
                return
            elif key is Ellipsis:
                self[:] = value
                return
            elif com.is_bool_indexer(key):
                pass
            elif is_timedelta64_dtype(self.dtype):
                # reassign a null value to iNaT
                if isna(value):
                    value = iNaT

                    try:
                        self.index._engine.set_value(self._values, key,
                                                     value)
                        return
                    except TypeError:
                        pass

            self.loc[key] = value
            return

        except TypeError as e:
            if (isinstance(key, tuple) and
                    not isinstance(self.index, MultiIndex)):
                raise ValueError("Can only tuple-index with a MultiIndex")

            # python 3 type errors should be raised
            if _is_unorderable_exception(e):
                raise IndexError(key)

        if com.is_bool_indexer(key):
            key = check_bool_indexer(self.index, key)
            try:
                # boolean mask -> masked assignment via _where
                self._where(~key, value, inplace=True)
                return
            except InvalidIndexError:
                pass

        self._set_with(key, value)

    # do the setitem
    cacher_needs_updating = self._check_is_chained_assignment_possible()
    setitem(key, value)
    if cacher_needs_updating:
        self._maybe_update_cacher()
  866. def _set_with_engine(self, key, value):
  867. values = self._values
  868. try:
  869. self.index._engine.set_value(values, key, value)
  870. return
  871. except KeyError:
  872. values[self.index.get_loc(key)] = value
  873. return
def _set_with(self, key, value):
    """
    Fallback assignment for non-engine keys: slices, tuples, boolean
    masks, integer arrays and label lists.
    """
    # other: fancy integer or otherwise
    if isinstance(key, slice):
        indexer = self.index._convert_slice_indexer(key, kind='getitem')
        return self._set_values(indexer, value)
    else:
        if isinstance(key, tuple):
            # best-effort tuple set; on failure fall through to the
            # list-like handling below
            try:
                self._set_values(key, value)
            except Exception:
                pass

        # normalize key to a list-like
        if is_scalar(key):
            key = [key]
        elif not isinstance(key, (list, Series, np.ndarray)):
            try:
                key = list(key)
            except Exception:
                key = [key]

        if isinstance(key, Index):
            key_type = key.inferred_type
        else:
            key_type = lib.infer_dtype(key, skipna=False)

        if key_type == 'integer':
            # integer keys are labels only when the index is integer
            if self.index.inferred_type == 'integer':
                self._set_labels(key, value)
            else:
                return self._set_values(key, value)
        elif key_type == 'boolean':
            self._set_values(key.astype(np.bool_), value)
        else:
            self._set_labels(key, value)
  905. def _set_labels(self, key, value):
  906. if isinstance(key, Index):
  907. key = key.values
  908. else:
  909. key = com.asarray_tuplesafe(key)
  910. indexer = self.index.get_indexer(key)
  911. mask = indexer == -1
  912. if mask.any():
  913. raise ValueError('%s not contained in the index' % str(key[mask]))
  914. self._set_values(indexer, value)
  915. def _set_values(self, key, value):
  916. if isinstance(key, Series):
  917. key = key._values
  918. self._data = self._data.setitem(indexer=key, value=value)
  919. self._maybe_update_cacher()
def repeat(self, repeats, axis=None):
    """
    Repeat elements of a Series.

    Returns a new Series where each element of the current Series
    is repeated consecutively a given number of times.

    Parameters
    ----------
    repeats : int or array of ints
        The number of repetitions for each element. This should be a
        non-negative integer. Repeating 0 times will return an empty
        Series.
    axis : None
        Must be ``None``. Has no effect but is accepted for compatibility
        with numpy.

    Returns
    -------
    repeated_series : Series
        Newly created Series with repeated elements.

    See Also
    --------
    Index.repeat : Equivalent function for Index.
    numpy.repeat : Similar method for :class:`numpy.ndarray`.

    Examples
    --------
    >>> s = pd.Series(['a', 'b', 'c'])
    >>> s
    0    a
    1    b
    2    c
    dtype: object
    >>> s.repeat(2)
    0    a
    0    a
    1    b
    1    b
    2    c
    2    c
    dtype: object
    >>> s.repeat([1, 2, 3])
    0    a
    1    b
    1    b
    2    c
    2    c
    2    c
    dtype: object
    """
    # rejects a non-None axis, for numpy signature compatibility
    nv.validate_repeat(tuple(), dict(axis=axis))
    # index labels are repeated in lockstep with the values
    new_index = self.index.repeat(repeats)
    new_values = self._values.repeat(repeats)
    return self._constructor(new_values,
                             index=new_index).__finalize__(self)
  972. def get_value(self, label, takeable=False):
  973. """
  974. Quickly retrieve single value at passed index label.
  975. .. deprecated:: 0.21.0
  976. Please use .at[] or .iat[] accessors.
  977. Parameters
  978. ----------
  979. label : object
  980. takeable : interpret the index as indexers, default False
  981. Returns
  982. -------
  983. value : scalar value
  984. """
  985. warnings.warn("get_value is deprecated and will be removed "
  986. "in a future release. Please use "
  987. ".at[] or .iat[] accessors instead", FutureWarning,
  988. stacklevel=2)
  989. return self._get_value(label, takeable=takeable)
def _get_value(self, label, takeable=False):
    # Non-warning implementation behind the deprecated get_value().
    if takeable is True:
        # positional lookup; box datetimelike scalars
        return com.maybe_box_datetimelike(self._values[label])
    return self.index.get_value(self._values, label)
_get_value.__doc__ = get_value.__doc__
  995. def set_value(self, label, value, takeable=False):
  996. """
  997. Quickly set single value at passed label.
  998. .. deprecated:: 0.21.0
  999. Please use .at[] or .iat[] accessors.
  1000. If label is not contained, a new object is created with the label
  1001. placed at the end of the result index.
  1002. Parameters
  1003. ----------
  1004. label : object
  1005. Partial indexing with MultiIndex not allowed
  1006. value : object
  1007. Scalar value
  1008. takeable : interpret the index as indexers, default False
  1009. Returns
  1010. -------
  1011. series : Series
  1012. If label is contained, will be reference to calling Series,
  1013. otherwise a new object
  1014. """
  1015. warnings.warn("set_value is deprecated and will be removed "
  1016. "in a future release. Please use "
  1017. ".at[] or .iat[] accessors instead", FutureWarning,
  1018. stacklevel=2)
  1019. return self._set_value(label, value, takeable=takeable)
def _set_value(self, label, value, takeable=False):
    # Non-warning implementation behind the deprecated set_value().
    try:
        if takeable:
            # positional write
            self._values[label] = value
        else:
            self.index._engine.set_value(self._values, label, value)
    except KeyError:
        # set using a non-recursive method
        # (label not present -- .loc will enlarge the Series)
        self.loc[label] = value

    return self
_set_value.__doc__ = set_value.__doc__
  1031. def reset_index(self, level=None, drop=False, name=None, inplace=False):
  1032. """
  1033. Generate a new DataFrame or Series with the index reset.
  1034. This is useful when the index needs to be treated as a column, or
  1035. when the index is meaningless and needs to be reset to the default
  1036. before another operation.
  1037. Parameters
  1038. ----------
  1039. level : int, str, tuple, or list, default optional
  1040. For a Series with a MultiIndex, only remove the specified levels
  1041. from the index. Removes all levels by default.
  1042. drop : bool, default False
  1043. Just reset the index, without inserting it as a column in
  1044. the new DataFrame.
  1045. name : object, optional
  1046. The name to use for the column containing the original Series
  1047. values. Uses ``self.name`` by default. This argument is ignored
  1048. when `drop` is True.
  1049. inplace : bool, default False
  1050. Modify the Series in place (do not create a new object).
  1051. Returns
  1052. -------
  1053. Series or DataFrame
  1054. When `drop` is False (the default), a DataFrame is returned.
  1055. The newly created columns will come first in the DataFrame,
  1056. followed by the original Series values.
  1057. When `drop` is True, a `Series` is returned.
  1058. In either case, if ``inplace=True``, no value is returned.
  1059. See Also
  1060. --------
  1061. DataFrame.reset_index: Analogous function for DataFrame.
  1062. Examples
  1063. --------
  1064. >>> s = pd.Series([1, 2, 3, 4], name='foo',
  1065. ... index=pd.Index(['a', 'b', 'c', 'd'], name='idx'))
  1066. Generate a DataFrame with default index.
  1067. >>> s.reset_index()
  1068. idx foo
  1069. 0 a 1
  1070. 1 b 2
  1071. 2 c 3
  1072. 3 d 4
  1073. To specify the name of the new column use `name`.
  1074. >>> s.reset_index(name='values')
  1075. idx values
  1076. 0 a 1
  1077. 1 b 2
  1078. 2 c 3
  1079. 3 d 4
  1080. To generate a new Series with the default set `drop` to True.
  1081. >>> s.reset_index(drop=True)
  1082. 0 1
  1083. 1 2
  1084. 2 3
  1085. 3 4
  1086. Name: foo, dtype: int64
  1087. To update the Series in place, without generating a new one
  1088. set `inplace` to True. Note that it also requires ``drop=True``.
  1089. >>> s.reset_index(inplace=True, drop=True)
  1090. >>> s
  1091. 0 1
  1092. 1 2
  1093. 2 3
  1094. 3 4
  1095. Name: foo, dtype: int64
  1096. The `level` parameter is interesting for Series with a multi-level
  1097. index.
  1098. >>> arrays = [np.array(['bar', 'bar', 'baz', 'baz']),
  1099. ... np.array(['one', 'two', 'one', 'two'])]
  1100. >>> s2 = pd.Series(
  1101. ... range(4), name='foo',
  1102. ... index=pd.MultiIndex.from_arrays(arrays,
  1103. ... names=['a', 'b']))
  1104. To remove a specific level from the Index, use `level`.
  1105. >>> s2.reset_index(level='a')
  1106. a foo
  1107. b
  1108. one bar 0
  1109. two bar 1
  1110. one baz 2
  1111. two baz 3
  1112. If `level` is not set, all levels are removed from the Index.
  1113. >>> s2.reset_index()
  1114. a b foo
  1115. 0 bar one 0
  1116. 1 bar two 1
  1117. 2 baz one 2
  1118. 3 baz two 3
  1119. """
  1120. inplace = validate_bool_kwarg(inplace, 'inplace')
  1121. if drop:
  1122. new_index = ibase.default_index(len(self))
  1123. if level is not None:
  1124. if not isinstance(level, (tuple, list)):
  1125. level = [level]
  1126. level = [self.index._get_level_number(lev) for lev in level]
  1127. if len(level) < self.index.nlevels:
  1128. new_index = self.index.droplevel(level)
  1129. if inplace:
  1130. self.index = new_index
  1131. # set name if it was passed, otherwise, keep the previous name
  1132. self.name = name or self.name
  1133. else:
  1134. return self._constructor(self._values.copy(),
  1135. index=new_index).__finalize__(self)
  1136. elif inplace:
  1137. raise TypeError('Cannot reset_index inplace on a Series '
  1138. 'to create a DataFrame')
  1139. else:
  1140. df = self.to_frame(name)
  1141. return df.reset_index(level=level, drop=drop)
# ----------------------------------------------------------------------
# Rendering Methods
def __unicode__(self):
    """
    Return a string representation for a particular DataFrame.

    Invoked by unicode(df) in py2 only. Yields a Unicode String in both
    py2/py3.
    """
    buf = StringIO(u(""))
    # NOTE(review): `width` is queried but never used here.
    width, height = get_terminal_size()
    # display.max_rows == 0 means "fit to the terminal height"
    max_rows = (height if get_option("display.max_rows") == 0 else
                get_option("display.max_rows"))
    show_dimensions = get_option("display.show_dimensions")

    self.to_string(buf=buf, name=self.name, dtype=self.dtype,
                   max_rows=max_rows, length=show_dimensions)
    result = buf.getvalue()

    return result
def to_string(self, buf=None, na_rep='NaN', float_format=None, header=True,
              index=True, length=False, dtype=False, name=False,
              max_rows=None):
    """
    Render a string representation of the Series.

    Parameters
    ----------
    buf : StringIO-like, optional
        buffer to write to
    na_rep : string, optional
        string representation of NAN to use, default 'NaN'
    float_format : one-parameter function, optional
        formatter function to apply to columns' elements if they are floats
        default None
    header : boolean, default True
        Add the Series header (index name)
    index : bool, optional
        Add index (row) labels, default True
    length : boolean, default False
        Add the Series length
    dtype : boolean, default False
        Add the Series dtype
    name : boolean, default False
        Add the Series name if not None
    max_rows : int, optional
        Maximum number of rows to show before truncating. If None, show
        all.

    Returns
    -------
    formatted : string (if not buffer passed)
    """
    formatter = fmt.SeriesFormatter(self, name=name, length=length,
                                    header=header, index=index,
                                    dtype=dtype, na_rep=na_rep,
                                    float_format=float_format,
                                    max_rows=max_rows)
    result = formatter.to_string()

    # catch contract violations
    if not isinstance(result, compat.text_type):
        raise AssertionError("result must be of type unicode, type"
                             " of result is {0!r}"
                             "".format(result.__class__.__name__))

    if buf is None:
        return result
    else:
        # buf may be a writable object or a file path
        try:
            buf.write(result)
        except AttributeError:
            with open(buf, 'w') as f:
                f.write(result)
# ----------------------------------------------------------------------
def iteritems(self):
    """
    Lazily iterate over (index, value) tuples.
    """
    return zip(iter(self.index), iter(self))

# dict-style alias for iteritems
items = iteritems
  1216. # ----------------------------------------------------------------------
  1217. # Misc public methods
  1218. def keys(self):
  1219. """
  1220. Alias for index.
  1221. """
  1222. return self.index
def to_dict(self, into=dict):
    """
    Convert Series to {label -> value} dict or dict-like object.

    Parameters
    ----------
    into : class, default dict
        The collections.Mapping subclass to use as the return
        object. Can be the actual class or an empty
        instance of the mapping type you want. If you want a
        collections.defaultdict, you must pass it initialized.

        .. versionadded:: 0.21.0

    Returns
    -------
    value_dict : collections.Mapping

    Examples
    --------
    >>> s = pd.Series([1, 2, 3, 4])
    >>> s.to_dict()
    {0: 1, 1: 2, 2: 3, 3: 4}
    >>> from collections import OrderedDict, defaultdict
    >>> s.to_dict(OrderedDict)
    OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
    >>> dd = defaultdict(list)
    >>> s.to_dict(dd)
    defaultdict(<type 'list'>, {0: 1, 1: 2, 2: 3, 3: 4})
    """
    # GH16122
    # normalize `into` (class or instance) to a constructible mapping type
    into_c = com.standardize_mapping(into)
    return into_c(compat.iteritems(self))
  1252. def to_frame(self, name=None):
  1253. """
  1254. Convert Series to DataFrame.
  1255. Parameters
  1256. ----------
  1257. name : object, default None
  1258. The passed name should substitute for the series name (if it has
  1259. one).
  1260. Returns
  1261. -------
  1262. data_frame : DataFrame
  1263. """
  1264. if name is None:
  1265. df = self._constructor_expanddim(self)
  1266. else:
  1267. df = self._constructor_expanddim({name: self})
  1268. return df
def to_sparse(self, kind='block', fill_value=None):
    """
    Convert Series to SparseSeries.

    Parameters
    ----------
    kind : {'block', 'integer'}
    fill_value : float, defaults to NaN (missing)

    Returns
    -------
    sp : SparseSeries
    """
    # TODO: deprecate
    # local import avoids a circular dependency with the sparse subpackage
    from pandas.core.sparse.series import SparseSeries

    values = SparseArray(self, kind=kind, fill_value=fill_value)
    return SparseSeries(
        values, index=self.index, name=self.name
    ).__finalize__(self)
  1286. def _set_name(self, name, inplace=False):
  1287. """
  1288. Set the Series name.
  1289. Parameters
  1290. ----------
  1291. name : str
  1292. inplace : bool
  1293. whether to modify `self` directly or return a copy
  1294. """
  1295. inplace = validate_bool_kwarg(inplace, 'inplace')
  1296. ser = self if inplace else self.copy()
  1297. ser.name = name
  1298. return ser
# ----------------------------------------------------------------------
# Statistics, overridden ndarray methods

# TODO: integrate bottleneck
def count(self, level=None):
    """
    Return number of non-NA/null observations in the Series.

    Parameters
    ----------
    level : int or level name, default None
        If the axis is a MultiIndex (hierarchical), count along a
        particular level, collapsing into a smaller Series

    Returns
    -------
    nobs : int or Series (if level specified)
    """
    if level is None:
        return notna(com.values_from_object(self)).sum()

    if isinstance(level, compat.string_types):
        level = self.index._get_level_number(level)

    lev = self.index.levels[level]
    # copy so that missing-label rewriting below can't mutate the index
    level_codes = np.array(self.index.codes[level], subok=False, copy=True)

    mask = level_codes == -1
    if mask.any():
        # map missing labels (-1) to a synthetic NA category appended
        # at position len(lev)
        level_codes[mask] = cnt = len(lev)
        lev = lev.insert(cnt, lev._na_value)

    # per-level-code counts of non-NA values
    obs = level_codes[notna(self.values)]
    out = np.bincount(obs, minlength=len(lev) or None)
    return self._constructor(out, index=lev,
                             dtype='int64').__finalize__(self)
def mode(self, dropna=True):
    """
    Return the mode(s) of the dataset.

    Always returns Series even if only one value is returned.

    Parameters
    ----------
    dropna : boolean, default True
        Don't consider counts of NaN/NaT.

        .. versionadded:: 0.24.0

    Returns
    -------
    modes : Series (sorted)
    """
    # TODO: Add option for bins like value_counts()
    # Delegates entirely to the shared algorithms implementation.
    return algorithms.mode(self, dropna=dropna)
def unique(self):
    """
    Return unique values of Series object.

    Uniques are returned in order of appearance. Hash table-based unique,
    therefore does NOT sort.

    Returns
    -------
    ndarray or ExtensionArray
        The unique values returned as a NumPy array. In case of an
        extension-array backed Series, a new
        :class:`~api.extensions.ExtensionArray` of that type with just
        the unique values is returned. This includes

        * Categorical
        * Period
        * Datetime with Timezone
        * Interval
        * Sparse
        * IntegerNA

    See Also
    --------
    unique : Top-level unique method for any 1-d array-like object.
    Index.unique : Return Index with unique values from an Index object.

    Examples
    --------
    >>> pd.Series([2, 1, 3, 3], name='A').unique()
    array([2, 1, 3])

    >>> pd.Series([pd.Timestamp('2016-01-01') for _ in range(3)]).unique()
    array(['2016-01-01T00:00:00.000000000'], dtype='datetime64[ns]')

    >>> pd.Series([pd.Timestamp('2016-01-01', tz='US/Eastern')
    ...            for _ in range(3)]).unique()
    <DatetimeArray>
    ['2016-01-01 00:00:00-05:00']
    Length: 1, dtype: datetime64[ns, US/Eastern]

    An unordered Categorical will return categories in the order of
    appearance.

    >>> pd.Series(pd.Categorical(list('baabc'))).unique()
    [b, a, c]
    Categories (3, object): [b, a, c]

    An ordered Categorical preserves the category ordering.

    >>> pd.Series(pd.Categorical(list('baabc'), categories=list('abc'),
    ...                          ordered=True)).unique()
    [b, a, c]
    Categories (3, object): [a < b < c]
    """
    # All the work happens in the shared IndexOpsMixin implementation.
    result = super(Series, self).unique()
    return result
def drop_duplicates(self, keep='first', inplace=False):
    """
    Return Series with duplicate values removed.

    Parameters
    ----------
    keep : {'first', 'last', ``False``}, default 'first'
        - 'first' : Drop duplicates except for the first occurrence.
        - 'last' : Drop duplicates except for the last occurrence.
        - ``False`` : Drop all duplicates.
    inplace : boolean, default ``False``
        If ``True``, performs operation inplace and returns None.

    Returns
    -------
    deduplicated : Series

    See Also
    --------
    Index.drop_duplicates : Equivalent method on Index.
    DataFrame.drop_duplicates : Equivalent method on DataFrame.
    Series.duplicated : Related method on Series, indicating duplicate
        Series values.

    Examples
    --------
    Generate a Series with duplicated entries.

    >>> s = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo'],
    ...               name='animal')
    >>> s
    0      lama
    1       cow
    2      lama
    3    beetle
    4      lama
    5     hippo
    Name: animal, dtype: object

    With the 'keep' parameter, the selection behaviour of duplicated
    values can be changed. The value 'first' keeps the first occurrence
    for each set of duplicated entries. The default value of keep is
    'first'.

    >>> s.drop_duplicates()
    0      lama
    1       cow
    3    beetle
    5     hippo
    Name: animal, dtype: object

    The value 'last' for parameter 'keep' keeps the last occurrence for
    each set of duplicated entries.

    >>> s.drop_duplicates(keep='last')
    1       cow
    3    beetle
    4      lama
    5     hippo
    Name: animal, dtype: object

    The value ``False`` for parameter 'keep' discards all sets of
    duplicated entries. Setting the value of 'inplace' to ``True``
    performs the operation inplace and returns ``None``.

    >>> s.drop_duplicates(keep=False, inplace=True)
    >>> s
    1       cow
    3    beetle
    5     hippo
    Name: animal, dtype: object
    """
    # Shared implementation lives on the generic base class.
    return super(Series, self).drop_duplicates(keep=keep, inplace=inplace)
def duplicated(self, keep='first'):
    """
    Indicate duplicate Series values.

    Duplicated values are indicated as ``True`` values in the resulting
    Series. Either all duplicates, all except the first or all except
    the last occurrence of duplicates can be indicated.

    Parameters
    ----------
    keep : {'first', 'last', False}, default 'first'
        - 'first' : Mark duplicates as ``True`` except for the first
          occurrence.
        - 'last' : Mark duplicates as ``True`` except for the last
          occurrence.
        - ``False`` : Mark all duplicates as ``True``.

    Returns
    -------
    pandas.core.series.Series

    See Also
    --------
    Index.duplicated : Equivalent method on pandas.Index.
    DataFrame.duplicated : Equivalent method on pandas.DataFrame.
    Series.drop_duplicates : Remove duplicate values from Series.

    Examples
    --------
    By default, for each set of duplicated values, the first occurrence
    is set on False and all others on True:

    >>> animals = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama'])
    >>> animals.duplicated()
    0    False
    1    False
    2     True
    3    False
    4     True
    dtype: bool

    which is equivalent to

    >>> animals.duplicated(keep='first')
    0    False
    1    False
    2     True
    3    False
    4     True
    dtype: bool

    By using 'last', the last occurrence of each set of duplicated
    values is set on False and all others on True:

    >>> animals.duplicated(keep='last')
    0     True
    1    False
    2     True
    3    False
    4    False
    dtype: bool

    By setting keep on ``False``, all duplicates are True:

    >>> animals.duplicated(keep=False)
    0     True
    1    False
    2     True
    3    False
    4     True
    dtype: bool
    """
    # Shared implementation lives on the generic base class.
    return super(Series, self).duplicated(keep=keep)
  1511. def idxmin(self, axis=0, skipna=True, *args, **kwargs):
  1512. """
  1513. Return the row label of the minimum value.
  1514. If multiple values equal the minimum, the first row label with that
  1515. value is returned.
  1516. Parameters
  1517. ----------
  1518. skipna : boolean, default True
  1519. Exclude NA/null values. If the entire Series is NA, the result
  1520. will be NA.
  1521. axis : int, default 0
  1522. For compatibility with DataFrame.idxmin. Redundant for application
  1523. on Series.
  1524. *args, **kwargs
  1525. Additional keywords have no effect but might be accepted
  1526. for compatibility with NumPy.
  1527. Returns
  1528. -------
  1529. idxmin : Index of minimum of values.
  1530. Raises
  1531. ------
  1532. ValueError
  1533. If the Series is empty.
  1534. See Also
  1535. --------
  1536. numpy.argmin : Return indices of the minimum values
  1537. along the given axis.
  1538. DataFrame.idxmin : Return index of first occurrence of minimum
  1539. over requested axis.
  1540. Series.idxmax : Return index *label* of the first occurrence
  1541. of maximum of values.
  1542. Notes
  1543. -----
  1544. This method is the Series version of ``ndarray.argmin``. This method
  1545. returns the label of the minimum, while ``ndarray.argmin`` returns
  1546. the position. To get the position, use ``series.values.argmin()``.
  1547. Examples
  1548. --------
  1549. >>> s = pd.Series(data=[1, None, 4, 1],
  1550. ... index=['A' ,'B' ,'C' ,'D'])
  1551. >>> s
  1552. A 1.0
  1553. B NaN
  1554. C 4.0
  1555. D 1.0
  1556. dtype: float64
  1557. >>> s.idxmin()
  1558. 'A'
  1559. If `skipna` is False and there is an NA value in the data,
  1560. the function returns ``nan``.
  1561. >>> s.idxmin(skipna=False)
  1562. nan
  1563. """
  1564. skipna = nv.validate_argmin_with_skipna(skipna, args, kwargs)
  1565. i = nanops.nanargmin(com.values_from_object(self), skipna=skipna)
  1566. if i == -1:
  1567. return np.nan
  1568. return self.index[i]
  1569. def idxmax(self, axis=0, skipna=True, *args, **kwargs):
  1570. """
  1571. Return the row label of the maximum value.
  1572. If multiple values equal the maximum, the first row label with that
  1573. value is returned.
  1574. Parameters
  1575. ----------
  1576. skipna : boolean, default True
  1577. Exclude NA/null values. If the entire Series is NA, the result
  1578. will be NA.
  1579. axis : int, default 0
  1580. For compatibility with DataFrame.idxmax. Redundant for application
  1581. on Series.
  1582. *args, **kwargs
  1583. Additional keywords have no effect but might be accepted
  1584. for compatibility with NumPy.
  1585. Returns
  1586. -------
  1587. idxmax : Index of maximum of values.
  1588. Raises
  1589. ------
  1590. ValueError
  1591. If the Series is empty.
  1592. See Also
  1593. --------
  1594. numpy.argmax : Return indices of the maximum values
  1595. along the given axis.
  1596. DataFrame.idxmax : Return index of first occurrence of maximum
  1597. over requested axis.
  1598. Series.idxmin : Return index *label* of the first occurrence
  1599. of minimum of values.
  1600. Notes
  1601. -----
  1602. This method is the Series version of ``ndarray.argmax``. This method
  1603. returns the label of the maximum, while ``ndarray.argmax`` returns
  1604. the position. To get the position, use ``series.values.argmax()``.
  1605. Examples
  1606. --------
  1607. >>> s = pd.Series(data=[1, None, 4, 3, 4],
  1608. ... index=['A', 'B', 'C', 'D', 'E'])
  1609. >>> s
  1610. A 1.0
  1611. B NaN
  1612. C 4.0
  1613. D 3.0
  1614. E 4.0
  1615. dtype: float64
  1616. >>> s.idxmax()
  1617. 'C'
  1618. If `skipna` is False and there is an NA value in the data,
  1619. the function returns ``nan``.
  1620. >>> s.idxmax(skipna=False)
  1621. nan
  1622. """
  1623. skipna = nv.validate_argmax_with_skipna(skipna, args, kwargs)
  1624. i = nanops.nanargmax(com.values_from_object(self), skipna=skipna)
  1625. if i == -1:
  1626. return np.nan
  1627. return self.index[i]
# ndarray compat
# Deprecated label-based aliases: Series.argmin/argmax historically
# behaved like idxmin/idxmax (returning labels, not positions), so they
# are kept as deprecated wrappers until the positional behavior lands.
argmin = deprecate(
    'argmin', idxmin, '0.21.0',
    msg=dedent("""
    The current behaviour of 'Series.argmin' is deprecated, use 'idxmin'
    instead.
    The behavior of 'argmin' will be corrected to return the positional
    minimum in the future. For now, use 'series.values.argmin' or
    'np.argmin(np.array(values))' to get the position of the minimum
    row.""")
)
argmax = deprecate(
    'argmax', idxmax, '0.21.0',
    msg=dedent("""
    The current behaviour of 'Series.argmax' is deprecated, use 'idxmax'
    instead.
    The behavior of 'argmax' will be corrected to return the positional
    maximum in the future. For now, use 'series.values.argmax' or
    'np.argmax(np.array(values))' to get the position of the maximum
    row.""")
)
  1649. def round(self, decimals=0, *args, **kwargs):
  1650. """
  1651. Round each value in a Series to the given number of decimals.
  1652. Parameters
  1653. ----------
  1654. decimals : int
  1655. Number of decimal places to round to (default: 0).
  1656. If decimals is negative, it specifies the number of
  1657. positions to the left of the decimal point.
  1658. Returns
  1659. -------
  1660. Series object
  1661. See Also
  1662. --------
  1663. numpy.around
  1664. DataFrame.round
  1665. """
  1666. nv.validate_round(args, kwargs)
  1667. result = com.values_from_object(self).round(decimals)
  1668. result = self._constructor(result, index=self.index).__finalize__(self)
  1669. return result
def quantile(self, q=0.5, interpolation='linear'):
    """
    Return value at the given quantile.

    Parameters
    ----------
    q : float or array-like, default 0.5 (50% quantile)
        0 <= q <= 1, the quantile(s) to compute.
    interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
        .. versionadded:: 0.18.0

        This optional parameter specifies the interpolation method to
        use, when the desired quantile lies between two data points
        `i` and `j`:

        * linear: `i + (j - i) * fraction`, where `fraction` is the
          fractional part of the index surrounded by `i` and `j`.
        * lower: `i`.
        * higher: `j`.
        * nearest: `i` or `j` whichever is nearest.
        * midpoint: (`i` + `j`) / 2.

    Returns
    -------
    quantile : float or Series
        if ``q`` is an array, a Series will be returned where the
        index is ``q`` and the values are the quantiles.

    See Also
    --------
    core.window.Rolling.quantile
    numpy.percentile

    Examples
    --------
    >>> s = pd.Series([1, 2, 3, 4])
    >>> s.quantile(.5)
    2.5
    >>> s.quantile([.25, .5, .75])
    0.25    1.75
    0.50    2.50
    0.75    3.25
    dtype: float64
    """
    # Raises if q is outside [0, 1].
    self._check_percentile(q)

    # We dispatch to DataFrame so that core.internals only has to worry
    # about 2D cases.
    df = self.to_frame()

    result = df.quantile(q=q, interpolation=interpolation,
                         numeric_only=False)
    if result.ndim == 2:
        # DataFrame.quantile returned a DataFrame; take our one column.
        result = result.iloc[:, 0]

    if is_list_like(q):
        # list-like q -> Series result indexed by the quantiles.
        result.name = self.name
        return self._constructor(result,
                                 index=Float64Index(q),
                                 name=self.name)
    else:
        # scalar
        return result.iloc[0]
  1723. def corr(self, other, method='pearson', min_periods=None):
  1724. """
  1725. Compute correlation with `other` Series, excluding missing values.
  1726. Parameters
  1727. ----------
  1728. other : Series
  1729. method : {'pearson', 'kendall', 'spearman'} or callable
  1730. * pearson : standard correlation coefficient
  1731. * kendall : Kendall Tau correlation coefficient
  1732. * spearman : Spearman rank correlation
  1733. * callable: callable with input two 1d ndarray
  1734. and returning a float
  1735. .. versionadded:: 0.24.0
  1736. min_periods : int, optional
  1737. Minimum number of observations needed to have a valid result
  1738. Returns
  1739. -------
  1740. correlation : float
  1741. Examples
  1742. --------
  1743. >>> histogram_intersection = lambda a, b: np.minimum(a, b
  1744. ... ).sum().round(decimals=1)
  1745. >>> s1 = pd.Series([.2, .0, .6, .2])
  1746. >>> s2 = pd.Series([.3, .6, .0, .1])
  1747. >>> s1.corr(s2, method=histogram_intersection)
  1748. 0.3
  1749. """
  1750. this, other = self.align(other, join='inner', copy=False)
  1751. if len(this) == 0:
  1752. return np.nan
  1753. if method in ['pearson', 'spearman', 'kendall'] or callable(method):
  1754. return nanops.nancorr(this.values, other.values, method=method,
  1755. min_periods=min_periods)
  1756. raise ValueError("method must be either 'pearson', "
  1757. "'spearman', or 'kendall', '{method}' "
  1758. "was supplied".format(method=method))
  1759. def cov(self, other, min_periods=None):
  1760. """
  1761. Compute covariance with Series, excluding missing values.
  1762. Parameters
  1763. ----------
  1764. other : Series
  1765. min_periods : int, optional
  1766. Minimum number of observations needed to have a valid result
  1767. Returns
  1768. -------
  1769. covariance : float
  1770. Normalized by N-1 (unbiased estimator).
  1771. """
  1772. this, other = self.align(other, join='inner', copy=False)
  1773. if len(this) == 0:
  1774. return np.nan
  1775. return nanops.nancov(this.values, other.values,
  1776. min_periods=min_periods)
def diff(self, periods=1):
    """
    First discrete difference of element.

    Calculates the difference of a Series element compared with another
    element in the Series (default is element in previous row).

    Parameters
    ----------
    periods : int, default 1
        Periods to shift for calculating difference, accepts negative
        values.

    Returns
    -------
    diffed : Series

    See Also
    --------
    Series.pct_change: Percent change over given number of periods.
    Series.shift: Shift index by desired number of periods with an
        optional time freq.
    DataFrame.diff: First discrete difference of object.

    Examples
    --------
    Difference with previous row

    >>> s = pd.Series([1, 1, 2, 3, 5, 8])
    >>> s.diff()
    0    NaN
    1    0.0
    2    1.0
    3    1.0
    4    2.0
    5    3.0
    dtype: float64

    Difference with 3rd previous row

    >>> s.diff(periods=3)
    0    NaN
    1    NaN
    2    NaN
    3    2.0
    4    4.0
    5    6.0
    dtype: float64

    Difference with following row

    >>> s.diff(periods=-1)
    0    0.0
    1   -1.0
    2   -1.0
    3   -2.0
    4   -3.0
    5    NaN
    dtype: float64
    """
    # The heavy lifting (dtype handling, NA insertion) is in
    # algorithms.diff; we just re-wrap with the original index.
    result = algorithms.diff(com.values_from_object(self), periods)
    return self._constructor(result, index=self.index).__finalize__(self)
  1829. def autocorr(self, lag=1):
  1830. """
  1831. Compute the lag-N autocorrelation.
  1832. This method computes the Pearson correlation between
  1833. the Series and its shifted self.
  1834. Parameters
  1835. ----------
  1836. lag : int, default 1
  1837. Number of lags to apply before performing autocorrelation.
  1838. Returns
  1839. -------
  1840. float
  1841. The Pearson correlation between self and self.shift(lag).
  1842. See Also
  1843. --------
  1844. Series.corr : Compute the correlation between two Series.
  1845. Series.shift : Shift index by desired number of periods.
  1846. DataFrame.corr : Compute pairwise correlation of columns.
  1847. DataFrame.corrwith : Compute pairwise correlation between rows or
  1848. columns of two DataFrame objects.
  1849. Notes
  1850. -----
  1851. If the Pearson correlation is not well defined return 'NaN'.
  1852. Examples
  1853. --------
  1854. >>> s = pd.Series([0.25, 0.5, 0.2, -0.05])
  1855. >>> s.autocorr() # doctest: +ELLIPSIS
  1856. 0.10355...
  1857. >>> s.autocorr(lag=2) # doctest: +ELLIPSIS
  1858. -0.99999...
  1859. If the Pearson correlation is not well defined, then 'NaN' is returned.
  1860. >>> s = pd.Series([1, 0, 0, 0])
  1861. >>> s.autocorr()
  1862. nan
  1863. """
  1864. return self.corr(self.shift(lag))
def dot(self, other):
    """
    Compute the dot product between the Series and the columns of other.

    This method computes the dot product between the Series and another
    one, or the Series and each columns of a DataFrame, or the Series and
    each columns of an array.

    It can also be called using `self @ other` in Python >= 3.5.

    Parameters
    ----------
    other : Series, DataFrame or array-like
        The other object to compute the dot product with its columns.

    Returns
    -------
    scalar, Series or numpy.ndarray
        Return the dot product of the Series and other if other is a
        Series, the Series of the dot product of Series and each rows of
        other if other is a DataFrame or a numpy.ndarray between the
        Series and each columns of the numpy array.

    See Also
    --------
    DataFrame.dot: Compute the matrix product with the DataFrame.
    Series.mul: Multiplication of series and other, element-wise.

    Notes
    -----
    The Series and other has to share the same index if other is a
    Series or a DataFrame.

    Examples
    --------
    >>> s = pd.Series([0, 1, 2, 3])
    >>> other = pd.Series([-1, 2, -3, 4])
    >>> s.dot(other)
    8
    >>> s @ other
    8
    >>> df = pd.DataFrame([[0 ,1], [-2, 3], [4, -5], [6, 7]])
    >>> s.dot(df)
    0    24
    1    14
    dtype: int64
    >>> arr = np.array([[0, 1], [-2, 3], [4, -5], [6, 7]])
    >>> s.dot(arr)
    array([24, 14])
    """
    from pandas.core.frame import DataFrame
    if isinstance(other, (Series, DataFrame)):
        # Indexes must contain the same labels (alignment may only
        # reorder, never introduce new labels), then both operands are
        # reindexed to a common order.
        common = self.index.union(other.index)
        if (len(common) > len(self.index) or
                len(common) > len(other.index)):
            raise ValueError('matrices are not aligned')

        left = self.reindex(index=common, copy=False)
        right = other.reindex(index=common, copy=False)
        lvals = left.values
        rvals = right.values
    else:
        lvals = self.values
        rvals = np.asarray(other)

    if lvals.shape[0] != rvals.shape[0]:
        # NOTE(review): raises a generic Exception with %-formatting;
        # callers may rely on catching Exception here, so the type is
        # left unchanged.
        raise Exception('Dot product shape mismatch, %s vs %s' %
                        (lvals.shape, rvals.shape))

    # Result type mirrors the operand type: DataFrame -> Series keyed
    # by its columns, Series/ndarray -> np.dot result.
    if isinstance(other, DataFrame):
        return self._constructor(np.dot(lvals, rvals),
                                 index=other.columns).__finalize__(self)
    elif isinstance(other, Series):
        return np.dot(lvals, rvals)
    elif isinstance(rvals, np.ndarray):
        return np.dot(lvals, rvals)
    else:  # pragma: no cover
        raise TypeError('unsupported type: %s' % type(other))
  1933. def __matmul__(self, other):
  1934. """
  1935. Matrix multiplication using binary `@` operator in Python>=3.5.
  1936. """
  1937. return self.dot(other)
  1938. def __rmatmul__(self, other):
  1939. """
  1940. Matrix multiplication using binary `@` operator in Python>=3.5.
  1941. """
  1942. return self.dot(np.transpose(other))
@Substitution(klass='Series')
@Appender(base._shared_docs['searchsorted'])
def searchsorted(self, value, side='left', sorter=None):
    # Docstring is injected from base._shared_docs by the decorators.
    if sorter is not None:
        # Underlying searchsorted requires platform-int sorter indices.
        sorter = ensure_platform_int(sorter)
    # Wrap `value` in a Series so it is coerced the same way as
    # self._values before dispatching.
    result = self._values.searchsorted(Series(value)._values,
                                       side=side, sorter=sorter)
    # Preserve scalar-in / scalar-out behavior.
    return result[0] if is_scalar(value) else result
  1951. # -------------------------------------------------------------------
  1952. # Combination
  1953. def append(self, to_append, ignore_index=False, verify_integrity=False):
  1954. """
  1955. Concatenate two or more Series.
  1956. Parameters
  1957. ----------
  1958. to_append : Series or list/tuple of Series
  1959. ignore_index : boolean, default False
  1960. If True, do not use the index labels.
  1961. .. versionadded:: 0.19.0
  1962. verify_integrity : boolean, default False
  1963. If True, raise Exception on creating index with duplicates
  1964. Returns
  1965. -------
  1966. appended : Series
  1967. See Also
  1968. --------
  1969. concat : General function to concatenate DataFrame, Series
  1970. or Panel objects.
  1971. Notes
  1972. -----
  1973. Iteratively appending to a Series can be more computationally intensive
  1974. than a single concatenate. A better solution is to append values to a
  1975. list and then concatenate the list with the original Series all at
  1976. once.
  1977. Examples
  1978. --------
  1979. >>> s1 = pd.Series([1, 2, 3])
  1980. >>> s2 = pd.Series([4, 5, 6])
  1981. >>> s3 = pd.Series([4, 5, 6], index=[3,4,5])
  1982. >>> s1.append(s2)
  1983. 0 1
  1984. 1 2
  1985. 2 3
  1986. 0 4
  1987. 1 5
  1988. 2 6
  1989. dtype: int64
  1990. >>> s1.append(s3)
  1991. 0 1
  1992. 1 2
  1993. 2 3
  1994. 3 4
  1995. 4 5
  1996. 5 6
  1997. dtype: int64
  1998. With `ignore_index` set to True:
  1999. >>> s1.append(s2, ignore_index=True)
  2000. 0 1
  2001. 1 2
  2002. 2 3
  2003. 3 4
  2004. 4 5
  2005. 5 6
  2006. dtype: int64
  2007. With `verify_integrity` set to True:
  2008. >>> s1.append(s2, verify_integrity=True)
  2009. Traceback (most recent call last):
  2010. ...
  2011. ValueError: Indexes have overlapping values: [0, 1, 2]
  2012. """
  2013. from pandas.core.reshape.concat import concat
  2014. if isinstance(to_append, (list, tuple)):
  2015. to_concat = [self] + to_append
  2016. else:
  2017. to_concat = [self, to_append]
  2018. return concat(to_concat, ignore_index=ignore_index,
  2019. verify_integrity=verify_integrity)
def _binop(self, other, func, level=None, fill_value=None):
    """
    Perform generic binary operation with optional fill value.

    Parameters
    ----------
    other : Series
        Right-hand operand.
    func : binary operator
        Function applied to the two aligned value arrays.
    fill_value : float or object
        Value to substitute for NA/null values. If both Series are NA in
        a location, the result will be NA regardless of the passed fill
        value.
    level : int or level name, default None
        Broadcast across a level, matching Index values on the
        passed MultiIndex level.

    Returns
    -------
    combined : Series
    """
    if not isinstance(other, Series):
        raise AssertionError('Other operand must be Series')

    new_index = self.index
    this = self

    if not self.index.equals(other.index):
        # Outer-align the operands; the op then runs on the union index.
        this, other = self.align(other, level=level, join='outer',
                                 copy=False)
        new_index = this.index

    # Fill one-sided NA positions with fill_value before applying func.
    this_vals, other_vals = ops.fill_binop(this.values, other.values,
                                           fill_value)

    # Suppress numpy warnings (divide-by-zero etc.) during the op itself.
    with np.errstate(all='ignore'):
        result = func(this_vals, other_vals)

    name = ops.get_op_result_name(self, other)
    result = self._constructor(result, index=new_index, name=name)
    result = result.__finalize__(self)
    if name is None:
        # When name is None, __finalize__ overwrites current name
        result.name = None
    return result
def combine(self, other, func, fill_value=None):
    """
    Combine the Series with a Series or scalar according to `func`.

    Combine the Series and `other` using `func` to perform elementwise
    selection for combined Series.
    `fill_value` is assumed when value is missing at some index
    from one of the two objects being combined.

    Parameters
    ----------
    other : Series or scalar
        The value(s) to be combined with the `Series`.
    func : function
        Function that takes two scalars as inputs and returns an
        element.
    fill_value : scalar, optional
        The value to assume when an index is missing from
        one Series or the other. The default specifies to use the
        appropriate NaN value for the underlying dtype of the Series.

    Returns
    -------
    Series
        The result of combining the Series with the other object.

    See Also
    --------
    Series.combine_first : Combine Series values, choosing the calling
        Series' values first.

    Examples
    --------
    Consider 2 Datasets ``s1`` and ``s2`` containing
    highest clocked speeds of different birds.

    >>> s1 = pd.Series({'falcon': 330.0, 'eagle': 160.0})
    >>> s2 = pd.Series({'falcon': 345.0, 'eagle': 200.0, 'duck': 30.0})

    Now, to combine the two datasets and view the highest speeds
    of the birds across the two datasets

    >>> s1.combine(s2, max)
    duckatür     NaN
    eagle     200.0
    falcon    345.0
    dtype: float64

    In the previous example, the resulting value for duck is missing,
    because the maximum of a NaN and a float is a NaN.
    So, in the example, we set ``fill_value=0``,
    so the maximum value returned will be the value from some dataset.

    >>> s1.combine(s2, max, fill_value=0)
    duck       30.0
    eagle     200.0
    falcon    345.0
    dtype: float64
    """
    if fill_value is None:
        # Use the dtype-appropriate NA (NaN, NaT, ...) as the default.
        fill_value = na_value_for_dtype(self.dtype, compat=False)

    if isinstance(other, Series):
        # If other is a Series, result is based on union of Series,
        # so do this element by element
        new_index = self.index.union(other.index)
        new_name = ops.get_op_result_name(self, other)
        new_values = []
        for idx in new_index:
            # Missing labels on either side fall back to fill_value.
            lv = self.get(idx, fill_value)
            rv = other.get(idx, fill_value)
            with np.errstate(all='ignore'):
                new_values.append(func(lv, rv))
    else:
        # Assume that other is a scalar, so apply the function for
        # each element in the Series
        new_index = self.index
        with np.errstate(all='ignore'):
            new_values = [func(lv, other) for lv in self._values]
        new_name = self.name

    if is_categorical_dtype(self.values):
        pass
    elif is_extension_array_dtype(self.values):
        # The function can return something of any type, so check
        # if the type is compatible with the calling EA.
        try:
            new_values = self._values._from_sequence(new_values)
        except Exception:
            # https://github.com/pandas-dev/pandas/issues/22850
            # pandas has no control over what 3rd-party ExtensionArrays
            # do in _values_from_sequence. We still want ops to work
            # though, so we catch any regular Exception.
            pass

    return self._constructor(new_values, index=new_index, name=new_name)
  2147. def combine_first(self, other):
  2148. """
  2149. Combine Series values, choosing the calling Series's values first.
  2150. Parameters
  2151. ----------
  2152. other : Series
  2153. The value(s) to be combined with the `Series`.
  2154. Returns
  2155. -------
  2156. Series
  2157. The result of combining the Series with the other object.
  2158. See Also
  2159. --------
  2160. Series.combine : Perform elementwise operation on two Series
  2161. using a given function.
  2162. Notes
  2163. -----
  2164. Result index will be the union of the two indexes.
  2165. Examples
  2166. --------
  2167. >>> s1 = pd.Series([1, np.nan])
  2168. >>> s2 = pd.Series([3, 4])
  2169. >>> s1.combine_first(s2)
  2170. 0 1.0
  2171. 1 4.0
  2172. dtype: float64
  2173. """
  2174. new_index = self.index.union(other.index)
  2175. this = self.reindex(new_index, copy=False)
  2176. other = other.reindex(new_index, copy=False)
  2177. if is_datetimelike(this) and not is_datetimelike(other):
  2178. other = to_datetime(other)
  2179. return this.where(notna(this), other)
def update(self, other):
    """
    Modify Series in place using non-NA values from passed
    Series. Aligns on index.

    Parameters
    ----------
    other : Series
        Source of the updated values.

    Examples
    --------
    >>> s = pd.Series([1, 2, 3])
    >>> s.update(pd.Series([4, 5, 6]))
    >>> s
    0    4
    1    5
    2    6
    dtype: int64

    >>> s = pd.Series(['a', 'b', 'c'])
    >>> s.update(pd.Series(['d', 'e'], index=[0, 2]))
    >>> s
    0    d
    1    b
    2    e
    dtype: object

    >>> s = pd.Series([1, 2, 3])
    >>> s.update(pd.Series([4, 5, 6, 7, 8]))
    >>> s
    0    4
    1    5
    2    6
    dtype: int64

    If ``other`` contains NaNs the corresponding values are not updated
    in the original Series.

    >>> s = pd.Series([1, 2, 3])
    >>> s.update(pd.Series([4, np.nan, 6]))
    >>> s
    0    4
    1    2
    2    6
    dtype: int64
    """
    # Align `other` to our index; labels it lacks become NA and are
    # therefore excluded by the mask below.
    other = other.reindex_like(self)
    mask = notna(other)
    # Write only the non-NA positions of `other` into our data;
    # putmask(..., inplace=True) hands back the block manager to keep.
    self._data = self._data.putmask(mask=mask, new=other, inplace=True)
    self._maybe_update_cacher()
  2224. # ----------------------------------------------------------------------
  2225. # Reindexing, sorting
def sort_values(self, axis=0, ascending=True, inplace=False,
                kind='quicksort', na_position='last'):
    """
    Sort by the values.

    Sort a Series in ascending or descending order by some
    criterion.

    Parameters
    ----------
    axis : {0 or 'index'}, default 0
        Axis to direct sorting. The value 'index' is accepted for
        compatibility with DataFrame.sort_values.
    ascending : bool, default True
        If True, sort values in ascending order, otherwise descending.
    inplace : bool, default False
        If True, perform operation in-place.
    kind : {'quicksort', 'mergesort' or 'heapsort'}, default 'quicksort'
        Choice of sorting algorithm. See also :func:`numpy.sort` for more
        information. 'mergesort' is the only stable algorithm.
    na_position : {'first' or 'last'}, default 'last'
        Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at
        the end.

    Returns
    -------
    Series
        Series ordered by values.

    See Also
    --------
    Series.sort_index : Sort by the Series indices.
    DataFrame.sort_values : Sort DataFrame by the values along either axis.
    DataFrame.sort_index : Sort DataFrame by indices.

    Examples
    --------
    >>> s = pd.Series([np.nan, 1, 3, 10, 5])
    >>> s
    0     NaN
    1     1.0
    2     3.0
    3     10.0
    4     5.0
    dtype: float64

    Sort values ascending order (default behaviour)

    >>> s.sort_values(ascending=True)
    1     1.0
    2     3.0
    4     5.0
    3    10.0
    0     NaN
    dtype: float64

    Sort values descending order

    >>> s.sort_values(ascending=False)
    3    10.0
    4     5.0
    2     3.0
    1     1.0
    0     NaN
    dtype: float64

    Sort values inplace

    >>> s.sort_values(ascending=False, inplace=True)
    >>> s
    3    10.0
    4     5.0
    2     3.0
    1     1.0
    0     NaN
    dtype: float64

    Sort values putting NAs first

    >>> s.sort_values(na_position='first')
    0     NaN
    1     1.0
    2     3.0
    4     5.0
    3    10.0
    dtype: float64

    Sort a series of strings

    >>> s = pd.Series(['z', 'b', 'd', 'a', 'c'])
    >>> s
    0    z
    1    b
    2    d
    3    a
    4    c
    dtype: object

    >>> s.sort_values()
    3    a
    1    b
    4    c
    2    d
    0    z
    dtype: object
    """
    inplace = validate_bool_kwarg(inplace, 'inplace')

    # Validate the axis parameter
    self._get_axis_number(axis)

    # GH 5856/5853
    # Sorting a cached view in place would silently corrupt the parent.
    if inplace and self._is_cached:
        raise ValueError("This Series is a view of some other array, to "
                         "sort in-place you must create a copy")

    def _try_kind_sort(arr):
        # easier to ask forgiveness than permission
        try:
            # if kind==mergesort, it can fail for object dtype
            return arr.argsort(kind=kind)
        except TypeError:
            # stable sort not available for object dtype
            # uses the argsort default quicksort
            return arr.argsort(kind='quicksort')

    arr = self._values
    # Preallocated output of positional indices into ``arr``.
    sortedIdx = np.empty(len(self), dtype=np.int32)

    # NA values are sorted separately (placed first or last below).
    bad = isna(arr)

    good = ~bad
    idx = ibase.default_index(len(self))

    # Argsort only the non-NA values.
    argsorted = _try_kind_sort(arr[good])

    # ``ascending`` may arrive as a length-1 list for DataFrame compat.
    if is_list_like(ascending):
        if len(ascending) != 1:
            raise ValueError('Length of ascending (%d) must be 1 '
                             'for Series' % (len(ascending)))
        ascending = ascending[0]

    if not is_bool(ascending):
        raise ValueError('ascending must be boolean')

    if not ascending:
        # Reverse the sorted order for a descending sort.
        argsorted = argsorted[::-1]

    # Splice the NA positions before or after the sorted non-NA block.
    if na_position == 'last':
        n = good.sum()
        sortedIdx[:n] = idx[good][argsorted]
        sortedIdx[n:] = idx[bad]
    elif na_position == 'first':
        n = bad.sum()
        sortedIdx[n:] = idx[good][argsorted]
        sortedIdx[:n] = idx[bad]
    else:
        raise ValueError('invalid na_position: {!r}'.format(na_position))

    result = self._constructor(arr[sortedIdx], index=self.index[sortedIdx])

    if inplace:
        self._update_inplace(result)
    else:
        return result.__finalize__(self)
def sort_index(self, axis=0, level=None, ascending=True, inplace=False,
               kind='quicksort', na_position='last', sort_remaining=True):
    """
    Sort Series by index labels.

    Returns a new Series sorted by label if `inplace` argument is
    ``False``, otherwise updates the original series and returns None.

    Parameters
    ----------
    axis : int, default 0
        Axis to direct sorting. This can only be 0 for Series.
    level : int, optional
        If not None, sort on values in specified index level(s).
    ascending : bool, default true
        Sort ascending vs. descending.
    inplace : bool, default False
        If True, perform operation in-place.
    kind : {'quicksort', 'mergesort', 'heapsort'}, default 'quicksort'
        Choice of sorting algorithm. See also :func:`numpy.sort` for more
        information. 'mergesort' is the only stable algorithm. For
        DataFrames, this option is only applied when sorting on a single
        column or label.
    na_position : {'first', 'last'}, default 'last'
        If 'first' puts NaNs at the beginning, 'last' puts NaNs at the end.
        Not implemented for MultiIndex.
    sort_remaining : bool, default True
        If true and sorting by level and index is multilevel, sort by other
        levels too (in order) after sorting by specified level.

    Returns
    -------
    pandas.Series
        The original Series sorted by the labels

    See Also
    --------
    DataFrame.sort_index: Sort DataFrame by the index.
    DataFrame.sort_values: Sort DataFrame by the value.
    Series.sort_values : Sort Series by the value.

    Examples
    --------
    >>> s = pd.Series(['a', 'b', 'c', 'd'], index=[3, 2, 1, 4])
    >>> s.sort_index()
    1    c
    2    b
    3    a
    4    d
    dtype: object

    Sort Descending

    >>> s.sort_index(ascending=False)
    4    d
    3    a
    2    b
    1    c
    dtype: object

    Sort Inplace

    >>> s.sort_index(inplace=True)
    >>> s
    1    c
    2    b
    3    a
    4    d
    dtype: object

    By default NaNs are put at the end, but use `na_position` to place
    them at the beginning

    >>> s = pd.Series(['a', 'b', 'c', 'd'], index=[3, 2, 1, np.nan])
    >>> s.sort_index(na_position='first')
    NaN     d
    1.0     c
    2.0     b
    3.0     a
    dtype: object

    Specify index level to sort

    >>> arrays = [np.array(['qux', 'qux', 'foo', 'foo',
    ...                     'baz', 'baz', 'bar', 'bar']),
    ...           np.array(['two', 'one', 'two', 'one',
    ...                     'two', 'one', 'two', 'one'])]
    >>> s = pd.Series([1, 2, 3, 4, 5, 6, 7, 8], index=arrays)
    >>> s.sort_index(level=1)
    bar  one    8
    baz  one    6
    foo  one    4
    qux  one    2
    bar  two    7
    baz  two    5
    foo  two    3
    qux  two    1
    dtype: int64

    Does not sort by remaining levels when sorting by levels

    >>> s.sort_index(level=1, sort_remaining=False)
    qux  one    2
    foo  one    4
    baz  one    6
    bar  one    8
    qux  two    1
    foo  two    3
    baz  two    5
    bar  two    7
    dtype: int64
    """
    # TODO: this can be combined with DataFrame.sort_index impl as
    # almost identical
    inplace = validate_bool_kwarg(inplace, 'inplace')

    # Validate the axis parameter
    self._get_axis_number(axis)
    index = self.index

    if level is not None:
        # Sort on the requested MultiIndex level(s) only.
        new_index, indexer = index.sortlevel(level, ascending=ascending,
                                             sort_remaining=sort_remaining)
    elif isinstance(index, MultiIndex):
        from pandas.core.sorting import lexsort_indexer
        labels = index._sort_levels_monotonic()
        indexer = lexsort_indexer(labels._get_codes_for_sorting(),
                                  orders=ascending,
                                  na_position=na_position)
    else:
        from pandas.core.sorting import nargsort

        # Check monotonic-ness before sort an index
        # GH11080
        # Already sorted in the requested direction: skip the sort entirely.
        if ((ascending and index.is_monotonic_increasing) or
                (not ascending and index.is_monotonic_decreasing)):
            if inplace:
                return
            else:
                return self.copy()

        indexer = nargsort(index, kind=kind, ascending=ascending,
                           na_position=na_position)

    indexer = ensure_platform_int(indexer)
    new_index = index.take(indexer)
    new_index = new_index._sort_levels_monotonic()

    new_values = self._values.take(indexer)
    result = self._constructor(new_values, index=new_index)

    if inplace:
        self._update_inplace(result)
    else:
        return result.__finalize__(self)
def argsort(self, axis=0, kind='quicksort', order=None):
    """
    Overrides ndarray.argsort. Argsorts the value, omitting NA/null values,
    and places the result in the same locations as the non-NA values.

    Parameters
    ----------
    axis : int
        Has no effect but is accepted for compatibility with numpy.
    kind : {'mergesort', 'quicksort', 'heapsort'}, default 'quicksort'
        Choice of sorting algorithm. See np.sort for more
        information. 'mergesort' is the only stable algorithm
    order : None
        Has no effect but is accepted for compatibility with numpy.

    Returns
    -------
    argsorted : Series, with -1 indicated where nan values are present

    See Also
    --------
    numpy.ndarray.argsort
    """
    values = self._values
    mask = isna(values)

    if mask.any():
        # NA values cannot be ordered: start from an all -1 result and
        # argsort only the non-NA values, writing their order back into
        # the non-NA positions.
        result = Series(-1, index=self.index, name=self.name,
                        dtype='int64')
        notmask = ~mask
        result[notmask] = np.argsort(values[notmask], kind=kind)
        return self._constructor(result,
                                 index=self.index).__finalize__(self)
    else:
        # No missing values: a plain argsort over all values suffices.
        return self._constructor(
            np.argsort(values, kind=kind), index=self.index,
            dtype='int64').__finalize__(self)
  2528. def nlargest(self, n=5, keep='first'):
  2529. """
  2530. Return the largest `n` elements.
  2531. Parameters
  2532. ----------
  2533. n : int, default 5
  2534. Return this many descending sorted values.
  2535. keep : {'first', 'last', 'all'}, default 'first'
  2536. When there are duplicate values that cannot all fit in a
  2537. Series of `n` elements:
  2538. - ``first`` : take the first occurrences based on the index order
  2539. - ``last`` : take the last occurrences based on the index order
  2540. - ``all`` : keep all occurrences. This can result in a Series of
  2541. size larger than `n`.
  2542. Returns
  2543. -------
  2544. Series
  2545. The `n` largest values in the Series, sorted in decreasing order.
  2546. See Also
  2547. --------
  2548. Series.nsmallest: Get the `n` smallest elements.
  2549. Series.sort_values: Sort Series by values.
  2550. Series.head: Return the first `n` rows.
  2551. Notes
  2552. -----
  2553. Faster than ``.sort_values(ascending=False).head(n)`` for small `n`
  2554. relative to the size of the ``Series`` object.
  2555. Examples
  2556. --------
  2557. >>> countries_population = {"Italy": 59000000, "France": 65000000,
  2558. ... "Malta": 434000, "Maldives": 434000,
  2559. ... "Brunei": 434000, "Iceland": 337000,
  2560. ... "Nauru": 11300, "Tuvalu": 11300,
  2561. ... "Anguilla": 11300, "Monserat": 5200}
  2562. >>> s = pd.Series(countries_population)
  2563. >>> s
  2564. Italy 59000000
  2565. France 65000000
  2566. Malta 434000
  2567. Maldives 434000
  2568. Brunei 434000
  2569. Iceland 337000
  2570. Nauru 11300
  2571. Tuvalu 11300
  2572. Anguilla 11300
  2573. Monserat 5200
  2574. dtype: int64
  2575. The `n` largest elements where ``n=5`` by default.
  2576. >>> s.nlargest()
  2577. France 65000000
  2578. Italy 59000000
  2579. Malta 434000
  2580. Maldives 434000
  2581. Brunei 434000
  2582. dtype: int64
  2583. The `n` largest elements where ``n=3``. Default `keep` value is 'first'
  2584. so Malta will be kept.
  2585. >>> s.nlargest(3)
  2586. France 65000000
  2587. Italy 59000000
  2588. Malta 434000
  2589. dtype: int64
  2590. The `n` largest elements where ``n=3`` and keeping the last duplicates.
  2591. Brunei will be kept since it is the last with value 434000 based on
  2592. the index order.
  2593. >>> s.nlargest(3, keep='last')
  2594. France 65000000
  2595. Italy 59000000
  2596. Brunei 434000
  2597. dtype: int64
  2598. The `n` largest elements where ``n=3`` with all duplicates kept. Note
  2599. that the returned Series has five elements due to the three duplicates.
  2600. >>> s.nlargest(3, keep='all')
  2601. France 65000000
  2602. Italy 59000000
  2603. Malta 434000
  2604. Maldives 434000
  2605. Brunei 434000
  2606. dtype: int64
  2607. """
  2608. return algorithms.SelectNSeries(self, n=n, keep=keep).nlargest()
  2609. def nsmallest(self, n=5, keep='first'):
  2610. """
  2611. Return the smallest `n` elements.
  2612. Parameters
  2613. ----------
  2614. n : int, default 5
  2615. Return this many ascending sorted values.
  2616. keep : {'first', 'last', 'all'}, default 'first'
  2617. When there are duplicate values that cannot all fit in a
  2618. Series of `n` elements:
  2619. - ``first`` : take the first occurrences based on the index order
  2620. - ``last`` : take the last occurrences based on the index order
  2621. - ``all`` : keep all occurrences. This can result in a Series of
  2622. size larger than `n`.
  2623. Returns
  2624. -------
  2625. Series
  2626. The `n` smallest values in the Series, sorted in increasing order.
  2627. See Also
  2628. --------
  2629. Series.nlargest: Get the `n` largest elements.
  2630. Series.sort_values: Sort Series by values.
  2631. Series.head: Return the first `n` rows.
  2632. Notes
  2633. -----
  2634. Faster than ``.sort_values().head(n)`` for small `n` relative to
  2635. the size of the ``Series`` object.
  2636. Examples
  2637. --------
  2638. >>> countries_population = {"Italy": 59000000, "France": 65000000,
  2639. ... "Brunei": 434000, "Malta": 434000,
  2640. ... "Maldives": 434000, "Iceland": 337000,
  2641. ... "Nauru": 11300, "Tuvalu": 11300,
  2642. ... "Anguilla": 11300, "Monserat": 5200}
  2643. >>> s = pd.Series(countries_population)
  2644. >>> s
  2645. Italy 59000000
  2646. France 65000000
  2647. Brunei 434000
  2648. Malta 434000
  2649. Maldives 434000
  2650. Iceland 337000
  2651. Nauru 11300
  2652. Tuvalu 11300
  2653. Anguilla 11300
  2654. Monserat 5200
  2655. dtype: int64
  2656. The `n` largest elements where ``n=5`` by default.
  2657. >>> s.nsmallest()
  2658. Monserat 5200
  2659. Nauru 11300
  2660. Tuvalu 11300
  2661. Anguilla 11300
  2662. Iceland 337000
  2663. dtype: int64
  2664. The `n` smallest elements where ``n=3``. Default `keep` value is
  2665. 'first' so Nauru and Tuvalu will be kept.
  2666. >>> s.nsmallest(3)
  2667. Monserat 5200
  2668. Nauru 11300
  2669. Tuvalu 11300
  2670. dtype: int64
  2671. The `n` smallest elements where ``n=3`` and keeping the last
  2672. duplicates. Anguilla and Tuvalu will be kept since they are the last
  2673. with value 11300 based on the index order.
  2674. >>> s.nsmallest(3, keep='last')
  2675. Monserat 5200
  2676. Anguilla 11300
  2677. Tuvalu 11300
  2678. dtype: int64
  2679. The `n` smallest elements where ``n=3`` with all duplicates kept. Note
  2680. that the returned Series has four elements due to the three duplicates.
  2681. >>> s.nsmallest(3, keep='all')
  2682. Monserat 5200
  2683. Nauru 11300
  2684. Tuvalu 11300
  2685. Anguilla 11300
  2686. dtype: int64
  2687. """
  2688. return algorithms.SelectNSeries(self, n=n, keep=keep).nsmallest()
  2689. def swaplevel(self, i=-2, j=-1, copy=True):
  2690. """
  2691. Swap levels i and j in a MultiIndex.
  2692. Parameters
  2693. ----------
  2694. i, j : int, string (can be mixed)
  2695. Level of index to be swapped. Can pass level name as string.
  2696. Returns
  2697. -------
  2698. swapped : Series
  2699. .. versionchanged:: 0.18.1
  2700. The indexes ``i`` and ``j`` are now optional, and default to
  2701. the two innermost levels of the index.
  2702. """
  2703. new_index = self.index.swaplevel(i, j)
  2704. return self._constructor(self._values, index=new_index,
  2705. copy=copy).__finalize__(self)
  2706. def reorder_levels(self, order):
  2707. """
  2708. Rearrange index levels using input order.
  2709. May not drop or duplicate levels.
  2710. Parameters
  2711. ----------
  2712. order : list of int representing new level order
  2713. (reference level by number or key)
  2714. Returns
  2715. -------
  2716. type of caller (new object)
  2717. """
  2718. if not isinstance(self.index, MultiIndex): # pragma: no cover
  2719. raise Exception('Can only reorder levels on a hierarchical axis.')
  2720. result = self.copy()
  2721. result.index = result.index.reorder_levels(order)
  2722. return result
  2723. def unstack(self, level=-1, fill_value=None):
  2724. """
  2725. Unstack, a.k.a. pivot, Series with MultiIndex to produce DataFrame.
  2726. The level involved will automatically get sorted.
  2727. Parameters
  2728. ----------
  2729. level : int, string, or list of these, default last level
  2730. Level(s) to unstack, can pass level name
  2731. fill_value : replace NaN with this value if the unstack produces
  2732. missing values
  2733. .. versionadded:: 0.18.0
  2734. Returns
  2735. -------
  2736. unstacked : DataFrame
  2737. Examples
  2738. --------
  2739. >>> s = pd.Series([1, 2, 3, 4],
  2740. ... index=pd.MultiIndex.from_product([['one', 'two'], ['a', 'b']]))
  2741. >>> s
  2742. one a 1
  2743. b 2
  2744. two a 3
  2745. b 4
  2746. dtype: int64
  2747. >>> s.unstack(level=-1)
  2748. a b
  2749. one 1 2
  2750. two 3 4
  2751. >>> s.unstack(level=0)
  2752. one two
  2753. a 1 3
  2754. b 2 4
  2755. """
  2756. from pandas.core.reshape.reshape import unstack
  2757. return unstack(self, level, fill_value)
  2758. # ----------------------------------------------------------------------
  2759. # function application
  2760. def map(self, arg, na_action=None):
  2761. """
  2762. Map values of Series according to input correspondence.
  2763. Used for substituting each value in a Series with another value,
  2764. that may be derived from a function, a ``dict`` or
  2765. a :class:`Series`.
  2766. Parameters
  2767. ----------
  2768. arg : function, dict, or Series
  2769. Mapping correspondence.
  2770. na_action : {None, 'ignore'}, default None
  2771. If 'ignore', propagate NaN values, without passing them to the
  2772. mapping correspondence.
  2773. Returns
  2774. -------
  2775. Series
  2776. Same index as caller.
  2777. See Also
  2778. --------
  2779. Series.apply : For applying more complex functions on a Series.
  2780. DataFrame.apply : Apply a function row-/column-wise.
  2781. DataFrame.applymap : Apply a function elementwise on a whole DataFrame.
  2782. Notes
  2783. -----
  2784. When ``arg`` is a dictionary, values in Series that are not in the
  2785. dictionary (as keys) are converted to ``NaN``. However, if the
  2786. dictionary is a ``dict`` subclass that defines ``__missing__`` (i.e.
  2787. provides a method for default values), then this default is used
  2788. rather than ``NaN``.
  2789. Examples
  2790. --------
  2791. >>> s = pd.Series(['cat', 'dog', np.nan, 'rabbit'])
  2792. >>> s
  2793. 0 cat
  2794. 1 dog
  2795. 2 NaN
  2796. 3 rabbit
  2797. dtype: object
  2798. ``map`` accepts a ``dict`` or a ``Series``. Values that are not found
  2799. in the ``dict`` are converted to ``NaN``, unless the dict has a default
  2800. value (e.g. ``defaultdict``):
  2801. >>> s.map({'cat': 'kitten', 'dog': 'puppy'})
  2802. 0 kitten
  2803. 1 puppy
  2804. 2 NaN
  2805. 3 NaN
  2806. dtype: object
  2807. It also accepts a function:
  2808. >>> s.map('I am a {}'.format)
  2809. 0 I am a cat
  2810. 1 I am a dog
  2811. 2 I am a nan
  2812. 3 I am a rabbit
  2813. dtype: object
  2814. To avoid applying the function to missing values (and keep them as
  2815. ``NaN``) ``na_action='ignore'`` can be used:
  2816. >>> s.map('I am a {}'.format, na_action='ignore')
  2817. 0 I am a cat
  2818. 1 I am a dog
  2819. 2 NaN
  2820. 3 I am a rabbit
  2821. dtype: object
  2822. """
  2823. new_values = super(Series, self)._map_values(
  2824. arg, na_action=na_action)
  2825. return self._constructor(new_values,
  2826. index=self.index).__finalize__(self)
def _gotitem(self, key, ndim, subset=None):
    """
    Sub-classes to define. Return a sliced object.

    Parameters
    ----------
    key : string / list of selections
    ndim : 1,2
        requested ndim of result
    subset : object, default None
        subset to act on
    """
    # A Series has no columns to select from, so slicing is a no-op and
    # the object itself is returned unchanged.
    return self
# Docstring fragments substituted into ``aggregate`` below via
# ``@Substitution``.
_agg_see_also_doc = dedent("""
See Also
--------
Series.apply : Invoke function on a Series.
Series.transform : Transform function producing a Series with like indexes.
""")

_agg_examples_doc = dedent("""
Examples
--------
>>> s = pd.Series([1, 2, 3, 4])
>>> s
0    1
1    2
2    3
3    4
dtype: int64

>>> s.agg('min')
1

>>> s.agg(['min', 'max'])
min   1
max   4
dtype: int64
""")
@Substitution(see_also=_agg_see_also_doc,
              examples=_agg_examples_doc,
              versionadded='.. versionadded:: 0.20.0',
              **_shared_doc_kwargs)
@Appender(generic._shared_docs['aggregate'])
def aggregate(self, func, axis=0, *args, **kwargs):
    # Docstring is injected by the decorators above from the shared
    # 'aggregate' template.

    # Validate the axis parameter
    self._get_axis_number(axis)
    result, how = self._aggregate(func, *args, **kwargs)
    if result is None:

        # we can be called from an inner function which
        # passes this meta-data
        kwargs.pop('_axis', None)
        kwargs.pop('_level', None)

        # try a regular apply, this evaluates lambdas
        # row-by-row; however if the lambda is expected a Series
        # expression, e.g.: lambda x: x-x.quantile(0.25)
        # this will fail, so we can try a vectorized evaluation

        # we cannot FIRST try the vectorized evaluation, because
        # then .agg and .apply would have different semantics if the
        # operation is actually defined on the Series, e.g. str
        try:
            result = self.apply(func, *args, **kwargs)
        except (ValueError, AttributeError, TypeError):
            # fall back to evaluating func against the whole Series
            result = func(self, *args, **kwargs)

    return result

# ``agg`` is the documented alias for ``aggregate``.
agg = aggregate
@Appender(generic._shared_docs['transform'] % _shared_doc_kwargs)
def transform(self, func, axis=0, *args, **kwargs):
    # Docstring is injected from the shared 'transform' template above.

    # Validate the axis parameter (only 0/'index' is valid for a Series).
    self._get_axis_number(axis)
    return super(Series, self).transform(func, *args, **kwargs)
def apply(self, func, convert_dtype=True, args=(), **kwds):
    """
    Invoke function on values of Series.

    Can be ufunc (a NumPy function that applies to the entire Series)
    or a Python function that only works on single values.

    Parameters
    ----------
    func : function
        Python function or NumPy ufunc to apply.
    convert_dtype : bool, default True
        Try to find better dtype for elementwise function results. If
        False, leave as dtype=object.
    args : tuple
        Positional arguments passed to func after the series value.
    **kwds
        Additional keyword arguments passed to func.

    Returns
    -------
    Series or DataFrame
        If func returns a Series object the result will be a DataFrame.

    See Also
    --------
    Series.map: For element-wise operations.
    Series.agg: Only perform aggregating type operations.
    Series.transform: Only perform transforming type operations.

    Examples
    --------
    Create a series with typical summer temperatures for each city.

    >>> s = pd.Series([20, 21, 12],
    ...               index=['London', 'New York', 'Helsinki'])
    >>> s
    London      20
    New York    21
    Helsinki    12
    dtype: int64

    Square the values by defining a function and passing it as an
    argument to ``apply()``.

    >>> def square(x):
    ...     return x ** 2
    >>> s.apply(square)
    London      400
    New York    441
    Helsinki    144
    dtype: int64

    Square the values by passing an anonymous function as an
    argument to ``apply()``.

    >>> s.apply(lambda x: x ** 2)
    London      400
    New York    441
    Helsinki    144
    dtype: int64

    Define a custom function that needs additional positional
    arguments and pass these additional arguments using the
    ``args`` keyword.

    >>> def subtract_custom_value(x, custom_value):
    ...     return x - custom_value
    >>> s.apply(subtract_custom_value, args=(5,))
    London      15
    New York    16
    Helsinki     7
    dtype: int64

    Define a custom function that takes keyword arguments
    and pass these arguments to ``apply``.

    >>> def add_custom_values(x, **kwargs):
    ...     for month in kwargs:
    ...         x += kwargs[month]
    ...     return x
    >>> s.apply(add_custom_values, june=30, july=20, august=25)
    London      95
    New York    96
    Helsinki    87
    dtype: int64

    Use a function from the Numpy library.

    >>> s.apply(np.log)
    London      2.995732
    New York    3.044522
    Helsinki    2.484907
    dtype: float64
    """
    if len(self) == 0:
        # Empty Series: nothing to map; preserve dtype and index.
        return self._constructor(dtype=self.dtype,
                                 index=self.index).__finalize__(self)

    # dispatch to agg
    if isinstance(func, (list, dict)):
        return self.aggregate(func, *args, **kwds)

    # if we are a string, try to dispatch
    if isinstance(func, compat.string_types):
        return self._try_aggregate_string_function(func, *args, **kwds)

    # handle ufuncs and lambdas
    # NOTE(review): precedence here is ``kwds or (args and not ufunc)``,
    # so a ufunc passed together with positional ``args`` is NOT wrapped
    # (the args are dropped and the ufunc is applied whole-Series below) —
    # confirm this is intentional before changing.
    if kwds or args and not isinstance(func, np.ufunc):
        def f(x):
            return func(x, *args, **kwds)
    else:
        f = func

    with np.errstate(all='ignore'):
        if isinstance(f, np.ufunc):
            # ufuncs operate on the whole Series at once.
            return f(self)

        # row-wise access
        if is_extension_type(self.dtype):
            mapped = self._values.map(f)
        else:
            values = self.astype(object).values
            mapped = lib.map_infer(values, f, convert=convert_dtype)

    if len(mapped) and isinstance(mapped[0], Series):
        # func returned a Series per element: stack into a DataFrame.
        from pandas.core.frame import DataFrame
        return DataFrame(mapped.tolist(), index=self.index)
    else:
        return self._constructor(mapped,
                                 index=self.index).__finalize__(self)
def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
            filter_type=None, **kwds):
    """
    Perform a reduction operation.

    If we have an ndarray as a value, then simply perform the operation,
    otherwise delegate to the object.
    """
    delegate = self._values

    if axis is not None:
        # Validate the axis parameter; the value itself is not used.
        self._get_axis_number(axis)

    if isinstance(delegate, Categorical):
        # TODO deprecate numeric_only argument for Categorical and use
        # skipna as well, see GH25303
        return delegate._reduce(name, numeric_only=numeric_only, **kwds)
    elif isinstance(delegate, ExtensionArray):
        # dispatch to ExtensionArray interface
        return delegate._reduce(name, skipna=skipna, **kwds)
    elif is_datetime64_dtype(delegate):
        # use DatetimeIndex implementation to handle skipna correctly
        delegate = DatetimeIndex(delegate)

    # dispatch to numpy arrays
    elif isinstance(delegate, np.ndarray):
        if numeric_only:
            raise NotImplementedError('Series.{0} does not implement '
                                      'numeric_only.'.format(name))
        with np.errstate(all='ignore'):
            return op(delegate, skipna=skipna, **kwds)

    # TODO(EA) dispatch to Index
    # remove once all internals extension types are
    # moved to ExtensionArrays
    return delegate._reduce(op=op, name=name, axis=axis, skipna=skipna,
                            numeric_only=numeric_only,
                            filter_type=filter_type, **kwds)
  3036. def _reindex_indexer(self, new_index, indexer, copy):
  3037. if indexer is None:
  3038. if copy:
  3039. return self.copy()
  3040. return self
  3041. new_values = algorithms.take_1d(self._values, indexer,
  3042. allow_fill=True, fill_value=None)
  3043. return self._constructor(new_values, index=new_index)
  3044. def _needs_reindex_multi(self, axes, method, level):
  3045. """
  3046. Check if we do need a multi reindex; this is for compat with
  3047. higher dims.
  3048. """
  3049. return False
@Appender(generic._shared_docs['align'] % _shared_doc_kwargs)
def align(self, other, join='outer', axis=None, level=None, copy=True,
          fill_value=None, method=None, limit=None, fill_axis=0,
          broadcast_axis=None):
    # Pure pass-through to NDFrame.align; the docstring is injected by
    # the @Appender decorator above.
    return super(Series, self).align(other, join=join, axis=axis,
                                     level=level, copy=copy,
                                     fill_value=fill_value, method=method,
                                     limit=limit, fill_axis=fill_axis,
                                     broadcast_axis=broadcast_axis)
  3059. def rename(self, index=None, **kwargs):
  3060. """
  3061. Alter Series index labels or name.
  3062. Function / dict values must be unique (1-to-1). Labels not contained in
  3063. a dict / Series will be left as-is. Extra labels listed don't throw an
  3064. error.
  3065. Alternatively, change ``Series.name`` with a scalar value.
  3066. See the :ref:`user guide <basics.rename>` for more.
  3067. Parameters
  3068. ----------
  3069. index : scalar, hashable sequence, dict-like or function, optional
  3070. dict-like or functions are transformations to apply to
  3071. the index.
  3072. Scalar or hashable sequence-like will alter the ``Series.name``
  3073. attribute.
  3074. copy : bool, default True
  3075. Also copy underlying data
  3076. inplace : bool, default False
  3077. Whether to return a new Series. If True then value of copy is
  3078. ignored.
  3079. level : int or level name, default None
  3080. In case of a MultiIndex, only rename labels in the specified
  3081. level.
  3082. Returns
  3083. -------
  3084. renamed : Series (new object)
  3085. See Also
  3086. --------
  3087. Series.rename_axis
  3088. Examples
  3089. --------
  3090. >>> s = pd.Series([1, 2, 3])
  3091. >>> s
  3092. 0 1
  3093. 1 2
  3094. 2 3
  3095. dtype: int64
  3096. >>> s.rename("my_name") # scalar, changes Series.name
  3097. 0 1
  3098. 1 2
  3099. 2 3
  3100. Name: my_name, dtype: int64
  3101. >>> s.rename(lambda x: x ** 2) # function, changes labels
  3102. 0 1
  3103. 1 2
  3104. 4 3
  3105. dtype: int64
  3106. >>> s.rename({1: 3, 2: 5}) # mapping, changes labels
  3107. 0 1
  3108. 3 2
  3109. 5 3
  3110. dtype: int64
  3111. """
  3112. kwargs['inplace'] = validate_bool_kwarg(kwargs.get('inplace', False),
  3113. 'inplace')
  3114. non_mapping = is_scalar(index) or (is_list_like(index) and
  3115. not is_dict_like(index))
  3116. if non_mapping:
  3117. return self._set_name(index, inplace=kwargs.get('inplace'))
  3118. return super(Series, self).rename(index=index, **kwargs)
  3119. @Substitution(**_shared_doc_kwargs)
  3120. @Appender(generic.NDFrame.reindex.__doc__)
  3121. def reindex(self, index=None, **kwargs):
  3122. return super(Series, self).reindex(index=index, **kwargs)
  3123. def drop(self, labels=None, axis=0, index=None, columns=None,
  3124. level=None, inplace=False, errors='raise'):
  3125. """
  3126. Return Series with specified index labels removed.
  3127. Remove elements of a Series based on specifying the index labels.
  3128. When using a multi-index, labels on different levels can be removed
  3129. by specifying the level.
  3130. Parameters
  3131. ----------
  3132. labels : single label or list-like
  3133. Index labels to drop.
  3134. axis : 0, default 0
  3135. Redundant for application on Series.
  3136. index, columns : None
  3137. Redundant for application on Series, but index can be used instead
  3138. of labels.
  3139. .. versionadded:: 0.21.0
  3140. level : int or level name, optional
  3141. For MultiIndex, level for which the labels will be removed.
  3142. inplace : bool, default False
  3143. If True, do operation inplace and return None.
  3144. errors : {'ignore', 'raise'}, default 'raise'
  3145. If 'ignore', suppress error and only existing labels are dropped.
  3146. Returns
  3147. -------
  3148. dropped : pandas.Series
  3149. Raises
  3150. ------
  3151. KeyError
  3152. If none of the labels are found in the index.
  3153. See Also
  3154. --------
  3155. Series.reindex : Return only specified index labels of Series.
  3156. Series.dropna : Return series without null values.
  3157. Series.drop_duplicates : Return Series with duplicate values removed.
  3158. DataFrame.drop : Drop specified labels from rows or columns.
  3159. Examples
  3160. --------
  3161. >>> s = pd.Series(data=np.arange(3), index=['A','B','C'])
  3162. >>> s
  3163. A 0
  3164. B 1
  3165. C 2
  3166. dtype: int64
  3167. Drop labels B en C
  3168. >>> s.drop(labels=['B','C'])
  3169. A 0
  3170. dtype: int64
  3171. Drop 2nd level label in MultiIndex Series
  3172. >>> midx = pd.MultiIndex(levels=[['lama', 'cow', 'falcon'],
  3173. ... ['speed', 'weight', 'length']],
  3174. ... codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2],
  3175. ... [0, 1, 2, 0, 1, 2, 0, 1, 2]])
  3176. >>> s = pd.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3],
  3177. ... index=midx)
  3178. >>> s
  3179. lama speed 45.0
  3180. weight 200.0
  3181. length 1.2
  3182. cow speed 30.0
  3183. weight 250.0
  3184. length 1.5
  3185. falcon speed 320.0
  3186. weight 1.0
  3187. length 0.3
  3188. dtype: float64
  3189. >>> s.drop(labels='weight', level=1)
  3190. lama speed 45.0
  3191. length 1.2
  3192. cow speed 30.0
  3193. length 1.5
  3194. falcon speed 320.0
  3195. length 0.3
  3196. dtype: float64
  3197. """
  3198. return super(Series, self).drop(labels=labels, axis=axis, index=index,
  3199. columns=columns, level=level,
  3200. inplace=inplace, errors=errors)
  3201. @Substitution(**_shared_doc_kwargs)
  3202. @Appender(generic.NDFrame.fillna.__doc__)
  3203. def fillna(self, value=None, method=None, axis=None, inplace=False,
  3204. limit=None, downcast=None, **kwargs):
  3205. return super(Series, self).fillna(value=value, method=method,
  3206. axis=axis, inplace=inplace,
  3207. limit=limit, downcast=downcast,
  3208. **kwargs)
  3209. @Appender(generic._shared_docs['replace'] % _shared_doc_kwargs)
  3210. def replace(self, to_replace=None, value=None, inplace=False, limit=None,
  3211. regex=False, method='pad'):
  3212. return super(Series, self).replace(to_replace=to_replace, value=value,
  3213. inplace=inplace, limit=limit,
  3214. regex=regex, method=method)
  3215. @Appender(generic._shared_docs['shift'] % _shared_doc_kwargs)
  3216. def shift(self, periods=1, freq=None, axis=0, fill_value=None):
  3217. return super(Series, self).shift(periods=periods, freq=freq, axis=axis,
  3218. fill_value=fill_value)
  3219. def reindex_axis(self, labels, axis=0, **kwargs):
  3220. """
  3221. Conform Series to new index with optional filling logic.
  3222. .. deprecated:: 0.21.0
  3223. Use ``Series.reindex`` instead.
  3224. """
  3225. # for compatibility with higher dims
  3226. if axis != 0:
  3227. raise ValueError("cannot reindex series on non-zero axis!")
  3228. msg = ("'.reindex_axis' is deprecated and will be removed in a future "
  3229. "version. Use '.reindex' instead.")
  3230. warnings.warn(msg, FutureWarning, stacklevel=2)
  3231. return self.reindex(index=labels, **kwargs)
  3232. def memory_usage(self, index=True, deep=False):
  3233. """
  3234. Return the memory usage of the Series.
  3235. The memory usage can optionally include the contribution of
  3236. the index and of elements of `object` dtype.
  3237. Parameters
  3238. ----------
  3239. index : bool, default True
  3240. Specifies whether to include the memory usage of the Series index.
  3241. deep : bool, default False
  3242. If True, introspect the data deeply by interrogating
  3243. `object` dtypes for system-level memory consumption, and include
  3244. it in the returned value.
  3245. Returns
  3246. -------
  3247. int
  3248. Bytes of memory consumed.
  3249. See Also
  3250. --------
  3251. numpy.ndarray.nbytes : Total bytes consumed by the elements of the
  3252. array.
  3253. DataFrame.memory_usage : Bytes consumed by a DataFrame.
  3254. Examples
  3255. --------
  3256. >>> s = pd.Series(range(3))
  3257. >>> s.memory_usage()
  3258. 104
  3259. Not including the index gives the size of the rest of the data, which
  3260. is necessarily smaller:
  3261. >>> s.memory_usage(index=False)
  3262. 24
  3263. The memory footprint of `object` values is ignored by default:
  3264. >>> s = pd.Series(["a", "b"])
  3265. >>> s.values
  3266. array(['a', 'b'], dtype=object)
  3267. >>> s.memory_usage()
  3268. 96
  3269. >>> s.memory_usage(deep=True)
  3270. 212
  3271. """
  3272. v = super(Series, self).memory_usage(deep=deep)
  3273. if index:
  3274. v += self.index.memory_usage(deep=deep)
  3275. return v
    @Appender(generic.NDFrame._take.__doc__)
    def _take(self, indices, axis=0, is_copy=False):
        # Internal positional take: select elements by integer position,
        # rebuild the index, and optionally propagate copy state for
        # chained-assignment (SettingWithCopy) detection.
        indices = ensure_platform_int(indices)
        new_index = self.index.take(indices)

        if is_categorical_dtype(self):
            # https://github.com/pandas-dev/pandas/issues/20664
            # TODO: remove when the default Categorical.take behavior changes
            # Normalize negative positions up front because the take below
            # is called with allow_fill=False.
            indices = maybe_convert_indices(indices, len(self._get_axis(axis)))
            kwargs = {'allow_fill': False}
        else:
            kwargs = {}
        new_values = self._values.take(indices, **kwargs)

        result = (self._constructor(new_values, index=new_index,
                                    fastpath=True).__finalize__(self))

        # Maybe set copy if we didn't actually change the index.
        if is_copy:
            if not result._get_axis(axis).equals(self._get_axis(axis)):
                result._set_is_copy(self)

        return result
  3295. def isin(self, values):
  3296. """
  3297. Check whether `values` are contained in Series.
  3298. Return a boolean Series showing whether each element in the Series
  3299. matches an element in the passed sequence of `values` exactly.
  3300. Parameters
  3301. ----------
  3302. values : set or list-like
  3303. The sequence of values to test. Passing in a single string will
  3304. raise a ``TypeError``. Instead, turn a single string into a
  3305. list of one element.
  3306. .. versionadded:: 0.18.1
  3307. Support for values as a set.
  3308. Returns
  3309. -------
  3310. isin : Series (bool dtype)
  3311. Raises
  3312. ------
  3313. TypeError
  3314. * If `values` is a string
  3315. See Also
  3316. --------
  3317. DataFrame.isin : Equivalent method on DataFrame.
  3318. Examples
  3319. --------
  3320. >>> s = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama',
  3321. ... 'hippo'], name='animal')
  3322. >>> s.isin(['cow', 'lama'])
  3323. 0 True
  3324. 1 True
  3325. 2 True
  3326. 3 False
  3327. 4 True
  3328. 5 False
  3329. Name: animal, dtype: bool
  3330. Passing a single string as ``s.isin('lama')`` will raise an error. Use
  3331. a list of one element instead:
  3332. >>> s.isin(['lama'])
  3333. 0 True
  3334. 1 False
  3335. 2 True
  3336. 3 False
  3337. 4 True
  3338. 5 False
  3339. Name: animal, dtype: bool
  3340. """
  3341. result = algorithms.isin(self, values)
  3342. return self._constructor(result, index=self.index).__finalize__(self)
  3343. def between(self, left, right, inclusive=True):
  3344. """
  3345. Return boolean Series equivalent to left <= series <= right.
  3346. This function returns a boolean vector containing `True` wherever the
  3347. corresponding Series element is between the boundary values `left` and
  3348. `right`. NA values are treated as `False`.
  3349. Parameters
  3350. ----------
  3351. left : scalar
  3352. Left boundary.
  3353. right : scalar
  3354. Right boundary.
  3355. inclusive : bool, default True
  3356. Include boundaries.
  3357. Returns
  3358. -------
  3359. Series
  3360. Each element will be a boolean.
  3361. See Also
  3362. --------
  3363. Series.gt : Greater than of series and other.
  3364. Series.lt : Less than of series and other.
  3365. Notes
  3366. -----
  3367. This function is equivalent to ``(left <= ser) & (ser <= right)``
  3368. Examples
  3369. --------
  3370. >>> s = pd.Series([2, 0, 4, 8, np.nan])
  3371. Boundary values are included by default:
  3372. >>> s.between(1, 4)
  3373. 0 True
  3374. 1 False
  3375. 2 True
  3376. 3 False
  3377. 4 False
  3378. dtype: bool
  3379. With `inclusive` set to ``False`` boundary values are excluded:
  3380. >>> s.between(1, 4, inclusive=False)
  3381. 0 True
  3382. 1 False
  3383. 2 False
  3384. 3 False
  3385. 4 False
  3386. dtype: bool
  3387. `left` and `right` can be any scalar value:
  3388. >>> s = pd.Series(['Alice', 'Bob', 'Carol', 'Eve'])
  3389. >>> s.between('Anna', 'Daniel')
  3390. 0 False
  3391. 1 True
  3392. 2 True
  3393. 3 False
  3394. dtype: bool
  3395. """
  3396. if inclusive:
  3397. lmask = self >= left
  3398. rmask = self <= right
  3399. else:
  3400. lmask = self > left
  3401. rmask = self < right
  3402. return lmask & rmask
    @classmethod
    def from_csv(cls, path, sep=',', parse_dates=True, header=None,
                 index_col=0, encoding=None, infer_datetime_format=False):
        """
        Read CSV file.

        .. deprecated:: 0.21.0
            Use :func:`pandas.read_csv` instead.

        It is preferable to use the more powerful :func:`pandas.read_csv`
        for most general purposes, but ``from_csv`` makes for an easy
        roundtrip to and from a file (the exact counterpart of
        ``to_csv``), especially with a time Series.

        This method only differs from :func:`pandas.read_csv` in some
        defaults:

        - `index_col` is ``0`` instead of ``None`` (take first column as
          index by default)
        - `header` is ``None`` instead of ``0`` (the first row is not used
          as the column names)
        - `parse_dates` is ``True`` instead of ``False`` (try parsing the
          index as datetime by default)

        With :func:`pandas.read_csv`, the option ``squeeze=True`` can be
        used to return a Series like ``from_csv``.

        Parameters
        ----------
        path : string file path or file handle / StringIO
        sep : string, default ','
            Field delimiter.
        parse_dates : boolean, default True
            Parse dates. Different default from read_table.
        header : int, default None
            Row to use as header (skip prior rows).
        index_col : int or sequence, default 0
            Column to use for index. If a sequence is given, a MultiIndex
            is used. Different default from read_table.
        encoding : string, optional
            a string representing the encoding to use if the contents are
            non-ascii, for python versions prior to 3
        infer_datetime_format : boolean, default False
            If True and `parse_dates` is True for a column, try to infer the
            datetime format based on the first datetime string. If the
            format can be inferred, there often will be a large parsing
            speed-up.

        Returns
        -------
        y : Series

        See Also
        --------
        read_csv
        """
        # We're calling `DataFrame.from_csv` in the implementation,
        # which will propagate a warning regarding `from_csv` deprecation.
        from pandas.core.frame import DataFrame
        df = DataFrame.from_csv(path, header=header, index_col=index_col,
                                sep=sep, parse_dates=parse_dates,
                                encoding=encoding,
                                infer_datetime_format=infer_datetime_format)
        # Squeeze the single-column frame down to a Series.
        result = df.iloc[:, 0]
        if header is None:
            # No header row means no meaningful name metadata was read.
            result.index.name = result.name = None

        return result
    @Appender(generic.NDFrame.to_csv.__doc__)
    def to_csv(self, *args, **kwargs):
        # Transitional shim: maps the legacy Series.to_csv signature onto
        # the DataFrame-aligned one, emitting FutureWarnings on old usage,
        # then delegates to DataFrame.to_csv on a one-column frame.

        # Positional parameter order of the new (DataFrame-aligned)
        # signature ...
        names = ["path_or_buf", "sep", "na_rep", "float_format", "columns",
                 "header", "index", "index_label", "mode", "encoding",
                 "compression", "quoting", "quotechar", "line_terminator",
                 "chunksize", "tupleize_cols", "date_format", "doublequote",
                 "escapechar", "decimal"]
        # ... and of the old Series-specific signature.
        old_names = ["path_or_buf", "index", "sep", "na_rep", "float_format",
                     "header", "index_label", "mode", "encoding",
                     "compression", "date_format", "decimal"]

        if "path" in kwargs:
            # Old keyword name for the destination argument.
            warnings.warn("The signature of `Series.to_csv` was aligned "
                          "to that of `DataFrame.to_csv`, and argument "
                          "'path' will be renamed to 'path_or_buf'.",
                          FutureWarning, stacklevel=2)
            kwargs["path_or_buf"] = kwargs.pop("path")

        if len(args) > 1:
            # Either "index" (old signature) or "sep" (new signature) is being
            # passed as second argument (while the first is the same)
            maybe_sep = args[1]

            if not (is_string_like(maybe_sep) and len(maybe_sep) == 1):
                # old signature
                warnings.warn("The signature of `Series.to_csv` was aligned "
                              "to that of `DataFrame.to_csv`. Note that the "
                              "order of arguments changed, and the new one "
                              "has 'sep' in first place, for which \"{}\" is "
                              "not a valid value. The old order will cease to "
                              "be supported in a future version. Please refer "
                              "to the documentation for `DataFrame.to_csv` "
                              "when updating your function "
                              "calls.".format(maybe_sep),
                              FutureWarning, stacklevel=2)
                names = old_names

        # Map the positional args onto whichever signature was detected.
        pos_args = dict(zip(names[:len(args)], args))

        for key in pos_args:
            if key in kwargs:
                raise ValueError("Argument given by name ('{}') and position "
                                 "({})".format(key, names.index(key)))
            kwargs[key] = pos_args[key]

        if kwargs.get("header", None) is None:
            # The default for 'header' is changing from False to True;
            # warn callers who rely on the implicit default.
            warnings.warn("The signature of `Series.to_csv` was aligned "
                          "to that of `DataFrame.to_csv`, and argument "
                          "'header' will change its default value from False "
                          "to True: please pass an explicit value to suppress "
                          "this warning.", FutureWarning,
                          stacklevel=2)
            kwargs["header"] = False  # Backwards compatibility.
        return self.to_frame().to_csv(**kwargs)
    @Appender(generic._shared_docs['isna'] % _shared_doc_kwargs)
    def isna(self):
        # Pure delegation; docstring supplied by @Appender.
        return super(Series, self).isna()
    @Appender(generic._shared_docs['isna'] % _shared_doc_kwargs)
    def isnull(self):
        # Alias of isna(); pure delegation to NDFrame.
        return super(Series, self).isnull()
    @Appender(generic._shared_docs['notna'] % _shared_doc_kwargs)
    def notna(self):
        # Pure delegation; docstring supplied by @Appender.
        return super(Series, self).notna()
    @Appender(generic._shared_docs['notna'] % _shared_doc_kwargs)
    def notnull(self):
        # Alias of notna(); pure delegation to NDFrame.
        return super(Series, self).notnull()
    def dropna(self, axis=0, inplace=False, **kwargs):
        """
        Return a new Series with missing values removed.

        See the :ref:`User Guide <missing_data>` for more on which values
        are considered missing, and how to work with missing data.

        Parameters
        ----------
        axis : {0 or 'index'}, default 0
            There is only one axis to drop values from.
        inplace : bool, default False
            If True, do operation inplace and return None.
        **kwargs
            Not in use.

        Returns
        -------
        Series
            Series with NA entries dropped from it.

        See Also
        --------
        Series.isna: Indicate missing values.
        Series.notna : Indicate existing (non-missing) values.
        Series.fillna : Replace missing values.
        DataFrame.dropna : Drop rows or columns which contain NA values.
        Index.dropna : Drop missing indices.

        Examples
        --------
        >>> ser = pd.Series([1., 2., np.nan])
        >>> ser
        0    1.0
        1    2.0
        2    NaN
        dtype: float64

        Drop NA values from a Series.

        >>> ser.dropna()
        0    1.0
        1    2.0
        dtype: float64

        Keep the Series with valid entries in the same variable.

        >>> ser.dropna(inplace=True)
        >>> ser
        0    1.0
        1    2.0
        dtype: float64

        Empty strings are not considered NA values. ``None`` is considered
        an NA value.

        >>> ser = pd.Series([np.NaN, 2, pd.NaT, '', None, 'I stay'])
        >>> ser
        0       NaN
        1         2
        2       NaT
        3
        4      None
        5    I stay
        dtype: object
        >>> ser.dropna()
        1         2
        3
        5    I stay
        dtype: object
        """
        inplace = validate_bool_kwarg(inplace, 'inplace')
        # 'how' is accepted (and ignored) for signature compatibility with
        # DataFrame.dropna; any other keyword is an error.
        kwargs.pop('how', None)
        if kwargs:
            raise TypeError('dropna() got an unexpected keyword '
                            'argument "{0}"'.format(list(kwargs.keys())[0]))
        # Validate the axis parameter
        self._get_axis_number(axis or 0)

        if self._can_hold_na:
            result = remove_na_arraylike(self)
            if inplace:
                # In-place: mutate self and (implicitly) return None.
                self._update_inplace(result)
            else:
                return result
        else:
            # Dtype cannot hold NA, so there is nothing to drop.
            if inplace:
                # do nothing
                pass
            else:
                return self.copy()
  3599. def valid(self, inplace=False, **kwargs):
  3600. """
  3601. Return Series without null values.
  3602. .. deprecated:: 0.23.0
  3603. Use :meth:`Series.dropna` instead.
  3604. """
  3605. warnings.warn("Method .valid will be removed in a future version. "
  3606. "Use .dropna instead.", FutureWarning, stacklevel=2)
  3607. return self.dropna(inplace=inplace, **kwargs)
  3608. # ----------------------------------------------------------------------
  3609. # Time series-oriented methods
  3610. def to_timestamp(self, freq=None, how='start', copy=True):
  3611. """
  3612. Cast to datetimeindex of timestamps, at *beginning* of period.
  3613. Parameters
  3614. ----------
  3615. freq : string, default frequency of PeriodIndex
  3616. Desired frequency
  3617. how : {'s', 'e', 'start', 'end'}
  3618. Convention for converting period to timestamp; start of period
  3619. vs. end
  3620. Returns
  3621. -------
  3622. ts : Series with DatetimeIndex
  3623. """
  3624. new_values = self._values
  3625. if copy:
  3626. new_values = new_values.copy()
  3627. new_index = self.index.to_timestamp(freq=freq, how=how)
  3628. return self._constructor(new_values,
  3629. index=new_index).__finalize__(self)
  3630. def to_period(self, freq=None, copy=True):
  3631. """
  3632. Convert Series from DatetimeIndex to PeriodIndex with desired
  3633. frequency (inferred from index if not passed).
  3634. Parameters
  3635. ----------
  3636. freq : string, default
  3637. Returns
  3638. -------
  3639. ts : Series with PeriodIndex
  3640. """
  3641. new_values = self._values
  3642. if copy:
  3643. new_values = new_values.copy()
  3644. new_index = self.index.to_period(freq=freq)
  3645. return self._constructor(new_values,
  3646. index=new_index).__finalize__(self)
    # ----------------------------------------------------------------------
    # Accessor Methods
    # ----------------------------------------------------------------------
    # Namespaced accessors; CachedAccessor instantiates the accessor class
    # lazily on first attribute access and caches it on the instance.
    str = CachedAccessor("str", StringMethods)
    dt = CachedAccessor("dt", CombinedDatetimelikeProperties)
    cat = CachedAccessor("cat", CategoricalAccessor)
    plot = CachedAccessor("plot", gfx.SeriesPlotMethods)
    sparse = CachedAccessor("sparse", SparseAccessor)

    # ----------------------------------------------------------------------
    # Add plotting methods to Series
    hist = gfx.hist_series
# Register the single 'index' axis on Series (with 'rows' as an alias),
# then attach the shared stat/arithmetic method families generated by the
# NDFrame machinery.
Series._setup_axes(['index'], info_axis=0, stat_axis=0, aliases={'rows': 0},
                   docs={'index': 'The index (axis labels) of the Series.'})
Series._add_numeric_operations()
Series._add_series_only_operations()
Series._add_series_or_dataframe_operations()

# Add arithmetic!
ops.add_flex_arithmetic_methods(Series)
ops.add_special_arithmetic_methods(Series)