test_sql.py 97 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708
  1. """SQL io tests
  2. The SQL tests are broken down in different classes:
  3. - `PandasSQLTest`: base class with common methods for all test classes
  4. - Tests for the public API (only tests with sqlite3)
  5. - `_TestSQLApi` base class
  6. - `TestSQLApi`: test the public API with sqlalchemy engine
  7. - `TestSQLiteFallbackApi`: test the public API with a sqlite DBAPI
  8. connection
  9. - Tests for the different SQL flavors (flavor specific type conversions)
  10. - Tests for the sqlalchemy mode: `_TestSQLAlchemy` is the base class with
  11. common methods, `_TestSQLAlchemyConn` tests the API with a SQLAlchemy
  12. Connection object. The different tested flavors (sqlite3, MySQL,
  13. PostgreSQL) derive from the base class
  14. - Tests for the fallback mode (`TestSQLiteFallback`)
  15. """
  16. from __future__ import print_function
  17. import csv
  18. from datetime import date, datetime, time
  19. import sqlite3
  20. import warnings
  21. import numpy as np
  22. import pytest
  23. import pandas.compat as compat
  24. from pandas.compat import PY36, lrange, range, string_types
  25. from pandas.core.dtypes.common import (
  26. is_datetime64_dtype, is_datetime64tz_dtype)
  27. import pandas as pd
  28. from pandas import (
  29. DataFrame, Index, MultiIndex, Series, Timestamp, concat, date_range, isna,
  30. to_datetime, to_timedelta)
  31. import pandas.util.testing as tm
  32. import pandas.io.sql as sql
  33. from pandas.io.sql import read_sql_query, read_sql_table
try:
    # SQLAlchemy is an optional dependency: the SQLAlchemy-backed test
    # classes are skipped when it is missing, so the import failure is
    # recorded in a module-level flag instead of being raised.
    import sqlalchemy
    import sqlalchemy.schema
    import sqlalchemy.sql.sqltypes as sqltypes
    from sqlalchemy.ext import declarative
    from sqlalchemy.orm import session as sa_session

    SQLALCHEMY_INSTALLED = True
except ImportError:
    SQLALCHEMY_INSTALLED = False
# Raw SQL used to build the test fixtures, keyed first by purpose and then by
# database flavor ('sqlite', 'mysql', 'postgresql').  Note the differing
# identifier quoting (double quotes vs backticks) and parameter placeholder
# styles ('?' for the sqlite3 driver, '%s' for pymysql/psycopg2).
SQL_STRINGS = {
    'create_iris': {
        'sqlite': """CREATE TABLE iris (
                "SepalLength" REAL,
                "SepalWidth" REAL,
                "PetalLength" REAL,
                "PetalWidth" REAL,
                "Name" TEXT
            )""",
        'mysql': """CREATE TABLE iris (
                `SepalLength` DOUBLE,
                `SepalWidth` DOUBLE,
                `PetalLength` DOUBLE,
                `PetalWidth` DOUBLE,
                `Name` VARCHAR(200)
            )""",
        'postgresql': """CREATE TABLE iris (
                "SepalLength" DOUBLE PRECISION,
                "SepalWidth" DOUBLE PRECISION,
                "PetalLength" DOUBLE PRECISION,
                "PetalWidth" DOUBLE PRECISION,
                "Name" VARCHAR(200)
            )"""
    },
    'insert_iris': {
        'sqlite': """INSERT INTO iris VALUES(?, ?, ?, ?, ?)""",
        'mysql': """INSERT INTO iris VALUES(%s, %s, %s, %s, "%s");""",
        'postgresql': """INSERT INTO iris VALUES(%s, %s, %s, %s, %s);"""
    },
    'create_test_types': {
        'sqlite': """CREATE TABLE types_test_data (
                "TextCol" TEXT,
                "DateCol" TEXT,
                "IntDateCol" INTEGER,
                "IntDateOnlyCol" INTEGER,
                "FloatCol" REAL,
                "IntCol" INTEGER,
                "BoolCol" INTEGER,
                "IntColWithNull" INTEGER,
                "BoolColWithNull" INTEGER
            )""",
        'mysql': """CREATE TABLE types_test_data (
                `TextCol` TEXT,
                `DateCol` DATETIME,
                `IntDateCol` INTEGER,
                `IntDateOnlyCol` INTEGER,
                `FloatCol` DOUBLE,
                `IntCol` INTEGER,
                `BoolCol` BOOLEAN,
                `IntColWithNull` INTEGER,
                `BoolColWithNull` BOOLEAN
            )""",
        'postgresql': """CREATE TABLE types_test_data (
                "TextCol" TEXT,
                "DateCol" TIMESTAMP,
                "DateColWithTz" TIMESTAMP WITH TIME ZONE,
                "IntDateCol" INTEGER,
                "IntDateOnlyCol" INTEGER,
                "FloatCol" DOUBLE PRECISION,
                "IntCol" INTEGER,
                "BoolCol" BOOLEAN,
                "IntColWithNull" INTEGER,
                "BoolColWithNull" BOOLEAN
            )"""
    },
    # 'fields' gives the order in which the per-row dict values are bound to
    # the placeholders of 'query'; only postgresql has the extra
    # DateColWithTz column (and therefore one more placeholder).
    'insert_test_types': {
        'sqlite': {
            'query': """
                INSERT INTO types_test_data
                VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
            'fields': (
                'TextCol', 'DateCol', 'IntDateCol', 'IntDateOnlyCol',
                'FloatCol', 'IntCol', 'BoolCol', 'IntColWithNull',
                'BoolColWithNull'
            )
        },
        'mysql': {
            'query': """
                INSERT INTO types_test_data
                VALUES("%s", %s, %s, %s, %s, %s, %s, %s, %s)
                """,
            'fields': (
                'TextCol', 'DateCol', 'IntDateCol', 'IntDateOnlyCol',
                'FloatCol', 'IntCol', 'BoolCol', 'IntColWithNull',
                'BoolColWithNull'
            )
        },
        'postgresql': {
            'query': """
                INSERT INTO types_test_data
                VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                """,
            'fields': (
                'TextCol', 'DateCol', 'DateColWithTz',
                'IntDateCol', 'IntDateOnlyCol', 'FloatCol',
                'IntCol', 'BoolCol', 'IntColWithNull', 'BoolColWithNull'
            )
        },
    },
    'read_parameters': {
        'sqlite': "SELECT * FROM iris WHERE Name=? AND SepalLength=?",
        'mysql': 'SELECT * FROM iris WHERE `Name`="%s" AND `SepalLength`=%s',
        'postgresql': 'SELECT * FROM iris WHERE "Name"=%s AND "SepalLength"=%s'
    },
    'read_named_parameters': {
        'sqlite': """
            SELECT * FROM iris WHERE Name=:name AND SepalLength=:length
            """,
        'mysql': """
            SELECT * FROM iris WHERE
            `Name`="%(name)s" AND `SepalLength`=%(length)s
            """,
        'postgresql': """
            SELECT * FROM iris WHERE
            "Name"=%(name)s AND "SepalLength"=%(length)s
            """
    },
    'create_view': {
        'sqlite': """
            CREATE VIEW iris_view AS
            SELECT * FROM iris
            """
    }
}
  168. class MixInBase(object):
  169. def teardown_method(self, method):
  170. # if setup fails, there may not be a connection to close.
  171. if hasattr(self, 'conn'):
  172. for tbl in self._get_all_tables():
  173. self.drop_table(tbl)
  174. self._close_conn()
  175. class MySQLMixIn(MixInBase):
  176. def drop_table(self, table_name):
  177. cur = self.conn.cursor()
  178. cur.execute("DROP TABLE IF EXISTS %s" %
  179. sql._get_valid_mysql_name(table_name))
  180. self.conn.commit()
  181. def _get_all_tables(self):
  182. cur = self.conn.cursor()
  183. cur.execute('SHOW TABLES')
  184. return [table[0] for table in cur.fetchall()]
  185. def _close_conn(self):
  186. from pymysql.err import Error
  187. try:
  188. self.conn.close()
  189. except Error:
  190. pass
  191. class SQLiteMixIn(MixInBase):
  192. def drop_table(self, table_name):
  193. self.conn.execute("DROP TABLE IF EXISTS %s" %
  194. sql._get_valid_sqlite_name(table_name))
  195. self.conn.commit()
  196. def _get_all_tables(self):
  197. c = self.conn.execute(
  198. "SELECT name FROM sqlite_master WHERE type='table'")
  199. return [table[0] for table in c.fetchall()]
  200. def _close_conn(self):
  201. self.conn.close()
  202. class SQLAlchemyMixIn(MixInBase):
  203. def drop_table(self, table_name):
  204. sql.SQLDatabase(self.conn).drop_table(table_name)
  205. def _get_all_tables(self):
  206. meta = sqlalchemy.schema.MetaData(bind=self.conn)
  207. meta.reflect()
  208. table_list = meta.tables.keys()
  209. return table_list
  210. def _close_conn(self):
  211. pass
  212. class PandasSQLTest(object):
  213. """
  214. Base class with common private methods for SQLAlchemy and fallback cases.
  215. """
  216. def _get_exec(self):
  217. if hasattr(self.conn, 'execute'):
  218. return self.conn
  219. else:
  220. return self.conn.cursor()
  221. @pytest.fixture(params=[('io', 'data', 'iris.csv')])
  222. def load_iris_data(self, datapath, request):
  223. import io
  224. iris_csv_file = datapath(*request.param)
  225. if not hasattr(self, 'conn'):
  226. self.setup_connect()
  227. self.drop_table('iris')
  228. self._get_exec().execute(SQL_STRINGS['create_iris'][self.flavor])
  229. with io.open(iris_csv_file, mode='r', newline=None) as iris_csv:
  230. r = csv.reader(iris_csv)
  231. next(r) # skip header row
  232. ins = SQL_STRINGS['insert_iris'][self.flavor]
  233. for row in r:
  234. self._get_exec().execute(ins, row)
  235. def _load_iris_view(self):
  236. self.drop_table('iris_view')
  237. self._get_exec().execute(SQL_STRINGS['create_view'][self.flavor])
  238. def _check_iris_loaded_frame(self, iris_frame):
  239. pytype = iris_frame.dtypes[0].type
  240. row = iris_frame.iloc[0]
  241. assert issubclass(pytype, np.floating)
  242. tm.equalContents(row.values, [5.1, 3.5, 1.4, 0.2, 'Iris-setosa'])
  243. def _load_test1_data(self):
  244. columns = ['index', 'A', 'B', 'C', 'D']
  245. data = [(
  246. '2000-01-03 00:00:00', 0.980268513777, 3.68573087906,
  247. -0.364216805298, -1.15973806169),
  248. ('2000-01-04 00:00:00', 1.04791624281, -
  249. 0.0412318367011, -0.16181208307, 0.212549316967),
  250. ('2000-01-05 00:00:00', 0.498580885705,
  251. 0.731167677815, -0.537677223318, 1.34627041952),
  252. ('2000-01-06 00:00:00', 1.12020151869, 1.56762092543,
  253. 0.00364077397681, 0.67525259227)]
  254. self.test_frame1 = DataFrame(data, columns=columns)
  255. def _load_test2_data(self):
  256. df = DataFrame(dict(A=[4, 1, 3, 6],
  257. B=['asd', 'gsq', 'ylt', 'jkl'],
  258. C=[1.1, 3.1, 6.9, 5.3],
  259. D=[False, True, True, False],
  260. E=['1990-11-22', '1991-10-26',
  261. '1993-11-26', '1995-12-12']))
  262. df['E'] = to_datetime(df['E'])
  263. self.test_frame2 = df
  264. def _load_test3_data(self):
  265. columns = ['index', 'A', 'B']
  266. data = [(
  267. '2000-01-03 00:00:00', 2 ** 31 - 1, -1.987670),
  268. ('2000-01-04 00:00:00', -29, -0.0412318367011),
  269. ('2000-01-05 00:00:00', 20000, 0.731167677815),
  270. ('2000-01-06 00:00:00', -290867, 1.56762092543)]
  271. self.test_frame3 = DataFrame(data, columns=columns)
  272. def _load_raw_sql(self):
  273. self.drop_table('types_test_data')
  274. self._get_exec().execute(SQL_STRINGS['create_test_types'][self.flavor])
  275. ins = SQL_STRINGS['insert_test_types'][self.flavor]
  276. data = [
  277. {
  278. 'TextCol': 'first',
  279. 'DateCol': '2000-01-03 00:00:00',
  280. 'DateColWithTz': '2000-01-01 00:00:00-08:00',
  281. 'IntDateCol': 535852800,
  282. 'IntDateOnlyCol': 20101010,
  283. 'FloatCol': 10.10,
  284. 'IntCol': 1,
  285. 'BoolCol': False,
  286. 'IntColWithNull': 1,
  287. 'BoolColWithNull': False,
  288. },
  289. {
  290. 'TextCol': 'first',
  291. 'DateCol': '2000-01-04 00:00:00',
  292. 'DateColWithTz': '2000-06-01 00:00:00-07:00',
  293. 'IntDateCol': 1356998400,
  294. 'IntDateOnlyCol': 20101212,
  295. 'FloatCol': 10.10,
  296. 'IntCol': 1,
  297. 'BoolCol': False,
  298. 'IntColWithNull': None,
  299. 'BoolColWithNull': None,
  300. },
  301. ]
  302. for d in data:
  303. self._get_exec().execute(
  304. ins['query'],
  305. [d[field] for field in ins['fields']]
  306. )
  307. def _count_rows(self, table_name):
  308. result = self._get_exec().execute(
  309. "SELECT count(*) AS count_1 FROM %s" % table_name).fetchone()
  310. return result[0]
  311. def _read_sql_iris(self):
  312. iris_frame = self.pandasSQL.read_query("SELECT * FROM iris")
  313. self._check_iris_loaded_frame(iris_frame)
  314. def _read_sql_iris_parameter(self):
  315. query = SQL_STRINGS['read_parameters'][self.flavor]
  316. params = ['Iris-setosa', 5.1]
  317. iris_frame = self.pandasSQL.read_query(query, params=params)
  318. self._check_iris_loaded_frame(iris_frame)
  319. def _read_sql_iris_named_parameter(self):
  320. query = SQL_STRINGS['read_named_parameters'][self.flavor]
  321. params = {'name': 'Iris-setosa', 'length': 5.1}
  322. iris_frame = self.pandasSQL.read_query(query, params=params)
  323. self._check_iris_loaded_frame(iris_frame)
  324. def _to_sql(self, method=None):
  325. self.drop_table('test_frame1')
  326. self.pandasSQL.to_sql(self.test_frame1, 'test_frame1', method=method)
  327. assert self.pandasSQL.has_table('test_frame1')
  328. num_entries = len(self.test_frame1)
  329. num_rows = self._count_rows('test_frame1')
  330. assert num_rows == num_entries
  331. # Nuke table
  332. self.drop_table('test_frame1')
  333. def _to_sql_empty(self):
  334. self.drop_table('test_frame1')
  335. self.pandasSQL.to_sql(self.test_frame1.iloc[:0], 'test_frame1')
  336. def _to_sql_fail(self):
  337. self.drop_table('test_frame1')
  338. self.pandasSQL.to_sql(
  339. self.test_frame1, 'test_frame1', if_exists='fail')
  340. assert self.pandasSQL.has_table('test_frame1')
  341. pytest.raises(ValueError, self.pandasSQL.to_sql,
  342. self.test_frame1, 'test_frame1', if_exists='fail')
  343. self.drop_table('test_frame1')
  344. def _to_sql_replace(self):
  345. self.drop_table('test_frame1')
  346. self.pandasSQL.to_sql(
  347. self.test_frame1, 'test_frame1', if_exists='fail')
  348. # Add to table again
  349. self.pandasSQL.to_sql(
  350. self.test_frame1, 'test_frame1', if_exists='replace')
  351. assert self.pandasSQL.has_table('test_frame1')
  352. num_entries = len(self.test_frame1)
  353. num_rows = self._count_rows('test_frame1')
  354. assert num_rows == num_entries
  355. self.drop_table('test_frame1')
  356. def _to_sql_append(self):
  357. # Nuke table just in case
  358. self.drop_table('test_frame1')
  359. self.pandasSQL.to_sql(
  360. self.test_frame1, 'test_frame1', if_exists='fail')
  361. # Add to table again
  362. self.pandasSQL.to_sql(
  363. self.test_frame1, 'test_frame1', if_exists='append')
  364. assert self.pandasSQL.has_table('test_frame1')
  365. num_entries = 2 * len(self.test_frame1)
  366. num_rows = self._count_rows('test_frame1')
  367. assert num_rows == num_entries
  368. self.drop_table('test_frame1')
  369. def _to_sql_method_callable(self):
  370. check = [] # used to double check function below is really being used
  371. def sample(pd_table, conn, keys, data_iter):
  372. check.append(1)
  373. data = [dict(zip(keys, row)) for row in data_iter]
  374. conn.execute(pd_table.table.insert(), data)
  375. self.drop_table('test_frame1')
  376. self.pandasSQL.to_sql(self.test_frame1, 'test_frame1', method=sample)
  377. assert self.pandasSQL.has_table('test_frame1')
  378. assert check == [1]
  379. num_entries = len(self.test_frame1)
  380. num_rows = self._count_rows('test_frame1')
  381. assert num_rows == num_entries
  382. # Nuke table
  383. self.drop_table('test_frame1')
  384. def _roundtrip(self):
  385. self.drop_table('test_frame_roundtrip')
  386. self.pandasSQL.to_sql(self.test_frame1, 'test_frame_roundtrip')
  387. result = self.pandasSQL.read_query(
  388. 'SELECT * FROM test_frame_roundtrip')
  389. result.set_index('level_0', inplace=True)
  390. # result.index.astype(int)
  391. result.index.name = None
  392. tm.assert_frame_equal(result, self.test_frame1)
  393. def _execute_sql(self):
  394. # drop_sql = "DROP TABLE IF EXISTS test" # should already be done
  395. iris_results = self.pandasSQL.execute("SELECT * FROM iris")
  396. row = iris_results.fetchone()
  397. tm.equalContents(row, [5.1, 3.5, 1.4, 0.2, 'Iris-setosa'])
  398. def _to_sql_save_index(self):
  399. df = DataFrame.from_records([(1, 2.1, 'line1'), (2, 1.5, 'line2')],
  400. columns=['A', 'B', 'C'], index=['A'])
  401. self.pandasSQL.to_sql(df, 'test_to_sql_saves_index')
  402. ix_cols = self._get_index_columns('test_to_sql_saves_index')
  403. assert ix_cols == [['A', ], ]
  404. def _transaction_test(self):
  405. self.pandasSQL.execute("CREATE TABLE test_trans (A INT, B TEXT)")
  406. ins_sql = "INSERT INTO test_trans (A,B) VALUES (1, 'blah')"
  407. # Make sure when transaction is rolled back, no rows get inserted
  408. try:
  409. with self.pandasSQL.run_transaction() as trans:
  410. trans.execute(ins_sql)
  411. raise Exception('error')
  412. except Exception:
  413. # ignore raised exception
  414. pass
  415. res = self.pandasSQL.read_query('SELECT * FROM test_trans')
  416. assert len(res) == 0
  417. # Make sure when transaction is committed, rows do get inserted
  418. with self.pandasSQL.run_transaction() as trans:
  419. trans.execute(ins_sql)
  420. res2 = self.pandasSQL.read_query('SELECT * FROM test_trans')
  421. assert len(res2) == 1
  422. # -----------------------------------------------------------------------------
  423. # -- Testing the public API
  424. class _TestSQLApi(PandasSQLTest):
  425. """
  426. Base class to test the public API.
  427. From this two classes are derived to run these tests for both the
  428. sqlalchemy mode (`TestSQLApi`) and the fallback mode
  429. (`TestSQLiteFallbackApi`). These tests are run with sqlite3. Specific
  430. tests for the different sql flavours are included in `_TestSQLAlchemy`.
  431. Notes:
  432. flavor can always be passed even in SQLAlchemy mode,
  433. should be correctly ignored.
  434. we don't use drop_table because that isn't part of the public api
  435. """
  436. flavor = 'sqlite'
  437. mode = None
    def setup_connect(self):
        # each subclass supplies its own connect() factory
        self.conn = self.connect()
    @pytest.fixture(autouse=True)
    def setup_method(self, load_iris_data):
        # runs before every test; the iris table itself is created by the
        # load_iris_data fixture this depends on
        self.load_test_data_and_sql()
    def load_test_data_and_sql(self):
        # build the iris view, the three in-memory test frames and the raw
        # typed table used by the date/type-handling tests
        self._load_iris_view()
        self._load_test1_data()
        self._load_test2_data()
        self._load_test3_data()
        self._load_raw_sql()
  449. def test_read_sql_iris(self):
  450. iris_frame = sql.read_sql_query(
  451. "SELECT * FROM iris", self.conn)
  452. self._check_iris_loaded_frame(iris_frame)
  453. def test_read_sql_view(self):
  454. iris_frame = sql.read_sql_query(
  455. "SELECT * FROM iris_view", self.conn)
  456. self._check_iris_loaded_frame(iris_frame)
    def test_to_sql(self):
        # basic write: the table must exist afterwards
        sql.to_sql(self.test_frame1, 'test_frame1', self.conn)
        assert sql.has_table('test_frame1', self.conn)
  460. def test_to_sql_fail(self):
  461. sql.to_sql(self.test_frame1, 'test_frame2',
  462. self.conn, if_exists='fail')
  463. assert sql.has_table('test_frame2', self.conn)
  464. pytest.raises(ValueError, sql.to_sql, self.test_frame1,
  465. 'test_frame2', self.conn, if_exists='fail')
  466. def test_to_sql_replace(self):
  467. sql.to_sql(self.test_frame1, 'test_frame3',
  468. self.conn, if_exists='fail')
  469. # Add to table again
  470. sql.to_sql(self.test_frame1, 'test_frame3',
  471. self.conn, if_exists='replace')
  472. assert sql.has_table('test_frame3', self.conn)
  473. num_entries = len(self.test_frame1)
  474. num_rows = self._count_rows('test_frame3')
  475. assert num_rows == num_entries
  476. def test_to_sql_append(self):
  477. sql.to_sql(self.test_frame1, 'test_frame4',
  478. self.conn, if_exists='fail')
  479. # Add to table again
  480. sql.to_sql(self.test_frame1, 'test_frame4',
  481. self.conn, if_exists='append')
  482. assert sql.has_table('test_frame4', self.conn)
  483. num_entries = 2 * len(self.test_frame1)
  484. num_rows = self._count_rows('test_frame4')
  485. assert num_rows == num_entries
    def test_to_sql_type_mapping(self):
        # a frame with large ints and floats survives a write/read cycle
        sql.to_sql(self.test_frame3, 'test_frame5', self.conn, index=False)
        result = sql.read_sql("SELECT * FROM test_frame5", self.conn)

        tm.assert_frame_equal(self.test_frame3, result)
    def test_to_sql_series(self):
        # a Series is written as a one-column table and reads back as a frame
        s = Series(np.arange(5, dtype='int64'), name='series')
        sql.to_sql(s, "test_series", self.conn, index=False)
        s2 = sql.read_sql_query("SELECT * FROM test_series", self.conn)
        tm.assert_frame_equal(s.to_frame(), s2)
  495. @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
  496. def test_to_sql_panel(self):
  497. panel = tm.makePanel()
  498. pytest.raises(NotImplementedError, sql.to_sql, panel,
  499. 'test_panel', self.conn)
  500. def test_roundtrip(self):
  501. sql.to_sql(self.test_frame1, 'test_frame_roundtrip',
  502. con=self.conn)
  503. result = sql.read_sql_query(
  504. 'SELECT * FROM test_frame_roundtrip',
  505. con=self.conn)
  506. # HACK!
  507. result.index = self.test_frame1.index
  508. result.set_index('level_0', inplace=True)
  509. result.index.astype(int)
  510. result.index.name = None
  511. tm.assert_frame_equal(result, self.test_frame1)
    def test_roundtrip_chunksize(self):
        # chunked writes must insert every row exactly once
        sql.to_sql(self.test_frame1, 'test_frame_roundtrip', con=self.conn,
                   index=False, chunksize=2)
        result = sql.read_sql_query(
            'SELECT * FROM test_frame_roundtrip',
            con=self.conn)
        tm.assert_frame_equal(result, self.test_frame1)
  519. def test_execute_sql(self):
  520. # drop_sql = "DROP TABLE IF EXISTS test" # should already be done
  521. iris_results = sql.execute("SELECT * FROM iris", con=self.conn)
  522. row = iris_results.fetchone()
  523. tm.equalContents(row, [5.1, 3.5, 1.4, 0.2, 'Iris-setosa'])
    def test_date_parsing(self):
        # Test date parsing in read_sql
        # No Parsing: without parse_dates the column stays non-datetime
        df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn)
        assert not issubclass(df.DateCol.dtype.type, np.datetime64)

        # listed column is parsed to datetime64
        df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
                                parse_dates=['DateCol'])
        assert issubclass(df.DateCol.dtype.type, np.datetime64)
        assert df.DateCol.tolist() == [
            pd.Timestamp(2000, 1, 3, 0, 0, 0),
            pd.Timestamp(2000, 1, 4, 0, 0, 0)
        ]

        # dict value is treated as an explicit strftime format
        df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
                                parse_dates={'DateCol': '%Y-%m-%d %H:%M:%S'})
        assert issubclass(df.DateCol.dtype.type, np.datetime64)
        assert df.DateCol.tolist() == [
            pd.Timestamp(2000, 1, 3, 0, 0, 0),
            pd.Timestamp(2000, 1, 4, 0, 0, 0)
        ]

        # integer column listed in parse_dates is interpreted as epoch
        df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
                                parse_dates=['IntDateCol'])
        assert issubclass(df.IntDateCol.dtype.type, np.datetime64)
        assert df.IntDateCol.tolist() == [
            pd.Timestamp(1986, 12, 25, 0, 0, 0),
            pd.Timestamp(2013, 1, 1, 0, 0, 0)
        ]

        # dict value 's' gives the epoch unit explicitly
        df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
                                parse_dates={'IntDateCol': 's'})
        assert issubclass(df.IntDateCol.dtype.type, np.datetime64)
        assert df.IntDateCol.tolist() == [
            pd.Timestamp(1986, 12, 25, 0, 0, 0),
            pd.Timestamp(2013, 1, 1, 0, 0, 0)
        ]

        # YYYYMMDD-coded integers parsed via a format string
        df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
                                parse_dates={'IntDateOnlyCol': '%Y%m%d'})
        assert issubclass(df.IntDateOnlyCol.dtype.type, np.datetime64)
        assert df.IntDateOnlyCol.tolist() == [
            pd.Timestamp('2010-10-10'),
            pd.Timestamp('2010-12-12')
        ]
  564. def test_date_and_index(self):
  565. # Test case where same column appears in parse_date and index_col
  566. df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
  567. index_col='DateCol',
  568. parse_dates=['DateCol', 'IntDateCol'])
  569. assert issubclass(df.index.dtype.type, np.datetime64)
  570. assert issubclass(df.IntDateCol.dtype.type, np.datetime64)
  571. def test_timedelta(self):
  572. # see #6921
  573. df = to_timedelta(
  574. Series(['00:00:01', '00:00:03'], name='foo')).to_frame()
  575. with tm.assert_produces_warning(UserWarning):
  576. df.to_sql('test_timedelta', self.conn)
  577. result = sql.read_sql_query('SELECT * FROM test_timedelta', self.conn)
  578. tm.assert_series_equal(result['foo'], df['foo'].astype('int64'))
  579. def test_complex(self):
  580. df = DataFrame({'a': [1 + 1j, 2j]})
  581. # Complex data type should raise error
  582. pytest.raises(ValueError, df.to_sql, 'test_complex', self.conn)
    def test_to_sql_index_label(self):
        """Index label precedence: explicit index_label > index name > default.

        Each step writes with if_exists='replace' and checks the first
        column name of the stored table.
        """
        temp_frame = DataFrame({'col1': range(4)})

        # no index name, defaults to 'index'
        sql.to_sql(temp_frame, 'test_index_label', self.conn)
        frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn)
        assert frame.columns[0] == 'index'

        # specifying index_label
        sql.to_sql(temp_frame, 'test_index_label', self.conn,
                   if_exists='replace', index_label='other_label')
        frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn)
        assert frame.columns[0] == "other_label"

        # using the index name
        temp_frame.index.name = 'index_name'
        sql.to_sql(temp_frame, 'test_index_label', self.conn,
                   if_exists='replace')
        frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn)
        assert frame.columns[0] == "index_name"

        # has index name, but specifying index_label wins
        sql.to_sql(temp_frame, 'test_index_label', self.conn,
                   if_exists='replace', index_label='other_label')
        frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn)
        assert frame.columns[0] == "other_label"

        # index name is integer -> stored as its string form
        temp_frame.index.name = 0
        sql.to_sql(temp_frame, 'test_index_label', self.conn,
                   if_exists='replace')
        frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn)
        assert frame.columns[0] == "0"

        # integer index_label with no index name -> also stringified
        temp_frame.index.name = None
        sql.to_sql(temp_frame, 'test_index_label', self.conn,
                   if_exists='replace', index_label=0)
        frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn)
        assert frame.columns[0] == "0"
    def test_to_sql_index_label_multiindex(self):
        """Same precedence rules as test_to_sql_index_label, for a 2-level
        MultiIndex; the first two stored columns carry the level labels."""
        temp_frame = DataFrame({'col1': range(4)},
                               index=MultiIndex.from_product(
                                   [('A0', 'A1'), ('B0', 'B1')]))

        # no index name, defaults to 'level_0' and 'level_1'
        sql.to_sql(temp_frame, 'test_index_label', self.conn)
        frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn)
        assert frame.columns[0] == 'level_0'
        assert frame.columns[1] == 'level_1'

        # specifying index_label (one label per level)
        sql.to_sql(temp_frame, 'test_index_label', self.conn,
                   if_exists='replace', index_label=['A', 'B'])
        frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn)
        assert frame.columns[:2].tolist() == ['A', 'B']

        # using the index names
        temp_frame.index.names = ['A', 'B']
        sql.to_sql(temp_frame, 'test_index_label', self.conn,
                   if_exists='replace')
        frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn)
        assert frame.columns[:2].tolist() == ['A', 'B']

        # has index names, but specifying index_label wins
        sql.to_sql(temp_frame, 'test_index_label', self.conn,
                   if_exists='replace', index_label=['C', 'D'])
        frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn)
        assert frame.columns[:2].tolist() == ['C', 'D']

        # wrong length of index_label -> ValueError
        pytest.raises(ValueError, sql.to_sql, temp_frame,
                      'test_index_label', self.conn, if_exists='replace',
                      index_label='C')
  645. def test_multiindex_roundtrip(self):
  646. df = DataFrame.from_records([(1, 2.1, 'line1'), (2, 1.5, 'line2')],
  647. columns=['A', 'B', 'C'], index=['A', 'B'])
  648. df.to_sql('test_multiindex_roundtrip', self.conn)
  649. result = sql.read_sql_query('SELECT * FROM test_multiindex_roundtrip',
  650. self.conn, index_col=['A', 'B'])
  651. tm.assert_frame_equal(df, result, check_index_type=True)
  652. def test_integer_col_names(self):
  653. df = DataFrame([[1, 2], [3, 4]], columns=[0, 1])
  654. sql.to_sql(df, "test_frame_integer_col_names", self.conn,
  655. if_exists='replace')
  656. def test_get_schema(self):
  657. create_sql = sql.get_schema(self.test_frame1, 'test', con=self.conn)
  658. assert 'CREATE' in create_sql
  659. def test_get_schema_dtypes(self):
  660. float_frame = DataFrame({'a': [1.1, 1.2], 'b': [2.1, 2.2]})
  661. dtype = sqlalchemy.Integer if self.mode == 'sqlalchemy' else 'INTEGER'
  662. create_sql = sql.get_schema(float_frame, 'test',
  663. con=self.conn, dtype={'b': dtype})
  664. assert 'CREATE' in create_sql
  665. assert 'INTEGER' in create_sql
  666. def test_get_schema_keys(self):
  667. frame = DataFrame({'Col1': [1.1, 1.2], 'Col2': [2.1, 2.2]})
  668. create_sql = sql.get_schema(frame, 'test', con=self.conn, keys='Col1')
  669. constraint_sentence = 'CONSTRAINT test_pk PRIMARY KEY ("Col1")'
  670. assert constraint_sentence in create_sql
  671. # multiple columns as key (GH10385)
  672. create_sql = sql.get_schema(self.test_frame1, 'test',
  673. con=self.conn, keys=['A', 'B'])
  674. constraint_sentence = 'CONSTRAINT test_pk PRIMARY KEY ("A", "B")'
  675. assert constraint_sentence in create_sql
  676. def test_chunksize_read(self):
  677. df = DataFrame(np.random.randn(22, 5), columns=list('abcde'))
  678. df.to_sql('test_chunksize', self.conn, index=False)
  679. # reading the query in one time
  680. res1 = sql.read_sql_query("select * from test_chunksize", self.conn)
  681. # reading the query in chunks with read_sql_query
  682. res2 = DataFrame()
  683. i = 0
  684. sizes = [5, 5, 5, 5, 2]
  685. for chunk in sql.read_sql_query("select * from test_chunksize",
  686. self.conn, chunksize=5):
  687. res2 = concat([res2, chunk], ignore_index=True)
  688. assert len(chunk) == sizes[i]
  689. i += 1
  690. tm.assert_frame_equal(res1, res2)
  691. # reading the query in chunks with read_sql_query
  692. if self.mode == 'sqlalchemy':
  693. res3 = DataFrame()
  694. i = 0
  695. sizes = [5, 5, 5, 5, 2]
  696. for chunk in sql.read_sql_table("test_chunksize", self.conn,
  697. chunksize=5):
  698. res3 = concat([res3, chunk], ignore_index=True)
  699. assert len(chunk) == sizes[i]
  700. i += 1
  701. tm.assert_frame_equal(res1, res3)
  702. def test_categorical(self):
  703. # GH8624
  704. # test that categorical gets written correctly as dense column
  705. df = DataFrame(
  706. {'person_id': [1, 2, 3],
  707. 'person_name': ['John P. Doe', 'Jane Dove', 'John P. Doe']})
  708. df2 = df.copy()
  709. df2['person_name'] = df2['person_name'].astype('category')
  710. df2.to_sql('test_categorical', self.conn, index=False)
  711. res = sql.read_sql_query('SELECT * FROM test_categorical', self.conn)
  712. tm.assert_frame_equal(res, df)
  713. def test_unicode_column_name(self):
  714. # GH 11431
  715. df = DataFrame([[1, 2], [3, 4]], columns=[u'\xe9', u'b'])
  716. df.to_sql('test_unicode', self.conn, index=False)
  717. def test_escaped_table_name(self):
  718. # GH 13206
  719. df = DataFrame({'A': [0, 1, 2], 'B': [0.2, np.nan, 5.6]})
  720. df.to_sql('d1187b08-4943-4c8d-a7f6', self.conn, index=False)
  721. res = sql.read_sql_query('SELECT * FROM `d1187b08-4943-4c8d-a7f6`',
  722. self.conn)
  723. tm.assert_frame_equal(res, df)
@pytest.mark.single
class TestSQLApi(SQLAlchemyMixIn, _TestSQLApi):
    """
    Test the public API as it would be used directly

    Tests for `read_sql_table` are included here, as this is specific for the
    sqlalchemy mode.
    """
    flavor = 'sqlite'
    mode = 'sqlalchemy'

    def connect(self):
        # In-memory sqlite engine; tests are skipped when sqlalchemy is
        # missing.
        if SQLALCHEMY_INSTALLED:
            return sqlalchemy.create_engine('sqlite:///:memory:')
        else:
            pytest.skip('SQLAlchemy not installed')

    def test_read_table_columns(self):
        # test columns argument in read_table
        sql.to_sql(self.test_frame1, 'test_frame', self.conn)

        cols = ['A', 'B']
        result = sql.read_sql_table('test_frame', self.conn, columns=cols)
        assert result.columns.tolist() == cols

    def test_read_table_index_col(self):
        # test index_col argument in read_table
        sql.to_sql(self.test_frame1, 'test_frame', self.conn)

        result = sql.read_sql_table('test_frame', self.conn, index_col="index")
        assert result.index.names == ["index"]

        result = sql.read_sql_table(
            'test_frame', self.conn, index_col=["A", "B"])
        assert result.index.names == ["A", "B"]

        # index_col and columns may be combined
        result = sql.read_sql_table('test_frame', self.conn,
                                    index_col=["A", "B"],
                                    columns=["C", "D"])
        assert result.index.names == ["A", "B"]
        assert result.columns.tolist() == ["C", "D"]

    def test_read_sql_delegate(self):
        # read_sql dispatches SQL strings to read_sql_query ...
        iris_frame1 = sql.read_sql_query(
            "SELECT * FROM iris", self.conn)
        iris_frame2 = sql.read_sql(
            "SELECT * FROM iris", self.conn)
        tm.assert_frame_equal(iris_frame1, iris_frame2)

        # ... and bare table names to read_sql_table
        iris_frame1 = sql.read_sql_table('iris', self.conn)
        iris_frame2 = sql.read_sql('iris', self.conn)
        tm.assert_frame_equal(iris_frame1, iris_frame2)

    def test_not_reflect_all_tables(self):
        # A table with an unreflectable column type ('UNKNOWN') must not
        # cause warnings when a DIFFERENT table is read.
        # create invalid table
        qry = """CREATE TABLE invalid (x INTEGER, y UNKNOWN);"""
        self.conn.execute(qry)
        qry = """CREATE TABLE other_table (x INTEGER, y INTEGER);"""
        self.conn.execute(qry)

        with warnings.catch_warnings(record=True) as w:
            # Cause all warnings to always be triggered.
            warnings.simplefilter("always")
            # Trigger a warning.
            sql.read_sql_table('other_table', self.conn)
            sql.read_sql_query('SELECT * FROM other_table', self.conn)
            # Verify some things
            assert len(w) == 0

    def test_warning_case_insensitive_table_name(self):
        # see gh-7815
        #
        # We can't test that this warning is triggered, as the database
        # configuration would have to be altered. But here we test that
        # the warning is certainly NOT triggered in a normal case.
        with warnings.catch_warnings(record=True) as w:
            # Cause all warnings to always be triggered.
            warnings.simplefilter("always")
            # This should not trigger a Warning
            self.test_frame1.to_sql('CaseSensitive', self.conn)
            # Verify some things
            assert len(w) == 0

    def _get_index_columns(self, tbl_name):
        # NOTE(review): ``tbl_name`` is ignored and 'test_index_saved' is
        # hard-coded; this only works because the sole caller
        # (_to_sql_save_index) uses that table name — confirm before reuse.
        from sqlalchemy.engine import reflection
        insp = reflection.Inspector.from_engine(self.conn)
        ixs = insp.get_indexes('test_index_saved')
        ixs = [i['column_names'] for i in ixs]
        return ixs

    def test_sqlalchemy_type_mapping(self):
        # Test Timestamp objects (no datetime64 because of timezone) (GH9085)
        df = DataFrame({'time': to_datetime(['201412120154', '201412110254'],
                                            utc=True)})
        db = sql.SQLDatabase(self.conn)
        table = sql.SQLTable("test_type", db, frame=df)
        # GH 9086: TIMESTAMP is the suggested type for datetimes with timezones
        assert isinstance(table.table.c['time'].type, sqltypes.TIMESTAMP)

    def test_database_uri_string(self):
        # Test read_sql and .to_sql method with a database URI (GH10654)
        test_frame1 = self.test_frame1
        # db_uri = 'sqlite:///:memory:' # raises
        # sqlalchemy.exc.OperationalError: (sqlite3.OperationalError) near
        # "iris": syntax error [SQL: 'iris']
        with tm.ensure_clean() as name:
            db_uri = 'sqlite:///' + name
            table = 'iris'
            test_frame1.to_sql(table, db_uri, if_exists='replace', index=False)
            test_frame2 = sql.read_sql(table, db_uri)
            test_frame3 = sql.read_sql_table(table, db_uri)
            query = 'SELECT * FROM iris'
            test_frame4 = sql.read_sql_query(query, db_uri)
        tm.assert_frame_equal(test_frame1, test_frame2)
        tm.assert_frame_equal(test_frame1, test_frame3)
        tm.assert_frame_equal(test_frame1, test_frame4)

        # using driver that will not be installed on Travis to trigger error
        # in sqlalchemy.create_engine -> test passing of this error to user
        try:
            # the rest of this test depends on pg8000's being absent
            import pg8000  # noqa
            pytest.skip("pg8000 is installed")
        except ImportError:
            pass

        db_uri = "postgresql+pg8000://user:pass@host/dbname"
        with pytest.raises(ImportError, match="pg8000"):
            sql.read_sql("select * from table", db_uri)

    def _make_iris_table_metadata(self):
        # Build (without creating) sqlalchemy metadata describing the iris
        # fixture table, used to construct selectables in the tests below.
        sa = sqlalchemy
        metadata = sa.MetaData()
        iris = sa.Table('iris', metadata,
                        sa.Column('SepalLength', sa.REAL),
                        sa.Column('SepalWidth', sa.REAL),
                        sa.Column('PetalLength', sa.REAL),
                        sa.Column('PetalWidth', sa.REAL),
                        sa.Column('Name', sa.TEXT)
                        )
        return iris

    def test_query_by_text_obj(self):
        # WIP : GH10846 -- read_sql accepts a sqlalchemy TextClause with
        # bound parameters.
        name_text = sqlalchemy.text('select * from iris where name=:name')
        iris_df = sql.read_sql(name_text, self.conn, params={
                               'name': 'Iris-versicolor'})
        all_names = set(iris_df['Name'])
        assert all_names == {'Iris-versicolor'}

    def test_query_by_select_obj(self):
        # WIP : GH10846 -- read_sql accepts a sqlalchemy Select with a
        # bindparam.
        iris = self._make_iris_table_metadata()

        name_select = sqlalchemy.select([iris]).where(
            iris.c.Name == sqlalchemy.bindparam('name'))
        iris_df = sql.read_sql(name_select, self.conn,
                               params={'name': 'Iris-setosa'})
        all_names = set(iris_df['Name'])
        assert all_names == {'Iris-setosa'}
class _EngineToConnMixin(object):
    """
    A mixin that causes setup_connect to create a conn rather than an engine.
    """

    @pytest.fixture(autouse=True)
    def setup_method(self, load_iris_data):
        super(_EngineToConnMixin, self).load_test_data_and_sql()
        # Swap the engine for a single Connection wrapped in a transaction
        # so the inherited tests exercise the Connection code path; the
        # original engine is stashed for restoration in teardown.
        engine = self.conn
        conn = engine.connect()
        self.__tx = conn.begin()
        self.pandasSQL = sql.SQLDatabase(conn)
        self.__engine = engine
        self.conn = conn

        yield

        # Teardown: roll everything back and restore engine-based state.
        self.__tx.rollback()
        self.conn.close()
        self.conn = self.__engine
        self.pandasSQL = sql.SQLDatabase(self.__engine)
        # XXX:
        # super(_EngineToConnMixin, self).teardown_method(method)
@pytest.mark.single
class TestSQLApiConn(_EngineToConnMixin, TestSQLApi):
    # Re-runs the whole TestSQLApi suite against a Connection (rather than
    # an Engine), via the fixture override in _EngineToConnMixin.
    pass
@pytest.mark.single
class TestSQLiteFallbackApi(SQLiteMixIn, _TestSQLApi):
    """
    Test the public sqlite connection fallback API
    """
    flavor = 'sqlite'
    mode = 'fallback'

    def connect(self, database=":memory:"):
        # Plain DBAPI connection -> exercises the non-sqlalchemy fallback.
        return sqlite3.connect(database)

    def test_sql_open_close(self):
        # Test if the IO in the database still work if the connection closed
        # between the writing and reading (as in many real situations).

        with tm.ensure_clean() as name:
            conn = self.connect(name)
            sql.to_sql(self.test_frame3, "test_frame3_legacy",
                       conn, index=False)
            conn.close()

            conn = self.connect(name)
            result = sql.read_sql_query("SELECT * FROM test_frame3_legacy;",
                                        conn)
            conn.close()

        tm.assert_frame_equal(self.test_frame3, result)

    def test_con_string_import_error(self):
        # A URI string requires sqlalchemy; without it an ImportError must
        # surface to the caller.
        if not SQLALCHEMY_INSTALLED:
            conn = 'mysql://root@localhost/pandas_nosetest'
            pytest.raises(ImportError, sql.read_sql, "SELECT * FROM iris",
                          conn)
        else:
            pytest.skip('SQLAlchemy is installed')

    def test_read_sql_delegate(self):
        iris_frame1 = sql.read_sql_query("SELECT * FROM iris", self.conn)
        iris_frame2 = sql.read_sql("SELECT * FROM iris", self.conn)
        tm.assert_frame_equal(iris_frame1, iris_frame2)

        # In fallback mode a bare table name cannot be delegated to
        # read_sql_table, so it is executed as SQL and fails.
        pytest.raises(sql.DatabaseError, sql.read_sql, 'iris', self.conn)

    def test_safe_names_warning(self):
        # GH 6798
        df = DataFrame([[1, 2], [3, 4]], columns=['a', 'b '])  # has a space
        # warns on create table with spaces in names
        with tm.assert_produces_warning():
            sql.to_sql(df, "test_frame3_legacy", self.conn, index=False)

    def test_get_schema2(self):
        # without providing a connection object (available for backwards comp)
        create_sql = sql.get_schema(self.test_frame1, 'test')
        assert 'CREATE' in create_sql

    def _get_sqlite_column_type(self, schema, column):
        # Scan the CREATE TABLE statement line by line and return the
        # declared type of ``column``; raise if the column is absent.
        for col in schema.split('\n'):
            if col.split()[0].strip('""') == column:
                return col.split()[1]
        raise ValueError('Column %s not found' % (column))

    def test_sqlite_type_mapping(self):
        # Test Timestamp objects (no datetime64 because of timezone) (GH9085)
        df = DataFrame({'time': to_datetime(['201412120154', '201412110254'],
                                            utc=True)})
        db = sql.SQLiteDatabase(self.conn)
        table = sql.SQLiteTable("test_type", db, frame=df)
        schema = table.sql_schema()
        assert self._get_sqlite_column_type(schema, 'time') == "TIMESTAMP"
  942. # -----------------------------------------------------------------------------
  943. # -- Database flavor specific tests
class _TestSQLAlchemy(SQLAlchemyMixIn, PandasSQLTest):
    """
    Base class for testing the sqlalchemy backend.

    Subclasses for specific database types are created below. Tests that
    deviate for each flavor are overwritten there.
    """
    # Database flavor string (e.g. 'sqlite'); concrete subclasses override.
    flavor = None
    @pytest.fixture(autouse=True, scope='class')
    def setup_class(cls):
        # Class-scoped fixture: skip without sqlalchemy/driver, then make
        # sure the server is reachable before any test in the class runs.
        # NOTE(review): despite the name, pytest calls this as an instance
        # fixture, so ``cls`` is actually the test instance — confirm.
        cls.setup_import()
        cls.setup_driver()
        conn = cls.connect()
        conn.connect()
    def load_test_data_and_sql(self):
        # Create the raw types_test_data table first, then the test1
        # frame fixture used by the generic tests.
        self._load_raw_sql()
        self._load_test1_data()
    @pytest.fixture(autouse=True)
    def setup_method(self, load_iris_data):
        # Per-test fixture: depends on load_iris_data so the iris table
        # exists, then loads the remaining fixture data.
        self.load_test_data_and_sql()
    @classmethod
    def setup_import(cls):
        # Skip this test if SQLAlchemy not available
        if not SQLALCHEMY_INSTALLED:
            pytest.skip('SQLAlchemy not installed')
    @classmethod
    def setup_driver(cls):
        # Subclasses import/configure their DBAPI driver here.
        raise NotImplementedError()

    @classmethod
    def connect(cls):
        # Subclasses return a sqlalchemy engine for their flavor.
        raise NotImplementedError()
    def setup_connect(self):
        try:
            self.conn = self.connect()
            self.pandasSQL = sql.SQLDatabase(self.conn)
            # to test if connection can be made:
            self.conn.connect()
        except sqlalchemy.exc.OperationalError:
            # Server unreachable -> skip every test for this flavor.
            pytest.skip(
                "Can't connect to {0} server".format(self.flavor))
    # ------------------------------------------------------------------
    # Thin delegators to the shared scenarios implemented on the
    # PandasSQLTest base class, so every backend subclass runs them.

    def test_read_sql(self):
        self._read_sql_iris()

    def test_read_sql_parameter(self):
        self._read_sql_iris_parameter()

    def test_read_sql_named_parameter(self):
        self._read_sql_iris_named_parameter()

    def test_to_sql(self):
        self._to_sql()

    def test_to_sql_empty(self):
        self._to_sql_empty()

    def test_to_sql_fail(self):
        self._to_sql_fail()

    def test_to_sql_replace(self):
        self._to_sql_replace()

    def test_to_sql_append(self):
        self._to_sql_append()

    def test_to_sql_method_multi(self):
        # method='multi' issues multi-row INSERT statements.
        self._to_sql(method='multi')

    def test_to_sql_method_callable(self):
        self._to_sql_method_callable()
  1003. def test_create_table(self):
  1004. temp_conn = self.connect()
  1005. temp_frame = DataFrame(
  1006. {'one': [1., 2., 3., 4.], 'two': [4., 3., 2., 1.]})
  1007. pandasSQL = sql.SQLDatabase(temp_conn)
  1008. pandasSQL.to_sql(temp_frame, 'temp_frame')
  1009. assert temp_conn.has_table('temp_frame')
  1010. def test_drop_table(self):
  1011. temp_conn = self.connect()
  1012. temp_frame = DataFrame(
  1013. {'one': [1., 2., 3., 4.], 'two': [4., 3., 2., 1.]})
  1014. pandasSQL = sql.SQLDatabase(temp_conn)
  1015. pandasSQL.to_sql(temp_frame, 'temp_frame')
  1016. assert temp_conn.has_table('temp_frame')
  1017. pandasSQL.drop_table('temp_frame')
  1018. assert not temp_conn.has_table('temp_frame')
    def test_roundtrip(self):
        # Shared write/read roundtrip scenario.
        self._roundtrip()

    def test_execute_sql(self):
        # Shared raw-execute scenario.
        self._execute_sql()
    def test_read_table(self):
        iris_frame = sql.read_sql_table("iris", con=self.conn)
        self._check_iris_loaded_frame(iris_frame)

    def test_read_table_columns(self):
        # A duplicated entry in ``columns`` is passed through unchanged.
        iris_frame = sql.read_sql_table(
            "iris", con=self.conn, columns=['SepalLength', 'SepalLength'])
        tm.equalContents(
            iris_frame.columns.values, ['SepalLength', 'SepalLength'])

    def test_read_table_absent(self):
        # Unknown table names raise ValueError instead of returning empty.
        pytest.raises(
            ValueError, sql.read_sql_table, "this_doesnt_exist", con=self.conn)
  1034. def test_default_type_conversion(self):
  1035. df = sql.read_sql_table("types_test_data", self.conn)
  1036. assert issubclass(df.FloatCol.dtype.type, np.floating)
  1037. assert issubclass(df.IntCol.dtype.type, np.integer)
  1038. assert issubclass(df.BoolCol.dtype.type, np.bool_)
  1039. # Int column with NA values stays as float
  1040. assert issubclass(df.IntColWithNull.dtype.type, np.floating)
  1041. # Bool column with NA values becomes object
  1042. assert issubclass(df.BoolColWithNull.dtype.type, np.object)
  1043. def test_bigint(self):
  1044. # int64 should be converted to BigInteger, GH7433
  1045. df = DataFrame(data={'i64': [2**62]})
  1046. df.to_sql('test_bigint', self.conn, index=False)
  1047. result = sql.read_sql_table('test_bigint', self.conn)
  1048. tm.assert_frame_equal(df, result)
    def test_default_date_load(self):
        df = sql.read_sql_table("types_test_data", self.conn)

        # IMPORTANT - sqlite has no native date type, so shouldn't parse, but
        # MySQL SHOULD be converted.
        assert issubclass(df.DateCol.dtype.type, np.datetime64)
    def test_datetime_with_timezone(self):
        # edge case that converts postgresql datetime with time zone types
        # to datetime64[ns,psycopg2.tz.FixedOffsetTimezone..], which is ok
        # but should be more natural, so coerce to datetime64[ns] for now

        def check(col):
            # check that a column is either datetime64[ns]
            # or datetime64[ns, UTC]
            if is_datetime64_dtype(col.dtype):

                # "2000-01-01 00:00:00-08:00" should convert to
                # "2000-01-01 08:00:00"
                assert col[0] == Timestamp('2000-01-01 08:00:00')

                # "2000-06-01 00:00:00-07:00" should convert to
                # "2000-06-01 07:00:00"
                assert col[1] == Timestamp('2000-06-01 07:00:00')

            elif is_datetime64tz_dtype(col.dtype):
                assert str(col.dt.tz) == 'UTC'

                # "2000-01-01 00:00:00-08:00" should convert to
                # "2000-01-01 08:00:00"
                # "2000-06-01 00:00:00-07:00" should convert to
                # "2000-06-01 07:00:00"
                # GH 6415
                expected_data = [Timestamp('2000-01-01 08:00:00', tz='UTC'),
                                 Timestamp('2000-06-01 07:00:00', tz='UTC')]
                expected = Series(expected_data, name=col.name)
                tm.assert_series_equal(col, expected)

            else:
                raise AssertionError("DateCol loaded with incorrect type "
                                     "-> {0}".format(col.dtype))

        # GH11216
        df = pd.read_sql_query("select * from types_test_data", self.conn)
        if not hasattr(df, 'DateColWithTz'):
            pytest.skip("no column with datetime with time zone")

        # this is parsed on Travis (linux), but not on macosx for some reason
        # even with the same versions of psycopg2 & sqlalchemy, possibly a
        # Postgrsql server version difference
        col = df.DateColWithTz
        assert is_datetime64tz_dtype(col.dtype)

        # explicit parse_dates must normalize the column to UTC
        df = pd.read_sql_query("select * from types_test_data",
                               self.conn, parse_dates=['DateColWithTz'])
        if not hasattr(df, 'DateColWithTz'):
            pytest.skip("no column with datetime with time zone")
        col = df.DateColWithTz
        assert is_datetime64tz_dtype(col.dtype)
        assert str(col.dt.tz) == 'UTC'
        check(df.DateColWithTz)

        # chunked reads must concatenate to the same tz-aware result as
        # read_sql_table
        df = pd.concat(list(pd.read_sql_query("select * from types_test_data",
                                              self.conn, chunksize=1)),
                       ignore_index=True)
        col = df.DateColWithTz
        assert is_datetime64tz_dtype(col.dtype)
        assert str(col.dt.tz) == 'UTC'
        expected = sql.read_sql_table("types_test_data", self.conn)
        col = expected.DateColWithTz
        assert is_datetime64tz_dtype(col.dtype)
        tm.assert_series_equal(df.DateColWithTz, expected.DateColWithTz)

        # xref #7139
        # this might or might not be converted depending on the postgres driver
        df = sql.read_sql_table("types_test_data", self.conn)
        check(df.DateColWithTz)
    def test_datetime_with_timezone_roundtrip(self):
        # GH 9086
        # Write datetimetz data to a db and read it back
        # For dbs that support timestamps with timezones, should get back UTC
        # otherwise naive data should be returned
        expected = DataFrame({'A': date_range(
            '2013-01-01 09:00:00', periods=3, tz='US/Pacific'
        )})
        expected.to_sql('test_datetime_tz', self.conn, index=False)

        if self.flavor == 'postgresql':
            # SQLAlchemy "timezones" (i.e. offsets) are coerced to UTC
            expected['A'] = expected['A'].dt.tz_convert('UTC')
        else:
            # Otherwise, timestamps are returned as local, naive
            expected['A'] = expected['A'].dt.tz_localize(None)

        result = sql.read_sql_table('test_datetime_tz', self.conn)
        tm.assert_frame_equal(result, expected)

        result = sql.read_sql_query(
            'SELECT * FROM test_datetime_tz', self.conn
        )
        if self.flavor == 'sqlite':
            # read_sql_query does not return datetime type like read_sql_table
            assert isinstance(result.loc[0, 'A'], string_types)
            result['A'] = to_datetime(result['A'])
        tm.assert_frame_equal(result, expected)
    def test_naive_datetimeindex_roundtrip(self):
        # GH 23510
        # Ensure that a naive DatetimeIndex isn't converted to UTC
        dates = date_range('2018-01-01', periods=5, freq='6H')
        expected = DataFrame({'nums': range(5)}, index=dates)
        expected.to_sql('foo_table', self.conn, index_label='info_date')
        result = sql.read_sql_table('foo_table', self.conn,
                                    index_col='info_date')
        # result index will gain a name from the set_index operation;
        # compare with check_names=False
        tm.assert_frame_equal(result, expected, check_names=False)
    def test_date_parsing(self):
        # No Parsing
        df = sql.read_sql_table("types_test_data", self.conn)
        # sqlite stores dates as text, so without parsing they stay object
        expected_type = object if self.flavor == 'sqlite' else np.datetime64
        assert issubclass(df.DateCol.dtype.type, expected_type)

        # list spec
        df = sql.read_sql_table("types_test_data", self.conn,
                                parse_dates=['DateCol'])
        assert issubclass(df.DateCol.dtype.type, np.datetime64)

        # format-string spec
        df = sql.read_sql_table("types_test_data", self.conn,
                                parse_dates={'DateCol': '%Y-%m-%d %H:%M:%S'})
        assert issubclass(df.DateCol.dtype.type, np.datetime64)

        # dict-of-kwargs spec ({'format': ...}) is also accepted
        df = sql.read_sql_table("types_test_data", self.conn, parse_dates={
            'DateCol': {'format': '%Y-%m-%d %H:%M:%S'}})
        assert issubclass(df.DateCol.dtype.type, np.datetime64)

        # integer epoch column: list, unit string, and dict-of-kwargs specs
        df = sql.read_sql_table(
            "types_test_data", self.conn, parse_dates=['IntDateCol'])
        assert issubclass(df.IntDateCol.dtype.type, np.datetime64)

        df = sql.read_sql_table(
            "types_test_data", self.conn, parse_dates={'IntDateCol': 's'})
        assert issubclass(df.IntDateCol.dtype.type, np.datetime64)

        df = sql.read_sql_table("types_test_data", self.conn,
                                parse_dates={'IntDateCol': {'unit': 's'}})
        assert issubclass(df.IntDateCol.dtype.type, np.datetime64)
  1171. def test_datetime(self):
  1172. df = DataFrame({'A': date_range('2013-01-01 09:00:00', periods=3),
  1173. 'B': np.arange(3.0)})
  1174. df.to_sql('test_datetime', self.conn)
  1175. # with read_table -> type information from schema used
  1176. result = sql.read_sql_table('test_datetime', self.conn)
  1177. result = result.drop('index', axis=1)
  1178. tm.assert_frame_equal(result, df)
  1179. # with read_sql -> no type information -> sqlite has no native
  1180. result = sql.read_sql_query('SELECT * FROM test_datetime', self.conn)
  1181. result = result.drop('index', axis=1)
  1182. if self.flavor == 'sqlite':
  1183. assert isinstance(result.loc[0, 'A'], string_types)
  1184. result['A'] = to_datetime(result['A'])
  1185. tm.assert_frame_equal(result, df)
  1186. else:
  1187. tm.assert_frame_equal(result, df)
  1188. def test_datetime_NaT(self):
  1189. df = DataFrame({'A': date_range('2013-01-01 09:00:00', periods=3),
  1190. 'B': np.arange(3.0)})
  1191. df.loc[1, 'A'] = np.nan
  1192. df.to_sql('test_datetime', self.conn, index=False)
  1193. # with read_table -> type information from schema used
  1194. result = sql.read_sql_table('test_datetime', self.conn)
  1195. tm.assert_frame_equal(result, df)
  1196. # with read_sql -> no type information -> sqlite has no native
  1197. result = sql.read_sql_query('SELECT * FROM test_datetime', self.conn)
  1198. if self.flavor == 'sqlite':
  1199. assert isinstance(result.loc[0, 'A'], string_types)
  1200. result['A'] = to_datetime(result['A'], errors='coerce')
  1201. tm.assert_frame_equal(result, df)
  1202. else:
  1203. tm.assert_frame_equal(result, df)
  1204. def test_datetime_date(self):
  1205. # test support for datetime.date
  1206. df = DataFrame([date(2014, 1, 1), date(2014, 1, 2)], columns=["a"])
  1207. df.to_sql('test_date', self.conn, index=False)
  1208. res = read_sql_table('test_date', self.conn)
  1209. result = res['a']
  1210. expected = to_datetime(df['a'])
  1211. # comes back as datetime64
  1212. tm.assert_series_equal(result, expected)
    def test_datetime_time(self):
        # test support for datetime.time
        df = DataFrame([time(9, 0, 0), time(9, 1, 30)], columns=["a"])
        df.to_sql('test_time', self.conn, index=False)
        res = read_sql_table('test_time', self.conn)
        tm.assert_frame_equal(res, df)

        # GH8341
        # first, use the fallback to have the sqlite adapter put in place
        sqlite_conn = TestSQLiteFallback.connect()
        sql.to_sql(df, "test_time2", sqlite_conn, index=False)
        res = sql.read_sql_query("SELECT * FROM test_time2", sqlite_conn)
        ref = df.applymap(lambda _: _.strftime("%H:%M:%S.%f"))
        tm.assert_frame_equal(ref, res)  # check if adapter is in place
        # then test if sqlalchemy is unaffected by the sqlite adapter
        sql.to_sql(df, "test_time3", self.conn, index=False)
        if self.flavor == 'sqlite':
            # sqlite via sqlalchemy also stores times as formatted strings
            res = sql.read_sql_query("SELECT * FROM test_time3", self.conn)
            ref = df.applymap(lambda _: _.strftime("%H:%M:%S.%f"))
            tm.assert_frame_equal(ref, res)
        res = sql.read_sql_table("test_time3", self.conn)
        tm.assert_frame_equal(df, res)
  1234. def test_mixed_dtype_insert(self):
  1235. # see GH6509
  1236. s1 = Series(2**25 + 1, dtype=np.int32)
  1237. s2 = Series(0.0, dtype=np.float32)
  1238. df = DataFrame({'s1': s1, 's2': s2})
  1239. # write and read again
  1240. df.to_sql("test_read_write", self.conn, index=False)
  1241. df2 = sql.read_sql_table("test_read_write", self.conn)
  1242. tm.assert_frame_equal(df, df2, check_dtype=False, check_exact=True)
  1243. def test_nan_numeric(self):
  1244. # NaNs in numeric float column
  1245. df = DataFrame({'A': [0, 1, 2], 'B': [0.2, np.nan, 5.6]})
  1246. df.to_sql('test_nan', self.conn, index=False)
  1247. # with read_table
  1248. result = sql.read_sql_table('test_nan', self.conn)
  1249. tm.assert_frame_equal(result, df)
  1250. # with read_sql
  1251. result = sql.read_sql_query('SELECT * FROM test_nan', self.conn)
  1252. tm.assert_frame_equal(result, df)
  1253. def test_nan_fullcolumn(self):
  1254. # full NaN column (numeric float column)
  1255. df = DataFrame({'A': [0, 1, 2], 'B': [np.nan, np.nan, np.nan]})
  1256. df.to_sql('test_nan', self.conn, index=False)
  1257. # with read_table
  1258. result = sql.read_sql_table('test_nan', self.conn)
  1259. tm.assert_frame_equal(result, df)
  1260. # with read_sql -> not type info from table -> stays None
  1261. df['B'] = df['B'].astype('object')
  1262. df['B'] = None
  1263. result = sql.read_sql_query('SELECT * FROM test_nan', self.conn)
  1264. tm.assert_frame_equal(result, df)
  1265. def test_nan_string(self):
  1266. # NaNs in string column
  1267. df = DataFrame({'A': [0, 1, 2], 'B': ['a', 'b', np.nan]})
  1268. df.to_sql('test_nan', self.conn, index=False)
  1269. # NaNs are coming back as None
  1270. df.loc[2, 'B'] = None
  1271. # with read_table
  1272. result = sql.read_sql_table('test_nan', self.conn)
  1273. tm.assert_frame_equal(result, df)
  1274. # with read_sql
  1275. result = sql.read_sql_query('SELECT * FROM test_nan', self.conn)
  1276. tm.assert_frame_equal(result, df)
  1277. def _get_index_columns(self, tbl_name):
  1278. from sqlalchemy.engine import reflection
  1279. insp = reflection.Inspector.from_engine(self.conn)
  1280. ixs = insp.get_indexes(tbl_name)
  1281. ixs = [i['column_names'] for i in ixs]
  1282. return ixs
def test_to_sql_save_index(self):
    # Delegates to the shared helper on the base test class.
    self._to_sql_save_index()
def test_transactions(self):
    # Delegates to the shared transaction helper on the base test class.
    self._transaction_test()
  1287. def test_get_schema_create_table(self):
  1288. # Use a dataframe without a bool column, since MySQL converts bool to
  1289. # TINYINT (which read_sql_table returns as an int and causes a dtype
  1290. # mismatch)
  1291. self._load_test3_data()
  1292. tbl = 'test_get_schema_create_table'
  1293. create_sql = sql.get_schema(self.test_frame3, tbl, con=self.conn)
  1294. blank_test_df = self.test_frame3.iloc[:0]
  1295. self.drop_table(tbl)
  1296. self.conn.execute(create_sql)
  1297. returned_df = sql.read_sql_table(tbl, self.conn)
  1298. tm.assert_frame_equal(returned_df, blank_test_df,
  1299. check_index_type=False)
  1300. self.drop_table(tbl)
  1301. def test_dtype(self):
  1302. cols = ['A', 'B']
  1303. data = [(0.8, True),
  1304. (0.9, None)]
  1305. df = DataFrame(data, columns=cols)
  1306. df.to_sql('dtype_test', self.conn)
  1307. df.to_sql('dtype_test2', self.conn, dtype={'B': sqlalchemy.TEXT})
  1308. meta = sqlalchemy.schema.MetaData(bind=self.conn)
  1309. meta.reflect()
  1310. sqltype = meta.tables['dtype_test2'].columns['B'].type
  1311. assert isinstance(sqltype, sqlalchemy.TEXT)
  1312. pytest.raises(ValueError, df.to_sql,
  1313. 'error', self.conn, dtype={'B': str})
  1314. # GH9083
  1315. df.to_sql('dtype_test3', self.conn, dtype={'B': sqlalchemy.String(10)})
  1316. meta.reflect()
  1317. sqltype = meta.tables['dtype_test3'].columns['B'].type
  1318. assert isinstance(sqltype, sqlalchemy.String)
  1319. assert sqltype.length == 10
  1320. # single dtype
  1321. df.to_sql('single_dtype_test', self.conn, dtype=sqlalchemy.TEXT)
  1322. meta = sqlalchemy.schema.MetaData(bind=self.conn)
  1323. meta.reflect()
  1324. sqltypea = meta.tables['single_dtype_test'].columns['A'].type
  1325. sqltypeb = meta.tables['single_dtype_test'].columns['B'].type
  1326. assert isinstance(sqltypea, sqlalchemy.TEXT)
  1327. assert isinstance(sqltypeb, sqlalchemy.TEXT)
  1328. def test_notna_dtype(self):
  1329. cols = {'Bool': Series([True, None]),
  1330. 'Date': Series([datetime(2012, 5, 1), None]),
  1331. 'Int': Series([1, None], dtype='object'),
  1332. 'Float': Series([1.1, None])
  1333. }
  1334. df = DataFrame(cols)
  1335. tbl = 'notna_dtype_test'
  1336. df.to_sql(tbl, self.conn)
  1337. returned_df = sql.read_sql_table(tbl, self.conn) # noqa
  1338. meta = sqlalchemy.schema.MetaData(bind=self.conn)
  1339. meta.reflect()
  1340. if self.flavor == 'mysql':
  1341. my_type = sqltypes.Integer
  1342. else:
  1343. my_type = sqltypes.Boolean
  1344. col_dict = meta.tables[tbl].columns
  1345. assert isinstance(col_dict['Bool'].type, my_type)
  1346. assert isinstance(col_dict['Date'].type, sqltypes.DateTime)
  1347. assert isinstance(col_dict['Int'].type, sqltypes.Integer)
  1348. assert isinstance(col_dict['Float'].type, sqltypes.Float)
  1349. def test_double_precision(self):
  1350. V = 1.23456789101112131415
  1351. df = DataFrame({'f32': Series([V, ], dtype='float32'),
  1352. 'f64': Series([V, ], dtype='float64'),
  1353. 'f64_as_f32': Series([V, ], dtype='float64'),
  1354. 'i32': Series([5, ], dtype='int32'),
  1355. 'i64': Series([5, ], dtype='int64'),
  1356. })
  1357. df.to_sql('test_dtypes', self.conn, index=False, if_exists='replace',
  1358. dtype={'f64_as_f32': sqlalchemy.Float(precision=23)})
  1359. res = sql.read_sql_table('test_dtypes', self.conn)
  1360. # check precision of float64
  1361. assert (np.round(df['f64'].iloc[0], 14) ==
  1362. np.round(res['f64'].iloc[0], 14))
  1363. # check sql types
  1364. meta = sqlalchemy.schema.MetaData(bind=self.conn)
  1365. meta.reflect()
  1366. col_dict = meta.tables['test_dtypes'].columns
  1367. assert str(col_dict['f32'].type) == str(col_dict['f64_as_f32'].type)
  1368. assert isinstance(col_dict['f32'].type, sqltypes.Float)
  1369. assert isinstance(col_dict['f64'].type, sqltypes.Float)
  1370. assert isinstance(col_dict['i32'].type, sqltypes.Integer)
  1371. assert isinstance(col_dict['i64'].type, sqltypes.BigInteger)
def test_connectable_issue_example(self):
    # This tests the example raised in issue
    # https://github.com/pandas-dev/pandas/issues/10104

    def foo(connection):
        # read the table back through an explicit Connection
        query = 'SELECT test_foo_data FROM test_foo_data'
        return sql.read_sql_query(query, con=connection)

    def bar(connection, data):
        # append the data into the same table on that Connection
        data.to_sql(name='test_foo_data',
                    con=connection, if_exists='append')

    def main(connectable):
        # both callables must run inside one explicit transaction
        with connectable.connect() as conn:
            with conn.begin():
                foo_data = conn.run_callable(foo)
                conn.run_callable(bar, foo_data)

    DataFrame({'test_foo_data': [0, 1, 2]}).to_sql(
        'test_foo_data', self.conn)
    main(self.conn)
def test_temporary_table(self):
    """Read from a TEMPORARY table via read_sql_query.

    A temporary table is only visible on the connection that created it,
    so create, insert, flush and read all happen inside one session
    transaction, on the same underlying connection.
    """
    test_data = u'Hello, World!'
    expected = DataFrame({'spam': [test_data]})
    Base = declarative.declarative_base()

    class Temporary(Base):
        # ORM model for a session-scoped TEMPORARY table
        __tablename__ = 'temp_test'
        __table_args__ = {'prefixes': ['TEMPORARY']}
        id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True)
        spam = sqlalchemy.Column(sqlalchemy.Unicode(30), nullable=False)

    Session = sa_session.sessionmaker(bind=self.conn)
    session = Session()
    with session.transaction:
        conn = session.connection()
        Temporary.__table__.create(conn)
        session.add(Temporary(spam=test_data))
        # flush so the row is visible to the query on the same connection
        session.flush()
        df = sql.read_sql_query(
            sql=sqlalchemy.select([Temporary.spam]),
            con=conn,
        )
    tm.assert_frame_equal(df, expected)
class _TestSQLAlchemyConn(_EngineToConnMixin, _TestSQLAlchemy):
    """Run the _TestSQLAlchemy suite against a Connection (via
    _EngineToConnMixin) instead of an Engine."""

    def test_transactions(self):
        # overridden to skip: the shared helper relies on nested
        # transaction rollbacks
        pytest.skip(
            "Nested transactions rollbacks don't work with Pandas")
  1414. class _TestSQLiteAlchemy(object):
  1415. """
  1416. Test the sqlalchemy backend against an in-memory sqlite database.
  1417. """
  1418. flavor = 'sqlite'
  1419. @classmethod
  1420. def connect(cls):
  1421. return sqlalchemy.create_engine('sqlite:///:memory:')
  1422. @classmethod
  1423. def setup_driver(cls):
  1424. # sqlite3 is built-in
  1425. cls.driver = None
  1426. def test_default_type_conversion(self):
  1427. df = sql.read_sql_table("types_test_data", self.conn)
  1428. assert issubclass(df.FloatCol.dtype.type, np.floating)
  1429. assert issubclass(df.IntCol.dtype.type, np.integer)
  1430. # sqlite has no boolean type, so integer type is returned
  1431. assert issubclass(df.BoolCol.dtype.type, np.integer)
  1432. # Int column with NA values stays as float
  1433. assert issubclass(df.IntColWithNull.dtype.type, np.floating)
  1434. # Non-native Bool column with NA values stays as float
  1435. assert issubclass(df.BoolColWithNull.dtype.type, np.floating)
  1436. def test_default_date_load(self):
  1437. df = sql.read_sql_table("types_test_data", self.conn)
  1438. # IMPORTANT - sqlite has no native date type, so shouldn't parse, but
  1439. assert not issubclass(df.DateCol.dtype.type, np.datetime64)
  1440. def test_bigint_warning(self):
  1441. # test no warning for BIGINT (to support int64) is raised (GH7433)
  1442. df = DataFrame({'a': [1, 2]}, dtype='int64')
  1443. df.to_sql('test_bigintwarning', self.conn, index=False)
  1444. with warnings.catch_warnings(record=True) as w:
  1445. warnings.simplefilter("always")
  1446. sql.read_sql_table('test_bigintwarning', self.conn)
  1447. assert len(w) == 0
class _TestMySQLAlchemy(object):
    """
    Test the sqlalchemy backend against an MySQL database.
    """
    flavor = 'mysql'

    @classmethod
    def connect(cls):
        url = 'mysql+{driver}://root@localhost/pandas_nosetest'
        return sqlalchemy.create_engine(url.format(driver=cls.driver),
                                        connect_args=cls.connect_args)

    @classmethod
    def setup_driver(cls):
        pymysql = pytest.importorskip('pymysql')
        cls.driver = 'pymysql'
        # MULTI_STATEMENTS lets test_read_procedure send its multi-statement
        # CREATE PROCEDURE script in a single execute call
        cls.connect_args = {
            'client_flag': pymysql.constants.CLIENT.MULTI_STATEMENTS}

    def test_default_type_conversion(self):
        df = sql.read_sql_table("types_test_data", self.conn)
        assert issubclass(df.FloatCol.dtype.type, np.floating)
        assert issubclass(df.IntCol.dtype.type, np.integer)
        # MySQL has no real BOOL type (it's an alias for TINYINT)
        assert issubclass(df.BoolCol.dtype.type, np.integer)
        # Int column with NA values stays as float
        assert issubclass(df.IntColWithNull.dtype.type, np.floating)
        # Bool column with NA = int column with NA values => becomes float
        assert issubclass(df.BoolColWithNull.dtype.type, np.floating)

    def test_read_procedure(self):
        import pymysql
        # see GH7324. Although it is more an api test, it is added to the
        # mysql tests as sqlite does not have stored procedures
        df = DataFrame({'a': [1, 2, 3], 'b': [0.1, 0.2, 0.3]})
        df.to_sql('test_procedure', self.conn, index=False)
        proc = """DROP PROCEDURE IF EXISTS get_testdb;
        CREATE PROCEDURE get_testdb ()
        BEGIN
        SELECT * FROM test_procedure;
        END"""
        # run the DDL inside an explicit transaction, rolling back on error
        connection = self.conn.connect()
        trans = connection.begin()
        try:
            r1 = connection.execute(proc)  # noqa
            trans.commit()
        except pymysql.Error:
            trans.rollback()
            raise
        res1 = sql.read_sql_query("CALL get_testdb();", self.conn)
        tm.assert_frame_equal(df, res1)
        # test delegation to read_sql_query
        res2 = sql.read_sql("CALL get_testdb();", self.conn)
        tm.assert_frame_equal(df, res2)
class _TestPostgreSQLAlchemy(object):
    """
    Test the sqlalchemy backend against an PostgreSQL database.
    """
    flavor = 'postgresql'

    @classmethod
    def connect(cls):
        url = 'postgresql+{driver}://postgres@localhost/pandas_nosetest'
        return sqlalchemy.create_engine(url.format(driver=cls.driver))

    @classmethod
    def setup_driver(cls):
        pytest.importorskip('psycopg2')
        cls.driver = 'psycopg2'

    def test_schema_support(self):
        # only test this for postgresql (schema's not supported in
        # mysql/sqlite)
        df = DataFrame({'col1': [1, 2], 'col2': [
            0.1, 0.2], 'col3': ['a', 'n']})
        # create a schema
        self.conn.execute("DROP SCHEMA IF EXISTS other CASCADE;")
        self.conn.execute("CREATE SCHEMA other;")
        # write dataframe to different schema's
        df.to_sql('test_schema_public', self.conn, index=False)
        df.to_sql('test_schema_public_explicit', self.conn, index=False,
                  schema='public')
        df.to_sql('test_schema_other', self.conn, index=False, schema='other')
        # read dataframes back in
        res1 = sql.read_sql_table('test_schema_public', self.conn)
        tm.assert_frame_equal(df, res1)
        res2 = sql.read_sql_table('test_schema_public_explicit', self.conn)
        tm.assert_frame_equal(df, res2)
        res3 = sql.read_sql_table('test_schema_public_explicit', self.conn,
                                  schema='public')
        tm.assert_frame_equal(df, res3)
        res4 = sql.read_sql_table('test_schema_other', self.conn,
                                  schema='other')
        tm.assert_frame_equal(df, res4)
        # a table that only exists in 'other' must not be found in 'public'
        pytest.raises(ValueError, sql.read_sql_table, 'test_schema_other',
                      self.conn, schema='public')
        # different if_exists options
        # create a schema
        self.conn.execute("DROP SCHEMA IF EXISTS other CASCADE;")
        self.conn.execute("CREATE SCHEMA other;")
        # write dataframe with different if_exists options
        df.to_sql('test_schema_other', self.conn, schema='other', index=False)
        df.to_sql('test_schema_other', self.conn, schema='other', index=False,
                  if_exists='replace')
        df.to_sql('test_schema_other', self.conn, schema='other', index=False,
                  if_exists='append')
        # replace then append => two copies of df
        res = sql.read_sql_table(
            'test_schema_other', self.conn, schema='other')
        tm.assert_frame_equal(concat([df, df], ignore_index=True), res)
        # specifying schema in user-provided meta
        # The schema won't be applied on another Connection
        # because of transactional schemas
        if isinstance(self.conn, sqlalchemy.engine.Engine):
            engine2 = self.connect()
            meta = sqlalchemy.MetaData(engine2, schema='other')
            pdsql = sql.SQLDatabase(engine2, meta=meta)
            pdsql.to_sql(df, 'test_schema_other2', index=False)
            pdsql.to_sql(df, 'test_schema_other2',
                         index=False, if_exists='replace')
            pdsql.to_sql(df, 'test_schema_other2',
                         index=False, if_exists='append')
            res1 = sql.read_sql_table(
                'test_schema_other2', self.conn, schema='other')
            res2 = pdsql.read_table('test_schema_other2')
            tm.assert_frame_equal(res1, res2)

    def test_copy_from_callable_insertion_method(self):
        # GH 8953
        # Example in io.rst found under _io.sql.method
        # not available in sqlite, mysql
        def psql_insert_copy(table, conn, keys, data_iter):
            # gets a DBAPI connection that can provide a cursor
            dbapi_conn = conn.connection
            with dbapi_conn.cursor() as cur:
                # serialize the rows to CSV in memory, then bulk-load
                # them with PostgreSQL's COPY
                s_buf = compat.StringIO()
                writer = csv.writer(s_buf)
                writer.writerows(data_iter)
                s_buf.seek(0)
                columns = ', '.join('"{}"'.format(k) for k in keys)
                if table.schema:
                    table_name = '{}.{}'.format(table.schema, table.name)
                else:
                    table_name = table.name
                sql_query = 'COPY {} ({}) FROM STDIN WITH CSV'.format(
                    table_name, columns)
                cur.copy_expert(sql=sql_query, file=s_buf)

        expected = DataFrame({'col1': [1, 2], 'col2': [0.1, 0.2],
                              'col3': ['a', 'n']})
        expected.to_sql('test_copy_insert', self.conn, index=False,
                        method=psql_insert_copy)
        result = sql.read_sql_table('test_copy_insert', self.conn)
        tm.assert_frame_equal(result, expected)
@pytest.mark.single
@pytest.mark.db
class TestMySQLAlchemy(_TestMySQLAlchemy, _TestSQLAlchemy):
    """Concrete MySQL sqlalchemy suite, run against an Engine."""
    pass
@pytest.mark.single
@pytest.mark.db
class TestMySQLAlchemyConn(_TestMySQLAlchemy, _TestSQLAlchemyConn):
    """Concrete MySQL sqlalchemy suite, run against a Connection."""
    pass
@pytest.mark.single
@pytest.mark.db
class TestPostgreSQLAlchemy(_TestPostgreSQLAlchemy, _TestSQLAlchemy):
    """Concrete PostgreSQL sqlalchemy suite, run against an Engine."""
    pass
@pytest.mark.single
@pytest.mark.db
class TestPostgreSQLAlchemyConn(_TestPostgreSQLAlchemy, _TestSQLAlchemyConn):
    """Concrete PostgreSQL sqlalchemy suite, run against a Connection."""
    pass
@pytest.mark.single
class TestSQLiteAlchemy(_TestSQLiteAlchemy, _TestSQLAlchemy):
    """Concrete sqlite sqlalchemy suite, run against an Engine."""
    pass
@pytest.mark.single
class TestSQLiteAlchemyConn(_TestSQLiteAlchemy, _TestSQLAlchemyConn):
    """Concrete sqlite sqlalchemy suite, run against a Connection."""
    pass
  1614. # -----------------------------------------------------------------------------
  1615. # -- Test Sqlite / MySQL fallback
@pytest.mark.single
class TestSQLiteFallback(SQLiteMixIn, PandasSQLTest):
    """
    Test the fallback mode against an in-memory sqlite database.
    """
    flavor = 'sqlite'

    @classmethod
    def connect(cls):
        return sqlite3.connect(':memory:')

    def setup_connect(self):
        self.conn = self.connect()

    def load_test_data_and_sql(self):
        # fallback path: wrap the raw DBAPI connection in SQLiteDatabase
        self.pandasSQL = sql.SQLiteDatabase(self.conn)
        self._load_test1_data()

    @pytest.fixture(autouse=True)
    def setup_method(self, load_iris_data):
        self.load_test_data_and_sql()

    # The following tests delegate to shared helpers on PandasSQLTest.

    def test_read_sql(self):
        self._read_sql_iris()

    def test_read_sql_parameter(self):
        self._read_sql_iris_parameter()

    def test_read_sql_named_parameter(self):
        self._read_sql_iris_named_parameter()

    def test_to_sql(self):
        self._to_sql()

    def test_to_sql_empty(self):
        self._to_sql_empty()

    def test_to_sql_fail(self):
        self._to_sql_fail()

    def test_to_sql_replace(self):
        self._to_sql_replace()

    def test_to_sql_append(self):
        self._to_sql_append()

    def test_create_and_drop_table(self):
        temp_frame = DataFrame(
            {'one': [1., 2., 3., 4.], 'two': [4., 3., 2., 1.]})
        self.pandasSQL.to_sql(temp_frame, 'drop_test_frame')
        assert self.pandasSQL.has_table('drop_test_frame')
        self.pandasSQL.drop_table('drop_test_frame')
        assert not self.pandasSQL.has_table('drop_test_frame')

    def test_roundtrip(self):
        self._roundtrip()

    def test_execute_sql(self):
        self._execute_sql()

    def test_datetime_date(self):
        # test support for datetime.date
        df = DataFrame([date(2014, 1, 1), date(2014, 1, 2)], columns=["a"])
        df.to_sql('test_date', self.conn, index=False)
        res = read_sql_query('SELECT * FROM test_date', self.conn)
        if self.flavor == 'sqlite':
            # comes back as strings
            tm.assert_frame_equal(res, df.astype(str))
        elif self.flavor == 'mysql':
            tm.assert_frame_equal(res, df)

    def test_datetime_time(self):
        # test support for datetime.time, GH #8341
        df = DataFrame([time(9, 0, 0), time(9, 1, 30)], columns=["a"])
        df.to_sql('test_time', self.conn, index=False)
        res = read_sql_query('SELECT * FROM test_time', self.conn)
        if self.flavor == 'sqlite':
            # comes back as strings
            expected = df.applymap(lambda _: _.strftime("%H:%M:%S.%f"))
            tm.assert_frame_equal(res, expected)

    def _get_index_columns(self, tbl_name):
        # read index metadata straight from sqlite_master + PRAGMA,
        # returning a list of column-name lists (one per index)
        ixs = sql.read_sql_query(
            "SELECT * FROM sqlite_master WHERE type = 'index' " +
            "AND tbl_name = '%s'" % tbl_name, self.conn)
        ix_cols = []
        for ix_name in ixs.name:
            ix_info = sql.read_sql_query(
                "PRAGMA index_info(%s)" % ix_name, self.conn)
            ix_cols.append(ix_info.name.tolist())
        return ix_cols

    def test_to_sql_save_index(self):
        self._to_sql_save_index()

    def test_transactions(self):
        if PY36:
            pytest.skip("not working on python > 3.5")
        self._transaction_test()

    def _get_sqlite_column_type(self, table, column):
        # PRAGMA table_info yields (cid, name, type, notnull, dflt, pk) rows
        recs = self.conn.execute('PRAGMA table_info(%s)' % table)
        for cid, name, ctype, not_null, default, pk in recs:
            if name == column:
                return ctype
        raise ValueError('Table %s, column %s not found' % (table, column))

    def test_dtype(self):
        if self.flavor == 'mysql':
            pytest.skip('Not applicable to MySQL legacy')
        cols = ['A', 'B']
        data = [(0.8, True),
                (0.9, None)]
        df = DataFrame(data, columns=cols)
        df.to_sql('dtype_test', self.conn)
        df.to_sql('dtype_test2', self.conn, dtype={'B': 'STRING'})
        # sqlite stores Boolean values as INTEGER
        assert self._get_sqlite_column_type(
            'dtype_test', 'B') == 'INTEGER'
        assert self._get_sqlite_column_type(
            'dtype_test2', 'B') == 'STRING'
        # a python type (rather than a type string) is rejected
        pytest.raises(ValueError, df.to_sql,
                      'error', self.conn, dtype={'B': bool})
        # single dtype
        df.to_sql('single_dtype_test', self.conn, dtype='STRING')
        assert self._get_sqlite_column_type(
            'single_dtype_test', 'A') == 'STRING'
        assert self._get_sqlite_column_type(
            'single_dtype_test', 'B') == 'STRING'

    def test_notna_dtype(self):
        if self.flavor == 'mysql':
            pytest.skip('Not applicable to MySQL legacy')
        cols = {'Bool': Series([True, None]),
                'Date': Series([datetime(2012, 5, 1), None]),
                'Int': Series([1, None], dtype='object'),
                'Float': Series([1.1, None])
                }
        df = DataFrame(cols)
        tbl = 'notna_dtype_test'
        df.to_sql(tbl, self.conn)
        assert self._get_sqlite_column_type(tbl, 'Bool') == 'INTEGER'
        assert self._get_sqlite_column_type(tbl, 'Date') == 'TIMESTAMP'
        assert self._get_sqlite_column_type(tbl, 'Int') == 'INTEGER'
        assert self._get_sqlite_column_type(tbl, 'Float') == 'REAL'

    def test_illegal_names(self):
        # For sqlite, these should work fine
        df = DataFrame([[1, 2], [3, 4]], columns=['a', 'b'])
        # Raise error on blank
        pytest.raises(ValueError, df.to_sql, "", self.conn)
        for ndx, weird_name in enumerate(
                ['test_weird_name]', 'test_weird_name[',
                 'test_weird_name`', 'test_weird_name"', 'test_weird_name\'',
                 '_b.test_weird_name_01-30', '"_b.test_weird_name_01-30"',
                 '99beginswithnumber', '12345', u'\xe9']):
            # weird characters are accepted both as table and column names
            df.to_sql(weird_name, self.conn)
            sql.table_exists(weird_name, self.conn)
            df2 = DataFrame([[1, 2], [3, 4]], columns=['a', weird_name])
            c_tbl = 'test_weird_col_name%d' % ndx
            df2.to_sql(c_tbl, self.conn)
            sql.table_exists(c_tbl, self.conn)
  1754. # -----------------------------------------------------------------------------
  1755. # -- Old tests from 0.13.1 (before refactor using sqlalchemy)
  1756. def date_format(dt):
  1757. """Returns date in YYYYMMDD format."""
  1758. return dt.strftime('%Y%m%d')
# Map from python value type to a callable rendering that value as a SQL
# literal; used by format_query below to substitute into %s placeholders.
_formatters = {
    datetime: lambda dt: "'%s'" % date_format(dt),
    str: lambda x: "'%s'" % x,
    np.str_: lambda x: "'%s'" % x,
    compat.text_type: lambda x: "'%s'" % x,
    compat.binary_type: lambda x: "'%s'" % x,
    float: lambda x: "%.8f" % x,
    int: lambda x: "%s" % x,
    type(None): lambda x: "NULL",
    np.float64: lambda x: "%.10f" % x,
    bool: lambda x: "'%s'" % x,
}
  1771. def format_query(sql, *args):
  1772. """
  1773. """
  1774. processed_args = []
  1775. for arg in args:
  1776. if isinstance(arg, float) and isna(arg):
  1777. arg = None
  1778. formatter = _formatters[type(arg)]
  1779. processed_args.append(formatter(arg))
  1780. return sql % tuple(processed_args)
  1781. def tquery(query, con=None, cur=None):
  1782. """Replace removed sql.tquery function"""
  1783. res = sql.execute(query, con=con, cur=cur).fetchall()
  1784. if res is None:
  1785. return None
  1786. else:
  1787. return list(res)
@pytest.mark.single
class TestXSQLite(SQLiteMixIn):
    """Old tests from 0.13.1 (pre-sqlalchemy refactor) run directly
    against an in-memory sqlite3 connection."""

    @pytest.fixture(autouse=True)
    def setup_method(self, request, datapath):
        self.method = request.function
        self.conn = sqlite3.connect(':memory:')
        # In some test cases we may close db connection
        # Re-open conn here so we can perform cleanup in teardown
        yield
        self.method = request.function
        self.conn = sqlite3.connect(':memory:')

    def test_basic(self):
        frame = tm.makeTimeDataFrame()
        self._check_roundtrip(frame)

    def test_write_row_by_row(self):
        frame = tm.makeTimeDataFrame()
        frame.iloc[0, 0] = np.nan
        create_sql = sql.get_schema(frame, 'test')
        cur = self.conn.cursor()
        cur.execute(create_sql)
        cur = self.conn.cursor()
        # insert each row as a hand-formatted literal statement
        ins = "INSERT INTO test VALUES (%s, %s, %s, %s)"
        for idx, row in frame.iterrows():
            fmt_sql = format_query(ins, *row)
            tquery(fmt_sql, cur=cur)
        self.conn.commit()
        result = sql.read_sql("select * from test", con=self.conn)
        result.index = frame.index
        tm.assert_frame_equal(result, frame, check_less_precise=True)

    def test_execute(self):
        frame = tm.makeTimeDataFrame()
        create_sql = sql.get_schema(frame, 'test')
        cur = self.conn.cursor()
        cur.execute(create_sql)
        # insert one row via parametrized sql.execute
        ins = "INSERT INTO test VALUES (?, ?, ?, ?)"
        row = frame.iloc[0]
        sql.execute(ins, self.conn, params=tuple(row))
        self.conn.commit()
        result = sql.read_sql("select * from test", self.conn)
        result.index = frame.index[:1]
        tm.assert_frame_equal(result, frame[:1])

    def test_schema(self):
        frame = tm.makeTimeDataFrame()
        create_sql = sql.get_schema(frame, 'test')
        lines = create_sql.splitlines()
        # column A of a time frame must be typed DATETIME in the DDL
        for l in lines:
            tokens = l.split(' ')
            if len(tokens) == 2 and tokens[0] == 'A':
                assert tokens[1] == 'DATETIME'
        frame = tm.makeTimeDataFrame()
        create_sql = sql.get_schema(frame, 'test', keys=['A', 'B'])
        lines = create_sql.splitlines()
        assert 'PRIMARY KEY ("A", "B")' in create_sql
        # the generated DDL must actually be executable
        cur = self.conn.cursor()
        cur.execute(create_sql)

    def test_execute_fail(self):
        create_sql = """
        CREATE TABLE test
        (
        a TEXT,
        b TEXT,
        c REAL,
        PRIMARY KEY (a, b)
        );
        """
        cur = self.conn.cursor()
        cur.execute(create_sql)
        sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.conn)
        sql.execute('INSERT INTO test VALUES("foo", "baz", 2.567)', self.conn)
        # duplicate primary key must raise
        with pytest.raises(Exception):
            sql.execute('INSERT INTO test VALUES("foo", "bar", 7)', self.conn)

    def test_execute_closed_connection(self):
        create_sql = """
        CREATE TABLE test
        (
        a TEXT,
        b TEXT,
        c REAL,
        PRIMARY KEY (a, b)
        );
        """
        cur = self.conn.cursor()
        cur.execute(create_sql)
        sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.conn)
        self.conn.close()
        # querying a closed connection must raise
        with pytest.raises(Exception):
            tquery("select * from test", con=self.conn)

    def test_na_roundtrip(self):
        pass

    def _check_roundtrip(self, frame):
        sql.to_sql(frame, name='test_table', con=self.conn, index=False)
        result = sql.read_sql("select * from test_table", self.conn)
        # HACK! Change this once indexes are handled properly.
        result.index = frame.index
        expected = frame
        tm.assert_frame_equal(result, expected)
        # second pass: add a text column and read back with index_col
        frame['txt'] = ['a'] * len(frame)
        frame2 = frame.copy()
        frame2['Idx'] = Index(lrange(len(frame2))) + 10
        sql.to_sql(frame2, name='test_table2', con=self.conn, index=False)
        result = sql.read_sql("select * from test_table2", self.conn,
                              index_col='Idx')
        expected = frame.copy()
        expected.index = Index(lrange(len(frame2))) + 10
        expected.index.name = 'Idx'
        tm.assert_frame_equal(expected, result)

    def test_keyword_as_column_names(self):
        # 'From' is a SQL keyword; writing it as a column name must work
        df = DataFrame({'From': np.ones(5)})
        sql.to_sql(df, con=self.conn, name='testkeywords', index=False)

    def test_onecolumn_of_integer(self):
        # GH 3628
        # a column_of_integers dataframe should transfer well to sql
        mono_df = DataFrame([1, 2], columns=['c0'])
        sql.to_sql(mono_df, con=self.conn, name='mono_df', index=False)
        # computing the sum via sql
        con_x = self.conn
        the_sum = sum(my_c0[0]
                      for my_c0 in con_x.execute("select * from mono_df"))
        # it should not fail, and gives 3 ( Issue #3628 )
        assert the_sum == 3
        result = sql.read_sql("select * from mono_df", con_x)
        tm.assert_frame_equal(result, mono_df)

    def test_if_exists(self):
        df_if_exists_1 = DataFrame({'col1': [1, 2], 'col2': ['A', 'B']})
        df_if_exists_2 = DataFrame(
            {'col1': [3, 4, 5], 'col2': ['C', 'D', 'E']})
        table_name = 'table_if_exists'
        sql_select = "SELECT * FROM %s" % table_name

        def clean_up(test_table_to_drop):
            """
            Drops tables created from individual tests
            so no dependencies arise from sequential tests
            """
            self.drop_table(test_table_to_drop)

        # test if invalid value for if_exists raises appropriate error
        pytest.raises(ValueError,
                      sql.to_sql,
                      frame=df_if_exists_1,
                      con=self.conn,
                      name=table_name,
                      if_exists='notvalidvalue')
        clean_up(table_name)
        # test if_exists='fail'
        sql.to_sql(frame=df_if_exists_1, con=self.conn,
                   name=table_name, if_exists='fail')
        pytest.raises(ValueError,
                      sql.to_sql,
                      frame=df_if_exists_1,
                      con=self.conn,
                      name=table_name,
                      if_exists='fail')
        # test if_exists='replace'
        sql.to_sql(frame=df_if_exists_1, con=self.conn, name=table_name,
                   if_exists='replace', index=False)
        assert tquery(sql_select, con=self.conn) == [(1, 'A'), (2, 'B')]
        sql.to_sql(frame=df_if_exists_2, con=self.conn, name=table_name,
                   if_exists='replace', index=False)
        assert (tquery(sql_select, con=self.conn) ==
                [(3, 'C'), (4, 'D'), (5, 'E')])
        clean_up(table_name)
        # test if_exists='append'
        sql.to_sql(frame=df_if_exists_1, con=self.conn, name=table_name,
                   if_exists='fail', index=False)
        assert tquery(sql_select, con=self.conn) == [(1, 'A'), (2, 'B')]
        sql.to_sql(frame=df_if_exists_2, con=self.conn, name=table_name,
                   if_exists='append', index=False)
        assert (tquery(sql_select, con=self.conn) ==
                [(1, 'A'), (2, 'B'), (3, 'C'), (4, 'D'), (5, 'E')])
        clean_up(table_name)
  1957. @pytest.mark.single
  1958. @pytest.mark.db
  1959. @pytest.mark.skip(reason="gh-13611: there is no support for MySQL "
  1960. "if SQLAlchemy is not installed")
  1961. class TestXMySQL(MySQLMixIn):
@pytest.fixture(autouse=True, scope='class')
def setup_class(cls):
    # Verify once per class that a local MySQL test database is reachable;
    # abort with setup instructions otherwise.
    pymysql = pytest.importorskip('pymysql')
    pymysql.connect(host='localhost', user='root', passwd='',
                    db='pandas_nosetest')
    try:
        pymysql.connect(read_default_group='pandas')
    except pymysql.ProgrammingError:
        raise RuntimeError(
            "Create a group of connection parameters under the heading "
            "[pandas] in your system's mysql default file, "
            "typically located at ~/.my.cnf or /etc/.my.cnf.")
    except pymysql.Error:
        raise RuntimeError(
            "Cannot connect to database. "
            "Create a group of connection parameters under the heading "
            "[pandas] in your system's mysql default file, "
            "typically located at ~/.my.cnf or /etc/.my.cnf.")
  1980. @pytest.fixture(autouse=True)
  1981. def setup_method(self, request, datapath):
  1982. pymysql = pytest.importorskip('pymysql')
  1983. pymysql.connect(host='localhost', user='root', passwd='',
  1984. db='pandas_nosetest')
  1985. try:
  1986. pymysql.connect(read_default_group='pandas')
  1987. except pymysql.ProgrammingError:
  1988. raise RuntimeError(
  1989. "Create a group of connection parameters under the heading "
  1990. "[pandas] in your system's mysql default file, "
  1991. "typically located at ~/.my.cnf or /etc/.my.cnf.")
  1992. except pymysql.Error:
  1993. raise RuntimeError(
  1994. "Cannot connect to database. "
  1995. "Create a group of connection parameters under the heading "
  1996. "[pandas] in your system's mysql default file, "
  1997. "typically located at ~/.my.cnf or /etc/.my.cnf.")
  1998. self.method = request.function
  1999. def test_basic(self):
  2000. frame = tm.makeTimeDataFrame()
  2001. self._check_roundtrip(frame)
  2002. def test_write_row_by_row(self):
  2003. frame = tm.makeTimeDataFrame()
  2004. frame.iloc[0, 0] = np.nan
  2005. drop_sql = "DROP TABLE IF EXISTS test"
  2006. create_sql = sql.get_schema(frame, 'test')
  2007. cur = self.conn.cursor()
  2008. cur.execute(drop_sql)
  2009. cur.execute(create_sql)
  2010. ins = "INSERT INTO test VALUES (%s, %s, %s, %s)"
  2011. for idx, row in frame.iterrows():
  2012. fmt_sql = format_query(ins, *row)
  2013. tquery(fmt_sql, cur=cur)
  2014. self.conn.commit()
  2015. result = sql.read_sql("select * from test", con=self.conn)
  2016. result.index = frame.index
  2017. tm.assert_frame_equal(result, frame, check_less_precise=True)
  2018. def test_chunksize_read_type(self):
  2019. frame = tm.makeTimeDataFrame()
  2020. frame.index.name = "index"
  2021. drop_sql = "DROP TABLE IF EXISTS test"
  2022. cur = self.conn.cursor()
  2023. cur.execute(drop_sql)
  2024. sql.to_sql(frame, name='test', con=self.conn)
  2025. query = "select * from test"
  2026. chunksize = 5
  2027. chunk_gen = pd.read_sql_query(sql=query, con=self.conn,
  2028. chunksize=chunksize, index_col="index")
  2029. chunk_df = next(chunk_gen)
  2030. tm.assert_frame_equal(frame[:chunksize], chunk_df)
  2031. def test_execute(self):
  2032. frame = tm.makeTimeDataFrame()
  2033. drop_sql = "DROP TABLE IF EXISTS test"
  2034. create_sql = sql.get_schema(frame, 'test')
  2035. cur = self.conn.cursor()
  2036. with warnings.catch_warnings():
  2037. warnings.filterwarnings("ignore", "Unknown table.*")
  2038. cur.execute(drop_sql)
  2039. cur.execute(create_sql)
  2040. ins = "INSERT INTO test VALUES (%s, %s, %s, %s)"
  2041. row = frame.iloc[0].values.tolist()
  2042. sql.execute(ins, self.conn, params=tuple(row))
  2043. self.conn.commit()
  2044. result = sql.read_sql("select * from test", self.conn)
  2045. result.index = frame.index[:1]
  2046. tm.assert_frame_equal(result, frame[:1])
  2047. def test_schema(self):
  2048. frame = tm.makeTimeDataFrame()
  2049. create_sql = sql.get_schema(frame, 'test')
  2050. lines = create_sql.splitlines()
  2051. for l in lines:
  2052. tokens = l.split(' ')
  2053. if len(tokens) == 2 and tokens[0] == 'A':
  2054. assert tokens[1] == 'DATETIME'
  2055. frame = tm.makeTimeDataFrame()
  2056. drop_sql = "DROP TABLE IF EXISTS test"
  2057. create_sql = sql.get_schema(frame, 'test', keys=['A', 'B'])
  2058. lines = create_sql.splitlines()
  2059. assert 'PRIMARY KEY (`A`, `B`)' in create_sql
  2060. cur = self.conn.cursor()
  2061. cur.execute(drop_sql)
  2062. cur.execute(create_sql)
  2063. def test_execute_fail(self):
  2064. drop_sql = "DROP TABLE IF EXISTS test"
  2065. create_sql = """
  2066. CREATE TABLE test
  2067. (
  2068. a TEXT,
  2069. b TEXT,
  2070. c REAL,
  2071. PRIMARY KEY (a(5), b(5))
  2072. );
  2073. """
  2074. cur = self.conn.cursor()
  2075. cur.execute(drop_sql)
  2076. cur.execute(create_sql)
  2077. sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.conn)
  2078. sql.execute('INSERT INTO test VALUES("foo", "baz", 2.567)', self.conn)
  2079. with pytest.raises(Exception):
  2080. sql.execute('INSERT INTO test VALUES("foo", "bar", 7)', self.conn)
  2081. def test_execute_closed_connection(self, request, datapath):
  2082. drop_sql = "DROP TABLE IF EXISTS test"
  2083. create_sql = """
  2084. CREATE TABLE test
  2085. (
  2086. a TEXT,
  2087. b TEXT,
  2088. c REAL,
  2089. PRIMARY KEY (a(5), b(5))
  2090. );
  2091. """
  2092. cur = self.conn.cursor()
  2093. cur.execute(drop_sql)
  2094. cur.execute(create_sql)
  2095. sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.conn)
  2096. self.conn.close()
  2097. with pytest.raises(Exception):
  2098. tquery("select * from test", con=self.conn)
  2099. # Initialize connection again (needed for tearDown)
  2100. self.setup_method(request, datapath)
    def test_na_roundtrip(self):
        # Deliberate no-op: presumably overrides an inherited NA round-trip
        # test that the legacy MySQL fall-back path cannot satisfy —
        # TODO(review): confirm against the mixin/base class definition.
        pass
  2103. def _check_roundtrip(self, frame):
  2104. drop_sql = "DROP TABLE IF EXISTS test_table"
  2105. cur = self.conn.cursor()
  2106. with warnings.catch_warnings():
  2107. warnings.filterwarnings("ignore", "Unknown table.*")
  2108. cur.execute(drop_sql)
  2109. sql.to_sql(frame, name='test_table', con=self.conn, index=False)
  2110. result = sql.read_sql("select * from test_table", self.conn)
  2111. # HACK! Change this once indexes are handled properly.
  2112. result.index = frame.index
  2113. result.index.name = frame.index.name
  2114. expected = frame
  2115. tm.assert_frame_equal(result, expected)
  2116. frame['txt'] = ['a'] * len(frame)
  2117. frame2 = frame.copy()
  2118. index = Index(lrange(len(frame2))) + 10
  2119. frame2['Idx'] = index
  2120. drop_sql = "DROP TABLE IF EXISTS test_table2"
  2121. cur = self.conn.cursor()
  2122. with warnings.catch_warnings():
  2123. warnings.filterwarnings("ignore", "Unknown table.*")
  2124. cur.execute(drop_sql)
  2125. sql.to_sql(frame2, name='test_table2',
  2126. con=self.conn, index=False)
  2127. result = sql.read_sql("select * from test_table2", self.conn,
  2128. index_col='Idx')
  2129. expected = frame.copy()
  2130. # HACK! Change this once indexes are handled properly.
  2131. expected.index = index
  2132. expected.index.names = result.index.names
  2133. tm.assert_frame_equal(expected, result)
  2134. def test_keyword_as_column_names(self):
  2135. df = DataFrame({'From': np.ones(5)})
  2136. sql.to_sql(df, con=self.conn, name='testkeywords',
  2137. if_exists='replace', index=False)
  2138. def test_if_exists(self):
  2139. df_if_exists_1 = DataFrame({'col1': [1, 2], 'col2': ['A', 'B']})
  2140. df_if_exists_2 = DataFrame(
  2141. {'col1': [3, 4, 5], 'col2': ['C', 'D', 'E']})
  2142. table_name = 'table_if_exists'
  2143. sql_select = "SELECT * FROM %s" % table_name
  2144. def clean_up(test_table_to_drop):
  2145. """
  2146. Drops tables created from individual tests
  2147. so no dependencies arise from sequential tests
  2148. """
  2149. self.drop_table(test_table_to_drop)
  2150. # test if invalid value for if_exists raises appropriate error
  2151. pytest.raises(ValueError,
  2152. sql.to_sql,
  2153. frame=df_if_exists_1,
  2154. con=self.conn,
  2155. name=table_name,
  2156. if_exists='notvalidvalue')
  2157. clean_up(table_name)
  2158. # test if_exists='fail'
  2159. sql.to_sql(frame=df_if_exists_1, con=self.conn, name=table_name,
  2160. if_exists='fail', index=False)
  2161. pytest.raises(ValueError,
  2162. sql.to_sql,
  2163. frame=df_if_exists_1,
  2164. con=self.conn,
  2165. name=table_name,
  2166. if_exists='fail')
  2167. # test if_exists='replace'
  2168. sql.to_sql(frame=df_if_exists_1, con=self.conn, name=table_name,
  2169. if_exists='replace', index=False)
  2170. assert tquery(sql_select, con=self.conn) == [(1, 'A'), (2, 'B')]
  2171. sql.to_sql(frame=df_if_exists_2, con=self.conn, name=table_name,
  2172. if_exists='replace', index=False)
  2173. assert (tquery(sql_select, con=self.conn) ==
  2174. [(3, 'C'), (4, 'D'), (5, 'E')])
  2175. clean_up(table_name)
  2176. # test if_exists='append'
  2177. sql.to_sql(frame=df_if_exists_1, con=self.conn, name=table_name,
  2178. if_exists='fail', index=False)
  2179. assert tquery(sql_select, con=self.conn) == [(1, 'A'), (2, 'B')]
  2180. sql.to_sql(frame=df_if_exists_2, con=self.conn, name=table_name,
  2181. if_exists='append', index=False)
  2182. assert (tquery(sql_select, con=self.conn) ==
  2183. [(1, 'A'), (2, 'B'), (3, 'C'), (4, 'D'), (5, 'E')])
  2184. clean_up(table_name)