file.pyx 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377
  1. #!/usr/bin/env python
  2. # encoding: utf-8
  3. # cython: profile=False
  4. import sys
  5. from Naked.settings import debug as DEBUG_FLAG
  6. #------------------------------------------------------------------------------
  7. # [ IO class ]
  8. # interface for all local file IO classes
  9. #------------------------------------------------------------------------------
  10. class IO:
  11. def __init__(self,filepath):
  12. self.filepath = filepath
  13. #------------------------------------------------------------------------------
  14. # [ FileWriter class ]
  15. # writes data to local files
  16. #------------------------------------------------------------------------------
  17. class FileWriter(IO):
  18. def __init__(self, filepath):
  19. IO.__init__(self, filepath)
  20. #------------------------------------------------------------------------------
  21. # [ append method ]
  22. # Universal text file writer that appends to existing file using system default text encoding or utf-8 if throws unicode error
  23. # Tests: test_IO.py:: test_file_ascii_readwrite_append, test_file_append_missingfile
  24. #------------------------------------------------------------------------------
  25. def append(self, text):
  26. try:
  27. from Naked.toolshed.system import file_exists
  28. if not file_exists(self.filepath): #confirm that file exists, if not raise IOError (assuming that developer expected existing file if using append)
  29. raise IOError("The file specified for the text append does not exist (Naked.toolshed.file.py:append).")
  30. with open(self.filepath, 'a') as appender:
  31. appender.write(text)
  32. except UnicodeEncodeError as ue:
  33. self.append_utf8(text) #try writing as utf-8
  34. except Exception as e:
  35. if DEBUG_FLAG:
  36. sys.stderr.write("Naked Framework Error: Unable to append text to the file with the append() method (Naked.toolshed.file.py).")
  37. raise e
  38. #------------------------------------------------------------------------------
  39. # [ append_utf8 method ]
  40. # Text writer that appends text to existing file with utf-8 encoding
  41. # Tests: test_IO.py :: test_file_utf8_readwrite_append
  42. #------------------------------------------------------------------------------
  43. def append_utf8(self, text):
  44. try:
  45. from Naked.toolshed.system import file_exists
  46. if not file_exists(self.filepath):
  47. raise IOError("The file specified for the text append does not exist (Naked.toolshed.file.py:append_utf8).")
  48. import codecs
  49. import unicodedata
  50. norm_text = unicodedata.normalize('NFKD', text) # NKFD normalization of the unicode data before write
  51. with codecs.open(self.filepath, mode='a', encoding="utf_8") as appender:
  52. appender.write(norm_text)
  53. except Exception as e:
  54. if DEBUG_FLAG:
  55. sys.stderr.write("Naked Framework Error: Unable to append text to the file with the append_utf8 method (Naked.toolshed.file.py).")
  56. raise e
  57. #------------------------------------------------------------------------------
  58. # [ gzip method (writer) ]
  59. # writes data to gzip compressed file
  60. # Note: adds .gz extension to filename if user did not specify it in the FileWriter class constructor
  61. # Note: uses compresslevel = 6 as default to balance speed and compression level (which in general is not significantly less than 9)
  62. # Tests: test_IO.py :: test_file_gzip_ascii_readwrite, test_file_gzip_utf8_readwrite,
  63. # test_file_gzip_utf8_readwrite_explicit_decode
  64. #------------------------------------------------------------------------------
  65. def gzip(self, text, compression_level=6):
  66. try:
  67. import gzip
  68. if not self.filepath.endswith(".gz"):
  69. self.filepath = self.filepath + ".gz"
  70. with gzip.open(self.filepath, 'wb', compresslevel=compression_level) as gzip_writer:
  71. gzip_writer.write(text)
  72. except UnicodeEncodeError as ue:
  73. import unicodedata
  74. norm_text = unicodedata.normalize('NFKD', text) # NKFD normalization of the unicode data before write
  75. import codecs
  76. binary_data = codecs.encode(norm_text, "utf_8")
  77. with gzip.open(self.filepath, 'wb', compresslevel=compression_level) as gzip_writer:
  78. gzip_writer.write(binary_data)
  79. except Exception as e:
  80. if DEBUG_FLAG:
  81. sys.stderr.write("Naked Framework Error: unable to gzip compress the file with the gzip method (Naked.toolshed.file.py).")
  82. raise e
  83. #------------------------------------------------------------------------------
  84. # [ write method ]
  85. # Universal text file writer that writes by system default or utf-8 encoded unicode if throws UnicdeEncodeError
  86. # Tests: test_IO.py :: test_file_ascii_readwrite, test_file_ascii_readwrite_missing_file,
  87. # test_file_utf8_write_raises_unicodeerror
  88. #------------------------------------------------------------------------------
  89. def write(self, text):
  90. try:
  91. with open(self.filepath, 'wt') as writer:
  92. writer.write(text)
  93. except UnicodeEncodeError as ue:
  94. self.write_utf8(text) # attempt to write with utf-8 encoding
  95. except Exception as e:
  96. if DEBUG_FLAG:
  97. sys.stderr.write("Naked Framework Error: Unable to write to requested file with the write() method (Naked.toolshed.file.py).")
  98. raise e
  99. #------------------------------------------------------------------------------
  100. # [ write_as method ]
  101. # text file writer that uses developer specified text encoding
  102. # Tests: test_IO.py :: test_file_utf8_readas_writeas
  103. #------------------------------------------------------------------------------
  104. def write_as(self, text, the_encoding=""):
  105. try:
  106. if the_encoding == "": #if the developer did not include the encoding type, raise an exception
  107. raise RuntimeError("The text encoding was not specified as an argument to the write_as() method (Naked.toolshed.file.py:write_as).")
  108. import codecs
  109. with codecs.open(self.filepath, encoding=the_encoding, mode='w') as f:
  110. f.write(text)
  111. except Exception as e:
  112. if DEBUG_FLAG:
  113. sys.stderr.write("Naked Framework Error: unable to write file with the specified encoding using the write_as() method (Naked.toolshed.file.py).")
  114. raise e
  115. #------------------------------------------------------------------------------
  116. # [ write_bin method ]
  117. # binary data file writer
  118. # Tests: test_IO.py :: test_file_bin_readwrite
  119. #------------------------------------------------------------------------------
  120. def write_bin(self, binary_data):
  121. try:
  122. with open(self.filepath, 'wb') as bin_writer:
  123. bin_writer.write(binary_data)
  124. except Exception as e:
  125. if DEBUG_FLAG:
  126. sys.stderr.write("Naked Framework Error: Unable to write binary data to file with the write_bin method (Naked.toolshed.file.py).")
  127. raise e
  128. #------------------------------------------------------------------------------
  129. # [ safe_write method ] (boolean)
  130. # Universal text file writer (writes in default encoding unless throws unicode error) that will NOT overwrite existing file at the requested filepath
  131. # returns boolean indicator for success of write based upon test for existence of file (False = write failed because file exists)
  132. # Tests: test_IO.py :: test_file_ascii_safewrite, test_file_utf8_safewrite
  133. #------------------------------------------------------------------------------
  134. def safe_write(self, text):
  135. import os.path
  136. if not os.path.exists(self.filepath): # if the file does not exist, then can write
  137. try:
  138. with open(self.filepath, 'wt') as writer:
  139. writer.write(text)
  140. return True
  141. except UnicodeEncodeError as ue:
  142. self.write_utf8(text)
  143. return True
  144. except Exception as e:
  145. if DEBUG_FLAG:
  146. sys.stderr.write("Naked Framework Error: Unable to write to requested file with the safe_write() method (Naked.toolshed.file.py).")
  147. raise e
  148. else:
  149. return False # if file exists, do not write and return False
  150. #------------------------------------------------------------------------------
  151. # [ safe_write_bin method ]
  152. # Binary data file writer that will NOT overwrite existing file at the requested filepath
  153. # returns boolean indicator for success of write based upon test for existence of file (False = write failed because file exists)
  154. #------------------------------------------------------------------------------
  155. def safe_write_bin(self, file_data):
  156. try:
  157. import os.path
  158. if not os.path.exists(self.filepath):
  159. with open(self.filepath, 'wb') as writer:
  160. writer.write(file_data)
  161. return True
  162. else:
  163. return False
  164. except Exception as e:
  165. if DEBUG_FLAG:
  166. sys.stderr.write("Naked Framework Error: Unable to write to requested file with the safe_write_bin() method (Naked.toolshed.file.py).")
  167. raise e
  168. #------------------------------------------------------------------------------
  169. # [ write_utf8 method ]
  170. # Text file writer with explicit UTF-8 text encoding
  171. # uses filepath from class constructor
  172. # requires text to passed as a method parameter
  173. # Tests: test_IO.py :: test_file_utf8_readwrite, test_file_utf8_readwrite_raises_unicodeerror
  174. #------------------------------------------------------------------------------
  175. def write_utf8(self, text):
  176. try:
  177. import codecs
  178. f = codecs.open(self.filepath, encoding='utf_8', mode='w')
  179. except IOError as ioe:
  180. if DEBUG_FLAG:
  181. sys.stderr.write("Naked Framework Error: Unable to open file for write with the write_utf8() method (Naked.toolshed.file.py).")
  182. raise ioe
  183. try:
  184. import unicodedata
  185. norm_text = unicodedata.normalize('NFKD', text) # NKFD normalization of the unicode data before write
  186. f.write(norm_text)
  187. except Exception as e:
  188. if DEBUG_FLAG:
  189. sys.stderr.write("Naked Framework Error: Unable to write UTF-8 encoded text to file with the write_utf8() method (Naked.toolshed.file.py).")
  190. raise e
  191. finally:
  192. f.close()
  193. #------------------------------------------------------------------------------
  194. # [ FileReader class ]
  195. # reads data from local files
  196. # filename assigned in constructor (inherited from IO class interface)
  197. #------------------------------------------------------------------------------
  198. class FileReader(IO):
  199. def __init__(self, filepath):
  200. IO.__init__(self, filepath)
  201. #------------------------------------------------------------------------------
  202. # [ read method ] (string)
  203. # Universal text file reader that will read utf-8 encoded unicode or non-unicode text as utf-8
  204. # returns string or unicode (py3 = string for unicode and non-unicode, py2 = str for non-unicode, unicode for unicode)
  205. # Tests: test_IO.py :: test_file_ascii_readwrite, test_file_read_missing_file,
  206. #------------------------------------------------------------------------------
  207. def read(self):
  208. try:
  209. return self.read_utf8() #reads everything as unicode in utf8 encoding
  210. except Exception as e:
  211. if DEBUG_FLAG:
  212. sys.stderr.write("Naked Framework Error: Unable to read text from the requested file with the read() method (Naked.toolshed.file.py).")
  213. raise e
  214. #------------------------------------------------------------------------------
  215. # [ read_bin method ] (binary byte string)
  216. # Universal binary data file reader
  217. # returns file contents in binary mode as binary byte strings
  218. # Tests: test_IO.py :: test_file_bin_readwrite, test_file_read_bin_missing_file
  219. #------------------------------------------------------------------------------
  220. def read_bin(self):
  221. try:
  222. with open(self.filepath, 'rb') as bin_reader:
  223. data = bin_reader.read()
  224. return data
  225. except Exception as e:
  226. if DEBUG_FLAG:
  227. sys.stderr.write("Naked Framework Error: Unable to read the binary data from the file with the read_bin method (Naked.toolshed.file.py).")
  228. raise e
  229. #------------------------------------------------------------------------------
  230. # [ read_as method ] (string with developer specified text encoding)
  231. # Text file reader with developer specified text encoding
  232. # returns file contents in developer specified text encoding
  233. # Tests: test_IO.py :: test_file_utf8_readas_writeas, test_file_readas_missing_file
  234. #------------------------------------------------------------------------------
  235. def read_as(self, the_encoding):
  236. try:
  237. if the_encoding == "":
  238. raise RuntimeError("The text file encoding was not specified as an argument to the read_as method (Naked.toolshed.file.py:read_as).")
  239. import codecs
  240. with codecs.open(self.filepath, encoding=the_encoding, mode='r') as f:
  241. data = f.read()
  242. return data
  243. except Exception as e:
  244. if DEBUG_FLAG:
  245. sys.stderr.write("Naked Framework Error: Unable to read the file with the developer specified text encoding with the read_as method (Naked.toolshed.file.py).")
  246. raise e
  247. #------------------------------------------------------------------------------
  248. # [ readlines method ] (list of strings)
  249. # Read text from file line by line, uses utf8 encoding by default
  250. # returns list of utf8 encoded file lines as strings
  251. # Tests: test_IO.py :: test_file_readlines, test_file_readlines_missing_file
  252. #------------------------------------------------------------------------------
  253. def readlines(self):
  254. try:
  255. return self.readlines_utf8() # read as utf8 encoded file
  256. except Exception as e:
  257. if DEBUG_FLAG:
  258. sys.stderr.write("Naked Framework Error: Unable to read text from the requested file with the readlines() method (Naked.toolshed.file.py).")
  259. raise e
  260. #------------------------------------------------------------------------------
  261. # [ readlines_as method ] (list of developer specified encoded strings)
  262. # Read lines from file with developer specified text encoding
  263. # Returns a list of developer specified encoded lines from the file
  264. # Tests: test_IO.py ::
  265. #------------------------------------------------------------------------------
  266. def readlines_as(self, dev_spec_encoding):
  267. try:
  268. if dev_spec_encoding == "":
  269. raise RuntimeError("The text file encoding was not specified as an argument to the readlines_as method (Naked.toolshed.file.py:readlines_as).")
  270. import codecs
  271. with codecs.open(self.filepath, encoding=dev_spec_encoding, mode='r') as reader:
  272. data_list = []
  273. for line in reader:
  274. data_list.append(line)
  275. return data_list
  276. except Exception as e:
  277. if DEBUG_FLAG:
  278. sys.stderr.write("Naked Framework Error: unable to read lines in the specified encoding with the readlines_as method (Naked.toolshed.file.py).")
  279. raise e
  280. #------------------------------------------------------------------------------
  281. # [ readlines_utf8 method ] (list of utf-8 encoded strings)
  282. # Read text from unicode file by line
  283. # Returns list of file unicode text lines as unicode strings
  284. # Tests: test_IO.py :: test_file_readlines_unicode, test_file_readlines_utf8_missing_file
  285. #------------------------------------------------------------------------------
  286. def readlines_utf8(self):
  287. try:
  288. import codecs
  289. with codecs.open(self.filepath, encoding='utf-8', mode='r') as uni_reader:
  290. modified_text_list = []
  291. for line in uni_reader:
  292. import unicodedata
  293. norm_line = unicodedata.normalize('NFKD', line) # NKFD normalization of the unicode data before use
  294. modified_text_list.append(norm_line)
  295. return modified_text_list
  296. except Exception as e:
  297. if DEBUG_FLAG:
  298. sys.stderr.write("Naked Framework Error: unable to read lines in the unicode file with the readlines_utf8 method (Naked.toolshed.file.py)")
  299. raise e
  300. #------------------------------------------------------------------------------
  301. # [ read_gzip ] (byte string)
  302. # reads data from a gzip compressed file
  303. # returns the decompressed binary data from the file
  304. # Note: if decompressing unicode file, set encoding="utf-8"
  305. # Tests: test_IO.py :: test_file_gzip_ascii_readwrite, test_file_gzip_utf8_readwrite,
  306. # test_file_read_gzip_missing_file
  307. #------------------------------------------------------------------------------
  308. def read_gzip(self, encoding="system_default"):
  309. try:
  310. import gzip
  311. with gzip.open(self.filepath, 'rb') as gzip_reader:
  312. file_data = gzip_reader.read()
  313. if encoding in ["utf-8", "utf8", "utf_8", "UTF-8", "UTF8", "UTF_8"]:
  314. import codecs
  315. file_data = codecs.decode(file_data, "utf-8")
  316. import unicodedata
  317. norm_data = unicodedata.normalize('NFKD', file_data) # NKFD normalization of the unicode data before passing back to the caller
  318. return norm_data
  319. else:
  320. return file_data
  321. except Exception as e:
  322. if DEBUG_FLAG:
  323. sys.stderr.write("Naked Framework Error: Unable to read from the gzip compressed file with the read_gzip() method (Naked.toolshed.file.py).")
  324. raise e
  325. #------------------------------------------------------------------------------
  326. # [ read_utf8 method ] (string)
  327. # read data from a file with explicit UTF-8 encoding
  328. # uses filepath from class constructor
  329. # returns a unicode string containing the file data (unicode in py2, str in py3)
  330. # Tests: test_IO.py :: test_file_utf8_readwrite, test_file_utf8_readwrite_append,
  331. # test_file_read_utf8_missing_file
  332. #------------------------------------------------------------------------------
  333. def read_utf8(self):
  334. try:
  335. import codecs
  336. f = codecs.open(self.filepath, encoding='utf_8', mode='r')
  337. except IOError as ioe:
  338. if DEBUG_FLAG:
  339. sys.stderr.write("Naked Framework Error: Unable to open file for read with read_utf8() method (Naked.toolshed.file.py).")
  340. raise ioe
  341. try:
  342. textstring = f.read()
  343. import unicodedata
  344. norm_text = unicodedata.normalize('NFKD', textstring) # NKFD normalization of the unicode data before returns
  345. return norm_text
  346. except Exception as e:
  347. if DEBUG_FLAG:
  348. sys.stderr.write("Naked Framework Error: Unable to read the file with UTF-8 encoding using the read_utf8() method (Naked.toolshed.file.py).")
  349. raise e
  350. finally:
  351. f.close()
# Script entry guard: executing this module directly performs no action.
if __name__ == '__main__':
    pass