file.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376
  1. #!/usr/bin/env python
  2. # encoding: utf-8
  3. import sys
  4. from Naked.settings import debug as DEBUG_FLAG
  5. #------------------------------------------------------------------------------
  6. # [ IO class ]
  7. # interface for all local file IO classes
  8. #------------------------------------------------------------------------------
  9. class IO:
  10. def __init__(self,filepath):
  11. self.filepath = filepath
  12. #------------------------------------------------------------------------------
  13. # [ FileWriter class ]
  14. # writes data to local files
  15. #------------------------------------------------------------------------------
  16. class FileWriter(IO):
  17. def __init__(self, filepath):
  18. IO.__init__(self, filepath)
  19. #------------------------------------------------------------------------------
  20. # [ append method ]
  21. # Universal text file writer that appends to existing file using system default text encoding or utf-8 if throws unicode error
  22. # Tests: test_IO.py:: test_file_ascii_readwrite_append, test_file_append_missingfile
  23. #------------------------------------------------------------------------------
  24. def append(self, text):
  25. try:
  26. from Naked.toolshed.system import file_exists
  27. if not file_exists(self.filepath): #confirm that file exists, if not raise IOError (assuming that developer expected existing file if using append)
  28. raise IOError("The file specified for the text append does not exist (Naked.toolshed.file.py:append).")
  29. with open(self.filepath, 'a') as appender:
  30. appender.write(text)
  31. except UnicodeEncodeError as ue:
  32. self.append_utf8(text) #try writing as utf-8
  33. except Exception as e:
  34. if DEBUG_FLAG:
  35. sys.stderr.write("Naked Framework Error: Unable to append text to the file with the append() method (Naked.toolshed.file.py).")
  36. raise e
  37. #------------------------------------------------------------------------------
  38. # [ append_utf8 method ]
  39. # Text writer that appends text to existing file with utf-8 encoding
  40. # Tests: test_IO.py :: test_file_utf8_readwrite_append
  41. #------------------------------------------------------------------------------
  42. def append_utf8(self, text):
  43. try:
  44. from Naked.toolshed.system import file_exists
  45. if not file_exists(self.filepath):
  46. raise IOError("The file specified for the text append does not exist (Naked.toolshed.file.py:append_utf8).")
  47. import codecs
  48. import unicodedata
  49. norm_text = unicodedata.normalize('NFKD', text) # NKFD normalization of the unicode data before write
  50. with codecs.open(self.filepath, mode='a', encoding="utf_8") as appender:
  51. appender.write(norm_text)
  52. except Exception as e:
  53. if DEBUG_FLAG:
  54. sys.stderr.write("Naked Framework Error: Unable to append text to the file with the append_utf8 method (Naked.toolshed.file.py).")
  55. raise e
  56. #------------------------------------------------------------------------------
  57. # [ gzip method (writer) ]
  58. # writes data to gzip compressed file
  59. # Note: adds .gz extension to filename if user did not specify it in the FileWriter class constructor
  60. # Note: uses compresslevel = 6 as default to balance speed and compression level (which in general is not significantly less than 9)
  61. # Tests: test_IO.py :: test_file_gzip_ascii_readwrite, test_file_gzip_utf8_readwrite,
  62. # test_file_gzip_utf8_readwrite_explicit_decode
  63. #------------------------------------------------------------------------------
  64. def gzip(self, text, compression_level=6):
  65. try:
  66. import gzip
  67. if not self.filepath.endswith(".gz"):
  68. self.filepath = self.filepath + ".gz"
  69. with gzip.open(self.filepath, 'wb', compresslevel=compression_level) as gzip_writer:
  70. gzip_writer.write(text)
  71. except UnicodeEncodeError as ue:
  72. import unicodedata
  73. norm_text = unicodedata.normalize('NFKD', text) # NKFD normalization of the unicode data before write
  74. import codecs
  75. binary_data = codecs.encode(norm_text, "utf_8")
  76. with gzip.open(self.filepath, 'wb', compresslevel=compression_level) as gzip_writer:
  77. gzip_writer.write(binary_data)
  78. except Exception as e:
  79. if DEBUG_FLAG:
  80. sys.stderr.write("Naked Framework Error: unable to gzip compress the file with the gzip method (Naked.toolshed.file.py).")
  81. raise e
  82. #------------------------------------------------------------------------------
  83. # [ write method ]
  84. # Universal text file writer that writes by system default or utf-8 encoded unicode if throws UnicdeEncodeError
  85. # Tests: test_IO.py :: test_file_ascii_readwrite, test_file_ascii_readwrite_missing_file,
  86. # test_file_utf8_write_raises_unicodeerror
  87. #------------------------------------------------------------------------------
  88. def write(self, text):
  89. try:
  90. with open(self.filepath, 'wt') as writer:
  91. writer.write(text)
  92. except UnicodeEncodeError as ue:
  93. self.write_utf8(text) # attempt to write with utf-8 encoding
  94. except Exception as e:
  95. if DEBUG_FLAG:
  96. sys.stderr.write("Naked Framework Error: Unable to write to requested file with the write() method (Naked.toolshed.file.py).")
  97. raise e
  98. #------------------------------------------------------------------------------
  99. # [ write_as method ]
  100. # text file writer that uses developer specified text encoding
  101. # Tests: test_IO.py :: test_file_utf8_readas_writeas
  102. #------------------------------------------------------------------------------
  103. def write_as(self, text, the_encoding=""):
  104. try:
  105. if the_encoding == "": #if the developer did not include the encoding type, raise an exception
  106. raise RuntimeError("The text encoding was not specified as an argument to the write_as() method (Naked.toolshed.file.py:write_as).")
  107. import codecs
  108. with codecs.open(self.filepath, encoding=the_encoding, mode='w') as f:
  109. f.write(text)
  110. except Exception as e:
  111. if DEBUG_FLAG:
  112. sys.stderr.write("Naked Framework Error: unable to write file with the specified encoding using the write_as() method (Naked.toolshed.file.py).")
  113. raise e
  114. #------------------------------------------------------------------------------
  115. # [ write_bin method ]
  116. # binary data file writer
  117. # Tests: test_IO.py :: test_file_bin_readwrite
  118. #------------------------------------------------------------------------------
  119. def write_bin(self, binary_data):
  120. try:
  121. with open(self.filepath, 'wb') as bin_writer:
  122. bin_writer.write(binary_data)
  123. except Exception as e:
  124. if DEBUG_FLAG:
  125. sys.stderr.write("Naked Framework Error: Unable to write binary data to file with the write_bin method (Naked.toolshed.file.py).")
  126. raise e
  127. #------------------------------------------------------------------------------
  128. # [ safe_write method ] (boolean)
  129. # Universal text file writer (writes in default encoding unless throws unicode error) that will NOT overwrite existing file at the requested filepath
  130. # returns boolean indicator for success of write based upon test for existence of file (False = write failed because file exists)
  131. # Tests: test_IO.py :: test_file_ascii_safewrite, test_file_utf8_safewrite
  132. #------------------------------------------------------------------------------
  133. def safe_write(self, text):
  134. import os.path
  135. if not os.path.exists(self.filepath): # if the file does not exist, then can write
  136. try:
  137. with open(self.filepath, 'wt') as writer:
  138. writer.write(text)
  139. return True
  140. except UnicodeEncodeError as ue:
  141. self.write_utf8(text)
  142. return True
  143. except Exception as e:
  144. if DEBUG_FLAG:
  145. sys.stderr.write("Naked Framework Error: Unable to write to requested file with the safe_write() method (Naked.toolshed.file.py).")
  146. raise e
  147. else:
  148. return False # if file exists, do not write and return False
  149. #------------------------------------------------------------------------------
  150. # [ safe_write_bin method ]
  151. # Binary data file writer that will NOT overwrite existing file at the requested filepath
  152. # returns boolean indicator for success of write based upon test for existence of file (False = write failed because file exists)
  153. #------------------------------------------------------------------------------
  154. def safe_write_bin(self, file_data):
  155. try:
  156. import os.path
  157. if not os.path.exists(self.filepath):
  158. with open(self.filepath, 'wb') as writer:
  159. writer.write(file_data)
  160. return True
  161. else:
  162. return False
  163. except Exception as e:
  164. if DEBUG_FLAG:
  165. sys.stderr.write("Naked Framework Error: Unable to write to requested file with the safe_write_bin() method (Naked.toolshed.file.py).")
  166. raise e
  167. #------------------------------------------------------------------------------
  168. # [ write_utf8 method ]
  169. # Text file writer with explicit UTF-8 text encoding
  170. # uses filepath from class constructor
  171. # requires text to passed as a method parameter
  172. # Tests: test_IO.py :: test_file_utf8_readwrite, test_file_utf8_readwrite_raises_unicodeerror
  173. #------------------------------------------------------------------------------
  174. def write_utf8(self, text):
  175. try:
  176. import codecs
  177. f = codecs.open(self.filepath, encoding='utf_8', mode='w')
  178. except IOError as ioe:
  179. if DEBUG_FLAG:
  180. sys.stderr.write("Naked Framework Error: Unable to open file for write with the write_utf8() method (Naked.toolshed.file.py).")
  181. raise ioe
  182. try:
  183. import unicodedata
  184. norm_text = unicodedata.normalize('NFKD', text) # NKFD normalization of the unicode data before write
  185. f.write(norm_text)
  186. except Exception as e:
  187. if DEBUG_FLAG:
  188. sys.stderr.write("Naked Framework Error: Unable to write UTF-8 encoded text to file with the write_utf8() method (Naked.toolshed.file.py).")
  189. raise e
  190. finally:
  191. f.close()
  192. #------------------------------------------------------------------------------
  193. # [ FileReader class ]
  194. # reads data from local files
  195. # filename assigned in constructor (inherited from IO class interface)
  196. #------------------------------------------------------------------------------
  197. class FileReader(IO):
  198. def __init__(self, filepath):
  199. IO.__init__(self, filepath)
  200. #------------------------------------------------------------------------------
  201. # [ read method ] (string)
  202. # Universal text file reader that will read utf-8 encoded unicode or non-unicode text as utf-8
  203. # returns string or unicode (py3 = string for unicode and non-unicode, py2 = str for non-unicode, unicode for unicode)
  204. # Tests: test_IO.py :: test_file_ascii_readwrite, test_file_read_missing_file,
  205. #------------------------------------------------------------------------------
  206. def read(self):
  207. try:
  208. return self.read_utf8() #reads everything as unicode in utf8 encoding
  209. except Exception as e:
  210. if DEBUG_FLAG:
  211. sys.stderr.write("Naked Framework Error: Unable to read text from the requested file with the read() method (Naked.toolshed.file.py).")
  212. raise e
  213. #------------------------------------------------------------------------------
  214. # [ read_bin method ] (binary byte string)
  215. # Universal binary data file reader
  216. # returns file contents in binary mode as binary byte strings
  217. # Tests: test_IO.py :: test_file_bin_readwrite, test_file_read_bin_missing_file
  218. #------------------------------------------------------------------------------
  219. def read_bin(self):
  220. try:
  221. with open(self.filepath, 'rb') as bin_reader:
  222. data = bin_reader.read()
  223. return data
  224. except Exception as e:
  225. if DEBUG_FLAG:
  226. sys.stderr.write("Naked Framework Error: Unable to read the binary data from the file with the read_bin method (Naked.toolshed.file.py).")
  227. raise e
  228. #------------------------------------------------------------------------------
  229. # [ read_as method ] (string with developer specified text encoding)
  230. # Text file reader with developer specified text encoding
  231. # returns file contents in developer specified text encoding
  232. # Tests: test_IO.py :: test_file_utf8_readas_writeas, test_file_readas_missing_file
  233. #------------------------------------------------------------------------------
  234. def read_as(self, the_encoding):
  235. try:
  236. if the_encoding == "":
  237. raise RuntimeError("The text file encoding was not specified as an argument to the read_as method (Naked.toolshed.file.py:read_as).")
  238. import codecs
  239. with codecs.open(self.filepath, encoding=the_encoding, mode='r') as f:
  240. data = f.read()
  241. return data
  242. except Exception as e:
  243. if DEBUG_FLAG:
  244. sys.stderr.write("Naked Framework Error: Unable to read the file with the developer specified text encoding with the read_as method (Naked.toolshed.file.py).")
  245. raise e
  246. #------------------------------------------------------------------------------
  247. # [ readlines method ] (list of strings)
  248. # Read text from file line by line, uses utf8 encoding by default
  249. # returns list of utf8 encoded file lines as strings
  250. # Tests: test_IO.py :: test_file_readlines, test_file_readlines_missing_file
  251. #------------------------------------------------------------------------------
  252. def readlines(self):
  253. try:
  254. return self.readlines_utf8() # read as utf8 encoded file
  255. except Exception as e:
  256. if DEBUG_FLAG:
  257. sys.stderr.write("Naked Framework Error: Unable to read text from the requested file with the readlines() method (Naked.toolshed.file.py).")
  258. raise e
  259. #------------------------------------------------------------------------------
  260. # [ readlines_as method ] (list of developer specified encoded strings)
  261. # Read lines from file with developer specified text encoding
  262. # Returns a list of developer specified encoded lines from the file
  263. # Tests: test_IO.py ::
  264. #------------------------------------------------------------------------------
  265. def readlines_as(self, dev_spec_encoding):
  266. try:
  267. if dev_spec_encoding == "":
  268. raise RuntimeError("The text file encoding was not specified as an argument to the readlines_as method (Naked.toolshed.file.py:readlines_as).")
  269. import codecs
  270. with codecs.open(self.filepath, encoding=dev_spec_encoding, mode='r') as reader:
  271. data_list = []
  272. for line in reader:
  273. data_list.append(line)
  274. return data_list
  275. except Exception as e:
  276. if DEBUG_FLAG:
  277. sys.stderr.write("Naked Framework Error: unable to read lines in the specified encoding with the readlines_as method (Naked.toolshed.file.py).")
  278. raise e
  279. #------------------------------------------------------------------------------
  280. # [ readlines_utf8 method ] (list of utf-8 encoded strings)
  281. # Read text from unicode file by line
  282. # Returns list of file unicode text lines as unicode strings
  283. # Tests: test_IO.py :: test_file_readlines_unicode, test_file_readlines_utf8_missing_file
  284. #------------------------------------------------------------------------------
  285. def readlines_utf8(self):
  286. try:
  287. import codecs
  288. with codecs.open(self.filepath, encoding='utf-8', mode='r') as uni_reader:
  289. modified_text_list = []
  290. for line in uni_reader:
  291. import unicodedata
  292. norm_line = unicodedata.normalize('NFKD', line) # NKFD normalization of the unicode data before use
  293. modified_text_list.append(norm_line)
  294. return modified_text_list
  295. except Exception as e:
  296. if DEBUG_FLAG:
  297. sys.stderr.write("Naked Framework Error: unable to read lines in the unicode file with the readlines_utf8 method (Naked.toolshed.file.py)")
  298. raise e
  299. #------------------------------------------------------------------------------
  300. # [ read_gzip ] (byte string)
  301. # reads data from a gzip compressed file
  302. # returns the decompressed binary data from the file
  303. # Note: if decompressing unicode file, set encoding="utf-8"
  304. # Tests: test_IO.py :: test_file_gzip_ascii_readwrite, test_file_gzip_utf8_readwrite,
  305. # test_file_read_gzip_missing_file
  306. #------------------------------------------------------------------------------
  307. def read_gzip(self, encoding="system_default"):
  308. try:
  309. import gzip
  310. with gzip.open(self.filepath, 'rb') as gzip_reader:
  311. file_data = gzip_reader.read()
  312. if encoding in ["utf-8", "utf8", "utf_8", "UTF-8", "UTF8", "UTF_8"]:
  313. import codecs
  314. file_data = codecs.decode(file_data, "utf-8")
  315. import unicodedata
  316. norm_data = unicodedata.normalize('NFKD', file_data) # NKFD normalization of the unicode data before passing back to the caller
  317. return norm_data
  318. else:
  319. return file_data
  320. except Exception as e:
  321. if DEBUG_FLAG:
  322. sys.stderr.write("Naked Framework Error: Unable to read from the gzip compressed file with the read_gzip() method (Naked.toolshed.file.py).")
  323. raise e
  324. #------------------------------------------------------------------------------
  325. # [ read_utf8 method ] (string)
  326. # read data from a file with explicit UTF-8 encoding
  327. # uses filepath from class constructor
  328. # returns a unicode string containing the file data (unicode in py2, str in py3)
  329. # Tests: test_IO.py :: test_file_utf8_readwrite, test_file_utf8_readwrite_append,
  330. # test_file_read_utf8_missing_file
  331. #------------------------------------------------------------------------------
  332. def read_utf8(self):
  333. try:
  334. import codecs
  335. f = codecs.open(self.filepath, encoding='utf_8', mode='r')
  336. except IOError as ioe:
  337. if DEBUG_FLAG:
  338. sys.stderr.write("Naked Framework Error: Unable to open file for read with read_utf8() method (Naked.toolshed.file.py).")
  339. raise ioe
  340. try:
  341. textstring = f.read()
  342. import unicodedata
  343. norm_text = unicodedata.normalize('NFKD', textstring) # NKFD normalization of the unicode data before returns
  344. return norm_text
  345. except Exception as e:
  346. if DEBUG_FLAG:
  347. sys.stderr.write("Naked Framework Error: Unable to read the file with UTF-8 encoding using the read_utf8() method (Naked.toolshed.file.py).")
  348. raise e
  349. finally:
  350. f.close()
  351. if __name__ == '__main__':
  352. pass