123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376 |
- #!/usr/bin/env python
- # encoding: utf-8
- import sys
- from Naked.settings import debug as DEBUG_FLAG
- #------------------------------------------------------------------------------
- # [ IO class ]
- # interface for all local file IO classes
- #------------------------------------------------------------------------------
class IO:
    """Common interface for the local file IO classes.

    Stores the path of the file that the reader/writer subclasses act on.
    """

    def __init__(self, filepath):
        # path is kept verbatim; no validation or normalization happens here
        self.filepath = filepath
- #------------------------------------------------------------------------------
- # [ FileWriter class ]
- # writes data to local files
- #------------------------------------------------------------------------------
class FileWriter(IO):
    """Writes text or binary data to the local file at ``self.filepath``.

    The "universal" text writers (append, write, safe_write) first try the
    default text encoding of ``open()`` and fall back to the matching UTF-8
    writer when that raises UnicodeEncodeError.  Every method re-raises the
    exception it caught after optionally printing a framework message to
    stderr when DEBUG_FLAG is set.
    """

    def __init__(self, filepath):
        IO.__init__(self, filepath)

    def append(self, text):
        """Append ``text`` to an existing file using the default encoding.

        Falls back to append_utf8() on UnicodeEncodeError.

        Raises:
            IOError: the file does not exist (append assumes an existing file).

        Tests: test_IO.py :: test_file_ascii_readwrite_append,
               test_file_append_missingfile
        """
        try:
            from Naked.toolshed.system import file_exists
            # appending to a missing file is treated as a caller error
            if not file_exists(self.filepath):
                raise IOError("The file specified for the text append does not exist (Naked.toolshed.file.py:append).")
            with open(self.filepath, 'a') as appender:
                appender.write(text)
        except UnicodeEncodeError:
            self.append_utf8(text)  # retry with explicit utf-8 encoding
        except Exception:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: Unable to append text to the file with the append() method (Naked.toolshed.file.py).")
            raise  # bare raise keeps the original traceback

    def append_utf8(self, text):
        """Append NFKD-normalized ``text`` to an existing file as UTF-8.

        Raises:
            IOError: the file does not exist.

        Tests: test_IO.py :: test_file_utf8_readwrite_append
        """
        try:
            from Naked.toolshed.system import file_exists
            if not file_exists(self.filepath):
                raise IOError("The file specified for the text append does not exist (Naked.toolshed.file.py:append_utf8).")
            import codecs
            import unicodedata
            norm_text = unicodedata.normalize('NFKD', text)
            with codecs.open(self.filepath, mode='a', encoding="utf_8") as appender:
                appender.write(norm_text)
        except Exception:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: Unable to append text to the file with the append_utf8 method (Naked.toolshed.file.py).")
            raise

    def gzip(self, text, compression_level=6):
        """Write ``text`` to a gzip compressed file.

        A ``.gz`` extension is appended to ``self.filepath`` (and stored back
        on the instance) when the path does not already end with it.

        Args:
            text: byte data is written unchanged; unicode text is NFKD
                normalized and encoded as UTF-8 before it is compressed.
            compression_level: gzip compresslevel, 6 by default as a balance
                between speed and compression.

        Tests: test_IO.py :: test_file_gzip_ascii_readwrite,
               test_file_gzip_utf8_readwrite,
               test_file_gzip_utf8_readwrite_explicit_decode
        """
        try:
            import gzip
            if not self.filepath.endswith(".gz"):
                self.filepath = self.filepath + ".gz"
            # type(u"") is `unicode` on Python 2 and `str` on Python 3.  The
            # gzip stream is opened in binary mode, so text must be encoded
            # up front: Python 3 raises TypeError (not UnicodeEncodeError)
            # when handed a str, which a UnicodeEncodeError fallback misses.
            if isinstance(text, type(u"")):
                import codecs
                import unicodedata
                norm_text = unicodedata.normalize('NFKD', text)
                binary_data = codecs.encode(norm_text, "utf_8")
            else:
                binary_data = text
            with gzip.open(self.filepath, 'wb', compresslevel=compression_level) as gzip_writer:
                gzip_writer.write(binary_data)
        except Exception:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: unable to gzip compress the file with the gzip method (Naked.toolshed.file.py).")
            raise

    def write(self, text):
        """Write ``text`` to the file, replacing any existing content.

        Uses the default text encoding and falls back to write_utf8() on
        UnicodeEncodeError.

        Tests: test_IO.py :: test_file_ascii_readwrite,
               test_file_ascii_readwrite_missing_file,
               test_file_utf8_write_raises_unicodeerror
        """
        try:
            with open(self.filepath, 'wt') as writer:
                writer.write(text)
        except UnicodeEncodeError:
            self.write_utf8(text)  # retry with explicit utf-8 encoding
        except Exception:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: Unable to write to requested file with the write() method (Naked.toolshed.file.py).")
            raise

    def write_as(self, text, the_encoding=""):
        """Write ``text`` to the file with the caller specified encoding.

        Raises:
            RuntimeError: ``the_encoding`` was left as the empty string.

        Tests: test_IO.py :: test_file_utf8_readas_writeas
        """
        try:
            if the_encoding == "":
                raise RuntimeError("The text encoding was not specified as an argument to the write_as() method (Naked.toolshed.file.py:write_as).")
            import codecs
            with codecs.open(self.filepath, encoding=the_encoding, mode='w') as f:
                f.write(text)
        except Exception:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: unable to write file with the specified encoding using the write_as() method (Naked.toolshed.file.py).")
            raise

    def write_bin(self, binary_data):
        """Write ``binary_data`` to the file in binary mode.

        Tests: test_IO.py :: test_file_bin_readwrite
        """
        try:
            with open(self.filepath, 'wb') as bin_writer:
                bin_writer.write(binary_data)
        except Exception:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: Unable to write binary data to file with the write_bin method (Naked.toolshed.file.py).")
            raise

    def safe_write(self, text):
        """Write ``text`` only when nothing exists at ``self.filepath``.

        Uses the default text encoding and falls back to write_utf8() on
        UnicodeEncodeError.

        Returns:
            True when the text was written, False when the path already
            exists (nothing is written in that case).

        Tests: test_IO.py :: test_file_ascii_safewrite, test_file_utf8_safewrite
        """
        import os.path
        if os.path.exists(self.filepath):
            return False  # never overwrite an existing file
        try:
            with open(self.filepath, 'wt') as writer:
                writer.write(text)
            return True
        except UnicodeEncodeError:
            self.write_utf8(text)
            return True
        except Exception:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: Unable to write to requested file with the safe_write() method (Naked.toolshed.file.py).")
            raise

    def safe_write_bin(self, file_data):
        """Write binary ``file_data`` only when nothing exists at the path.

        Returns:
            True when the data was written, False when the path already
            exists (nothing is written in that case).
        """
        try:
            import os.path
            if os.path.exists(self.filepath):
                return False
            with open(self.filepath, 'wb') as writer:
                writer.write(file_data)
            return True
        except Exception:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: Unable to write to requested file with the safe_write_bin() method (Naked.toolshed.file.py).")
            raise

    def write_utf8(self, text):
        """Write NFKD-normalized ``text`` to the file with UTF-8 encoding.

        Failure to open the file and failure to write are reported with
        separate debug messages.

        Tests: test_IO.py :: test_file_utf8_readwrite,
               test_file_utf8_readwrite_raises_unicodeerror
        """
        import codecs
        import unicodedata
        try:
            f = codecs.open(self.filepath, encoding='utf_8', mode='w')
        except IOError:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: Unable to open file for write with the write_utf8() method (Naked.toolshed.file.py).")
            raise
        try:
            with f:  # closes the stream whether or not the write succeeds
                norm_text = unicodedata.normalize('NFKD', text)
                f.write(norm_text)
        except Exception:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: Unable to write UTF-8 encoded text to file with the write_utf8() method (Naked.toolshed.file.py).")
            raise
- #------------------------------------------------------------------------------
- # [ FileReader class ]
- # reads data from local files
- # filename assigned in constructor (inherited from IO class interface)
- #------------------------------------------------------------------------------
class FileReader(IO):
    """Reads text or binary data from the local file at ``self.filepath``.

    Every method re-raises the exception it caught after optionally printing
    a framework message to stderr when DEBUG_FLAG is set.
    """

    def __init__(self, filepath):
        IO.__init__(self, filepath)

    def read(self):
        """Return the file content as text; delegates to read_utf8().

        Tests: test_IO.py :: test_file_ascii_readwrite, test_file_read_missing_file
        """
        try:
            return self.read_utf8()  # everything is read as utf-8
        except Exception:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: Unable to read text from the requested file with the read() method (Naked.toolshed.file.py).")
            raise  # bare raise keeps the original traceback

    def read_bin(self):
        """Return the raw file content read in binary mode.

        Tests: test_IO.py :: test_file_bin_readwrite, test_file_read_bin_missing_file
        """
        try:
            with open(self.filepath, 'rb') as bin_reader:
                return bin_reader.read()
        except Exception:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: Unable to read the binary data from the file with the read_bin method (Naked.toolshed.file.py).")
            raise

    def read_as(self, the_encoding):
        """Return the file content decoded with the caller specified encoding.

        Raises:
            RuntimeError: ``the_encoding`` is the empty string.

        Tests: test_IO.py :: test_file_utf8_readas_writeas, test_file_readas_missing_file
        """
        try:
            if the_encoding == "":
                raise RuntimeError("The text file encoding was not specified as an argument to the read_as method (Naked.toolshed.file.py:read_as).")
            import codecs
            with codecs.open(self.filepath, encoding=the_encoding, mode='r') as f:
                return f.read()
        except Exception:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: Unable to read the file with the developer specified text encoding with the read_as method (Naked.toolshed.file.py).")
            raise

    def readlines(self):
        """Return the file lines as a list; delegates to readlines_utf8().

        Tests: test_IO.py :: test_file_readlines, test_file_readlines_missing_file
        """
        try:
            return self.readlines_utf8()  # read as a utf-8 encoded file
        except Exception:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: Unable to read text from the requested file with the readlines() method (Naked.toolshed.file.py).")
            raise

    def readlines_as(self, dev_spec_encoding):
        """Return the file lines decoded with the caller specified encoding.

        Raises:
            RuntimeError: ``dev_spec_encoding`` is the empty string.
        """
        try:
            if dev_spec_encoding == "":
                raise RuntimeError("The text file encoding was not specified as an argument to the readlines_as method (Naked.toolshed.file.py:readlines_as).")
            import codecs
            with codecs.open(self.filepath, encoding=dev_spec_encoding, mode='r') as reader:
                return [line for line in reader]
        except Exception:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: unable to read lines in the specified encoding with the readlines_as method (Naked.toolshed.file.py).")
            raise

    def readlines_utf8(self):
        """Return the lines of a UTF-8 file, each one NFKD normalized.

        Tests: test_IO.py :: test_file_readlines_unicode,
               test_file_readlines_utf8_missing_file
        """
        try:
            import codecs
            import unicodedata  # imported once rather than once per line
            with codecs.open(self.filepath, encoding='utf-8', mode='r') as uni_reader:
                return [unicodedata.normalize('NFKD', line) for line in uni_reader]
        except Exception:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: unable to read lines in the unicode file with the readlines_utf8 method (Naked.toolshed.file.py)")
            raise

    def read_gzip(self, encoding="system_default"):
        """Return the decompressed content of a gzip compressed file.

        Args:
            encoding: when this names UTF-8 (any letter case, with or without
                a ``-``/``_`` separator) the data is decoded as UTF-8 and
                NFKD normalized; any other value returns the decompressed
                bytes unchanged.

        Tests: test_IO.py :: test_file_gzip_ascii_readwrite,
               test_file_gzip_utf8_readwrite, test_file_read_gzip_missing_file
        """
        try:
            import gzip
            with gzip.open(self.filepath, 'rb') as gzip_reader:
                file_data = gzip_reader.read()
            try:
                # codec names are case-insensitive and '_' is interchangeable
                # with '-', so compare a canonical spelling
                wants_utf8 = encoding.lower().replace("_", "-") in ("utf-8", "utf8")
            except AttributeError:
                wants_utf8 = False  # non-string value: hand back raw bytes
            if not wants_utf8:
                return file_data
            import codecs
            import unicodedata
            decoded = codecs.decode(file_data, "utf-8")
            return unicodedata.normalize('NFKD', decoded)
        except Exception:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: Unable to read from the gzip compressed file with the read_gzip() method (Naked.toolshed.file.py).")
            raise

    def read_utf8(self):
        """Return the file content decoded as UTF-8 and NFKD normalized.

        Failure to open the file and failure to read are reported with
        separate debug messages.

        Tests: test_IO.py :: test_file_utf8_readwrite,
               test_file_utf8_readwrite_append, test_file_read_utf8_missing_file
        """
        import codecs
        import unicodedata
        try:
            f = codecs.open(self.filepath, encoding='utf_8', mode='r')
        except IOError:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: Unable to open file for read with read_utf8() method (Naked.toolshed.file.py).")
            raise
        try:
            with f:  # closes the stream whether or not the read succeeds
                textstring = f.read()
                return unicodedata.normalize('NFKD', textstring)
        except Exception:
            if DEBUG_FLAG:
                sys.stderr.write("Naked Framework Error: Unable to read the file with UTF-8 encoding using the read_utf8() method (Naked.toolshed.file.py).")
            raise
# Script entry guard: running this module directly does nothing.
if __name__ == '__main__':
    pass
|