text.py 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208
  1. # -*- test-case-name: twisted.test.test_text -*-
  2. #
  3. # Copyright (c) Twisted Matrix Laboratories.
  4. # See LICENSE for details.
  5. """
  6. Miscellany of text-munging functions.
  7. """
  8. def stringyString(object, indentation=''):
  9. """
  10. Expansive string formatting for sequence types.
  11. C{list.__str__} and C{dict.__str__} use C{repr()} to display their
  12. elements. This function also turns these sequence types
  13. into strings, but uses C{str()} on their elements instead.
  14. Sequence elements are also displayed on separate lines, and nested
  15. sequences have nested indentation.
  16. """
  17. braces = ''
  18. sl = []
  19. if type(object) is dict:
  20. braces = '{}'
  21. for key, value in object.items():
  22. value = stringyString(value, indentation + ' ')
  23. if isMultiline(value):
  24. if endsInNewline(value):
  25. value = value[:-len('\n')]
  26. sl.append("%s %s:\n%s" % (indentation, key, value))
  27. else:
  28. # Oops. Will have to move that indentation.
  29. sl.append("%s %s: %s" % (indentation, key,
  30. value[len(indentation) + 3:]))
  31. elif type(object) is tuple or type(object) is list:
  32. if type(object) is tuple:
  33. braces = '()'
  34. else:
  35. braces = '[]'
  36. for element in object:
  37. element = stringyString(element, indentation + ' ')
  38. sl.append(element.rstrip() + ',')
  39. else:
  40. sl[:] = map(lambda s, i=indentation: i + s,
  41. str(object).split('\n'))
  42. if not sl:
  43. sl.append(indentation)
  44. if braces:
  45. sl[0] = indentation + braces[0] + sl[0][len(indentation) + 1:]
  46. sl[-1] = sl[-1] + braces[-1]
  47. s = "\n".join(sl)
  48. if isMultiline(s) and not endsInNewline(s):
  49. s = s + '\n'
  50. return s
  51. def isMultiline(s):
  52. """
  53. Returns C{True} if this string has a newline in it.
  54. """
  55. return (s.find('\n') != -1)
  56. def endsInNewline(s):
  57. """
  58. Returns C{True} if this string ends in a newline.
  59. """
  60. return (s[-len('\n'):] == '\n')
  61. def greedyWrap(inString, width=80):
  62. """
  63. Given a string and a column width, return a list of lines.
  64. Caveat: I'm use a stupid greedy word-wrapping
  65. algorythm. I won't put two spaces at the end
  66. of a sentence. I don't do full justification.
  67. And no, I've never even *heard* of hypenation.
  68. """
  69. outLines = []
  70. #eww, evil hacks to allow paragraphs delimited by two \ns :(
  71. if inString.find('\n\n') >= 0:
  72. paragraphs = inString.split('\n\n')
  73. for para in paragraphs:
  74. outLines.extend(greedyWrap(para, width) + [''])
  75. return outLines
  76. inWords = inString.split()
  77. column = 0
  78. ptr_line = 0
  79. while inWords:
  80. column = column + len(inWords[ptr_line])
  81. ptr_line = ptr_line + 1
  82. if (column > width):
  83. if ptr_line == 1:
  84. # This single word is too long, it will be the whole line.
  85. pass
  86. else:
  87. # We've gone too far, stop the line one word back.
  88. ptr_line = ptr_line - 1
  89. (l, inWords) = (inWords[0:ptr_line], inWords[ptr_line:])
  90. outLines.append(' '.join(l))
  91. ptr_line = 0
  92. column = 0
  93. elif not (len(inWords) > ptr_line):
  94. # Clean up the last bit.
  95. outLines.append(' '.join(inWords))
  96. del inWords[:]
  97. else:
  98. # Space
  99. column = column + 1
  100. # next word
  101. return outLines
# Alias: wordWrap is bound to the very same function object as greedyWrap.
wordWrap = greedyWrap
  103. def removeLeadingBlanks(lines):
  104. ret = []
  105. for line in lines:
  106. if ret or line.strip():
  107. ret.append(line)
  108. return ret
  109. def removeLeadingTrailingBlanks(s):
  110. lines = removeLeadingBlanks(s.split('\n'))
  111. lines.reverse()
  112. lines = removeLeadingBlanks(lines)
  113. lines.reverse()
  114. return '\n'.join(lines)+'\n'
  115. def splitQuoted(s):
  116. """
  117. Like a string split, but don't break substrings inside quotes.
  118. >>> splitQuoted('the "hairy monkey" likes pie')
  119. ['the', 'hairy monkey', 'likes', 'pie']
  120. Another one of those "someone must have a better solution for
  121. this" things. This implementation is a VERY DUMB hack done too
  122. quickly.
  123. """
  124. out = []
  125. quot = None
  126. phrase = None
  127. for word in s.split():
  128. if phrase is None:
  129. if word and (word[0] in ("\"", "'")):
  130. quot = word[0]
  131. word = word[1:]
  132. phrase = []
  133. if phrase is None:
  134. out.append(word)
  135. else:
  136. if word and (word[-1] == quot):
  137. word = word[:-1]
  138. phrase.append(word)
  139. out.append(" ".join(phrase))
  140. phrase = None
  141. else:
  142. phrase.append(word)
  143. return out
  144. def strFile(p, f, caseSensitive=True):
  145. """
  146. Find whether string C{p} occurs in a read()able object C{f}.
  147. @rtype: C{bool}
  148. """
  149. buf = type(p)()
  150. buf_len = max(len(p), 2**2**2**2)
  151. if not caseSensitive:
  152. p = p.lower()
  153. while 1:
  154. r = f.read(buf_len-len(p))
  155. if not caseSensitive:
  156. r = r.lower()
  157. bytes_read = len(r)
  158. if bytes_read == 0:
  159. return False
  160. l = len(buf)+bytes_read-buf_len
  161. if l <= 0:
  162. buf = buf + r
  163. else:
  164. buf = buf[l:] + r
  165. if buf.find(p) != -1:
  166. return True