# sas_constants.py (6.6 KB)
  1. magic = (b"\x00\x00\x00\x00\x00\x00\x00\x00" +
  2. b"\x00\x00\x00\x00\xc2\xea\x81\x60" +
  3. b"\xb3\x14\x11\xcf\xbd\x92\x08\x00" +
  4. b"\x09\xc7\x31\x8c\x18\x1f\x10\x11")
  5. align_1_checker_value = b'3'
  6. align_1_offset = 32
  7. align_1_length = 1
  8. align_1_value = 4
  9. u64_byte_checker_value = b'3'
  10. align_2_offset = 35
  11. align_2_length = 1
  12. align_2_value = 4
  13. endianness_offset = 37
  14. endianness_length = 1
  15. platform_offset = 39
  16. platform_length = 1
  17. encoding_offset = 70
  18. encoding_length = 1
  19. dataset_offset = 92
  20. dataset_length = 64
  21. file_type_offset = 156
  22. file_type_length = 8
  23. date_created_offset = 164
  24. date_created_length = 8
  25. date_modified_offset = 172
  26. date_modified_length = 8
  27. header_size_offset = 196
  28. header_size_length = 4
  29. page_size_offset = 200
  30. page_size_length = 4
  31. page_count_offset = 204
  32. page_count_length = 4
  33. sas_release_offset = 216
  34. sas_release_length = 8
  35. sas_server_type_offset = 224
  36. sas_server_type_length = 16
  37. os_version_number_offset = 240
  38. os_version_number_length = 16
  39. os_maker_offset = 256
  40. os_maker_length = 16
  41. os_name_offset = 272
  42. os_name_length = 16
  43. page_bit_offset_x86 = 16
  44. page_bit_offset_x64 = 32
  45. subheader_pointer_length_x86 = 12
  46. subheader_pointer_length_x64 = 24
  47. page_type_offset = 0
  48. page_type_length = 2
  49. block_count_offset = 2
  50. block_count_length = 2
  51. subheader_count_offset = 4
  52. subheader_count_length = 2
  53. page_meta_type = 0
  54. page_data_type = 256
  55. page_amd_type = 1024
  56. page_metc_type = 16384
  57. page_comp_type = -28672
  58. page_mix_types = [512, 640]
  59. subheader_pointers_offset = 8
  60. truncated_subheader_id = 1
  61. compressed_subheader_id = 4
  62. compressed_subheader_type = 1
  63. text_block_size_length = 2
  64. row_length_offset_multiplier = 5
  65. row_count_offset_multiplier = 6
  66. col_count_p1_multiplier = 9
  67. col_count_p2_multiplier = 10
  68. row_count_on_mix_page_offset_multiplier = 15
  69. column_name_pointer_length = 8
  70. column_name_text_subheader_offset = 0
  71. column_name_text_subheader_length = 2
  72. column_name_offset_offset = 2
  73. column_name_offset_length = 2
  74. column_name_length_offset = 4
  75. column_name_length_length = 2
  76. column_data_offset_offset = 8
  77. column_data_length_offset = 8
  78. column_data_length_length = 4
  79. column_type_offset = 14
  80. column_type_length = 1
  81. column_format_text_subheader_index_offset = 22
  82. column_format_text_subheader_index_length = 2
  83. column_format_offset_offset = 24
  84. column_format_offset_length = 2
  85. column_format_length_offset = 26
  86. column_format_length_length = 2
  87. column_label_text_subheader_index_offset = 28
  88. column_label_text_subheader_index_length = 2
  89. column_label_offset_offset = 30
  90. column_label_offset_length = 2
  91. column_label_length_offset = 32
  92. column_label_length_length = 2
  93. rle_compression = b'SASYZCRL'
  94. rdc_compression = b'SASYZCR2'
  95. compression_literals = [rle_compression, rdc_compression]
  96. # Incomplete list of encodings, using SAS nomenclature:
  97. # http://support.sas.com/documentation/cdl/en/nlsref/61893/HTML/default/viewer.htm#a002607278.htm
  98. encoding_names = {29: "latin1", 20: "utf-8", 33: "cyrillic", 60: "wlatin2",
  99. 61: "wcyrillic", 62: "wlatin1", 90: "ebcdic870"}
  100. class SASIndex(object):
  101. row_size_index = 0
  102. column_size_index = 1
  103. subheader_counts_index = 2
  104. column_text_index = 3
  105. column_name_index = 4
  106. column_attributes_index = 5
  107. format_and_label_index = 6
  108. column_list_index = 7
  109. data_subheader_index = 8
  110. subheader_signature_to_index = {
  111. b"\xF7\xF7\xF7\xF7": SASIndex.row_size_index,
  112. b"\x00\x00\x00\x00\xF7\xF7\xF7\xF7": SASIndex.row_size_index,
  113. b"\xF7\xF7\xF7\xF7\x00\x00\x00\x00": SASIndex.row_size_index,
  114. b"\xF7\xF7\xF7\xF7\xFF\xFF\xFB\xFE": SASIndex.row_size_index,
  115. b"\xF6\xF6\xF6\xF6": SASIndex.column_size_index,
  116. b"\x00\x00\x00\x00\xF6\xF6\xF6\xF6": SASIndex.column_size_index,
  117. b"\xF6\xF6\xF6\xF6\x00\x00\x00\x00": SASIndex.column_size_index,
  118. b"\xF6\xF6\xF6\xF6\xFF\xFF\xFB\xFE": SASIndex.column_size_index,
  119. b"\x00\xFC\xFF\xFF": SASIndex.subheader_counts_index,
  120. b"\xFF\xFF\xFC\x00": SASIndex.subheader_counts_index,
  121. b"\x00\xFC\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.subheader_counts_index,
  122. b"\xFF\xFF\xFF\xFF\xFF\xFF\xFC\x00": SASIndex.subheader_counts_index,
  123. b"\xFD\xFF\xFF\xFF": SASIndex.column_text_index,
  124. b"\xFF\xFF\xFF\xFD": SASIndex.column_text_index,
  125. b"\xFD\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_text_index,
  126. b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD": SASIndex.column_text_index,
  127. b"\xFF\xFF\xFF\xFF": SASIndex.column_name_index,
  128. b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_name_index,
  129. b"\xFC\xFF\xFF\xFF": SASIndex.column_attributes_index,
  130. b"\xFF\xFF\xFF\xFC": SASIndex.column_attributes_index,
  131. b"\xFC\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_attributes_index,
  132. b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC": SASIndex.column_attributes_index,
  133. b"\xFE\xFB\xFF\xFF": SASIndex.format_and_label_index,
  134. b"\xFF\xFF\xFB\xFE": SASIndex.format_and_label_index,
  135. b"\xFE\xFB\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.format_and_label_index,
  136. b"\xFF\xFF\xFF\xFF\xFF\xFF\xFB\xFE": SASIndex.format_and_label_index,
  137. b"\xFE\xFF\xFF\xFF": SASIndex.column_list_index,
  138. b"\xFF\xFF\xFF\xFE": SASIndex.column_list_index,
  139. b"\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_list_index,
  140. b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFE": SASIndex.column_list_index}
  141. # List of frequently used SAS date and datetime formats
  142. # http://support.sas.com/documentation/cdl/en/etsug/60372/HTML/default/viewer.htm#etsug_intervals_sect009.htm
  143. # https://github.com/epam/parso/blob/master/src/main/java/com/epam/parso/impl/SasFileConstants.java
  144. sas_date_formats = ("DATE", "DAY", "DDMMYY", "DOWNAME", "JULDAY", "JULIAN",
  145. "MMDDYY", "MMYY", "MMYYC", "MMYYD", "MMYYP", "MMYYS",
  146. "MMYYN", "MONNAME", "MONTH", "MONYY", "QTR", "QTRR",
  147. "NENGO", "WEEKDATE", "WEEKDATX", "WEEKDAY", "WEEKV",
  148. "WORDDATE", "WORDDATX", "YEAR", "YYMM", "YYMMC", "YYMMD",
  149. "YYMMP", "YYMMS", "YYMMN", "YYMON", "YYMMDD", "YYQ",
  150. "YYQC", "YYQD", "YYQP", "YYQS", "YYQN", "YYQR", "YYQRC",
  151. "YYQRD", "YYQRP", "YYQRS", "YYQRN",
  152. "YYMMDDP", "YYMMDDC", "E8601DA", "YYMMDDN", "MMDDYYC",
  153. "MMDDYYS", "MMDDYYD", "YYMMDDS", "B8601DA", "DDMMYYN",
  154. "YYMMDDD", "DDMMYYB", "DDMMYYP", "MMDDYYP", "YYMMDDB",
  155. "MMDDYYN", "DDMMYYC", "DDMMYYD", "DDMMYYS",
  156. "MINGUO")
  157. sas_datetime_formats = ("DATETIME", "DTWKDATX",
  158. "B8601DN", "B8601DT", "B8601DX", "B8601DZ", "B8601LX",
  159. "E8601DN", "E8601DT", "E8601DX", "E8601DZ", "E8601LX",
  160. "DATEAMPM", "DTDATE", "DTMONYY", "DTMONYY", "DTWKDATX",
  161. "DTYEAR", "TOD", "MDYAMPM")