Changeset 33222 for main/trunk/model-sites-dev/heritage-nz/collect
- Timestamp: 2019-06-26T15:52:27+12:00 (5 years ago)
- Location: main/trunk/model-sites-dev/heritage-nz/collect/pdf-reports/prepare
- Files: 3 edited
Legend:
- Unmodified
- Added
- Removed
main/trunk/model-sites-dev/heritage-nz/collect/pdf-reports/prepare/xls-to-csv.py
r33204 r33222 124 124 ## for header_name in xlsutil.sanitized_headers: 125 125 126 header_name = worksheet.cell_value(xlsutil.header_row_pos,col_j)127 128 if header_name and header_name.strip():126 header_name = xlsutil.cell_value_tidy_unicode(worksheet,xlsutil.header_row_pos,col_j) 127 128 if header_name: 129 129 130 130 header_col_j = header_names_mapping[header_name] … … 132 132 ## header_col_j = header_names_mapping[header_name] 133 133 134 ## header_name = worksheet.cell_value(row_i,header_col_j)134 ## header_name = xslutil.cell_value_tidy_unicode(worksheet,row_i,header_col_j) 135 135 136 if header_name == "Report Identifier": 137 header_name = "hnz.Identifier" 138 139 if header_name == "Title": 140 header_name = "dc.Title" 141 142 if header_name == "Relevant TLA's": 143 header_name = "TLA" 144 145 header_name_utf8 = unicode(header_name).encode("utf-8") 136 if header_name == u"Report Identifier": 137 header_name = u"hnz.Identifier" 138 139 if header_name == u"Title": 140 header_name = u"dc.Title" 141 142 if header_name == u"Relevant TLA's": 143 header_name = u"TLA" 144 145 # header_name_utf8 = unicode(header_name).encode("utf-8") 146 header_name_utf8 = header_name.encode("utf-8") 146 147 metadata_name_utf8 = header_name_utf8.replace(" ", "") 147 148 148 149 entry_utf8_row.append(metadata_name_utf8) 149 150 150 if header_name == "Site No":151 entry_utf8_row.append( "SiteNoOrdering")151 if header_name == u"Site No": 152 entry_utf8_row.append(u"SiteNoOrdering") 152 153 153 154 ## else: … … 171 172 for col_j in range(num_cols): 172 173 ## for header_name in xlsutil.sanitized_headers: 173 header_name = worksheet.cell_value(xlsutil.header_row_pos,col_j)174 175 if header_name and header_name.strip():174 header_name = xlsutil.cell_value_tidy_unicode(worksheet,xlsutil.header_row_pos,col_j) 175 176 if header_name: 176 177 col_j = header_names_mapping[header_name] 177 178 ## if header_name in header_names_mapping: 178 179 ## col_j = header_names_mapping[header_name] 179 
180 180 cell_value = worksheet.cell_value(row_i,col_j) 181 182 formatted_cell_value = xlsutil.format_if_int(cell_value) 183 184 if header_name == "Report Identifier": 181 formatted_cell_value = xlsutil.cell_value_tidy_unicode(worksheet,row_i,col_j) 182 183 if header_name == u"Report Identifier": 185 184 # Check to see if companion PDF file present 186 185 # pdf_filename = os.path.join(excel_dirname,"pdfs",formatted_cell_value+".pdf") … … 208 207 formatted_cell_value = formatted_cell_value.rstrip(); 209 208 210 formatted_cell_value_utf8 = unicode(formatted_cell_value).encode("utf-8") 209 # formatted_cell_value_utf8 = unicode(formatted_cell_value).encode("utf-8") 210 formatted_cell_value_utf8 = formatted_cell_value.encode("utf-8") 211 211 212 212 ## Perform any cell transformations to make DL used spreadsheet … … 216 216 formatted_utf8_row.append(formatted_cell_value_utf8) 217 217 218 if header_name == "Site No":218 if header_name == u"Site No": 219 219 site_no = formatted_cell_value_utf8 220 220 site_no_ordering = None -
main/trunk/model-sites-dev/heritage-nz/collect/pdf-reports/prepare/xls-to-sanitized-csv.py
r33202 r33222 49 49 header_col_j = header_names_mapping[header_name] 50 50 51 cell_value = worksheet.cell_value(row_i,header_col_j) 51 # cell_value = worksheet.cell_value(row_i,header_col_j) 52 # 53 # cell_value_utf8 = unicode(cell_value).encode("utf-8") 52 54 53 cell_value_utf8 = unicode(cell_value).encode("utf-8") 55 cell_value_unicode = xlsutil.cell_value_tidy_unicode(worksheet,row_i,header_col_j) 56 57 cell_value_utf8 = cell_value_unicode.encode("utf-8") 58 59 ## cell_value_utf8 = unicode(cell_value).encode("utf-8") 54 60 55 61 sanitized_utf8_row.append(cell_value_utf8) -
main/trunk/model-sites-dev/heritage-nz/collect/pdf-reports/prepare/xlsutil.py
r33204 r33222 6 6 ## worksheet_name = "Archaeological reports" 7 7 8 sanitized_headers = [ "Report Identifier", "Author", "Title", "Produced By", "Date", "HNZPT Region", "File No", 9 "Authority No", "Site No", "Relevant TLA's", "Record Type", "Date Entered" ] 8 sanitized_headers = [ u"Report Identifier", u"Author", u"Title", u"Produced By", u"Date", u"HNZPT Region", u"File No", 9 u"Authority No", u"Site No", u"Relevant TLA's", u"Record Type", u"Date Entered" ] 10 11 def make_unicode(value): 12 value_unicode = value 13 14 if type(value) != unicode: 15 value_str = value 16 if type(value_str) != str: 17 value_str = str(value) 18 19 value_unicode = value_str.decode('utf-8') 20 21 return value_unicode 22 23 24 def cell_value_tidy_unicode(worksheet,row_i,col_j): 25 value= worksheet.cell_value(row_i,col_j) 26 27 value_unicode = make_unicode(value) 28 value_unicode_stripped = value_unicode.strip() 29 30 return value_unicode_stripped 31 10 32 11 33 def format_if_int(cell_value): … … 36 58 found_header = False 37 59 for col_j in range(num_header_cols): 38 header_cell_value = worksheet.cell_value(header_row_pos,col_j)39 if header_cell_value == "Report Identifier":60 header_cell_value = cell_value_tidy_unicode(worksheet,header_row_pos,col_j) 61 if header_cell_value == u"Report Identifier": 40 62 found_header = True 41 63
Note: See TracChangeset for help on using the changeset viewer.