Changeset 33202 for main/trunk/model-sites-dev/heritage-nz/collect/pdf-reports/prepare/xls-to-csv.py
- Timestamp:
- 2019-06-22T17:13:38+12:00 (5 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/model-sites-dev/heritage-nz/collect/pdf-reports/prepare/xls-to-csv.py
r33191 r33202 103 103 # 3. Process the rest of the file, checking if accompanying 104 104 # PDF file present or not and only printing out the header-names 105 # specified in 'sanitized_headers' 105 # as long as it has a non-empty header-name 106 ## # specified in 'sanitized_headers' 106 107 107 108 num_rows = worksheet.nrows … … 117 118 118 119 entry_utf8_row = [] 119 row_i = 0; 120 121 for header_name in xlsutil.sanitized_headers: 122 123 if header_name in header_names_mapping: 120 num_header_cols = worksheet.row_len(xlsutil.header_row_pos) 121 122 for col_j in range(num_header_cols): 123 ## for header_name in xlsutil.sanitized_headers: 124 125 header_name = worksheet.cell_value(xlsutil.header_row_pos,col_j) 126 127 if header_name and header_name.strip(): 128 #### print "*** 2. header_name = " + header_name 129 124 130 header_col_j = header_names_mapping[header_name] 125 126 header_cell_value = worksheet.cell_value(row_i,header_col_j) 131 ## if header_name in header_names_mapping: 132 ## header_col_j = header_names_mapping[header_name] 133 134 ## header_cell_value = worksheet.cell_value(row_i,header_col_j) 127 135 128 if header_ cell_value == "Report Identifier":129 header_ cell_value = "hnz.Identifier"130 131 if header_ cell_value == "Title":132 header_ cell_value = "dc.Title"133 134 if header_ cell_value == "RelevantTLA's":135 header_ cell_value = "TLA"136 137 header_ cell_value_utf8 = unicode(header_cell_value).encode("utf-8")138 metadata_name_utf8 = header_ cell_value_utf8.replace(" ", "")136 if header_name == "Report Identifier": 137 header_name = "hnz.Identifier" 138 139 if header_name == "Title": 140 header_name = "dc.Title" 141 142 if header_name == "RelevantTLA's": 143 header_name = "TLA" 144 145 header_name_utf8 = unicode(header_name).encode("utf-8") 146 metadata_name_utf8 = header_name_utf8.replace(" ", "") 139 147 140 148 entry_utf8_row.append(metadata_name_utf8) 141 149 142 else:143 print("Warning: Failed to column mapping in spreadsheet for header name \""+header_name+"\" => skipping")150 ## else: 151 ## print("Warning: Failed to column mapping in spreadsheet for header name \""+header_name+"\" => skipping") 144 152 145 153 unbound_wr.writerow(entry_utf8_row) … … 151 159 # 3. Process the rest of the file (metadata values) ... 152 160 153 row_i = row_i+ 1161 row_i = xlsutil.header_row_pos + 1 154 162 while row_i<num_rows: 155 163 num_cols = worksheet.row_len(row_i) … … 158 166 found_pdf = False 159 167 160 # for col_j in range(num_cols): 161 for header_name in xlsutil.sanitized_headers: 162 163 if header_name in header_names_mapping: 168 for col_j in range(num_cols): 169 ## for header_name in xlsutil.sanitized_headers: 170 header_name = worksheet.cell_value(xlsutil.header_row_pos,col_j) 171 172 if header_name and header_name.strip(): 164 173 col_j = header_names_mapping[header_name] 174 ## if header_name in header_names_mapping: 175 ## col_j = header_names_mapping[header_name] 165 176 166 177 cell_value = worksheet.cell_value(row_i,col_j) … … 201 212 202 213 formatted_utf8_row.append(formatted_cell_value_utf8) 203 else:204 print "Warning: No column number mapping for header name \""+header_name+"\" => skipping"214 ## else: 215 ## print "Warning: No column number mapping for header name \""+header_name+"\" => skipping" 205 216 206 217 if found_pdf:
Note:
See TracChangeset
for help on using the changeset viewer.