Changeset 36606 for main/trunk/model-sites-dev/heritage-nz/collect/pdf-reports/prepare/xls-to-csv.py
- Timestamp: 2022-09-12T12:46:11+12:00 (20 months ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
main/trunk/model-sites-dev/heritage-nz/collect/pdf-reports/prepare/xls-to-csv.py
r33752 r36606 163 163 164 164 row_i = xlsutil.header_row_pos + 1 165 print "num rows = ", num_rows 165 166 while row_i<num_rows: 166 167 num_cols = worksheet.row_len(row_i) … … 168 169 169 170 found_pdf = False 171 empty_id = False 170 172 171 173 for col_j in range(num_cols): … … 192 194 formatted_cell_value = formatted_cell_value.strip().replace(" ","-") 193 195 id = formatted_cell_value 194 195 pdf_file = id_to_relative_pdf_file(id) 196 pdf_file_root, pdf_ext = os.path.splitext(pdf_file) 197 PDF_file = pdf_file_root + ".PDF" 198 199 pdf_filename = os.path.join(excel_dirname,pdf_file) 200 PDF_filename = os.path.join(excel_dirname,PDF_file) 201 202 if os.path.exists(pdf_filename): 203 found_pdf = True 204 formatted_utf8_row.insert(0, pdf_file) 205 elif os.path.exists(PDF_filename): 206 found_pdf = True 207 formatted_utf8_row.insert(0, PDF_file) 196 if id == u"": 197 empty_id = True 198 print "empty id, row ",row_i 208 199 else: 209 print "Unbound id: '" + id + "'" 210 211 # if isinstance(formatted_cell_value, basestring): 200 pdf_file = id_to_relative_pdf_file(id) 201 pdf_file_root, pdf_ext = os.path.splitext(pdf_file) 202 PDF_file = pdf_file_root + ".PDF" 203 204 pdf_filename = os.path.join(excel_dirname,pdf_file) 205 PDF_filename = os.path.join(excel_dirname,PDF_file) 206 207 if os.path.exists(pdf_filename): 208 found_pdf = True 209 formatted_utf8_row.insert(0, pdf_file) 210 elif os.path.exists(PDF_filename): 211 found_pdf = True 212 formatted_utf8_row.insert(0, PDF_file) 213 else: 214 print "Unbound id: '" + id + "'" 215 216 # if isinstance(formatted_cell_value, basestring): 212 217 # # Remove any trailing whitespace. 
213 218 # # Newline at end particular harmful for a entry in the CSV file … … 237 242 site_no_ordering = "%s%03d" % (site_no_primary_letter, site_no_primary_num) 238 243 else: 239 if site_no != "various": 240 print "Warning: Site No '"+site_no+"' did not form Capital Letter followed by Digits" 241 print "Leaving value unchanged for column 'Site No Ordering'" 242 243 site_no_ordering = site_no 244 # try eg CA/5 245 pattern=re.compile('^([A-Z][A-Z])') 246 site_no_match2 = pattern.match(site_no) 247 248 if site_no_match2: 249 site_no_ordering = site_no_match2.group(1) 250 print "matched second pattern ", site_no_ordering 251 else: 252 if site_no != "various": 253 print "Warning: Site No '"+site_no+"' did not form Capital Letter followed by Digits" 254 print "Leaving value unchanged for column 'Site No Ordering'" 255 256 site_no_ordering = site_no 244 257 else: 245 258 site_no_ordering = "" … … 252 265 if found_pdf: 253 266 pdfbound_wr.writerow(formatted_utf8_row) 267 elif empty_id: 268 print "id was empty, not outputting row", formatted_utf8_row 254 269 else: 255 270 unbound_wr.writerow(formatted_utf8_row)
Note: See TracChangeset for help on using the changeset viewer.