Ignore:
Timestamp:
2019-06-23T22:49:47+12:00 (5 years ago)
Author:
davidb
Message:

Regularized SiteNo using sprintf-style %03d formatting so that values sort more naturally when browsing by this metadata; added an extra field to the sanitized CSV output; fixed a typo in the TLA header name.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/model-sites-dev/heritage-nz/collect/pdf-reports/prepare/xls-to-csv.py

    r33203 r33204  
    22import csv
    33import os
     4import re
    45
    56import xlsutil
     
    139140                    header_name = "dc.Title"
    140141
    141                 if header_name == "RelevantTLA's":
     142                if header_name == "Relevant TLA's":
    142143                    header_name = "TLA"
    143144
     
    146147
    147148                entry_utf8_row.append(metadata_name_utf8)
     149
     150                if header_name == "Site No":
     151                    entry_utf8_row.append("SiteNoOrdering")
    148152
    149153##            else:
     
    211215
    212216                    formatted_utf8_row.append(formatted_cell_value_utf8)
     217
     218                    if header_name == "Site No":
     219                        site_no = formatted_cell_value_utf8
     220                        site_no_ordering = None
     221
     222                        if site_no and site_no.strip():
     223                            # Non-empty entry
     224                            pattern = re.compile('^([A-Z])(\d+)')
     225                            site_no_match = pattern.match(site_no)
     226                           
     227                            if site_no_match:
     228                                site_no_primary_letter = site_no_match.group(1)
     229                                site_no_primary_num = int(site_no_match.group(2))
     230                               
     231                                site_no_ordering = "%s%03d" % (site_no_primary_letter, site_no_primary_num)
     232                            else:
     233                                if site_no != "various":
     234                                    print "Warning: Site No '"+site_no+"' did not form Capital Letter followed by Digits"
     235                                    print "Leaving value unchanged for column 'Site No Ordering'"
     236
     237                                site_no_ordering = site_no
     238                        else:
     239                            site_no_ordering = ""
     240
     241                        formatted_utf8_row.append(site_no_ordering)
     242
    213243##                else:
    214244##                    print "Warning: No column number mapping for header name \""+header_name+"\" => skipping"
Note: See TracChangeset for help on using the changeset viewer.