Changeset 33204

Show
Ignore:
Timestamp:
23.06.2019 22:49:47 (3 weeks ago)
Author:
davidb
Message:

Regularization of SiteNo through sprintf "%03d"-style formatting so that values sort more nicely when browsing by this metadata; extra field added to the sanitized CSV output; fixed typo in TLA name

Location:
main/trunk/model-sites-dev/heritage-nz/collect/pdf-reports/prepare
Files:
2 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/model-sites-dev/heritage-nz/collect/pdf-reports/prepare/xls-to-csv.py

    r33203 r33204  
    22import csv 
    33import os 
     4import re 
    45 
    56import xlsutil 
     
    139140                    header_name = "dc.Title" 
    140141 
    141                 if header_name == "RelevantTLA's": 
     142                if header_name == "Relevant TLA's": 
    142143                    header_name = "TLA" 
    143144 
     
    146147 
    147148                entry_utf8_row.append(metadata_name_utf8) 
     149 
     150                if header_name == "Site No": 
     151                    entry_utf8_row.append("SiteNoOrdering") 
    148152 
    149153##            else: 
     
    211215 
    212216                    formatted_utf8_row.append(formatted_cell_value_utf8) 
     217 
     218                    if header_name == "Site No": 
     219                        site_no = formatted_cell_value_utf8 
     220                        site_no_ordering = None 
     221 
     222                        if site_no and site_no.strip(): 
     223                            # Non-empty entry 
     224                            pattern = re.compile('^([A-Z])(\d+)') 
     225                            site_no_match = pattern.match(site_no) 
     226                             
     227                            if site_no_match: 
     228                                site_no_primary_letter = site_no_match.group(1) 
     229                                site_no_primary_num = int(site_no_match.group(2)) 
     230                                 
     231                                site_no_ordering = "%s%03d" % (site_no_primary_letter, site_no_primary_num) 
     232                            else: 
     233                                if site_no != "various": 
     234                                    print "Warning: Site No '"+site_no+"' did not form Capital Letter followed by Digits" 
     235                                    print "Leaving value unchanged for column 'Site No Ordering'" 
     236 
     237                                site_no_ordering = site_no 
     238                        else: 
     239                            site_no_ordering = "" 
     240 
     241                        formatted_utf8_row.append(site_no_ordering) 
     242 
    213243##                else: 
    214244##                    print "Warning: No column number mapping for header name \""+header_name+"\" => skipping" 
  • main/trunk/model-sites-dev/heritage-nz/collect/pdf-reports/prepare/xlsutil.py

    r33202 r33204  
    77 
    88sanitized_headers = [ "Report Identifier", "Author", "Title", "Produced By", "Date", "HNZPT Region", "File No", 
    9                       "Authority No", "Site No", "Relevant TLA's", "Record Type" ] 
     9                      "Authority No", "Site No", "Relevant TLA's", "Record Type", "Date Entered" ] 
    1010 
    1111def format_if_int(cell_value):