Changeset 32813 for main

Show
Ignore:
Timestamp:
25.02.2019 22:06:34 (10 months ago)
Author:
davidb
Message:

Changes after testing

Location:
main/trunk/model-sites-dev/heritage-nz/collect/reports-2019/prepare
Files:
2 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/model-sites-dev/heritage-nz/collect/reports-2019/prepare/README.txt

    r32807 r32813  
    55  cd virtualenv-16.4.1 
    66 
    7   python virtualenv.py myEnv 
     7  python virtualenv.py ../my-python-env 
     8 
     9  cd .. 
     10  source my-python-env/bin/activate  
    811 
    912  pip install xlrd 
  • main/trunk/model-sites-dev/heritage-nz/collect/reports-2019/prepare/xls-to-csv.py

    r32808 r32813  
    33import os 
    44 
    5 worksheet_name = "Archaeological reports" 
     5## worksheet_name = "Archaeological reports" 
     6 
     7 
     8# https://code.activestate.com/recipes/546518-simple-conversion-of-excel-files-into-csv-and-yaml/download/1/ 
     9def format_excelval(book, type, value, wanttupledate): 
     10    """ Clean up the incoming excel data """ 
     11    ##  Data Type Codes: 
     12    ##  EMPTY   0 
     13    ##  TEXT    1 a Unicode string  
     14    ##  NUMBER  2 float  
     15    ##  DATE    3 float  
     16    ##  BOOLEAN 4 int; 1 means TRUE, 0 means FALSE  
     17    ##  ERROR   5  
     18    returnrow = [] 
     19    if   type == 2: # NUMBER 
     20        if value == int(value): value = int(value) 
     21    elif type == 3: # DATE 
     22        datetuple = xlrd.xldate_as_tuple(value, book.datemode) 
     23        value = datetuple if wanttupledate else tupledate_to_isodate(datetuple) 
     24    elif type == 5: # ERROR 
     25        value = xlrd.error_text_from_code[value] 
     26    return value 
     27 
    628 
    729def excel_to_bound_pdf_csv(excel_filename): 
    8     workbook = xlrd.open_workbook(excel_filename) 
    9     worksheet = workbook.sheet_by_name(worksheet_name) 
    10      
     30    workbook = xlrd.open_workbook(excel_filename, formatting_info=True) 
     31    ## worksheet = workbook.sheet_by_name(worksheet_name) 
     32    worksheet = workbook.sheet_by_index(0) 
     33 
    1134    excel_filename_split = os.path.split(excel_filename) 
    1235    excel_dirname = excel_filename_split[0] 
     
    4770        entry_utf8_row = [] 
    4871        for entry in worksheet.row_values(row_i): 
     72            if entry == "Report Identifier": 
     73                entry = "hnz.Identifier" 
     74 
    4975            entry_utf8 = unicode(entry).encode("utf-8") 
    5076            metadata_entry_utf8 = entry_utf8.replace(" ", "") 
     
    6793#            for entry in worksheet.row_values(row_i): 
    6894            for col_j in range(num_cols): 
    69                 entry = worksheet.cell_value(row_i,col_j) 
     95                cell = worksheet.cell(row_i,col_j) 
     96#                xf = workbook.xf_list[cell.xf_index] 
     97#                format = workbook.format_map[xf.format_key] 
     98#                format_str = format.format_str 
     99 
     100#                print 'rowx=%d colx=%d ctype=%d xfx=%d s_value=%s fmt=%s' \ 
     101#                    % (row_i, col_j, cell.ctype, cell.xf_index, str(cell.value), format_str) 
     102 
     103                cell_type  = worksheet.cell_type(row_i,col_j) 
     104                cell_value = worksheet.cell_value(row_i,col_j) 
     105 
     106                format_cell = format_excelval(workbook,cell_type,cell_value,False) 
     107                #print "**** format cell str = " +str(format_cell) 
     108 
     109                # entry = worksheet.cell_value(row_i,col_j) 
     110                entry = format_cell 
    70111 
    71112                if col_j == 0: