Changeset 32813 for main


Ignore:
Timestamp:
2019-02-25T22:06:34+13:00 (5 years ago)
Author:
davidb
Message:

Changes after testing

Location:
main/trunk/model-sites-dev/heritage-nz/collect/reports-2019/prepare
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/model-sites-dev/heritage-nz/collect/reports-2019/prepare/README.txt

    r32807 r32813  
    55  cd virtualenv-16.4.1
    66
    7   python virtualenv.py myEnv
     7  python virtualenv.py ../my-python-env
     8
     9  cd ..
     10  source my-python-env/bin/activate
    811
    912  pip install xlrd
  • main/trunk/model-sites-dev/heritage-nz/collect/reports-2019/prepare/xls-to-csv.py

    r32808 r32813  
    33import os
    44
    5 worksheet_name = "Archaeological reports"
     5## worksheet_name = "Archaeological reports"
     6
     7
     8# https://code.activestate.com/recipes/546518-simple-conversion-of-excel-files-into-csv-and-yaml/download/1/
     9def format_excelval(book, type, value, wanttupledate):
     10    """ Clean up the incoming excel data """
     11    ##  Data Type Codes:
     12    ##  EMPTY   0
     13    ##  TEXT    1 a Unicode string
     14    ##  NUMBER  2 float
     15    ##  DATE    3 float
     16    ##  BOOLEAN 4 int; 1 means TRUE, 0 means FALSE
     17    ##  ERROR   5
     18    returnrow = []
     19    if   type == 2: # NUMBER
     20        if value == int(value): value = int(value)
     21    elif type == 3: # DATE
     22        datetuple = xlrd.xldate_as_tuple(value, book.datemode)
     23        value = datetuple if wanttupledate else tupledate_to_isodate(datetuple)
     24    elif type == 5: # ERROR
     25        value = xlrd.error_text_from_code[value]
     26    return value
     27
    628
    729def excel_to_bound_pdf_csv(excel_filename):
    8     workbook = xlrd.open_workbook(excel_filename)
    9     worksheet = workbook.sheet_by_name(worksheet_name)
    10    
     30    workbook = xlrd.open_workbook(excel_filename, formatting_info=True)
     31    ## worksheet = workbook.sheet_by_name(worksheet_name)
     32    worksheet = workbook.sheet_by_index(0)
     33
    1134    excel_filename_split = os.path.split(excel_filename)
    1235    excel_dirname = excel_filename_split[0]
     
    4770        entry_utf8_row = []
    4871        for entry in worksheet.row_values(row_i):
     72            if entry == "Report Identifier":
     73                entry = "hnz.Identifier"
     74
    4975            entry_utf8 = unicode(entry).encode("utf-8")
    5076            metadata_entry_utf8 = entry_utf8.replace(" ", "")
     
    6793#            for entry in worksheet.row_values(row_i):
    6894            for col_j in range(num_cols):
    69                 entry = worksheet.cell_value(row_i,col_j)
     95                cell = worksheet.cell(row_i,col_j)
     96#                xf = workbook.xf_list[cell.xf_index]
     97#                format = workbook.format_map[xf.format_key]
     98#                format_str = format.format_str
     99
     100#                print 'rowx=%d colx=%d ctype=%d xfx=%d s_value=%s fmt=%s' \
     101#                    % (row_i, col_j, cell.ctype, cell.xf_index, str(cell.value), format_str)
     102
     103                cell_type  = worksheet.cell_type(row_i,col_j)
     104                cell_value = worksheet.cell_value(row_i,col_j)
     105
     106                format_cell = format_excelval(workbook,cell_type,cell_value,False)
     107                #print "**** format cell str = " +str(format_cell)
     108
     109                # entry = worksheet.cell_value(row_i,col_j)
     110                entry = format_cell
    70111
    71112                if col_j == 0:
Note: See TracChangeset for help on using the changeset viewer.