import xlrd

# Currently access worksheet by index num
# Alternative is to do this by worksheet name
worksheet_index_pos = 0 
## worksheet_name = "Archaeological reports"

sanitized_headers = [ u"Report Identifier", u"Author", u"Title", u"Produced By", u"Date", u"HNZPT Region", u"File No",
                      u"Authority No", u"Site No", u"Relevant TLA's", u"Record Type", u"Date Entered" ]

def make_unicode(value):
    value_unicode = value

    if type(value) != unicode:
        value_str = value
        if type(value_str) != str:
            value_str = str(value)
        
        value_unicode =  value_str.decode('utf-8')

    return value_unicode


def cell_value_tidy_unicode(worksheet,row_i,col_j):
    value= worksheet.cell_value(row_i,col_j)

    value_unicode = make_unicode(value)
    value_unicode_stripped = value_unicode.strip()

    return value_unicode_stripped


def format_if_int(cell_value):
    # Numbers from worksheet are represented as floating-point type
    # This causes a problem when it is an 'int' as it then gets
    # written out as a floating point number (with a '.0')
    # => test for float and when there is no value after 
    #    the decimal point, explicitly cast to 'int'
    if type(cell_value) is float:
        if cell_value == int(cell_value): 
            cell_value = int(cell_value)

    return cell_value


header_row_pos = 0

def getHeaderMappingToColNum(worksheet):

    # For header line, build up hashmap of header-names to column number
    # Assume header line is first row of spreadsheet

    header_names_mapping = {}

    num_header_cols = worksheet.row_len(header_row_pos)

    found_header = False
    for col_j in range(num_header_cols):
        header_cell_value = cell_value_tidy_unicode(worksheet,header_row_pos,col_j)
        if header_cell_value == u"Report Identifier":
            found_header = True

        if header_cell_value and header_cell_value.strip():
            header_names_mapping[header_cell_value] = col_j

    if not found_header:
        header_names_mapping = None

    return header_names_mapping


# The following can be used on an XSL file, but not currently an XSLX file
# This is because, at the time of coding, xlrd does not support loading
# in an XSLT workbook with formatting_info=True
 
# https://code.activestate.com/recipes/546518-simple-conversion-of-excel-files-into-csv-and-yaml/download/1/
def format_excelval(book, type, value, wanttupledate):
    """ Clean up the incoming excel data """
    ##  Data Type Codes:
    ##  EMPTY   0
    ##  TEXT    1 a Unicode string 
    ##  NUMBER  2 float 
    ##  DATE    3 float 
    ##  BOOLEAN 4 int; 1 means TRUE, 0 means FALSE 
    ##  ERROR   5 
    returnrow = []
    if   type == 2: # TEXT
        if value == int(value): value = int(value)
    elif type == 3: # NUMBER
        datetuple = xlrd.xldate_as_tuple(value, book.datemode)
        value = datetuple if wanttupledate else tupledate_to_isodate(datetuple)
    elif type == 5: # ERROR
        value = xlrd.error_text_from_code[value]
    return value