#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Module with several helper functions
"""
import os
import collections
import re
[docs]def file_extensions_get(fname_list):
"""Returns file extensions in list
Args:
fname_list (list): file names, eg ['a.csv','b.csv']
Returns:
list: file extensions for each file name in input list, eg ['.csv','.csv']
"""
return [os.path.splitext(fname)[-1] for fname in fname_list]
[docs]def file_extensions_all_equal(ext_list):
"""Checks that all file extensions are equal.
Args:
ext_list (list): file extensions, eg ['.csv','.csv']
Returns:
bool: all extensions are equal to first extension in list?
"""
return len(set(ext_list))==1
[docs]def file_extensions_contains_xls(ext_list):
# Assumes all file extensions are equal! Only checks first file
return ext_list[0] == '.xls'
[docs]def file_extensions_contains_xlsx(ext_list):
# Assumes all file extensions are equal! Only checks first file
return ext_list[0] == '.xlsx'
[docs]def file_extensions_contains_csv(ext_list):
# Assumes all file extensions are equal! Only checks first file
return (ext_list[0] == '.csv' or ext_list[0] == '.txt')
[docs]def file_extensions_valid(ext_list):
"""Checks if file list contains only valid files
Notes:
Assumes all file extensions are equal! Only checks first file
Args:
ext_list (list): file extensions, eg ['.csv','.csv']
Returns:
bool: first element in list is one of ['.csv','.txt','.xls','.xlsx']?
"""
ext_list_valid = ['.csv','.txt','.xls','.xlsx']
return ext_list[0] in ext_list_valid
[docs]def columns_all_equal(col_list):
"""Checks that all lists in col_list are equal.
Args:
col_list (list): columns, eg [['a','b'],['a','b','c']]
Returns:
bool: all lists in list are equal?
"""
return all([l==col_list[0] for l in col_list])
[docs]def list_common(_list, sort=True):
l = list(set.intersection(*[set(l) for l in _list]))
if sort:
return sorted(l)
else:
return l
[docs]def list_unique(_list, sort=True):
l = list(set.union(*[set(l) for l in _list]))
if sort:
return sorted(l)
else:
return l
[docs]def list_tofront(_list,val):
return _list.insert(0, _list.pop(_list.index(val)))
[docs]def cols_filename_tofront(_list):
return list_tofront(_list,'filename')
[docs]def df_filename_tofront(dfg):
cfg_col = dfg.columns.tolist()
return dfg[cols_filename_tofront(cfg_col)]
[docs]def check_valid_xls(fname_list):
ext_list = file_extensions_get(fname_list)
if not file_extensions_all_equal(ext_list):
raise IOError('All file types and extensions have to be equal')
if not(file_extensions_contains_xls(ext_list) or file_extensions_contains_xlsx(ext_list)):
raise IOError('Only .xls, .xlsx files can be processed')
return True
[docs]def compare_pandas_versions(version1, version2):
def cmp(a, b):
return (a > b) - (a < b)
def normalize(v):
return [int(x) for x in re.sub(r'(\.0+)*$','', v).split(".")]
return cmp(normalize(version1), normalize(version2))