List All Subdirectories And Files In Python, Then Assign Depth And Extensions.

2020-12-05 Updated

目錄

Original Post

〈List All Subdirectories And Files In Python, Then Assign Depth And Extensions.〉


Iterating over a path to list all of its subdirectories and files is an extremely helpful capability. For example, you can read Excel files in bulk with just a few simple lines of code. For a lazy guy like me, wrapping it in a function lets me write code more efficiently.

The code below looks long, but it is made up of several small functions. The helpers are all used to manipulate lists, and the main function that lists paths and files is os_get_dir_list().

Copy, Paste And Work

def list_remove_empty(li):
    '''
    Remove every empty string and every empty list from li.
    li : list. Modified in place; the same object is returned.
    '''
    for blank in ("", []):
        # list.remove() drops one match per call, so repeat until none is left.
        while blank in li:
            li.remove(blank)

    return li

# ..........
    
def conv_to_list(obj):
    '''
    Wrap obj in a one-element list unless it already is a list.
    obj : any object. A list is returned as the same object, not a copy.
    '''
    return obj if isinstance(obj, list) else [obj]

# ..........

def li_element_add(li, head='', tail=''):
    '''
    Add a prefix and/or suffix to every element of the list.
    li   : list. Either all str or all int.
    head : str or int. For a str list, the string put in front of each
           element. For an int list, the number added to each element;
           the default '' leaves the numbers unchanged.
    tail : str. String appended to each element of a str list. It is
           ignored for an int list.

    Returns a new list. Raises TypeError when the element types and the
    head/tail types do not fit one of the combinations above.
    '''
    if all(isinstance(x, str) for x in li) and \
            isinstance(head, str) and isinstance(tail, str):
        return [head + i + tail for i in li]

    if all(isinstance(x, int) for x in li):
        if isinstance(head, int):
            return [head + i for i in li]

        # The default head is the empty string, which for numbers means
        # "nothing to add".
        if head == '':
            return list(li)

    raise TypeError(
        'li_element_add expects a list of str with str head/tail, '
        'or a list of int with an int head.')

# ..........
    
def list_conv_ele_type(li, to_type='str'):
    '''
    Convert data type of all elements in list.
    li      : list (or any iterable).
    to_type : str or callable. One of 'str', 'int', 'float', or a callable
              that converts a single element.

    Returns a new list. Raises ValueError for an unknown to_type name.
    '''
    converters = {'str': str, 'int': int, 'float': float}

    if isinstance(to_type, str):
        convert = converters.get(to_type)
    elif callable(to_type):
        convert = to_type
    else:
        convert = None

    if convert is None:
        raise ValueError(
            'Unsupported to_type: {!r}. Use one of {} or a callable.'
            .format(to_type, sorted(converters)))

    return list(map(convert, li))
    
# ..........

def list_flatten(li, join=False, divider=", "):
    '''
    Flatten arbitrarily nested lists into one flat list.
    li      : list. Anything that is not a list, and an empty list, is
              returned unchanged (even when join is True).
    join    : Boolean. Concat the flattened elements as a string.
    divider : str. Separator used when join is True.

    Empty strings and empty lists are dropped at every nesting level.
    The input list is not modified.
    '''

    if (not isinstance(li, list)) or (len(li) == 0):
        return li

    results = []

    # Walk the nesting with an explicit stack of iterators instead of
    # recursion, so long or deeply nested lists cannot hit the recursion
    # limit.
    stack = [iter(li)]

    while stack:
        for item in stack[-1]:
            if isinstance(item, list):
                # Descend; the parent iterator resumes after this one ends.
                stack.append(iter(item))
                break

            if isinstance(item, str) and item == "":
                # Remove blank to prevent error.
                continue

            results.append(item)
        else:
            # Current level exhausted.
            stack.pop()

    if join:
        results = divider.join(map(str, results))

    return results

# ..........

def os_get_dir_list(path, level=0, extensions=None, 
                    remove_temp=True):
    '''
    List the subdirectories and files found under path.

    path        : str. Directory to walk.
    level       : int. 0 to list all subdirectories and files. Otherwise a
                  directory is skipped when its path contains more than
                  `level` extra path separators compared with path.
    extensions  : str or list. Keep only files whose text after the last
                  '.' equals one of these (case-sensitive; a leading '.'
                  is optional). None keeps every file.
    remove_temp : boolean. Remove temp files, i.e. files whose name starts
                  with '~'.

    Returns a dict with two keys:
    'DIRS'  : list of subdirectory paths.
    'FILES' : list of [full path, file name] pairs.
    '''

    import os
    path_level = path.count(os.path.sep)

    # Normalise the filter once. A single string must become a one-item
    # collection, otherwise `in` would do a substring match on it.
    if extensions is not None:
        if isinstance(extensions, str):
            extensions = [extensions]
        extensions = {ext.lstrip('.') for ext in extensions}

    result_dirs = []
    result_files = []

    # walk yields three parts ......
    # [0] root
    # [1] subdirs
    # [2] files
    for root, subdirs, files in os.walk(path):

        cur_level = root.count(os.path.sep)

        if (level != 0) and (path_level + level < cur_level):
            # Too deep. Emptying subdirs in place stops os.walk from
            # descending further; everything below would be skipped anyway.
            subdirs[:] = []
            continue

        # Subdirectory ......
        result_dirs.extend(root + '/' + name for name in subdirs)

        # Files ......
        for name in files:

            # Filter with extension
            if (extensions is not None) and \
                    (name.split('.')[-1] not in extensions):
                continue

            # Remove temp file ...
            # Some hidden temp files may be included, like excel files.
            if remove_temp and name.startswith('~'):
                continue

            # Store complete paths and file names.
            result_files.append([root + '/' + name, name])

    results={'DIRS':result_dirs,
             'FILES':result_files}

    return results

Related Posts

〈Import Custom Package/Library/Module In Python.〉
〈Learn Python And R On DataCamp. Start Your Data Science Career.〉

A
Aron

Technical Notes
相关文章