#!/usr/bin/env python
"""
Selective directory scanning

Selects is a list of extended unix filename wildcards prefixed by + (include)
or - (exclude). This is processed in order, and the first match is used to
include or exclude a file. The default is to include if no match is made.
Directories will only match selects ending in '/'.

When scanning directories, selects are used to prune directories when
possible. A directory is not pruned unless all possible files and directories
within it are excluded. This means "-home/,+**.c" will not prune a directory
"home/" (but the directory itself will be excluded), since it is possible that
there could be a file ending in '.c' included somewhere within it. It is
dangerously easy to use selects that do not allow any directories to be
pruned. Any entry prefixed by '+**' will force scanning of all directories not
pruned by an entry earlier in the list. The safest way to explicitly prune a
directory is with a "-<directory>**" before any includes.

Thoughts

Implicit vs explicit inclusion/pruning;

a) files can be included, irrespective of whether their parent directories are.
b) any excluded subdirectory implies excluding all files within it

a) requires complex piece-by-piece pattern compares to see if a dir can be
pruned without excluding any included files. This makes filematching easy, but
pruning hard.
b) requires piece-by-piece filename compares to see if a file is excluded
because of directory pruning. This makes matching hard, but pruning easy.

Of the two, b) is probably easier because piece-by-piece processing of proper
filenames is easier than piece-by-piece processing of extended unix wildcards.
However, one may be more intuitive and/or flexible than the other. It is
possible to exclude parent directories of included files using a), but not b).

+**.c,-** will find all *.c files using a), but b) will only include *.c files
in the start directory, due to all directories being pruned.

+**/,+**.c,-** will perform the above for b), but will also include all
directories.

default include vs default exclude...
should the default be to include or exclude?

"""
import sys,os,re
from stat import *
from efnmatch import efnmatch

# Regexes for the wildcard constructs that match os.sep; esplit() treats
# any of these as a possible "separator" when splitting a pattern.
#
# exclpat: a negated set "[!...]" whose contents do not list os.sep.
exclpat=r"\[![^\%s][^]\%s]*\]" % (os.sep,os.sep)        # an exclude
# inclpat: a non-negated set "[...]" whose contents include os.sep.
inclpat=r"\[(?:[^!][^]]*)?\%s[^]]*\]" % os.sep          # an include
# eseppat: "**", "??", or either of the two set forms above.
eseppat=r"\*\*|\?\?|" + exclpat + '|' + inclpat         # any type of wildcard
# esplitpat groups: (1) text before the separator wildcard, (2) the wildcard
# itself, (3) the rest of the pattern. Group 1 is greedy, so the rightmost
# separator wildcard in the pattern is the one captured in group 2.
esplitpat="^(.*)("+ eseppat + ")(.*?)$"                 # esplit regex
esplitre=re.compile(esplitpat)

def esplit(pat):
    """Split an extended shell pattern in the manner of os.path.split().

    Returns a 3-tuple (path, sep, name). The separator is returned as well
    because it is not always os.sep: it may be one of the wildcard
    constructs recognised by esplitre. The wildcard split is used when that
    wildcard ends after the last literal os.sep; otherwise the pattern is
    split at the last os.sep. A pattern with no separator of either kind
    yields ("", "", pat).
    """
    wild = esplitre.match(pat)
    last_sep = pat.rfind(os.sep)
    # A separator wildcard that ends beyond the last literal separator wins.
    if wild is not None and last_sep < wild.end(2):
        return wild.group(1, 2, 3)
    # No literal separator either: the whole pattern is the name part.
    if last_sep < 0:
        return "", "", pat
    return pat[:last_sep], os.sep, pat[last_sep + 1:]
    
def edirname(pat):
    """Return the directory part of an extended shell pattern.

    This is the os.path.dirname() counterpart for patterns: the first
    element of the (path, sep, name) tuple produced by esplit().
    """
    head, _sep, _name = esplit(pat)
    return head

def ebasename(pat):
    """Return the final name part of an extended shell pattern.

    This is the os.path.basename() counterpart for patterns. esplit()
    returns a 3-tuple (path, sep, name), so the name is element 2; indexing
    element 3 would always raise IndexError.
    """
    return esplit(pat)[2]

def filematch(path,selects):
    """Decide whether a path should be included when scanning.

    Each entry of selects is a wildcard prefixed by a one-character sign.
    Entries are tried in order and the first whose wildcard matches path
    decides the result: true for a '+' sign, false otherwise. When nothing
    matches, the path is included (returns 1).
    """
    for select in selects:
        if not efnmatch(path, select[1:]):
            continue
        # First matching entry wins; its sign decides.
        return select[0] == '+'
    # Default is include.
    return 1

def prunematch(path,selects):
    """Decide whether a directory path can be pruned while scanning.

    Entries of selects are examined in order:

    * If the entry's wildcard ends in '**' and matches path, the sign
      decides: prune for '-', do not prune otherwise.
    * Otherwise, for a '+' entry, the wildcard is shortened piece by piece
      with esplit(); if path matches any of those leading portions, a file
      below path could still be included, so path is not pruned.

    Returns a true value when path can be pruned, a false value otherwise.
    The default, when no entry decides, is not to prune (returns 0).
    """
    for select in selects:
        sig, pat = select[0], select[1:]
        # If path matches a wildcard-ended pattern, pruning depends on sig.
        if pat[-2:] == '**' and efnmatch(path, pat):
            return sig == '-'
        # If path matches part of an include, it cannot be pruned.
        elif sig == '+':
            head, sep, name = esplit(pat)
            # A pattern that already ends in its separator is tested
            # without appending that separator again.
            if name == "":
                sep = ""
            # Walk up the pattern one component at a time until nothing
            # is left of it.
            while head + sep:
                if efnmatch(path, head + sep):
                    return 0
                head, sep, name = esplit(head)
    # Default is don't prune.
    return 0

def __scan__((filelist,selects,match,prune),dirname,names):
    """Used by scan as the method for os.path.walk"""
    for n in names[:]:
        file=os.path.join(dirname,n)[2:]
        if os.path.isdir(file):
            file=os.path.join(file,"")
        if match(file,selects):
            filelist.append(file)
        if os.path.isdir(file) and prune(file,selects):
            names.remove(n)
        
def scan(startdir='.',selects=[],match=filematch,prune=prunematch):
    """Build a list of paths by scanning the tree below startdir.

    The process changes directory to startdir for the duration of the walk,
    so the returned paths are relative to it. '.' itself is listed first
    when match('.', selects) is true. match(path, selects) decides whether
    a path is listed; prune(path, selects) decides whether a directory is
    skipped.

    The original working directory is restored even if match, prune, or
    the walk raises, so a failed scan does not leave the process in
    startdir.

    Returns the list of selected paths.
    """
    olddir = os.getcwd()
    os.chdir(startdir)
    try:
        filelist = []
        if match('.', selects):
            filelist.append('.')
        # NOTE: os.path.walk exists only on Python 2.
        os.path.walk('.', __scan__, (filelist, selects, match, prune))
    finally:
        os.chdir(olddir)
    return filelist

def filematch2(path,selects):
    """Test a path against several select lists at once.

    selects is a sequence of select lists. The path is included (returns 1)
    only if filematch() accepts it for every list; the first list that
    rejects it gives 0. An empty sequence includes everything.
    """
    verdict = 1
    for select_list in selects:
        if filematch(path, select_list):
            continue
        # One rejecting list is enough to exclude the path.
        verdict = 0
        break
    return verdict

def prunematch2(path,selects):
    """Test whether a path can be pruned under several select lists.

    selects is a sequence of select lists. The path can be pruned
    (returns 1) as soon as prunematch() allows pruning for any one list;
    otherwise the result is 0. An empty sequence prunes nothing.
    """
    prunable = 0
    for select_list in selects:
        if prunematch(path, select_list):
            prunable = 1
            break
    return prunable

def scan2(startdir='.',selects=[[]]):
    """Scan startdir using several select lists at once.

    selects is a sequence of select lists. Matching and pruning are done by
    filematch2 and prunematch2, passed through to scan().

    Returns the list of selected paths produced by scan(); without the
    return statement the result was discarded and the caller got None.
    """
    return scan(startdir,selects,filematch2,prunematch2)
    
