Python 3370
Consegui deixar isso o mais funcional possível e até fiz a alternância funcionar (com a verificação correta de contagem dupla!). Até onde sei, isso funciona para tudo, exceto asserções de lookaround (lookahead/lookbehind), porque isso seria loucura.
Para quem escreve sua própria solução, sinta-se à vontade para usar / melhorar meus métodos o quanto quiser.
Código:
# Shared aliases and lookup tables of the golfed program (deliberately kept on one line).
#   R,L,E,I,S,H,D  short names for range, len, enumerate, int, sum, chr, ord
#   F              the string "Infinity", used as the marker for an unbounded count
#   J(l)           ''.join(l)
#   U(s)           every character chr(1)..chr(127) that does not occur in s
#   s, c           whitespace ' \n\t', and chr(1)..chr(31) without those whitespace characters
#   d, l, u        digits '0'-'9', lower-case 'a'-'z', upper-case 'A'-'Z'
#   p              32 punctuation characters
#   Y              maps each supported escape / [:class:] name to the string of characters it accepts
#   C(l,n)         all n-element combinations of the list l ([[]] when n<=0)
#   O(x)           how many characters of Y['[:ascii:]'] are accepted by every element of x,
#                  where x holds ('e',esc), ('t',ch) or ('[',body) tuples; B is defined further down
# Why the table looks the way it does:
#   - the digit range ends at 58 because range() excludes its end (59 would add ':')
#   - \S leaves out s, and \w, \W and [:word:] include the digits
#   - O looks each escape up on its own: a class name uses Y, any other escape
#     stands for the single character after its backslash (e[1][1:])
R=range;L=len;E=enumerate;I=int;F="Infinity";S=sum;H=chr;D=ord;J=lambda l:''.join(l);U=lambda s:J([H(i)if H(i) not in s else''for i in R(1,128)]);s=' \n\t';c=J((H(i)if H(i)not in s else'')for i in R(1,32));d,l,u=[J(H(i)for i in R(*n))for n in [[48,58],[97,123],[65,91]]];p='`~!@#$%^&*()-_=+[{]}\\|;:\'",<.>/?';Y={'\\s':s,'\\S':c+p+u+l+d,'\\d':d,'\\D':U(d),'\\w':u+l+d+'_','\\W':U(u+l+d+'_'),'[:alnum:]':u+l+d,'[:alpha:]':u+l,'[:ascii:]':U(''),'[:blank:]':' \t','[:cntrl:]':c,'[:digit:]':d,'[:graph:]':p+d+l+u,'[:lower:]':l,'[:print:]':p+d+l+u+s,'[:punct:]':p,'[:space:]':s,'[:upper:]':u,'[:word:]':l+u+d+'_','[:xdigit:]':d+'ABCDEF'};C=lambda l,n:[d+[l[i]]for i in R(L(l))for d in C(l[i+1:],n-1)]if n>0 else[[]];O=lambda x:S([all((c in s)for s in [(Y[e[1]]if e[1]in Y else e[1][1:])if e[0]=='e'else(e[1]if e[0]=='t'else((Y[e[1]]if e[1]in Y else B(e[1]))if e[0]=='['else''))for e in x])for c in Y['[:ascii:]']])
# X(r): count the strings matched by r, a regex fragment without its ^...$ anchors.
# Returns (count, members). count is an int, or F when the fragment matches
# unboundedly many strings. members is the parsed form that T uses for overlap
# counting, a list of (tag, payload) tuples:
#   ('t',ch) literal          ('e',esc) backslash escape     ('[',body) bracket class or '.'
#   ('{',q)  quantifier for the previous element             ('(',members) group
#   ('|',[('(',members),...]) alternation
def X(r):
 # Split r on every | that is outside () and [].
 x=[];l=0;c=''
 for h in r:
  l+=I(h in'([')-I(h in')]')
  if l<1 and'|'==h:x+=[c];c=''
  else:c+=h
 x+=[c]
 if L(x)>1:
  # Alternation: add the branch counts, then correct for strings matched by
  # several branches (inclusion-exclusion over every subset of 2+ branches).
  o=0;m=[];u=[]
  for e in x:
   p,b=X(e)
   if p==F:return F,[]
   o+=p;m+=[('(',b)];u+=[b]
  return o-S([(-1)**(i%2)*T(s) for i in R(2,L(u)+1)for s in C(u,i)]),[('|',m)]
 # Single branch: tokenise it left to right; z is how many characters each token used.
 u=[];o=[];m=[];w=1
 while r!='':
  h=r[0];z=0
  if h in'([{':
   # Scan to the matching closer, honouring nesting of the same bracket kind.
   l=1;i=1
   while l>0:k=r[i];l+=(k==h)-(k=={'(':')','[':']','{':'}'}[h]);i+=1
   u+=[(h,r[1:i-1])];z=i
  elif h=='\\':u+=[('e','\\'+eval("'\\%s'"%r[1]))]if r[1]in'nt'else[('e','\\'+r[1])];z=2
  elif h in ['*','+','?']:u+=[('{',h)];z=1
  # An anchor in the middle can never match. Return a (count, members) pair like
  # every other exit so callers can unpack it; the literal with an empty
  # character set makes O report no overlap with anything.
  elif h in'^$':return 0,[('t','')]
  elif h=='.':u+=[('[','[:ascii:]')];z=1
  else:u+=[('t',h)];z=1
  r=r[z:]
 # o collects one factor per element; a quantifier rewrites the previous factor.
 for t,g in u:
  if t=='(':p,b=X(g);o+=[p];m+=[('(',b)]
  elif t=='e':o+=[L(Y[g])]if g in Y else[1]
  elif t=='[':o+=[N(g)]
  elif t=='{':
   n=o[-1]
   # Q gives None for unbounded quantifiers and n may already be F; both make
   # the sum raise, which is reported as F.
   try:o[-1]=S([n**r for r in Q(g)])
   except:return F,[]
  elif t=='t':o+=[1]
  if t!='(':m+=[(t,g)]
 for c in o:
  if c==F:return F,[]
  w*=c
 return w,m
# N(s): number of characters matched by a bracket expression whose body (the text
# between [ and ]) is s.
#   - s is a key of Y            -> size of that character set
#   - any other s wrapped in [ ] -> 1
#   - otherwise every x-y triple adds ord(y)-ord(x)+1 and every other character adds 1;
#     with a leading ^ the result is 256 minus that total (n is used as 0/1 below).
def N(s):
if s in Y:return L(Y[s])
if(s[0],s[-1])==('[',']'):return 1
n=(s[0]=='^');a=0
if n:s=s[1:]
while s!='':
if L(s)>=3 and s[1]=='-':a+=D(s[2])-D(s[0])+1;s=s[3:];continue
a+=1;s=s[1:]
return 256*n+(-1)**n*a
# Q(s): the repetition counts allowed by quantifier body s.
#   '?' -> [0,1];  s starting with * or + -> None (no upper bound);
#   'l,h' -> the inclusive range l..h;  'l,' -> None;  'n' -> [n]
def Q(s):
if s=='?':return[0,1]
if s[0]in'*+':return None
if ','in s:
l,h=s.split(',')
return None if h==''else R(I(l),I(h)+1)
return[I(s)]
# B(s): the characters accepted by bracket body s, as a string (x-y ranges are
# expanded to every character between them). With a leading ^ the result is
# instead every character of Y['[:ascii:]'] that is not listed.
# w is assigned but never read.
def B(s):
n=(s[0]=='^')
if n:s=s[1:]
a='';w=''
while s!='':
h=s[0]
if 3<=L(s)and'-'==s[1]:
for i in R(D(s[0]),D(s[2])+1):a+=H(i)
s=s[3:];continue
a+=h;s=s[1:]
return J([c*(c not in a)for c in Y['[:ascii:]']])if n else a
# T(x): number of strings matched by EVERY member list in x (the size of the
# intersection). Each element of x is a member list as returned by X.
# The lists are rewritten until each starts with a single-character element,
# then O counts the characters they share and T recurses on the remainders.
def T(x):
 # All lists exhausted: exactly the empty string is matched by all of them.
 if all(e==[] for e in x):return 1
 for i,e in E(x):
  # Quantified first element: sum over every allowed repetition count.
  if L(e)>=2 and e[1][0]=='{':return S([T([[e[0]]*n+e[2:]]+x[:i]+x[i+1:])for n in Q(e[1][1])])
  if L(e)>=1:
   # Group: splice its members in place.
   if e[0][0] == '(':return T([e[0][1]+e[1:]]+x[:i]+x[i+1:])
   if e[0][0]== '|':
    # Alternation: add the count for each branch followed by the rest of e ...
    t=S(T([[s]+e[1:]]+x[:i]+x[i+1:])for s in e[0][1])
    u=[[s]for s in e[0][1]]
    # ... then correct for strings counted under several branches. Each subset
    # keeps the rest of e and the other lists, so only strings that are
    # also in the overall intersection get corrected.
    return t-S((-1)**(j%2)*T([a+e[1:]for a in b]+x[:i]+x[i+1:])for j in R(2,L(u)+1)for b in C(u,j))
 # Some list is exhausted while another still needs a character: no common string.
 if any(e==[]for e in x):return 0
 return O([e[0]for e in x])*T([e[1:]for e in x])
r=raw_input();e=[];l=0;c=''
for h in r:
l+=I(h in'([')-I(h in')]')
if l<1 and'|'==h:e+=[c];c=''
else:c+=h
e+=[c];o=[];x=[]
for f in e:
if '^'!=f[0]or'$'!=f[-1]:print F;quit()
n,m=X(f[1:-1])
if n==F:print F;quit()
o+=[n];x+=[m]
print S(o)-S([(-1)**(i%2)*T(s) for i in R(2,L(x)+1)for s in C(x,i)])
Ungolfed:
# Control characters chr(1)..chr(31), leaving out tab and newline.
controlchars = ''.join(
    chr(code) for code in range(1, 32) if chr(code) not in '\t\n'
)
# Character sets for the backslash escapes and [:class:] names the counter
# understands. Each value is a plain string listing every character the class
# accepts, so len(CLASSES[name]) is the number of one-character matches.
# Note: '[:ascii:]' spans chr(1)..chr(126) here (chr(127) is not listed), and
# '[:xdigit:]' lists only the upper-case hex letters.
CLASSES={
'\\s':' \t\n',
'\\S':controlchars+'!"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~',
'\\d':'0123456789',
'\\D':controlchars+'\n\t !"#$%&\'()*+,-./:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~',
'\\w':'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_',
'\\W':controlchars+'\n\t !"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~',
'[:alnum:]':'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789',
'[:alpha:]':'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ',
'[:ascii:]':controlchars+'\n\t !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~',
'[:blank:]':' \t',
'[:cntrl:]':controlchars,
'[:digit:]':'0123456789',
'[:graph:]':'!"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~',
'[:lower:]':'abcdefghijklmnopqrstuvwxyz',
'[:print:]':'\n\t !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~',
'[:punct:]':'`~!@#$%^&*()-_=+[{]}\\|;:\'",<.>/?',
'[:space:]':' \t\n',
'[:upper:]':'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
'[:word:]':'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_',
'[:xdigit:]':'0123456789ABCDEF'
}
# Maps every bracket character to its partner, in both directions
# (opener -> closer and closer -> opener).
DELIMIT = dict(zip('()[]{}', ')(][}{'))
def combos(lst, num):
    """Return every ``num``-element combination of ``lst`` as a list of lists.

    Elements keep their original relative order inside each combination, and
    combinations are produced in order of the positions they pick.
    ``combos(lst, 0)`` is ``[[]]``; asking for more elements than ``lst``
    holds yields ``[]``.
    """
    if num == 0:
        return [[]]
    return [
        [head] + rest
        for pos, head in enumerate(lst)
        for rest in combos(lst[pos + 1:], num - 1)
    ]
def count_regex(regex):
    """Count the distinct strings matched by ``regex``.

    ``regex`` is split on every ``|`` that is not nested inside ``()`` or
    ``[]``. Each alternative must be anchored as ``^...$``; an alternative
    that is not (including an empty one, e.g. empty input or ``a||b``)
    matches unboundedly many strings.

    Returns an int, or the string ``'Infinity'`` when the number of matches
    is unbounded.
    """
    # Split into the top-level alternatives.
    exprs = []
    level = 0
    current = ''
    for char in regex:
        if char in '([':
            level += 1
        if char in ')]':
            level -= 1
        if char == '|' and level == 0:
            exprs.append(current)
            current = ''
        else:
            current += char
    exprs.append(current)

    comps = []
    expanded = []
    for e in exprs:
        # startswith/endswith instead of e[0]/e[-1]: an empty alternative must
        # report 'Infinity' rather than raise IndexError.
        if not e.startswith('^') or not e.endswith('$'):
            return 'Infinity'
        num, member = count_expr(e[1:-1])
        if num == 'Infinity':
            return 'Infinity'
        comps.append(num)
        expanded.append(member)

    # Inclusion-exclusion: strings matched by several alternatives must be
    # counted once, so subtract the overlaps of pairs, add back triples, ...
    total = sum(comps)
    for i in range(2, len(expanded) + 1):
        for subset in combos(expanded, i):
            if i % 2 == 0:
                total -= count_doubles(subset)
            else:
                total += count_doubles(subset)
    return total
def count_expr(expr):
    """Count the strings matched by ``expr``, a regex without its anchors.

    Returns a ``(count, members)`` pair on every path. ``count`` is an int,
    or the string ``'Infinity'`` when the expression matches unboundedly many
    strings (``members`` is then ``[]``). ``members`` is the parsed form used
    by ``count_doubles``: a list of ``(tag, payload)`` tuples where the tag is
    ``'t'`` (literal), ``'e'`` (backslash escape), ``'['`` (bracket class body,
    also used for ``.``), ``'{'`` (quantifier for the previous element),
    ``'('`` (group, payload is a nested member list) or ``'|'`` (alternation,
    payload is a list of ``'('`` members).
    """
    # Split on every '|' that is not nested inside () or [].
    branches = []
    depth = 0
    current = ''
    for char in expr:
        if char in '([':
            depth += 1
        if char in ')]':
            depth -= 1
        if char == '|' and depth == 0:
            branches.append(current)
            current = ''
        else:
            current += char
    branches.append(current)

    if len(branches) != 1:
        # Alternation: add up the branches, then correct for strings matched
        # by more than one branch (inclusion-exclusion).
        comps = 0
        members = []
        sub = []
        for branch in branches:
            comp, memb = count_expr(branch)
            if comp == 'Infinity':
                return 'Infinity', []
            comps += comp
            members.append(('(', memb))
            sub.append(memb)
        for i in range(2, len(sub) + 1):
            for subset in combos(sub, i):
                if i % 2 == 0:
                    comps -= count_doubles(subset)
                else:
                    comps += count_doubles(subset)
        return comps, [('|', members)]

    # Single branch: tokenise it left to right.
    sub = []
    while expr != '':
        char = expr[0]
        if char in ['(', '[', '{']:
            # Find the matching closer, honouring nesting of the same bracket.
            level = 1
            i = 1
            while level > 0:
                nchar = expr[i]
                if nchar == char:
                    level += 1
                if nchar == DELIMIT[char]:
                    level -= 1
                i += 1
            sub.append((char, expr[1:i - 1]))
            expr = expr[i:]
        elif char == '\\':
            # \n and \t are stored with the real control character; every
            # other escape keeps the character that follows the backslash.
            escaped = {'n': '\n', 't': '\t'}.get(expr[1], expr[1])
            sub.append(('e', '\\' + escaped))
            expr = expr[2:]
        elif char in ['*', '+', '?']:
            sub.append(('{', char))
            expr = expr[1:]
        elif char in '^$':
            # An anchor in the middle can never match. Return a pair like
            # every other exit (callers unpack two values); the literal with
            # an empty character set overlaps with nothing in count_doubles.
            return 0, [('t', '')]
        elif char == '.':
            sub.append(('[', '[:ascii:]'))
            expr = expr[1:]
        else:
            sub.append(('t', char))
            expr = expr[1:]

    # One factor per element; a quantifier rewrites the previous factor.
    components = []
    members = []
    for t, string in sub:
        if t == '(':
            comp, memb = count_expr(string)
            components.append(comp)
            members.append(('(', memb))
        elif t == 'e':
            if string in CLASSES:
                components.append(len(CLASSES[string]))
            else:
                components.append(1)
        elif t == '[':
            components.append(count_class(string))
        elif t == '{':
            num = components[-1]
            try:
                components[-1] = sum([num ** r for r in count_quantifier(string)])
            except TypeError:
                # count_quantifier returned None (no upper bound), or the
                # quantified group was itself 'Infinity'.
                return 'Infinity', []
        elif t == 't':
            components.append(1)
        if t != '(':
            members.append((t, string))

    total = 1
    for c in components:
        if c == 'Infinity':
            return 'Infinity', []
        total *= c
    return total, members
def count_class(string):
    """Count the characters matched by a bracket expression.

    ``string`` is the text between ``[`` and ``]``. A name found in
    ``CLASSES`` (e.g. ``'[:alpha:]'``) counts as the size of that set; any
    other text that itself starts with ``[`` and ends with ``]`` counts as 1.
    Otherwise every ``x-y`` triple contributes ``ord(y) - ord(x) + 1`` and
    every other character contributes 1. A leading ``^`` negates the class.
    """
    if string in CLASSES:
        return len(CLASSES[string])
    if string[0] == '[' and string[-1] == ']':
        return 1
    negate = (string[0] == '^')
    if negate:
        string = string[1:]
    avail_count = 0
    while string != '':
        if len(string) >= 3 and string[1] == '-':
            avail_count += ord(string[2]) - ord(string[0]) + 1
            string = string[3:]
            continue
        avail_count += 1
        string = string[1:]
    if negate:
        # Negated classes are counted against 256 possible characters.
        # NOTE(review): bracket_string negates against CLASSES['[:ascii:]']
        # instead; confirm which universe is intended.
        return 256 - avail_count
    return avail_count
def count_quantifier(string):
    """Return the repetition counts a quantifier allows, or None if unbounded.

    ``string`` is ``'?'``, ``'*'``, ``'+'`` or the text between ``{`` and
    ``}``: ``'n'`` gives ``[n]``, ``'low,high'`` gives the inclusive range
    ``low..high``, and ``'low,'`` (no upper bound) gives ``None``.
    """
    if string == '?':
        return [0, 1]
    if string[0] in '*+':
        return None
    if ',' not in string:
        return [int(string)]
    low, high = string.split(',')
    if high == '':
        return None
    return range(int(low), int(high) + 1)
def bracket_string(string):
    """Expand a bracket-expression body into the string of characters it accepts.

    ``x-y`` triples are expanded to every character from ``x`` to ``y``; any
    other character stands for itself. With a leading ``^`` the result is
    instead every character of ``CLASSES['[:ascii:]']`` that is not listed.
    """
    negate = string[0] == '^'
    if negate:
        string = string[1:]

    pieces = []
    pos = 0
    size = len(string)
    while pos < size:
        if size - pos >= 3 and string[pos + 1] == '-':
            lo, hi = ord(string[pos]), ord(string[pos + 2])
            pieces.extend(chr(code) for code in range(lo, hi + 1))
            pos += 3
        else:
            pieces.append(string[pos])
            pos += 1
    avail = ''.join(pieces)

    if not negate:
        return avail
    return ''.join(c for c in CLASSES['[:ascii:]'] if c not in avail)
def overlap(es):
    """Count the characters of ``CLASSES['[:ascii:]']`` accepted by every element.

    ``es`` is a list of ``(tag, text)`` elements, each matching exactly one
    character: ``'e'`` (backslash escape), ``'t'`` (literal) or ``'['``
    (bracket class body). Any other tag accepts nothing.
    """
    chars = []
    for kind, text in es:
        if kind == 'e':
            # Each escape is looked up on its own: a class such as \d uses its
            # table entry, any other escape stands for the single character
            # after the backslash (the backslash itself is not accepted).
            chars.append(CLASSES[text] if text in CLASSES else text[1:])
        elif kind == 't':
            chars.append(text)
        elif kind == '[':
            chars.append(CLASSES[text] if text in CLASSES else bracket_string(text))
        else:
            chars.append('')
    total = 0
    for c in CLASSES['[:ascii:]']:
        if all(c in chs for chs in chars):
            total += 1
    return total
def count_doubles(exprs):
    """Count the strings matched by EVERY member list in ``exprs``.

    Each element of ``exprs`` is a member list as returned by ``count_expr``.
    The lists are rewritten (quantifiers unrolled, groups spliced in,
    alternations split) until each starts with a single-character element;
    ``overlap`` then counts the characters they share and the function
    recurses on the remainders.
    """
    # Every list exhausted: exactly the empty string is matched by all.
    if all(e == [] for e in exprs):
        return 1

    # Quantified first element: sum over every allowed repetition count.
    for i, expr in enumerate(exprs):
        if len(expr) >= 2 and expr[1][0] == '{':
            others = exprs[:i] + exprs[i + 1:]
            total = 0
            for n in count_quantifier(expr[1][1]):
                total += count_doubles([[expr[0]] * n + expr[2:]] + others)
            return total

    # Group as first element: splice its members in place.
    for i, expr in enumerate(exprs):
        if len(expr) >= 1 and expr[0][0] == '(':
            return count_doubles([expr[0][1] + expr[1:]] + exprs[:i] + exprs[i + 1:])

    # One list is exhausted while another still needs a character.
    if any(e == [] for e in exprs):
        return 0

    # Alternation as first element: inclusion-exclusion over its branches.
    # Every term keeps the rest of this list and all the other lists, so the
    # corrections for strings matched by several branches only cover strings
    # that are also in the overall intersection.
    for i, expr in enumerate(exprs):
        if expr[0][0] == '|':
            others = exprs[:i] + exprs[i + 1:]
            branches = [[alt] + expr[1:] for alt in expr[0][1]]
            total = 0
            for j in range(1, len(branches) + 1):
                sign = 1 if j % 2 else -1
                for subset in combos(branches, j):
                    total += sign * count_doubles(subset + others)
            return total

    over = overlap([e[0] for e in exprs])
    if over == 0:
        return 0
    return over * count_doubles([e[1:] for e in exprs])
# Prompt for a regex on stdin and print how many strings it matches
# (count_regex returns 'Infinity' when the number is unbounded).
reg = raw_input('Regex: ')
print(count_regex(reg))
Aqui estão alguns casos de teste relevantes que confirmei:
Regex: ^[A-Za-z0-9]{8}$
218340105584896
Regex: ^([a-b]*)$
Infinity
Regex: ^([0-9]|[a-e])([a-e]|[A-E])$|^[a-f]{2}$
161
Regex: ^[a-z]{2}$|^[0-9]?[a-z]{2,3}$|^a[a-z]?$
200773
Regex: ^([a-z]|[a-e]|[A-E]|[3-4]|[D-G])$
35
Regex: ^\(.)\(.)\2\1$
16129*
Regex: ^(a)\1?$
2
Regex: ^[[:space:]]$|^\t$
3
* Isso é, na verdade, diferente entre a versão golfada e a não golfada (ungolfed), devido a uma diferença de um caractere no que é definido como ASCII válido. Acredito que a versão golfada seja a mais correta.
Para confirmar sua precisão, mais testes podem ser feitos, informe-me sobre quaisquer erros (sinceramente, não ficaria surpreso se houvesse alguns).
^[A-Za-z0-9]{8}$
? Caso contrário, a resposta seria infinity
.