#!/usr/bin/perl # Compact alternations (for regex) - 1 level # ('google', 'goof', 'gov', 'gox', 'go') --> go(?:of|ogle|[vx])? # Author: Trizen # License: GPLv3 # 29 February 2012 use 5.010; use strict; use warnings; my %done; sub _get_parts { my @parts; foreach my $word (@_) { foreach my $i (1 .. length $word) { push @parts, substr($word, 0, $i); } } return @parts; } sub _strip_alterns { my $phrase = shift; $phrase =~ s/(? $table{$a} } keys %table) { foreach my $word (@array) { if ($word ne $key and index($word, $key) == 0) { push @{$out{$key}}, $word; } elsif (length $word == 1 and $word eq $key) { push @{$out{$key}}, $key; } } } my %seen; my @left; my $alter = ''; foreach my $key ( sort { length($b) * scalar(@{$out{$b}}) <=> length($a) * scalar(@{$out{$a}}) } keys %out ) { if (@{$out{$key}} == 1) { push @left, $key; next; } my $x = 0; my $y = 0; foreach my $item (@{$out{$key}}) { next if exists $seen{$item}; $seen{$item} = (); $seen{$key} = (); unless ($x++) { $alter .= quotemeta $key; if (grep { not exists $seen{$_} } @{$out{$key}}) { $alter .= '(?:'; $y = 1; } } my $word = substr($item, length $key); if (length $word) { $alter .= quotemeta($word) . '|'; } } if ($y) { chop $alter; $alter .= ')'; $alter .= '?' if exists $hash{$key}; $alter .= '|'; } } my %uniq; @uniq{grep { not exists $seen{$_} } map({@{$out{$_}}} @left)} = (); $alter =~ s{\(\?:((?:.\|)*.?)\)}{ _strip_alterns($1) }egs; $alter .= join('|', map { quotemeta } keys %uniq); $alter =~ s{(?<=\|)(?