This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
# Taken from "There are too many ways to do it" presentation | |
# http://www.shlomifish.org/lecture/Perl/Lightning/Too-Many-Ways/slides/ | |
# | |
# The problem | |
# | |
# <sniperd> looking to write a regex that strips out all periods, except for | |
# the last one. ie, i have a string named perl.is.the.best.txt and I just | |
# want perlisthebest.txt | |
# | |
# - Freenode's #perl. | |
# | |
use strict; | |
use warnings; | |
use Benchmark 'cmpthese'; | |
sub test { | |
$_[0]->('hello.world.txt'); | |
$_[0]->('hello-there'); | |
$_[0]->('hello..too.pl'); | |
$_[0]->('magna..carta'); | |
$_[0]->('the-more-the-merrier.jpg'); | |
$_[0]->('hello.'); | |
$_[0]->('perl.txt.'); | |
$_[0]->('.yes'); | |
$_[0]->('.yes.txt'); | |
} | |
sub via_split { | |
my $s = shift; | |
my @components = split /\./, $s, -1; | |
return $s if @components == 1; | |
my $last = pop @components; | |
return join( '', @components ) . '.' . $last; | |
} | |
sub sexeger { | |
my $s = shift; | |
$s = reverse $s; | |
my $c = 0; | |
$s =~ s!\.!($c++)?'':'.'!ge; | |
return reverse $s; | |
} | |
sub two_parts { | |
my $s = shift; | |
if ( $s =~ /^(.*)\.([^\.]*)$/ ) { | |
my ( $l, $r ) = ( $1, $2 ); | |
$l =~ tr/.//d; | |
return "$l.$r"; | |
} | |
else { | |
return $s; | |
} | |
} | |
sub look_ahead { | |
my $s = shift; | |
$s =~ s/\.(?=.*\.)//g; | |
return $s; | |
} | |
sub count_and_replace { | |
my $s = shift; | |
my $count = ( my @a = ($s =~ /\./g) ); | |
$s =~ s/\./(--$count)?'':'.'/ge; | |
return $s; | |
} | |
sub elim_last { | |
my $s = shift; | |
my $non_occur = "\x{1}" . ( "\0" x length $s ) . "\x{1}"; | |
$s =~ s/\.([^\.]*)$/$non_occur$1/; | |
$s =~ tr/.//d; | |
$s =~ s!$non_occur!.!; | |
return $s; | |
} | |
sub rindex01 { | |
my $s = shift; | |
substr( $s, 0, rindex( $s, '.' ) ) =~ tr/.//d; | |
return $s; | |
} | |
sub recursive_perl { | |
my @chars = split //, shift; | |
my $recurse; | |
$recurse = sub { | |
return '', 0 unless scalar @_; | |
my $head = shift @_; | |
my ( $processed_string, $was_period_found ) = $recurse->( @_ ); | |
if ( $was_period_found ) { | |
return ( $head eq '.' ? '' : $head ) . $processed_string, 1; | |
} | |
else { | |
return $head . $processed_string, $head eq '.'; | |
} | |
}; | |
return ( $recurse->( @chars ) )[0]; | |
} | |
sub delpoint1 { | |
local $_ = shift; | |
my $old = $_; | |
while ( /\./ ) { | |
$old = $_; | |
s/\.//; | |
} | |
return $old; | |
} | |
sub delpoint2 { | |
local $_ = shift; | |
while ( s/\.(.*\.)/$1/ ) {} | |
return $_; | |
} | |
sub delpoint8 { | |
local $_ = shift; | |
my @parts = split /(\.)/; | |
my %hash; | |
for my $i ( 0..$#parts ) { | |
push @{ $hash{$parts[$i]} }, $i; | |
} | |
if ( exists $hash{'.'} ) { | |
$hash{'.'} = [ @{ $hash{'.'} }[-1] ]; | |
} | |
my %sort; | |
for my $key ( %hash ) { | |
for my $number ( @{$hash{$key}} ) { | |
$sort{$number} = $key; | |
} | |
} | |
$_ = ''; | |
for my $number ( sort { $a <=> $b } keys %sort ) { | |
$_ .= $sort{$number}; | |
} | |
return $_; | |
} | |
sub pack01 { | |
my @c = unpack 'c*', $_[0]; | |
my @p = grep( $c[$_] == 46, 0..$#c ); | |
pop @p; | |
while ( defined ( my $c = pop @p ) ) { | |
splice @c, $c, 1; | |
} | |
return pack 'c*', @c; | |
} | |
cmpthese( 1000000, { | |
'via_split' => sub { test( \&via_split ) }, | |
'sexeger' => sub { test( \&sexeger ) }, | |
'two_parts' => sub { test( \&two_parts ) }, | |
'look_ahead' => sub { test( \&look_ahead ) }, | |
'count_and_replace' => sub { test( \&count_and_replace ) }, | |
'elim_last' => sub { test( \&elim_last ) }, | |
'rindex01' => sub { test( \&rindex01 ) }, | |
# Recursive would eat all the memory and would be the slowest one anyway. | |
# Trust me on this ;) | |
#'recursive_perl' => sub { test( \&recursive_perl ) }, | |
'delpoint1' => sub { test( \&delpoint1 ) }, | |
'delpoint2' => sub { test( \&delpoint2 ) }, | |
'delpoint8' => sub { test( \&delpoint8 ) }, | |
'pack01' => sub { test( \&pack01 ) }, | |
}); |
Rate delpoint8 elim_last pack01 count_and_replace sexeger delpoint8 9237/s -- -58% -73% -82% -86% elim_last 22232/s 141% -- -36% -58% -67% pack01 34855/s 277% 57% -- -34% -49% count_and_replace 52466/s 468% 136% 51% -- -23% sexeger 68259/s 639% 207% 96% 30% -- two_parts 72359/s 683% 225% 108% 38% 6% delpoint2 80775/s 774% 263% 132% 54% 18% via_split 90009/s 874% 305% 158% 72% 32% delpoint1 101626/s 1000% 357% 192% 94% 49% rindex01 145985/s 1480% 557% 319% 178% 114% look_ahead 163399/s 1669% 635% 369% 211% 139% Rate two_parts delpoint2 via_split delpoint1 rindex01 look_ahead delpoint8 9237/s -87% -89% -90% -91% -94% -94% elim_last 22232/s -69% -72% -75% -78% -85% -86% pack01 34855/s -52% -57% -61% -66% -76% -79% count_and_replace 52466/s -27% -35% -42% -48% -64% -68% sexeger 68259/s -6% -15% -24% -33% -53% -58% two_parts 72359/s -- -10% -20% -29% -50% -56% delpoint2 80775/s 12% -- -10% -21% -45% -51% via_split 90009/s 24% 11% -- -11% -38% -45% delpoint1 101626/s 40% 26% 13% -- -30% -38% rindex01 145985/s 102% 81% 62% 44% -- -11% look_ahead 163399/s 126% 102% 82% 61% 12% --