w use
#!/usr/bin/perl - w use strict; my $protocol='(?:(?=[FfHh])(?i:http(?>s?)|ftp)://)'; my $host=<<HOST; (?>[A-Za-z0-9]{1,63}\\.) (?>[A-Za-z0-9] (?>[-A-Za-z0-9]{0,62})\\. )* HOST my $subdom=<<SUBDOM; (?: (?>[A-Za-z0-9] (?:[-A-Za-z0-9]{0,61}[A-Za-z0-9])? )\\. )+ SUBDOM my $wb='(?![A-Za-z0-9])'; my $zone=<<ZONE; (?i:(?(?=[a-z]{3}$wb)(?>com|net|org|edu|biz|gov|int|mil)| (?(?=[a-z]{2}$wb)[a-z]{2}| (?(?=[a-z]{4}$wb)(?>info|aero|name)| (?(?=[a-z]{6}$wb)museum|(?!) ) ) ) ) (?>\\.[a-z]{2}$wb)? ) ZONE my $port="(?::\\d{1,5}$wb)"; my $tail=<<TAIL; (?:[/?] (?>[^.,"'<>()[\\]{}\\s\\x7F-\\xFF]*) (?:(?>[.,?]+) (?:[^"'<>()[\\]{}\\s\\x7F-\\xFF]+) )* (?<![,.?!-]) ) TAIL my $re=<<RE; ( (?>($protocol)(?(2)(?>$host$zone)|$host$zone) (?![A-Za-z0-9])| (?<![A-Za-z0-9_\\\@-]) (?<!\\.(?!(?i:www))) $subdom$zone(?![A-Za-z0-9_.-]*\\\@) ) (?>(?>$port?(?>\\\@$host$zone(?![A-Za-z0-9_.-]*\\\@))?)?) ) ($tail?) RE my $text=<<TEXT; URLs: Ftp://a.com/AAa Look at:aaa.Museum. http://www.proxy.com:80\@www.site.com/ http://proxy.com:80\@site.com/ http://proxy.com\@site.com/ aAaa.com.au.rr.ggg Zwww.Yabcd.co.uk Фforum.abcd.de www.Abc.eu П123.123.123.1234.com/?q=aaa http://Abc.Tk Ahttp://www.Abc.pt/AAa http://abc.au/query/vid.cam.dig/sony.dcrhc15.htm#full_image Ф.Www.old-avto.tk NOT URLs: aaa.museumm http://aaa.museumm, http://-aaa.com www._aaa.com www.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.com TEXT $text =~ s!$re!<a href="${\($2 ? '' : 'http://')}\L$1\E$3" target="_blank">$1$3</a>!gx; print $text; |
Листинг 8.1. |
Закрыть окно |
#!/usr/bin/perl -w
use strict;
my $protocol='(?:(?=[FfHh])(?i:http(?>s?)|ftp)://)';
my $host=<
(?>[A-Za-z0-9]{1,63}\\.)
(?>[A-Za-z0-9]
(?>[-A-Za-z0-9]{0,62})\\.
)*
HOST
my $subdom=<
(?:
(?>[A-Za-z0-9]
(?:[-A-Za-z0-9]{0,61}[A-Za-z0-9])?
)\\.
)+
SUBDOM
my $wb='(?![A-Za-z0-9])';
my $zone=<
(?i:(?(?=[a-z]{3}$wb)(?>com|net|org|edu|biz|gov|int|mil)|
(?(?=[a-z]{2}$wb)[a-z]{2}|
(?(?=[a-z]{4}$wb)(?>info|aero|name)|
(?(?=[a-z]{6}$wb)museum|(?!)
)
)
)
)
(?>\\.[a-z]{2}$wb)?
)
ZONE
my $port="(?::\\d{1,5}$wb)";
my $tail=<
(?:[/?]
(?>[^.,"'<>()[\\]{}\\s\\x7F-\\xFF]*)
(?:(?>[.,?]+)
(?:[^"'<>()[\\]{}\\s\\x7F-\\xFF]+)
)*
(? )
TAIL
my $re=<
(
(?>($protocol)(?(2)(?>$host$zone)|$host$zone)
(?![A-Za-z0-9])|
(? (? $subdom$zone(?![A-Za-z0-9_.-]*\\\@)
)
(?>(?>$port?(?>\\\@$host$zone(?![A-Za-z0-9_.-]*\\\@))?)?)
)
($tail?)
RE
my $text=<
URLs:
Ftp://a.com/AAa
Look at:aaa.Museum.
http://www.proxy.com:80\@www.site.com/
http://proxy.com:80\@site.com/
http://proxy.com\@site.com/
aAaa.com.au.rr.ggg
Zwww.Yabcd.co.uk
Фforum.abcd.de
www.Abc.eu
П123.123.123.1234.com/?q=aaa
http://Abc.Tk
Ahttp://www.Abc.pt/AAa
http://abc.au/query/vid.cam.dig/sony.dcrhc15.htm#full_image
Ф.Www.old-avto.tk
NOT URLs:
aaa.museumm
http://aaa.museumm,
http://-aaa.com
www._aaa.com
www.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.com
TEXT
$text =~ s!$re!$1$3!gx;
print $text;