Регулярные выражения Perl и их применение

       

и passw ограничены 32 символами


#!perl -w use strict;
my $wb='(?![A-Za-z0-9])';
my $protocol='(?:(?=[FfHh])(?i:http(?>s?)|ftp)://)';
my $host=<<HOST; (?>[-A-Za-z0-9_]{1,63}\\.) (?>[A-Za-z0-9_] (?>[-A-Za-z0-9_]{0,62})\\. )* HOST
my $subdom=<<SUBDOM; (?: (?>[A-Za-z0-9] (?:[-A-Za-z0-9]{0,61}[A-Za-z0-9])? )\\. )+ SUBDOM
my $subdom1='[A-Za-z0-9](?:[-A-Za-z0-9]{0,61}[A-Za-z0-9])?';
my $zone=<<ZONE; (?i: (?=[a-z]{3}$wb) (?>com|net|org|edu|biz|gov|int|mil)| (?(?=[a-z]{2}$wb)[a-z]{2}| (?(?=[a-z]{4}$wb)(?>info|aero|name)| (?(?=[a-z]{6}$wb)museum|(?!) ) ) ) (?>\\.[a-z]{2}$wb)? ) ZONE
my $port="(?::\\d{1,5}$wb)";
my $tail=<<TAIL; (?:[/?] (?>[^.,"'<>()\\[\\]{}\\s\\x7F-\\xFF]*) (?: (?>[.,?]+) (?:[^"'<>()\\[\\]{}\\s\\x7F-\\xFF]+) )* (?<![,.?!-]) ) TAIL
my $firstchr='(?:[A-Za-z0-9])';
my $namechr='(?:[A-Za-z0-9_+.-])';
my $ip='(?:(?<!\\d)(?>\\d{1,3})\\.(?>\\d{1,3})\\.(?>\\d{1,3})\\.(?>\\d{1,3})(?!\\d))';
# Login и passw ограничены 32 символами my $loginpasswat='(?:(?>[A-Za-z0-9_]{1,32})(?>(?::[A-Za-z0-9_]{1,32})?)\\@)';


my $res;
$_=q(http://www.proxy.com:80@www.site.com/ Ftp://a.com/AAa Ftp://Login:Passw@Www.Aaa.Com/Www/ Ftp://login:passw@a-aa.com/www/ Mailto:aaa@sss.zzz.co. Mailto:aaa@sss.zzz.eee.co. aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa@aaa.com ыы@ddd.com ыы@ddЫd.com ыыsы-sf.ff.com.com@ddd.com ыыsы.-sf.ff@ddd.com Mailto:aaa@sss.co, aaa@sss.comЫЫЫ aaa.Bb.b@aaaa.com.ru.rr.ggg aaa.museumm Look at:aaa.museum. httpS://aaa.museumm, http://www.proxy.com:80@www.site.com/ http://proxy.com:80@site.com/ http://proxy.com@site.com/ aAaa.com.ru.rr.ggg Zwww.Yabcd.co.uk Фforum.abcde.ru www.Eabcd.ru http://Eabcd.Ru Ahttp://www.Eabcd.ru/AAa http://abc.ru/query/vid.cam.dig/sony.dcrhc15.htm#full_image Ф.Www.abcdefg-avto.ru httP://1.2.3.400/aaa/ddd.exe? 1.2.3.400/aaa/ddd.exe?d=c,f=t;&e=h, .0.2.3.400. http://66.123.234.555/ddd michel@ab-cdefg.ru http://99.999.999.999/search?q=cache:w5K8GsupwvcJ:olympus.flexiblesoft.com/c-4000-man.doc+c-4000-man&hl=ru&client=firefox-a );
# Оформляем ссылки без login:passw s#((?>($protocol)(?(2)(?>$ip|$host$zone)|$host$zone)(?![A-Za-z0-9])|(?<![A-Za-z0-9_\@-])(?<!\.(?!(?i:www)))$subdom$zone(?![A-Za-z0-9_.-]*\@))(?>(?>$port?(?>\@$host$zone(?![A-Za-z0-9_.-]*\@))?)?))($tail?)#$res=$2 ? '' : 'http://'; qq!<a href="$res\L$1\E$3" target="_blank">$1$3</a>!#gex; # Оформляем ссылки с login:passw s#($protocol)($loginpasswat)($ip|$host$zone)((?>$port?)$tail?)#<a href=\"\L$1\E$2\L$3\E$4\" target=\"_blank\">$1$2$3$4</a>"#gx; # Оформляем е-мейлы. Этот оператор чувствителен к тексту, на который меняет предыдущие операторы! s#((?<!$firstchr)$firstchr(?>$namechr{0,39})\@(?>$subdom1)(?:\.$subdom1)?\.$zone)(?!(?>[^\s"<]*)(?:"\starget="_blank">|</a>))#<a href="mailto:$1">$1</a>#gx; # Оформляем ссылки с IP s#((?<![>/])$ip(?>$port?))($tail?)#"<a href=\"http://\L$1\E$2\" target=\"_blank\">$1$2</a>"#gx;
print $_;
Листинг 8.3.
Закрыть окно

#!perl -w
use strict;
my $wb='(?![A-Za-z0-9])';
my $protocol='(?:(?=[FfHh])(?i:http(?>s?)|ftp)://)';
my $host=<
(?>[-A-Za-z0-9_]{1,63}\\.)
(?>[A-Za-z0-9_]
(?>[-A-Za-z0-9_]{0,62})\\.
)*
HOST
my $subdom=<
(?:
(?>[A-Za-z0-9]
(?:[-A-Za-z0-9]{0,61}[A-Za-z0-9])?
)\\.
)+
SUBDOM
my $subdom1='[A-Za-z0-9](?:[-A-Za-z0-9]{0,61}[A-Za-z0-9])?';
my $zone=<
(?i:
(?=[a-z]{3}$wb)
(?>com|net|org|edu|biz|gov|int|mil)|
(?(?=[a-z]{2}$wb)[a-z]{2}|
(?(?=[a-z]{4}$wb)(?>info|aero|name)|
(?(?=[a-z]{6}$wb)museum|(?!)
)
)
)
(?>\\.[a-z]{2}$wb)?
)
ZONE
my $port="(?::\\d{1,5}$wb)";
my $tail=<
(?:[/?]
(?>[^.,"'<>()\\[\\]{}\\s\\x7F-\\xFF]*)
(?:
(?>[.,?]+)
(?:[^"'<>()\\[\\]{}\\s\\x7F-\\xFF]+)
)*
(? )
TAIL
my $firstchr='(?:[A-Za-z0-9])';
my $namechr='(?:[A-Za-z0-9_+.-])';
my $ip='(?:(?\\d{1,3})\\.(?>\\d{1,3})\\.(?>\\d{1,3})\\.(?>\\d{1,3})(?!\\d))';
# Login и passw ограничены 32 символами
my $loginpasswat='(?:(?>[A-Za-z0-9_]{1,32})(?>(?::[A-Za-z0-9_]{1,32})?)\\@)';
my $res;
$_=q(http://www.proxy.com:80@www.site.com/
Ftp://a.com/AAa
Ftp://Login:Passw@Www.Aaa.Com/Www/
Ftp://login:passw@a-aa.com/www/
Mailto:aaa@sss.zzz.co.
Mailto:aaa@sss.zzz.eee.co.
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa@aaa.com
ыы@ddd.com
ыы@ddЫd.com
ыыsы-sf.ff.com.com@ddd.com
ыыsы.-sf.ff@ddd.com
Mailto:aaa@sss.co,
aaa@sss.comЫЫЫ
aaa.Bb.b@aaaa.com.ru.rr.ggg
aaa.museumm
Look at:aaa.museum.
httpS://aaa.museumm,
http://www.proxy.com:80@www.site.com/
http://proxy.com:80@site.com/
http://proxy.com@site.com/
aAaa.com.ru.rr.ggg
Zwww.Yabcd.co.uk
Фforum.abcde.ru
www.Eabcd.ru
http://Eabcd.Ru
Ahttp://www.Eabcd.ru/AAa
http://abc.ru/query/vid.cam.dig/sony.dcrhc15.htm#full_image
Ф.Www.abcdefg-avto.ru
httP://1.2.3.400/aaa/ddd.exe?
1.2.3.400/aaa/ddd.exe?d=c,f=t;&e=h,
.0.2.3.400.
http://66.123.234.555/ddd
michel@ab-cdefg.ru
http://99.999.999.999/search?q=cache:w5K8GsupwvcJ:olympus.flexiblesoft.com/c-4000-man.doc+c-4000-man&hl=ru&client=firefox-a
);
# Оформляем ссылки без login:passw
s#((?>($protocol)(?(2)(?>$ip|$host$zone)|$host$zone)(?![A-Za-z0-9])|(?(?>$port?(?>\@$host$zone(?![A-Za-z0-9_.-]*\@))?)?))($tail?)#$res=$2 ? '' : 'http://'; qq!$1$3!#gex;
# Оформляем ссылки с login:passw
s#($protocol)($loginpasswat)($ip|$host$zone)((?>$port?)$tail?)#$1$2$3$4"#gx;
# Оформляем е-мейлы. Этот оператор чувствителен к тексту, на который меняет предыдущие операторы!
s#((?$namechr{0,39})\@(?>$subdom1)(?:\.$subdom1)?\.$zone)(?!(?>[^\s"<]*)(?:"\starget="_blank">|))#$1#gx;
# Оформляем ссылки с IP
s#((?/])$ip(?>$port?))($tail?)#"$1$2"#gx;
print $_;

Содержание раздела