[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: Forward: Re: CVS commit: htdocs
> > スクリプトが直接修正するのは恐いので、こないだのようなものを
> > www-changes-jaに流すとよいかなと思っています。
> これは
> > /ja/Changes/index.html
> > ../MailingLists/ should be /MailingLists/
> だと思ってるんですが、
そうですそうです。
> これは翻訳ファイルが追加された時には
>
> /MailingLists/ should be ../MailingLists/
>
> とか出力してくれるんでしょうか?
そのはずだったんですが、今試しに
$ mkdir /ja/MailingLists/; touch /ja/MailingLists/index.html
$ ./urlcheck.pl
してみたら何も表示されませんでした…。あー、スクリプト間違ってる。
ということで直したやつで実験したら、こういうのが表示されました。
$ rm -rf /ja/MailingLists
$ time perl urlcheck.pl
/ja/Documentation/network/index.html
/Misc/feedback.html should be /ja/Misc/feedback.html
/ja/index.html
../index.html should be /ja/index.html
real 1m3.963s
user 0m50.590s
sys 0m2.052s
$
ちゃんと表示してるみたいです。/ja/index.htmlのほうはそのままの
ほうがいいので、これは出ないようにしますけど。
チェックに使うつもりのurlcheck.plと、昨日使ったurlconv.plを
添付しておきます。urlcheck.plをhtdocs/直下で使えば、
上記のような結果が得られると思います。
--
iかわもと よしひさ! kawamoto@es.osaka-u.ac.jp
#!/usr/pkg/bin/perl
use File::Find;
use HTML::Parse;
use HTML::Element;
%jafiles = ();
find(\&wanted, "ja");
for $jafile (sort(keys %jafiles)) {
$msg = '';
$parsed = HTML::Parse::parse_htmlfile("./$jafile");
for (@{ $parsed->extract_links() }) {
$_ = $_->[0];
$url = $_;
if (!/^\w*:/ && !/^$/ && !/^#/ && /^(.*\/)?([^\/#]*)(#.*)?$/) {
$dir = $1;
$file = $2;
$mark = $3;
if (/^\//) {
$_ = "$dir$file";
} else {
$_ = $jafile;
s/\/[^\/]+$/\/$dir$file/;
}
s/\/\.$/\//;
s/(\/\.\.?)$/$1\//;
1 while (s/\/(?!\.\.\/)[^\/]+\/\.\.\//\//);
s/\/$/\/index.html/;
if (/^\/ja\//) {
if ($jafiles{$_}) {
# The link seems to point existing file.
# This is OK.
} else {
# The link points non existent file.
# This is NG.
s/^\/ja//;
s/\/[^\/]+$/\/$file$mark/;
$msg .= "\t$url should be $_\n";
}
} elsif (/^\/[^\/\.]+\.[^\/]+\//) {
# This is probably outside link.
# This is NG.
$msg .= "\t$url should be ../$url\n";
} else {
# This is local link, maybe.
if ($jafiles{"/ja$_"}) {
# There is a translated file.
# This is NG.
s/\/[^\/]+$/\/$file$mark/;
$msg .= "\t$url should be /ja$_\n";
} else {
# There are no traslated files.
# This is OK, or a wrong link.
}
}
}
}
print "$jafile\n$msg" if ($msg);
}
sub wanted {
$jafiles{"/$File::Find::dir/$_"} = 1 if (-f && /\.html$/);
}
#!/usr/pkg/bin/perl
use File::Find;
use File::Path;
use File::Basename;
use HTML::Parse;
use HTML::Element;
%jafiles = ();
find(\&wanted, "ja");
for $jafile (sort(keys %jafiles)) {
@html = ();
open(HTML, "./$jafile") || next;
@html = <HTML>;
close(HTML);
$parsed = HTML::Parse::parse_html(join('', @html));
$msg = '';
@urls = ();
%newurl = ();
for (@{ $parsed->extract_links() }) {
$_ = $_->[0];
$url = $_;
if (!/^\w*:/ && !/^$/ && !/^#/ && /^(.*\/)?([^\/#]*)(#.*)?$/) {
$dir = $1;
$file = $2;
$mark = $3;
if (/^\//) {
$_ = "$dir$file";
} else {
$_ = $jafile;
s/\/[^\/]+$/\/$dir$file/;
}
s/\/\.$/\//;
s/(\/\.\.?)$/$1\//;
1 while (s/\/(?!\.\.\/)[^\/]+\/\.\.\//\//);
s/\/$/\/index.html/;
if (/^\/ja\//) {
if ($jafiles{$_}) {
# The link seems to point existing file.
# This is OK.
} else {
# The link points non existent file.
# This is NG.
s/^\/ja//;
s/\/[^\/]+$/\/$file$mark/;
$msg .= "\t$url should be $_\n";
push(@urls, $url);
$newurl{$url} = $_;
}
} elsif (/^\/[^\/\.]+\.[^\/]+\//) {
# This is probably outside link.
# This is NG.
$msg .= "\t$url should be ../$url\n";
push(@urls, $url);
$newurl{$url} = "../$url";
} else {
# This is local link, maybe.
if ($jafiles{"/ja$_"}) {
# There is a translated file.
# This is NG.
s/\/[^\/]+$/\/$file$mark/;
$msg .= "\t$url should be /ja$_\n";
push(@urls, $url);
$newurl{$url} = "/ja$_";
} else {
# There are no traslated files.
# This is OK, or a wrong link.
}
}
}
}
#print "$jafile\n$msg" if ($msg);
print "$jafile\n" if ($msg);
&mkfile($jafile, @urls) if (@urls);
}
sub wanted {
$jafiles{"/$File::Find::dir/$_"} = 1
if (-f && (/\.html$/ || /\.list$/ ||
/^index.faq$/ || /^post$/ || /^pre$/));
}
sub mkfile {
my ($jafile, @urls) = @_;
$jafile =~ s<^/ja><./ja-new>;
mkpath(dirname($jafile));
open(NEW, "> $jafile") || return;
for (@html) {
shift @urls if (s/(["']\Q$urls[0]\E['"])/"$newurl{$urls[0]}" origlink=$1/);
print NEW "$_";
}
close(NEW);
print "*** WARNING ***: some links are not found: @urls\n" if (@urls);
}