Converting RMVB Files' Content tags' Encoding
I have some RMVB files whose content tags are encoded in GBK, so they are not displayed correctly in Freevo. I wrote this Perl script to convert the GBK tags to UTF-8.
The script makes a backup of the RMVB file (the original is kept with an added ".old" extension); remember to delete the backups if you don't need them.
Use at your own risk.
Use your text editor to save the following to a file, for example "convertRmvbEncode", then run "chmod +x convertRmvbEncode".
(can also download the file from here http://www.box.net/shared/it7cq7vgp0)
#!/usr/bin/perl
#this is to convert RMVB file's metadata from GBK to UTF8 encoding
#to use it:
# convRmvbEncode fileName.rmvb
#
#************USE AT YOUR OWN RISK******************
#If the RMVB file has more than one DATA section, the result file
#may not be usable.
#The RMVB file format can be found at:
#http://wiki.multimedia.cx/index.php?title=RealMedia
use strict 'vars';
use Fcntl; #for random file control
use Encode qw/from_to/;
#*****************************************************
# Encodings handed to Encode::from_to() when the CONT strings are converted.
my $from_encoding = "GBK"; #old file's encoding
my $to_encoding = "UTF8"; #new file's encoding
#*****************************************************
# Path of the RealMedia file to convert: the first command-line argument.
# $ARGV[0] is the element; @ARGV[0] was a one-element array slice.
my $rmvbFile = $ARGV[0];
# Fail early with a usage line instead of a confusing open() error later.
die "Usage: $0 fileName.rmvb\n"
    unless defined $rmvbFile && length $rmvbFile;
my ($buffer); #temp variable
#-----------------------------------------------------
#the following to try to read CONT tag
#and convert the CONT tags to UTF8
my $changed_cont = ""; #rebuilt CONT section after converting to UTF8
my $added_len = -1; #number of bytes added to the CONT
#section after converting to UTF8
my $num_headers = 0; #header count read from the .RMF file header
{
    # Pass 1: find the CONT chunk, convert its four strings and build the
    # replacement chunk. Results are left in the file-level variables
    # $changed_cont, $added_len and $num_headers.

    # Three-argument open with a lexical handle, so the file name can never
    # be interpreted as an open mode. The file is only read in this pass.
    open(my $in, '<', $rmvbFile)
        or die "Couldn't open $rmvbFile for reading: $!\n";
    binmode($in);

    # Read exactly $len bytes or die; a short read means a truncated file.
    my $read_exact = sub {
        my ($len) = @_;
        my $data = "";
        my $got = read($in, $data, $len);
        defined $got or die "error when read:$!";
        $got == $len or die "error when read: unexpected end of file\n";
        return $data;
    };

    #---------------------------------------------
    #try to read content tag of the rmvb file
    my ($cont_size, $cont_ver, $cont_buffer) = (-1, -1, "");

    #read first 18 bytes (the .RMF file header)
    my ($obj_id, $obj_size, $obj_version, $file_ver);
    ($obj_id, $obj_size, $obj_version, $file_ver, $num_headers) =
        unpack("a4NnNN", $read_exact->(18));

    #quit if not a real media file
    die "Not a Real Media file" if $obj_id ne ".RMF";

    for (my $i = 0; $i < $num_headers; $i++) {
        # Every chunk starts with 10 bytes: 4-byte id, 32-bit size and a
        # 16-bit version ("n"; the old "H" template read a single nybble).
        ($obj_id, $obj_size, $obj_version) =
            unpack("a4Nn", $read_exact->(10));
        print "Header $i: $obj_id, $obj_size, $obj_version\n";

        # The size includes the 10 bytes just read; anything smaller would
        # make the skip below move backwards.
        die "Corrupt $obj_id section: size $obj_size is too small\n"
            if $obj_size < 10;

        if ($obj_id eq "CONT") {
            $cont_buffer = $read_exact->($obj_size - 10);
            $cont_size = $obj_size;
            $cont_ver = $obj_version;
        } else {
            #skip other sections
            # Whence 0 is SEEK_SET. Plain "use Fcntl;" does not import
            # SEEK_SET (that needs the :seek tag), so the bareword used
            # before only worked because it numified to 0.
            seek($in, tell($in) + $obj_size - 10, 0)
                or die "error when seek:$!";
        }
    }
    die "Couldn't find content tags" if ($cont_size == -1);

    #-------------------------------------------------------
    #the following will convert content tag to UTF encoding
    #------------------------------------------------------
    my $tmp_pos = 0;

    # Pull the next "16-bit length + bytes" string out of the CONT body,
    # refusing to run past the end of the chunk.
    my $next_string = sub {
        my ($what) = @_;
        $tmp_pos + 2 <= length($cont_buffer)
            or die "Corrupt CONT section: missing $what length\n";
        my $len = unpack("n", substr($cont_buffer, $tmp_pos, 2));
        $tmp_pos += 2;
        $tmp_pos + $len <= length($cont_buffer)
            or die "Corrupt CONT section: $what is truncated\n";
        my $text = substr($cont_buffer, $tmp_pos, $len);
        $tmp_pos += $len;
        return ($len, $text);
    };

    my ($title_len, $title) = $next_string->("title");
    my ($author_len, $author) = $next_string->("author");
    my ($copyright_len, $copyright) = $next_string->("copyright");
    my ($comment_len, $comment) = $next_string->("comment");

    print "------Before Change-------\n";
    print "title($title_len)=$title\n";
    print "author($author_len)=$author\n";
    print "copyright($copyright_len)=$copyright\n";
    print "comment($comment_len)=$comment\n";

    #convert to UTF8 (from_to rewrites each string in place, as bytes)
    from_to($title, $from_encoding, $to_encoding);
    from_to($author, $from_encoding, $to_encoding);
    from_to($copyright, $from_encoding, $to_encoding);
    from_to($comment, $from_encoding, $to_encoding);

    print "------After Change-------\n";
    $title_len = length($title);
    $author_len = length($author);
    $copyright_len = length($copyright);
    $comment_len = length($comment);
    print "title($title_len)=$title\n";
    print "author($author_len)=$author\n";
    print "copyright($copyright_len)=$copyright\n";
    print "comment($comment_len)=$comment\n";

    # Each string is stored behind a 16-bit length; the converted text must
    # still fit or pack() would silently wrap the length.
    for my $field ($title, $author, $copyright, $comment) {
        die "Converted CONT string is too long for its 16-bit length field\n"
            if length($field) > 0xFFFF;
    }

    #rebuild CONT section ("n/a*" packs a 16-bit length followed by the bytes)
    $changed_cont = pack("a4 N n n/a* n/a* n/a* n/a*",
        "CONT",
        10 + 2 + $title_len + 2 + $author_len
           + 2 + $copyright_len + 2 + $comment_len,
        $cont_ver,
        $title, $author, $copyright, $comment);

    # How much the CONT chunk grew (may be negative); used later to shift
    # the offsets stored in the PROP chunk.
    $added_len = length($changed_cont) - $cont_size;
    close($in);
}
#----------------------------------------------------------
#confirm with user if want to make the change
print "This Real Media file may have multiple DATA sections, output file may not be usable\n"
    if ($num_headers > 7);
# Pass 2: after confirmation, write "$rmvbFile.new" with the rebuilt CONT
# chunk and adjusted PROP offsets, then swap it in and keep the original
# as "$rmvbFile.old".
print "Make the change?(Y/N)\n";
# Read the answer from standard input; end-of-file counts as "no".
my $user_in = <STDIN>;
$user_in = defined $user_in ? uc($user_in) : "";
my $newFileCreated = 0;
if (substr($user_in,0,1) ne "Y"){
    print "File not changed\n";
}else{
    # Without a rebuilt CONT chunk the output would silently lose its tags.
    die "No converted CONT section available, file not changed\n"
        if $changed_cont eq "";

    # Three-argument opens with lexical handles; the input is only read.
    open(my $in, '<', $rmvbFile)
        or die "Couldn't open $rmvbFile for reading: $!\n";
    binmode($in);
    #create a new file
    open(my $out, '>', "$rmvbFile.new")
        or die "error when creating new file:$!";
    binmode($out);

    my $chunk = 1 << 20; # copy in 1 MiB pieces instead of slurping DATA

    # Read exactly $len bytes or die; a short read means a truncated file.
    my $read_exact = sub {
        my ($len) = @_;
        my $data = "";
        my $got = read($in, $data, $len);
        defined $got or die "error when read:$!";
        $got == $len or die "error when read: unexpected end of file\n";
        return $data;
    };

    # Copy exactly $remaining bytes from the old file to the new one.
    my $copy_bytes = sub {
        my ($remaining) = @_;
        while ($remaining > 0) {
            my $want = $remaining < $chunk ? $remaining : $chunk;
            print {$out} $read_exact->($want) or die "error when write:$!";
            $remaining -= $want;
        }
    };

    #copy the old file's first 18 bytes
    $buffer = $read_exact->(18);
    print {$out} $buffer or die "error when write:$!";
    my ($obj_id,
        $obj_size,
        $obj_version,
        $file_ver,
        $num_headers) = unpack("a4NnNN", $buffer);

    for (my $i = 0; $i < $num_headers; $i++) {
        # 10-byte chunk preamble: id, 32-bit size, 16-bit version.
        $buffer = $read_exact->(10);
        ($obj_id, $obj_size, $obj_version) = unpack("a4Nn", $buffer);
        die "Corrupt $obj_id section: size $obj_size is too small\n"
            if $obj_size < 10;
        my $body_len = $obj_size - 10;

        if ($obj_id eq "CONT") {
            print {$out} $changed_cont or die "error when write:$!";
            #skip old file's CONT section (whence 0 is SEEK_SET, which
            #plain "use Fcntl;" does not import)
            seek($in, tell($in) + $body_len, 0)
                or die "error when seek:$!";
        } elsif ($obj_id eq "PROP") {
            #rebuild PROP section
            #need to recalculate offset of DATA section
            print {$out} $buffer or die "error when write:$!";
            my $prop = $read_exact->($body_len);
            # The index and data offsets are the 8th and 9th 32-bit fields
            # of the PROP body (bytes 28..35). Patch them in place so any
            # bytes after the known fields are written back unchanged.
            if (length($prop) >= 36) {
                my ($offsetIndx, $offsetData) =
                    unpack("NN", substr($prop, 28, 8));
                # A zero offset is left alone rather than turned into a
                # bogus non-zero one. NOTE(review): assumes 0 means "no
                # such section" and that CONT precedes both sections.
                $offsetIndx += $added_len if $offsetIndx;
                $offsetData += $added_len if $offsetData;
                substr($prop, 28, 8) = pack("NN", $offsetIndx, $offsetData);
            }
            print {$out} $prop or die "error when write:$!";
        } else {
            #for other sections, just copy
            #copy the header already read
            print {$out} $buffer or die "error when write:$!";
            #copy the rest of this section
            $copy_bytes->($body_len);
        }
    }

    # Copy whatever follows the declared headers so nothing is truncated
    # when the file holds more chunks than the header count says.
    # NOTE(review): absolute offsets stored inside later chunks are not
    # adjusted; files with several DATA sections may still be unusable.
    while (1) {
        my $got = read($in, $buffer, $chunk);
        defined $got or die "error when read:$!";
        last if $got == 0;
        print {$out} $buffer or die "error when write:$!";
    }

    close($in);
    # Buffered write errors only surface at close, so check it.
    close($out) or die "error when closing new file:$!";
    rename($rmvbFile, "$rmvbFile.old") or die "Can't rename: $!";
    rename("$rmvbFile.new", "$rmvbFile") or die "Can't rename: $!";
}
The script makes a backup of the RMVB file (the original is kept with an added ".old" extension); remember to delete the backups if you don't need them.
Use at your own risk.
Use your text editor to save the following to a file, for example "convertRmvbEncode", then run "chmod +x convertRmvbEncode".
(can also download the file from here http://www.box.net/shared/it7cq7vgp0)
#this is to convert RMVB file's metadata from GBK to UTF8 encoding
#to use it:
# convRmvbEncode fileName.rmvb
#
#************USE AT YOUR OWN RISK******************
#If the RMVB file has more than one DATA section, the result file
#may not be usable.
#The RMVB file format can be found at:
#http://wiki.multimedia.cx/index.php?title=RealMedia
use strict 'vars';
use Fcntl; #for random file control
use Encode qw/from_to/;
#*****************************************************
# Encodings handed to Encode::from_to() when the CONT strings are converted.
my $from_encoding = "GBK"; #old file's encoding
my $to_encoding = "UTF8"; #new file's encoding
#*****************************************************
# Path of the RealMedia file to convert: the first command-line argument.
# $ARGV[0] is the element; @ARGV[0] was a one-element array slice.
my $rmvbFile = $ARGV[0];
# Fail early with a usage line instead of a confusing open() error later.
die "Usage: $0 fileName.rmvb\n"
    unless defined $rmvbFile && length $rmvbFile;
my ($buffer); #temp variable
#-----------------------------------------------------
#the following to try to read CONT tag
#and convert the CONT tags to UTF8
my $changed_cont = ""; #rebuilt CONT section after converting to UTF8
my $added_len = -1; #number of bytes added to the CONT
#section after converting to UTF8
my $num_headers = 0; #header count read from the .RMF file header
{
    # Pass 1: find the CONT chunk, convert its four strings and build the
    # replacement chunk. Results are left in the file-level variables
    # $changed_cont, $added_len and $num_headers.

    # Three-argument open with a lexical handle, so the file name can never
    # be interpreted as an open mode. The file is only read in this pass.
    open(my $in, '<', $rmvbFile)
        or die "Couldn't open $rmvbFile for reading: $!\n";
    binmode($in);

    # Read exactly $len bytes or die; a short read means a truncated file.
    my $read_exact = sub {
        my ($len) = @_;
        my $data = "";
        my $got = read($in, $data, $len);
        defined $got or die "error when read:$!";
        $got == $len or die "error when read: unexpected end of file\n";
        return $data;
    };

    #---------------------------------------------
    #try to read content tag of the rmvb file
    my ($cont_size, $cont_ver, $cont_buffer) = (-1, -1, "");

    #read first 18 bytes (the .RMF file header)
    my ($obj_id, $obj_size, $obj_version, $file_ver);
    ($obj_id, $obj_size, $obj_version, $file_ver, $num_headers) =
        unpack("a4NnNN", $read_exact->(18));

    #quit if not a real media file
    die "Not a Real Media file" if $obj_id ne ".RMF";

    for (my $i = 0; $i < $num_headers; $i++) {
        # Every chunk starts with 10 bytes: 4-byte id, 32-bit size and a
        # 16-bit version ("n"; the old "H" template read a single nybble).
        ($obj_id, $obj_size, $obj_version) =
            unpack("a4Nn", $read_exact->(10));
        print "Header $i: $obj_id, $obj_size, $obj_version\n";

        # The size includes the 10 bytes just read; anything smaller would
        # make the skip below move backwards.
        die "Corrupt $obj_id section: size $obj_size is too small\n"
            if $obj_size < 10;

        if ($obj_id eq "CONT") {
            $cont_buffer = $read_exact->($obj_size - 10);
            $cont_size = $obj_size;
            $cont_ver = $obj_version;
        } else {
            #skip other sections
            # Whence 0 is SEEK_SET. Plain "use Fcntl;" does not import
            # SEEK_SET (that needs the :seek tag), so the bareword used
            # before only worked because it numified to 0.
            seek($in, tell($in) + $obj_size - 10, 0)
                or die "error when seek:$!";
        }
    }
    die "Couldn't find content tags" if ($cont_size == -1);

    #-------------------------------------------------------
    #the following will convert content tag to UTF encoding
    #------------------------------------------------------
    my $tmp_pos = 0;

    # Pull the next "16-bit length + bytes" string out of the CONT body,
    # refusing to run past the end of the chunk.
    my $next_string = sub {
        my ($what) = @_;
        $tmp_pos + 2 <= length($cont_buffer)
            or die "Corrupt CONT section: missing $what length\n";
        my $len = unpack("n", substr($cont_buffer, $tmp_pos, 2));
        $tmp_pos += 2;
        $tmp_pos + $len <= length($cont_buffer)
            or die "Corrupt CONT section: $what is truncated\n";
        my $text = substr($cont_buffer, $tmp_pos, $len);
        $tmp_pos += $len;
        return ($len, $text);
    };

    my ($title_len, $title) = $next_string->("title");
    my ($author_len, $author) = $next_string->("author");
    my ($copyright_len, $copyright) = $next_string->("copyright");
    my ($comment_len, $comment) = $next_string->("comment");

    print "------Before Change-------\n";
    print "title($title_len)=$title\n";
    print "author($author_len)=$author\n";
    print "copyright($copyright_len)=$copyright\n";
    print "comment($comment_len)=$comment\n";

    #convert to UTF8 (from_to rewrites each string in place, as bytes)
    from_to($title, $from_encoding, $to_encoding);
    from_to($author, $from_encoding, $to_encoding);
    from_to($copyright, $from_encoding, $to_encoding);
    from_to($comment, $from_encoding, $to_encoding);

    print "------After Change-------\n";
    $title_len = length($title);
    $author_len = length($author);
    $copyright_len = length($copyright);
    $comment_len = length($comment);
    print "title($title_len)=$title\n";
    print "author($author_len)=$author\n";
    print "copyright($copyright_len)=$copyright\n";
    print "comment($comment_len)=$comment\n";

    # Each string is stored behind a 16-bit length; the converted text must
    # still fit or pack() would silently wrap the length.
    for my $field ($title, $author, $copyright, $comment) {
        die "Converted CONT string is too long for its 16-bit length field\n"
            if length($field) > 0xFFFF;
    }

    #rebuild CONT section ("n/a*" packs a 16-bit length followed by the bytes)
    $changed_cont = pack("a4 N n n/a* n/a* n/a* n/a*",
        "CONT",
        10 + 2 + $title_len + 2 + $author_len
           + 2 + $copyright_len + 2 + $comment_len,
        $cont_ver,
        $title, $author, $copyright, $comment);

    # How much the CONT chunk grew (may be negative); used later to shift
    # the offsets stored in the PROP chunk.
    $added_len = length($changed_cont) - $cont_size;
    close($in);
}
#----------------------------------------------------------
#confirm with user if want to make the change
print "This Real Media file may have multiple DATA sections, output file may not be usable\n"
    if ($num_headers > 7);
# Pass 2: after confirmation, write "$rmvbFile.new" with the rebuilt CONT
# chunk and adjusted PROP offsets, then swap it in and keep the original
# as "$rmvbFile.old".
print "Make the change?(Y/N)\n";
# Read the answer from standard input; end-of-file counts as "no".
my $user_in = <STDIN>;
$user_in = defined $user_in ? uc($user_in) : "";
my $newFileCreated = 0;
if (substr($user_in,0,1) ne "Y"){
    print "File not changed\n";
}else{
    # Without a rebuilt CONT chunk the output would silently lose its tags.
    die "No converted CONT section available, file not changed\n"
        if $changed_cont eq "";

    # Three-argument opens with lexical handles; the input is only read.
    open(my $in, '<', $rmvbFile)
        or die "Couldn't open $rmvbFile for reading: $!\n";
    binmode($in);
    #create a new file
    open(my $out, '>', "$rmvbFile.new")
        or die "error when creating new file:$!";
    binmode($out);

    my $chunk = 1 << 20; # copy in 1 MiB pieces instead of slurping DATA

    # Read exactly $len bytes or die; a short read means a truncated file.
    my $read_exact = sub {
        my ($len) = @_;
        my $data = "";
        my $got = read($in, $data, $len);
        defined $got or die "error when read:$!";
        $got == $len or die "error when read: unexpected end of file\n";
        return $data;
    };

    # Copy exactly $remaining bytes from the old file to the new one.
    my $copy_bytes = sub {
        my ($remaining) = @_;
        while ($remaining > 0) {
            my $want = $remaining < $chunk ? $remaining : $chunk;
            print {$out} $read_exact->($want) or die "error when write:$!";
            $remaining -= $want;
        }
    };

    #copy the old file's first 18 bytes
    $buffer = $read_exact->(18);
    print {$out} $buffer or die "error when write:$!";
    my ($obj_id,
        $obj_size,
        $obj_version,
        $file_ver,
        $num_headers) = unpack("a4NnNN", $buffer);

    for (my $i = 0; $i < $num_headers; $i++) {
        # 10-byte chunk preamble: id, 32-bit size, 16-bit version.
        $buffer = $read_exact->(10);
        ($obj_id, $obj_size, $obj_version) = unpack("a4Nn", $buffer);
        die "Corrupt $obj_id section: size $obj_size is too small\n"
            if $obj_size < 10;
        my $body_len = $obj_size - 10;

        if ($obj_id eq "CONT") {
            print {$out} $changed_cont or die "error when write:$!";
            #skip old file's CONT section (whence 0 is SEEK_SET, which
            #plain "use Fcntl;" does not import)
            seek($in, tell($in) + $body_len, 0)
                or die "error when seek:$!";
        } elsif ($obj_id eq "PROP") {
            #rebuild PROP section
            #need to recalculate offset of DATA section
            print {$out} $buffer or die "error when write:$!";
            my $prop = $read_exact->($body_len);
            # The index and data offsets are the 8th and 9th 32-bit fields
            # of the PROP body (bytes 28..35). Patch them in place so any
            # bytes after the known fields are written back unchanged.
            if (length($prop) >= 36) {
                my ($offsetIndx, $offsetData) =
                    unpack("NN", substr($prop, 28, 8));
                # A zero offset is left alone rather than turned into a
                # bogus non-zero one. NOTE(review): assumes 0 means "no
                # such section" and that CONT precedes both sections.
                $offsetIndx += $added_len if $offsetIndx;
                $offsetData += $added_len if $offsetData;
                substr($prop, 28, 8) = pack("NN", $offsetIndx, $offsetData);
            }
            print {$out} $prop or die "error when write:$!";
        } else {
            #for other sections, just copy
            #copy the header already read
            print {$out} $buffer or die "error when write:$!";
            #copy the rest of this section
            $copy_bytes->($body_len);
        }
    }

    # Copy whatever follows the declared headers so nothing is truncated
    # when the file holds more chunks than the header count says.
    # NOTE(review): absolute offsets stored inside later chunks are not
    # adjusted; files with several DATA sections may still be unusable.
    while (1) {
        my $got = read($in, $buffer, $chunk);
        defined $got or die "error when read:$!";
        last if $got == 0;
        print {$out} $buffer or die "error when write:$!";
    }

    close($in);
    # Buffered write errors only surface at close, so check it.
    close($out) or die "error when closing new file:$!";
    rename($rmvbFile, "$rmvbFile.old") or die "Can't rename: $!";
    rename("$rmvbFile.new", "$rmvbFile") or die "Can't rename: $!";
}
Labels: GBK UTF8 RMVB

0 Comments:
Post a Comment
<< Home