Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | Download | RSS feed

  1. #!/usr/bin/perl -w
  2. # This file is part of LibParserUtils.
  3. # Licensed under the MIT License,
  4. #                http://www.opensource.org/licenses/mit-license.php
  5. # Copyright 2010 Daniel Silverstone <dsilvers@netsurf-browser.org>
  6. #                John-Mark Bell <jmb@netsurf-browser.org>
  7.  
  8. use strict;
  9.  
  # Build-tree paths: the alias table this script reads, and the C include
  # fragment it generates from that table.
  use constant {
      ALIAS_FILE => 'build/Aliases',
      ALIAS_INC  => 'src/charset/aliases.inc',
  };

  # Patterns tested against canonical charset names.  A charset whose name
  # matches any of them has its MIB enum listed in the generated
  # MIBENUM_IS_UNICODE() macro.  The UTF-16 and UTF-32 patterns are left
  # open-ended so that suffixed names also match.
  use constant UNICODE_CHARSETS => [
      qr/^ISO-10646-UCS-[24]$/,
      qr/^UTF-16/,
      qr/^UTF-8$/,
      qr/^UTF-32/,
  ];
  20.  
  # Parse the alias table.  Each data line has the whitespace-separated form
  #     <canonical-name> <MIB-enum> [<alias> ...]
  # and lines whose first non-blank character is '#' are comments.
  # The result, %charsets, maps canonical name => [ MIB enum, [ aliases ] ].
  open(my $infile, "<", ALIAS_FILE)
      or die "Unable to open " . ALIAS_FILE . ": $!\n";

  my %charsets;

  while (my $line = <$infile>) {
      # Skip comments and blank lines.  Whitespace-only lines (for example a
      # lone "\r" left over from CRLF line endings) count as blank, so they
      # can no longer produce an entry with an undefined canonical name.
      next if ($line =~ /^\s*(?:#.*)?$/);

      # split ' ' ignores leading whitespace and the trailing newline, so an
      # indented line cannot yield an empty canonical name.
      my ($canon, $mibenum, @elements) = split ' ', $line;

      # A row without a MIB enum would generate a C initialiser that does
      # not compile; fail here with the offending input line instead.
      die sprintf("%s:%d: missing MIB enum for charset '%s'\n",
                  ALIAS_FILE, $., $canon)
          unless (defined $mibenum);

      $charsets{$canon} = [$mibenum, \@elements];
  }

  close($infile);
  37.  
  # Accumulates the "((x) == N) || " terms that become the body of the
  # generated MIBENUM_IS_UNICODE() macro.
  my $unicodeexp = "";

  # Text of the generated include file, built up piece by piece.  It starts
  # with the licence banner and the opening of the canonical-name table.
  # (The banner previously read "Do not edit file file".)
  my $output = <<'EOH';
/*
 * This file is part of LibParserUtils.
 * Licensed under the MIT License,
 *                http://www.opensource.org/licenses/mit-license.php
 * Copyright 2010 The NetSurf Project.
 *
 * Note: This file is automatically generated by make-aliases.pl
 *
 * Do not edit this file, changes will be overwritten during build.
 */

static parserutils_charset_aliases_canon canonical_charset_names[] = {
EOH
  54.  
  # Walk the canonical names in sorted order, emitting one table row per
  # charset and recording every lookup key that must resolve to that row.
  my %aliases;        # normalised name => index into canonical_charset_names
  my $canonnr = 0;    # index of the row currently being emitted
  for my $canon (sort keys %charsets) {
      my ($mibenum, $elements) = @{ $charsets{$canon} };

      # Ordering must match struct in src/charset/aliases.h
      $output .= join('', "\t{ ", $mibenum, ", ", length($canon),
                      ', "', $canon, '" },', "\n");

      # A canonical name matching any UNICODE_CHARSETS pattern contributes
      # one term to the MIBENUM_IS_UNICODE() expression.
      if (grep { $canon =~ $_ } @{ UNICODE_CHARSETS() }) {
          $unicodeexp .= "((x) == $mibenum) || ";
      }

      # Register the canonical name first and then each alias under its
      # normalised form: A-Z folded to a-z, everything outside [a-z0-9]
      # removed.  On a collision the later registration wins.
      for my $name ($canon, @$elements) {
          (my $key = $name) =~ tr/A-Z/a-z/;
          $key =~ s/[^a-z0-9]//g;
          $aliases{$key} = $canonnr;
      }

      $canonnr++;
  }

  # Close the table and record how many rows it has.
  $output .= "};\n\n"
           . "static const uint16_t charset_aliases_canon_count = ${canonnr};\n\n";
  80.  
  # Emit the alias record type and open the alias table.
  $output .= <<'EOT';
typedef struct {
        uint16_t name_len;
        const char *name;
        parserutils_charset_aliases_canon *canon;
} parserutils_charset_aliases_alias;

static parserutils_charset_aliases_alias charset_aliases[] = {
EOT

  # One row per normalised name, in sorted order: the name's length, the
  # name itself, and a pointer to its row in canonical_charset_names.
  my @sorted_aliases = sort keys %aliases;
  for my $name (@sorted_aliases) {
      my $row = "\t{ " . length($name) . ', "' . $name . '", ';
      $row .= '&canonical_charset_names[' . $aliases{$name} . "] },\n";
      $output .= $row;
  }

  $output .= "};\n\n";

  # Number of rows just written to charset_aliases.
  my $aliascount = scalar @sorted_aliases;
  100.  
  # Drop the final " || " left behind by the term accumulation.  Only that
  # exact suffix is removed, so an empty expression is left untouched
  # rather than being chopped blindly.
  $unicodeexp =~ s/ \|\| \z//;

  # If no charset matched UNICODE_CHARSETS the expression is empty, and
  # "()" is not a valid C expression; make the macro evaluate to 0 instead.
  $unicodeexp = "0" if ($unicodeexp eq "");

  # Trailer: the alias row count and the MIB-enum predicate macro.
  $output .= <<"EOS";
static const uint16_t charset_aliases_count = ${aliascount};

#define MIBENUM_IS_UNICODE(x) ($unicodeexp)
EOS
  112.  
  # Read the include file as it currently exists, if it can be opened.
  # $current stays undefined when the file is absent or unreadable, which
  # forces it to be (re)written below.
  my $current;
  if (open(my $existing, "<", ALIAS_INC)) {
      local $/ = undef;    # slurp the whole file in one read
      $current = <$existing>;
      close($existing);
  }

  # Rewrite the include file only when the generated text differs from what
  # is already on disk, leaving an up-to-date file untouched.  Every step of
  # the write is checked: an unreported failure here would let the script
  # exit successfully with a stale, truncated or missing include file.
  if (!defined($current) || $current ne $output) {
      open(my $outf, ">", ALIAS_INC)
          or die "Unable to open " . ALIAS_INC . " for writing: $!\n";
      print {$outf} $output
          or die "Unable to write " . ALIAS_INC . ": $!\n";
      # Buffered write errors only surface when the handle is closed.
      close($outf)
          or die "Unable to close " . ALIAS_INC . ": $!\n";
  }
  125.