make_perl_groups.pl 2.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113
  1. #!/usr/bin/perl
  2. # Copyright 2008 The Go Authors. All rights reserved.
  3. # Use of this source code is governed by a BSD-style
  4. # license that can be found in the GO-LICENSE file.
  5. # Modified version of RE2's make_perl_groups.pl.
  6. # Generate table entries giving character ranges
  7. # for POSIX/Perl character classes. Rather than
  8. # figure out what the definition is, it is easier to ask
  9. # Perl about each letter from 0-128 and write down
  10. # its answer.
  11. @posixclasses = (
  12. "[:alnum:]",
  13. "[:alpha:]",
  14. "[:ascii:]",
  15. "[:blank:]",
  16. "[:cntrl:]",
  17. "[:digit:]",
  18. "[:graph:]",
  19. "[:lower:]",
  20. "[:print:]",
  21. "[:punct:]",
  22. "[:space:]",
  23. "[:upper:]",
  24. "[:word:]",
  25. "[:xdigit:]",
  26. );
  27. @perlclasses = (
  28. "\\d",
  29. "\\s",
  30. "\\w",
  31. );
  32. %overrides = (
  33. # Prior to Perl 5.18, \s did not match vertical tab.
  34. # RE2 preserves that original behaviour.
  35. "\\s:11" => 0,
  36. );
  37. sub ComputeClass($) {
  38. my @ranges;
  39. my ($class) = @_;
  40. my $regexp = "[$class]";
  41. my $start = -1;
  42. for (my $i=0; $i<=129; $i++) {
  43. if ($i == 129) { $i = 256; }
  44. if ($i <= 128 && ($overrides{"$class:$i"} // chr($i) =~ $regexp)) {
  45. if ($start < 0) {
  46. $start = $i;
  47. }
  48. } else {
  49. if ($start >= 0) {
  50. push @ranges, [$start, $i-1];
  51. }
  52. $start = -1;
  53. }
  54. }
  55. return @ranges;
  56. }
  57. sub PrintClass($$@) {
  58. my ($cname, $name, @ranges) = @_;
  59. print "var code$cname = []rune{ /* $name */\n";
  60. for (my $i=0; $i<@ranges; $i++) {
  61. my @a = @{$ranges[$i]};
  62. printf "\t0x%x, 0x%x,\n", $a[0], $a[1];
  63. }
  64. print "}\n\n";
  65. my $n = @ranges;
  66. $negname = $name;
  67. if ($negname =~ /:/) {
  68. $negname =~ s/:/:^/;
  69. } else {
  70. $negname =~ y/a-z/A-Z/;
  71. }
  72. return "\t`$name`: {+1, code$cname},\n" .
  73. "\t`$negname`: {-1, code$cname},\n";
  74. }
  75. my $gen = 0;
  76. sub PrintClasses($@) {
  77. my ($cname, @classes) = @_;
  78. my @entries;
  79. foreach my $cl (@classes) {
  80. my @ranges = ComputeClass($cl);
  81. push @entries, PrintClass(++$gen, $cl, @ranges);
  82. }
  83. print "var ${cname}Group = map[string]charGroup{\n";
  84. foreach my $e (@entries) {
  85. print $e;
  86. }
  87. print "}\n";
  88. my $count = @entries;
  89. }
  90. print <<EOF;
  91. // Copyright 2013 The Go Authors. All rights reserved.
  92. // Use of this source code is governed by a BSD-style
  93. // license that can be found in the LICENSE file.
  94. // GENERATED BY make_perl_groups.pl; DO NOT EDIT.
  95. // make_perl_groups.pl >perl_groups.go
  96. package syntax
  97. EOF
  98. PrintClasses("perl", @perlclasses);
  99. PrintClasses("posix", @posixclasses);