1 file changed, 134 insertions, 106 deletions
diff --git a/src/backend/utils/mb/Unicode/convutils.pm b/src/backend/utils/mb/Unicode/convutils.pm
index 42b4ffaaef..43cadf5303 100644
--- a/src/backend/utils/mb/Unicode/convutils.pm
+++ b/src/backend/utils/mb/Unicode/convutils.pm
@@ -9,15 +9,15 @@ use strict;
 
 use Exporter 'import';
 
-our @EXPORT = qw( NONE TO_UNICODE FROM_UNICODE BOTH read_source print_conversion_tables);
+our @EXPORT =
+  qw( NONE TO_UNICODE FROM_UNICODE BOTH read_source print_conversion_tables);
 
 # Constants used in the 'direction' field of the character maps
 use constant {
 	NONE         => 0,
 	TO_UNICODE   => 1,
 	FROM_UNICODE => 2,
-	BOTH         => 3
-};
+	BOTH         => 3 };
 
 #######################################################################
 # read_source - common routine to read source file
@@ -36,7 +36,7 @@ sub read_source
 		next if (/^#/);
 		chop;
 
-		next if (/^$/); # Ignore empty lines
+		next if (/^$/);    # Ignore empty lines
 
 		next if (/^0x([0-9A-F]+)\s+(#.*)$/);
 
@@ -49,13 +49,13 @@ sub read_source
 			print STDERR "READ ERROR at line $. in $fname: $_\n";
 			exit;
 		}
-		my $out = {code => hex($1),
-				   ucs => hex($2),
-				   comment => $4,
-				   direction => BOTH,
-				   f => $fname,
-				   l => $.
-				};
+		my $out = {
+			code      => hex($1),
+			ucs       => hex($2),
+			comment   => $4,
+			direction => BOTH,
+			f         => $fname,
+			l         => $. };
 
 		# Ignore pure ASCII mappings. PostgreSQL character conversion code
 		# never even passes these to the conversion code.
@@ -92,8 +92,10 @@ sub print_conversion_tables
 {
 	my ($this_script, $csname, $charset) = @_;
 
-	print_conversion_tables_direction($this_script, $csname, FROM_UNICODE, $charset);
-	print_conversion_tables_direction($this_script, $csname, TO_UNICODE,   $charset);
+	print_conversion_tables_direction($this_script, $csname, FROM_UNICODE,
+		$charset);
+	print_conversion_tables_direction($this_script, $csname, TO_UNICODE,
+		$charset);
 }
 
 #############################################################################
@@ -117,14 +119,14 @@ sub print_conversion_tables_direction
 	my $tblname;
 	if ($direction == TO_UNICODE)
 	{
-		$fname = lc("${csname}_to_utf8.map");
+		$fname   = lc("${csname}_to_utf8.map");
 		$tblname = lc("${csname}_to_unicode_tree");
 
 		print "- Writing ${csname}=>UTF8 conversion table: $fname\n";
 	}
 	else
 	{
-		$fname = lc("utf8_to_${csname}.map");
+		$fname   = lc("utf8_to_${csname}.map");
 		$tblname = lc("${csname}_from_unicode_tree");
 
 		print "- Writing UTF8=>${csname} conversion table: $fname\n";
@@ -135,24 +137,22 @@ sub print_conversion_tables_direction
 	print $out "/* src/backend/utils/mb/Unicode/$fname */\n";
 	print $out "/* This file is generated by $this_script */\n\n";
 
-	# Collect regular, non-combined, mappings, and create the radix tree from them.
+# Collect regular, non-combined, mappings, and create the radix tree from them.
 	my $charmap = &make_charmap($out, $charset, $direction, 0);
 	print_radix_table($out, $tblname, $charmap);
 
-	# Collect combined characters, and create combined character table (if any)
+   # Collect combined characters, and create combined character table (if any)
 	my $charmap_combined = &make_charmap_combined($charset, $direction);
 
 	if (scalar @{$charmap_combined} > 0)
 	{
 		if ($direction == TO_UNICODE)
 		{
-			print_to_utf8_combined_map($out, $csname,
-									   $charmap_combined, 1);
+			print_to_utf8_combined_map($out, $csname, $charmap_combined, 1);
 		}
 		else
 		{
-			print_from_utf8_combined_map($out, $csname,
-										 $charmap_combined, 1);
+			print_from_utf8_combined_map($out, $csname, $charmap_combined, 1);
 		}
 	}
 
@@ -166,14 +166,16 @@ sub print_from_utf8_combined_map
 	my $last_comment = "";
 
 	printf $out "\n/* Combined character map */\n";
-	printf $out "static const pg_utf_to_local_combined ULmap${charset}_combined[ %d ] = {",
+	printf $out
+"static const pg_utf_to_local_combined ULmap${charset}_combined[ %d ] = {",
 	  scalar(@$table);
 	my $first = 1;
-	foreach my $i (sort {$a->{utf8} <=> $b->{utf8}} @$table)
-    {
+	foreach my $i (sort { $a->{utf8} <=> $b->{utf8} } @$table)
+	{
 		print($out ",") if (!$first);
 		$first = 0;
-		print $out "\t/* $last_comment */" if ($verbose && $last_comment ne "");
+		print $out "\t/* $last_comment */"
+		  if ($verbose && $last_comment ne "");
 
 		printf $out "\n  {0x%08x, 0x%08x, 0x%04x}",
 		  $i->{utf8}, $i->{utf8_second}, $i->{code};
@@ -198,15 +200,17 @@ sub print_to_utf8_combined_map
 	my $last_comment = "";
 
 	printf $out "\n/* Combined character map */\n";
-	printf $out "static const pg_local_to_utf_combined LUmap${charset}_combined[ %d ] = {",
+	printf $out
+"static const pg_local_to_utf_combined LUmap${charset}_combined[ %d ] = {",
 	  scalar(@$table);
 
 	my $first = 1;
-	foreach my $i (sort {$a->{code} <=> $b->{code}} @$table)
-    {
+	foreach my $i (sort { $a->{code} <=> $b->{code} } @$table)
+	{
 		print($out ",") if (!$first);
 		$first = 0;
-		print $out "\t/* $last_comment */" if ($verbose && $last_comment ne "");
+		print $out "\t/* $last_comment */"
+		  if ($verbose && $last_comment ne "");
 
 		printf $out "\n  {0x%04x, 0x%08x, 0x%08x}",
 		  $i->{code}, $i->{utf8}, $i->{utf8_second};
@@ -214,7 +218,7 @@ sub print_to_utf8_combined_map
 		if ($verbose >= 2)
 		{
 			$last_comment =
-				sprintf("%s:%d %s", $i->{f}, $i->{l}, $i->{comment});
+			  sprintf("%s:%d %s", $i->{f}, $i->{l}, $i->{comment});
 		}
 		elsif ($verbose >= 1)
 		{
@@ -255,25 +259,25 @@ sub print_radix_table
 		}
 		elsif ($in < 0x10000)
 		{
-			my $b1     = $in >> 8;
-			my $b2     = $in & 0xff;
+			my $b1 = $in >> 8;
+			my $b2 = $in & 0xff;
 
 			$b2map{$b1}{$b2} = $out;
 		}
 		elsif ($in < 0x1000000)
 		{
-			my $b1     = $in >> 16;
-			my $b2     = ($in >> 8) & 0xff;
-			my $b3     = $in & 0xff;
+			my $b1 = $in >> 16;
+			my $b2 = ($in >> 8) & 0xff;
+			my $b3 = $in & 0xff;
 
 			$b3map{$b1}{$b2}{$b3} = $out;
 		}
 		elsif ($in < 0x100000000)
 		{
-			my $b1     = $in >> 24;
-			my $b2     = ($in >> 16) & 0xff;
-			my $b3     = ($in >> 8) & 0xff;
-			my $b4     = $in & 0xff;
+			my $b1 = $in >> 24;
+			my $b2 = ($in >> 16) & 0xff;
+			my $b3 = ($in >> 8) & 0xff;
+			my $b4 = $in & 0xff;
 
 			$b4map{$b1}{$b2}{$b3}{$b4} = $out;
 		}
@@ -309,10 +313,14 @@ sub print_radix_table
 	###
 
 	# Add the segments for the radix trees themselves.
-	push @segments, build_segments_from_tree("Single byte table", "1-byte", 1, \%b1map);
-	push @segments, build_segments_from_tree("Two byte table", "2-byte", 2, \%b2map);
-	push @segments, build_segments_from_tree("Three byte table", "3-byte", 3, \%b3map);
-	push @segments, build_segments_from_tree("Four byte table", "4-byte", 4, \%b4map);
+	push @segments,
+	  build_segments_from_tree("Single byte table", "1-byte", 1, \%b1map);
+	push @segments,
+	  build_segments_from_tree("Two byte table", "2-byte", 2, \%b2map);
+	push @segments,
+	  build_segments_from_tree("Three byte table", "3-byte", 3, \%b3map);
+	push @segments,
+	  build_segments_from_tree("Four byte table", "4-byte", 4, \%b4map);
 
 	###
 	### Find min and max index used in each level of each tree.
@@ -325,23 +333,24 @@ sub print_radix_table
 	my %max_idx;
 	foreach my $seg (@segments)
 	{
-		my $this_min = $min_idx{$seg->{depth}}->{$seg->{level}};
-		my $this_max = $max_idx{$seg->{depth}}->{$seg->{level}};
+		my $this_min = $min_idx{ $seg->{depth} }->{ $seg->{level} };
+		my $this_max = $max_idx{ $seg->{depth} }->{ $seg->{level} };
 
-		foreach my $i (keys %{$seg->{values}})
+		foreach my $i (keys %{ $seg->{values} })
 		{
 			$this_min = $i if (!defined $this_min || $i < $this_min);
 			$this_max = $i if (!defined $this_max || $i > $this_max);
 		}
 
-		$min_idx{$seg->{depth}}{$seg->{level}} = $this_min;
-		$max_idx{$seg->{depth}}{$seg->{level}} = $this_max;
+		$min_idx{ $seg->{depth} }{ $seg->{level} } = $this_min;
+		$max_idx{ $seg->{depth} }{ $seg->{level} } = $this_max;
 	}
+
 	# Copy the mins and max's back to every segment, for convenience.
 	foreach my $seg (@segments)
 	{
-		$seg->{min_idx} = $min_idx{$seg->{depth}}{$seg->{level}};
-		$seg->{max_idx} = $max_idx{$seg->{depth}}{$seg->{level}};
+		$seg->{min_idx} = $min_idx{ $seg->{depth} }{ $seg->{level} };
+		$seg->{max_idx} = $max_idx{ $seg->{depth} }{ $seg->{level} };
 	}
 
 	###
@@ -359,11 +368,10 @@ sub print_radix_table
 		$widest_range = $this_range if ($this_range > $widest_range);
 	}
 
-	unshift @segments, {
-		header => "Dummy map, for invalid values",
+	unshift @segments,
+	  { header  => "Dummy map, for invalid values",
 		min_idx => 0,
-		max_idx => $widest_range
-	};
+		max_idx => $widest_range };
 
 	###
 	### Eliminate overlapping zeros
@@ -378,26 +386,34 @@ sub print_radix_table
 	###
 	for (my $j = 0; $j < $#segments - 1; $j++)
 	{
-		my $seg = $segments[$j];
-		my $nextseg = $segments[$j + 1];
+		my $seg     = $segments[$j];
+		my $nextseg = $segments[ $j + 1 ];
 
 		# Count the number of zero values at the end of this segment.
 		my $this_trail_zeros = 0;
-		for (my $i = $seg->{max_idx}; $i >= $seg->{min_idx} && !$seg->{values}->{$i}; $i--)
+		for (
+			my $i = $seg->{max_idx};
+			$i >= $seg->{min_idx} && !$seg->{values}->{$i};
+			$i--)
 		{
 			$this_trail_zeros++;
 		}
 
 		# Count the number of zeros at the beginning of next segment.
 		my $next_lead_zeros = 0;
-		for (my $i = $nextseg->{min_idx}; $i <= $nextseg->{max_idx} && !$nextseg->{values}->{$i}; $i++)
+		for (
+			my $i = $nextseg->{min_idx};
+			$i <= $nextseg->{max_idx} && !$nextseg->{values}->{$i};
+			$i++)
 		{
 			$next_lead_zeros++;
 		}
 
 		# How many zeros in common?
 		my $overlaid_trail_zeros =
-			($this_trail_zeros > $next_lead_zeros) ? $next_lead_zeros : $this_trail_zeros;
+		  ($this_trail_zeros > $next_lead_zeros)
+		  ? $next_lead_zeros
+		  : $this_trail_zeros;
 
 		$seg->{overlaid_trail_zeros} = $overlaid_trail_zeros;
 		$seg->{max_idx} = $seg->{max_idx} - $overlaid_trail_zeros;
@@ -419,7 +435,7 @@ sub print_radix_table
 	foreach my $seg (@segments)
 	{
 		$seg->{offset} = $flatoff;
-		$segmap{$seg->{label}} = $flatoff;
+		$segmap{ $seg->{label} } = $flatoff;
 		$flatoff += $seg->{max_idx} - $seg->{min_idx} + 1;
 	}
 	my $tblsize = $flatoff;
@@ -427,9 +443,9 @@ sub print_radix_table
 	# Second pass: look up the offset of each label reference in the hash.
 	foreach my $seg (@segments)
 	{
-		while (my ($i, $val) = each %{$seg->{values}})
+		while (my ($i, $val) = each %{ $seg->{values} })
 		{
-			if (!($val =~ /^[0-9,.E]+$/ ))
+			if (!($val =~ /^[0-9,.E]+$/))
 			{
 				my $segoff = $segmap{$val};
 				if ($segoff)
@@ -482,7 +498,7 @@ sub print_radix_table
 	my $max_val = 0;
 	foreach my $seg (@segments)
 	{
-		foreach my $val (values %{$seg->{values}})
+		foreach my $val (values %{ $seg->{values} })
 		{
 			$max_val = $val if ($val > $max_val);
 		}
@@ -498,17 +514,17 @@ sub print_radix_table
 	if ($max_val <= 0xffff)
 	{
 		$vals_per_line = 8;
-		$colwidth = 4;
+		$colwidth      = 4;
 	}
 	elsif ($max_val <= 0xffffff)
 	{
 		$vals_per_line = 4;
-		$colwidth = 6;
+		$colwidth      = 6;
 	}
 	else
 	{
 		$vals_per_line = 4;
-		$colwidth = 8;
+		$colwidth      = 8;
 	}
 
 	###
@@ -529,17 +545,20 @@ sub print_radix_table
 		print $out "  ${tblname}_table,\n";
 	}
 	printf $out "\n";
-	printf $out "  0x%04x, /* offset of table for 1-byte inputs */\n", $b1root;
+	printf $out "  0x%04x, /* offset of table for 1-byte inputs */\n",
+	  $b1root;
 	printf $out "  0x%02x, /* b1_lower */\n", $b1_lower;
 	printf $out "  0x%02x, /* b1_upper */\n", $b1_upper;
 	printf $out "\n";
-	printf $out "  0x%04x, /* offset of table for 2-byte inputs */\n", $b2root;
+	printf $out "  0x%04x, /* offset of table for 2-byte inputs */\n",
+	  $b2root;
 	printf $out "  0x%02x, /* b2_1_lower */\n", $b2_1_lower;
 	printf $out "  0x%02x, /* b2_1_upper */\n", $b2_1_upper;
 	printf $out "  0x%02x, /* b2_2_lower */\n", $b2_2_lower;
 	printf $out "  0x%02x, /* b2_2_upper */\n", $b2_2_upper;
 	printf $out "\n";
-	printf $out "  0x%04x, /* offset of table for 3-byte inputs */\n", $b3root;
+	printf $out "  0x%04x, /* offset of table for 3-byte inputs */\n",
+	  $b3root;
 	printf $out "  0x%02x, /* b3_1_lower */\n", $b3_1_lower;
 	printf $out "  0x%02x, /* b3_1_upper */\n", $b3_1_upper;
 	printf $out "  0x%02x, /* b3_2_lower */\n", $b3_2_lower;
@@ -547,7 +566,8 @@ sub print_radix_table
 	printf $out "  0x%02x, /* b3_3_lower */\n", $b3_3_lower;
 	printf $out "  0x%02x, /* b3_3_upper */\n", $b3_3_upper;
 	printf $out "\n";
-	printf $out "  0x%04x, /* offset of table for 3-byte inputs */\n", $b4root;
+	printf $out "  0x%04x, /* offset of table for 4-byte inputs */\n",
+	  $b4root;
 	printf $out "  0x%02x, /* b4_1_lower */\n", $b4_1_lower;
 	printf $out "  0x%02x, /* b4_1_upper */\n", $b4_1_upper;
 	printf $out "  0x%02x, /* b4_2_lower */\n", $b4_2_lower;
@@ -561,18 +581,21 @@ sub print_radix_table
 	print $out "static const $datatype ${tblname}_table[$tblsize] =\n";
 	print $out "{";
 	my $off = 0;
+
 	foreach my $seg (@segments)
 	{
 		printf $out "\n";
 		printf $out "  /*** %s - offset 0x%05x ***/\n", $seg->{header}, $off;
 		printf $out "\n";
 
-		for (my $i=$seg->{min_idx}; $i <= $seg->{max_idx};)
+		for (my $i = $seg->{min_idx}; $i <= $seg->{max_idx};)
 		{
+
 			# Print the next line's worth of values.
 			# XXX pad to begin at a nice boundary
 			printf $out "  /* %02x */ ", $i;
-			for (my $j = 0; $j < $vals_per_line && $i <= $seg->{max_idx}; $j++)
+			for (my $j = 0;
+				$j < $vals_per_line && $i <= $seg->{max_idx}; $j++)
 			{
 				my $val = $seg->{values}->{$i};
 
@@ -588,7 +611,8 @@ sub print_radix_table
 		}
 		if ($seg->{overlaid_trail_zeros})
 		{
-			printf $out "    /* $seg->{overlaid_trail_zeros} trailing zero values shared with next segment */\n";
+			printf $out
+"    /* $seg->{overlaid_trail_zeros} trailing zero values shared with next segment */\n";
 		}
 	}
 
@@ -607,13 +631,14 @@ sub build_segments_from_tree
 
 	if (%{$map})
 	{
-		@segments = build_segments_recurse($header, $rootlabel, "", 1, $depth, $map);
+		@segments =
+		  build_segments_recurse($header, $rootlabel, "", 1, $depth, $map);
 
 		# Sort the segments into "breadth-first" order. Not strictly required,
 		# but makes the maps nicer to read.
-		@segments = sort { $a->{level} cmp $b->{level} or
-						   $a->{path}  cmp $b->{path}}
-						 @segments;
+		@segments =
+		  sort { $a->{level} cmp $b->{level} or $a->{path} cmp $b->{path} }
+		  @segments;
 	}
 
 	return @segments;
@@ -628,14 +653,13 @@ sub build_segments_recurse
 
 	if ($level == $depth)
 	{
-		push @segments, {
-			header => $header . ", leaf: ${path}xx",
-			label => $label,
-			level => $level,
-			depth => $depth,
-			path => $path,
-			values => $map
-		};
+		push @segments,
+		  { header => $header . ", leaf: ${path}xx",
+			label  => $label,
+			level  => $level,
+			depth  => $depth,
+			path   => $path,
+			values => $map };
 	}
 	else
 	{
@@ -646,19 +670,19 @@ sub build_segments_recurse
 			my $childpath = $path . sprintf("%02x", $i);
 			my $childlabel = "$depth-level-$level-$childpath";
 
-			push @segments, build_segments_recurse($header, $childlabel, $childpath,
-												   $level + 1, $depth, $val);
+			push @segments,
+			  build_segments_recurse($header, $childlabel, $childpath,
+				$level + 1, $depth, $val);
 			$children{$i} = $childlabel;
 		}
 
-		push @segments, {
-			header => $header . ", byte #$level: ${path}xx",
-			label => $label,
-			level => $level,
-			depth => $depth,
-			path => $path,
-			values => \%children
-		};
+		push @segments,
+		  { header => $header . ", byte #$level: ${path}xx",
+			label  => $label,
+			level  => $level,
+			depth  => $depth,
+			path   => $path,
+			values => \%children };
 	}
 	return @segments;
 }
@@ -688,29 +712,31 @@ sub make_charmap
 	my %charmap;
 	foreach my $c (@$charset)
 	{
+
 		# combined characters are handled elsewhere
 		next if (defined $c->{ucs_second});
 
 		next if ($c->{direction} != $direction && $c->{direction} != BOTH);
 
 		my ($src, $dst) =
-			$direction == TO_UNICODE
-			? ($c->{code}, ucs2utf($c->{ucs}))
-			: (ucs2utf($c->{ucs}), $c->{code});
+		  $direction == TO_UNICODE
+		  ? ($c->{code}, ucs2utf($c->{ucs}))
+		  : (ucs2utf($c->{ucs}), $c->{code});
 
 		# check for duplicate source codes
 		if (defined $charmap{$src})
 		{
 			printf STDERR
-				"Error: duplicate source code on %s:%d: 0x%04x => 0x%04x, 0x%04x\n",
-				$c->{f}, $c->{l}, $src, $charmap{$src}, $dst;
+"Error: duplicate source code on %s:%d: 0x%04x => 0x%04x, 0x%04x\n",
+			  $c->{f}, $c->{l}, $src, $charmap{$src}, $dst;
 			exit;
 		}
 		$charmap{$src} = $dst;
 
 		if ($verbose)
 		{
-			printf $out "0x%04x 0x%04x %s:%d %s\n", $src, $dst, $c->{f}, $c->{l}, $c->{comment};
+			printf $out "0x%04x 0x%04x %s:%d %s\n", $src, $dst, $c->{f},
+			  $c->{l}, $c->{comment};
 		}
 	}
 	if ($verbose)
@@ -743,11 +769,13 @@ sub make_charmap_combined
 
 		if (defined $c->{ucs_second})
 		{
-			my $entry = {utf8 => ucs2utf($c->{ucs}),
-						 utf8_second => ucs2utf($c->{ucs_second}),
-						 code => $c->{code},
-						 comment => $c->{comment},
-						 f => $c->{f}, l => $c->{l}};
+			my $entry = {
+				utf8        => ucs2utf($c->{ucs}),
+				utf8_second => ucs2utf($c->{ucs_second}),
+				code        => $c->{code},
+				comment     => $c->{comment},
+				f           => $c->{f},
+				l           => $c->{l} };
 			push @combined, $entry;
 		}
 	}