#!/usr/bin/env perl use v5.10; use strict; use warnings; use utf8; use open qw/:std :utf8/; use Getopt::Long; use Pod::Usage; use if $^O eq 'MSWin32', 'Win32::Console::ANSI'; use Term::ANSIColor; use constant { NULL => "\x00", BSON_TYPE => "C", BSON_ENAME => "Z*", BSON_TYPE_NAME => "CZ*", BSON_DOUBLE => "d", BSON_STRING => "l/A", BSON_BOOLEAN => "C", BSON_REGEX => "Z*Z*", BSON_JSCODE => "", BSON_INT32 => "l", BSON_INT64 => "q", BSON_TIMESTAMP => "q", BSON_CODE_W_SCOPE => "l", BSON_REMAINING => 'a*', BSON_SKIP_4_BYTES => 'x4', BSON_OBJECTID => 'a12', BSON_BINARY_TYPE => 'C', BSON_CSTRING => 'Z*', BSON_BYTES => 'a*' }; my $BOLD = $^O eq 'MSWin32' ? "bold " : ""; # minimum field size my %FIELD_SIZES = ( 0x01 => 8, 0x02 => 5, 0x03 => 5, 0x04 => 5, 0x05 => 5, 0x06 => 0, 0x07 => 12, 0x08 => 1, 0x09 => 8, 0x0A => 0, 0x0B => 2, 0x0C => 17, 0x0D => 5, 0x0E => 5, 0x0F => 14, 0x10 => 4, 0x11 => 8, 0x12 => 8, 0x7F => 0, 0xFF => 0, ); sub main { my ( $hex, $file, $help ); GetOptions( "file=s" => \$file, "x" => \$hex, "help|h" => \$help, ) or die("Error in command line args"); pod2usage( { -exitval => 2, -verbose => 2, } ) if $help; if ( $file ) { dump_file($file); } else { dump_stdin($hex); } } sub dump_stdin { my $hex = shift; while ( defined( my $bson = ) ) { chomp $bson; if ( !length($bson) ) { print_error("[ no document ]\n"); next; } # in -x mode, treat leading # as a comment if ( $hex && index( $bson, "#" ) == 0 ) { say $bson; next; } $bson =~ s[ ][]g if $hex; $bson = pack( "H*", $bson ) if $hex; dump_document( \$bson ); print "\n"; } } sub dump_file { my $file = shift; open my $fh, "<", $file; binmode($fh); my $data = do { local $/; <$fh> }; while ( length $data ) { my $len = unpack( BSON_INT32, $data ); my $bson = substr($data,0,$len,''); dump_document(\$bson); print "\n"; } } sub dump_document { my ( $ref, $is_array ) = @_; print $is_array ? " [" : " {" if defined $is_array; dump_header($ref); 1 while dump_field($ref); print_error( " " . unpack( "H*", $$ref ) ) if length($$ref); print $is_array ? " ]" : " }" if defined $is_array; return; } sub dump_header { my ($ref) = @_; my $len = get_length( $ref, 4 ); return unless defined $len; if ( $len < 5 || $len < length($$ref) + 4 ) { print_length( $len, 'red' ); } else { print_length( $len, 'blue' ); } } sub dump_field { my ($ref) = @_; # detect end of document if ( length($$ref) < 2 ) { if ( length($$ref) == 0 ) { print_error(" [missing terminator]"); } else { my $end = substr( $$ref, 0, 1, '' ); print_hex( $end, $end eq NULL ? 'blue' : 'red' ); } return; } # unpack type my $type = unpack( BSON_TYPE, substr( $$ref, 0, 1, '' ) ); if ( !exists $FIELD_SIZES{$type} ) { print_type( $type, 'red' ); return; } print_type($type); # check for key termination my $key_end = index( $$ref, NULL ); return if $key_end == -1; # unpack key my $key = unpack( BSON_CSTRING, substr( $$ref, 0, $key_end + 1, '' ) ); print_key($key); # Check if there is enough data to complete field for this type # This is greedy, so it checks length, not length -1 my $min_size = $FIELD_SIZES{$type}; return if length($$ref) < $min_size; # fields without payload: 0x06, 0x0A, 0x7F, 0xFF return 1 if $min_size == 0; # document or array if ( $type == 0x03 || $type == 0x04 ) { my ($len) = unpack( BSON_INT32, $$ref ); my $doc = substr( $$ref, 0, $len, '' ); dump_document( \$doc, $type == 0x04 ); return 1; } # fixed width fields if ( $type == 0x01 || $type == 0x07 || $type == 0x09 || $type == 0x10 || $type == 0x11 || $type == 0x12 ) { my $len = ( $type == 0x10 ? 4 : $type == 0x07 ? 12 : 8 ); print_hex( substr( $$ref, 0, $len, '' ) ); return 1; } # boolean if ( $type == 0x08 ) { my $bool = substr( $$ref, 0, 1, '' ); print_hex( $bool, ( $bool eq "\x00" || $bool eq "\x01" ) ? 'green' : 'red' ); return 1; } # binary field if ( $type == 0x05 ) { my $len = get_length( $ref, -1 ); my $subtype = substr( $$ref, 0, 1, '' ); if ( !defined($len) ) { print_hex($subtype); return; } my $binary = substr( $$ref, 0, $len, '' ); print_length($len); print_hex($subtype); if ( $subtype eq "\x02" ) { my $bin_len = get_length( \$binary ); if ( !defined($bin_len) ) { print_hex( $binary, 'red' ); return; } if ( $bin_len != length($binary) ) { print_length( $bin_len, 'red' ); print_hex( $binary, 'red' ); return; } } print_hex($binary) if length($binary); return 1; } # string or symbol or code if ( $type == 0x02 || $type == 0x0e || $type == 0x0d ) { my ( $len, $string ) = get_string($ref); return unless defined $len; print_length( $len, 'cyan' ); print_string($string); return 1; } # regex 0x0B if ( $type == 0x0B ) { my ( $pattern, $flag ) = unpack( BSON_CSTRING . BSON_CSTRING, $$ref ); substr( $$ref, 0, length($pattern) + length($flag) + 2, '' ); print_string($pattern); print_string($flag); return 1; } # code with scope 0x0F if ( $type == 0x0F ) { my $len = get_length( $ref, 4 ); return unless defined $len; # len + string + doc minimum size is 4 + 5 + 5 if ( $len < 14 ) { print_length( $len, 'red' ); return; } print_length($len); my $cws = substr( $$ref, 0, $len - 4, '' ); my ( $strlen, $string ) = get_string( \$cws ); if ( !defined $strlen ) { print_hex( $cws, 'red' ); return; } print_length($strlen); print_string($string); dump_document( \$cws, 0 ); return 1; } # dbpointer 0x0C if ( $type == 0x0C ) { my ( $len, $string ) = get_string($ref); return unless defined $len; print_length($len); print_string($string); # Check if there are 12 bytes (plus terminator) or more return if length($$ref) < 13; my $oid = substr( $$ref, 0, 12, '' ); print_hex($oid); return 1; } die "Shouldn't reach here"; } sub get_length { my ( $ref, $adj ) = @_; $adj ||= 0; my $len = unpack( BSON_INT32, substr( $$ref, 0, 4, '' ) ); return unless defined $len; # check if requested length is too long if ( $len < 0 || $len > length($$ref) + $adj ) { print_length( $len, 'red' ); return; } return $len; } sub get_string { my ($ref) = @_; my $len = get_length($ref); return unless defined $len; # len must be at least 1 for trailing 0x00 if ( $len == 0 ) { print_length( $len, 'red' ); return; } my $string = substr( $$ref, 0, $len, '' ); # check if null terminated if ( substr( $string, -1, 1 ) ne NULL ) { print_length($len); print_hex( $string, 'red' ); return; } # remove trailing null chop($string); # try to decode to UTF-8 if ( !utf8::decode($string) ) { print_length($len); print_hex( $string . "\x00", 'red' ); return; } return ( $len, $string ); } sub print_error { my ($text) = @_; print colored( ["${BOLD}red"], $text ); } sub print_type { my ( $type, $color ) = @_; $color ||= 'magenta'; print colored( ["$BOLD$color"], sprintf( " %02x", $type ) ); } sub print_key { my ($string) = @_; print_string( $string, 'yellow' ); } sub print_string { my ( $string, $color ) = @_; $color ||= 'green'; $string =~ s{([^[:graph:]])}{sprintf("\\x%02x",ord($1))}ge; print colored( ["$BOLD$color"], qq[ "$string"] . " 00" ); } sub print_length { my ( $len, $color ) = @_; $color ||= 'cyan'; print colored( ["$BOLD$color"], " " . unpack( "H*", pack( BSON_INT32, $len ) ) ); } sub print_hex { my ( $value, $color ) = @_; $color ||= 'green'; print colored( ["$BOLD$color"], " " . uc( unpack( "H*", $value ) ) ); } main(); __END__ =head1 NAME bsonview - dump a BSON string with color output showing structure =head1 SYNOPSIS cat file.bson | bsondump echo "0500000000" | bsondump -x =head1 OPTIONS -x input is in hex format (default is 0) --help, -h show help =head1 USAGE Reads from C and dumps colored structures to C. =head1 AUTHOR =over 4 =item * David Golden =back =head1 COPYRIGHT AND LICENSE This software is Copyright (c) 2016 by MongoDB, Inc.. This is free software, licensed under: The Apache License, Version 2.0, January 2004 =cut =cut