diff options
115 files changed, 10102 insertions, 4918 deletions
@@ -18,8 +18,10 @@ TARGETS=$(EBIN_DIR)/rabbit.app $(INCLUDE_DIR)/rabbit_framing.hrl $(BEAM_TARGETS) WEB_URL=http://www.rabbitmq.com/ MANPAGES=$(patsubst %.xml, %.gz, $(wildcard $(DOCS_DIR)/*.[0-9].xml)) WEB_MANPAGES=$(patsubst %.xml, %.man.xml, $(wildcard $(DOCS_DIR)/*.[0-9].xml) $(DOCS_DIR)/rabbitmq-service.xml) -USAGES_XML=$(DOCS_DIR)/rabbitmqctl.1.xml $(DOCS_DIR)/rabbitmq-multi.1.xml +USAGES_XML=$(DOCS_DIR)/rabbitmqctl.1.xml USAGES_ERL=$(foreach XML, $(USAGES_XML), $(call usage_xml_to_erl, $(XML))) +QC_MODULES := rabbit_backing_queue_qc +QC_TRIALS ?= 100 ifeq ($(shell python -c 'import simplejson' 2>/dev/null && echo yes),yes) PYTHON=python @@ -41,12 +43,18 @@ RABBIT_PLT=rabbit.plt ifndef USE_SPECS # our type specs rely on features and bug fixes in dialyzer that are -# only available in R14A upwards (R14A is erts 5.8) -USE_SPECS:=$(shell erl -noshell -eval 'io:format([list_to_integer(X) || X <- string:tokens(erlang:system_info(version), ".")] >= [5,8]), halt().') +# only available in R14B03 upwards (R14B03 is erts 5.8.4) +USE_SPECS:=$(shell erl -noshell -eval 'io:format([list_to_integer(X) || X <- string:tokens(erlang:system_info(version), ".")] >= [5,8,4]), halt().') +endif + +ifndef USE_PROPER_QC +# PropEr needs to be installed for property checking +# http://proper.softlab.ntua.gr/ +USE_PROPER_QC:=$(shell erl -noshell -eval 'io:format({module, proper} =:= code:ensure_loaded(proper)), halt().') endif #other args: +native +"{hipe,[o3,verbose]}" -Ddebug=true +debug_info +no_strict_record_tests -ERLC_OPTS=-I $(INCLUDE_DIR) -o $(EBIN_DIR) -Wall -v +debug_info $(if $(filter true,$(USE_SPECS)),-Duse_specs) +ERLC_OPTS=-I $(INCLUDE_DIR) -o $(EBIN_DIR) -Wall -v +debug_info $(call boolean_macro,$(USE_SPECS),use_specs) $(call boolean_macro,$(USE_PROPER_QC),use_proper_qc) VERSION=0.0.0 TARBALL_NAME=rabbitmq-server-$(VERSION) @@ -69,6 +77,10 @@ define usage_dep $(call usage_xml_to_erl, $(1)): $(1) $(DOCS_DIR)/usage.xsl endef +define boolean_macro +$(if $(filter 
true,$(1)),-D$(2)) +endef + ifneq "$(SBIN_DIR)" "" ifneq "$(TARGET_DIR)" "" SCRIPTS_REL_PATH=$(shell ./calculate-relative $(TARGET_DIR)/sbin $(SBIN_DIR)) @@ -93,8 +105,8 @@ $(DEPS_FILE): $(SOURCES) $(INCLUDES) rm -f $@ echo $(subst : ,:,$(foreach FILE,$^,$(FILE):)) | escript generate_deps $@ $(EBIN_DIR) -$(EBIN_DIR)/rabbit.app: $(EBIN_DIR)/rabbit_app.in $(BEAM_TARGETS) generate_app - escript generate_app $(EBIN_DIR) $@ < $< +$(EBIN_DIR)/rabbit.app: $(EBIN_DIR)/rabbit_app.in $(SOURCES) generate_app + escript generate_app $< $@ $(SOURCE_DIR) $(EBIN_DIR)/%.beam: $(SOURCE_DIR)/%.erl | $(DEPS_FILE) erlc $(ERLC_OPTS) -pa $(EBIN_DIR) $< @@ -162,7 +174,11 @@ run-node: all ./scripts/rabbitmq-server run-tests: all - echo "rabbit_tests:all_tests()." | $(ERL_CALL) + OUT=$$(echo "rabbit_tests:all_tests()." | $(ERL_CALL)) ; \ + echo $$OUT ; echo $$OUT | grep '^{ok, passed}$$' > /dev/null + +run-qc: all + $(foreach MOD,$(QC_MODULES),./quickcheck $(RABBITMQ_NODENAME) $(MOD) $(QC_TRIALS)) start-background-node: $(BASIC_SCRIPT_ENVIRONMENT_SETTINGS) \ @@ -177,11 +193,11 @@ stop-rabbit-on-node: all echo "rabbit:stop()." | $(ERL_CALL) set-memory-alarm: all - echo "alarm_handler:set_alarm({vm_memory_high_watermark, []})." | \ + echo "alarm_handler:set_alarm({{vm_memory_high_watermark, node()}, []})." | \ $(ERL_CALL) clear-memory-alarm: all - echo "alarm_handler:clear_alarm(vm_memory_high_watermark)." | \ + echo "alarm_handler:clear_alarm({vm_memory_high_watermark, node()})." | \ $(ERL_CALL) stop-node: @@ -222,7 +238,7 @@ srcdist: distclean chmod 0755 $(TARGET_SRC_DIR)/scripts/* (cd dist; tar -zcf $(TARBALL_NAME).tar.gz $(TARBALL_NAME)) - (cd dist; zip -r $(TARBALL_NAME).zip $(TARBALL_NAME)) + (cd dist; zip -q -r $(TARBALL_NAME).zip $(TARBALL_NAME)) rm -rf $(TARGET_SRC_DIR) distclean: clean @@ -233,7 +249,7 @@ distclean: clean # xmlto can not read from standard input, so we mess with a tmp file. 
%.gz: %.xml $(DOCS_DIR)/examples-to-end.xsl xmlto --version | grep -E '^xmlto version 0\.0\.([0-9]|1[1-8])$$' >/dev/null || opt='--stringparam man.indent.verbatims=0' ; \ - xsltproc $(DOCS_DIR)/examples-to-end.xsl $< > $<.tmp && \ + xsltproc --novalid $(DOCS_DIR)/examples-to-end.xsl $< > $<.tmp && \ xmlto -o $(DOCS_DIR) $$opt man $<.tmp && \ gzip -f $(DOCS_DIR)/`basename $< .xml` rm -f $<.tmp @@ -242,7 +258,7 @@ distclean: clean # Do not fold the cp into previous line, it's there to stop the file being # generated but empty if we fail $(SOURCE_DIR)/%_usage.erl: - xsltproc --stringparam modulename "`basename $@ .erl`" \ + xsltproc --novalid --stringparam modulename "`basename $@ .erl`" \ $(DOCS_DIR)/usage.xsl $< > $@.tmp sed -e 's/"/\\"/g' -e 's/%QUOTE%/"/g' $@.tmp > $@.tmp2 fold -s $@.tmp2 > $@.tmp3 @@ -256,7 +272,7 @@ $(SOURCE_DIR)/%_usage.erl: xmlto xhtml-nochunks `basename $< .xml`.xml ; rm `basename $< .xml`.xml cat `basename $< .xml`.html | \ xsltproc --novalid $(DOCS_DIR)/remove-namespaces.xsl - | \ - xsltproc --stringparam original `basename $<` $(DOCS_DIR)/html-to-website-xml.xsl - | \ + xsltproc --novalid --stringparam original `basename $<` $(DOCS_DIR)/html-to-website-xml.xsl - | \ xmllint --format - > $@ rm `basename $< .xml`.html @@ -268,7 +284,7 @@ install_bin: all install_dirs cp -r ebin include LICENSE LICENSE-MPL-RabbitMQ INSTALL $(TARGET_DIR) chmod 0755 scripts/* - for script in rabbitmq-env rabbitmq-server rabbitmqctl rabbitmq-multi; do \ + for script in rabbitmq-env rabbitmq-server rabbitmqctl; do \ cp scripts/$$script $(TARGET_DIR)/sbin; \ [ -e $(SBIN_DIR)/$$script ] || ln -s $(SCRIPTS_REL_PATH)/$$script $(SBIN_DIR)/$$script; \ done @@ -313,3 +329,4 @@ ifneq "$(strip $(patsubst clean%,,$(patsubst %clean,,$(TESTABLEGOALS))))" "" -include $(DEPS_FILE) endif +.PHONY: run-qc @@ -324,7 +324,7 @@ def genErl(spec): -type(amqp_field_type() :: 'longstr' | 'signedint' | 'decimal' | 'timestamp' | 'table' | 'byte' | 'double' | 'float' | 'long' | - 'short' | 
'bool' | 'binary' | 'void'). + 'short' | 'bool' | 'binary' | 'void' | 'array'). -type(amqp_property_type() :: 'shortstr' | 'longstr' | 'octet' | 'shortint' | 'longint' | 'longlongint' | 'timestamp' | 'bit' | 'table'). diff --git a/docs/examples-to-end.xsl b/docs/examples-to-end.xsl index d9686ada..a0a74178 100644 --- a/docs/examples-to-end.xsl +++ b/docs/examples-to-end.xsl @@ -1,9 +1,5 @@ <?xml version='1.0'?> <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" - xmlns:exsl="http://exslt.org/common" - xmlns:ng="http://docbook.org/docbook-ng" - xmlns:db="http://docbook.org/ns/docbook" - exclude-result-prefixes="exsl ng db" version='1.0'> <xsl:output doctype-public="-//OASIS//DTD DocBook XML V4.5//EN" doctype-system="http://www.docbook.org/xml/4.5/docbookx.dtd" /> diff --git a/docs/rabbitmq.conf.5.xml b/docs/rabbitmq-env.conf.5.xml index 31de7164..c887596c 100644 --- a/docs/rabbitmq.conf.5.xml +++ b/docs/rabbitmq-env.conf.5.xml @@ -9,20 +9,20 @@ </refentryinfo> <refmeta> - <refentrytitle>rabbitmq.conf</refentrytitle> + <refentrytitle>rabbitmq-env.conf</refentrytitle> <manvolnum>5</manvolnum> <refmiscinfo class="manual">RabbitMQ Server</refmiscinfo> </refmeta> <refnamediv> - <refname>rabbitmq.conf</refname> + <refname>rabbitmq-env.conf</refname> <refpurpose>default settings for RabbitMQ AMQP server</refpurpose> </refnamediv> <refsect1> <title>Description</title> <para> -<filename>/etc/rabbitmq/rabbitmq.conf</filename> contains variable settings that override the +<filename>/etc/rabbitmq/rabbitmq-env.conf</filename> contains variable settings that override the defaults built in to the RabbitMQ startup scripts. </para> <para> @@ -33,7 +33,7 @@ operator), including line comments starting with "#". 
</para> <para> In order of preference, the startup scripts get their values from the -environment, from <filename>/etc/rabbitmq/rabbitmq.conf</filename> and finally from the +environment, from <filename>/etc/rabbitmq/rabbitmq-env.conf</filename> and finally from the built-in default values. For example, for the <envar>RABBITMQ_NODENAME</envar> setting, </para> @@ -48,26 +48,26 @@ empty string, then <envar>NODENAME</envar> </para> <para> -from <filename>/etc/rabbitmq/rabbitmq.conf</filename> is checked. If it is also absent +from <filename>/etc/rabbitmq/rabbitmq-env.conf</filename> is checked. If it is also absent or set equal to the empty string then the default value from the startup script is used. </para> <para> -The variable names in /etc/rabbitmq/rabbitmq.conf are always equal to the +The variable names in /etc/rabbitmq/rabbitmq-env.conf are always equal to the environment variable names, with the <envar>RABBITMQ_</envar> prefix removed: <envar>RABBITMQ_NODE_PORT</envar> from the environment becomes <envar>NODE_PORT</envar> in the -<filename>/etc/rabbitmq/rabbitmq.conf</filename> file, etc. +<filename>/etc/rabbitmq/rabbitmq-env.conf</filename> file, etc. </para> <para role="example-prefix">For example:</para> <screen role="example-multiline"> -# I am a complete /etc/rabbitmq/rabbitmq.conf file. +# I am a complete /etc/rabbitmq/rabbitmq-env.conf file. # Comment lines start with a hash character. # This is a /bin/sh script file - use ordinary envt var syntax NODENAME=hare </screen> <para role="example"> This is an example of a complete - <filename>/etc/rabbitmq/rabbitmq.conf</filename> file that overrides the default Erlang + <filename>/etc/rabbitmq/rabbitmq-env.conf</filename> file that overrides the default Erlang node name from "rabbit" to "hare". 
</para> @@ -76,7 +76,6 @@ NODENAME=hare <refsect1> <title>See also</title> <para> - <citerefentry><refentrytitle>rabbitmq-multi</refentrytitle><manvolnum>1</manvolnum></citerefentry> <citerefentry><refentrytitle>rabbitmq-server</refentrytitle><manvolnum>1</manvolnum></citerefentry> <citerefentry><refentrytitle>rabbitmqctl</refentrytitle><manvolnum>1</manvolnum></citerefentry> </para> diff --git a/docs/rabbitmq-multi.1.xml b/docs/rabbitmq-multi.1.xml deleted file mode 100644 index 6586890a..00000000 --- a/docs/rabbitmq-multi.1.xml +++ /dev/null @@ -1,100 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.docbook.org/xml/4.5/docbookx.dtd"> -<refentry lang="en"> - <refentryinfo> - <productname>RabbitMQ Server</productname> - <authorgroup> - <corpauthor>The RabbitMQ Team <<ulink url="mailto:info@rabbitmq.com"><email>info@rabbitmq.com</email></ulink>></corpauthor> - </authorgroup> - </refentryinfo> - - <refmeta> - <refentrytitle>rabbitmq-multi</refentrytitle> - <manvolnum>1</manvolnum> - <refmiscinfo class="manual">RabbitMQ Server</refmiscinfo> - </refmeta> - - <refnamediv> - <refname>rabbitmq-multi</refname> - <refpurpose>start/stop local cluster RabbitMQ nodes</refpurpose> - </refnamediv> - - <refsynopsisdiv> - <cmdsynopsis> - <command>rabbitmq-multi</command> - <arg choice="req"><replaceable>command</replaceable></arg> - <arg choice="opt" rep="repeat"><replaceable>command options</replaceable></arg> - </cmdsynopsis> - </refsynopsisdiv> - - <refsect1> - <title>Description</title> - <para> - RabbitMQ is an implementation of AMQP, the emerging standard for high -performance enterprise messaging. The RabbitMQ server is a robust and -scalable implementation of an AMQP broker. - </para> - <para> -rabbitmq-multi scripts allows for easy set-up of a cluster on a single -machine. 
- </para> - </refsect1> - - <refsect1> - <title>Commands</title> - <variablelist> - <varlistentry> - <term><cmdsynopsis><command>start_all</command> <arg choice="req"><replaceable>count</replaceable></arg></cmdsynopsis></term> - <listitem> - <para> -Start count nodes with unique names, listening on all IP addresses and -on sequential ports starting from 5672. - </para> - <para role="example-prefix">For example:</para> - <screen role="example">rabbitmq-multi start_all 3</screen> - <para role="example"> - Starts 3 local RabbitMQ nodes with unique, sequential port numbers. - </para> - </listitem> - </varlistentry> - - <varlistentry> - <term><cmdsynopsis><command>status</command></cmdsynopsis></term> - <listitem> - <para> -Print the status of all running RabbitMQ nodes. - </para> - </listitem> - </varlistentry> - - <varlistentry> - <term><cmdsynopsis><command>stop_all</command></cmdsynopsis></term> - <listitem> - <para> -Stop all local RabbitMQ nodes, - </para> - </listitem> - </varlistentry> - - <varlistentry> - <term><cmdsynopsis><command>rotate_logs</command></cmdsynopsis></term> - <listitem> - <para> -Rotate log files for all local and running RabbitMQ nodes. - </para> - </listitem> - </varlistentry> - - </variablelist> - </refsect1> - - - <refsect1> - <title>See also</title> - <para> - <citerefentry><refentrytitle>rabbitmq.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry> - <citerefentry><refentrytitle>rabbitmq-server</refentrytitle><manvolnum>1</manvolnum></citerefentry> - <citerefentry><refentrytitle>rabbitmqctl</refentrytitle><manvolnum>1</manvolnum></citerefentry> - </para> - </refsect1> -</refentry> diff --git a/docs/rabbitmq-server.1.xml b/docs/rabbitmq-server.1.xml index f161a291..ca63927c 100644 --- a/docs/rabbitmq-server.1.xml +++ b/docs/rabbitmq-server.1.xml @@ -124,8 +124,7 @@ Defaults to 5672. 
<refsect1> <title>See also</title> <para> - <citerefentry><refentrytitle>rabbitmq.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry> - <citerefentry><refentrytitle>rabbitmq-multi</refentrytitle><manvolnum>1</manvolnum></citerefentry> + <citerefentry><refentrytitle>rabbitmq-env.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry> <citerefentry><refentrytitle>rabbitmqctl</refentrytitle><manvolnum>1</manvolnum></citerefentry> </para> </refsect1> diff --git a/docs/rabbitmqctl.1.xml b/docs/rabbitmqctl.1.xml index bd9fee7d..ee000215 100644 --- a/docs/rabbitmqctl.1.xml +++ b/docs/rabbitmqctl.1.xml @@ -59,6 +59,11 @@ RabbitMQ broker. It performs all actions by connecting to one of the broker's nodes. </para> + <para> + Diagnostic information is displayed if the broker was not + running, could not be reached, or rejected the connection due to + mismatching Erlang cookies. + </para> </refsect1> <refsect1> @@ -158,19 +163,23 @@ </varlistentry> <varlistentry> - <term><cmdsynopsis><command>status</command></cmdsynopsis></term> + <term><cmdsynopsis><command>wait</command></cmdsynopsis></term> <listitem> <para> - Displays various information about the RabbitMQ broker, - such as whether the RabbitMQ application on the current - node, its version number, what nodes are part of the - broker, which of these are running. + Wait for the RabbitMQ application to start. + </para> + <para> + This command will wait for the RabbitMQ application to + start at the node. As long as the Erlang node is up but + the RabbitMQ application is down it will wait + indefinitely. If the node itself goes down, or takes + more than five seconds to come up, it will fail. </para> <para role="example-prefix">For example:</para> - <screen role="example">rabbitmqctl status</screen> + <screen role="example">rabbitmqctl wait</screen> <para role="example"> - This command displays information about the RabbitMQ - broker. + This command will return when the RabbitMQ node has + started up. 
</para> </listitem> </varlistentry> @@ -355,6 +364,20 @@ </para> </listitem> </varlistentry> + <varlistentry> + <term><cmdsynopsis><command>cluster_status</command></cmdsynopsis></term> + <listitem> + <para> + Displays all the nodes in the cluster grouped by node type, + together with the currently running nodes. + </para> + <para role="example-prefix">For example:</para> + <screen role="example">rabbitmqctl cluster_status</screen> + <para role="example"> + This command displays the nodes in the cluster. + </para> + </listitem> + </varlistentry> </variablelist> </refsect2> @@ -490,17 +513,22 @@ </varlistentry> <varlistentry> - <term><cmdsynopsis><command>set_admin</command> <arg choice="req"><replaceable>username</replaceable></arg></cmdsynopsis></term> + <term><cmdsynopsis><command>set_user_tags</command> <arg choice="req"><replaceable>username</replaceable></arg> <arg choice="req"><replaceable>tag</replaceable> ...</arg></cmdsynopsis></term> <listitem> <variablelist> <varlistentry> <term>username</term> - <listitem><para>The name of the user whose administrative - status is to be set.</para></listitem> + <listitem><para>The name of the user whose tags are to + be set.</para></listitem> + </varlistentry> + <varlistentry> + <term>tag</term> + <listitem><para>Zero, one or more tags to set. Any + existing tags will be removed.</para></listitem> </varlistentry> </variablelist> <para role="example-prefix">For example:</para> - <screen role="example">rabbitmqctl set_admin tonyg</screen> + <screen role="example">rabbitmqctl set_user_tags tonyg administrator</screen> <para role="example"> This command instructs the RabbitMQ broker to ensure the user named <command>tonyg</command> is an administrator. This has no @@ -509,24 +537,10 @@ user logs in via some other means (for example with the management plugin). 
</para> - </listitem> - </varlistentry> - - <varlistentry> - <term><cmdsynopsis><command>clear_admin</command> <arg choice="req"><replaceable>username</replaceable></arg></cmdsynopsis></term> - <listitem> - <variablelist> - <varlistentry> - <term>username</term> - <listitem><para>The name of the user whose administrative - status is to be cleared.</para></listitem> - </varlistentry> - </variablelist> - <para role="example-prefix">For example:</para> - <screen role="example">rabbitmqctl clear_admin tonyg</screen> + <screen role="example">rabbitmqctl set_user_tags tonyg</screen> <para role="example"> - This command instructs the RabbitMQ broker to ensure the user - named <command>tonyg</command> is not an administrator. + This command instructs the RabbitMQ broker to remove any + tags from the user named <command>tonyg</command>. </para> </listitem> </varlistentry> @@ -602,14 +616,35 @@ </listitem> </varlistentry> - <varlistentry> - <term><cmdsynopsis><command>list_vhosts</command></cmdsynopsis></term> + <varlistentry role="usage-has-option-list"> + <term><cmdsynopsis><command>list_vhosts</command> <arg choice="opt" role="usage-option-list"><replaceable>vhostinfoitem</replaceable> ...</arg></cmdsynopsis></term> <listitem> <para> Lists virtual hosts. </para> + <para> + The <command>vhostinfoitem</command> parameter is used to indicate which + virtual host information items to include in the results. The column order in the + results will match the order of the parameters. 
+ <command>vhostinfoitem</command> can take any value from + the list that follows: + </para> + <variablelist> + <varlistentry> + <term>name</term> + <listitem><para>The name of the virtual host with non-ASCII characters escaped as in C.</para></listitem> + </varlistentry> + <varlistentry> + <term>tracing</term> + <listitem><para>Whether tracing is enabled for this virtual host.</para></listitem> + </varlistentry> + </variablelist> + <para> + If no <command>vhostinfoitem</command>s are specified + then the vhost name is displayed. + </para> <para role="example-prefix">For example:</para> - <screen role="example">rabbitmqctl list_vhosts</screen> + <screen role="example">rabbitmqctl list_vhosts name tracing</screen> <para role="example"> This command instructs the RabbitMQ broker to list all virtual hosts. @@ -1165,6 +1200,10 @@ <listitem><para>True if the channel is in transactional mode, false otherwise.</para></listitem> </varlistentry> <varlistentry> + <term>confirm</term> + <listitem><para>True if the channel is in confirm mode, false otherwise.</para></listitem> + </varlistentry> + <varlistentry> <term>consumer_count</term> <listitem><para>Number of logical AMQP consumers retrieving messages via the channel.</para></listitem> @@ -1175,11 +1214,22 @@ yet acknowledged.</para></listitem> </varlistentry> <varlistentry> + <term>messages_uncommitted</term> + <listitem><para>Number of messages received in an as yet + uncommitted transaction.</para></listitem> + </varlistentry> + <varlistentry> <term>acks_uncommitted</term> <listitem><para>Number of acknowledgements received in an as yet uncommitted transaction.</para></listitem> </varlistentry> <varlistentry> + <term>messages_unconfirmed</term> + <listitem><para>Number of published messages not yet + confirmed. 
On channels not in confirm mode, this + remains 0.</para></listitem> + </varlistentry> + <varlistentry> <term>prefetch_count</term> <listitem><para>QoS prefetch count limit in force, 0 if unlimited.</para></listitem> </varlistentry> @@ -1191,21 +1241,10 @@ messages to the channel's consumers. </para></listitem> </varlistentry> - <varlistentry> - <term>confirm</term> - <listitem><para>True if the channel is in confirm mode, false otherwise.</para></listitem> - </varlistentry> - <varlistentry> - <term>messages_unconfirmed</term> - <listitem><para>Number of published messages not yet - confirmed. On channels not in confirm mode, this - remains 0.</para></listitem> - </varlistentry> </variablelist> <para> If no <command>channelinfoitem</command>s are specified then pid, - user, transactional, consumer_count, and - messages_unacknowledged are assumed. + user, consumer_count, and messages_unacknowledged are assumed. </para> <para role="example-prefix"> @@ -1220,7 +1259,7 @@ </varlistentry> <varlistentry> - <term><cmdsynopsis><command>list_consumers</command></cmdsynopsis></term> + <term><cmdsynopsis><command>list_consumers</command> <arg choice="opt">-p <replaceable>vhostpath</replaceable></arg></cmdsynopsis></term> <listitem> <para> List consumers, i.e. subscriptions to a queue's message @@ -1232,14 +1271,98 @@ indicating whether acknowledgements are expected for messages delivered to this consumer. </para> - <para role="usage"> - The output format for "list_consumers" is a list of rows containing, - in order, the queue name, channel process id, consumer tag, and a - boolean indicating whether acknowledgements are expected from the - consumer. + <para> + The output is a list of rows containing, in order, the queue name, + channel process id, consumer tag, and a boolean indicating whether + acknowledgements are expected from the consumer. 
+ </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><cmdsynopsis><command>status</command></cmdsynopsis></term> + <listitem> + <para> + Displays broker status information such as the running + applications on the current Erlang node, RabbitMQ and + Erlang versions and OS name. (See + the <command>cluster_status</command> command to find + out which nodes are clustered and running.) + </para> + <para role="example-prefix">For example:</para> + <screen role="example">rabbitmqctl status</screen> + <para role="example"> + This command displays information about the RabbitMQ + broker. </para> </listitem> </varlistentry> + + <varlistentry> + <term><cmdsynopsis><command>environment</command></cmdsynopsis></term> + <listitem> + <para> + Display the name and value of each variable in the + application environment. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><cmdsynopsis><command>report</command></cmdsynopsis></term> + <listitem> + <para> + Generate a server status report containing a + concatenation of all server status information for + support purposes. The output should be redirected to a + file when accompanying a support request. + </para> + <para role="example-prefix"> + For example: + </para> + <screen role="example">rabbitmqctl report > server_report.txt</screen> + <para role="example"> + This command creates a server report which may be + attached to a support request email. + </para> + </listitem> + </varlistentry> + </variablelist> + </refsect2> + + <refsect2> + <title>Message Tracing</title> + <variablelist> + <varlistentry> + <term><cmdsynopsis><command>trace_on</command> <arg choice="opt">-p <replaceable>vhost</replaceable></arg></cmdsynopsis></term> + <listitem> + <variablelist> + <varlistentry> + <term>vhost</term> + <listitem><para>The name of the virtual host for which to start tracing.</para></listitem> + </varlistentry> + </variablelist> + <para> + Starts tracing. 
+ </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><cmdsynopsis><command>trace_off</command> <arg choice="opt">-p <replaceable>vhost</replaceable></arg></cmdsynopsis></term> + <listitem> + <variablelist> + <varlistentry> + <term>vhost</term> + <listitem><para>The name of the virtual host for which to stop tracing.</para></listitem> + </varlistentry> + </variablelist> + <para> + Stops tracing. + </para> + </listitem> + </varlistentry> + </variablelist> </refsect2> </refsect1> diff --git a/docs/usage.xsl b/docs/usage.xsl index a6cebd93..586f8303 100644 --- a/docs/usage.xsl +++ b/docs/usage.xsl @@ -1,9 +1,5 @@ <?xml version='1.0'?> <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" - xmlns:exsl="http://exslt.org/common" - xmlns:ng="http://docbook.org/docbook-ng" - xmlns:db="http://docbook.org/ns/docbook" - exclude-result-prefixes="exsl" version='1.0'> <xsl:param name="modulename"/> diff --git a/ebin/rabbit_app.in b/ebin/rabbit_app.in index cc7221d6..65a3269a 100644 --- a/ebin/rabbit_app.in +++ b/ebin/rabbit_app.in @@ -20,18 +20,26 @@ {vm_memory_high_watermark, 0.4}, {msg_store_index_module, rabbit_msg_store_ets_index}, {backing_queue_module, rabbit_variable_queue}, - {persister_max_wrap_entries, 500}, - {persister_hibernate_after, 10000}, + {frame_max, 131072}, {msg_store_file_size_limit, 16777216}, {queue_index_max_journal_entries, 262144}, {default_user, <<"guest">>}, {default_pass, <<"guest">>}, - {default_user_is_admin, true}, + {default_user_tags, [administrator]}, {default_vhost, <<"/">>}, {default_permissions, [<<".*">>, <<".*">>, <<".*">>]}, {cluster_nodes, []}, {server_properties, []}, {collect_statistics, none}, + {collect_statistics_interval, 5000}, {auth_mechanisms, ['PLAIN', 'AMQPLAIN']}, {auth_backends, [rabbit_auth_backend_internal]}, - {delegate_count, 16}]}]}. 
+ {delegate_count, 16}, + {trace_vhosts, []}, + {tcp_listen_options, [binary, + {packet, raw}, + {reuseaddr, true}, + {backlog, 128}, + {nodelay, true}, + {exit_on_close, false}]} + ]}]}. diff --git a/generate_app b/generate_app index 576b485e..fb0eb1ea 100644 --- a/generate_app +++ b/generate_app @@ -1,12 +1,16 @@ #!/usr/bin/env escript %% -*- erlang -*- -main([BeamDir, TargetFile]) -> - Modules = [list_to_atom(filename:basename(F, ".beam")) || - F <- filelib:wildcard("*.beam", BeamDir)], - {ok, {application, Application, Properties}} = io:read(''), - NewProperties = lists:keyreplace(modules, 1, Properties, - {modules, Modules}), +main([InFile, OutFile | SrcDirs]) -> + Modules = [list_to_atom(filename:basename(F, ".erl")) || + SrcDir <- SrcDirs, + F <- filelib:wildcard("*.erl", SrcDir)], + {ok, [{application, Application, Properties}]} = file:consult(InFile), + NewProperties = + case proplists:get_value(modules, Properties) of + [] -> lists:keyreplace(modules, 1, Properties, {modules, Modules}); + _ -> Properties + end, file:write_file( - TargetFile, + OutFile, io_lib:format("~p.~n", [{application, Application, NewProperties}])). diff --git a/include/gm_specs.hrl b/include/gm_specs.hrl new file mode 100644 index 00000000..ee29706e --- /dev/null +++ b/include/gm_specs.hrl @@ -0,0 +1,28 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is VMware, Inc. +%% Copyright (c) 2007-2011 VMware, Inc. All rights reserved. +%% + +-ifdef(use_specs). 
+ +-type(callback_result() :: 'ok' | {'stop', any()} | {'become', atom(), args()}). +-type(args() :: any()). +-type(members() :: [pid()]). + +-spec(joined/2 :: (args(), members()) -> callback_result()). +-spec(members_changed/3 :: (args(), members(), members()) -> callback_result()). +-spec(handle_msg/3 :: (args(), pid(), any()) -> callback_result()). +-spec(terminate/2 :: (args(), term()) -> any()). + +-endif. diff --git a/include/rabbit.hrl b/include/rabbit.hrl index 15f5d7c5..ac6399c6 100644 --- a/include/rabbit.hrl +++ b/include/rabbit.hrl @@ -15,12 +15,12 @@ %% -record(user, {username, - is_admin, + tags, auth_backend, %% Module this user came from impl %% Scratch space for that module }). --record(internal_user, {username, password_hash, is_admin}). +-record(internal_user, {username, password_hash, tags}). -record(permission, {configure, write, read}). -record(user_vhost, {username, virtual_host}). -record(user_permission, {user_vhost, permission}). @@ -28,7 +28,7 @@ -record(vhost, {virtual_host, dummy}). -record(connection, {protocol, user, timeout_sec, frame_max, vhost, - client_properties}). + client_properties, capabilities}). -record(content, {class_id, @@ -42,10 +42,12 @@ -record(resource, {virtual_host, kind, name}). --record(exchange, {name, type, durable, auto_delete, internal, arguments}). +-record(exchange, {name, type, durable, auto_delete, internal, arguments, + scratch}). +-record(exchange_serial, {name, next}). -record(amqqueue, {name, durable, auto_delete, exclusive_owner = none, - arguments, pid}). + arguments, pid, slave_pids, mirror_nodes}). %% mnesia doesn't like unary records, so we add a dummy 'value' field -record(route, {binding, value = const}). @@ -54,14 +56,19 @@ -record(binding, {source, key, destination, args = []}). -record(reverse_binding, {destination, key, source, args = []}). +-record(topic_trie_edge, {trie_edge, node_id}). +-record(topic_trie_binding, {trie_binding, value = const}). 
+ +-record(trie_edge, {exchange_name, node_id, word}). +-record(trie_binding, {exchange_name, node_id, destination}). + -record(listener, {node, protocol, host, ip_address, port}). --record(basic_message, {exchange_name, routing_key, content, guid, +-record(basic_message, {exchange_name, routing_keys = [], content, id, is_persistent}). -record(ssl_socket, {tcp, ssl}). --record(delivery, {mandatory, immediate, txn, sender, message, - msg_seq_no}). +-record(delivery, {mandatory, immediate, sender, message, msg_seq_no}). -record(amqp_error, {name, explanation = "", method = none}). -record(event, {type, props, timestamp}). @@ -79,7 +86,9 @@ -define(HIBERNATE_AFTER_MIN, 1000). -define(DESIRED_HIBERNATE, 10000). --define(STATS_INTERVAL, 5000). + +-define(ROUTING_HEADERS, [<<"CC">>, <<"BCC">>]). +-define(DELETED_HEADER, <<"BCC">>). -ifdef(debug). -define(LOGDEBUG0(F), rabbit_log:debug(F)). diff --git a/include/rabbit_auth_backend_spec.hrl b/include/rabbit_auth_backend_spec.hrl index e26d44ea..803bb75c 100644 --- a/include/rabbit_auth_backend_spec.hrl +++ b/include/rabbit_auth_backend_spec.hrl @@ -22,8 +22,7 @@ {'ok', rabbit_types:user()} | {'refused', string(), [any()]} | {'error', any()}). --spec(check_vhost_access/3 :: (rabbit_types:user(), rabbit_types:vhost(), - rabbit_access_control:vhost_permission_atom()) -> +-spec(check_vhost_access/2 :: (rabbit_types:user(), rabbit_types:vhost()) -> boolean() | {'error', any()}). -spec(check_resource_access/3 :: (rabbit_types:user(), rabbit_types:r(atom()), diff --git a/include/rabbit_auth_mechanism_spec.hrl b/include/rabbit_auth_mechanism_spec.hrl index 49614d5f..614a3eed 100644 --- a/include/rabbit_auth_mechanism_spec.hrl +++ b/include/rabbit_auth_mechanism_spec.hrl @@ -17,6 +17,7 @@ -ifdef(use_specs). -spec(description/0 :: () -> [{atom(), any()}]). +-spec(should_offer/1 :: (rabbit_net:socket()) -> boolean()). -spec(init/1 :: (rabbit_net:socket()) -> any()). 
-spec(handle_response/2 :: (binary(), any()) -> {'ok', rabbit_types:user()} | diff --git a/include/rabbit_backing_queue_spec.hrl b/include/rabbit_backing_queue_spec.hrl index accb2c0e..ee102f5e 100644 --- a/include/rabbit_backing_queue_spec.hrl +++ b/include/rabbit_backing_queue_spec.hrl @@ -25,43 +25,44 @@ -type(message_properties_transformer() :: fun ((rabbit_types:message_properties()) -> rabbit_types:message_properties())). +-type(async_callback() :: fun ((atom(), fun ((atom(), state()) -> state())) -> 'ok')). -spec(start/1 :: ([rabbit_amqqueue:name()]) -> 'ok'). -spec(stop/0 :: () -> 'ok'). --spec(init/3 :: (rabbit_amqqueue:name(), is_durable(), attempt_recovery()) -> - state()). --spec(terminate/1 :: (state()) -> state()). --spec(delete_and_terminate/1 :: (state()) -> state()). +-spec(init/3 :: (rabbit_types:amqqueue(), attempt_recovery(), + async_callback()) -> state()). +-spec(terminate/2 :: (any(), state()) -> state()). +-spec(delete_and_terminate/2 :: (any(), state()) -> state()). -spec(purge/1 :: (state()) -> {purged_msg_count(), state()}). --spec(publish/3 :: (rabbit_types:basic_message(), - rabbit_types:message_properties(), state()) -> state()). --spec(publish_delivered/4 :: (true, rabbit_types:basic_message(), - rabbit_types:message_properties(), state()) +-spec(publish/4 :: (rabbit_types:basic_message(), + rabbit_types:message_properties(), pid(), state()) -> + state()). +-spec(publish_delivered/5 :: (true, rabbit_types:basic_message(), + rabbit_types:message_properties(), pid(), state()) -> {ack(), state()}; (false, rabbit_types:basic_message(), - rabbit_types:message_properties(), state()) + rabbit_types:message_properties(), pid(), state()) -> {undefined, state()}). +-spec(drain_confirmed/1 :: (state()) -> {[rabbit_guid:guid()], state()}). -spec(dropwhile/2 :: (fun ((rabbit_types:message_properties()) -> boolean()), state()) -> state()). 
-spec(fetch/2 :: (true, state()) -> {fetch_result(ack()), state()}; (false, state()) -> {fetch_result(undefined), state()}). --spec(ack/2 :: ([ack()], state()) -> state()). --spec(tx_publish/4 :: (rabbit_types:txn(), rabbit_types:basic_message(), - rabbit_types:message_properties(), state()) -> state()). --spec(tx_ack/3 :: (rabbit_types:txn(), [ack()], state()) -> state()). --spec(tx_rollback/2 :: (rabbit_types:txn(), state()) -> {[ack()], state()}). --spec(tx_commit/4 :: - (rabbit_types:txn(), fun (() -> any()), - message_properties_transformer(), state()) -> {[ack()], state()}). +-spec(ack/2 :: ([ack()], state()) -> {[rabbit_guid:guid()], state()}). -spec(requeue/3 :: ([ack()], message_properties_transformer(), state()) - -> state()). + -> {[rabbit_guid:guid()], state()}). -spec(len/1 :: (state()) -> non_neg_integer()). -spec(is_empty/1 :: (state()) -> boolean()). -spec(set_ram_duration_target/2 :: (('undefined' | 'infinity' | number()), state()) -> state()). -spec(ram_duration/1 :: (state()) -> {number(), state()}). --spec(needs_idle_timeout/1 :: (state()) -> boolean()). --spec(idle_timeout/1 :: (state()) -> state()). +-spec(needs_timeout/1 :: (state()) -> 'false' | 'timed' | 'idle'). +-spec(timeout/1 :: (state()) -> state()). -spec(handle_pre_hibernate/1 :: (state()) -> state()). -spec(status/1 :: (state()) -> [{atom(), any()}]). +-spec(invoke/3 :: (atom(), fun ((atom(), A) -> A), state()) -> state()). +-spec(is_duplicate/2 :: + (rabbit_types:basic_message(), state()) -> + {'false'|'published'|'discarded', state()}). +-spec(discard/3 :: (rabbit_types:basic_message(), pid(), state()) -> state()). diff --git a/include/rabbit_exchange_type_spec.hrl b/include/rabbit_exchange_type_spec.hrl index 45c475d8..f6283ef7 100644 --- a/include/rabbit_exchange_type_spec.hrl +++ b/include/rabbit_exchange_type_spec.hrl @@ -16,18 +16,20 @@ -ifdef(use_specs). +-type(tx() :: 'transaction' | 'none'). +-type(serial() :: pos_integer() | tx()). 
+ -spec(description/0 :: () -> [{atom(), any()}]). +-spec(serialise_events/0 :: () -> boolean()). -spec(route/2 :: (rabbit_types:exchange(), rabbit_types:delivery()) -> rabbit_router:match_result()). -spec(validate/1 :: (rabbit_types:exchange()) -> 'ok'). --spec(create/2 :: (boolean(), rabbit_types:exchange()) -> 'ok'). --spec(recover/2 :: (rabbit_types:exchange(), - [rabbit_types:binding()]) -> 'ok'). --spec(delete/3 :: (boolean(), rabbit_types:exchange(), +-spec(create/2 :: (tx(), rabbit_types:exchange()) -> 'ok'). +-spec(delete/3 :: (tx(), rabbit_types:exchange(), [rabbit_types:binding()]) -> 'ok'). --spec(add_binding/3 :: (boolean(), rabbit_types:exchange(), +-spec(add_binding/3 :: (serial(), rabbit_types:exchange(), rabbit_types:binding()) -> 'ok'). --spec(remove_bindings/3 :: (boolean(), rabbit_types:exchange(), +-spec(remove_bindings/3 :: (serial(), rabbit_types:exchange(), [rabbit_types:binding()]) -> 'ok'). -spec(assert_args_equivalence/2 :: (rabbit_types:exchange(), rabbit_framing:amqp_table()) diff --git a/include/rabbit_msg_store.hrl b/include/rabbit_msg_store.hrl index 9d704f65..e9150a97 100644 --- a/include/rabbit_msg_store.hrl +++ b/include/rabbit_msg_store.hrl @@ -22,5 +22,4 @@ -endif. --record(msg_location, - {guid, ref_count, file, offset, total_size}). +-record(msg_location, {msg_id, ref_count, file, offset, total_size}). diff --git a/include/rabbit_msg_store_index.hrl b/include/rabbit_msg_store_index.hrl index 289f8f60..2ae5b000 100644 --- a/include/rabbit_msg_store_index.hrl +++ b/include/rabbit_msg_store_index.hrl @@ -29,13 +29,13 @@ -spec(new/1 :: (dir()) -> index_state()). -spec(recover/1 :: (dir()) -> rabbit_types:ok_or_error2(index_state(), any())). -spec(lookup/2 :: - (rabbit_guid:guid(), index_state()) -> ('not_found' | keyvalue())). + (rabbit_types:msg_id(), index_state()) -> ('not_found' | keyvalue())). -spec(insert/2 :: (keyvalue(), index_state()) -> 'ok'). -spec(update/2 :: (keyvalue(), index_state()) -> 'ok'). 
--spec(update_fields/3 :: (rabbit_guid:guid(), ({fieldpos(), fieldvalue()} | - [{fieldpos(), fieldvalue()}]), +-spec(update_fields/3 :: (rabbit_types:msg_id(), ({fieldpos(), fieldvalue()} | + [{fieldpos(), fieldvalue()}]), index_state()) -> 'ok'). --spec(delete/2 :: (rabbit_guid:guid(), index_state()) -> 'ok'). +-spec(delete/2 :: (rabbit_types:msg_id(), index_state()) -> 'ok'). -spec(delete_object/2 :: (keyvalue(), index_state()) -> 'ok'). -spec(delete_by_file/2 :: (fieldvalue(), index_state()) -> 'ok'). -spec(terminate/1 :: (index_state()) -> any()). diff --git a/packaging/RPMS/Fedora/Makefile b/packaging/RPMS/Fedora/Makefile index 74a1800a..c67d8fd6 100644 --- a/packaging/RPMS/Fedora/Makefile +++ b/packaging/RPMS/Fedora/Makefile @@ -12,7 +12,7 @@ ifndef RPM_OS RPM_OS=fedora endif -ifeq "x$(RPM_OS)" "xsuse" +ifeq "$(RPM_OS)" "suse" REQUIRES=/sbin/chkconfig /sbin/service OS_DEFINES=--define '_initrddir /etc/init.d' --define 'dist .suse' else @@ -31,9 +31,13 @@ prepare: cp ${COMMON_DIR}/* SOURCES/ sed -i \ - -e 's|^DEFAULTS_FILE=.*$$|DEFAULTS_FILE=/etc/sysconfig/rabbitmq|' \ -e 's|^LOCK_FILE=.*$$|LOCK_FILE=/var/lock/subsys/$$NAME|' \ SOURCES/rabbitmq-server.init +ifeq "$(RPM_OS)" "fedora" +# Fedora says that only vital services should have Default-Start + sed -i -e '/^# Default-Start:/d;/^# Default-Stop:/d' \ + SOURCES/rabbitmq-server.init +endif sed -i -e 's|@SU_RABBITMQ_SH_C@|su rabbitmq -s /bin/sh -c|' \ SOURCES/rabbitmq-script-wrapper cp rabbitmq-server.logrotate SOURCES/rabbitmq-server.logrotate @@ -41,5 +45,5 @@ prepare: server: prepare rpmbuild -ba --nodeps SPECS/rabbitmq-server.spec $(DEFINES) $(OS_DEFINES) -clean: +clean: rm -rf SOURCES SPECS RPMS SRPMS BUILD tmp diff --git a/packaging/RPMS/Fedora/rabbitmq-server.spec b/packaging/RPMS/Fedora/rabbitmq-server.spec index 47316864..ffc826eb 100644 --- a/packaging/RPMS/Fedora/rabbitmq-server.spec +++ b/packaging/RPMS/Fedora/rabbitmq-server.spec @@ -55,7 +55,6 @@ mkdir -p 
%{buildroot}%{_localstatedir}/log/rabbitmq install -p -D -m 0755 %{S:1} %{buildroot}%{_initrddir}/rabbitmq-server install -p -D -m 0755 %{_rabbit_wrapper} %{buildroot}%{_sbindir}/rabbitmqctl install -p -D -m 0755 %{_rabbit_wrapper} %{buildroot}%{_sbindir}/rabbitmq-server -install -p -D -m 0755 %{_rabbit_wrapper} %{buildroot}%{_sbindir}/rabbitmq-multi install -p -D -m 0755 %{_rabbit_server_ocf} %{buildroot}%{_exec_prefix}/lib/ocf/resource.d/rabbitmq/rabbitmq-server install -p -D -m 0644 %{S:3} %{buildroot}%{_sysconfdir}/logrotate.d/rabbitmq-server @@ -65,12 +64,8 @@ mkdir -p %{buildroot}%{_sysconfdir}/rabbitmq rm %{_maindir}/LICENSE %{_maindir}/LICENSE-MPL-RabbitMQ %{_maindir}/INSTALL #Build the list of files -rm -f %{_builddir}/%{name}.files -echo '%defattr(-,root,root, -)' >> %{_builddir}/%{name}.files -(cd %{buildroot}; \ - find . -type f ! -regex '\.%{_sysconfdir}.*' \ - ! -regex '\.\(%{_rabbit_erllibdir}\|%{_rabbit_libdir}\).*' \ - | sed -e 's/^\.//' >> %{_builddir}/%{name}.files) +echo '%defattr(-,root,root, -)' >%{_builddir}/%{name}.files +find %{buildroot} -path %{buildroot}%{_sysconfdir} -prune -o '!' -type d -printf "/%%P\n" >>%{_builddir}/%{name}.files %pre @@ -92,6 +87,9 @@ fi %post /sbin/chkconfig --add %{name} +if [ -f %{_sysconfdir}/rabbitmq/rabbitmq.conf ] && [ ! 
-f %{_sysconfdir}/rabbitmq/rabbitmq-env.conf ]; then + mv %{_sysconfdir}/rabbitmq/rabbitmq.conf %{_sysconfdir}/rabbitmq/rabbitmq-env.conf +fi %preun if [ $1 = 0 ]; then @@ -114,8 +112,6 @@ done %attr(0750, rabbitmq, rabbitmq) %dir %{_localstatedir}/lib/rabbitmq %attr(0750, rabbitmq, rabbitmq) %dir %{_localstatedir}/log/rabbitmq %dir %{_sysconfdir}/rabbitmq -%{_rabbit_erllibdir} -%{_rabbit_libdir} %{_initrddir}/rabbitmq-server %config(noreplace) %{_sysconfdir}/logrotate.d/rabbitmq-server %doc LICENSE LICENSE-MPL-RabbitMQ @@ -124,6 +120,15 @@ done rm -rf %{buildroot} %changelog +* Thu Jun 9 2011 jerryk@vmware.com 2.5.0-1 +- New Upstream Release + +* Thu Apr 7 2011 Alexandru Scvortov <alexandru@rabbitmq.com> 2.4.1-1 +- New Upstream Release + +* Tue Mar 22 2011 Alexandru Scvortov <alexandru@rabbitmq.com> 2.4.0-1 +- New Upstream Release + * Thu Feb 3 2011 simon@rabbitmq.com 2.3.1-1 - New Upstream Release diff --git a/packaging/common/rabbitmq-server.init b/packaging/common/rabbitmq-server.init index 39d23983..d8a7a94d 100644 --- a/packaging/common/rabbitmq-server.init +++ b/packaging/common/rabbitmq-server.init @@ -10,97 +10,110 @@ # Provides: rabbitmq-server # Required-Start: $remote_fs $network # Required-Stop: $remote_fs $network -# Default-Start: -# Default-Stop: +# Default-Start: 3 4 5 +# Default-Stop: 0 1 2 6 # Description: RabbitMQ broker # Short-Description: Enable AMQP service provided by RabbitMQ broker ### END INIT INFO PATH=/sbin:/usr/sbin:/bin:/usr/bin -DAEMON=/usr/sbin/rabbitmq-multi NAME=rabbitmq-server +DAEMON=/usr/sbin/${NAME} +CONTROL=/usr/sbin/rabbitmqctl DESC=rabbitmq-server USER=rabbitmq -NODE_COUNT=1 ROTATE_SUFFIX= INIT_LOG_DIR=/var/log/rabbitmq -DEFAULTS_FILE= # This is filled in when building packages LOCK_FILE= # This is filled in when building packages test -x $DAEMON || exit 0 - -# Include rabbitmq defaults if available -if [ -f "$DEFAULTS_FILE" ] ; then - . 
$DEFAULTS_FILE -fi +test -x $CONTROL || exit 0 RETVAL=0 set -e start_rabbitmq () { - set +e - $DAEMON start_all ${NODE_COUNT} > ${INIT_LOG_DIR}/startup_log 2> ${INIT_LOG_DIR}/startup_err - case "$?" in - 0) - echo SUCCESS - [ -n "$LOCK_FILE" ] && touch $LOCK_FILE + status_rabbitmq quiet + if [ $RETVAL = 0 ] ; then + echo RabbitMQ is currently running + else RETVAL=0 - ;; - 1) - echo TIMEOUT - check ${INIT_LOG_DIR}/startup_\{log,err\} - RETVAL=1 - ;; - *) - echo FAILED - check ${INIT_LOG_DIR}/startup_log, _err - RETVAL=1 - ;; - esac - set -e + set +e + setsid sh -c "$DAEMON > ${INIT_LOG_DIR}/startup_log \ + 2> ${INIT_LOG_DIR}/startup_err" & + $CONTROL wait >/dev/null 2>&1 + RETVAL=$? + set -e + case "$RETVAL" in + 0) + echo SUCCESS + if [ -n "$LOCK_FILE" ] ; then + touch $LOCK_FILE + fi + ;; + *) + echo FAILED - check ${INIT_LOG_DIR}/startup_\{log, _err\} + RETVAL=1 + ;; + esac + fi } stop_rabbitmq () { - set +e status_rabbitmq quiet if [ $RETVAL = 0 ] ; then - $DAEMON stop_all > ${INIT_LOG_DIR}/shutdown_log 2> ${INIT_LOG_DIR}/shutdown_err + set +e + $CONTROL stop > ${INIT_LOG_DIR}/shutdown_log 2> ${INIT_LOG_DIR}/shutdown_err RETVAL=$? + set -e if [ $RETVAL = 0 ] ; then - [ -n "$LOCK_FILE" ] && rm -rf $LOCK_FILE + if [ -n "$LOCK_FILE" ] ; then + rm -f $LOCK_FILE + fi else echo FAILED - check ${INIT_LOG_DIR}/shutdown_log, _err fi else - echo No nodes running + echo RabbitMQ is not running RETVAL=0 fi - set -e } status_rabbitmq() { set +e if [ "$1" != "quiet" ] ; then - $DAEMON status 2>&1 + $CONTROL status 2>&1 else - $DAEMON status > /dev/null 2>&1 + $CONTROL status > /dev/null 2>&1 fi if [ $? != 0 ] ; then - RETVAL=1 + RETVAL=3 fi set -e } rotate_logs_rabbitmq() { set +e - $DAEMON rotate_logs ${ROTATE_SUFFIX} + $CONTROL rotate_logs ${ROTATE_SUFFIX} if [ $? 
!= 0 ] ; then RETVAL=1 fi set -e } +restart_running_rabbitmq () { + status_rabbitmq quiet + if [ $RETVAL = 0 ] ; then + restart_rabbitmq + else + echo RabbitMQ is not runnning + RETVAL=0 + fi +} + restart_rabbitmq() { - stop_rabbitmq + stop_rabbitmq start_rabbitmq } @@ -122,11 +135,16 @@ case "$1" in echo -n "Rotating log files for $DESC: " rotate_logs_rabbitmq ;; - force-reload|reload|restart|condrestart|try-restart) + force-reload|reload|restart) echo -n "Restarting $DESC: " restart_rabbitmq echo "$NAME." ;; + try-restart) + echo -n "Restarting $DESC: " + restart_running_rabbitmq + echo "$NAME." + ;; *) echo "Usage: $0 {start|stop|status|rotate-logs|restart|condrestart|try-restart|reload|force-reload}" >&2 RETVAL=1 diff --git a/packaging/common/rabbitmq-server.ocf b/packaging/common/rabbitmq-server.ocf index dc0521dd..d58c48ed 100755 --- a/packaging/common/rabbitmq-server.ocf +++ b/packaging/common/rabbitmq-server.ocf @@ -20,7 +20,7 @@ ## ## OCF instance parameters -## OCF_RESKEY_multi +## OCF_RESKEY_server ## OCF_RESKEY_ctl ## OCF_RESKEY_nodename ## OCF_RESKEY_ip @@ -38,11 +38,11 @@ ####################################################################### -OCF_RESKEY_multi_default="/usr/sbin/rabbitmq-multi" +OCF_RESKEY_server_default="/usr/sbin/rabbitmq-server" OCF_RESKEY_ctl_default="/usr/sbin/rabbitmqctl" OCF_RESKEY_nodename_default="rabbit@localhost" OCF_RESKEY_log_base_default="/var/log/rabbitmq" -: ${OCF_RESKEY_multi=${OCF_RESKEY_multi_default}} +: ${OCF_RESKEY_server=${OCF_RESKEY_server_default}} : ${OCF_RESKEY_ctl=${OCF_RESKEY_ctl_default}} : ${OCF_RESKEY_nodename=${OCF_RESKEY_nodename_default}} : ${OCF_RESKEY_log_base=${OCF_RESKEY_log_base_default}} @@ -61,12 +61,12 @@ Resource agent for RabbitMQ-server <shortdesc lang="en">Resource agent for RabbitMQ-server</shortdesc> <parameters> -<parameter name="multi" unique="0" required="0"> +<parameter name="server" unique="0" required="0"> <longdesc lang="en"> -The path to the rabbitmq-multi script +The path to 
the rabbitmq-server script </longdesc> -<shortdesc lang="en">Path to rabbitmq-multi</shortdesc> -<content type="string" default="${OCF_RESKEY_multi_default}" /> +<shortdesc lang="en">Path to rabbitmq-server</shortdesc> +<content type="string" default="${OCF_RESKEY_server_default}" /> </parameter> <parameter name="ctl" unique="0" required="0"> @@ -103,9 +103,9 @@ The IP Port for rabbitmq-server to listen on <parameter name="config_file" unique="0" required="0"> <longdesc lang="en"> -Location of the config file +Location of the config file (without the .config suffix) </longdesc> -<shortdesc lang="en">Config file path</shortdesc> +<shortdesc lang="en">Config file path (without the .config suffix)</shortdesc> <content type="string" default="" /> </parameter> @@ -155,7 +155,7 @@ Expects to have a fully populated OCF RA-compliant environment set. END } -RABBITMQ_MULTI=$OCF_RESKEY_multi +RABBITMQ_SERVER=$OCF_RESKEY_server RABBITMQ_CTL=$OCF_RESKEY_ctl RABBITMQ_NODENAME=$OCF_RESKEY_nodename RABBITMQ_NODE_IP_ADDRESS=$OCF_RESKEY_ip @@ -177,8 +177,8 @@ export_vars() { } rabbit_validate_partial() { - if [ ! -x $RABBITMQ_MULTI ]; then - ocf_log err "rabbitmq-server multi $RABBITMQ_MULTI does not exist or is not executable"; + if [ ! -x $RABBITMQ_SERVER ]; then + ocf_log err "rabbitmq-server server $RABBITMQ_SERVER does not exist or is not executable"; exit $OCF_ERR_INSTALLED; fi @@ -189,8 +189,8 @@ rabbit_validate_partial() { } rabbit_validate_full() { - if [ ! -z $RABBITMQ_CONFIG_FILE ] && [ ! -e $RABBITMQ_CONFIG_FILE ]; then - ocf_log err "rabbitmq-server config_file $RABBITMQ_CONFIG_FILE does not exist or is not a file"; + if [ ! -z $RABBITMQ_CONFIG_FILE ] && [ ! 
-e "${RABBITMQ_CONFIG_FILE}.config" ]; then + ocf_log err "rabbitmq-server config_file ${RABBITMQ_CONFIG_FILE}.config does not exist or is not a file"; exit $OCF_ERR_INSTALLED; fi @@ -210,8 +210,18 @@ rabbit_validate_full() { } rabbit_status() { + rabbitmqctl_action "status" +} + +rabbit_wait() { + rabbitmqctl_action "wait" +} + +rabbitmqctl_action() { local rc - $RABBITMQ_CTL $NODENAME_ARG status > /dev/null 2> /dev/null + local action + action=$1 + $RABBITMQ_CTL $NODENAME_ARG $action > /dev/null 2> /dev/null rc=$? case "$rc" in 0) @@ -223,7 +233,7 @@ rabbit_status() { return $OCF_NOT_RUNNING ;; *) - ocf_log err "Unexpected return from rabbitmqctl $NODENAME_ARG status: $rc" + ocf_log err "Unexpected return from rabbitmqctl $NODENAME_ARG $action: $rc" exit $OCF_ERR_GENERIC esac } @@ -238,28 +248,16 @@ rabbit_start() { export_vars - $RABBITMQ_MULTI start_all 1 > ${RABBITMQ_LOG_BASE}/startup_log 2> ${RABBITMQ_LOG_BASE}/startup_err & - rc=$? - - if [ "$rc" != 0 ]; then - ocf_log err "rabbitmq-server start command failed: $RABBITMQ_MULTI start_all 1, $rc" - return $rc - fi + setsid sh -c "$RABBITMQ_SERVER > ${RABBITMQ_LOG_BASE}/startup_log 2> ${RABBITMQ_LOG_BASE}/startup_err" & - # Spin waiting for the server to come up. + # Wait for the server to come up. # Let the CRM/LRM time us out if required - start_wait=1 - while [ $start_wait = 1 ]; do - rabbit_status - rc=$? - if [ "$rc" = $OCF_SUCCESS ]; then - start_wait=0 - elif [ "$rc" != $OCF_NOT_RUNNING ]; then - ocf_log info "rabbitmq-server start failed: $rc" - exit $OCF_ERR_GENERIC - fi - sleep 1 - done + rabbit_wait + rc=$? + if [ "$rc" != $OCF_SUCCESS ]; then + ocf_log info "rabbitmq-server start failed: $rc" + exit $OCF_ERR_GENERIC + fi return $OCF_SUCCESS } @@ -272,11 +270,11 @@ rabbit_stop() { return $OCF_SUCCESS fi - $RABBITMQ_MULTI stop_all & + $RABBITMQ_CTL stop rc=$? 
if [ "$rc" != 0 ]; then - ocf_log err "rabbitmq-server stop command failed: $RABBITMQ_MULTI stop_all, $rc" + ocf_log err "rabbitmq-server stop command failed: $RABBITMQ_CTL stop, $rc" return $rc fi diff --git a/packaging/debs/Debian/Makefile b/packaging/debs/Debian/Makefile index ab05f732..38c81134 100644 --- a/packaging/debs/Debian/Makefile +++ b/packaging/debs/Debian/Makefile @@ -19,12 +19,15 @@ all: package: clean cp $(TARBALL_DIR)/$(TARBALL) $(DEBIAN_ORIG_TARBALL) - tar -zxvf $(DEBIAN_ORIG_TARBALL) + tar -zxf $(DEBIAN_ORIG_TARBALL) cp -r debian $(UNPACKED_DIR) cp $(COMMON_DIR)/* $(UNPACKED_DIR)/debian/ +# Debian and descendants differ from most other distros in that +# runlevel 2 should start network services. sed -i \ - -e 's|^DEFAULTS_FILE=.*$$|DEFAULTS_FILE=/etc/default/rabbitmq|' \ -e 's|^LOCK_FILE=.*$$|LOCK_FILE=|' \ + -e 's|^\(# Default-Start:\).*$$|\1 2 3 4 5|' \ + -e 's|^\(# Default-Stop:\).*$$|\1 0 1 6|' \ $(UNPACKED_DIR)/debian/rabbitmq-server.init sed -i -e 's|@SU_RABBITMQ_SH_C@|su rabbitmq -s /bin/sh -c|' \ $(UNPACKED_DIR)/debian/rabbitmq-script-wrapper diff --git a/packaging/debs/Debian/debian/changelog b/packaging/debs/Debian/debian/changelog index 12165dc0..1cab4235 100644 --- a/packaging/debs/Debian/debian/changelog +++ b/packaging/debs/Debian/debian/changelog @@ -1,3 +1,21 @@ +rabbitmq-server (2.5.0-1) lucid; urgency=low + + * New Upstream Release + + -- <jerryk@vmware.com> Thu, 09 Jun 2011 07:20:29 -0700 + +rabbitmq-server (2.4.1-1) lucid; urgency=low + + * New Upstream Release + + -- Alexandru Scvortov <alexandru@rabbitmq.com> Thu, 07 Apr 2011 16:49:22 +0100 + +rabbitmq-server (2.4.0-1) lucid; urgency=low + + * New Upstream Release + + -- Alexandru Scvortov <alexandru@rabbitmq.com> Tue, 22 Mar 2011 17:34:31 +0000 + rabbitmq-server (2.3.1-1) lucid; urgency=low * New Upstream Release diff --git a/packaging/debs/Debian/debian/control b/packaging/debs/Debian/debian/control index 02da0cc6..45f5c5c4 100644 --- a/packaging/debs/Debian/debian/control 
+++ b/packaging/debs/Debian/debian/control @@ -7,10 +7,7 @@ Standards-Version: 3.8.0 Package: rabbitmq-server Architecture: all -# erlang-inets is not a strict dependency, but it's needed to allow -# the installation of plugins that use mochiweb. Ideally it would be a -# "Recommends" instead, but gdebi does not install those. -Depends: erlang-base (>= 1:12.b.3) | erlang-base-hipe (>= 1:12.b.3), erlang-ssl | erlang-nox (<< 1:13.b-dfsg1-1), erlang-os-mon | erlang-nox (<< 1:13.b-dfsg1-1), erlang-mnesia | erlang-nox (<< 1:13.b-dfsg1-1), erlang-inets | erlang-nox (<< 1:13.b-dfsg1-1), adduser, logrotate, ${misc:Depends} +Depends: erlang-nox (>= 1:12.b.3), adduser, logrotate, ${misc:Depends} Description: An AMQP server written in Erlang RabbitMQ is an implementation of AMQP, the emerging standard for high performance enterprise messaging. The RabbitMQ server is a robust and diff --git a/packaging/debs/Debian/debian/postinst b/packaging/debs/Debian/debian/postinst index 134f16ee..b11340ef 100644 --- a/packaging/debs/Debian/debian/postinst +++ b/packaging/debs/Debian/debian/postinst @@ -35,6 +35,10 @@ chown -R rabbitmq:rabbitmq /var/log/rabbitmq case "$1" in configure) + if [ -f /etc/rabbitmq/rabbitmq.conf ] && \ + [ ! 
-f /etc/rabbitmq/rabbitmq-env.conf ]; then + mv /etc/rabbitmq/rabbitmq.conf /etc/rabbitmq/rabbitmq-env.conf + fi ;; abort-upgrade|abort-remove|abort-deconfigure) diff --git a/packaging/debs/Debian/debian/rules b/packaging/debs/Debian/debian/rules index 6b6df33b..a785b292 100644 --- a/packaging/debs/Debian/debian/rules +++ b/packaging/debs/Debian/debian/rules @@ -14,7 +14,7 @@ DOCDIR=$(DEB_DESTDIR)usr/share/doc/rabbitmq-server/ install/rabbitmq-server:: mkdir -p $(DOCDIR) rm $(RABBIT_LIB)LICENSE* $(RABBIT_LIB)INSTALL* - for script in rabbitmqctl rabbitmq-server rabbitmq-multi; do \ + for script in rabbitmqctl rabbitmq-server; do \ install -p -D -m 0755 debian/rabbitmq-script-wrapper $(DEB_DESTDIR)usr/sbin/$$script; \ done sed -e 's|@RABBIT_LIB@|/usr/lib/rabbitmq/lib/rabbitmq_server-$(DEB_UPSTREAM_VERSION)|g' <debian/postrm.in >debian/postrm diff --git a/packaging/generic-unix/Makefile b/packaging/generic-unix/Makefile index c4e01f4a..b5c342aa 100644 --- a/packaging/generic-unix/Makefile +++ b/packaging/generic-unix/Makefile @@ -4,7 +4,7 @@ TARGET_DIR=rabbitmq_server-$(VERSION) TARGET_TARBALL=rabbitmq-server-generic-unix-$(VERSION) dist: - tar -zxvf ../../dist/$(SOURCE_DIR).tar.gz + tar -zxf ../../dist/$(SOURCE_DIR).tar.gz $(MAKE) -C $(SOURCE_DIR) \ TARGET_DIR=`pwd`/$(TARGET_DIR) \ diff --git a/packaging/macports/Portfile.in b/packaging/macports/Portfile.in index f8417b83..4a866305 100644 --- a/packaging/macports/Portfile.in +++ b/packaging/macports/Portfile.in @@ -5,7 +5,7 @@ PortSystem 1.0 name rabbitmq-server version @VERSION@ categories net -maintainers paperplanes.de:meyer rabbitmq.com:tonyg openmaintainer +maintainers paperplanes.de:meyer openmaintainer platforms darwin supported_archs noarch @@ -23,12 +23,12 @@ distfiles ${name}-${version}${extract.suffix} \ ${name}-generic-unix-${version}${extract.suffix} checksums \ - ${name}-${version}${extract.suffix} md5 @md5-src@ \ - ${name}-${version}${extract.suffix} sha1 @sha1-src@ \ - 
${name}-${version}${extract.suffix} rmd160 @rmd160-src@ \ - ${name}-generic-unix-${version}${extract.suffix} md5 @md5-bin@ \ - ${name}-generic-unix-${version}${extract.suffix} sha1 @sha1-bin@ \ - ${name}-generic-unix-${version}${extract.suffix} rmd160 @rmd160-bin@ + ${name}-${version}${extract.suffix} \ + sha1 @sha1-src@ \ + rmd160 @rmd160-src@ \ + ${name}-generic-unix-${version}${extract.suffix} \ + sha1 @sha1-bin@ \ + rmd160 @rmd160-bin@ depends_lib port:erlang depends_build port:libxslt @@ -81,31 +81,28 @@ post-destroot { xinstall -d -g [existsgroup ${servergroup}] -m 775 ${destroot}${serverhome} xinstall -d -g [existsgroup ${servergroup}] -m 775 ${destroot}${mnesiadbdir} - reinplace -E "s:(/etc/rabbitmq/rabbitmq.conf):${prefix}\\1:g" \ + reinplace -E "s:(/etc/rabbitmq/rabbitmq):${prefix}\\1:g" \ ${realsbin}/rabbitmq-env - foreach var {CONFIG_FILE LOG_BASE MNESIA_BASE PIDS_FILE} { + foreach var {CONFIG_FILE LOG_BASE MNESIA_BASE} { reinplace -E "s:^($var)=/:\\1=${prefix}/:" \ - ${realsbin}/rabbitmq-multi \ ${realsbin}/rabbitmq-server \ ${realsbin}/rabbitmqctl } xinstall -m 555 ${filespath}/rabbitmq-script-wrapper \ - ${wrappersbin}/rabbitmq-multi + ${wrappersbin}/rabbitmq-server reinplace -E "s:MACPORTS_PREFIX/bin:${prefix}/bin:" \ - ${wrappersbin}/rabbitmq-multi + ${wrappersbin}/rabbitmq-server reinplace -E "s:/usr/lib/rabbitmq/bin/:${prefix}/lib/rabbitmq/bin/:" \ - ${wrappersbin}/rabbitmq-multi + ${wrappersbin}/rabbitmq-server reinplace -E "s:/var/lib/rabbitmq:${prefix}/var/lib/rabbitmq:" \ - ${wrappersbin}/rabbitmq-multi - file copy ${wrappersbin}/rabbitmq-multi ${wrappersbin}/rabbitmq-server - file copy ${wrappersbin}/rabbitmq-multi ${wrappersbin}/rabbitmqctl - - file copy ${mansrc}/man1/rabbitmq-multi.1.gz ${mandest}/man1/ - file copy ${mansrc}/man1/rabbitmq-server.1.gz ${mandest}/man1/ - file copy ${mansrc}/man1/rabbitmqctl.1.gz ${mandest}/man1/ - file copy ${mansrc}/man5/rabbitmq.conf.5.gz ${mandest}/man5/ + ${wrappersbin}/rabbitmq-server + file copy 
${wrappersbin}/rabbitmq-server ${wrappersbin}/rabbitmqctl + + xinstall -m 644 -W ${mansrc}/man1 rabbitmq-server.1.gz rabbitmqctl.1.gz \ + ${mandest}/man1/ + xinstall -m 644 -W ${mansrc}/man5 rabbitmq-env.conf.5.gz ${mandest}/man5/ } pre-install { diff --git a/packaging/macports/make-checksums.sh b/packaging/macports/make-checksums.sh index 11424dfc..891de6ba 100755 --- a/packaging/macports/make-checksums.sh +++ b/packaging/macports/make-checksums.sh @@ -6,7 +6,7 @@ for type in src bin do tarball_var=tarball_${type} tarball=${!tarball_var} - for algo in md5 sha1 rmd160 + for algo in sha1 rmd160 do checksum=$(openssl $algo ${tarball} | awk '{print $NF}') echo "s|@$algo-$type@|$checksum|g" diff --git a/packaging/windows-exe/Makefile b/packaging/windows-exe/Makefile index 59803f9c..ab50e30b 100644 --- a/packaging/windows-exe/Makefile +++ b/packaging/windows-exe/Makefile @@ -2,7 +2,7 @@ VERSION=0.0.0 ZIP=../windows/rabbitmq-server-windows-$(VERSION) dist: rabbitmq-$(VERSION).nsi rabbitmq_server-$(VERSION) - makensis rabbitmq-$(VERSION).nsi + makensis -V2 rabbitmq-$(VERSION).nsi rabbitmq-$(VERSION).nsi: rabbitmq_nsi.in sed \ @@ -10,7 +10,7 @@ rabbitmq-$(VERSION).nsi: rabbitmq_nsi.in $< > $@ rabbitmq_server-$(VERSION): - unzip $(ZIP) + unzip -q $(ZIP) clean: rm -rf rabbitmq-*.nsi rabbitmq_server-* rabbitmq-server-*.exe diff --git a/packaging/windows-exe/lib/EnvVarUpdate.nsh b/packaging/windows-exe/lib/EnvVarUpdate.nsh deleted file mode 100644 index 839d6a02..00000000 --- a/packaging/windows-exe/lib/EnvVarUpdate.nsh +++ /dev/null @@ -1,327 +0,0 @@ -/**
- * EnvVarUpdate.nsh
- * : Environmental Variables: append, prepend, and remove entries
- *
- * WARNING: If you use StrFunc.nsh header then include it before this file
- * with all required definitions. This is to avoid conflicts
- *
- * Usage:
- * ${EnvVarUpdate} "ResultVar" "EnvVarName" "Action" "RegLoc" "PathString"
- *
- * Credits:
- * Version 1.0
- * * Cal Turney (turnec2)
- * * Amir Szekely (KiCHiK) and e-circ for developing the forerunners of this
- * function: AddToPath, un.RemoveFromPath, AddToEnvVar, un.RemoveFromEnvVar,
- * WriteEnvStr, and un.DeleteEnvStr
- * * Diego Pedroso (deguix) for StrTok
- * * Kevin English (kenglish_hi) for StrContains
- * * Hendri Adriaens (Smile2Me), Diego Pedroso (deguix), and Dan Fuhry
- * (dandaman32) for StrReplace
- *
- * Version 1.1 (compatibility with StrFunc.nsh)
- * * techtonik
- *
- * http://nsis.sourceforge.net/Environmental_Variables:_append%2C_prepend%2C_and_remove_entries
- *
- */
-
-
-!ifndef ENVVARUPDATE_FUNCTION
-!define ENVVARUPDATE_FUNCTION
-!verbose push
-!verbose 3
-!include "LogicLib.nsh"
-!include "WinMessages.NSH"
-!include "StrFunc.nsh"
-
- ; ---- Fix for conflict if StrFunc.nsh is already included in main file -----------------------
- ; _IncludeStrFunction StrFuncName
- ; Expands ${<StrFuncName>} and ${Un<StrFuncName>} unless the matching
- ; <StrFuncName>_INCLUDED / Un<StrFuncName>_INCLUDED symbol is already defined,
- ; then defines un.<StrFuncName> as an alias for the value of ${Un<StrFuncName>}.
- ; NOTE(review): presumably StrFunc.nsh defines the *_INCLUDED symbols once a
- ; function has been declared - confirm against StrFunc.nsh.
- !macro _IncludeStrFunction StrFuncName
- !ifndef ${StrFuncName}_INCLUDED
- ${${StrFuncName}}
- !endif
- !ifndef Un${StrFuncName}_INCLUDED
- ${Un${StrFuncName}}
- !endif
- ; Alias so that ${${UN}<name>} in the EnvVarUpdate macro also resolves when UN is "un."
- !define un.${StrFuncName} "${Un${StrFuncName}}"
- !macroend
-
- ; The three string helpers referenced by the EnvVarUpdate function.
- !insertmacro _IncludeStrFunction StrTok
- !insertmacro _IncludeStrFunction StrStr
- !insertmacro _IncludeStrFunction StrRep
-
-; ---------------------------------- Macro Definitions ----------------------------------------
- ; _EnvVarUpdateConstructor ResultVar EnvVarName Action Regloc PathString
- ; Pushes the four inputs (EnvVarName first, PathString last, so PathString is
- ; on top of the stack), calls the EnvVarUpdate function and pops its result
- ; into ResultVar. Invoked through the ${EnvVarUpdate} define below.
- ; NOTE(review): the parameter is declared "Regloc" but referenced as ${RegLoc};
- ; this relies on the symbol lookup being case-insensitive - confirm.
- !macro _EnvVarUpdateConstructor ResultVar EnvVarName Action Regloc PathString
- Push "${EnvVarName}"
- Push "${Action}"
- Push "${RegLoc}"
- Push "${PathString}"
- Call EnvVarUpdate
- Pop "${ResultVar}"
- !macroend
- ; ${EnvVarUpdate} "ResultVar" "EnvVarName" "Action" "RegLoc" "PathString"
- !define EnvVarUpdate '!insertmacro "_EnvVarUpdateConstructor"'
-
- ; _unEnvVarUpdateConstructor ResultVar EnvVarName Action Regloc PathString
- ; Same as the installer-side constructor macro, but calls un.EnvVarUpdate:
- ; pushes the four inputs (PathString ends up on top), calls the function and
- ; pops its result into ResultVar. Invoked through ${un.EnvVarUpdate}.
- ; NOTE(review): the parameter is declared "Regloc" but referenced as ${RegLoc};
- ; this relies on the symbol lookup being case-insensitive - confirm.
- !macro _unEnvVarUpdateConstructor ResultVar EnvVarName Action Regloc PathString
- Push "${EnvVarName}"
- Push "${Action}"
- Push "${RegLoc}"
- Push "${PathString}"
- Call un.EnvVarUpdate
- Pop "${ResultVar}"
- !macroend
- ; ${un.EnvVarUpdate} "ResultVar" "EnvVarName" "Action" "RegLoc" "PathString"
- !define un.EnvVarUpdate '!insertmacro "_unEnvVarUpdateConstructor"'
-; ---------------------------------- Macro Definitions end-------------------------------------
-
-;----------------------------------- EnvVarUpdate start----------------------------------------
- ; Registry root + key pairs that EnvVarUpdate reads from and writes to,
- ; selected by its RegLoc argument (HKLM or HKCU respectively).
- !define hklm_all_users 'HKLM "SYSTEM\CurrentControlSet\Control\Session Manager\Environment"'
- !define hkcu_current_user 'HKCU "Environment"'
-
- ; EnvVarUpdate UN
- ; Defines Function ${UN}EnvVarUpdate. The macro is instantiated twice at the
- ; bottom of this block: with "" (installer) and with "un." (uninstaller).
- ;
- ; Stack on entry (pushed by the constructor macros, last one on top):
- ; EnvVarName, Action, RegLoc, PathString
- ; Stack on exit: the result string; $0-$9 and $R0 are restored.
- ;
- ; Action is "A" (append), "P" (prepend) or "R" (remove); RegLoc is HKLM or HKCU.
- ; Every existing occurrence of PathString is dropped before an append/prepend,
- ; so the entry ends up listed once.
- ; On invalid input (blank name, bad Action/RegLoc, blank PathString, or a
- ; remove on an empty variable) the error flag is set and the registry write in
- ; Step 5 is skipped.
- !macro EnvVarUpdate UN
-
- Function ${UN}EnvVarUpdate
-
- ; Save the caller's registers and load the four arguments into $1-$4
- ; (resulting layout is listed in the table below).
- Push $0
- Exch 4
- Exch $1
- Exch 3
- Exch $2
- Exch 2
- Exch $3
- Exch
- Exch $4
- Push $5
- Push $6
- Push $7
- Push $8
- Push $9
- Push $R0
-
- /* After this point:
- -------------------------
- $0 = ResultVar (returned)
- $1 = EnvVarName (input)
- $2 = Action (input)
- $3 = RegLoc (input)
- $4 = PathString (input)
- $5 = Orig EnvVar (read from registry)
- $6 = Len of $0 (temp)
- $7 = tempstr1 (temp)
- $8 = Entry counter (temp)
- $9 = tempstr2 (temp)
- $R0 = tempChar (temp) */
-
- ; Step 1: Read contents of EnvVarName from RegLoc
- ;
- ; Check for empty EnvVarName
- ${If} $1 == ""
- SetErrors
- DetailPrint "ERROR: EnvVarName is blank"
- Goto EnvVarUpdate_Restore_Vars
- ${EndIf}
-
- ; Check for valid Action
- ${If} $2 != "A"
- ${AndIf} $2 != "P"
- ${AndIf} $2 != "R"
- SetErrors
- DetailPrint "ERROR: Invalid Action - must be A, P, or R"
- Goto EnvVarUpdate_Restore_Vars
- ${EndIf}
-
- ; NOTE(review): the value is held in an ordinary NSIS string variable; if the
- ; stored value exceeds the NSIS string length limit it presumably cannot be
- ; read in full, and Step 5 would then write back a shortened value - confirm
- ; the behaviour for very long PATH values before reusing this function.
- ${If} $3 == HKLM
- ReadRegStr $5 ${hklm_all_users} $1 ; Get EnvVarName from all users into $5
- ${ElseIf} $3 == HKCU
- ReadRegStr $5 ${hkcu_current_user} $1 ; Read EnvVarName from current user into $5
- ${Else}
- SetErrors
- ; $3 is RegLoc, so the message names RegLoc (it previously said "Action")
- DetailPrint 'ERROR: RegLoc is [$3] but must be "HKLM" or "HKCU"'
- Goto EnvVarUpdate_Restore_Vars
- ${EndIf}
-
- ; Check for empty PathString
- ${If} $4 == ""
- SetErrors
- DetailPrint "ERROR: PathString is blank"
- Goto EnvVarUpdate_Restore_Vars
- ${EndIf}
-
- ; Make sure we've got some work to do
- ${If} $5 == ""
- ${AndIf} $2 == "R"
- SetErrors
- DetailPrint "$1 is empty - Nothing to remove"
- Goto EnvVarUpdate_Restore_Vars
- ${EndIf}
-
- ; Step 2: Scrub EnvVar
- ;
- StrCpy $0 $5 ; Copy the contents to $0
- ; Remove spaces around semicolons (NOTE: spaces before the 1st entry or
- ; after the last one are not removed here but instead in Step 3)
- ${If} $0 != "" ; If EnvVar is not empty ...
- ${Do}
- ${${UN}StrStr} $7 $0 " ;"
- ${If} $7 == ""
- ${ExitDo}
- ${EndIf}
- ${${UN}StrRep} $0 $0 " ;" ";" ; Remove '<space>;'
- ${Loop}
- ${Do}
- ${${UN}StrStr} $7 $0 "; "
- ${If} $7 == ""
- ${ExitDo}
- ${EndIf}
- ${${UN}StrRep} $0 $0 "; " ";" ; Remove ';<space>'
- ${Loop}
- ${Do}
- ${${UN}StrStr} $7 $0 ";;"
- ${If} $7 == ""
- ${ExitDo}
- ${EndIf}
- ${${UN}StrRep} $0 $0 ";;" ";" ; Collapse empty entries
- ${Loop}
-
- ; Remove a leading or trailing semicolon from EnvVar
- StrCpy $7 $0 1 0
- ${If} $7 == ";"
- StrCpy $0 $0 "" 1 ; Change ';<EnvVar>' to '<EnvVar>'
- ${EndIf}
- StrLen $6 $0
- IntOp $6 $6 - 1
- StrCpy $7 $0 1 $6 ; $7 = last character of $0
- ${If} $7 == ";"
- StrCpy $0 $0 $6 ; Change '<EnvVar>;' to '<EnvVar>'
- ${EndIf}
- ; DetailPrint "Scrubbed $1: [$0]" ; Uncomment to debug
- ${EndIf}
-
- /* Step 3. Remove all instances of the target path/string (even if "A" or "P")
- $6 = bool flag (1 = found and removed PathString)
- $7 = a string (e.g. path) delimited by semicolon(s)
- $8 = entry counter starting at 0
- $9 = copy of $0
- $R0 = tempChar */
-
- ${If} $5 != "" ; If EnvVar is not empty ...
- StrCpy $9 $0
- StrCpy $0 ""
- StrCpy $8 0
- StrCpy $6 0
-
- ${Do}
- ${${UN}StrTok} $7 $9 ";" $8 "0" ; $7 = next entry, $8 = entry counter
-
- ${If} $7 == "" ; If we've run out of entries,
- ${ExitDo} ; we're done
- ${EndIf} ;
-
- ; Remove leading and trailing spaces from this entry (critical step for Action=Remove)
- ${Do}
- StrCpy $R0 $7 1
- ${If} $R0 != " "
- ${ExitDo}
- ${EndIf}
- StrCpy $7 $7 "" 1 ; Remove leading space
- ${Loop}
- ${Do}
- StrCpy $R0 $7 1 -1
- ${If} $R0 != " "
- ${ExitDo}
- ${EndIf}
- StrCpy $7 $7 -1 ; Remove trailing space
- ${Loop}
- ${If} $7 == $4 ; If string matches, remove it by not appending it
- StrCpy $6 1 ; Set 'found' flag
- ${ElseIf} $7 != $4 ; If string does NOT match
- ${AndIf} $0 == "" ; and the 1st string being added to $0,
- StrCpy $0 $7 ; copy it to $0 without a prepended semicolon
- ${ElseIf} $7 != $4 ; If string does NOT match
- ${AndIf} $0 != "" ; and this is NOT the 1st string to be added to $0,
- StrCpy $0 $0;$7 ; append path to $0 with a prepended semicolon
- ${EndIf} ;
-
- IntOp $8 $8 + 1 ; Bump counter
- ${Loop} ; Check for duplicates until we run out of paths
- ${EndIf}
-
- ; Step 4: Perform the requested Action
- ;
- ${If} $2 != "R" ; If Append or Prepend
- ${If} $6 == 1 ; And if we found the target
- ; This sentence is completed by the "appended to"/"prepended to" messages below
- DetailPrint "Target is already present in $1. It will be removed and"
- ${EndIf}
- ${If} $0 == "" ; If EnvVar is (now) empty
- StrCpy $0 $4 ; just copy PathString to EnvVar
- ${If} $6 == 0 ; If found flag is either 0
- ${OrIf} $6 == "" ; or blank (if EnvVarName is empty)
- DetailPrint "$1 was empty and has been updated with the target"
- ${EndIf}
- ${ElseIf} $2 == "A" ; If Append (and EnvVar is not empty),
- StrCpy $0 $0;$4 ; append PathString
- ${If} $6 == 1
- DetailPrint "appended to $1"
- ${Else}
- DetailPrint "Target was appended to $1"
- ${EndIf}
- ${Else} ; If Prepend (and EnvVar is not empty),
- StrCpy $0 $4;$0 ; prepend PathString
- ${If} $6 == 1
- DetailPrint "prepended to $1"
- ${Else}
- DetailPrint "Target was prepended to $1"
- ${EndIf}
- ${EndIf}
- ${Else} ; If Action = Remove
- ${If} $6 == 1 ; and we found the target
- DetailPrint "Target was found and removed from $1"
- ${Else}
- DetailPrint "Target was NOT found in $1 (nothing to remove)"
- ${EndIf}
- ${If} $0 == ""
- DetailPrint "$1 is now empty"
- ${EndIf}
- ${EndIf}
-
- ; Step 5: Update the registry at RegLoc with the updated EnvVar and announce the change
- ;
- ClearErrors
- ${If} $3 == HKLM
- WriteRegExpandStr ${hklm_all_users} $1 $0 ; Write it in all users section
- ${ElseIf} $3 == HKCU
- WriteRegExpandStr ${hkcu_current_user} $1 $0 ; Write it to current user section
- ${EndIf}
-
- ; No error: skip the three failure instructions and go straight to SendMessage.
- ; The +4 is a relative instruction count - keep it in sync if instructions are added.
- IfErrors 0 +4
- MessageBox MB_OK|MB_ICONEXCLAMATION "Could not write updated $1 to $3"
- DetailPrint "Could not write updated $1 to $3"
- Goto EnvVarUpdate_Restore_Vars
-
- ; "Export" our change
- SendMessage ${HWND_BROADCAST} ${WM_WININICHANGE} 0 "STR:Environment" /TIMEOUT=5000
-
- EnvVarUpdate_Restore_Vars:
- ;
- ; Restore the user's variables and return ResultVar
- Pop $R0
- Pop $9
- Pop $8
- Pop $7
- Pop $6
- Pop $5
- Pop $4
- Pop $3
- Pop $2
- Pop $1
- Push $0 ; Push my $0 (ResultVar)
- Exch
- Pop $0 ; Restore his $0
-
- FunctionEnd
-
- !macroend ; EnvVarUpdate UN
- ; Emit the installer function (EnvVarUpdate) and the uninstaller one (un.EnvVarUpdate).
- !insertmacro EnvVarUpdate ""
- !insertmacro EnvVarUpdate "un."
-;----------------------------------- EnvVarUpdate end----------------------------------------
-
-!verbose pop
-!endif
diff --git a/packaging/windows-exe/rabbitmq_nsi.in b/packaging/windows-exe/rabbitmq_nsi.in index 6d79ffd4..27e4e1dc 100644 --- a/packaging/windows-exe/rabbitmq_nsi.in +++ b/packaging/windows-exe/rabbitmq_nsi.in @@ -4,7 +4,6 @@ !include WinMessages.nsh !include FileFunc.nsh !include WordFunc.nsh -!include lib\EnvVarUpdate.nsh !define env_hklm 'HKLM "SYSTEM\CurrentControlSet\Control\Session Manager\Environment"' !define uninstall "Software\Microsoft\Windows\CurrentVersion\Uninstall\RabbitMQ" @@ -77,9 +76,6 @@ Section "RabbitMQ Server (required)" Rabbit File /r "rabbitmq_server-%%VERSION%%" File "rabbitmq.ico" - ; Add to PATH - ${EnvVarUpdate} $0 "PATH" "A" "HKLM" "$INSTDIR\rabbitmq_server-%%VERSION%%\sbin" - ; Write the installation path into the registry WriteRegStr HKLM "SOFTWARE\VMware, Inc.\RabbitMQ Server" "Install_Dir" "$INSTDIR" @@ -117,15 +113,18 @@ Section "Start Menu" RabbitStartMenu CreateDirectory "$APPDATA\RabbitMQ\db" CreateDirectory "$SMPROGRAMS\RabbitMQ Server" - CreateShortCut "$SMPROGRAMS\RabbitMQ Server\Uninstall.lnk" "$INSTDIR\uninstall.exe" "" "$INSTDIR\uninstall.exe" 0 - CreateShortCut "$SMPROGRAMS\RabbitMQ Server\Plugins Directory.lnk" "$INSTDIR\rabbitmq_server-%%VERSION%%\plugins" - CreateShortCut "$SMPROGRAMS\RabbitMQ Server\Log Directory.lnk" "$APPDATA\RabbitMQ\log" - CreateShortCut "$SMPROGRAMS\RabbitMQ Server\Database Directory.lnk" "$APPDATA\RabbitMQ\db" - CreateShortCut "$SMPROGRAMS\RabbitMQ Server\(Re)Install Service.lnk" "$INSTDIR\rabbitmq_server-%%VERSION%%\sbin\rabbitmq-service.bat" "install" "$INSTDIR\rabbitmq.ico" - CreateShortCut "$SMPROGRAMS\RabbitMQ Server\Remove Service.lnk" "$INSTDIR\rabbitmq_server-%%VERSION%%\sbin\rabbitmq-service.bat" "remove" "$INSTDIR\rabbitmq.ico" - CreateShortCut "$SMPROGRAMS\RabbitMQ Server\Start Service.lnk" "$INSTDIR\rabbitmq_server-%%VERSION%%\sbin\rabbitmq-service.bat" "start" "$INSTDIR\rabbitmq.ico" - CreateShortCut "$SMPROGRAMS\RabbitMQ Server\Stop Service.lnk" 
"$INSTDIR\rabbitmq_server-%%VERSION%%\sbin\rabbitmq-service.bat" "stop" "$INSTDIR\rabbitmq.ico" - + CreateShortCut "$SMPROGRAMS\RabbitMQ Server\Uninstall RabbitMQ.lnk" "$INSTDIR\uninstall.exe" "" "$INSTDIR\uninstall.exe" 0 + CreateShortCut "$SMPROGRAMS\RabbitMQ Server\RabbitMQ Plugins.lnk" "$INSTDIR\rabbitmq_server-%%VERSION%%\plugins" + CreateShortCut "$SMPROGRAMS\RabbitMQ Server\RabbitMQ Logs.lnk" "$APPDATA\RabbitMQ\log" + CreateShortCut "$SMPROGRAMS\RabbitMQ Server\RabbitMQ Database Directory.lnk" "$APPDATA\RabbitMQ\db" + CreateShortCut "$SMPROGRAMS\RabbitMQ Server\RabbitMQ Service - (re)install.lnk" "$INSTDIR\rabbitmq_server-%%VERSION%%\sbin\rabbitmq-service.bat" "install" "$INSTDIR\rabbitmq.ico" + CreateShortCut "$SMPROGRAMS\RabbitMQ Server\RabbitMQ Service - remove.lnk" "$INSTDIR\rabbitmq_server-%%VERSION%%\sbin\rabbitmq-service.bat" "remove" "$INSTDIR\rabbitmq.ico" + CreateShortCut "$SMPROGRAMS\RabbitMQ Server\RabbitMQ Service - start.lnk" "$INSTDIR\rabbitmq_server-%%VERSION%%\sbin\rabbitmq-service.bat" "start" "$INSTDIR\rabbitmq.ico" + CreateShortCut "$SMPROGRAMS\RabbitMQ Server\RabbitMQ Service - stop.lnk" "$INSTDIR\rabbitmq_server-%%VERSION%%\sbin\rabbitmq-service.bat" "stop" "$INSTDIR\rabbitmq.ico" + + SetOutPath "$INSTDIR\rabbitmq_server-%%VERSION%%\sbin" + CreateShortCut "$SMPROGRAMS\RabbitMQ Server\RabbitMQ Command Prompt (sbin dir).lnk" "$WINDIR\system32\cmd.exe" "" "$WINDIR\system32\cmd.exe" + SetOutPath $INSTDIR SectionEnd ;-------------------------------- @@ -157,9 +156,6 @@ Section "Uninstall" ExecWait '"$0" /C "$INSTDIR\rabbitmq_server-%%VERSION%%\sbin\rabbitmq-service.bat" stop' ExecWait '"$0" /C "$INSTDIR\rabbitmq_server-%%VERSION%%\sbin\rabbitmq-service.bat" remove' - ; Remove from PATH - ${un.EnvVarUpdate} $0 "PATH" "R" "HKLM" "$INSTDIR\rabbitmq_server-%%VERSION%%\sbin" - ; Remove files and uninstaller RMDir /r "$INSTDIR\rabbitmq_server-%%VERSION%%" Delete "$INSTDIR\rabbitmq.ico" diff --git a/packaging/windows/Makefile 
b/packaging/windows/Makefile index abe174e0..a0be8d89 100644 --- a/packaging/windows/Makefile +++ b/packaging/windows/Makefile @@ -4,14 +4,13 @@ TARGET_DIR=rabbitmq_server-$(VERSION) TARGET_ZIP=rabbitmq-server-windows-$(VERSION) dist: - tar -zxvf ../../dist/$(SOURCE_DIR).tar.gz + tar -zxf ../../dist/$(SOURCE_DIR).tar.gz $(MAKE) -C $(SOURCE_DIR) mkdir $(SOURCE_DIR)/sbin mv $(SOURCE_DIR)/scripts/rabbitmq-server.bat $(SOURCE_DIR)/sbin mv $(SOURCE_DIR)/scripts/rabbitmq-service.bat $(SOURCE_DIR)/sbin mv $(SOURCE_DIR)/scripts/rabbitmqctl.bat $(SOURCE_DIR)/sbin - mv $(SOURCE_DIR)/scripts/rabbitmq-multi.bat $(SOURCE_DIR)/sbin rm -rf $(SOURCE_DIR)/scripts rm -rf $(SOURCE_DIR)/codegen* $(SOURCE_DIR)/Makefile rm -f $(SOURCE_DIR)/README @@ -25,7 +24,7 @@ dist: elinks -dump -no-references -no-numbering rabbitmq-service.html \ > $(TARGET_DIR)/readme-service.txt todos $(TARGET_DIR)/readme-service.txt - zip -r $(TARGET_ZIP).zip $(TARGET_DIR) + zip -q -r $(TARGET_ZIP).zip $(TARGET_DIR) rm -rf $(TARGET_DIR) rabbitmq-service.html clean: clean_partial diff --git a/quickcheck b/quickcheck new file mode 100755 index 00000000..a36cf3ed --- /dev/null +++ b/quickcheck @@ -0,0 +1,36 @@ +#!/usr/bin/env escript +%% -*- erlang -*- +%%! -sname quickcheck +-mode(compile). 
+ +%% A helper to test quickcheck properties on a running broker +%% NodeStr is a local broker node name +%% ModStr is the module containing quickcheck properties +%% The number of trials is optional +main([NodeStr, ModStr | TrialsStr]) -> + {ok, Hostname} = inet:gethostname(), + Node = list_to_atom(NodeStr ++ "@" ++ Hostname), + Mod = list_to_atom(ModStr), + Trials = lists:map(fun erlang:list_to_integer/1, TrialsStr), + case rpc:call(Node, code, ensure_loaded, [proper]) of + {module, proper} -> + case rpc:call(Node, proper, module, [Mod] ++ Trials) of + [] -> ok; + _ -> quit(1) + end; + {badrpc, Reason} -> + io:format("Could not contact node ~p: ~p.~n", [Node, Reason]), + quit(2); + {error,nofile} -> + io:format("Module PropEr was not found on node ~p~n", [Node]), + quit(2) + end; +main([]) -> + io:format("This script requires a node name and a module.~n"). + +quit(Status) -> + case os:type() of + {unix, _} -> halt(Status); + {win32, _} -> init:stop(Status) + end. + diff --git a/scripts/rabbitmq-env b/scripts/rabbitmq-env index df4b24d8..a2ef8d3c 100755 --- a/scripts/rabbitmq-env +++ b/scripts/rabbitmq-env @@ -37,4 +37,9 @@ RABBITMQ_HOME="${SCRIPT_DIR}/.." NODENAME=rabbit@${HOSTNAME%%.*} # Load configuration from the rabbitmq.conf file -[ -f /etc/rabbitmq/rabbitmq.conf ] && . /etc/rabbitmq/rabbitmq.conf +if [ -f /etc/rabbitmq/rabbitmq.conf ] && \ + [ ! -f /etc/rabbitmq/rabbitmq-env.conf ] ; then + echo -n "WARNING: ignoring /etc/rabbitmq/rabbitmq.conf -- " + echo "location has moved to /etc/rabbitmq/rabbitmq-env.conf" +fi +[ -f /etc/rabbitmq/rabbitmq-env.conf ] && . /etc/rabbitmq/rabbitmq-env.conf diff --git a/scripts/rabbitmq-multi b/scripts/rabbitmq-multi deleted file mode 100755 index ebcf4b63..00000000 --- a/scripts/rabbitmq-multi +++ /dev/null @@ -1,72 +0,0 @@ -#!/bin/sh -## The contents of this file are subject to the Mozilla Public License -## Version 1.1 (the "License"); you may not use this file except in -## compliance with the License. 
You may obtain a copy of the License -## at http://www.mozilla.org/MPL/ -## -## Software distributed under the License is distributed on an "AS IS" -## basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See -## the License for the specific language governing rights and -## limitations under the License. -## -## The Original Code is RabbitMQ. -## -## The Initial Developer of the Original Code is VMware, Inc. -## Copyright (c) 2007-2011 VMware, Inc. All rights reserved. -## - -SCRIPT_HOME=$(dirname $0) -PIDS_FILE=/var/lib/rabbitmq/pids -MULTI_ERL_ARGS= -MULTI_START_ARGS= -CONFIG_FILE=/etc/rabbitmq/rabbitmq - -. `dirname $0`/rabbitmq-env - -DEFAULT_NODE_IP_ADDRESS=0.0.0.0 -DEFAULT_NODE_PORT=5672 -[ "x" = "x$RABBITMQ_NODE_IP_ADDRESS" ] && [ "x" != "x$NODE_IP_ADDRESS" ] && RABBITMQ_NODE_IP_ADDRESS=${NODE_IP_ADDRESS} -[ "x" = "x$RABBITMQ_NODE_PORT" ] && [ "x" != "x$NODE_PORT" ] && RABBITMQ_NODE_PORT=${NODE_PORT} -if [ "x" = "x$RABBITMQ_NODE_IP_ADDRESS" ] -then - if [ "x" != "x$RABBITMQ_NODE_PORT" ] - then RABBITMQ_NODE_IP_ADDRESS=${DEFAULT_NODE_IP_ADDRESS} - fi -else - if [ "x" = "x$RABBITMQ_NODE_PORT" ] - then RABBITMQ_NODE_PORT=${DEFAULT_NODE_PORT} - fi -fi -[ "x" = "x$RABBITMQ_NODENAME" ] && RABBITMQ_NODENAME=${NODENAME} -[ "x" = "x$RABBITMQ_SCRIPT_HOME" ] && RABBITMQ_SCRIPT_HOME=${SCRIPT_HOME} -[ "x" = "x$RABBITMQ_PIDS_FILE" ] && RABBITMQ_PIDS_FILE=${PIDS_FILE} -[ "x" = "x$RABBITMQ_MULTI_ERL_ARGS" ] && RABBITMQ_MULTI_ERL_ARGS=${MULTI_ERL_ARGS} -[ "x" = "x$RABBITMQ_MULTI_START_ARGS" ] && RABBITMQ_MULTI_START_ARGS=${MULTI_START_ARGS} -[ "x" = "x$RABBITMQ_CONFIG_FILE" ] && RABBITMQ_CONFIG_FILE=${CONFIG_FILE} - -export \ - RABBITMQ_NODENAME \ - RABBITMQ_NODE_IP_ADDRESS \ - RABBITMQ_NODE_PORT \ - RABBITMQ_SCRIPT_HOME \ - RABBITMQ_PIDS_FILE \ - RABBITMQ_CONFIG_FILE - -RABBITMQ_CONFIG_ARG= -[ -f "${RABBITMQ_CONFIG_FILE}.config" ] && RABBITMQ_CONFIG_ARG="-config ${RABBITMQ_CONFIG_FILE}" - -# we need to turn off path expansion because some of the vars, notably -# 
RABBITMQ_MULTI_ERL_ARGS, may contain terms that look like globs and -# there is no other way of preventing their expansion. -set -f - -exec erl \ - -pa "${RABBITMQ_HOME}/ebin" \ - -noinput \ - -hidden \ - ${RABBITMQ_MULTI_ERL_ARGS} \ - -sname rabbitmq_multi$$ \ - ${RABBITMQ_CONFIG_ARG} \ - -s rabbit_multi \ - ${RABBITMQ_MULTI_START_ARGS} \ - -extra "$@" diff --git a/scripts/rabbitmq-multi.bat b/scripts/rabbitmq-multi.bat deleted file mode 100644 index a2d10f2e..00000000 --- a/scripts/rabbitmq-multi.bat +++ /dev/null @@ -1,84 +0,0 @@ -@echo off
-REM The contents of this file are subject to the Mozilla Public License
-REM Version 1.1 (the "License"); you may not use this file except in
-REM compliance with the License. You may obtain a copy of the License
-REM at http://www.mozilla.org/MPL/
-REM
-REM Software distributed under the License is distributed on an "AS IS"
-REM basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
-REM the License for the specific language governing rights and
-REM limitations under the License.
-REM
-REM The Original Code is RabbitMQ.
-REM
-REM The Initial Developer of the Original Code is VMware, Inc.
-REM Copyright (c) 2007-2011 VMware, Inc. All rights reserved.
-REM
-
-setlocal
-
-rem Preserve values that might contain exclamation marks before
-rem enabling delayed expansion
-set TDP0=%~dp0
-set STAR=%*
-setlocal enabledelayedexpansion
-
-if "!RABBITMQ_BASE!"=="" (
- set RABBITMQ_BASE=!APPDATA!\RabbitMQ
-)
-
-if "!COMPUTERNAME!"=="" (
- set COMPUTERNAME=localhost
-)
-
-if "!RABBITMQ_NODENAME!"=="" (
- set RABBITMQ_NODENAME=rabbit@!COMPUTERNAME!
-)
-
-if "!RABBITMQ_NODE_IP_ADDRESS!"=="" (
- if not "!RABBITMQ_NODE_PORT!"=="" (
- set RABBITMQ_NODE_IP_ADDRESS=0.0.0.0
- )
-) else (
- if "!RABBITMQ_NODE_PORT!"=="" (
- set RABBITMQ_NODE_PORT=5672
- )
-)
-
-set RABBITMQ_PIDS_FILE=!RABBITMQ_BASE!\rabbitmq.pids
-set RABBITMQ_SCRIPT_HOME=!TDP0!
-
-if "!RABBITMQ_CONFIG_FILE!"=="" (
- set RABBITMQ_CONFIG_FILE=!RABBITMQ_BASE!\rabbitmq
-)
-
-if exist "!RABBITMQ_CONFIG_FILE!.config" (
- set RABBITMQ_CONFIG_ARG=-config "!RABBITMQ_CONFIG_FILE!"
-) else (
- set RABBITMQ_CONFIG_ARG=
-)
-
-if not exist "!ERLANG_HOME!\bin\erl.exe" (
- echo.
- echo ******************************
- echo ERLANG_HOME not set correctly.
- echo ******************************
- echo.
- echo Please either set ERLANG_HOME to point to your Erlang installation or place the
- echo RabbitMQ server distribution in the Erlang lib folder.
- echo.
- exit /B
-)
-
-"!ERLANG_HOME!\bin\erl.exe" ^
--pa "!TDP0!..\ebin" ^
--noinput -hidden ^
-!RABBITMQ_MULTI_ERL_ARGS! ^
--sname rabbitmq_multi!RANDOM! ^
-!RABBITMQ_CONFIG_ARG! ^
--s rabbit_multi ^
-!RABBITMQ_MULTI_START_ARGS! ^
--extra !STAR!
-
-endlocal
-endlocal
diff --git a/scripts/rabbitmq-server b/scripts/rabbitmq-server index 5c390a51..2f80eb96 100755 --- a/scripts/rabbitmq-server +++ b/scripts/rabbitmq-server @@ -16,7 +16,6 @@ ## SERVER_ERL_ARGS="+K true +A30 +P 1048576 \ --kernel inet_default_listen_options [{nodelay,true}] \ -kernel inet_default_connect_options [{nodelay,true}]" CONFIG_FILE=/etc/rabbitmq/rabbitmq LOG_BASE=/var/log/rabbitmq diff --git a/scripts/rabbitmq-server.bat b/scripts/rabbitmq-server.bat index 0cfa5ea8..5e2097db 100644 --- a/scripts/rabbitmq-server.bat +++ b/scripts/rabbitmq-server.bat @@ -72,17 +72,14 @@ rem Log management (rotation, filtering based of size...) is left as an exercice set BACKUP_EXTENSION=.1
-set LOGS=!RABBITMQ_BASE!\log\!RABBITMQ_NODENAME!.log
-set SASL_LOGS=!RABBITMQ_BASE!\log\!RABBITMQ_NODENAME!-sasl.log
-
-set LOGS_BACKUP=!RABBITMQ_BASE!\log\!RABBITMQ_NODENAME!.log!BACKUP_EXTENSION!
-set SASL_LOGS_BACKUP=!RABBITMQ_BASE!\log\!RABBITMQ_NODENAME!-sasl.log!BACKUP_EXTENSION!
+set LOGS=!RABBITMQ_LOG_BASE!\!RABBITMQ_NODENAME!.log
+set SASL_LOGS=!RABBITMQ_LOG_BASE!\!RABBITMQ_NODENAME!-sasl.log
if exist "!LOGS!" (
- type "!LOGS!" >> "!LOGS_BACKUP!"
+ type "!LOGS!" >> "!LOGS!!BACKUP_EXTENSION!"
)
if exist "!SASL_LOGS!" (
- type "!SASL_LOGS!" >> "!SASL_LOGS_BACKUP!"
+ type "!SASL_LOGS!" >> "!SASL_LOGS!!BACKUP_EXTENSION!"
)
rem End of log management
@@ -142,13 +139,12 @@ if not "!RABBITMQ_NODE_IP_ADDRESS!"=="" ( +W w ^
+A30 ^
+P 1048576 ^
--kernel inet_default_listen_options "[{nodelay, true}]" ^
-kernel inet_default_connect_options "[{nodelay, true}]" ^
!RABBITMQ_LISTEN_ARG! ^
--kernel error_logger {file,\""!RABBITMQ_LOG_BASE!/!RABBITMQ_NODENAME!.log"\"} ^
+-kernel error_logger {file,\""!LOGS:\=/!"\"} ^
!RABBITMQ_SERVER_ERL_ARGS! ^
-sasl errlog_type error ^
--sasl sasl_error_logger {file,\""!RABBITMQ_LOG_BASE!/!RABBITMQ_NODENAME!-sasl.log"\"} ^
+-sasl sasl_error_logger {file,\""!SASL_LOGS:\=/!"\"} ^
-os_mon start_cpu_sup true ^
-os_mon start_disksup false ^
-os_mon start_memsup false ^
diff --git a/scripts/rabbitmq-service.bat b/scripts/rabbitmq-service.bat index 43520b55..b2aa4f58 100644 --- a/scripts/rabbitmq-service.bat +++ b/scripts/rabbitmq-service.bat @@ -105,17 +105,14 @@ rem Log management (rotation, filtering based on size...) is left as an exercise set BACKUP_EXTENSION=.1
-set LOGS=!RABBITMQ_BASE!\log\!RABBITMQ_NODENAME!.log
-set SASL_LOGS=!RABBITMQ_BASE!\log\!RABBITMQ_NODENAME!-sasl.log
-
-set LOGS_BACKUP=!RABBITMQ_BASE!\log\!RABBITMQ_NODENAME!.log!BACKUP_EXTENSION!
-set SASL_LOGS_BACKUP=!RABBITMQ_BASE!\log\!RABBITMQ_NODENAME!-sasl.log!BACKUP_EXTENSION!
+set LOGS=!RABBITMQ_LOG_BASE!\!RABBITMQ_NODENAME!.log
+set SASL_LOGS=!RABBITMQ_LOG_BASE!\!RABBITMQ_NODENAME!-sasl.log
if exist "!LOGS!" (
- type "!LOGS!" >> "!LOGS_BACKUP!"
+ type "!LOGS!" >> "!LOGS!!BACKUP_EXTENSION!"
)
if exist "!SASL_LOGS!" (
- type "!SASL_LOGS!" >> "!SASL_LOGS_BACKUP!"
+ type "!SASL_LOGS!" >> "!SASL_LOGS!!BACKUP_EXTENSION!"
)
rem End of log management
@@ -207,13 +204,12 @@ set ERLANG_SERVICE_ARGUMENTS= ^ -s rabbit ^
+W w ^
+A30 ^
--kernel inet_default_listen_options "[{nodelay,true}]" ^
-kernel inet_default_connect_options "[{nodelay,true}]" ^
!RABBITMQ_LISTEN_ARG! ^
--kernel error_logger {file,\""!RABBITMQ_LOG_BASE!/!RABBITMQ_NODENAME!.log"\"} ^
+-kernel error_logger {file,\""!LOGS:\=/!"\"} ^
!RABBITMQ_SERVER_ERL_ARGS! ^
-sasl errlog_type error ^
--sasl sasl_error_logger {file,\""!RABBITMQ_LOG_BASE!/!RABBITMQ_NODENAME!-sasl.log"\"} ^
+-sasl sasl_error_logger {file,\""!SASL_LOGS:\=/!"\"} ^
-os_mon start_cpu_sup true ^
-os_mon start_disksup false ^
-os_mon start_memsup false ^
@@ -231,6 +227,7 @@ set ERLANG_SERVICE_ARGUMENTS=!ERLANG_SERVICE_ARGUMENTS:"=\"! -stopaction "rabbit:stop_and_halt()." ^
-sname !RABBITMQ_NODENAME! ^
!CONSOLE_FLAG! ^
+-comment "A robust and scalable messaging broker" ^
-args "!ERLANG_SERVICE_ARGUMENTS!" > NUL
goto END
diff --git a/src/delegate.erl b/src/delegate.erl index 46bd8245..17046201 100644 --- a/src/delegate.erl +++ b/src/delegate.erl @@ -18,7 +18,7 @@ -behaviour(gen_server2). --export([start_link/1, invoke_no_result/2, invoke/2, delegate_count/1]). +-export([start_link/1, invoke_no_result/2, invoke/2]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). @@ -36,8 +36,6 @@ ([pid()], fun ((pid()) -> A)) -> {[{pid(), A}], [{pid(), term()}]}). --spec(delegate_count/1 :: ([node()]) -> non_neg_integer()). - -endif. %%---------------------------------------------------------------------------- @@ -111,22 +109,14 @@ group_pids_by_node(Pids) -> node(Pid), fun (List) -> [Pid | List] end, [Pid], Remote)} end, {[], orddict:new()}, Pids). -delegate_count([RemoteNode | _]) -> - {ok, Count} = case application:get_env(rabbit, delegate_count) of - undefined -> rpc:call(RemoteNode, application, get_env, - [rabbit, delegate_count]); - Result -> Result - end, - Count. - delegate_name(Hash) -> list_to_atom("delegate_" ++ integer_to_list(Hash)). delegate(RemoteNodes) -> case get(delegate) of - undefined -> Name = - delegate_name(erlang:phash2( - self(), delegate_count(RemoteNodes))), + undefined -> Name = delegate_name( + erlang:phash2(self(), + delegate_sup:count(RemoteNodes))), put(delegate, Name), Name; Name -> Name diff --git a/src/delegate_sup.erl b/src/delegate_sup.erl index e0ffa7c8..fc693c7d 100644 --- a/src/delegate_sup.erl +++ b/src/delegate_sup.erl @@ -18,7 +18,7 @@ -behaviour(supervisor). --export([start_link/0]). +-export([start_link/1, count/1]). -export([init/1]). @@ -28,20 +28,32 @@ -ifdef(use_specs). --spec(start_link/0 :: () -> {'ok', pid()} | {'error', any()}). +-spec(start_link/1 :: (integer()) -> {'ok', pid()} | {'error', any()}). +-spec(count/1 :: ([node()]) -> integer()). -endif. %%---------------------------------------------------------------------------- -start_link() -> - supervisor:start_link({local, ?SERVER}, ?MODULE, []). 
+start_link(Count) -> + supervisor:start_link({local, ?SERVER}, ?MODULE, [Count]). + +count([]) -> + 1; +count([Node | Nodes]) -> + try + length(supervisor:which_children({?SERVER, Node})) + catch exit:{{R, _}, _} when R =:= nodedown; R =:= shutdown -> + count(Nodes); + exit:{R, _} when R =:= noproc; R =:= normal; R =:= shutdown; + R =:= nodedown -> + count(Nodes) + end. %%---------------------------------------------------------------------------- -init(_Args) -> - DCount = delegate:delegate_count([node()]), +init([Count]) -> {ok, {{one_for_one, 10, 10}, [{Num, {delegate, start_link, [Num]}, transient, 16#ffffffff, worker, [delegate]} || - Num <- lists:seq(0, DCount - 1)]}}. + Num <- lists:seq(0, Count - 1)]}}. diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index b5b07eca..235e14c0 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -146,7 +146,8 @@ -export([open/3, close/1, read/2, append/2, sync/1, position/2, truncate/1, last_sync_offset/1, current_virtual_offset/1, current_raw_offset/1, flush/1, copy/3, set_maximum_since_use/1, delete/1, clear/1]). --export([obtain/0, transfer/1, set_limit/1, get_limit/0]). +-export([obtain/0, transfer/1, set_limit/1, get_limit/0, info_keys/0, info/0, + info/1]). -export([ulimit/0]). -export([start_link/0, init/1, handle_call/3, handle_cast/2, handle_info/2, @@ -155,13 +156,6 @@ -define(SERVER, ?MODULE). -define(RESERVED_FOR_OTHERS, 100). -%% Googling around suggests that Windows has a limit somewhere around -%% 16M, eg -%% http://blogs.technet.com/markrussinovich/archive/2009/09/29/3283844.aspx -%% however, it turns out that's only available through the win32 -%% API. Via the C Runtime, we have just 512: -%% http://msdn.microsoft.com/en-us/library/6e3b887c%28VS.80%29.aspx --define(FILE_HANDLES_LIMIT_WINDOWS, 512). -define(FILE_HANDLES_LIMIT_OTHER, 1024). -define(FILE_HANDLES_CHECK_INTERVAL, 2000). @@ -241,7 +235,7 @@ -> val_or_error(ref())). -spec(close/1 :: (ref()) -> ok_or_error()). 
-spec(read/2 :: (ref(), non_neg_integer()) -> - val_or_error([char()] | binary()) | 'eof'). + val_or_error([char()] | binary()) | 'eof'). -spec(append/2 :: (ref(), iodata()) -> ok_or_error()). -spec(sync/1 :: (ref()) -> ok_or_error()). -spec(position/2 :: (ref(), position()) -> val_or_error(offset())). @@ -251,7 +245,7 @@ -spec(current_raw_offset/1 :: (ref()) -> val_or_error(offset())). -spec(flush/1 :: (ref()) -> ok_or_error()). -spec(copy/3 :: (ref(), ref(), non_neg_integer()) -> - val_or_error(non_neg_integer())). + val_or_error(non_neg_integer())). -spec(set_maximum_since_use/1 :: (non_neg_integer()) -> 'ok'). -spec(delete/1 :: (ref()) -> ok_or_error()). -spec(clear/1 :: (ref()) -> ok_or_error()). @@ -259,11 +253,17 @@ -spec(transfer/1 :: (pid()) -> 'ok'). -spec(set_limit/1 :: (non_neg_integer()) -> 'ok'). -spec(get_limit/0 :: () -> non_neg_integer()). +-spec(info_keys/0 :: () -> [atom()]). +-spec(info/0 :: () -> [{atom(), any()}]). +-spec(info/1 :: ([atom()]) -> [{atom(), any()}]). -spec(ulimit/0 :: () -> 'infinity' | 'unknown' | non_neg_integer()). -endif. %%---------------------------------------------------------------------------- +-define(INFO_KEYS, [obtain_count, obtain_limit]). + +%%---------------------------------------------------------------------------- %% Public API %%---------------------------------------------------------------------------- @@ -494,6 +494,11 @@ set_limit(Limit) -> get_limit() -> gen_server:call(?SERVER, get_limit, infinity). +info_keys() -> ?INFO_KEYS. + +info() -> info(?INFO_KEYS). +info(Items) -> gen_server:call(?SERVER, {info, Items}, infinity). + %%---------------------------------------------------------------------------- %% Internal functions %%---------------------------------------------------------------------------- @@ -789,6 +794,12 @@ write_buffer(Handle = #handle { hdl = Hdl, offset = Offset, {Error, Handle} end. +infos(Items, State) -> [{Item, i(Item, State)} || Item <- Items]. 
+ +i(obtain_count, #fhc_state{obtain_count = Count}) -> Count; +i(obtain_limit, #fhc_state{obtain_limit = Limit}) -> Limit; +i(Item, _) -> throw({bad_argument, Item}). + %%---------------------------------------------------------------------------- %% gen_server callbacks %%---------------------------------------------------------------------------- @@ -849,35 +860,41 @@ handle_call({open, Pid, Requested, EldestUnusedSince}, From, false -> {noreply, run_pending_item(Item, State1)} end; -handle_call({obtain, Pid}, From, State = #fhc_state { obtain_limit = Limit, - obtain_count = Count, - obtain_pending = Pending, - clients = Clients }) - when Limit =/= infinity andalso Count >= Limit -> - ok = track_client(Pid, Clients), - true = ets:update_element(Clients, Pid, {#cstate.blocked, true}), - Item = #pending { kind = obtain, pid = Pid, requested = 1, from = From }, - {noreply, State #fhc_state { obtain_pending = pending_in(Item, Pending) }}; handle_call({obtain, Pid}, From, State = #fhc_state { obtain_count = Count, obtain_pending = Pending, clients = Clients }) -> - Item = #pending { kind = obtain, pid = Pid, requested = 1, from = From }, ok = track_client(Pid, Clients), - case needs_reduce(State #fhc_state { obtain_count = Count + 1 }) of - true -> - true = ets:update_element(Clients, Pid, {#cstate.blocked, true}), - {noreply, reduce(State #fhc_state { - obtain_pending = pending_in(Item, Pending) })}; - false -> - {noreply, run_pending_item(Item, State)} - end; + Item = #pending { kind = obtain, pid = Pid, requested = 1, from = From }, + Enqueue = fun () -> + true = ets:update_element(Clients, Pid, + {#cstate.blocked, true}), + State #fhc_state { + obtain_pending = pending_in(Item, Pending) } + end, + {noreply, + case obtain_limit_reached(State) of + true -> Enqueue(); + false -> case needs_reduce(State #fhc_state { + obtain_count = Count + 1 }) of + true -> reduce(Enqueue()); + false -> adjust_alarm( + State, run_pending_item(Item, State)) + end + end}; + 
handle_call({set_limit, Limit}, _From, State) -> - {reply, ok, maybe_reduce( - process_pending(State #fhc_state { - limit = Limit, - obtain_limit = obtain_limit(Limit) }))}; + {reply, ok, adjust_alarm( + State, maybe_reduce( + process_pending( + State #fhc_state { + limit = Limit, + obtain_limit = obtain_limit(Limit) })))}; + handle_call(get_limit, _From, State = #fhc_state { limit = Limit }) -> - {reply, Limit, State}. + {reply, Limit, State}; + +handle_call({info, Items}, _From, State) -> + {reply, infos(Items, State), State}. handle_cast({register_callback, Pid, MFA}, State = #fhc_state { clients = Clients }) -> @@ -900,9 +917,9 @@ handle_cast({close, Pid, EldestUnusedSince}, _ -> dict:store(Pid, EldestUnusedSince, Elders) end, ets:update_counter(Clients, Pid, {#cstate.pending_closes, -1, 0, 0}), - {noreply, process_pending( + {noreply, adjust_alarm(State, process_pending( update_counts(open, Pid, -1, - State #fhc_state { elders = Elders1 }))}; + State #fhc_state { elders = Elders1 })))}; handle_cast({transfer, FromPid, ToPid}, State) -> ok = track_client(ToPid, State#fhc_state.clients), @@ -924,13 +941,15 @@ handle_info({'DOWN', _MRef, process, Pid, _Reason}, ets:lookup(Clients, Pid), true = ets:delete(Clients, Pid), FilterFun = fun (#pending { pid = Pid1 }) -> Pid1 =/= Pid end, - {noreply, process_pending( - State #fhc_state { - open_count = OpenCount - Opened, - open_pending = filter_pending(FilterFun, OpenPending), - obtain_count = ObtainCount - Obtained, - obtain_pending = filter_pending(FilterFun, ObtainPending), - elders = dict:erase(Pid, Elders) })}. + {noreply, adjust_alarm( + State, + process_pending( + State #fhc_state { + open_count = OpenCount - Opened, + open_pending = filter_pending(FilterFun, OpenPending), + obtain_count = ObtainCount - Obtained, + obtain_pending = filter_pending(FilterFun, ObtainPending), + elders = dict:erase(Pid, Elders) }))}. 
terminate(_Reason, State = #fhc_state { clients = Clients }) -> ets:delete(Clients), @@ -951,12 +970,13 @@ queue_fold(Fun, Init, Q) -> filter_pending(Fun, {Count, Queue}) -> {Delta, Queue1} = - queue_fold(fun (Item, {DeltaN, QueueN}) -> - case Fun(Item) of - true -> {DeltaN, queue:in(Item, QueueN)}; - false -> {DeltaN - requested(Item), QueueN} - end - end, {0, queue:new()}, Queue), + queue_fold( + fun (Item = #pending { requested = Requested }, {DeltaN, QueueN}) -> + case Fun(Item) of + true -> {DeltaN, queue:in(Item, QueueN)}; + false -> {DeltaN - Requested, QueueN} + end + end, {0, queue:new()}, Queue), {Count + Delta, Queue1}. pending_new() -> @@ -990,8 +1010,17 @@ obtain_limit(Limit) -> case ?OBTAIN_LIMIT(Limit) of OLimit -> OLimit end. -requested({_Kind, _Pid, Requested, _From}) -> - Requested. +obtain_limit_reached(#fhc_state { obtain_limit = Limit, + obtain_count = Count}) -> + Limit =/= infinity andalso Count >= Limit. + +adjust_alarm(OldState, NewState) -> + case {obtain_limit_reached(OldState), obtain_limit_reached(NewState)} of + {false, true} -> alarm_handler:set_alarm({file_descriptor_limit, []}); + {true, false} -> alarm_handler:clear_alarm(file_descriptor_limit); + _ -> ok + end, + NewState. process_pending(State = #fhc_state { limit = infinity }) -> State; @@ -1094,7 +1123,7 @@ reduce(State = #fhc_state { open_pending = OpenPending, case CStates of [] -> ok; _ -> case (Sum / ClientCount) - - (1000 * ?FILE_HANDLES_CHECK_INTERVAL) of + (1000 * ?FILE_HANDLES_CHECK_INTERVAL) of AverageAge when AverageAge > 0 -> notify_age(CStates, AverageAge); _ -> @@ -1118,11 +1147,12 @@ notify_age(CStates, AverageAge) -> end, CStates). notify_age0(Clients, CStates, Required) -> - Notifications = - [CState || CState <- CStates, CState#cstate.callback =/= undefined], - {L1, L2} = lists:split(random:uniform(length(Notifications)), - Notifications), - notify(Clients, Required, L2 ++ L1). 
+ case [CState || CState <- CStates, CState#cstate.callback =/= undefined] of + [] -> ok; + Notifications -> S = random:uniform(length(Notifications)), + {L1, L2} = lists:split(S, Notifications), + notify(Clients, Required, L2 ++ L1) + end. notify(_Clients, _Required, []) -> ok; @@ -1147,29 +1177,20 @@ track_client(Pid, Clients) -> false -> ok end. -%% For all unices, assume ulimit exists. Further googling suggests -%% that BSDs (incl OS X), solaris and linux all agree that ulimit -n -%% is file handles + +%% To increase the number of file descriptors: on Windows set ERL_MAX_PORTS +%% environment variable, on Linux set `ulimit -n`. ulimit() -> - case os:type() of - {win32, _OsName} -> - ?FILE_HANDLES_LIMIT_WINDOWS; - {unix, _OsName} -> - %% Under Linux, Solaris and FreeBSD, ulimit is a shell - %% builtin, not a command. In OS X and AIX it's a command. - %% Fortunately, os:cmd invokes the cmd in a shell env, so - %% we're safe in all cases. - case os:cmd("ulimit -n") of - "unlimited" -> - infinity; - String = [C|_] when $0 =< C andalso C =< $9 -> - list_to_integer( - lists:takewhile( - fun (D) -> $0 =< D andalso D =< $9 end, String)); - _ -> - %% probably a variant of - %% "/bin/sh: line 1: ulimit: command not found\n" - unknown + case proplists:get_value(max_fds, erlang:system_info(check_io)) of + MaxFds when is_integer(MaxFds) andalso MaxFds > 1 -> + case os:type() of + {win32, _OsName} -> + %% On Windows max_fds is twice the number of open files: + %% https://github.com/yrashk/erlang/blob/e1282325ed75e52a98d5/erts/emulator/sys/win32/sys.c#L2459-2466 + MaxFds div 2; + _Any -> + %% For other operating systems trust Erlang. + MaxFds end; _ -> unknown diff --git a/src/gen_server2.erl b/src/gen_server2.erl index a637dddd..35258139 100644 --- a/src/gen_server2.erl +++ b/src/gen_server2.erl @@ -58,6 +58,20 @@ %% hibernate the process immediately, as it would if backoff wasn't %% being used. Instead it'll wait for the current timeout as described %% above. 
+%% +%% 7) The callback module can return from any of the handle_* +%% functions, a {become, Module, State} triple, or a {become, Module, +%% State, Timeout} quadruple. This allows the gen_server to +%% dynamically change the callback module. The State is the new state +%% which will be passed into any of the callback functions in the new +%% module. Note there is no form also encompassing a reply, thus if +%% you wish to reply in handle_call/3 and change the callback module, +%% you need to use gen_server2:reply/2 to issue the reply manually. +%% +%% 8) The callback module can optionally implement +%% format_message_queue/2 which is the equivalent of format_status/2 +%% but where the second argument is specifically the priority_queue +%% which contains the prioritised message_queue. %% All modifications are (C) 2009-2011 VMware, Inc. @@ -444,8 +458,8 @@ unregister_name({global,Name}) -> _ = global:unregister_name(Name); unregister_name(Pid) when is_pid(Pid) -> Pid; -% Under R12 let's just ignore it, as we have a single term as Name. -% On R13 it will never get here, as we get tuple with 'local/global' atom. +%% Under R12 let's just ignore it, as we have a single term as Name. +%% On R13 it will never get here, as we get tuple with 'local/global' atom. unregister_name(_Name) -> ok. extend_backoff(undefined) -> @@ -584,41 +598,35 @@ adjust_timeout_state(SleptAt, AwokeAt, {backoff, CurrentTO, MinimumTO, CurrentTO1 = Base + Extra, {backoff, CurrentTO1, MinimumTO, DesiredHibPeriod, RandomState1}. 
-in({'$gen_cast', Msg}, GS2State = #gs2_state { prioritise_cast = PC, - queue = Queue }) -> - GS2State #gs2_state { queue = priority_queue:in( - {'$gen_cast', Msg}, - PC(Msg, GS2State), Queue) }; -in({'$gen_call', From, Msg}, GS2State = #gs2_state { prioritise_call = PC, - queue = Queue }) -> - GS2State #gs2_state { queue = priority_queue:in( - {'$gen_call', From, Msg}, - PC(Msg, From, GS2State), Queue) }; -in(Input, GS2State = #gs2_state { prioritise_info = PI, queue = Queue }) -> - GS2State #gs2_state { queue = priority_queue:in( - Input, PI(Input, GS2State), Queue) }. - -process_msg(Msg, - GS2State = #gs2_state { parent = Parent, - name = Name, - debug = Debug }) -> - case Msg of - {system, From, Req} -> - sys:handle_system_msg( - Req, From, Parent, ?MODULE, Debug, - GS2State); - %% gen_server puts Hib on the end as the 7th arg, but that - %% version of the function seems not to be documented so - %% leaving out for now. - {'EXIT', Parent, Reason} -> - terminate(Reason, Msg, GS2State); - _Msg when Debug =:= [] -> - handle_msg(Msg, GS2State); - _Msg -> - Debug1 = sys:handle_debug(Debug, fun print_event/3, - Name, {in, Msg}), - handle_msg(Msg, GS2State #gs2_state { debug = Debug1 }) - end. +in({'$gen_cast', Msg} = Input, + GS2State = #gs2_state { prioritise_cast = PC }) -> + in(Input, PC(Msg, GS2State), GS2State); +in({'$gen_call', From, Msg} = Input, + GS2State = #gs2_state { prioritise_call = PC }) -> + in(Input, PC(Msg, From, GS2State), GS2State); +in({'EXIT', Parent, _R} = Input, GS2State = #gs2_state { parent = Parent }) -> + in(Input, infinity, GS2State); +in({system, _From, _Req} = Input, GS2State) -> + in(Input, infinity, GS2State); +in(Input, GS2State = #gs2_state { prioritise_info = PI }) -> + in(Input, PI(Input, GS2State), GS2State). + +in(Input, Priority, GS2State = #gs2_state { queue = Queue }) -> + GS2State # gs2_state { queue = priority_queue:in(Input, Priority, Queue) }. 
+ +process_msg({system, From, Req}, + GS2State = #gs2_state { parent = Parent, debug = Debug }) -> + sys:handle_system_msg(Req, From, Parent, ?MODULE, Debug, GS2State); +process_msg({'EXIT', Parent, Reason} = Msg, + GS2State = #gs2_state { parent = Parent }) -> + %% gen_server puts Hib on the end as the 7th arg, but that version + %% of the fun seems not to be documented so leaving out for now. + terminate(Reason, Msg, GS2State); +process_msg(Msg, GS2State = #gs2_state { debug = [] }) -> + handle_msg(Msg, GS2State); +process_msg(Msg, GS2State = #gs2_state { name = Name, debug = Debug }) -> + Debug1 = sys:handle_debug(Debug, fun print_event/3, Name, {in, Msg}), + handle_msg(Msg, GS2State #gs2_state { debug = Debug1 }). %%% --------------------------------------------------- %%% Send/recive functions @@ -880,6 +888,22 @@ handle_common_reply(Reply, Msg, GS2State = #gs2_state { name = Name, loop(GS2State #gs2_state { state = NState, time = Time1, debug = Debug1 }); + {become, Mod, NState} -> + Debug1 = common_debug(Debug, fun print_event/3, Name, + {become, Mod, NState}), + loop(find_prioritisers( + GS2State #gs2_state { mod = Mod, + state = NState, + time = infinity, + debug = Debug1 })); + {become, Mod, NState, Time1} -> + Debug1 = common_debug(Debug, fun print_event/3, Name, + {become, Mod, NState}), + loop(find_prioritisers( + GS2State #gs2_state { mod = Mod, + state = NState, + time = Time1, + debug = Debug1 })); _ -> handle_common_termination(Reply, Msg, GS2State) end. 
@@ -1136,17 +1160,22 @@ format_status(Opt, StatusData) -> end, Header = lists:concat(["Status for generic server ", NameTag]), Log = sys:get_debug(log, Debug, []), - Specfic = - case erlang:function_exported(Mod, format_status, 2) of - true -> case catch Mod:format_status(Opt, [PDict, State]) of - {'EXIT', _} -> [{data, [{"State", State}]}]; - Else -> Else - end; - _ -> [{data, [{"State", State}]}] - end, + Specfic = callback(Mod, format_status, [Opt, [PDict, State]], + fun () -> [{data, [{"State", State}]}] end), + Messages = callback(Mod, format_message_queue, [Opt, Queue], + fun () -> priority_queue:to_list(Queue) end), [{header, Header}, {data, [{"Status", SysState}, {"Parent", Parent}, {"Logged events", Log}, - {"Queued messages", priority_queue:to_list(Queue)}]} | + {"Queued messages", Messages}]} | Specfic]. + +callback(Mod, FunName, Args, DefaultThunk) -> + case erlang:function_exported(Mod, FunName, length(Args)) of + true -> case catch apply(Mod, FunName, Args) of + {'EXIT', _} -> DefaultThunk(); + Success -> Success + end; + false -> DefaultThunk() + end. diff --git a/src/gm.erl b/src/gm.erl new file mode 100644 index 00000000..8b7dc70c --- /dev/null +++ b/src/gm.erl @@ -0,0 +1,1379 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is VMware, Inc. +%% Copyright (c) 2007-2011 VMware, Inc. All rights reserved. +%% + +-module(gm). 
+ +%% Guaranteed Multicast +%% ==================== +%% +%% This module provides the ability to create named groups of +%% processes to which members can be dynamically added and removed, +%% and for messages to be broadcast within the group that are +%% guaranteed to reach all members of the group during the lifetime of +%% the message. The lifetime of a message is defined as being, at a +%% minimum, the time from which the message is first sent to any +%% member of the group, up until the time at which it is known by the +%% member who published the message that the message has reached all +%% group members. +%% +%% The guarantee given is that provided a message, once sent, makes it +%% to members who do not all leave the group, the message will +%% continue to propagate to all group members. +%% +%% Another way of stating the guarantee is that if member P publishes +%% messages m and m', then for all members P', if P' is a member of +%% the group prior to the publication of m, and P' receives m', then +%% P' will receive m. +%% +%% Note that only local-ordering is enforced: i.e. if member P sends +%% message m and then message m', then for-all members P', if P' +%% receives m and m', then they will receive m' after m. Causality +%% ordering is _not_ enforced. I.e. if member P receives message m +%% and as a result publishes message m', there is no guarantee that +%% other members P' will receive m before m'. +%% +%% +%% API Use +%% ------- +%% +%% Mnesia must be started. Use the idempotent create_tables/0 function +%% to create the tables required. +%% +%% start_link/3 +%% Provide the group name, the callback module name, and any arguments +%% you wish to be passed into the callback module's functions. The +%% joined/2 function will be called when we have joined the group, +%% with the arguments passed to start_link and a list of the current +%% members of the group. See the comments in behaviour_info/1 below +%% for further details of the callback functions. 
+%% +%% leave/1 +%% Provide the Pid. Removes the Pid from the group. The callback +%% terminate/2 function will be called. +%% +%% broadcast/2 +%% Provide the Pid and a Message. The message will be sent to all +%% members of the group as per the guarantees given above. This is a +%% cast and the function call will return immediately. There is no +%% guarantee that the message will reach any member of the group. +%% +%% confirmed_broadcast/2 +%% Provide the Pid and a Message. As per broadcast/2 except that this +%% is a call, not a cast, and only returns 'ok' once the Message has +%% reached every member of the group. Do not call +%% confirmed_broadcast/2 directly from the callback module otherwise +%% you will deadlock the entire group. +%% +%% group_members/1 +%% Provide the Pid. Returns a list of the current group members. +%% +%% +%% Implementation Overview +%% ----------------------- +%% +%% One possible means of implementation would be a fan-out from the +%% sender to every member of the group. This would require that the +%% group is fully connected, and, in the event that the original +%% sender of the message disappears from the group before the message +%% has made it to every member of the group, raises questions as to +%% who is responsible for sending on the message to new group members. +%% In particular, the issue is with [ Pid ! Msg || Pid <- Members ] - +%% if the sender dies part way through, who is responsible for +%% ensuring that the remaining Members receive the Msg? In the event +%% that within the group, messages sent are broadcast from a subset of +%% the members, the fan-out arrangement has the potential to +%% substantially impact the CPU and network workload of such members, +%% as such members would have to accommodate the cost of sending each +%% message to every group member. 
+%% +%% Instead, if the members of the group are arranged in a chain, then +%% it becomes easier to reason about who within the group has received +%% each message and who has not. It eases issues of responsibility: in +%% the event of a group member disappearing, the nearest upstream +%% member of the chain is responsible for ensuring that messages +%% continue to propagate down the chain. It also results in equal +%% distribution of sending and receiving workload, even if all +%% messages are being sent from just a single group member. This +%% configuration has the further advantage that it is not necessary +%% for every group member to know of every other group member, and +%% even that a group member does not have to be accessible from all +%% other group members. +%% +%% Performance is kept high by permitting pipelining and all +%% communication between joined group members is asynchronous. In the +%% chain A -> B -> C -> D, if A sends a message to the group, it will +%% not directly contact C or D. However, it must know that D receives +%% the message (in addition to B and C) before it can consider the +%% message fully sent. A simplistic implementation would require that +%% D replies to C, C replies to B and B then replies to A. This would +%% result in a propagation delay of twice the length of the chain. It +%% would also require, in the event of the failure of C, that D knows +%% to directly contact B and issue the necessary replies. Instead, the +%% chain forms a ring: D sends the message on to A: D does not +%% distinguish A as the sender, merely as the next member (downstream) +%% within the chain (which has now become a ring). When A receives +%% from D messages that A sent, it knows that all members have +%% received the message. 
However, the message is not dead yet: if C +%% died as B was sending to C, then B would need to detect the death +%% of C and forward the message on to D instead: thus every node has +%% to remember every message published until it is told that it can +%% forget about the message. This is essential not just for dealing +%% with failure of members, but also for the addition of new members. +%% +%% Thus once A receives the message back again, it then sends to B an +%% acknowledgement for the message, indicating that B can now forget +%% about the message. B does so, and forwards the ack to C. C forgets +%% the message, and forwards the ack to D, which forgets the message +%% and finally forwards the ack back to A. At this point, A takes no +%% further action: the message and its acknowledgement have made it to +%% every member of the group. The message is now dead, and any new +%% member joining the group at this point will not receive the +%% message. +%% +%% We therefore have two roles: +%% +%% 1. The sender, who upon receiving their own messages back, must +%% then send out acknowledgements, and upon receiving their own +%% acknowledgements back perform no further action. +%% +%% 2. The other group members who upon receiving messages and +%% acknowledgements must update their own internal state accordingly +%% (the sending member must also do this in order to be able to +%% accommodate failures), and forward messages on to their downstream +%% neighbours. +%% +%% +%% Implementation: It gets trickier +%% -------------------------------- +%% +%% Chain A -> B -> C -> D +%% +%% A publishes a message which B receives. A now dies. B and D will +%% detect the death of A, and will link up, thus the chain is now B -> +%% C -> D. B forwards A's message on to C, who forwards it to D, who +%% forwards it to B. Thus B is now responsible for A's messages - both +%% publications and acknowledgements that were in flight at the point +%% at which A died.
Even worse is that this is transitive: after B +%% forwards A's message to C, B dies as well. Now C is not only +%% responsible for B's in-flight messages, but is also responsible for +%% A's in-flight messages. +%% +%% Lemma 1: A member can only determine which dead members they have +%% inherited responsibility for if there is a total ordering on the +%% conflicting additions and subtractions of members from the group. +%% +%% Consider the simultaneous death of B and addition of B' that +%% transitions a chain from A -> B -> C to A -> B' -> C. Either B' or +%% C is responsible for in-flight messages from B. It is easy to +%% ensure that at least one of them thinks they have inherited B, but +%% if we do not ensure that exactly one of them inherits B, then we +%% could have B' converting publishes to acks, which then will crash C +%% as C does not believe it has issued acks for those messages. +%% +%% More complex scenarios are easy to concoct: A -> B -> C -> D -> E +%% becoming A -> C' -> E. Who has inherited which of B, C and D? +%% +%% However, for non-conflicting membership changes, only a partial +%% ordering is required. For example, A -> B -> C becoming A -> A' -> +%% B. The addition of A', between A and B can have no conflicts with +%% the death of C: it is clear that A has inherited C's messages. +%% +%% For ease of implementation, we adopt the simple solution, of +%% imposing a total order on all membership changes. +%% +%% On the death of a member, it is ensured the dead member's +%% neighbours become aware of the death, and the upstream neighbour +%% now sends to its new downstream neighbour its state, including the +%% messages pending acknowledgement. The downstream neighbour can then +%% use this to calculate which publishes and acknowledgements it has +%% missed out on, due to the death of its old upstream. Thus the +%% downstream can catch up, and continues the propagation of messages +%% through the group. 
+%% +%% Lemma 2: When a member is joining, it must synchronously +%% communicate with its upstream member in order to receive its +%% starting state atomically with its addition to the group. +%% +%% New members must start with the same state as their nearest +%% upstream neighbour. This ensures that they are not surprised by +%% acknowledgements they are sent, and that should their downstream +%% neighbour die, they are able to send the correct state to their new +%% downstream neighbour to ensure it can catch up. Thus in the +%% transition A -> B -> C becomes A -> A' -> B -> C becomes A -> A' -> +%% C, A' must start with the state of A, so that it can send C the +%% correct state when B dies, allowing C to detect any missed +%% messages. +%% +%% If A' starts by adding itself to the group membership, A could then +%% die, without A' having received the necessary state from A. This +%% would leave A' responsible for in-flight messages from A, but +%% having the least knowledge of all, of those messages. Thus A' must +%% start by synchronously calling A, which then immediately sends A' +%% back its state. A then adds A' to the group. If A dies at this +%% point then A' will be able to see this (as A' will fail to appear +%% in the group membership), and thus A' will ignore the state it +%% receives from A, and will simply repeat the process, trying to now +%% join downstream from some other member. This ensures that should +%% the upstream die as soon as the new member has been joined, the new +%% member is guaranteed to receive the correct state, allowing it to +%% correctly process messages inherited due to the death of its +%% upstream neighbour. +%% +%% The canonical definition of the group membership is held by a +%% distributed database. Whilst this allows the total ordering of +%% changes to be achieved, it is nevertheless undesirable to have to +%% query this database for the current view, upon receiving each +%% message.
Instead, we wish for members to be able to cache a view of +%% the group membership, which then requires a cache invalidation +%% mechanism. Each member maintains its own view of the group +%% membership. Thus when the group's membership changes, members may +%% need to become aware of such changes in order to be able to +%% accurately process messages they receive. Because of the +%% requirement of a total ordering of conflicting membership changes, +%% it is not possible to use the guaranteed broadcast mechanism to +%% communicate these changes: to achieve the necessary ordering, it +%% would be necessary for such messages to be published by exactly one +%% member, which can not be guaranteed given that such a member could +%% die. +%% +%% The total ordering we enforce on membership changes gives rise to a +%% view version number: every change to the membership creates a +%% different view, and the total ordering permits a simple +%% monotonically increasing view version number. +%% +%% Lemma 3: If a message is sent from a member that holds view version +%% N, it can be correctly processed by any member receiving the +%% message with a view version >= N. +%% +%% Initially, let us suppose that each view contains the ordering of +%% every member that was ever part of the group. Dead members are +%% marked as such. Thus we have a ring of members, some of which are +%% dead, and are thus inherited by the nearest alive downstream +%% member. +%% +%% In the chain A -> B -> C, all three members initially have view +%% version 1, which reflects reality. B publishes a message, which is +%% forwarded by C to A. B now dies, which A notices very quickly. Thus A +%% updates the view, creating version 2. It now forwards B's +%% publication, sending that message to its new downstream neighbour, +%% C. This happens before C is aware of the death of B.
C must become +%% aware of the view change before it interprets the message it has +%% received, otherwise it will fail to learn of the death of B, and +%% thus will not realise it has inherited B's messages (and will +%% likely crash). +%% +%% Thus very simply, we have that each subsequent view contains more +%% information than the preceding view. +%% +%% However, to avoid the views growing indefinitely, we need to be +%% able to delete members which have died _and_ for which no messages +%% are in-flight. This requires that upon inheriting a dead member, we +%% know the last publication sent by the dead member (this is easy: we +%% inherit a member because we are the nearest downstream member which +%% implies that we know at least as much as everyone else about the +%% publications of the dead member), and we know the earliest message +%% for which the acknowledgement is still in flight. +%% +%% In the chain A -> B -> C, when B dies, A will send to C its state +%% (as C is the new downstream from A), allowing C to calculate which +%% messages it has missed out on (described above). At this point, C +%% also inherits B's messages. If that state from A also includes the +%% last message published by B for which an acknowledgement has been +%% seen, then C knows exactly which further acknowledgements it must +%% receive (also including issuing acknowledgements for publications +%% still in-flight that it receives), after which it is known there +%% are no more messages in flight for B, thus all evidence that B was +%% ever part of the group can be safely removed from the canonical +%% group membership. +%% +%% Thus, for every message that a member sends, it includes with that +%% message its view version. When a member receives a message it will +%% update its view from the canonical copy, should its view be older +%% than the view version included in the message it has received.
+%% +%% The state held by each member therefore includes the messages from +%% each publisher pending acknowledgement, the last publication seen +%% from that publisher, and the last acknowledgement from that +%% publisher. In the case of the member's own publications or +%% inherited members, this last acknowledgement seen state indicates +%% the last acknowledgement retired, rather than sent. +%% +%% +%% Proof sketch +%% ------------ +%% +%% We need to prove that with the provided operational semantics, we +%% can never reach a state that is not well formed from a well-formed +%% starting state. +%% +%% Operational semantics (small step): straight-forward message +%% sending, process monitoring, state updates. +%% +%% Well formed state: dead members inherited by exactly one non-dead +%% member; for every entry in anyone's pending-acks, either (the +%% publication of the message is in-flight downstream from the member +%% and upstream from the publisher) or (the acknowledgement of the +%% message is in-flight downstream from the publisher and upstream +%% from the member). +%% +%% Proof by induction on the applicable operational semantics. +%% +%% +%% Related work +%% ------------ +%% +%% The ring configuration and double traversal of messages around the +%% ring is similar (though developed independently) to the LCR +%% protocol by [Levy 2008]. However, LCR differs in several +%% ways. Firstly, by using vector clocks, it enforces a total order of +%% message delivery, which is unnecessary for our purposes. More +%% significantly, it is built on top of a "group communication system" +%% which performs the group management functions, taking +%% responsibility away from the protocol as to how to cope with safely +%% adding and removing members. 
When membership changes do occur, the +%% protocol stipulates that every member must perform communication +%% with every other member of the group, to ensure all outstanding +%% deliveries complete, before the entire group transitions to the new +%% view. This, in total, requires two sets of all-to-all synchronous +%% communications. +%% +%% This is not only rather inefficient, but also does not explain what +%% happens upon the failure of a member during this process. It does +%% though entirely avoid the need for inheritance of responsibility of +%% dead members that our protocol incorporates. +%% +%% In [Marandi et al 2010], a Paxos-based protocol is described. This +%% work explicitly focuses on the efficiency of communication. LCR +%% (and our protocol too) are more efficient, but at the cost of +%% higher latency. The Ring-Paxos protocol is itself built on top of +%% IP-multicast, which rules it out for many applications where +%% point-to-point communication is all that can be required. They also +%% have an excellent related work section which I really ought to +%% read... +%% +%% +%% [Levy 2008] The Complexity of Reliable Distributed Storage, 2008. +%% [Marandi et al 2010] Ring Paxos: A High-Throughput Atomic Broadcast +%% Protocol + + +-behaviour(gen_server2). + +-export([create_tables/0, start_link/3, leave/1, broadcast/2, + confirmed_broadcast/2, group_members/1]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3, prioritise_cast/2, prioritise_info/2]). + +-export([behaviour_info/1]). + +-export([table_definitions/0, flush/1]). + +-define(GROUP_TABLE, gm_group). +-define(HIBERNATE_AFTER_MIN, 1000). +-define(DESIRED_HIBERNATE, 10000). +-define(BROADCAST_TIMER, 25). +-define(SETS, ordsets). +-define(DICT, orddict). + +-record(state, + { self, + left, + right, + group_name, + module, + view, + pub_count, + members_state, + callback_args, + confirms, + broadcast_buffer, + broadcast_timer + }). 
+ +-record(gm_group, { name, version, members }). + +-record(view_member, { id, aliases, left, right }). + +-record(member, { pending_ack, last_pub, last_ack }). + +-define(TABLE, {?GROUP_TABLE, [{record_name, gm_group}, + {attributes, record_info(fields, gm_group)}]}). +-define(TABLE_MATCH, {match, #gm_group { _ = '_' }}). + +-define(TAG, '$gm'). + +-ifdef(use_specs). + +-export_type([group_name/0]). + +-type(group_name() :: any()). + +-spec(create_tables/0 :: () -> 'ok'). +-spec(start_link/3 :: (group_name(), atom(), any()) -> + {'ok', pid()} | {'error', any()}). +-spec(leave/1 :: (pid()) -> 'ok'). +-spec(broadcast/2 :: (pid(), any()) -> 'ok'). +-spec(confirmed_broadcast/2 :: (pid(), any()) -> 'ok'). +-spec(group_members/1 :: (pid()) -> [pid()]). + +-endif. + +behaviour_info(callbacks) -> + [ + %% The joined, members_changed and handle_msg callbacks can all + %% return any of the following terms: + %% + %% 'ok' - the callback function returns normally + %% + %% {'stop', Reason} - the callback indicates the member should + %% stop with reason Reason and should leave the group. + %% + %% {'become', Module, Args} - the callback indicates that the + %% callback module should be changed to Module and that the + %% callback functions should now be passed the arguments + %% Args. This allows the callback module to be dynamically + %% changed. + + %% Called when we've successfully joined the group. Supplied with + %% Args provided in start_link, plus current group members. + {joined, 2}, + + %% Supplied with Args provided in start_link, the list of new + %% members and the list of members previously known to us that + %% have since died. Note that if a member joins and dies very + %% quickly, it's possible that we will never see that member + %% appear in either births or deaths. 
However we are guaranteed + %% that (1) we will see a member joining either in the births + %% here, or in the members passed to joined/2 before receiving + %% any messages from it; and (2) we will not see members die that + %% we have not seen born (or supplied in the members to + %% joined/2). + {members_changed, 3}, + + %% Supplied with Args provided in start_link, the sender, and the + %% message. This does get called for messages injected by this + %% member, however, in such cases, there is no special + %% significance of this invocation: it does not indicate that the + %% message has made it to any other members, let alone all other + %% members. + {handle_msg, 3}, + + %% Called on gm member termination as per rules in gen_server, + %% with the Args provided in start_link plus the termination + %% Reason. + {terminate, 2} + ]; +behaviour_info(_Other) -> + undefined. + +create_tables() -> + create_tables([?TABLE]). + +create_tables([]) -> + ok; +create_tables([{Table, Attributes} | Tables]) -> + case mnesia:create_table(Table, Attributes) of + {atomic, ok} -> create_tables(Tables); + {aborted, {already_exists, gm_group}} -> create_tables(Tables); + Err -> Err + end. + +table_definitions() -> + {Name, Attributes} = ?TABLE, + [{Name, [?TABLE_MATCH | Attributes]}]. + +start_link(GroupName, Module, Args) -> + gen_server2:start_link(?MODULE, [GroupName, Module, Args], []). + +leave(Server) -> + gen_server2:cast(Server, leave). + +broadcast(Server, Msg) -> + gen_server2:cast(Server, {broadcast, Msg}). + +confirmed_broadcast(Server, Msg) -> + gen_server2:call(Server, {confirmed_broadcast, Msg}, infinity). + +group_members(Server) -> + gen_server2:call(Server, group_members, infinity). + +flush(Server) -> + gen_server2:cast(Server, flush). 
+ + +init([GroupName, Module, Args]) -> + {MegaSecs, Secs, MicroSecs} = now(), + random:seed(MegaSecs, Secs, MicroSecs), + gen_server2:cast(self(), join), + Self = self(), + {ok, #state { self = Self, + left = {Self, undefined}, + right = {Self, undefined}, + group_name = GroupName, + module = Module, + view = undefined, + pub_count = 0, + members_state = undefined, + callback_args = Args, + confirms = queue:new(), + broadcast_buffer = [], + broadcast_timer = undefined }, hibernate, + {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. + + +handle_call({confirmed_broadcast, _Msg}, _From, + State = #state { members_state = undefined }) -> + reply(not_joined, State); + +handle_call({confirmed_broadcast, Msg}, _From, + State = #state { self = Self, + right = {Self, undefined}, + module = Module, + callback_args = Args }) -> + handle_callback_result({Module:handle_msg(Args, Self, Msg), ok, State}); + +handle_call({confirmed_broadcast, Msg}, From, State) -> + internal_broadcast(Msg, From, State); + +handle_call(group_members, _From, + State = #state { members_state = undefined }) -> + reply(not_joined, State); + +handle_call(group_members, _From, State = #state { view = View }) -> + reply(alive_view_members(View), State); + +handle_call({add_on_right, _NewMember}, _From, + State = #state { members_state = undefined }) -> + reply(not_ready, State); + +handle_call({add_on_right, NewMember}, _From, + State = #state { self = Self, + group_name = GroupName, + view = View, + members_state = MembersState, + module = Module, + callback_args = Args }) -> + Group = record_new_member_in_group( + GroupName, Self, NewMember, + fun (Group1) -> + View1 = group_to_view(Group1), + ok = send_right(NewMember, View1, + {catchup, Self, prepare_members_state( + MembersState)}) + end), + View2 = group_to_view(Group), + State1 = check_neighbours(State #state { view = View2 }), + Result = callback_view_changed(Args, Module, View, View2), + handle_callback_result({Result, 
{ok, Group}, State1}). + + +handle_cast({?TAG, ReqVer, Msg}, + State = #state { view = View, + group_name = GroupName, + module = Module, + callback_args = Args }) -> + {Result, State1} = + case needs_view_update(ReqVer, View) of + true -> + View1 = group_to_view(read_group(GroupName)), + {callback_view_changed(Args, Module, View, View1), + check_neighbours(State #state { view = View1 })}; + false -> + {ok, State} + end, + handle_callback_result( + if_callback_success( + Result, fun handle_msg_true/3, fun handle_msg_false/3, Msg, State1)); + +handle_cast({broadcast, _Msg}, State = #state { members_state = undefined }) -> + noreply(State); + +handle_cast({broadcast, Msg}, + State = #state { self = Self, + right = {Self, undefined}, + module = Module, + callback_args = Args }) -> + handle_callback_result({Module:handle_msg(Args, Self, Msg), State}); + +handle_cast({broadcast, Msg}, State) -> + internal_broadcast(Msg, none, State); + +handle_cast(join, State = #state { self = Self, + group_name = GroupName, + members_state = undefined, + module = Module, + callback_args = Args }) -> + View = join_group(Self, GroupName), + MembersState = + case alive_view_members(View) of + [Self] -> blank_member_state(); + _ -> undefined + end, + State1 = check_neighbours(State #state { view = View, + members_state = MembersState }), + handle_callback_result( + {Module:joined(Args, all_known_members(View)), State1}); + +handle_cast(leave, State) -> + {stop, normal, State}; + +handle_cast(flush, State) -> + noreply( + flush_broadcast_buffer(State #state { broadcast_timer = undefined })). 
+ + +handle_info({'DOWN', MRef, process, _Pid, _Reason}, + State = #state { self = Self, + left = Left, + right = Right, + group_name = GroupName, + view = View, + module = Module, + callback_args = Args, + confirms = Confirms }) -> + Member = case {Left, Right} of + {{Member1, MRef}, _} -> Member1; + {_, {Member1, MRef}} -> Member1; + _ -> undefined + end, + case Member of + undefined -> + noreply(State); + _ -> + View1 = + group_to_view(record_dead_member_in_group(Member, GroupName)), + State1 = State #state { view = View1 }, + {Result, State2} = + case alive_view_members(View1) of + [Self] -> + maybe_erase_aliases( + State1 #state { + members_state = blank_member_state(), + confirms = purge_confirms(Confirms) }); + _ -> + %% here we won't be pointing out any deaths: + %% the concern is that there maybe births + %% which we'd otherwise miss. + {callback_view_changed(Args, Module, View, View1), + State1} + end, + handle_callback_result({Result, check_neighbours(State2)}) + end. + + +terminate(Reason, State = #state { module = Module, + callback_args = Args }) -> + flush_broadcast_buffer(State), + Module:terminate(Args, Reason). + + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +prioritise_cast(flush, _State) -> 1; +prioritise_cast(_ , _State) -> 0. + +prioritise_info({'DOWN', _MRef, process, _Pid, _Reason}, _State) -> 1; +prioritise_info(_ , _State) -> 0. 
%% Processes one inter-member message (the third element of a ?TAG
%% cast). Returns {Result, State}, where Result is whatever
%% if_callback_success/5 or the clause itself produces.
handle_msg(check_neighbours, State) ->
    %% no-op - it's already been done by the calling handle_cast
    {ok, State};

%% Catchup from our left neighbour while we have no member state yet:
%% pass the same catchup on to our right neighbour, then adopt the
%% left neighbour's member state as our own.
handle_msg({catchup, Left, MembersStateLeft},
           State = #state { self = Self,
                            left = {Left, _MRefL},
                            right = {Right, _MRefR},
                            view = View,
                            members_state = undefined }) ->
    ok = send_right(Right, View, {catchup, Self, MembersStateLeft}),
    MembersStateLeft1 = build_members_state(MembersStateLeft),
    {ok, State #state { members_state = MembersStateLeft1 }};

%% Catchup from our left neighbour when we already hold member state:
%% compare our pending_ack queue with the left neighbour's for every
%% member known to either side, turn the differences into an activity
%% list, and process that list through the activity clause below.
handle_msg({catchup, Left, MembersStateLeft},
           State = #state { self = Self,
                            left = {Left, _MRefL},
                            view = View,
                            members_state = MembersState })
  when MembersState =/= undefined ->
    MembersStateLeft1 = build_members_state(MembersStateLeft),
    %% Union of the ids known to us and to the left neighbour.
    AllMembers = lists:usort(?DICT:fetch_keys(MembersState) ++
                                 ?DICT:fetch_keys(MembersStateLeft1)),
    {MembersState1, Activity} =
        lists:foldl(
          fun (Id, MembersStateActivity) ->
                  #member { pending_ack = PALeft, last_ack = LA } =
                      find_member_or_blank(Id, MembersStateLeft1),
                  with_member_acc(
                    fun (#member { pending_ack = PA } = Member, Activity1) ->
                            case is_member_alias(Id, Self, View) of
                                true ->
                                    %% Id is us or one of our aliases:
                                    %% emit the part the two queues
                                    %% share as pubs, and take over the
                                    %% left neighbour's last_ack.
                                    {_AcksInFlight, Pubs, _PA1} =
                                        find_prefix_common_suffix(PALeft, PA),
                                    {Member #member { last_ack = LA },
                                     activity_cons(Id, pubs_from_queue(Pubs),
                                                   [], Activity1)};
                                false ->
                                    %% Entries only we still hold
                                    %% become acks; entries only the
                                    %% left neighbour holds become pubs.
                                    {Acks, _Common, Pubs} =
                                        find_prefix_common_suffix(PA, PALeft),
                                    {Member,
                                     activity_cons(Id, pubs_from_queue(Pubs),
                                                   acks_from_queue(Acks),
                                                   Activity1)}
                            end
                    end, Id, MembersStateActivity)
          end, {MembersState, activity_nil()}, AllMembers),
    handle_msg({activity, Left, activity_finalise(Activity)},
               State #state { members_state = MembersState1 });

%% Catchup from anyone other than our current left neighbour is
%% ignored.
handle_msg({catchup, _NotLeft, _MembersState}, State) ->
    {ok, State};

%% Activity from our left neighbour: fold every {Id, Pubs, Acks} entry
%% into the member state, building the activity to pass on to the
%% right and answering confirms as our own publications come back.
handle_msg({activity, Left, Activity},
           State = #state { self = Self,
                            left = {Left, _MRefL},
                            view = View,
                            members_state = MembersState,
                            confirms = Confirms })
  when MembersState =/= undefined ->
    {MembersState1, {Confirms1, Activity1}} =
        lists:foldl(
          fun ({Id, Pubs, Acks}, MembersStateConfirmsActivity) ->
                  with_member_acc(
                    fun (Member = #member { pending_ack = PA,
                                            last_pub = LP,
                                            last_ack = LA },
                         {Confirms2, Activity2}) ->
                            case is_member_alias(Id, Self, View) of
                                true ->
                                    %% Pubs attributed to us (or to an
                                    %% alias) have arrived back: drop
                                    %% them from pending_ack and send
                                    %% them on as acks instead of pubs.
                                    {ToAck, PA1} =
                                        find_common(queue_from_pubs(Pubs), PA,
                                                    queue:new()),
                                    LA1 = last_ack(Acks, LA),
                                    AckNums = acks_from_queue(ToAck),
                                    Confirms3 = maybe_confirm(
                                                  Self, Id, Confirms2, AckNums),
                                    {Member #member { pending_ack = PA1,
                                                      last_ack = LA1 },
                                     {Confirms3,
                                      activity_cons(
                                        Id, [], AckNums, Activity2)}};
                                false ->
                                    %% Another member's traffic: append
                                    %% the pubs, strip the acked
                                    %% prefix, and forward the entry
                                    %% unchanged.
                                    PA1 = apply_acks(Acks, join_pubs(PA, Pubs)),
                                    LA1 = last_ack(Acks, LA),
                                    LP1 = last_pub(Pubs, LP),
                                    {Member #member { pending_ack = PA1,
                                                      last_pub = LP1,
                                                      last_ack = LA1 },
                                     {Confirms2,
                                      activity_cons(Id, Pubs, Acks, Activity2)}}
                            end
                    end, Id, MembersStateConfirmsActivity)
          end, {MembersState, {Confirms, activity_nil()}}, Activity),
    State1 = State #state { members_state = MembersState1,
                            confirms = Confirms1 },
    Activity3 = activity_finalise(Activity1),
    {Result, State2} = maybe_erase_aliases(State1),
    %% Forward before invoking the callback module on the pubs.
    ok = maybe_send_activity(Activity3, State2),
    if_callback_success(
      Result, fun activity_true/3, fun activity_false/3, Activity3, State2);

%% Activity from anyone other than our current left neighbour (or
%% received before we have member state) is ignored.
handle_msg({activity, _NotLeft, _Activity}, State) ->
    {ok, State}.


%% Builds a noreply return value, making sure the broadcast timer
%% matches the buffer and requesting hibernation.
noreply(State) ->
    {noreply, ensure_broadcast_timer(State), hibernate}.

%% As noreply/1, but for a return value that carries a reply.
reply(Reply, State) ->
    {reply, Reply, ensure_broadcast_timer(State), hibernate}.
%% Keeps the flush timer consistent with the broadcast buffer:
%%  - empty buffer, no timer      -> nothing to do;
%%  - empty buffer, timer running -> cancel it;
%%  - pending pubs, no timer      -> start one that calls
%%                                   ?MODULE:flush(self()) after
%%                                   ?BROADCAST_TIMER;
%%  - pending pubs, timer running -> leave it alone.
ensure_broadcast_timer(State = #state { broadcast_buffer = [],
                                        broadcast_timer = undefined }) ->
    State;
ensure_broadcast_timer(State = #state { broadcast_buffer = [],
                                        broadcast_timer = TRef }) ->
    %% Best effort: the result of the cancel is deliberately ignored.
    timer:cancel(TRef),
    State #state { broadcast_timer = undefined };
ensure_broadcast_timer(State = #state { broadcast_timer = undefined }) ->
    {ok, TRef} = timer:apply_after(?BROADCAST_TIMER, ?MODULE, flush, [self()]),
    State #state { broadcast_timer = TRef };
ensure_broadcast_timer(State) ->
    State.

%% Publishes Msg on behalf of the local member.
%%
%% The message is first delivered to our own callback module, then
%% queued in the broadcast buffer under the current publication count.
%% From is either 'none' (plain broadcast: the pub just waits in the
%% buffer) or a caller reference (confirmed broadcast: the caller is
%% remembered in the confirms queue and the buffer is flushed at
%% once).
%%
%% Returns whatever handle_callback_result/1 returns.
internal_broadcast(Msg, From, State = #state { self = Self,
                                               pub_count = PubCount,
                                               module = Module,
                                               confirms = Confirms,
                                               callback_args = Args,
                                               broadcast_buffer = Buffer }) ->
    Result = Module:handle_msg(Args, Self, Msg),
    State1 = State #state { pub_count = PubCount + 1,
                            broadcast_buffer = [{PubCount, Msg} | Buffer] },
    %% Single dispatch on From: the original tested it twice and, for
    %% 'none', re-stored a buffer value State1 already contained.
    State2 = case From of
                 none -> State1;
                 _ -> flush_broadcast_buffer(
                        State1 #state {
                          confirms = queue:in({PubCount, From}, Confirms) })
             end,
    handle_callback_result({Result, State2}).

%% Sends every buffered publication to the right-hand neighbour as one
%% activity and records them as pending acknowledgement against
%% ourselves. A no-op when the buffer is empty.
flush_broadcast_buffer(State = #state { broadcast_buffer = [] }) ->
    State;
flush_broadcast_buffer(State = #state { self = Self,
                                        members_state = MembersState,
                                        broadcast_buffer = Buffer }) ->
    %% The buffer is built by consing, so it is newest-first.
    Pubs = lists:reverse(Buffer),
    Activity = activity_cons(Self, Pubs, [], activity_nil()),
    ok = maybe_send_activity(activity_finalise(Activity), State),
    MembersState1 = with_member(
                      fun (Member = #member { pending_ack = PA }) ->
                              PA1 = queue:join(PA, queue:from_list(Pubs)),
                              Member #member { pending_ack = PA1 }
                      end, Self, MembersState),
    State #state { members_state = MembersState1,
                   broadcast_buffer = [] }.
%% ---------------------------------------------------------------------------
%% View construction and inspection
%%
%% A view is a {Version, Dict} pair; Dict maps a member id to its
%% #view_member record.
%% ---------------------------------------------------------------------------

%% True when the requested version is newer than the view's version.
needs_view_update(ReqVer, {CurVer, _Dict}) ->
    ReqVer > CurVer.

%% Version number carried by a view.
view_version({CurVer, _Dict}) ->
    CurVer.

%% Group entries are either a bare id (alive) or {dead, Id}.
is_member_alive(Entry) ->
    case Entry of
        {dead, _Id} -> false;
        _ -> true
    end.

%% A member is an alias of Self when it is Self, or when it is listed
%% in the alias set of Self's view entry.
is_member_alias(Self, Self, _View) ->
    true;
is_member_alias(Candidate, Self, View) ->
    SelfEntry = fetch_view_member(Self, View),
    ?SETS:is_element(Candidate, SelfEntry #view_member.aliases).

%% Extracts the id from a dead group entry.
dead_member_id({dead, Id}) -> Id.

%% Inserts or replaces a view entry, keyed by its own id.
store_view_member(Entry = #view_member { id = Key }, {CurVer, Dict}) ->
    {CurVer, ?DICT:store(Key, Entry, Dict)}.

%% Applies Fun to the entry stored under Id and writes the result back.
with_view_member(Fun, View, Id) ->
    Current = fetch_view_member(Id, View),
    store_view_member(Fun(Current), View).

%% Entry for Id; fails if Id is not in the view.
fetch_view_member(Id, {_CurVer, Dict}) ->
    ?DICT:fetch(Id, Dict).

%% {ok, Entry} | error.
find_view_member(Id, {_CurVer, Dict}) ->
    ?DICT:find(Id, Dict).

%% An empty view at the given version.
blank_view(Ver) ->
    {Ver, ?DICT:new()}.

%% Ids that have their own entry in the view.
alive_view_members({_CurVer, Dict}) ->
    ?DICT:fetch_keys(Dict).

%% Every id mentioned by the view: each entry's id plus its aliases.
all_known_members({_CurVer, Dict}) ->
    Collect = fun (Id, #view_member { aliases = AliasSet }, Known) ->
                      ?SETS:to_list(AliasSet) ++ [Id | Known]
              end,
    ?DICT:fold(Collect, [], Dict).

%% Builds a view from a #gm_group record: the alive members are linked
%% to their neighbours, then dead members are attached as aliases.
group_to_view(#gm_group { members = Members, version = Ver }) ->
    Alive = [M || M <- Members, is_member_alive(M)],
    [_ | _] = Alive, %% ASSERTION - can't have all dead members
    %% The alive list is repeated three times so that every member,
    %% the first and last included, appears somewhere with both a
    %% predecessor and a successor.
    Linked = link_view(lists:append([Alive, Alive, Alive]), blank_view(Ver)),
    add_aliases(Linked, Members).

%% Slides a window of three over the list, storing the middle element
%% with its two neighbours; stops at the first member already stored.
link_view([Prev, This, Next | Tail], View) ->
    case find_view_member(This, View) of
        {ok, _Existing} ->
            View;
        error ->
            Entry = #view_member { id = This,
                                   aliases = ?SETS:new(),
                                   left = Prev,
                                   right = Next },
            link_view([This, Next | Tail], store_view_member(Entry, View))
    end;
link_view(_TooShort, View) ->
    View.
%% Attaches dead members to the view as aliases. Walking the member
%% list in order, dead ids are collected until the next alive member
%% is reached; that member's alias set is extended with the collected
%% ids and the collection starts again from empty.
add_aliases(View, Members) ->
    %% Make sure the walk ends on an alive member, so no dead ids are
    %% left over once the fold finishes.
    Members1 = ensure_alive_suffix(Members),
    {EmptyDeadSet, View1} =
        lists:foldl(
          fun (Member, {DeadAcc, ViewAcc}) ->
                  case is_member_alive(Member) of
                      true ->
                          {?SETS:new(),
                           with_view_member(
                             fun (VMember =
                                      #view_member { aliases = Aliases }) ->
                                     VMember #view_member {
                                       aliases = ?SETS:union(Aliases, DeadAcc) }
                             end, ViewAcc, Member)};
                      false ->
                          {?SETS:add_element(dead_member_id(Member), DeadAcc),
                           ViewAcc}
                  end
          end, {?SETS:new(), View}, Members1),
    0 = ?SETS:size(EmptyDeadSet), %% ASSERTION
    View1.

%% Rotates the member list until its last element is alive.
ensure_alive_suffix(Members) ->
    queue:to_list(ensure_alive_suffix1(queue:from_list(Members))).

%% Moves dead members from the rear of the queue to its front until
%% the rear element is alive. Fails with a badmatch on an empty queue;
%% it does not terminate if no member is alive.
ensure_alive_suffix1(MembersQ) ->
    {{value, Member}, MembersQ1} = queue:out_r(MembersQ),
    case is_member_alive(Member) of
        true -> MembersQ;
        false -> ensure_alive_suffix1(queue:in_r(Member, MembersQ1))
    end.


%% ---------------------------------------------------------------------------
%% View modification
%% ---------------------------------------------------------------------------

%% Joins Self to the named group, starting from whatever the group
%% table currently holds, and returns the resulting view.
join_group(Self, GroupName) ->
    join_group(Self, GroupName, read_group(GroupName)).
%% join_group(Self, GroupName, GroupOrError) -> View
%%
%% Third argument is the result of reading the group table:
%%  - {error, not_found}: create the group (or reuse one that appeared
%%    meanwhile) and retry;
%%  - a group whose only member is Self: we are already in;
%%  - any other group: see below.
join_group(Self, GroupName, {error, not_found}) ->
    join_group(Self, GroupName, prune_or_create_group(Self, GroupName));
join_group(Self, _GroupName, #gm_group { members = [Self] } = Group) ->
    group_to_view(Group);
join_group(Self, GroupName, #gm_group { members = Members } = Group) ->
    case lists:member(Self, Members) of
        true ->
            group_to_view(Group);
        false ->
            case lists:filter(fun is_member_alive/1, Members) of
                [] ->
                    %% Nobody alive: replace the group with a fresh
                    %% one containing only us.
                    join_group(Self, GroupName,
                               prune_or_create_group(Self, GroupName));
                Alive ->
                    %% Pick a random alive member and ask it to insert
                    %% us on its right.
                    Left = lists:nth(random:uniform(length(Alive)), Alive),
                    %% Fallback when the chosen member turns out to be
                    %% gone: record it as dead and retry with the
                    %% updated group.
                    Handler =
                        fun () ->
                                join_group(
                                  Self, GroupName,
                                  record_dead_member_in_group(Left, GroupName))
                        end,
                    try
                        case gen_server2:call(
                               Left, {add_on_right, Self}, infinity) of
                            {ok, Group1} -> group_to_view(Group1);
                            not_ready -> join_group(Self, GroupName)
                        end
                    catch
                        %% The call exited because the target process
                        %% does not exist or has stopped ...
                        exit:{R, _}
                          when R =:= noproc; R =:= normal; R =:= shutdown ->
                            Handler();
                        %% ... or with a tagged reason such as
                        %% {nodedown, _} / {shutdown, _}.
                        exit:{{R, _}, _}
                          when R =:= nodedown; R =:= shutdown ->
                            Handler()
                    end
            end
    end.

%% Dirty (non-transactional) read of the group record.
%% Returns the #gm_group record or {error, not_found}.
read_group(GroupName) ->
    case mnesia:dirty_read(?GROUP_TABLE, GroupName) of
        [] -> {error, not_found};
        [Group] -> Group
    end.

%% Within one transaction: creates the group with Self as its only
%% member (version 0) when it does not exist or has no alive member;
%% otherwise returns the existing group untouched.
prune_or_create_group(Self, GroupName) ->
    {atomic, Group} =
        mnesia:sync_transaction(
          fun () -> GroupNew = #gm_group { name = GroupName,
                                           members = [Self],
                                           version = 0 },
                    case mnesia:read({?GROUP_TABLE, GroupName}) of
                        [] ->
                            mnesia:write(GroupNew),
                            GroupNew;
                        [Group1 = #gm_group { members = Members }] ->
                            case lists:any(fun is_member_alive/1, Members) of
                                true -> Group1;
                                false -> mnesia:write(GroupNew),
                                         GroupNew
                            end
                    end
          end),
    Group.
%% Replaces Member with {dead, Member} in the group's member list,
%% bumping the group version, and returns the resulting group. When
%% Member is not found as an alive entry the group is returned
%% unchanged. Fails with a badmatch if the group does not exist.
record_dead_member_in_group(Member, GroupName) ->
    {atomic, Group} =
        mnesia:sync_transaction(
          fun () -> [Group1 = #gm_group { members = Members, version = Ver }] =
                        mnesia:read({?GROUP_TABLE, GroupName}),
                    case lists:splitwith(
                           fun (Member1) -> Member1 =/= Member end, Members) of
                        {_Members1, []} -> %% not found - already recorded dead
                            Group1;
                        {Members1, [Member | Members2]} ->
                            %% Position in the list is preserved.
                            Members3 = Members1 ++ [{dead, Member} | Members2],
                            Group2 = Group1 #gm_group { members = Members3,
                                                        version = Ver + 1 },
                            mnesia:write(Group2),
                            Group2
                    end
          end),
    Group.

%% Inserts NewMember immediately after Left in the group's member
%% list, bumping the group version. Fun is called with the new group
%% inside the transaction, before the write, and must return ok.
%% Fails with a badmatch if the group does not exist or Left is not in
%% the list.
record_new_member_in_group(GroupName, Left, NewMember, Fun) ->
    {atomic, Group} =
        mnesia:sync_transaction(
          fun () ->
                  [#gm_group { members = Members, version = Ver } = Group1] =
                      mnesia:read({?GROUP_TABLE, GroupName}),
                  {Prefix, [Left | Suffix]} =
                      lists:splitwith(fun (M) -> M =/= Left end, Members),
                  Members1 = Prefix ++ [Left, NewMember | Suffix],
                  Group2 = Group1 #gm_group { members = Members1,
                                              version = Ver + 1 },
                  ok = Fun(Group2),
                  mnesia:write(Group2),
                  Group2
          end),
    Group.

%% Removes the {dead, Id} entries for the given ids from the group's
%% member list. The version is bumped and the group rewritten only
%% when something was actually removed. Returns the resulting group.
erase_members_in_group(Members, GroupName) ->
    DeadMembers = [{dead, Id} || Id <- Members],
    {atomic, Group} =
        mnesia:sync_transaction(
          fun () ->
                  %% The [_|_] pattern asserts a non-empty member list.
                  [Group1 = #gm_group { members = [_|_] = Members1,
                                        version = Ver }] =
                      mnesia:read({?GROUP_TABLE, GroupName}),
                  case Members1 -- DeadMembers of
                      Members1 -> Group1;
                      Members2 -> Group2 =
                                      Group1 #gm_group { members = Members2,
                                                         version = Ver + 1 },
                                  mnesia:write(Group2),
                                  Group2
                  end
          end),
    Group.
%% Drops those of our aliases whose publications have all been
%% acknowledged (last_ack equal to last_pub). Their member state is
%% erased locally and their dead entries are removed from the group
%% table. Returns {Result, State}: Result is ok when nothing was
%% erased, otherwise the result of notifying the callback module of
%% the view change.
maybe_erase_aliases(State = #state { self = Self,
                                     group_name = GroupName,
                                     view = View,
                                     members_state = MembersState,
                                     module = Module,
                                     callback_args = Args }) ->
    #view_member { aliases = Aliases } = fetch_view_member(Self, View),
    {Erasable, MembersState1}
        = ?SETS:fold(
             fun (Id, {ErasableAcc, MembersStateAcc} = Acc) ->
                     %% Looked up in the original MembersState; ids
                     %% are only ever erased from the accumulator.
                     #member { last_pub = LP, last_ack = LA } =
                         find_member_or_blank(Id, MembersState),
                     case can_erase_view_member(Self, Id, LA, LP) of
                         true -> {[Id | ErasableAcc],
                                  erase_member(Id, MembersStateAcc)};
                         false -> Acc
                     end
             end, {[], MembersState}, Aliases),
    State1 = State #state { members_state = MembersState1 },
    case Erasable of
        [] -> {ok, State1};
        _ -> View1 = group_to_view(
                       erase_members_in_group(Erasable, GroupName)),
             {callback_view_changed(Args, Module, View, View1),
              State1 #state { view = View1 }}
    end.

%% A member may be erased when it is not ourselves and its last ack
%% equals its last pub (the repeated N in the second clause).
can_erase_view_member(Self, Self, _LA, _LP) -> false;
can_erase_view_member(_Self, _Id, N, N) -> true;
can_erase_view_member(_Self, _Id, _LA, _LP) -> false.


%% ---------------------------------------------------------------------------
%% View monitoring and maintenance
%% ---------------------------------------------------------------------------

%% ensure_neighbour(Ver, Self, {Current, MRef}, Wanted) -> {Pid, MRef1}
%%
%% Brings one neighbour slot in line with the view. Clauses, in order:
%%  1. we are our own neighbour and should stay so;
%%  2. we were our own neighbour but now have a real one: ask it to
%%     check its neighbours and start monitoring it;
%%  3. the neighbour has not changed: keep the existing monitor;
%%  4. the neighbour has changed: drop the old monitor, ask both the
%%     old and (unless it is ourselves) the new neighbour to check
%%     their neighbours, and monitor the new one.
ensure_neighbour(_Ver, Self, {Self, undefined}, Self) ->
    {Self, undefined};
ensure_neighbour(Ver, Self, {Self, undefined}, RealNeighbour) ->
    ok = gen_server2:cast(RealNeighbour, {?TAG, Ver, check_neighbours}),
    {RealNeighbour, maybe_monitor(RealNeighbour, Self)};
ensure_neighbour(_Ver, _Self, {RealNeighbour, MRef}, RealNeighbour) ->
    {RealNeighbour, MRef};
ensure_neighbour(Ver, Self, {RealNeighbour, MRef}, Neighbour) ->
    true = erlang:demonitor(MRef),
    Msg = {?TAG, Ver, check_neighbours},
    ok = gen_server2:cast(RealNeighbour, Msg),
    ok = case Neighbour of
             Self -> ok;
             _ -> gen_server2:cast(Neighbour, Msg)
         end,
    {Neighbour, maybe_monitor(Neighbour, Self)}.
%% Monitors Other unless it is ourselves, in which case there is
%% nothing to monitor and undefined is returned in place of a ref.
maybe_monitor(Self, Self) ->
    undefined;
maybe_monitor(Other, _Self) ->
    erlang:monitor(process, Other).

%% Re-derives our left and right neighbours from the current view,
%% updates the monitors accordingly and, when the right neighbour has
%% changed, sends it a catchup. Returns the updated state.
check_neighbours(State = #state { self = Self,
                                  left = Left,
                                  right = Right,
                                  view = View,
                                  broadcast_buffer = Buffer }) ->
    #view_member { left = VLeft, right = VRight }
        = fetch_view_member(Self, View),
    Ver = view_version(View),
    Left1 = ensure_neighbour(Ver, Self, Left, VLeft),
    Right1 = ensure_neighbour(Ver, Self, Right, VRight),
    %% When we are our own right neighbour there is nobody to send
    %% buffered pubs to, so the buffer is dropped.
    Buffer1 = case Right1 of
                  {Self, undefined} -> [];
                  _ -> Buffer
              end,
    State1 = State #state { left = Left1, right = Right1,
                            broadcast_buffer = Buffer1 },
    %% Pass the OLD right neighbour so the change can be detected.
    ok = maybe_send_catchup(Right, State1),
    State1.

%% maybe_send_catchup(OldRight, State) -> ok
%%
%% Sends our member state to the right neighbour as a catchup, except
%% when the right neighbour is unchanged, when we are our own right
%% neighbour, or when we have no member state to send yet.
maybe_send_catchup(Right, #state { right = Right }) ->
    ok;
maybe_send_catchup(_Right, #state { self = Self,
                                    right = {Self, undefined} }) ->
    ok;
maybe_send_catchup(_Right, #state { members_state = undefined }) ->
    ok;
maybe_send_catchup(_Right, #state { self = Self,
                                    right = {Right, _MRef},
                                    view = View,
                                    members_state = MembersState }) ->
    send_right(Right, View,
               {catchup, Self, prepare_members_state(MembersState)}).


%% ---------------------------------------------------------------------------
%% Catch_up delta detection
%% ---------------------------------------------------------------------------

%% Splits two queues of {Num, Msg} entries into
%% {Prefix, Common, Suffix}: the leading entries of A that precede the
%% first entry of B, the run both share, and what remains of B after
%% that run.
find_prefix_common_suffix(A, B) ->
    {Prefix, A1} = find_prefix(A, B, queue:new()),
    {Common, Suffix} = find_common(A1, B, queue:new()),
    {Prefix, Common, Suffix}.

%% Returns the elements of A that occur before the first element of B,
%% plus the remainder of A.
find_prefix(A, B, Prefix) ->
    case {queue:out(A), queue:out(B)} of
        {{{value, Val}, _A1}, {{value, Val}, _B1}} ->
            %% Heads are identical: the prefix ends here.
            {Prefix, A};
        {{empty, A1}, {{value, _A}, _B1}} ->
            %% A ran out before reaching B's first element.
            {Prefix, A1};
        {{{value, {NumA, _MsgA} = Val}, A1},
         {{value, {NumB, _MsgB}}, _B1}} when NumA < NumB ->
            find_prefix(A1, B, queue:in(Val, Prefix));
        {_, {empty, _B1}} ->
            %% B is empty, so all of A counts as prefix. The swapped
            %% order is deliberate: A is returned as the prefix and
            %% the accumulator doubles as the empty remainder.
            {A, Prefix} %% Prefix will be empty here
    end.
%% Walks A and B in step while their heads are identical, collecting
%% the shared run. A is expected to be a prefix of B: the result is
%% {SharedRun, RestOfB} once A is exhausted.
find_common(A, B, Shared) ->
    case {queue:out(A), queue:out(B)} of
        {{empty, _}, _} ->
            {Shared, B};
        {{{value, Head}, RestA}, {{value, Head}, RestB}} ->
            find_common(RestA, RestB, queue:in(Head, Shared))
    end.


%% ---------------------------------------------------------------------------
%% Members helpers
%% ---------------------------------------------------------------------------

%% Transforms the entry stored under Id (a blank one if absent) with
%% Fun and stores the result.
with_member(Fun, Id, MembersState) ->
    Current = find_member_or_blank(Id, MembersState),
    store_member(Id, Fun(Current), MembersState).

%% As with_member/3, but Fun also receives and returns an accumulator.
with_member_acc(Fun, Id, {MembersState, AccIn}) ->
    Current = find_member_or_blank(Id, MembersState),
    {Updated, AccOut} = Fun(Current, AccIn),
    {store_member(Id, Updated, MembersState), AccOut}.

%% Entry stored under Id, or a blank member when there is none.
find_member_or_blank(Id, MembersState) ->
    case ?DICT:find(Id, MembersState) of
        error -> blank_member();
        {ok, Found} -> Found
    end.

%% Removes Id from the member state.
erase_member(Id, MembersState) ->
    ?DICT:erase(Id, MembersState).

%% A member with nothing pending and no pub or ack seen yet (-1).
blank_member() ->
    #member { pending_ack = queue:new(), last_pub = -1, last_ack = -1 }.

%% An empty member state.
blank_member_state() ->
    ?DICT:new().

%% Stores Entry under Id.
store_member(Id, Entry, MembersState) ->
    ?DICT:store(Id, Entry, MembersState).

%% Member state as a list, the form in which it is sent in a catchup.
prepare_members_state(MembersState) ->
    ?DICT:to_list(MembersState).

%% Inverse of prepare_members_state/1.
build_members_state(AsList) ->
    ?DICT:from_list(AsList).


%% ---------------------------------------------------------------------------
%% Activity assembly
%% ---------------------------------------------------------------------------

%% An empty activity under construction.
activity_nil() ->
    queue:new().

%% Appends a {Sender, Pubs, Acks} entry, unless it would carry neither
%% pubs nor acks.
activity_cons(_Sender, [], [], Activity) ->
    Activity;
activity_cons(Sender, Pubs, Acks, Activity) ->
    Entry = {Sender, Pubs, Acks},
    queue:in(Entry, Activity).

%% Converts a finished activity into the list form that is sent.
activity_finalise(Activity) ->
    queue:to_list(Activity).

%% Sends a non-empty activity list to our right-hand neighbour.
maybe_send_activity([], _State) ->
    ok;
maybe_send_activity(Entries, #state { self = Self,
                                      right = {RightPid, _MRef},
                                      view = View }) ->
    send_right(RightPid, View, {activity, Self, Entries}).
%% Casts Msg to the right-hand neighbour, tagged with our view version.
send_right(Right, View, Msg) ->
    ok = gen_server2:cast(Right, {?TAG, view_version(View), Msg}).

%% Delivers every publication in Activity to the callback module, in
%% order.
%%
%% Returns:
%%   ok                      - every handle_msg call returned ok (or
%%                             we ended on the module/args we started
%%                             with);
%%   {become, Module1, Args1} - some call asked to switch module; the
%%                             remaining publications were delivered
%%                             to the new module with the new args;
%%   {stop, Reason}          - a call asked to stop; no further
%%                             publications are delivered.
%%
%% Previously any non-ok result, including {become, _, _}, ended
%% delivery, so a 'become' silently discarded the rest of the
%% activity.
callback(Args, Module, Activity) ->
    Deliver =
        fun ({_PubNum, Pub}, Id, {Args1, Module1, ok} = Acc) ->
                case Module1:handle_msg(Args1, Id, Pub) of
                    ok -> Acc;
                    {become, Module2, Args2} -> {Args2, Module2, ok};
                    {stop, _Reason} = Stop -> Stop
                end;
            (_Pub, _Id, {stop, _Reason} = Stop) ->
                Stop
        end,
    Result =
        lists:foldl(
          fun ({Id, Pubs, _Acks}, Acc) ->
                  lists:foldl(fun (Pub, Acc1) -> Deliver(Pub, Id, Acc1) end,
                              Acc, Pubs)
          end, {Args, Module, ok}, Activity),
    case Result of
        {Args, Module, ok} -> ok;
        {Args3, Module3, ok} -> {become, Module3, Args3};
        {stop, _Reason} = Stop -> Stop
    end.

%% Tells the callback module which members appeared and disappeared
%% between two views; skipped (returning ok) when nothing changed.
callback_view_changed(Args, Module, OldView, NewView) ->
    OldMembers = all_known_members(OldView),
    NewMembers = all_known_members(NewView),
    Births = NewMembers -- OldMembers,
    Deaths = OldMembers -- NewMembers,
    case {Births, Deaths} of
        {[], []} -> ok;
        _ -> Module:members_changed(Args, Births, Deaths)
    end.

%% Turns {CallbackResult, State} or {CallbackResult, Reply, State}
%% into a server return value: noreply/reply on success, a stop tuple
%% when the callback asked to stop.
handle_callback_result({Result, State}) ->
    if_callback_success(
      Result, fun no_reply_true/3, fun no_reply_false/3, undefined, State);
handle_callback_result({Result, Reply, State}) ->
    if_callback_success(
      Result, fun reply_true/3, fun reply_false/3, Reply, State).

%% Continuations handed to if_callback_success/5. Each takes
%% (Result, Arg, State); the *_true variant runs on ok/become, the
%% *_false variant on {stop, Reason}.
no_reply_true (_Result, _Undefined, State) -> noreply(State).
no_reply_false({stop, Reason}, _Undefined, State) -> {stop, Reason, State}.

reply_true (_Result, Reply, State) -> reply(Reply, State).
reply_false({stop, Reason}, Reply, State) -> {stop, Reason, Reply, State}.

handle_msg_true (_Result, Msg, State) -> handle_msg(Msg, State).
handle_msg_false(Result, _Msg, State) -> {Result, State}.

activity_true(_Result, Activity, State = #state { module = Module,
                                                  callback_args = Args }) ->
    {callback(Args, Module, Activity), State}.
activity_false(Result, _Activity, State) ->
    {Result, State}.

%% Dispatches on a callback result:
%%   ok                     -> True(ok, Arg, State)
%%   {become, Module, Args} -> True(Result, Arg, State1), where State1
%%                             has the new module and callback args
%%                             installed
%%   {stop, Reason}         -> False(Result, Arg, State)
if_callback_success(ok, True, _False, Arg, State) ->
    True(ok, Arg, State);
if_callback_success(
  {become, Module, Args} = Result, True, _False, Arg, State) ->
    True(Result, Arg, State #state { module = Module,
                                     callback_args = Args });
if_callback_success({stop, _Reason} = Result, _True, False, Arg, State) ->
    False(Result, Arg, State).
+ +maybe_confirm(_Self, _Id, Confirms, []) -> + Confirms; +maybe_confirm(Self, Self, Confirms, [PubNum | PubNums]) -> + case queue:out(Confirms) of + {empty, _Confirms} -> + Confirms; + {{value, {PubNum, From}}, Confirms1} -> + gen_server2:reply(From, ok), + maybe_confirm(Self, Self, Confirms1, PubNums); + {{value, {PubNum1, _From}}, _Confirms} when PubNum1 > PubNum -> + maybe_confirm(Self, Self, Confirms, PubNums) + end; +maybe_confirm(_Self, _Id, Confirms, _PubNums) -> + Confirms. + +purge_confirms(Confirms) -> + [gen_server2:reply(From, ok) || {_PubNum, From} <- queue:to_list(Confirms)], + queue:new(). + + +%% --------------------------------------------------------------------------- +%% Msg transformation +%% --------------------------------------------------------------------------- + +acks_from_queue(Q) -> + [PubNum || {PubNum, _Msg} <- queue:to_list(Q)]. + +pubs_from_queue(Q) -> + queue:to_list(Q). + +queue_from_pubs(Pubs) -> + queue:from_list(Pubs). + +apply_acks([], Pubs) -> + Pubs; +apply_acks(List, Pubs) -> + {_, Pubs1} = queue:split(length(List), Pubs), + Pubs1. + +join_pubs(Q, []) -> Q; +join_pubs(Q, Pubs) -> queue:join(Q, queue_from_pubs(Pubs)). + +last_ack([], LA) -> + LA; +last_ack(List, LA) -> + LA1 = lists:last(List), + true = LA1 > LA, %% ASSERTION + LA1. + +last_pub([], LP) -> + LP; +last_pub(List, LP) -> + {PubNum, _Msg} = lists:last(List), + true = PubNum > LP, %% ASSERTION + PubNum. diff --git a/src/gm_soak_test.erl b/src/gm_soak_test.erl new file mode 100644 index 00000000..5e5a3a5a --- /dev/null +++ b/src/gm_soak_test.erl @@ -0,0 +1,131 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. 
See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is VMware, Inc. +%% Copyright (c) 2007-2011 VMware, Inc. All rights reserved. +%% + +-module(gm_soak_test). + +-export([test/0]). +-export([joined/2, members_changed/3, handle_msg/3, terminate/2]). + +-behaviour(gm). + +-include("gm_specs.hrl"). + +%% --------------------------------------------------------------------------- +%% Soak test +%% --------------------------------------------------------------------------- + +get_state() -> + get(state). + +with_state(Fun) -> + put(state, Fun(get_state())). + +inc() -> + case 1 + get(count) of + 100000 -> Now = now(), + Start = put(ts, Now), + Diff = timer:now_diff(Now, Start), + Rate = 100000 / (Diff / 1000000), + io:format("~p seeing ~p msgs/sec~n", [self(), Rate]), + put(count, 0); + N -> put(count, N) + end. + +joined([], Members) -> + io:format("Joined ~p (~p members)~n", [self(), length(Members)]), + put(state, dict:from_list([{Member, empty} || Member <- Members])), + put(count, 0), + put(ts, now()), + ok. + +members_changed([], Births, Deaths) -> + with_state( + fun (State) -> + State1 = + lists:foldl( + fun (Born, StateN) -> + false = dict:is_key(Born, StateN), + dict:store(Born, empty, StateN) + end, State, Births), + lists:foldl( + fun (Died, StateN) -> + true = dict:is_key(Died, StateN), + dict:store(Died, died, StateN) + end, State1, Deaths) + end), + ok. 
+ +handle_msg([], From, {test_msg, Num}) -> + inc(), + with_state( + fun (State) -> + ok = case dict:find(From, State) of + {ok, died} -> + exit({{from, From}, + {received_posthumous_delivery, Num}}); + {ok, empty} -> ok; + {ok, Num} -> ok; + {ok, Num1} when Num < Num1 -> + exit({{from, From}, + {duplicate_delivery_of, Num}, + {expecting, Num1}}); + {ok, Num1} -> + exit({{from, From}, + {received_early, Num}, + {expecting, Num1}}); + error -> + exit({{from, From}, + {received_premature_delivery, Num}}) + end, + dict:store(From, Num + 1, State) + end), + ok. + +terminate([], Reason) -> + io:format("Left ~p (~p)~n", [self(), Reason]), + ok. + +spawn_member() -> + spawn_link( + fun () -> + {MegaSecs, Secs, MicroSecs} = now(), + random:seed(MegaSecs, Secs, MicroSecs), + %% start up delay of no more than 10 seconds + timer:sleep(random:uniform(10000)), + {ok, Pid} = gm:start_link(?MODULE, ?MODULE, []), + Start = random:uniform(10000), + send_loop(Pid, Start, Start + random:uniform(10000)), + gm:leave(Pid), + spawn_more() + end). + +spawn_more() -> + [spawn_member() || _ <- lists:seq(1, 4 - random:uniform(4))]. + +send_loop(_Pid, Target, Target) -> + ok; +send_loop(Pid, Count, Target) when Target > Count -> + case random:uniform(3) of + 3 -> gm:confirmed_broadcast(Pid, {test_msg, Count}); + _ -> gm:broadcast(Pid, {test_msg, Count}) + end, + timer:sleep(random:uniform(5) - 1), %% sleep up to 4 ms + send_loop(Pid, Count + 1, Target). + +test() -> + ok = gm:create_tables(), + spawn_member(), + spawn_member(). diff --git a/src/gm_speed_test.erl b/src/gm_speed_test.erl new file mode 100644 index 00000000..defb0f29 --- /dev/null +++ b/src/gm_speed_test.erl @@ -0,0 +1,82 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. 
You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is VMware, Inc. +%% Copyright (c) 2007-2011 VMware, Inc. All rights reserved. +%% + +-module(gm_speed_test). + +-export([test/3]). +-export([joined/2, members_changed/3, handle_msg/3, terminate/2]). +-export([wile_e_coyote/2]). + +-behaviour(gm). + +-include("gm_specs.hrl"). + +%% callbacks + +joined(Owner, _Members) -> + Owner ! joined, + ok. + +members_changed(_Owner, _Births, _Deaths) -> + ok. + +handle_msg(Owner, _From, ping) -> + Owner ! ping, + ok. + +terminate(Owner, _Reason) -> + Owner ! terminated, + ok. + +%% other + +wile_e_coyote(Time, WriteUnit) -> + {ok, Pid} = gm:start_link(?MODULE, ?MODULE, self()), + receive joined -> ok end, + timer:sleep(1000), %% wait for all to join + timer:send_after(Time, stop), + Start = now(), + {Sent, Received} = loop(Pid, WriteUnit, 0, 0), + End = now(), + ok = gm:leave(Pid), + receive terminated -> ok end, + Elapsed = timer:now_diff(End, Start) / 1000000, + io:format("Sending rate: ~p msgs/sec~nReceiving rate: ~p msgs/sec~n~n", + [Sent/Elapsed, Received/Elapsed]), + ok. + +loop(Pid, WriteUnit, Sent, Received) -> + case read(Received) of + {stop, Received1} -> {Sent, Received1}; + {ok, Received1} -> ok = write(Pid, WriteUnit), + loop(Pid, WriteUnit, Sent + WriteUnit, Received1) + end. + +read(Count) -> + receive + ping -> read(Count + 1); + stop -> {stop, Count} + after 5 -> + {ok, Count} + end. + +write(_Pid, 0) -> ok; +write(Pid, N) -> ok = gm:broadcast(Pid, ping), + write(Pid, N - 1). 
+ +test(Time, WriteUnit, Nodes) -> + ok = gm:create_tables(), + [spawn(Node, ?MODULE, wile_e_coyote, [Time, WriteUnit]) || Node <- Nodes]. diff --git a/src/gm_tests.erl b/src/gm_tests.erl new file mode 100644 index 00000000..ca0ffd64 --- /dev/null +++ b/src/gm_tests.erl @@ -0,0 +1,182 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is VMware, Inc. +%% Copyright (c) 2007-2011 VMware, Inc. All rights reserved. +%% + +-module(gm_tests). + +-export([test_join_leave/0, + test_broadcast/0, + test_confirmed_broadcast/0, + test_member_death/0, + test_receive_in_order/0, + all_tests/0]). +-export([joined/2, members_changed/3, handle_msg/3, terminate/2]). + +-behaviour(gm). + +-include("gm_specs.hrl"). + +-define(RECEIVE_OR_THROW(Body, Bool, Error), + receive Body -> + true = Bool, + passed + after 1000 -> + throw(Error) + end). + +joined(Pid, Members) -> + Pid ! {joined, self(), Members}, + ok. + +members_changed(Pid, Births, Deaths) -> + Pid ! {members_changed, self(), Births, Deaths}, + ok. + +handle_msg(Pid, From, Msg) -> + Pid ! {msg, self(), From, Msg}, + ok. + +terminate(Pid, Reason) -> + Pid ! {termination, self(), Reason}, + ok. 
%% ---------------------------------------------------------------------------
%% Functional tests
%% ---------------------------------------------------------------------------

%% Runs every functional test in sequence; each must return 'passed'.
all_tests() ->
    passed = test_join_leave(),
    passed = test_broadcast(),
    passed = test_confirmed_broadcast(),
    passed = test_member_death(),
    passed = test_receive_in_order(),
    passed.

%% Two members join and leave again, with nothing done in between.
test_join_leave() ->
    with_two_members(fun (_Pid, _Pid2) -> passed end).

%% A plain broadcast reaches both members.
test_broadcast() ->
    test_broadcast(fun gm:broadcast/2).

%% A confirmed broadcast reaches both members.
test_confirmed_broadcast() ->
    test_broadcast(fun gm:confirmed_broadcast/2).

%% A third member joins and is then killed; both surviving members
%% must be told about its birth and, later, its death.
test_member_death() ->
    with_two_members(
      fun (Pid, Pid2) ->
              {ok, Pid3} = gm:start_link(?MODULE, ?MODULE, self()),
              passed = receive_joined(Pid3, [Pid, Pid2, Pid3],
                                      timeout_joining_gm_group_3),
              passed = receive_birth(Pid, Pid3, timeout_waiting_for_birth_3_1),
              passed = receive_birth(Pid2, Pid3, timeout_waiting_for_birth_3_2),

              %% Unlink first so that killing Pid3 does not take the
              %% test process down with it.
              unlink(Pid3),
              exit(Pid3, kill),

              %% Have to do some broadcasts to ensure that all members
              %% find out about the death.
              passed = (test_broadcast_fun(fun gm:confirmed_broadcast/2))(
                         Pid, Pid2),

              passed = receive_death(Pid, Pid3, timeout_waiting_for_death_3_1),
              passed = receive_death(Pid2, Pid3, timeout_waiting_for_death_3_2),

              passed
      end).

%% Both members broadcast 1..1000, interleaved; each member must
%% receive each sender's numbers in order.
test_receive_in_order() ->
    with_two_members(
      fun (Pid, Pid2) ->
              Numbers = lists:seq(1,1000),
              [begin ok = gm:broadcast(Pid, N), ok = gm:broadcast(Pid2, N) end
               || N <- Numbers],
              passed = receive_numbers(
                         Pid, Pid, {timeout_for_msgs, Pid, Pid}, Numbers),
              passed = receive_numbers(
                         Pid, Pid2, {timeout_for_msgs, Pid, Pid2}, Numbers),
              passed = receive_numbers(
                         Pid2, Pid, {timeout_for_msgs, Pid2, Pid}, Numbers),
              passed = receive_numbers(
                         Pid2, Pid2, {timeout_for_msgs, Pid2, Pid2}, Numbers),
              passed
      end).

%% Runs the broadcast check with the given broadcast function
%% (arity 2: member pid, message).
test_broadcast(Fun) ->
    with_two_members(test_broadcast_fun(Fun)).
%% Returns a two-member test body: Pid sends magic_message using Fun,
%% and both Pid and Pid2 must report receiving it from Pid.
test_broadcast_fun(Fun) ->
    fun (Pid, Pid2) ->
            ok = Fun(Pid, magic_message),
            passed = receive_or_throw({msg, Pid, Pid, magic_message},
                                      timeout_waiting_for_msg),
            passed = receive_or_throw({msg, Pid2, Pid, magic_message},
                                      timeout_waiting_for_msg)
    end.

%% Test fixture: starts two group members whose callbacks report to
%% the calling process, runs Fun(Pid, Pid2) (which must return
%% 'passed'), then makes both leave and checks that the expected
%% notifications arrive and that no stray message is left in the
%% mailbox.
with_two_members(Fun) ->
    ok = gm:create_tables(),

    {ok, Pid} = gm:start_link(?MODULE, ?MODULE, self()),
    passed = receive_joined(Pid, [Pid], timeout_joining_gm_group_1),

    {ok, Pid2} = gm:start_link(?MODULE, ?MODULE, self()),
    passed = receive_joined(Pid2, [Pid, Pid2], timeout_joining_gm_group_2),
    passed = receive_birth(Pid, Pid2, timeout_waiting_for_birth_2),

    passed = Fun(Pid, Pid2),

    ok = gm:leave(Pid),
    passed = receive_death(Pid2, Pid, timeout_waiting_for_death_1),
    passed =
        receive_termination(Pid, normal, timeout_waiting_for_termination_1),

    ok = gm:leave(Pid2),
    passed =
        receive_termination(Pid2, normal, timeout_waiting_for_termination_2),

    %% Anything still in the mailbox is an unexpected notification.
    receive X -> throw({unexpected_message, X})
    after 0 -> passed
    end.

%% The helpers below wrap ?RECEIVE_OR_THROW: wait up to one second for
%% a message matching the pattern, throw Error on timeout, and fail
%% with a badmatch if the message arrives but the condition is false.

%% Waits for exactly the message Pattern (it is already bound).
receive_or_throw(Pattern, Error) ->
    ?RECEIVE_OR_THROW(Pattern, true, Error).

%% Waits for From to report that exactly Born joined and nobody died.
receive_birth(From, Born, Error) ->
    ?RECEIVE_OR_THROW({members_changed, From, Birth, Death},
                      ([Born] == Birth) andalso ([] == Death),
                      Error).

%% Waits for From to report that exactly Died died and nobody joined.
receive_death(From, Died, Error) ->
    ?RECEIVE_OR_THROW({members_changed, From, Birth, Death},
                      ([] == Birth) andalso ([Died] == Death),
                      Error).

%% Waits for From to report joining a group with the given members
%% (compared irrespective of order).
receive_joined(From, Members, Error) ->
    ?RECEIVE_OR_THROW({joined, From, Members1},
                      lists:usort(Members) == lists:usort(Members1),
                      Error).

%% Waits for From to report termination with the given reason.
receive_termination(From, Reason, Error) ->
    ?RECEIVE_OR_THROW({termination, From, Reason1},
                      Reason == Reason1,
                      Error).

%% Checks that Pid reports receiving each number in the list from
%% Sender, in list order.
receive_numbers(_Pid, _Sender, _Error, []) ->
    passed;
receive_numbers(Pid, Sender, Error, [N | Numbers]) ->
    ?RECEIVE_OR_THROW({msg, Pid, Sender, M},
                      M == N,
                      Error),
    receive_numbers(Pid, Sender, Error, Numbers).
diff --git a/src/pg_local.erl b/src/pg_local.erl index fd515747..c9c3a3a7 100644 --- a/src/pg_local.erl +++ b/src/pg_local.erl @@ -83,7 +83,7 @@ get_members(Name) -> sync() -> ensure_started(), - gen_server:call(?MODULE, sync). + gen_server:call(?MODULE, sync, infinity). %%% %%% Callback functions from gen_server diff --git a/src/priority_queue.erl b/src/priority_queue.erl index 4a94b24b..4fc8b469 100644 --- a/src/priority_queue.erl +++ b/src/priority_queue.erl @@ -47,7 +47,10 @@ -ifdef(use_specs). --type(priority() :: integer()). +-export_type([q/0]). + +-type(q() :: pqueue()). +-type(priority() :: integer() | 'infinity'). -type(squeue() :: {queue, [any()], [any()]}). -type(pqueue() :: squeue() | {pqueue, [{priority(), squeue()}]}). @@ -71,8 +74,9 @@ new() -> is_queue({queue, R, F}) when is_list(R), is_list(F) -> true; is_queue({pqueue, Queues}) when is_list(Queues) -> - lists:all(fun ({P, Q}) -> is_integer(P) andalso is_queue(Q) end, - Queues); + lists:all(fun ({infinity, Q}) -> is_queue(Q); + ({P, Q}) -> is_integer(P) andalso is_queue(Q) + end, Queues); is_queue(_) -> false. @@ -89,7 +93,8 @@ len({pqueue, Queues}) -> to_list({queue, In, Out}) when is_list(In), is_list(Out) -> [{0, V} || V <- Out ++ lists:reverse(In, [])]; to_list({pqueue, Queues}) -> - [{-P, V} || {P, Q} <- Queues, {0, V} <- to_list(Q)]. + [{maybe_negate_priority(P), V} || {P, Q} <- Queues, + {0, V} <- to_list(Q)]. in(Item, Q) -> in(Item, 0, Q). 
@@ -103,12 +108,20 @@ in(X, Priority, _Q = {queue, [], []}) -> in(X, Priority, Q = {queue, _, _}) -> in(X, Priority, {pqueue, [{0, Q}]}); in(X, Priority, {pqueue, Queues}) -> - P = -Priority, + P = maybe_negate_priority(Priority), {pqueue, case lists:keysearch(P, 1, Queues) of {value, {_, Q}} -> lists:keyreplace(P, 1, Queues, {P, in(X, Q)}); + false when P == infinity -> + [{P, {queue, [X], []}} | Queues]; false -> - lists:keysort(1, [{P, {queue, [X], []}} | Queues]) + case Queues of + [{infinity, InfQueue} | Queues1] -> + [{infinity, InfQueue} | + lists:keysort(1, [{P, {queue, [X], []}} | Queues1])]; + _ -> + lists:keysort(1, [{P, {queue, [X], []}} | Queues]) + end end}. out({queue, [], []} = Q) -> @@ -141,7 +154,8 @@ join({queue, [], []}, B) -> join({queue, AIn, AOut}, {queue, BIn, BOut}) -> {queue, BIn, AOut ++ lists:reverse(AIn, BOut)}; join(A = {queue, _, _}, {pqueue, BPQ}) -> - {Pre, Post} = lists:splitwith(fun ({P, _}) -> P < 0 end, BPQ), + {Pre, Post} = + lists:splitwith(fun ({P, _}) -> P < 0 orelse P == infinity end, BPQ), Post1 = case Post of [] -> [ {0, A} ]; [ {0, ZeroQueue} | Rest ] -> [ {0, join(A, ZeroQueue)} | Rest ]; @@ -149,7 +163,8 @@ join(A = {queue, _, _}, {pqueue, BPQ}) -> end, {pqueue, Pre ++ Post1}; join({pqueue, APQ}, B = {queue, _, _}) -> - {Pre, Post} = lists:splitwith(fun ({P, _}) -> P < 0 end, APQ), + {Pre, Post} = + lists:splitwith(fun ({P, _}) -> P < 0 orelse P == infinity end, APQ), Post1 = case Post of [] -> [ {0, B} ]; [ {0, ZeroQueue} | Rest ] -> [ {0, join(ZeroQueue, B)} | Rest ]; @@ -165,7 +180,7 @@ merge(APQ, [], Acc) -> lists:reverse(Acc, APQ); merge([{P, A}|As], [{P, B}|Bs], Acc) -> merge(As, Bs, [ {P, join(A, B)} | Acc ]); -merge([{PA, A}|As], Bs = [{PB, _}|_], Acc) when PA < PB -> +merge([{PA, A}|As], Bs = [{PB, _}|_], Acc) when PA < PB orelse PA == infinity -> merge(As, Bs, [ {PA, A} | Acc ]); merge(As = [{_, _}|_], [{PB, B}|Bs], Acc) -> merge(As, Bs, [ {PB, B} | Acc ]). 
@@ -174,3 +189,6 @@ r2f([]) -> {queue, [], []}; r2f([_] = R) -> {queue, [], R}; r2f([X,Y]) -> {queue, [X], [Y]}; r2f([X,Y|R]) -> {queue, [X,Y], lists:reverse(R, [])}. + +maybe_negate_priority(infinity) -> infinity; +maybe_negate_priority(P) -> -P. diff --git a/src/rabbit.erl b/src/rabbit.erl index c6661d39..e067607d 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -18,37 +18,43 @@ -behaviour(application). --export([prepare/0, start/0, stop/0, stop_and_halt/0, status/0, +-export([prepare/0, start/0, stop/0, stop_and_halt/0, status/0, environment/0, rotate_logs/1]). -export([start/2, stop/1]). --export([log_location/1]). +-export([log_location/1]). %% for testing %%--------------------------------------------------------------------------- %% Boot steps. --export([maybe_insert_default_data/0]). +-export([maybe_insert_default_data/0, boot_delegate/0, recover/0]). + +-rabbit_boot_step({pre_boot, [{description, "rabbit boot start"}]}). -rabbit_boot_step({codec_correctness_check, [{description, "codec correctness check"}, {mfa, {rabbit_binary_generator, check_empty_content_body_frame_size, []}}, + {requires, pre_boot}, {enables, external_infrastructure}]}). -rabbit_boot_step({database, [{mfa, {rabbit_mnesia, init, []}}, + {requires, file_handle_cache}, {enables, external_infrastructure}]}). -rabbit_boot_step({file_handle_cache, [{description, "file handle cache server"}, {mfa, {rabbit_sup, start_restartable_child, [file_handle_cache]}}, + {requires, pre_boot}, {enables, worker_pool}]}). -rabbit_boot_step({worker_pool, [{description, "worker pool"}, {mfa, {rabbit_sup, start_child, [worker_pool_sup]}}, + {requires, pre_boot}, {enables, external_infrastructure}]}). -rabbit_boot_step({external_infrastructure, @@ -101,8 +107,7 @@ -rabbit_boot_step({delegate_sup, [{description, "cluster delegate"}, - {mfa, {rabbit_sup, start_child, - [delegate_sup]}}, + {mfa, {rabbit, boot_delegate, []}}, {requires, kernel_ready}, {enables, core_initialized}]}). 
@@ -123,16 +128,22 @@ {requires, core_initialized}, {enables, routing_ready}]}). --rabbit_boot_step({exchange_recovery, - [{description, "exchange recovery"}, - {mfa, {rabbit_exchange, recover, []}}, +-rabbit_boot_step({recovery, + [{description, "exchange, queue and binding recovery"}, + {mfa, {rabbit, recover, []}}, {requires, empty_db_check}, {enables, routing_ready}]}). --rabbit_boot_step({queue_sup_queue_recovery, - [{description, "queue supervisor and queue recovery"}, - {mfa, {rabbit_amqqueue, start, []}}, - {requires, empty_db_check}, +-rabbit_boot_step({mirror_queue_slave_sup, + [{description, "mirror queue slave sup"}, + {mfa, {rabbit_mirror_queue_slave_sup, start, []}}, + {requires, recovery}, + {enables, routing_ready}]}). + +-rabbit_boot_step({mirrored_queues, + [{description, "adding mirrors to queues"}, + {mfa, {rabbit_mirror_queue_misc, on_node_up, []}}, + {requires, mirror_queue_slave_sup}, {enables, routing_ready}]}). -rabbit_boot_step({routing_ready, @@ -153,6 +164,11 @@ [{mfa, {rabbit_networking, boot, []}}, {requires, log_relay}]}). +-rabbit_boot_step({notify_cluster, + [{description, "notify cluster nodes"}, + {mfa, {rabbit_node_monitor, notify_cluster, []}}, + {requires, networking}]}). + %%--------------------------------------------------------------------------- -include("rabbit_framing.hrl"). @@ -174,29 +190,37 @@ -spec(stop_and_halt/0 :: () -> 'ok'). -spec(rotate_logs/1 :: (file_suffix()) -> rabbit_types:ok_or_error(any())). -spec(status/0 :: - () -> [{running_applications, [{atom(), string(), string()}]} | - {nodes, [{rabbit_mnesia:node_type(), [node()]}]} | - {running_nodes, [node()]}]). + () -> [{pid, integer()} | + {running_applications, [{atom(), string(), string()}]} | + {os, {atom(), atom()}} | + {erlang_version, string()} | + {memory, any()}]). +-spec(environment/0 :: () -> [{atom() | term()}]). -spec(log_location/1 :: ('sasl' | 'kernel') -> log_location()). +-spec(maybe_insert_default_data/0 :: () -> 'ok'). 
+-spec(boot_delegate/0 :: () -> 'ok'). +-spec(recover/0 :: () -> 'ok'). + -endif. %%---------------------------------------------------------------------------- prepare() -> - ok = ensure_working_log_handlers(). + ok = ensure_working_log_handlers(), + ok = rabbit_upgrade:maybe_upgrade_mnesia(). start() -> try ok = prepare(), - ok = rabbit_misc:start_applications(?APPS) + ok = rabbit_misc:start_applications(application_load_order()) after %%give the error loggers some time to catch up timer:sleep(100) end. stop() -> - ok = rabbit_misc:stop_applications(?APPS). + ok = rabbit_misc:stop_applications(application_load_order()). stop_and_halt() -> try @@ -207,8 +231,16 @@ stop_and_halt() -> ok. status() -> - [{running_applications, application:which_applications()}] ++ - rabbit_mnesia:status(). + [{pid, list_to_integer(os:getpid())}, + {running_applications, application:which_applications()}, + {os, os:type()}, + {erlang_version, erlang:system_info(system_version)}, + {memory, erlang:memory()}]. + +environment() -> + lists:keysort( + 1, [P || P = {K, _} <- application:get_all_env(rabbit), + K =/= default_pass]). rotate_logs(BinarySuffix) -> Suffix = binary_to_list(BinarySuffix), @@ -224,18 +256,20 @@ rotate_logs(BinarySuffix) -> start(normal, []) -> case erts_version_check() of ok -> + ok = rabbit_mnesia:delete_previously_running_nodes(), {ok, SupPid} = rabbit_sup:start_link(), + true = register(rabbit, self()), print_banner(), [ok = run_boot_step(Step) || Step <- boot_steps()], io:format("~nbroker running~n"), - {ok, SupPid}; Error -> Error end. stop(_State) -> + ok = rabbit_mnesia:record_running_nodes(), terminated_ok = error_logger:delete_report_handler(rabbit_error_logger), ok = rabbit_alarm:stop(), ok = case rabbit_mnesia:is_clustered() of @@ -245,20 +279,51 @@ stop(_State) -> ok. 
%%--------------------------------------------------------------------------- +%% application life cycle + +application_load_order() -> + ok = load_applications(), + {ok, G} = rabbit_misc:build_acyclic_graph( + fun (App, _Deps) -> [{App, App}] end, + fun (App, Deps) -> [{Dep, App} || Dep <- Deps] end, + [{App, app_dependencies(App)} || + {App, _Desc, _Vsn} <- application:loaded_applications()]), + true = digraph:del_vertices( + G, digraph:vertices(G) -- digraph_utils:reachable(?APPS, G)), + Result = digraph_utils:topsort(G), + true = digraph:delete(G), + Result. + +load_applications() -> + load_applications(queue:from_list(?APPS), sets:new()). + +load_applications(Worklist, Loaded) -> + case queue:out(Worklist) of + {empty, _WorkList} -> + ok; + {{value, App}, Worklist1} -> + case sets:is_element(App, Loaded) of + true -> load_applications(Worklist1, Loaded); + false -> case application:load(App) of + ok -> ok; + {error, {already_loaded, App}} -> ok; + Error -> throw(Error) + end, + load_applications( + queue:join(Worklist1, + queue:from_list(app_dependencies(App))), + sets:add_element(App, Loaded)) + end + end. -erts_version_check() -> - FoundVer = erlang:system_info(version), - case rabbit_misc:version_compare(?ERTS_MINIMUM, FoundVer, lte) of - true -> ok; - false -> {error, {erlang_version_too_old, - {found, FoundVer}, {required, ?ERTS_MINIMUM}}} +app_dependencies(App) -> + case application:get_key(App, applications) of + undefined -> []; + {ok, Lst} -> Lst end. -boot_error(Format, Args) -> - io:format("BOOT ERROR: " ++ Format, Args), - error_logger:error_msg(Format, Args), - timer:sleep(1000), - exit({?MODULE, failure_during_boot}). +%%--------------------------------------------------------------------------- +%% boot step logic run_boot_step({StepName, Attributes}) -> Description = case lists:keysearch(description, 1, Attributes) of @@ -333,83 +398,46 @@ sort_boot_steps(UnsortedSteps) -> end]) end. 
-%%--------------------------------------------------------------------------- +boot_error(Format, Args) -> + io:format("BOOT ERROR: " ++ Format, Args), + error_logger:error_msg(Format, Args), + timer:sleep(1000), + exit({?MODULE, failure_during_boot}). -log_location(Type) -> - case application:get_env(Type, case Type of - kernel -> error_logger; - sasl -> sasl_error_logger - end) of - {ok, {file, File}} -> File; - {ok, false} -> undefined; - {ok, tty} -> tty; - {ok, silent} -> undefined; - {ok, Bad} -> throw({error, {cannot_log_to_file, Bad}}); - _ -> undefined - end. +%%--------------------------------------------------------------------------- +%% boot step functions -app_location() -> - {ok, Application} = application:get_application(), - filename:absname(code:where_is_file(atom_to_list(Application) ++ ".app")). +boot_delegate() -> + {ok, Count} = application:get_env(rabbit, delegate_count), + rabbit_sup:start_child(delegate_sup, [Count]). -home_dir() -> - case init:get_argument(home) of - {ok, [[Home]]} -> Home; - Other -> Other - end. +recover() -> + rabbit_binding:recover(rabbit_exchange:recover(), rabbit_amqqueue:start()). -config_files() -> - case init:get_argument(config) of - {ok, Files} -> [filename:absname( - filename:rootname(File, ".config") ++ ".config") || - File <- Files]; - error -> [] +maybe_insert_default_data() -> + case rabbit_mnesia:is_db_empty() of + true -> insert_default_data(); + false -> ok end. 
-%--------------------------------------------------------------------------- +insert_default_data() -> + {ok, DefaultUser} = application:get_env(default_user), + {ok, DefaultPass} = application:get_env(default_pass), + {ok, DefaultTags} = application:get_env(default_user_tags), + {ok, DefaultVHost} = application:get_env(default_vhost), + {ok, [DefaultConfigurePerm, DefaultWritePerm, DefaultReadPerm]} = + application:get_env(default_permissions), + ok = rabbit_vhost:add(DefaultVHost), + ok = rabbit_auth_backend_internal:add_user(DefaultUser, DefaultPass), + ok = rabbit_auth_backend_internal:set_tags(DefaultUser, DefaultTags), + ok = rabbit_auth_backend_internal:set_permissions(DefaultUser, DefaultVHost, + DefaultConfigurePerm, + DefaultWritePerm, + DefaultReadPerm), + ok. -print_banner() -> - {ok, Product} = application:get_key(id), - {ok, Version} = application:get_key(vsn), - ProductLen = string:len(Product), - io:format("~n" - "+---+ +---+~n" - "| | | |~n" - "| | | |~n" - "| | | |~n" - "| +---+ +-------+~n" - "| |~n" - "| ~s +---+ |~n" - "| | | |~n" - "| ~s +---+ |~n" - "| |~n" - "+-------------------+~n" - "~s~n~s~n~s~n~n", - [Product, string:right([$v|Version], ProductLen), - ?PROTOCOL_VERSION, - ?COPYRIGHT_MESSAGE, ?INFORMATION_MESSAGE]), - Settings = [{"node", node()}, - {"app descriptor", app_location()}, - {"home dir", home_dir()}, - {"config file(s)", config_files()}, - {"cookie hash", rabbit_misc:cookie_hash()}, - {"log", log_location(kernel)}, - {"sasl log", log_location(sasl)}, - {"database dir", rabbit_mnesia:dir()}, - {"erlang version", erlang:system_info(version)}], - DescrLen = 1 + lists:max([length(K) || {K, _V} <- Settings]), - Format = fun (K, V) -> - io:format("~-" ++ integer_to_list(DescrLen) ++ "s: ~s~n", - [K, V]) - end, - lists:foreach(fun ({"config file(s)" = K, []}) -> - Format(K, "(none)"); - ({"config file(s)" = K, [V0 | Vs]}) -> - Format(K, V0), [Format("", V) || V <- Vs]; - ({K, V}) -> - Format(K, V) - end, Settings), - io:nl(). 
+%%--------------------------------------------------------------------------- +%% logging ensure_working_log_handlers() -> Handlers = gen_event:which_handlers(error_logger), @@ -448,31 +476,19 @@ ensure_working_log_handler(OldFHandler, NewFHandler, TTYHandler, end end. -maybe_insert_default_data() -> - case rabbit_mnesia:is_db_empty() of - true -> insert_default_data(); - false -> ok +log_location(Type) -> + case application:get_env(Type, case Type of + kernel -> error_logger; + sasl -> sasl_error_logger + end) of + {ok, {file, File}} -> File; + {ok, false} -> undefined; + {ok, tty} -> tty; + {ok, silent} -> undefined; + {ok, Bad} -> throw({error, {cannot_log_to_file, Bad}}); + _ -> undefined end. -insert_default_data() -> - {ok, DefaultUser} = application:get_env(default_user), - {ok, DefaultPass} = application:get_env(default_pass), - {ok, DefaultAdmin} = application:get_env(default_user_is_admin), - {ok, DefaultVHost} = application:get_env(default_vhost), - {ok, [DefaultConfigurePerm, DefaultWritePerm, DefaultReadPerm]} = - application:get_env(default_permissions), - ok = rabbit_vhost:add(DefaultVHost), - ok = rabbit_auth_backend_internal:add_user(DefaultUser, DefaultPass), - case DefaultAdmin of - true -> rabbit_auth_backend_internal:set_admin(DefaultUser); - _ -> ok - end, - ok = rabbit_auth_backend_internal:set_permissions(DefaultUser, DefaultVHost, - DefaultConfigurePerm, - DefaultWritePerm, - DefaultReadPerm), - ok. - rotate_logs(File, Suffix, Handler) -> rotate_logs(File, Suffix, Handler, Handler). @@ -495,3 +511,75 @@ log_rotation_result(ok, {error, SaslLogError}) -> {error, {cannot_rotate_sasl_logs, SaslLogError}}; log_rotation_result(ok, ok) -> ok. 
+ +%%--------------------------------------------------------------------------- +%% misc + +erts_version_check() -> + FoundVer = erlang:system_info(version), + case rabbit_misc:version_compare(?ERTS_MINIMUM, FoundVer, lte) of + true -> ok; + false -> {error, {erlang_version_too_old, + {found, FoundVer}, {required, ?ERTS_MINIMUM}}} + end. + +print_banner() -> + {ok, Product} = application:get_key(id), + {ok, Version} = application:get_key(vsn), + ProductLen = string:len(Product), + io:format("~n" + "+---+ +---+~n" + "| | | |~n" + "| | | |~n" + "| | | |~n" + "| +---+ +-------+~n" + "| |~n" + "| ~s +---+ |~n" + "| | | |~n" + "| ~s +---+ |~n" + "| |~n" + "+-------------------+~n" + "~s~n~s~n~s~n~n", + [Product, string:right([$v|Version], ProductLen), + ?PROTOCOL_VERSION, + ?COPYRIGHT_MESSAGE, ?INFORMATION_MESSAGE]), + Settings = [{"node", node()}, + {"app descriptor", app_location()}, + {"home dir", home_dir()}, + {"config file(s)", config_files()}, + {"cookie hash", rabbit_misc:cookie_hash()}, + {"log", log_location(kernel)}, + {"sasl log", log_location(sasl)}, + {"database dir", rabbit_mnesia:dir()}, + {"erlang version", erlang:system_info(version)}], + DescrLen = 1 + lists:max([length(K) || {K, _V} <- Settings]), + Format = fun (K, V) -> + io:format("~-" ++ integer_to_list(DescrLen) ++ "s: ~s~n", + [K, V]) + end, + lists:foreach(fun ({"config file(s)" = K, []}) -> + Format(K, "(none)"); + ({"config file(s)" = K, [V0 | Vs]}) -> + Format(K, V0), [Format("", V) || V <- Vs]; + ({K, V}) -> + Format(K, V) + end, Settings), + io:nl(). + +app_location() -> + {ok, Application} = application:get_application(), + filename:absname(code:where_is_file(atom_to_list(Application) ++ ".app")). + +home_dir() -> + case init:get_argument(home) of + {ok, [[Home]]} -> Home; + Other -> Other + end. + +config_files() -> + case init:get_argument(config) of + {ok, Files} -> [filename:absname( + filename:rootname(File, ".config") ++ ".config") || + File <- Files]; + error -> [] + end. 
diff --git a/src/rabbit_access_control.erl b/src/rabbit_access_control.erl index b0b57af4..c0ae18c0 100644 --- a/src/rabbit_access_control.erl +++ b/src/rabbit_access_control.erl @@ -18,21 +18,17 @@ -include("rabbit.hrl"). --export([user_pass_login/2, check_user_pass_login/2, check_user_login/2, - check_vhost_access/2, check_resource_access/3, list_vhosts/2]). +-export([check_user_pass_login/2, check_user_login/2, + check_vhost_access/2, check_resource_access/3]). %%---------------------------------------------------------------------------- -ifdef(use_specs). --export_type([permission_atom/0, vhost_permission_atom/0]). +-export_type([permission_atom/0]). -type(permission_atom() :: 'configure' | 'read' | 'write'). --type(vhost_permission_atom() :: 'read' | 'write'). --spec(user_pass_login/2 :: - (rabbit_types:username(), rabbit_types:password()) - -> rabbit_types:user() | rabbit_types:channel_exit()). -spec(check_user_pass_login/2 :: (rabbit_types:username(), rabbit_types:password()) -> {'ok', rabbit_types:user()} | {'refused', string(), [any()]}). @@ -42,23 +38,11 @@ -spec(check_resource_access/3 :: (rabbit_types:user(), rabbit_types:r(atom()), permission_atom()) -> 'ok' | rabbit_types:channel_exit()). --spec(list_vhosts/2 :: (rabbit_types:user(), vhost_permission_atom()) - -> [rabbit_types:vhost()]). -endif. %%---------------------------------------------------------------------------- -user_pass_login(User, Pass) -> - ?LOGDEBUG("Login with user ~p pass ~p~n", [User, Pass]), - case check_user_pass_login(User, Pass) of - {refused, Msg, Args} -> - rabbit_misc:protocol_error( - access_refused, "login refused: ~s", [io_lib:format(Msg, Args)]); - {ok, U} -> - U - end. - check_user_pass_login(Username, Password) -> check_user_login(Username, [{password, Password}]). 
@@ -83,7 +67,7 @@ check_vhost_access(User = #user{ username = Username, check_access( fun() -> rabbit_vhost:exists(VHostPath) andalso - Module:check_vhost_access(User, VHostPath, write) + Module:check_vhost_access(User, VHostPath) end, "~s failed checking vhost access to ~s for ~s: ~p~n", [Module, VHostPath, Username], @@ -117,21 +101,3 @@ check_access(Fun, ErrStr, ErrArgs, RefStr, RefArgs) -> false -> rabbit_misc:protocol_error(access_refused, RefStr, RefArgs) end. - -%% Permission = write -> log in -%% Permission = read -> learn of the existence of (only relevant for -%% management plugin) -list_vhosts(User = #user{username = Username, auth_backend = Module}, - Permission) -> - lists:filter( - fun(VHost) -> - case Module:check_vhost_access(User, VHost, Permission) of - {error, _} = E -> - rabbit_log:warning("~w failed checking vhost access " - "to ~s for ~s: ~p~n", - [Module, VHost, Username, E]), - false; - Else -> - Else - end - end, rabbit_vhost:list()). diff --git a/src/rabbit_alarm.erl b/src/rabbit_alarm.erl index 37e40981..d38ecb91 100644 --- a/src/rabbit_alarm.erl +++ b/src/rabbit_alarm.erl @@ -18,12 +18,14 @@ -behaviour(gen_event). --export([start/0, stop/0, register/2]). +-export([start/0, stop/0, register/2, on_node_up/1, on_node_down/1]). -export([init/1, handle_call/2, handle_event/2, handle_info/2, terminate/2, code_change/3]). --record(alarms, {alertees, vm_memory_high_watermark = false}). +-export([remote_conserve_memory/2]). %% Internal use only + +-record(alarms, {alertees, alarmed_nodes}). %%---------------------------------------------------------------------------- @@ -33,6 +35,8 @@ -spec(start/0 :: () -> 'ok'). -spec(stop/0 :: () -> 'ok'). -spec(register/2 :: (pid(), mfa_tuple()) -> boolean()). +-spec(on_node_up/1 :: (node()) -> 'ok'). +-spec(on_node_down/1 :: (node()) -> 'ok'). -endif. @@ -56,39 +60,57 @@ register(Pid, HighMemMFA) -> {register, Pid, HighMemMFA}, infinity). 
+on_node_up(Node) -> gen_event:notify(alarm_handler, {node_up, Node}). + +on_node_down(Node) -> gen_event:notify(alarm_handler, {node_down, Node}). + +%% Can't use alarm_handler:{set,clear}_alarm because that doesn't +%% permit notifying a remote node. +remote_conserve_memory(Pid, true) -> + gen_event:notify({alarm_handler, node(Pid)}, + {set_alarm, {{vm_memory_high_watermark, node()}, []}}); +remote_conserve_memory(Pid, false) -> + gen_event:notify({alarm_handler, node(Pid)}, + {clear_alarm, {vm_memory_high_watermark, node()}}). + %%---------------------------------------------------------------------------- init([]) -> - {ok, #alarms{alertees = dict:new()}}. + {ok, #alarms{alertees = dict:new(), + alarmed_nodes = sets:new()}}. -handle_call({register, Pid, {M, F, A} = HighMemMFA}, - State = #alarms{alertees = Alertess}) -> - _MRef = erlang:monitor(process, Pid), - ok = case State#alarms.vm_memory_high_watermark of - true -> apply(M, F, A ++ [Pid, true]); - false -> ok - end, - NewAlertees = dict:store(Pid, HighMemMFA, Alertess), - {ok, State#alarms.vm_memory_high_watermark, - State#alarms{alertees = NewAlertees}}; +handle_call({register, Pid, HighMemMFA}, State) -> + {ok, 0 < sets:size(State#alarms.alarmed_nodes), + internal_register(Pid, HighMemMFA, State)}; handle_call(_Request, State) -> {ok, not_understood, State}. 
-handle_event({set_alarm, {vm_memory_high_watermark, []}}, State) -> - ok = alert(true, State#alarms.alertees), - {ok, State#alarms{vm_memory_high_watermark = true}}; +handle_event({set_alarm, {{vm_memory_high_watermark, Node}, []}}, State) -> + {ok, maybe_alert(fun sets:add_element/2, Node, State)}; -handle_event({clear_alarm, vm_memory_high_watermark}, State) -> - ok = alert(false, State#alarms.alertees), - {ok, State#alarms{vm_memory_high_watermark = false}}; +handle_event({clear_alarm, {vm_memory_high_watermark, Node}}, State) -> + {ok, maybe_alert(fun sets:del_element/2, Node, State)}; + +handle_event({node_up, Node}, State) -> + %% Must do this via notify and not call to avoid possible deadlock. + ok = gen_event:notify( + {alarm_handler, Node}, + {register, self(), {?MODULE, remote_conserve_memory, []}}), + {ok, State}; + +handle_event({node_down, Node}, State) -> + {ok, maybe_alert(fun sets:del_element/2, Node, State)}; + +handle_event({register, Pid, HighMemMFA}, State) -> + {ok, internal_register(Pid, HighMemMFA, State)}; handle_event(_Event, State) -> {ok, State}. handle_info({'DOWN', _MRef, process, Pid, _Reason}, - State = #alarms{alertees = Alertess}) -> - {ok, State#alarms{alertees = dict:erase(Pid, Alertess)}}; + State = #alarms{alertees = Alertees}) -> + {ok, State#alarms{alertees = dict:erase(Pid, Alertees)}}; handle_info(_Info, State) -> {ok, State}. @@ -100,10 +122,45 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. %%---------------------------------------------------------------------------- -alert(_Alert, undefined) -> - ok; -alert(Alert, Alertees) -> - dict:fold(fun (Pid, {M, F, A}, Acc) -> - ok = erlang:apply(M, F, A ++ [Pid, Alert]), - Acc + +maybe_alert(SetFun, Node, State = #alarms{alarmed_nodes = AN, + alertees = Alertees}) -> + AN1 = SetFun(Node, AN), + BeforeSz = sets:size(AN), + AfterSz = sets:size(AN1), + %% If we have changed our alarm state, inform the remotes. 
+ IsLocal = Node =:= node(), + if IsLocal andalso BeforeSz < AfterSz -> ok = alert_remote(true, Alertees); + IsLocal andalso BeforeSz > AfterSz -> ok = alert_remote(false, Alertees); + true -> ok + end, + %% If the overall alarm state has changed, inform the locals. + case {BeforeSz, AfterSz} of + {0, 1} -> ok = alert_local(true, Alertees); + {1, 0} -> ok = alert_local(false, Alertees); + {_, _} -> ok + end, + State#alarms{alarmed_nodes = AN1}. + +alert_local(Alert, Alertees) -> alert(Alert, Alertees, fun erlang:'=:='/2). + +alert_remote(Alert, Alertees) -> alert(Alert, Alertees, fun erlang:'=/='/2). + +alert(Alert, Alertees, NodeComparator) -> + Node = node(), + dict:fold(fun (Pid, {M, F, A}, ok) -> + case NodeComparator(Node, node(Pid)) of + true -> apply(M, F, A ++ [Pid, Alert]); + false -> ok + end end, ok, Alertees). + +internal_register(Pid, {M, F, A} = HighMemMFA, + State = #alarms{alertees = Alertees}) -> + _MRef = erlang:monitor(process, Pid), + case sets:is_element(node(), State#alarms.alarmed_nodes) of + true -> ok = apply(M, F, A ++ [Pid, true]); + false -> ok + end, + NewAlertees = dict:store(Pid, HighMemMFA, Alertees), + State#alarms{alertees = NewAlertees}. diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index dc3f249a..6024db65 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -17,21 +17,23 @@ -module(rabbit_amqqueue). -export([start/0, stop/0, declare/5, delete_immediately/1, delete/3, purge/1]). --export([internal_declare/2, internal_delete/1, - maybe_run_queue_via_backing_queue/2, - maybe_run_queue_via_backing_queue_async/2, - sync_timeout/1, update_ram_duration/1, set_ram_duration_target/2, - set_maximum_since_use/2, maybe_expire/1, drop_expired/1]). -export([pseudo_queue/2]). -export([lookup/1, with/2, with_or_die/2, assert_equivalence/5, check_exclusive_access/2, with_exclusive_access_or_die/3, - stat/1, deliver/2, requeue/3, ack/4, reject/4]). + stat/1, deliver/2, requeue/3, ack/3, reject/4]). 
-export([list/1, info_keys/0, info/1, info/2, info_all/1, info_all/2]). --export([consumers/1, consumers_all/1]). +-export([consumers/1, consumers_all/1, consumer_info_keys/0]). -export([basic_get/3, basic_consume/7, basic_cancel/4]). -export([notify_sent/2, unblock/2, flush_all/2]). --export([commit_all/3, rollback_all/3, notify_down_all/2, limit_all/3]). +-export([notify_down_all/2, limit_all/3]). -export([on_node_down/1]). +-export([store_queue/1]). + + +%% internal +-export([internal_declare/2, internal_delete/1, run_backing_queue/3, + sync_timeout/1, update_ram_duration/1, set_ram_duration_target/2, + set_maximum_since_use/2, maybe_expire/1, drop_expired/1]). -include("rabbit.hrl"). -include_lib("stdlib/include/qlc.hrl"). @@ -51,11 +53,11 @@ -type(qmsg() :: {name(), pid(), msg_id(), boolean(), rabbit_types:message()}). -type(msg_id() :: non_neg_integer()). -type(ok_or_errors() :: - 'ok' | {'error', [{'error' | 'exit' | 'throw', any()}]}). + 'ok' | {'error', [{'error' | 'exit' | 'throw', any()}]}). -type(queue_or_not_found() :: rabbit_types:amqqueue() | 'not_found'). --spec(start/0 :: () -> 'ok'). +-spec(start/0 :: () -> [name()]). -spec(stop/0 :: () -> 'ok'). -spec(declare/5 :: (name(), boolean(), boolean(), @@ -90,6 +92,7 @@ -spec(consumers/1 :: (rabbit_types:amqqueue()) -> [{pid(), rabbit_types:ctag(), boolean()}]). +-spec(consumer_info_keys/0 :: () -> rabbit_types:info_keys()). -spec(consumers_all/1 :: (rabbit_types:vhost()) -> [{name(), pid(), rabbit_types:ctag(), boolean()}]). @@ -98,32 +101,28 @@ -> {'ok', non_neg_integer(), non_neg_integer()}). -spec(delete_immediately/1 :: (rabbit_types:amqqueue()) -> 'ok'). 
-spec(delete/3 :: - (rabbit_types:amqqueue(), 'false', 'false') + (rabbit_types:amqqueue(), 'false', 'false') -> qlen(); - (rabbit_types:amqqueue(), 'true' , 'false') + (rabbit_types:amqqueue(), 'true' , 'false') -> qlen() | rabbit_types:error('in_use'); - (rabbit_types:amqqueue(), 'false', 'true' ) + (rabbit_types:amqqueue(), 'false', 'true' ) -> qlen() | rabbit_types:error('not_empty'); - (rabbit_types:amqqueue(), 'true' , 'true' ) + (rabbit_types:amqqueue(), 'true' , 'true' ) -> qlen() | rabbit_types:error('in_use') | rabbit_types:error('not_empty')). -spec(purge/1 :: (rabbit_types:amqqueue()) -> qlen()). -spec(deliver/2 :: (pid(), rabbit_types:delivery()) -> boolean()). -spec(requeue/3 :: (pid(), [msg_id()], pid()) -> 'ok'). --spec(ack/4 :: - (pid(), rabbit_types:maybe(rabbit_types:txn()), [msg_id()], pid()) - -> 'ok'). +-spec(ack/3 :: (pid(), [msg_id()], pid()) -> 'ok'). -spec(reject/4 :: (pid(), [msg_id()], boolean(), pid()) -> 'ok'). --spec(commit_all/3 :: ([pid()], rabbit_types:txn(), pid()) -> ok_or_errors()). --spec(rollback_all/3 :: ([pid()], rabbit_types:txn(), pid()) -> 'ok'). -spec(notify_down_all/2 :: ([pid()], pid()) -> ok_or_errors()). -spec(limit_all/3 :: ([pid()], pid(), pid() | 'undefined') -> ok_or_errors()). -spec(basic_get/3 :: (rabbit_types:amqqueue(), pid(), boolean()) -> - {'ok', non_neg_integer(), qmsg()} | 'empty'). + {'ok', non_neg_integer(), qmsg()} | 'empty'). -spec(basic_consume/7 :: - (rabbit_types:amqqueue(), boolean(), pid(), pid() | 'undefined', - rabbit_types:ctag(), boolean(), any()) + (rabbit_types:amqqueue(), boolean(), pid(), pid() | 'undefined', + rabbit_types:ctag(), boolean(), any()) -> rabbit_types:ok_or_error('exclusive_consume_unavailable')). -spec(basic_cancel/4 :: (rabbit_types:amqqueue(), pid(), rabbit_types:ctag(), any()) -> 'ok'). 
@@ -136,12 +135,11 @@ -spec(internal_delete/1 :: (name()) -> rabbit_types:ok_or_error('not_found') | rabbit_types:connection_exit() | - fun ((boolean()) -> rabbit_types:ok_or_error('not_found') | - rabbit_types:connection_exit())). --spec(maybe_run_queue_via_backing_queue/2 :: - (pid(), (fun ((A) -> {[rabbit_guid:guid()], A}))) -> 'ok'). --spec(maybe_run_queue_via_backing_queue_async/2 :: - (pid(), (fun ((A) -> {[rabbit_guid:guid()], A}))) -> 'ok'). + fun (() -> rabbit_types:ok_or_error('not_found') | + rabbit_types:connection_exit())). +-spec(run_backing_queue/3 :: + (pid(), atom(), + (fun ((atom(), A) -> {[rabbit_types:msg_id()], A}))) -> 'ok'). -spec(sync_timeout/1 :: (pid()) -> 'ok'). -spec(update_ram_duration/1 :: (pid()) -> 'ok'). -spec(set_ram_duration_target/2 :: (pid(), number() | 'infinity') -> 'ok'). @@ -154,6 +152,9 @@ %%---------------------------------------------------------------------------- +-define(CONSUMER_INFO_KEYS, + [queue_name, channel_pid, consumer_tag, ack_required]). + start() -> DurableQueues = find_durable_queues(), {ok, BQ} = application:get_env(rabbit, backing_queue_module), @@ -163,8 +164,7 @@ start() -> {rabbit_amqqueue_sup, {rabbit_amqqueue_sup, start_link, []}, transient, infinity, supervisor, [rabbit_amqqueue_sup]}), - _RealDurableQueues = recover_durable_queues(DurableQueues), - ok. + recover_durable_queues(DurableQueues). stop() -> ok = supervisor:terminate_child(rabbit_sup, rabbit_amqqueue_sup), @@ -183,19 +183,22 @@ find_durable_queues() -> end). recover_durable_queues(DurableQueues) -> - Qs = [start_queue_process(Q) || Q <- DurableQueues], - [Q || Q <- Qs, - gen_server2:call(Q#amqqueue.pid, {init, true}, infinity) == Q]. + Qs = [start_queue_process(node(), Q) || Q <- DurableQueues], + [QName || Q = #amqqueue{name = QName, pid = Pid} <- Qs, + gen_server2:call(Pid, {init, true}, infinity) == {new, Q}]. 
declare(QueueName, Durable, AutoDelete, Args, Owner) -> ok = check_declare_arguments(QueueName, Args), - Q = start_queue_process(#amqqueue{name = QueueName, - durable = Durable, - auto_delete = AutoDelete, - arguments = Args, - exclusive_owner = Owner, - pid = none}), - case gen_server2:call(Q#amqqueue.pid, {init, false}) of + {Node, MNodes} = determine_queue_nodes(Args), + Q = start_queue_process(Node, #amqqueue{name = QueueName, + durable = Durable, + auto_delete = AutoDelete, + arguments = Args, + exclusive_owner = Owner, + pid = none, + slave_pids = [], + mirror_nodes = MNodes}), + case gen_server2:call(Q#amqqueue.pid, {init, false}, infinity) of not_found -> rabbit_misc:not_found(QueueName); Q1 -> Q1 end. @@ -211,15 +214,15 @@ internal_declare(Q = #amqqueue{name = QueueName}, false) -> case mnesia:read({rabbit_durable_queue, QueueName}) of [] -> ok = store_queue(Q), B = add_default_binding(Q), - fun (Tx) -> B(Tx), Q end; - [_] -> %% Q exists on stopped node - rabbit_misc:const(not_found) + fun () -> B(), Q end; + %% Q exists on stopped node + [_] -> rabbit_misc:const(not_found) end; [ExistingQ = #amqqueue{pid = QPid}] -> - case is_process_alive(QPid) of + case rabbit_misc:is_process_alive(QPid) of true -> rabbit_misc:const(ExistingQ); false -> TailFun = internal_delete(QueueName), - fun (Tx) -> TailFun(Tx), ExistingQ end + fun () -> TailFun(), ExistingQ end end end end). @@ -232,8 +235,24 @@ store_queue(Q = #amqqueue{durable = false}) -> ok = mnesia:write(rabbit_queue, Q, write), ok. 
-start_queue_process(Q) -> - {ok, Pid} = rabbit_amqqueue_sup:start_child([Q]), +determine_queue_nodes(Args) -> + Policy = rabbit_misc:table_lookup(Args, <<"x-ha-policy">>), + PolicyParams = rabbit_misc:table_lookup(Args, <<"x-ha-policy-params">>), + case {Policy, PolicyParams} of + {{_Type, <<"nodes">>}, {array, Nodes}} -> + case [list_to_atom(binary_to_list(Node)) || + {longstr, Node} <- Nodes] of + [Node] -> {Node, undefined}; + [First | Rest] -> {First, Rest} + end; + {{_Type, <<"all">>}, _} -> + {node(), all}; + _ -> + {node(), undefined} + end. + +start_queue_process(Node, Q) -> + {ok, Pid} = rabbit_amqqueue_sup:start_child(Node, [Q]), Q#amqqueue{pid = Pid}. add_default_binding(#amqqueue{name = QueueName}) -> @@ -249,8 +268,13 @@ lookup(Name) -> with(Name, F, E) -> case lookup(Name) of - {ok, Q} -> rabbit_misc:with_exit_handler(E, fun () -> F(Q) end); - {error, not_found} -> E() + {ok, Q = #amqqueue{slave_pids = []}} -> + rabbit_misc:with_exit_handler(E, fun () -> F(Q) end); + {ok, Q} -> + E1 = fun () -> timer:sleep(25), with(Name, F, E) end, + rabbit_misc:with_exit_handler(E1, fun () -> F(Q) end); + {error, not_found} -> + E() end. with(Name, F) -> @@ -286,41 +310,58 @@ with_exclusive_access_or_die(Name, ReaderPid, F) -> fun (Q) -> check_exclusive_access(Q, ReaderPid), F(Q) end). assert_args_equivalence(#amqqueue{name = QueueName, arguments = Args}, - RequiredArgs) -> - rabbit_misc:assert_args_equivalence(Args, RequiredArgs, QueueName, - [<<"x-expires">>]). + RequiredArgs) -> + rabbit_misc:assert_args_equivalence( + Args, RequiredArgs, QueueName, + [<<"x-expires">>, <<"x-message-ttl">>, <<"x-ha-policy">>]). 
check_declare_arguments(QueueName, Args) -> - [case Fun(rabbit_misc:table_lookup(Args, Key)) of + [case Fun(rabbit_misc:table_lookup(Args, Key), Args) of ok -> ok; {error, Error} -> rabbit_misc:protocol_error( precondition_failed, "invalid arg '~s' for ~s: ~w", [Key, rabbit_misc:rs(QueueName), Error]) end || {Key, Fun} <- - [{<<"x-expires">>, fun check_expires_argument/1}, - {<<"x-message-ttl">>, fun check_message_ttl_argument/1}]], + [{<<"x-expires">>, fun check_integer_argument/2}, + {<<"x-message-ttl">>, fun check_integer_argument/2}, + {<<"x-ha-policy">>, fun check_ha_policy_argument/2}]], ok. -check_expires_argument(Val) -> - check_integer_argument(Val, - expires_not_of_acceptable_type, - expires_zero_or_less). - -check_message_ttl_argument(Val) -> - check_integer_argument(Val, - ttl_not_of_acceptable_type, - ttl_zero_or_less). - -check_integer_argument(undefined, _, _) -> +check_integer_argument(undefined, _Args) -> ok; -check_integer_argument({Type, Val}, InvalidTypeError, _) when Val > 0 -> +check_integer_argument({Type, Val}, _Args) when Val > 0 -> case lists:member(Type, ?INTEGER_ARG_TYPES) of true -> ok; - false -> {error, {InvalidTypeError, Type, Val}} + false -> {error, {unacceptable_type, Type}} + end; +check_integer_argument({_Type, Val}, _Args) -> + {error, {value_zero_or_less, Val}}. 
+ +check_ha_policy_argument(undefined, _Args) -> + ok; +check_ha_policy_argument({longstr, <<"all">>}, _Args) -> + ok; +check_ha_policy_argument({longstr, <<"nodes">>}, Args) -> + case rabbit_misc:table_lookup(Args, <<"x-ha-policy-params">>) of + undefined -> + {error, {require, 'x-ha-policy-params'}}; + {array, []} -> + {error, {require_non_empty_list_of_nodes_for_ha}}; + {array, Ary} -> + case lists:all(fun ({longstr, _Node}) -> true; + (_ ) -> false + end, Ary) of + true -> ok; + false -> {error, {require_node_list_as_longstrs_for_ha, Ary}} + end; + {Type, _} -> + {error, {ha_nodes_policy_params_not_array_of_longstr, Type}} end; -check_integer_argument({_Type, _Val}, _, ZeroOrLessError) -> - {error, ZeroOrLessError}. +check_ha_policy_argument({longstr, Policy}, _Args) -> + {error, {invalid_ha_policy, Policy}}; +check_ha_policy_argument({Type, _}, _Args) -> + {error, {unacceptable_type, Type}}. list(VHostPath) -> mnesia:dirty_match_object( @@ -332,10 +373,10 @@ info_keys() -> rabbit_amqqueue_process:info_keys(). map(VHostPath, F) -> rabbit_misc:filter_exit_map(F, list(VHostPath)). info(#amqqueue{ pid = QPid }) -> - delegate_call(QPid, info, infinity). + delegate_call(QPid, info). info(#amqqueue{ pid = QPid }, Items) -> - case delegate_call(QPid, {info, Items}, infinity) of + case delegate_call(QPid, {info, Items}) of {ok, Res} -> Res; {error, Error} -> throw(Error) end. @@ -345,24 +386,30 @@ info_all(VHostPath) -> map(VHostPath, fun (Q) -> info(Q) end). info_all(VHostPath, Items) -> map(VHostPath, fun (Q) -> info(Q, Items) end). consumers(#amqqueue{ pid = QPid }) -> - delegate_call(QPid, consumers, infinity). + delegate_call(QPid, consumers). + +consumer_info_keys() -> ?CONSUMER_INFO_KEYS. 
consumers_all(VHostPath) -> + ConsumerInfoKeys=consumer_info_keys(), lists:append( map(VHostPath, - fun (Q) -> [{Q#amqqueue.name, ChPid, ConsumerTag, AckRequired} || + fun (Q) -> + [lists:zip(ConsumerInfoKeys, + [Q#amqqueue.name, ChPid, ConsumerTag, AckRequired]) || {ChPid, ConsumerTag, AckRequired} <- consumers(Q)] end)). -stat(#amqqueue{pid = QPid}) -> delegate_call(QPid, stat, infinity). +stat(#amqqueue{pid = QPid}) -> + delegate_call(QPid, stat). delete_immediately(#amqqueue{ pid = QPid }) -> gen_server2:cast(QPid, delete_immediately). delete(#amqqueue{ pid = QPid }, IfUnused, IfEmpty) -> - delegate_call(QPid, {delete, IfUnused, IfEmpty}, infinity). + delegate_call(QPid, {delete, IfUnused, IfEmpty}). -purge(#amqqueue{ pid = QPid }) -> delegate_call(QPid, purge, infinity). +purge(#amqqueue{ pid = QPid }) -> delegate_call(QPid, purge). deliver(QPid, Delivery = #delivery{immediate = true}) -> gen_server2:call(QPid, {deliver_immediately, Delivery}, infinity); @@ -374,23 +421,14 @@ deliver(QPid, Delivery) -> true. requeue(QPid, MsgIds, ChPid) -> - delegate_call(QPid, {requeue, MsgIds, ChPid}, infinity). + delegate_call(QPid, {requeue, MsgIds, ChPid}). -ack(QPid, Txn, MsgIds, ChPid) -> - delegate_cast(QPid, {ack, Txn, MsgIds, ChPid}). +ack(QPid, MsgIds, ChPid) -> + delegate_cast(QPid, {ack, MsgIds, ChPid}). reject(QPid, MsgIds, Requeue, ChPid) -> delegate_cast(QPid, {reject, MsgIds, Requeue, ChPid}). -commit_all(QPids, Txn, ChPid) -> - safe_delegate_call_ok( - fun (QPid) -> gen_server2:call(QPid, {commit, Txn, ChPid}, infinity) end, - QPids). - -rollback_all(QPids, Txn, ChPid) -> - delegate:invoke_no_result( - QPids, fun (QPid) -> gen_server2:cast(QPid, {rollback, Txn, ChPid}) end). - notify_down_all(QPids, ChPid) -> safe_delegate_call_ok( fun (QPid) -> gen_server2:call(QPid, {notify_down, ChPid}, infinity) end, @@ -403,20 +441,18 @@ limit_all(QPids, ChPid, LimiterPid) -> end). 
basic_get(#amqqueue{pid = QPid}, ChPid, NoAck) -> - delegate_call(QPid, {basic_get, ChPid, NoAck}, infinity). + delegate_call(QPid, {basic_get, ChPid, NoAck}). basic_consume(#amqqueue{pid = QPid}, NoAck, ChPid, LimiterPid, ConsumerTag, ExclusiveConsume, OkMsg) -> delegate_call(QPid, {basic_consume, NoAck, ChPid, - LimiterPid, ConsumerTag, ExclusiveConsume, OkMsg}, - infinity). + LimiterPid, ConsumerTag, ExclusiveConsume, OkMsg}). basic_cancel(#amqqueue{pid = QPid}, ChPid, ConsumerTag, OkMsg) -> - ok = delegate_call(QPid, {basic_cancel, ChPid, ConsumerTag, OkMsg}, - infinity). + ok = delegate_call(QPid, {basic_cancel, ChPid, ConsumerTag, OkMsg}). notify_sent(QPid, ChPid) -> - delegate_cast(QPid, {notify_sent, ChPid}). + gen_server2:cast(QPid, {notify_sent, ChPid}). unblock(QPid, ChPid) -> delegate_cast(QPid, {unblock, ChPid}). @@ -438,17 +474,12 @@ internal_delete(QueueName) -> case mnesia:wread({rabbit_queue, QueueName}) of [] -> rabbit_misc:const({error, not_found}); [_] -> Deletions = internal_delete1(QueueName), - fun (Tx) -> ok = rabbit_binding:process_deletions( - Deletions, Tx) - end + rabbit_binding:process_deletions(Deletions) end end). -maybe_run_queue_via_backing_queue(QPid, Fun) -> - gen_server2:call(QPid, {maybe_run_queue_via_backing_queue, Fun}, infinity). - -maybe_run_queue_via_backing_queue_async(QPid, Fun) -> - gen_server2:cast(QPid, {maybe_run_queue_via_backing_queue, Fun}). +run_backing_queue(QPid, Mod, Fun) -> + gen_server2:cast(QPid, {run_backing_queue, Mod, Fun}). sync_timeout(QPid) -> gen_server2:cast(QPid, sync_timeout). @@ -469,18 +500,15 @@ drop_expired(QPid) -> gen_server2:cast(QPid, drop_expired). 
on_node_down(Node) -> - rabbit_misc:execute_mnesia_transaction( - fun () -> qlc:e(qlc:q([delete_queue(QueueName) || - #amqqueue{name = QueueName, pid = Pid} - <- mnesia:table(rabbit_queue), - node(Pid) == Node])) - end, - fun (Deletions, Tx) -> - rabbit_binding:process_deletions( - lists:foldl(fun rabbit_binding:combine_deletions/2, - rabbit_binding:new_deletions(), - Deletions), - Tx) + rabbit_misc:execute_mnesia_tx_with_tail( + fun () -> Dels = qlc:e(qlc:q([delete_queue(QueueName) || + #amqqueue{name = QueueName, pid = Pid, + slave_pids = []} + <- mnesia:table(rabbit_queue), + node(Pid) == Node])), + rabbit_binding:process_deletions( + lists:foldl(fun rabbit_binding:combine_deletions/2, + rabbit_binding:new_deletions(), Dels)) end). delete_queue(QueueName) -> @@ -488,11 +516,13 @@ delete_queue(QueueName) -> rabbit_binding:remove_transient_for_destination(QueueName). pseudo_queue(QueueName, Pid) -> - #amqqueue{name = QueueName, - durable = false, - auto_delete = false, - arguments = [], - pid = Pid}. + #amqqueue{name = QueueName, + durable = false, + auto_delete = false, + arguments = [], + pid = Pid, + slave_pids = [], + mirror_nodes = undefined}. safe_delegate_call_ok(F, Pids) -> case delegate:invoke(Pids, fun (Pid) -> @@ -504,8 +534,8 @@ safe_delegate_call_ok(F, Pids) -> {_, Bad} -> {error, Bad} end. -delegate_call(Pid, Msg, Timeout) -> - delegate:invoke(Pid, fun (P) -> gen_server2:call(P, Msg, Timeout) end). +delegate_call(Pid, Msg) -> + delegate:invoke(Pid, fun (P) -> gen_server2:call(P, Msg, infinity) end). delegate_cast(Pid, Msg) -> delegate:invoke_no_result(Pid, fun (P) -> gen_server2:cast(P, Msg) end). diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 2999aab2..e787fa84 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -21,7 +21,7 @@ -behaviour(gen_server2). -define(UNSENT_MESSAGE_LIMIT, 100). --define(SYNC_INTERVAL, 5). %% milliseconds +-define(SYNC_INTERVAL, 25). 
%% milliseconds -define(RAM_DURATION_UPDATE_INTERVAL, 5000). -define(BASE_MESSAGE_PROPERTIES, @@ -31,9 +31,11 @@ -export([init/1, terminate/2, code_change/3, handle_call/3, handle_cast/2, handle_info/2, handle_pre_hibernate/1, prioritise_call/3, - prioritise_cast/2, prioritise_info/2]). + prioritise_cast/2, prioritise_info/2, format_message_queue/2]). -% Queue's state +-export([init_with_backing_queue_state/7]). + +%% Queue's state -record(q, {q, exclusive_consumer, has_had_consumers, @@ -46,7 +48,7 @@ rate_timer_ref, expiry_timer_ref, stats_timer, - guid_to_channel, + msg_id_to_channel, ttl, ttl_timer_ref }). @@ -60,7 +62,6 @@ monitor_ref, acktags, is_limit_active, - txn, unsent_message_count}). -define(STATISTICS_KEYS, @@ -72,7 +73,8 @@ messages, consumers, memory, - backing_queue_status + backing_queue_status, + slave_pids ]). -define(CREATION_EVENT_KEYS, @@ -81,7 +83,8 @@ durable, auto_delete, arguments, - owner_pid + owner_pid, + mirror_nodes ]). -define(INFO_KEYS, ?CREATION_EVENT_KEYS ++ ?STATISTICS_KEYS -- [pid]). @@ -97,12 +100,11 @@ info_keys() -> ?INFO_KEYS. init(Q) -> ?LOGDEBUG("Queue starting - ~p~n", [Q]), process_flag(trap_exit, true), - {ok, BQ} = application:get_env(backing_queue_module), {ok, #q{q = Q#amqqueue{pid = self()}, exclusive_consumer = none, has_had_consumers = false, - backing_queue = BQ, + backing_queue = backing_queue_module(Q), backing_queue_state = undefined, active_consumers = queue:new(), blocked_consumers = queue:new(), @@ -112,17 +114,47 @@ init(Q) -> expiry_timer_ref = undefined, ttl = undefined, stats_timer = rabbit_event:init_stats_timer(), - guid_to_channel = dict:new()}, hibernate, + msg_id_to_channel = dict:new()}, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. 
-terminate(shutdown, State = #q{backing_queue = BQ}) -> - terminate_shutdown(fun (BQS) -> BQ:terminate(BQS) end, State); -terminate({shutdown, _}, State = #q{backing_queue = BQ}) -> - terminate_shutdown(fun (BQS) -> BQ:terminate(BQS) end, State); -terminate(_Reason, State = #q{backing_queue = BQ}) -> +init_with_backing_queue_state(Q = #amqqueue{exclusive_owner = Owner}, BQ, BQS, + RateTRef, AckTags, Deliveries, MTC) -> + ?LOGDEBUG("Queue starting - ~p~n", [Q]), + case Owner of + none -> ok; + _ -> erlang:monitor(process, Owner) + end, + State = requeue_and_run( + AckTags, + process_args( + #q{q = Q, + exclusive_consumer = none, + has_had_consumers = false, + backing_queue = BQ, + backing_queue_state = BQS, + active_consumers = queue:new(), + blocked_consumers = queue:new(), + expires = undefined, + sync_timer_ref = undefined, + rate_timer_ref = RateTRef, + expiry_timer_ref = undefined, + ttl = undefined, + stats_timer = rabbit_event:init_stats_timer(), + msg_id_to_channel = MTC})), + lists:foldl( + fun (Delivery, StateN) -> deliver_or_enqueue(Delivery, StateN) end, + State, Deliveries). + +terminate(shutdown = R, State = #q{backing_queue = BQ}) -> + terminate_shutdown(fun (BQS) -> BQ:terminate(R, BQS) end, State); +terminate({shutdown, _} = R, State = #q{backing_queue = BQ}) -> + terminate_shutdown(fun (BQS) -> BQ:terminate(R, BQS) end, State); +terminate(Reason, State = #q{backing_queue = BQ}) -> %% FIXME: How do we cancel active subscriptions? terminate_shutdown(fun (BQS) -> - BQS1 = BQ:delete_and_terminate(BQS), + rabbit_event:notify( + queue_deleted, [{pid, self()}]), + BQS1 = BQ:delete_and_terminate(Reason, BQS), %% don't care if the internal delete %% doesn't return 'ok'. 
rabbit_amqqueue:internal_delete(qname(State)), @@ -135,8 +167,7 @@ code_change(_OldVsn, State, _Extra) -> %%---------------------------------------------------------------------------- declare(Recover, From, - State = #q{q = Q = #amqqueue{name = QName, durable = IsDurable}, - backing_queue = BQ, backing_queue_state = undefined, + State = #q{q = Q, backing_queue = BQ, backing_queue_state = undefined, stats_timer = StatsTimer}) -> case rabbit_amqqueue:internal_declare(Q, Recover) of not_found -> {stop, normal, not_found, State}; @@ -147,7 +178,7 @@ declare(Recover, From, ok = rabbit_memory_monitor:register( self(), {rabbit_amqqueue, set_ram_duration_target, [self()]}), - BQS = BQ:init(QName, IsDurable, Recover), + BQS = bq_init(BQ, Q, Recover), State1 = process_args(State#q{backing_queue_state = BQS}), rabbit_event:notify(queue_created, infos(?CREATION_EVENT_KEYS, State1)), @@ -157,6 +188,13 @@ declare(Recover, From, Q1 -> {stop, normal, {existing, Q1}, State} end. +bq_init(BQ, Q, Recover) -> + Self = self(), + BQ:init(Q, Recover, + fun (Mod, Fun) -> + rabbit_amqqueue:run_backing_queue(Self, Mod, Fun) + end). + process_args(State = #q{q = #amqqueue{arguments = Arguments}}) -> lists:foldl(fun({Arg, Fun}, State1) -> case rabbit_misc:table_lookup(Arguments, Arg) of @@ -171,23 +209,14 @@ init_expires(Expires, State) -> ensure_expiry_timer(State#q{expires = Expires}). init_ttl(TTL, State) -> drop_expired_messages(State#q{ttl = TTL}). 
terminate_shutdown(Fun, State) -> - State1 = #q{backing_queue = BQ, backing_queue_state = BQS} = + State1 = #q{backing_queue_state = BQS} = stop_sync_timer(stop_rate_timer(State)), case BQS of - undefined -> State; + undefined -> State1; _ -> ok = rabbit_memory_monitor:deregister(self()), - BQS1 = lists:foldl( - fun (#cr{txn = none}, BQSN) -> - BQSN; - (#cr{txn = Txn}, BQSN) -> - {_AckTags, BQSN1} = - BQ:tx_rollback(Txn, BQSN), - BQSN1 - end, BQS, all_ch_record()), [emit_consumer_deleted(Ch, CTag) || {Ch, CTag, _} <- consumers(State1)], - rabbit_event:notify(queue_deleted, [{pid, self()}]), - State1#q{backing_queue_state = Fun(BQS1)} + State1#q{backing_queue_state = Fun(BQS)} end. reply(Reply, NewState) -> @@ -200,13 +229,23 @@ noreply(NewState) -> {NewState1, Timeout} = next_state(NewState), {noreply, NewState1, Timeout}. -next_state(State) -> - State1 = #q{backing_queue = BQ, backing_queue_state = BQS} = - ensure_rate_timer(State), - State2 = ensure_stats_timer(State1), - case BQ:needs_idle_timeout(BQS) of - true -> {ensure_sync_timer(State2), 0}; - false -> {stop_sync_timer(State2), hibernate} +next_state(State = #q{backing_queue = BQ, backing_queue_state = BQS}) -> + {MsgIds, BQS1} = BQ:drain_confirmed(BQS), + State1 = ensure_stats_timer( + ensure_rate_timer( + confirm_messages(MsgIds, State#q{ + backing_queue_state = BQS1}))), + case BQ:needs_timeout(BQS1) of + false -> {stop_sync_timer(State1), hibernate}; + idle -> {stop_sync_timer(State1), 0 }; + timed -> {ensure_sync_timer(State1), 0 } + end. + +backing_queue_module(#amqqueue{arguments = Args}) -> + case rabbit_misc:table_lookup(Args, <<"x-ha-policy">>) of + undefined -> {ok, BQM} = application:get_env(backing_queue_module), + BQM; + _Policy -> rabbit_mirror_queue_master end. 
ensure_sync_timer(State = #q{sync_timer_ref = undefined}) -> @@ -278,17 +317,15 @@ lookup_ch(ChPid) -> ch_record(ChPid) -> Key = {ch, ChPid}, case get(Key) of - undefined -> - MonitorRef = erlang:monitor(process, ChPid), - C = #cr{consumer_count = 0, - ch_pid = ChPid, - monitor_ref = MonitorRef, - acktags = sets:new(), - is_limit_active = false, - txn = none, - unsent_message_count = 0}, - put(Key, C), - C; + undefined -> MonitorRef = erlang:monitor(process, ChPid), + C = #cr{consumer_count = 0, + ch_pid = ChPid, + monitor_ref = MonitorRef, + acktags = sets:new(), + is_limit_active = false, + unsent_message_count = 0}, + put(Key, C), + C; C = #cr{} -> C end. @@ -297,13 +334,12 @@ store_ch_record(C = #cr{ch_pid = ChPid}) -> maybe_store_ch_record(C = #cr{consumer_count = ConsumerCount, acktags = ChAckTags, - txn = Txn, unsent_message_count = UnsentMessageCount}) -> - case {sets:size(ChAckTags), ConsumerCount, UnsentMessageCount, Txn} of - {0, 0, 0, none} -> ok = erase_ch_record(C), - false; - _ -> store_ch_record(C), - true + case {sets:size(ChAckTags), ConsumerCount, UnsentMessageCount} of + {0, 0, 0} -> ok = erase_ch_record(C), + false; + _ -> store_ch_record(C), + true end. erase_ch_record(#cr{ch_pid = ChPid, @@ -314,18 +350,16 @@ erase_ch_record(#cr{ch_pid = ChPid, erase({ch, ChPid}), ok. -all_ch_record() -> - [C || {{ch, _}, C} <- get()]. +all_ch_record() -> [C || {{ch, _}, C} <- get()]. is_ch_blocked(#cr{unsent_message_count = Count, is_limit_active = Limited}) -> Limited orelse Count >= ?UNSENT_MESSAGE_LIMIT. ch_record_state_transition(OldCR, NewCR) -> - BlockedOld = is_ch_blocked(OldCR), - BlockedNew = is_ch_blocked(NewCR), - if BlockedOld andalso not(BlockedNew) -> unblock; - BlockedNew andalso not(BlockedOld) -> block; - true -> ok + case {is_ch_blocked(OldCR), is_ch_blocked(NewCR)} of + {true, false} -> unblock; + {false, true} -> block; + {_, _} -> ok end. 
deliver_msgs_to_consumers(Funs = {PredFun, DeliverFun}, FunAcc, @@ -360,13 +394,12 @@ deliver_msgs_to_consumers(Funs = {PredFun, DeliverFun}, FunAcc, case ch_record_state_transition(C, NewC) of ok -> {queue:in(QEntry, ActiveConsumersTail), BlockedConsumers}; - block -> - {ActiveConsumers1, BlockedConsumers1} = - move_consumers(ChPid, - ActiveConsumersTail, - BlockedConsumers), - {ActiveConsumers1, - queue:in(QEntry, BlockedConsumers1)} + block -> {ActiveConsumers1, BlockedConsumers1} = + move_consumers(ChPid, + ActiveConsumersTail, + BlockedConsumers), + {ActiveConsumers1, + queue:in(QEntry, BlockedConsumers1)} end, State2 = State1#q{ active_consumers = NewActiveConsumers, @@ -391,56 +424,63 @@ deliver_msgs_to_consumers(Funs = {PredFun, DeliverFun}, FunAcc, {FunAcc, State} end. -deliver_from_queue_pred(IsEmpty, _State) -> - not IsEmpty. +deliver_from_queue_pred(IsEmpty, _State) -> not IsEmpty. deliver_from_queue_deliver(AckRequired, false, State) -> {{Message, IsDelivered, AckTag, Remaining}, State1} = fetch(AckRequired, State), {{Message, IsDelivered, AckTag}, 0 == Remaining, State1}. -confirm_messages(Guids, State = #q{guid_to_channel = GTC}) -> - {CMs, GTC1} = - lists:foldl( - fun(Guid, {CMs, GTC0}) -> - case dict:find(Guid, GTC0) of - {ok, {ChPid, MsgSeqNo}} -> - {[{ChPid, MsgSeqNo} | CMs], dict:erase(Guid, GTC0)}; - _ -> - {CMs, GTC0} - end - end, {[], GTC}, Guids), - case lists:usort(CMs) of - [{Ch, MsgSeqNo} | CMs1] -> - [rabbit_channel:confirm(ChPid, MsgSeqNos) || - {ChPid, MsgSeqNos} <- group_confirms_by_channel( - CMs1, [{Ch, [MsgSeqNo]}])]; - [] -> - ok - end, - State#q{guid_to_channel = GTC1}. - -group_confirms_by_channel([], Acc) -> - Acc; -group_confirms_by_channel([{Ch, Msg1} | CMs], [{Ch, Msgs} | Acc]) -> - group_confirms_by_channel(CMs, [{Ch, [Msg1 | Msgs]} | Acc]); -group_confirms_by_channel([{Ch, Msg1} | CMs], Acc) -> - group_confirms_by_channel(CMs, [{Ch, [Msg1]} | Acc]). 
- -record_confirm_message(#delivery{msg_seq_no = undefined}, State) -> - {no_confirm, State}; -record_confirm_message(#delivery{sender = ChPid, +confirm_messages([], State) -> + State; +confirm_messages(MsgIds, State = #q{msg_id_to_channel = MTC}) -> + {CMs, MTC1} = lists:foldl( + fun(MsgId, {CMs, MTC0}) -> + case dict:find(MsgId, MTC0) of + {ok, {ChPid, MsgSeqNo}} -> + {gb_trees_cons(ChPid, MsgSeqNo, CMs), + dict:erase(MsgId, MTC0)}; + _ -> + {CMs, MTC0} + end + end, {gb_trees:empty(), MTC}, MsgIds), + gb_trees_foreach(fun rabbit_channel:confirm/2, CMs), + State#q{msg_id_to_channel = MTC1}. + +gb_trees_foreach(_, none) -> + ok; +gb_trees_foreach(Fun, {Key, Val, It}) -> + Fun(Key, Val), + gb_trees_foreach(Fun, gb_trees:next(It)); +gb_trees_foreach(Fun, Tree) -> + gb_trees_foreach(Fun, gb_trees:next(gb_trees:iterator(Tree))). + +gb_trees_cons(Key, Value, Tree) -> + case gb_trees:lookup(Key, Tree) of + {value, Values} -> gb_trees:update(Key, [Value | Values], Tree); + none -> gb_trees:insert(Key, [Value], Tree) + end. + +should_confirm_message(#delivery{msg_seq_no = undefined}, _State) -> + never; +should_confirm_message(#delivery{sender = ChPid, msg_seq_no = MsgSeqNo, message = #basic_message { is_persistent = true, - guid = Guid}}, - State = - #q{guid_to_channel = GTC, - q = #amqqueue{durable = true}}) -> - {confirm, - State#q{guid_to_channel = dict:store(Guid, {ChPid, MsgSeqNo}, GTC)}}; -record_confirm_message(_Delivery, State) -> - {no_confirm, State}. + id = MsgId}}, + #q{q = #amqqueue{durable = true}}) -> + {eventually, ChPid, MsgSeqNo, MsgId}; +should_confirm_message(_Delivery, _State) -> + immediately. + +needs_confirming({eventually, _, _, _}) -> true; +needs_confirming(_) -> false. + +maybe_record_confirm_message({eventually, ChPid, MsgSeqNo, MsgId}, + State = #q{msg_id_to_channel = MTC}) -> + State#q{msg_id_to_channel = dict:store(MsgId, {ChPid, MsgSeqNo}, MTC)}; +maybe_record_confirm_message(_Confirm, State) -> + State. 
run_message_queue(State) -> Funs = {fun deliver_from_queue_pred/2, @@ -451,67 +491,71 @@ run_message_queue(State) -> {_IsEmpty1, State2} = deliver_msgs_to_consumers(Funs, IsEmpty, State1), State2. -attempt_delivery(#delivery{txn = none, - sender = ChPid, - message = Message, - msg_seq_no = MsgSeqNo}, - {NeedsConfirming, State = #q{backing_queue = BQ}}) -> - %% must confirm immediately if it has a MsgSeqNo and not NeedsConfirming - case {NeedsConfirming, MsgSeqNo} of - {_, undefined} -> ok; - {no_confirm, _} -> rabbit_channel:confirm(ChPid, [MsgSeqNo]); - {confirm, _} -> ok +attempt_delivery(Delivery = #delivery{sender = ChPid, + message = Message, + msg_seq_no = MsgSeqNo}, + State = #q{backing_queue = BQ, backing_queue_state = BQS}) -> + Confirm = should_confirm_message(Delivery, State), + case Confirm of + immediately -> rabbit_channel:confirm(ChPid, [MsgSeqNo]); + _ -> ok end, - PredFun = fun (IsEmpty, _State) -> not IsEmpty end, - DeliverFun = - fun (AckRequired, false, State1 = #q{backing_queue_state = BQS}) -> - %% we don't need an expiry here because messages are - %% not being enqueued, so we use an empty - %% message_properties. - {AckTag, BQS1} = - BQ:publish_delivered( - AckRequired, Message, - (?BASE_MESSAGE_PROPERTIES)#message_properties{ - needs_confirming = (NeedsConfirming =:= confirm)}, - BQS), - {{Message, false, AckTag}, true, - State1#q{backing_queue_state = BQS1}} - end, - {Delivered, State1} = - deliver_msgs_to_consumers({ PredFun, DeliverFun }, false, State), - {Delivered, NeedsConfirming, State1}; -attempt_delivery(#delivery{txn = Txn, - sender = ChPid, - message = Message}, - {NeedsConfirming, - State = #q{backing_queue = BQ, - backing_queue_state = BQS}}) -> - store_ch_record((ch_record(ChPid))#cr{txn = Txn}), - {true, - NeedsConfirming, - State#q{backing_queue_state = - BQ:tx_publish(Txn, Message, ?BASE_MESSAGE_PROPERTIES, BQS)}}. 
- -deliver_or_enqueue(Delivery, State) -> - case attempt_delivery(Delivery, record_confirm_message(Delivery, State)) of - {true, _, State1} -> - {true, State1}; - {false, NeedsConfirming, State1 = #q{backing_queue = BQ, - backing_queue_state = BQS}} -> - #delivery{message = Message} = Delivery, - BQS1 = BQ:publish(Message, - (message_properties(State)) #message_properties{ - needs_confirming = - (NeedsConfirming =:= confirm)}, - BQS), - {false, ensure_ttl_timer(State1#q{backing_queue_state = BQS1})} + case BQ:is_duplicate(Message, BQS) of + {false, BQS1} -> + PredFun = fun (IsEmpty, _State) -> not IsEmpty end, + DeliverFun = + fun (AckRequired, false, + State1 = #q{backing_queue_state = BQS2}) -> + %% we don't need an expiry here because + %% messages are not being enqueued, so we use + %% an empty message_properties. + {AckTag, BQS3} = + BQ:publish_delivered( + AckRequired, Message, + (?BASE_MESSAGE_PROPERTIES)#message_properties{ + needs_confirming = needs_confirming(Confirm)}, + ChPid, BQS2), + {{Message, false, AckTag}, true, + State1#q{backing_queue_state = BQS3}} + end, + {Delivered, State2} = + deliver_msgs_to_consumers({ PredFun, DeliverFun }, false, + State#q{backing_queue_state = BQS1}), + {Delivered, Confirm, State2}; + {Duplicate, BQS1} -> + %% if the message has previously been seen by the BQ then + %% it must have been seen under the same circumstances as + %% now: i.e. if it is now a deliver_immediately then it + %% must have been before. + Delivered = case Duplicate of + published -> true; + discarded -> false + end, + {Delivered, Confirm, State#q{backing_queue_state = BQS1}} + end. 
+ +deliver_or_enqueue(Delivery = #delivery{message = Message, + sender = ChPid}, State) -> + {Delivered, Confirm, State1} = attempt_delivery(Delivery, State), + State2 = #q{backing_queue = BQ, backing_queue_state = BQS} = + maybe_record_confirm_message(Confirm, State1), + case Delivered of + true -> State2; + false -> BQS1 = + BQ:publish(Message, + (message_properties(State)) #message_properties{ + needs_confirming = needs_confirming(Confirm)}, + ChPid, BQS), + ensure_ttl_timer(State2#q{backing_queue_state = BQS1}) end. requeue_and_run(AckTags, State = #q{backing_queue = BQ, ttl=TTL}) -> - maybe_run_queue_via_backing_queue( - fun (BQS) -> - {[], BQ:requeue(AckTags, reset_msg_expiry_fun(TTL), BQS)} - end, State). + run_backing_queue( + BQ, fun (M, BQS) -> + {_MsgIds, BQS1} = + M:requeue(AckTags, reset_msg_expiry_fun(TTL), BQS), + BQS1 + end, State). fetch(AckRequired, State = #q{backing_queue_state = BQS, backing_queue = BQ}) -> @@ -567,7 +611,7 @@ handle_ch_down(DownPid, State = #q{exclusive_consumer = Holder}) -> case lookup_ch(DownPid) of not_found -> {ok, State}; - C = #cr{ch_pid = ChPid, txn = Txn, acktags = ChAckTags} -> + C = #cr{ch_pid = ChPid, acktags = ChAckTags} -> ok = erase_ch_record(C), State1 = State#q{ exclusive_consumer = case Holder of @@ -580,13 +624,8 @@ handle_ch_down(DownPid, State = #q{exclusive_consumer = Holder}) -> ChPid, State#q.blocked_consumers)}, case should_auto_delete(State1) of true -> {stop, State1}; - false -> State2 = case Txn of - none -> State1; - _ -> rollback_transaction(Txn, C, - State1) - end, - {ok, requeue_and_run(sets:to_list(ChAckTags), - ensure_expiry_timer(State2))} + false -> {ok, requeue_and_run(sets:to_list(ChAckTags), + ensure_expiry_timer(State1))} end end. @@ -613,37 +652,22 @@ maybe_send_reply(ChPid, Msg) -> ok = rabbit_channel:send_command(ChPid, Msg). qname(#q{q = #amqqueue{name = QName}}) -> QName. 
-backing_queue_idle_timeout(State = #q{backing_queue = BQ}) -> - maybe_run_queue_via_backing_queue( - fun (BQS) -> {[], BQ:idle_timeout(BQS)} end, State). - -maybe_run_queue_via_backing_queue(Fun, State = #q{backing_queue_state = BQS}) -> - {Guids, BQS1} = Fun(BQS), - run_message_queue( - confirm_messages(Guids, State#q{backing_queue_state = BQS1})). - -commit_transaction(Txn, From, C = #cr{acktags = ChAckTags}, - State = #q{backing_queue = BQ, - backing_queue_state = BQS, - ttl = TTL}) -> - {AckTags, BQS1} = BQ:tx_commit( - Txn, fun () -> gen_server2:reply(From, ok) end, - reset_msg_expiry_fun(TTL), BQS), - ChAckTags1 = subtract_acks(ChAckTags, AckTags), - maybe_store_ch_record(C#cr{acktags = ChAckTags1, txn = none}), - State#q{backing_queue_state = BQS1}. - -rollback_transaction(Txn, C, State = #q{backing_queue = BQ, - backing_queue_state = BQS}) -> - {_AckTags, BQS1} = BQ:tx_rollback(Txn, BQS), - %% Iff we removed acktags from the channel record on ack+txn then - %% we would add them back in here. - maybe_store_ch_record(C#cr{txn = none}), - State#q{backing_queue_state = BQS1}. +backing_queue_timeout(State = #q{backing_queue = BQ}) -> + run_backing_queue(BQ, fun (M, BQS) -> M:timeout(BQS) end, State). + +run_backing_queue(Mod, Fun, State = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> + run_message_queue(State#q{backing_queue_state = BQ:invoke(Mod, Fun, BQS)}). subtract_acks(A, B) when is_list(B) -> lists:foldl(fun sets:del_element/2, A, B). +discard_delivery(#delivery{sender = ChPid, + message = Message}, + State = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> + State#q{backing_queue_state = BQ:discard(Message, ChPid, BQS)}. + reset_msg_expiry_fun(TTL) -> fun(MsgProps) -> MsgProps#message_properties{expiry = calculate_msg_expiry(TTL)} @@ -653,17 +677,16 @@ message_properties(#q{ttl=TTL}) -> #message_properties{expiry = calculate_msg_expiry(TTL)}. 
calculate_msg_expiry(undefined) -> undefined; -calculate_msg_expiry(TTL) -> now_millis() + (TTL * 1000). +calculate_msg_expiry(TTL) -> now_micros() + (TTL * 1000). drop_expired_messages(State = #q{ttl = undefined}) -> State; drop_expired_messages(State = #q{backing_queue_state = BQS, backing_queue = BQ}) -> - Now = now_millis(), + Now = now_micros(), BQS1 = BQ:dropwhile( - fun (#message_properties{expiry = Expiry}) -> - Now > Expiry - end, BQS), + fun (#message_properties{expiry = Expiry}) -> Now > Expiry end, + BQS), ensure_ttl_timer(State#q{backing_queue_state = BQS1}). ensure_ttl_timer(State = #q{backing_queue = BQ, @@ -679,7 +702,7 @@ ensure_ttl_timer(State = #q{backing_queue = BQ, ensure_ttl_timer(State) -> State. -now_millis() -> timer:now_diff(now(), {0,0,0}). +now_micros() -> timer:now_diff(now(), {0,0,0}). infos(Items, State) -> [{Item, i(Item, State)} || Item <- Items]. @@ -715,16 +738,22 @@ i(memory, _) -> M; i(backing_queue_status, #q{backing_queue_state = BQS, backing_queue = BQ}) -> BQ:status(BQS); +i(slave_pids, #q{q = #amqqueue{name = Name}}) -> + {ok, #amqqueue{slave_pids = SPids}} = rabbit_amqqueue:lookup(Name), + SPids; +i(mirror_nodes, #q{q = #amqqueue{name = Name}}) -> + {ok, #amqqueue{mirror_nodes = MNodes}} = rabbit_amqqueue:lookup(Name), + MNodes; i(Item, _) -> throw({bad_argument, Item}). consumers(#q{active_consumers = ActiveConsumers, blocked_consumers = BlockedConsumers}) -> rabbit_misc:queue_fold( - fun ({ChPid, #consumer{tag = ConsumerTag, - ack_required = AckRequired}}, Acc) -> - [{ChPid, ConsumerTag, AckRequired} | Acc] - end, [], queue:join(ActiveConsumers, BlockedConsumers)). + fun ({ChPid, #consumer{tag = ConsumerTag, + ack_required = AckRequired}}, Acc) -> + [{ChPid, ConsumerTag, AckRequired} | Acc] + end, [], queue:join(ActiveConsumers, BlockedConsumers)). emit_stats(State) -> emit_stats(State, []). @@ -746,29 +775,32 @@ emit_consumer_deleted(ChPid, ConsumerTag) -> {channel, ChPid}, {queue, self()}]). 
-%--------------------------------------------------------------------------- +%%---------------------------------------------------------------------------- prioritise_call(Msg, _From, _State) -> case Msg of - info -> 9; - {info, _Items} -> 9; - consumers -> 9; - {maybe_run_queue_via_backing_queue, _Fun} -> 6; - _ -> 0 + info -> 9; + {info, _Items} -> 9; + consumers -> 9; + _ -> 0 end. prioritise_cast(Msg, _State) -> case Msg of - delete_immediately -> 8; - {set_ram_duration_target, _Duration} -> 8; - {set_maximum_since_use, _Age} -> 8; - {ack, _Txn, _MsgIds, _ChPid} -> 7; - {reject, _MsgIds, _Requeue, _ChPid} -> 7; - {notify_sent, _ChPid} -> 7; - {unblock, _ChPid} -> 7; - {maybe_run_queue_via_backing_queue, _Fun} -> 6; - sync_timeout -> 6; - _ -> 0 + update_ram_duration -> 8; + delete_immediately -> 8; + {set_ram_duration_target, _Duration} -> 8; + {set_maximum_since_use, _Age} -> 8; + maybe_expire -> 8; + drop_expired -> 8; + emit_stats -> 7; + {ack, _AckTags, _ChPid} -> 7; + {reject, _AckTags, _Requeue, _ChPid} -> 7; + {notify_sent, _ChPid} -> 7; + {unblock, _ChPid} -> 7; + {run_backing_queue, _Mod, _Fun} -> 6; + sync_timeout -> 6; + _ -> 0 end. prioritise_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, @@ -788,20 +820,20 @@ handle_call({init, Recover}, From, handle_call({init, Recover}, From, State = #q{q = #amqqueue{exclusive_owner = Owner}}) -> - case rpc:call(node(Owner), erlang, is_process_alive, [Owner]) of - true -> erlang:monitor(process, Owner), - declare(Recover, From, State); - _ -> #q{q = #amqqueue{name = QName, durable = IsDurable}, - backing_queue = BQ, backing_queue_state = undefined} = State, - gen_server2:reply(From, not_found), - case Recover of - true -> ok; - _ -> rabbit_log:warning( - "Queue ~p exclusive owner went away~n", [QName]) - end, - BQS = BQ:init(QName, IsDurable, Recover), - %% Rely on terminate to delete the queue. 
- {stop, normal, State#q{backing_queue_state = BQS}} + case rabbit_misc:is_process_alive(Owner) of + true -> erlang:monitor(process, Owner), + declare(Recover, From, State); + false -> #q{backing_queue = BQ, backing_queue_state = undefined, + q = #amqqueue{name = QName} = Q} = State, + gen_server2:reply(From, not_found), + case Recover of + true -> ok; + _ -> rabbit_log:warning( + "Queue ~p exclusive owner went away~n", [QName]) + end, + BQS = bq_init(BQ, Q, Recover), + %% Rely on terminate to delete the queue. + {stop, normal, State#q{backing_queue_state = BQS}} end; handle_call(info, _From, State) -> @@ -816,8 +848,7 @@ handle_call({info, Items}, _From, State) -> handle_call(consumers, _From, State) -> reply(consumers(State), State); -handle_call({deliver_immediately, Delivery}, - _From, State) -> +handle_call({deliver_immediately, Delivery}, _From, State) -> %% Synchronous, "immediate" delivery mode %% %% FIXME: Is this correct semantics? @@ -831,22 +862,16 @@ handle_call({deliver_immediately, Delivery}, %% just all ready-to-consume queues get the message, with unready %% queues discarding the message? %% - {Delivered, _NeedsConfirming, State1} = - attempt_delivery(Delivery, record_confirm_message(Delivery, State)), - reply(Delivered, State1); + {Delivered, Confirm, State1} = attempt_delivery(Delivery, State), + reply(Delivered, case Delivered of + true -> maybe_record_confirm_message(Confirm, State1); + false -> discard_delivery(Delivery, State1) + end); handle_call({deliver, Delivery}, From, State) -> %% Synchronous, "mandatory" delivery mode. Reply asap. 
gen_server2:reply(From, true), - {_Delivered, NewState} = deliver_or_enqueue(Delivery, State), - noreply(NewState); - -handle_call({commit, Txn, ChPid}, From, State) -> - case lookup_ch(ChPid) of - not_found -> reply(ok, State); - C -> noreply(run_message_queue( - commit_transaction(Txn, From, C, State))) - end; + noreply(deliver_or_enqueue(Delivery, State)); handle_call({notify_down, ChPid}, _From, State) -> %% we want to do this synchronously, so that auto_deleted queues @@ -908,15 +933,13 @@ handle_call({basic_consume, NoAck, ChPid, LimiterPid, case is_ch_blocked(C) of true -> State1#q{ blocked_consumers = - add_consumer( - ChPid, Consumer, - State1#q.blocked_consumers)}; + add_consumer(ChPid, Consumer, + State1#q.blocked_consumers)}; false -> run_message_queue( State1#q{ active_consumers = - add_consumer( - ChPid, Consumer, - State1#q.active_consumers)}) + add_consumer(ChPid, Consumer, + State1#q.active_consumers)}) end, emit_consumer_created(ChPid, ConsumerTag, ExclusiveConsume, not NoAck), @@ -989,41 +1012,28 @@ handle_call({requeue, AckTags, ChPid}, From, State) -> ChAckTags1 = subtract_acks(ChAckTags, AckTags), maybe_store_ch_record(C#cr{acktags = ChAckTags1}), noreply(requeue_and_run(AckTags, State)) - end; - -handle_call({maybe_run_queue_via_backing_queue, Fun}, _From, State) -> - reply(ok, maybe_run_queue_via_backing_queue(Fun, State)). - + end. -handle_cast({maybe_run_queue_via_backing_queue, Fun}, State) -> - noreply(maybe_run_queue_via_backing_queue(Fun, State)); +handle_cast({run_backing_queue, Mod, Fun}, State) -> + noreply(run_backing_queue(Mod, Fun, State)); handle_cast(sync_timeout, State) -> - noreply(backing_queue_idle_timeout(State#q{sync_timer_ref = undefined})); + noreply(backing_queue_timeout(State#q{sync_timer_ref = undefined})); handle_cast({deliver, Delivery}, State) -> %% Asynchronous, non-"mandatory", non-"immediate" deliver mode. 
- {_Delivered, NewState} = deliver_or_enqueue(Delivery, State), - noreply(NewState); + noreply(deliver_or_enqueue(Delivery, State)); -handle_cast({ack, Txn, AckTags, ChPid}, +handle_cast({ack, AckTags, ChPid}, State = #q{backing_queue = BQ, backing_queue_state = BQS}) -> case lookup_ch(ChPid) of not_found -> noreply(State); C = #cr{acktags = ChAckTags} -> - {C1, State1} = - case Txn of - none -> ChAckTags1 = subtract_acks(ChAckTags, AckTags), - NewC = C#cr{acktags = ChAckTags1}, - BQS1 = BQ:ack(AckTags, BQS), - {NewC, State#q{backing_queue_state = BQS1}}; - _ -> BQS1 = BQ:tx_ack(Txn, AckTags, BQS), - {C#cr{txn = Txn}, - State#q{backing_queue_state = BQS1}} - end, - maybe_store_ch_record(C1), - noreply(State1) + maybe_store_ch_record(C#cr{acktags = subtract_acks( + ChAckTags, AckTags)}), + {_Guids, BQS1} = BQ:ack(AckTags, BQS), + noreply(State#q{backing_queue_state = BQS1}) end; handle_cast({reject, AckTags, Requeue, ChPid}, @@ -1037,17 +1047,11 @@ handle_cast({reject, AckTags, Requeue, ChPid}, maybe_store_ch_record(C#cr{acktags = ChAckTags1}), noreply(case Requeue of true -> requeue_and_run(AckTags, State); - false -> BQS1 = BQ:ack(AckTags, BQS), + false -> {_Guids, BQS1} = BQ:ack(AckTags, BQS), State#q{backing_queue_state = BQS1} end) end; -handle_cast({rollback, Txn, ChPid}, State) -> - noreply(case lookup_ch(ChPid) of - not_found -> State; - C -> rollback_transaction(Txn, C, State) - end); - handle_cast(delete_immediately, State) -> {stop, normal, State}; @@ -1134,7 +1138,7 @@ handle_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, State) -> end; handle_info(timeout, State) -> - noreply(backing_queue_idle_timeout(State)); + noreply(backing_queue_timeout(State)); handle_info({'EXIT', _Pid, Reason}, State) -> {stop, Reason, State}; @@ -1160,3 +1164,5 @@ handle_pre_hibernate(State = #q{backing_queue = BQ, State1 = State#q{stats_timer = rabbit_event:stop_stats_timer(StatsTimer), backing_queue_state = BQS3}, {hibernate, stop_rate_timer(State1)}. 
+ +format_message_queue(Opt, MQ) -> rabbit_misc:format_message_queue(Opt, MQ). diff --git a/src/rabbit_amqqueue_sup.erl b/src/rabbit_amqqueue_sup.erl index 1344956e..2c28adce 100644 --- a/src/rabbit_amqqueue_sup.erl +++ b/src/rabbit_amqqueue_sup.erl @@ -18,7 +18,7 @@ -behaviour(supervisor2). --export([start_link/0, start_child/1]). +-export([start_link/0, start_child/2]). -export([init/1]). @@ -29,8 +29,8 @@ start_link() -> supervisor2:start_link({local, ?SERVER}, ?MODULE, []). -start_child(Args) -> - supervisor2:start_child(?SERVER, Args). +start_child(Node, Args) -> + supervisor2:start_child({?SERVER, Node}, Args). init([]) -> {ok, {{simple_one_for_one_terminate, 10, 10}, diff --git a/src/rabbit_auth_backend.erl b/src/rabbit_auth_backend.erl index 09820c5b..ade158bb 100644 --- a/src/rabbit_auth_backend.erl +++ b/src/rabbit_auth_backend.erl @@ -36,17 +36,13 @@ behaviour_info(callbacks) -> %% Client failed authentication. Log and die. {check_user_login, 2}, - %% Given #user, vhost path and permission, can a user access a vhost? - %% Permission is read - learn of the existence of (only relevant for - %% management plugin) - %% or write - log in - %% + %% Given #user and vhost, can a user log in to a vhost? %% Possible responses: %% true %% false %% {error, Error} %% Something went wrong. Log and die. - {check_vhost_access, 3}, + {check_vhost_access, 2}, %% Given #user, resource and permission, can a user access a resource? %% diff --git a/src/rabbit_auth_backend_internal.erl b/src/rabbit_auth_backend_internal.erl index a564480b..6a018bd1 100644 --- a/src/rabbit_auth_backend_internal.erl +++ b/src/rabbit_auth_backend_internal.erl @@ -20,15 +20,17 @@ -behaviour(rabbit_auth_backend). -export([description/0]). --export([check_user_login/2, check_vhost_access/3, check_resource_access/3]). +-export([check_user_login/2, check_vhost_access/2, check_resource_access/3]). 
--export([add_user/2, delete_user/1, change_password/2, set_admin/1, - clear_admin/1, list_users/0, lookup_user/1, clear_password/1]). +-export([add_user/2, delete_user/1, change_password/2, set_tags/2, + list_users/0, user_info_keys/0, lookup_user/1, clear_password/1]). -export([make_salt/0, check_password/2, change_password_hash/2, hash_password/1]). -export([set_permissions/5, clear_permissions/2, list_permissions/0, list_vhost_permissions/1, list_user_permissions/1, - list_user_vhost_permissions/2]). + list_user_vhost_permissions/2, perms_info_keys/0, + vhost_perms_info_keys/0, user_perms_info_keys/0, + user_vhost_perms_info_keys/0]). -include("rabbit_auth_backend_spec.hrl"). @@ -48,33 +50,35 @@ rabbit_types:password_hash()) -> 'ok'). -spec(hash_password/1 :: (rabbit_types:password()) -> rabbit_types:password_hash()). --spec(set_admin/1 :: (rabbit_types:username()) -> 'ok'). --spec(clear_admin/1 :: (rabbit_types:username()) -> 'ok'). --spec(list_users/0 :: () -> [{rabbit_types:username(), boolean()}]). +-spec(set_tags/2 :: (rabbit_types:username(), [atom()]) -> 'ok'). +-spec(list_users/0 :: () -> rabbit_types:infos()). +-spec(user_info_keys/0 :: () -> rabbit_types:info_keys()). -spec(lookup_user/1 :: (rabbit_types:username()) - -> rabbit_types:ok(rabbit_types:internal_user()) - | rabbit_types:error('not_found')). + -> rabbit_types:ok(rabbit_types:internal_user()) + | rabbit_types:error('not_found')). -spec(set_permissions/5 ::(rabbit_types:username(), rabbit_types:vhost(), regexp(), regexp(), regexp()) -> 'ok'). -spec(clear_permissions/2 :: (rabbit_types:username(), rabbit_types:vhost()) -> 'ok'). --spec(list_permissions/0 :: - () -> [{rabbit_types:username(), rabbit_types:vhost(), - regexp(), regexp(), regexp()}]). +-spec(list_permissions/0 :: () -> rabbit_types:infos()). -spec(list_vhost_permissions/1 :: - (rabbit_types:vhost()) -> [{rabbit_types:username(), - regexp(), regexp(), regexp()}]). + (rabbit_types:vhost()) -> rabbit_types:infos()). 
-spec(list_user_permissions/1 :: - (rabbit_types:username()) -> [{rabbit_types:vhost(), - regexp(), regexp(), regexp()}]). + (rabbit_types:username()) -> rabbit_types:infos()). -spec(list_user_vhost_permissions/2 :: (rabbit_types:username(), rabbit_types:vhost()) - -> [{regexp(), regexp(), regexp()}]). - + -> rabbit_types:infos()). +-spec(perms_info_keys/0 :: () -> rabbit_types:info_keys()). +-spec(vhost_perms_info_keys/0 :: () -> rabbit_types:info_keys()). +-spec(user_perms_info_keys/0 :: () -> rabbit_types:info_keys()). +-spec(user_vhost_perms_info_keys/0 :: () -> rabbit_types:info_keys()). -endif. %%---------------------------------------------------------------------------- +-define(PERMS_INFO_KEYS, [configure, write, read]). +-define(USER_INFO_KEYS, [user, tags]). + %% Implementation of rabbit_auth_backend description() -> @@ -85,20 +89,19 @@ check_user_login(Username, []) -> internal_check_user_login(Username, fun(_) -> true end); check_user_login(Username, [{password, Password}]) -> internal_check_user_login( - Username, - fun(#internal_user{password_hash = Hash}) -> - check_password(Password, Hash) - end); + Username, fun(#internal_user{password_hash = Hash}) -> + check_password(Password, Hash) + end); check_user_login(Username, AuthProps) -> exit({unknown_auth_props, Username, AuthProps}). internal_check_user_login(Username, Fun) -> Refused = {refused, "user '~s' - invalid credentials", [Username]}, case lookup_user(Username) of - {ok, User = #internal_user{is_admin = IsAdmin}} -> + {ok, User = #internal_user{tags = Tags}} -> case Fun(User) of true -> {ok, #user{username = Username, - is_admin = IsAdmin, + tags = Tags, auth_backend = ?MODULE, impl = User}}; _ -> Refused @@ -107,16 +110,13 @@ internal_check_user_login(Username, Fun) -> Refused end. 
-check_vhost_access(#user{is_admin = true}, _VHostPath, read) -> - true; - -check_vhost_access(#user{username = Username}, VHostPath, _) -> +check_vhost_access(#user{username = Username}, VHost) -> %% TODO: use dirty ops instead rabbit_misc:execute_mnesia_transaction( fun () -> case mnesia:read({rabbit_user_permission, #user_vhost{username = Username, - virtual_host = VHostPath}}) of + virtual_host = VHost}}) of [] -> false; [_R] -> true end @@ -131,12 +131,11 @@ check_resource_access(#user{username = Username}, [] -> false; [#user_permission{permission = P}] -> - PermRegexp = - case element(permission_index(Permission), P) of - %% <<"^$">> breaks Emacs' erlang mode - <<"">> -> <<$^, $$>>; - RE -> RE - end, + PermRegexp = case element(permission_index(Permission), P) of + %% <<"^$">> breaks Emacs' erlang mode + <<"">> -> <<$^, $$>>; + RE -> RE + end, case re:run(Name, PermRegexp, [{capture, none}]) of match -> true; nomatch -> false @@ -160,7 +159,7 @@ add_user(Username, Password) -> #internal_user{username = Username, password_hash = hash_password(Password), - is_admin = false}, + tags = []}, write); _ -> mnesia:abort({user_already_exists, Username}) @@ -221,18 +220,12 @@ salted_md5(Salt, Cleartext) -> Salted = <<Salt/binary, Cleartext/binary>>, erlang:md5(Salted). -set_admin(Username) -> - set_admin(Username, true). - -clear_admin(Username) -> - set_admin(Username, false). - -set_admin(Username, IsAdmin) -> +set_tags(Username, Tags) -> R = update_user(Username, fun(User) -> - User#internal_user{is_admin = IsAdmin} + User#internal_user{tags = Tags} end), - rabbit_log:info("Set user admin flag for user ~p to ~p~n", - [Username, IsAdmin]), + rabbit_log:info("Set user tags for user ~p to ~p~n", + [Username, Tags]), R. update_user(Username, Fun) -> @@ -245,10 +238,12 @@ update_user(Username, Fun) -> end)). 
list_users() -> - [{Username, IsAdmin} || - #internal_user{username = Username, is_admin = IsAdmin} <- + [[{user, Username}, {tags, Tags}] || + #internal_user{username = Username, tags = Tags} <- mnesia:dirty_match_object(rabbit_user, #internal_user{_ = '_'})]. +user_info_keys() -> ?USER_INFO_KEYS. + lookup_user(Username) -> rabbit_misc:dirty_read({rabbit_user, Username}). @@ -287,32 +282,38 @@ clear_permissions(Username, VHostPath) -> virtual_host = VHostPath}}) end)). +perms_info_keys() -> [user, vhost | ?PERMS_INFO_KEYS]. +vhost_perms_info_keys() -> [user | ?PERMS_INFO_KEYS]. +user_perms_info_keys() -> [vhost | ?PERMS_INFO_KEYS]. +user_vhost_perms_info_keys() -> ?PERMS_INFO_KEYS. + list_permissions() -> - [{Username, VHostPath, ConfigurePerm, WritePerm, ReadPerm} || - {Username, VHostPath, ConfigurePerm, WritePerm, ReadPerm} <- - list_permissions(match_user_vhost('_', '_'))]. + list_permissions(perms_info_keys(), match_user_vhost('_', '_')). list_vhost_permissions(VHostPath) -> - [{Username, ConfigurePerm, WritePerm, ReadPerm} || - {Username, _, ConfigurePerm, WritePerm, ReadPerm} <- - list_permissions(rabbit_vhost:with( - VHostPath, match_user_vhost('_', VHostPath)))]. + list_permissions( + vhost_perms_info_keys(), + rabbit_vhost:with(VHostPath, match_user_vhost('_', VHostPath))). list_user_permissions(Username) -> - [{VHostPath, ConfigurePerm, WritePerm, ReadPerm} || - {_, VHostPath, ConfigurePerm, WritePerm, ReadPerm} <- - list_permissions(rabbit_misc:with_user( - Username, match_user_vhost(Username, '_')))]. + list_permissions( + user_perms_info_keys(), + rabbit_misc:with_user(Username, match_user_vhost(Username, '_'))). list_user_vhost_permissions(Username, VHostPath) -> - [{ConfigurePerm, WritePerm, ReadPerm} || - {_, _, ConfigurePerm, WritePerm, ReadPerm} <- - list_permissions(rabbit_misc:with_user_and_vhost( - Username, VHostPath, - match_user_vhost(Username, VHostPath)))]. 
- -list_permissions(QueryThunk) -> - [{Username, VHostPath, ConfigurePerm, WritePerm, ReadPerm} || + list_permissions( + user_vhost_perms_info_keys(), + rabbit_misc:with_user_and_vhost( + Username, VHostPath, match_user_vhost(Username, VHostPath))). + +filter_props(Keys, Props) -> [T || T = {K, _} <- Props, lists:member(K, Keys)]. + +list_permissions(Keys, QueryThunk) -> + [filter_props(Keys, [{user, Username}, + {vhost, VHostPath}, + {configure, ConfigurePerm}, + {write, WritePerm}, + {read, ReadPerm}]) || #user_permission{user_vhost = #user_vhost{username = Username, virtual_host = VHostPath}, permission = #permission{ configure = ConfigurePerm, diff --git a/src/rabbit_auth_mechanism.erl b/src/rabbit_auth_mechanism.erl index 1d14f9f0..897199ee 100644 --- a/src/rabbit_auth_mechanism.erl +++ b/src/rabbit_auth_mechanism.erl @@ -23,6 +23,10 @@ behaviour_info(callbacks) -> %% A description. {description, 0}, + %% If this mechanism is enabled, should it be offered for a given socket? + %% (primarily so EXTERNAL can be SSL-only) + {should_offer, 1}, + %% Called before authentication starts. Should create a state %% object to be passed through all the stages of authentication. {init, 1}, diff --git a/src/rabbit_auth_mechanism_amqplain.erl b/src/rabbit_auth_mechanism_amqplain.erl index 5e422eee..b8682a46 100644 --- a/src/rabbit_auth_mechanism_amqplain.erl +++ b/src/rabbit_auth_mechanism_amqplain.erl @@ -19,7 +19,7 @@ -behaviour(rabbit_auth_mechanism). --export([description/0, init/1, handle_response/2]). +-export([description/0, should_offer/1, init/1, handle_response/2]). -include("rabbit_auth_mechanism_spec.hrl"). @@ -38,6 +38,9 @@ description() -> [{name, <<"AMQPLAIN">>}, {description, <<"QPid AMQPLAIN mechanism">>}]. +should_offer(_Sock) -> + true. + init(_Sock) -> []. @@ -51,5 +54,5 @@ handle_response(Response, _State) -> _ -> {protocol_error, "AMQPLAIN auth info ~w is missing LOGIN or PASSWORD field", - [LoginTable]} + [LoginTable]} end. 
diff --git a/src/rabbit_auth_mechanism_cr_demo.erl b/src/rabbit_auth_mechanism_cr_demo.erl index 7fd20f8b..acbb6e48 100644 --- a/src/rabbit_auth_mechanism_cr_demo.erl +++ b/src/rabbit_auth_mechanism_cr_demo.erl @@ -19,7 +19,7 @@ -behaviour(rabbit_auth_mechanism). --export([description/0, init/1, handle_response/2]). +-export([description/0, should_offer/1, init/1, handle_response/2]). -include("rabbit_auth_mechanism_spec.hrl"). @@ -43,6 +43,9 @@ description() -> {description, <<"RabbitMQ Demo challenge-response authentication " "mechanism">>}]. +should_offer(_Sock) -> + true. + init(_Sock) -> #state{}. @@ -50,10 +53,8 @@ handle_response(Response, State = #state{username = undefined}) -> {challenge, <<"Please tell me your password">>, State#state{username = Response}}; -handle_response(Response, #state{username = Username}) -> - case Response of - <<"My password is ", Password/binary>> -> - rabbit_access_control:check_user_pass_login(Username, Password); - _ -> - {protocol_error, "Invalid response '~s'", [Response]} - end. +handle_response(<<"My password is ", Password/binary>>, + #state{username = Username}) -> + rabbit_access_control:check_user_pass_login(Username, Password); +handle_response(Response, _State) -> + {protocol_error, "Invalid response '~s'", [Response]}. diff --git a/src/rabbit_auth_mechanism_plain.erl b/src/rabbit_auth_mechanism_plain.erl index 1ca07018..2448acb6 100644 --- a/src/rabbit_auth_mechanism_plain.erl +++ b/src/rabbit_auth_mechanism_plain.erl @@ -19,7 +19,7 @@ -behaviour(rabbit_auth_mechanism). --export([description/0, init/1, handle_response/2]). +-export([description/0, should_offer/1, init/1, handle_response/2]). -include("rabbit_auth_mechanism_spec.hrl"). @@ -41,6 +41,9 @@ description() -> [{name, <<"PLAIN">>}, {description, <<"SASL PLAIN authentication mechanism">>}]. +should_offer(_Sock) -> + true. + init(_Sock) -> []. @@ -62,15 +65,12 @@ extract_user_pass(Response) -> end. 
extract_elem(<<0:8, Rest/binary>>) -> - Count = next_null_pos(Rest), + Count = next_null_pos(Rest, 0), <<Elem:Count/binary, Rest1/binary>> = Rest, {ok, Elem, Rest1}; extract_elem(_) -> error. -next_null_pos(Bin) -> - next_null_pos(Bin, 0). - next_null_pos(<<>>, Count) -> Count; next_null_pos(<<0:8, _Rest/binary>>, Count) -> Count; next_null_pos(<<_:8, Rest/binary>>, Count) -> next_null_pos(Rest, Count + 1). diff --git a/src/rabbit_backing_queue.erl b/src/rabbit_backing_queue.erl index 6a21e10f..77278416 100644 --- a/src/rabbit_backing_queue.erl +++ b/src/rabbit_backing_queue.erl @@ -33,26 +33,66 @@ behaviour_info(callbacks) -> {stop, 0}, %% Initialise the backing queue and its state. + %% + %% Takes + %% 1. the amqqueue record + %% 2. a boolean indicating whether the queue is an existing queue + %% that should be recovered + %% 3. an asynchronous callback which accepts a function of type + %% backing-queue-state to backing-queue-state. This callback + %% function can be safely invoked from any process, which + %% makes it useful for passing messages back into the backing + %% queue, especially as the backing queue does not have + %% control of its own mailbox. {init, 3}, %% Called on queue shutdown when queue isn't being deleted. - {terminate, 1}, + {terminate, 2}, %% Called when the queue is terminating and needs to delete all %% its content. - {delete_and_terminate, 1}, + {delete_and_terminate, 2}, %% Remove all messages in the queue, but not messages which have %% been fetched and are pending acks. {purge, 1}, %% Publish a message. - {publish, 3}, + {publish, 4}, %% Called for messages which have already been passed straight %% out to a client. The queue will be empty for these calls %% (i.e. saves the round trip through the backing queue). - {publish_delivered, 4}, + {publish_delivered, 5}, + + %% Return ids of messages which have been confirmed since + %% the last invocation of this function (or initialisation). 
+ %% + %% Message ids should only appear in the result of + %% drain_confirmed under the following circumstances: + %% + %% 1. The message appears in a call to publish_delivered/5 and + %% the first argument (ack_required) is false; or + %% 2. The message is fetched from the queue with fetch/2 and the + %% first argument (ack_required) is false; or + %% 3. The message is acked (ack/2 is called for the message); or + %% 4. The message is fully fsync'd to disk in such a way that the + %% recovery of the message is guaranteed in the event of a + %% crash of this rabbit node (excluding hardware failure). + %% + %% In addition to the above conditions, a message id may only + %% appear in the result of drain_confirmed if + %% #message_properties.needs_confirming = true when the msg was + %% published (through whichever means) to the backing queue. + %% + %% It is legal for the same message id to appear in the results + %% of multiple calls to drain_confirmed, which means that the + %% backing queue is not required to keep track of which messages + %% it has already confirmed. The confirm will be issued to the + %% publisher the first time the message id appears in the result + %% of drain_confirmed. All subsequent appearances of that message + %% id will be ignored. + {drain_confirmed, 1}, %% Drop messages from the head of the queue while the supplied %% predicate returns true. @@ -62,24 +102,9 @@ behaviour_info(callbacks) -> {fetch, 2}, %% Acktags supplied are for messages which can now be forgotten - %% about. Must return 1 guid per Ack, in the same order as Acks. + %% about. Must return 1 msg_id per Ack, in the same order as Acks. {ack, 2}, - %% A publish, but in the context of a transaction. - {tx_publish, 4}, - - %% Acks, but in the context of a transaction. - {tx_ack, 3}, - - %% Undo anything which has been done in the context of the - %% specified transaction. - {tx_rollback, 2}, - - %% Commit a transaction.
The Fun passed in must be called once - %% the messages have really been commited. This CPS permits the - %% possibility of commit coalescing. - {tx_commit, 4}, - %% Reinsert messages into the queue which have already been %% delivered and were pending acknowledgement. {requeue, 3}, @@ -107,22 +132,40 @@ behaviour_info(callbacks) -> %% queue. {ram_duration, 1}, - %% Should 'idle_timeout' be called as soon as the queue process + %% Should 'timeout' be called as soon as the queue process %% can manage (either on an empty mailbox, or when a timer %% fires)? - {needs_idle_timeout, 1}, + {needs_timeout, 1}, - %% Called (eventually) after needs_idle_timeout returns - %% 'true'. Note this may be called more than once for each 'true' - %% returned from needs_idle_timeout. - {idle_timeout, 1}, + %% Called (eventually) after needs_timeout returns 'idle' or + %% 'timed'. Note this may be called more than once for each + %% 'idle' or 'timed' returned from needs_timeout. + {timeout, 1}, %% Called immediately before the queue hibernates. {handle_pre_hibernate, 1}, %% Exists for debugging purposes, to be able to expose state via %% rabbitmqctl list_queues backing_queue_status - {status, 1} + {status, 1}, + + %% Passed a function to be invoked with the relevant backing + %% queue's state. Useful for when the backing queue or other + %% components need to pass functions into the backing queue. + {invoke, 3}, + + %% Called prior to a publish or publish_delivered call. Allows + %% the BQ to signal that it's already seen this message (and in + %% what capacity - i.e. was it published previously or discarded + %% previously) and thus the message should be dropped. + {is_duplicate, 2}, + + %% Called to inform the BQ about messages which have reached the + %% queue, but are not going to be further passed to BQ for some + %% reason. Note that this may be invoked for messages for + %% which BQ:is_duplicate/2 has already returned {'published' | + %% 'discarded', BQS}.
+ {discard, 3} ]; behaviour_info(_Other) -> undefined. diff --git a/src/rabbit_backing_queue_qc.erl b/src/rabbit_backing_queue_qc.erl new file mode 100644 index 00000000..d358a041 --- /dev/null +++ b/src/rabbit_backing_queue_qc.erl @@ -0,0 +1,392 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License +%% at http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and +%% limitations under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is VMware, Inc. +%% Copyright (c) 2011-2011 VMware, Inc. All rights reserved. +%% + +-module(rabbit_backing_queue_qc). +-ifdef(use_proper_qc). +-include("rabbit.hrl"). +-include("rabbit_framing.hrl"). +-include_lib("proper/include/proper.hrl"). + +-behaviour(proper_statem). + +-define(BQMOD, rabbit_variable_queue). +-define(QUEUE_MAXLEN, 10000). +-define(TIMEOUT_LIMIT, 100). + +-define(RECORD_INDEX(Key, Record), + proplists:get_value(Key, lists:zip( + record_info(fields, Record), lists:seq(2, record_info(size, Record))))). + +-export([initial_state/0, command/1, precondition/2, postcondition/3, + next_state/3]). + +-export([prop_backing_queue_test/0, publish_multiple/4, timeout/2]). + +-record(state, {bqstate, + len, %% int + messages, %% queue of {msg_props, basic_msg} + acks, %% dict of acktag => {msg_props, basic_msg} + confirms}). %% set of msgid + +%% Initialise model + +initial_state() -> + #state{bqstate = qc_variable_queue_init(qc_test_queue()), + len = 0, + messages = queue:new(), + acks = orddict:new(), + confirms = gb_sets:new()}. 
+ +%% Property + +prop_backing_queue_test() -> + ?FORALL(Cmds, commands(?MODULE, initial_state()), + backing_queue_test(Cmds)). + +backing_queue_test(Cmds) -> + {ok, FileSizeLimit} = + application:get_env(rabbit, msg_store_file_size_limit), + application:set_env(rabbit, msg_store_file_size_limit, 512, + infinity), + {ok, MaxJournal} = + application:get_env(rabbit, queue_index_max_journal_entries), + application:set_env(rabbit, queue_index_max_journal_entries, 128, + infinity), + + {_H, #state{bqstate = BQ}, Res} = run_commands(?MODULE, Cmds), + + application:set_env(rabbit, msg_store_file_size_limit, + FileSizeLimit, infinity), + application:set_env(rabbit, queue_index_max_journal_entries, + MaxJournal, infinity), + + ?BQMOD:delete_and_terminate(shutdown, BQ), + ?WHENFAIL( + io:format("Result: ~p~n", [Res]), + aggregate(command_names(Cmds), Res =:= ok)). + +%% Commands + +%% Command frequencies are tuned so that queues are normally reasonably +%% short, but they may sometimes exceed ?QUEUE_MAXLEN. Publish-multiple +%% and purging cause extreme queue lengths, so these have lower probabilities. +%% Fetches are sufficiently frequent so that commands that need acktags +%% get decent coverage. + +command(S) -> + frequency([{10, qc_publish(S)}, + {1, qc_publish_delivered(S)}, + {1, qc_publish_multiple(S)}, %% very slow + {15, qc_fetch(S)}, %% needed for ack and requeue + {15, qc_ack(S)}, + {15, qc_requeue(S)}, + {3, qc_set_ram_duration_target(S)}, + {1, qc_ram_duration(S)}, + {1, qc_drain_confirmed(S)}, + {1, qc_dropwhile(S)}, + {1, qc_is_empty(S)}, + {1, qc_timeout(S)}, + {1, qc_purge(S)}]). + +qc_publish(#state{bqstate = BQ}) -> + {call, ?BQMOD, publish, + [qc_message(), + #message_properties{needs_confirming = frequency([{1, true}, + {20, false}]), + expiry = choose(0, 10)}, + self(), BQ]}. + +qc_publish_multiple(#state{bqstate = BQ}) -> + {call, ?MODULE, publish_multiple, + [qc_message(), #message_properties{}, BQ, + resize(?QUEUE_MAXLEN, pos_integer())]}. 
+ +qc_publish_delivered(#state{bqstate = BQ}) -> + {call, ?BQMOD, publish_delivered, + [boolean(), qc_message(), #message_properties{}, self(), BQ]}. + +qc_fetch(#state{bqstate = BQ}) -> + {call, ?BQMOD, fetch, [boolean(), BQ]}. + +qc_ack(#state{bqstate = BQ, acks = Acks}) -> + {call, ?BQMOD, ack, [rand_choice(orddict:fetch_keys(Acks)), BQ]}. + +qc_requeue(#state{bqstate = BQ, acks = Acks}) -> + {call, ?BQMOD, requeue, + [rand_choice(orddict:fetch_keys(Acks)), fun(MsgOpts) -> MsgOpts end, BQ]}. + +qc_set_ram_duration_target(#state{bqstate = BQ}) -> + {call, ?BQMOD, set_ram_duration_target, + [oneof([0, 1, 2, resize(1000, pos_integer()), infinity]), BQ]}. + +qc_ram_duration(#state{bqstate = BQ}) -> + {call, ?BQMOD, ram_duration, [BQ]}. + +qc_drain_confirmed(#state{bqstate = BQ}) -> + {call, ?BQMOD, drain_confirmed, [BQ]}. + +qc_dropwhile(#state{bqstate = BQ}) -> + {call, ?BQMOD, dropwhile, [fun dropfun/1, BQ]}. + +qc_is_empty(#state{bqstate = BQ}) -> + {call, ?BQMOD, is_empty, [BQ]}. + +qc_timeout(#state{bqstate = BQ}) -> + {call, ?MODULE, timeout, [BQ, ?TIMEOUT_LIMIT]}. + +qc_purge(#state{bqstate = BQ}) -> + {call, ?BQMOD, purge, [BQ]}. + +%% Preconditions + +precondition(#state{acks = Acks}, {call, ?BQMOD, Fun, _Arg}) + when Fun =:= ack; Fun =:= requeue -> + orddict:size(Acks) > 0; +precondition(#state{messages = Messages}, + {call, ?BQMOD, publish_delivered, _Arg}) -> + queue:is_empty(Messages); +precondition(_S, {call, ?BQMOD, _Fun, _Arg}) -> + true; +precondition(_S, {call, ?MODULE, timeout, _Arg}) -> + true; +precondition(#state{len = Len}, {call, ?MODULE, publish_multiple, _Arg}) -> + Len < ?QUEUE_MAXLEN. 
+ +%% Model updates + +next_state(S, BQ, {call, ?BQMOD, publish, [Msg, MsgProps, _Pid, _BQ]}) -> + #state{len = Len, messages = Messages, confirms = Confirms} = S, + MsgId = {call, erlang, element, [?RECORD_INDEX(id, basic_message), Msg]}, + NeedsConfirm = + {call, erlang, element, + [?RECORD_INDEX(needs_confirming, message_properties), MsgProps]}, + S#state{bqstate = BQ, + len = Len + 1, + messages = queue:in({MsgProps, Msg}, Messages), + confirms = case eval(NeedsConfirm) of + true -> gb_sets:add(MsgId, Confirms); + _ -> Confirms + end}; + +next_state(S, BQ, {call, _, publish_multiple, [Msg, MsgProps, _BQ, Count]}) -> + #state{len = Len, messages = Messages} = S, + Messages1 = repeat(Messages, fun(Msgs) -> + queue:in({MsgProps, Msg}, Msgs) + end, Count), + S#state{bqstate = BQ, + len = Len + Count, + messages = Messages1}; + +next_state(S, Res, + {call, ?BQMOD, publish_delivered, + [AckReq, Msg, MsgProps, _Pid, _BQ]}) -> + #state{confirms = Confirms, acks = Acks} = S, + AckTag = {call, erlang, element, [1, Res]}, + BQ1 = {call, erlang, element, [2, Res]}, + MsgId = {call, erlang, element, [?RECORD_INDEX(id, basic_message), Msg]}, + NeedsConfirm = + {call, erlang, element, + [?RECORD_INDEX(needs_confirming, message_properties), MsgProps]}, + S#state{bqstate = BQ1, + confirms = case eval(NeedsConfirm) of + true -> gb_sets:add(MsgId, Confirms); + _ -> Confirms + end, + acks = case AckReq of + true -> orddict:append(AckTag, {MsgProps, Msg}, Acks); + false -> Acks + end + }; + +next_state(S, Res, {call, ?BQMOD, fetch, [AckReq, _BQ]}) -> + #state{len = Len, messages = Messages, acks = Acks} = S, + ResultInfo = {call, erlang, element, [1, Res]}, + BQ1 = {call, erlang, element, [2, Res]}, + AckTag = {call, erlang, element, [3, ResultInfo]}, + S1 = S#state{bqstate = BQ1}, + case queue:out(Messages) of + {empty, _M2} -> + S1; + {{value, MsgProp_Msg}, M2} -> + S2 = S1#state{len = Len - 1, messages = M2}, + case AckReq of + true -> + S2#state{acks = orddict:append(AckTag, 
MsgProp_Msg, Acks)}; + false -> + S2 + end + end; + +next_state(S, Res, {call, ?BQMOD, ack, [AcksArg, _BQ]}) -> + #state{acks = AcksState} = S, + BQ1 = {call, erlang, element, [2, Res]}, + S#state{bqstate = BQ1, + acks = lists:foldl(fun orddict:erase/2, AcksState, AcksArg)}; + +next_state(S, Res, {call, ?BQMOD, requeue, [AcksArg, _F, _V]}) -> + #state{len = Len, messages = Messages, acks = AcksState} = S, + BQ1 = {call, erlang, element, [2, Res]}, + RequeueMsgs = lists:append([orddict:fetch(Key, AcksState) || + Key <- AcksArg]), + S#state{bqstate = BQ1, + len = Len + length(RequeueMsgs), + messages = queue:join(Messages, queue:from_list(RequeueMsgs)), + acks = lists:foldl(fun orddict:erase/2, AcksState, AcksArg)}; + +next_state(S, BQ, {call, ?BQMOD, set_ram_duration_target, _Args}) -> + S#state{bqstate = BQ}; + +next_state(S, Res, {call, ?BQMOD, ram_duration, _Args}) -> + BQ1 = {call, erlang, element, [2, Res]}, + S#state{bqstate = BQ1}; + +next_state(S, Res, {call, ?BQMOD, drain_confirmed, _Args}) -> + BQ1 = {call, erlang, element, [2, Res]}, + S#state{bqstate = BQ1}; + +next_state(S, BQ1, {call, ?BQMOD, dropwhile, _Args}) -> + #state{messages = Messages} = S, + Messages1 = drop_messages(Messages), + S#state{bqstate = BQ1, len = queue:len(Messages1), messages = Messages1}; + +next_state(S, _Res, {call, ?BQMOD, is_empty, _Args}) -> + S; + +next_state(S, BQ, {call, ?MODULE, timeout, _Args}) -> + S#state{bqstate = BQ}; + +next_state(S, Res, {call, ?BQMOD, purge, _Args}) -> + BQ1 = {call, erlang, element, [2, Res]}, + S#state{bqstate = BQ1, len = 0, messages = queue:new()}. 
+ +%% Postconditions + +postcondition(S, {call, ?BQMOD, fetch, _Args}, Res) -> + #state{messages = Messages, len = Len, acks = Acks, confirms = Confrms} = S, + case Res of + {{MsgFetched, _IsDelivered, AckTag, RemainingLen}, _BQ} -> + {_MsgProps, Msg} = queue:head(Messages), + MsgFetched =:= Msg andalso + not orddict:is_key(AckTag, Acks) andalso + not gb_sets:is_element(AckTag, Confrms) andalso + RemainingLen =:= Len - 1; + {empty, _BQ} -> + Len =:= 0 + end; + +postcondition(S, {call, ?BQMOD, publish_delivered, _Args}, {AckTag, _BQ}) -> + #state{acks = Acks, confirms = Confrms} = S, + not orddict:is_key(AckTag, Acks) andalso + not gb_sets:is_element(AckTag, Confrms); + +postcondition(#state{len = Len}, {call, ?BQMOD, purge, _Args}, Res) -> + {PurgeCount, _BQ} = Res, + Len =:= PurgeCount; + +postcondition(#state{len = Len}, + {call, ?BQMOD, is_empty, _Args}, Res) -> + (Len =:= 0) =:= Res; + +postcondition(S, {call, ?BQMOD, drain_confirmed, _Args}, Res) -> + #state{confirms = Confirms} = S, + {ReportedConfirmed, _BQ} = Res, + lists:all(fun (M) -> + gb_sets:is_element(M, Confirms) + end, ReportedConfirmed); + +postcondition(#state{bqstate = BQ, len = Len}, {call, _M, _F, _A}, _Res) -> + ?BQMOD:len(BQ) =:= Len. + +%% Helpers + +repeat(Result, _Fun, 0) -> + Result; +repeat(Result, Fun, Times) -> + repeat(Fun(Result), Fun, Times - 1). + +publish_multiple(Msg, MsgProps, BQ, Count) -> + repeat(BQ, fun(BQ1) -> + ?BQMOD:publish(Msg, MsgProps, self(), BQ1) + end, Count). + +timeout(BQ, 0) -> + BQ; +timeout(BQ, AtMost) -> + case ?BQMOD:needs_timeout(BQ) of + false -> BQ; + _ -> timeout(?BQMOD:timeout(BQ), AtMost - 1) + end. + +qc_message_payload() -> + ?SIZED(Size, resize(Size * Size, binary())). + +qc_routing_key() -> + noshrink(binary(10)). + +qc_delivery_mode() -> + oneof([1, 2]). + +qc_message() -> + qc_message(qc_delivery_mode()). 
+ +qc_message(DeliveryMode) -> + {call, rabbit_basic, message, [ + qc_default_exchange(), + qc_routing_key(), + #'P_basic'{delivery_mode = DeliveryMode}, + qc_message_payload()]}. + +qc_default_exchange() -> + {call, rabbit_misc, r, [<<>>, exchange, <<>>]}. + +qc_variable_queue_init(Q) -> + {call, ?BQMOD, init, + [Q, false, function(2, ok)]}. + +qc_test_q() -> + {call, rabbit_misc, r, [<<"/">>, queue, noshrink(binary(16))]}. + +qc_test_queue() -> + qc_test_queue(boolean()). + +qc_test_queue(Durable) -> + #amqqueue{name = qc_test_q(), + durable = Durable, + auto_delete = false, + arguments = [], + pid = self()}. + +rand_choice([]) -> []; +rand_choice(List) -> [lists:nth(random:uniform(length(List)), List)]. + +dropfun(Props) -> + Expiry = eval({call, erlang, element, + [?RECORD_INDEX(expiry, message_properties), Props]}), + Expiry =/= 0. + +drop_messages(Messages) -> + case queue:out(Messages) of + {empty, _} -> + Messages; + {{value, MsgProps_Msg}, M2} -> + MsgProps = {call, erlang, element, [1, MsgProps_Msg]}, + case dropfun(MsgProps) of + true -> drop_messages(M2); + false -> Messages + end + end. + +-endif. diff --git a/src/rabbit_basic.erl b/src/rabbit_basic.erl index c5bd9575..9cc406e7 100644 --- a/src/rabbit_basic.erl +++ b/src/rabbit_basic.erl @@ -18,10 +18,9 @@ -include("rabbit.hrl"). -include("rabbit_framing.hrl"). --export([publish/1, message/4, properties/1, delivery/5]). --export([publish/4, publish/7]). +-export([publish/1, message/3, message/4, properties/1, delivery/4]). +-export([publish/4, publish/6]). -export([build_content/2, from_content/1]). --export([is_message_persistent/1]). %%---------------------------------------------------------------------------- @@ -33,32 +32,33 @@ ({ok, rabbit_router:routing_result(), [pid()]} | rabbit_types:error('not_found'))). +-type(exchange_input() :: (rabbit_types:exchange() | rabbit_exchange:name())). +-type(body_input() :: (binary() | [binary()])). 
+ -spec(publish/1 :: (rabbit_types:delivery()) -> publish_result()). --spec(delivery/5 :: - (boolean(), boolean(), rabbit_types:maybe(rabbit_types:txn()), - rabbit_types:message(), undefined | integer()) -> +-spec(delivery/4 :: + (boolean(), boolean(), rabbit_types:message(), undefined | integer()) -> rabbit_types:delivery()). -spec(message/4 :: (rabbit_exchange:name(), rabbit_router:routing_key(), - properties_input(), binary()) -> - (rabbit_types:message() | rabbit_types:error(any()))). + properties_input(), binary()) -> rabbit_types:message()). +-spec(message/3 :: + (rabbit_exchange:name(), rabbit_router:routing_key(), + rabbit_types:decoded_content()) -> + rabbit_types:ok_or_error2(rabbit_types:message(), any())). -spec(properties/1 :: (properties_input()) -> rabbit_framing:amqp_property_record()). -spec(publish/4 :: - (rabbit_exchange:name(), rabbit_router:routing_key(), - properties_input(), binary()) -> publish_result()). --spec(publish/7 :: - (rabbit_exchange:name(), rabbit_router:routing_key(), - boolean(), boolean(), rabbit_types:maybe(rabbit_types:txn()), - properties_input(), binary()) -> publish_result()). --spec(build_content/2 :: (rabbit_framing:amqp_property_record(), binary()) -> - rabbit_types:content()). + (exchange_input(), rabbit_router:routing_key(), properties_input(), + body_input()) -> publish_result()). +-spec(publish/6 :: + (exchange_input(), rabbit_router:routing_key(), boolean(), boolean(), + properties_input(), body_input()) -> publish_result()). +-spec(build_content/2 :: (rabbit_framing:amqp_property_record(), + binary() | [binary()]) -> rabbit_types:content()). -spec(from_content/1 :: (rabbit_types:content()) -> {rabbit_framing:amqp_property_record(), binary()}). --spec(is_message_persistent/1 :: (rabbit_types:decoded_content()) -> - (boolean() | - {'invalid', non_neg_integer()})). -endif. 
@@ -67,18 +67,18 @@ publish(Delivery = #delivery{ message = #basic_message{exchange_name = ExchangeName}}) -> case rabbit_exchange:lookup(ExchangeName) of - {ok, X} -> - {RoutingRes, DeliveredQPids} = rabbit_exchange:publish(X, Delivery), - {ok, RoutingRes, DeliveredQPids}; - Other -> - Other + {ok, X} -> publish(X, Delivery); + Other -> Other end. -delivery(Mandatory, Immediate, Txn, Message, MsgSeqNo) -> - #delivery{mandatory = Mandatory, immediate = Immediate, txn = Txn, - sender = self(), message = Message, msg_seq_no = MsgSeqNo}. +delivery(Mandatory, Immediate, Message, MsgSeqNo) -> + #delivery{mandatory = Mandatory, immediate = Immediate, sender = self(), + message = Message, msg_seq_no = MsgSeqNo}. -build_content(Properties, BodyBin) -> +build_content(Properties, BodyBin) when is_binary(BodyBin) -> + build_content(Properties, [BodyBin]); + +build_content(Properties, PFR) -> %% basic.publish hasn't changed so we can just hard-code amqp_0_9_1 {ClassId, _MethodId} = rabbit_framing_amqp_0_9_1:method_id('basic.publish'), @@ -86,7 +86,7 @@ build_content(Properties, BodyBin) -> properties = Properties, properties_bin = none, protocol = none, - payload_fragments_rev = [BodyBin]}. + payload_fragments_rev = PFR}. from_content(Content) -> #content{class_id = ClassId, @@ -98,20 +98,41 @@ from_content(Content) -> rabbit_framing_amqp_0_9_1:method_id('basic.publish'), {Props, list_to_binary(lists:reverse(FragmentsRev))}. 
-message(ExchangeName, RoutingKeyBin, RawProperties, BodyBin) -> - Properties = properties(RawProperties), - Content = build_content(Properties, BodyBin), - case is_message_persistent(Content) of - {invalid, Other} -> - {error, {invalid_delivery_mode, Other}}; - IsPersistent when is_boolean(IsPersistent) -> - #basic_message{exchange_name = ExchangeName, - routing_key = RoutingKeyBin, - content = Content, - guid = rabbit_guid:guid(), - is_persistent = IsPersistent} +%% This breaks the spec rule forbidding message modification +strip_header(#content{properties = #'P_basic'{headers = undefined}} + = DecodedContent, _Key) -> + DecodedContent; +strip_header(#content{properties = Props = #'P_basic'{headers = Headers}} + = DecodedContent, Key) -> + case lists:keysearch(Key, 1, Headers) of + false -> DecodedContent; + {value, Found} -> Headers0 = lists:delete(Found, Headers), + rabbit_binary_generator:clear_encoded_content( + DecodedContent#content{ + properties = Props#'P_basic'{ + headers = Headers0}}) end. +message(ExchangeName, RoutingKey, + #content{properties = Props} = DecodedContent) -> + try + {ok, #basic_message{ + exchange_name = ExchangeName, + content = strip_header(DecodedContent, ?DELETED_HEADER), + id = rabbit_guid:guid(), + is_persistent = is_message_persistent(DecodedContent), + routing_keys = [RoutingKey | + header_routes(Props#'P_basic'.headers)]}} + catch + {error, _Reason} = Error -> Error + end. + +message(ExchangeName, RoutingKey, RawProperties, Body) -> + Properties = properties(RawProperties), + Content = build_content(Properties, Body), + {ok, Msg} = message(ExchangeName, RoutingKey, Content), + Msg. + properties(P = #'P_basic'{}) -> P; properties(P) when is_list(P) -> @@ -133,18 +154,25 @@ indexof([_ | Rest], Element, N) -> indexof(Rest, Element, N + 1). %% Convenience function, for avoiding round-trips in calls across the %% erlang distributed network. 
-publish(ExchangeName, RoutingKeyBin, Properties, BodyBin) -> - publish(ExchangeName, RoutingKeyBin, false, false, none, Properties, - BodyBin). +publish(Exchange, RoutingKeyBin, Properties, Body) -> + publish(Exchange, RoutingKeyBin, false, false, Properties, Body). %% Convenience function, for avoiding round-trips in calls across the %% erlang distributed network. -publish(ExchangeName, RoutingKeyBin, Mandatory, Immediate, Txn, Properties, - BodyBin) -> - publish(delivery(Mandatory, Immediate, Txn, - message(ExchangeName, RoutingKeyBin, - properties(Properties), BodyBin), - undefined)). +publish(X = #exchange{name = XName}, RKey, Mandatory, Immediate, Props, Body) -> + publish(X, delivery(Mandatory, Immediate, + message(XName, RKey, properties(Props), Body), + undefined)); +publish(XName, RKey, Mandatory, Immediate, Props, Body) -> + case rabbit_exchange:lookup(XName) of + {ok, X} -> publish(X, RKey, Mandatory, Immediate, Props, Body); + Err -> Err + end. + +publish(X, Delivery) -> + {RoutingRes, DeliveredQPids} = + rabbit_router:deliver(rabbit_exchange:route(X, Delivery), Delivery), + {ok, RoutingRes, DeliveredQPids}. is_message_persistent(#content{properties = #'P_basic'{ delivery_mode = Mode}}) -> @@ -152,5 +180,18 @@ is_message_persistent(#content{properties = #'P_basic'{ 1 -> false; 2 -> true; undefined -> false; - Other -> {invalid, Other} + Other -> throw({error, {delivery_mode_unknown, Other}}) end. + +%% Extract CC routes from headers +header_routes(undefined) -> + []; +header_routes(HeadersTable) -> + lists:append( + [case rabbit_misc:table_lookup(HeadersTable, HeaderKey) of + {array, Routes} -> [Route || {longstr, Route} <- Routes]; + undefined -> []; + {Type, _Val} -> throw({error, {unacceptable_type_in_header, + Type, + binary_to_list(HeaderKey)}}) + end || HeaderKey <- ?ROUTING_HEADERS]). 
diff --git a/src/rabbit_binary_generator.erl b/src/rabbit_binary_generator.erl index d67c7f58..68511a32 100644 --- a/src/rabbit_binary_generator.erl +++ b/src/rabbit_binary_generator.erl @@ -18,12 +18,13 @@ -include("rabbit_framing.hrl"). -include("rabbit.hrl"). -% EMPTY_CONTENT_BODY_FRAME_SIZE, 8 = 1 + 2 + 4 + 1 -% - 1 byte of frame type -% - 2 bytes of channel number -% - 4 bytes of frame payload length -% - 1 byte of payload trailer FRAME_END byte -% See definition of check_empty_content_body_frame_size/0, an assertion called at startup. +%% EMPTY_CONTENT_BODY_FRAME_SIZE, 8 = 1 + 2 + 4 + 1 +%% - 1 byte of frame type +%% - 2 bytes of channel number +%% - 4 bytes of frame payload length +%% - 1 byte of payload trailer FRAME_END byte +%% See definition of check_empty_content_body_frame_size/0, +%% an assertion called at startup. -define(EMPTY_CONTENT_BODY_FRAME_SIZE, 8). -export([build_simple_method_frame/3, @@ -61,8 +62,7 @@ -spec(map_exception/3 :: (rabbit_channel:channel_number(), rabbit_types:amqp_error() | any(), rabbit_types:protocol()) -> - {boolean(), - rabbit_channel:channel_number(), + {rabbit_channel:channel_number(), rabbit_framing:amqp_method_record()}). -endif. @@ -301,24 +301,21 @@ clear_encoded_content(Content = #content{}) -> map_exception(Channel, Reason, Protocol) -> {SuggestedClose, ReplyCode, ReplyText, FailedMethod} = lookup_amqp_exception(Reason, Protocol), - ShouldClose = SuggestedClose orelse (Channel == 0), {ClassId, MethodId} = case FailedMethod of {_, _} -> FailedMethod; none -> {0, 0}; _ -> Protocol:method_id(FailedMethod) end, - {CloseChannel, CloseMethod} = - case ShouldClose of - true -> {0, #'connection.close'{reply_code = ReplyCode, - reply_text = ReplyText, - class_id = ClassId, - method_id = MethodId}}; - false -> {Channel, #'channel.close'{reply_code = ReplyCode, - reply_text = ReplyText, - class_id = ClassId, - method_id = MethodId}} - end, - {ShouldClose, CloseChannel, CloseMethod}. 
+ case SuggestedClose orelse (Channel == 0) of + true -> {0, #'connection.close'{reply_code = ReplyCode, + reply_text = ReplyText, + class_id = ClassId, + method_id = MethodId}}; + false -> {Channel, #'channel.close'{reply_code = ReplyCode, + reply_text = ReplyText, + class_id = ClassId, + method_id = MethodId}} + end. lookup_amqp_exception(#amqp_error{name = Name, explanation = Expl, diff --git a/src/rabbit_binding.erl b/src/rabbit_binding.erl index 96a22dca..205d5bba 100644 --- a/src/rabbit_binding.erl +++ b/src/rabbit_binding.erl @@ -17,11 +17,11 @@ -module(rabbit_binding). -include("rabbit.hrl"). --export([recover/0, exists/1, add/1, remove/1, add/2, remove/2, list/1]). +-export([recover/2, exists/1, add/1, add/2, remove/1, remove/2, list/1]). -export([list_for_source/1, list_for_destination/1, list_for_source_and_destination/2]). -export([new_deletions/0, combine_deletions/2, add_deletion/3, - process_deletions/2]). + process_deletions/1]). -export([info_keys/0, info/1, info/2, info_all/1, info_all/2]). %% these must all be run inside a mnesia tx -export([has_for_source/1, remove_for_source/1, @@ -38,24 +38,24 @@ -type(bind_errors() :: rabbit_types:error('source_not_found' | 'destination_not_found' | 'source_and_destination_not_found')). --type(bind_res() :: 'ok' | bind_errors()). +-type(bind_ok_or_error() :: 'ok' | bind_errors() | + rabbit_types:error('binding_not_found')). +-type(bind_res() :: bind_ok_or_error() | rabbit_misc:const(bind_ok_or_error())). -type(inner_fun() :: fun((rabbit_types:exchange(), rabbit_types:exchange() | rabbit_types:amqqueue()) -> rabbit_types:ok_or_error(rabbit_types:amqp_error()))). -type(bindings() :: [rabbit_types:binding()]). --type(add_res() :: bind_res() | rabbit_misc:const(bind_res())). --type(bind_or_error() :: bind_res() | rabbit_types:error('binding_not_found')). --type(remove_res() :: bind_or_error() | rabbit_misc:const(bind_or_error())). -opaque(deletions() :: dict()). 
--spec(recover/0 :: () -> [rabbit_types:binding()]). +-spec(recover/2 :: ([rabbit_exchange:name()], [rabbit_amqqueue:name()]) -> + 'ok'). -spec(exists/1 :: (rabbit_types:binding()) -> boolean() | bind_errors()). --spec(add/1 :: (rabbit_types:binding()) -> add_res()). --spec(remove/1 :: (rabbit_types:binding()) -> remove_res()). --spec(add/2 :: (rabbit_types:binding(), inner_fun()) -> add_res()). --spec(remove/2 :: (rabbit_types:binding(), inner_fun()) -> remove_res()). +-spec(add/1 :: (rabbit_types:binding()) -> bind_res()). +-spec(add/2 :: (rabbit_types:binding(), inner_fun()) -> bind_res()). +-spec(remove/1 :: (rabbit_types:binding()) -> bind_res()). +-spec(remove/2 :: (rabbit_types:binding(), inner_fun()) -> bind_res()). -spec(list/1 :: (rabbit_types:vhost()) -> bindings()). -spec(list_for_source/1 :: (rabbit_types:binding_source()) -> bindings()). @@ -70,14 +70,14 @@ rabbit_types:infos()). -spec(info_all/1 :: (rabbit_types:vhost()) -> [rabbit_types:infos()]). -spec(info_all/2 ::(rabbit_types:vhost(), rabbit_types:info_keys()) - -> [rabbit_types:infos()]). + -> [rabbit_types:infos()]). -spec(has_for_source/1 :: (rabbit_types:binding_source()) -> boolean()). -spec(remove_for_source/1 :: (rabbit_types:binding_source()) -> bindings()). -spec(remove_for_destination/1 :: (rabbit_types:binding_destination()) -> deletions()). -spec(remove_transient_for_destination/1 :: (rabbit_types:binding_destination()) -> deletions()). --spec(process_deletions/2 :: (deletions(), boolean()) -> 'ok'). +-spec(process_deletions/1 :: (deletions()) -> rabbit_misc:thunk('ok')). -spec(combine_deletions/2 :: (deletions(), deletions()) -> deletions()). -spec(add_deletion/3 :: (rabbit_exchange:name(), {'undefined' | rabbit_types:exchange(), @@ -93,14 +93,42 @@ destination_name, destination_kind, routing_key, arguments]). 
-recover() -> - rabbit_misc:table_fold( - fun (Route = #route{binding = B}, Acc) -> - {_, ReverseRoute} = route_with_reverse(Route), - ok = mnesia:write(rabbit_route, Route, write), - ok = mnesia:write(rabbit_reverse_route, ReverseRoute, write), - [B | Acc] - end, [], rabbit_durable_route). +recover(XNames, QNames) -> + rabbit_misc:table_filter( + fun (Route) -> + mnesia:read({rabbit_semi_durable_route, Route}) =:= [] + end, + fun (Route, true) -> + ok = mnesia:write(rabbit_semi_durable_route, Route, write); + (_Route, false) -> + ok + end, rabbit_durable_route), + XNameSet = sets:from_list(XNames), + QNameSet = sets:from_list(QNames), + SelectSet = fun (#resource{kind = exchange}) -> XNameSet; + (#resource{kind = queue}) -> QNameSet + end, + [recover_semi_durable_route(R, SelectSet(Dst)) || + R = #route{binding = #binding{destination = Dst}} <- + rabbit_misc:dirty_read_all(rabbit_semi_durable_route)], + ok. + +recover_semi_durable_route(R = #route{binding = B}, ToRecover) -> + #binding{source = Src, destination = Dst} = B, + {ok, X} = rabbit_exchange:lookup(Src), + rabbit_misc:execute_mnesia_transaction( + fun () -> + Rs = mnesia:match_object(rabbit_semi_durable_route, R, read), + case Rs =/= [] andalso sets:is_element(Dst, ToRecover) of + false -> no_recover; + true -> ok = sync_transient_route(R, fun mnesia:write/3), + rabbit_exchange:serial(X) + end + end, + fun (no_recover, _) -> ok; + (_Serial, true) -> x_callback(transaction, X, add_binding, B); + (Serial, false) -> x_callback(Serial, X, add_binding, B) + end). exists(Binding) -> binding_action( @@ -110,8 +138,6 @@ exists(Binding) -> add(Binding) -> add(Binding, fun (_Src, _Dst) -> ok end). -remove(Binding) -> remove(Binding, fun (_Src, _Dst) -> ok end). 
- add(Binding, InnerFun) -> binding_action( Binding, @@ -120,51 +146,52 @@ add(Binding, InnerFun) -> %% in general, we want to fail on that in preference to %% anything else case InnerFun(Src, Dst) of - ok -> - case mnesia:read({rabbit_route, B}) of - [] -> ok = sync_binding(B, all_durable([Src, Dst]), - fun mnesia:write/3), - fun (Tx) -> - ok = rabbit_exchange:callback( - Src, add_binding, [Tx, Src, B]), - rabbit_event:notify_if( - not Tx, binding_created, info(B)) - end; - [_] -> fun rabbit_misc:const_ok/1 - end; - {error, _} = Err -> - rabbit_misc:const(Err) + ok -> case mnesia:read({rabbit_route, B}) of + [] -> add(Src, Dst, B); + [_] -> fun rabbit_misc:const_ok/0 + end; + {error, _} = Err -> rabbit_misc:const(Err) end end). +add(Src, Dst, B) -> + [SrcDurable, DstDurable] = [durable(E) || E <- [Src, Dst]], + case (not (SrcDurable andalso DstDurable) orelse + mnesia:read({rabbit_durable_route, B}) =:= []) of + true -> ok = sync_route(#route{binding = B}, SrcDurable, DstDurable, + fun mnesia:write/3), + ok = rabbit_exchange:callback( + Src, add_binding, [transaction, Src, B]), + Serial = rabbit_exchange:serial(Src), + fun () -> + ok = rabbit_exchange:callback( + Src, add_binding, [Serial, Src, B]), + ok = rabbit_event:notify(binding_created, info(B)) + end; + false -> rabbit_misc:const({error, binding_not_found}) + end. + +remove(Binding) -> remove(Binding, fun (_Src, _Dst) -> ok end). 
+ remove(Binding, InnerFun) -> binding_action( Binding, fun (Src, Dst, B) -> - Result = - case mnesia:match_object(rabbit_route, #route{binding = B}, - write) of - [] -> - {error, binding_not_found}; - [_] -> - case InnerFun(Src, Dst) of - ok -> - ok = sync_binding(B, all_durable([Src, Dst]), - fun mnesia:delete_object/3), - {ok, maybe_auto_delete(B#binding.source, - [B], new_deletions())}; - {error, _} = E -> - E - end - end, - case Result of - {error, _} = Err -> - rabbit_misc:const(Err); - {ok, Deletions} -> - fun (Tx) -> ok = process_deletions(Deletions, Tx) end + case mnesia:read(rabbit_route, B, write) of + [] -> rabbit_misc:const({error, binding_not_found}); + [_] -> case InnerFun(Src, Dst) of + ok -> remove(Src, Dst, B); + {error, _} = Err -> rabbit_misc:const(Err) + end end end). +remove(Src, Dst, B) -> + ok = sync_route(#route{binding = B}, durable(Src), durable(Dst), + fun mnesia:delete_object/3), + Deletions = maybe_auto_delete(B#binding.source, [B], new_deletions()), + process_deletions(Deletions). + list(VHostPath) -> VHostResource = rabbit_misc:r(VHostPath, '_'), Route = #route{binding = #binding{source = VHostResource, @@ -175,22 +202,33 @@ list(VHostPath) -> Route)]. list_for_source(SrcName) -> - Route = #route{binding = #binding{source = SrcName, _ = '_'}}, - [B || #route{binding = B} <- mnesia:dirty_match_object(rabbit_route, - Route)]. + mnesia:async_dirty( + fun() -> + Route = #route{binding = #binding{source = SrcName, _ = '_'}}, + [B || #route{binding = B} + <- mnesia:match_object(rabbit_route, Route, read)] + end). list_for_destination(DstName) -> - Route = #route{binding = #binding{destination = DstName, _ = '_'}}, - [reverse_binding(B) || #reverse_route{reverse_binding = B} <- - mnesia:dirty_match_object(rabbit_reverse_route, - reverse_route(Route))]. 
+ mnesia:async_dirty( + fun() -> + Route = #route{binding = #binding{destination = DstName, + _ = '_'}}, + [reverse_binding(B) || + #reverse_route{reverse_binding = B} <- + mnesia:match_object(rabbit_reverse_route, + reverse_route(Route), read)] + end). list_for_source_and_destination(SrcName, DstName) -> - Route = #route{binding = #binding{source = SrcName, - destination = DstName, - _ = '_'}}, - [B || #route{binding = B} <- mnesia:dirty_match_object(rabbit_route, - Route)]. + mnesia:async_dirty( + fun() -> + Route = #route{binding = #binding{source = SrcName, + destination = DstName, + _ = '_'}}, + [B || #route{binding = B} <- mnesia:match_object(rabbit_route, + Route, read)] + end). info_keys() -> ?INFO_KEYS. @@ -222,32 +260,31 @@ has_for_source(SrcName) -> %% we need to check for durable routes here too in case a bunch of %% routes to durable queues have been removed temporarily as a %% result of a node failure - contains(rabbit_route, Match) orelse contains(rabbit_durable_route, Match). + contains(rabbit_route, Match) orelse + contains(rabbit_semi_durable_route, Match). remove_for_source(SrcName) -> + Match = #route{binding = #binding{source = SrcName, _ = '_'}}, + Routes = lists:usort( + mnesia:match_object(rabbit_route, Match, write) ++ + mnesia:match_object(rabbit_durable_route, Match, write)), [begin - ok = mnesia:delete_object(rabbit_reverse_route, - reverse_route(Route), write), - ok = delete_forward_routes(Route), + sync_route(Route, fun mnesia:delete_object/3), Route#route.binding - end || Route <- mnesia:match_object( - rabbit_route, - #route{binding = #binding{source = SrcName, - _ = '_'}}, - write)]. + end || Route <- Routes]. -remove_for_destination(DstName) -> - remove_for_destination(DstName, fun delete_forward_routes/1). +remove_for_destination(Dst) -> + remove_for_destination( + Dst, fun (R) -> sync_route(R, fun mnesia:delete_object/3) end). 
-remove_transient_for_destination(DstName) -> - remove_for_destination(DstName, fun delete_transient_forward_routes/1). +remove_transient_for_destination(Dst) -> + remove_for_destination( + Dst, fun (R) -> sync_transient_route(R, fun mnesia:delete_object/3) end). %%---------------------------------------------------------------------------- -all_durable(Resources) -> - lists:all(fun (#exchange{durable = D}) -> D; - (#amqqueue{durable = D}) -> D - end, Resources). +durable(#exchange{durable = D}) -> D; +durable(#amqqueue{durable = D}) -> D. binding_action(Binding = #binding{source = SrcName, destination = DstName, @@ -259,31 +296,36 @@ binding_action(Binding = #binding{source = SrcName, Fun(Src, Dst, Binding#binding{args = SortedArgs}) end). -sync_binding(Binding, Durable, Fun) -> - ok = case Durable of - true -> Fun(rabbit_durable_route, - #route{binding = Binding}, write); - false -> ok - end, - {Route, ReverseRoute} = route_with_reverse(Binding), +sync_route(R, Fun) -> sync_route(R, true, true, Fun). + +sync_route(Route, true, true, Fun) -> + ok = Fun(rabbit_durable_route, Route, write), + sync_route(Route, false, true, Fun); + +sync_route(Route, false, true, Fun) -> + ok = Fun(rabbit_semi_durable_route, Route, write), + sync_route(Route, false, false, Fun); + +sync_route(Route, _SrcDurable, false, Fun) -> + sync_transient_route(Route, Fun). + +sync_transient_route(Route, Fun) -> ok = Fun(rabbit_route, Route, write), - ok = Fun(rabbit_reverse_route, ReverseRoute, write), - ok. + ok = Fun(rabbit_reverse_route, reverse_route(Route), write). 
call_with_source_and_destination(SrcName, DstName, Fun) -> SrcTable = table_for_resource(SrcName), DstTable = table_for_resource(DstName), - ErrFun = fun (Err) -> rabbit_misc:const(Err) end, + ErrFun = fun (Err) -> rabbit_misc:const({error, Err}) end, rabbit_misc:execute_mnesia_tx_with_tail( fun () -> case {mnesia:read({SrcTable, SrcName}), mnesia:read({DstTable, DstName})} of {[Src], [Dst]} -> Fun(Src, Dst); - {[], [_] } -> ErrFun({error, source_not_found}); - {[_], [] } -> ErrFun({error, destination_not_found}); - {[], [] } -> ErrFun({error, - source_and_destination_not_found}) - end + {[], [_] } -> ErrFun(source_not_found); + {[_], [] } -> ErrFun(destination_not_found); + {[], [] } -> ErrFun(source_and_destination_not_found) + end end). table_for_resource(#resource{kind = exchange}) -> rabbit_exchange; @@ -296,22 +338,15 @@ continue('$end_of_table') -> false; continue({[_|_], _}) -> true; continue({[], Continuation}) -> continue(mnesia:select(Continuation)). -remove_for_destination(DstName, FwdDeleteFun) -> - Bindings = - [begin - Route = reverse_route(ReverseRoute), - ok = FwdDeleteFun(Route), - ok = mnesia:delete_object(rabbit_reverse_route, - ReverseRoute, write), - Route#route.binding - end || ReverseRoute - <- mnesia:match_object( - rabbit_reverse_route, - reverse_route(#route{ - binding = #binding{ - destination = DstName, - _ = '_'}}), - write)], +remove_for_destination(DstName, DeleteFun) -> + Match = reverse_route( + #route{binding = #binding{destination = DstName, _ = '_'}}), + ReverseRoutes = mnesia:match_object(rabbit_reverse_route, Match, write), + Bindings = [begin + Route = reverse_route(ReverseRoute), + ok = DeleteFun(Route), + Route#route.binding + end || ReverseRoute <- ReverseRoutes], group_bindings_fold(fun maybe_auto_delete/3, new_deletions(), lists:keysort(#binding.source, Bindings)). @@ -331,30 +366,18 @@ group_bindings_fold(Fun, SrcName, Acc, Removed, Bindings) -> group_bindings_fold(Fun, Fun(SrcName, Bindings, Acc), Removed). 
maybe_auto_delete(XName, Bindings, Deletions) -> - case mnesia:read({rabbit_exchange, XName}) of - [] -> - add_deletion(XName, {undefined, not_deleted, Bindings}, Deletions); - [X] -> - add_deletion(XName, {X, not_deleted, Bindings}, - case rabbit_exchange:maybe_auto_delete(X) of - not_deleted -> Deletions; - {deleted, Deletions1} -> combine_deletions( - Deletions, Deletions1) - end) - end. - -delete_forward_routes(Route) -> - ok = mnesia:delete_object(rabbit_route, Route, write), - ok = mnesia:delete_object(rabbit_durable_route, Route, write). - -delete_transient_forward_routes(Route) -> - ok = mnesia:delete_object(rabbit_route, Route, write). - -route_with_reverse(#route{binding = Binding}) -> - route_with_reverse(Binding); -route_with_reverse(Binding = #binding{}) -> - Route = #route{binding = Binding}, - {Route, reverse_route(Route)}. + {Entry, Deletions1} = + case mnesia:read({rabbit_exchange, XName}) of + [] -> {{undefined, not_deleted, Bindings}, Deletions}; + [X] -> case rabbit_exchange:maybe_auto_delete(X) of + not_deleted -> + {{X, not_deleted, Bindings}, Deletions}; + {deleted, Deletions2} -> + {{X, deleted, Bindings}, + combine_deletions(Deletions, Deletions2)} + end + end, + add_deletion(XName, Entry, Deletions1). reverse_route(#route{binding = Binding}) -> #reverse_route{reverse_binding = reverse_binding(Binding)}; @@ -404,19 +427,29 @@ merge_entry({X1, Deleted1, Bindings1}, {X2, Deleted2, Bindings2}) -> anything_but(not_deleted, Deleted1, Deleted2), [Bindings1 | Bindings2]}. 
-process_deletions(Deletions, Tx) -> - dict:fold( - fun (_XName, {X, Deleted, Bindings}, ok) -> - FlatBindings = lists:flatten(Bindings), - [rabbit_event:notify_if(not Tx, binding_deleted, info(B)) || - B <- FlatBindings], - case Deleted of - not_deleted -> - rabbit_exchange:callback(X, remove_bindings, - [Tx, X, FlatBindings]); - deleted -> - rabbit_event:notify_if(not Tx, exchange_deleted, - [{name, X#exchange.name}]), - rabbit_exchange:callback(X, delete, [Tx, X, FlatBindings]) - end - end, ok, Deletions). +process_deletions(Deletions) -> + AugmentedDeletions = + dict:map(fun (_XName, {X, deleted, Bindings}) -> + Bs = lists:flatten(Bindings), + x_callback(transaction, X, delete, Bs), + {X, deleted, Bs, none}; + (_XName, {X, not_deleted, Bindings}) -> + Bs = lists:flatten(Bindings), + x_callback(transaction, X, remove_bindings, Bs), + {X, not_deleted, Bs, rabbit_exchange:serial(X)} + end, Deletions), + fun() -> + dict:fold(fun (XName, {X, deleted, Bs, Serial}, ok) -> + ok = rabbit_event:notify( + exchange_deleted, [{name, XName}]), + del_notify(Bs), + x_callback(Serial, X, delete, Bs); + (_XName, {X, not_deleted, Bs, Serial}, ok) -> + del_notify(Bs), + x_callback(Serial, X, remove_bindings, Bs) + end, ok, AugmentedDeletions) + end. + +del_notify(Bs) -> [rabbit_event:notify(binding_deleted, info(B)) || B <- Bs]. + +x_callback(Arg, X, F, Bs) -> ok = rabbit_exchange:callback(X, F, [Arg, X, Bs]). diff --git a/src/rabbit_channel.erl b/src/rabbit_channel.erl index eb80e437..6fbbc93e 100644 --- a/src/rabbit_channel.erl +++ b/src/rabbit_channel.erl @@ -20,21 +20,22 @@ -behaviour(gen_server2). --export([start_link/7, do/2, do/3, flush/1, shutdown/1]). +-export([start_link/10, do/2, do/3, flush/1, shutdown/1]). -export([send_command/2, deliver/4, flushed/2, confirm/2]). -export([list/0, info_keys/0, info/1, info/2, info_all/0, info_all/1]). --export([emit_stats/1]). +-export([refresh_config_all/0, emit_stats/1, ready_for_close/1]). 
-export([init/1, terminate/2, code_change/3, handle_call/3, handle_cast/2, handle_info/2, handle_pre_hibernate/1, prioritise_call/3, - prioritise_cast/2]). + prioritise_cast/2, format_message_queue/2]). --record(ch, {state, channel, reader_pid, writer_pid, limiter_pid, - start_limiter_fun, transaction_id, tx_participants, next_tag, - uncommitted_ack_q, unacked_message_q, +-record(ch, {state, protocol, channel, reader_pid, writer_pid, conn_pid, + limiter_pid, start_limiter_fun, tx_status, next_tag, + unacked_message_q, uncommitted_message_q, uncommitted_ack_q, user, virtual_host, most_recently_declared_queue, - consumer_mapping, blocking, queue_collector_pid, stats_timer, - confirm_enabled, publish_seqno, unconfirmed, confirmed}). + consumer_mapping, blocking, consumer_monitors, queue_collector_pid, + stats_timer, confirm_enabled, publish_seqno, unconfirmed_mq, + unconfirmed_qm, confirmed, capabilities, trace_state}). -define(MAX_PERMISSION_CACHE_SIZE, 12). @@ -45,6 +46,7 @@ consumer_count, messages_unacknowledged, messages_unconfirmed, + messages_uncommitted, acks_uncommitted, prefetch_count, client_flow_blocked]). @@ -66,10 +68,10 @@ -type(channel_number() :: non_neg_integer()). --spec(start_link/7 :: - (channel_number(), pid(), pid(), rabbit_types:user(), - rabbit_types:vhost(), pid(), - fun ((non_neg_integer()) -> rabbit_types:ok(pid()))) -> +-spec(start_link/10 :: + (channel_number(), pid(), pid(), pid(), rabbit_types:protocol(), + rabbit_types:user(), rabbit_types:vhost(), rabbit_framing:amqp_table(), + pid(), fun ((non_neg_integer()) -> rabbit_types:ok(pid()))) -> rabbit_types:ok_pid_or_error()). -spec(do/2 :: (pid(), rabbit_framing:amqp_method_record()) -> 'ok'). -spec(do/3 :: (pid(), rabbit_framing:amqp_method_record(), @@ -88,16 +90,19 @@ -spec(info/2 :: (pid(), rabbit_types:info_keys()) -> rabbit_types:infos()). -spec(info_all/0 :: () -> [rabbit_types:infos()]). -spec(info_all/1 :: (rabbit_types:info_keys()) -> [rabbit_types:infos()]). 
+-spec(refresh_config_all/0 :: () -> 'ok'). -spec(emit_stats/1 :: (pid()) -> 'ok'). +-spec(ready_for_close/1 :: (pid()) -> 'ok'). -endif. %%---------------------------------------------------------------------------- -start_link(Channel, ReaderPid, WriterPid, User, VHost, CollectorPid, - StartLimiterFun) -> - gen_server2:start_link(?MODULE, [Channel, ReaderPid, WriterPid, User, - VHost, CollectorPid, StartLimiterFun], []). +start_link(Channel, ReaderPid, WriterPid, ConnPid, Protocol, User, VHost, + Capabilities, CollectorPid, StartLimiterFun) -> + gen_server2:start_link( + ?MODULE, [Channel, ReaderPid, WriterPid, ConnPid, Protocol, User, + VHost, Capabilities, CollectorPid, StartLimiterFun], []). do(Pid, Method) -> do(Pid, Method, none). @@ -106,7 +111,7 @@ do(Pid, Method, Content) -> gen_server2:cast(Pid, {method, Method, Content}). flush(Pid) -> - gen_server2:call(Pid, flush). + gen_server2:call(Pid, flush, infinity). shutdown(Pid) -> gen_server2:cast(Pid, terminate). @@ -143,38 +148,52 @@ info_all() -> info_all(Items) -> rabbit_misc:filter_exit_map(fun (C) -> info(C, Items) end, list()). +refresh_config_all() -> + rabbit_misc:upmap( + fun (C) -> gen_server2:call(C, refresh_config) end, list()), + ok. + emit_stats(Pid) -> gen_server2:cast(Pid, emit_stats). +ready_for_close(Pid) -> + gen_server2:cast(Pid, ready_for_close). 
+ %%--------------------------------------------------------------------------- -init([Channel, ReaderPid, WriterPid, User, VHost, CollectorPid, - StartLimiterFun]) -> +init([Channel, ReaderPid, WriterPid, ConnPid, Protocol, User, VHost, + Capabilities, CollectorPid, StartLimiterFun]) -> process_flag(trap_exit, true), ok = pg_local:join(rabbit_channels, self()), StatsTimer = rabbit_event:init_stats_timer(), State = #ch{state = starting, + protocol = Protocol, channel = Channel, reader_pid = ReaderPid, writer_pid = WriterPid, + conn_pid = ConnPid, limiter_pid = undefined, start_limiter_fun = StartLimiterFun, - transaction_id = none, - tx_participants = sets:new(), + tx_status = none, next_tag = 1, - uncommitted_ack_q = queue:new(), unacked_message_q = queue:new(), + uncommitted_message_q = queue:new(), + uncommitted_ack_q = queue:new(), user = User, virtual_host = VHost, most_recently_declared_queue = <<>>, consumer_mapping = dict:new(), blocking = dict:new(), + consumer_monitors = dict:new(), queue_collector_pid = CollectorPid, stats_timer = StatsTimer, confirm_enabled = false, publish_seqno = 1, - unconfirmed = gb_trees:empty(), - confirmed = []}, + unconfirmed_mq = gb_trees:empty(), + unconfirmed_qm = gb_trees:empty(), + confirmed = [], + capabilities = Capabilities, + trace_state = rabbit_trace:init(VHost)}, rabbit_event:notify(channel_created, infos(?CREATION_EVENT_KEYS, State)), rabbit_event:if_enabled(StatsTimer, fun() -> internal_emit_stats(State) end), @@ -207,6 +226,9 @@ handle_call({info, Items}, _From, State) -> catch Error -> reply({error, Error}, State) end; +handle_call(refresh_config, _From, State = #ch{virtual_host = VHost}) -> + reply(ok, State#ch{trace_state = rabbit_trace:init(VHost)}); + handle_call(_Request, _From, State) -> noreply(State). 
@@ -218,14 +240,11 @@ handle_cast({method, Method, Content}, State) -> {noreply, NewState} -> noreply(NewState); stop -> - {stop, normal, State#ch{state = terminating}} + {stop, normal, State} catch exit:Reason = #amqp_error{} -> MethodName = rabbit_misc:method_record_type(Method), - {stop, normal, terminating(Reason#amqp_error{method = MethodName}, - State)}; - exit:normal -> - {stop, normal, State}; + send_exception(Reason#amqp_error{method = MethodName}, State); _:Reason -> {stop, {Reason, erlang:get_stacktrace()}, State} end; @@ -233,9 +252,19 @@ handle_cast({method, Method, Content}, State) -> handle_cast({flushed, QPid}, State) -> {noreply, queue_blocked(QPid, State), hibernate}; +handle_cast(ready_for_close, State = #ch{state = closing, + writer_pid = WriterPid}) -> + ok = rabbit_writer:send_command_sync(WriterPid, #'channel.close_ok'{}), + {stop, normal, State}; + handle_cast(terminate, State) -> {stop, normal, State}; +handle_cast({command, #'basic.consume_ok'{consumer_tag = ConsumerTag} = Msg}, + State = #ch{writer_pid = WriterPid}) -> + ok = rabbit_writer:send_command(WriterPid, Msg), + noreply(monitor_consumer(ConsumerTag, State)); + handle_cast({command, Msg}, State = #ch{writer_pid = WriterPid}) -> ok = rabbit_writer:send_command(WriterPid, Msg), noreply(State); @@ -243,10 +272,11 @@ handle_cast({command, Msg}, State = #ch{writer_pid = WriterPid}) -> handle_cast({deliver, ConsumerTag, AckRequired, Msg = {_QName, QPid, _MsgId, Redelivered, #basic_message{exchange_name = ExchangeName, - routing_key = RoutingKey, + routing_keys = [RoutingKey | _CcRoutes], content = Content}}}, - State = #ch{writer_pid = WriterPid, - next_tag = DeliveryTag}) -> + State = #ch{writer_pid = WriterPid, + next_tag = DeliveryTag, + trace_state = TraceState}) -> State1 = lock_message(AckRequired, ack_record(DeliveryTag, ConsumerTag, Msg), State), @@ -257,12 +287,12 @@ handle_cast({deliver, ConsumerTag, AckRequired, exchange = ExchangeName#resource.name, routing_key = 
RoutingKey}, rabbit_writer:send_command_and_notify(WriterPid, QPid, self(), M, Content), - - maybe_incr_stats([{QPid, 1}], - case AckRequired of - true -> deliver; - false -> deliver_no_ack - end, State), + maybe_incr_stats([{QPid, 1}], case AckRequired of + true -> deliver; + false -> deliver_no_ack + end, State), + maybe_incr_redeliver_stats(Redelivered, QPid, State), + rabbit_trace:tap_trace_out(Msg, TraceState), noreply(State1#ch{next_tag = DeliveryTag + 1}); handle_cast({confirm, MsgSeqNos, From}, State) -> @@ -278,20 +308,18 @@ handle_info(emit_stats, State = #ch{stats_timer = StatsTimer}) -> State#ch{ stats_timer = rabbit_event:reset_stats_timer(StatsTimer)}); -handle_info({'DOWN', _MRef, process, QPid, Reason}, - State = #ch{unconfirmed = UC}) -> - %% TODO: this does a complete scan and partial rebuild of the - %% tree, which is quite efficient. To do better we'd need to - %% maintain a secondary mapping, from QPids to MsgSeqNos. - {MXs, UC1} = remove_queue_unconfirmed( - gb_trees:next(gb_trees:iterator(UC)), QPid, - {[], UC}, State), - erase_queue_stats(QPid), - State1 = case Reason of - normal -> record_confirms(MXs, State#ch{unconfirmed = UC1}); - _ -> send_nacks(MXs, State#ch{unconfirmed = UC1}) - end, - noreply(queue_blocked(QPid, State1)). +handle_info({'DOWN', MRef, process, QPid, Reason}, + State = #ch{consumer_monitors = ConsumerMonitors}) -> + noreply( + case dict:find(MRef, ConsumerMonitors) of + error -> + handle_publishing_queue_down(QPid, Reason, State); + {ok, ConsumerTag} -> + handle_consuming_queue_down(MRef, ConsumerTag, State) + end); + +handle_info({'EXIT', _Pid, Reason}, State) -> + {stop, Reason, State}. handle_pre_hibernate(State = #ch{stats_timer = StatsTimer}) -> ok = clear_permission_cache(), @@ -303,22 +331,22 @@ handle_pre_hibernate(State = #ch{stats_timer = StatsTimer}) -> StatsTimer1 = rabbit_event:stop_stats_timer(StatsTimer), {hibernate, State#ch{stats_timer = StatsTimer1}}. 
-terminate(_Reason, State = #ch{state = terminating}) -> - terminate(State); - terminate(Reason, State) -> - Res = rollback_and_notify(State), + {Res, _State1} = notify_queues(State), case Reason of normal -> ok = Res; shutdown -> ok = Res; {shutdown, _Term} -> ok = Res; _ -> ok end, - terminate(State). + pg_local:leave(rabbit_channels, self()), + rabbit_event:notify(channel_closed, [{pid, self()}]). code_change(_OldVsn, State, _Extra) -> {ok, State}. +format_message_queue(Opt, MQ) -> rabbit_misc:format_message_queue(Opt, MQ). + %%--------------------------------------------------------------------------- reply(Reply, NewState) -> reply(Reply, [], NewState). @@ -351,10 +379,23 @@ return_ok(State, false, Msg) -> {reply, Msg, State}. ok_msg(true, _Msg) -> undefined; ok_msg(false, Msg) -> Msg. -terminating(Reason, State = #ch{channel = Channel, reader_pid = Reader}) -> - ok = rollback_and_notify(State), - Reader ! {channel_exit, Channel, Reason}, - State#ch{state = terminating}. +send_exception(Reason, State = #ch{protocol = Protocol, + channel = Channel, + writer_pid = WriterPid, + reader_pid = ReaderPid, + conn_pid = ConnPid}) -> + {CloseChannel, CloseMethod} = + rabbit_binary_generator:map_exception(Channel, Reason, Protocol), + rabbit_log:error("connection ~p, channel ~p - error:~n~p~n", + [ConnPid, Channel, Reason]), + %% something bad's happened: notify_queues may not be 'ok' + {_Result, State1} = notify_queues(State), + case CloseChannel of + Channel -> ok = rabbit_writer:send_command(WriterPid, CloseMethod), + {noreply, State1}; + _ -> ReaderPid ! {channel_exit, Channel, Reason}, + {stop, normal, State1} + end. return_queue_declare_ok(#resource{name = ActualName}, NoWait, MessageCount, ConsumerCount, State) -> @@ -476,13 +517,6 @@ queue_blocked(QPid, State = #ch{blocking = Blocking}) -> State#ch{blocking = Blocking1} end. 
-remove_queue_unconfirmed(none, _QPid, Acc, _State) -> - Acc; -remove_queue_unconfirmed({MsgSeqNo, XQ, Next}, QPid, Acc, State) -> - remove_queue_unconfirmed(gb_trees:next(Next), QPid, - remove_qmsg(MsgSeqNo, QPid, XQ, Acc, State), - State). - record_confirm(undefined, _, State) -> State; record_confirm(MsgSeqNo, XName, State) -> @@ -495,25 +529,42 @@ record_confirms(MXs, State = #ch{confirmed = C}) -> confirm([], _QPid, State) -> State; -confirm(MsgSeqNos, QPid, State = #ch{unconfirmed = UC}) -> - {MXs, UC1} = +confirm(MsgSeqNos, QPid, State) -> + {MXs, State1} = process_confirms(MsgSeqNos, QPid, false, State), + record_confirms(MXs, State1). + +process_confirms(MsgSeqNos, QPid, Nack, State = #ch{unconfirmed_mq = UMQ, + unconfirmed_qm = UQM}) -> + {MXs, UMQ1, UQM1} = lists:foldl( - fun(MsgSeqNo, {_DMs, UC0} = Acc) -> - case gb_trees:lookup(MsgSeqNo, UC0) of - none -> Acc; - {value, XQ} -> remove_qmsg(MsgSeqNo, QPid, XQ, Acc, State) + fun(MsgSeqNo, {_MXs, UMQ0, _UQM} = Acc) -> + case gb_trees:lookup(MsgSeqNo, UMQ0) of + {value, XQ} -> remove_unconfirmed(MsgSeqNo, QPid, XQ, + Acc, Nack); + none -> Acc end - end, {[], UC}, MsgSeqNos), - record_confirms(MXs, State#ch{unconfirmed = UC1}). - -remove_qmsg(MsgSeqNo, QPid, {XName, Qs}, {MXs, UC}, State) -> - Qs1 = sets:del_element(QPid, Qs), - %% these confirms will be emitted even when a queue dies, but that - %% should be fine, since the queue stats get erased immediately - maybe_incr_stats([{{QPid, XName}, 1}], confirm, State), - case sets:size(Qs1) of - 0 -> {[{MsgSeqNo, XName} | MXs], gb_trees:delete(MsgSeqNo, UC)}; - _ -> {MXs, gb_trees:update(MsgSeqNo, {XName, Qs1}, UC)} + end, {[], UMQ, UQM}, MsgSeqNos), + {MXs, State#ch{unconfirmed_mq = UMQ1, unconfirmed_qm = UQM1}}. 
+ +remove_unconfirmed(MsgSeqNo, QPid, {XName, Qs}, {MXs, UMQ, UQM}, Nack) -> + UQM1 = case gb_trees:lookup(QPid, UQM) of + {value, MsgSeqNos} -> + MsgSeqNos1 = gb_sets:delete(MsgSeqNo, MsgSeqNos), + case gb_sets:is_empty(MsgSeqNos1) of + true -> gb_trees:delete(QPid, UQM); + false -> gb_trees:update(QPid, MsgSeqNos1, UQM) + end; + none -> + UQM + end, + Qs1 = gb_sets:del_element(QPid, Qs), + %% If QPid somehow died initiating a nack, clear the message from + %% internal data-structures. Also, cleanup empty entries. + case (Nack orelse gb_sets:is_empty(Qs1)) of + true -> + {[{MsgSeqNo, XName} | MXs], gb_trees:delete(MsgSeqNo, UMQ), UQM1}; + false -> + {MXs, gb_trees:update(MsgSeqNo, {XName, Qs1}, UMQ), UQM1} end. handle_method(#'channel.open'{}, _, State = #ch{state = starting}) -> @@ -526,11 +577,29 @@ handle_method(#'channel.open'{}, _, _State) -> handle_method(_Method, _, #ch{state = starting}) -> rabbit_misc:protocol_error(channel_error, "expected 'channel.open'", []); -handle_method(#'channel.close'{}, _, State = #ch{writer_pid = WriterPid}) -> - ok = rollback_and_notify(State), - ok = rabbit_writer:send_command_sync(WriterPid, #'channel.close_ok'{}), +handle_method(#'channel.close_ok'{}, _, #ch{state = closing}) -> stop; +handle_method(#'channel.close'{}, _, State = #ch{state = closing}) -> + {reply, #'channel.close_ok'{}, State}; + +handle_method(_Method, _, State = #ch{state = closing}) -> + {noreply, State}; + +handle_method(#'channel.close'{}, _, State = #ch{reader_pid = ReaderPid}) -> + {ok, State1} = notify_queues(State), + ReaderPid ! {channel_closing, self()}, + {noreply, State1}; + +%% Even though the spec prohibits the client from sending commands +%% while waiting for the reply to a synchronous command, we generally +%% do allow this...except in the case of a pending tx.commit, where +%% it could wreak havoc. 
+handle_method(_Method, _, #ch{tx_status = TxStatus}) + when TxStatus =/= none andalso TxStatus =/= in_progress -> + rabbit_misc:protocol_error( + channel_error, "unexpected command while processing 'tx.commit'", []); + handle_method(#'access.request'{},_, State) -> {reply, #'access.request_ok'{ticket = 1}, State}; @@ -539,8 +608,9 @@ handle_method(#'basic.publish'{exchange = ExchangeNameBin, mandatory = Mandatory, immediate = Immediate}, Content, State = #ch{virtual_host = VHostPath, - transaction_id = TxnKey, - confirm_enabled = ConfirmEnabled}) -> + tx_status = TxStatus, + confirm_enabled = ConfirmEnabled, + trace_state = TraceState}) -> ExchangeName = rabbit_misc:r(VHostPath, exchange, ExchangeNameBin), check_write_permitted(ExchangeName, State), Exchange = rabbit_exchange:lookup_or_die(ExchangeName), @@ -549,32 +619,29 @@ handle_method(#'basic.publish'{exchange = ExchangeNameBin, %% certain to want to look at delivery-mode and priority. DecodedContent = rabbit_binary_parser:ensure_content_decoded(Content), check_user_id_header(DecodedContent#content.properties, State), - IsPersistent = is_message_persistent(DecodedContent), {MsgSeqNo, State1} = - case ConfirmEnabled of - false -> {undefined, State}; - true -> SeqNo = State#ch.publish_seqno, - {SeqNo, State#ch{publish_seqno = SeqNo + 1}} + case {TxStatus, ConfirmEnabled} of + {none, false} -> {undefined, State}; + {_, _} -> SeqNo = State#ch.publish_seqno, + {SeqNo, State#ch{publish_seqno = SeqNo + 1}} end, - Message = #basic_message{exchange_name = ExchangeName, - routing_key = RoutingKey, - content = DecodedContent, - guid = rabbit_guid:guid(), - is_persistent = IsPersistent}, - {RoutingRes, DeliveredQPids} = - rabbit_exchange:publish( - Exchange, - rabbit_basic:delivery(Mandatory, Immediate, TxnKey, Message, - MsgSeqNo)), - State2 = process_routing_result(RoutingRes, DeliveredQPids, ExchangeName, - MsgSeqNo, Message, State1), - maybe_incr_stats([{ExchangeName, 1} | - [{{QPid, ExchangeName}, 1} || - QPid <- 
DeliveredQPids]], publish, State2), - {noreply, case TxnKey of - none -> State2; - _ -> add_tx_participants(DeliveredQPids, State2) - end}; + case rabbit_basic:message(ExchangeName, RoutingKey, DecodedContent) of + {ok, Message} -> + rabbit_trace:tap_trace_in(Message, TraceState), + Delivery = rabbit_basic:delivery(Mandatory, Immediate, Message, + MsgSeqNo), + QNames = rabbit_exchange:route(Exchange, Delivery), + {noreply, + case TxStatus of + none -> deliver_to_queues({Delivery, QNames}, State1); + in_progress -> TMQ = State1#ch.uncommitted_message_q, + NewTMQ = queue:in({Delivery, QNames}, TMQ), + State1#ch{uncommitted_message_q = NewTMQ} + end}; + {error, Reason} -> + rabbit_misc:protocol_error(precondition_failed, + "invalid message: ~p", [Reason]) + end; handle_method(#'basic.nack'{delivery_tag = DeliveryTag, multiple = Multiple, @@ -584,46 +651,42 @@ handle_method(#'basic.nack'{delivery_tag = DeliveryTag, handle_method(#'basic.ack'{delivery_tag = DeliveryTag, multiple = Multiple}, - _, State = #ch{transaction_id = TxnKey, - unacked_message_q = UAMQ}) -> + _, State = #ch{unacked_message_q = UAMQ, + tx_status = TxStatus}) -> {Acked, Remaining} = collect_acks(UAMQ, DeliveryTag, Multiple), - QIncs = ack(TxnKey, Acked), - Participants = [QPid || {QPid, _} <- QIncs], - maybe_incr_stats(QIncs, ack, State), - {noreply, case TxnKey of - none -> ok = notify_limiter(State#ch.limiter_pid, Acked), - State#ch{unacked_message_q = Remaining}; - _ -> NewUAQ = queue:join(State#ch.uncommitted_ack_q, - Acked), - add_tx_participants( - Participants, - State#ch{unacked_message_q = Remaining, - uncommitted_ack_q = NewUAQ}) - end}; + State1 = State#ch{unacked_message_q = Remaining}, + {noreply, + case TxStatus of + none -> ack(Acked, State1); + in_progress -> NewTAQ = queue:join(State1#ch.uncommitted_ack_q, Acked), + State1#ch{uncommitted_ack_q = NewTAQ} + end}; handle_method(#'basic.get'{queue = QueueNameBin, no_ack = NoAck}, - _, State = #ch{writer_pid = WriterPid, - reader_pid = 
ReaderPid, - next_tag = DeliveryTag}) -> + _, State = #ch{writer_pid = WriterPid, + conn_pid = ConnPid, + next_tag = DeliveryTag, + trace_state = TraceState}) -> QueueName = expand_queue_name_shortcut(QueueNameBin, State), check_read_permitted(QueueName, State), case rabbit_amqqueue:with_exclusive_access_or_die( - QueueName, ReaderPid, + QueueName, ConnPid, fun (Q) -> rabbit_amqqueue:basic_get(Q, self(), NoAck) end) of {ok, MessageCount, Msg = {_QName, QPid, _MsgId, Redelivered, #basic_message{exchange_name = ExchangeName, - routing_key = RoutingKey, + routing_keys = [RoutingKey | _CcRoutes], content = Content}}} -> State1 = lock_message(not(NoAck), ack_record(DeliveryTag, none, Msg), State), - maybe_incr_stats([{QPid, 1}], - case NoAck of - true -> get_no_ack; - false -> get - end, State), + maybe_incr_stats([{QPid, 1}], case NoAck of + true -> get_no_ack; + false -> get + end, State), + maybe_incr_redeliver_stats(Redelivered, QPid, State), + rabbit_trace:tap_trace_out(Msg, TraceState), ok = rabbit_writer:send_command( WriterPid, #'basic.get_ok'{delivery_tag = DeliveryTag, @@ -643,9 +706,9 @@ handle_method(#'basic.consume'{queue = QueueNameBin, no_ack = NoAck, exclusive = ExclusiveConsume, nowait = NoWait}, - _, State = #ch{reader_pid = ReaderPid, - limiter_pid = LimiterPid, - consumer_mapping = ConsumerMapping }) -> + _, State = #ch{conn_pid = ConnPid, + limiter_pid = LimiterPid, + consumer_mapping = ConsumerMapping}) -> case dict:find(ConsumerTag, ConsumerMapping) of error -> QueueName = expand_queue_name_shortcut(QueueNameBin, State), @@ -660,20 +723,26 @@ handle_method(#'basic.consume'{queue = QueueNameBin, %% behalf. This is for symmetry with basic.cancel - see %% the comment in that method for why. 
case rabbit_amqqueue:with_exclusive_access_or_die( - QueueName, ReaderPid, + QueueName, ConnPid, fun (Q) -> - rabbit_amqqueue:basic_consume( - Q, NoAck, self(), LimiterPid, - ActualConsumerTag, ExclusiveConsume, - ok_msg(NoWait, #'basic.consume_ok'{ - consumer_tag = ActualConsumerTag})) + {rabbit_amqqueue:basic_consume( + Q, NoAck, self(), LimiterPid, + ActualConsumerTag, ExclusiveConsume, + ok_msg(NoWait, #'basic.consume_ok'{ + consumer_tag = ActualConsumerTag})), + Q} end) of - ok -> - {noreply, State#ch{consumer_mapping = - dict:store(ActualConsumerTag, - QueueName, - ConsumerMapping)}}; - {error, exclusive_consume_unavailable} -> + {ok, Q} -> + State1 = State#ch{consumer_mapping = + dict:store(ActualConsumerTag, + {Q, undefined}, + ConsumerMapping)}, + {noreply, + case NoWait of + true -> monitor_consumer(ActualConsumerTag, State1); + false -> State1 + end}; + {{error, exclusive_consume_unavailable}, _Q} -> rabbit_misc:protocol_error( access_refused, "~s in exclusive use", [rabbit_misc:rs(QueueName)]) @@ -686,26 +755,31 @@ handle_method(#'basic.consume'{queue = QueueNameBin, handle_method(#'basic.cancel'{consumer_tag = ConsumerTag, nowait = NoWait}, - _, State = #ch{consumer_mapping = ConsumerMapping }) -> + _, State = #ch{consumer_mapping = ConsumerMapping, + consumer_monitors = ConsumerMonitors}) -> OkMsg = #'basic.cancel_ok'{consumer_tag = ConsumerTag}, case dict:find(ConsumerTag, ConsumerMapping) of error -> %% Spec requires we ignore this situation. return_ok(State, NoWait, OkMsg); - {ok, QueueName} -> - NewState = State#ch{consumer_mapping = - dict:erase(ConsumerTag, - ConsumerMapping)}, - case rabbit_amqqueue:with( - QueueName, - fun (Q) -> - %% In order to ensure that no more messages - %% are sent to the consumer after the - %% cancel_ok has been sent, we get the - %% queue process to send the cancel_ok on - %% our behalf. If we were sending the - %% cancel_ok ourselves it might overtake a - %% message sent previously by the queue. 
+ {ok, {Q, MRef}} -> + ConsumerMonitors1 = + case MRef of + undefined -> ConsumerMonitors; + _ -> true = erlang:demonitor(MRef), + dict:erase(MRef, ConsumerMonitors) + end, + NewState = State#ch{consumer_mapping = dict:erase(ConsumerTag, + ConsumerMapping), + consumer_monitors = ConsumerMonitors1}, + %% In order to ensure that no more messages are sent to + %% the consumer after the cancel_ok has been sent, we get + %% the queue process to send the cancel_ok on our + %% behalf. If we were sending the cancel_ok ourselves it + %% might overtake a message sent previously by the queue. + case rabbit_misc:with_exit_handler( + fun () -> {error, not_found} end, + fun () -> rabbit_amqqueue:basic_cancel( Q, self(), ConsumerTag, ok_msg(NoWait, #'basic.cancel_ok'{ @@ -816,7 +890,6 @@ handle_method(#'exchange.declare'{exchange = ExchangeNameBin, nowait = NoWait}, _, State = #ch{virtual_host = VHostPath}) -> ExchangeName = rabbit_misc:r(VHostPath, exchange, ExchangeNameBin), - check_configure_permitted(ExchangeName, State), check_not_default_exchange(ExchangeName), _ = rabbit_exchange:lookup_or_die(ExchangeName), return_ok(State, NoWait, #'exchange.declare_ok'{}); @@ -864,10 +937,10 @@ handle_method(#'queue.declare'{queue = QueueNameBin, nowait = NoWait, arguments = Args} = Declare, _, State = #ch{virtual_host = VHostPath, - reader_pid = ReaderPid, + conn_pid = ConnPid, queue_collector_pid = CollectorPid}) -> Owner = case ExclusiveDeclare of - true -> ReaderPid; + true -> ConnPid; false -> none end, ActualNameBin = case QueueNameBin of @@ -910,13 +983,12 @@ handle_method(#'queue.declare'{queue = QueueNameBin, passive = true, nowait = NoWait}, _, State = #ch{virtual_host = VHostPath, - reader_pid = ReaderPid}) -> + conn_pid = ConnPid}) -> QueueName = rabbit_misc:r(VHostPath, queue, QueueNameBin), - check_configure_permitted(QueueName, State), {{ok, MessageCount, ConsumerCount}, #amqqueue{} = Q} = rabbit_amqqueue:with_or_die( QueueName, fun (Q) -> {rabbit_amqqueue:stat(Q), Q} 
end), - ok = rabbit_amqqueue:check_exclusive_access(Q, ReaderPid), + ok = rabbit_amqqueue:check_exclusive_access(Q, ConnPid), return_queue_declare_ok(QueueName, NoWait, MessageCount, ConsumerCount, State); @@ -924,11 +996,11 @@ handle_method(#'queue.delete'{queue = QueueNameBin, if_unused = IfUnused, if_empty = IfEmpty, nowait = NoWait}, - _, State = #ch{reader_pid = ReaderPid}) -> + _, State = #ch{conn_pid = ConnPid}) -> QueueName = expand_queue_name_shortcut(QueueNameBin, State), check_configure_permitted(QueueName, State), case rabbit_amqqueue:with_exclusive_access_or_die( - QueueName, ReaderPid, + QueueName, ConnPid, fun (Q) -> rabbit_amqqueue:delete(Q, IfUnused, IfEmpty) end) of {error, in_use} -> rabbit_misc:protocol_error( @@ -960,42 +1032,42 @@ handle_method(#'queue.unbind'{queue = QueueNameBin, handle_method(#'queue.purge'{queue = QueueNameBin, nowait = NoWait}, - _, State = #ch{reader_pid = ReaderPid}) -> + _, State = #ch{conn_pid = ConnPid}) -> QueueName = expand_queue_name_shortcut(QueueNameBin, State), check_read_permitted(QueueName, State), {ok, PurgedMessageCount} = rabbit_amqqueue:with_exclusive_access_or_die( - QueueName, ReaderPid, + QueueName, ConnPid, fun (Q) -> rabbit_amqqueue:purge(Q) end), return_ok(State, NoWait, #'queue.purge_ok'{message_count = PurgedMessageCount}); - handle_method(#'tx.select'{}, _, #ch{confirm_enabled = true}) -> rabbit_misc:protocol_error( precondition_failed, "cannot switch from confirm to tx mode", []); -handle_method(#'tx.select'{}, _, State = #ch{transaction_id = none}) -> - {reply, #'tx.select_ok'{}, new_tx(State)}; - handle_method(#'tx.select'{}, _, State) -> - {reply, #'tx.select_ok'{}, State}; + {reply, #'tx.select_ok'{}, State#ch{tx_status = in_progress}}; -handle_method(#'tx.commit'{}, _, #ch{transaction_id = none}) -> +handle_method(#'tx.commit'{}, _, #ch{tx_status = none}) -> rabbit_misc:protocol_error( precondition_failed, "channel is not transactional", []); -handle_method(#'tx.commit'{}, _, State) -> - 
{reply, #'tx.commit_ok'{}, internal_commit(State)}; +handle_method(#'tx.commit'{}, _, State = #ch{uncommitted_message_q = TMQ, + uncommitted_ack_q = TAQ}) -> + State1 = new_tx(ack(TAQ, rabbit_misc:queue_fold(fun deliver_to_queues/2, + State, TMQ))), + {noreply, maybe_complete_tx(State1#ch{tx_status = committing})}; -handle_method(#'tx.rollback'{}, _, #ch{transaction_id = none}) -> +handle_method(#'tx.rollback'{}, _, #ch{tx_status = none}) -> rabbit_misc:protocol_error( precondition_failed, "channel is not transactional", []); -handle_method(#'tx.rollback'{}, _, State) -> - {reply, #'tx.rollback_ok'{}, internal_rollback(State)}; +handle_method(#'tx.rollback'{}, _, State = #ch{unacked_message_q = UAMQ, + uncommitted_ack_q = TAQ}) -> + {reply, #'tx.rollback_ok'{}, new_tx(State#ch{unacked_message_q = + queue:join(TAQ, UAMQ)})}; -handle_method(#'confirm.select'{}, _, #ch{transaction_id = TxId}) - when TxId =/= none -> +handle_method(#'confirm.select'{}, _, #ch{tx_status = in_progress}) -> rabbit_misc:protocol_error( precondition_failed, "cannot switch from tx to confirm mode", []); @@ -1035,10 +1107,63 @@ handle_method(_MethodRecord, _Content, _State) -> %%---------------------------------------------------------------------------- +monitor_consumer(ConsumerTag, State = #ch{consumer_mapping = ConsumerMapping, + consumer_monitors = ConsumerMonitors, + capabilities = Capabilities}) -> + case rabbit_misc:table_lookup( + Capabilities, <<"consumer_cancel_notify">>) of + {bool, true} -> + {#amqqueue{pid = QPid} = Q, undefined} = + dict:fetch(ConsumerTag, ConsumerMapping), + MRef = erlang:monitor(process, QPid), + State#ch{consumer_mapping = + dict:store(ConsumerTag, {Q, MRef}, ConsumerMapping), + consumer_monitors = + dict:store(MRef, ConsumerTag, ConsumerMonitors)}; + _ -> + State + end. 
+ +handle_publishing_queue_down(QPid, Reason, State = #ch{unconfirmed_qm = UQM}) -> + MsgSeqNos = case gb_trees:lookup(QPid, UQM) of + {value, MsgSet} -> gb_sets:to_list(MsgSet); + none -> [] + end, + %% We remove the MsgSeqNos from UQM before calling + %% process_confirms to prevent each MsgSeqNo being removed from + %% the set one by one which which would be inefficient + State1 = State#ch{unconfirmed_qm = gb_trees:delete_any(QPid, UQM)}, + {Nack, SendFun} = + case Reason of + Reason when Reason =:= noproc; Reason =:= noconnection; + Reason =:= normal; Reason =:= shutdown -> + {false, fun record_confirms/2}; + {shutdown, _} -> + {false, fun record_confirms/2}; + _ -> + {true, fun send_nacks/2} + end, + {MXs, State2} = process_confirms(MsgSeqNos, QPid, Nack, State1), + erase_queue_stats(QPid), + State3 = SendFun(MXs, State2), + queue_blocked(QPid, State3). + +handle_consuming_queue_down(MRef, ConsumerTag, + State = #ch{consumer_mapping = ConsumerMapping, + consumer_monitors = ConsumerMonitors, + writer_pid = WriterPid}) -> + ConsumerMapping1 = dict:erase(ConsumerTag, ConsumerMapping), + ConsumerMonitors1 = dict:erase(MRef, ConsumerMonitors), + Cancel = #'basic.cancel'{consumer_tag = ConsumerTag, + nowait = true}, + ok = rabbit_writer:send_command(WriterPid, Cancel), + State#ch{consumer_mapping = ConsumerMapping1, + consumer_monitors = ConsumerMonitors1}. + binding_action(Fun, ExchangeNameBin, DestinationType, DestinationNameBin, RoutingKey, Arguments, ReturnMethod, NoWait, State = #ch{virtual_host = VHostPath, - reader_pid = ReaderPid}) -> + conn_pid = ConnPid }) -> %% FIXME: connection exception (!) on failure?? 
%% (see rule named "failure" in spec-XML) %% FIXME: don't allow binding to internal exchanges - @@ -1054,7 +1179,7 @@ binding_action(Fun, ExchangeNameBin, DestinationType, DestinationNameBin, key = ActualRoutingKey, args = Arguments}, fun (_X, Q = #amqqueue{}) -> - try rabbit_amqqueue:check_exclusive_access(Q, ReaderPid) + try rabbit_amqqueue:check_exclusive_access(Q, ConnPid) catch exit:Reason -> {error, Reason} end; (_X, #exchange{}) -> @@ -1079,11 +1204,10 @@ binding_action(Fun, ExchangeNameBin, DestinationType, DestinationNameBin, end. basic_return(#basic_message{exchange_name = ExchangeName, - routing_key = RoutingKey, + routing_keys = [RoutingKey | _CcRoutes], content = Content}, - WriterPid, Reason) -> - {_Close, ReplyCode, ReplyText} = - rabbit_framing_amqp_0_9_1:lookup_amqp_exception(Reason), + #ch{protocol = Protocol, writer_pid = WriterPid}, Reason) -> + {_Close, ReplyCode, ReplyText} = Protocol:lookup_amqp_exception(Reason), ok = rabbit_writer:send_command( WriterPid, #'basic.return'{reply_code = ReplyCode, @@ -1128,52 +1252,24 @@ collect_acks(ToAcc, PrefixAcc, Q, DeliveryTag, Multiple) -> precondition_failed, "unknown delivery tag ~w", [DeliveryTag]) end. -add_tx_participants(MoreP, State = #ch{tx_participants = Participants}) -> - State#ch{tx_participants = sets:union(Participants, - sets:from_list(MoreP))}. - -ack(TxnKey, UAQ) -> - fold_per_queue( - fun (QPid, MsgIds, L) -> - ok = rabbit_amqqueue:ack(QPid, TxnKey, MsgIds, self()), - [{QPid, length(MsgIds)} | L] - end, [], UAQ). - -make_tx_id() -> rabbit_guid:guid(). - -new_tx(State) -> - State#ch{transaction_id = make_tx_id(), - tx_participants = sets:new(), - uncommitted_ack_q = queue:new()}. 
- -internal_commit(State = #ch{transaction_id = TxnKey, - tx_participants = Participants}) -> - case rabbit_amqqueue:commit_all(sets:to_list(Participants), - TxnKey, self()) of - ok -> ok = notify_limiter(State#ch.limiter_pid, - State#ch.uncommitted_ack_q), - new_tx(State); - {error, Errors} -> rabbit_misc:protocol_error( - internal_error, "commit failed: ~w", [Errors]) - end. +ack(Acked, State) -> + QIncs = fold_per_queue( + fun (QPid, MsgIds, L) -> + ok = rabbit_amqqueue:ack(QPid, MsgIds, self()), + [{QPid, length(MsgIds)} | L] + end, [], Acked), + maybe_incr_stats(QIncs, ack, State), + ok = notify_limiter(State#ch.limiter_pid, Acked), + State. + +new_tx(State) -> State#ch{uncommitted_message_q = queue:new(), + uncommitted_ack_q = queue:new()}. -internal_rollback(State = #ch{transaction_id = TxnKey, - tx_participants = Participants, - uncommitted_ack_q = UAQ, - unacked_message_q = UAMQ}) -> - ?LOGDEBUG("rollback ~p~n - ~p acks uncommitted, ~p messages unacked~n", - [self(), - queue:len(UAQ), - queue:len(UAMQ)]), - ok = rabbit_amqqueue:rollback_all(sets:to_list(Participants), - TxnKey, self()), - NewUAMQ = queue:join(UAQ, UAMQ), - new_tx(State#ch{unacked_message_q = NewUAMQ}). - -rollback_and_notify(State = #ch{transaction_id = none}) -> - notify_queues(State); -rollback_and_notify(State) -> - notify_queues(internal_rollback(State)). +notify_queues(State = #ch{state = closing}) -> + {ok, State}; +notify_queues(State = #ch{consumer_mapping = Consumers}) -> + {rabbit_amqqueue:notify_down_all(consumer_queues(Consumers), self()), + State#ch{state = closing}}. fold_per_queue(F, Acc0, UAQ) -> D = rabbit_misc:queue_fold( @@ -1192,9 +1288,6 @@ start_limiter(State = #ch{unacked_message_q = UAMQ, start_limiter_fun = SLF}) -> ok = limit_queues(LPid, State), LPid. -notify_queues(#ch{consumer_mapping = Consumers}) -> - rabbit_amqqueue:notify_down_all(consumer_queues(Consumers), self()). - unlimit_queues(State) -> ok = limit_queues(undefined, State), undefined. 
@@ -1203,16 +1296,9 @@ limit_queues(LPid, #ch{consumer_mapping = Consumers}) -> rabbit_amqqueue:limit_all(consumer_queues(Consumers), self(), LPid). consumer_queues(Consumers) -> - [QPid || QueueName <- - sets:to_list( - dict:fold(fun (_ConsumerTag, QueueName, S) -> - sets:add_element(QueueName, S) - end, sets:new(), Consumers)), - case rabbit_amqqueue:lookup(QueueName) of - {ok, Q} -> QPid = Q#amqqueue.pid, true; - %% queue has been deleted in the meantime - {error, not_found} -> QPid = none, false - end]. + lists:usort([QPid || + {_Key, {#amqqueue{pid = QPid}, _MRef}} + <- dict:to_list(Consumers)]). %% tell the limiter about the number of acks that have been received %% for messages delivered to subscribed consumers, but not acks for @@ -1228,32 +1314,47 @@ notify_limiter(LimiterPid, Acked) -> Count -> rabbit_limiter:ack(LimiterPid, Count) end. -is_message_persistent(Content) -> - case rabbit_basic:is_message_persistent(Content) of - {invalid, Other} -> - rabbit_log:warning("Unknown delivery mode ~p - " - "treating as 1, non-persistent~n", - [Other]), - false; - IsPersistent when is_boolean(IsPersistent) -> - IsPersistent - end. +deliver_to_queues({Delivery = #delivery{message = Message = #basic_message{ + exchange_name = XName}, + msg_seq_no = MsgSeqNo}, + QNames}, State) -> + {RoutingRes, DeliveredQPids} = rabbit_router:deliver(QNames, Delivery), + State1 = process_routing_result(RoutingRes, DeliveredQPids, + XName, MsgSeqNo, Message, State), + maybe_incr_stats([{XName, 1} | + [{{QPid, XName}, 1} || + QPid <- DeliveredQPids]], publish, State1), + State1. 
process_routing_result(unroutable, _, XName, MsgSeqNo, Msg, State) -> - ok = basic_return(Msg, State#ch.writer_pid, no_route), + ok = basic_return(Msg, State, no_route), + maybe_incr_stats([{Msg#basic_message.exchange_name, 1}], + return_unroutable, State), record_confirm(MsgSeqNo, XName, State); process_routing_result(not_delivered, _, XName, MsgSeqNo, Msg, State) -> - ok = basic_return(Msg, State#ch.writer_pid, no_consumers), + ok = basic_return(Msg, State, no_consumers), + maybe_incr_stats([{XName, 1}], return_not_delivered, State), record_confirm(MsgSeqNo, XName, State); process_routing_result(routed, [], XName, MsgSeqNo, _, State) -> record_confirm(MsgSeqNo, XName, State); process_routing_result(routed, _, _, undefined, _, State) -> State; process_routing_result(routed, QPids, XName, MsgSeqNo, _, State) -> - #ch{unconfirmed = UC} = State, - [maybe_monitor(QPid) || QPid <- QPids], - UC1 = gb_trees:insert(MsgSeqNo, {XName, sets:from_list(QPids)}, UC), - State#ch{unconfirmed = UC1}. + #ch{unconfirmed_mq = UMQ, unconfirmed_qm = UQM} = State, + UMQ1 = gb_trees:insert(MsgSeqNo, {XName, gb_sets:from_list(QPids)}, UMQ), + SingletonSet = gb_sets:singleton(MsgSeqNo), + UQM1 = lists:foldl( + fun (QPid, UQM2) -> + maybe_monitor(QPid), + case gb_trees:lookup(QPid, UQM2) of + {value, MsgSeqNos} -> + MsgSeqNos1 = gb_sets:insert(MsgSeqNo, MsgSeqNos), + gb_trees:update(QPid, MsgSeqNos1, UQM2); + none -> + gb_trees:insert(QPid, SingletonSet, UQM2) + end + end, UQM, QPids), + State#ch{unconfirmed_mq = UMQ1, unconfirmed_qm = UQM1}. 
lock_message(true, MsgStruct, State = #ch{unacked_message_q = UAMQ}) -> State#ch{unacked_message_q = queue:in(MsgStruct, UAMQ)}; @@ -1262,20 +1363,25 @@ lock_message(false, _MsgStruct, State) -> send_nacks([], State) -> State; -send_nacks(MXs, State) -> +send_nacks(MXs, State = #ch{tx_status = none}) -> MsgSeqNos = [ MsgSeqNo || {MsgSeqNo, _} <- MXs ], coalesce_and_send(MsgSeqNos, fun(MsgSeqNo, Multiple) -> #'basic.nack'{delivery_tag = MsgSeqNo, multiple = Multiple} - end, State). + end, State); +send_nacks(_, State) -> + maybe_complete_tx(State#ch{tx_status = failed}). -send_confirms(State = #ch{confirmed = C}) -> +send_confirms(State = #ch{tx_status = none, confirmed = C}) -> C1 = lists:append(C), MsgSeqNos = [ begin maybe_incr_stats([{ExchangeName, 1}], confirm, State), MsgSeqNo end || {MsgSeqNo, ExchangeName} <- C1 ], - send_confirms(MsgSeqNos, State #ch{confirmed = []}). + send_confirms(MsgSeqNos, State #ch{confirmed = []}); +send_confirms(State) -> + maybe_complete_tx(State). + send_confirms([], State) -> State; send_confirms([MsgSeqNo], State = #ch{writer_pid = WriterPid}) -> @@ -1289,11 +1395,11 @@ send_confirms(Cs, State) -> end, State). coalesce_and_send(MsgSeqNos, MkMsgFun, - State = #ch{writer_pid = WriterPid, unconfirmed = UC}) -> + State = #ch{writer_pid = WriterPid, unconfirmed_mq = UMQ}) -> SMsgSeqNos = lists:usort(MsgSeqNos), - CutOff = case gb_trees:is_empty(UC) of + CutOff = case gb_trees:is_empty(UMQ) of true -> lists:last(SMsgSeqNos) + 1; - false -> {SeqNo, _XQ} = gb_trees:smallest(UC), SeqNo + false -> {SeqNo, _XQ} = gb_trees:smallest(UMQ), SeqNo end, {Ms, Ss} = lists:splitwith(fun(X) -> X < CutOff end, SMsgSeqNos), case Ms of @@ -1305,28 +1411,44 @@ coalesce_and_send(MsgSeqNos, MkMsgFun, WriterPid, MkMsgFun(SeqNo, false)) || SeqNo <- Ss], State. -terminate(_State) -> - pg_local:leave(rabbit_channels, self()), - rabbit_event:notify(channel_closed, [{pid, self()}]). 
+maybe_complete_tx(State = #ch{tx_status = in_progress}) -> + State; +maybe_complete_tx(State = #ch{unconfirmed_mq = UMQ}) -> + case gb_trees:is_empty(UMQ) of + false -> State; + true -> complete_tx(State#ch{confirmed = []}) + end. + +complete_tx(State = #ch{tx_status = committing}) -> + ok = rabbit_writer:send_command(State#ch.writer_pid, #'tx.commit_ok'{}), + State#ch{tx_status = in_progress}; +complete_tx(State = #ch{tx_status = failed}) -> + {noreply, State1} = send_exception( + rabbit_misc:amqp_error( + precondition_failed, "partial tx completion", [], + 'tx.commit'), + State), + State1#ch{tx_status = in_progress}. infos(Items, State) -> [{Item, i(Item, State)} || Item <- Items]. -i(pid, _) -> self(); -i(connection, #ch{reader_pid = ReaderPid}) -> ReaderPid; -i(number, #ch{channel = Channel}) -> Channel; -i(user, #ch{user = User}) -> User#user.username; -i(vhost, #ch{virtual_host = VHost}) -> VHost; -i(transactional, #ch{transaction_id = TxnKey}) -> TxnKey =/= none; -i(confirm, #ch{confirm_enabled = CE}) -> CE; +i(pid, _) -> self(); +i(connection, #ch{conn_pid = ConnPid}) -> ConnPid; +i(number, #ch{channel = Channel}) -> Channel; +i(user, #ch{user = User}) -> User#user.username; +i(vhost, #ch{virtual_host = VHost}) -> VHost; +i(transactional, #ch{tx_status = TE}) -> TE =/= none; +i(confirm, #ch{confirm_enabled = CE}) -> CE; i(consumer_count, #ch{consumer_mapping = ConsumerMapping}) -> dict:size(ConsumerMapping); -i(messages_unconfirmed, #ch{unconfirmed = UC}) -> - gb_trees:size(UC); -i(messages_unacknowledged, #ch{unacked_message_q = UAMQ, - uncommitted_ack_q = UAQ}) -> - queue:len(UAMQ) + queue:len(UAQ); -i(acks_uncommitted, #ch{uncommitted_ack_q = UAQ}) -> - queue:len(UAQ); +i(messages_unconfirmed, #ch{unconfirmed_mq = UMQ}) -> + gb_trees:size(UMQ); +i(messages_unacknowledged, #ch{unacked_message_q = UAMQ}) -> + queue:len(UAMQ); +i(messages_uncommitted, #ch{uncommitted_message_q = TMQ}) -> + queue:len(TMQ); +i(acks_uncommitted, #ch{uncommitted_ack_q = TAQ}) 
-> + queue:len(TAQ); i(prefetch_count, #ch{limiter_pid = LimiterPid}) -> rabbit_limiter:get_limit(LimiterPid); i(client_flow_blocked, #ch{limiter_pid = LimiterPid}) -> @@ -1334,6 +1456,11 @@ i(client_flow_blocked, #ch{limiter_pid = LimiterPid}) -> i(Item, _) -> throw({bad_argument, Item}). +maybe_incr_redeliver_stats(true, QPid, State) -> + maybe_incr_stats([{QPid, 1}], redeliver, State); +maybe_incr_redeliver_stats(_, _, _) -> + ok. + maybe_incr_stats(QXIncs, Measure, #ch{stats_timer = StatsTimer}) -> case rabbit_event:stats_level(StatsTimer) of fine -> [incr_stats(QX, Inc, Measure) || {QX, Inc} <- QXIncs]; diff --git a/src/rabbit_channel_sup.erl b/src/rabbit_channel_sup.erl index d21cfdb7..65ccca02 100644 --- a/src/rabbit_channel_sup.erl +++ b/src/rabbit_channel_sup.erl @@ -31,11 +31,13 @@ -export_type([start_link_args/0]). -type(start_link_args() :: - {'tcp', rabbit_types:protocol(), rabbit_net:socket(), - rabbit_channel:channel_number(), non_neg_integer(), pid(), - rabbit_types:user(), rabbit_types:vhost(), pid()} | - {'direct', rabbit_channel:channel_number(), pid(), rabbit_types:user(), - rabbit_types:vhost(), pid()}). + {'tcp', rabbit_net:socket(), rabbit_channel:channel_number(), + non_neg_integer(), pid(), rabbit_types:protocol(), rabbit_types:user(), + rabbit_types:vhost(), rabbit_framing:amqp_table(), + pid()} | + {'direct', rabbit_channel:channel_number(), pid(), + rabbit_types:protocol(), rabbit_types:user(), rabbit_types:vhost(), + rabbit_framing:amqp_table(), pid()}). -spec(start_link/1 :: (start_link_args()) -> {'ok', pid(), {pid(), any()}}). 
@@ -43,8 +45,8 @@ %%---------------------------------------------------------------------------- -start_link({tcp, Protocol, Sock, Channel, FrameMax, ReaderPid, User, VHost, - Collector}) -> +start_link({tcp, Sock, Channel, FrameMax, ReaderPid, Protocol, User, VHost, + Capabilities, Collector}) -> {ok, SupPid} = supervisor2:start_link(?MODULE, []), {ok, WriterPid} = supervisor2:start_child( @@ -56,20 +58,23 @@ start_link({tcp, Protocol, Sock, Channel, FrameMax, ReaderPid, User, VHost, supervisor2:start_child( SupPid, {channel, {rabbit_channel, start_link, - [Channel, ReaderPid, WriterPid, User, VHost, - Collector, start_limiter_fun(SupPid)]}, + [Channel, ReaderPid, WriterPid, ReaderPid, Protocol, + User, VHost, Capabilities, Collector, + start_limiter_fun(SupPid)]}, intrinsic, ?MAX_WAIT, worker, [rabbit_channel]}), {ok, AState} = rabbit_command_assembler:init(Protocol), {ok, SupPid, {ChannelPid, AState}}; -start_link({direct, Channel, ClientChannelPid, User, VHost, Collector}) -> +start_link({direct, Channel, ClientChannelPid, ConnPid, Protocol, User, VHost, + Capabilities, Collector}) -> {ok, SupPid} = supervisor2:start_link(?MODULE, []), {ok, ChannelPid} = supervisor2:start_child( - SupPid, - {channel, {rabbit_channel, start_link, - [Channel, ClientChannelPid, ClientChannelPid, - User, VHost, Collector, start_limiter_fun(SupPid)]}, - intrinsic, ?MAX_WAIT, worker, [rabbit_channel]}), + SupPid, + {channel, {rabbit_channel, start_link, + [Channel, ClientChannelPid, ClientChannelPid, ConnPid, + Protocol, User, VHost, Capabilities, Collector, + start_limiter_fun(SupPid)]}, + intrinsic, ?MAX_WAIT, worker, [rabbit_channel]}), {ok, SupPid, {ChannelPid, none}}. %%---------------------------------------------------------------------------- diff --git a/src/rabbit_client_sup.erl b/src/rabbit_client_sup.erl index dbdc6cd4..15e92542 100644 --- a/src/rabbit_client_sup.erl +++ b/src/rabbit_client_sup.erl @@ -29,9 +29,9 @@ -ifdef(use_specs). 
-spec(start_link/1 :: (mfa()) -> - rabbit_types:ok_pid_or_error()). + rabbit_types:ok_pid_or_error()). -spec(start_link/2 :: ({'local', atom()}, mfa()) -> - rabbit_types:ok_pid_or_error()). + rabbit_types:ok_pid_or_error()). -endif. diff --git a/src/rabbit_control.erl b/src/rabbit_control.erl index 80483097..6eb1aaba 100644 --- a/src/rabbit_control.erl +++ b/src/rabbit_control.erl @@ -20,11 +20,25 @@ -export([start/0, stop/0, action/5, diagnostics/1]). -define(RPC_TIMEOUT, infinity). +-define(WAIT_FOR_VM_ATTEMPTS, 5). -define(QUIET_OPT, "-q"). -define(NODE_OPT, "-n"). -define(VHOST_OPT, "-p"). +-define(GLOBAL_QUERIES, + [{"Connections", rabbit_networking, connection_info_all, + connection_info_keys}, + {"Channels", rabbit_channel, info_all, info_keys}]). + +-define(VHOST_QUERIES, + [{"Queues", rabbit_amqqueue, info_all, info_keys}, + {"Exchanges", rabbit_exchange, info_all, info_keys}, + {"Bindings", rabbit_binding, info_all, info_keys}, + {"Consumers", rabbit_amqqueue, consumers_all, consumer_info_keys}, + {"Permissions", rabbit_auth_backend_internal, list_vhost_permissions, + vhost_perms_info_keys}]). + %%---------------------------------------------------------------------------- -ifdef(use_specs). 
@@ -44,22 +58,18 @@ start() -> {ok, [[NodeStr|_]|_]} = init:get_argument(nodename), - FullCommand = init:get_plain_arguments(), - case FullCommand of - [] -> usage(); - _ -> ok - end, {[Command0 | Args], Opts} = - rabbit_misc:get_options( - [{flag, ?QUIET_OPT}, {option, ?NODE_OPT, NodeStr}, - {option, ?VHOST_OPT, "/"}], - FullCommand), - Opts1 = lists:map(fun({K, V}) -> - case K of - ?NODE_OPT -> {?NODE_OPT, rabbit_misc:makenode(V)}; - _ -> {K, V} - end - end, Opts), + case rabbit_misc:get_options([{flag, ?QUIET_OPT}, + {option, ?NODE_OPT, NodeStr}, + {option, ?VHOST_OPT, "/"}], + init:get_plain_arguments()) of + {[], _Opts} -> usage(); + CmdArgsAndOpts -> CmdArgsAndOpts + end, + Opts1 = [case K of + ?NODE_OPT -> {?NODE_OPT, rabbit_misc:makenode(V)}; + _ -> {K, V} + end || {K, V} <- Opts], Command = list_to_atom(Command0), Quiet = proplists:get_bool(?QUIET_OPT, Opts1), Node = proplists:get_value(?NODE_OPT, Opts1), @@ -99,6 +109,23 @@ start() -> fmt_stderr(Format, Args) -> rabbit_misc:format_stderr(Format ++ "~n", Args). +print_report(Node, {Descr, Module, InfoFun, KeysFun}) -> + io:format("~s:~n", [Descr]), + print_report0(Node, {Module, InfoFun, KeysFun}, []). + +print_report(Node, {Descr, Module, InfoFun, KeysFun}, VHostArg) -> + io:format("~s on ~s:~n", [Descr, VHostArg]), + print_report0(Node, {Module, InfoFun, KeysFun}, VHostArg). + +print_report0(Node, {Module, InfoFun, KeysFun}, VHostArg) -> + case Results = rpc_call(Node, Module, InfoFun, VHostArg) of + [_|_] -> InfoItems = rpc_call(Node, Module, KeysFun, []), + display_row([atom_to_list(I) || I <- InfoItems]), + display_info_list(Results, InfoItems); + _ -> ok + end, + io:nl(). + print_error(Format, Args) -> fmt_stderr("Error: " ++ Format, Args). 
print_badrpc_diagnostics(Node) -> @@ -106,24 +133,22 @@ print_badrpc_diagnostics(Node) -> diagnostics(Node) -> {_NodeName, NodeHost} = rabbit_misc:nodeparts(Node), - [ - {"diagnostics:", []}, - case net_adm:names(NodeHost) of - {error, EpmdReason} -> - {"- unable to connect to epmd on ~s: ~w", - [NodeHost, EpmdReason]}; - {ok, NamePorts} -> - {"- nodes and their ports on ~s: ~p", - [NodeHost, [{list_to_atom(Name), Port} || - {Name, Port} <- NamePorts]]} - end, - {"- current node: ~w", [node()]}, - case init:get_argument(home) of - {ok, [[Home]]} -> {"- current node home dir: ~s", [Home]}; - Other -> {"- no current node home dir: ~p", [Other]} - end, - {"- current node cookie hash: ~s", [rabbit_misc:cookie_hash()]} - ]. + [{"diagnostics:", []}, + case net_adm:names(NodeHost) of + {error, EpmdReason} -> + {"- unable to connect to epmd on ~s: ~w", + [NodeHost, EpmdReason]}; + {ok, NamePorts} -> + {"- nodes and their ports on ~s: ~p", + [NodeHost, [{list_to_atom(Name), Port} || + {Name, Port} <- NamePorts]]} + end, + {"- current node: ~w", [node()]}, + case init:get_argument(home) of + {ok, [[Home]]} -> {"- current node home dir: ~s", [Home]}; + Other -> {"- no current node home dir: ~p", [Other]} + end, + {"- current node cookie hash: ~s", [rabbit_misc:cookie_hash()]}]. stop() -> ok. @@ -132,6 +157,8 @@ usage() -> io:format("~s", [rabbit_ctl_usage:usage()]), quit(1). 
+%%---------------------------------------------------------------------------- + action(stop, Node, [], _Opts, Inform) -> Inform("Stopping and halting node ~p", [Node]), call(Node, {rabbit, stop_and_halt, []}); @@ -155,22 +182,30 @@ action(force_reset, Node, [], _Opts, Inform) -> action(cluster, Node, ClusterNodeSs, _Opts, Inform) -> ClusterNodes = lists:map(fun list_to_atom/1, ClusterNodeSs), Inform("Clustering node ~p with ~p", - [Node, ClusterNodes]), + [Node, ClusterNodes]), rpc_call(Node, rabbit_mnesia, cluster, [ClusterNodes]); action(force_cluster, Node, ClusterNodeSs, _Opts, Inform) -> ClusterNodes = lists:map(fun list_to_atom/1, ClusterNodeSs), Inform("Forcefully clustering node ~p with ~p (ignoring offline nodes)", - [Node, ClusterNodes]), + [Node, ClusterNodes]), rpc_call(Node, rabbit_mnesia, force_cluster, [ClusterNodes]); +action(wait, Node, [], _Opts, Inform) -> + Inform("Waiting for ~p", [Node]), + wait_for_application(Node, ?WAIT_FOR_VM_ATTEMPTS); + action(status, Node, [], _Opts, Inform) -> Inform("Status of node ~p", [Node]), - case call(Node, {rabbit, status, []}) of - {badrpc, _} = Res -> Res; - Res -> io:format("~p~n", [Res]), - ok - end; + display_call_result(Node, {rabbit, status, []}); + +action(cluster_status, Node, [], _Opts, Inform) -> + Inform("Cluster status of node ~p", [Node]), + display_call_result(Node, {rabbit_mnesia, status, []}); + +action(environment, Node, _App, _Opts, Inform) -> + Inform("Application environment of node ~p", [Node]), + display_call_result(Node, {rabbit, environment, []}); action(rotate_logs, Node, [], _Opts, Inform) -> Inform("Reopening logs for node ~p", [Node]), @@ -200,17 +235,17 @@ action(clear_password, Node, Args = [Username], _Opts, Inform) -> Inform("Clearing password for user ~p", [Username]), call(Node, {rabbit_auth_backend_internal, clear_password, Args}); -action(set_admin, Node, [Username], _Opts, Inform) -> - Inform("Setting administrative status for user ~p", [Username]), - call(Node, 
{rabbit_auth_backend_internal, set_admin, [Username]}); - -action(clear_admin, Node, [Username], _Opts, Inform) -> - Inform("Clearing administrative status for user ~p", [Username]), - call(Node, {rabbit_auth_backend_internal, clear_admin, [Username]}); +action(set_user_tags, Node, [Username | TagsStr], _Opts, Inform) -> + Tags = [list_to_atom(T) || T <- TagsStr], + Inform("Setting tags for user ~p to ~p", [Username, Tags]), + rpc_call(Node, rabbit_auth_backend_internal, set_tags, + [list_to_binary(Username), Tags]); action(list_users, Node, [], _Opts, Inform) -> Inform("Listing users", []), - display_list(call(Node, {rabbit_auth_backend_internal, list_users, []})); + display_info_list( + call(Node, {rabbit_auth_backend_internal, list_users, []}), + rabbit_auth_backend_internal:user_info_keys()); action(add_vhost, Node, Args = [_VHostPath], _Opts, Inform) -> Inform("Creating vhost ~p", Args), @@ -220,14 +255,16 @@ action(delete_vhost, Node, Args = [_VHostPath], _Opts, Inform) -> Inform("Deleting vhost ~p", Args), call(Node, {rabbit_vhost, delete, Args}); -action(list_vhosts, Node, [], _Opts, Inform) -> +action(list_vhosts, Node, Args, _Opts, Inform) -> Inform("Listing vhosts", []), - display_list(call(Node, {rabbit_vhost, list, []})); + ArgAtoms = default_if_empty(Args, [name]), + display_info_list(call(Node, {rabbit_vhost, info_all, []}), ArgAtoms); action(list_user_permissions, Node, Args = [_Username], _Opts, Inform) -> Inform("Listing permissions for user ~p", Args), - display_list(call(Node, {rabbit_auth_backend_internal, - list_user_permissions, Args})); + display_info_list(call(Node, {rabbit_auth_backend_internal, + list_user_permissions, Args}), + rabbit_auth_backend_internal:user_perms_info_keys()); action(list_queues, Node, Args, Opts, Inform) -> Inform("Listing queues", []), @@ -264,7 +301,7 @@ action(list_connections, Node, Args, _Opts, Inform) -> action(list_channels, Node, Args, _Opts, Inform) -> Inform("Listing channels", []), - ArgAtoms = 
default_if_empty(Args, [pid, user, transactional, consumer_count, + ArgAtoms = default_if_empty(Args, [pid, user, consumer_count, messages_unacknowledged]), display_info_list(rpc_call(Node, rabbit_channel, info_all, [ArgAtoms]), ArgAtoms); @@ -272,14 +309,18 @@ action(list_channels, Node, Args, _Opts, Inform) -> action(list_consumers, Node, _Args, Opts, Inform) -> Inform("Listing consumers", []), VHostArg = list_to_binary(proplists:get_value(?VHOST_OPT, Opts)), - InfoKeys = [queue_name, channel_pid, consumer_tag, ack_required], - case rpc_call(Node, rabbit_amqqueue, consumers_all, [VHostArg]) of - L when is_list(L) -> display_info_list( - [lists:zip(InfoKeys, tuple_to_list(X)) || - X <- L], - InfoKeys); - Other -> Other - end; + display_info_list(rpc_call(Node, rabbit_amqqueue, consumers_all, [VHostArg]), + rabbit_amqqueue:consumer_info_keys()); + +action(trace_on, Node, [], Opts, Inform) -> + VHost = proplists:get_value(?VHOST_OPT, Opts), + Inform("Starting tracing for vhost ~p", [VHost]), + rpc_call(Node, rabbit_trace, start, [list_to_binary(VHost)]); + +action(trace_off, Node, [], Opts, Inform) -> + VHost = proplists:get_value(?VHOST_OPT, Opts), + Inform("Stopping tracing for vhost ~p", [VHost]), + rpc_call(Node, rabbit_trace, stop, [list_to_binary(VHost)]); action(set_permissions, Node, [Username, CPerm, WPerm, RPerm], Opts, Inform) -> VHost = proplists:get_value(?VHOST_OPT, Opts), @@ -296,14 +337,44 @@ action(clear_permissions, Node, [Username], Opts, Inform) -> action(list_permissions, Node, [], Opts, Inform) -> VHost = proplists:get_value(?VHOST_OPT, Opts), Inform("Listing permissions in vhost ~p", [VHost]), - display_list(call(Node, {rabbit_auth_backend_internal, - list_vhost_permissions, [VHost]})). 
+ display_info_list(call(Node, {rabbit_auth_backend_internal, + list_vhost_permissions, [VHost]}), + rabbit_auth_backend_internal:vhost_perms_info_keys()); + +action(report, Node, _Args, _Opts, Inform) -> + io:format("Reporting server status on ~p~n~n", [erlang:universaltime()]), + [begin ok = action(Action, N, [], [], Inform), io:nl() end || + N <- unsafe_rpc(Node, rabbit_mnesia, running_clustered_nodes, []), + Action <- [status, cluster_status, environment]], + VHosts = unsafe_rpc(Node, rabbit_vhost, list, []), + [print_report(Node, Q) || Q <- ?GLOBAL_QUERIES], + [print_report(Node, Q, [V]) || Q <- ?VHOST_QUERIES, V <- VHosts], + io:format("End of server status report~n"), + ok. + +%%---------------------------------------------------------------------------- + +wait_for_application(Node, Attempts) -> + case rpc_call(Node, application, which_applications, [infinity]) of + {badrpc, _} = E -> case Attempts of + 0 -> E; + _ -> wait_for_application0(Node, Attempts - 1) + end; + Apps -> case proplists:is_defined(rabbit, Apps) of + %% We've seen the node up; if it goes down + %% die immediately. + true -> ok; + false -> wait_for_application0(Node, 0) + end + end. + +wait_for_application0(Node, Attempts) -> + timer:sleep(1000), + wait_for_application(Node, Attempts). default_if_empty(List, Default) when is_list(List) -> - if List == [] -> - Default; - true -> - [list_to_atom(X) || X <- List] + if List == [] -> Default; + true -> [list_to_atom(X) || X <- List] end. 
display_info_list(Results, InfoItemKeys) when is_list(Results) -> @@ -342,19 +413,27 @@ format_info_item([{TableEntryKey, TableEntryType, _TableEntryValue} | _] = Value) when is_binary(TableEntryKey) andalso is_atom(TableEntryType) -> io_lib:format("~1000000000000p", [prettify_amqp_table(Value)]); +format_info_item([T | _] = Value) + when is_tuple(T) orelse is_pid(T) orelse is_binary(T) orelse is_atom(T) orelse + is_list(T) -> + "[" ++ + lists:nthtail(2, lists:append( + [", " ++ format_info_item(E) || E <- Value])) ++ "]"; format_info_item(Value) -> io_lib:format("~w", [Value]). -display_list(L) when is_list(L) -> - lists:foreach(fun (I) when is_binary(I) -> - io:format("~s~n", [escape(I)]); - (I) when is_tuple(I) -> - display_row([escape(V) - || V <- tuple_to_list(I)]) - end, - lists:sort(L)), - ok; -display_list(Other) -> Other. +display_call_result(Node, MFA) -> + case call(Node, MFA) of + {badrpc, _} = Res -> throw(Res); + Res -> io:format("~p~n", [Res]), + ok + end. + +unsafe_rpc(Node, Mod, Fun, Args) -> + case rpc_call(Node, Mod, Fun, Args) of + {badrpc, _} = Res -> throw(Res); + Normal -> Normal + end. call(Node, {Mod, Fun, Args}) -> rpc_call(Node, Mod, Fun, lists:map(fun list_to_binary/1, Args)). @@ -366,12 +445,9 @@ rpc_call(Node, Mod, Fun, Args) -> %% characters. We don't escape characters above 127, since they may %% form part of UTF-8 strings. -escape(Atom) when is_atom(Atom) -> - escape(atom_to_list(Atom)); -escape(Bin) when is_binary(Bin) -> - escape(binary_to_list(Bin)); -escape(L) when is_list(L) -> - escape_char(lists:reverse(L), []). +escape(Atom) when is_atom(Atom) -> escape(atom_to_list(Atom)); +escape(Bin) when is_binary(Bin) -> escape(binary_to_list(Bin)); +escape(L) when is_list(L) -> escape_char(lists:reverse(L), []). escape_char([$\\ | T], Acc) -> escape_char(T, [$\\, $\\ | Acc]); @@ -386,19 +462,15 @@ escape_char([], Acc) -> prettify_amqp_table(Table) -> [{escape(K), prettify_typed_amqp_value(T, V)} || {K, T, V} <- Table]. 
-prettify_typed_amqp_value(Type, Value) -> - case Type of - longstr -> escape(Value); - table -> prettify_amqp_table(Value); - array -> [prettify_typed_amqp_value(T, V) || {T, V} <- Value]; - _ -> Value - end. +prettify_typed_amqp_value(longstr, Value) -> escape(Value); +prettify_typed_amqp_value(table, Value) -> prettify_amqp_table(Value); +prettify_typed_amqp_value(array, Value) -> [prettify_typed_amqp_value(T, V) || + {T, V} <- Value]; +prettify_typed_amqp_value(_Type, Value) -> Value. -% the slower shutdown on windows required to flush stdout +%% the slower shutdown on windows required to flush stdout quit(Status) -> case os:type() of - {unix, _} -> - halt(Status); - {win32, _} -> - init:stop(Status) + {unix, _} -> halt(Status); + {win32, _} -> init:stop(Status) end. diff --git a/src/rabbit_direct.erl b/src/rabbit_direct.erl index 3b8c9fba..7ff534ee 100644 --- a/src/rabbit_direct.erl +++ b/src/rabbit_direct.erl @@ -16,7 +16,7 @@ -module(rabbit_direct). --export([boot/0, connect/3, start_channel/5]). +-export([boot/0, connect/4, start_channel/8, disconnect/1]). -include("rabbit.hrl"). @@ -25,12 +25,16 @@ -ifdef(use_specs). -spec(boot/0 :: () -> 'ok'). --spec(connect/3 :: (binary(), binary(), binary()) -> - {'ok', {rabbit_types:user(), - rabbit_framing:amqp_table()}}). --spec(start_channel/5 :: (rabbit_channel:channel_number(), pid(), - rabbit_types:user(), rabbit_types:vhost(), pid()) -> - {'ok', pid()}). +-spec(connect/4 :: (rabbit_types:username(), rabbit_types:vhost(), + rabbit_types:protocol(), rabbit_event:event_props()) -> + {'ok', {rabbit_types:user(), + rabbit_framing:amqp_table()}}). +-spec(start_channel/8 :: + (rabbit_channel:channel_number(), pid(), pid(), rabbit_types:protocol(), + rabbit_types:user(), rabbit_types:vhost(), rabbit_framing:amqp_table(), + pid()) -> {'ok', pid()}). + +-spec(disconnect/1 :: (rabbit_event:event_props()) -> 'ok'). -endif. 
@@ -39,37 +43,44 @@ boot() -> {ok, _} = supervisor2:start_child( - rabbit_sup, - {rabbit_direct_client_sup, - {rabbit_client_sup, start_link, - [{local, rabbit_direct_client_sup}, - {rabbit_channel_sup, start_link, []}]}, - transient, infinity, supervisor, [rabbit_client_sup]}), + rabbit_sup, + {rabbit_direct_client_sup, + {rabbit_client_sup, start_link, + [{local, rabbit_direct_client_sup}, + {rabbit_channel_sup, start_link, []}]}, + transient, infinity, supervisor, [rabbit_client_sup]}), ok. %%---------------------------------------------------------------------------- -connect(Username, Password, VHost) -> +connect(Username, VHost, Protocol, Infos) -> case lists:keymember(rabbit, 1, application:which_applications()) of true -> - try rabbit_access_control:user_pass_login(Username, Password) of - #user{} = User -> + case rabbit_access_control:check_user_login(Username, []) of + {ok, User} -> try rabbit_access_control:check_vhost_access(User, VHost) of - ok -> {ok, {User, rabbit_reader:server_properties()}} + ok -> rabbit_event:notify(connection_created, Infos), + {ok, {User, + rabbit_reader:server_properties(Protocol)}} catch exit:#amqp_error{name = access_refused} -> {error, access_refused} - end - catch - exit:#amqp_error{name = access_refused} -> {error, auth_failure} + end; + {refused, _Msg, _Args} -> + {error, auth_failure} end; false -> {error, broker_not_found_on_node} end. -start_channel(Number, ClientChannelPid, User, VHost, Collector) -> +start_channel(Number, ClientChannelPid, ConnPid, Protocol, User, VHost, + Capabilities, Collector) -> {ok, _, {ChannelPid, _}} = supervisor2:start_child( - rabbit_direct_client_sup, - [{direct, Number, ClientChannelPid, User, VHost, Collector}]), + rabbit_direct_client_sup, + [{direct, Number, ClientChannelPid, ConnPid, Protocol, User, VHost, + Capabilities, Collector}]), {ok, ChannelPid}. + +disconnect(Infos) -> + rabbit_event:notify(connection_closed, Infos). 
diff --git a/src/rabbit_error_logger.erl b/src/rabbit_error_logger.erl index 0120f0d6..93aad9e3 100644 --- a/src/rabbit_error_logger.erl +++ b/src/rabbit_error_logger.erl @@ -67,8 +67,12 @@ publish(_Other, _Format, _Data, _State) -> ok. publish1(RoutingKey, Format, Data, LogExch) -> + %% 0-9-1 says the timestamp is a "64 bit POSIX timestamp". That's + %% second resolution, not millisecond. + Timestamp = rabbit_misc:now_ms() div 1000, {ok, _RoutingRes, _DeliveredQPids} = - rabbit_basic:publish(LogExch, RoutingKey, false, false, none, - #'P_basic'{content_type = <<"text/plain">>}, + rabbit_basic:publish(LogExch, RoutingKey, false, false, + #'P_basic'{content_type = <<"text/plain">>, + timestamp = Timestamp}, list_to_binary(io_lib:format(Format, Data))), ok. diff --git a/src/rabbit_event.erl b/src/rabbit_event.erl index f4ee279b..887e4a1f 100644 --- a/src/rabbit_event.erl +++ b/src/rabbit_event.erl @@ -26,7 +26,7 @@ %%---------------------------------------------------------------------------- --record(state, {level, timer}). +-record(state, {level, interval, timer}). %%---------------------------------------------------------------------------- @@ -49,6 +49,7 @@ -opaque(state() :: #state { level :: level(), + interval :: integer(), timer :: atom() }). @@ -95,12 +96,14 @@ start_link() -> init_stats_timer() -> {ok, StatsLevel} = application:get_env(rabbit, collect_statistics), - #state{level = StatsLevel, timer = undefined}. + {ok, Interval} = application:get_env(rabbit, collect_statistics_interval), + #state{level = StatsLevel, interval = Interval, timer = undefined}. ensure_stats_timer(State = #state{level = none}, _Pid, _Msg) -> State; -ensure_stats_timer(State = #state{timer = undefined}, Pid, Msg) -> - TRef = erlang:send_after(?STATS_INTERVAL, Pid, Msg), +ensure_stats_timer(State = #state{interval = Interval, + timer = undefined}, Pid, Msg) -> + TRef = erlang:send_after(Interval, Pid, Msg), State#state{timer = TRef}; ensure_stats_timer(State, _Pid, _Msg) -> State. 
@@ -129,15 +132,8 @@ notify_if(true, Type, Props) -> notify(Type, Props); notify_if(false, _Type, _Props) -> ok. notify(Type, Props) -> - try - %% TODO: switch to os:timestamp() when we drop support for - %% Erlang/OTP < R13B01 - gen_event:notify(rabbit_event, #event{type = Type, - props = Props, - timestamp = now()}) - catch error:badarg -> - %% badarg means rabbit_event is no longer registered. We never - %% unregister it so the great likelihood is that we're shutting - %% down the broker but some events were backed up. Ignore it. - ok - end. + %% TODO: switch to os:timestamp() when we drop support for + %% Erlang/OTP < R13B01 + gen_event:notify(rabbit_event, #event{type = Type, + props = Props, + timestamp = now()}). diff --git a/src/rabbit_exchange.erl b/src/rabbit_exchange.erl index 92259195..afa48355 100644 --- a/src/rabbit_exchange.erl +++ b/src/rabbit_exchange.erl @@ -18,12 +18,13 @@ -include("rabbit.hrl"). -include("rabbit_framing.hrl"). --export([recover/0, declare/6, lookup/1, lookup_or_die/1, list/1, info_keys/0, - info/1, info/2, info_all/1, info_all/2, publish/2, delete/2]). --export([callback/3]). -%% this must be run inside a mnesia tx --export([maybe_auto_delete/1]). --export([assert_equivalence/6, assert_args_equivalence/2, check_type/1]). +-export([recover/0, callback/3, declare/6, + assert_equivalence/6, assert_args_equivalence/2, check_type/1, + lookup/1, lookup_or_die/1, list/1, update_scratch/2, + info_keys/0, info/1, info/2, info_all/1, info_all/2, + route/2, delete/2]). +%% these must be run inside a mnesia tx +-export([maybe_auto_delete/1, serial/1, peek_serial/1]). %%---------------------------------------------------------------------------- @@ -33,8 +34,10 @@ -type(name() :: rabbit_types:r('exchange')). -type(type() :: atom()). +-type(fun_name() :: atom()). --spec(recover/0 :: () -> 'ok'). +-spec(recover/0 :: () -> [name()]). +-spec(callback/3:: (rabbit_types:exchange(), fun_name(), [any()]) -> 'ok'). 
-spec(declare/6 :: (name(), type(), boolean(), boolean(), boolean(), rabbit_framing:amqp_table()) @@ -55,6 +58,7 @@ (name()) -> rabbit_types:exchange() | rabbit_types:channel_exit()). -spec(list/1 :: (rabbit_types:vhost()) -> [rabbit_types:exchange()]). +-spec(update_scratch/2 :: (name(), fun((any()) -> any())) -> 'ok'). -spec(info_keys/0 :: () -> rabbit_types:info_keys()). -spec(info/1 :: (rabbit_types:exchange()) -> rabbit_types:infos()). -spec(info/2 :: @@ -62,9 +66,9 @@ -> rabbit_types:infos()). -spec(info_all/1 :: (rabbit_types:vhost()) -> [rabbit_types:infos()]). -spec(info_all/2 ::(rabbit_types:vhost(), rabbit_types:info_keys()) - -> [rabbit_types:infos()]). --spec(publish/2 :: (rabbit_types:exchange(), rabbit_types:delivery()) - -> {rabbit_router:routing_result(), [pid()]}). + -> [rabbit_types:infos()]). +-spec(route/2 :: (rabbit_types:exchange(), rabbit_types:delivery()) + -> [rabbit_amqqueue:name()]). -spec(delete/2 :: (name(), boolean())-> 'ok' | rabbit_types:error('not_found') | @@ -72,7 +76,8 @@ -spec(maybe_auto_delete/1:: (rabbit_types:exchange()) -> 'not_deleted' | {'deleted', rabbit_binding:deletions()}). --spec(callback/3:: (rabbit_types:exchange(), atom(), [any()]) -> 'ok'). +-spec(serial/1 :: (rabbit_types:exchange()) -> 'none' | pos_integer()). +-spec(peek_serial/1 :: (name()) -> pos_integer() | 'undefined'). -endif. @@ -81,25 +86,22 @@ -define(INFO_KEYS, [name, type, durable, auto_delete, internal, arguments]). recover() -> - Xs = rabbit_misc:table_fold( - fun (X, Acc) -> - ok = mnesia:write(rabbit_exchange, X, write), - [X | Acc] - end, [], rabbit_durable_exchange), - Bs = rabbit_binding:recover(), - recover_with_bindings( - lists:keysort(#binding.source, Bs), - lists:keysort(#exchange.name, Xs), []). 
- -recover_with_bindings([B = #binding{source = XName} | Rest], - Xs = [#exchange{name = XName} | _], - Bindings) -> - recover_with_bindings(Rest, Xs, [B | Bindings]); -recover_with_bindings(Bs, [X = #exchange{type = Type} | Xs], Bindings) -> - (type_to_module(Type)):recover(X, Bindings), - recover_with_bindings(Bs, Xs, []); -recover_with_bindings([], [], []) -> - ok. + Xs = rabbit_misc:table_filter( + fun (#exchange{name = XName}) -> + mnesia:read({rabbit_exchange, XName}) =:= [] + end, + fun (X, Tx) -> + case Tx of + true -> store(X); + false -> ok + end, + rabbit_exchange:callback(X, create, [map_create_tx(Tx), X]) + end, + rabbit_durable_exchange), + [XName || #exchange{name = XName} <- Xs]. + +callback(#exchange{type = XType}, Fun, Args) -> + apply(type_to_module(XType), Fun, Args). declare(XName, Type, Durable, AutoDelete, Internal, Args) -> X = #exchange{name = XName, @@ -108,13 +110,14 @@ declare(XName, Type, Durable, AutoDelete, Internal, Args) -> auto_delete = AutoDelete, internal = Internal, arguments = Args}, + XT = type_to_module(Type), %% We want to upset things if it isn't ok - ok = (type_to_module(Type)):validate(X), + ok = XT:validate(X), rabbit_misc:execute_mnesia_transaction( fun () -> case mnesia:wread({rabbit_exchange, XName}) of [] -> - ok = mnesia:write(rabbit_exchange, X, write), + store(X), ok = case Durable of true -> mnesia:write(rabbit_durable_exchange, X, write); @@ -126,7 +129,7 @@ declare(XName, Type, Durable, AutoDelete, Internal, Args) -> end end, fun ({new, Exchange}, Tx) -> - callback(Exchange, create, [Tx, Exchange]), + ok = XT:create(map_create_tx(Tx), Exchange), rabbit_event:notify_if(not Tx, exchange_created, info(Exchange)), Exchange; ({existing, Exchange}, _Tx) -> @@ -135,10 +138,16 @@ declare(XName, Type, Durable, AutoDelete, Internal, Args) -> Err end). -%% Used with atoms from records; e.g., the type is expected to exist. -type_to_module(T) -> - {ok, Module} = rabbit_registry:lookup_module(exchange, T), - Module. 
+map_create_tx(true) -> transaction; +map_create_tx(false) -> none. + +store(X = #exchange{name = Name, type = Type}) -> + ok = mnesia:write(rabbit_exchange, X, write), + case (type_to_module(Type)):serialise_events() of + true -> S = #exchange_serial{name = Name, next = 1}, + ok = mnesia:write(rabbit_exchange_serial, S, write); + false -> ok + end. %% Used with binaries sent over the wire; the type may not exist. check_type(TypeBin) -> @@ -191,6 +200,23 @@ list(VHostPath) -> rabbit_exchange, #exchange{name = rabbit_misc:r(VHostPath, exchange), _ = '_'}). +update_scratch(Name, Fun) -> + rabbit_misc:execute_mnesia_transaction( + fun() -> + case mnesia:wread({rabbit_exchange, Name}) of + [X = #exchange{durable = Durable, scratch = Scratch}] -> + X1 = X#exchange{scratch = Fun(Scratch)}, + ok = mnesia:write(rabbit_exchange, X1, write), + case Durable of + true -> ok = mnesia:write(rabbit_durable_exchange, + X1, write); + _ -> ok + end; + [] -> + ok + end + end). + info_keys() -> ?INFO_KEYS. map(VHostPath, F) -> @@ -216,21 +242,19 @@ info_all(VHostPath) -> map(VHostPath, fun (X) -> info(X) end). info_all(VHostPath, Items) -> map(VHostPath, fun (X) -> info(X, Items) end). -publish(X = #exchange{name = XName}, Delivery) -> - rabbit_router:deliver( - route(Delivery, {queue:from_list([X]), XName, []}), - Delivery). +route(X = #exchange{name = XName}, Delivery) -> + route1(Delivery, {queue:from_list([X]), XName, []}). -route(Delivery, {WorkList, SeenXs, QNames}) -> +route1(Delivery, {WorkList, SeenXs, QNames}) -> case queue:out(WorkList) of {empty, _WorkList} -> lists:usort(QNames); {{value, X = #exchange{type = Type}}, WorkList1} -> DstNames = process_alternate( X, ((type_to_module(Type)):route(X, Delivery))), - route(Delivery, - lists:foldl(fun process_route/2, {WorkList1, SeenXs, QNames}, - DstNames)) + route1(Delivery, + lists:foldl(fun process_route/2, {WorkList1, SeenXs, QNames}, + DstNames)) end. 
process_alternate(#exchange{name = XName, arguments = Args}, []) -> @@ -263,27 +287,30 @@ process_route(#resource{kind = queue} = QName, {WorkList, SeenXs, QNames}) -> {WorkList, SeenXs, [QName | QNames]}. -call_with_exchange(XName, Fun, PrePostCommitFun) -> - rabbit_misc:execute_mnesia_transaction( +call_with_exchange(XName, Fun) -> + rabbit_misc:execute_mnesia_tx_with_tail( fun () -> case mnesia:read({rabbit_exchange, XName}) of - [] -> {error, not_found}; - [X] -> Fun(X) - end - end, PrePostCommitFun). + [] -> rabbit_misc:const({error, not_found}); + [X] -> Fun(X) + end + end). delete(XName, IfUnused) -> + Fun = case IfUnused of + true -> fun conditional_delete/1; + false -> fun unconditional_delete/1 + end, call_with_exchange( XName, - case IfUnused of - true -> fun conditional_delete/1; - false -> fun unconditional_delete/1 - end, - fun ({deleted, X, Bs, Deletions}, Tx) -> - ok = rabbit_binding:process_deletions( - rabbit_binding:add_deletion( - XName, {X, deleted, Bs}, Deletions), Tx); - (Error = {error, _InUseOrNotFound}, _Tx) -> - Error + fun (X) -> + case Fun(X) of + {deleted, X, Bs, Deletions} -> + rabbit_binding:process_deletions( + rabbit_binding:add_deletion( + XName, {X, deleted, Bs}, Deletions)); + {error, _InUseOrNotFound} = E -> + rabbit_misc:const(E) + end end). maybe_auto_delete(#exchange{auto_delete = false}) -> @@ -294,9 +321,6 @@ maybe_auto_delete(#exchange{auto_delete = true} = X) -> {deleted, X, [], Deletions} -> {deleted, Deletions} end. -callback(#exchange{type = XType}, Fun, Args) -> - apply(type_to_module(XType), Fun, Args). 
- conditional_delete(X = #exchange{name = XName}) -> case rabbit_binding:has_for_source(XName) of false -> unconditional_delete(X); @@ -306,5 +330,30 @@ conditional_delete(X = #exchange{name = XName}) -> unconditional_delete(X = #exchange{name = XName}) -> ok = mnesia:delete({rabbit_durable_exchange, XName}), ok = mnesia:delete({rabbit_exchange, XName}), + ok = mnesia:delete({rabbit_exchange_serial, XName}), Bindings = rabbit_binding:remove_for_source(XName), {deleted, X, Bindings, rabbit_binding:remove_for_destination(XName)}. + +serial(#exchange{name = XName, type = Type}) -> + case (type_to_module(Type)):serialise_events() of + true -> next_serial(XName); + false -> none + end. + +next_serial(XName) -> + [#exchange_serial{next = Serial}] = + mnesia:read(rabbit_exchange_serial, XName, write), + ok = mnesia:write(rabbit_exchange_serial, + #exchange_serial{name = XName, next = Serial + 1}, write), + Serial. + +peek_serial(XName) -> + case mnesia:read({rabbit_exchange_serial, XName}) of + [#exchange_serial{next = Serial}] -> Serial; + _ -> undefined + end. + +%% Used with atoms from records; e.g., the type is expected to exist. +type_to_module(T) -> + {ok, Module} = rabbit_registry:lookup_module(exchange, T), + Module. diff --git a/src/rabbit_exchange_type.erl b/src/rabbit_exchange_type.erl index 547583e9..ab3d00dc 100644 --- a/src/rabbit_exchange_type.erl +++ b/src/rabbit_exchange_type.erl @@ -21,21 +21,25 @@ behaviour_info(callbacks) -> [ {description, 0}, + + %% Should Rabbit ensure that all binding events that are + %% delivered to an individual exchange can be serialised? (they + %% might still be delivered out of order, but there'll be a + %% serial number). 
+ {serialise_events, 0}, + {route, 2}, %% called BEFORE declaration, to check args etc; may exit with #amqp_error{} {validate, 1}, - %% called after declaration when previously absent + %% called after declaration and recovery {create, 2}, - %% called when recovering - {recover, 2}, - - %% called after exchange deletion. + %% called after exchange (auto)deletion. {delete, 3}, - %% called after a binding has been added + %% called after a binding has been added or recovered {add_binding, 3}, %% called after bindings have been deleted. diff --git a/src/rabbit_exchange_type_direct.erl b/src/rabbit_exchange_type_direct.erl index c51b0913..b485e31f 100644 --- a/src/rabbit_exchange_type_direct.erl +++ b/src/rabbit_exchange_type_direct.erl @@ -19,8 +19,8 @@ -behaviour(rabbit_exchange_type). --export([description/0, route/2]). --export([validate/1, create/2, recover/2, delete/3, +-export([description/0, serialise_events/0, route/2]). +-export([validate/1, create/2, delete/3, add_binding/3, remove_bindings/3, assert_args_equivalence/2]). -include("rabbit_exchange_type_spec.hrl"). @@ -35,13 +35,14 @@ description() -> [{name, <<"direct">>}, {description, <<"AMQP direct exchange, as per the AMQP specification">>}]. +serialise_events() -> false. + route(#exchange{name = Name}, - #delivery{message = #basic_message{routing_key = RoutingKey}}) -> - rabbit_router:match_routing_key(Name, RoutingKey). + #delivery{message = #basic_message{routing_keys = Routes}}) -> + rabbit_router:match_routing_key(Name, Routes). validate(_X) -> ok. create(_Tx, _X) -> ok. -recover(_X, _Bs) -> ok. delete(_Tx, _X, _Bs) -> ok. add_binding(_Tx, _X, _B) -> ok. remove_bindings(_Tx, _X, _Bs) -> ok. diff --git a/src/rabbit_exchange_type_fanout.erl b/src/rabbit_exchange_type_fanout.erl index 382fb627..3c029722 100644 --- a/src/rabbit_exchange_type_fanout.erl +++ b/src/rabbit_exchange_type_fanout.erl @@ -19,8 +19,8 @@ -behaviour(rabbit_exchange_type). --export([description/0, route/2]). 
--export([validate/1, create/2, recover/2, delete/3, add_binding/3, +-export([description/0, serialise_events/0, route/2]). +-export([validate/1, create/2, delete/3, add_binding/3, remove_bindings/3, assert_args_equivalence/2]). -include("rabbit_exchange_type_spec.hrl"). @@ -35,12 +35,13 @@ description() -> [{name, <<"fanout">>}, {description, <<"AMQP fanout exchange, as per the AMQP specification">>}]. +serialise_events() -> false. + route(#exchange{name = Name}, _Delivery) -> - rabbit_router:match_routing_key(Name, '_'). + rabbit_router:match_routing_key(Name, ['_']). validate(_X) -> ok. create(_Tx, _X) -> ok. -recover(_X, _Bs) -> ok. delete(_Tx, _X, _Bs) -> ok. add_binding(_Tx, _X, _B) -> ok. remove_bindings(_Tx, _X, _Bs) -> ok. diff --git a/src/rabbit_exchange_type_headers.erl b/src/rabbit_exchange_type_headers.erl index d3529b06..f09e4aae 100644 --- a/src/rabbit_exchange_type_headers.erl +++ b/src/rabbit_exchange_type_headers.erl @@ -20,8 +20,8 @@ -behaviour(rabbit_exchange_type). --export([description/0, route/2]). --export([validate/1, create/2, recover/2, delete/3, add_binding/3, +-export([description/0, serialise_events/0, route/2]). +-export([validate/1, create/2, delete/3, add_binding/3, remove_bindings/3, assert_args_equivalence/2]). -include("rabbit_exchange_type_spec.hrl"). @@ -41,6 +41,8 @@ description() -> [{name, <<"headers">>}, {description, <<"AMQP headers exchange, as per the AMQP specification">>}]. +serialise_events() -> false. + route(#exchange{name = Name}, #delivery{message = #basic_message{content = Content}}) -> Headers = case (Content#content.properties)#'P_basic'.headers of @@ -114,7 +116,6 @@ headers_match([{PK, PT, PV} | PRest], [{DK, DT, DV} | DRest], validate(_X) -> ok. create(_Tx, _X) -> ok. -recover(_X, _Bs) -> ok. delete(_Tx, _X, _Bs) -> ok. add_binding(_Tx, _X, _B) -> ok. remove_bindings(_Tx, _X, _Bs) -> ok. 
diff --git a/src/rabbit_exchange_type_topic.erl b/src/rabbit_exchange_type_topic.erl index 9cbf8100..348655b1 100644 --- a/src/rabbit_exchange_type_topic.erl +++ b/src/rabbit_exchange_type_topic.erl @@ -15,12 +15,13 @@ %% -module(rabbit_exchange_type_topic). + -include("rabbit.hrl"). -behaviour(rabbit_exchange_type). --export([description/0, route/2]). --export([validate/1, create/2, recover/2, delete/3, add_binding/3, +-export([description/0, serialise_events/0, route/2]). +-export([validate/1, create/2, delete/3, add_binding/3, remove_bindings/3, assert_args_equivalence/2]). -include("rabbit_exchange_type_spec.hrl"). @@ -31,58 +32,247 @@ {requires, rabbit_registry}, {enables, kernel_ready}]}). --export([topic_matches/2]). - --ifdef(use_specs). - --spec(topic_matches/2 :: (binary(), binary()) -> boolean()). - --endif. +%%---------------------------------------------------------------------------- description() -> [{name, <<"topic">>}, {description, <<"AMQP topic exchange, as per the AMQP specification">>}]. -route(#exchange{name = Name}, - #delivery{message = #basic_message{routing_key = RoutingKey}}) -> - rabbit_router:match_bindings(Name, - fun (#binding{key = BindingKey}) -> - topic_matches(BindingKey, RoutingKey) - end). +serialise_events() -> false. -split_topic_key(Key) -> - string:tokens(binary_to_list(Key), "."). - -topic_matches(PatternKey, RoutingKey) -> - P = split_topic_key(PatternKey), - R = split_topic_key(RoutingKey), - topic_matches1(P, R). - -topic_matches1(["#"], _R) -> - true; -topic_matches1(["#" | PTail], R) -> - last_topic_match(PTail, [], lists:reverse(R)); -topic_matches1([], []) -> - true; -topic_matches1(["*" | PatRest], [_ | ValRest]) -> - topic_matches1(PatRest, ValRest); -topic_matches1([PatElement | PatRest], [ValElement | ValRest]) - when PatElement == ValElement -> - topic_matches1(PatRest, ValRest); -topic_matches1(_, _) -> - false. 
- -last_topic_match(P, R, []) -> - topic_matches1(P, R); -last_topic_match(P, R, [BacktrackNext | BacktrackList]) -> - topic_matches1(P, R) or - last_topic_match(P, [BacktrackNext | R], BacktrackList). +%% NB: This may return duplicate results in some situations (that's ok) +route(#exchange{name = X}, + #delivery{message = #basic_message{routing_keys = Routes}}) -> + lists:append([begin + Words = split_topic_key(RKey), + mnesia:async_dirty(fun trie_match/2, [X, Words]) + end || RKey <- Routes]). validate(_X) -> ok. create(_Tx, _X) -> ok. -recover(_X, _Bs) -> ok. -delete(_Tx, _X, _Bs) -> ok. -add_binding(_Tx, _X, _B) -> ok. -remove_bindings(_Tx, _X, _Bs) -> ok. + +delete(transaction, #exchange{name = X}, _Bs) -> + trie_remove_all_edges(X), + trie_remove_all_bindings(X), + ok; +delete(none, _Exchange, _Bs) -> + ok. + +add_binding(transaction, _Exchange, Binding) -> + internal_add_binding(Binding); +add_binding(none, _Exchange, _Binding) -> + ok. + +remove_bindings(transaction, #exchange{name = X}, Bs) -> + %% The remove process is split into two distinct phases. In the + %% first phase we gather the lists of bindings and edges to + %% delete, then in the second phase we process all the + %% deletions. This is to prevent interleaving of read/write + %% operations in mnesia that can adversely affect performance. + {ToDelete, Paths} = + lists:foldl( + fun(#binding{source = S, key = K, destination = D}, {Acc, PathAcc}) -> + Path = [{FinalNode, _} | _] = + follow_down_get_path(S, split_topic_key(K)), + {[{FinalNode, D} | Acc], + decrement_bindings(X, Path, maybe_add_path(X, Path, PathAcc))} + end, {[], gb_trees:empty()}, Bs), + + [trie_remove_binding(X, FinalNode, D) || {FinalNode, D} <- ToDelete], + [trie_remove_edge(X, Parent, Node, W) || + {Node, {Parent, W, {0, 0}}} <- gb_trees:to_list(Paths)], + ok; +remove_bindings(none, _X, _Bs) -> + ok. 
+ +maybe_add_path(_X, [{root, none}], PathAcc) -> + PathAcc; +maybe_add_path(X, [{Node, W}, {Parent, _} | _], PathAcc) -> + case gb_trees:is_defined(Node, PathAcc) of + true -> PathAcc; + false -> gb_trees:insert(Node, {Parent, W, {trie_binding_count(X, Node), + trie_child_count(X, Node)}}, + PathAcc) + end. + +decrement_bindings(X, Path, PathAcc) -> + with_path_acc(X, fun({Bindings, Edges}) -> {Bindings - 1, Edges} end, + Path, PathAcc). + +decrement_edges(X, Path, PathAcc) -> + with_path_acc(X, fun({Bindings, Edges}) -> {Bindings, Edges - 1} end, + Path, PathAcc). + +with_path_acc(_X, _Fun, [{root, none}], PathAcc) -> + PathAcc; +with_path_acc(X, Fun, [{Node, _} | ParentPath], PathAcc) -> + {Parent, W, Counts} = gb_trees:get(Node, PathAcc), + NewCounts = Fun(Counts), + NewPathAcc = gb_trees:update(Node, {Parent, W, NewCounts}, PathAcc), + case NewCounts of + {0, 0} -> decrement_edges(X, ParentPath, + maybe_add_path(X, ParentPath, NewPathAcc)); + _ -> NewPathAcc + end. + + assert_args_equivalence(X, Args) -> rabbit_exchange:assert_args_equivalence(X, Args). + +%%---------------------------------------------------------------------------- + +internal_add_binding(#binding{source = X, key = K, destination = D}) -> + FinalNode = follow_down_create(X, split_topic_key(K)), + trie_add_binding(X, FinalNode, D), + ok. + +trie_match(X, Words) -> + trie_match(X, root, Words, []). + +trie_match(X, Node, [], ResAcc) -> + trie_match_part(X, Node, "#", fun trie_match_skip_any/4, [], + trie_bindings(X, Node) ++ ResAcc); +trie_match(X, Node, [W | RestW] = Words, ResAcc) -> + lists:foldl(fun ({WArg, MatchFun, RestWArg}, Acc) -> + trie_match_part(X, Node, WArg, MatchFun, RestWArg, Acc) + end, ResAcc, [{W, fun trie_match/4, RestW}, + {"*", fun trie_match/4, RestW}, + {"#", fun trie_match_skip_any/4, Words}]). 
+ +trie_match_part(X, Node, Search, MatchFun, RestW, ResAcc) -> + case trie_child(X, Node, Search) of + {ok, NextNode} -> MatchFun(X, NextNode, RestW, ResAcc); + error -> ResAcc + end. + +trie_match_skip_any(X, Node, [], ResAcc) -> + trie_match(X, Node, [], ResAcc); +trie_match_skip_any(X, Node, [_ | RestW] = Words, ResAcc) -> + trie_match_skip_any(X, Node, RestW, + trie_match(X, Node, Words, ResAcc)). + +follow_down_create(X, Words) -> + case follow_down_last_node(X, Words) of + {ok, FinalNode} -> FinalNode; + {error, Node, RestW} -> lists:foldl( + fun (W, CurNode) -> + NewNode = new_node_id(), + trie_add_edge(X, CurNode, NewNode, W), + NewNode + end, Node, RestW) + end. + +follow_down_last_node(X, Words) -> + follow_down(X, fun (_, Node, _) -> Node end, root, Words). + +follow_down_get_path(X, Words) -> + {ok, Path} = + follow_down(X, fun (W, Node, PathAcc) -> [{Node, W} | PathAcc] end, + [{root, none}], Words), + Path. + +follow_down(X, AccFun, Acc0, Words) -> + follow_down(X, root, AccFun, Acc0, Words). + +follow_down(_X, _CurNode, _AccFun, Acc, []) -> + {ok, Acc}; +follow_down(X, CurNode, AccFun, Acc, Words = [W | RestW]) -> + case trie_child(X, CurNode, W) of + {ok, NextNode} -> follow_down(X, NextNode, AccFun, + AccFun(W, NextNode, Acc), RestW); + error -> {error, Acc, Words} + end. + +trie_child(X, Node, Word) -> + case mnesia:read({rabbit_topic_trie_edge, + #trie_edge{exchange_name = X, + node_id = Node, + word = Word}}) of + [#topic_trie_edge{node_id = NextNode}] -> {ok, NextNode}; + [] -> error + end. + +trie_bindings(X, Node) -> + MatchHead = #topic_trie_binding{ + trie_binding = #trie_binding{exchange_name = X, + node_id = Node, + destination = '$1'}}, + mnesia:select(rabbit_topic_trie_binding, [{MatchHead, [], ['$1']}]). + +trie_add_edge(X, FromNode, ToNode, W) -> + trie_edge_op(X, FromNode, ToNode, W, fun mnesia:write/3). + +trie_remove_edge(X, FromNode, ToNode, W) -> + trie_edge_op(X, FromNode, ToNode, W, fun mnesia:delete_object/3). 
+ +trie_edge_op(X, FromNode, ToNode, W, Op) -> + ok = Op(rabbit_topic_trie_edge, + #topic_trie_edge{trie_edge = #trie_edge{exchange_name = X, + node_id = FromNode, + word = W}, + node_id = ToNode}, + write). + +trie_add_binding(X, Node, D) -> + trie_binding_op(X, Node, D, fun mnesia:write/3). + +trie_remove_binding(X, Node, D) -> + trie_binding_op(X, Node, D, fun mnesia:delete_object/3). + +trie_binding_op(X, Node, D, Op) -> + ok = Op(rabbit_topic_trie_binding, + #topic_trie_binding{ + trie_binding = #trie_binding{exchange_name = X, + node_id = Node, + destination = D}}, + write). + +trie_child_count(X, Node) -> + count(rabbit_topic_trie_edge, + #topic_trie_edge{trie_edge = #trie_edge{exchange_name = X, + node_id = Node, + _ = '_'}, + _ = '_'}). + +trie_binding_count(X, Node) -> + count(rabbit_topic_trie_binding, + #topic_trie_binding{ + trie_binding = #trie_binding{exchange_name = X, + node_id = Node, + _ = '_'}, + _ = '_'}). + +count(Table, Match) -> + length(mnesia:match_object(Table, Match, read)). + +trie_remove_all_edges(X) -> + remove_all(rabbit_topic_trie_edge, + #topic_trie_edge{trie_edge = #trie_edge{exchange_name = X, + _ = '_'}, + _ = '_'}). + +trie_remove_all_bindings(X) -> + remove_all(rabbit_topic_trie_binding, + #topic_trie_binding{ + trie_binding = #trie_binding{exchange_name = X, _ = '_'}, + _ = '_'}). + +remove_all(Table, Pattern) -> + lists:foreach(fun (R) -> mnesia:delete_object(Table, R, write) end, + mnesia:match_object(Table, Pattern, write)). + +new_node_id() -> + rabbit_guid:guid(). + +split_topic_key(Key) -> + split_topic_key(Key, [], []). + +split_topic_key(<<>>, [], []) -> + []; +split_topic_key(<<>>, RevWordAcc, RevResAcc) -> + lists:reverse([lists:reverse(RevWordAcc) | RevResAcc]); +split_topic_key(<<$., Rest/binary>>, RevWordAcc, RevResAcc) -> + split_topic_key(Rest, [], [lists:reverse(RevWordAcc) | RevResAcc]); +split_topic_key(<<C:8, Rest/binary>>, RevWordAcc, RevResAcc) -> + split_topic_key(Rest, [C | RevWordAcc], RevResAcc). 
+ diff --git a/src/rabbit_limiter.erl b/src/rabbit_limiter.erl index 86ea7282..8f9ab032 100644 --- a/src/rabbit_limiter.erl +++ b/src/rabbit_limiter.erl @@ -49,7 +49,7 @@ -record(lim, {prefetch_count = 0, ch_pid, blocked = false, - queues = dict:new(), % QPid -> {MonitorRef, Notify} + queues = orddict:new(), % QPid -> {MonitorRef, Notify} volume = 0}). %% 'Notify' is a boolean that indicates whether a queue should be %% notified of a change in the limit or volume that may allow it to @@ -65,7 +65,7 @@ start_link(ChPid, UnackedMsgCount) -> limit(undefined, 0) -> ok; limit(LimiterPid, PrefetchCount) -> - gen_server2:call(LimiterPid, {limit, PrefetchCount}). + gen_server2:call(LimiterPid, {limit, PrefetchCount}, infinity). %% Ask the limiter whether the queue can deliver a message without %% breaching a limit @@ -120,9 +120,9 @@ init([ChPid, UnackedMsgCount]) -> prioritise_call(get_limit, _From, _State) -> 9; prioritise_call(_Msg, _From, _State) -> 0. -handle_call({can_send, _QPid, _AckRequired}, _From, +handle_call({can_send, QPid, _AckRequired}, _From, State = #lim{blocked = true}) -> - {reply, false, State}; + {reply, false, limit_queue(QPid, State)}; handle_call({can_send, QPid, AckRequired}, _From, State = #lim{volume = Volume}) -> case limit_reached(State) of @@ -196,31 +196,30 @@ limit_reached(#lim{prefetch_count = Limit, volume = Volume}) -> blocked(#lim{blocked = Blocked}) -> Blocked. remember_queue(QPid, State = #lim{queues = Queues}) -> - case dict:is_key(QPid, Queues) of + case orddict:is_key(QPid, Queues) of false -> MRef = erlang:monitor(process, QPid), - State#lim{queues = dict:store(QPid, {MRef, false}, Queues)}; + State#lim{queues = orddict:store(QPid, {MRef, false}, Queues)}; true -> State end. 
forget_queue(QPid, State = #lim{ch_pid = ChPid, queues = Queues}) -> - case dict:find(QPid, Queues) of - {ok, {MRef, _}} -> - true = erlang:demonitor(MRef), - ok = rabbit_amqqueue:unblock(QPid, ChPid), - State#lim{queues = dict:erase(QPid, Queues)}; - error -> State + case orddict:find(QPid, Queues) of + {ok, {MRef, _}} -> true = erlang:demonitor(MRef), + ok = rabbit_amqqueue:unblock(QPid, ChPid), + State#lim{queues = orddict:erase(QPid, Queues)}; + error -> State end. limit_queue(QPid, State = #lim{queues = Queues}) -> UpdateFun = fun ({MRef, _}) -> {MRef, true} end, - State#lim{queues = dict:update(QPid, UpdateFun, Queues)}. + State#lim{queues = orddict:update(QPid, UpdateFun, Queues)}. notify_queues(State = #lim{ch_pid = ChPid, queues = Queues}) -> {QList, NewQueues} = - dict:fold(fun (_QPid, {_, false}, Acc) -> Acc; - (QPid, {MRef, true}, {L, D}) -> - {[QPid | L], dict:store(QPid, {MRef, false}, D)} - end, {[], Queues}, Queues), + orddict:fold(fun (_QPid, {_, false}, Acc) -> Acc; + (QPid, {MRef, true}, {L, D}) -> + {[QPid | L], orddict:store(QPid, {MRef, false}, D)} + end, {[], Queues}, Queues), case length(QList) of 0 -> ok; L -> diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index 2f8c940b..996b0a98 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -111,11 +111,11 @@ stop() -> init([]) -> MemoryLimit = trunc(?MEMORY_LIMIT_SCALING * - (try - vm_memory_monitor:get_memory_limit() - catch - exit:{noproc, _} -> ?MEMORY_SIZE_FOR_DISABLED_VMM - end)), + (try + vm_memory_monitor:get_memory_limit() + catch + exit:{noproc, _} -> ?MEMORY_SIZE_FOR_DISABLED_VMM + end)), {ok, TRef} = timer:apply_interval(?DEFAULT_UPDATE_INTERVAL, ?SERVER, update, []), diff --git a/src/rabbit_mirror_queue_coordinator.erl b/src/rabbit_mirror_queue_coordinator.erl new file mode 100644 index 00000000..f6664a27 --- /dev/null +++ b/src/rabbit_mirror_queue_coordinator.erl @@ -0,0 +1,395 @@ +%% The contents of this file are subject to the 
Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is VMware, Inc. +%% Copyright (c) 2010-2011 VMware, Inc. All rights reserved. +%% + +-module(rabbit_mirror_queue_coordinator). + +-export([start_link/3, get_gm/1, ensure_monitoring/2]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). + +-export([joined/2, members_changed/3, handle_msg/3]). + +-behaviour(gen_server2). +-behaviour(gm). + +-include("rabbit.hrl"). +-include("gm_specs.hrl"). + +-record(state, { q, + gm, + monitors, + death_fun + }). + +-define(ONE_SECOND, 1000). + +-ifdef(use_specs). + +-spec(start_link/3 :: (rabbit_types:amqqueue(), pid() | 'undefined', + rabbit_mirror_queue_master:death_fun()) -> + rabbit_types:ok_pid_or_error()). +-spec(get_gm/1 :: (pid()) -> pid()). +-spec(ensure_monitoring/2 :: (pid(), [pid()]) -> 'ok'). + +-endif. + +%%---------------------------------------------------------------------------- +%% +%% Mirror Queues +%% +%% A queue with mirrors consists of the following: +%% +%% #amqqueue{ pid, mirror_pids } +%% | | +%% +----------+ +-------+--------------+-----------...etc... +%% | | | +%% V V V +%% amqqueue_process---+ slave-----+ slave-----+ ...etc... +%% | BQ = master----+ | | BQ = vq | | BQ = vq | +%% | | BQ = vq | | +-+-------+ +-+-------+ +%% | +-+-------+ | | | +%% +-++-----|---------+ | | (some details elided) +%% || | | | +%% || coordinator-+ | | +%% || +-+---------+ | | +%% || | | | +%% || gm-+ -- -- -- -- gm-+- -- -- -- gm-+- -- --...etc... 
+%% || +--+ +--+ +--+ +%% || +%% consumers +%% +%% The master is merely an implementation of bq, and thus is invoked +%% through the normal bq interface by the amqqueue_process. The slaves +%% meanwhile are processes in their own right (as is the +%% coordinator). The coordinator and all slaves belong to the same gm +%% group. Every member of a gm group receives messages sent to the gm +%% group. Because the master is the bq of amqqueue_process, it doesn't +%% have sole control over its mailbox, and as a result, the master +%% itself cannot be passed messages directly (well, it could be via +%% the amqqueue:run_backing_queue callback but that would induce +%% additional unnecessary loading on the master queue process), yet it +%% needs to react to gm events, such as the death of slaves. Thus the +%% master creates the coordinator, and it is the coordinator that is +%% the gm callback module and event handler for the master. +%% +%% Consumers are only attached to the master. Thus the master is +%% responsible for informing all slaves when messages are fetched from +%% the bq, when they're acked, and when they're requeued. +%% +%% The basic goal is to ensure that all slaves perform actions on +%% their bqs in the same order as the master. Thus the master +%% intercepts all events going to its bq, and suitably broadcasts +%% these events on the gm. The slaves thus receive two streams of +%% events: one stream is via the gm, and one stream is from channels +%% directly. Whilst the stream via gm is guaranteed to be consistently +%% seen by all slaves, the same is not true of the stream via +%% channels. For example, in the event of an unexpected death of a +%% channel during a publish, only some of the mirrors may receive that +%% publish. As a result of this problem, the messages broadcast over +%% the gm contain published content, and thus slaves can operate +%% successfully on messages that they only receive via the gm.
The key +%% purpose of also sending messages directly from the channels to the +%% slaves is that without this, in the event of the death of the +%% master, messages could be lost until a suitable slave is promoted. +%% +%% However, that is not the only reason. For example, if confirms are +%% in use, then there is no guarantee that every slave will see the +%% delivery with the same msg_seq_no. As a result, the slaves have to +%% wait until they've seen both the publish via gm, and the publish +%% via the channel before they have enough information to be able to +%% perform the publish to their own bq, and subsequently issue the +%% confirm, if necessary. Either form of publish can arrive first, and +%% a slave can be upgraded to the master at any point during this +%% process. Confirms continue to be issued correctly, however. +%% +%% Because the slave is a full process, it impersonates parts of the +%% amqqueue API. However, it does not need to implement all parts: for +%% example, no ack or consumer-related message can arrive directly at +%% a slave from a channel: it is only publishes that pass both +%% directly to the slaves and go via gm. +%% +%% Slaves can be added dynamically. When this occurs, there is no +%% attempt made to sync the current contents of the master with the +%% new slave, thus the slave will start empty, regardless of the state +%% of the master. Thus the slave needs to be able to detect and ignore +%% operations which are for messages it has not received: because of +%% the strict FIFO nature of queues in general, this is +%% straightforward - all new publishes that the new slave receives via +%% gm should be processed as normal, but fetches which are for +%% messages the slave has never seen should be ignored. Similarly, +%% acks for messages the slave never fetched should be +%% ignored. 
Eventually, as the master is consumed from, the messages +%% at the head of the queue which were there before the slave joined +%% will disappear, and the slave will become fully synced with the +%% state of the master. The detection of the sync-status of a slave is +%% done entirely based on length: if the slave and the master both +%% agree on the length of the queue after the fetch of the head of the +%% queue, then the queues must be in sync. The only other possibility +%% is that the slave's queue is shorter, and thus the fetch should be +%% ignored. +%% +%% Because acktags are issued by the bq independently, and because +%% there is no requirement for the master and all slaves to use the +%% same bq, all references to msgs going over gm are by msg_id. Thus +%% upon acking, the master must convert the acktags back to msg_ids +%% (which happens to be what bq:ack returns) and then send the msg_ids +%% over gm; the slaves must then convert the msg_ids to acktags (a mapping +%% the slaves themselves must maintain). +%% +%% When the master dies, a slave gets promoted. This will be the +%% eldest slave, and thus the hope is that that slave is most likely +%% to be sync'd with the master. The design of gm is that the +%% notification of the death of the master will only appear once all +%% messages in-flight from the master have been fully delivered to all +%% members of the gm group. Thus at this point, the slave that gets +%% promoted cannot broadcast different events in a different order +%% than the master for the same msgs: there is no possibility for the +%% same msg to be processed by the old master and the new master - if +%% it was processed by the old master then it will have been processed +%% by the slave before the slave was promoted, and vice versa.
+%% +%% Upon promotion, all msgs pending acks are requeued as normal, the +%% slave constructs state suitable for use in the master module, and +%% then dynamically changes into an amqqueue_process with the master +%% as the bq, and the slave's bq as the master's bq. Thus the very +%% same process that was the slave is now a full amqqueue_process. +%% +%% It is important that we avoid memory leaks due to the death of +%% senders (i.e. channels) and partial publications. A sender +%% publishing a message may fail mid way through the publish and thus +%% only some of the mirrors will receive the message. We need the +%% mirrors to be able to detect this and tidy up as necessary to avoid +%% leaks. If we just had the master monitoring all senders then we +%% would have the possibility that a sender appears and only sends the +%% message to a few of the slaves before dying. Those slaves would +%% then hold on to the message, assuming they'll receive some +%% instruction eventually from the master. Thus we have both slaves +%% and the master monitor all senders they become aware of. But there +%% is a race: if the slave receives a DOWN of a sender, how does it +%% know whether or not the master is going to send it instructions +%% regarding those messages? +%% +%% Whilst the master monitors senders, it can't access its mailbox +%% directly, so it delegates monitoring to the coordinator. When the +%% coordinator receives a DOWN message from a sender, it informs the +%% master via a callback. This allows the master to do any tidying +%% necessary, but more importantly allows the master to broadcast a +%% sender_death message to all the slaves, saying the sender has +%% died. Once the slaves receive the sender_death message, they know +%% that they're not going to receive any more instructions from the gm +%% regarding that sender, thus they throw away any publications from +%% the sender pending publication instructions. 
However, it is +%% possible that the coordinator receives the DOWN and communicates +%% that to the master before the master has finished receiving and +%% processing publishes from the sender. This turns out not to be a +%% problem: the sender has actually died, and so will not need to +%% receive confirms or other feedback, and should further messages be +%% "received" from the sender, the master will ask the coordinator to +%% set up a new monitor, and will continue to process the messages +%% normally. Slaves may thus receive publishes via gm from previously +%% declared "dead" senders, but again, this is fine: should the slave +%% have just thrown out the message it had received directly from the +%% sender (due to receiving a sender_death message via gm), it will be +%% able to cope with the publication purely from the master via gm. +%% +%% When a slave receives a DOWN message for a sender, if it has not +%% received the sender_death message from the master via gm already, +%% then it will wait 20 seconds before broadcasting a request for +%% confirmation from the master that the sender really has died. +%% Should a sender have only sent a publish to slaves, this allows +%% slaves to inform the master of the previous existence of the +%% sender. The master will thus monitor the sender, receive the DOWN, +%% and subsequently broadcast the sender_death message, allowing the +%% slaves to tidy up. This process can repeat for the same sender: +%% consider one slave receives the publication, then the DOWN, then +%% asks for confirmation of death, then the master broadcasts the +%% sender_death message. Only then does another slave receive the +%% publication and thus set up its monitoring. Eventually that slave +%% too will receive the DOWN, ask for confirmation and the master will +%% monitor the sender again, receive another DOWN, and send out +%% another sender_death message. 
Given the 20 second delay before +%% requesting death confirmation, this is highly unlikely, but it is a +%% possibility. +%% +%% When the 20 second timer expires, the slave first checks to see +%% whether it still needs confirmation of the death before requesting +%% it. This prevents unnecessary traffic on gm as it allows one +%% broadcast of the sender_death message to satisfy many slaves. +%% +%% If we consider the promotion of a slave at this point, we have two +%% possibilities: that of the slave that has received the DOWN and is +%% thus waiting for confirmation from the master that the sender +%% really is down; and that of the slave that has not received the +%% DOWN. In the first case, in the act of promotion to master, the new +%% master will monitor again the dead sender, and after it has +%% finished promoting itself, it should find another DOWN waiting, +%% which it will then broadcast. This will allow slaves to tidy up as +%% normal. In the second case, we have the possibility that a +%% confirmation-of-sender-death request has been broadcast, but that +%% it was broadcast before the master failed, and that the slave being +%% promoted does not know anything about that sender, and so will not +%% monitor it on promotion. Thus a slave that broadcasts such a +%% request, at the point of broadcasting it, recurses, setting another +%% 20 second timer. As before, on expiry of the timer, the slave +%% checks to see whether it still has not received a sender_death +%% message for the dead sender, and if not, broadcasts a death +%% confirmation request. Thus this ensures that even when a master +%% dies and the new master has no knowledge of the dead sender, it will +%% eventually receive a death confirmation request, shall monitor the +%% dead sender, receive the DOWN and broadcast the sender_death +%% message.
+%% +%% The preceding commentary deals with the possibility of slaves +%% receiving publications from senders which the master does not, and +%% the need to prevent memory leaks in such scenarios. The inverse is +%% also possible: a partial publication may cause only the master to +%% receive a publication. It will then publish the message via gm. The +%% slaves will receive it via gm, will publish it to their BQ and will +%% set up monitoring on the sender. They will then receive the DOWN +%% message and the master will eventually publish the corresponding +%% sender_death message. The slave will then be able to tidy up its +%% state as normal. +%% +%% Recovery of mirrored queues is straightforward: as nodes die, the +%% remaining nodes record this, and eventually a situation is reached +%% in which only one node is alive, which is the master. This is the +%% only node which, upon recovery, will resurrect a mirrored queue: +%% nodes which die and then rejoin as a slave will start off empty as +%% if they have no mirrored content at all. This is not surprising: to +%% achieve anything more sophisticated would require the master and +%% recovering slave to be able to check to see whether they agree on +%% the last seen state of the queue: checking length alone is not +%% sufficient in this case. +%% +%% For more documentation see the comments in bug 23554. +%% +%%---------------------------------------------------------------------------- + +%% Starts the coordinator as a gen_server2 process. GM is either the +%% pid of an existing gm member, which init/1 links to, or 'undefined', +%% in which case init/1 starts a new gm member and waits for it to +%% join. DeathFun is called with the pid of each monitored process +%% for which a 'DOWN' message is received. +start_link(Queue, GM, DeathFun) -> + gen_server2:start_link(?MODULE, [Queue, GM, DeathFun], []). + +%% Synchronously asks the coordinator CPid for the pid of its gm +%% member. The call uses an infinite timeout. +get_gm(CPid) -> + gen_server2:call(CPid, get_gm, infinity). + +%% Asynchronously (cast) asks the coordinator CPid to monitor every +%% pid in Pids; pids it already monitors are left untouched. +ensure_monitoring(CPid, Pids) -> + gen_server2:cast(CPid, {ensure_monitoring, Pids}).
+ +%% --------------------------------------------------------------------------- +%% gen_server +%% --------------------------------------------------------------------------- + +%% Builds the coordinator state. When no gm pid is supplied a new gm +%% member is started with this module as its callback module, and we +%% block until our own joined/2 callback reports that it has joined; +%% otherwise we link to the supplied gm pid. A heartbeat is then +%% broadcast over gm every ?ONE_SECOND milliseconds. +init([#amqqueue { name = QueueName } = Q, GM, DeathFun]) -> + GM1 = case GM of + undefined -> + {ok, GM2} = gm:start_link(QueueName, ?MODULE, [self()]), + receive {joined, GM2, _Members} -> + ok + end, + GM2; + _ -> + true = link(GM), + GM + end, + {ok, _TRef} = + timer:apply_interval(?ONE_SECOND, gm, broadcast, [GM1, heartbeat]), + {ok, #state { q = Q, + gm = GM1, + monitors = dict:new(), + death_fun = DeathFun }, + hibernate, + {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. + +%% get_gm is the only call understood: reply with the gm pid. +handle_call(get_gm, _From, State = #state { gm = GM }) -> + reply(GM, State). + +%% {gm_deaths, Deaths} is cast to us by members_changed/3. The deaths +%% are logged and the dead members are removed from the queue's +%% record. We carry on only if the resulting master pid is on this +%% node, and stop normally if the queue no longer exists; any other +%% result is not matched and crashes the coordinator. +handle_cast({gm_deaths, Deaths}, + State = #state { q = #amqqueue { name = QueueName } }) -> + rabbit_log:info("Mirrored-queue (~s): Master ~s saw deaths of mirrors ~s~n", + [rabbit_misc:rs(QueueName), + rabbit_misc:pid_to_string(self()), + [[rabbit_misc:pid_to_string(Pid), $ ] || Pid <- Deaths]]), + case rabbit_mirror_queue_misc:remove_from_queue(QueueName, Deaths) of + {ok, Pid} when node(Pid) =:= node() -> + noreply(State); + {error, not_found} -> + {stop, normal, State} + end; + +%% Start monitoring every pid in Pids that is not already a key of +%% the monitors dict, remembering the monitor reference for each. +handle_cast({ensure_monitoring, Pids}, + State = #state { monitors = Monitors }) -> + Monitors1 = + lists:foldl(fun (Pid, MonitorsN) -> + case dict:is_key(Pid, MonitorsN) of + true -> MonitorsN; + false -> MRef = erlang:monitor(process, Pid), + dict:store(Pid, MRef, MonitorsN) + end + end, Monitors, Pids), + noreply(State #state { monitors = Monitors1 }). + +%% A monitored process has died: invoke the death fun with its pid +%% and forget the monitor. 'DOWN' messages for pids we are not +%% tracking are ignored. +handle_info({'DOWN', _MonitorRef, process, Pid, _Reason}, + State = #state { monitors = Monitors, + death_fun = Fun }) -> + noreply( + case dict:is_key(Pid, Monitors) of + false -> State; + true -> ok = Fun(Pid), + State #state { monitors = dict:erase(Pid, Monitors) } + end); + +%% Any other message is unexpected and stops the coordinator. +handle_info(Msg, State) -> + {stop, {unexpected_info, Msg}, State}.
+ +%% terminate/2 is a callback of both behaviours this module +%% implements, with the arguments in opposite orders; the two clauses +%% are told apart by the #state{} record versus the [CPid] list. +terminate(_Reason, #state{}) -> + %% gen_server case + ok; +terminate([_CPid], _Reason) -> + %% gm case + ok. + +%% No state conversion is performed on code change. +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%% --------------------------------------------------------------------------- +%% GM +%% --------------------------------------------------------------------------- + +%% Sends {joined, GmPid, Members} to the coordinator, which is blocked +%% in init/1 waiting for exactly this message (self() here is the +%% process running the callback, matched against the gm pid in init/1). +joined([CPid], Members) -> + CPid ! {joined, self(), Members}, + ok. + +%% Births are ignored; a non-empty list of deaths is forwarded to the +%% coordinator as a {gm_deaths, Deaths} cast. +members_changed([_CPid], _Births, []) -> + ok; +members_changed([CPid], _Births, Deaths) -> + ok = gen_server2:cast(CPid, {gm_deaths, Deaths}). + +%% Of the messages broadcast over gm, only {ensure_monitoring, Pids} +%% is passed on to the coordinator; heartbeats and everything else +%% are dropped. +handle_msg([_CPid], _From, heartbeat) -> + ok; +handle_msg([CPid], _From, {ensure_monitoring, _Pids} = Msg) -> + ok = gen_server2:cast(CPid, Msg); +handle_msg([_CPid], _From, _Msg) -> + ok. + +%% --------------------------------------------------------------------------- +%% Others +%% --------------------------------------------------------------------------- + +%% Build gen_server2 return tuples that always request hibernation. +noreply(State) -> + {noreply, State, hibernate}. + +reply(Reply, State) -> + {reply, Reply, State, hibernate}. diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl new file mode 100644 index 00000000..532911f2 --- /dev/null +++ b/src/rabbit_mirror_queue_master.erl @@ -0,0 +1,390 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is VMware, Inc. +%% Copyright (c) 2010-2011 VMware, Inc. All rights reserved. +%% + +-module(rabbit_mirror_queue_master).
+ +-export([init/3, terminate/2, delete_and_terminate/2, + purge/1, publish/4, publish_delivered/5, fetch/2, ack/2, + requeue/3, len/1, is_empty/1, drain_confirmed/1, dropwhile/2, + set_ram_duration_target/2, ram_duration/1, + needs_timeout/1, timeout/1, handle_pre_hibernate/1, + status/1, invoke/3, is_duplicate/2, discard/3]). + +-export([start/1, stop/0]). + +-export([promote_backing_queue_state/6, sender_death_fun/0]). + +-behaviour(rabbit_backing_queue). + +-include("rabbit.hrl"). + +%% Master state, as used by the functions in this module: +%% gm - pid of the gm member used for broadcasts +%% coordinator - pid of the rabbit_mirror_queue_coordinator +%% backing_queue - module of the underlying backing queue +%% backing_queue_state - opaque state of the underlying backing queue +%% set_delivered - while positive, fetch/2 reports messages as +%% already delivered; set to the queue length on +%% promotion, decremented per fetch +%% seen_status - dict of MsgId -> published | confirmed | +%% discarded, consulted by is_duplicate/2 +%% confirmed - msg ids to hand out on the next drain_confirmed/1 +%% ack_msg_id - dict of AckTag -> MsgId +%% known_senders - set of sender pids the coordinator has been +%% asked to monitor +-record(state, { gm, + coordinator, + backing_queue, + backing_queue_state, + set_delivered, + seen_status, + confirmed, + ack_msg_id, + known_senders + }). + +-ifdef(use_specs). + +-export_type([death_fun/0]). + +-type(death_fun() :: fun ((pid()) -> 'ok')). +-type(master_state() :: #state { gm :: pid(), + coordinator :: pid(), + backing_queue :: atom(), + backing_queue_state :: any(), + set_delivered :: non_neg_integer(), + seen_status :: dict(), + confirmed :: [rabbit_guid:guid()], + ack_msg_id :: dict(), + known_senders :: set() + }). + +-spec(promote_backing_queue_state/6 :: + (pid(), atom(), any(), pid(), dict(), [pid()]) -> master_state()). +-spec(sender_death_fun/0 :: () -> death_fun()). + +-endif. + +%% For general documentation of HA design, see +%% rabbit_mirror_queue_coordinator + +%% --------------------------------------------------------------------------- +%% Backing queue +%% --------------------------------------------------------------------------- + +%% start/1 and stop/0 are part of the backing queue interface but +%% deliberately exit if they are ever called. +start(_DurableQueues) -> + %% This will never get called as this module will never be + %% installed as the default BQ implementation. + exit({not_valid_for_generic_backing_queue, ?MODULE}). + +stop() -> + %% Same as start/1. + exit({not_valid_for_generic_backing_queue, ?MODULE}).
+ +%% Starts a coordinator (which in turn starts a gm member), asks for +%% a mirror to be added on every configured mirror node other than +%% this one, and initialises the underlying backing queue named by +%% the backing_queue_module application environment key. A list of +%% mirror nodes is treated as a list of binaries and converted to +%% atoms; 'all' means every clustered node. The results of the +%% add_mirror calls are discarded. +init(#amqqueue { name = QName, mirror_nodes = MNodes } = Q, Recover, + AsyncCallback) -> + {ok, CPid} = rabbit_mirror_queue_coordinator:start_link( + Q, undefined, sender_death_fun()), + GM = rabbit_mirror_queue_coordinator:get_gm(CPid), + MNodes1 = + (case MNodes of + all -> rabbit_mnesia:all_clustered_nodes(); + undefined -> []; + _ -> [list_to_atom(binary_to_list(Node)) || Node <- MNodes] + end) -- [node()], + [rabbit_mirror_queue_misc:add_mirror(QName, Node) || Node <- MNodes1], + {ok, BQ} = application:get_env(backing_queue_module), + BQS = BQ:init(Q, Recover, AsyncCallback), + #state { gm = GM, + coordinator = CPid, + backing_queue = BQ, + backing_queue_state = BQS, + set_delivered = 0, + seen_status = dict:new(), + confirmed = [], + ack_msg_id = dict:new(), + known_senders = sets:new() }. + +%% Terminates the underlying backing queue. Nothing is broadcast over +%% gm by either clause. +terminate({shutdown, dropped} = Reason, + State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> + %% Backing queue termination - this node has been explicitly + %% dropped. Normally, non-durable queues would be tidied up on + %% startup, but there's a possibility that we will be added back + %% in without this node being restarted. Thus we must do the full + %% blown delete_and_terminate now, but only locally: we do not + %% broadcast delete_and_terminate. + State #state { backing_queue_state = BQ:delete_and_terminate(Reason, BQS), + set_delivered = 0 }; +terminate(Reason, + State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> + %% Backing queue termination. The queue is going down but + %% shouldn't be deleted. Most likely safe shutdown of this + %% node. Thus just let some other slave take over. + State #state { backing_queue_state = BQ:terminate(Reason, BQS) }. + +%% Broadcasts {delete_and_terminate, Reason} over gm and then deletes +%% and terminates the underlying backing queue. +delete_and_terminate(Reason, State = #state { gm = GM, + backing_queue = BQ, + backing_queue_state = BQS }) -> + ok = gm:broadcast(GM, {delete_and_terminate, Reason}), + State #state { backing_queue_state = BQ:delete_and_terminate(Reason, BQS), + set_delivered = 0 }.
+ +%% Broadcasts {set_length, 0} over gm, then purges the underlying +%% backing queue. Returns {Count, State1} with Count as reported by +%% the underlying purge. +purge(State = #state { gm = GM, + backing_queue = BQ, + backing_queue_state = BQS }) -> + ok = gm:broadcast(GM, {set_length, 0}), + {Count, BQS1} = BQ:purge(BQS), + {Count, State #state { backing_queue_state = BQS1, + set_delivered = 0 }}. + +%% Broadcasts the publish (including the message itself) over gm, +%% publishes to the underlying backing queue and makes sure the +%% sending channel is being monitored. The message must not already +%% be present in seen_status. +publish(Msg = #basic_message { id = MsgId }, MsgProps, ChPid, + State = #state { gm = GM, + seen_status = SS, + backing_queue = BQ, + backing_queue_state = BQS }) -> + false = dict:is_key(MsgId, SS), %% ASSERTION + ok = gm:broadcast(GM, {publish, false, ChPid, MsgProps, Msg}), + BQS1 = BQ:publish(Msg, MsgProps, ChPid, BQS), + ensure_monitoring(ChPid, State #state { backing_queue_state = BQS1 }). + +%% As publish/4, but for a message that is delivered straight to a +%% consumer. Returns {AckTag, State1}; an AckTag other than +%% 'undefined' is recorded in ack_msg_id against the message id. +publish_delivered(AckRequired, Msg = #basic_message { id = MsgId }, MsgProps, + ChPid, State = #state { gm = GM, + seen_status = SS, + backing_queue = BQ, + backing_queue_state = BQS, + ack_msg_id = AM }) -> + false = dict:is_key(MsgId, SS), %% ASSERTION + %% Must use confirmed_broadcast here in order to guarantee that + %% all slaves are forced to interpret this publish_delivered at + %% the same point, especially if we die and a slave is promoted. + ok = gm:confirmed_broadcast( + GM, {publish, {true, AckRequired}, ChPid, MsgProps, Msg}), + {AckTag, BQS1} = + BQ:publish_delivered(AckRequired, Msg, MsgProps, ChPid, BQS), + AM1 = maybe_store_acktag(AckTag, MsgId, AM), + {AckTag, + ensure_monitoring(ChPid, State #state { backing_queue_state = BQS1, + ack_msg_id = AM1 })}. + +%% Runs dropwhile on the underlying backing queue, reduces +%% set_delivered by the number of messages dropped (never below 0) +%% and broadcasts the resulting queue length over gm as +%% {set_length, Len}. +dropwhile(Fun, State = #state { gm = GM, + backing_queue = BQ, + backing_queue_state = BQS, + set_delivered = SetDelivered }) -> + Len = BQ:len(BQS), + BQS1 = BQ:dropwhile(Fun, BQS), + Dropped = Len - BQ:len(BQS1), + SetDelivered1 = lists:max([0, SetDelivered - Dropped]), + ok = gm:broadcast(GM, {set_length, BQ:len(BQS1)}), + State #state { backing_queue_state = BQS1, + set_delivered = SetDelivered1 }.
+ +%% Returns {MsgIds, State1}. MsgIds is the content of the confirmed +%% field followed by the ids confirmed by the underlying backing +%% queue, except for ids still marked 'published' in seen_status: +%% those are withheld and re-marked 'confirmed' instead. The +%% confirmed field is reset to []. +drain_confirmed(State = #state { backing_queue = BQ, + backing_queue_state = BQS, + seen_status = SS, + confirmed = Confirmed }) -> + {MsgIds, BQS1} = BQ:drain_confirmed(BQS), + {MsgIds1, SS1} = + lists:foldl( + fun (MsgId, {MsgIdsN, SSN}) -> + %% We will never see 'discarded' here + case dict:find(MsgId, SSN) of + error -> + {[MsgId | MsgIdsN], SSN}; + {ok, published} -> + %% It was published when we were a slave, + %% and we were promoted before we saw the + %% publish from the channel. We still + %% haven't seen the channel publish, and + %% consequently we need to filter out the + %% confirm here. We will issue the confirm + %% when we see the publish from the channel. + {MsgIdsN, dict:store(MsgId, confirmed, SSN)}; + {ok, confirmed} -> + %% Well, confirms are racy by definition. + {[MsgId | MsgIdsN], SSN} + end + end, {[], SS}, MsgIds), + {Confirmed ++ MsgIds1, State #state { backing_queue_state = BQS1, + seen_status = SS1, + confirmed = [] }}. + +%% Fetches from the underlying backing queue. When a message is +%% returned, {fetch, AckRequired, MsgId, Remaining} is broadcast over +%% gm, the message is reported as already delivered for as long as +%% set_delivered is positive, set_delivered is decremented (never +%% below 0), and any acktag is recorded against the message id. +fetch(AckRequired, State = #state { gm = GM, + backing_queue = BQ, + backing_queue_state = BQS, + set_delivered = SetDelivered, + ack_msg_id = AM }) -> + {Result, BQS1} = BQ:fetch(AckRequired, BQS), + State1 = State #state { backing_queue_state = BQS1 }, + case Result of + empty -> + {Result, State1}; + {#basic_message { id = MsgId } = Message, IsDelivered, AckTag, + Remaining} -> + ok = gm:broadcast(GM, {fetch, AckRequired, MsgId, Remaining}), + IsDelivered1 = IsDelivered orelse SetDelivered > 0, + SetDelivered1 = lists:max([0, SetDelivered - 1]), + AM1 = maybe_store_acktag(AckTag, MsgId, AM), + {{Message, IsDelivered1, AckTag, Remaining}, + State1 #state { set_delivered = SetDelivered1, + ack_msg_id = AM1 }} + end.
+ +%% Acks on the underlying backing queue, drops the acktags from +%% ack_msg_id and, if any msg ids came back, broadcasts +%% {ack, MsgIds} over gm. Returns {MsgIds, State1}. +ack(AckTags, State = #state { gm = GM, + backing_queue = BQ, + backing_queue_state = BQS, + ack_msg_id = AM }) -> + {MsgIds, BQS1} = BQ:ack(AckTags, BQS), + AM1 = lists:foldl(fun dict:erase/2, AM, AckTags), + case MsgIds of + [] -> ok; + _ -> ok = gm:broadcast(GM, {ack, MsgIds}) + end, + {MsgIds, State #state { backing_queue_state = BQS1, + ack_msg_id = AM1 }}. + +%% Requeues on the underlying backing queue and broadcasts +%% {requeue, MsgPropsFun, MsgIds} over gm. Unlike ack/2 the broadcast +%% is unconditional and ack_msg_id is left as it is. +requeue(AckTags, MsgPropsFun, State = #state { gm = GM, + backing_queue = BQ, + backing_queue_state = BQS }) -> + {MsgIds, BQS1} = BQ:requeue(AckTags, MsgPropsFun, BQS), + ok = gm:broadcast(GM, {requeue, MsgPropsFun, MsgIds}), + {MsgIds, State #state { backing_queue_state = BQS1 }}. + +%% The functions from here down to status/1 simply delegate to the +%% underlying backing queue, re-wrapping its new state where one is +%% returned; nothing is broadcast over gm. +len(#state { backing_queue = BQ, backing_queue_state = BQS }) -> + BQ:len(BQS). + +is_empty(#state { backing_queue = BQ, backing_queue_state = BQS }) -> + BQ:is_empty(BQS). + +set_ram_duration_target(Target, State = #state { backing_queue = BQ, + backing_queue_state = BQS }) -> + State #state { backing_queue_state = + BQ:set_ram_duration_target(Target, BQS) }. + +ram_duration(State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> + {Result, BQS1} = BQ:ram_duration(BQS), + {Result, State #state { backing_queue_state = BQS1 }}. + +needs_timeout(#state { backing_queue = BQ, backing_queue_state = BQS }) -> + BQ:needs_timeout(BQS). + +timeout(State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> + State #state { backing_queue_state = BQ:timeout(BQS) }. + +handle_pre_hibernate(State = #state { backing_queue = BQ, + backing_queue_state = BQS }) -> + State #state { backing_queue_state = BQ:handle_pre_hibernate(BQS) }. + +status(#state { backing_queue = BQ, backing_queue_state = BQS }) -> + BQ:status(BQS). + +%% A fun addressed to this module is applied to the master state +%% itself (this is how sender_death_fun/0 reaches us); a fun addressed +%% to any other module is handed down to the underlying backing queue. +invoke(?MODULE, Fun, State) -> + Fun(?MODULE, State); +invoke(Mod, Fun, State = #state { backing_queue = BQ, + backing_queue_state = BQS }) -> + State #state { backing_queue_state = BQ:invoke(Mod, Fun, BQS) }.
%% Decides whether Message, now arriving from a channel, has already
%% been dealt with by this queue. That can only be the case if it was
%% seen via gm while this process was still a slave, in which case it
%% has an entry in seen_status.
%%
%% Returns {Result, State1} where Result is
%%   published - the message has already been published; any confirm
%%               that was being withheld is queued in 'confirmed';
%%   discarded - the message has already been discarded; the
%%               seen_status entry is kept so that the discard/3 call
%%               which follows can recognise (and then remove) it;
%%   otherwise - whatever the underlying backing queue answers.
is_duplicate(Message = #basic_message { id = MsgId },
             State = #state { seen_status         = SS,
                              backing_queue       = BQ,
                              backing_queue_state = BQS,
                              confirmed           = Confirmed }) ->
    %% Here, we need to deal with the possibility that we're about to
    %% receive a message that we've already seen when we were a slave
    %% (we received it via gm). Thus if we do receive such message now
    %% via the channel, there may be a confirm waiting to issue for
    %% it.

    %% We will never see {published, ChPid, MsgSeqNo} here.
    case dict:find(MsgId, SS) of
        error ->
            %% We permit the underlying BQ to have a peek at it, but
            %% only if we ourselves are not filtering out the msg.
            {Result, BQS1} = BQ:is_duplicate(Message, BQS),
            {Result, State #state { backing_queue_state = BQS1 }};
        {ok, published} ->
            %% It already got published when we were a slave and no
            %% confirmation is waiting. amqqueue_process will have, in
            %% its msg_id_to_channel mapping, the entry for dealing
            %% with the confirm when that comes back in (it's added
            %% immediately after calling is_duplicate). The msg is
            %% invalid. We will not see this again, nor will we be
            %% further involved in confirming this message, so erase.
            {published, State #state { seen_status = dict:erase(MsgId, SS) }};
        {ok, confirmed} ->
            %% It got published when we were a slave via gm, and
            %% confirmed some time after that (maybe even after
            %% promotion), but before we received the publish from the
            %% channel, so couldn't previously know what the
            %% msg_seq_no was (and thus confirm as a slave). So we
            %% need to confirm now. As above, amqqueue_process will
            %% have the entry for the msg_id_to_channel mapping added
            %% immediately after calling is_duplicate/2.
            {published, State #state { seen_status = dict:erase(MsgId, SS),
                                       confirmed   = [MsgId | Confirmed] }};
        {ok, discarded} ->
            %% Don't erase from SS here because discard/3 is about to
            %% be called and we need to be able to detect this case
            {discarded, State}
    end.

%% Discards Msg, which was sent by ChPid. Returns the new state.
%%
%% If the message is unknown to seen_status the discard is broadcast
%% over gm and passed to the underlying backing queue. If it is marked
%% 'discarded' (we learnt of the discard via gm while still a slave)
%% there is nothing to broadcast or to tell the backing queue; all
%% that remains is to drop the seen_status entry that is_duplicate/2
%% kept for us, which would otherwise stay in the dict forever.
discard(Msg = #basic_message { id = MsgId }, ChPid,
        State = #state { gm                  = GM,
                         backing_queue       = BQ,
                         backing_queue_state = BQS,
                         seen_status         = SS }) ->
    %% It's a massive error if we get told to discard something that's
    %% already been published or published-and-confirmed. To do that
    %% would require non FIFO access. Hence we should not find
    %% 'published' or 'confirmed' in this dict:find.
    case dict:find(MsgId, SS) of
        error ->
            %% MsgId is not a key of SS, so there is nothing to erase.
            ok = gm:broadcast(GM, {discard, ChPid, Msg}),
            State #state { backing_queue_state = BQ:discard(Msg, ChPid, BQS) };
        {ok, discarded} ->
            State #state { seen_status = dict:erase(MsgId, SS) }
    end.

%% ---------------------------------------------------------------------------
%% Other exported functions
%% ---------------------------------------------------------------------------

%% Builds the master state for a process being promoted. CPid is the
%% coordinator, BQ/BQS the backing queue module and state to wrap, GM
%% the gm member, SeenStatus the initial seen_status dict and KS the
%% list of sender pids already known. set_delivered starts at the
%% current queue length, so every message in the queue at this point
%% will be reported by fetch/2 as already delivered.
promote_backing_queue_state(CPid, BQ, BQS, GM, SeenStatus, KS) ->
    #state { gm                  = GM,
             coordinator         = CPid,
             backing_queue       = BQ,
             backing_queue_state = BQS,
             set_delivered       = BQ:len(BQS),
             seen_status         = SeenStatus,
             confirmed           = [],
             ack_msg_id          = dict:new(),
             known_senders       = sets:from_list(KS) }.

%% Returns the fun handed to the coordinator as its death fun. The
%% fun captures the pid of the calling process and, when applied to a
%% dead sender's pid, has that process run (via
%% rabbit_amqqueue:run_backing_queue/3 and invoke/3) a state update
%% which broadcasts {sender_death, DeadPid} over gm and removes the
%% pid from known_senders.
sender_death_fun() ->
    Self = self(),
    fun (DeadPid) ->
            rabbit_amqqueue:run_backing_queue(
              Self, ?MODULE,
              fun (?MODULE, State = #state { gm = GM, known_senders = KS }) ->
                      ok = gm:broadcast(GM, {sender_death, DeadPid}),
                      KS1 = sets:del_element(DeadPid, KS),
                      State #state { known_senders = KS1 }
              end)
    end.
+ +%% --------------------------------------------------------------------------- +%% Helpers +%% --------------------------------------------------------------------------- + +%% Records AckTag -> MsgId in the dict AM, unless the acktag is +%% 'undefined', in which case AM is returned unchanged. +maybe_store_acktag(undefined, _MsgId, AM) -> + AM; +maybe_store_acktag(AckTag, MsgId, AM) -> + dict:store(AckTag, MsgId, AM). + +%% Asks the coordinator to monitor ChPid the first time it is seen, +%% and remembers it in known_senders so that later calls for the same +%% pid do not contact the coordinator again. +ensure_monitoring(ChPid, State = #state { coordinator = CPid, + known_senders = KS }) -> + case sets:is_element(ChPid, KS) of + true -> State; + false -> ok = rabbit_mirror_queue_coordinator:ensure_monitoring( + CPid, [ChPid]), + State #state { known_senders = sets:add_element(ChPid, KS) } + end. diff --git a/src/rabbit_mirror_queue_misc.erl b/src/rabbit_mirror_queue_misc.erl new file mode 100644 index 00000000..6a9f733e --- /dev/null +++ b/src/rabbit_mirror_queue_misc.erl @@ -0,0 +1,135 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is VMware, Inc. +%% Copyright (c) 2010-2011 VMware, Inc. All rights reserved. +%% + +-module(rabbit_mirror_queue_misc). + +-export([remove_from_queue/2, on_node_up/0, + drop_mirror/2, drop_mirror/3, add_mirror/2, add_mirror/3]). + +-include("rabbit.hrl"). + +%% If the dead pids include the queue pid (i.e. the master has died) +%% then only remove that if we are about to be promoted.
Otherwise we +%% can have the situation where a slave updates the mnesia record for +%% a queue, promoting another slave before that slave realises it has +%% become the new master, which is bad because it could then mean the +%% slave (now master) receives messages it's not ready for (for +%% example, new consumers). +%% +%% Returns {ok, MasterPid}, where MasterPid is the first pid of the +%% queue (master first, then slaves in order) that is not on the node +%% of any dead pid, or {error, not_found} if the queue record is gone. +%% Note that pids are filtered by node, not by pid, and that the +%% match on the filtered list requires at least one pid to survive. +remove_from_queue(QueueName, DeadPids) -> + DeadNodes = [node(DeadPid) || DeadPid <- DeadPids], + rabbit_misc:execute_mnesia_transaction( + fun () -> + %% Someone else could have deleted the queue before we + %% get here. + case mnesia:read({rabbit_queue, QueueName}) of + [] -> {error, not_found}; + [Q = #amqqueue { pid = QPid, + slave_pids = SPids }] -> + [QPid1 | SPids1] = + [Pid || Pid <- [QPid | SPids], + not lists:member(node(Pid), DeadNodes)], + case {{QPid, SPids}, {QPid1, SPids1}} of + {Same, Same} -> + ok; + _ when QPid =:= QPid1 orelse node(QPid1) =:= node() -> + %% Either master hasn't changed, so + %% we're ok to update mnesia; or we have + %% become the master. + Q1 = Q #amqqueue { pid = QPid1, + slave_pids = SPids1 }, + ok = rabbit_amqqueue:store_queue(Q1); + _ -> + %% Master has changed, and we're not it, + %% so leave alone to allow the promoted + %% slave to find it and make its + %% promotion atomic. + ok + end, + {ok, QPid1} + end + end). + +%% Collects the names of all queues in rabbit_queue whose mirror_nodes +%% is 'all' or contains this node, and tries to add a mirror on this +%% node for each of them; the add_mirror results are discarded. +%% NOTE(review): the membership test compares node() (an atom) with +%% the elements of mirror_nodes, whereas rabbit_mirror_queue_master's +%% init/3 converts those elements with binary_to_list/1, i.e. treats +%% them as binaries. If they are binaries here too this test can +%% never succeed - confirm what the amqqueue record actually stores. +on_node_up() -> + Qs = + rabbit_misc:execute_mnesia_transaction( + fun () -> + mnesia:foldl( + fun (#amqqueue { mirror_nodes = undefined }, QsN) -> + QsN; + (#amqqueue { name = QName, + mirror_nodes = all }, QsN) -> + [QName | QsN]; + (#amqqueue { name = QName, + mirror_nodes = MNodes }, QsN) -> + case lists:member(node(), MNodes) of + true -> [QName | QsN]; + false -> QsN + end + end, [], rabbit_queue) + end), + [add_mirror(Q, node()) || Q <- Qs], + ok. + +%% As drop_mirror/2, with the queue identified by vhost path and +%% queue name rather than by resource name. +drop_mirror(VHostPath, QueueName, MirrorNode) -> + drop_mirror(rabbit_misc:r(VHostPath, queue, QueueName), MirrorNode).
+ +%% Stops the queue process (master or slave) of Queue that lives on +%% MirrorNode by sending it an exit signal with reason +%% {shutdown, dropped}. Returns an error tuple if the queue has no +%% process on that node, or if the process found there is the master +%% and there are no slaves. +drop_mirror(Queue, MirrorNode) -> + if_mirrored_queue( + Queue, + fun (#amqqueue { name = Name, pid = QPid, slave_pids = SPids }) -> + case [Pid || Pid <- [QPid | SPids], node(Pid) =:= MirrorNode] of + [] -> + {error, {queue_not_mirrored_on_node, MirrorNode}}; + [QPid] when SPids =:= [] -> + {error, cannot_drop_only_mirror}; + [Pid] -> + rabbit_log:info( + "Dropping queue mirror on node ~p for ~s~n", + [MirrorNode, rabbit_misc:rs(Name)]), + exit(Pid, {shutdown, dropped}), + ok + end + end). + +%% As add_mirror/2, with the queue identified by vhost path and queue +%% name rather than by resource name. +add_mirror(VHostPath, QueueName, MirrorNode) -> + add_mirror(rabbit_misc:r(VHostPath, queue, QueueName), MirrorNode). + +%% Starts a slave for Queue on MirrorNode through +%% rabbit_mirror_queue_slave_sup, unless the queue already has a +%% process on that node. The outcome of the start is logged; 'ok' is +%% returned on success and the start_child result otherwise. +add_mirror(Queue, MirrorNode) -> + if_mirrored_queue( + Queue, + fun (#amqqueue { name = Name, pid = QPid, slave_pids = SPids } = Q) -> + case [Pid || Pid <- [QPid | SPids], node(Pid) =:= MirrorNode] of + [] -> Result = rabbit_mirror_queue_slave_sup:start_child( + MirrorNode, [Q]), + rabbit_log:info( + "Adding mirror of queue ~s on node ~p: ~p~n", + [rabbit_misc:rs(Name), MirrorNode, Result]), + case Result of + {ok, _Pid} -> ok; + _ -> Result + end; + [_] -> {error, {queue_already_mirrored_on_node, MirrorNode}} + end + end). + +%% Looks the queue up with rabbit_amqqueue:with/2 and applies Fun to +%% its record only if the queue was declared with an x-ha-policy +%% argument; for any other queue the result is 'ok' and Fun is not +%% called. +if_mirrored_queue(Queue, Fun) -> + rabbit_amqqueue:with( + Queue, fun (#amqqueue { arguments = Args } = Q) -> + case rabbit_misc:table_lookup(Args, <<"x-ha-policy">>) of + undefined -> ok; + _ -> Fun(Q) + end + end). diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl new file mode 100644 index 00000000..b38a8967 --- /dev/null +++ b/src/rabbit_mirror_queue_slave.erl @@ -0,0 +1,850 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.
See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is VMware, Inc. +%% Copyright (c) 2010-2011 VMware, Inc. All rights reserved. +%% + +-module(rabbit_mirror_queue_slave). + +%% For general documentation of HA design, see +%% rabbit_mirror_queue_coordinator +%% +%% We join the GM group before we add ourselves to the amqqueue +%% record. As a result: +%% 1. We can receive msgs from GM that correspond to messages we will +%% never receive from publishers. +%% 2. When we receive a message from publishers, we must receive a +%% message from the GM group for it. +%% 3. However, that instruction from the GM group can arrive either +%% before or after the actual message. We need to be able to +%% distinguish between GM instructions arriving early, and case (1) +%% above. +%% +%% All instructions from the GM group must be processed in the order +%% in which they're received. + +-export([start_link/1, set_maximum_since_use/2]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3, handle_pre_hibernate/1, prioritise_call/3, + prioritise_cast/2]). + +-export([joined/2, members_changed/3, handle_msg/3]). + +-behaviour(gen_server2). +-behaviour(gm). + +-include("rabbit.hrl"). +-include("gm_specs.hrl"). + +-define(SYNC_INTERVAL, 25). %% milliseconds +-define(RAM_DURATION_UPDATE_INTERVAL, 5000). +-define(DEATH_TIMEOUT, 20000). %% 20 seconds + +-record(state, { q, + gm, + master_pid, + backing_queue, + backing_queue_state, + sync_timer_ref, + rate_timer_ref, + + sender_queues, %% :: Pid -> {Q {Msg, Bool}, Set MsgId} + msg_id_ack, %% :: MsgId -> AckTag + ack_num, + + msg_id_status, + known_senders + }). + +start_link(Q) -> + gen_server2:start_link(?MODULE, [Q], []). + +set_maximum_since_use(QPid, Age) -> + gen_server2:cast(QPid, {set_maximum_since_use, Age}). 
+ +init([#amqqueue { name = QueueName } = Q]) -> + process_flag(trap_exit, true), %% amqqueue_process traps exits too. + {ok, GM} = gm:start_link(QueueName, ?MODULE, [self()]), + receive {joined, GM} -> + ok + end, + Self = self(), + Node = node(), + {ok, MPid} = + rabbit_misc:execute_mnesia_transaction( + fun () -> + [Q1 = #amqqueue { pid = QPid, slave_pids = MPids }] = + mnesia:read({rabbit_queue, QueueName}), + %% ASSERTION + [] = [Pid || Pid <- [QPid | MPids], node(Pid) =:= Node], + MPids1 = MPids ++ [Self], + mnesia:write(rabbit_queue, + Q1 #amqqueue { slave_pids = MPids1 }, + write), + {ok, QPid} + end), + erlang:monitor(process, MPid), + ok = file_handle_cache:register_callback( + rabbit_amqqueue, set_maximum_since_use, [self()]), + ok = rabbit_memory_monitor:register( + self(), {rabbit_amqqueue, set_ram_duration_target, [self()]}), + {ok, BQ} = application:get_env(backing_queue_module), + BQS = bq_init(BQ, Q, false), + {ok, #state { q = Q, + gm = GM, + master_pid = MPid, + backing_queue = BQ, + backing_queue_state = BQS, + rate_timer_ref = undefined, + sync_timer_ref = undefined, + + sender_queues = dict:new(), + msg_id_ack = dict:new(), + ack_num = 0, + + msg_id_status = dict:new(), + known_senders = dict:new() + }, hibernate, + {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. + +handle_call({deliver_immediately, Delivery = #delivery {}}, From, State) -> + %% Synchronous, "immediate" delivery mode + + %% It is safe to reply 'false' here even if a) we've not seen the + %% msg via gm, or b) the master dies before we receive the msg via + %% gm. In the case of (a), we will eventually receive the msg via + %% gm, and it's only the master's result to the channel that is + %% important. In the case of (b), if the master does die and we do + %% get promoted then at that point we have no consumers, thus + %% 'false' is precisely the correct answer. However, we must be + %% careful to _not_ enqueue the message in this case. 
+ + %% Note this is distinct from the case where we receive the msg + %% via gm first, then we're promoted to master, and only then do + %% we receive the msg from the channel. + gen_server2:reply(From, false), %% master may deliver it, not us + noreply(maybe_enqueue_message(Delivery, false, State)); + +handle_call({deliver, Delivery = #delivery {}}, From, State) -> + %% Synchronous, "mandatory" delivery mode + gen_server2:reply(From, true), %% amqqueue throws away the result anyway + noreply(maybe_enqueue_message(Delivery, true, State)); + +handle_call({gm_deaths, Deaths}, From, + State = #state { q = #amqqueue { name = QueueName }, + gm = GM, + master_pid = MPid }) -> + rabbit_log:info("Mirrored-queue (~s): Slave ~s saw deaths of mirrors ~s~n", + [rabbit_misc:rs(QueueName), + rabbit_misc:pid_to_string(self()), + [[rabbit_misc:pid_to_string(Pid), $ ] || Pid <- Deaths]]), + %% The GM has told us about deaths, which means we're not going to + %% receive any more messages from GM + case rabbit_mirror_queue_misc:remove_from_queue(QueueName, Deaths) of + {ok, Pid} when node(Pid) =:= node(MPid) -> + %% master hasn't changed + reply(ok, State); + {ok, Pid} when node(Pid) =:= node() -> + %% we've become master + promote_me(From, State); + {ok, Pid} -> + %% master has changed to not us. + gen_server2:reply(From, ok), + erlang:monitor(process, Pid), + ok = gm:broadcast(GM, heartbeat), + noreply(State #state { master_pid = Pid }); + {error, not_found} -> + gen_server2:reply(From, ok), + {stop, normal, State} + end. + +handle_cast({run_backing_queue, Mod, Fun}, State) -> + noreply(run_backing_queue(Mod, Fun, State)); + +handle_cast({gm, Instruction}, State) -> + handle_process_result(process_instruction(Instruction, State)); + +handle_cast({deliver, Delivery = #delivery {}}, State) -> + %% Asynchronous, non-"mandatory", non-"immediate" deliver mode. 
+ noreply(maybe_enqueue_message(Delivery, true, State)); + +handle_cast({set_maximum_since_use, Age}, State) -> + ok = file_handle_cache:set_maximum_since_use(Age), + noreply(State); + +handle_cast({set_ram_duration_target, Duration}, + State = #state { backing_queue = BQ, + backing_queue_state = BQS }) -> + BQS1 = BQ:set_ram_duration_target(Duration, BQS), + noreply(State #state { backing_queue_state = BQS1 }); + +handle_cast(update_ram_duration, + State = #state { backing_queue = BQ, + backing_queue_state = BQS }) -> + {RamDuration, BQS1} = BQ:ram_duration(BQS), + DesiredDuration = + rabbit_memory_monitor:report_ram_duration(self(), RamDuration), + BQS2 = BQ:set_ram_duration_target(DesiredDuration, BQS1), + noreply(State #state { rate_timer_ref = just_measured, + backing_queue_state = BQS2 }); + +handle_cast(sync_timeout, State) -> + noreply(backing_queue_timeout( + State #state { sync_timer_ref = undefined })). + +handle_info(timeout, State) -> + noreply(backing_queue_timeout(State)); + +handle_info({'DOWN', _MonitorRef, process, MPid, _Reason}, + State = #state { gm = GM, master_pid = MPid }) -> + ok = gm:broadcast(GM, {process_death, MPid}), + noreply(State); + +handle_info({'DOWN', _MonitorRef, process, ChPid, _Reason}, State) -> + noreply(local_sender_death(ChPid, State)); + +handle_info({'EXIT', _Pid, Reason}, State) -> + {stop, Reason, State}; + +handle_info(Msg, State) -> + {stop, {unexpected_info, Msg}, State}. + +%% If the Reason is shutdown, or {shutdown, _}, it is not the queue +%% being deleted: it's just the node going down. Even though we're a +%% slave, we have no idea whether or not we'll be the only copy coming +%% back up. Thus we must assume we will be, and preserve anything we +%% have on disk. +terminate(_Reason, #state { backing_queue_state = undefined }) -> + %% We've received a delete_and_terminate from gm, thus nothing to + %% do here. 
+ ok; +terminate({shutdown, dropped} = R, #state { backing_queue = BQ, + backing_queue_state = BQS }) -> + %% See rabbit_mirror_queue_master:terminate/2 + BQ:delete_and_terminate(R, BQS); +terminate(Reason, #state { q = Q, + gm = GM, + backing_queue = BQ, + backing_queue_state = BQS, + rate_timer_ref = RateTRef }) -> + ok = gm:leave(GM), + QueueState = rabbit_amqqueue_process:init_with_backing_queue_state( + Q, BQ, BQS, RateTRef, [], [], dict:new()), + rabbit_amqqueue_process:terminate(Reason, QueueState); +terminate([_SPid], _Reason) -> + %% gm case + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +handle_pre_hibernate(State = #state { backing_queue = BQ, + backing_queue_state = BQS }) -> + {RamDuration, BQS1} = BQ:ram_duration(BQS), + DesiredDuration = + rabbit_memory_monitor:report_ram_duration(self(), RamDuration), + BQS2 = BQ:set_ram_duration_target(DesiredDuration, BQS1), + BQS3 = BQ:handle_pre_hibernate(BQS2), + {hibernate, stop_rate_timer(State #state { backing_queue_state = BQS3 })}. + +prioritise_call(Msg, _From, _State) -> + case Msg of + {gm_deaths, _Deaths} -> 5; + _ -> 0 + end. + +prioritise_cast(Msg, _State) -> + case Msg of + update_ram_duration -> 8; + {set_ram_duration_target, _Duration} -> 8; + {set_maximum_since_use, _Age} -> 8; + {run_backing_queue, _Mod, _Fun} -> 6; + sync_timeout -> 6; + {gm, _Msg} -> 5; + {post_commit, _Txn, _AckTags} -> 4; + _ -> 0 + end. + +%% --------------------------------------------------------------------------- +%% GM +%% --------------------------------------------------------------------------- + +joined([SPid], _Members) -> + SPid ! {joined, self()}, + ok. + +members_changed([_SPid], _Births, []) -> + ok; +members_changed([SPid], _Births, Deaths) -> + inform_deaths(SPid, Deaths). 
+ +handle_msg([_SPid], _From, heartbeat) -> + ok; +handle_msg([_SPid], _From, {ensure_monitoring, _Pid}) -> + %% This is only of value to the master + ok; +handle_msg([SPid], _From, {process_death, Pid}) -> + inform_deaths(SPid, [Pid]); +handle_msg([SPid], _From, Msg) -> + ok = gen_server2:cast(SPid, {gm, Msg}). + +inform_deaths(SPid, Deaths) -> + rabbit_misc:with_exit_handler( + fun () -> {stop, normal} end, + fun () -> + case gen_server2:call(SPid, {gm_deaths, Deaths}, infinity) of + ok -> + ok; + {promote, CPid} -> + {become, rabbit_mirror_queue_coordinator, [CPid]} + end + end). + +%% --------------------------------------------------------------------------- +%% Others +%% --------------------------------------------------------------------------- + +bq_init(BQ, Q, Recover) -> + Self = self(), + BQ:init(Q, Recover, + fun (Mod, Fun) -> + rabbit_amqqueue:run_backing_queue(Self, Mod, Fun) + end). + +run_backing_queue(rabbit_mirror_queue_master, Fun, State) -> + %% Yes, this might look a little crazy, but see comments in + %% confirm_sender_death/1 + Fun(?MODULE, State); +run_backing_queue(Mod, Fun, State = #state { backing_queue = BQ, + backing_queue_state = BQS }) -> + State #state { backing_queue_state = BQ:invoke(Mod, Fun, BQS) }. + +needs_confirming(#delivery{ msg_seq_no = undefined }, _State) -> + never; +needs_confirming(#delivery { message = #basic_message { + is_persistent = true } }, + #state { q = #amqqueue { durable = true } }) -> + eventually; +needs_confirming(_Delivery, _State) -> + immediately. + +confirm_messages(MsgIds, State = #state { msg_id_status = MS }) -> + {MS1, CMs} = + lists:foldl( + fun (MsgId, {MSN, CMsN} = Acc) -> + %% We will never see 'discarded' here + case dict:find(MsgId, MSN) of + error -> + %% If it needed confirming, it'll have + %% already been done. + Acc; + {ok, {published, ChPid}} -> + %% Still not seen it from the channel, just + %% record that it's been confirmed. 
+ {dict:store(MsgId, {confirmed, ChPid}, MSN), CMsN}; + {ok, {published, ChPid, MsgSeqNo}} -> + %% Seen from both GM and Channel. Can now + %% confirm. + {dict:erase(MsgId, MSN), + gb_trees_cons(ChPid, MsgSeqNo, CMsN)}; + {ok, {confirmed, _ChPid}} -> + %% It's already been confirmed. This is + %% probably because it's been both sync'd to disk + %% and then delivered and ack'd before we've + %% seen the publish from the + %% channel. Nothing to do here. + Acc + end + end, {MS, gb_trees:empty()}, MsgIds), + [ok = rabbit_channel:confirm(ChPid, MsgSeqNos) + || {ChPid, MsgSeqNos} <- gb_trees:to_list(CMs)], + State #state { msg_id_status = MS1 }. + +gb_trees_cons(Key, Value, Tree) -> + case gb_trees:lookup(Key, Tree) of + {value, Values} -> gb_trees:update(Key, [Value | Values], Tree); + none -> gb_trees:insert(Key, [Value], Tree) + end. + +handle_process_result({ok, State}) -> noreply(State); +handle_process_result({stop, State}) -> {stop, normal, State}. + +promote_me(From, #state { q = Q, + gm = GM, + backing_queue = BQ, + backing_queue_state = BQS, + rate_timer_ref = RateTRef, + sender_queues = SQ, + msg_id_ack = MA, + msg_id_status = MS, + known_senders = KS }) -> + rabbit_log:info("Mirrored-queue (~s): Promoting slave ~s to master~n", + [rabbit_misc:rs(Q #amqqueue.name), + rabbit_misc:pid_to_string(self())]), + Q1 = Q #amqqueue { pid = self() }, + {ok, CPid} = rabbit_mirror_queue_coordinator:start_link( + Q1, GM, rabbit_mirror_queue_master:sender_death_fun()), + true = unlink(GM), + gen_server2:reply(From, {promote, CPid}), + ok = gm:confirmed_broadcast(GM, heartbeat), + + %% Everything that we're monitoring, we need to ensure our new + %% coordinator is monitoring. 
+ + MonitoringPids = [begin true = erlang:demonitor(MRef), + Pid + end || {Pid, MRef} <- dict:to_list(KS)], + ok = rabbit_mirror_queue_coordinator:ensure_monitoring( + CPid, MonitoringPids), + + %% We find all the messages that we've received from channels but + %% not from gm, and if they're due to be enqueued on promotion + %% then we pass them to the + %% queue_process:init_with_backing_queue_state to be enqueued. + %% + %% We also have to requeue messages which are pending acks: the + %% consumers from the master queue have been lost and so these + %% messages need requeuing. They might also be pending + %% confirmation, and indeed they might also be pending arrival of + %% the publication from the channel itself, if we received both + %% the publication and the fetch via gm first! Requeuing doesn't + %% affect confirmations: if the message was previously pending a + %% confirmation then it still will be, under the same msg_id. So + %% as a master, we need to be prepared to filter out the + %% publication of said messages from the channel (is_duplicate + %% (thus such requeued messages must remain in the msg_id_status + %% (MS) which becomes seen_status (SS) in the master)). + %% + %% Then there are messages we already have in the queue, which are + %% not currently pending acknowledgement: + %% 1. Messages we've only received via gm: + %% Filter out subsequent publication from channel through + %% validate_message. Might have to issue confirms then or + %% later, thus queue_process state will have to know that + %% there's a pending confirm. + %% 2. Messages received via both gm and channel: + %% Queue will have to deal with issuing confirms if necessary. + %% + %% MS contains the following four entry types: + %% + %% a) {published, ChPid}: + %% published via gm only; pending arrival of publication from + %% channel, maybe pending confirm. + %% + %% b) {published, ChPid, MsgSeqNo}: + %% published via gm and channel; pending confirm. 
+ %% + %% c) {confirmed, ChPid}: + %% published via gm only, and confirmed; pending publication + %% from channel. + %% + %% d) discarded + %% seen via gm only as discarded. Pending publication from + %% channel + %% + %% The forms a, c and d only, need to go to the master state + %% seen_status (SS). + %% + %% The form b only, needs to go through to the queue_process + %% state to form the msg_id_to_channel mapping (MTC). + %% + %% No messages that are enqueued from SQ at this point will have + %% entries in MS. + %% + %% Messages that are extracted from MA may have entries in MS, and + %% those messages are then requeued. However, as discussed above, + %% this does not affect MS, nor which bits go through to SS in + %% Master, or MTC in queue_process. + %% + %% Everything that's in MA gets requeued. Consequently the new + %% master should start with a fresh AM as there are no messages + %% pending acks. + + MSList = dict:to_list(MS), + SS = dict:from_list( + [E || E = {_MsgId, discarded} <- MSList] ++ + [{MsgId, Status} + || {MsgId, {Status, _ChPid}} <- MSList, + Status =:= published orelse Status =:= confirmed]), + + MasterState = rabbit_mirror_queue_master:promote_backing_queue_state( + CPid, BQ, BQS, GM, SS, MonitoringPids), + + MTC = dict:from_list( + [{MsgId, {ChPid, MsgSeqNo}} || + {MsgId, {published, ChPid, MsgSeqNo}} <- dict:to_list(MS)]), + NumAckTags = [NumAckTag || {_MsgId, NumAckTag} <- dict:to_list(MA)], + AckTags = [AckTag || {_Num, AckTag} <- lists:sort(NumAckTags)], + Deliveries = [Delivery || {_ChPid, {PubQ, _PendCh}} <- dict:to_list(SQ), + {Delivery, true} <- queue:to_list(PubQ)], + QueueState = rabbit_amqqueue_process:init_with_backing_queue_state( + Q1, rabbit_mirror_queue_master, MasterState, RateTRef, + AckTags, Deliveries, MTC), + {become, rabbit_amqqueue_process, QueueState, hibernate}. + +noreply(State) -> + {NewState, Timeout} = next_state(State), + {noreply, NewState, Timeout}. 
+ +reply(Reply, State) -> + {NewState, Timeout} = next_state(State), + {reply, Reply, NewState, Timeout}. + +next_state(State = #state{backing_queue = BQ, backing_queue_state = BQS}) -> + {MsgIds, BQS1} = BQ:drain_confirmed(BQS), + State1 = ensure_rate_timer( + confirm_messages(MsgIds, State #state { + backing_queue_state = BQS1 })), + case BQ:needs_timeout(BQS1) of + false -> {stop_sync_timer(State1), hibernate}; + idle -> {stop_sync_timer(State1), 0 }; + timed -> {ensure_sync_timer(State1), 0 } + end. + +backing_queue_timeout(State = #state { backing_queue = BQ }) -> + run_backing_queue(BQ, fun (M, BQS) -> M:timeout(BQS) end, State). + +ensure_sync_timer(State = #state { sync_timer_ref = undefined }) -> + {ok, TRef} = timer:apply_after( + ?SYNC_INTERVAL, rabbit_amqqueue, sync_timeout, [self()]), + State #state { sync_timer_ref = TRef }; +ensure_sync_timer(State) -> + State. + +stop_sync_timer(State = #state { sync_timer_ref = undefined }) -> + State; +stop_sync_timer(State = #state { sync_timer_ref = TRef }) -> + {ok, cancel} = timer:cancel(TRef), + State #state { sync_timer_ref = undefined }. + +ensure_rate_timer(State = #state { rate_timer_ref = undefined }) -> + {ok, TRef} = timer:apply_after( + ?RAM_DURATION_UPDATE_INTERVAL, + rabbit_amqqueue, update_ram_duration, + [self()]), + State #state { rate_timer_ref = TRef }; +ensure_rate_timer(State = #state { rate_timer_ref = just_measured }) -> + State #state { rate_timer_ref = undefined }; +ensure_rate_timer(State) -> + State. + +stop_rate_timer(State = #state { rate_timer_ref = undefined }) -> + State; +stop_rate_timer(State = #state { rate_timer_ref = just_measured }) -> + State #state { rate_timer_ref = undefined }; +stop_rate_timer(State = #state { rate_timer_ref = TRef }) -> + {ok, cancel} = timer:cancel(TRef), + State #state { rate_timer_ref = undefined }. 
+ +ensure_monitoring(ChPid, State = #state { known_senders = KS }) -> + case dict:is_key(ChPid, KS) of + true -> State; + false -> MRef = erlang:monitor(process, ChPid), + State #state { known_senders = dict:store(ChPid, MRef, KS) } + end. + +local_sender_death(ChPid, State = #state { known_senders = KS }) -> + ok = case dict:is_key(ChPid, KS) of + false -> ok; + true -> confirm_sender_death(ChPid) + end, + State. + +confirm_sender_death(Pid) -> + %% We have to deal with the possibility that we'll be promoted to + %% master before this thing gets run. Consequently we set the + %% module to rabbit_mirror_queue_master so that if we do become a + %% rabbit_amqqueue_process before then, sane things will happen. + Fun = + fun (?MODULE, State = #state { known_senders = KS, + gm = GM }) -> + %% We're running still as a slave + ok = case dict:is_key(Pid, KS) of + false -> ok; + true -> gm:broadcast(GM, {ensure_monitoring, [Pid]}), + confirm_sender_death(Pid) + end, + State; + (rabbit_mirror_queue_master, State) -> + %% We've become a master. State is now opaque to + %% us. When we became master, if Pid was still known + %% to us then we'd have set up monitoring of it then, + %% so this is now a noop. + State + end, + %% Note that we do not remove our knowledge of this ChPid until we + %% get the sender_death from GM. + {ok, _TRef} = timer:apply_after( + ?DEATH_TIMEOUT, rabbit_amqqueue, run_backing_queue, + [self(), rabbit_mirror_queue_master, Fun]), + ok. + +maybe_enqueue_message( + Delivery = #delivery { message = #basic_message { id = MsgId }, + msg_seq_no = MsgSeqNo, + sender = ChPid }, + EnqueueOnPromotion, + State = #state { sender_queues = SQ, msg_id_status = MS }) -> + State1 = ensure_monitoring(ChPid, State), + %% We will never see {published, ChPid, MsgSeqNo} here. 
+ case dict:find(MsgId, MS) of + error -> + {MQ, PendingCh} = get_sender_queue(ChPid, SQ), + MQ1 = queue:in({Delivery, EnqueueOnPromotion}, MQ), + SQ1 = dict:store(ChPid, {MQ1, PendingCh}, SQ), + State1 #state { sender_queues = SQ1 }; + {ok, {confirmed, ChPid}} -> + %% BQ has confirmed it but we didn't know what the + %% msg_seq_no was at the time. We do now! + ok = rabbit_channel:confirm(ChPid, [MsgSeqNo]), + SQ1 = remove_from_pending_ch(MsgId, ChPid, SQ), + State1 #state { sender_queues = SQ1, + msg_id_status = dict:erase(MsgId, MS) }; + {ok, {published, ChPid}} -> + %% It was published to the BQ and we didn't know the + %% msg_seq_no so couldn't confirm it at the time. + case needs_confirming(Delivery, State1) of + never -> + SQ1 = remove_from_pending_ch(MsgId, ChPid, SQ), + State1 #state { msg_id_status = dict:erase(MsgId, MS), + sender_queues = SQ1 }; + eventually -> + State1 #state { + msg_id_status = + dict:store(MsgId, {published, ChPid, MsgSeqNo}, MS) }; + immediately -> + ok = rabbit_channel:confirm(ChPid, [MsgSeqNo]), + SQ1 = remove_from_pending_ch(MsgId, ChPid, SQ), + State1 #state { msg_id_status = dict:erase(MsgId, MS), + sender_queues = SQ1 } + end; + {ok, discarded} -> + %% We've already heard from GM that the msg is to be + %% discarded. We won't see this again. + SQ1 = remove_from_pending_ch(MsgId, ChPid, SQ), + State1 #state { msg_id_status = dict:erase(MsgId, MS), + sender_queues = SQ1 } + end. + +get_sender_queue(ChPid, SQ) -> + case dict:find(ChPid, SQ) of + error -> {queue:new(), sets:new()}; + {ok, Val} -> Val + end. + +remove_from_pending_ch(MsgId, ChPid, SQ) -> + case dict:find(ChPid, SQ) of + error -> + SQ; + {ok, {MQ, PendingCh}} -> + dict:store(ChPid, {MQ, sets:del_element(MsgId, PendingCh)}, SQ) + end. 
+ +process_instruction( + {publish, Deliver, ChPid, MsgProps, Msg = #basic_message { id = MsgId }}, + State = #state { sender_queues = SQ, + backing_queue = BQ, + backing_queue_state = BQS, + msg_id_status = MS }) -> + + %% We really are going to do the publish right now, even though we + %% may not have seen it directly from the channel. As a result, we + %% may know that it needs confirming without knowing its + %% msg_seq_no, which means that we can see the confirmation come + %% back from the backing queue without knowing the msg_seq_no, + %% which means that we're going to have to hang on to the fact + %% that we've seen the msg_id confirmed until we can associate it + %% with a msg_seq_no. + State1 = ensure_monitoring(ChPid, State), + {MQ, PendingCh} = get_sender_queue(ChPid, SQ), + {MQ1, PendingCh1, MS1} = + case queue:out(MQ) of + {empty, _MQ2} -> + {MQ, sets:add_element(MsgId, PendingCh), + dict:store(MsgId, {published, ChPid}, MS)}; + {{value, {Delivery = #delivery { + msg_seq_no = MsgSeqNo, + message = #basic_message { id = MsgId } }, + _EnqueueOnPromotion}}, MQ2} -> + %% We received the msg from the channel first. Thus we + %% need to deal with confirms here. + case needs_confirming(Delivery, State1) of + never -> + {MQ2, PendingCh, MS}; + eventually -> + {MQ2, sets:add_element(MsgId, PendingCh), + dict:store(MsgId, {published, ChPid, MsgSeqNo}, MS)}; + immediately -> + ok = rabbit_channel:confirm(ChPid, [MsgSeqNo]), + {MQ2, PendingCh, MS} + end; + {{value, {#delivery {}, _EnqueueOnPromotion}}, _MQ2} -> + %% The instruction was sent to us before we were + %% within the slave_pids within the #amqqueue{} + %% record. We'll never receive the message directly + %% from the channel. And the channel will not be + %% expecting any confirms from us. 
+ {MQ, PendingCh, MS} + end, + + SQ1 = dict:store(ChPid, {MQ1, PendingCh1}, SQ), + State2 = State1 #state { sender_queues = SQ1, msg_id_status = MS1 }, + + {ok, + case Deliver of + false -> + BQS1 = BQ:publish(Msg, MsgProps, ChPid, BQS), + State2 #state { backing_queue_state = BQS1 }; + {true, AckRequired} -> + {AckTag, BQS1} = BQ:publish_delivered(AckRequired, Msg, MsgProps, + ChPid, BQS), + maybe_store_ack(AckRequired, MsgId, AckTag, + State2 #state { backing_queue_state = BQS1 }) + end}; +process_instruction({discard, ChPid, Msg = #basic_message { id = MsgId }}, + State = #state { sender_queues = SQ, + backing_queue = BQ, + backing_queue_state = BQS, + msg_id_status = MS }) -> + %% Many of the comments around the publish head above apply here + %% too. + State1 = ensure_monitoring(ChPid, State), + {MQ, PendingCh} = get_sender_queue(ChPid, SQ), + {MQ1, PendingCh1, MS1} = + case queue:out(MQ) of + {empty, _MQ} -> + {MQ, sets:add_element(MsgId, PendingCh), + dict:store(MsgId, discarded, MS)}; + {{value, {#delivery { message = #basic_message { id = MsgId } }, + _EnqueueOnPromotion}}, MQ2} -> + %% We've already seen it from the channel, we're not + %% going to see this again, so don't add it to MS + {MQ2, PendingCh, MS}; + {{value, {#delivery {}, _EnqueueOnPromotion}}, _MQ2} -> + %% The instruction was sent to us before we were + %% within the slave_pids within the #amqqueue{} + %% record. We'll never receive the message directly + %% from the channel. 
+ {MQ, PendingCh, MS} + end, + SQ1 = dict:store(ChPid, {MQ1, PendingCh1}, SQ), + BQS1 = BQ:discard(Msg, ChPid, BQS), + {ok, State1 #state { sender_queues = SQ1, + msg_id_status = MS1, + backing_queue_state = BQS1 }}; +process_instruction({set_length, Length}, + State = #state { backing_queue = BQ, + backing_queue_state = BQS }) -> + QLen = BQ:len(BQS), + ToDrop = QLen - Length, + {ok, case ToDrop > 0 of + true -> BQS1 = + lists:foldl( + fun (const, BQSN) -> + {{_Msg, _IsDelivered, _AckTag, _Remaining}, + BQSN1} = BQ:fetch(false, BQSN), + BQSN1 + end, BQS, lists:duplicate(ToDrop, const)), + State #state { backing_queue_state = BQS1 }; + false -> State + end}; +process_instruction({fetch, AckRequired, MsgId, Remaining}, + State = #state { backing_queue = BQ, + backing_queue_state = BQS }) -> + QLen = BQ:len(BQS), + {ok, case QLen - 1 of + Remaining -> + {{#basic_message{id = MsgId}, _IsDelivered, + AckTag, Remaining}, BQS1} = BQ:fetch(AckRequired, BQS), + maybe_store_ack(AckRequired, MsgId, AckTag, + State #state { backing_queue_state = BQS1 }); + Other when Other < Remaining -> + %% we must be shorter than the master + State + end}; +process_instruction({ack, MsgIds}, + State = #state { backing_queue = BQ, + backing_queue_state = BQS, + msg_id_ack = MA }) -> + {AckTags, MA1} = msg_ids_to_acktags(MsgIds, MA), + {MsgIds1, BQS1} = BQ:ack(AckTags, BQS), + [] = MsgIds1 -- MsgIds, %% ASSERTION + {ok, State #state { msg_id_ack = MA1, + backing_queue_state = BQS1 }}; +process_instruction({requeue, MsgPropsFun, MsgIds}, + State = #state { backing_queue = BQ, + backing_queue_state = BQS, + msg_id_ack = MA }) -> + {AckTags, MA1} = msg_ids_to_acktags(MsgIds, MA), + {ok, case length(AckTags) =:= length(MsgIds) of + true -> + {MsgIds, BQS1} = BQ:requeue(AckTags, MsgPropsFun, BQS), + State #state { msg_id_ack = MA1, + backing_queue_state = BQS1 }; + false -> + %% The only thing we can safely do is nuke out our BQ + %% and MA. 
The interaction between this and confirms + %% doesn't really bear thinking about... + {_Count, BQS1} = BQ:purge(BQS), + {_MsgIds, BQS2} = ack_all(BQ, MA, BQS1), + State #state { msg_id_ack = dict:new(), + backing_queue_state = BQS2 } + end}; +process_instruction({sender_death, ChPid}, + State = #state { sender_queues = SQ, + msg_id_status = MS, + known_senders = KS }) -> + {ok, case dict:find(ChPid, KS) of + error -> + State; + {ok, MRef} -> + true = erlang:demonitor(MRef), + MS1 = case dict:find(ChPid, SQ) of + error -> + MS; + {ok, {_MQ, PendingCh}} -> + lists:foldl(fun dict:erase/2, MS, + sets:to_list(PendingCh)) + end, + State #state { sender_queues = dict:erase(ChPid, SQ), + msg_id_status = MS1, + known_senders = dict:erase(ChPid, KS) } + end}; +process_instruction({delete_and_terminate, Reason}, + State = #state { backing_queue = BQ, + backing_queue_state = BQS }) -> + BQ:delete_and_terminate(Reason, BQS), + {stop, State #state { backing_queue_state = undefined }}. + +msg_ids_to_acktags(MsgIds, MA) -> + {AckTags, MA1} = + lists:foldl( + fun (MsgId, {Acc, MAN}) -> + case dict:find(MsgId, MA) of + error -> {Acc, MAN}; + {ok, {_Num, AckTag}} -> {[AckTag | Acc], + dict:erase(MsgId, MAN)} + end + end, {[], MA}, MsgIds), + {lists:reverse(AckTags), MA1}. + +ack_all(BQ, MA, BQS) -> + BQ:ack([AckTag || {_MsgId, {_Num, AckTag}} <- dict:to_list(MA)], BQS). + +maybe_store_ack(false, _MsgId, _AckTag, State) -> + State; +maybe_store_ack(true, MsgId, AckTag, State = #state { msg_id_ack = MA, + ack_num = Num }) -> + State #state { msg_id_ack = dict:store(MsgId, {Num, AckTag}, MA), + ack_num = Num + 1 }. 
diff --git a/src/rabbit_mirror_queue_slave_sup.erl b/src/rabbit_mirror_queue_slave_sup.erl new file mode 100644 index 00000000..fc04ec79 --- /dev/null +++ b/src/rabbit_mirror_queue_slave_sup.erl @@ -0,0 +1,48 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is VMware, Inc. +%% Copyright (c) 2010-2011 VMware, Inc. All rights reserved. +%% + +-module(rabbit_mirror_queue_slave_sup). + +-behaviour(supervisor2). + +-export([start/0, start_link/0, start_child/2]). + +-export([init/1]). + +-include_lib("rabbit.hrl"). + +-define(SERVER, ?MODULE). + +start() -> + {ok, _} = + supervisor2:start_child( + rabbit_sup, + {rabbit_mirror_queue_slave_sup, + {rabbit_mirror_queue_slave_sup, start_link, []}, + transient, infinity, supervisor, [rabbit_mirror_queue_slave_sup]}), + ok. + +start_link() -> + supervisor2:start_link({local, ?SERVER}, ?MODULE, []). + +start_child(Node, Args) -> + supervisor2:start_child({?SERVER, Node}, Args). + +init([]) -> + {ok, {{simple_one_for_one_terminate, 10, 10}, + [{rabbit_mirror_queue_slave, + {rabbit_mirror_queue_slave, start_link, []}, + temporary, ?MAX_WAIT, worker, [rabbit_mirror_queue_slave]}]}}. diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl index 7d916797..3bbfb1d7 100644 --- a/src/rabbit_misc.erl +++ b/src/rabbit_misc.erl @@ -25,7 +25,7 @@ protocol_error/3, protocol_error/4, protocol_error/1]). -export([not_found/1, assert_args_equivalence/4]). -export([dirty_read/1]). --export([table_lookup/2]). 
+-export([table_lookup/2, set_table_value/4]). -export([r/3, r/2, r_arg/4, rs/1]). -export([enable_cover/0, report_cover/0]). -export([enable_cover/1, report_cover/1]). @@ -38,9 +38,9 @@ -export([ensure_ok/2]). -export([makenode/1, nodeparts/1, cookie_hash/0, tcp_name/3]). -export([upmap/2, map_in_order/2]). --export([table_fold/3]). +-export([table_filter/3]). -export([dirty_read_all/1, dirty_foreach_key/2, dirty_dump_log/1]). --export([read_term_file/1, write_term_file/2]). +-export([read_term_file/1, write_term_file/2, write_file/2, write_file/3]). -export([append_file/2, ensure_parent_dirs_exist/1]). -export([format_stderr/2]). -export([start_applications/1, stop_applications/1]). @@ -48,24 +48,25 @@ -export([sort_field_table/1]). -export([pid_to_string/1, string_to_pid/1]). -export([version_compare/2, version_compare/3]). --export([recursive_delete/1, recursive_copy/2, dict_cons/3, orddict_cons/3, - unlink_and_capture_exit/1]). +-export([recursive_delete/1, recursive_copy/2, dict_cons/3, orddict_cons/3]). -export([get_options/2]). -export([all_module_attributes/1, build_acyclic_graph/3]). -export([now_ms/0]). -export([lock_file/1]). --export([const_ok/1, const/1]). +-export([const_ok/0, const/1]). -export([ntoa/1, ntoab/1]). +-export([is_process_alive/1]). +-export([pget/2, pget/3, pget_or_die/2]). +-export([format_message_queue/2]). %%---------------------------------------------------------------------------- -ifdef(use_specs). --export_type([resource_name/0, thunk/1, const/1]). +-export_type([resource_name/0, thunk/1]). -type(ok_or_error() :: rabbit_types:ok_or_error(any())). -type(thunk(T) :: fun(() -> T)). --type(const(T) :: fun((any()) -> T)). -type(resource_name() :: binary()). -type(optdef() :: {flag, string()} | {option, string(), any()}). -type(channel_or_connection_exit() @@ -104,7 +105,12 @@ ({atom(), any()}) -> rabbit_types:ok_or_error2(any(), 'not_found')). 
-spec(table_lookup/2 :: (rabbit_framing:amqp_table(), binary()) - -> 'undefined' | {rabbit_framing:amqp_field_type(), any()}). + -> 'undefined' | {rabbit_framing:amqp_field_type(), any()}). +-spec(set_table_value/4 :: + (rabbit_framing:amqp_table(), binary(), + rabbit_framing:amqp_field_type(), rabbit_framing:amqp_value()) + -> rabbit_framing:amqp_table()). + -spec(r/2 :: (rabbit_types:vhost(), K) -> rabbit_types:r3(rabbit_types:vhost(), K, '_') when is_subtype(K, atom())). @@ -145,7 +151,8 @@ -> atom()). -spec(upmap/2 :: (fun ((A) -> B), [A]) -> [B]). -spec(map_in_order/2 :: (fun ((A) -> B), [A]) -> [B]). --spec(table_fold/3 :: (fun ((any(), A) -> A), A, atom()) -> A). +-spec(table_filter/3:: (fun ((A) -> boolean()), fun ((A, boolean()) -> 'ok'), + atom()) -> [A]). -spec(dirty_read_all/1 :: (atom()) -> [any()]). -spec(dirty_foreach_key/2 :: (fun ((any()) -> any()), atom()) -> 'ok' | 'aborted'). @@ -153,6 +160,8 @@ -spec(read_term_file/1 :: (file:filename()) -> {'ok', [any()]} | rabbit_types:error(any())). -spec(write_term_file/2 :: (file:filename(), [any()]) -> ok_or_error()). +-spec(write_file/2 :: (file:filename(), iodata()) -> ok_or_error()). +-spec(write_file/3 :: (file:filename(), iodata(), [any()]) -> ok_or_error()). -spec(append_file/2 :: (file:filename(), string()) -> ok_or_error()). -spec(ensure_parent_dirs_exist/1 :: (string()) -> 'ok'). -spec(format_stderr/2 :: (string(), [any()]) -> 'ok'). @@ -177,7 +186,6 @@ -> rabbit_types:ok_or_error({file:filename(), file:filename(), any()})). -spec(dict_cons/3 :: (any(), any(), dict()) -> dict()). -spec(orddict_cons/3 :: (any(), any(), orddict:orddict()) -> orddict:orddict()). --spec(unlink_and_capture_exit/1 :: (pid()) -> 'ok'). -spec(get_options/2 :: ([optdef()], [string()]) -> {[string()], [{string(), any()}]}). -spec(all_module_attributes/1 :: (atom()) -> [{atom(), [term()]}]). @@ -190,10 +198,15 @@ digraph:vertex(), digraph:vertex()})). -spec(now_ms/0 :: () -> non_neg_integer()). 
-spec(lock_file/1 :: (file:filename()) -> rabbit_types:ok_or_error('eexist')). --spec(const_ok/1 :: (any()) -> 'ok'). --spec(const/1 :: (A) -> const(A)). +-spec(const_ok/0 :: () -> 'ok'). +-spec(const/1 :: (A) -> thunk(A)). -spec(ntoa/1 :: (inet:ip_address()) -> string()). -spec(ntoab/1 :: (inet:ip_address()) -> string()). +-spec(is_process_alive/1 :: (pid()) -> boolean()). +-spec(pget/2 :: (term(), [term()]) -> term()). +-spec(pget/3 :: (term(), [term()], term()) -> term()). +-spec(pget_or_die/2 :: (term(), [term()]) -> term() | no_return()). +-spec(format_message_queue/2 :: (any(), priority_queue:q()) -> term()). -endif. @@ -266,6 +279,10 @@ table_lookup(Table, Key) -> false -> undefined end. +set_table_value(Table, Key, Type, Value) -> + sort_field_table( + lists:keystore(Key, 1, Table, {Key, Type, Value})). + r(#resource{virtual_host = VHostPath}, Kind, Name) when is_binary(Name) -> #resource{virtual_host = VHostPath, kind = Kind, name = Name}; @@ -350,8 +367,11 @@ throw_on_error(E, Thunk) -> with_exit_handler(Handler, Thunk) -> try Thunk() - catch exit:{R, _} when R =:= noproc; R =:= nodedown; - R =:= normal; R =:= shutdown -> + catch + exit:{R, _} when R =:= noproc; R =:= nodedown; + R =:= normal; R =:= shutdown -> + Handler(); + exit:{{R, _}, _} when R =:= nodedown; R =:= shutdown -> Handler() end. @@ -400,17 +420,12 @@ execute_mnesia_transaction(TxFun, PrePostCommitFun) -> end), false). %% Like execute_mnesia_transaction/2, but TxFun is expected to return a -%% TailFun which gets called immediately before and after the tx commit +%% TailFun which gets called (only) immediately after the tx commit execute_mnesia_tx_with_tail(TxFun) -> case mnesia:is_transaction() of true -> execute_mnesia_transaction(TxFun); - false -> TailFun = execute_mnesia_transaction( - fun () -> - TailFun1 = TxFun(), - TailFun1(true), - TailFun1 - end), - TailFun(false) + false -> TailFun = execute_mnesia_transaction(TxFun), + TailFun() end. 
ensure_ok(ok, _) -> ok; @@ -456,20 +471,23 @@ map_in_order(F, L) -> lists:reverse( lists:foldl(fun (E, Acc) -> [F(E) | Acc] end, [], L)). -%% Fold over each entry in a table, executing the cons function in a -%% transaction. This is often far more efficient than wrapping a tx -%% around the lot. +%% Apply a pre-post-commit function to all entries in a table that +%% satisfy a predicate, and return those entries. %% %% We ignore entries that have been modified or removed. -table_fold(F, Acc0, TableName) -> +table_filter(Pred, PrePostCommitFun, TableName) -> lists:foldl( - fun (E, Acc) -> execute_mnesia_transaction( - fun () -> case mnesia:match_object(TableName, E, read) of - [] -> Acc; - _ -> F(E, Acc) - end - end) - end, Acc0, dirty_read_all(TableName)). + fun (E, Acc) -> + case execute_mnesia_transaction( + fun () -> mnesia:match_object(TableName, E, read) =/= [] + andalso Pred(E) end, + fun (false, _Tx) -> false; + (true, Tx) -> PrePostCommitFun(E, Tx), true + end) of + false -> Acc; + true -> [E | Acc] + end + end, [], dirty_read_all(TableName)). dirty_read_all(TableName) -> mnesia:dirty_select(TableName, [{'$1',[],['$1']}]). @@ -508,8 +526,42 @@ dirty_dump_log1(LH, {K, Terms, BadBytes}) -> read_term_file(File) -> file:consult(File). write_term_file(File, Terms) -> - file:write_file(File, list_to_binary([io_lib:format("~w.~n", [Term]) || - Term <- Terms])). + write_file(File, list_to_binary([io_lib:format("~w.~n", [Term]) || + Term <- Terms])). + +write_file(Path, Data) -> + write_file(Path, Data, []). + +%% write_file/3 and make_binary/1 are both based on corresponding +%% functions in the kernel/file.erl module of the Erlang R14B02 +%% release, which is licensed under the EPL. That implementation of +%% write_file/3 does not do an fsync prior to closing the file, hence +%% the existence of this version. APIs are otherwise identical. 
+write_file(Path, Data, Modes) -> + Modes1 = [binary, write | (Modes -- [binary, write])], + case make_binary(Data) of + Bin when is_binary(Bin) -> + case file:open(Path, Modes1) of + {ok, Hdl} -> try file:write(Hdl, Bin) of + ok -> file:sync(Hdl); + {error, _} = E -> E + after + file:close(Hdl) + end; + {error, _} = E -> E + end; + {error, _} = E -> E + end. + +make_binary(Bin) when is_binary(Bin) -> + Bin; +make_binary(List) -> + try + iolist_to_binary(List) + catch error:Reason -> + {error, Reason} + end. + append_file(File, Suffix) -> case file:read_file_info(File) of @@ -527,7 +579,7 @@ append_file(File, 0, Suffix) -> end; append_file(File, _, Suffix) -> case file:read_file(File) of - {ok, Data} -> file:write_file([File, Suffix], Data, [append]); + {ok, Data} -> write_file([File, Suffix], Data, [append]); Error -> Error end. @@ -744,18 +796,12 @@ dict_cons(Key, Value, Dict) -> orddict_cons(Key, Value, Dict) -> orddict:update(Key, fun (List) -> [Value | List] end, [Value], Dict). -unlink_and_capture_exit(Pid) -> - unlink(Pid), - receive {'EXIT', Pid, _} -> ok - after 0 -> ok - end. - -% Separate flags and options from arguments. -% get_options([{flag, "-q"}, {option, "-p", "/"}], -% ["set_permissions","-p","/","guest", -% "-q",".*",".*",".*"]) -% == {["set_permissions","guest",".*",".*",".*"], -% [{"-q",true},{"-p","/"}]} +%% Separate flags and options from arguments. +%% get_options([{flag, "-q"}, {option, "-p", "/"}], +%% ["set_permissions","-p","/","guest", +%% "-q",".*",".*",".*"]) +%% == {["set_permissions","guest",".*",".*",".*"], +%% [{"-q",true},{"-p","/"}]} get_options(Defs, As) -> lists:foldl(fun(Def, {AsIn, RsIn}) -> {AsOut, Value} = case Def of @@ -842,8 +888,8 @@ lock_file(Path) -> ok = file:close(Lock) end. -const_ok(_) -> ok. -const(X) -> fun (_) -> X end. +const_ok() -> ok. +const(X) -> fun () -> X end. %% Format IPv4-mapped IPv6 addresses as IPv4, since they're what we see %% when IPv6 is enabled but not used (i.e. 99% of the time). 
@@ -858,3 +904,41 @@ ntoab(IP) -> 0 -> Str; _ -> "[" ++ Str ++ "]" end. + +is_process_alive(Pid) when node(Pid) =:= node() -> + erlang:is_process_alive(Pid); +is_process_alive(Pid) -> + case rpc:call(node(Pid), erlang, is_process_alive, [Pid]) of + true -> true; + _ -> false + end. + +pget(K, P) -> proplists:get_value(K, P). +pget(K, P, D) -> proplists:get_value(K, P, D). + +pget_or_die(K, P) -> + case proplists:get_value(K, P) of + undefined -> exit({error, key_missing, K}); + V -> V + end. + +format_message_queue(_Opt, MQ) -> + Len = priority_queue:len(MQ), + {Len, + case Len > 100 of + false -> priority_queue:to_list(MQ); + true -> {summary, + orddict:to_list( + lists:foldl( + fun ({P, V}, Counts) -> + orddict:update_counter( + {P, format_message_queue_entry(V)}, 1, Counts) + end, orddict:new(), priority_queue:to_list(MQ)))} + end}. + +format_message_queue_entry(V) when is_atom(V) -> + V; +format_message_queue_entry(V) when is_tuple(V) -> + list_to_tuple([format_message_queue_entry(E) || E <- tuple_to_list(V)]); +format_message_queue_entry(_V) -> + '_'. diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index a9b4e177..ab553a8b 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -18,9 +18,13 @@ -module(rabbit_mnesia). -export([ensure_mnesia_dir/0, dir/0, status/0, init/0, is_db_empty/0, - cluster/1, force_cluster/1, reset/0, force_reset/0, + cluster/1, force_cluster/1, reset/0, force_reset/0, init_db/3, is_clustered/0, running_clustered_nodes/0, all_clustered_nodes/0, - empty_ram_only_tables/0, copy_db/1]). + empty_ram_only_tables/0, copy_db/1, wait_for_tables/1, + create_cluster_nodes_config/1, read_cluster_nodes_config/0, + record_running_nodes/0, read_previously_running_nodes/0, + delete_previously_running_nodes/0, running_nodes_filename/0, + is_disc_node/0]). -export([table_names/0]). @@ -42,6 +46,7 @@ -spec(dir/0 :: () -> file:filename()). -spec(ensure_mnesia_dir/0 :: () -> 'ok'). -spec(init/0 :: () -> 'ok'). 
+-spec(init_db/3 :: ([node()], boolean(), rabbit_misc:thunk('ok')) -> 'ok'). -spec(is_db_empty/0 :: () -> boolean()). -spec(cluster/1 :: ([node()]) -> 'ok'). -spec(force_cluster/1 :: ([node()]) -> 'ok'). @@ -54,6 +59,14 @@ -spec(empty_ram_only_tables/0 :: () -> 'ok'). -spec(create_tables/0 :: () -> 'ok'). -spec(copy_db/1 :: (file:filename()) -> rabbit_types:ok_or_error(any())). +-spec(wait_for_tables/1 :: ([atom()]) -> 'ok'). +-spec(create_cluster_nodes_config/1 :: ([node()]) -> 'ok'). +-spec(read_cluster_nodes_config/0 :: () -> [node()]). +-spec(record_running_nodes/0 :: () -> 'ok'). +-spec(read_previously_running_nodes/0 :: () -> [node()]). +-spec(delete_previously_running_nodes/0 :: () -> 'ok'). +-spec(running_nodes_filename/0 :: () -> file:filename()). +-spec(is_disc_node/0 :: () -> boolean()). -endif. @@ -77,9 +90,14 @@ status() -> {running_nodes, running_clustered_nodes()}]. init() -> - ok = ensure_mnesia_running(), - ok = ensure_mnesia_dir(), - ok = init_db(read_cluster_nodes_config(), true), + ensure_mnesia_running(), + ensure_mnesia_dir(), + ok = init_db(read_cluster_nodes_config(), true, + fun maybe_upgrade_local_or_record_desired/0), + %% We intuitively expect the global name server to be synced when + %% Mnesia is up. In fact that's not guaranteed to be the case - let's + %% make it so. + ok = global:sync(), ok. is_db_empty() -> @@ -97,14 +115,49 @@ force_cluster(ClusterNodes) -> %% node. If Force is false, only connections to online nodes are %% allowed. 
cluster(ClusterNodes, Force) -> - ok = ensure_mnesia_not_running(), - ok = ensure_mnesia_dir(), - rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia), + ensure_mnesia_not_running(), + ensure_mnesia_dir(), + + %% Wipe mnesia if we're changing type from disc to ram + case {is_disc_node(), should_be_disc_node(ClusterNodes)} of + {true, false} -> error_logger:warning_msg( + "changing node type; wiping mnesia...~n~n"), + rabbit_misc:ensure_ok(mnesia:delete_schema([node()]), + cannot_delete_schema); + _ -> ok + end, + + %% Pre-emptively leave the cluster + %% + %% We're trying to handle the following two cases: + %% 1. We have a two-node cluster, where both nodes are disc nodes. + %% One node is re-clustered as a ram node. When it tries to + %% re-join the cluster, but before it has time to update its + %% tables definitions, the other node will order it to re-create + %% its disc tables. So, we need to leave the cluster before we + %% can join it again. + %% 2. We have a two-node cluster, where both nodes are disc nodes. + %% One node is forcefully reset (so, the other node thinks its + %% still a part of the cluster). The reset node is re-clustered + %% as a ram node. Same as above, we need to leave the cluster + %% before we can join it. But, since we don't know if we're in a + %% cluster or not, we just pre-emptively leave it before joining. + ProperClusterNodes = ClusterNodes -- [node()], + try + ok = leave_cluster(ProperClusterNodes, ProperClusterNodes) + catch + {error, {no_running_cluster_nodes, _, _}} when Force -> + ok + end, + + %% Join the cluster + start_mnesia(), try - ok = init_db(ClusterNodes, Force), + ok = init_db(ClusterNodes, Force, + fun maybe_upgrade_local_or_record_desired/0), ok = create_cluster_nodes_config(ClusterNodes) after - mnesia:stop() + stop_mnesia() end, ok. 
@@ -128,10 +181,10 @@ empty_ram_only_tables() -> Node = node(), lists:foreach( fun (TabName) -> - case lists:member(Node, mnesia:table_info(TabName, ram_copies)) of - true -> {atomic, ok} = mnesia:clear_table(TabName); - false -> ok - end + case lists:member(Node, mnesia:table_info(TabName, ram_copies)) of + true -> {atomic, ok} = mnesia:clear_table(TabName); + false -> ok + end end, table_names()), ok. @@ -141,10 +194,13 @@ nodes_of_type(Type) -> %% This function should return the nodes of a certain type (ram, %% disc or disc_only) in the current cluster. The type of nodes %% is determined when the cluster is initially configured. - %% Specifically, we check whether a certain table, which we know - %% will be written to disk on a disc node, is stored on disk or in - %% RAM. - mnesia:table_info(rabbit_durable_exchange, Type). + mnesia:table_info(schema, Type). + +%% The tables aren't supposed to be on disk on a ram node +table_definitions(disc) -> + table_definitions(); +table_definitions(ram) -> + [{Tab, copy_type_to_ram(TabDef)} || {Tab, TabDef} <- table_definitions()]. table_definitions() -> [{rabbit_user, @@ -174,6 +230,11 @@ table_definitions() -> {attributes, record_info(fields, route)}, {disc_copies, [node()]}, {match, #route{binding = binding_match(), _='_'}}]}, + {rabbit_semi_durable_route, + [{record_name, route}, + {attributes, record_info(fields, route)}, + {type, ordered_set}, + {match, #route{binding = binding_match(), _='_'}}]}, {rabbit_route, [{record_name, route}, {attributes, record_info(fields, route)}, @@ -185,8 +246,17 @@ table_definitions() -> {type, ordered_set}, {match, #reverse_route{reverse_binding = reverse_binding_match(), _='_'}}]}, - %% Consider the implications to nodes_of_type/1 before altering - %% the next entry. 
+ {rabbit_topic_trie_edge, + [{record_name, topic_trie_edge}, + {attributes, record_info(fields, topic_trie_edge)}, + {type, ordered_set}, + {match, #topic_trie_edge{trie_edge = trie_edge_match(), _='_'}}]}, + {rabbit_topic_trie_binding, + [{record_name, topic_trie_binding}, + {attributes, record_info(fields, topic_trie_binding)}, + {type, ordered_set}, + {match, #topic_trie_binding{trie_binding = trie_binding_match(), + _='_'}}]}, {rabbit_durable_exchange, [{record_name, exchange}, {attributes, record_info(fields, exchange)}, @@ -196,6 +266,10 @@ table_definitions() -> [{record_name, exchange}, {attributes, record_info(fields, exchange)}, {match, #exchange{name = exchange_name_match(), _='_'}}]}, + {rabbit_exchange_serial, + [{record_name, exchange_serial}, + {attributes, record_info(fields, exchange_serial)}, + {match, #exchange_serial{name = exchange_name_match(), _='_'}}]}, {rabbit_durable_queue, [{record_name, amqqueue}, {attributes, record_info(fields, amqqueue)}, @@ -204,7 +278,8 @@ table_definitions() -> {rabbit_queue, [{record_name, amqqueue}, {attributes, record_info(fields, amqqueue)}, - {match, #amqqueue{name = queue_name_match(), _='_'}}]}]. + {match, #amqqueue{name = queue_name_match(), _='_'}}]}] + ++ gm:table_definitions(). binding_match() -> #binding{source = exchange_name_match(), @@ -216,6 +291,12 @@ reverse_binding_match() -> _='_'}. binding_destination_match() -> resource_match('_'). +trie_edge_match() -> + #trie_edge{exchange_name = exchange_name_match(), + _='_'}. +trie_binding_match() -> + #trie_binding{exchange_name = exchange_name_match(), + _='_'}. exchange_name_match() -> resource_match(exchange). 
queue_name_match() -> @@ -264,45 +345,52 @@ ensure_schema_integrity() -> check_schema_integrity() -> Tables = mnesia:system_info(tables), - case [Error || {Tab, TabDef} <- table_definitions(), - case lists:member(Tab, Tables) of - false -> - Error = {table_missing, Tab}, - true; - true -> - {_, ExpAttrs} = proplists:lookup(attributes, TabDef), - Attrs = mnesia:table_info(Tab, attributes), - Error = {table_attributes_mismatch, Tab, - ExpAttrs, Attrs}, - Attrs /= ExpAttrs - end] of - [] -> check_table_integrity(); - Errors -> {error, Errors} + case check_tables(fun (Tab, TabDef) -> + case lists:member(Tab, Tables) of + false -> {error, {table_missing, Tab}}; + true -> check_table_attributes(Tab, TabDef) + end + end) of + ok -> ok = wait_for_tables(), + check_tables(fun check_table_content/2); + Other -> Other end. -check_table_integrity() -> - ok = wait_for_tables(), - case lists:all(fun ({Tab, TabDef}) -> - {_, Match} = proplists:lookup(match, TabDef), - read_test_table(Tab, Match) - end, table_definitions()) of - true -> ok; - false -> {error, invalid_table_content} +check_table_attributes(Tab, TabDef) -> + {_, ExpAttrs} = proplists:lookup(attributes, TabDef), + case mnesia:table_info(Tab, attributes) of + ExpAttrs -> ok; + Attrs -> {error, {table_attributes_mismatch, Tab, ExpAttrs, Attrs}} end. -read_test_table(Tab, Match) -> +check_table_content(Tab, TabDef) -> + {_, Match} = proplists:lookup(match, TabDef), case mnesia:dirty_first(Tab) of '$end_of_table' -> - true; + ok; Key -> ObjList = mnesia:dirty_read(Tab, Key), MatchComp = ets:match_spec_compile([{Match, [], ['$_']}]), case ets:match_spec_run(ObjList, MatchComp) of - ObjList -> true; - _ -> false + ObjList -> ok; + _ -> {error, {table_content_invalid, Tab, Match, ObjList}} end end. 
+check_tables(Fun) -> + case [Error || {Tab, TabDef} <- table_definitions( + case is_disc_node() of + true -> disc; + false -> ram + end), + case Fun(Tab, TabDef) of + ok -> Error = none, false; + {error, Error} -> true + end] of + [] -> ok; + Errors -> {error, Errors} + end. + %% The cluster node config file contains some or all of the disk nodes %% that are members of the cluster this node is / should be a part of. %% @@ -346,11 +434,40 @@ delete_cluster_nodes_config() -> FileName, Reason}}) end. +running_nodes_filename() -> + filename:join(dir(), "nodes_running_at_shutdown"). + +record_running_nodes() -> + FileName = running_nodes_filename(), + Nodes = running_clustered_nodes() -- [node()], + %% Don't check the result: we're shutting down anyway and this is + %% a best-effort-basis. + rabbit_misc:write_term_file(FileName, [Nodes]), + ok. + +read_previously_running_nodes() -> + FileName = running_nodes_filename(), + case rabbit_misc:read_term_file(FileName) of + {ok, [Nodes]} -> Nodes; + {error, enoent} -> []; + {error, Reason} -> throw({error, {cannot_read_previous_nodes_file, + FileName, Reason}}) + end. + +delete_previously_running_nodes() -> + FileName = running_nodes_filename(), + case file:delete(FileName) of + ok -> ok; + {error, enoent} -> ok; + {error, Reason} -> throw({error, {cannot_delete_previous_nodes_file, + FileName, Reason}}) + end. + %% Take a cluster node config and create the right kind of node - a %% standalone disk node, or disk or ram node connected to the %% specified cluster nodes. If Force is false, don't allow %% connections to offline nodes. 
-init_db(ClusterNodes, Force) -> +init_db(ClusterNodes, Force, SecondaryPostMnesiaFun) -> UClusterNodes = lists:usort(ClusterNodes), ProperClusterNodes = UClusterNodes -- [node()], case mnesia:change_config(extra_db_nodes, ProperClusterNodes) of @@ -366,37 +483,49 @@ init_db(ClusterNodes, Force) -> end; true -> ok end, - case {Nodes, mnesia:system_info(use_dir), all_clustered_nodes()} of - {[], true, [_]} -> - %% True single disc node, attempt upgrade - ok = wait_for_tables(), - case rabbit_upgrade:maybe_upgrade() of - ok -> ensure_schema_ok(); - version_not_available -> schema_ok_or_move() - end; - {[], true, _} -> - %% "Master" (i.e. without config) disc node in cluster, - %% verify schema - ok = wait_for_tables(), - ensure_version_ok(rabbit_upgrade:read_version()), - ensure_schema_ok(); - {[], false, _} -> + WantDiscNode = should_be_disc_node(ClusterNodes), + WasDiscNode = is_disc_node(), + %% We create a new db (on disk, or in ram) in the first + %% two cases and attempt to upgrade the in the other two + case {Nodes, WasDiscNode, WantDiscNode} of + {[], _, false} -> + %% New ram node; start from scratch + ok = create_schema(ram); + {[], false, true} -> %% Nothing there at all, start from scratch - ok = create_schema(); + ok = create_schema(disc); + {[], true, true} -> + %% We're the first node up + case rabbit_upgrade:maybe_upgrade_local() of + ok -> ensure_schema_integrity(); + version_not_available -> ok = schema_ok_or_move() + end; {[AnotherNode|_], _, _} -> %% Subsequent node in cluster, catch up - ensure_version_ok(rabbit_upgrade:read_version()), ensure_version_ok( - rpc:call(AnotherNode, rabbit_upgrade, read_version, [])), - IsDiskNode = ClusterNodes == [] orelse - lists:member(node(), ClusterNodes), + rpc:call(AnotherNode, rabbit_version, recorded, [])), + {CopyType, CopyTypeAlt} = + case WantDiscNode of + true -> {disc, disc_copies}; + false -> {ram, ram_copies} + end, ok = wait_for_replicated_tables(), - ok = create_local_table_copy(schema, 
disc_copies), - ok = create_local_table_copies(case IsDiskNode of - true -> disc; - false -> ram - end), - ensure_schema_ok() + ok = create_local_table_copy(schema, CopyTypeAlt), + ok = create_local_table_copies(CopyType), + + ok = SecondaryPostMnesiaFun(), + %% We've taken down mnesia, so ram nodes will need + %% to re-sync + case is_disc_node() of + false -> start_mnesia(), + mnesia:change_config(extra_db_nodes, + ProperClusterNodes), + wait_for_replicated_tables(); + true -> ok + end, + + ensure_schema_integrity(), + ok end; {error, Reason} -> %% one reason we may end up here is if we try to join @@ -405,6 +534,14 @@ init_db(ClusterNodes, Force) -> throw({error, {unable_to_join_cluster, ClusterNodes, Reason}}) end. +maybe_upgrade_local_or_record_desired() -> + case rabbit_upgrade:maybe_upgrade_local() of + ok -> ok; + %% If we're just starting up a new node we won't have a + %% version + version_not_available -> ok = rabbit_version:record_desired() + end. + schema_ok_or_move() -> case check_schema_integrity() of ok -> @@ -417,37 +554,39 @@ schema_ok_or_move() -> "and recreating schema from scratch~n", [Reason]), ok = move_db(), - ok = create_schema() + ok = create_schema(disc) end. ensure_version_ok({ok, DiscVersion}) -> - case rabbit_upgrade:desired_version() of - DiscVersion -> ok; - DesiredVersion -> throw({error, {schema_mismatch, - DesiredVersion, DiscVersion}}) + DesiredVersion = rabbit_version:desired(), + case rabbit_version:matches(DesiredVersion, DiscVersion) of + true -> ok; + false -> throw({error, {version_mismatch, DesiredVersion, DiscVersion}}) end; ensure_version_ok({error, _}) -> - ok = rabbit_upgrade:write_version(). + ok = rabbit_version:record_desired(). 
+ +create_schema(Type) -> + stop_mnesia(), + case Type of + disc -> rabbit_misc:ensure_ok(mnesia:create_schema([node()]), + cannot_create_schema); + ram -> %% remove the disc schema since this is a ram node + rabbit_misc:ensure_ok(mnesia:delete_schema([node()]), + cannot_delete_schema) + end, + start_mnesia(), + ok = create_tables(Type), + ensure_schema_integrity(), + ok = rabbit_version:record_desired(). -ensure_schema_ok() -> - case check_schema_integrity() of - ok -> ok; - {error, Reason} -> throw({error, {schema_invalid, Reason}}) - end. +is_disc_node() -> mnesia:system_info(use_dir). -create_schema() -> - mnesia:stop(), - rabbit_misc:ensure_ok(mnesia:create_schema([node()]), - cannot_create_schema), - rabbit_misc:ensure_ok(mnesia:start(), - cannot_start_mnesia), - ok = create_tables(), - ok = ensure_schema_integrity(), - ok = wait_for_tables(), - ok = rabbit_upgrade:write_version(). +should_be_disc_node(ClusterNodes) -> + ClusterNodes == [] orelse lists:member(node(), ClusterNodes). move_db() -> - mnesia:stop(), + stop_mnesia(), MnesiaDir = filename:dirname(dir() ++ "/"), {{Year, Month, Day}, {Hour, Minute, Second}} = erlang:universaltime(), BackupDir = lists:flatten( @@ -464,21 +603,17 @@ move_db() -> {error, Reason} -> throw({error, {cannot_backup_mnesia, MnesiaDir, BackupDir, Reason}}) end, - ok = ensure_mnesia_dir(), - rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia), + ensure_mnesia_dir(), + start_mnesia(), ok. copy_db(Destination) -> - mnesia:stop(), - case rabbit_misc:recursive_copy(dir(), Destination) of - ok -> - rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia), - ok = wait_for_tables(); - {error, E} -> - {error, E} - end. + ok = ensure_mnesia_not_running(), + rabbit_misc:recursive_copy(dir(), Destination). + +create_tables() -> create_tables(disc). 
-create_tables() -> +create_tables(Type) -> lists:foreach(fun ({Tab, TabDef}) -> TabDef1 = proplists:delete(match, TabDef), case mnesia:create_table(Tab, TabDef1) of @@ -488,9 +623,13 @@ create_tables() -> Tab, TabDef1, Reason}}) end end, - table_definitions()), + table_definitions(Type)), ok. +copy_type_to_ram(TabDef) -> + [{disc_copies, []}, {ram_copies, [node()]} + | proplists:delete(ram_copies, proplists:delete(disc_copies, TabDef))]. + table_has_copy_type(TabDef, DiscType) -> lists:member(node(), proplists:get_value(DiscType, TabDef, [])). @@ -508,19 +647,19 @@ create_local_table_copies(Type) -> HasDiscOnlyCopies -> disc_only_copies; true -> ram_copies end; -%% unused code - commented out to keep dialyzer happy -%% Type =:= disc_only -> -%% if -%% HasDiscCopies or HasDiscOnlyCopies -> -%% disc_only_copies; -%% true -> ram_copies -%% end; +%%% unused code - commented out to keep dialyzer happy +%%% Type =:= disc_only -> +%%% if +%%% HasDiscCopies or HasDiscOnlyCopies -> +%%% disc_only_copies; +%%% true -> ram_copies +%%% end; Type =:= ram -> ram_copies end, ok = create_local_table_copy(Tab, StorageType) end, - table_definitions()), + table_definitions(Type)), ok. create_local_table_copy(Tab, Type) -> @@ -541,7 +680,8 @@ wait_for_tables() -> wait_for_tables(table_names()). wait_for_tables(TableNames) -> case mnesia:wait_for_tables(TableNames, 30000) of - ok -> ok; + ok -> + ok; {timeout, BadTabs} -> throw({error, {timeout_waiting_for_tables, BadTabs}}); {error, Reason} -> @@ -549,20 +689,20 @@ wait_for_tables(TableNames) -> end. 
reset(Force) -> - ok = ensure_mnesia_not_running(), + ensure_mnesia_not_running(), Node = node(), case Force of true -> ok; false -> - ok = ensure_mnesia_dir(), - rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia), + ensure_mnesia_dir(), + start_mnesia(), {Nodes, RunningNodes} = try ok = init(), {all_clustered_nodes() -- [Node], running_clustered_nodes() -- [Node]} after - mnesia:stop() + stop_mnesia() end, leave_cluster(Nodes, RunningNodes), rabbit_misc:ensure_ok(mnesia:delete_schema([Node]), @@ -585,6 +725,7 @@ leave_cluster(Nodes, RunningNodes) -> [schema, node()]) of {atomic, ok} -> true; {badrpc, nodedown} -> false; + {aborted, {node_not_running, _}} -> false; {aborted, Reason} -> throw({error, {failed_to_leave_cluster, Nodes, RunningNodes, Reason}}) @@ -595,3 +736,11 @@ leave_cluster(Nodes, RunningNodes) -> false -> throw({error, {no_running_cluster_nodes, Nodes, RunningNodes}}) end. + +start_mnesia() -> + rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia), + ensure_mnesia_running(). + +stop_mnesia() -> + stopped = mnesia:stop(), + ensure_mnesia_not_running(). diff --git a/src/rabbit_msg_file.erl b/src/rabbit_msg_file.erl index cfea4982..b7de27d4 100644 --- a/src/rabbit_msg_file.erl +++ b/src/rabbit_msg_file.erl @@ -16,7 +16,7 @@ -module(rabbit_msg_file). --export([append/3, read/2, scan/2]). +-export([append/3, read/2, scan/4]). %%---------------------------------------------------------------------------- @@ -27,8 +27,8 @@ -define(WRITE_OK_SIZE_BITS, 8). -define(WRITE_OK_MARKER, 255). -define(FILE_PACKING_ADJUSTMENT, (1 + ?INTEGER_SIZE_BYTES)). --define(GUID_SIZE_BYTES, 16). --define(GUID_SIZE_BITS, (8 * ?GUID_SIZE_BYTES)). +-define(MSG_ID_SIZE_BYTES, 16). +-define(MSG_ID_SIZE_BITS, (8 * ?MSG_ID_SIZE_BYTES)). -define(SCAN_BLOCK_SIZE, 4194304). %% 4MB %%---------------------------------------------------------------------------- @@ -39,83 +39,87 @@ -type(position() :: non_neg_integer()). -type(msg_size() :: non_neg_integer()). 
-type(file_size() :: non_neg_integer()). +-type(message_accumulator(A) :: + fun (({rabbit_types:msg_id(), msg_size(), position(), binary()}, A) -> + A)). --spec(append/3 :: (io_device(), rabbit_guid:guid(), msg()) -> +-spec(append/3 :: (io_device(), rabbit_types:msg_id(), msg()) -> rabbit_types:ok_or_error2(msg_size(), any())). -spec(read/2 :: (io_device(), msg_size()) -> - rabbit_types:ok_or_error2({rabbit_guid:guid(), msg()}, + rabbit_types:ok_or_error2({rabbit_types:msg_id(), msg()}, any())). --spec(scan/2 :: (io_device(), file_size()) -> - {'ok', [{rabbit_guid:guid(), msg_size(), position()}], - position()}). +-spec(scan/4 :: (io_device(), file_size(), message_accumulator(A), A) -> + {'ok', A, position()}). -endif. %%---------------------------------------------------------------------------- -append(FileHdl, Guid, MsgBody) - when is_binary(Guid) andalso size(Guid) =:= ?GUID_SIZE_BYTES -> +append(FileHdl, MsgId, MsgBody) + when is_binary(MsgId) andalso size(MsgId) =:= ?MSG_ID_SIZE_BYTES -> MsgBodyBin = term_to_binary(MsgBody), MsgBodyBinSize = size(MsgBodyBin), - Size = MsgBodyBinSize + ?GUID_SIZE_BYTES, + Size = MsgBodyBinSize + ?MSG_ID_SIZE_BYTES, case file_handle_cache:append(FileHdl, <<Size:?INTEGER_SIZE_BITS, - Guid:?GUID_SIZE_BYTES/binary, - MsgBodyBin:MsgBodyBinSize/binary, - ?WRITE_OK_MARKER:?WRITE_OK_SIZE_BITS>>) of + MsgId:?MSG_ID_SIZE_BYTES/binary, + MsgBodyBin:MsgBodyBinSize/binary, + ?WRITE_OK_MARKER:?WRITE_OK_SIZE_BITS>>) of ok -> {ok, Size + ?FILE_PACKING_ADJUSTMENT}; KO -> KO end. 
read(FileHdl, TotalSize) -> Size = TotalSize - ?FILE_PACKING_ADJUSTMENT, - BodyBinSize = Size - ?GUID_SIZE_BYTES, + BodyBinSize = Size - ?MSG_ID_SIZE_BYTES, case file_handle_cache:read(FileHdl, TotalSize) of {ok, <<Size:?INTEGER_SIZE_BITS, - Guid:?GUID_SIZE_BYTES/binary, - MsgBodyBin:BodyBinSize/binary, - ?WRITE_OK_MARKER:?WRITE_OK_SIZE_BITS>>} -> - {ok, {Guid, binary_to_term(MsgBodyBin)}}; + MsgId:?MSG_ID_SIZE_BYTES/binary, + MsgBodyBin:BodyBinSize/binary, + ?WRITE_OK_MARKER:?WRITE_OK_SIZE_BITS>>} -> + {ok, {MsgId, binary_to_term(MsgBodyBin)}}; KO -> KO end. -scan(FileHdl, FileSize) when FileSize >= 0 -> - scan(FileHdl, FileSize, <<>>, 0, [], 0). +scan(FileHdl, FileSize, Fun, Acc) when FileSize >= 0 -> + scan(FileHdl, FileSize, <<>>, 0, 0, Fun, Acc). -scan(_FileHdl, FileSize, _Data, FileSize, Acc, ScanOffset) -> +scan(_FileHdl, FileSize, _Data, FileSize, ScanOffset, _Fun, Acc) -> {ok, Acc, ScanOffset}; -scan(FileHdl, FileSize, Data, ReadOffset, Acc, ScanOffset) -> +scan(FileHdl, FileSize, Data, ReadOffset, ScanOffset, Fun, Acc) -> Read = lists:min([?SCAN_BLOCK_SIZE, (FileSize - ReadOffset)]), case file_handle_cache:read(FileHdl, Read) of {ok, Data1} -> {Data2, Acc1, ScanOffset1} = - scan(<<Data/binary, Data1/binary>>, Acc, ScanOffset), + scanner(<<Data/binary, Data1/binary>>, ScanOffset, Fun, Acc), ReadOffset1 = ReadOffset + size(Data1), - scan(FileHdl, FileSize, Data2, ReadOffset1, Acc1, ScanOffset1); + scan(FileHdl, FileSize, Data2, ReadOffset1, ScanOffset1, Fun, Acc1); _KO -> {ok, Acc, ScanOffset} end. -scan(<<>>, Acc, Offset) -> +scanner(<<>>, Offset, _Fun, Acc) -> {<<>>, Acc, Offset}; -scan(<<0:?INTEGER_SIZE_BITS, _Rest/binary>>, Acc, Offset) -> +scanner(<<0:?INTEGER_SIZE_BITS, _Rest/binary>>, Offset, _Fun, Acc) -> {<<>>, Acc, Offset}; %% Nothing to do other than stop. 
-scan(<<Size:?INTEGER_SIZE_BITS, GuidAndMsg:Size/binary, - WriteMarker:?WRITE_OK_SIZE_BITS, Rest/binary>>, Acc, Offset) -> +scanner(<<Size:?INTEGER_SIZE_BITS, MsgIdAndMsg:Size/binary, + WriteMarker:?WRITE_OK_SIZE_BITS, Rest/binary>>, Offset, Fun, Acc) -> TotalSize = Size + ?FILE_PACKING_ADJUSTMENT, case WriteMarker of ?WRITE_OK_MARKER -> %% Here we take option 5 from %% http://www.erlang.org/cgi-bin/ezmlm-cgi?2:mss:1569 in - %% which we read the Guid as a number, and then convert it + %% which we read the MsgId as a number, and then convert it %% back to a binary in order to work around bugs in %% Erlang's GC. - <<GuidNum:?GUID_SIZE_BITS, _Msg/binary>> = - <<GuidAndMsg:Size/binary>>, - <<Guid:?GUID_SIZE_BYTES/binary>> = <<GuidNum:?GUID_SIZE_BITS>>, - scan(Rest, [{Guid, TotalSize, Offset} | Acc], Offset + TotalSize); + <<MsgIdNum:?MSG_ID_SIZE_BITS, Msg/binary>> = + <<MsgIdAndMsg:Size/binary>>, + <<MsgId:?MSG_ID_SIZE_BYTES/binary>> = + <<MsgIdNum:?MSG_ID_SIZE_BITS>>, + scanner(Rest, Offset + TotalSize, Fun, + Fun({MsgId, TotalSize, Offset, Msg}, Acc)); _ -> - scan(Rest, Acc, Offset + TotalSize) + scanner(Rest, Offset + TotalSize, Fun, Acc) end; -scan(Data, Acc, Offset) -> +scanner(Data, Offset, _Fun, Acc) -> {Data, Acc, Offset}. diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 75ca0b8b..6c5035a0 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -21,14 +21,16 @@ -export([start_link/4, successfully_recovered_state/1, client_init/4, client_terminate/1, client_delete_and_terminate/1, client_ref/1, close_all_indicated/1, - write/3, read/2, contains/2, remove/2, release/2, sync/3]). + write/3, read/2, contains/2, remove/2, sync/3]). -export([set_maximum_since_use/2, has_readers/2, combine_files/3, delete_file/2]). %% internal +-export([transform_dir/3, force_recovery/2]). 
%% upgrade + -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3, prioritise_call/3, prioritise_cast/2, - prioritise_info/2]). + prioritise_info/2, format_message_queue/2]). %%---------------------------------------------------------------------------- @@ -37,6 +39,7 @@ -define(SYNC_INTERVAL, 5). %% milliseconds -define(CLEAN_FILENAME, "clean.dot"). -define(FILE_SUMMARY_FILENAME, "file_summary.ets"). +-define(TRANSFORM_TMP, "transform_tmp"). -define(BINARY_MODE, [raw, binary]). -define(READ_MODE, [read]). @@ -65,15 +68,14 @@ gc_pid, %% pid of our GC file_handles_ets, %% tid of the shared file handles table file_summary_ets, %% tid of the file summary table - dedup_cache_ets, %% tid of dedup cache table cur_file_cache_ets, %% tid of current file cache table dying_clients, %% set of dying clients clients, %% map of references of all registered clients %% to callbacks successfully_recovered, %% boolean: did we recover state? file_size_limit, %% how big are our files allowed to get? - cref_to_guids %% client ref to synced messages mapping - }). + cref_to_msg_ids %% client ref to synced messages mapping + }). -record(client_msstate, { server, @@ -85,9 +87,8 @@ gc_pid, file_handles_ets, file_summary_ets, - dedup_cache_ets, cur_file_cache_ets - }). + }). -record(file_summary, {file, valid_total_size, left, right, file_size, locked, readers}). @@ -128,38 +129,39 @@ gc_pid :: pid(), file_handles_ets :: ets:tid(), file_summary_ets :: ets:tid(), - dedup_cache_ets :: ets:tid(), cur_file_cache_ets :: ets:tid()}). --type(startup_fun_state() :: - {(fun ((A) -> 'finished' | {rabbit_guid:guid(), non_neg_integer(), A})), - A}). --type(maybe_guid_fun() :: 'undefined' | fun ((gb_set()) -> any())). +-type(msg_ref_delta_gen(A) :: + fun ((A) -> 'finished' | + {rabbit_types:msg_id(), non_neg_integer(), A})). +-type(maybe_msg_id_fun() :: 'undefined' | fun ((gb_set()) -> any())). -type(maybe_close_fds_fun() :: 'undefined' | fun (() -> 'ok')). 
-type(deletion_thunk() :: fun (() -> boolean())). -spec(start_link/4 :: (atom(), file:filename(), [binary()] | 'undefined', - startup_fun_state()) -> rabbit_types:ok_pid_or_error()). + {msg_ref_delta_gen(A), A}) -> rabbit_types:ok_pid_or_error()). -spec(successfully_recovered_state/1 :: (server()) -> boolean()). --spec(client_init/4 :: (server(), client_ref(), maybe_guid_fun(), +-spec(client_init/4 :: (server(), client_ref(), maybe_msg_id_fun(), maybe_close_fds_fun()) -> client_msstate()). -spec(client_terminate/1 :: (client_msstate()) -> 'ok'). -spec(client_delete_and_terminate/1 :: (client_msstate()) -> 'ok'). -spec(client_ref/1 :: (client_msstate()) -> client_ref()). --spec(write/3 :: (rabbit_guid:guid(), msg(), client_msstate()) -> 'ok'). --spec(read/2 :: (rabbit_guid:guid(), client_msstate()) -> - {rabbit_types:ok(msg()) | 'not_found', client_msstate()}). --spec(contains/2 :: (rabbit_guid:guid(), client_msstate()) -> boolean()). --spec(remove/2 :: ([rabbit_guid:guid()], client_msstate()) -> 'ok'). --spec(release/2 :: ([rabbit_guid:guid()], client_msstate()) -> 'ok'). --spec(sync/3 :: ([rabbit_guid:guid()], fun (() -> any()), client_msstate()) -> - 'ok'). +-spec(write/3 :: (rabbit_types:msg_id(), msg(), client_msstate()) -> 'ok'). +-spec(read/2 :: (rabbit_types:msg_id(), client_msstate()) -> + {rabbit_types:ok(msg()) | 'not_found', client_msstate()}). +-spec(contains/2 :: (rabbit_types:msg_id(), client_msstate()) -> boolean()). +-spec(remove/2 :: ([rabbit_types:msg_id()], client_msstate()) -> 'ok'). +-spec(sync/3 :: + ([rabbit_types:msg_id()], fun (() -> any()), client_msstate()) -> 'ok'). -spec(set_maximum_since_use/2 :: (server(), non_neg_integer()) -> 'ok'). -spec(has_readers/2 :: (non_neg_integer(), gc_state()) -> boolean()). -spec(combine_files/3 :: (non_neg_integer(), non_neg_integer(), gc_state()) -> deletion_thunk()). -spec(delete_file/2 :: (non_neg_integer(), gc_state()) -> deletion_thunk()). 
+-spec(force_recovery/2 :: (file:filename(), server()) -> 'ok'). +-spec(transform_dir/3 :: (file:filename(), server(), + fun ((any()) -> (rabbit_types:ok_or_error2(msg(), any())))) -> 'ok'). -endif. @@ -171,8 +173,8 @@ %% The components: %% -%% Index: this is a mapping from Guid to #msg_location{}: -%% {Guid, RefCount, File, Offset, TotalSize} +%% Index: this is a mapping from MsgId to #msg_location{}: +%% {MsgId, RefCount, File, Offset, TotalSize} %% By default, it's in ets, but it's also pluggable. %% FileSummary: this is an ets table which maps File to #file_summary{}: %% {File, ValidTotalSize, Left, Right, FileSize, Locked, Readers} @@ -273,7 +275,7 @@ %% alternating full files and files with only one tiny message in %% them). %% -%% Messages are reference-counted. When a message with the same guid +%% Messages are reference-counted. When a message with the same msg id %% is written several times we only store it once, and only remove it %% from the store when it has been removed the same number of times. %% @@ -390,7 +392,7 @@ successfully_recovered_state(Server) -> client_init(Server, Ref, MsgOnDiskFun, CloseFDsFun) -> {IState, IModule, Dir, GCPid, - FileHandlesEts, FileSummaryEts, DedupCacheEts, CurFileCacheEts} = + FileHandlesEts, FileSummaryEts, CurFileCacheEts} = gen_server2:call( Server, {new_client_state, Ref, MsgOnDiskFun, CloseFDsFun}, infinity), #client_msstate { server = Server, @@ -402,7 +404,6 @@ client_init(Server, Ref, MsgOnDiskFun, CloseFDsFun) -> gc_pid = GCPid, file_handles_ets = FileHandlesEts, file_summary_ets = FileSummaryEts, - dedup_cache_ets = DedupCacheEts, cur_file_cache_ets = CurFileCacheEts }. client_terminate(CState = #client_msstate { client_ref = Ref }) -> @@ -416,44 +417,31 @@ client_delete_and_terminate(CState = #client_msstate { client_ref = Ref }) -> client_ref(#client_msstate { client_ref = Ref }) -> Ref. 
-write(Guid, Msg, +write(MsgId, Msg, CState = #client_msstate { cur_file_cache_ets = CurFileCacheEts, client_ref = CRef }) -> - ok = update_msg_cache(CurFileCacheEts, Guid, Msg), - ok = server_cast(CState, {write, CRef, Guid}). - -read(Guid, - CState = #client_msstate { dedup_cache_ets = DedupCacheEts, - cur_file_cache_ets = CurFileCacheEts }) -> - %% 1. Check the dedup cache - case fetch_and_increment_cache(DedupCacheEts, Guid) of - not_found -> - %% 2. Check the cur file cache - case ets:lookup(CurFileCacheEts, Guid) of - [] -> - Defer = fun() -> - {server_call(CState, {read, Guid}), CState} - end, - case index_lookup_positive_ref_count(Guid, CState) of - not_found -> Defer(); - MsgLocation -> client_read1(MsgLocation, Defer, CState) - end; - [{Guid, Msg, _CacheRefCount}] -> - %% Although we've found it, we don't know the - %% refcount, so can't insert into dedup cache - {{ok, Msg}, CState} + ok = update_msg_cache(CurFileCacheEts, MsgId, Msg), + ok = server_cast(CState, {write, CRef, MsgId}). + +read(MsgId, + CState = #client_msstate { cur_file_cache_ets = CurFileCacheEts }) -> + %% Check the cur file cache + case ets:lookup(CurFileCacheEts, MsgId) of + [] -> + Defer = fun() -> {server_call(CState, {read, MsgId}), CState} end, + case index_lookup_positive_ref_count(MsgId, CState) of + not_found -> Defer(); + MsgLocation -> client_read1(MsgLocation, Defer, CState) end; - Msg -> + [{MsgId, Msg, _CacheRefCount}] -> {{ok, Msg}, CState} end. -contains(Guid, CState) -> server_call(CState, {contains, Guid}). +contains(MsgId, CState) -> server_call(CState, {contains, MsgId}). remove([], _CState) -> ok; -remove(Guids, CState = #client_msstate { client_ref = CRef }) -> - server_cast(CState, {remove, CRef, Guids}). -release([], _CState) -> ok; -release(Guids, CState) -> server_cast(CState, {release, Guids}). -sync(Guids, K, CState) -> server_cast(CState, {sync, Guids, K}). 
+remove(MsgIds, CState = #client_msstate { client_ref = CRef }) -> + server_cast(CState, {remove, CRef, MsgIds}). +sync(MsgIds, K, CState) -> server_cast(CState, {sync, MsgIds, K}). set_maximum_since_use(Server, Age) -> gen_server2:cast(Server, {set_maximum_since_use, Age}). @@ -468,11 +456,11 @@ server_call(#client_msstate { server = Server }, Msg) -> server_cast(#client_msstate { server = Server }, Msg) -> gen_server2:cast(Server, Msg). -client_read1(#msg_location { guid = Guid, file = File } = MsgLocation, Defer, +client_read1(#msg_location { msg_id = MsgId, file = File } = MsgLocation, Defer, CState = #client_msstate { file_summary_ets = FileSummaryEts }) -> case ets:lookup(FileSummaryEts, File) of [] -> %% File has been GC'd and no longer exists. Go around again. - read(Guid, CState); + read(MsgId, CState); [#file_summary { locked = Locked, right = Right }] -> client_read2(Locked, Right, MsgLocation, Defer, CState) end. @@ -494,7 +482,7 @@ client_read2(true, _Right, _MsgLocation, Defer, _CState) -> %% the safest and simplest thing to do. Defer(); client_read2(false, _Right, - MsgLocation = #msg_location { guid = Guid, file = File }, + MsgLocation = #msg_location { msg_id = MsgId, file = File }, Defer, CState = #client_msstate { file_summary_ets = FileSummaryEts }) -> %% It's entirely possible that everything we're doing from here on @@ -503,12 +491,11 @@ client_read2(false, _Right, safe_ets_update_counter( FileSummaryEts, File, {#file_summary.readers, +1}, fun (_) -> client_read3(MsgLocation, Defer, CState) end, - fun () -> read(Guid, CState) end). + fun () -> read(MsgId, CState) end). 
-client_read3(#msg_location { guid = Guid, file = File }, Defer, +client_read3(#msg_location { msg_id = MsgId, file = File }, Defer, CState = #client_msstate { file_handles_ets = FileHandlesEts, file_summary_ets = FileSummaryEts, - dedup_cache_ets = DedupCacheEts, gc_pid = GCPid, client_ref = Ref }) -> Release = @@ -530,7 +517,7 @@ client_read3(#msg_location { guid = Guid, file = File }, Defer, %% too). case ets:lookup(FileSummaryEts, File) of [] -> %% GC has deleted our file, just go round again. - read(Guid, CState); + read(MsgId, CState); [#file_summary { locked = true }] -> %% If we get a badarg here, then the GC has finished and %% deleted our file. Try going around again. Otherwise, @@ -540,8 +527,8 @@ client_read3(#msg_location { guid = Guid, file = File }, Defer, %% GC ends, we +1 readers, msg_store ets:deletes (and %% unlocks the dest) try Release(), - Defer() - catch error:badarg -> read(Guid, CState) + Defer() + catch error:badarg -> read(MsgId, CState) end; [#file_summary { locked = false }] -> %% Ok, we're definitely safe to continue - a GC involving @@ -554,7 +541,7 @@ client_read3(#msg_location { guid = Guid, file = File }, Defer, %% us doing the lookup and the +1 on the readers. (Same as %% badarg scenario above, but we don't have a missing file %% - we just have the /wrong/ file). - case index_lookup(Guid, CState) of + case index_lookup(MsgId, CState) of #msg_location { file = File } = MsgLocation -> %% Still the same file. {ok, CState1} = close_all_indicated(CState), @@ -565,8 +552,8 @@ client_read3(#msg_location { guid = Guid, file = File }, Defer, %% Could the msg_store now mark the file to be %% closed? No: marks for closing are issued only %% when the msg_store has locked the file. 
- {Msg, CState2} = %% This will never be the current file - read_from_disk(MsgLocation, CState1, DedupCacheEts), + %% This will never be the current file + {Msg, CState2} = read_from_disk(MsgLocation, CState1), Release(), %% this MUST NOT fail with badarg {{ok, Msg}, CState2}; #msg_location {} = MsgLocation -> %% different file! @@ -580,9 +567,9 @@ client_read3(#msg_location { guid = Guid, file = File }, Defer, end end. -clear_client(CRef, State = #msstate { cref_to_guids = CTG, +clear_client(CRef, State = #msstate { cref_to_msg_ids = CTM, dying_clients = DyingClients }) -> - State #msstate { cref_to_guids = dict:erase(CRef, CTG), + State #msstate { cref_to_msg_ids = dict:erase(CRef, CTM), dying_clients = sets:del_element(CRef, DyingClients) }. @@ -630,13 +617,21 @@ init([Server, BaseDir, ClientRefs, StartupFunState]) -> %% CleanShutdown <=> msg location index and file_summary both %% recovered correctly. - DedupCacheEts = ets:new(rabbit_msg_store_dedup_cache, [set, public]), FileHandlesEts = ets:new(rabbit_msg_store_shared_file_handles, [ordered_set, public]), CurFileCacheEts = ets:new(rabbit_msg_store_cur_file, [set, public]), {ok, FileSizeLimit} = application:get_env(msg_store_file_size_limit), + {ok, GCPid} = rabbit_msg_store_gc:start_link( + #gc_state { dir = Dir, + index_module = IndexModule, + index_state = IndexState, + file_summary_ets = FileSummaryEts, + file_handles_ets = FileHandlesEts, + msg_store = self() + }), + State = #msstate { dir = Dir, index_module = IndexModule, index_state = IndexState, @@ -648,17 +643,16 @@ init([Server, BaseDir, ClientRefs, StartupFunState]) -> sum_valid_data = 0, sum_file_size = 0, pending_gc_completion = orddict:new(), - gc_pid = undefined, + gc_pid = GCPid, file_handles_ets = FileHandlesEts, file_summary_ets = FileSummaryEts, - dedup_cache_ets = DedupCacheEts, cur_file_cache_ets = CurFileCacheEts, dying_clients = sets:new(), clients = Clients, successfully_recovered = CleanShutdown, file_size_limit = FileSizeLimit, - 
cref_to_guids = dict:new() - }, + cref_to_msg_ids = dict:new() + }, %% If we didn't recover the msg location index then we need to %% rebuild it now. @@ -671,17 +665,7 @@ init([Server, BaseDir, ClientRefs, StartupFunState]) -> {ok, Offset} = file_handle_cache:position(CurHdl, Offset), ok = file_handle_cache:truncate(CurHdl), - {ok, GCPid} = rabbit_msg_store_gc:start_link( - #gc_state { dir = Dir, - index_module = IndexModule, - index_state = IndexState, - file_summary_ets = FileSummaryEts, - file_handles_ets = FileHandlesEts, - msg_store = self() - }), - - {ok, maybe_compact( - State1 #msstate { current_file_handle = CurHdl, gc_pid = GCPid }), + {ok, maybe_compact(State1 #msstate { current_file_handle = CurHdl }), hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. @@ -689,7 +673,7 @@ prioritise_call(Msg, _From, _State) -> case Msg of successfully_recovered_state -> 7; {new_client_state, _Ref, _MODC, _CloseFDsFun} -> 7; - {read, _Guid} -> 2; + {read, _MsgId} -> 2; _ -> 0 end. 
@@ -712,29 +696,27 @@ handle_call(successfully_recovered_state, _From, State) -> reply(State #msstate.successfully_recovered, State); handle_call({new_client_state, CRef, MsgOnDiskFun, CloseFDsFun}, _From, - State = #msstate { dir = Dir, - index_state = IndexState, - index_module = IndexModule, - file_handles_ets = FileHandlesEts, - file_summary_ets = FileSummaryEts, - dedup_cache_ets = DedupCacheEts, - cur_file_cache_ets = CurFileCacheEts, - clients = Clients, - gc_pid = GCPid }) -> + State = #msstate { dir = Dir, + index_state = IndexState, + index_module = IndexModule, + file_handles_ets = FileHandlesEts, + file_summary_ets = FileSummaryEts, + cur_file_cache_ets = CurFileCacheEts, + clients = Clients, + gc_pid = GCPid }) -> Clients1 = dict:store(CRef, {MsgOnDiskFun, CloseFDsFun}, Clients), - reply({IndexState, IndexModule, Dir, GCPid, - FileHandlesEts, FileSummaryEts, DedupCacheEts, CurFileCacheEts}, - State #msstate { clients = Clients1 }); + reply({IndexState, IndexModule, Dir, GCPid, FileHandlesEts, FileSummaryEts, + CurFileCacheEts}, State #msstate { clients = Clients1 }); handle_call({client_terminate, CRef}, _From, State) -> reply(ok, clear_client(CRef, State)); -handle_call({read, Guid}, From, State) -> - State1 = read_message(Guid, From, State), +handle_call({read, MsgId}, From, State) -> + State1 = read_message(MsgId, From, State), noreply(State1); -handle_call({contains, Guid}, From, State) -> - State1 = contains_message(Guid, From, State), +handle_call({contains, MsgId}, From, State) -> + State1 = contains_message(MsgId, From, State), noreply(State1). 
handle_cast({client_dying, CRef}, @@ -747,53 +729,47 @@ handle_cast({client_delete, CRef}, State = #msstate { clients = Clients }) -> State1 = State #msstate { clients = dict:erase(CRef, Clients) }, noreply(remove_message(CRef, CRef, clear_client(CRef, State1))); -handle_cast({write, CRef, Guid}, +handle_cast({write, CRef, MsgId}, State = #msstate { cur_file_cache_ets = CurFileCacheEts }) -> - true = 0 =< ets:update_counter(CurFileCacheEts, Guid, {3, -1}), - [{Guid, Msg, _CacheRefCount}] = ets:lookup(CurFileCacheEts, Guid), + true = 0 =< ets:update_counter(CurFileCacheEts, MsgId, {3, -1}), + [{MsgId, Msg, _CacheRefCount}] = ets:lookup(CurFileCacheEts, MsgId), noreply( - case write_action(should_mask_action(CRef, Guid, State), Guid, State) of + case write_action(should_mask_action(CRef, MsgId, State), MsgId, State) of {write, State1} -> - write_message(CRef, Guid, Msg, State1); + write_message(CRef, MsgId, Msg, State1); {ignore, CurFile, State1 = #msstate { current_file = CurFile }} -> State1; {ignore, _File, State1} -> - true = ets:delete_object(CurFileCacheEts, {Guid, Msg, 0}), + true = ets:delete_object(CurFileCacheEts, {MsgId, Msg, 0}), State1; {confirm, CurFile, State1 = #msstate { current_file = CurFile }}-> - record_pending_confirm(CRef, Guid, State1); + record_pending_confirm(CRef, MsgId, State1); {confirm, _File, State1} -> - true = ets:delete_object(CurFileCacheEts, {Guid, Msg, 0}), + true = ets:delete_object(CurFileCacheEts, {MsgId, Msg, 0}), update_pending_confirms( - fun (MsgOnDiskFun, CTG) -> - MsgOnDiskFun(gb_sets:singleton(Guid), written), - CTG + fun (MsgOnDiskFun, CTM) -> + MsgOnDiskFun(gb_sets:singleton(MsgId), written), + CTM end, CRef, State1) end); -handle_cast({remove, CRef, Guids}, State) -> +handle_cast({remove, CRef, MsgIds}, State) -> State1 = lists:foldl( - fun (Guid, State2) -> remove_message(Guid, CRef, State2) end, - State, Guids), - noreply(maybe_compact( - client_confirm(CRef, gb_sets:from_list(Guids), removed, State1))); - 
-handle_cast({release, Guids}, State = - #msstate { dedup_cache_ets = DedupCacheEts }) -> - lists:foreach( - fun (Guid) -> decrement_cache(DedupCacheEts, Guid) end, Guids), - noreply(State); + fun (MsgId, State2) -> remove_message(MsgId, CRef, State2) end, + State, MsgIds), + noreply(maybe_compact(client_confirm(CRef, gb_sets:from_list(MsgIds), + removed, State1))); -handle_cast({sync, Guids, K}, +handle_cast({sync, MsgIds, K}, State = #msstate { current_file = CurFile, current_file_handle = CurHdl, on_sync = Syncs }) -> {ok, SyncOffset} = file_handle_cache:last_sync_offset(CurHdl), - case lists:any(fun (Guid) -> + case lists:any(fun (MsgId) -> #msg_location { file = File, offset = Offset } = - index_lookup(Guid, State), + index_lookup(MsgId, State), File =:= CurFile andalso Offset >= SyncOffset - end, Guids) of + end, MsgIds) of false -> K(), noreply(State); true -> noreply(State #msstate { on_sync = [K | Syncs] }) @@ -837,7 +813,6 @@ terminate(_Reason, State = #msstate { index_state = IndexState, gc_pid = GCPid, file_handles_ets = FileHandlesEts, file_summary_ets = FileSummaryEts, - dedup_cache_ets = DedupCacheEts, cur_file_cache_ets = CurFileCacheEts, clients = Clients, dir = Dir }) -> @@ -847,22 +822,24 @@ terminate(_Reason, State = #msstate { index_state = IndexState, State1 = case CurHdl of undefined -> State; _ -> State2 = internal_sync(State), - file_handle_cache:close(CurHdl), + ok = file_handle_cache:close(CurHdl), State2 end, State3 = close_all_handles(State1), - store_file_summary(FileSummaryEts, Dir), - [ets:delete(T) || - T <- [FileSummaryEts, DedupCacheEts, FileHandlesEts, CurFileCacheEts]], + ok = store_file_summary(FileSummaryEts, Dir), + [true = ets:delete(T) || + T <- [FileSummaryEts, FileHandlesEts, CurFileCacheEts]], IndexModule:terminate(IndexState), - store_recovery_terms([{client_refs, dict:fetch_keys(Clients)}, - {index_module, IndexModule}], Dir), + ok = store_recovery_terms([{client_refs, dict:fetch_keys(Clients)}, + {index_module, 
IndexModule}], Dir), State3 #msstate { index_state = undefined, current_file_handle = undefined }. code_change(_OldVsn, State, _Extra) -> {ok, State}. +format_message_queue(Opt, MQ) -> rabbit_misc:format_message_queue(Opt, MQ). + %%---------------------------------------------------------------------------- %% general helper functions %%---------------------------------------------------------------------------- @@ -875,16 +852,16 @@ reply(Reply, State) -> {State1, Timeout} = next_state(State), {reply, Reply, State1, Timeout}. -next_state(State = #msstate { sync_timer_ref = undefined, - on_sync = Syncs, - cref_to_guids = CTG }) -> - case {Syncs, dict:size(CTG)} of +next_state(State = #msstate { sync_timer_ref = undefined, + on_sync = Syncs, + cref_to_msg_ids = CTM }) -> + case {Syncs, dict:size(CTM)} of {[], 0} -> {State, hibernate}; _ -> {start_sync_timer(State), 0} end; -next_state(State = #msstate { on_sync = Syncs, - cref_to_guids = CTG }) -> - case {Syncs, dict:size(CTG)} of +next_state(State = #msstate { on_sync = Syncs, + cref_to_msg_ids = CTM }) -> + case {Syncs, dict:size(CTM)} of {[], 0} -> {stop_sync_timer(State), hibernate}; _ -> {State, 0} end. 
@@ -901,66 +878,69 @@ stop_sync_timer(State = #msstate { sync_timer_ref = TRef }) -> internal_sync(State = #msstate { current_file_handle = CurHdl, on_sync = Syncs, - cref_to_guids = CTG }) -> + cref_to_msg_ids = CTM }) -> State1 = stop_sync_timer(State), - CGs = dict:fold(fun (CRef, Guids, NS) -> - case gb_sets:is_empty(Guids) of + CGs = dict:fold(fun (CRef, MsgIds, NS) -> + case gb_sets:is_empty(MsgIds) of true -> NS; - false -> [{CRef, Guids} | NS] + false -> [{CRef, MsgIds} | NS] end - end, [], CTG), - case {Syncs, CGs} of - {[], []} -> ok; - _ -> file_handle_cache:sync(CurHdl) - end, + end, [], CTM), + ok = case {Syncs, CGs} of + {[], []} -> ok; + _ -> file_handle_cache:sync(CurHdl) + end, [K() || K <- lists:reverse(Syncs)], - [client_confirm(CRef, Guids, written, State1) || {CRef, Guids} <- CGs], - State1 #msstate { cref_to_guids = dict:new(), on_sync = [] }. + State2 = lists:foldl( + fun ({CRef, MsgIds}, StateN) -> + client_confirm(CRef, MsgIds, written, StateN) + end, State1, CGs), + State2 #msstate { on_sync = [] }. 
-write_action({true, not_found}, _Guid, State) -> +write_action({true, not_found}, _MsgId, State) -> {ignore, undefined, State}; -write_action({true, #msg_location { file = File }}, _Guid, State) -> +write_action({true, #msg_location { file = File }}, _MsgId, State) -> {ignore, File, State}; -write_action({false, not_found}, _Guid, State) -> +write_action({false, not_found}, _MsgId, State) -> {write, State}; write_action({Mask, #msg_location { ref_count = 0, file = File, total_size = TotalSize }}, - Guid, State = #msstate { file_summary_ets = FileSummaryEts }) -> + MsgId, State = #msstate { file_summary_ets = FileSummaryEts }) -> case {Mask, ets:lookup(FileSummaryEts, File)} of {false, [#file_summary { locked = true }]} -> - ok = index_delete(Guid, State), + ok = index_delete(MsgId, State), {write, State}; {false_if_increment, [#file_summary { locked = true }]} -> - %% The msg for Guid is older than the client death + %% The msg for MsgId is older than the client death %% message, but as it is being GC'd currently we'll have %% to write a new copy, which will then be younger, so %% ignore this write. {ignore, File, State}; {_Mask, [#file_summary {}]} -> - ok = index_update_ref_count(Guid, 1, State), + ok = index_update_ref_count(MsgId, 1, State), State1 = adjust_valid_total_size(File, TotalSize, State), {confirm, File, State1} end; write_action({_Mask, #msg_location { ref_count = RefCount, file = File }}, - Guid, State) -> - ok = index_update_ref_count(Guid, RefCount + 1, State), + MsgId, State) -> + ok = index_update_ref_count(MsgId, RefCount + 1, State), %% We already know about it, just update counter. Only update %% field otherwise bad interaction with concurrent GC {confirm, File, State}. -write_message(CRef, Guid, Msg, State) -> - write_message(Guid, Msg, record_pending_confirm(CRef, Guid, State)). +write_message(CRef, MsgId, Msg, State) -> + write_message(MsgId, Msg, record_pending_confirm(CRef, MsgId, State)). 
-write_message(Guid, Msg, +write_message(MsgId, Msg, State = #msstate { current_file_handle = CurHdl, current_file = CurFile, sum_valid_data = SumValid, sum_file_size = SumFileSize, file_summary_ets = FileSummaryEts }) -> {ok, CurOffset} = file_handle_cache:current_virtual_offset(CurHdl), - {ok, TotalSize} = rabbit_msg_file:append(CurHdl, Guid, Msg), + {ok, TotalSize} = rabbit_msg_file:append(CurHdl, MsgId, Msg), ok = index_insert( - #msg_location { guid = Guid, ref_count = 1, file = CurFile, + #msg_location { msg_id = MsgId, ref_count = 1, file = CurFile, offset = CurOffset, total_size = TotalSize }, State), [#file_summary { right = undefined, locked = false }] = ets:lookup(FileSummaryEts, CurFile), @@ -972,31 +952,23 @@ write_message(Guid, Msg, sum_valid_data = SumValid + TotalSize, sum_file_size = SumFileSize + TotalSize }). -read_message(Guid, From, - State = #msstate { dedup_cache_ets = DedupCacheEts }) -> - case index_lookup_positive_ref_count(Guid, State) of - not_found -> - gen_server2:reply(From, not_found), - State; - MsgLocation -> - case fetch_and_increment_cache(DedupCacheEts, Guid) of - not_found -> read_message1(From, MsgLocation, State); - Msg -> gen_server2:reply(From, {ok, Msg}), - State - end +read_message(MsgId, From, State) -> + case index_lookup_positive_ref_count(MsgId, State) of + not_found -> gen_server2:reply(From, not_found), + State; + MsgLocation -> read_message1(From, MsgLocation, State) end. 
-read_message1(From, #msg_location { guid = Guid, ref_count = RefCount, - file = File, offset = Offset } = MsgLoc, +read_message1(From, #msg_location { msg_id = MsgId, file = File, + offset = Offset } = MsgLoc, State = #msstate { current_file = CurFile, current_file_handle = CurHdl, file_summary_ets = FileSummaryEts, - dedup_cache_ets = DedupCacheEts, cur_file_cache_ets = CurFileCacheEts }) -> case File =:= CurFile of true -> {Msg, State1} = %% can return [] if msg in file existed on startup - case ets:lookup(CurFileCacheEts, Guid) of + case ets:lookup(CurFileCacheEts, MsgId) of [] -> {ok, RawOffSet} = file_handle_cache:current_raw_offset(CurHdl), @@ -1004,10 +976,8 @@ read_message1(From, #msg_location { guid = Guid, ref_count = RefCount, true -> file_handle_cache:flush(CurHdl); false -> ok end, - read_from_disk(MsgLoc, State, DedupCacheEts); - [{Guid, Msg1, _CacheRefCount}] -> - ok = maybe_insert_into_cache( - DedupCacheEts, RefCount, Guid, Msg1), + read_from_disk(MsgLoc, State); + [{MsgId, Msg1, _CacheRefCount}] -> {Msg1, State} end, gen_server2:reply(From, {ok, Msg}), @@ -1015,56 +985,51 @@ read_message1(From, #msg_location { guid = Guid, ref_count = RefCount, false -> [#file_summary { locked = Locked }] = ets:lookup(FileSummaryEts, File), case Locked of - true -> add_to_pending_gc_completion({read, Guid, From}, + true -> add_to_pending_gc_completion({read, MsgId, From}, File, State); - false -> {Msg, State1} = - read_from_disk(MsgLoc, State, DedupCacheEts), + false -> {Msg, State1} = read_from_disk(MsgLoc, State), gen_server2:reply(From, {ok, Msg}), State1 end end. 
-read_from_disk(#msg_location { guid = Guid, ref_count = RefCount, - file = File, offset = Offset, - total_size = TotalSize }, - State, DedupCacheEts) -> +read_from_disk(#msg_location { msg_id = MsgId, file = File, offset = Offset, + total_size = TotalSize }, State) -> {Hdl, State1} = get_read_handle(File, State), {ok, Offset} = file_handle_cache:position(Hdl, Offset), - {ok, {Guid, Msg}} = + {ok, {MsgId, Msg}} = case rabbit_msg_file:read(Hdl, TotalSize) of - {ok, {Guid, _}} = Obj -> + {ok, {MsgId, _}} = Obj -> Obj; Rest -> {error, {misread, [{old_state, State}, {file_num, File}, {offset, Offset}, - {guid, Guid}, + {msg_id, MsgId}, {read, Rest}, {proc_dict, get()} ]}} end, - ok = maybe_insert_into_cache(DedupCacheEts, RefCount, Guid, Msg), {Msg, State1}. -contains_message(Guid, From, +contains_message(MsgId, From, State = #msstate { pending_gc_completion = Pending }) -> - case index_lookup_positive_ref_count(Guid, State) of + case index_lookup_positive_ref_count(MsgId, State) of not_found -> gen_server2:reply(From, false), State; #msg_location { file = File } -> case orddict:is_key(File, Pending) of true -> add_to_pending_gc_completion( - {contains, Guid, From}, File, State); + {contains, MsgId, From}, File, State); false -> gen_server2:reply(From, true), State end end. 
-remove_message(Guid, CRef, - State = #msstate { file_summary_ets = FileSummaryEts, - dedup_cache_ets = DedupCacheEts }) -> - case should_mask_action(CRef, Guid, State) of +remove_message(MsgId, CRef, + State = #msstate { file_summary_ets = FileSummaryEts }) -> + case should_mask_action(CRef, MsgId, State) of {true, _Location} -> State; {false_if_increment, #msg_location { ref_count = 0 }} -> @@ -1077,25 +1042,24 @@ remove_message(Guid, CRef, total_size = TotalSize }} when RefCount > 0 -> %% only update field, otherwise bad interaction with %% concurrent GC - Dec = - fun () -> index_update_ref_count(Guid, RefCount - 1, State) end, + Dec = fun () -> + index_update_ref_count(MsgId, RefCount - 1, State) + end, case RefCount of %% don't remove from CUR_FILE_CACHE_ETS_NAME here %% because there may be further writes in the mailbox %% for the same msg. - 1 -> ok = remove_cache_entry(DedupCacheEts, Guid), - case ets:lookup(FileSummaryEts, File) of + 1 -> case ets:lookup(FileSummaryEts, File) of [#file_summary { locked = true }] -> add_to_pending_gc_completion( - {remove, Guid, CRef}, File, State); + {remove, MsgId, CRef}, File, State); [#file_summary {}] -> ok = Dec(), delete_file_if_empty( File, adjust_valid_total_size(File, -TotalSize, State)) end; - _ -> ok = decrement_cache(DedupCacheEts, Guid), - ok = Dec(), + _ -> ok = Dec(), State end end. @@ -1115,12 +1079,12 @@ run_pending(Files, State) -> lists:reverse(orddict:fetch(File, Pending))) end, State, Files). -run_pending_action({read, Guid, From}, State) -> - read_message(Guid, From, State); -run_pending_action({contains, Guid, From}, State) -> - contains_message(Guid, From, State); -run_pending_action({remove, Guid, CRef}, State) -> - remove_message(Guid, CRef, State). 
+run_pending_action({read, MsgId, From}, State) -> + read_message(MsgId, From, State); +run_pending_action({contains, MsgId, From}, State) -> + contains_message(MsgId, From, State); +run_pending_action({remove, MsgId, CRef}, State) -> + remove_message(MsgId, CRef, State). safe_ets_update_counter(Tab, Key, UpdateOp, SuccessFun, FailThunk) -> try @@ -1142,44 +1106,46 @@ orddict_store(Key, Val, Dict) -> false = orddict:is_key(Key, Dict), orddict:store(Key, Val, Dict). -update_pending_confirms(Fun, CRef, State = #msstate { clients = Clients, - cref_to_guids = CTG }) -> +update_pending_confirms(Fun, CRef, + State = #msstate { clients = Clients, + cref_to_msg_ids = CTM }) -> case dict:fetch(CRef, Clients) of {undefined, _CloseFDsFun} -> State; - {MsgOnDiskFun, _CloseFDsFun} -> CTG1 = Fun(MsgOnDiskFun, CTG), - State #msstate { cref_to_guids = CTG1 } + {MsgOnDiskFun, _CloseFDsFun} -> CTM1 = Fun(MsgOnDiskFun, CTM), + State #msstate { + cref_to_msg_ids = CTM1 } end. -record_pending_confirm(CRef, Guid, State) -> +record_pending_confirm(CRef, MsgId, State) -> update_pending_confirms( - fun (_MsgOnDiskFun, CTG) -> - dict:update(CRef, fun (Guids) -> gb_sets:add(Guid, Guids) end, - gb_sets:singleton(Guid), CTG) + fun (_MsgOnDiskFun, CTM) -> + dict:update(CRef, fun (MsgIds) -> gb_sets:add(MsgId, MsgIds) end, + gb_sets:singleton(MsgId), CTM) end, CRef, State). 
-client_confirm(CRef, Guids, ActionTaken, State) -> +client_confirm(CRef, MsgIds, ActionTaken, State) -> update_pending_confirms( - fun (MsgOnDiskFun, CTG) -> - MsgOnDiskFun(Guids, ActionTaken), - case dict:find(CRef, CTG) of - {ok, Gs} -> Guids1 = gb_sets:difference(Gs, Guids), - case gb_sets:is_empty(Guids1) of - true -> dict:erase(CRef, CTG); - false -> dict:store(CRef, Guids1, CTG) + fun (MsgOnDiskFun, CTM) -> + MsgOnDiskFun(MsgIds, ActionTaken), + case dict:find(CRef, CTM) of + {ok, Gs} -> MsgIds1 = gb_sets:difference(Gs, MsgIds), + case gb_sets:is_empty(MsgIds1) of + true -> dict:erase(CRef, CTM); + false -> dict:store(CRef, MsgIds1, CTM) end; - error -> CTG + error -> CTM end end, CRef, State). -%% Detect whether the Guid is older or younger than the client's death +%% Detect whether the MsgId is older or younger than the client's death %% msg (if there is one). If the msg is older than the client death %% msg, and it has a 0 ref_count we must only alter the ref_count, not %% rewrite the msg - rewriting it would make it younger than the death %% msg and thus should be ignored. Note that this (correctly) returns %% false when testing to remove the death msg itself. 
-should_mask_action(CRef, Guid, +should_mask_action(CRef, MsgId, State = #msstate { dying_clients = DyingClients }) -> - case {sets:is_element(CRef, DyingClients), index_lookup(Guid, State)} of + case {sets:is_element(CRef, DyingClients), index_lookup(MsgId, State)} of {false, Location} -> {false, Location}; {true, not_found} -> @@ -1252,7 +1218,7 @@ safe_file_delete(File, Dir, FileHandlesEts) -> close_all_indicated(#client_msstate { file_handles_ets = FileHandlesEts, client_ref = Ref } = - CState) -> + CState) -> Objs = ets:match_object(FileHandlesEts, {{Ref, '_'}, close}), {ok, lists:foldl(fun ({Key = {_Ref, File}, close}, CStateM) -> true = ets:delete(FileHandlesEts, Key), @@ -1316,48 +1282,14 @@ list_sorted_file_names(Dir, Ext) -> %% message cache helper functions %%---------------------------------------------------------------------------- -maybe_insert_into_cache(DedupCacheEts, RefCount, Guid, Msg) - when RefCount > 1 -> - update_msg_cache(DedupCacheEts, Guid, Msg); -maybe_insert_into_cache(_DedupCacheEts, _RefCount, _Guid, _Msg) -> - ok. - -update_msg_cache(CacheEts, Guid, Msg) -> - case ets:insert_new(CacheEts, {Guid, Msg, 1}) of +update_msg_cache(CacheEts, MsgId, Msg) -> + case ets:insert_new(CacheEts, {MsgId, Msg, 1}) of true -> ok; false -> safe_ets_update_counter_ok( - CacheEts, Guid, {3, +1}, - fun () -> update_msg_cache(CacheEts, Guid, Msg) end) + CacheEts, MsgId, {3, +1}, + fun () -> update_msg_cache(CacheEts, MsgId, Msg) end) end. -remove_cache_entry(DedupCacheEts, Guid) -> - true = ets:delete(DedupCacheEts, Guid), - ok. - -fetch_and_increment_cache(DedupCacheEts, Guid) -> - case ets:lookup(DedupCacheEts, Guid) of - [] -> - not_found; - [{_Guid, Msg, _RefCount}] -> - safe_ets_update_counter_ok( - DedupCacheEts, Guid, {3, +1}, - %% someone has deleted us in the meantime, insert us - fun () -> ok = update_msg_cache(DedupCacheEts, Guid, Msg) end), - Msg - end. 
- -decrement_cache(DedupCacheEts, Guid) -> - true = safe_ets_update_counter( - DedupCacheEts, Guid, {3, -1}, - fun (N) when N =< 0 -> true = ets:delete(DedupCacheEts, Guid); - (_N) -> true - end, - %% Guid is not in there because although it's been - %% delivered, it's never actually been read (think: - %% persistent message held in RAM) - fun () -> true end), - ok. - %%---------------------------------------------------------------------------- %% index %%---------------------------------------------------------------------------- @@ -1460,8 +1392,8 @@ recover_file_summary(false, _Dir) -> recover_file_summary(true, Dir) -> Path = filename:join(Dir, ?FILE_SUMMARY_FILENAME), case ets:file2tab(Path) of - {ok, Tid} -> file:delete(Path), - {true, Tid}; + {ok, Tid} -> ok = file:delete(Path), + {true, Tid}; {error, _Error} -> recover_file_summary(false, Dir) end. @@ -1469,19 +1401,19 @@ count_msg_refs(Gen, Seed, State) -> case Gen(Seed) of finished -> ok; - {_Guid, 0, Next} -> + {_MsgId, 0, Next} -> count_msg_refs(Gen, Next, State); - {Guid, Delta, Next} -> - ok = case index_lookup(Guid, State) of + {MsgId, Delta, Next} -> + ok = case index_lookup(MsgId, State) of not_found -> - index_insert(#msg_location { guid = Guid, + index_insert(#msg_location { msg_id = MsgId, file = undefined, ref_count = Delta }, State); #msg_location { ref_count = RefCount } = StoreEntry -> NewRefCount = RefCount + Delta, case NewRefCount of - 0 -> index_delete(Guid, State); + 0 -> index_delete(MsgId, State); _ -> index_update(StoreEntry #msg_location { ref_count = NewRefCount }, State) @@ -1525,15 +1457,17 @@ scan_file_for_valid_messages(Dir, FileName) -> case open_file(Dir, FileName, ?READ_MODE) of {ok, Hdl} -> Valid = rabbit_msg_file:scan( Hdl, filelib:file_size( - form_filename(Dir, FileName))), - %% if something really bad has happened, - %% the close could fail, but ignore - file_handle_cache:close(Hdl), + form_filename(Dir, FileName)), + fun scan_fun/2, []), + ok = 
file_handle_cache:close(Hdl), Valid; {error, enoent} -> {ok, [], 0}; {error, Reason} -> {error, {unable_to_scan_file, FileName, Reason}} end. +scan_fun({MsgId, TotalSize, Offset, _Msg}, Acc) -> + [{MsgId, TotalSize, Offset} | Acc]. + %% Takes the list in *ascending* order (i.e. eldest message %% first). This is the opposite of what scan_file_for_valid_messages %% produces. The list of msgs that is produced is youngest first. @@ -1581,8 +1515,8 @@ build_index(Gatherer, Left, [], sum_file_size = SumFileSize }) -> case gatherer:out(Gatherer) of empty -> + unlink(Gatherer), ok = gatherer:stop(Gatherer), - ok = rabbit_misc:unlink_and_capture_exit(Gatherer), ok = index_delete_by_file(undefined, State), Offset = case ets:lookup(FileSummaryEts, Left) of [] -> 0; @@ -1611,8 +1545,8 @@ build_index_worker(Gatherer, State = #msstate { dir = Dir }, scan_file_for_valid_messages(Dir, filenum_to_name(File)), {ValidMessages, ValidTotalSize} = lists:foldl( - fun (Obj = {Guid, TotalSize, Offset}, {VMAcc, VTSAcc}) -> - case index_lookup(Guid, State) of + fun (Obj = {MsgId, TotalSize, Offset}, {VMAcc, VTSAcc}) -> + case index_lookup(MsgId, State) of #msg_location { file = undefined } = StoreEntry -> ok = index_update(StoreEntry #msg_location { file = File, offset = Offset, @@ -1630,7 +1564,7 @@ build_index_worker(Gatherer, State = #msstate { dir = Dir }, %% file size. 
[] -> {undefined, case ValidMessages of [] -> 0; - _ -> {_Guid, TotalSize, Offset} = + _ -> {_MsgId, TotalSize, Offset} = lists:last(ValidMessages), Offset + TotalSize end}; @@ -1685,8 +1619,8 @@ maybe_compact(State = #msstate { sum_valid_data = SumValid, pending_gc_completion = Pending, file_summary_ets = FileSummaryEts, file_size_limit = FileSizeLimit }) - when (SumFileSize > 2 * FileSizeLimit andalso - (SumFileSize - SumValid) / SumFileSize > ?GARBAGE_FRACTION) -> + when SumFileSize > 2 * FileSizeLimit andalso + (SumFileSize - SumValid) / SumFileSize > ?GARBAGE_FRACTION -> %% TODO: the algorithm here is sub-optimal - it may result in a %% complete traversal of FileSummaryEts. case ets:first(FileSummaryEts) of @@ -1749,10 +1683,10 @@ delete_file_if_empty(File, State = #msstate { locked = false }] = ets:lookup(FileSummaryEts, File), case ValidData of - 0 -> %% don't delete the file_summary_ets entry for File here - %% because we could have readers which need to be able to - %% decrement the readers count. - true = ets:update_element(FileSummaryEts, File, + %% don't delete the file_summary_ets entry for File here + %% because we could have readers which need to be able to + %% decrement the readers count. 
+ 0 -> true = ets:update_element(FileSummaryEts, File, {#file_summary.locked, true}), ok = rabbit_msg_store_gc:delete(GCPid, File), Pending1 = orddict_store(File, [], Pending), @@ -1805,17 +1739,17 @@ combine_files(Source, Destination, dir = Dir, msg_store = Server }) -> [#file_summary { - readers = 0, - left = Destination, - valid_total_size = SourceValid, - file_size = SourceFileSize, - locked = true }] = ets:lookup(FileSummaryEts, Source), + readers = 0, + left = Destination, + valid_total_size = SourceValid, + file_size = SourceFileSize, + locked = true }] = ets:lookup(FileSummaryEts, Source), [#file_summary { - readers = 0, - right = Source, - valid_total_size = DestinationValid, - file_size = DestinationFileSize, - locked = true }] = ets:lookup(FileSummaryEts, Destination), + readers = 0, + right = Source, + valid_total_size = DestinationValid, + file_size = DestinationFileSize, + locked = true }] = ets:lookup(FileSummaryEts, Destination), SourceName = filenum_to_name(Source), DestinationName = filenum_to_name(Destination), @@ -1895,8 +1829,8 @@ load_and_vacuum_message_file(File, #gc_state { dir = Dir, scan_file_for_valid_messages(Dir, filenum_to_name(File)), %% foldl will reverse so will end up with msgs in ascending offset order lists:foldl( - fun ({Guid, TotalSize, Offset}, Acc = {List, Size}) -> - case Index:lookup(Guid, IndexState) of + fun ({MsgId, TotalSize, Offset}, Acc = {List, Size}) -> + case Index:lookup(MsgId, IndexState) of #msg_location { file = File, total_size = TotalSize, offset = Offset, ref_count = 0 } = Entry -> ok = Index:delete_object(Entry, IndexState), @@ -1921,13 +1855,13 @@ copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, end, case lists:foldl( - fun (#msg_location { guid = Guid, offset = Offset, + fun (#msg_location { msg_id = MsgId, offset = Offset, total_size = TotalSize }, {CurOffset, Block = {BlockStart, BlockEnd}}) -> %% CurOffset is in the DestinationFile. 
%% Offset, BlockStart and BlockEnd are in the SourceFile %% update MsgLocation to reflect change of file and offset - ok = Index:update_fields(Guid, + ok = Index:update_fields(MsgId, [{#msg_location.file, Destination}, {#msg_location.offset, CurOffset}], IndexState), @@ -1958,3 +1892,54 @@ copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, {got, FinalOffsetZ}, {destination, Destination}]} end. + +force_recovery(BaseDir, Store) -> + Dir = filename:join(BaseDir, atom_to_list(Store)), + case file:delete(filename:join(Dir, ?CLEAN_FILENAME)) of + ok -> ok; + {error, enoent} -> ok + end, + recover_crashed_compactions(BaseDir), + ok. + +foreach_file(D, Fun, Files) -> + [ok = Fun(filename:join(D, File)) || File <- Files]. + +foreach_file(D1, D2, Fun, Files) -> + [ok = Fun(filename:join(D1, File), filename:join(D2, File)) || File <- Files]. + +transform_dir(BaseDir, Store, TransformFun) -> + Dir = filename:join(BaseDir, atom_to_list(Store)), + TmpDir = filename:join(Dir, ?TRANSFORM_TMP), + TransformFile = fun (A, B) -> transform_msg_file(A, B, TransformFun) end, + CopyFile = fun (Src, Dst) -> {ok, _Bytes} = file:copy(Src, Dst), ok end, + case filelib:is_dir(TmpDir) of + true -> throw({error, transform_failed_previously}); + false -> FileList = list_sorted_file_names(Dir, ?FILE_EXTENSION), + foreach_file(Dir, TmpDir, TransformFile, FileList), + foreach_file(Dir, fun file:delete/1, FileList), + foreach_file(TmpDir, Dir, CopyFile, FileList), + foreach_file(TmpDir, fun file:delete/1, FileList), + ok = file:del_dir(TmpDir) + end. 
+ +transform_msg_file(FileOld, FileNew, TransformFun) -> + ok = rabbit_misc:ensure_parent_dirs_exist(FileNew), + {ok, RefOld} = file_handle_cache:open(FileOld, [raw, binary, read], []), + {ok, RefNew} = file_handle_cache:open(FileNew, [raw, binary, write], + [{write_buffer, + ?HANDLE_CACHE_BUFFER_SIZE}]), + {ok, _Acc, _IgnoreSize} = + rabbit_msg_file:scan( + RefOld, filelib:file_size(FileOld), + fun({MsgId, _Size, _Offset, BinMsg}, ok) -> + {ok, MsgNew} = case binary_to_term(BinMsg) of + <<>> -> {ok, <<>>}; %% dying client marker + Msg -> TransformFun(Msg) + end, + {ok, _} = rabbit_msg_file:append(RefNew, MsgId, MsgNew), + ok + end, ok), + ok = file_handle_cache:close(RefOld), + ok = file_handle_cache:close(RefNew), + ok. diff --git a/src/rabbit_msg_store_ets_index.erl b/src/rabbit_msg_store_ets_index.erl index 077400d6..d6dc5568 100644 --- a/src/rabbit_msg_store_ets_index.erl +++ b/src/rabbit_msg_store_ets_index.erl @@ -31,7 +31,7 @@ new(Dir) -> file:delete(filename:join(Dir, ?FILENAME)), - Tid = ets:new(?MSG_LOC_NAME, [set, public, {keypos, #msg_location.guid}]), + Tid = ets:new(?MSG_LOC_NAME, [set, public, {keypos, #msg_location.msg_id}]), #state { table = Tid, dir = Dir }. recover(Dir) -> diff --git a/src/rabbit_multi.erl b/src/rabbit_multi.erl deleted file mode 100644 index ebd7fe8a..00000000 --- a/src/rabbit_multi.erl +++ /dev/null @@ -1,349 +0,0 @@ -%% The contents of this file are subject to the Mozilla Public License -%% Version 1.1 (the "License"); you may not use this file except in -%% compliance with the License. You may obtain a copy of the License -%% at http://www.mozilla.org/MPL/ -%% -%% Software distributed under the License is distributed on an "AS IS" -%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See -%% the License for the specific language governing rights and -%% limitations under the License. -%% -%% The Original Code is RabbitMQ. -%% -%% The Initial Developer of the Original Code is VMware, Inc. 
-%% Copyright (c) 2007-2011 VMware, Inc. All rights reserved. -%% - --module(rabbit_multi). --include("rabbit.hrl"). - --export([start/0, stop/0]). - --define(RPC_SLEEP, 500). - -%%---------------------------------------------------------------------------- - --ifdef(use_specs). - --spec(start/0 :: () -> no_return()). --spec(stop/0 :: () -> 'ok'). --spec(usage/0 :: () -> no_return()). - --endif. - -%%---------------------------------------------------------------------------- - -start() -> - RpcTimeout = - case init:get_argument(maxwait) of - {ok,[[N1]]} -> 1000 * list_to_integer(N1); - _ -> ?MAX_WAIT - end, - case init:get_plain_arguments() of - [] -> - usage(); - FullCommand -> - {Command, Args} = parse_args(FullCommand), - case catch action(Command, Args, RpcTimeout) of - ok -> - io:format("done.~n"), - halt(); - {'EXIT', {function_clause, [{?MODULE, action, _} | _]}} -> - print_error("invalid command '~s'", - [string:join(FullCommand, " ")]), - usage(); - timeout -> - print_error("timeout starting some nodes.", []), - halt(1); - Other -> - print_error("~p", [Other]), - halt(2) - end - end. - -print_error(Format, Args) -> - rabbit_misc:format_stderr("Error: " ++ Format ++ "~n", Args). - -parse_args([Command | Args]) -> - {list_to_atom(Command), Args}. - -stop() -> - ok. - -usage() -> - io:format("~s", [rabbit_multi_usage:usage()]), - halt(1). 
- -action(start_all, [NodeCount], RpcTimeout) -> - io:format("Starting all nodes...~n", []), - application:load(rabbit), - {_NodeNamePrefix, NodeHost} = NodeName = rabbit_misc:nodeparts( - getenv("RABBITMQ_NODENAME")), - case net_adm:names(NodeHost) of - {error, EpmdReason} -> - throw({cannot_connect_to_epmd, NodeHost, EpmdReason}); - {ok, _} -> - ok - end, - {NodePids, Running} = - case list_to_integer(NodeCount) of - 1 -> {NodePid, Started} = start_node(rabbit_misc:makenode(NodeName), - RpcTimeout), - {[NodePid], Started}; - N -> start_nodes(N, N, [], true, NodeName, - get_node_tcp_listener(), RpcTimeout) - end, - write_pids_file(NodePids), - case Running of - true -> ok; - false -> timeout - end; - -action(status, [], RpcTimeout) -> - io:format("Status of all running nodes...~n", []), - call_all_nodes( - fun ({Node, Pid}) -> - RabbitRunning = - case is_rabbit_running(Node, RpcTimeout) of - false -> not_running; - true -> running - end, - io:format("Node '~p' with Pid ~p: ~p~n", - [Node, Pid, RabbitRunning]) - end); - -action(stop_all, [], RpcTimeout) -> - io:format("Stopping all nodes...~n", []), - call_all_nodes(fun ({Node, Pid}) -> - io:format("Stopping node ~p~n", [Node]), - rpc:call(Node, rabbit, stop_and_halt, []), - case kill_wait(Pid, RpcTimeout, false) of - false -> kill_wait(Pid, RpcTimeout, true); - true -> ok - end, - io:format("OK~n", []) - end), - delete_pids_file(); - -action(rotate_logs, [], RpcTimeout) -> - action(rotate_logs, [""], RpcTimeout); - -action(rotate_logs, [Suffix], RpcTimeout) -> - io:format("Rotating logs for all nodes...~n", []), - BinarySuffix = list_to_binary(Suffix), - call_all_nodes( - fun ({Node, _}) -> - io:format("Rotating logs for node ~p", [Node]), - case rpc:call(Node, rabbit, rotate_logs, - [BinarySuffix], RpcTimeout) of - {badrpc, Error} -> io:format(": ~p.~n", [Error]); - ok -> io:format(": ok.~n", []) - end - end). 
- -%% PNodePid is the list of PIDs -%% Running is a boolean exhibiting success at some moment -start_nodes(0, _, PNodePid, Running, _, _, _) -> {PNodePid, Running}; - -start_nodes(N, Total, PNodePid, Running, NodeNameBase, Listener, RpcTimeout) -> - {NodePre, NodeSuff} = NodeNameBase, - NodeNumber = Total - N, - NodePre1 = case NodeNumber of - %% For compatibility with running a single node - 0 -> NodePre; - _ -> NodePre ++ "_" ++ integer_to_list(NodeNumber) - end, - Node = rabbit_misc:makenode({NodePre1, NodeSuff}), - os:putenv("RABBITMQ_NODENAME", atom_to_list(Node)), - case Listener of - {NodeIpAddress, NodePortBase} -> - NodePort = NodePortBase + NodeNumber, - os:putenv("RABBITMQ_NODE_PORT", integer_to_list(NodePort)), - os:putenv("RABBITMQ_NODE_IP_ADDRESS", NodeIpAddress); - undefined -> - ok - end, - {NodePid, Started} = start_node(Node, RpcTimeout), - start_nodes(N - 1, Total, [NodePid | PNodePid], - Started and Running, NodeNameBase, Listener, RpcTimeout). - -start_node(Node, RpcTimeout) -> - io:format("Starting node ~s...~n", [Node]), - case rpc:call(Node, os, getpid, []) of - {badrpc, _} -> - Port = run_rabbitmq_server(), - Started = wait_for_rabbit_to_start(Node, RpcTimeout, Port), - Pid = case rpc:call(Node, os, getpid, []) of - {badrpc, _} -> throw(cannot_get_pid); - PidS -> list_to_integer(PidS) - end, - io:format("~s~n", [case Started of - true -> "OK"; - false -> "timeout" - end]), - {{Node, Pid}, Started}; - PidS -> - Pid = list_to_integer(PidS), - throw({node_already_running, Node, Pid}) - end. - -wait_for_rabbit_to_start(_ , RpcTimeout, _) when RpcTimeout < 0 -> - false; -wait_for_rabbit_to_start(Node, RpcTimeout, Port) -> - case is_rabbit_running(Node, RpcTimeout) of - true -> true; - false -> receive - {'EXIT', Port, PosixCode} -> - throw({node_start_failed, PosixCode}) - after ?RPC_SLEEP -> - wait_for_rabbit_to_start( - Node, RpcTimeout - ?RPC_SLEEP, Port) - end - end. 
- -run_rabbitmq_server() -> - with_os([{unix, fun run_rabbitmq_server_unix/0}, - {win32, fun run_rabbitmq_server_win32/0}]). - -run_rabbitmq_server_unix() -> - CmdLine = getenv("RABBITMQ_SCRIPT_HOME") ++ "/rabbitmq-server -noinput", - erlang:open_port({spawn, CmdLine}, [nouse_stdio]). - -run_rabbitmq_server_win32() -> - Cmd = filename:nativename(os:find_executable("cmd")), - CmdLine = "\"" ++ getenv("RABBITMQ_SCRIPT_HOME") ++ - "\\rabbitmq-server.bat\" -noinput -detached", - erlang:open_port({spawn_executable, Cmd}, - [{arg0, Cmd}, {args, ["/q", "/s", "/c", CmdLine]}, - nouse_stdio]). - -is_rabbit_running(Node, RpcTimeout) -> - case rpc:call(Node, rabbit, status, [], RpcTimeout) of - {badrpc, _} -> false; - Status -> case proplists:get_value(running_applications, Status) of - undefined -> false; - Apps -> lists:keymember(rabbit, 1, Apps) - end - end. - -with_os(Handlers) -> - {OsFamily, _} = os:type(), - case proplists:get_value(OsFamily, Handlers) of - undefined -> throw({unsupported_os, OsFamily}); - Handler -> Handler() - end. - -pids_file() -> getenv("RABBITMQ_PIDS_FILE"). - -write_pids_file(Pids) -> - FileName = pids_file(), - Handle = case file:open(FileName, [write]) of - {ok, Device} -> - Device; - {error, Reason} -> - throw({cannot_create_pids_file, FileName, Reason}) - end, - try - ok = io:write(Handle, Pids), - ok = io:put_chars(Handle, [$.]) - after - case file:close(Handle) of - ok -> ok; - {error, Reason1} -> - throw({cannot_create_pids_file, FileName, Reason1}) - end - end, - ok. - -delete_pids_file() -> - FileName = pids_file(), - case file:delete(FileName) of - ok -> ok; - {error, enoent} -> ok; - {error, Reason} -> throw({cannot_delete_pids_file, FileName, Reason}) - end. - -read_pids_file() -> - FileName = pids_file(), - case file:consult(FileName) of - {ok, [Pids]} -> Pids; - {error, enoent} -> []; - {error, Reason} -> throw({cannot_read_pids_file, FileName, Reason}) - end. 
- -kill_wait(Pid, TimeLeft, Forceful) when TimeLeft < 0 -> - Cmd = with_os([{unix, fun () -> if Forceful -> "kill -9"; - true -> "kill" - end - end}, - %% Kill forcefully always on Windows, since erl.exe - %% seems to completely ignore non-forceful killing - %% even when everything is working - {win32, fun () -> "taskkill /f /pid" end}]), - os:cmd(Cmd ++ " " ++ integer_to_list(Pid)), - false; % Don't assume what we did just worked! - -% Returns true if the process is dead, false otherwise. -kill_wait(Pid, TimeLeft, Forceful) -> - timer:sleep(?RPC_SLEEP), - io:format(".", []), - is_dead(Pid) orelse kill_wait(Pid, TimeLeft - ?RPC_SLEEP, Forceful). - -% Test using some OS clunkiness since we shouldn't trust -% rpc:call(os, getpid, []) at this point -is_dead(Pid) -> - PidS = integer_to_list(Pid), - with_os([{unix, fun () -> - system("kill -0 " ++ PidS - ++ " >/dev/null 2>&1") /= 0 - end}, - {win32, fun () -> - Res = os:cmd("tasklist /nh /fi \"pid eq " ++ - PidS ++ "\" 2>&1"), - case re:run(Res, "erl\\.exe", [{capture, none}]) of - match -> false; - _ -> true - end - end}]). - -% Like system(3) -system(Cmd) -> - ShCmd = "sh -c '" ++ escape_quotes(Cmd) ++ "'", - Port = erlang:open_port({spawn, ShCmd}, [exit_status,nouse_stdio]), - receive {Port, {exit_status, Status}} -> Status end. - -% Escape the quotes in a shell command so that it can be used in "sh -c 'cmd'" -escape_quotes(Cmd) -> - lists:flatten(lists:map(fun ($') -> "'\\''"; (Ch) -> Ch end, Cmd)). - -call_all_nodes(Func) -> - case read_pids_file() of - [] -> throw(no_nodes_running); - NodePids -> lists:foreach(Func, NodePids) - end. - -getenv(Var) -> - case os:getenv(Var) of - false -> throw({missing_env_var, Var}); - Value -> Value - end. 
- -get_node_tcp_listener() -> - try - {getenv("RABBITMQ_NODE_IP_ADDRESS"), - list_to_integer(getenv("RABBITMQ_NODE_PORT"))} - catch _ -> - case application:get_env(rabbit, tcp_listeners) of - {ok, [{_IpAddy, _Port} = Listener]} -> - Listener; - {ok, [Port]} when is_number(Port) -> - {"0.0.0.0", Port}; - {ok, []} -> - undefined; - {ok, Other} -> - throw({cannot_start_multiple_nodes, multiple_tcp_listeners, - Other}); - undefined -> - throw({missing_configuration, tcp_listeners}) - end - end. diff --git a/src/rabbit_net.erl b/src/rabbit_net.erl index c500548a..b944ec81 100644 --- a/src/rabbit_net.erl +++ b/src/rabbit_net.erl @@ -18,7 +18,7 @@ -include("rabbit.hrl"). -export([is_ssl/1, ssl_info/1, controlling_process/2, getstat/2, - async_recv/3, port_command/2, send/2, close/1, + recv/1, async_recv/3, port_command/2, setopts/2, send/2, close/1, sockname/1, peername/1, peercert/1]). %%--------------------------------------------------------------------------- @@ -28,8 +28,8 @@ -export_type([socket/0]). -type(stat_option() :: - 'recv_cnt' | 'recv_max' | 'recv_avg' | 'recv_oct' | 'recv_dvi' | - 'send_cnt' | 'send_max' | 'send_avg' | 'send_oct' | 'send_pend'). + 'recv_cnt' | 'recv_max' | 'recv_avg' | 'recv_oct' | 'recv_dvi' | + 'send_cnt' | 'send_max' | 'send_avg' | 'send_oct' | 'send_pend'). -type(ok_val_or_error(A) :: rabbit_types:ok_or_error2(A, any())). -type(ok_or_any_error() :: rabbit_types:ok_or_error(any())). -type(socket() :: port() | #ssl_socket{}). @@ -42,9 +42,15 @@ -spec(getstat/2 :: (socket(), [stat_option()]) -> ok_val_or_error([{stat_option(), integer()}])). +-spec(recv/1 :: (socket()) -> + {'data', [char()] | binary()} | 'closed' | + rabbit_types:error(any()) | {'other', any()}). -spec(async_recv/3 :: (socket(), integer(), timeout()) -> rabbit_types:ok(any())). -spec(port_command/2 :: (socket(), iolist()) -> 'true'). +-spec(setopts/2 :: (socket(), [{atom(), any()} | + {raw, non_neg_integer(), non_neg_integer(), + binary()}]) -> ok_or_any_error()). 
-spec(send/2 :: (socket(), binary() | iolist()) -> ok_or_any_error()). -spec(close/1 :: (socket()) -> ok_or_any_error()). -spec(sockname/1 :: @@ -80,6 +86,19 @@ getstat(Sock, Stats) when ?IS_SSL(Sock) -> getstat(Sock, Stats) when is_port(Sock) -> inet:getstat(Sock, Stats). +recv(Sock) when ?IS_SSL(Sock) -> + recv(Sock#ssl_socket.ssl, {ssl, ssl_closed, ssl_error}); +recv(Sock) when is_port(Sock) -> + recv(Sock, {tcp, tcp_closed, tcp_error}). + +recv(S, {DataTag, ClosedTag, ErrorTag}) -> + receive + {DataTag, S, Data} -> {data, Data}; + {ClosedTag, S} -> closed; + {ErrorTag, S, Reason} -> {error, Reason}; + Other -> {other, Other} + end. + async_recv(Sock, Length, Timeout) when ?IS_SSL(Sock) -> Pid = self(), Ref = make_ref(), @@ -103,6 +122,11 @@ port_command(Sock, Data) when ?IS_SSL(Sock) -> port_command(Sock, Data) when is_port(Sock) -> erlang:port_command(Sock, Data). +setopts(Sock, Options) when ?IS_SSL(Sock) -> + ssl:setopts(Sock#ssl_socket.ssl, Options); +setopts(Sock, Options) when is_port(Sock) -> + inet:setopts(Sock, Options). + send(Sock, Data) when ?IS_SSL(Sock) -> ssl:send(Sock#ssl_socket.ssl, Data); send(Sock, Data) when is_port(Sock) -> gen_tcp:send(Sock, Data). diff --git a/src/rabbit_networking.erl b/src/rabbit_networking.erl index 283d25c7..451e56e8 100644 --- a/src/rabbit_networking.erl +++ b/src/rabbit_networking.erl @@ -24,7 +24,8 @@ close_connection/2]). %%used by TCP-based transports, e.g. STOMP adapter --export([check_tcp_listener_address/2]). +-export([check_tcp_listener_address/2, + ensure_ssl/0, ssl_transform_fun/1]). -export([tcp_listener_started/3, tcp_listener_stopped/3, start_client/1, start_ssl_client/2]). @@ -32,16 +33,6 @@ -include("rabbit.hrl"). -include_lib("kernel/include/inet.hrl"). --define(RABBIT_TCP_OPTS, [ - binary, - {packet, raw}, % no packaging - {reuseaddr, true}, % allow rebind without waiting - {backlog, 128}, % use the maximum listen(2) backlog value - %% {nodelay, true}, % TCP_NODELAY - disable Nagle's alg. 
- %% {delay_send, true}, - {exit_on_close, false} - ]). - -define(SSL_TIMEOUT, 5). %% seconds -define(FIRST_TEST_BIND_PORT, 10000). @@ -52,6 +43,9 @@ -export_type([ip_port/0, hostname/0]). +-type(hostname() :: inet:hostname()). +-type(ip_port() :: inet:ip_port()). + -type(family() :: atom()). -type(listener_config() :: ip_port() | {hostname(), ip_port()} | @@ -98,19 +92,8 @@ boot_ssl() -> {ok, []} -> ok; {ok, SslListeners} -> - ok = rabbit_misc:start_applications([crypto, public_key, ssl]), - {ok, SslOptsConfig} = application:get_env(ssl_options), - % unknown_ca errors are silently ignored prior to R14B unless we - % supply this verify_fun - remove when at least R14B is required - SslOpts = - case proplists:get_value(verify, SslOptsConfig, verify_none) of - verify_none -> SslOptsConfig; - verify_peer -> [{verify_fun, fun([]) -> true; - ([_|_]) -> false - end} - | SslOptsConfig] - end, - [start_ssl_listener(Listener, SslOpts) || Listener <- SslListeners], + [start_ssl_listener(Listener, ensure_ssl()) + || Listener <- SslListeners], ok end. @@ -157,6 +140,34 @@ resolve_family({_,_,_,_,_,_,_,_}, auto) -> inet6; resolve_family(IP, auto) -> throw({error, {strange_family, IP}}); resolve_family(_, F) -> F. +ensure_ssl() -> + ok = rabbit_misc:start_applications([crypto, public_key, ssl]), + {ok, SslOptsConfig} = application:get_env(rabbit, ssl_options), + + % unknown_ca errors are silently ignored prior to R14B unless we + % supply this verify_fun - remove when at least R14B is required + case proplists:get_value(verify, SslOptsConfig, verify_none) of + verify_none -> SslOptsConfig; + verify_peer -> [{verify_fun, fun([]) -> true; + ([_|_]) -> false + end} + | SslOptsConfig] + end. 
+ +ssl_transform_fun(SslOpts) -> + fun (Sock) -> + case catch ssl:ssl_accept(Sock, SslOpts, ?SSL_TIMEOUT * 1000) of + {ok, SslSock} -> + rabbit_log:info("upgraded TCP connection ~p to SSL~n", + [self()]), + {ok, #ssl_socket{tcp = Sock, ssl = SslSock}}; + {error, Reason} -> + {error, {ssl_upgrade_error, Reason}}; + {'EXIT', Reason} -> + {error, {ssl_upgrade_failure, Reason}} + end + end. + check_tcp_listener_address(NamePrefix, Port) when is_integer(Port) -> check_tcp_listener_address_auto(NamePrefix, Port); @@ -200,7 +211,7 @@ start_listener0({IPAddress, Port, Family, Name}, Protocol, Label, OnConnect) -> rabbit_sup, {Name, {tcp_listener_sup, start_link, - [IPAddress, Port, [Family | ?RABBIT_TCP_OPTS], + [IPAddress, Port, [Family | tcp_opts()], {?MODULE, tcp_listener_started, [Protocol]}, {?MODULE, tcp_listener_stopped, [Protocol]}, OnConnect, Label]}, @@ -256,21 +267,7 @@ start_client(Sock) -> start_client(Sock, fun (S) -> {ok, S} end). start_ssl_client(SslOpts, Sock) -> - start_client( - Sock, - fun (Sock1) -> - case catch ssl:ssl_accept(Sock1, SslOpts, ?SSL_TIMEOUT * 1000) of - {ok, SslSock} -> - rabbit_log:info("upgraded TCP connection ~p to SSL~n", - [self()]), - {ok, #ssl_socket{tcp = Sock1, ssl = SslSock}}; - {error, Reason} -> - {error, {ssl_upgrade_error, Reason}}; - {'EXIT', Reason} -> - {error, {ssl_upgrade_failure, Reason}} - - end - end). + start_client(Sock, ssl_transform_fun(SslOpts)). connections() -> [rabbit_connection_sup:reader(ConnSup) || @@ -315,6 +312,10 @@ hostname() -> cmap(F) -> rabbit_misc:filter_exit_map(F, connections()). +tcp_opts() -> + {ok, Opts} = application:get_env(rabbit, tcp_listen_options), + Opts. + %%-------------------------------------------------------------------- %% There are three kinds of machine (for our purposes). 
diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl index e4bc1cdc..1f30a2fc 100644 --- a/src/rabbit_node_monitor.erl +++ b/src/rabbit_node_monitor.erl @@ -22,14 +22,41 @@ -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). +-export([notify_cluster/0, rabbit_running_on/1]). -define(SERVER, ?MODULE). +-define(RABBIT_UP_RPC_TIMEOUT, 2000). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-spec(rabbit_running_on/1 :: (node()) -> 'ok'). +-spec(notify_cluster/0 :: () -> 'ok'). + +-endif. %%-------------------------------------------------------------------- start_link() -> gen_server:start_link({local, ?SERVER}, ?MODULE, [], []). +rabbit_running_on(Node) -> + gen_server:cast(rabbit_node_monitor, {rabbit_running_on, Node}). + +notify_cluster() -> + Node = node(), + Nodes = rabbit_mnesia:running_clustered_nodes() -- [Node], + %% notify other rabbits of this rabbit + case rpc:multicall(Nodes, rabbit_node_monitor, rabbit_running_on, + [Node], ?RABBIT_UP_RPC_TIMEOUT) of + {_, [] } -> ok; + {_, Bad} -> rabbit_log:info("failed to contact nodes ~p~n", [Bad]) + end, + %% register other active rabbits with this rabbit + [ rabbit_node_monitor:rabbit_running_on(N) || N <- Nodes ], + ok. + %%-------------------------------------------------------------------- init([]) -> @@ -39,19 +66,21 @@ init([]) -> handle_call(_Request, _From, State) -> {noreply, State}. +handle_cast({rabbit_running_on, Node}, State) -> + rabbit_log:info("node ~p up~n", [Node]), + erlang:monitor(process, {rabbit, Node}), + ok = rabbit_alarm:on_node_up(Node), + {noreply, State}; handle_cast(_Msg, State) -> {noreply, State}. -handle_info({nodeup, Node}, State) -> - rabbit_log:info("node ~p up", [Node]), - {noreply, State}; handle_info({nodedown, Node}, State) -> - rabbit_log:info("node ~p down", [Node]), - %% TODO: This may turn out to be a performance hog when there are - %% lots of nodes. 
We really only need to execute this code on - %% *one* node, rather than all of them. - ok = rabbit_networking:on_node_down(Node), - ok = rabbit_amqqueue:on_node_down(Node), + rabbit_log:info("node ~p down~n", [Node]), + ok = handle_dead_rabbit(Node), + {noreply, State}; +handle_info({'DOWN', _MRef, process, {rabbit, Node}, _Reason}, State) -> + rabbit_log:info("node ~p lost 'rabbit'~n", [Node]), + ok = handle_dead_rabbit(Node), {noreply, State}; handle_info(_Info, State) -> {noreply, State}. @@ -64,3 +93,10 @@ code_change(_OldVsn, State, _Extra) -> %%-------------------------------------------------------------------- +%% TODO: This may turn out to be a performance hog when there are lots +%% of nodes. We really only need to execute some of these statements +%% on *one* node, rather than all of them. +handle_dead_rabbit(Node) -> + ok = rabbit_networking:on_node_down(Node), + ok = rabbit_amqqueue:on_node_down(Node), + ok = rabbit_alarm:on_node_down(Node). diff --git a/src/rabbit_prelaunch.erl b/src/rabbit_prelaunch.erl index d9d92788..92829e49 100644 --- a/src/rabbit_prelaunch.erl +++ b/src/rabbit_prelaunch.erl @@ -67,11 +67,15 @@ start() -> AppVersions}, %% Write it out to $RABBITMQ_PLUGINS_EXPAND_DIR/rabbit.rel - file:write_file(RootName ++ ".rel", io_lib:format("~p.~n", [RDesc])), + rabbit_misc:write_file(RootName ++ ".rel", io_lib:format("~p.~n", [RDesc])), + + %% We exclude mochiweb due to its optional use of fdsrv. 
+ XRefExclude = [mochiweb], %% Compile the script ScriptFile = RootName ++ ".script", - case systools:make_script(RootName, [local, silent, exref]) of + case systools:make_script(RootName, [local, silent, + {exref, AllApps -- XRefExclude}]) of {ok, Module, Warnings} -> %% This gets lots of spurious no-source warnings when we %% have .ez files, so we want to supress them to prevent @@ -93,7 +97,8 @@ start() -> end]), case length(WarningStr) of 0 -> ok; - _ -> io:format("~s", [WarningStr]) + _ -> S = string:copies("*", 80), + io:format("~n~s~n~s~s~n~n", [S, WarningStr, S]) end, ok; {error, Module, Error} -> @@ -235,7 +240,7 @@ post_process_script(ScriptFile) -> {error, {failed_to_load_script, Reason}} end. -process_entry(Entry = {apply,{application,start_boot,[rabbit,permanent]}}) -> +process_entry(Entry = {apply,{application,start_boot,[mnesia,permanent]}}) -> [{apply,{rabbit,prepare,[]}}, Entry]; process_entry(Entry) -> [Entry]. @@ -250,16 +255,21 @@ duplicate_node_check(NodeStr) -> case net_adm:names(NodeHost) of {ok, NamePorts} -> case proplists:is_defined(NodeName, NamePorts) of - true -> io:format("node with name ~p " - "already running on ~p~n", - [NodeName, NodeHost]), - [io:format(Fmt ++ "~n", Args) || - {Fmt, Args} <- rabbit_control:diagnostics(Node)], - terminate(?ERROR_CODE); - false -> ok + true -> io:format("node with name ~p " + "already running on ~p~n", + [NodeName, NodeHost]), + [io:format(Fmt ++ "~n", Args) || + {Fmt, Args} <- rabbit_control:diagnostics(Node)], + terminate(?ERROR_CODE); + false -> ok end; - {error, EpmdReason} -> terminate("unexpected epmd error: ~p~n", - [EpmdReason]) + {error, EpmdReason} -> + terminate("epmd error for host ~p: ~p (~s)~n", + [NodeHost, EpmdReason, + case EpmdReason of + address -> "unable to establish tcp connection"; + _ -> inet:format_error(EpmdReason) + end]) end. 
terminate(Fmt, Args) -> diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 76b1136f..bf89cdb2 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -76,17 +76,16 @@ %% the segment file combined with the journal, no writing needs to be %% done to the segment file either (in fact it is deleted if it exists %% at all). This is safe given that the set of acks is a subset of the -%% set of publishes. When it's necessary to sync messages because of -%% transactions, it's only necessary to fsync on the journal: when -%% entries are distributed from the journal to segment files, those -%% segments appended to are fsync'd prior to the journal being -%% truncated. +%% set of publishes. When it is necessary to sync messages, it is +%% sufficient to fsync on the journal: when entries are distributed +%% from the journal to segment files, those segments appended to are +%% fsync'd prior to the journal being truncated. %% %% This module is also responsible for scanning the queue index files %% and seeding the message store on start up. %% %% Note that in general, the representation of a message's state as -%% the tuple: {('no_pub'|{Guid, MsgProps, IsPersistent}), +%% the tuple: {('no_pub'|{MsgId, MsgProps, IsPersistent}), %% ('del'|'no_del'), ('ack'|'no_ack')} is richer than strictly %% necessary for most operations. However, for startup, and to ensure %% the safe and correct combination of journal entries with entries @@ -126,31 +125,33 @@ %% (range: 0 - 16383) -define(REL_SEQ_ONLY_PREFIX, 00). -define(REL_SEQ_ONLY_PREFIX_BITS, 2). --define(REL_SEQ_ONLY_ENTRY_LENGTH_BYTES, 2). +-define(REL_SEQ_ONLY_RECORD_BYTES, 2). %% publish record is binary 1 followed by a bit for is_persistent, %% then 14 bits of rel seq id, 64 bits for message expiry and 128 bits %% of md5sum msg id --define(PUBLISH_PREFIX, 1). --define(PUBLISH_PREFIX_BITS, 1). +-define(PUB_PREFIX, 1). +-define(PUB_PREFIX_BITS, 1). -define(EXPIRY_BYTES, 8). 
-define(EXPIRY_BITS, (?EXPIRY_BYTES * 8)). -define(NO_EXPIRY, 0). --define(GUID_BYTES, 16). %% md5sum is 128 bit or 16 bytes --define(GUID_BITS, (?GUID_BYTES * 8)). -%% 16 bytes for md5sum + 8 for expiry + 2 for seq, bits and prefix --define(PUBLISH_RECORD_LENGTH_BYTES, ?GUID_BYTES + ?EXPIRY_BYTES + 2). +-define(MSG_ID_BYTES, 16). %% md5sum is 128 bit or 16 bytes +-define(MSG_ID_BITS, (?MSG_ID_BYTES * 8)). + +%% 16 bytes for md5sum + 8 for expiry +-define(PUB_RECORD_BODY_BYTES, (?MSG_ID_BYTES + ?EXPIRY_BYTES)). +%% + 2 for seq, bits and prefix +-define(PUB_RECORD_BYTES, (?PUB_RECORD_BODY_BYTES + 2)). %% 1 publish, 1 deliver, 1 ack per msg -define(SEGMENT_TOTAL_SIZE, ?SEGMENT_ENTRY_COUNT * - (?PUBLISH_RECORD_LENGTH_BYTES + - (2 * ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES))). + (?PUB_RECORD_BYTES + (2 * ?REL_SEQ_ONLY_RECORD_BYTES))). %% ---- misc ---- --define(PUB, {_, _, _}). %% {Guid, MsgProps, IsPersistent} +-define(PUB, {_, _, _}). %% {MsgId, MsgProps, IsPersistent} -define(READ_MODE, [binary, raw, read]). -define(READ_AHEAD_MODE, [{read_ahead, ?SEGMENT_TOTAL_SIZE} | ?READ_MODE]). @@ -159,7 +160,7 @@ %%---------------------------------------------------------------------------- -record(qistate, { dir, segments, journal_handle, dirty_count, - max_journal_entries, on_sync, unsynced_guids }). + max_journal_entries, on_sync, unsynced_msg_ids }). -record(segment, { num, path, journal_entries, unacked }). @@ -167,7 +168,7 @@ %%---------------------------------------------------------------------------- --rabbit_upgrade({add_queue_ttl, []}). +-rabbit_upgrade({add_queue_ttl, local, []}). -ifdef(use_specs). @@ -177,7 +178,7 @@ path :: file:filename(), journal_entries :: array(), unacked :: non_neg_integer() - })). + })). -type(seq_id() :: integer()). -type(seg_dict() :: {dict(), [segment()]}). -type(on_sync_fun() :: fun ((gb_set()) -> ok)). 
@@ -187,21 +188,21 @@ dirty_count :: integer(), max_journal_entries :: non_neg_integer(), on_sync :: on_sync_fun(), - unsynced_guids :: [rabbit_guid:guid()] - }). --type(startup_fun_state() :: - {fun ((A) -> 'finished' | {rabbit_guid:guid(), non_neg_integer(), A}), - A}). + unsynced_msg_ids :: [rabbit_types:msg_id()] + }). +-type(contains_predicate() :: fun ((rabbit_types:msg_id()) -> boolean())). +-type(walker(A) :: fun ((A) -> 'finished' | + {rabbit_types:msg_id(), non_neg_integer(), A})). -type(shutdown_terms() :: [any()]). -spec(init/2 :: (rabbit_amqqueue:name(), on_sync_fun()) -> qistate()). -spec(shutdown_terms/1 :: (rabbit_amqqueue:name()) -> shutdown_terms()). -spec(recover/5 :: (rabbit_amqqueue:name(), shutdown_terms(), boolean(), - fun ((rabbit_guid:guid()) -> boolean()), on_sync_fun()) -> - {'undefined' | non_neg_integer(), qistate()}). + contains_predicate(), on_sync_fun()) -> + {'undefined' | non_neg_integer(), qistate()}). -spec(terminate/2 :: ([any()], qistate()) -> qistate()). -spec(delete_and_terminate/1 :: (qistate()) -> qistate()). --spec(publish/5 :: (rabbit_guid:guid(), seq_id(), +-spec(publish/5 :: (rabbit_types:msg_id(), seq_id(), rabbit_types:message_properties(), boolean(), qistate()) -> qistate()). -spec(deliver/2 :: ([seq_id()], qistate()) -> qistate()). @@ -209,14 +210,13 @@ -spec(sync/2 :: ([seq_id()], qistate()) -> qistate()). -spec(flush/1 :: (qistate()) -> qistate()). -spec(read/3 :: (seq_id(), seq_id(), qistate()) -> - {[{rabbit_guid:guid(), seq_id(), + {[{rabbit_types:msg_id(), seq_id(), rabbit_types:message_properties(), boolean(), boolean()}], qistate()}). -spec(next_segment_boundary/1 :: (seq_id()) -> seq_id()). -spec(bounds/1 :: (qistate()) -> - {non_neg_integer(), non_neg_integer(), qistate()}). --spec(recover/1 :: ([rabbit_amqqueue:name()]) -> - {[[any()]], startup_fun_state()}). + {non_neg_integer(), non_neg_integer(), qistate()}). +-spec(recover/1 :: ([rabbit_amqqueue:name()]) -> {[[any()]], {walker(A), A}}). 
-spec(add_queue_ttl/0 :: () -> 'ok'). @@ -259,22 +259,22 @@ delete_and_terminate(State) -> ok = rabbit_misc:recursive_delete([Dir]), State1. -publish(Guid, SeqId, MsgProps, IsPersistent, - State = #qistate { unsynced_guids = UnsyncedGuids }) - when is_binary(Guid) -> - ?GUID_BYTES = size(Guid), +publish(MsgId, SeqId, MsgProps, IsPersistent, + State = #qistate { unsynced_msg_ids = UnsyncedMsgIds }) + when is_binary(MsgId) -> + ?MSG_ID_BYTES = size(MsgId), {JournalHdl, State1} = get_journal_handle( State #qistate { - unsynced_guids = [Guid | UnsyncedGuids] }), + unsynced_msg_ids = [MsgId | UnsyncedMsgIds] }), ok = file_handle_cache:append( JournalHdl, [<<(case IsPersistent of true -> ?PUB_PERSIST_JPREFIX; false -> ?PUB_TRANS_JPREFIX end):?JPREFIX_BITS, SeqId:?SEQ_BITS>>, - create_pub_record_body(Guid, MsgProps)]), + create_pub_record_body(MsgId, MsgProps)]), maybe_flush_journal( - add_to_journal(SeqId, {Guid, MsgProps, IsPersistent}, State1)). + add_to_journal(SeqId, {MsgId, MsgProps, IsPersistent}, State1)). deliver(SeqIds, State) -> deliver_or_ack(del, SeqIds, State). @@ -284,18 +284,17 @@ ack(SeqIds, State) -> %% This is only called when there are outstanding confirms and the %% queue is idle. -sync(State = #qistate { unsynced_guids = Guids }) -> - sync_if([] =/= Guids, State). +sync(State = #qistate { unsynced_msg_ids = MsgIds }) -> + sync_if([] =/= MsgIds, State). sync(SeqIds, State) -> - %% The SeqIds here contains the SeqId of every publish and ack in - %% the transaction. Ideally we should go through these seqids and - %% only sync the journal if the pubs or acks appear in the + %% The SeqIds here contains the SeqId of every publish and ack to + %% be sync'ed. Ideally we should go through these seqids and only + %% sync the journal if the pubs or acks appear in the %% journal. 
However, this would be complex to do, and given that %% the variable queue publishes and acks to the qi, and then %% syncs, all in one operation, there is no possibility of the - %% seqids not being in the journal, provided the transaction isn't - %% emptied (handled by sync_if anyway). + %% seqids not being in the journal. sync_if([] =/= SeqIds, State). flush(State = #qistate { dirty_count = 0 }) -> State; @@ -388,7 +387,7 @@ blank_state(QueueName) -> dirty_count = 0, max_journal_entries = MaxJournal, on_sync = fun (_) -> ok end, - unsynced_guids = [] }. + unsynced_msg_ids = [] }. clean_file_name(Dir) -> filename:join(Dir, ?CLEAN_FILENAME). @@ -470,8 +469,9 @@ recover_segment(ContainsCheckFun, CleanShutdown, {SegEntries1, UnackedCountDelta} = segment_plus_journal(SegEntries, JEntries), array:sparse_foldl( - fun (RelSeq, {{Guid, _MsgProps, _IsPersistent}, Del, no_ack}, Segment1) -> - recover_message(ContainsCheckFun(Guid), CleanShutdown, + fun (RelSeq, {{MsgId, _MsgProps, _IsPersistent}, Del, no_ack}, + Segment1) -> + recover_message(ContainsCheckFun(MsgId), CleanShutdown, Del, RelSeq, Segment1) end, Segment #segment { unacked = UnackedCount + UnackedCountDelta }, @@ -512,20 +512,20 @@ queue_index_walker({start, DurableQueues}) when is_list(DurableQueues) -> queue_index_walker({next, Gatherer}) when is_pid(Gatherer) -> case gatherer:out(Gatherer) of empty -> + unlink(Gatherer), ok = gatherer:stop(Gatherer), - ok = rabbit_misc:unlink_and_capture_exit(Gatherer), finished; - {value, {Guid, Count}} -> - {Guid, Count, {next, Gatherer}} + {value, {MsgId, Count}} -> + {MsgId, Count, {next, Gatherer}} end. 
queue_index_walker_reader(QueueName, Gatherer) -> State = #qistate { segments = Segments, dir = Dir } = recover_journal(blank_state(QueueName)), [ok = segment_entries_foldr( - fun (_RelSeq, {{Guid, _MsgProps, true}, _IsDelivered, no_ack}, + fun (_RelSeq, {{MsgId, _MsgProps, true}, _IsDelivered, no_ack}, ok) -> - gatherer:in(Gatherer, {Guid, 1}); + gatherer:in(Gatherer, {MsgId, 1}); (_RelSeq, _Value, Acc) -> Acc end, ok, segment_find_or_new(Seg, Dir, Segments)) || @@ -537,27 +537,21 @@ queue_index_walker_reader(QueueName, Gatherer) -> %% expiry/binary manipulation %%---------------------------------------------------------------------------- -create_pub_record_body(Guid, #message_properties{expiry = Expiry}) -> - [Guid, expiry_to_binary(Expiry)]. +create_pub_record_body(MsgId, #message_properties { expiry = Expiry }) -> + [MsgId, expiry_to_binary(Expiry)]. expiry_to_binary(undefined) -> <<?NO_EXPIRY:?EXPIRY_BITS>>; expiry_to_binary(Expiry) -> <<Expiry:?EXPIRY_BITS>>. -read_pub_record_body(Hdl) -> - case file_handle_cache:read(Hdl, ?GUID_BYTES + ?EXPIRY_BYTES) of - {ok, Bin} -> - %% work around for binary data fragmentation. See - %% rabbit_msg_file:read_next/2 - <<GuidNum:?GUID_BITS, Expiry:?EXPIRY_BITS>> = Bin, - <<Guid:?GUID_BYTES/binary>> = <<GuidNum:?GUID_BITS>>, - Exp = case Expiry of - ?NO_EXPIRY -> undefined; - X -> X - end, - {Guid, #message_properties{expiry = Exp}}; - Error -> - Error - end. +parse_pub_record_body(<<MsgIdNum:?MSG_ID_BITS, Expiry:?EXPIRY_BITS>>) -> + %% work around for binary data fragmentation. See + %% rabbit_msg_file:read_next/2 + <<MsgId:?MSG_ID_BYTES/binary>> = <<MsgIdNum:?MSG_ID_BITS>>, + Exp = case Expiry of + ?NO_EXPIRY -> undefined; + X -> X + end, + {MsgId, #message_properties { expiry = Exp }}. 
%%---------------------------------------------------------------------------- %% journal manipulation @@ -666,8 +660,8 @@ recover_journal(State) -> journal_minus_segment(JEntries, SegEntries), Segment #segment { journal_entries = JEntries1, unacked = (UnackedCountInJournal + - UnackedCountInSeg - - UnackedCountDuplicates) } + UnackedCountInSeg - + UnackedCountDuplicates) } end, Segments), State1 #qistate { segments = Segments1 }. @@ -680,15 +674,16 @@ load_journal_entries(State = #qistate { journal_handle = Hdl }) -> ?ACK_JPREFIX -> load_journal_entries(add_to_journal(SeqId, ack, State)); _ -> - case read_pub_record_body(Hdl) of - {Guid, MsgProps} -> - Publish = {Guid, MsgProps, - case Prefix of - ?PUB_PERSIST_JPREFIX -> true; - ?PUB_TRANS_JPREFIX -> false - end}, + case file_handle_cache:read(Hdl, ?PUB_RECORD_BODY_BYTES) of + {ok, Bin} -> + {MsgId, MsgProps} = parse_pub_record_body(Bin), + IsPersistent = case Prefix of + ?PUB_PERSIST_JPREFIX -> true; + ?PUB_TRANS_JPREFIX -> false + end, load_journal_entries( - add_to_journal(SeqId, Publish, State)); + add_to_journal( + SeqId, {MsgId, MsgProps, IsPersistent}, State)); _ErrOrEoF -> %% err, we've lost at least a publish State end @@ -716,9 +711,9 @@ sync_if(true, State = #qistate { journal_handle = JournalHdl }) -> ok = file_handle_cache:sync(JournalHdl), notify_sync(State). -notify_sync(State = #qistate { unsynced_guids = UG, on_sync = OnSyncFun }) -> +notify_sync(State = #qistate { unsynced_msg_ids = UG, on_sync = OnSyncFun }) -> OnSyncFun(gb_sets:from_list(UG)), - State #qistate { unsynced_guids = [] }. + State #qistate { unsynced_msg_ids = [] }. 
%%---------------------------------------------------------------------------- %% segment manipulation @@ -796,19 +791,19 @@ write_entry_to_segment(RelSeq, {Pub, Del, Ack}, Hdl) -> ok = case Pub of no_pub -> ok; - {Guid, MsgProps, IsPersistent} -> + {MsgId, MsgProps, IsPersistent} -> file_handle_cache:append( - Hdl, [<<?PUBLISH_PREFIX:?PUBLISH_PREFIX_BITS, - (bool_to_int(IsPersistent)):1, - RelSeq:?REL_SEQ_BITS>>, - create_pub_record_body(Guid, MsgProps)]) + Hdl, [<<?PUB_PREFIX:?PUB_PREFIX_BITS, + (bool_to_int(IsPersistent)):1, + RelSeq:?REL_SEQ_BITS>>, + create_pub_record_body(MsgId, MsgProps)]) end, ok = case {Del, Ack} of {no_del, no_ack} -> ok; _ -> Binary = <<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS, - RelSeq:?REL_SEQ_BITS>>, + RelSeq:?REL_SEQ_BITS>>, file_handle_cache:append( Hdl, case {Del, Ack} of {del, ack} -> [Binary, Binary]; @@ -821,10 +816,10 @@ read_bounded_segment(Seg, {StartSeg, StartRelSeq}, {EndSeg, EndRelSeq}, {Messages, Segments}, Dir) -> Segment = segment_find_or_new(Seg, Dir, Segments), {segment_entries_foldr( - fun (RelSeq, {{Guid, MsgProps, IsPersistent}, IsDelivered, no_ack}, Acc) + fun (RelSeq, {{MsgId, MsgProps, IsPersistent}, IsDelivered, no_ack}, Acc) when (Seg > StartSeg orelse StartRelSeq =< RelSeq) andalso (Seg < EndSeg orelse EndRelSeq >= RelSeq) -> - [ {Guid, reconstruct_seq_id(StartSeg, RelSeq), MsgProps, + [ {MsgId, reconstruct_seq_id(StartSeg, RelSeq), MsgProps, IsPersistent, IsDelivered == del} | Acc ]; (_RelSeq, _Value, Acc) -> Acc @@ -845,36 +840,40 @@ load_segment(KeepAcked, #segment { path = Path }) -> false -> {array_new(), 0}; true -> {ok, Hdl} = file_handle_cache:open(Path, ?READ_AHEAD_MODE, []), {ok, 0} = file_handle_cache:position(Hdl, bof), - Res = load_segment_entries(KeepAcked, Hdl, array_new(), 0), + {ok, SegData} = file_handle_cache:read( + Hdl, ?SEGMENT_TOTAL_SIZE), + Res = load_segment_entries(KeepAcked, SegData, array_new(), 0), ok = file_handle_cache:close(Hdl), Res end. 
-load_segment_entries(KeepAcked, Hdl, SegEntries, UnackedCount) -> - case file_handle_cache:read(Hdl, ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES) of - {ok, <<?PUBLISH_PREFIX:?PUBLISH_PREFIX_BITS, - IsPersistentNum:1, RelSeq:?REL_SEQ_BITS>>} -> - {Guid, MsgProps} = read_pub_record_body(Hdl), - Obj = {{Guid, MsgProps, 1 == IsPersistentNum}, no_del, no_ack}, - SegEntries1 = array:set(RelSeq, Obj, SegEntries), - load_segment_entries(KeepAcked, Hdl, SegEntries1, - UnackedCount + 1); - {ok, <<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS, - RelSeq:?REL_SEQ_BITS>>} -> - {UnackedCountDelta, SegEntries1} = - case array:get(RelSeq, SegEntries) of - {Pub, no_del, no_ack} -> - { 0, array:set(RelSeq, {Pub, del, no_ack}, SegEntries)}; - {Pub, del, no_ack} when KeepAcked -> - {-1, array:set(RelSeq, {Pub, del, ack}, SegEntries)}; - {_Pub, del, no_ack} -> - {-1, array:reset(RelSeq, SegEntries)} - end, - load_segment_entries(KeepAcked, Hdl, SegEntries1, - UnackedCount + UnackedCountDelta); - _ErrOrEoF -> - {SegEntries, UnackedCount} - end. 
+load_segment_entries(KeepAcked, + <<?PUB_PREFIX:?PUB_PREFIX_BITS, + IsPersistentNum:1, RelSeq:?REL_SEQ_BITS, + PubRecordBody:?PUB_RECORD_BODY_BYTES/binary, + SegData/binary>>, + SegEntries, UnackedCount) -> + {MsgId, MsgProps} = parse_pub_record_body(PubRecordBody), + Obj = {{MsgId, MsgProps, 1 == IsPersistentNum}, no_del, no_ack}, + SegEntries1 = array:set(RelSeq, Obj, SegEntries), + load_segment_entries(KeepAcked, SegData, SegEntries1, UnackedCount + 1); +load_segment_entries(KeepAcked, + <<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS, + RelSeq:?REL_SEQ_BITS, SegData/binary>>, + SegEntries, UnackedCount) -> + {UnackedCountDelta, SegEntries1} = + case array:get(RelSeq, SegEntries) of + {Pub, no_del, no_ack} -> + { 0, array:set(RelSeq, {Pub, del, no_ack}, SegEntries)}; + {Pub, del, no_ack} when KeepAcked -> + {-1, array:set(RelSeq, {Pub, del, ack}, SegEntries)}; + {_Pub, del, no_ack} -> + {-1, array:reset(RelSeq, SegEntries)} + end, + load_segment_entries(KeepAcked, SegData, SegEntries1, + UnackedCount + UnackedCountDelta); +load_segment_entries(_KeepAcked, _SegData, SegEntries, UnackedCount) -> + {SegEntries, UnackedCount}. array_new() -> array:new([{default, undefined}, fixed, {size, ?SEGMENT_ENTRY_COUNT}]). @@ -1002,17 +1001,17 @@ add_queue_ttl_journal(<<?ACK_JPREFIX:?JPREFIX_BITS, SeqId:?SEQ_BITS, Rest/binary>>) -> {<<?ACK_JPREFIX:?JPREFIX_BITS, SeqId:?SEQ_BITS>>, Rest}; add_queue_ttl_journal(<<Prefix:?JPREFIX_BITS, SeqId:?SEQ_BITS, - Guid:?GUID_BYTES/binary, Rest/binary>>) -> - {[<<Prefix:?JPREFIX_BITS, SeqId:?SEQ_BITS>>, Guid, + MsgId:?MSG_ID_BYTES/binary, Rest/binary>>) -> + {[<<Prefix:?JPREFIX_BITS, SeqId:?SEQ_BITS>>, MsgId, expiry_to_binary(undefined)], Rest}; add_queue_ttl_journal(_) -> stop. 
-add_queue_ttl_segment(<<?PUBLISH_PREFIX:?PUBLISH_PREFIX_BITS, IsPersistentNum:1, - RelSeq:?REL_SEQ_BITS, Guid:?GUID_BYTES/binary, +add_queue_ttl_segment(<<?PUB_PREFIX:?PUB_PREFIX_BITS, IsPersistentNum:1, + RelSeq:?REL_SEQ_BITS, MsgId:?MSG_ID_BYTES/binary, Rest/binary>>) -> - {[<<?PUBLISH_PREFIX:?PUBLISH_PREFIX_BITS, IsPersistentNum:1, - RelSeq:?REL_SEQ_BITS>>, Guid, expiry_to_binary(undefined)], Rest}; + {[<<?PUB_PREFIX:?PUB_PREFIX_BITS, IsPersistentNum:1, RelSeq:?REL_SEQ_BITS>>, + MsgId, expiry_to_binary(undefined)], Rest}; add_queue_ttl_segment(<<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS, RelSeq:?REL_SEQ_BITS, Rest>>) -> {<<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS, RelSeq:?REL_SEQ_BITS>>, @@ -1035,8 +1034,8 @@ foreach_queue_index(Funs) -> end) end || QueueDirName <- QueueDirNames], empty = gatherer:out(Gatherer), - ok = gatherer:stop(Gatherer), - ok = rabbit_misc:unlink_and_capture_exit(Gatherer). + unlink(Gatherer), + ok = gatherer:stop(Gatherer). transform_queue(Dir, Gatherer, {JournalFun, SegmentFun}) -> ok = transform_file(filename:join(Dir, ?JOURNAL_FILENAME), JournalFun), diff --git a/src/rabbit_reader.erl b/src/rabbit_reader.erl index 34883058..3bc0e389 100644 --- a/src/rabbit_reader.erl +++ b/src/rabbit_reader.erl @@ -24,7 +24,7 @@ -export([init/4, mainloop/2]). --export([conserve_memory/2, server_properties/0]). +-export([conserve_memory/2, server_properties/1]). -export([process_channel_frame/5]). %% used by erlang-client @@ -33,14 +33,13 @@ -define(CLOSING_TIMEOUT, 1). -define(CHANNEL_TERMINATION_TIMEOUT, 3). -define(SILENT_CLOSE_DELAY, 3). --define(FRAME_MAX, 131072). 
%% set to zero once QPid fix their negotiation -%--------------------------------------------------------------------------- +%%-------------------------------------------------------------------------- --record(v1, {parent, sock, connection, callback, recv_length, recv_ref, +-record(v1, {parent, sock, connection, callback, recv_len, pending_recv, connection_state, queue_collector, heartbeater, stats_timer, - channel_sup_sup_pid, start_heartbeat_fun, auth_mechanism, - auth_state}). + channel_sup_sup_pid, start_heartbeat_fun, buf, buf_len, + auth_mechanism, auth_state}). -define(STATISTICS_KEYS, [pid, recv_oct, recv_cnt, send_oct, send_cnt, send_pend, state, channels]). @@ -55,98 +54,12 @@ -define(INFO_KEYS, ?CREATION_EVENT_KEYS ++ ?STATISTICS_KEYS -- [pid]). -%% connection lifecycle -%% -%% all state transitions and terminations are marked with *...* -%% -%% The lifecycle begins with: start handshake_timeout timer, *pre-init* -%% -%% all states, unless specified otherwise: -%% socket error -> *exit* -%% socket close -> *throw* -%% writer send failure -> *throw* -%% forced termination -> *exit* -%% handshake_timeout -> *throw* -%% pre-init: -%% receive protocol header -> send connection.start, *starting* -%% starting: -%% receive connection.start_ok -> *securing* -%% securing: -%% check authentication credentials -%% if authentication success -> send connection.tune, *tuning* -%% if more challenge needed -> send connection.secure, -%% receive connection.secure_ok *securing* -%% otherwise send close, *exit* -%% tuning: -%% receive connection.tune_ok -> start heartbeats, *opening* -%% opening: -%% receive connection.open -> send connection.open_ok, *running* -%% running: -%% receive connection.close -> -%% tell channels to terminate gracefully -%% if no channels then send connection.close_ok, start -%% terminate_connection timer, *closed* -%% else *closing* -%% forced termination -%% -> wait for channels to terminate forcefully, start -%% terminate_connection timer, 
send close, *exit* -%% channel exit with hard error -%% -> log error, wait for channels to terminate forcefully, start -%% terminate_connection timer, send close, *closed* -%% channel exit with soft error -%% -> log error, mark channel as closing, *running* -%% handshake_timeout -> ignore, *running* -%% heartbeat timeout -> *throw* -%% conserve_memory=true -> *blocking* -%% blocking: -%% conserve_memory=true -> *blocking* -%% conserve_memory=false -> *running* -%% receive a method frame for a content-bearing method -%% -> process, stop receiving, *blocked* -%% ...rest same as 'running' -%% blocked: -%% conserve_memory=true -> *blocked* -%% conserve_memory=false -> resume receiving, *running* -%% ...rest same as 'running' -%% closing: -%% socket close -> *terminate* -%% receive connection.close -> send connection.close_ok, -%% *closing* -%% receive frame -> ignore, *closing* -%% handshake_timeout -> ignore, *closing* -%% heartbeat timeout -> *throw* -%% channel exit with hard error -%% -> log error, wait for channels to terminate forcefully, start -%% terminate_connection timer, send close, *closed* -%% channel exit with soft error -%% -> log error, mark channel as closing -%% if last channel to exit then send connection.close_ok, -%% start terminate_connection timer, *closed* -%% else *closing* -%% channel exits normally -%% -> if last channel to exit then send connection.close_ok, -%% start terminate_connection timer, *closed* -%% closed: -%% socket close -> *terminate* -%% receive connection.close -> send connection.close_ok, -%% *closed* -%% receive connection.close_ok -> self() ! 
terminate_connection, -%% *closed* -%% receive frame -> ignore, *closed* -%% terminate_connection timeout -> *terminate* -%% handshake_timeout -> ignore, *closed* -%% heartbeat timeout -> *throw* -%% channel exit -> log error, *closed* -%% -%% -%% TODO: refactor the code so that the above is obvious - -define(IS_RUNNING(State), (State#v1.connection_state =:= running orelse State#v1.connection_state =:= blocking orelse State#v1.connection_state =:= blocked)). -%%---------------------------------------------------------------------------- +%%-------------------------------------------------------------------------- -ifdef(use_specs). @@ -157,7 +70,8 @@ -spec(info/2 :: (pid(), rabbit_types:info_keys()) -> rabbit_types:infos()). -spec(shutdown/2 :: (pid(), string()) -> 'ok'). -spec(conserve_memory/2 :: (pid(), boolean()) -> 'ok'). --spec(server_properties/0 :: () -> rabbit_framing:amqp_table()). +-spec(server_properties/1 :: (rabbit_types:protocol()) -> + rabbit_framing:amqp_table()). %% These specs only exists to add no_return() to keep dialyzer happy -spec(init/4 :: (pid(), pid(), pid(), rabbit_heartbeat:start_heartbeat_fun()) @@ -213,7 +127,7 @@ conserve_memory(Pid, Conserve) -> Pid ! {conserve_memory, Conserve}, ok. 
-server_properties() -> +server_properties(Protocol) -> {ok, Product} = application:get_key(rabbit, id), {ok, Version} = application:get_key(rabbit, vsn), @@ -224,22 +138,31 @@ server_properties() -> %% Normalize the simplifed (2-tuple) and unsimplified (3-tuple) forms %% from the config and merge them with the generated built-in properties NormalizedConfigServerProps = - [case X of - {KeyAtom, Value} -> {list_to_binary(atom_to_list(KeyAtom)), - longstr, - list_to_binary(Value)}; - {BinKey, Type, Value} -> {BinKey, Type, Value} - end || X <- RawConfigServerProps ++ - [{product, Product}, - {version, Version}, - {platform, "Erlang/OTP"}, - {copyright, ?COPYRIGHT_MESSAGE}, - {information, ?INFORMATION_MESSAGE}]], - - %% Filter duplicated properties in favor of config file provided values + [{<<"capabilities">>, table, server_capabilities(Protocol)} | + [case X of + {KeyAtom, Value} -> {list_to_binary(atom_to_list(KeyAtom)), + longstr, + list_to_binary(Value)}; + {BinKey, Type, Value} -> {BinKey, Type, Value} + end || X <- RawConfigServerProps ++ + [{product, Product}, + {version, Version}, + {platform, "Erlang/OTP"}, + {copyright, ?COPYRIGHT_MESSAGE}, + {information, ?INFORMATION_MESSAGE}]]], + + %% Filter duplicated properties in favour of config file provided values lists:usort(fun ({K1,_,_}, {K2,_,_}) -> K1 =< K2 end, NormalizedConfigServerProps). +server_capabilities(rabbit_framing_amqp_0_9_1) -> + [{<<"publisher_confirms">>, bool, true}, + {<<"exchange_exchange_bindings">>, bool, true}, + {<<"basic.nack">>, bool, true}, + {<<"consumer_cancel_notify">>, bool, true}]; +server_capabilities(_) -> + []. + inet_op(F) -> rabbit_misc:throw_on_error(inet_error, F). 
socket_op(Sock, Fun) -> @@ -263,7 +186,7 @@ start_connection(Parent, ChannelSupSupPid, Collector, StartHeartbeatFun, Deb, erlang:send_after(?HANDSHAKE_TIMEOUT * 1000, self(), handshake_timeout), try - mainloop(Deb, switch_callback( + recvloop(Deb, switch_callback( #v1{parent = Parent, sock = ClientSock, connection = #connection{ @@ -272,10 +195,11 @@ start_connection(Parent, ChannelSupSupPid, Collector, StartHeartbeatFun, Deb, timeout_sec = ?HANDSHAKE_TIMEOUT, frame_max = ?FRAME_MIN_SIZE, vhost = none, - client_properties = none}, + client_properties = none, + capabilities = []}, callback = uninitialized_callback, - recv_length = 0, - recv_ref = none, + recv_len = 0, + pending_recv = false, connection_state = pre_init, queue_collector = Collector, heartbeater = none, @@ -283,6 +207,8 @@ start_connection(Parent, ChannelSupSupPid, Collector, StartHeartbeatFun, Deb, rabbit_event:init_stats_timer(), channel_sup_sup_pid = ChannelSupSupPid, start_heartbeat_fun = StartHeartbeatFun, + buf = [], + buf_len = 0, auth_mechanism = none, auth_state = none }, @@ -307,88 +233,104 @@ start_connection(Parent, ChannelSupSupPid, Collector, StartHeartbeatFun, Deb, end, done. 
-mainloop(Deb, State = #v1{parent = Parent, sock= Sock, recv_ref = Ref}) -> - receive - {inet_async, Sock, Ref, {ok, Data}} -> - mainloop(Deb, handle_input(State#v1.callback, Data, - State#v1{recv_ref = none})); - {inet_async, Sock, Ref, {error, closed}} -> - if State#v1.connection_state =:= closed -> - State; - true -> - throw(connection_closed_abruptly) - end; - {inet_async, Sock, Ref, {error, Reason}} -> - throw({inet_error, Reason}); - {conserve_memory, Conserve} -> - mainloop(Deb, internal_conserve_memory(Conserve, State)); - {'EXIT', Parent, Reason} -> - terminate(io_lib:format("broker forced connection closure " - "with reason '~w'", [Reason]), State), - %% this is what we are expected to do according to - %% http://www.erlang.org/doc/man/sys.html - %% - %% If we wanted to be *really* nice we should wait for a - %% while for clients to close the socket at their end, - %% just as we do in the ordinary error case. However, - %% since this termination is initiated by our parent it is - %% probably more important to exit quickly. 
- exit(Reason); - {channel_exit, _Channel, E = {writer, send_failed, _Error}} -> - throw(E); - {channel_exit, Channel, Reason} -> - mainloop(Deb, handle_exception(State, Channel, Reason)); - {'DOWN', _MRef, process, ChPid, Reason} -> - mainloop(Deb, handle_dependent_exit(ChPid, Reason, State)); - terminate_connection -> - State; - handshake_timeout -> - if ?IS_RUNNING(State) orelse - State#v1.connection_state =:= closing orelse - State#v1.connection_state =:= closed -> - mainloop(Deb, State); - true -> - throw({handshake_timeout, State#v1.callback}) - end; - timeout -> - case State#v1.connection_state of - closed -> mainloop(Deb, State); - S -> throw({timeout, S}) - end; - {'$gen_call', From, {shutdown, Explanation}} -> - {ForceTermination, NewState} = terminate(Explanation, State), - gen_server:reply(From, ok), - case ForceTermination of - force -> ok; - normal -> mainloop(Deb, NewState) - end; - {'$gen_call', From, info} -> - gen_server:reply(From, infos(?INFO_KEYS, State)), - mainloop(Deb, State); - {'$gen_call', From, {info, Items}} -> - gen_server:reply(From, try {ok, infos(Items, State)} - catch Error -> {error, Error} - end), - mainloop(Deb, State); - emit_stats -> - State1 = internal_emit_stats(State), - mainloop(Deb, State1); - {system, From, Request} -> - sys:handle_system_msg(Request, From, - Parent, ?MODULE, Deb, State); - Other -> - %% internal error -> something worth dying for - exit({unexpected_message, Other}) +recvloop(Deb, State = #v1{pending_recv = true}) -> + mainloop(Deb, State); +recvloop(Deb, State = #v1{connection_state = blocked}) -> + mainloop(Deb, State); +recvloop(Deb, State = #v1{sock = Sock, recv_len = RecvLen, buf_len = BufLen}) + when BufLen < RecvLen -> + ok = rabbit_net:setopts(Sock, [{active, once}]), + mainloop(Deb, State#v1{pending_recv = true}); +recvloop(Deb, State = #v1{recv_len = RecvLen, buf = Buf, buf_len = BufLen}) -> + {Data, Rest} = split_binary(case Buf of + [B] -> B; + _ -> list_to_binary(lists:reverse(Buf)) + end, 
RecvLen), + recvloop(Deb, handle_input(State#v1.callback, Data, + State#v1{buf = [Rest], + buf_len = BufLen - RecvLen})). + +mainloop(Deb, State = #v1{sock = Sock, buf = Buf, buf_len = BufLen}) -> + case rabbit_net:recv(Sock) of + {data, Data} -> recvloop(Deb, State#v1{buf = [Data | Buf], + buf_len = BufLen + size(Data), + pending_recv = false}); + closed -> if State#v1.connection_state =:= closed -> + State; + true -> + throw(connection_closed_abruptly) + end; + {error, Reason} -> throw({inet_error, Reason}); + {other, Other} -> handle_other(Other, Deb, State) end. +handle_other({conserve_memory, Conserve}, Deb, State) -> + recvloop(Deb, internal_conserve_memory(Conserve, State)); +handle_other({channel_closing, ChPid}, Deb, State) -> + ok = rabbit_channel:ready_for_close(ChPid), + channel_cleanup(ChPid), + mainloop(Deb, State); +handle_other({'EXIT', Parent, Reason}, _Deb, State = #v1{parent = Parent}) -> + terminate(io_lib:format("broker forced connection closure " + "with reason '~w'", [Reason]), State), + %% this is what we are expected to do according to + %% http://www.erlang.org/doc/man/sys.html + %% + %% If we wanted to be *really* nice we should wait for a while for + %% clients to close the socket at their end, just as we do in the + %% ordinary error case. However, since this termination is + %% initiated by our parent it is probably more important to exit + %% quickly. 
+ exit(Reason); +handle_other({channel_exit, _Channel, E = {writer, send_failed, _Error}}, + _Deb, _State) -> + throw(E); +handle_other({channel_exit, Channel, Reason}, Deb, State) -> + mainloop(Deb, handle_exception(State, Channel, Reason)); +handle_other({'DOWN', _MRef, process, ChPid, Reason}, Deb, State) -> + mainloop(Deb, handle_dependent_exit(ChPid, Reason, State)); +handle_other(terminate_connection, _Deb, State) -> + State; +handle_other(handshake_timeout, Deb, State) + when ?IS_RUNNING(State) orelse + State#v1.connection_state =:= closing orelse + State#v1.connection_state =:= closed -> + mainloop(Deb, State); +handle_other(handshake_timeout, _Deb, State) -> + throw({handshake_timeout, State#v1.callback}); +handle_other(timeout, Deb, State = #v1{connection_state = closed}) -> + mainloop(Deb, State); +handle_other(timeout, _Deb, #v1{connection_state = S}) -> + throw({timeout, S}); +handle_other({'$gen_call', From, {shutdown, Explanation}}, Deb, State) -> + {ForceTermination, NewState} = terminate(Explanation, State), + gen_server:reply(From, ok), + case ForceTermination of + force -> ok; + normal -> mainloop(Deb, NewState) + end; +handle_other({'$gen_call', From, info}, Deb, State) -> + gen_server:reply(From, infos(?INFO_KEYS, State)), + mainloop(Deb, State); +handle_other({'$gen_call', From, {info, Items}}, Deb, State) -> + gen_server:reply(From, try {ok, infos(Items, State)} + catch Error -> {error, Error} + end), + mainloop(Deb, State); +handle_other(emit_stats, Deb, State) -> + mainloop(Deb, internal_emit_stats(State)); +handle_other({system, From, Request}, Deb, State = #v1{parent = Parent}) -> + sys:handle_system_msg(Request, From, Parent, ?MODULE, Deb, State); +handle_other(Other, _Deb, _State) -> + %% internal error -> something worth dying for + exit({unexpected_message, Other}). 
+ switch_callback(State = #v1{connection_state = blocked, heartbeater = Heartbeater}, Callback, Length) -> ok = rabbit_heartbeat:pause_monitor(Heartbeater), - State#v1{callback = Callback, recv_length = Length, recv_ref = none}; + State#v1{callback = Callback, recv_len = Length}; switch_callback(State, Callback, Length) -> - Ref = inet_op(fun () -> rabbit_net:async_recv( - State#v1.sock, Length, infinity) end), - State#v1{callback = Callback, recv_length = Length, recv_ref = Ref}. + State#v1{callback = Callback, recv_len = Length}. terminate(Explanation, State) when ?IS_RUNNING(State) -> {normal, send_exception(State, 0, @@ -402,12 +344,9 @@ internal_conserve_memory(true, State = #v1{connection_state = running}) -> internal_conserve_memory(false, State = #v1{connection_state = blocking}) -> State#v1{connection_state = running}; internal_conserve_memory(false, State = #v1{connection_state = blocked, - heartbeater = Heartbeater, - callback = Callback, - recv_length = Length, - recv_ref = none}) -> + heartbeater = Heartbeater}) -> ok = rabbit_heartbeat:resume_monitor(Heartbeater), - switch_callback(State#v1{connection_state = running}, Callback, Length); + State#v1{connection_state = running}; internal_conserve_memory(_Conserve, State) -> State. @@ -429,32 +368,32 @@ close_connection(State = #v1{queue_collector = Collector, erlang:send_after(TimeoutMillisec, self(), terminate_connection), State#v1{connection_state = closed}. -close_channel(Channel, State) -> - put({channel, Channel}, closing), - State. - handle_dependent_exit(ChPid, Reason, State) -> case termination_kind(Reason) of controlled -> - erase({ch_pid, ChPid}), + channel_cleanup(ChPid), maybe_close(State); uncontrolled -> case channel_cleanup(ChPid) of undefined -> exit({abnormal_dependent_exit, ChPid, Reason}); - Channel -> maybe_close( + Channel -> rabbit_log:error( + "connection ~p, channel ~p - error:~n~p~n", + [self(), Channel, Reason]), + maybe_close( handle_exception(State, Channel, Reason)) end end. 
channel_cleanup(ChPid) -> case get({ch_pid, ChPid}) of - undefined -> undefined; - Channel -> erase({channel, Channel}), - erase({ch_pid, ChPid}), - Channel + undefined -> undefined; + {Channel, MRef} -> erase({channel, Channel}), + erase({ch_pid, ChPid}), + erlang:demonitor(MRef, [flush]), + Channel end. -all_channels() -> [ChPid || {{ch_pid, ChPid}, _Channel} <- get()]. +all_channels() -> [ChPid || {{ch_pid, ChPid}, _ChannelMRef} <- get()]. terminate_channels() -> NChannels = @@ -509,8 +448,8 @@ maybe_close(State = #v1{connection_state = closing, maybe_close(State) -> State. -termination_kind(normal) -> controlled; -termination_kind(_) -> uncontrolled. +termination_kind(normal) -> controlled; +termination_kind(_) -> uncontrolled. handle_frame(Type, 0, Payload, State = #v1{connection_state = CS, @@ -546,8 +485,8 @@ handle_frame(Type, Channel, Payload, Channel, ChPid, FramingState), put({channel, Channel}, {ChPid, NewAState}), case AnalyzedFrame of - {method, 'channel.close', _} -> - erase({channel, Channel}), + {method, 'channel.close_ok', _} -> + channel_cleanup(ChPid), State; {method, MethodName, _} -> case (State#v1.connection_state =:= blocking @@ -559,25 +498,6 @@ handle_frame(Type, Channel, Payload, _ -> State end; - closing -> - %% According to the spec, after sending a - %% channel.close we must ignore all frames except - %% channel.close and channel.close_ok. In the - %% event of a channel.close, we should send back a - %% channel.close_ok. - case AnalyzedFrame of - {method, 'channel.close_ok', _} -> - erase({channel, Channel}); - {method, 'channel.close', _} -> - %% We're already closing this channel, so - %% there's no cleanup to do (notify - %% queues, etc.) 
- ok = rabbit_writer:internal_send_command( - State#v1.sock, Channel, - #'channel.close_ok'{}, Protocol); - _ -> ok - end, - State; undefined -> case ?IS_RUNNING(State) of true -> send_to_new_channel( @@ -598,8 +518,8 @@ handle_input({frame_payload, Type, Channel, PayloadSize}, PayloadAndMarker, State) -> case PayloadAndMarker of <<Payload:PayloadSize/binary, ?FRAME_END>> -> - handle_frame(Type, Channel, Payload, - switch_callback(State, frame_header, 7)); + switch_callback(handle_frame(Type, Channel, Payload, State), + frame_header, 7); _ -> throw({bad_payload, Type, Channel, PayloadSize, PayloadAndMarker}) end; @@ -649,8 +569,8 @@ start_connection({ProtocolMajor, ProtocolMinor, _ProtocolRevision}, Start = #'connection.start'{ version_major = ProtocolMajor, version_minor = ProtocolMinor, - server_properties = server_properties(), - mechanisms = auth_mechanisms_binary(), + server_properties = server_properties(Protocol), + mechanisms = auth_mechanisms_binary(Sock), locales = <<"en_US">> }, ok = send_on_channel0(Sock, Start, Protocol), switch_callback(State#v1{connection = Connection#connection{ @@ -676,14 +596,14 @@ handle_method0(MethodName, FieldsBin, State = #v1{connection = #connection{protocol = Protocol}}) -> HandleException = fun(R) -> - case ?IS_RUNNING(State) of - true -> send_exception(State, 0, R); - %% We don't trust the client at this point - force - %% them to wait for a bit so they can't DOS us with - %% repeated failed logins etc. - false -> timer:sleep(?SILENT_CLOSE_DELAY * 1000), - throw({channel0_error, State#v1.connection_state, R}) - end + case ?IS_RUNNING(State) of + true -> send_exception(State, 0, R); + %% We don't trust the client at this point - force + %% them to wait for a bit so they can't DOS us with + %% repeated failed logins etc. 
+ false -> timer:sleep(?SILENT_CLOSE_DELAY * 1000), + throw({channel0_error, State#v1.connection_state, R}) + end end, try handle_method0(Protocol:decode_method_fields(MethodName, FieldsBin), @@ -700,13 +620,19 @@ handle_method0(#'connection.start_ok'{mechanism = Mechanism, State0 = #v1{connection_state = starting, connection = Connection, sock = Sock}) -> - AuthMechanism = auth_mechanism_to_module(Mechanism), + AuthMechanism = auth_mechanism_to_module(Mechanism, Sock), + Capabilities = + case rabbit_misc:table_lookup(ClientProperties, <<"capabilities">>) of + {table, Capabilities1} -> Capabilities1; + _ -> [] + end, State = State0#v1{auth_mechanism = AuthMechanism, auth_state = AuthMechanism:init(Sock), connection_state = securing, connection = Connection#connection{ - client_properties = ClientProperties}}, + client_properties = ClientProperties, + capabilities = Capabilities}}, auth_phase(Response, State); handle_method0(#'connection.secure_ok'{response = Response}, @@ -719,14 +645,15 @@ handle_method0(#'connection.tune_ok'{frame_max = FrameMax, connection = Connection, sock = Sock, start_heartbeat_fun = SHF}) -> - if (FrameMax /= 0) and (FrameMax < ?FRAME_MIN_SIZE) -> + ServerFrameMax = server_frame_max(), + if FrameMax /= 0 andalso FrameMax < ?FRAME_MIN_SIZE -> rabbit_misc:protocol_error( not_allowed, "frame_max=~w < ~w min size", [FrameMax, ?FRAME_MIN_SIZE]); - (?FRAME_MAX /= 0) and (FrameMax > ?FRAME_MAX) -> + ServerFrameMax /= 0 andalso FrameMax > ServerFrameMax -> rabbit_misc:protocol_error( not_allowed, "frame_max=~w > ~w max size", - [FrameMax, ?FRAME_MAX]); + [FrameMax, ServerFrameMax]); true -> Frame = rabbit_binary_generator:build_heartbeat_frame(), SendFun = fun() -> catch rabbit_net:send(Sock, Frame) end, @@ -742,7 +669,6 @@ handle_method0(#'connection.tune_ok'{frame_max = FrameMax, end; handle_method0(#'connection.open'{virtual_host = VHostPath}, - State = #v1{connection_state = opening, connection = Connection = #connection{ user = User, @@ -757,7 
+683,8 @@ handle_method0(#'connection.open'{virtual_host = VHostPath}, State#v1{connection_state = running, connection = NewConnection}), rabbit_event:notify(connection_created, - infos(?CREATION_EVENT_KEYS, State1)), + [{type, network} | + infos(?CREATION_EVENT_KEYS, State1)]), rabbit_event:if_enabled(StatsTimer, fun() -> internal_emit_stats(State1) end), State1; @@ -784,17 +711,23 @@ handle_method0(_Method, #v1{connection_state = S}) -> rabbit_misc:protocol_error( channel_error, "unexpected method in connection state ~w", [S]). +%% Compute frame_max for this instance. Could simply use 0, but breaks +%% QPid Java client. +server_frame_max() -> + {ok, FrameMax} = application:get_env(rabbit, frame_max), + FrameMax. + send_on_channel0(Sock, Method, Protocol) -> ok = rabbit_writer:internal_send_command(Sock, 0, Method, Protocol). -auth_mechanism_to_module(TypeBin) -> +auth_mechanism_to_module(TypeBin, Sock) -> case rabbit_registry:binary_to_type(TypeBin) of {error, not_found} -> rabbit_misc:protocol_error( command_invalid, "unknown authentication mechanism '~s'", [TypeBin]); T -> - case {lists:member(T, auth_mechanisms()), + case {lists:member(T, auth_mechanisms(Sock)), rabbit_registry:lookup_module(auth_mechanism, T)} of {true, {ok, Module}} -> Module; @@ -805,15 +738,14 @@ auth_mechanism_to_module(TypeBin) -> end end. -auth_mechanisms() -> +auth_mechanisms(Sock) -> {ok, Configured} = application:get_env(auth_mechanisms), - [Name || {Name, _Module} <- rabbit_registry:lookup_all(auth_mechanism), - lists:member(Name, Configured)]. + [Name || {Name, Module} <- rabbit_registry:lookup_all(auth_mechanism), + Module:should_offer(Sock), lists:member(Name, Configured)]. -auth_mechanisms_binary() -> +auth_mechanisms_binary(Sock) -> list_to_binary( - string:join( - [atom_to_list(A) || A <- auth_mechanisms()], " ")). + string:join([atom_to_list(A) || A <- auth_mechanisms(Sock)], " ")). 
auth_phase(Response, State = #v1{auth_mechanism = AuthMechanism, @@ -835,7 +767,7 @@ auth_phase(Response, State#v1{auth_state = AuthState1}; {ok, User} -> Tune = #'connection.tune'{channel_max = 0, - frame_max = ?FRAME_MAX, + frame_max = server_frame_max(), heartbeat = 0}, ok = send_on_channel0(Sock, Tune, Protocol), State#v1{connection_state = tuning, @@ -939,19 +871,20 @@ cert_info(F, Sock) -> send_to_new_channel(Channel, AnalyzedFrame, State) -> #v1{sock = Sock, queue_collector = Collector, channel_sup_sup_pid = ChanSupSup, - connection = #connection{protocol = Protocol, - frame_max = FrameMax, - user = User, - vhost = VHost}} = State, + connection = #connection{protocol = Protocol, + frame_max = FrameMax, + user = User, + vhost = VHost, + capabilities = Capabilities}} = State, {ok, _ChSupPid, {ChPid, AState}} = rabbit_channel_sup_sup:start_channel( - ChanSupSup, {tcp, Protocol, Sock, Channel, FrameMax, self(), User, - VHost, Collector}), - erlang:monitor(process, ChPid), + ChanSupSup, {tcp, Sock, Channel, FrameMax, self(), Protocol, User, + VHost, Capabilities, Collector}), + MRef = erlang:monitor(process, ChPid), NewAState = process_channel_frame(AnalyzedFrame, self(), Channel, ChPid, AState), put({channel, Channel}, {ChPid, NewAState}), - put({ch_pid, ChPid}, Channel), + put({ch_pid, ChPid}, {Channel, MRef}), State. process_channel_frame(Frame, ErrPid, Channel, ChPid, AState) -> @@ -967,29 +900,20 @@ process_channel_frame(Frame, ErrPid, Channel, ChPid, AState) -> AState end. -log_channel_error(ConnectionState, Channel, Reason) -> - rabbit_log:error("connection ~p (~p), channel ~p - error:~n~p~n", - [self(), ConnectionState, Channel, Reason]). 
- -handle_exception(State = #v1{connection_state = closed}, Channel, Reason) -> - log_channel_error(closed, Channel, Reason), +handle_exception(State = #v1{connection_state = closed}, _Channel, _Reason) -> State; -handle_exception(State = #v1{connection_state = CS}, Channel, Reason) -> - log_channel_error(CS, Channel, Reason), +handle_exception(State, Channel, Reason) -> send_exception(State, Channel, Reason). send_exception(State = #v1{connection = #connection{protocol = Protocol}}, Channel, Reason) -> - {ShouldClose, CloseChannel, CloseMethod} = + {0, CloseMethod} = rabbit_binary_generator:map_exception(Channel, Reason, Protocol), - NewState = case ShouldClose of - true -> terminate_channels(), - close_connection(State); - false -> close_channel(Channel, State) - end, + terminate_channels(), + State1 = close_connection(State), ok = rabbit_writer:internal_send_command( - NewState#v1.sock, CloseChannel, CloseMethod, Protocol), - NewState. + State1#v1.sock, 0, CloseMethod, Protocol), + State1. internal_emit_stats(State = #v1{stats_timer = StatsTimer}) -> rabbit_event:notify(connection_stats, infos(?STATISTICS_KEYS, State)), diff --git a/src/rabbit_registry.erl b/src/rabbit_registry.erl index 795413aa..9821ae7b 100644 --- a/src/rabbit_registry.erl +++ b/src/rabbit_registry.erl @@ -48,7 +48,7 @@ start_link() -> %%--------------------------------------------------------------------------- register(Class, TypeName, ModuleName) -> - gen_server:call(?SERVER, {register, Class, TypeName, ModuleName}). + gen_server:call(?SERVER, {register, Class, TypeName, ModuleName}, infinity). %% This is used with user-supplied arguments (e.g., on exchange %% declare), so we restrict it to existing atoms only. This means it diff --git a/src/rabbit_router.erl b/src/rabbit_router.erl index 692d2473..d453a870 100644 --- a/src/rabbit_router.erl +++ b/src/rabbit_router.erl @@ -37,7 +37,8 @@ fun ((rabbit_types:binding()) -> boolean())) -> match_result()). 
-spec(match_routing_key/2 :: (rabbit_types:binding_source(), - routing_key() | '_') -> match_result()). + [routing_key()] | ['_']) -> + match_result()). -endif. @@ -58,7 +59,7 @@ deliver(QNames, Delivery = #delivery{mandatory = false, {routed, QPids}; deliver(QNames, Delivery = #delivery{mandatory = Mandatory, - immediate = Immediate}) -> + immediate = Immediate}) -> QPids = lookup_qpids(QNames), {Success, _} = delegate:invoke(QPids, @@ -66,7 +67,7 @@ deliver(QNames, Delivery = #delivery{mandatory = Mandatory, rabbit_amqqueue:deliver(Pid, Delivery) end), {Routed, Handled} = - lists:foldl(fun fold_deliveries/2, {false, []}, Success), + lists:foldl(fun fold_deliveries/2, {false, []}, Success), check_delivery(Mandatory, Immediate, {Routed, Handled}). @@ -82,12 +83,19 @@ match_bindings(SrcName, Match) -> Match(Binding)]), mnesia:async_dirty(fun qlc:e/1, [Query]). -match_routing_key(SrcName, RoutingKey) -> - MatchHead = #route{binding = #binding{source = SrcName, +match_routing_key(SrcName, [RoutingKey]) -> + find_routes(#route{binding = #binding{source = SrcName, destination = '$1', key = RoutingKey, _ = '_'}}, - mnesia:dirty_select(rabbit_route, [{MatchHead, [], ['$1']}]). + []); +match_routing_key(SrcName, [_|_] = RoutingKeys) -> + find_routes(#route{binding = #binding{source = SrcName, + destination = '$1', + key = '$2', + _ = '_'}}, + [list_to_tuple(['orelse' | [{'=:=', '$2', RKey} || + RKey <- RoutingKeys]])]). %%-------------------------------------------------------------------- @@ -102,7 +110,31 @@ check_delivery(_ , _ , {_ , Qs}) -> {routed, Qs}. lookup_qpids(QNames) -> lists:foldl(fun (QName, QPids) -> case mnesia:dirty_read({rabbit_queue, QName}) of - [#amqqueue{pid = QPid}] -> [QPid | QPids]; - [] -> QPids + [#amqqueue{pid = QPid, slave_pids = SPids}] -> + [QPid | SPids ++ QPids]; + [] -> + QPids end end, [], QNames). 
+ +%% Normally we'd call mnesia:dirty_select/2 here, but that is quite +%% expensive due to +%% +%% 1) general mnesia overheads (figuring out table types and +%% locations, etc). We get away with bypassing these because we know +%% that the table +%% - is not the schema table +%% - has a local ram copy +%% - does not have any indices +%% +%% 2) 'fixing' of the table with ets:safe_fixtable/2, which is wholly +%% unnecessary. According to the ets docs (and the code in erl_db.c), +%% 'select' is safe anyway ("Functions that internally traverse over a +%% table, like select and match, will give the same guarantee as +%% safe_fixtable.") and, furthermore, even the lower level iterators +%% ('first' and 'next') are safe on ordered_set tables ("Note that for +%% tables of the ordered_set type, safe_fixtable/2 is not necessary as +%% calls to first/1 and next/2 will always succeed."), which +%% rabbit_route is. +find_routes(MatchHead, Conditions) -> + ets:select(rabbit_route, [{MatchHead, Conditions, ['$1']}]). diff --git a/src/rabbit_ssl.erl b/src/rabbit_ssl.erl index e831ee51..e0defa9e 100644 --- a/src/rabbit_ssl.erl +++ b/src/rabbit_ssl.erl @@ -87,10 +87,10 @@ cert_info(F, Cert) -> find_by_type(Type, {rdnSequence, RDNs}) -> case [V || #'AttributeTypeAndValue'{type = T, value = V} - <- lists:flatten(RDNs), - T == Type] of - [{printableString, S}] -> S; - [] -> not_found + <- lists:flatten(RDNs), + T == Type] of + [Val] -> format_asn1_value(Val); + [] -> not_found end. 
%%-------------------------------------------------------------------------- @@ -162,12 +162,85 @@ escape_rdn_value([C | S], middle) -> format_asn1_value({ST, S}) when ST =:= teletexString; ST =:= printableString; ST =:= universalString; ST =:= utf8String; ST =:= bmpString -> - if is_binary(S) -> binary_to_list(S); - true -> S - end; + format_directory_string(ST, S); format_asn1_value({utcTime, [Y1, Y2, M1, M2, D1, D2, H1, H2, - Min1, Min2, S1, S2, $Z]}) -> + Min1, Min2, S1, S2, $Z]}) -> io_lib:format("20~c~c-~c~c-~c~cT~c~c:~c~c:~c~cZ", [Y1, Y2, M1, M2, D1, D2, H1, H2, Min1, Min2, S1, S2]); format_asn1_value(V) -> io_lib:format("~p", [V]). + +%% DirectoryString { INTEGER : maxSize } ::= CHOICE { +%% teletexString TeletexString (SIZE (1..maxSize)), +%% printableString PrintableString (SIZE (1..maxSize)), +%% bmpString BMPString (SIZE (1..maxSize)), +%% universalString UniversalString (SIZE (1..maxSize)), +%% uTF8String UTF8String (SIZE (1..maxSize)) } +%% +%% Precise definitions of printable / teletexString are hard to come +%% by. This is what I reconstructed: +%% +%% printableString: +%% "intended to represent the limited character sets available to +%% mainframe input terminals" +%% A-Z a-z 0-9 ' ( ) + , - . / : = ? [space] +%% http://msdn.microsoft.com/en-us/library/bb540814(v=vs.85).aspx +%% +%% teletexString: +%% "a sizable volume of software in the world treats TeletexString +%% (T61String) as a simple 8-bit string with mostly Windows Latin 1 +%% (superset of iso-8859-1) encoding" +%% http://www.mail-archive.com/asn1@asn1.org/msg00460.html +%% +%% (However according to that link X.680 actually defines +%% TeletexString in some much more involved and crazy way. I suggest +%% we treat it as ISO-8859-1 since Erlang does not support Windows +%% Latin 1). +%% +%% bmpString: +%% UCS-2 according to RFC 3641. Hence cannot represent Unicode +%% characters above 65535 (outside the "Basic Multilingual Plane"). +%% +%% universalString: +%% UCS-4 according to RFC 3641. 
+%% +%% utf8String: +%% UTF-8 according to RFC 3641. +%% +%% Within Rabbit we assume UTF-8 encoding. Since printableString is a +%% subset of ASCII it is also a subset of UTF-8. The others need +%% converting. Fortunately since the Erlang SSL library does the +%% decoding for us (albeit into a weird format, see below), we just +%% need to handle encoding into UTF-8. Note also that utf8Strings come +%% back as binary. +%% +%% Note for testing: the default Ubuntu configuration for openssl will +%% only create printableString or teletexString types no matter what +%% you do. Edit string_mask in the [req] section of +%% /etc/ssl/openssl.cnf to change this (see comments there). You +%% probably also need to set utf8 = yes to get it to accept UTF-8 on +%% the command line. Also note I could not get openssl to generate a +%% universalString. + +format_directory_string(printableString, S) -> S; +format_directory_string(teletexString, S) -> utf8_list_from(S); +format_directory_string(bmpString, S) -> utf8_list_from(S); +format_directory_string(universalString, S) -> utf8_list_from(S); +format_directory_string(utf8String, S) -> binary_to_list(S). + +utf8_list_from(S) -> + binary_to_list( + unicode:characters_to_binary(flatten_ssl_list(S), utf32, utf8)). + +%% The Erlang SSL implementation invents its own representation for +%% non-ascii strings - looking like [97,{0,0,3,187}] (that's LATIN +%% SMALL LETTER A followed by GREEK SMALL LETTER LAMDA). We convert +%% this into a list of unicode characters, which we can tell +%% unicode:characters_to_binary is utf32. + +flatten_ssl_list(L) -> [flatten_ssl_list_item(I) || I <- L]. + +flatten_ssl_list_item({A, B, C, D}) -> + A * (1 bsl 24) + B * (1 bsl 16) + C * (1 bsl 8) + D; +flatten_ssl_list_item(N) when is_number (N) -> + N. 
diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 49b09508..2a3ced92 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -26,6 +26,7 @@ -define(PERSISTENT_MSG_STORE, msg_store_persistent). -define(TRANSIENT_MSG_STORE, msg_store_transient). +-define(CLEANUP_QUEUE_NAME, <<"cleanup-queue">>). test_content_prop_roundtrip(Datum, Binary) -> Types = [element(1, E) || E <- Datum], @@ -34,6 +35,7 @@ test_content_prop_roundtrip(Datum, Binary) -> Binary = rabbit_binary_generator:encode_properties(Types, Values). %% assertion all_tests() -> + passed = gm_tests:all_tests(), application:set_env(rabbit, file_handles_high_watermark, 10, infinity), ok = file_handle_cache:set_limit(10), passed = test_file_handle_cache(), @@ -55,6 +57,7 @@ all_tests() -> passed = test_cluster_management(), passed = test_user_management(), passed = test_server_status(), + passed = test_confirms(), passed = maybe_run_cluster_dependent_tests(), passed = test_configurable_server_properties(), passed. @@ -80,20 +83,24 @@ run_cluster_dependent_tests(SecondaryNode) -> io:format("Running cluster dependent tests with node ~p~n", [SecondaryNode]), passed = test_delegates_async(SecondaryNode), passed = test_delegates_sync(SecondaryNode), + passed = test_queue_cleanup(SecondaryNode), + passed = test_declare_on_dead_queue(SecondaryNode), %% we now run the tests remotely, so that code coverage on the %% local node picks up more of the delegate Node = node(), Self = self(), Remote = spawn(SecondaryNode, - fun () -> A = test_delegates_async(Node), - B = test_delegates_sync(Node), - Self ! {self(), {A, B}} + fun () -> Rs = [ test_delegates_async(Node), + test_delegates_sync(Node), + test_queue_cleanup(Node), + test_declare_on_dead_queue(Node) ], + Self ! 
{self(), Rs} end), receive {Remote, Result} -> - Result = {passed, passed} - after 2000 -> + Result = lists:duplicate(length(Result), passed) + after 30000 -> throw(timeout) end, @@ -196,6 +203,42 @@ test_priority_queue() -> {true, false, 3, [{1, baz}, {0, foo}, {0, bar}], [baz, foo, bar]} = test_priority_queue(Q15), + %% 1-element infinity priority Q + Q16 = priority_queue:in(foo, infinity, Q), + {true, false, 1, [{infinity, foo}], [foo]} = test_priority_queue(Q16), + + %% add infinity to 0-priority Q + Q17 = priority_queue:in(foo, infinity, priority_queue:in(bar, Q)), + {true, false, 2, [{infinity, foo}, {0, bar}], [foo, bar]} = + test_priority_queue(Q17), + + %% and the other way around + Q18 = priority_queue:in(bar, priority_queue:in(foo, infinity, Q)), + {true, false, 2, [{infinity, foo}, {0, bar}], [foo, bar]} = + test_priority_queue(Q18), + + %% add infinity to mixed-priority Q + Q19 = priority_queue:in(qux, infinity, Q3), + {true, false, 3, [{infinity, qux}, {2, bar}, {1, foo}], [qux, bar, foo]} = + test_priority_queue(Q19), + + %% merge the above with a negative priority Q + Q20 = priority_queue:join(Q19, Q4), + {true, false, 4, [{infinity, qux}, {2, bar}, {1, foo}, {-1, foo}], + [qux, bar, foo, foo]} = test_priority_queue(Q20), + + %% merge two infinity priority queues + Q21 = priority_queue:join(priority_queue:in(foo, infinity, Q), + priority_queue:in(bar, infinity, Q)), + {true, false, 2, [{infinity, foo}, {infinity, bar}], [foo, bar]} = + test_priority_queue(Q21), + + %% merge two mixed priority with infinity queues + Q22 = priority_queue:join(Q18, Q20), + {true, false, 6, [{infinity, foo}, {infinity, qux}, {2, bar}, {1, foo}, + {0, bar}, {-1, foo}], [foo, qux, bar, foo, bar, foo]} = + test_priority_queue(Q22), + passed. 
priority_queue_in_all(Q, L) -> @@ -419,35 +462,35 @@ test_content_properties() -> [{<<"one">>, signedint, 1}, {<<"two">>, signedint, 2}]}]}], << - % property-flags - 16#8000:16, + %% property-flags + 16#8000:16, - % property-list: + %% property-list: - % table - 117:32, % table length in bytes + %% table + 117:32, % table length in bytes - 11,"a signedint", % name - "I",12345678:32, % type and value + 11,"a signedint", % name + "I",12345678:32, % type and value - 9,"a longstr", - "S",10:32,"yes please", + 9,"a longstr", + "S",10:32,"yes please", - 9,"a decimal", - "D",123,12345678:32, + 9,"a decimal", + "D",123,12345678:32, - 11,"a timestamp", - "T", 123456789012345:64, + 11,"a timestamp", + "T", 123456789012345:64, - 14,"a nested table", - "F", - 18:32, + 14,"a nested table", + "F", + 18:32, - 3,"one", - "I",1:32, + 3,"one", + "I",1:32, - 3,"two", - "I",2:32 >>), + 3,"two", + "I",2:32 >>), case catch rabbit_binary_parser:parse_properties([bit, bit, bit, bit], <<16#A0,0,1>>) of {'EXIT', content_properties_binary_overflow} -> passed; V -> exit({got_success_but_expected_failure, V}) @@ -474,28 +517,28 @@ test_field_values() -> ]}], << - % property-flags - 16#8000:16, - % table length in bytes - 228:32, - - 7,"longstr", "S", 21:32, "Here is a long string", % = 34 - 9,"signedint", "I", 12345:32/signed, % + 15 = 49 - 7,"decimal", "D", 3, 123456:32, % + 14 = 63 - 9,"timestamp", "T", 109876543209876:64, % + 19 = 82 - 5,"table", "F", 31:32, % length of table % + 11 = 93 - 3,"one", "I", 54321:32, % + 9 = 102 - 3,"two", "S", 13:32, "A long string",% + 22 = 124 - 4,"byte", "b", 255:8, % + 7 = 131 - 4,"long", "l", 1234567890:64, % + 14 = 145 - 5,"short", "s", 655:16, % + 9 = 154 - 4,"bool", "t", 1, % + 7 = 161 - 6,"binary", "x", 15:32, "a binary string", % + 27 = 188 - 4,"void", "V", % + 6 = 194 - 5,"array", "A", 23:32, % + 11 = 205 - "I", 54321:32, % + 5 = 210 - "S", 13:32, "A long string" % + 18 = 228 - >>), + %% property-flags + 16#8000:16, + %% table length in bytes + 
228:32, + + 7,"longstr", "S", 21:32, "Here is a long string", % = 34 + 9,"signedint", "I", 12345:32/signed, % + 15 = 49 + 7,"decimal", "D", 3, 123456:32, % + 14 = 63 + 9,"timestamp", "T", 109876543209876:64, % + 19 = 82 + 5,"table", "F", 31:32, % length of table % + 11 = 93 + 3,"one", "I", 54321:32, % + 9 = 102 + 3,"two", "S", 13:32, "A long string", % + 22 = 124 + 4,"byte", "b", 255:8, % + 7 = 131 + 4,"long", "l", 1234567890:64, % + 14 = 145 + 5,"short", "s", 655:16, % + 9 = 154 + 4,"bool", "t", 1, % + 7 = 161 + 6,"binary", "x", 15:32, "a binary string", % + 27 = 188 + 4,"void", "V", % + 6 = 194 + 5,"array", "A", 23:32, % + 11 = 205 + "I", 54321:32, % + 5 = 210 + "S", 13:32, "A long string" % + 18 = 228 + >>), passed. %% Test that content frames don't exceed frame-max @@ -580,32 +623,134 @@ sequence_with_content(Sequence) -> rabbit_framing_amqp_0_9_1), Sequence). -test_topic_match(P, R) -> - test_topic_match(P, R, true). - -test_topic_match(P, R, Expected) -> - case rabbit_exchange_type_topic:topic_matches(list_to_binary(P), - list_to_binary(R)) of - Expected -> - passed; - _ -> - {topic_match_failure, P, R} - end. 
- test_topic_matching() -> - passed = test_topic_match("#", "test.test"), - passed = test_topic_match("#", ""), - passed = test_topic_match("#.T.R", "T.T.R"), - passed = test_topic_match("#.T.R", "T.R.T.R"), - passed = test_topic_match("#.Y.Z", "X.Y.Z.X.Y.Z"), - passed = test_topic_match("#.test", "test"), - passed = test_topic_match("#.test", "test.test"), - passed = test_topic_match("#.test", "ignored.test"), - passed = test_topic_match("#.test", "more.ignored.test"), - passed = test_topic_match("#.test", "notmatched", false), - passed = test_topic_match("#.z", "one.two.three.four", false), + XName = #resource{virtual_host = <<"/">>, + kind = exchange, + name = <<"test_exchange">>}, + X = #exchange{name = XName, type = topic, durable = false, + auto_delete = false, arguments = []}, + %% create + rabbit_exchange_type_topic:validate(X), + exchange_op_callback(X, create, []), + + %% add some bindings + Bindings = [#binding{source = XName, + key = list_to_binary(Key), + destination = #resource{virtual_host = <<"/">>, + kind = queue, + name = list_to_binary(Q)}} || + {Key, Q} <- [{"a.b.c", "t1"}, + {"a.*.c", "t2"}, + {"a.#.b", "t3"}, + {"a.b.b.c", "t4"}, + {"#", "t5"}, + {"#.#", "t6"}, + {"#.b", "t7"}, + {"*.*", "t8"}, + {"a.*", "t9"}, + {"*.b.c", "t10"}, + {"a.#", "t11"}, + {"a.#.#", "t12"}, + {"b.b.c", "t13"}, + {"a.b.b", "t14"}, + {"a.b", "t15"}, + {"b.c", "t16"}, + {"", "t17"}, + {"*.*.*", "t18"}, + {"vodka.martini", "t19"}, + {"a.b.c", "t20"}, + {"*.#", "t21"}, + {"#.*.#", "t22"}, + {"*.#.#", "t23"}, + {"#.#.#", "t24"}, + {"*", "t25"}, + {"#.b.#", "t26"}]], + lists:foreach(fun (B) -> exchange_op_callback(X, add_binding, [B]) end, + Bindings), + + %% test some matches + test_topic_expect_match( + X, [{"a.b.c", ["t1", "t2", "t5", "t6", "t10", "t11", "t12", + "t18", "t20", "t21", "t22", "t23", "t24", + "t26"]}, + {"a.b", ["t3", "t5", "t6", "t7", "t8", "t9", "t11", + "t12", "t15", "t21", "t22", "t23", "t24", + "t26"]}, + {"a.b.b", ["t3", "t5", "t6", "t7", "t11", 
"t12", "t14", + "t18", "t21", "t22", "t23", "t24", "t26"]}, + {"", ["t5", "t6", "t17", "t24"]}, + {"b.c.c", ["t5", "t6", "t18", "t21", "t22", "t23", + "t24", "t26"]}, + {"a.a.a.a.a", ["t5", "t6", "t11", "t12", "t21", "t22", + "t23", "t24"]}, + {"vodka.gin", ["t5", "t6", "t8", "t21", "t22", "t23", + "t24"]}, + {"vodka.martini", ["t5", "t6", "t8", "t19", "t21", "t22", "t23", + "t24"]}, + {"b.b.c", ["t5", "t6", "t10", "t13", "t18", "t21", + "t22", "t23", "t24", "t26"]}, + {"nothing.here.at.all", ["t5", "t6", "t21", "t22", "t23", "t24"]}, + {"oneword", ["t5", "t6", "t21", "t22", "t23", "t24", + "t25"]}]), + + %% remove some bindings + RemovedBindings = [lists:nth(1, Bindings), lists:nth(5, Bindings), + lists:nth(11, Bindings), lists:nth(19, Bindings), + lists:nth(21, Bindings)], + exchange_op_callback(X, remove_bindings, [RemovedBindings]), + RemainingBindings = ordsets:to_list( + ordsets:subtract(ordsets:from_list(Bindings), + ordsets:from_list(RemovedBindings))), + + %% test some matches + test_topic_expect_match( + X, + [{"a.b.c", ["t2", "t6", "t10", "t12", "t18", "t20", "t22", + "t23", "t24", "t26"]}, + {"a.b", ["t3", "t6", "t7", "t8", "t9", "t12", "t15", + "t22", "t23", "t24", "t26"]}, + {"a.b.b", ["t3", "t6", "t7", "t12", "t14", "t18", "t22", + "t23", "t24", "t26"]}, + {"", ["t6", "t17", "t24"]}, + {"b.c.c", ["t6", "t18", "t22", "t23", "t24", "t26"]}, + {"a.a.a.a.a", ["t6", "t12", "t22", "t23", "t24"]}, + {"vodka.gin", ["t6", "t8", "t22", "t23", "t24"]}, + {"vodka.martini", ["t6", "t8", "t22", "t23", "t24"]}, + {"b.b.c", ["t6", "t10", "t13", "t18", "t22", "t23", + "t24", "t26"]}, + {"nothing.here.at.all", ["t6", "t22", "t23", "t24"]}, + {"oneword", ["t6", "t22", "t23", "t24", "t25"]}]), + + %% remove the entire exchange + exchange_op_callback(X, delete, [RemainingBindings]), + %% none should match now + test_topic_expect_match(X, [{"a.b.c", []}, {"b.b.c", []}, {"", []}]), passed. 
+exchange_op_callback(X, Fun, Args) -> + rabbit_misc:execute_mnesia_transaction( + fun () -> rabbit_exchange:callback(X, Fun, [transaction, X] ++ Args) end), + rabbit_exchange:callback(X, Fun, [none, X] ++ Args). + +test_topic_expect_match(X, List) -> + lists:foreach( + fun ({Key, Expected}) -> + BinKey = list_to_binary(Key), + Message = rabbit_basic:message(X#exchange.name, BinKey, + #'P_basic'{}, <<>>), + Res = rabbit_exchange_type_topic:route( + X, #delivery{mandatory = false, + immediate = false, + sender = self(), + message = Message}), + ExpectedRes = lists:map( + fun (Q) -> #resource{virtual_host = <<"/">>, + kind = queue, + name = list_to_binary(Q)} + end, Expected), + true = (lists:usort(ExpectedRes) =:= lists:usort(Res)) + end, List). + test_app_management() -> %% starting, stopping, status ok = control_action(stop_app, []), @@ -713,7 +858,7 @@ test_log_management_during_startup() -> ok = delete_log_handlers([sasl_report_tty_h]), ok = case catch control_action(start_app, []) of ok -> exit({got_success_but_expected_failure, - log_rotation_tty_no_handlers_test}); + log_rotation_tty_no_handlers_test}); {error, {cannot_log_to_tty, _, _}} -> ok end, @@ -738,8 +883,8 @@ test_log_management_during_startup() -> ok = add_log_handlers([{error_logger_file_h, MainLog}]), ok = case control_action(start_app, []) of ok -> exit({got_success_but_expected_failure, - log_rotation_no_write_permission_dir_test}); - {error, {cannot_log_to_file, _, _}} -> ok + log_rotation_no_write_permission_dir_test}); + {error, {cannot_log_to_file, _, _}} -> ok end, %% start application with logging to a subdirectory which @@ -749,9 +894,9 @@ test_log_management_during_startup() -> ok = add_log_handlers([{error_logger_file_h, MainLog}]), ok = case control_action(start_app, []) of ok -> exit({got_success_but_expected_failure, - log_rotatation_parent_dirs_test}); + log_rotatation_parent_dirs_test}); {error, {cannot_log_to_file, _, - {error, {cannot_create_parent_dirs, _, eacces}}}} -> ok + 
{error, {cannot_create_parent_dirs, _, eacces}}}} -> ok end, ok = set_permissions(TmpDir, 8#00700), ok = set_permissions(TmpLog, 8#00600), @@ -771,22 +916,22 @@ test_log_management_during_startup() -> passed. test_option_parser() -> - % command and arguments should just pass through + %% command and arguments should just pass through ok = check_get_options({["mock_command", "arg1", "arg2"], []}, [], ["mock_command", "arg1", "arg2"]), - % get flags + %% get flags ok = check_get_options( {["mock_command", "arg1"], [{"-f", true}, {"-f2", false}]}, [{flag, "-f"}, {flag, "-f2"}], ["mock_command", "arg1", "-f"]), - % get options + %% get options ok = check_get_options( {["mock_command"], [{"-foo", "bar"}, {"-baz", "notbaz"}]}, [{option, "-foo", "notfoo"}, {option, "-baz", "notbaz"}], ["mock_command", "-foo", "bar"]), - % shuffled and interleaved arguments and options + %% shuffled and interleaved arguments and options ok = check_get_options( {["a1", "a2", "a3"], [{"-o1", "hello"}, {"-o2", "noto2"}, {"-f", true}]}, [{option, "-o1", "noto1"}, {flag, "-f"}, {option, "-o2", "noto2"}], @@ -795,7 +940,6 @@ test_option_parser() -> passed. 
test_cluster_management() -> - %% 'cluster' and 'reset' should only work if the app is stopped {error, _} = control_action(cluster, []), {error, _} = control_action(reset, []), @@ -843,13 +987,16 @@ test_cluster_management() -> ok = control_action(reset, []), ok = control_action(start_app, []), ok = control_action(stop_app, []), + ok = assert_disc_node(), ok = control_action(force_cluster, ["invalid1@invalid", "invalid2@invalid"]), + ok = assert_ram_node(), %% join a non-existing cluster as a ram node ok = control_action(reset, []), ok = control_action(force_cluster, ["invalid1@invalid", "invalid2@invalid"]), + ok = assert_ram_node(), SecondaryNode = rabbit_misc:makenode("hare"), case net_adm:ping(SecondaryNode) of @@ -868,15 +1015,18 @@ test_cluster_management2(SecondaryNode) -> %% make a disk node ok = control_action(reset, []), ok = control_action(cluster, [NodeS]), + ok = assert_disc_node(), %% make a ram node ok = control_action(reset, []), ok = control_action(cluster, [SecondaryNodeS]), + ok = assert_ram_node(), %% join cluster as a ram node ok = control_action(reset, []), ok = control_action(force_cluster, [SecondaryNodeS, "invalid1@invalid"]), ok = control_action(start_app, []), ok = control_action(stop_app, []), + ok = assert_ram_node(), %% change cluster config while remaining in same cluster ok = control_action(force_cluster, ["invalid2@invalid", SecondaryNodeS]), @@ -888,27 +1038,45 @@ test_cluster_management2(SecondaryNode) -> "invalid2@invalid"]), ok = control_action(start_app, []), ok = control_action(stop_app, []), + ok = assert_ram_node(), - %% join empty cluster as a ram node + %% join empty cluster as a ram node (converts to disc) ok = control_action(cluster, []), ok = control_action(start_app, []), ok = control_action(stop_app, []), + ok = assert_disc_node(), - %% turn ram node into disk node + %% make a new ram node ok = control_action(reset, []), + ok = control_action(force_cluster, [SecondaryNodeS]), + ok = control_action(start_app, []), + ok 
= control_action(stop_app, []), + ok = assert_ram_node(), + + %% turn ram node into disk node ok = control_action(cluster, [SecondaryNodeS, NodeS]), ok = control_action(start_app, []), ok = control_action(stop_app, []), + ok = assert_disc_node(), %% convert a disk node into a ram node + ok = assert_disc_node(), ok = control_action(force_cluster, ["invalid1@invalid", "invalid2@invalid"]), + ok = assert_ram_node(), + + %% make a new disk node + ok = control_action(force_reset, []), + ok = control_action(start_app, []), + ok = control_action(stop_app, []), + ok = assert_disc_node(), %% turn a disk node into a ram node ok = control_action(reset, []), ok = control_action(cluster, [SecondaryNodeS]), ok = control_action(start_app, []), ok = control_action(stop_app, []), + ok = assert_ram_node(), %% NB: this will log an inconsistent_database error, which is harmless %% Turning cover on / off is OK even if we're not in general using cover, @@ -934,6 +1102,10 @@ test_cluster_management2(SecondaryNode) -> {error, {no_running_cluster_nodes, _, _}} = control_action(reset, []), + %% attempt to change type when no other node is alive + {error, {no_running_cluster_nodes, _, _}} = + control_action(cluster, [SecondaryNodeS]), + %% leave system clustered, with the secondary node as a ram node ok = control_action(force_reset, []), ok = control_action(start_app, []), @@ -962,15 +1134,25 @@ test_user_management() -> control_action(list_permissions, [], [{"-p", "/testhost"}]), {error, {invalid_regexp, _, _}} = control_action(set_permissions, ["guest", "+foo", ".*", ".*"]), + {error, {no_such_user, _}} = + control_action(set_user_tags, ["foo", "bar"]), %% user creation ok = control_action(add_user, ["foo", "bar"]), {error, {user_already_exists, _}} = control_action(add_user, ["foo", "bar"]), ok = control_action(change_password, ["foo", "baz"]), - ok = control_action(set_admin, ["foo"]), - ok = control_action(clear_admin, ["foo"]), - ok = control_action(list_users, []), + + TestTags = fun 
(Tags) -> + Args = ["foo" | [atom_to_list(T) || T <- Tags]], + ok = control_action(set_user_tags, Args), + {ok, #internal_user{tags = Tags}} = + rabbit_auth_backend_internal:lookup_user(<<"foo">>), + ok = control_action(list_users, []) + end, + TestTags([foo, bar, baz]), + TestTags([administrator]), + TestTags([]), %% vhost creation ok = control_action(add_vhost, ["/testhost"]), @@ -1014,9 +1196,10 @@ test_user_management() -> test_server_status() -> %% create a few things so there is some useful information to list Writer = spawn(fun () -> receive shutdown -> ok end end), - {ok, Ch} = rabbit_channel:start_link(1, self(), Writer, - user(<<"user">>), <<"/">>, self(), - fun (_) -> {ok, self()} end), + {ok, Ch} = rabbit_channel:start_link( + 1, self(), Writer, self(), rabbit_framing_amqp_0_9_1, + user(<<"user">>), <<"/">>, [], self(), + fun (_) -> {ok, self()} end), [Q, Q2] = [Queue || Name <- [<<"foo">>, <<"bar">>], {new, Queue = #amqqueue{}} <- [rabbit_amqqueue:declare( @@ -1038,7 +1221,7 @@ test_server_status() -> [_|_] = rabbit_binding:list_for_source( rabbit_misc:r(<<"/">>, exchange, <<"">>)), [_] = rabbit_binding:list_for_destination( - rabbit_misc:r(<<"/">>, queue, <<"foo">>)), + rabbit_misc:r(<<"/">>, queue, <<"foo">>)), [_] = rabbit_binding:list_for_source_and_destination( rabbit_misc:r(<<"/">>, exchange, <<"">>), rabbit_misc:r(<<"/">>, queue, <<"foo">>)), @@ -1071,12 +1254,19 @@ test_server_status() -> passed. -test_spawn(Receiver) -> +test_writer(Pid) -> + receive + shutdown -> ok; + {send_command, Method} -> Pid ! Method, test_writer(Pid) + end. 
+ +test_spawn() -> Me = self(), - Writer = spawn(fun () -> Receiver(Me) end), - {ok, Ch} = rabbit_channel:start_link(1, Me, Writer, - user(<<"guest">>), <<"/">>, self(), - fun (_) -> {ok, self()} end), + Writer = spawn(fun () -> test_writer(Me) end), + {ok, Ch} = rabbit_channel:start_link( + 1, Me, Writer, Me, rabbit_framing_amqp_0_9_1, + user(<<"guest">>), <<"/">>, [], self(), + fun (_) -> {ok, self()} end), ok = rabbit_channel:do(Ch, #'channel.open'{}), receive #'channel.open_ok'{} -> ok after 1000 -> throw(failed_to_receive_channel_open_ok) @@ -1085,25 +1275,14 @@ test_spawn(Receiver) -> user(Username) -> #user{username = Username, - is_admin = true, + tags = [administrator], auth_backend = rabbit_auth_backend_internal, impl = #internal_user{username = Username, - is_admin = true}}. - -test_statistics_receiver(Pid) -> - receive - shutdown -> - ok; - {send_command, Method} -> - Pid ! Method, - test_statistics_receiver(Pid) - end. + tags = [administrator]}}. test_statistics_event_receiver(Pid) -> receive - Foo -> - Pid ! Foo, - test_statistics_event_receiver(Pid) + Foo -> Pid ! Foo, test_statistics_event_receiver(Pid) end. test_statistics_receive_event(Ch, Matcher) -> @@ -1120,6 +1299,66 @@ test_statistics_receive_event1(Ch, Matcher) -> after 1000 -> throw(failed_to_receive_event) end. 
+test_confirms() -> + {_Writer, Ch} = test_spawn(), + DeclareBindDurableQueue = + fun() -> + rabbit_channel:do(Ch, #'queue.declare'{durable = true}), + receive #'queue.declare_ok'{queue = Q0} -> + rabbit_channel:do(Ch, #'queue.bind'{ + queue = Q0, + exchange = <<"amq.direct">>, + routing_key = "magic" }), + receive #'queue.bind_ok'{} -> + Q0 + after 1000 -> + throw(failed_to_bind_queue) + end + after 1000 -> + throw(failed_to_declare_queue) + end + end, + %% Declare and bind two queues + QName1 = DeclareBindDurableQueue(), + QName2 = DeclareBindDurableQueue(), + %% Get the first one's pid (we'll crash it later) + {ok, Q1} = rabbit_amqqueue:lookup(rabbit_misc:r(<<"/">>, queue, QName1)), + QPid1 = Q1#amqqueue.pid, + %% Enable confirms + rabbit_channel:do(Ch, #'confirm.select'{}), + receive + #'confirm.select_ok'{} -> ok + after 1000 -> throw(failed_to_enable_confirms) + end, + %% Publish a message + rabbit_channel:do(Ch, #'basic.publish'{exchange = <<"amq.direct">>, + routing_key = "magic" + }, + rabbit_basic:build_content( + #'P_basic'{delivery_mode = 2}, <<"">>)), + %% Crash the queue + QPid1 ! boom, + %% Wait for a nack + receive + #'basic.nack'{} -> ok; + #'basic.ack'{} -> throw(received_ack_instead_of_nack) + after 2000 -> throw(did_not_receive_nack) + end, + receive + #'basic.ack'{} -> throw(received_ack_when_none_expected) + after 1000 -> ok + end, + %% Cleanup + rabbit_channel:do(Ch, #'queue.delete'{queue = QName2}), + receive + #'queue.delete_ok'{} -> ok + after 1000 -> throw(failed_to_cleanup_queue) + end, + unlink(Ch), + ok = rabbit_channel:shutdown(Ch), + + passed. + test_statistics() -> application:set_env(rabbit, collect_statistics, fine), @@ -1127,7 +1366,7 @@ test_statistics() -> %% by far the most complex code though. 
%% Set up a channel and queue - {_Writer, Ch} = test_spawn(fun test_statistics_receiver/1), + {_Writer, Ch} = test_spawn(), rabbit_channel:do(Ch, #'queue.declare'{}), QName = receive #'queue.declare_ok'{queue = Q0} -> Q0 @@ -1200,9 +1439,9 @@ test_delegates_async(SecondaryNode) -> make_responder(FMsg) -> make_responder(FMsg, timeout). make_responder(FMsg, Throw) -> fun () -> - receive Msg -> FMsg(Msg) - after 1000 -> throw(Throw) - end + receive Msg -> FMsg(Msg) + after 1000 -> throw(Throw) + end end. spawn_responders(Node, Responder, Count) -> @@ -1213,10 +1452,10 @@ await_response(0) -> await_response(Count) -> receive response -> ok, - await_response(Count - 1) + await_response(Count - 1) after 1000 -> - io:format("Async reply not received~n"), - throw(timeout) + io:format("Async reply not received~n"), + throw(timeout) end. must_exit(Fun) -> @@ -1228,11 +1467,11 @@ must_exit(Fun) -> end. test_delegates_sync(SecondaryNode) -> - Sender = fun (Pid) -> gen_server:call(Pid, invoked) end, + Sender = fun (Pid) -> gen_server:call(Pid, invoked, infinity) end, BadSender = fun (_Pid) -> exit(exception) end, Responder = make_responder(fun ({'$gen_call', From, invoked}) -> - gen_server:reply(From, response) + gen_server:reply(From, response) end), BadResponder = make_responder(fun ({'$gen_call', From, invoked}) -> @@ -1244,7 +1483,7 @@ test_delegates_sync(SecondaryNode) -> must_exit(fun () -> delegate:invoke(spawn(BadResponder), BadSender) end), must_exit(fun () -> - delegate:invoke(spawn(SecondaryNode, BadResponder), BadSender) end), + delegate:invoke(spawn(SecondaryNode, BadResponder), BadSender) end), LocalGoodPids = spawn_responders(node(), Responder, 2), RemoteGoodPids = spawn_responders(SecondaryNode, Responder, 2), @@ -1278,7 +1517,52 @@ test_delegates_sync(SecondaryNode) -> passed. 
-%--------------------------------------------------------------------- +test_queue_cleanup(_SecondaryNode) -> + {_Writer, Ch} = test_spawn(), + rabbit_channel:do(Ch, #'queue.declare'{ queue = ?CLEANUP_QUEUE_NAME }), + receive #'queue.declare_ok'{queue = ?CLEANUP_QUEUE_NAME} -> + ok + after 1000 -> throw(failed_to_receive_queue_declare_ok) + end, + rabbit:stop(), + rabbit:start(), + rabbit_channel:do(Ch, #'queue.declare'{ passive = true, + queue = ?CLEANUP_QUEUE_NAME }), + receive + #'channel.close'{reply_code = ?NOT_FOUND} -> + ok + after 2000 -> + throw(failed_to_receive_channel_exit) + end, + passed. + +test_declare_on_dead_queue(SecondaryNode) -> + QueueName = rabbit_misc:r(<<"/">>, queue, ?CLEANUP_QUEUE_NAME), + Self = self(), + Pid = spawn(SecondaryNode, + fun () -> + {new, #amqqueue{name = QueueName, pid = QPid}} = + rabbit_amqqueue:declare(QueueName, false, false, [], + none), + exit(QPid, kill), + Self ! {self(), killed, QPid} + end), + receive + {Pid, killed, QPid} -> + {existing, #amqqueue{name = QueueName, + pid = QPid}} = + rabbit_amqqueue:declare(QueueName, false, false, [], none), + false = rabbit_misc:is_process_alive(QPid), + {new, Q} = rabbit_amqqueue:declare(QueueName, false, false, [], + none), + true = rabbit_misc:is_process_alive(Q#amqqueue.pid), + {ok, 0} = rabbit_amqqueue:delete(Q, false, false), + passed + after 2000 -> + throw(failed_to_create_and_kill_queue) + end. + +%%--------------------------------------------------------------------- control_action(Command, Args) -> control_action(Command, node(), Args, default_options()). @@ -1341,7 +1625,7 @@ test_logs_working(MainLogFile, SaslLogFile) -> ok = rabbit_log:error("foo bar"), ok = error_logger:error_report(crash_report, [foo, bar]), %% give the error loggers some time to catch up - timer:sleep(50), + timer:sleep(100), [true, true] = non_empty_files([MainLogFile, SaslLogFile]), ok. @@ -1360,6 +1644,18 @@ clean_logs(Files, Suffix) -> end || File <- Files], ok. 
+assert_ram_node() -> + case rabbit_mnesia:is_disc_node() of + true -> exit('not_ram_node'); + false -> ok + end. + +assert_disc_node() -> + case rabbit_mnesia:is_disc_node() of + true -> ok; + false -> exit('not_disc_node') + end. + delete_file(File) -> case file:delete(File) of ok -> ok; @@ -1391,23 +1687,42 @@ test_file_handle_cache() -> ok = file_handle_cache:set_limit(5), %% 1 or 2 sockets, 2 msg_stores TmpDir = filename:join(rabbit_mnesia:dir(), "tmp"), ok = filelib:ensure_dir(filename:join(TmpDir, "nothing")), + [Src1, Dst1, Src2, Dst2] = Files = + [filename:join(TmpDir, Str) || Str <- ["file1", "file2", "file3", "file4"]], + Content = <<"foo">>, + CopyFun = fun (Src, Dst) -> + ok = rabbit_misc:write_file(Src, Content), + {ok, SrcHdl} = file_handle_cache:open(Src, [read], []), + {ok, DstHdl} = file_handle_cache:open(Dst, [write], []), + Size = size(Content), + {ok, Size} = file_handle_cache:copy(SrcHdl, DstHdl, Size), + ok = file_handle_cache:delete(SrcHdl), + ok = file_handle_cache:delete(DstHdl) + end, Pid = spawn(fun () -> {ok, Hdl} = file_handle_cache:open( - filename:join(TmpDir, "file3"), + filename:join(TmpDir, "file5"), [write], []), - receive close -> ok end, - file_handle_cache:delete(Hdl) + receive {next, Pid1} -> Pid1 ! {next, self()} end, + file_handle_cache:delete(Hdl), + %% This will block and never return, so we + %% exercise the fhc tidying up the pending + %% queue on the death of a process. + ok = CopyFun(Src1, Dst1) end), - Src = filename:join(TmpDir, "file1"), - Dst = filename:join(TmpDir, "file2"), - Content = <<"foo">>, - ok = file:write_file(Src, Content), - {ok, SrcHdl} = file_handle_cache:open(Src, [read], []), - {ok, DstHdl} = file_handle_cache:open(Dst, [write], []), - Size = size(Content), - {ok, Size} = file_handle_cache:copy(SrcHdl, DstHdl, Size), - ok = file_handle_cache:delete(SrcHdl), - file_handle_cache:delete(DstHdl), - Pid ! close, + ok = CopyFun(Src1, Dst1), + ok = file_handle_cache:set_limit(2), + Pid ! 
{next, self()}, + receive {next, Pid} -> ok end, + timer:sleep(100), + Pid1 = spawn(fun () -> CopyFun(Src2, Dst2) end), + timer:sleep(100), + erlang:monitor(process, Pid), + erlang:monitor(process, Pid1), + exit(Pid, kill), + exit(Pid1, kill), + receive {'DOWN', _MRef, process, Pid, _Reason} -> ok end, + receive {'DOWN', _MRef1, process, Pid1, _Reason1} -> ok end, + [file:delete(File) || File <- Files], ok = file_handle_cache:set_limit(Limit), passed. @@ -1432,6 +1747,10 @@ test_backing_queue() -> passed = test_queue_recover(), application:set_env(rabbit, queue_index_max_journal_entries, MaxJournal, infinity), + %% We will have restarted the message store, and thus changed + %% the order of the children of rabbit_sup. This will cause + %% problems if there are subsequent failures - see bug 24262. + ok = restart_app(), passed; _ -> passed @@ -1442,50 +1761,50 @@ restart_msg_store_empty() -> ok = rabbit_variable_queue:start_msg_store( undefined, {fun (ok) -> finished end, ok}). -guid_bin(X) -> +msg_id_bin(X) -> erlang:md5(term_to_binary(X)). msg_store_client_init(MsgStore, Ref) -> rabbit_msg_store:client_init(MsgStore, Ref, undefined, undefined). -msg_store_contains(Atom, Guids, MSCState) -> +msg_store_contains(Atom, MsgIds, MSCState) -> Atom = lists:foldl( - fun (Guid, Atom1) when Atom1 =:= Atom -> - rabbit_msg_store:contains(Guid, MSCState) end, - Atom, Guids). + fun (MsgId, Atom1) when Atom1 =:= Atom -> + rabbit_msg_store:contains(MsgId, MSCState) end, + Atom, MsgIds). -msg_store_sync(Guids, MSCState) -> +msg_store_sync(MsgIds, MSCState) -> Ref = make_ref(), Self = self(), - ok = rabbit_msg_store:sync(Guids, fun () -> Self ! {sync, Ref} end, + ok = rabbit_msg_store:sync(MsgIds, fun () -> Self ! {sync, Ref} end, MSCState), receive {sync, Ref} -> ok after 10000 -> - io:format("Sync from msg_store missing for guids ~p~n", [Guids]), + io:format("Sync from msg_store missing for msg_ids ~p~n", [MsgIds]), throw(timeout) end. 
-msg_store_read(Guids, MSCState) -> - lists:foldl(fun (Guid, MSCStateM) -> - {{ok, Guid}, MSCStateN} = rabbit_msg_store:read( - Guid, MSCStateM), +msg_store_read(MsgIds, MSCState) -> + lists:foldl(fun (MsgId, MSCStateM) -> + {{ok, MsgId}, MSCStateN} = rabbit_msg_store:read( + MsgId, MSCStateM), MSCStateN - end, MSCState, Guids). + end, MSCState, MsgIds). -msg_store_write(Guids, MSCState) -> - ok = lists:foldl( - fun (Guid, ok) -> rabbit_msg_store:write(Guid, Guid, MSCState) end, - ok, Guids). +msg_store_write(MsgIds, MSCState) -> + ok = lists:foldl(fun (MsgId, ok) -> + rabbit_msg_store:write(MsgId, MsgId, MSCState) + end, ok, MsgIds). -msg_store_remove(Guids, MSCState) -> - rabbit_msg_store:remove(Guids, MSCState). +msg_store_remove(MsgIds, MSCState) -> + rabbit_msg_store:remove(MsgIds, MSCState). -msg_store_remove(MsgStore, Ref, Guids) -> +msg_store_remove(MsgStore, Ref, MsgIds) -> with_msg_store_client(MsgStore, Ref, fun (MSCStateM) -> - ok = msg_store_remove(Guids, MSCStateM), + ok = msg_store_remove(MsgIds, MSCStateM), MSCStateM end). @@ -1495,140 +1814,138 @@ with_msg_store_client(MsgStore, Ref, Fun) -> foreach_with_msg_store_client(MsgStore, Ref, Fun, L) -> rabbit_msg_store:client_terminate( - lists:foldl(fun (Guid, MSCState) -> Fun(Guid, MSCState) end, + lists:foldl(fun (MsgId, MSCState) -> Fun(MsgId, MSCState) end, msg_store_client_init(MsgStore, Ref), L)). 
test_msg_store() -> restart_msg_store_empty(), Self = self(), - Guids = [guid_bin(M) || M <- lists:seq(1,100)], - {Guids1stHalf, Guids2ndHalf} = lists:split(50, Guids), + MsgIds = [msg_id_bin(M) || M <- lists:seq(1,100)], + {MsgIds1stHalf, MsgIds2ndHalf} = lists:split(50, MsgIds), Ref = rabbit_guid:guid(), MSCState = msg_store_client_init(?PERSISTENT_MSG_STORE, Ref), %% check we don't contain any of the msgs we're about to publish - false = msg_store_contains(false, Guids, MSCState), + false = msg_store_contains(false, MsgIds, MSCState), %% publish the first half - ok = msg_store_write(Guids1stHalf, MSCState), + ok = msg_store_write(MsgIds1stHalf, MSCState), %% sync on the first half - ok = msg_store_sync(Guids1stHalf, MSCState), + ok = msg_store_sync(MsgIds1stHalf, MSCState), %% publish the second half - ok = msg_store_write(Guids2ndHalf, MSCState), + ok = msg_store_write(MsgIds2ndHalf, MSCState), %% sync on the first half again - the msg_store will be dirty, but %% we won't need the fsync - ok = msg_store_sync(Guids1stHalf, MSCState), + ok = msg_store_sync(MsgIds1stHalf, MSCState), %% check they're all in there - true = msg_store_contains(true, Guids, MSCState), + true = msg_store_contains(true, MsgIds, MSCState), %% publish the latter half twice so we hit the caching and ref count code - ok = msg_store_write(Guids2ndHalf, MSCState), + ok = msg_store_write(MsgIds2ndHalf, MSCState), %% check they're still all in there - true = msg_store_contains(true, Guids, MSCState), + true = msg_store_contains(true, MsgIds, MSCState), %% sync on the 2nd half, but do lots of individual syncs to try %% and cause coalescing to happen ok = lists:foldl( - fun (Guid, ok) -> rabbit_msg_store:sync( - [Guid], fun () -> Self ! {sync, Guid} end, - MSCState) - end, ok, Guids2ndHalf), + fun (MsgId, ok) -> rabbit_msg_store:sync( + [MsgId], fun () -> Self ! 
{sync, MsgId} end, + MSCState) + end, ok, MsgIds2ndHalf), lists:foldl( - fun(Guid, ok) -> + fun(MsgId, ok) -> receive - {sync, Guid} -> ok + {sync, MsgId} -> ok after 10000 -> - io:format("Sync from msg_store missing (guid: ~p)~n", - [Guid]), + io:format("Sync from msg_store missing (msg_id: ~p)~n", + [MsgId]), throw(timeout) end - end, ok, Guids2ndHalf), + end, ok, MsgIds2ndHalf), %% it's very likely we're not dirty here, so the 1st half sync %% should hit a different code path - ok = msg_store_sync(Guids1stHalf, MSCState), + ok = msg_store_sync(MsgIds1stHalf, MSCState), %% read them all - MSCState1 = msg_store_read(Guids, MSCState), + MSCState1 = msg_store_read(MsgIds, MSCState), %% read them all again - this will hit the cache, not disk - MSCState2 = msg_store_read(Guids, MSCState1), + MSCState2 = msg_store_read(MsgIds, MSCState1), %% remove them all - ok = rabbit_msg_store:remove(Guids, MSCState2), + ok = rabbit_msg_store:remove(MsgIds, MSCState2), %% check first half doesn't exist - false = msg_store_contains(false, Guids1stHalf, MSCState2), + false = msg_store_contains(false, MsgIds1stHalf, MSCState2), %% check second half does exist - true = msg_store_contains(true, Guids2ndHalf, MSCState2), + true = msg_store_contains(true, MsgIds2ndHalf, MSCState2), %% read the second half again - MSCState3 = msg_store_read(Guids2ndHalf, MSCState2), - %% release the second half, just for fun (aka code coverage) - ok = rabbit_msg_store:release(Guids2ndHalf, MSCState3), + MSCState3 = msg_store_read(MsgIds2ndHalf, MSCState2), %% read the second half again, just for fun (aka code coverage) - MSCState4 = msg_store_read(Guids2ndHalf, MSCState3), + MSCState4 = msg_store_read(MsgIds2ndHalf, MSCState3), ok = rabbit_msg_store:client_terminate(MSCState4), %% stop and restart, preserving every other msg in 2nd half ok = rabbit_variable_queue:stop_msg_store(), ok = rabbit_variable_queue:start_msg_store( [], {fun ([]) -> finished; - ([Guid|GuidsTail]) - when length(GuidsTail) rem 2 == 0 
-> - {Guid, 1, GuidsTail}; - ([Guid|GuidsTail]) -> - {Guid, 0, GuidsTail} - end, Guids2ndHalf}), + ([MsgId|MsgIdsTail]) + when length(MsgIdsTail) rem 2 == 0 -> + {MsgId, 1, MsgIdsTail}; + ([MsgId|MsgIdsTail]) -> + {MsgId, 0, MsgIdsTail} + end, MsgIds2ndHalf}), MSCState5 = msg_store_client_init(?PERSISTENT_MSG_STORE, Ref), %% check we have the right msgs left lists:foldl( - fun (Guid, Bool) -> - not(Bool = rabbit_msg_store:contains(Guid, MSCState5)) - end, false, Guids2ndHalf), + fun (MsgId, Bool) -> + not(Bool = rabbit_msg_store:contains(MsgId, MSCState5)) + end, false, MsgIds2ndHalf), ok = rabbit_msg_store:client_terminate(MSCState5), %% restart empty restart_msg_store_empty(), MSCState6 = msg_store_client_init(?PERSISTENT_MSG_STORE, Ref), %% check we don't contain any of the msgs - false = msg_store_contains(false, Guids, MSCState6), + false = msg_store_contains(false, MsgIds, MSCState6), %% publish the first half again - ok = msg_store_write(Guids1stHalf, MSCState6), + ok = msg_store_write(MsgIds1stHalf, MSCState6), %% this should force some sort of sync internally otherwise misread ok = rabbit_msg_store:client_terminate( - msg_store_read(Guids1stHalf, MSCState6)), + msg_store_read(MsgIds1stHalf, MSCState6)), MSCState7 = msg_store_client_init(?PERSISTENT_MSG_STORE, Ref), - ok = rabbit_msg_store:remove(Guids1stHalf, MSCState7), + ok = rabbit_msg_store:remove(MsgIds1stHalf, MSCState7), ok = rabbit_msg_store:client_terminate(MSCState7), %% restart empty - restart_msg_store_empty(), %% now safe to reuse guids + restart_msg_store_empty(), %% now safe to reuse msg_ids %% push a lot of msgs in... 
at least 100 files worth {ok, FileSize} = application:get_env(rabbit, msg_store_file_size_limit), PayloadSizeBits = 65536, BigCount = trunc(100 * FileSize / (PayloadSizeBits div 8)), - GuidsBig = [guid_bin(X) || X <- lists:seq(1, BigCount)], + MsgIdsBig = [msg_id_bin(X) || X <- lists:seq(1, BigCount)], Payload = << 0:PayloadSizeBits >>, ok = with_msg_store_client( ?PERSISTENT_MSG_STORE, Ref, fun (MSCStateM) -> - [ok = rabbit_msg_store:write(Guid, Payload, MSCStateM) || - Guid <- GuidsBig], + [ok = rabbit_msg_store:write(MsgId, Payload, MSCStateM) || + MsgId <- MsgIdsBig], MSCStateM end), %% now read them to ensure we hit the fast client-side reading ok = foreach_with_msg_store_client( ?PERSISTENT_MSG_STORE, Ref, - fun (Guid, MSCStateM) -> + fun (MsgId, MSCStateM) -> {{ok, Payload}, MSCStateN} = rabbit_msg_store:read( - Guid, MSCStateM), + MsgId, MSCStateM), MSCStateN - end, GuidsBig), + end, MsgIdsBig), %% .., then 3s by 1... ok = msg_store_remove(?PERSISTENT_MSG_STORE, Ref, - [guid_bin(X) || X <- lists:seq(BigCount, 1, -3)]), + [msg_id_bin(X) || X <- lists:seq(BigCount, 1, -3)]), %% .., then remove 3s by 2, from the young end first. This hits %% GC (under 50% good data left, but no empty files. Must GC). ok = msg_store_remove(?PERSISTENT_MSG_STORE, Ref, - [guid_bin(X) || X <- lists:seq(BigCount-1, 1, -3)]), + [msg_id_bin(X) || X <- lists:seq(BigCount-1, 1, -3)]), %% .., then remove 3s by 3, from the young end first. This hits %% GC... 
ok = msg_store_remove(?PERSISTENT_MSG_STORE, Ref, - [guid_bin(X) || X <- lists:seq(BigCount-2, 1, -3)]), + [msg_id_bin(X) || X <- lists:seq(BigCount-2, 1, -3)]), %% ensure empty ok = with_msg_store_client( ?PERSISTENT_MSG_STORE, Ref, fun (MSCStateM) -> - false = msg_store_contains(false, GuidsBig, MSCStateM), + false = msg_store_contains(false, MsgIdsBig, MSCStateM), MSCStateM end), %% restart empty @@ -1648,8 +1965,8 @@ init_test_queue() -> PersistentClient = msg_store_client_init(?PERSISTENT_MSG_STORE, PRef), Res = rabbit_queue_index:recover( TestQueue, Terms, false, - fun (Guid) -> - rabbit_msg_store:contains(Guid, PersistentClient) + fun (MsgId) -> + rabbit_msg_store:contains(MsgId, PersistentClient) end, fun nop/1), ok = rabbit_msg_store:client_delete_and_terminate(PersistentClient), @@ -1673,6 +1990,10 @@ with_empty_test_queue(Fun) -> {0, Qi} = init_test_queue(), rabbit_queue_index:delete_and_terminate(Fun(Qi)). +restart_app() -> + rabbit:stop(), + rabbit:start(). + queue_index_publish(SeqIds, Persistent, Qi) -> Ref = rabbit_guid:guid(), MsgStore = case Persistent of @@ -1680,25 +2001,25 @@ queue_index_publish(SeqIds, Persistent, Qi) -> false -> ?TRANSIENT_MSG_STORE end, MSCState = msg_store_client_init(MsgStore, Ref), - {A, B = [{_SeqId, LastGuidWritten} | _]} = + {A, B = [{_SeqId, LastMsgIdWritten} | _]} = lists:foldl( - fun (SeqId, {QiN, SeqIdsGuidsAcc}) -> - Guid = rabbit_guid:guid(), + fun (SeqId, {QiN, SeqIdsMsgIdsAcc}) -> + MsgId = rabbit_guid:guid(), QiM = rabbit_queue_index:publish( - Guid, SeqId, #message_properties{}, Persistent, QiN), - ok = rabbit_msg_store:write(Guid, Guid, MSCState), - {QiM, [{SeqId, Guid} | SeqIdsGuidsAcc]} + MsgId, SeqId, #message_properties{}, Persistent, QiN), + ok = rabbit_msg_store:write(MsgId, MsgId, MSCState), + {QiM, [{SeqId, MsgId} | SeqIdsMsgIdsAcc]} end, {Qi, []}, SeqIds), %% do this just to force all of the publishes through to the msg_store: - true = rabbit_msg_store:contains(LastGuidWritten, MSCState), + true = 
rabbit_msg_store:contains(LastMsgIdWritten, MSCState), ok = rabbit_msg_store:client_delete_and_terminate(MSCState), {A, B}. verify_read_with_published(_Delivered, _Persistent, [], _) -> ok; verify_read_with_published(Delivered, Persistent, - [{Guid, SeqId, _Props, Persistent, Delivered}|Read], - [{SeqId, Guid}|Published]) -> + [{MsgId, SeqId, _Props, Persistent, Delivered}|Read], + [{SeqId, MsgId}|Published]) -> verify_read_with_published(Delivered, Persistent, Read, Published); verify_read_with_published(_Delivered, _Persistent, _Read, _Published) -> ko. @@ -1706,10 +2027,10 @@ verify_read_with_published(_Delivered, _Persistent, _Read, _Published) -> test_queue_index_props() -> with_empty_test_queue( fun(Qi0) -> - Guid = rabbit_guid:guid(), + MsgId = rabbit_guid:guid(), Props = #message_properties{expiry=12345}, - Qi1 = rabbit_queue_index:publish(Guid, 1, Props, true, Qi0), - {[{Guid, 1, Props, _, _}], Qi2} = + Qi1 = rabbit_queue_index:publish(MsgId, 1, Props, true, Qi0), + {[{MsgId, 1, Props, _, _}], Qi2} = rabbit_queue_index:read(1, 2, Qi1), Qi2 end), @@ -1731,19 +2052,19 @@ test_queue_index() -> with_empty_test_queue( fun (Qi0) -> {0, 0, Qi1} = rabbit_queue_index:bounds(Qi0), - {Qi2, SeqIdsGuidsA} = queue_index_publish(SeqIdsA, false, Qi1), + {Qi2, SeqIdsMsgIdsA} = queue_index_publish(SeqIdsA, false, Qi1), {0, SegmentSize, Qi3} = rabbit_queue_index:bounds(Qi2), {ReadA, Qi4} = rabbit_queue_index:read(0, SegmentSize, Qi3), ok = verify_read_with_published(false, false, ReadA, - lists:reverse(SeqIdsGuidsA)), + lists:reverse(SeqIdsMsgIdsA)), %% should get length back as 0, as all the msgs were transient {0, Qi6} = restart_test_queue(Qi4), {0, 0, Qi7} = rabbit_queue_index:bounds(Qi6), - {Qi8, SeqIdsGuidsB} = queue_index_publish(SeqIdsB, true, Qi7), + {Qi8, SeqIdsMsgIdsB} = queue_index_publish(SeqIdsB, true, Qi7), {0, TwoSegs, Qi9} = rabbit_queue_index:bounds(Qi8), {ReadB, Qi10} = rabbit_queue_index:read(0, SegmentSize, Qi9), ok = verify_read_with_published(false, 
true, ReadB, - lists:reverse(SeqIdsGuidsB)), + lists:reverse(SeqIdsMsgIdsB)), %% should get length back as MostOfASegment LenB = length(SeqIdsB), {LenB, Qi12} = restart_test_queue(Qi10), @@ -1751,7 +2072,7 @@ test_queue_index() -> Qi14 = rabbit_queue_index:deliver(SeqIdsB, Qi13), {ReadC, Qi15} = rabbit_queue_index:read(0, SegmentSize, Qi14), ok = verify_read_with_published(true, true, ReadC, - lists:reverse(SeqIdsGuidsB)), + lists:reverse(SeqIdsMsgIdsB)), Qi16 = rabbit_queue_index:ack(SeqIdsB, Qi15), Qi17 = rabbit_queue_index:flush(Qi16), %% Everything will have gone now because #pubs == #acks @@ -1767,12 +2088,12 @@ test_queue_index() -> %% a) partial pub+del+ack, then move to new segment with_empty_test_queue( fun (Qi0) -> - {Qi1, _SeqIdsGuidsC} = queue_index_publish(SeqIdsC, + {Qi1, _SeqIdsMsgIdsC} = queue_index_publish(SeqIdsC, false, Qi0), Qi2 = rabbit_queue_index:deliver(SeqIdsC, Qi1), Qi3 = rabbit_queue_index:ack(SeqIdsC, Qi2), Qi4 = rabbit_queue_index:flush(Qi3), - {Qi5, _SeqIdsGuidsC1} = queue_index_publish([SegmentSize], + {Qi5, _SeqIdsMsgIdsC1} = queue_index_publish([SegmentSize], false, Qi4), Qi5 end), @@ -1780,10 +2101,10 @@ test_queue_index() -> %% b) partial pub+del, then move to new segment, then ack all in old segment with_empty_test_queue( fun (Qi0) -> - {Qi1, _SeqIdsGuidsC2} = queue_index_publish(SeqIdsC, + {Qi1, _SeqIdsMsgIdsC2} = queue_index_publish(SeqIdsC, false, Qi0), Qi2 = rabbit_queue_index:deliver(SeqIdsC, Qi1), - {Qi3, _SeqIdsGuidsC3} = queue_index_publish([SegmentSize], + {Qi3, _SeqIdsMsgIdsC3} = queue_index_publish([SegmentSize], false, Qi2), Qi4 = rabbit_queue_index:ack(SeqIdsC, Qi3), rabbit_queue_index:flush(Qi4) @@ -1792,8 +2113,8 @@ test_queue_index() -> %% c) just fill up several segments of all pubs, then +dels, then +acks with_empty_test_queue( fun (Qi0) -> - {Qi1, _SeqIdsGuidsD} = queue_index_publish(SeqIdsD, - false, Qi0), + {Qi1, _SeqIdsMsgIdsD} = queue_index_publish(SeqIdsD, + false, Qi0), Qi2 = 
rabbit_queue_index:deliver(SeqIdsD, Qi1), Qi3 = rabbit_queue_index:ack(SeqIdsD, Qi2), rabbit_queue_index:flush(Qi3) @@ -1826,12 +2147,12 @@ test_queue_index() -> %% exercise journal_minus_segment, not segment_plus_journal. with_empty_test_queue( fun (Qi0) -> - {Qi1, _SeqIdsGuidsE} = queue_index_publish([0,1,2,4,5,7], + {Qi1, _SeqIdsMsgIdsE} = queue_index_publish([0,1,2,4,5,7], true, Qi0), Qi2 = rabbit_queue_index:deliver([0,1,4], Qi1), Qi3 = rabbit_queue_index:ack([0], Qi2), {5, Qi4} = restart_test_queue(Qi3), - {Qi5, _SeqIdsGuidsF} = queue_index_publish([3,6,8], true, Qi4), + {Qi5, _SeqIdsMsgIdsF} = queue_index_publish([3,6,8], true, Qi4), Qi6 = rabbit_queue_index:deliver([2,3,5,6], Qi5), Qi7 = rabbit_queue_index:ack([1,2,3], Qi6), {5, Qi8} = restart_test_queue(Qi7), @@ -1843,9 +2164,16 @@ test_queue_index() -> passed. +variable_queue_init(Q, Recover) -> + rabbit_variable_queue:init( + Q, Recover, fun nop/2, fun nop/2, fun nop/1). + variable_queue_publish(IsPersistent, Count, VQ) -> + variable_queue_publish(IsPersistent, Count, fun (_N, P) -> P end, VQ). + +variable_queue_publish(IsPersistent, Count, PropFun, VQ) -> lists:foldl( - fun (_N, VQN) -> + fun (N, VQN) -> rabbit_variable_queue:publish( rabbit_basic:message( rabbit_misc:r(<<>>, exchange, <<>>), @@ -1853,7 +2181,7 @@ variable_queue_publish(IsPersistent, Count, VQ) -> true -> 2; false -> 1 end}, <<>>), - #message_properties{}, VQN) + PropFun(N, #message_properties{}), self(), VQN) end, VQ, lists:seq(1, Count)). variable_queue_fetch(Count, IsPersistent, IsDelivered, Len, VQ) -> @@ -1871,18 +2199,44 @@ assert_prop(List, Prop, Value) -> assert_props(List, PropVals) -> [assert_prop(List, Prop, Value) || {Prop, Value} <- PropVals]. +test_amqqueue(Durable) -> + (rabbit_amqqueue:pseudo_queue(test_queue(), self())) + #amqqueue { durable = Durable }. 
+ with_fresh_variable_queue(Fun) -> ok = empty_test_queue(), - VQ = rabbit_variable_queue:init(test_queue(), true, false, - fun nop/2, fun nop/1), + VQ = variable_queue_init(test_amqqueue(true), false), S0 = rabbit_variable_queue:status(VQ), assert_props(S0, [{q1, 0}, {q2, 0}, {delta, {delta, undefined, 0, undefined}}, {q3, 0}, {q4, 0}, {len, 0}]), - _ = rabbit_variable_queue:delete_and_terminate(Fun(VQ)), + _ = rabbit_variable_queue:delete_and_terminate(shutdown, Fun(VQ)), passed. +publish_and_confirm(QPid, Payload, Count) -> + Seqs = lists:seq(1, Count), + [begin + Msg = rabbit_basic:message(rabbit_misc:r(<<>>, exchange, <<>>), + <<>>, #'P_basic'{delivery_mode = 2}, + Payload), + Delivery = #delivery{mandatory = false, immediate = false, + sender = self(), message = Msg, msg_seq_no = Seq}, + true = rabbit_amqqueue:deliver(QPid, Delivery) + end || Seq <- Seqs], + wait_for_confirms(gb_sets:from_list(Seqs)). + +wait_for_confirms(Unconfirmed) -> + case gb_sets:is_empty(Unconfirmed) of + true -> ok; + false -> receive {'$gen_cast', {confirm, Confirmed, _}} -> + wait_for_confirms( + gb_sets:difference(Unconfirmed, + gb_sets:from_list(Confirmed))) + after 5000 -> exit(timeout_waiting_for_confirm) + end + end. + test_variable_queue() -> [passed = with_fresh_variable_queue(F) || F <- [fun test_variable_queue_dynamic_duration_change/1, @@ -1890,6 +2244,7 @@ test_variable_queue() -> fun test_variable_queue_all_the_bits_not_covered_elsewhere1/1, fun test_variable_queue_all_the_bits_not_covered_elsewhere2/1, fun test_dropwhile/1, + fun test_dropwhile_varying_ram_duration/1, fun test_variable_queue_ack_limiting/1]], passed. 
@@ -1926,14 +2281,9 @@ test_dropwhile(VQ0) -> Count = 10, %% add messages with sequential expiry - VQ1 = lists:foldl( - fun (N, VQN) -> - rabbit_variable_queue:publish( - rabbit_basic:message( - rabbit_misc:r(<<>>, exchange, <<>>), - <<>>, #'P_basic'{}, <<>>), - #message_properties{expiry = N}, VQN) - end, VQ0, lists:seq(1, Count)), + VQ1 = variable_queue_publish( + false, Count, + fun (N, Props) -> Props#message_properties{expiry = N} end, VQ0), %% drop the first 5 messages VQ2 = rabbit_variable_queue:dropwhile( @@ -1953,6 +2303,14 @@ test_dropwhile(VQ0) -> VQ4. +test_dropwhile_varying_ram_duration(VQ0) -> + VQ1 = variable_queue_publish(false, 1, VQ0), + VQ2 = rabbit_variable_queue:set_ram_duration_target(0, VQ1), + VQ3 = rabbit_variable_queue:dropwhile(fun(_) -> false end, VQ2), + VQ4 = rabbit_variable_queue:set_ram_duration_target(infinity, VQ3), + VQ5 = variable_queue_publish(false, 1, VQ4), + rabbit_variable_queue:dropwhile(fun(_) -> false end, VQ5). + test_variable_queue_dynamic_duration_change(VQ0) -> SegmentSize = rabbit_queue_index:next_segment_boundary(0), @@ -1976,7 +2334,7 @@ test_variable_queue_dynamic_duration_change(VQ0) -> %% drain {VQ8, AckTags} = variable_queue_fetch(Len, false, false, Len, VQ7), - VQ9 = rabbit_variable_queue:ack(AckTags, VQ8), + {_Guids, VQ9} = rabbit_variable_queue:ack(AckTags, VQ8), {empty, VQ10} = rabbit_variable_queue:fetch(true, VQ9), VQ10. @@ -1986,7 +2344,7 @@ publish_fetch_and_ack(0, _Len, VQ0) -> publish_fetch_and_ack(N, Len, VQ0) -> VQ1 = variable_queue_publish(false, 1, VQ0), {{_Msg, false, AckTag, Len}, VQ2} = rabbit_variable_queue:fetch(true, VQ1), - VQ3 = rabbit_variable_queue:ack([AckTag], VQ2), + {_Guids, VQ3} = rabbit_variable_queue:ack([AckTag], VQ2), publish_fetch_and_ack(N-1, Len, VQ3). 
test_variable_queue_partial_segments_delta_thing(VQ0) -> @@ -2020,7 +2378,7 @@ test_variable_queue_partial_segments_delta_thing(VQ0) -> {len, HalfSegment + 1}]), {VQ8, AckTags1} = variable_queue_fetch(HalfSegment + 1, true, false, HalfSegment + 1, VQ7), - VQ9 = rabbit_variable_queue:ack(AckTags ++ AckTags1, VQ8), + {_Guids, VQ9} = rabbit_variable_queue:ack(AckTags ++ AckTags1, VQ8), %% should be empty now {empty, VQ10} = rabbit_variable_queue:fetch(true, VQ9), VQ10. @@ -2033,10 +2391,10 @@ check_variable_queue_status(VQ0, Props) -> VQ1. variable_queue_wait_for_shuffling_end(VQ) -> - case rabbit_variable_queue:needs_idle_timeout(VQ) of - true -> variable_queue_wait_for_shuffling_end( - rabbit_variable_queue:idle_timeout(VQ)); - false -> VQ + case rabbit_variable_queue:needs_timeout(VQ) of + false -> VQ; + _ -> variable_queue_wait_for_shuffling_end( + rabbit_variable_queue:timeout(VQ)) end. test_variable_queue_all_the_bits_not_covered_elsewhere1(VQ0) -> @@ -2048,9 +2406,8 @@ test_variable_queue_all_the_bits_not_covered_elsewhere1(VQ0) -> Count + Count, VQ3), {VQ5, _AckTags1} = variable_queue_fetch(Count, false, false, Count, VQ4), - _VQ6 = rabbit_variable_queue:terminate(VQ5), - VQ7 = rabbit_variable_queue:init(test_queue(), true, true, - fun nop/2, fun nop/1), + _VQ6 = rabbit_variable_queue:terminate(shutdown, VQ5), + VQ7 = variable_queue_init(test_amqqueue(true), true), {{_Msg1, true, _AckTag1, Count1}, VQ8} = rabbit_variable_queue:fetch(true, VQ7), VQ9 = variable_queue_publish(false, 1, VQ8), @@ -2063,34 +2420,27 @@ test_variable_queue_all_the_bits_not_covered_elsewhere2(VQ0) -> VQ1 = rabbit_variable_queue:set_ram_duration_target(0, VQ0), VQ2 = variable_queue_publish(false, 4, VQ1), {VQ3, AckTags} = variable_queue_fetch(2, false, false, 4, VQ2), - VQ4 = rabbit_variable_queue:requeue(AckTags, fun(X) -> X end, VQ3), - VQ5 = rabbit_variable_queue:idle_timeout(VQ4), - _VQ6 = rabbit_variable_queue:terminate(VQ5), - VQ7 = rabbit_variable_queue:init(test_queue(), true, 
true, - fun nop/2, fun nop/1), + {_Guids, VQ4} = + rabbit_variable_queue:requeue(AckTags, fun(X) -> X end, VQ3), + VQ5 = rabbit_variable_queue:timeout(VQ4), + _VQ6 = rabbit_variable_queue:terminate(shutdown, VQ5), + VQ7 = variable_queue_init(test_amqqueue(true), true), {empty, VQ8} = rabbit_variable_queue:fetch(false, VQ7), VQ8. test_queue_recover() -> Count = 2 * rabbit_queue_index:next_segment_boundary(0), - TxID = rabbit_guid:guid(), - {new, #amqqueue { pid = QPid, name = QName }} = + {new, #amqqueue { pid = QPid, name = QName } = Q} = rabbit_amqqueue:declare(test_queue(), true, false, [], none), - [begin - Msg = rabbit_basic:message(rabbit_misc:r(<<>>, exchange, <<>>), - <<>>, #'P_basic'{delivery_mode = 2}, <<>>), - Delivery = #delivery{mandatory = false, immediate = false, txn = TxID, - sender = self(), message = Msg}, - true = rabbit_amqqueue:deliver(QPid, Delivery) - end || _ <- lists:seq(1, Count)], - rabbit_amqqueue:commit_all([QPid], TxID, self()), + publish_and_confirm(QPid, <<>>, Count), + exit(QPid, kill), MRef = erlang:monitor(process, QPid), receive {'DOWN', MRef, process, QPid, _Info} -> ok after 10000 -> exit(timeout_waiting_for_queue_death) end, rabbit_amqqueue:stop(), - ok = rabbit_amqqueue:start(), + rabbit_amqqueue:start(), rabbit_amqqueue:with_or_die( QName, fun (Q1 = #amqqueue { pid = QPid1 }) -> @@ -2098,11 +2448,10 @@ test_queue_recover() -> {ok, CountMinusOne, {QName, QPid1, _AckTag, true, _Msg}} = rabbit_amqqueue:basic_get(Q1, self(), false), exit(QPid1, shutdown), - VQ1 = rabbit_variable_queue:init(QName, true, true, - fun nop/2, fun nop/1), + VQ1 = variable_queue_init(Q, true), {{_Msg1, true, _AckTag1, CountMinusOne}, VQ2} = rabbit_variable_queue:fetch(true, VQ1), - _VQ3 = rabbit_variable_queue:delete_and_terminate(VQ2), + _VQ3 = rabbit_variable_queue:delete_and_terminate(shutdown, VQ2), rabbit_amqqueue:internal_delete(QName) end), passed. 
@@ -2111,18 +2460,10 @@ test_variable_queue_delete_msg_store_files_callback() -> ok = restart_msg_store_empty(), {new, #amqqueue { pid = QPid, name = QName } = Q} = rabbit_amqqueue:declare(test_queue(), true, false, [], none), - TxID = rabbit_guid:guid(), Payload = <<0:8388608>>, %% 1MB Count = 30, - [begin - Msg = rabbit_basic:message( - rabbit_misc:r(<<>>, exchange, <<>>), - <<>>, #'P_basic'{delivery_mode = 2}, Payload), - Delivery = #delivery{mandatory = false, immediate = false, txn = TxID, - sender = self(), message = Msg}, - true = rabbit_amqqueue:deliver(QPid, Delivery) - end || _ <- lists:seq(1, Count)], - rabbit_amqqueue:commit_all([QPid], TxID, self()), + publish_and_confirm(QPid, Payload, Count), + rabbit_amqqueue:set_ram_duration_target(QPid, 0), CountMinusOne = Count - 1, @@ -2141,9 +2482,11 @@ test_configurable_server_properties() -> BuiltInPropNames = [<<"product">>, <<"version">>, <<"platform">>, <<"copyright">>, <<"information">>], + Protocol = rabbit_framing_amqp_0_9_1, + %% Verify that the built-in properties are initially present - ActualPropNames = [Key || - {Key, longstr, _} <- rabbit_reader:server_properties()], + ActualPropNames = [Key || {Key, longstr, _} <- + rabbit_reader:server_properties(Protocol)], true = lists:all(fun (X) -> lists:member(X, ActualPropNames) end, BuiltInPropNames), @@ -2154,9 +2497,10 @@ test_configurable_server_properties() -> ConsProp = fun (X) -> application:set_env(rabbit, server_properties, [X | ServerProperties]) end, - IsPropPresent = fun (X) -> lists:member(X, - rabbit_reader:server_properties()) - end, + IsPropPresent = + fun (X) -> + lists:member(X, rabbit_reader:server_properties(Protocol)) + end, %% Add a wholly new property of the simplified {KeyAtom, StringValue} form NewSimplifiedProperty = {NewHareKey, NewHareVal} = {hare, "soup"}, @@ -2179,7 +2523,7 @@ test_configurable_server_properties() -> {BinNewVerKey, BinNewVerVal} = {list_to_binary(atom_to_list(NewVerKey)), list_to_binary(NewVerVal)}, 
ConsProp(NewVersion), - ClobberedServerProps = rabbit_reader:server_properties(), + ClobberedServerProps = rabbit_reader:server_properties(Protocol), %% Is the clobbering insert present? true = IsPropPresent({BinNewVerKey, longstr, BinNewVerVal}), %% Is the clobbering insert the only thing with the clobbering key? diff --git a/src/rabbit_trace.erl b/src/rabbit_trace.erl new file mode 100644 index 00000000..7d36856a --- /dev/null +++ b/src/rabbit_trace.erl @@ -0,0 +1,120 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License +%% at http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and +%% limitations under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is VMware, Inc. +%% Copyright (c) 2007-2011 VMware, Inc. All rights reserved. +%% + +-module(rabbit_trace). + +-export([init/1, tracing/1, tap_trace_in/2, tap_trace_out/2, start/1, stop/1]). + +-include("rabbit.hrl"). +-include("rabbit_framing.hrl"). + +-define(TRACE_VHOSTS, trace_vhosts). +-define(XNAME, <<"amq.rabbitmq.trace">>). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-type(state() :: rabbit_types:exchange() | 'none'). + +-spec(init/1 :: (rabbit_types:vhost()) -> state()). +-spec(tracing/1 :: (rabbit_types:vhost()) -> boolean()). +-spec(tap_trace_in/2 :: (rabbit_types:basic_message(), state()) -> 'ok'). +-spec(tap_trace_out/2 :: (rabbit_amqqueue:qmsg(), state()) -> 'ok'). + +-spec(start/1 :: (rabbit_types:vhost()) -> 'ok'). +-spec(stop/1 :: (rabbit_types:vhost()) -> 'ok'). + +-endif. 
+ +%%---------------------------------------------------------------------------- + +init(VHost) -> + case tracing(VHost) of + false -> none; + true -> {ok, X} = rabbit_exchange:lookup( + rabbit_misc:r(VHost, exchange, ?XNAME)), + X + end. + +tracing(VHost) -> + {ok, VHosts} = application:get_env(rabbit, ?TRACE_VHOSTS), + lists:member(VHost, VHosts). + +tap_trace_in(Msg = #basic_message{exchange_name = #resource{name = XName}}, + TraceX) -> + maybe_trace(TraceX, Msg, <<"publish">>, XName, []). + +tap_trace_out({#resource{name = QName}, _QPid, _QMsgId, Redelivered, Msg}, + TraceX) -> + RedeliveredNum = case Redelivered of true -> 1; false -> 0 end, + maybe_trace(TraceX, Msg, <<"deliver">>, QName, + [{<<"redelivered">>, signedint, RedeliveredNum}]). + +%%---------------------------------------------------------------------------- + +start(VHost) -> + update_config(fun (VHosts) -> [VHost | VHosts -- [VHost]] end). + +stop(VHost) -> + update_config(fun (VHosts) -> VHosts -- [VHost] end). + +update_config(Fun) -> + {ok, VHosts0} = application:get_env(rabbit, ?TRACE_VHOSTS), + VHosts = Fun(VHosts0), + application:set_env(rabbit, ?TRACE_VHOSTS, VHosts), + rabbit_channel:refresh_config_all(), + ok. + +%%---------------------------------------------------------------------------- + +maybe_trace(none, _Msg, _RKPrefix, _RKSuffix, _Extra) -> + ok; +maybe_trace(#exchange{name = Name}, #basic_message{exchange_name = Name}, + _RKPrefix, _RKSuffix, _Extra) -> + ok; +maybe_trace(X, Msg = #basic_message{content = #content{ + payload_fragments_rev = PFR}}, + RKPrefix, RKSuffix, Extra) -> + {ok, _, _} = rabbit_basic:publish( + X, <<RKPrefix/binary, ".", RKSuffix/binary>>, + #'P_basic'{headers = msg_to_table(Msg) ++ Extra}, PFR), + ok. 
+ +msg_to_table(#basic_message{exchange_name = #resource{name = XName}, + routing_keys = RoutingKeys, + content = Content}) -> + #content{properties = Props} = + rabbit_binary_parser:ensure_content_decoded(Content), + {PropsTable, _Ix} = + lists:foldl(fun (K, {L, Ix}) -> + V = element(Ix, Props), + NewL = case V of + undefined -> L; + _ -> [{a2b(K), type(V), V} | L] + end, + {NewL, Ix + 1} + end, {[], 2}, record_info(fields, 'P_basic')), + [{<<"exchange_name">>, longstr, XName}, + {<<"routing_keys">>, array, [{longstr, K} || K <- RoutingKeys]}, + {<<"properties">>, table, PropsTable}, + {<<"node">>, longstr, a2b(node())}]. + +a2b(A) -> list_to_binary(atom_to_list(A)). + +type(V) when is_list(V) -> table; +type(V) when is_integer(V) -> signedint; +type(_V) -> longstr. diff --git a/src/rabbit_types.erl b/src/rabbit_types.erl index 3dbe740f..2db960ac 100644 --- a/src/rabbit_types.erl +++ b/src/rabbit_types.erl @@ -20,8 +20,8 @@ -ifdef(use_specs). --export_type([txn/0, maybe/1, info/0, infos/0, info_key/0, info_keys/0, - message/0, basic_message/0, +-export_type([maybe/1, info/0, infos/0, info_key/0, info_keys/0, + message/0, msg_id/0, basic_message/0, delivery/0, content/0, decoded_content/0, undecoded_content/0, unencoded_content/0, encoded_content/0, message_properties/0, vhost/0, ctag/0, amqp_error/0, r/1, r2/2, r3/3, listener/0, @@ -42,46 +42,43 @@ %% TODO: make this more precise by tying specific class_ids to %% specific properties -type(undecoded_content() :: - #content{class_id :: rabbit_framing:amqp_class_id(), - properties :: 'none', - properties_bin :: binary(), - payload_fragments_rev :: [binary()]} | - #content{class_id :: rabbit_framing:amqp_class_id(), - properties :: rabbit_framing:amqp_property_record(), - properties_bin :: 'none', - payload_fragments_rev :: [binary()]}). 
+ #content{class_id :: rabbit_framing:amqp_class_id(), + properties :: 'none', + properties_bin :: binary(), + payload_fragments_rev :: [binary()]} | + #content{class_id :: rabbit_framing:amqp_class_id(), + properties :: rabbit_framing:amqp_property_record(), + properties_bin :: 'none', + payload_fragments_rev :: [binary()]}). -type(unencoded_content() :: undecoded_content()). -type(decoded_content() :: - #content{class_id :: rabbit_framing:amqp_class_id(), - properties :: rabbit_framing:amqp_property_record(), - properties_bin :: maybe(binary()), - payload_fragments_rev :: [binary()]}). + #content{class_id :: rabbit_framing:amqp_class_id(), + properties :: rabbit_framing:amqp_property_record(), + properties_bin :: maybe(binary()), + payload_fragments_rev :: [binary()]}). -type(encoded_content() :: - #content{class_id :: rabbit_framing:amqp_class_id(), - properties :: maybe(rabbit_framing:amqp_property_record()), - properties_bin :: binary(), - payload_fragments_rev :: [binary()]}). + #content{class_id :: rabbit_framing:amqp_class_id(), + properties :: maybe(rabbit_framing:amqp_property_record()), + properties_bin :: binary(), + payload_fragments_rev :: [binary()]}). -type(content() :: undecoded_content() | decoded_content()). +-type(msg_id() :: rabbit_guid:guid()). -type(basic_message() :: - #basic_message{exchange_name :: rabbit_exchange:name(), - routing_key :: rabbit_router:routing_key(), - content :: content(), - guid :: rabbit_guid:guid(), - is_persistent :: boolean()}). + #basic_message{exchange_name :: rabbit_exchange:name(), + routing_keys :: [rabbit_router:routing_key()], + content :: content(), + id :: msg_id(), + is_persistent :: boolean()}). -type(message() :: basic_message()). -type(delivery() :: - #delivery{mandatory :: boolean(), - immediate :: boolean(), - txn :: maybe(txn()), - sender :: pid(), - message :: message()}). + #delivery{mandatory :: boolean(), + immediate :: boolean(), + sender :: pid(), + message :: message()}). 
-type(message_properties() :: #message_properties{expiry :: pos_integer() | 'undefined', needs_confirming :: boolean()}). -%% this is really an abstract type, but dialyzer does not support them --type(txn() :: rabbit_guid:guid()). - -type(info_key() :: atom()). -type(info_keys() :: [info_key()]). @@ -89,9 +86,9 @@ -type(infos() :: [info()]). -type(amqp_error() :: - #amqp_error{name :: rabbit_framing:amqp_exception(), - explanation :: string(), - method :: rabbit_framing:amqp_method_name()}). + #amqp_error{name :: rabbit_framing:amqp_exception(), + explanation :: string(), + method :: rabbit_framing:amqp_method_name()}). -type(r(Kind) :: r2(vhost(), Kind)). @@ -103,34 +100,36 @@ name :: Name}). -type(listener() :: - #listener{node :: node(), - protocol :: atom(), - host :: rabbit_networking:hostname(), - port :: rabbit_networking:ip_port()}). + #listener{node :: node(), + protocol :: atom(), + host :: rabbit_networking:hostname(), + port :: rabbit_networking:ip_port()}). -type(binding_source() :: rabbit_exchange:name()). -type(binding_destination() :: rabbit_amqqueue:name() | rabbit_exchange:name()). -type(binding() :: - #binding{source :: rabbit_exchange:name(), - destination :: binding_destination(), - key :: rabbit_binding:key(), - args :: rabbit_framing:amqp_table()}). + #binding{source :: rabbit_exchange:name(), + destination :: binding_destination(), + key :: rabbit_binding:key(), + args :: rabbit_framing:amqp_table()}). -type(amqqueue() :: - #amqqueue{name :: rabbit_amqqueue:name(), - durable :: boolean(), - auto_delete :: boolean(), - exclusive_owner :: rabbit_types:maybe(pid()), - arguments :: rabbit_framing:amqp_table(), - pid :: rabbit_types:maybe(pid())}). + #amqqueue{name :: rabbit_amqqueue:name(), + durable :: boolean(), + auto_delete :: boolean(), + exclusive_owner :: rabbit_types:maybe(pid()), + arguments :: rabbit_framing:amqp_table(), + pid :: rabbit_types:maybe(pid()), + slave_pids :: [pid()], + mirror_nodes :: [node()] | 'undefined' | 'all'}). 
-type(exchange() :: - #exchange{name :: rabbit_exchange:name(), - type :: rabbit_exchange:type(), - durable :: boolean(), - auto_delete :: boolean(), - arguments :: rabbit_framing:amqp_table()}). + #exchange{name :: rabbit_exchange:name(), + type :: rabbit_exchange:type(), + durable :: boolean(), + auto_delete :: boolean(), + arguments :: rabbit_framing:amqp_table()}). -type(connection() :: pid()). @@ -138,14 +137,14 @@ -type(user() :: #user{username :: username(), - is_admin :: boolean(), + tags :: [atom()], auth_backend :: atom(), impl :: any()}). -type(internal_user() :: #internal_user{username :: username(), password_hash :: password_hash(), - is_admin :: boolean()}). + tags :: [atom()]}). -type(username() :: binary()). -type(password() :: binary()). diff --git a/src/rabbit_upgrade.erl b/src/rabbit_upgrade.erl index b0a71523..9739f6b7 100644 --- a/src/rabbit_upgrade.erl +++ b/src/rabbit_upgrade.erl @@ -16,7 +16,7 @@ -module(rabbit_upgrade). --export([maybe_upgrade/0, read_version/0, write_version/0, desired_version/0]). +-export([maybe_upgrade_mnesia/0, maybe_upgrade_local/0]). -include("rabbit.hrl"). @@ -27,142 +27,262 @@ -ifdef(use_specs). --type(step() :: atom()). --type(version() :: [step()]). - --spec(maybe_upgrade/0 :: () -> 'ok' | 'version_not_available'). --spec(read_version/0 :: () -> rabbit_types:ok_or_error2(version(), any())). --spec(write_version/0 :: () -> 'ok'). --spec(desired_version/0 :: () -> version()). +-spec(maybe_upgrade_mnesia/0 :: () -> 'ok'). +-spec(maybe_upgrade_local/0 :: () -> 'ok' | 'version_not_available'). -endif. %% ------------------------------------------------------------------- -%% Try to upgrade the schema. If no information on the existing schema -%% could be found, do nothing. rabbit_mnesia:check_schema_integrity() -%% will catch the problem. 
-maybe_upgrade() -> - case read_version() of - {ok, CurrentHeads} -> - with_upgrade_graph( - fun (G) -> - case unknown_heads(CurrentHeads, G) of - [] -> case upgrades_to_apply(CurrentHeads, G) of - [] -> ok; - Upgrades -> apply_upgrades(Upgrades) - end; - Unknown -> throw({error, - {future_upgrades_found, Unknown}}) - end - end); - {error, enoent} -> - version_not_available +%% The upgrade logic is quite involved, due to the existence of +%% clusters. +%% +%% Firstly, we have two different types of upgrades to do: Mnesia and +%% everything else. Mnesia upgrades must only be done by one node in +%% the cluster (we treat a non-clustered node as a single-node +%% cluster). This is the primary upgrader. The other upgrades need to +%% be done by all nodes. +%% +%% The primary upgrader has to start first (and do its Mnesia +%% upgrades). Secondary upgraders need to reset their Mnesia database +%% and then rejoin the cluster. They can't do the Mnesia upgrades as +%% well and then merge databases since the cookie for each table will +%% end up different and the merge will fail. +%% +%% This in turn means that we need to determine whether we are the +%% primary or secondary upgrader *before* Mnesia comes up. If we +%% didn't then the secondary upgrader would try to start Mnesia, and +%% either hang waiting for a node which is not yet up, or fail since +%% its schema differs from the other nodes in the cluster. +%% +%% Also, the primary upgrader needs to start Mnesia to do its +%% upgrades, but needs to forcibly load tables rather than wait for +%% them (in case it was not the last node to shut down, in which case +%% it would wait forever). +%% +%% This in turn means that maybe_upgrade_mnesia/0 has to be patched +%% into the boot process by prelaunch before the mnesia application is +%% started.
By the time Mnesia is started the upgrades have happened +%% (on the primary), or Mnesia has been reset (on the secondary) and +%% rabbit_mnesia:init_db/3 can then make the node rejoin the cluster +%% in the normal way. +%% +%% The non-mnesia upgrades are then triggered by +%% rabbit_mnesia:init_db/3. Of course, it's possible for a given +%% upgrade process to only require Mnesia upgrades, or only require +%% non-Mnesia upgrades. In the latter case no Mnesia resets and +%% reclusterings occur. +%% +%% The primary upgrader needs to be a disc node. Ideally we would like +%% it to be the last disc node to shut down (since otherwise there's a +%% risk of data loss). On each node we therefore record the disc nodes +%% that were still running when we shut down. A disc node that knows +%% other nodes were up when it shut down, or a ram node, will refuse +%% to be the primary upgrader, and will thus not start when upgrades +%% are needed. +%% +%% However, this is racy if several nodes are shut down at once. Since +%% rabbit records the running nodes, and shuts down before mnesia, the +%% race manifests as all disc nodes thinking they are not the primary +%% upgrader. Therefore the user can remove the record of the last disc +%% node to shut down to get things going again. This may lose any +%% mnesia changes that happened after the node chosen as the primary +%% upgrader was shut down. + +%% ------------------------------------------------------------------- + +ensure_backup_taken() -> + case filelib:is_file(lock_filename()) of + false -> case filelib:is_dir(backup_dir()) of + false -> ok = take_backup(); + _ -> ok + end; + true -> throw({error, previous_upgrade_failed}) end. 
-read_version() -> - case rabbit_misc:read_term_file(schema_filename()) of - {ok, [Heads]} -> {ok, Heads}; - {error, _} = Err -> Err +take_backup() -> + BackupDir = backup_dir(), + case rabbit_mnesia:copy_db(BackupDir) of + ok -> info("upgrades: Mnesia dir backed up to ~p~n", + [BackupDir]); + {error, E} -> throw({could_not_back_up_mnesia_dir, E}) end. -write_version() -> - ok = rabbit_misc:write_term_file(schema_filename(), [desired_version()]), - ok. +ensure_backup_removed() -> + case filelib:is_dir(backup_dir()) of + true -> ok = remove_backup(); + _ -> ok + end. -desired_version() -> - with_upgrade_graph(fun (G) -> heads(G) end). +remove_backup() -> + ok = rabbit_misc:recursive_delete([backup_dir()]), + info("upgrades: Mnesia backup removed~n", []). -%% ------------------------------------------------------------------- +maybe_upgrade_mnesia() -> + AllNodes = rabbit_mnesia:all_clustered_nodes(), + case rabbit_version:upgrades_required(mnesia) of + {error, version_not_available} -> + case AllNodes of + [_] -> ok; + _ -> die("Cluster upgrade needed but upgrading from " + "< 2.1.1.~nUnfortunately you will need to " + "rebuild the cluster.", []) + end; + {error, _} = Err -> + throw(Err); + {ok, []} -> + ok; + {ok, Upgrades} -> + ensure_backup_taken(), + ok = case upgrade_mode(AllNodes) of + primary -> primary_upgrade(Upgrades, AllNodes); + secondary -> secondary_upgrade(AllNodes) + end + end. 
-with_upgrade_graph(Fun) -> - case rabbit_misc:build_acyclic_graph( - fun vertices/2, fun edges/2, - rabbit_misc:all_module_attributes(rabbit_upgrade)) of - {ok, G} -> try - Fun(G) - after - true = digraph:delete(G) - end; - {error, {vertex, duplicate, StepName}} -> - throw({error, {duplicate_upgrade_step, StepName}}); - {error, {edge, {bad_vertex, StepName}, _From, _To}} -> - throw({error, {dependency_on_unknown_upgrade_step, StepName}}); - {error, {edge, {bad_edge, StepNames}, _From, _To}} -> - throw({error, {cycle_in_upgrade_steps, StepNames}}) +upgrade_mode(AllNodes) -> + case nodes_running(AllNodes) of + [] -> + AfterUs = rabbit_mnesia:read_previously_running_nodes(), + case {is_disc_node_legacy(), AfterUs} of + {true, []} -> + primary; + {true, _} -> + Filename = rabbit_mnesia:running_nodes_filename(), + die("Cluster upgrade needed but other disc nodes shut " + "down after this one.~nPlease first start the last " + "disc node to shut down.~n~nNote: if several disc " + "nodes were shut down simultaneously they may " + "all~nshow this message. In which case, remove " + "the lock file on one of them and~nstart that node. " + "The lock file on this node is:~n~n ~s ", [Filename]); + {false, _} -> + die("Cluster upgrade needed but this is a ram node.~n" + "Please first start the last disc node to shut down.", + []) + end; + [Another|_] -> + MyVersion = rabbit_version:desired_for_scope(mnesia), + ErrFun = fun (ClusterVersion) -> + %% The other node(s) are running an + %% unexpected version. + die("Cluster upgrade needed but other nodes are " + "running ~p~nand I want ~p", + [ClusterVersion, MyVersion]) + end, + case rpc:call(Another, rabbit_version, desired_for_scope, + [mnesia]) of + {badrpc, {'EXIT', {undef, _}}} -> ErrFun(unknown_old_version); + {badrpc, Reason} -> ErrFun({unknown, Reason}); + CV -> case rabbit_version:matches( + MyVersion, CV) of + true -> secondary; + false -> ErrFun(CV) + end + end end. 
-vertices(Module, Steps) -> - [{StepName, {Module, StepName}} || {StepName, _Reqs} <- Steps]. +die(Msg, Args) -> + %% We don't throw or exit here since that gets thrown + %% straight out into do_boot, generating an erl_crash.dump + %% and displaying any error message in a confusing way. + error_logger:error_msg(Msg, Args), + io:format("~n~n****~n~n" ++ Msg ++ "~n~n****~n~n~n", Args), + error_logger:logfile(close), + halt(1). -edges(_Module, Steps) -> - [{Require, StepName} || {StepName, Requires} <- Steps, Require <- Requires]. +primary_upgrade(Upgrades, Nodes) -> + Others = Nodes -- [node()], + ok = apply_upgrades( + mnesia, + Upgrades, + fun () -> + force_tables(), + case Others of + [] -> ok; + _ -> info("mnesia upgrades: Breaking cluster~n", []), + [{atomic, ok} = mnesia:del_table_copy(schema, Node) + || Node <- Others] + end + end), + ok. +force_tables() -> + [mnesia:force_load_table(T) || T <- rabbit_mnesia:table_names()]. -unknown_heads(Heads, G) -> - [H || H <- Heads, digraph:vertex(G, H) =:= false]. +secondary_upgrade(AllNodes) -> + %% must do this before we wipe out schema + IsDiscNode = is_disc_node_legacy(), + rabbit_misc:ensure_ok(mnesia:delete_schema([node()]), + cannot_delete_schema), + %% Note that we cluster with all nodes, rather than all disc nodes + %% (as we can't know all disc nodes at this point). This is safe as + %% we're not writing the cluster config, just setting up Mnesia. + ClusterNodes = case IsDiscNode of + true -> AllNodes; + false -> AllNodes -- [node()] + end, + rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia), + ok = rabbit_mnesia:init_db(ClusterNodes, true, fun () -> ok end), + ok = rabbit_version:record_desired_for_scope(mnesia), + ok. -upgrades_to_apply(Heads, G) -> - %% Take all the vertices which can reach the known heads. That's - %% everything we've already applied. Subtract that from all - %% vertices: that's what we have to apply. 
- Unsorted = sets:to_list( - sets:subtract( - sets:from_list(digraph:vertices(G)), - sets:from_list(digraph_utils:reaching(Heads, G)))), - %% Form a subgraph from that list and find a topological ordering - %% so we can invoke them in order. - [element(2, digraph:vertex(G, StepName)) || - StepName <- digraph_utils:topsort(digraph_utils:subgraph(G, Unsorted))]. +nodes_running(Nodes) -> + [N || N <- Nodes, node_running(N)]. -heads(G) -> - lists:sort([V || V <- digraph:vertices(G), digraph:out_degree(G, V) =:= 0]). +node_running(Node) -> + case rpc:call(Node, application, which_applications, []) of + {badrpc, _} -> false; + Apps -> lists:keysearch(rabbit, 1, Apps) =/= false + end. %% ------------------------------------------------------------------- -apply_upgrades(Upgrades) -> - LockFile = lock_filename(dir()), - case rabbit_misc:lock_file(LockFile) of - ok -> - BackupDir = dir() ++ "-upgrade-backup", - info("Upgrades: ~w to apply~n", [length(Upgrades)]), - case rabbit_mnesia:copy_db(BackupDir) of - ok -> - %% We need to make the backup after creating the - %% lock file so that it protects us from trying to - %% overwrite the backup. Unfortunately this means - %% the lock file exists in the backup too, which - %% is not intuitive. Remove it. - ok = file:delete(lock_filename(BackupDir)), - info("Upgrades: Mnesia dir backed up to ~p~n", [BackupDir]), - [apply_upgrade(Upgrade) || Upgrade <- Upgrades], - info("Upgrades: All upgrades applied successfully~n", []), - ok = write_version(), - ok = rabbit_misc:recursive_delete([BackupDir]), - info("Upgrades: Mnesia backup removed~n", []), - ok = file:delete(LockFile); - {error, E} -> - %% If we can't backup, the upgrade hasn't started - %% hence we don't need the lockfile since the real - %% mnesia dir is the good one. 
- ok = file:delete(LockFile), - throw({could_not_back_up_mnesia_dir, E}) - end; - {error, eexist} -> - throw({error, previous_upgrade_failed}) +maybe_upgrade_local() -> + case rabbit_version:upgrades_required(local) of + {error, version_not_available} -> version_not_available; + {error, _} = Err -> throw(Err); + {ok, []} -> ensure_backup_removed(), + ok; + {ok, Upgrades} -> mnesia:stop(), + ensure_backup_taken(), + ok = apply_upgrades(local, Upgrades, + fun () -> ok end), + ensure_backup_removed(), + ok end. -apply_upgrade({M, F}) -> - info("Upgrades: Applying ~w:~w~n", [M, F]), +%% ------------------------------------------------------------------- + +apply_upgrades(Scope, Upgrades, Fun) -> + ok = rabbit_misc:lock_file(lock_filename()), + info("~s upgrades: ~w to apply~n", [Scope, length(Upgrades)]), + rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia), + Fun(), + [apply_upgrade(Scope, Upgrade) || Upgrade <- Upgrades], + info("~s upgrades: All upgrades applied successfully~n", [Scope]), + ok = rabbit_version:record_desired_for_scope(Scope), + ok = file:delete(lock_filename()). + +apply_upgrade(Scope, {M, F}) -> + info("~s upgrades: Applying ~w:~w~n", [Scope, M, F]), ok = apply(M, F, []). %% ------------------------------------------------------------------- dir() -> rabbit_mnesia:dir(). -schema_filename() -> filename:join(dir(), ?VERSION_FILENAME). - +lock_filename() -> lock_filename(dir()). lock_filename(Dir) -> filename:join(Dir, ?LOCK_FILENAME). +backup_dir() -> dir() ++ "-upgrade-backup". + +is_disc_node_legacy() -> + %% This is pretty ugly but we can't start Mnesia and ask it (will + %% hang), we can't look at the config file (may not include us + %% even if we're a disc node). We also can't use + %% rabbit_mnesia:is_disc_node/0 because that will give false + %% positives on Rabbit up to 2.5.1. + filelib:is_regular(filename:join(dir(), "rabbit_durable_exchange.DCD")).
%% NB: we cannot use rabbit_log here since it may not have been %% started yet diff --git a/src/rabbit_upgrade_functions.erl b/src/rabbit_upgrade_functions.erl index 68b88b3e..8d26866b 100644 --- a/src/rabbit_upgrade_functions.erl +++ b/src/rabbit_upgrade_functions.erl @@ -16,25 +16,42 @@ -module(rabbit_upgrade_functions). --include("rabbit.hrl"). +%% If you are tempted to add include("rabbit.hrl"). here, don't. Using record +%% defs here leads to pain later. -compile([export_all]). --rabbit_upgrade({remove_user_scope, []}). --rabbit_upgrade({hash_passwords, []}). --rabbit_upgrade({add_ip_to_listener, []}). --rabbit_upgrade({internal_exchanges, []}). --rabbit_upgrade({user_to_internal_user, [hash_passwords]}). +-rabbit_upgrade({remove_user_scope, mnesia, []}). +-rabbit_upgrade({hash_passwords, mnesia, []}). +-rabbit_upgrade({add_ip_to_listener, mnesia, []}). +-rabbit_upgrade({internal_exchanges, mnesia, []}). +-rabbit_upgrade({user_to_internal_user, mnesia, [hash_passwords]}). +-rabbit_upgrade({topic_trie, mnesia, []}). +-rabbit_upgrade({semi_durable_route, mnesia, []}). +-rabbit_upgrade({exchange_event_serial, mnesia, []}). +-rabbit_upgrade({trace_exchanges, mnesia, [internal_exchanges]}). +-rabbit_upgrade({user_admin_to_tags, mnesia, [user_to_internal_user]}). +-rabbit_upgrade({ha_mirrors, mnesia, []}). +-rabbit_upgrade({gm, mnesia, []}). +-rabbit_upgrade({exchange_scratch, mnesia, [trace_exchanges]}). %% ------------------------------------------------------------------- -ifdef(use_specs). --spec(remove_user_scope/0 :: () -> 'ok'). --spec(hash_passwords/0 :: () -> 'ok'). --spec(add_ip_to_listener/0 :: () -> 'ok'). --spec(internal_exchanges/0 :: () -> 'ok'). +-spec(remove_user_scope/0 :: () -> 'ok'). +-spec(hash_passwords/0 :: () -> 'ok'). +-spec(add_ip_to_listener/0 :: () -> 'ok'). +-spec(internal_exchanges/0 :: () -> 'ok'). -spec(user_to_internal_user/0 :: () -> 'ok'). +-spec(topic_trie/0 :: () -> 'ok'). +-spec(semi_durable_route/0 :: () -> 'ok'). 
+-spec(exchange_event_serial/0 :: () -> 'ok'). +-spec(trace_exchanges/0 :: () -> 'ok'). +-spec(user_admin_to_tags/0 :: () -> 'ok'). +-spec(ha_mirrors/0 :: () -> 'ok'). +-spec(gm/0 :: () -> 'ok'). +-spec(exchange_scratch/0 :: () -> 'ok'). -endif. @@ -47,7 +64,7 @@ %% point. remove_user_scope() -> - mnesia( + transform( rabbit_user_permission, fun ({user_permission, UV, {permission, _Scope, Conf, Write, Read}}) -> {user_permission, UV, {permission, Conf, Write, Read}} @@ -55,7 +72,7 @@ remove_user_scope() -> [user_vhost, permission]). hash_passwords() -> - mnesia( + transform( rabbit_user, fun ({user, Username, Password, IsAdmin}) -> Hash = rabbit_auth_backend_internal:hash_password(Password), @@ -64,7 +81,7 @@ hash_passwords() -> [username, password_hash, is_admin]). add_ip_to_listener() -> - mnesia( + transform( rabbit_listener, fun ({listener, Node, Protocol, Host, Port}) -> {listener, Node, Protocol, Host, {0,0,0,0}, Port} @@ -77,27 +94,104 @@ internal_exchanges() -> fun ({exchange, Name, Type, Durable, AutoDelete, Args}) -> {exchange, Name, Type, Durable, AutoDelete, false, Args} end, - [ ok = mnesia(T, - AddInternalFun, - [name, type, durable, auto_delete, internal, arguments]) + [ ok = transform(T, + AddInternalFun, + [name, type, durable, auto_delete, internal, arguments]) || T <- Tables ], ok. user_to_internal_user() -> - mnesia( + transform( rabbit_user, fun({user, Username, PasswordHash, IsAdmin}) -> {internal_user, Username, PasswordHash, IsAdmin} end, [username, password_hash, is_admin], internal_user). +topic_trie() -> + create(rabbit_topic_trie_edge, [{record_name, topic_trie_edge}, + {attributes, [trie_edge, node_id]}, + {type, ordered_set}]), + create(rabbit_topic_trie_binding, [{record_name, topic_trie_binding}, + {attributes, [trie_binding, value]}, + {type, ordered_set}]). + +semi_durable_route() -> + create(rabbit_semi_durable_route, [{record_name, route}, + {attributes, [binding, value]}]). 
+ +exchange_event_serial() -> + create(rabbit_exchange_serial, [{record_name, exchange_serial}, + {attributes, [name, next]}]). + +trace_exchanges() -> + [declare_exchange( + rabbit_misc:r(VHost, exchange, <<"amq.rabbitmq.trace">>), topic) || + VHost <- rabbit_vhost:list()], + ok. + +user_admin_to_tags() -> + transform( + rabbit_user, + fun({internal_user, Username, PasswordHash, true}) -> + {internal_user, Username, PasswordHash, [administrator]}; + ({internal_user, Username, PasswordHash, false}) -> + {internal_user, Username, PasswordHash, [management]} + end, + [username, password_hash, tags], internal_user). + +ha_mirrors() -> + Tables = [rabbit_queue, rabbit_durable_queue], + AddMirrorPidsFun = + fun ({amqqueue, Name, Durable, AutoDelete, Owner, Arguments, Pid}) -> + {amqqueue, Name, Durable, AutoDelete, Owner, Arguments, Pid, + [], undefined} + end, + [ ok = transform(T, + AddMirrorPidsFun, + [name, durable, auto_delete, exclusive_owner, arguments, + pid, slave_pids, mirror_nodes]) + || T <- Tables ], + ok. + +gm() -> + create(gm_group, [{record_name, gm_group}, + {attributes, [name, version, members]}]). + +exchange_scratch() -> + ok = exchange_scratch(rabbit_exchange), + ok = exchange_scratch(rabbit_durable_exchange). + +exchange_scratch(Table) -> + transform( + Table, + fun ({exchange, Name, Type, Dur, AutoDel, Int, Args}) -> + {exchange, Name, Type, Dur, AutoDel, Int, Args, undefined} + end, + [name, type, durable, auto_delete, internal, arguments, scratch]). + %%-------------------------------------------------------------------- -mnesia(TableName, Fun, FieldList) -> +transform(TableName, Fun, FieldList) -> + rabbit_mnesia:wait_for_tables([TableName]), {atomic, ok} = mnesia:transform_table(TableName, Fun, FieldList), ok. 
-mnesia(TableName, Fun, FieldList, NewRecordName) -> +transform(TableName, Fun, FieldList, NewRecordName) -> + rabbit_mnesia:wait_for_tables([TableName]), {atomic, ok} = mnesia:transform_table(TableName, Fun, FieldList, NewRecordName), ok. + +create(Tab, TabDef) -> + {atomic, ok} = mnesia:create_table(Tab, TabDef), + ok. + +%% Dumb replacement for rabbit_exchange:declare that does not require +%% the exchange type registry or worker pool to be running by dint of +%% not validating anything and assuming the exchange type does not +%% require serialisation. +%% NB: this assumes the pre-exchange-scratch-space format +declare_exchange(XName, Type) -> + X = {exchange, XName, Type, true, false, false, []}, + ok = mnesia:dirty_write(rabbit_durable_exchange, X). diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 7142d560..ea72de66 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -16,13 +16,13 @@ -module(rabbit_variable_queue). --export([init/3, terminate/1, delete_and_terminate/1, - purge/1, publish/3, publish_delivered/4, fetch/2, ack/2, - tx_publish/4, tx_ack/3, tx_rollback/2, tx_commit/4, - requeue/3, len/1, is_empty/1, dropwhile/2, +-export([init/3, terminate/2, delete_and_terminate/2, + purge/1, publish/4, publish_delivered/5, drain_confirmed/1, + dropwhile/2, fetch/2, ack/2, requeue/3, len/1, is_empty/1, set_ram_duration_target/2, ram_duration/1, - needs_idle_timeout/1, idle_timeout/1, handle_pre_hibernate/1, - status/1]). + needs_timeout/1, timeout/1, handle_pre_hibernate/1, + status/1, invoke/3, is_duplicate/2, discard/3, + multiple_routing_keys/0]). -export([start/1, stop/0]). @@ -145,18 +145,21 @@ %% any one time. This further smooths the effects of changes to the %% target_ram_count and ensures the queue remains responsive %% even when there is a large amount of IO work to do. 
The -%% idle_timeout callback is utilised to ensure that conversions are +%% timeout callback is utilised to ensure that conversions are %% done as promptly as possible whilst ensuring the queue remains %% responsive. %% %% In the queue we keep track of both messages that are pending -%% delivery and messages that are pending acks. This ensures that -%% purging (deleting the former) and deletion (deleting the former and -%% the latter) are both cheap and do require any scanning through qi -%% segments. +%% delivery and messages that are pending acks. In the event of a +%% queue purge, we only need to load qi segments if the queue has +%% elements in deltas (i.e. it came under significant memory +%% pressure). In the event of a queue deletion, in addition to the +%% preceding, by keeping track of pending acks in RAM, we do not need +%% to search through qi segments looking for messages that are yet to +%% be acknowledged. %% %% Pending acks are recorded in memory either as the tuple {SeqId, -%% Guid, MsgProps} (tuple-form) or as the message itself (message- +%% MsgId, MsgProps} (tuple-form) or as the message itself (message- %% form). Acks for persistent messages are always stored in the tuple- %% form. Acks for transient messages are also stored in tuple-form if %% the message has been sent to disk as part of the memory reduction @@ -234,10 +237,11 @@ ram_ack_index, index_state, msg_store_clients, - on_sync, durable, transient_threshold, + async_callback, + len, persistent_count, @@ -252,6 +256,7 @@ msgs_on_disk, msg_indices_on_disk, unconfirmed, + confirmed, ack_out_counter, ack_in_counter, ack_rates @@ -261,24 +266,20 @@ -record(msg_status, { seq_id, - guid, + msg_id, msg, is_persistent, is_delivered, msg_on_disk, index_on_disk, msg_props - }). + }). -record(delta, { start_seq_id, %% start_seq_id is inclusive count, end_seq_id %% end_seq_id is exclusive - }). - --record(tx, { pending_messages, pending_acks }). 
- --record(sync, { acks_persistent, acks_all, pubs, funs }). + }). %% When we discover, on publish, that we should write some indices to %% disk for some betas, the IO_BATCH_SIZE sets the number of betas @@ -294,6 +295,8 @@ %%---------------------------------------------------------------------------- +-rabbit_upgrade({multiple_routing_keys, local, []}). + -ifdef(use_specs). -type(timestamp() :: {non_neg_integer(), non_neg_integer(), non_neg_integer()}). @@ -310,12 +313,6 @@ count :: non_neg_integer(), end_seq_id :: non_neg_integer() }). --type(sync() :: #sync { acks_persistent :: [[seq_id()]], - acks_all :: [[seq_id()]], - pubs :: [{message_properties_transformer(), - [rabbit_types:basic_message()]}], - funs :: [fun (() -> any())] }). - -type(state() :: #vqstate { q1 :: queue(), q2 :: bpqueue:bpqueue(), @@ -328,13 +325,14 @@ index_state :: any(), msg_store_clients :: 'undefined' | {{any(), binary()}, {any(), binary()}}, - on_sync :: sync(), durable :: boolean(), + transient_threshold :: non_neg_integer(), + + async_callback :: async_callback(), len :: non_neg_integer(), persistent_count :: non_neg_integer(), - transient_threshold :: non_neg_integer(), target_ram_count :: non_neg_integer() | 'infinity', ram_msg_count :: non_neg_integer(), ram_msg_count_prev :: non_neg_integer(), @@ -345,12 +343,15 @@ msgs_on_disk :: gb_set(), msg_indices_on_disk :: gb_set(), unconfirmed :: gb_set(), + confirmed :: gb_set(), ack_out_counter :: non_neg_integer(), ack_in_counter :: non_neg_integer(), ack_rates :: rates() }). -include("rabbit_backing_queue_spec.hrl"). +-spec(multiple_routing_keys/0 :: () -> 'ok'). + -endif. -define(BLANK_DELTA, #delta { start_seq_id = undefined, @@ -360,11 +361,6 @@ count = 0, end_seq_id = Z }). --define(BLANK_SYNC, #sync { acks_persistent = [], - acks_all = [], - pubs = [], - funs = [] }). 
- %%---------------------------------------------------------------------------- %% Public API %%---------------------------------------------------------------------------- @@ -393,25 +389,26 @@ stop_msg_store() -> ok = rabbit_sup:stop_child(?PERSISTENT_MSG_STORE), ok = rabbit_sup:stop_child(?TRANSIENT_MSG_STORE). -init(QueueName, IsDurable, Recover) -> - Self = self(), - init(QueueName, IsDurable, Recover, - fun (Guids, ActionTaken) -> - msgs_written_to_disk(Self, Guids, ActionTaken) +init(Queue, Recover, AsyncCallback) -> + init(Queue, Recover, AsyncCallback, + fun (MsgIds, ActionTaken) -> + msgs_written_to_disk(AsyncCallback, MsgIds, ActionTaken) end, - fun (Guids) -> msg_indices_written_to_disk(Self, Guids) end). + fun (MsgIds) -> msg_indices_written_to_disk(AsyncCallback, MsgIds) end). -init(QueueName, IsDurable, false, MsgOnDiskFun, MsgIdxOnDiskFun) -> +init(#amqqueue { name = QueueName, durable = IsDurable }, false, + AsyncCallback, MsgOnDiskFun, MsgIdxOnDiskFun) -> IndexState = rabbit_queue_index:init(QueueName, MsgIdxOnDiskFun), - init(IsDurable, IndexState, 0, [], + init(IsDurable, IndexState, 0, [], AsyncCallback, case IsDurable of true -> msg_store_client_init(?PERSISTENT_MSG_STORE, - MsgOnDiskFun); + MsgOnDiskFun, AsyncCallback); false -> undefined end, - msg_store_client_init(?TRANSIENT_MSG_STORE, undefined)); + msg_store_client_init(?TRANSIENT_MSG_STORE, undefined, AsyncCallback)); -init(QueueName, true, true, MsgOnDiskFun, MsgIdxOnDiskFun) -> +init(#amqqueue { name = QueueName, durable = true }, true, + AsyncCallback, MsgOnDiskFun, MsgIdxOnDiskFun) -> Terms = rabbit_queue_index:shutdown_terms(QueueName), {PRef, TRef, Terms1} = case [persistent_ref, transient_ref] -- proplists:get_keys(Terms) of @@ -421,25 +418,25 @@ init(QueueName, true, true, MsgOnDiskFun, MsgIdxOnDiskFun) -> _ -> {rabbit_guid:guid(), rabbit_guid:guid(), []} end, PersistentClient = msg_store_client_init(?PERSISTENT_MSG_STORE, PRef, - MsgOnDiskFun), + MsgOnDiskFun, AsyncCallback), 
TransientClient = msg_store_client_init(?TRANSIENT_MSG_STORE, TRef, - undefined), + undefined, AsyncCallback), {DeltaCount, IndexState} = rabbit_queue_index:recover( QueueName, Terms1, rabbit_msg_store:successfully_recovered_state(?PERSISTENT_MSG_STORE), - fun (Guid) -> - rabbit_msg_store:contains(Guid, PersistentClient) + fun (MsgId) -> + rabbit_msg_store:contains(MsgId, PersistentClient) end, MsgIdxOnDiskFun), - init(true, IndexState, DeltaCount, Terms1, + init(true, IndexState, DeltaCount, Terms1, AsyncCallback, PersistentClient, TransientClient). -terminate(State) -> +terminate(_Reason, State) -> State1 = #vqstate { persistent_count = PCount, index_state = IndexState, msg_store_clients = {MSCStateP, MSCStateT} } = - remove_pending_ack(true, tx_commit_index(State)), + remove_pending_ack(true, State), PRef = case MSCStateP of undefined -> undefined; _ -> ok = rabbit_msg_store:client_terminate(MSCStateP), @@ -456,7 +453,7 @@ terminate(State) -> %% the only difference between purge and delete is that delete also %% needs to delete everything that's been delivered and not ack'd. -delete_and_terminate(State) -> +delete_and_terminate(_Reason, State) -> %% TODO: there is no need to interact with qi at all - which we do %% as part of 'purge' and 'remove_pending_ack', other than %% deleting it. @@ -501,32 +498,37 @@ purge(State = #vqstate { q4 = Q4, ram_index_count = 0, persistent_count = PCount1 })}. -publish(Msg, MsgProps, State) -> +publish(Msg, MsgProps, _ChPid, State) -> {_SeqId, State1} = publish(Msg, MsgProps, false, false, State), a(reduce_memory_use(State1)). 
-publish_delivered(false, #basic_message { guid = Guid }, - _MsgProps, State = #vqstate { len = 0 }) -> - blind_confirm(self(), gb_sets:singleton(Guid)), +publish_delivered(false, #basic_message { id = MsgId }, + #message_properties { needs_confirming = NeedsConfirming }, + _ChPid, State = #vqstate { async_callback = Callback, + len = 0 }) -> + case NeedsConfirming of + true -> blind_confirm(Callback, gb_sets:singleton(MsgId)); + false -> ok + end, {undefined, a(State)}; publish_delivered(true, Msg = #basic_message { is_persistent = IsPersistent, - guid = Guid }, + id = MsgId }, MsgProps = #message_properties { needs_confirming = NeedsConfirming }, - State = #vqstate { len = 0, - next_seq_id = SeqId, - out_counter = OutCount, - in_counter = InCount, - persistent_count = PCount, - durable = IsDurable, - unconfirmed = UC }) -> + _ChPid, State = #vqstate { len = 0, + next_seq_id = SeqId, + out_counter = OutCount, + in_counter = InCount, + persistent_count = PCount, + durable = IsDurable, + unconfirmed = UC }) -> IsPersistent1 = IsDurable andalso IsPersistent, MsgStatus = (msg_status(IsPersistent1, SeqId, Msg, MsgProps)) #msg_status { is_delivered = true }, {MsgStatus1, State1} = maybe_write_to_disk(false, false, MsgStatus, State), State2 = record_pending_ack(m(MsgStatus1), State1), PCount1 = PCount + one_if(IsPersistent1), - UC1 = gb_sets_maybe_insert(NeedsConfirming, Guid, UC), + UC1 = gb_sets_maybe_insert(NeedsConfirming, MsgId, UC), {SeqId, a(reduce_memory_use( State2 #vqstate { next_seq_id = SeqId + 1, out_counter = OutCount + 1, @@ -534,186 +536,61 @@ publish_delivered(true, Msg = #basic_message { is_persistent = IsPersistent, persistent_count = PCount1, unconfirmed = UC1 }))}. -dropwhile(Pred, State) -> - {_OkOrEmpty, State1} = dropwhile1(Pred, State), - State1. +drain_confirmed(State = #vqstate { confirmed = C }) -> + {gb_sets:to_list(C), State #vqstate { confirmed = gb_sets:new() }}. 
-dropwhile1(Pred, State) -> - internal_queue_out( - fun(MsgStatus = #msg_status { msg_props = MsgProps }, State1) -> - case Pred(MsgProps) of - true -> - {_, State2} = internal_fetch(false, MsgStatus, State1), - dropwhile1(Pred, State2); - false -> - %% message needs to go back into Q4 (or maybe go - %% in for the first time if it was loaded from - %% Q3). Also the msg contents might not be in - %% RAM, so read them in now - {MsgStatus1, State2 = #vqstate { q4 = Q4 }} = - read_msg(MsgStatus, State1), - {ok, State2 #vqstate {q4 = queue:in_r(MsgStatus1, Q4) }} - end - end, State). +dropwhile(Pred, State) -> + case queue_out(State) of + {empty, State1} -> + a(State1); + {{value, MsgStatus = #msg_status { msg_props = MsgProps }}, State1} -> + case Pred(MsgProps) of + true -> {_, State2} = internal_fetch(false, MsgStatus, State1), + dropwhile(Pred, State2); + false -> a(in_r(MsgStatus, State1)) + end + end. fetch(AckRequired, State) -> - internal_queue_out( - fun(MsgStatus, State1) -> - %% it's possible that the message wasn't read from disk - %% at this point, so read it in. - {MsgStatus1, State2} = read_msg(MsgStatus, State1), - internal_fetch(AckRequired, MsgStatus1, State2) - end, State). - -internal_queue_out(Fun, State = #vqstate { q4 = Q4 }) -> - case queue:out(Q4) of - {empty, _Q4} -> - case fetch_from_q3(State) of - {empty, State1} = Result -> a(State1), Result; - {loaded, {MsgStatus, State1}} -> Fun(MsgStatus, State1) - end; - {{value, MsgStatus}, Q4a} -> - Fun(MsgStatus, State #vqstate { q4 = Q4a }) + case queue_out(State) of + {empty, State1} -> + {empty, a(State1)}; + {{value, MsgStatus}, State1} -> + %% it is possible that the message wasn't read from disk + %% at this point, so read it in. + {MsgStatus1, State2} = read_msg(MsgStatus, State1), + {Res, State3} = internal_fetch(AckRequired, MsgStatus1, State2), + {Res, a(State3)} end. 
-read_msg(MsgStatus = #msg_status { msg = undefined, - guid = Guid, - is_persistent = IsPersistent }, - State = #vqstate { ram_msg_count = RamMsgCount, - msg_store_clients = MSCState}) -> - {{ok, Msg = #basic_message {}}, MSCState1} = - msg_store_read(MSCState, IsPersistent, Guid), - {MsgStatus #msg_status { msg = Msg }, - State #vqstate { ram_msg_count = RamMsgCount + 1, - msg_store_clients = MSCState1 }}; -read_msg(MsgStatus, State) -> - {MsgStatus, State}. - -internal_fetch(AckRequired, MsgStatus = #msg_status { - seq_id = SeqId, - guid = Guid, - msg = Msg, - is_persistent = IsPersistent, - is_delivered = IsDelivered, - msg_on_disk = MsgOnDisk, - index_on_disk = IndexOnDisk }, - State = #vqstate {ram_msg_count = RamMsgCount, - out_counter = OutCount, - index_state = IndexState, - msg_store_clients = MSCState, - len = Len, - persistent_count = PCount }) -> - %% 1. Mark it delivered if necessary - IndexState1 = maybe_write_delivered( - IndexOnDisk andalso not IsDelivered, - SeqId, IndexState), - - %% 2. Remove from msg_store and queue index, if necessary - Rem = fun () -> - ok = msg_store_remove(MSCState, IsPersistent, [Guid]) - end, - Ack = fun () -> rabbit_queue_index:ack([SeqId], IndexState1) end, - IndexState2 = - case {AckRequired, MsgOnDisk, IndexOnDisk, IsPersistent} of - {false, true, false, _} -> Rem(), IndexState1; - {false, true, true, _} -> Rem(), Ack(); - { true, true, true, false} -> Ack(); - _ -> IndexState1 - end, - - %% 3. 
If an ack is required, add something sensible to PA - {AckTag, State1} = case AckRequired of - true -> StateN = record_pending_ack( - MsgStatus #msg_status { - is_delivered = true }, State), - {SeqId, StateN}; - false -> {undefined, State} - end, - - PCount1 = PCount - one_if(IsPersistent andalso not AckRequired), - Len1 = Len - 1, - RamMsgCount1 = RamMsgCount - one_if(Msg =/= undefined), - - {{Msg, IsDelivered, AckTag, Len1}, - a(State1 #vqstate { ram_msg_count = RamMsgCount1, - out_counter = OutCount + 1, - index_state = IndexState2, - len = Len1, - persistent_count = PCount1 })}. - ack(AckTags, State) -> - a(ack(fun msg_store_remove/3, - fun (_, State0) -> State0 end, - AckTags, State)). - -tx_publish(Txn, Msg = #basic_message { is_persistent = IsPersistent }, MsgProps, - State = #vqstate { durable = IsDurable, - msg_store_clients = MSCState }) -> - Tx = #tx { pending_messages = Pubs } = lookup_tx(Txn), - store_tx(Txn, Tx #tx { pending_messages = [{Msg, MsgProps} | Pubs] }), - case IsPersistent andalso IsDurable of - true -> MsgStatus = msg_status(true, undefined, Msg, MsgProps), - #msg_status { msg_on_disk = true } = - maybe_write_msg_to_disk(false, MsgStatus, MSCState); - false -> ok - end, - a(State). - -tx_ack(Txn, AckTags, State) -> - Tx = #tx { pending_acks = Acks } = lookup_tx(Txn), - store_tx(Txn, Tx #tx { pending_acks = [AckTags | Acks] }), - State. - -tx_rollback(Txn, State = #vqstate { durable = IsDurable, - msg_store_clients = MSCState }) -> - #tx { pending_acks = AckTags, pending_messages = Pubs } = lookup_tx(Txn), - erase_tx(Txn), - ok = case IsDurable of - true -> msg_store_remove(MSCState, true, persistent_guids(Pubs)); - false -> ok - end, - {lists:append(AckTags), a(State)}. 
- -tx_commit(Txn, Fun, MsgPropsFun, - State = #vqstate { durable = IsDurable, - msg_store_clients = MSCState }) -> - #tx { pending_acks = AckTags, pending_messages = Pubs } = lookup_tx(Txn), - erase_tx(Txn), - AckTags1 = lists:append(AckTags), - PersistentGuids = persistent_guids(Pubs), - HasPersistentPubs = PersistentGuids =/= [], - {AckTags1, - a(case IsDurable andalso HasPersistentPubs of - true -> ok = msg_store_sync( - MSCState, true, PersistentGuids, - msg_store_callback(PersistentGuids, Pubs, AckTags1, - Fun, MsgPropsFun)), - State; - false -> tx_commit_post_msg_store(HasPersistentPubs, Pubs, AckTags1, - Fun, MsgPropsFun, State) - end)}. + {MsgIds, State1} = ack(fun msg_store_remove/3, + fun (_, State0) -> State0 end, + AckTags, State), + {MsgIds, a(State1)}. requeue(AckTags, MsgPropsFun, State) -> MsgPropsFun1 = fun (MsgProps) -> (MsgPropsFun(MsgProps)) #message_properties { needs_confirming = false } end, - a(reduce_memory_use( - ack(fun msg_store_release/3, + {MsgIds, State1} = + ack(fun (_, _, _) -> ok end, fun (#msg_status { msg = Msg, msg_props = MsgProps }, State1) -> {_SeqId, State2} = publish(Msg, MsgPropsFun1(MsgProps), true, false, State1), State2; - ({IsPersistent, Guid, MsgProps}, State1) -> + ({IsPersistent, MsgId, MsgProps}, State1) -> #vqstate { msg_store_clients = MSCState } = State1, {{ok, Msg = #basic_message{}}, MSCState1} = - msg_store_read(MSCState, IsPersistent, Guid), + msg_store_read(MSCState, IsPersistent, MsgId), State2 = State1 #vqstate { msg_store_clients = MSCState1 }, {_SeqId, State3} = publish(Msg, MsgPropsFun1(MsgProps), true, true, State2), State3 end, - AckTags, State))). + AckTags, State), + {MsgIds, a(reduce_memory_use(State1))}. len(#vqstate { len = Len }) -> Len. 
@@ -768,8 +645,8 @@ ram_duration(State = #vqstate { RamAckCount = gb_trees:size(RamAckIndex), Duration = %% msgs+acks / (msgs+acks/sec) == sec - case AvgEgressRate == 0 andalso AvgIngressRate == 0 andalso - AvgAckEgressRate == 0 andalso AvgAckIngressRate == 0 of + case (AvgEgressRate == 0 andalso AvgIngressRate == 0 andalso + AvgAckEgressRate == 0 andalso AvgAckIngressRate == 0) of true -> infinity; false -> (RamMsgCountPrev + RamMsgCount + RamAckCount + RamAckCountPrev) / @@ -797,22 +674,22 @@ ram_duration(State = #vqstate { ram_msg_count_prev = RamMsgCount, ram_ack_count_prev = RamAckCount }}. -needs_idle_timeout(State = #vqstate { on_sync = OnSync }) -> - case {OnSync, needs_index_sync(State)} of - {?BLANK_SYNC, false} -> - {Res, _State} = reduce_memory_use( - fun (_Quota, State1) -> {0, State1} end, - fun (_Quota, State1) -> State1 end, - fun (State1) -> State1 end, - fun (_Quota, State1) -> {0, State1} end, - State), - Res; - _ -> - true +needs_timeout(State) -> + case needs_index_sync(State) of + false -> case reduce_memory_use( + fun (_Quota, State1) -> {0, State1} end, + fun (_Quota, State1) -> State1 end, + fun (State1) -> State1 end, + fun (_Quota, State1) -> {0, State1} end, + State) of + {true, _State} -> idle; + {false, _State} -> false + end; + true -> timed end. -idle_timeout(State) -> - a(reduce_memory_use(confirm_commit_index(tx_commit_index(State)))). +timeout(State) -> + a(reduce_memory_use(confirm_commit_index(State))). handle_pre_hibernate(State = #vqstate { index_state = IndexState }) -> State #vqstate { index_state = rabbit_queue_index:flush(IndexState) }. 
@@ -822,7 +699,6 @@ status(#vqstate { len = Len, pending_ack = PA, ram_ack_index = RAI, - on_sync = #sync { funs = From }, target_ram_count = TargetRamCount, ram_msg_count = RamMsgCount, ram_index_count = RamIndexCount, @@ -839,7 +715,6 @@ status(#vqstate { {q4 , queue:len(Q4)}, {len , Len}, {pending_acks , dict:size(PA)}, - {outstanding_txns , length(From)}, {target_ram_count , TargetRamCount}, {ram_msg_count , RamMsgCount}, {ram_ack_count , gb_trees:size(RAI)}, @@ -851,6 +726,12 @@ status(#vqstate { {avg_ack_ingress_rate, AvgAckIngressRate}, {avg_ack_egress_rate , AvgAckEgressRate} ]. +invoke(?MODULE, Fun, State) -> Fun(?MODULE, State). + +is_duplicate(_Msg, State) -> {false, State}. + +discard(_Msg, _ChPid, State) -> State. + %%---------------------------------------------------------------------------- %% Minor helpers %%---------------------------------------------------------------------------- @@ -896,12 +777,12 @@ cons_if(true, E, L) -> [E | L]; cons_if(false, _E, L) -> L. gb_sets_maybe_insert(false, _Val, Set) -> Set; -%% when requeueing, we re-add a guid to the unconfirmed set +%% when requeueing, we re-add a msg_id to the unconfirmed set gb_sets_maybe_insert(true, Val, Set) -> gb_sets:add(Val, Set). -msg_status(IsPersistent, SeqId, Msg = #basic_message { guid = Guid }, +msg_status(IsPersistent, SeqId, Msg = #basic_message { id = MsgId }, MsgProps) -> - #msg_status { seq_id = SeqId, guid = Guid, msg = Msg, + #msg_status { seq_id = SeqId, msg_id = MsgId, msg = Msg, is_persistent = IsPersistent, is_delivered = false, msg_on_disk = false, index_on_disk = false, msg_props = MsgProps }. @@ -920,38 +801,28 @@ with_immutable_msg_store_state(MSCState, IsPersistent, Fun) -> end), Res. -msg_store_client_init(MsgStore, MsgOnDiskFun) -> - msg_store_client_init(MsgStore, rabbit_guid:guid(), MsgOnDiskFun). +msg_store_client_init(MsgStore, MsgOnDiskFun, Callback) -> + msg_store_client_init(MsgStore, rabbit_guid:guid(), MsgOnDiskFun, Callback). 
-msg_store_client_init(MsgStore, Ref, MsgOnDiskFun) -> - rabbit_msg_store:client_init( - MsgStore, Ref, MsgOnDiskFun, - msg_store_close_fds_fun(MsgStore =:= ?PERSISTENT_MSG_STORE)). +msg_store_client_init(MsgStore, Ref, MsgOnDiskFun, Callback) -> + CloseFDsFun = msg_store_close_fds_fun(MsgStore =:= ?PERSISTENT_MSG_STORE), + rabbit_msg_store:client_init(MsgStore, Ref, MsgOnDiskFun, + fun () -> Callback(?MODULE, CloseFDsFun) end). -msg_store_write(MSCState, IsPersistent, Guid, Msg) -> +msg_store_write(MSCState, IsPersistent, MsgId, Msg) -> with_immutable_msg_store_state( MSCState, IsPersistent, - fun (MSCState1) -> rabbit_msg_store:write(Guid, Msg, MSCState1) end). + fun (MSCState1) -> rabbit_msg_store:write(MsgId, Msg, MSCState1) end). -msg_store_read(MSCState, IsPersistent, Guid) -> +msg_store_read(MSCState, IsPersistent, MsgId) -> with_msg_store_state( MSCState, IsPersistent, - fun (MSCState1) -> rabbit_msg_store:read(Guid, MSCState1) end). - -msg_store_remove(MSCState, IsPersistent, Guids) -> - with_immutable_msg_store_state( - MSCState, IsPersistent, - fun (MCSState1) -> rabbit_msg_store:remove(Guids, MCSState1) end). - -msg_store_release(MSCState, IsPersistent, Guids) -> - with_immutable_msg_store_state( - MSCState, IsPersistent, - fun (MCSState1) -> rabbit_msg_store:release(Guids, MCSState1) end). + fun (MSCState1) -> rabbit_msg_store:read(MsgId, MSCState1) end). -msg_store_sync(MSCState, IsPersistent, Guids, Callback) -> +msg_store_remove(MSCState, IsPersistent, MsgIds) -> with_immutable_msg_store_state( MSCState, IsPersistent, - fun (MSCState1) -> rabbit_msg_store:sync(Guids, Callback, MSCState1) end). + fun (MCSState1) -> rabbit_msg_store:remove(MsgIds, MCSState1) end). msg_store_close_fds(MSCState, IsPersistent) -> with_msg_store_state( @@ -959,15 +830,9 @@ msg_store_close_fds(MSCState, IsPersistent) -> fun (MSCState1) -> rabbit_msg_store:close_all_indicated(MSCState1) end). 
msg_store_close_fds_fun(IsPersistent) -> - Self = self(), - fun () -> - rabbit_amqqueue:maybe_run_queue_via_backing_queue_async( - Self, - fun (State = #vqstate { msg_store_clients = MSCState }) -> - {ok, MSCState1} = - msg_store_close_fds(MSCState, IsPersistent), - {[], State #vqstate { msg_store_clients = MSCState1 }} - end) + fun (?MODULE, State = #vqstate { msg_store_clients = MSCState }) -> + {ok, MSCState1} = msg_store_close_fds(MSCState, IsPersistent), + State #vqstate { msg_store_clients = MSCState1 } end. maybe_write_delivered(false, _SeqId, IndexState) -> @@ -975,31 +840,17 @@ maybe_write_delivered(false, _SeqId, IndexState) -> maybe_write_delivered(true, SeqId, IndexState) -> rabbit_queue_index:deliver([SeqId], IndexState). -lookup_tx(Txn) -> case get({txn, Txn}) of - undefined -> #tx { pending_messages = [], - pending_acks = [] }; - V -> V - end. - -store_tx(Txn, Tx) -> put({txn, Txn}, Tx). - -erase_tx(Txn) -> erase({txn, Txn}). - -persistent_guids(Pubs) -> - [Guid || {#basic_message { guid = Guid, - is_persistent = true }, _MsgProps} <- Pubs]. 
- betas_from_index_entries(List, TransientThreshold, IndexState) -> {Filtered, Delivers, Acks} = lists:foldr( - fun ({Guid, SeqId, MsgProps, IsPersistent, IsDelivered}, + fun ({MsgId, SeqId, MsgProps, IsPersistent, IsDelivered}, {Filtered1, Delivers1, Acks1}) -> case SeqId < TransientThreshold andalso not IsPersistent of true -> {Filtered1, cons_if(not IsDelivered, SeqId, Delivers1), [SeqId | Acks1]}; false -> {[m(#msg_status { msg = undefined, - guid = Guid, + msg_id = MsgId, seq_id = SeqId, is_persistent = IsPersistent, is_delivered = IsDelivered, @@ -1052,7 +903,7 @@ update_rate(Now, Then, Count, {OThen, OCount}) -> %% Internal major helpers for Public API %%---------------------------------------------------------------------------- -init(IsDurable, IndexState, DeltaCount, Terms, +init(IsDurable, IndexState, DeltaCount, Terms, AsyncCallback, PersistentClient, TransientClient) -> {LowSeqId, NextSeqId, IndexState1} = rabbit_queue_index:bounds(IndexState), @@ -1075,10 +926,11 @@ init(IsDurable, IndexState, DeltaCount, Terms, ram_ack_index = gb_trees:empty(), index_state = IndexState1, msg_store_clients = {PersistentClient, TransientClient}, - on_sync = ?BLANK_SYNC, durable = IsDurable, transient_threshold = NextSeqId, + async_callback = AsyncCallback, + len = DeltaCount1, persistent_count = DeltaCount1, @@ -1093,6 +945,7 @@ init(IsDurable, IndexState, DeltaCount, Terms, msgs_on_disk = gb_sets:new(), msg_indices_on_disk = gb_sets:new(), unconfirmed = gb_sets:new(), + confirmed = gb_sets:new(), ack_out_counter = 0, ack_in_counter = 0, ack_rates = blank_rate(Now, 0) }, @@ -1105,89 +958,94 @@ blank_rate(Timestamp, IngressLength) -> avg_ingress = 0.0, timestamp = Timestamp }. 
-msg_store_callback(PersistentGuids, Pubs, AckTags, Fun, MsgPropsFun) -> - Self = self(), - F = fun () -> rabbit_amqqueue:maybe_run_queue_via_backing_queue( - Self, fun (StateN) -> {[], tx_commit_post_msg_store( - true, Pubs, AckTags, - Fun, MsgPropsFun, StateN)} - end) - end, - fun () -> spawn(fun () -> ok = rabbit_misc:with_exit_handler( - fun () -> remove_persistent_messages( - PersistentGuids) - end, F) - end) +in_r(MsgStatus = #msg_status { msg = undefined, index_on_disk = IndexOnDisk }, + State = #vqstate { q3 = Q3, q4 = Q4, ram_index_count = RamIndexCount }) -> + case queue:is_empty(Q4) of + true -> State #vqstate { + q3 = bpqueue:in_r(IndexOnDisk, MsgStatus, Q3), + ram_index_count = RamIndexCount + one_if(not IndexOnDisk) }; + false -> {MsgStatus1, State1 = #vqstate { q4 = Q4a }} = + read_msg(MsgStatus, State), + State1 #vqstate { q4 = queue:in_r(MsgStatus1, Q4a) } + end; +in_r(MsgStatus, State = #vqstate { q4 = Q4 }) -> + State #vqstate { q4 = queue:in_r(MsgStatus, Q4) }. + +queue_out(State = #vqstate { q4 = Q4 }) -> + case queue:out(Q4) of + {empty, _Q4} -> + case fetch_from_q3(State) of + {empty, _State1} = Result -> Result; + {loaded, {MsgStatus, State1}} -> {{value, MsgStatus}, State1} + end; + {{value, MsgStatus}, Q4a} -> + {{value, MsgStatus}, State #vqstate { q4 = Q4a }} end. -remove_persistent_messages(Guids) -> - PersistentClient = msg_store_client_init(?PERSISTENT_MSG_STORE, undefined), - ok = rabbit_msg_store:remove(Guids, PersistentClient), - rabbit_msg_store:client_delete_and_terminate(PersistentClient). 
- -tx_commit_post_msg_store(HasPersistentPubs, Pubs, AckTags, Fun, MsgPropsFun, - State = #vqstate { - on_sync = OnSync = #sync { - acks_persistent = SPAcks, - acks_all = SAcks, - pubs = SPubs, - funs = SFuns }, - pending_ack = PA, - durable = IsDurable }) -> - PersistentAcks = - case IsDurable of - true -> [AckTag || AckTag <- AckTags, - case dict:fetch(AckTag, PA) of - #msg_status {} -> - false; - {IsPersistent, _Guid, _MsgProps} -> - IsPersistent - end]; - false -> [] +read_msg(MsgStatus = #msg_status { msg = undefined, + msg_id = MsgId, + is_persistent = IsPersistent }, + State = #vqstate { ram_msg_count = RamMsgCount, + msg_store_clients = MSCState}) -> + {{ok, Msg = #basic_message {}}, MSCState1} = + msg_store_read(MSCState, IsPersistent, MsgId), + {MsgStatus #msg_status { msg = Msg }, + State #vqstate { ram_msg_count = RamMsgCount + 1, + msg_store_clients = MSCState1 }}; +read_msg(MsgStatus, State) -> + {MsgStatus, State}. + +internal_fetch(AckRequired, MsgStatus = #msg_status { + seq_id = SeqId, + msg_id = MsgId, + msg = Msg, + is_persistent = IsPersistent, + is_delivered = IsDelivered, + msg_on_disk = MsgOnDisk, + index_on_disk = IndexOnDisk }, + State = #vqstate {ram_msg_count = RamMsgCount, + out_counter = OutCount, + index_state = IndexState, + msg_store_clients = MSCState, + len = Len, + persistent_count = PCount }) -> + %% 1. Mark it delivered if necessary + IndexState1 = maybe_write_delivered( + IndexOnDisk andalso not IsDelivered, + SeqId, IndexState), + + %% 2. 
Remove from msg_store and queue index, if necessary + Rem = fun () -> + ok = msg_store_remove(MSCState, IsPersistent, [MsgId]) + end, + Ack = fun () -> rabbit_queue_index:ack([SeqId], IndexState1) end, + IndexState2 = + case {AckRequired, MsgOnDisk, IndexOnDisk, IsPersistent} of + {false, true, false, _} -> Rem(), IndexState1; + {false, true, true, _} -> Rem(), Ack(); + { true, true, true, false} -> Ack(); + _ -> IndexState1 end, - case IsDurable andalso (HasPersistentPubs orelse PersistentAcks =/= []) of - true -> State #vqstate { - on_sync = #sync { - acks_persistent = [PersistentAcks | SPAcks], - acks_all = [AckTags | SAcks], - pubs = [{MsgPropsFun, Pubs} | SPubs], - funs = [Fun | SFuns] }}; - false -> State1 = tx_commit_index( - State #vqstate { - on_sync = #sync { - acks_persistent = [], - acks_all = [AckTags], - pubs = [{MsgPropsFun, Pubs}], - funs = [Fun] } }), - State1 #vqstate { on_sync = OnSync } - end. -tx_commit_index(State = #vqstate { on_sync = ?BLANK_SYNC }) -> - State; -tx_commit_index(State = #vqstate { on_sync = #sync { - acks_persistent = SPAcks, - acks_all = SAcks, - pubs = SPubs, - funs = SFuns }, - durable = IsDurable }) -> - PAcks = lists:append(SPAcks), - Acks = lists:append(SAcks), - Pubs = [{Msg, Fun(MsgProps)} || {Fun, PubsN} <- lists:reverse(SPubs), - {Msg, MsgProps} <- lists:reverse(PubsN)], - {SeqIds, State1 = #vqstate { index_state = IndexState }} = - lists:foldl( - fun ({Msg = #basic_message { is_persistent = IsPersistent }, - MsgProps}, - {SeqIdsAcc, State2}) -> - IsPersistent1 = IsDurable andalso IsPersistent, - {SeqId, State3} = - publish(Msg, MsgProps, false, IsPersistent1, State2), - {cons_if(IsPersistent1, SeqId, SeqIdsAcc), State3} - end, {PAcks, ack(Acks, State)}, Pubs), - IndexState1 = rabbit_queue_index:sync(SeqIds, IndexState), - [ Fun() || Fun <- lists:reverse(SFuns) ], - reduce_memory_use( - State1 #vqstate { index_state = IndexState1, on_sync = ?BLANK_SYNC }). + %% 3. 
If an ack is required, add something sensible to PA + {AckTag, State1} = case AckRequired of + true -> StateN = record_pending_ack( + MsgStatus #msg_status { + is_delivered = true }, State), + {SeqId, StateN}; + false -> {undefined, State} + end, + + PCount1 = PCount - one_if(IsPersistent andalso not AckRequired), + Len1 = Len - 1, + RamMsgCount1 = RamMsgCount - one_if(Msg =/= undefined), + + {{Msg, IsDelivered, AckTag, Len1}, + State1 #vqstate { ram_msg_count = RamMsgCount1, + out_counter = OutCount + 1, + index_state = IndexState2, + len = Len1, + persistent_count = PCount1 }}. purge_betas_and_deltas(LensByStore, State = #vqstate { q3 = Q3, @@ -1206,38 +1064,38 @@ purge_betas_and_deltas(LensByStore, end. remove_queue_entries(Fold, Q, LensByStore, IndexState, MSCState) -> - {GuidsByStore, Delivers, Acks} = + {MsgIdsByStore, Delivers, Acks} = Fold(fun remove_queue_entries1/2, {orddict:new(), [], []}, Q), - ok = orddict:fold(fun (IsPersistent, Guids, ok) -> - msg_store_remove(MSCState, IsPersistent, Guids) - end, ok, GuidsByStore), - {sum_guids_by_store_to_len(LensByStore, GuidsByStore), + ok = orddict:fold(fun (IsPersistent, MsgIds, ok) -> + msg_store_remove(MSCState, IsPersistent, MsgIds) + end, ok, MsgIdsByStore), + {sum_msg_ids_by_store_to_len(LensByStore, MsgIdsByStore), rabbit_queue_index:ack(Acks, rabbit_queue_index:deliver(Delivers, IndexState))}. remove_queue_entries1( - #msg_status { guid = Guid, seq_id = SeqId, + #msg_status { msg_id = MsgId, seq_id = SeqId, is_delivered = IsDelivered, msg_on_disk = MsgOnDisk, index_on_disk = IndexOnDisk, is_persistent = IsPersistent }, - {GuidsByStore, Delivers, Acks}) -> + {MsgIdsByStore, Delivers, Acks}) -> {case MsgOnDisk of - true -> rabbit_misc:orddict_cons(IsPersistent, Guid, GuidsByStore); - false -> GuidsByStore + true -> rabbit_misc:orddict_cons(IsPersistent, MsgId, MsgIdsByStore); + false -> MsgIdsByStore end, cons_if(IndexOnDisk andalso not IsDelivered, SeqId, Delivers), cons_if(IndexOnDisk, SeqId, Acks)}. 
-sum_guids_by_store_to_len(LensByStore, GuidsByStore) -> +sum_msg_ids_by_store_to_len(LensByStore, MsgIdsByStore) -> orddict:fold( - fun (IsPersistent, Guids, LensByStore1) -> - orddict:update_counter(IsPersistent, length(Guids), LensByStore1) - end, LensByStore, GuidsByStore). + fun (IsPersistent, MsgIds, LensByStore1) -> + orddict:update_counter(IsPersistent, length(MsgIds), LensByStore1) + end, LensByStore, MsgIdsByStore). %%---------------------------------------------------------------------------- %% Internal gubbins for publishing %%---------------------------------------------------------------------------- -publish(Msg = #basic_message { is_persistent = IsPersistent, guid = Guid }, +publish(Msg = #basic_message { is_persistent = IsPersistent, id = MsgId }, MsgProps = #message_properties { needs_confirming = NeedsConfirming }, IsDelivered, MsgOnDisk, State = #vqstate { q1 = Q1, q3 = Q3, q4 = Q4, @@ -1257,7 +1115,7 @@ publish(Msg = #basic_message { is_persistent = IsPersistent, guid = Guid }, true -> State1 #vqstate { q4 = queue:in(m(MsgStatus1), Q4) } end, PCount1 = PCount + one_if(IsPersistent1), - UC1 = gb_sets_maybe_insert(NeedsConfirming, Guid, UC), + UC1 = gb_sets_maybe_insert(NeedsConfirming, MsgId, UC), {SeqId, State2 #vqstate { next_seq_id = SeqId + 1, len = Len + 1, in_counter = InCount + 1, @@ -1269,14 +1127,14 @@ maybe_write_msg_to_disk(_Force, MsgStatus = #msg_status { msg_on_disk = true }, _MSCState) -> MsgStatus; maybe_write_msg_to_disk(Force, MsgStatus = #msg_status { - msg = Msg, guid = Guid, + msg = Msg, msg_id = MsgId, is_persistent = IsPersistent }, MSCState) when Force orelse IsPersistent -> Msg1 = Msg #basic_message { %% don't persist any recoverable decoded properties content = rabbit_binary_parser:clear_decoded_content( Msg #basic_message.content)}, - ok = msg_store_write(MSCState, IsPersistent, Guid, Msg1), + ok = msg_store_write(MSCState, IsPersistent, MsgId, Msg1), MsgStatus #msg_status { msg_on_disk = true }; 
maybe_write_msg_to_disk(_Force, MsgStatus, _MSCState) -> MsgStatus. @@ -1286,7 +1144,7 @@ maybe_write_index_to_disk(_Force, MsgStatus = #msg_status { true = MsgStatus #msg_status.msg_on_disk, %% ASSERTION {MsgStatus, IndexState}; maybe_write_index_to_disk(Force, MsgStatus = #msg_status { - guid = Guid, + msg_id = MsgId, seq_id = SeqId, is_persistent = IsPersistent, is_delivered = IsDelivered, @@ -1294,7 +1152,7 @@ maybe_write_index_to_disk(Force, MsgStatus = #msg_status { when Force orelse IsPersistent -> true = MsgStatus #msg_status.msg_on_disk, %% ASSERTION IndexState1 = rabbit_queue_index:publish( - Guid, SeqId, MsgProps, IsPersistent, IndexState), + MsgId, SeqId, MsgProps, IsPersistent, IndexState), {MsgStatus #msg_status { index_on_disk = true }, maybe_write_delivered(IsDelivered, SeqId, IndexState1)}; maybe_write_index_to_disk(_Force, MsgStatus, IndexState) -> @@ -1313,7 +1171,7 @@ maybe_write_to_disk(ForceMsg, ForceIndex, MsgStatus, %%---------------------------------------------------------------------------- record_pending_ack(#msg_status { seq_id = SeqId, - guid = Guid, + msg_id = MsgId, is_persistent = IsPersistent, msg_on_disk = MsgOnDisk, msg_props = MsgProps } = MsgStatus, @@ -1322,8 +1180,8 @@ record_pending_ack(#msg_status { seq_id = SeqId, ack_in_counter = AckInCount}) -> {AckEntry, RAI1} = case MsgOnDisk of - true -> {{IsPersistent, Guid, MsgProps}, RAI}; - false -> {MsgStatus, gb_trees:insert(SeqId, Guid, RAI)} + true -> {{IsPersistent, MsgId, MsgProps}, RAI}; + false -> {MsgStatus, gb_trees:insert(SeqId, MsgId, RAI)} end, PA1 = dict:store(SeqId, AckEntry, PA), State #vqstate { pending_ack = PA1, @@ -1334,28 +1192,28 @@ remove_pending_ack(KeepPersistent, State = #vqstate { pending_ack = PA, index_state = IndexState, msg_store_clients = MSCState }) -> - {PersistentSeqIds, GuidsByStore} = + {PersistentSeqIds, MsgIdsByStore, _AllMsgIds} = dict:fold(fun accumulate_ack/3, accumulate_ack_init(), PA), State1 = State #vqstate { pending_ack = dict:new(), 
ram_ack_index = gb_trees:empty() }, case KeepPersistent of - true -> case orddict:find(false, GuidsByStore) of - error -> State1; - {ok, Guids} -> ok = msg_store_remove(MSCState, false, - Guids), + true -> case orddict:find(false, MsgIdsByStore) of + error -> State1; + {ok, MsgIds} -> ok = msg_store_remove(MSCState, false, + MsgIds), State1 end; false -> IndexState1 = rabbit_queue_index:ack(PersistentSeqIds, IndexState), - [ok = msg_store_remove(MSCState, IsPersistent, Guids) - || {IsPersistent, Guids} <- orddict:to_list(GuidsByStore)], + [ok = msg_store_remove(MSCState, IsPersistent, MsgIds) + || {IsPersistent, MsgIds} <- orddict:to_list(MsgIdsByStore)], State1 #vqstate { index_state = IndexState1 } end. ack(_MsgStoreFun, _Fun, [], State) -> - State; + {[], State}; ack(MsgStoreFun, Fun, AckTags, State) -> - {{PersistentSeqIds, GuidsByStore}, + {{PersistentSeqIds, MsgIdsByStore, AllMsgIds}, State1 = #vqstate { index_state = IndexState, msg_store_clients = MSCState, persistent_count = PCount, @@ -1371,25 +1229,28 @@ ack(MsgStoreFun, Fun, AckTags, State) -> gb_trees:delete_any(SeqId, RAI)})} end, {accumulate_ack_init(), State}, AckTags), IndexState1 = rabbit_queue_index:ack(PersistentSeqIds, IndexState), - [ok = MsgStoreFun(MSCState, IsPersistent, Guids) - || {IsPersistent, Guids} <- orddict:to_list(GuidsByStore)], - PCount1 = PCount - find_persistent_count(sum_guids_by_store_to_len( - orddict:new(), GuidsByStore)), - State1 #vqstate { index_state = IndexState1, - persistent_count = PCount1, - ack_out_counter = AckOutCount + length(AckTags) }. + [ok = MsgStoreFun(MSCState, IsPersistent, MsgIds) + || {IsPersistent, MsgIds} <- orddict:to_list(MsgIdsByStore)], + PCount1 = PCount - find_persistent_count(sum_msg_ids_by_store_to_len( + orddict:new(), MsgIdsByStore)), + {lists:reverse(AllMsgIds), + State1 #vqstate { index_state = IndexState1, + persistent_count = PCount1, + ack_out_counter = AckOutCount + length(AckTags) }}. -accumulate_ack_init() -> {[], orddict:new()}. 
+accumulate_ack_init() -> {[], orddict:new(), []}. accumulate_ack(_SeqId, #msg_status { is_persistent = false, %% ASSERTIONS msg_on_disk = false, - index_on_disk = false }, - {PersistentSeqIdsAcc, GuidsByStore}) -> - {PersistentSeqIdsAcc, GuidsByStore}; -accumulate_ack(SeqId, {IsPersistent, Guid, _MsgProps}, - {PersistentSeqIdsAcc, GuidsByStore}) -> + index_on_disk = false, + msg_id = MsgId }, + {PersistentSeqIdsAcc, MsgIdsByStore, AllMsgIds}) -> + {PersistentSeqIdsAcc, MsgIdsByStore, [MsgId | AllMsgIds]}; +accumulate_ack(SeqId, {IsPersistent, MsgId, _MsgProps}, + {PersistentSeqIdsAcc, MsgIdsByStore, AllMsgIds}) -> {cons_if(IsPersistent, SeqId, PersistentSeqIdsAcc), - rabbit_misc:orddict_cons(IsPersistent, Guid, GuidsByStore)}. + rabbit_misc:orddict_cons(IsPersistent, MsgId, MsgIdsByStore), + [MsgId | AllMsgIds]}. find_persistent_count(LensByStore) -> case orddict:find(true, LensByStore) of @@ -1408,12 +1269,14 @@ confirm_commit_index(State = #vqstate { index_state = IndexState }) -> false -> State end. -remove_confirms(GuidSet, State = #vqstate { msgs_on_disk = MOD, - msg_indices_on_disk = MIOD, - unconfirmed = UC }) -> - State #vqstate { msgs_on_disk = gb_sets:difference(MOD, GuidSet), - msg_indices_on_disk = gb_sets:difference(MIOD, GuidSet), - unconfirmed = gb_sets:difference(UC, GuidSet) }. +record_confirms(MsgIdSet, State = #vqstate { msgs_on_disk = MOD, + msg_indices_on_disk = MIOD, + unconfirmed = UC, + confirmed = C }) -> + State #vqstate { msgs_on_disk = gb_sets:difference(MOD, MsgIdSet), + msg_indices_on_disk = gb_sets:difference(MIOD, MsgIdSet), + unconfirmed = gb_sets:difference(UC, MsgIdSet), + confirmed = gb_sets:union (C, MsgIdSet) }. needs_index_sync(#vqstate { msg_indices_on_disk = MIOD, unconfirmed = UC }) -> @@ -1430,38 +1293,35 @@ needs_index_sync(#vqstate { msg_indices_on_disk = MIOD, %% subtraction. not (gb_sets:is_empty(UC) orelse gb_sets:is_subset(UC, MIOD)). 
-msgs_confirmed(GuidSet, State) -> - {gb_sets:to_list(GuidSet), remove_confirms(GuidSet, State)}. - -blind_confirm(QPid, GuidSet) -> - rabbit_amqqueue:maybe_run_queue_via_backing_queue_async( - QPid, fun (State) -> msgs_confirmed(GuidSet, State) end). - -msgs_written_to_disk(QPid, GuidSet, removed) -> - blind_confirm(QPid, GuidSet); -msgs_written_to_disk(QPid, GuidSet, written) -> - rabbit_amqqueue:maybe_run_queue_via_backing_queue_async( - QPid, fun (State = #vqstate { msgs_on_disk = MOD, - msg_indices_on_disk = MIOD, - unconfirmed = UC }) -> - msgs_confirmed(gb_sets:intersection(GuidSet, MIOD), - State #vqstate { - msgs_on_disk = - gb_sets:intersection( - gb_sets:union(MOD, GuidSet), UC) }) - end). - -msg_indices_written_to_disk(QPid, GuidSet) -> - rabbit_amqqueue:maybe_run_queue_via_backing_queue_async( - QPid, fun (State = #vqstate { msgs_on_disk = MOD, - msg_indices_on_disk = MIOD, - unconfirmed = UC }) -> - msgs_confirmed(gb_sets:intersection(GuidSet, MOD), - State #vqstate { - msg_indices_on_disk = - gb_sets:intersection( - gb_sets:union(MIOD, GuidSet), UC) }) - end). +blind_confirm(Callback, MsgIdSet) -> + Callback(?MODULE, + fun (?MODULE, State) -> record_confirms(MsgIdSet, State) end). + +msgs_written_to_disk(Callback, MsgIdSet, removed) -> + blind_confirm(Callback, MsgIdSet); +msgs_written_to_disk(Callback, MsgIdSet, written) -> + Callback(?MODULE, + fun (?MODULE, State = #vqstate { msgs_on_disk = MOD, + msg_indices_on_disk = MIOD, + unconfirmed = UC }) -> + Confirmed = gb_sets:intersection(UC, MsgIdSet), + record_confirms(gb_sets:intersection(MsgIdSet, MIOD), + State #vqstate { + msgs_on_disk = + gb_sets:union(MOD, Confirmed) }) + end). 
+ +msg_indices_written_to_disk(Callback, MsgIdSet) -> + Callback(?MODULE, + fun (?MODULE, State = #vqstate { msgs_on_disk = MOD, + msg_indices_on_disk = MIOD, + unconfirmed = UC }) -> + Confirmed = gb_sets:intersection(UC, MsgIdSet), + record_confirms(gb_sets:intersection(MsgIdSet, MOD), + State #vqstate { + msg_indices_on_disk = + gb_sets:union(MIOD, Confirmed) }) + end). %%---------------------------------------------------------------------------- %% Phase changes @@ -1538,17 +1398,16 @@ limit_ram_acks(Quota, State = #vqstate { pending_ack = PA, true -> {Quota, State}; false -> - {SeqId, Guid, RAI1} = gb_trees:take_largest(RAI), + {SeqId, MsgId, RAI1} = gb_trees:take_largest(RAI), MsgStatus = #msg_status { - guid = Guid, %% ASSERTION + msg_id = MsgId, %% ASSERTION is_persistent = false, %% ASSERTION msg_props = MsgProps } = dict:fetch(SeqId, PA), {_, State1} = maybe_write_to_disk(true, false, MsgStatus, State), + PA1 = dict:store(SeqId, {false, MsgId, MsgProps}, PA), limit_ram_acks(Quota - 1, - State1 #vqstate { - pending_ack = - dict:store(SeqId, {false, Guid, MsgProps}, PA), - ram_ack_index = RAI1 }) + State1 #vqstate { pending_ack = PA1, + ram_ack_index = RAI1 }) end. @@ -1801,3 +1660,27 @@ push_betas_to_deltas(Generator, Limit, Q, Count, RamIndexCount, IndexState) -> push_betas_to_deltas( Generator, Limit, Qa, Count + 1, RamIndexCount1, IndexState1) end. + +%%---------------------------------------------------------------------------- +%% Upgrading +%%---------------------------------------------------------------------------- + +multiple_routing_keys() -> + transform_storage( + fun ({basic_message, ExchangeName, Routing_Key, Content, + MsgId, Persistent}) -> + {ok, {basic_message, ExchangeName, [Routing_Key], Content, + MsgId, Persistent}}; + (_) -> {error, corrupt_message} + end), + ok. 
+ + +%% Assumes message store is not running +transform_storage(TransformFun) -> + transform_store(?PERSISTENT_MSG_STORE, TransformFun), + transform_store(?TRANSIENT_MSG_STORE, TransformFun). + +transform_store(Store, TransformFun) -> + rabbit_msg_store:force_recovery(rabbit_mnesia:dir(), Store), + rabbit_msg_store:transform_dir(rabbit_mnesia:dir(), Store, TransformFun). diff --git a/src/rabbit_version.erl b/src/rabbit_version.erl new file mode 100644 index 00000000..400abc10 --- /dev/null +++ b/src/rabbit_version.erl @@ -0,0 +1,172 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License +%% at http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and +%% limitations under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is VMware, Inc. +%% Copyright (c) 2007-2011 VMware, Inc. All rights reserved. +%% + +-module(rabbit_version). + +-export([recorded/0, matches/2, desired/0, desired_for_scope/1, + record_desired/0, record_desired_for_scope/1, + upgrades_required/1]). + +%% ------------------------------------------------------------------- +-ifdef(use_specs). + +-export_type([scope/0, step/0]). + +-type(scope() :: atom()). +-type(scope_version() :: [atom()]). +-type(step() :: {atom(), atom()}). + +-type(version() :: [atom()]). + +-spec(recorded/0 :: () -> rabbit_types:ok_or_error2(version(), any())). +-spec(matches/2 :: ([A], [A]) -> boolean()). +-spec(desired/0 :: () -> version()). +-spec(desired_for_scope/1 :: (scope()) -> scope_version()). +-spec(record_desired/0 :: () -> 'ok'). +-spec(record_desired_for_scope/1 :: + (scope()) -> rabbit_types:ok_or_error(any())). 
+-spec(upgrades_required/1 :: + (scope()) -> rabbit_types:ok_or_error2([step()], any())). + +-endif. +%% ------------------------------------------------------------------- + +-define(VERSION_FILENAME, "schema_version"). +-define(SCOPES, [mnesia, local]). + +%% ------------------------------------------------------------------- + +recorded() -> case rabbit_misc:read_term_file(schema_filename()) of + {ok, [V]} -> {ok, V}; + {error, _} = Err -> Err + end. + +record(V) -> ok = rabbit_misc:write_term_file(schema_filename(), [V]). + +recorded_for_scope(Scope) -> + case recorded() of + {error, _} = Err -> + Err; + {ok, Version} -> + {ok, case lists:keysearch(Scope, 1, categorise_by_scope(Version)) of + false -> []; + {value, {Scope, SV1}} -> SV1 + end} + end. + +record_for_scope(Scope, ScopeVersion) -> + case recorded() of + {error, _} = Err -> + Err; + {ok, Version} -> + Version1 = lists:keystore(Scope, 1, categorise_by_scope(Version), + {Scope, ScopeVersion}), + ok = record([Name || {_Scope, Names} <- Version1, Name <- Names]) + end. + +%% ------------------------------------------------------------------- + +matches(VerA, VerB) -> + lists:usort(VerA) =:= lists:usort(VerB). + +%% ------------------------------------------------------------------- + +desired() -> [Name || Scope <- ?SCOPES, Name <- desired_for_scope(Scope)]. + +desired_for_scope(Scope) -> with_upgrade_graph(fun heads/1, Scope). + +record_desired() -> record(desired()). + +record_desired_for_scope(Scope) -> + record_for_scope(Scope, desired_for_scope(Scope)). + +upgrades_required(Scope) -> + case recorded_for_scope(Scope) of + {error, enoent} -> + {error, version_not_available}; + {ok, CurrentHeads} -> + with_upgrade_graph( + fun (G) -> + case unknown_heads(CurrentHeads, G) of + [] -> {ok, upgrades_to_apply(CurrentHeads, G)}; + Unknown -> {error, {future_upgrades_found, Unknown}} + end + end, Scope) + end. 
+ +%% ------------------------------------------------------------------- + +with_upgrade_graph(Fun, Scope) -> + case rabbit_misc:build_acyclic_graph( + fun (Module, Steps) -> vertices(Module, Steps, Scope) end, + fun (Module, Steps) -> edges(Module, Steps, Scope) end, + rabbit_misc:all_module_attributes(rabbit_upgrade)) of + {ok, G} -> try + Fun(G) + after + true = digraph:delete(G) + end; + {error, {vertex, duplicate, StepName}} -> + throw({error, {duplicate_upgrade_step, StepName}}); + {error, {edge, {bad_vertex, StepName}, _From, _To}} -> + throw({error, {dependency_on_unknown_upgrade_step, StepName}}); + {error, {edge, {bad_edge, StepNames}, _From, _To}} -> + throw({error, {cycle_in_upgrade_steps, StepNames}}) + end. + +vertices(Module, Steps, Scope0) -> + [{StepName, {Module, StepName}} || {StepName, Scope1, _Reqs} <- Steps, + Scope0 == Scope1]. + +edges(_Module, Steps, Scope0) -> + [{Require, StepName} || {StepName, Scope1, Requires} <- Steps, + Require <- Requires, + Scope0 == Scope1]. +unknown_heads(Heads, G) -> + [H || H <- Heads, digraph:vertex(G, H) =:= false]. + +upgrades_to_apply(Heads, G) -> + %% Take all the vertices which can reach the known heads. That's + %% everything we've already applied. Subtract that from all + %% vertices: that's what we have to apply. + Unsorted = sets:to_list( + sets:subtract( + sets:from_list(digraph:vertices(G)), + sets:from_list(digraph_utils:reaching(Heads, G)))), + %% Form a subgraph from that list and find a topological ordering + %% so we can invoke them in order. + [element(2, digraph:vertex(G, StepName)) || + StepName <- digraph_utils:topsort(digraph_utils:subgraph(G, Unsorted))]. + +heads(G) -> + lists:sort([V || V <- digraph:vertices(G), digraph:out_degree(G, V) =:= 0]). 
+ +%% ------------------------------------------------------------------- + +categorise_by_scope(Version) when is_list(Version) -> + Categorised = + [{Scope, Name} || {_Module, Attributes} <- + rabbit_misc:all_module_attributes(rabbit_upgrade), + {Name, Scope, _Requires} <- Attributes, + lists:member(Name, Version)], + orddict:to_list( + lists:foldl(fun ({Scope, Name}, CatVersion) -> + rabbit_misc:orddict_cons(Scope, Name, CatVersion) + end, orddict:new(), Categorised)). + +dir() -> rabbit_mnesia:dir(). + +schema_filename() -> filename:join(dir(), ?VERSION_FILENAME). diff --git a/src/rabbit_vhost.erl b/src/rabbit_vhost.erl index efebef06..08d6c99a 100644 --- a/src/rabbit_vhost.erl +++ b/src/rabbit_vhost.erl @@ -21,6 +21,7 @@ %%---------------------------------------------------------------------------- -export([add/1, delete/1, exists/1, list/0, with/2]). +-export([info/1, info/2, info_all/0, info_all/1]). -ifdef(use_specs). @@ -30,10 +31,18 @@ -spec(list/0 :: () -> [rabbit_types:vhost()]). -spec(with/2 :: (rabbit_types:vhost(), rabbit_misc:thunk(A)) -> A). +-spec(info/1 :: (rabbit_types:vhost()) -> rabbit_types:infos()). +-spec(info/2 :: (rabbit_types:vhost(), rabbit_types:info_keys()) + -> rabbit_types:infos()). +-spec(info_all/0 :: () -> [rabbit_types:infos()]). +-spec(info_all/1 :: (rabbit_types:info_keys()) -> [rabbit_types:infos()]). + -endif. %%---------------------------------------------------------------------------- +-define(INFO_KEYS, [name, tracing]). 
+ add(VHostPath) -> R = rabbit_misc:execute_mnesia_transaction( fun () -> @@ -48,15 +57,16 @@ add(VHostPath) -> ok; (ok, false) -> [rabbit_exchange:declare( - rabbit_misc:r(VHostPath, exchange, Name), - Type, true, false, false, []) || - {Name,Type} <- - [{<<"">>, direct}, - {<<"amq.direct">>, direct}, - {<<"amq.topic">>, topic}, - {<<"amq.match">>, headers}, %% per 0-9-1 pdf - {<<"amq.headers">>, headers}, %% per 0-9-1 xml - {<<"amq.fanout">>, fanout}]], + rabbit_misc:r(VHostPath, exchange, Name), + Type, true, false, false, []) || + {Name,Type} <- + [{<<"">>, direct}, + {<<"amq.direct">>, direct}, + {<<"amq.topic">>, topic}, + {<<"amq.match">>, headers}, %% per 0-9-1 pdf + {<<"amq.headers">>, headers}, %% per 0-9-1 xml + {<<"amq.fanout">>, fanout}, + {<<"amq.rabbitmq.trace">>, topic}]], ok end), rabbit_log:info("Added vhost ~p~n", [VHostPath]), @@ -81,9 +91,9 @@ delete(VHostPath) -> internal_delete(VHostPath) -> lists:foreach( - fun ({Username, _, _, _}) -> - ok = rabbit_auth_backend_internal:clear_permissions(Username, - VHostPath) + fun (Info) -> + ok = rabbit_auth_backend_internal:clear_permissions( + proplists:get_value(user, Info), VHostPath) end, rabbit_auth_backend_internal:list_vhost_permissions(VHostPath)), ok = mnesia:delete({rabbit_vhost, VHostPath}), @@ -104,3 +114,17 @@ with(VHostPath, Thunk) -> Thunk() end end. + +%%---------------------------------------------------------------------------- + +infos(Items, X) -> [{Item, i(Item, X)} || Item <- Items]. + +i(name, VHost) -> VHost; +i(tracing, VHost) -> rabbit_trace:tracing(VHost); +i(Item, _) -> throw({bad_argument, Item}). + +info(VHost) -> infos(?INFO_KEYS, VHost). +info(VHost, Items) -> infos(Items, VHost). + +info_all() -> info_all(?INFO_KEYS). +info_all(Items) -> [info(VHost, Items) || VHost <- list()]. diff --git a/src/rabbit_writer.erl b/src/rabbit_writer.erl index eba86a55..ac3434d2 100644 --- a/src/rabbit_writer.erl +++ b/src/rabbit_writer.erl @@ -28,7 +28,7 @@ -define(HIBERNATE_AFTER, 5000). 
-%%---------------------------------------------------------------------------- +%%--------------------------------------------------------------------------- -ifdef(use_specs). @@ -69,7 +69,7 @@ -endif. -%%---------------------------------------------------------------------------- +%%--------------------------------------------------------------------------- start(Sock, Channel, FrameMax, Protocol, ReaderPid) -> {ok, @@ -133,7 +133,7 @@ handle_message({inet_reply, _, Status}, _State) -> handle_message(Message, _State) -> exit({writer, message_not_understood, Message}). -%--------------------------------------------------------------------------- +%%--------------------------------------------------------------------------- send_command(W, MethodRecord) -> W ! {send_command, MethodRecord}, @@ -157,13 +157,13 @@ send_command_and_notify(W, Q, ChPid, MethodRecord, Content) -> W ! {send_command_and_notify, Q, ChPid, MethodRecord, Content}, ok. -%--------------------------------------------------------------------------- +%%--------------------------------------------------------------------------- call(Pid, Msg) -> {ok, Res} = gen:call(Pid, '$gen_call', Msg, infinity), Res. -%--------------------------------------------------------------------------- +%%--------------------------------------------------------------------------- assemble_frame(Channel, MethodRecord, Protocol) -> ?LOGMESSAGE(out, Channel, MethodRecord, none), diff --git a/src/supervisor2.erl b/src/supervisor2.erl index d1537f26..405949ef 100644 --- a/src/supervisor2.erl +++ b/src/supervisor2.erl @@ -38,6 +38,9 @@ %% child is a supervisor and it exits normally (i.e. with reason of %% 'shutdown') then the child's parent also exits normally. %% +%% 5) normal, and {shutdown, _} exit reasons are all treated the same +%% (i.e. are regarded as normal exits) +%% %% All modifications are (C) 2010-2011 VMware, Inc. 
%% %% %CopyrightBegin% @@ -539,17 +542,12 @@ do_restart({RestartType, Delay}, Reason, Child, State) -> do_restart(permanent, Reason, Child, State) -> report_error(child_terminated, Reason, Child, State#state.name), restart(Child, State); -do_restart(intrinsic, normal, Child, State) -> - {shutdown, state_del_child(Child, State)}; -do_restart(intrinsic, shutdown, Child = #child{child_type = supervisor}, - State) -> - {shutdown, state_del_child(Child, State)}; -do_restart(_, normal, Child, State) -> - NState = state_del_child(Child, State), - {ok, NState}; -do_restart(_, shutdown, Child, State) -> - NState = state_del_child(Child, State), - {ok, NState}; +do_restart(Type, normal, Child, State) -> + del_child_and_maybe_shutdown(Type, Child, State); +do_restart(Type, {shutdown, _}, Child, State) -> + del_child_and_maybe_shutdown(Type, Child, State); +do_restart(Type, shutdown, Child = #child{child_type = supervisor}, State) -> + del_child_and_maybe_shutdown(Type, Child, State); do_restart(Type, Reason, Child, State) when Type =:= transient orelse Type =:= intrinsic -> report_error(child_terminated, Reason, Child, State#state.name), @@ -559,6 +557,11 @@ do_restart(temporary, Reason, Child, State) -> NState = state_del_child(Child, State), {ok, NState}. +del_child_and_maybe_shutdown(intrinsic, Child, State) -> + {shutdown, state_del_child(Child, State)}; +del_child_and_maybe_shutdown(_, Child, State) -> + {ok, state_del_child(Child, State)}. 
+ restart(Child, State) -> case add_restart(State) of {ok, NState} -> diff --git a/src/test_sup.erl b/src/test_sup.erl index b4df1fd0..84c4121c 100644 --- a/src/test_sup.erl +++ b/src/test_sup.erl @@ -33,10 +33,10 @@ test_supervisor_delayed_restart() -> test_supervisor_delayed_restart(SupPid) -> ok = ping_child(SupPid), ok = exit_child(SupPid), - timer:sleep(10), + timer:sleep(100), ok = ping_child(SupPid), ok = exit_child(SupPid), - timer:sleep(10), + timer:sleep(100), timeout = ping_child(SupPid), timer:sleep(1010), ok = ping_child(SupPid), @@ -45,8 +45,8 @@ test_supervisor_delayed_restart(SupPid) -> with_sup(RestartStrategy, Fun) -> {ok, SupPid} = supervisor2:start_link(?MODULE, [RestartStrategy]), Res = Fun(SupPid), + unlink(SupPid), exit(SupPid, shutdown), - rabbit_misc:unlink_and_capture_exit(SupPid), Res. init([RestartStrategy]) -> diff --git a/src/vm_memory_monitor.erl b/src/vm_memory_monitor.erl index 44e1e4b5..fb2fa267 100644 --- a/src/vm_memory_monitor.erl +++ b/src/vm_memory_monitor.erl @@ -175,10 +175,10 @@ internal_update(State = #state { memory_limit = MemLimit, case {Alarmed, NewAlarmed} of {false, true} -> emit_update_info(set, MemUsed, MemLimit), - alarm_handler:set_alarm({vm_memory_high_watermark, []}); + alarm_handler:set_alarm({{vm_memory_high_watermark, node()}, []}); {true, false} -> emit_update_info(clear, MemUsed, MemLimit), - alarm_handler:clear_alarm(vm_memory_high_watermark); + alarm_handler:clear_alarm({vm_memory_high_watermark, node()}); _ -> ok end, @@ -239,10 +239,13 @@ get_total_memory({unix,darwin}) -> PageSize * (Inactive + Active + Free + Wired); get_total_memory({unix,freebsd}) -> - PageSize = freebsd_sysctl("vm.stats.vm.v_page_size"), - PageCount = freebsd_sysctl("vm.stats.vm.v_page_count"), + PageSize = sysctl("vm.stats.vm.v_page_size"), + PageCount = sysctl("vm.stats.vm.v_page_count"), PageCount * PageSize; +get_total_memory({unix,openbsd}) -> + sysctl("hw.usermem"); + get_total_memory({win32,_OSname}) -> %% Due to the Erlang 
print format bug, on Windows boxes the memory %% size is broken. For example Windows 7 64 bit with 4Gigs of RAM @@ -342,7 +345,7 @@ parse_line_aix(Line) -> false -> list_to_integer(Value) end}. -freebsd_sysctl(Def) -> +sysctl(Def) -> list_to_integer(cmd("/sbin/sysctl -n " ++ Def) -- "\n"). %% file:read_file does not work on files in /proc as it seems to get |