0003259: Wrap raw opus in some kind of container or protocol

ID	Project	Category	View Status	Date Submitted	Last Update

0003259	GNUnet	conversation service	public	2014-01-08 17:21	2014-04-08 16:42

Reporter	LRN	Assigned To	LRN
Priority	normal	Severity	tweak	Reproducibility	N/A
Status	closed	Resolution	fixed
Product Version	0.10.0
Target Version	0.10.1	Fixed in Version	0.10.1

Summary	0003259: Wrap raw opus in some kind of container or protocol
Description	Right now the conversation service transfers raw Opus frames across the network. Raw frames lack any metadata, so the correct metadata must be hardcoded on both communicating nodes. Alternatively, an additional conversation message could be defined and used to transfer the metadata (at least the frame rate and channel count) before any frames come through. Or we could simply wrap Opus in a container; the available containers are Ogg, Matroska, and RTP.
Tags	No tags attached.
Attached Files	0001-Implement-ogg-wrapping-for-the-recorder-v1.patch (8,684 bytes) From 78f804db1c43f4195613b8dc176c0c472dc5d8c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=A0=D1=83=D1=81=D0=BB=D0=B0=D0=BD=20=D0=98=D0=B6=D0=B1?= =?UTF-8?q?=D1=83=D0=BB=D0=B0=D1=82=D0=BE=D0=B2?= <lrn1986@gmail.com> Date: Mon, 13 Jan 2014 21:09:46 +0000 Subject: [PATCH] Implement ogg wrapping for the recorder --- src/conversation/gnunet-helper-audio-record.c \| 268 +++++++++++++++++++++++--- 1 file changed, 245 insertions(+), 23 deletions(-) diff --git a/src/conversation/gnunet-helper-audio-record.c b/src/conversation/gnunet-helper-audio-record.c index 2081259..103881b 100644 --- a/src/conversation/gnunet-helper-audio-record.c +++ b/src/conversation/gnunet-helper-audio-record.c @@ -38,6 +38,7 @@ #include <pulse/pulseaudio.h> #include <opus/opus.h> #include <opus/opus_types.h> +#include <ogg/ogg.h> #define SAMPLING_RATE 48000 @@ -51,6 +52,20 @@ static pa_sample_spec sample_spec = { .channels = 1 }; +typedef struct +{ + int version; + int channels; /* Number of channels: 1..255 / + int preskip; + uint32_t input_sample_rate; + int gain; / in dB S7.8 should be zero whenever possible / + int channel_mapping; + / The rest is only used if channel_mapping != 0 / + int nb_streams; + int nb_coupled; + unsigned char stream_map[255]; +} OpusHeader; + /* * Pulseaudio mainloop api / @@ -126,6 +141,20 @@ static size_t transmit_buffer_index; / static struct AudioMessage audio_message; +/* + * Ogg muxer state + / +static ogg_stream_state os; + +/* + * Ogg packet id + / +static int32_t packet_id; + +/* + * Ogg granule for current packet + / +static int64_t enc_granulepos; /* * Pulseaudio shutdown task @@ -138,20 +167,54 @@ quit (int ret) } +static void +write_data (const char ptr, size_t msg_size) +{ + ssize_t ret; + size_t off; + off = 0; + while (off < msg_size) + { + ret = write (1, &ptr[off], msg_size - off); + if (0 >= ret) + { + if (-1 == ret) + GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, 
"write"); + quit (2); + } + off += ret; + } +} + +static void +write_page (ogg_page og) +{ + static unsigned long long toff; + size_t msg_size; + msg_size = sizeof (struct AudioMessage) + og->header_len + og->body_len; + audio_message->header.size = htons ((uint16_t) msg_size); + memcpy (&audio_message[1], og->header, og->header_len); + memcpy (((char ) &audio_message[1]) + og->header_len, og->body, og->body_len); + + toff += msg_size; + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, + "Sending %u bytes of audio data (total: %llu)\n", + (unsigned int) msg_size, + toff); + write_data ((const char ) audio_message, msg_size); +} + /** * Creates OPUS packets from PCM data / static void packetizer () { - static unsigned long long toff; char nbuf; size_t new_size; - const char ptr; - size_t off; - ssize_t ret; - int len; // FIXME: int? - size_t msg_size; + int32_t len; + ogg_packet op; + ogg_page og; while (transmit_buffer_length >= transmit_buffer_index + pcm_length) { @@ -163,34 +226,51 @@ packetizer () opus_encode_float (enc, pcm_buffer, frame_size, opus_data, max_payload_bytes); + if (len < 0) + { + GNUNET_log (GNUNET_ERROR_TYPE_ERROR, + _("opus_encode_float() failed: %s. Aborting\n"), + opus_strerror (len)); + quit (5); + } if (len > UINT16_MAX - sizeof (struct AudioMessage)) { GNUNET_break (0); continue; } + / As per OggOpus spec, granule is calculated as if the audio + had 48kHz sampling rate. / + enc_granulepos += frame_size 48000 / SAMPLING_RATE; - msg_size = sizeof (struct AudioMessage) + len; - audio_message->header.size = htons ((uint16_t) msg_size); - memcpy (&audio_message[1], opus_data, len); + /* FIXME: replace 255 * 255 with something else - this is not a + HARD limit, but a SOFT limit - page is committed when it goes + OVER that length in bytes. 
/ + while (ogg_stream_flush_fill (&os, &og, 255 255)) + { + if (og.header_len + og.body_len > UINT16_MAX - sizeof (struct AudioMessage)) + { + GNUNET_assert (0); + continue; + } + write_page (&og); + } - toff += msg_size; - GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, - "Sending %u bytes of audio data (total: %llu)\n", - (unsigned int) msg_size, - toff); - ptr = (const char ) audio_message; - off = 0; - while (off < msg_size) + op.packet = (unsigned char ) opus_data; + op.bytes = len; + op.b_o_s = 0; + op.granulepos = enc_granulepos; + op.packetno = packet_id++; + ogg_stream_packetin (&os, &op); + + while (ogg_stream_flush_fill (&os, &og, 255 * 255)) { - ret = write (1, &ptr[off], msg_size - off); - if (0 >= ret) + if (og.header_len + og.body_len > UINT16_MAX - sizeof (struct AudioMessage)) { - if (-1 == ret) - GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "write"); - quit (2); + GNUNET_assert (0); + continue; } - off += ret; + write_page (&og); } } @@ -481,6 +561,147 @@ opus_init () OPUS_SET_SIGNAL (OPUS_SIGNAL_VOICE)); } +static int +write_uint32 (unsigned char data, int maxlen, int pos, ogg_uint32_t val) +{ + if (pos > maxlen - 4) + return 0; + data[pos + 0] = (val >> 0) & 0xFF; + data[pos + 1] = (val >> 8) & 0xFF; + data[pos + 2] = (val >> 16) & 0xFF; + data[pos + 3] = (val >> 24) & 0xFF; + pos += 4; + return 1; +} + +static int +write_uint16 (unsigned char data, int maxlen, int pos, ogg_uint16_t val) +{ + if (pos > maxlen - 2) + return 0; + data[pos + 0] = (val >> 0) & 0xFF; + data[pos + 1] = (val >> 8) & 0xFF; + pos += 2; + return 1; +} + +static int +write_chars (unsigned char data, int maxlen, int pos, const unsigned char str, int nb_chars) +{ + int i; + if (pos > maxlen - nb_chars) + return 0; + for (i = 0; i < nb_chars; i++) + data[(pos)++] = str[i]; + return 1; +} + +static int +opus_header_to_packet (const OpusHeader h, unsigned char packet, int len) +{ + int i; + int pos; + unsigned char ch; + + pos = 0; + if (len < 19) + return 0; + if (!write_chars (packet, 
len, &pos, (const unsigned char)"OpusHead", 8)) + return 0; + /* Version is 1 / + ch = 1; + if (!write_chars (packet, len, &pos, &ch, 1)) + return 0; + + ch = h->channels; + if (!write_chars (packet, len, &pos, &ch, 1)) + return 0; + + if (!write_uint16 (packet, len, &pos, h->preskip)) + return 0; + + if (!write_uint32 (packet, len, &pos, h->input_sample_rate)) + return 0; + + if (!write_uint16 (packet, len, &pos, h->gain)) + return 0; + + ch = h->channel_mapping; + if (!write_chars (packet, len, &pos, &ch, 1)) + return 0; + + if (h->channel_mapping != 0) + { + ch = h->nb_streams; + if (!write_chars (packet, len, &pos, &ch, 1)) + return 0; + + ch = h->nb_coupled; + if (!write_chars (packet, len, &pos, &ch, 1)) + return 0; + + / Multi-stream support / + for (i = 0; i < h->channels; i++) + { + if (!write_chars (packet, len, &pos, &h->stream_map[i], 1)) + return 0; + } + } + + return pos; +} + + +static void +ogg_init () +{ + int serialno; + OpusHeader header; + int channels = 1; + + serialno = GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_STRONG, 0xFFFFFFFF); + + / OggOpus headers / + header.channels = channels; + header.channel_mapping = 255; + if (header.channels <= 8) + header.channel_mapping = header.channels > 2; + header.input_sample_rate = SAMPLING_RATE; + header.gain = 0; + header.preskip = 0; + + /Initialize Ogg stream struct/ + if (-1 == ogg_stream_init (&os, serialno)) + { + GNUNET_log (GNUNET_ERROR_TYPE_ERROR, + _("ogg_stream_init() failed.\n")); + exit (3); + } + + packet_id = 0; + + /Write header/ + { + ogg_packet op; + ogg_page og; + unsigned char header_data[100]; + int packet_size = opus_header_to_packet (&header, header_data, 100); + op.packet = header_data; + op.bytes = packet_size; + op.b_o_s = 1; + op.e_o_s = 0; + op.granulepos = 0; + op.packetno = packet_id++; + ogg_stream_packetin (&os, &op); + + while (ogg_stream_flush (&os, &og)) + { + write_page (&og); + } + } + +} + /* * The main function for the record helper. 
@@ -500,6 +721,7 @@ main (int argc, char argv[]) "Audio source starts\n"); audio_message = GNUNET_malloc (UINT16_MAX); audio_message->header.type = htons (GNUNET_MESSAGE_TYPE_CONVERSATION_AUDIO); + ogg_init (); opus_init (); pa_init (); return 0; -- 1.8.4 0001-Implement-ogg-wrapping-for-the-recorder-v1.patch (8,684 bytes) 0001-Wrap-opus-in-ogg-container.patch (47,772 bytes) From 0a969593384e42214567d1283f7a6ec00bf28bbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=A0=D1=83=D1=81=D0=BB=D0=B0=D0=BD=20=D0=98=D0=B6=D0=B1?= =?UTF-8?q?=D1=83=D0=BB=D0=B0=D1=82=D0=BE=D0=B2?= <lrn1986@gmail.com> Date: Mon, 13 Jan 2014 21:09:46 +0000 Subject: [PATCH] Wrap opus in ogg container --- src/conversation/Makefile.am \| 5 +- .../gnunet-helper-audio-playback-gst.c \| 198 ++++---- src/conversation/gnunet-helper-audio-playback.c \| 539 +++++++++++++++++++-- src/conversation/gnunet-helper-audio-record-gst.c \| 75 ++- src/conversation/gnunet-helper-audio-record.c \| 362 ++++++++++++-- 5 files changed, 960 insertions(+), 219 deletions(-) diff --git a/src/conversation/Makefile.am b/src/conversation/Makefile.am index a11763c..37cf5c8 100644 --- a/src/conversation/Makefile.am +++ b/src/conversation/Makefile.am @@ -101,7 +101,7 @@ gnunet_helper_audio_record_SOURCES = \ gnunet-helper-audio-record.c gnunet_helper_audio_record_LDADD = \ $(top_builddir)/src/util/libgnunetutil.la \ - -lpulse -lopus\ + -lpulse -lopus -logg \ $(INTLLIBS) gnunet_helper_audio_record_LDFLAGS = \ $(GNUNET_LDFLAGS) $(WINFLAGS) @@ -110,7 +110,7 @@ gnunet_helper_audio_playback_SOURCES = \ gnunet-helper-audio-playback.c gnunet_helper_audio_playback_LDADD = \ $(top_builddir)/src/util/libgnunetutil.la \ - -lpulse -lopus\ + -lpulse -lopus -logg \ $(INTLLIBS) gnunet_helper_audio_playback_LDFLAGS = \ $(GNUNET_LDFLAGS) $(WINFLAGS) @@ -131,7 +131,6 @@ gnunet_helper_audio_playback_SOURCES = \ gnunet-helper-audio-playback-gst.c gnunet_helper_audio_playback_LDADD = \ $(top_builddir)/src/util/libgnunetutil.la \ - -lopus \ $(GST_LIBS) \ 
$(INTLLIBS) gnunet_helper_audio_playback_LDFLAGS = \ diff --git a/src/conversation/gnunet-helper-audio-playback-gst.c b/src/conversation/gnunet-helper-audio-playback-gst.c index d6d2316..612898d 100755 --- a/src/conversation/gnunet-helper-audio-playback-gst.c +++ b/src/conversation/gnunet-helper-audio-playback-gst.c @@ -30,26 +30,17 @@ #include "gnunet_core_service.h" #include <gst/gst.h> -#include <gst/app/gstappsrc.h> #include <gst/audio/gstaudiobasesrc.h> +#include <gst/app/gstappsrc.h> #include <glib.h> -#include <opus/opus.h> -#include <opus/opus_types.h> +#define DEBUG_READ_PURE_OGG 1 /* * How much data to read in one go / #define MAXLINE 4096 -#define SAMPLING_RATE 48000 - -#define CHANNELS 1 - -#define FRAME_SIZE (SAMPLING_RATE / 50) - -#define PCM_LENGTH (FRAME_SIZE CHANNELS * sizeof (int16_t)) - /** * Max number of microseconds to buffer in audiosink. * Default is 200000 @@ -77,31 +68,18 @@ static GstElement pipeline; / static GstElement source; -/* - * OPUS decoder - / -static OpusDecoder dec; - +static GstElement demuxer; +static GstElement decoder; +static GstElement conv; +static GstElement resampler; +static GstElement sink; /* * Set to 1 to break the reading loop / static int abort_read; - -/* - * OPUS initialization - / static void -opus_init () -{ - int err; - int channels = 1; - - dec = opus_decoder_create (SAMPLING_RATE, channels, &err); -} - -void sink_child_added (GstChildProxy child_proxy, GObject object, gchar name, gpointer user_data) { if (GST_IS_AUDIO_BASE_SRC (object)) @@ -109,6 +87,22 @@ sink_child_added (GstChildProxy child_proxy, GObject object, gchar name, gpoi } static void +ogg_pad_added (GstElement element, GstPad pad, gpointer data) +{ + GstPad sinkpad; + GstElement decoder = (GstElement ) data; + + /* We can now link this pad with the opus-decoder sink pad / + sinkpad = gst_element_get_static_pad (decoder, "sink"); + + gst_pad_link (pad, sinkpad); + + gst_element_link_many (decoder, conv, resampler, sink, NULL); + + 
gst_object_unref (sinkpad); +} + +static void quit () { if (NULL != source) @@ -157,6 +151,50 @@ signalhandler (int s) quit (); } +static int +feed_buffer_to_gst (const char audio, size_t b_len) +{ + GstBuffer b; + gchar bufspace; + GstFlowReturn flow; + + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, + "Feeding %u bytes to GStreamer\n", + (unsigned int) b_len); + + bufspace = g_memdup (audio, b_len); + b = gst_buffer_new_wrapped (bufspace, b_len); + if (NULL == b) + { + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Failed to wrap a buffer\n"); + g_free (bufspace); + return GNUNET_SYSERR; + } + flow = gst_app_src_push_buffer (GST_APP_SRC (source), b); + /* They all return GNUNET_OK, because currently player stops when + * data stops coming. This might need to be changed for the player + * to also stop when pipeline breaks. + / + switch (flow) + { + case GST_FLOW_OK: + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Fed %u bytes to the pipeline\n", + (unsigned int) b_len); + break; + case GST_FLOW_FLUSHING: + / buffer was dropped, because pipeline state is not PAUSED or PLAYING / + GNUNET_log (GNUNET_ERROR_TYPE_INFO, "Dropped a buffer\n"); + break; + case GST_FLOW_EOS: + / end of stream / + GNUNET_log (GNUNET_ERROR_TYPE_INFO, "EOS\n"); + break; + default: + GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "Unexpected push result\n"); + break; + } + return GNUNET_OK; +} /* * Message callback @@ -167,68 +205,15 @@ stdin_receiver (void cls, const struct GNUNET_MessageHeader msg) { struct AudioMessage audio; - GstBuffer b; - int16_t bufspace; - GstFlowReturn flow; - int ret; + size_t b_len; switch (ntohs (msg->type)) { case GNUNET_MESSAGE_TYPE_CONVERSATION_AUDIO: audio = (struct AudioMessage ) msg; - bufspace = (int16_t ) g_malloc (PCM_LENGTH); - - ret = opus_decode (dec, - (const unsigned char ) &audio[1], - ntohs (audio->header.size) - sizeof (struct AudioMessage), - bufspace, - FRAME_SIZE, 0); - if (ret < 0) - { - GNUNET_log (GNUNET_ERROR_TYPE_ERROR, - "Opus decoding failed: %d\n", - ret); - g_free 
(bufspace); - return GNUNET_OK; - } - GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, - "Decoded frame with %u bytes\n", - ntohs (audio->header.size)); - - b = gst_buffer_new_wrapped (bufspace, ret * sizeof (int16_t)); - if (NULL == b) - { - GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Failed to wrap a buffer\n"); - g_free (bufspace); - return GNUNET_SYSERR; - } - - GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "pushing...\n"); - flow = gst_app_src_push_buffer (GST_APP_SRC (source), b); - GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "pushed!\n"); - /* They all return GNUNET_OK, because currently player stops when - * data stops coming. This might need to be changed for the player - * to also stop when pipeline breaks. - / - switch (flow) - { - case GST_FLOW_OK: - GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Fed %u bytes to the pipeline\n", - (unsigned int) ret sizeof (int16_t)); - break; - case GST_FLOW_FLUSHING: - /* buffer was dropped, because pipeline state is not PAUSED or PLAYING / - GNUNET_log (GNUNET_ERROR_TYPE_INFO, "Dropped a buffer\n"); - break; - case GST_FLOW_EOS: - / end of stream / - GNUNET_log (GNUNET_ERROR_TYPE_INFO, "EOS\n"); - break; - default: - GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "Unexpected push result\n"); - break; - } + b_len = ntohs (audio->header.size) - sizeof (struct AudioMessage); + feed_buffer_to_gst ((const char ) &audio[1], b_len); break; default: break; @@ -240,15 +225,16 @@ stdin_receiver (void cls, int main (int argc, char argv) { - GstElement conv, resampler, sink; GstBus bus; - GstCaps caps; guint bus_watch_id; uint64_t toff; typedef void (SignalHandlerPointer) (int); SignalHandlerPointer inthandler, termhandler; +#ifdef DEBUG_READ_PURE_OGG + int read_pure_ogg = getenv ("GNUNET_READ_PURE_OGG") ? 
1 : 0; +#endif inthandler = signal (SIGINT, signalhandler); termhandler = signal (SIGTERM, signalhandler); @@ -257,8 +243,6 @@ main (int argc, char argv) setmode (0, _O_BINARY); #endif - opus_init (); - / Initialisation / gst_init (&argc, &argv); @@ -275,11 +259,13 @@ main (int argc, char argv) / Create gstreamer elements / pipeline = gst_pipeline_new ("audio-player"); source = gst_element_factory_make ("appsrc", "audio-input"); + demuxer = gst_element_factory_make ("oggdemux", "ogg-demuxer"); + decoder = gst_element_factory_make ("opusdec", "opus-decoder"); conv = gst_element_factory_make ("audioconvert", "converter"); resampler= gst_element_factory_make ("audioresample", "resampler"); sink = gst_element_factory_make ("autoaudiosink", "audiosink"); - if (!pipeline \|\| !source \|\| !conv \|\| !resampler \|\| !sink) + if (!pipeline \|\| !source \|\| !conv \|\| !resampler \|\| !decoder \|\| !demuxer \|\| !sink) { GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "One element could not be created. Exiting.\n"); @@ -287,15 +273,7 @@ main (int argc, char argv) } g_signal_connect (sink, "child-added", G_CALLBACK (sink_child_added), NULL); - - caps = gst_caps_new_simple ("audio/x-raw", - "format", G_TYPE_STRING, "S16LE", - "rate", G_TYPE_INT, SAMPLING_RATE, - "channels", G_TYPE_INT, CHANNELS, - "layout", G_TYPE_STRING, "interleaved", - NULL); - gst_app_src_set_caps (GST_APP_SRC (source), caps); - gst_caps_unref (caps); + g_signal_connect (demuxer, "pad-added", G_CALLBACK (ogg_pad_added), decoder); / Keep a reference to it, we operate on it / gst_object_ref (GST_OBJECT (source)); @@ -304,23 +282,29 @@ main (int argc, char argv) / we feed appsrc as fast as possible, it just blocks when it's full / g_object_set (G_OBJECT (source), - "format", GST_FORMAT_TIME, +/ "format", GST_FORMAT_TIME,/ "block", TRUE, "is-live", TRUE, NULL); + g_object_set (G_OBJECT (decoder), +/ "plc", FALSE,/ +/ "apply-gain", TRUE,/ + "use-inband-fec", TRUE, + NULL); + / we add a message handler / bus = 
gst_pipeline_get_bus (GST_PIPELINE (pipeline)); bus_watch_id = gst_bus_add_watch (bus, bus_call, pipeline); gst_object_unref (bus); / we add all elements into the pipeline / - / audio-input \| converter \| resampler \| audiosink / - gst_bin_add_many (GST_BIN (pipeline), source, conv, + / audio-input \| ogg-demuxer \| opus-decoder \| converter \| resampler \| audiosink / + gst_bin_add_many (GST_BIN (pipeline), source, demuxer, decoder, conv, resampler, sink, NULL); / we link the elements together / - gst_element_link_many (source, conv, resampler, sink, NULL); + gst_element_link_many (source, demuxer, NULL); / Set the pipeline to "playing" state/ GNUNET_log (GNUNET_ERROR_TYPE_INFO, "Now playing\n"); @@ -349,6 +333,13 @@ main (int argc, char argv) toff); if (0 == ret) break; +#ifdef DEBUG_READ_PURE_OGG + if (read_pure_ogg) + { + feed_buffer_to_gst (readbuf, ret); + } + else +#endif GNUNET_SERVER_mst_receive (stdin_mst, NULL, readbuf, ret, GNUNET_NO, GNUNET_NO); @@ -370,3 +361,4 @@ main (int argc, char argv) return 0; } + diff --git a/src/conversation/gnunet-helper-audio-playback.c b/src/conversation/gnunet-helper-audio-playback.c index cb08315..9c8b871 100644 --- a/src/conversation/gnunet-helper-audio-playback.c +++ b/src/conversation/gnunet-helper-audio-playback.c @@ -38,20 +38,32 @@ #include <pulse/pulseaudio.h> #include <opus/opus.h> #include <opus/opus_types.h> +#include <ogg/ogg.h> + +#define DEBUG_READ_PURE_OGG 1 +#define DEBUG_DUMP_DECODED_OGG 1 #define MAXLINE 4096 #define SAMPLING_RATE 48000 +#define CHANNELS 1 + +/ 120ms at 48000 / +#define MAX_FRAME_SIZE (960 6) + /** * Pulseaudio specification. May change in the future. 
/ static pa_sample_spec sample_spec = { .format = PA_SAMPLE_FLOAT32LE, .rate = SAMPLING_RATE, - .channels = 1 + .channels = CHANNELS }; +#ifdef DEBUG_DUMP_DECODED_OGG +static int dump_to_stdout; +#endif /* * Pulseaudio mainloop api @@ -84,11 +96,6 @@ static OpusDecoder dec; static float pcm_buffer; /** - * Length of PCM buffer - / -static int pcm_length; - -/* * Number of samples for one frame / static int frame_size; @@ -99,50 +106,214 @@ static int frame_size; static int ready_pipe[2]; /* - * Message callback + * Ogg I/O state. / -static int -stdin_receiver (void cls, - void client, - const struct GNUNET_MessageHeader msg) +static ogg_sync_state oy; + +/** + * Ogg stream state. + / +static ogg_stream_state os; + +static int channels; + +static int preskip; + +static float gain; + +GNUNET_NETWORK_STRUCT_BEGIN + +/ OggOpus spec says the numbers must be in little-endian order / +struct OpusHeadPacket { - struct AudioMessage audio; - int ret; + uint8_t magic[8]; + uint8_t version; + uint8_t channels; + uint16_t preskip GNUNET_PACKED; + uint32_t sampling_rate GNUNET_PACKED; + uint16_t gain GNUNET_PACKED; + uint8_t channel_mapping; +}; - switch (ntohs (msg->type)) +GNUNET_NETWORK_STRUCT_END + +/Process an Opus header and setup the opus decoder based on it. 
+ It takes several pointers for header values which are needed + elsewhere in the code./ +static OpusDecoder * +process_header (ogg_packet op) +{ + int err; + OpusDecoder dec; + struct OpusHeadPacket header; + + if (op->bytes < sizeof (header)) + return NULL; + memcpy (&header, op->packet, sizeof (header)); + header.preskip = GNUNET_le16toh (header.preskip); + header.sampling_rate = GNUNET_le32toh (header.sampling_rate); + header.gain = GNUNET_le16toh (header.gain); + + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, + "Header: v%u, %u-ch, skip %u, %uHz, %u gain\n", + header.version, header.channels, header.preskip, header.sampling_rate, header.gain); + + channels = header.channels; + preskip = header.preskip; + + if (header.channel_mapping != 0) { - case GNUNET_MESSAGE_TYPE_CONVERSATION_AUDIO: - audio = (struct AudioMessage ) msg; + fprintf (stderr, "This implementation does not support non-mono streams\n"); + return NULL; + } - ret = opus_decode_float (dec, - (const unsigned char ) &audio[1], - ntohs (audio->header.size) - sizeof (struct AudioMessage), - pcm_buffer, - frame_size, 0); - if (ret < 0) + dec = opus_decoder_create (SAMPLING_RATE, channels, &err); + if (OPUS_OK != err) + { + fprintf (stderr, "Cannot create encoder: %s\n", opus_strerror (err)); + return NULL; + } + if (!dec) + { + fprintf (stderr, "Decoder initialization failed: %s\n", opus_strerror (err)); + return NULL; + } + + if (0 != header.gain) + { + /Gain API added in a newer libopus version, if we don't have it + we apply the gain ourselves. 
We also add in a user provided + manual gain at the same time./ + int gainadj = (int) header.gain; + err = opus_decoder_ctl (dec, OPUS_SET_GAIN (gainadj)); + if(OPUS_UNIMPLEMENTED == err) { - GNUNET_log (GNUNET_ERROR_TYPE_ERROR, - "Opus decoding failed: %d\n", - ret); - return GNUNET_OK; + gain = pow (10.0, gainadj / 5120.0); } - GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, - "Decoded frame with %u bytes\n", - ntohs (audio->header.size)); - if (pa_stream_write - (stream_out, pcm_buffer, pcm_length, NULL, 0, - PA_SEEK_RELATIVE) < 0) + else if (OPUS_OK != err) { - GNUNET_log (GNUNET_ERROR_TYPE_ERROR, - _("pa_stream_write() failed: %s\n"), - pa_strerror (pa_context_errno (context))); - return GNUNET_OK; + fprintf (stderr, "Error setting gain: %s\n", opus_strerror (err)); + return NULL; } - break; - default: - break; } - return GNUNET_OK; + + return dec; +} + + +#ifdef DEBUG_DUMP_DECODED_OGG +static size_t fwrite_le32(opus_int32 i32, FILE file) +{ + unsigned char buf[4]; + buf[0]=(unsigned char)(i32&0xFF); + buf[1]=(unsigned char)(i32>>8&0xFF); + buf[2]=(unsigned char)(i32>>16&0xFF); + buf[3]=(unsigned char)(i32>>24&0xFF); + return fwrite(buf,4,1,file); +} + +static size_t fwrite_le16(int i16, FILE file) +{ + unsigned char buf[2]; + buf[0]=(unsigned char)(i16&0xFF); + buf[1]=(unsigned char)(i16>>8&0xFF); + return fwrite(buf,2,1,file); +} + +static int write_wav_header() +{ + int ret; + FILE file = stdout; + + ret = fprintf (file, "RIFF") >= 0; + ret &= fwrite_le32 (0x7fffffff, file); + + ret &= fprintf (file, "WAVEfmt ") >= 0; + ret &= fwrite_le32 (16, file); + ret &= fwrite_le16 (1, file); + ret &= fwrite_le16 (channels, file); + ret &= fwrite_le32 (SAMPLING_RATE, file); + ret &= fwrite_le32 (2channelsSAMPLING_RATE, file); + ret &= fwrite_le16 (2channels, file); + ret &= fwrite_le16 (16, file); + + ret &= fprintf (file, "data") >= 0; + ret &= fwrite_le32 (0x7fffffff, file); + + return !ret ? 
-1 : 16; +} + +#endif + +static int64_t +audio_write (int64_t maxout) +{ + int64_t sampout = 0; + int tmp_skip; + unsigned out_len; + unsigned to_write; + float output; +#ifdef DEBUG_DUMP_DECODED_OGG + static int wrote_wav_header; + + if (dump_to_stdout && !wrote_wav_header) + { + write_wav_header (); + wrote_wav_header = 1; + } +#endif + maxout = 0 > maxout ? 0 : maxout; + do + { + tmp_skip = (preskip > frame_size) ? (int) frame_size : preskip; + preskip -= tmp_skip; + output = pcm_buffer + channels tmp_skip; + out_len = frame_size - tmp_skip; + if (out_len > MAX_FRAME_SIZE) + exit (6); + frame_size = 0; + + to_write = out_len < maxout ? out_len : (unsigned) maxout; + if (0 < maxout) + { + int64_t wrote = 0; + wrote = to_write; + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, + "Writing %u * %u * %u = %u bytes into PA\n", + to_write, channels, sizeof (float), + to_write * channels * sizeof (float)); +#ifdef DEBUG_DUMP_DECODED_OGG + if (dump_to_stdout) + { +# define fminf(_x,_y) ((_x)<(_y)?(_x):(_y)) +# define fmaxf(_x,_y) ((_x)>(_y)?(_x):(_y)) +# define float2int(flt) ((int)(floor(.5+flt))) + int i; + int16_t out = alloca(sizeof(short)MAX_FRAME_SIZEchannels); + for (i=0;i<(int)out_lenchannels;i++) + out[i]=(short)float2int(fmaxf(-32768,fminf(output[i]32768.f,32767))); + + fwrite (out, 2 channels, out_len<maxout?out_len:maxout, stdout); + } + else +#endif + if (pa_stream_write + (stream_out, output, to_write * channels * sizeof (float), NULL, 0, + PA_SEEK_RELATIVE) < 0) + { + GNUNET_log (GNUNET_ERROR_TYPE_ERROR, + _("pa_stream_write() failed: %s\n"), + pa_strerror (pa_context_errno (context))); + } + sampout += wrote; + maxout -= wrote; + } + } while (0 < frame_size && 0 < maxout); + + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, + "Wrote %" PRId64 " samples\n", + sampout); + return sampout; } @@ -157,6 +328,236 @@ quit (int ret) } +static void +ogg_demux_and_decode () +{ + ogg_page og; + static int stream_init; + int64_t page_granule = 0; + ogg_packet op; + static int 
has_opus_stream; + static int has_tags_packet; + static int32_t opus_serialno; + static int64_t link_out; + static int64_t packet_count; + int eos = 0; + static int total_links; + static int gran_offset; + + while (1 == ogg_sync_pageout (&oy, &og)) + { + if (0 == stream_init) + { + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, + "Initialized the stream\n"); + ogg_stream_init (&os, ogg_page_serialno (&og)); + stream_init = 1; + } + if (ogg_page_serialno (&og) != os.serialno) + { + /* so all streams are read. / + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, + "Re-set serial number\n"); + ogg_stream_reset_serialno (&os, ogg_page_serialno (&og)); + } + /Add page to the bitstream/ + ogg_stream_pagein (&os, &og); + page_granule = ogg_page_granulepos (&og); + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, + "Reading page that ends at %" PRId64 "\n", + page_granule); + /Extract all available packets/ + while (1 == ogg_stream_packetout (&os, &op)) + { + /OggOpus streams are identified by a magic string in the initial + stream header./ + if (op.b_o_s && op.bytes >= 8 && !memcmp (op.packet, "OpusHead", 8)) + { + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, + "Got Opus Header\n"); + if (has_opus_stream && has_tags_packet) + { + /If we're seeing another BOS OpusHead now it means + the stream is chained without an EOS. + This can easily happen if record helper is terminated unexpectedly. 
+ / + has_opus_stream = 0; + if (dec) + opus_decoder_destroy (dec); + dec = NULL; + fprintf (stderr, "\nWarning: stream %" PRId64 " ended without EOS and a new stream began.\n", (int64_t) os.serialno); + } + if (!has_opus_stream) + { + if (packet_count > 0 && opus_serialno == os.serialno) + { + fprintf (stderr, "\nError: Apparent chaining without changing serial number (%" PRId64 "==%" PRId64 ").\n", + (int64_t) opus_serialno, (int64_t) os.serialno); + quit(1); + } + opus_serialno = os.serialno; + has_opus_stream = 1; + has_tags_packet = 0; + link_out = 0; + packet_count = 0; + eos = 0; + total_links++; + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, + "Got header for stream %" PRId64 ", this is %dth link\n", + (int64_t) opus_serialno, total_links); + } + else + { + fprintf (stderr, "\nWarning: ignoring opus stream %" PRId64 "\n", (int64_t) os.serialno); + } + } + if (!has_opus_stream \|\| os.serialno != opus_serialno) + { + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, + "breaking out\n"); + break; + } + /If first packet in a logical stream, process the Opus header/ + if (0 == packet_count) + { + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, + "Decoding header\n"); + dec = process_header (&op); + if (!dec) + quit (1); + + if (0 != ogg_stream_packetout (&os, &op) \|\| 255 == og.header[og.header_len - 1]) + { + /The format specifies that the initial header and tags packets are on their + own pages. To aid implementors in discovering that their files are wrong + we reject them explicitly here. In some player designs files like this would + fail even without an explicit test./ + fprintf (stderr, "Extra packets on initial header page. 
Invalid stream.\n"); + quit (1); + } + + /Remember how many samples at the front we were told to skip + so that we can adjust the timestamp counting./ + gran_offset = preskip; + + if (!pcm_buffer) + { + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, + "Allocating %u %u * %u = %u bytes of buffer space\n", + MAX_FRAME_SIZE, channels, sizeof (float), + MAX_FRAME_SIZE * channels * sizeof (float)); + pcm_buffer = pa_xmalloc (sizeof (float) * MAX_FRAME_SIZE * channels); + } + } + else if (1 == packet_count) + { + has_tags_packet = 1; + if (0 != ogg_stream_packetout (&os, &op) \|\| 255 == og.header[og.header_len - 1]) + { + fprintf (stderr, "Extra packets on initial tags page. Invalid stream.\n"); + quit (1); + } + } + else + { + int ret; + int64_t maxout; + int64_t outsamp; + + /End of stream condition/ + if (op.e_o_s && os.serialno == opus_serialno) + { + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, + "Got EOS\n"); + eos = 1; /* don't care for anything except opus eos / + } + + /Decode Opus packet/ + ret = opus_decode_float (dec, + (const unsigned char ) op.packet, + op.bytes, + pcm_buffer, + MAX_FRAME_SIZE, 0); + + /If the decoder returned less than zero, we have an error./ + if (0 > ret) + { + fprintf (stderr, "Decoding error: %s\n", opus_strerror (ret)); + break; + } + frame_size = ret; + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, + "Decoded %d bytes/channel (%d bytes) from %u compressed bytes\n", + ret, ret * channels, op.bytes); + + /Apply header gain, if we're not using an opus library new + enough to do this internally./ + if (0 != gain) + { + int i; + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, + "Applying gain %f\n", + gain); + for (i = 0; i < frame_size * channels; i++) + pcm_buffer[i] = gain; + } + + /This handles making sure that our output duration respects + the final end-trim by not letting the output sample count + get ahead of the granpos indicated value./ + maxout = ((page_granule - gran_offset) SAMPLING_RATE / 48000) - link_out; + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, + "Writing 
audio packet %" PRId64 ", at most %" PRId64 " samples\n", + packet_count, maxout); + + outsamp = audio_write (0 > maxout ? 0 : maxout); + link_out += outsamp; + } + packet_count++; + } + if (eos) + { + has_opus_stream = 0; + if (dec) + opus_decoder_destroy (dec); + dec = NULL; + } + } +} + +/** + * Message callback + / +static int +stdin_receiver (void cls, + void client, + const struct GNUNET_MessageHeader msg) +{ + struct AudioMessage audio; + char data; + size_t payload_len; + + switch (ntohs (msg->type)) + { + case GNUNET_MESSAGE_TYPE_CONVERSATION_AUDIO: + audio = (struct AudioMessage ) msg; + payload_len = ntohs (audio->header.size) - sizeof (struct AudioMessage); + + /Get the ogg buffer for writing/ + data = ogg_sync_buffer (&oy, payload_len); + /Read bitstream from input file/ + memcpy (data, (const unsigned char ) &audio[1], payload_len); + ogg_sync_wrote (&oy, payload_len); + + ogg_demux_and_decode (); + break; + default: + break; + } + return GNUNET_OK; +} + + /** * Callback when data is there for playback / @@ -299,22 +700,17 @@ pa_init () } -/* - * OPUS initialization - / static void -opus_init () +ogg_init () { - int err; - int channels = 1; - - frame_size = SAMPLING_RATE / 50; - pcm_length = frame_size channels * sizeof (float); - - dec = opus_decoder_create (SAMPLING_RATE, channels, &err); - pcm_buffer = (float ) pa_xmalloc (frame_size channels * sizeof (float)); + ogg_sync_init (&oy); } +static void +drain_callback (pa_streams, int success, void userdata) +{ + pa_threaded_mainloop_signal (m, 0); +} /** * The main function for the playback helper. @@ -332,6 +728,9 @@ main (int argc, char argv[]) struct GNUNET_SERVER_MessageStreamTokenizer stdin_mst; char c; ssize_t ret; +#ifdef DEBUG_READ_PURE_OGG + int read_pure_ogg = getenv ("GNUNET_READ_PURE_OGG") ? 
1 : 0; +#endif GNUNET_assert (GNUNET_OK == GNUNET_log_setup ("gnunet-helper-audio-playback", @@ -343,7 +742,7 @@ main (int argc, char argv[]) return 1; } stdin_mst = GNUNET_SERVER_mst_create (&stdin_receiver, NULL); - opus_init (); + ogg_init (); pa_init (); GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Waiting for PulseAudio to be ready.\n"); @@ -352,6 +751,9 @@ main (int argc, char argv[]) close (ready_pipe[1]); ready_pipe[0] = -1; ready_pipe[1] = -1; +#ifdef DEBUG_DUMP_DECODED_OGG + dump_to_stdout = getenv ("GNUNET_DUMP_DECODED_OGG") ? 1 : 0; +#endif while (1) { ret = read (0, readbuf, sizeof (readbuf)); @@ -369,10 +771,41 @@ main (int argc, char argv[]) } if (0 == ret) break; +#ifdef DEBUG_READ_PURE_OGG + if (read_pure_ogg) + { + char data = ogg_sync_buffer (&oy, ret); + memcpy (data, readbuf, ret); + ogg_sync_wrote (&oy, ret); + ogg_demux_and_decode (); + } + else +#endif GNUNET_SERVER_mst_receive (stdin_mst, NULL, readbuf, ret, GNUNET_NO, GNUNET_NO); } GNUNET_SERVER_mst_destroy (stdin_mst); + if (stream_out) + { + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, + "Locking\n"); + pa_threaded_mainloop_lock (m); + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, + "Draining\n"); + pa_operation o = pa_stream_drain (stream_out, drain_callback, NULL); + while (pa_operation_get_state (o) == PA_OPERATION_RUNNING) + { + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, + "Waiting\n"); + pa_threaded_mainloop_wait (m); + } + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, + "Unreffing\n"); + pa_operation_unref (o); + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, + "Unlocking\n"); + pa_threaded_mainloop_unlock (m); + } return 0; } diff --git a/src/conversation/gnunet-helper-audio-record-gst.c b/src/conversation/gnunet-helper-audio-record-gst.c index 8d7a88f..808ad20 100755 --- a/src/conversation/gnunet-helper-audio-record-gst.c +++ b/src/conversation/gnunet-helper-audio-record-gst.c @@ -34,6 +34,8 @@ #include <gst/audio/gstaudiobasesrc.h> #include <glib.h> +#define DEBUG_RECORD_PURE_OGG 1 + /* * Number of channels. 
* Must be one of the following (from libopusenc documentation): @@ -51,7 +53,7 @@ * Must be one of the following (from libopus documentation): * 2.5, 5, 10, 20, 40 or 60 / -#define OPUS_FRAME_SIZE 20 +#define OPUS_FRAME_SIZE 40 /* * Expected packet loss to prepare for, in percents. @@ -68,19 +70,37 @@ * Max number of microseconds to buffer in audiosource. * Default is 200000 / -#define BUFFER_TIME 1000 +#define BUFFER_TIME 1000 / 1ms / /* * Min number of microseconds to buffer in audiosource. * Default is 10000 / -#define LATENCY_TIME 1000 +#define LATENCY_TIME 1000 / 1ms / + +/* + * Maximum delay in multiplexing streams, in ns. + * Setting this to 0 forces page flushing, which + * decreases delay, but increases overhead. + / +#define OGG_MAX_DELAY 0 + +/* + * Maximum delay for sending out a page, in ns. + * Setting this to 0 forces page flushing, which + * decreases delay, but increases overhead. + / +#define OGG_MAX_PAGE_DELAY 0 /* * Main pipeline. / static GstElement pipeline; +#ifdef DEBUG_RECORD_PURE_OGG +static int dump_pure_ogg; +#endif + static void quit () { @@ -103,13 +123,13 @@ bus_call (GstBus bus, GstMessage msg, gpointer data) { gchar debug; GError error; - + gst_message_parse_error (msg, &error, &debug); g_free (debug); - + GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Error: %s\n", error->message); g_error_free (error); - + quit (); break; } @@ -137,18 +157,23 @@ signalhandler (int s) int main (int argc, char *argv) { - GstElement source, encoder, conv, resampler, sink; + GstElement source, filter, encoder, conv, resampler, sink, oggmux; + GstCaps caps; GstBus bus; guint bus_watch_id; struct AudioMessage audio_message; int abort_send = 0; typedef void (SignalHandlerPointer) (int); - + SignalHandlerPointer inthandler, termhandler; inthandler = signal (SIGINT, signalhandler); termhandler = signal (SIGTERM, signalhandler); +#ifdef DEBUG_RECORD_PURE_OGG + dump_pure_ogg = getenv ("GNUNET_RECORD_PURE_OGG") ? 
1 : 0; +#endif + #ifdef WINDOWS setmode (1, _O_BINARY); #endif @@ -169,12 +194,14 @@ main (int argc, char *argv) / Create gstreamer elements / pipeline = gst_pipeline_new ("audio-recorder"); source = gst_element_factory_make ("autoaudiosrc", "audiosource"); + filter = gst_element_factory_make ("capsfilter", "filter"); conv = gst_element_factory_make ("audioconvert", "converter"); resampler= gst_element_factory_make ("audioresample", "resampler"); encoder = gst_element_factory_make ("opusenc", "opus-encoder"); + oggmux = gst_element_factory_make ("oggmux", "ogg-muxer"); sink = gst_element_factory_make ("appsink", "audio-output"); - if (!pipeline \|\| !source \|\| !conv \|\| !resampler \|\| !encoder \|\| !sink) + if (!pipeline \|\| !filter \|\| !source \|\| !conv \|\| !resampler \|\| !encoder \|\| !oggmux \|\| !sink) { GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "One element could not be created. Exiting.\n"); @@ -185,6 +212,17 @@ main (int argc, char argv) / Set up the pipeline / + caps = gst_caps_new_simple ("audio/x-raw", + "format", G_TYPE_STRING, "S16LE", +/ "rate", G_TYPE_INT, SAMPLING_RATE,/ + "channels", G_TYPE_INT, OPUS_CHANNELS, +/ "layout", G_TYPE_STRING, "interleaved",/ + NULL); + g_object_set (G_OBJECT (filter), + "caps", caps, + NULL); + gst_caps_unref (caps); + g_object_set (G_OBJECT (encoder), / "bitrate", 64000, / / "bandwidth", OPUS_BANDWIDTH_FULLBAND, / @@ -194,7 +232,12 @@ main (int argc, char argv) "audio", FALSE, / VoIP, not audio / "frame-size", OPUS_FRAME_SIZE, NULL); - + + g_object_set (G_OBJECT (oggmux), + "max-delay", OGG_MAX_DELAY, + "max-page-delay", OGG_MAX_PAGE_DELAY, + NULL); + / we add a message handler / bus = gst_pipeline_get_bus (GST_PIPELINE (pipeline)); bus_watch_id = gst_bus_add_watch (bus, bus_call, pipeline); @@ -202,11 +245,11 @@ main (int argc, char argv) / we add all elements into the pipeline / / audiosource \| converter \| resampler \| opus-encoder \| audio-output / - gst_bin_add_many (GST_BIN (pipeline), source, conv, 
resampler, encoder, - sink, NULL); + gst_bin_add_many (GST_BIN (pipeline), source, filter, conv, resampler, encoder, + oggmux, sink, NULL); / we link the elements together / - gst_element_link_many (source, conv, resampler, encoder, sink, NULL); + gst_element_link_many (source, filter, conv, resampler, encoder, oggmux, sink, NULL); / Set the pipeline to "playing" state/ GNUNET_log (GNUNET_ERROR_TYPE_INFO, "Now playing\n"); @@ -288,6 +331,10 @@ main (int argc, char argv) ssize_t ret; if (0 == phase) { +#ifdef DEBUG_RECORD_PURE_OGG + if (dump_pure_ogg) + continue; +#endif ptr = (const char ) &audio_message; to_send = sizeof (audio_message); } @@ -308,7 +355,7 @@ main (int argc, char argv) "Failed to write %u bytes at offset %u (total %u) in phase %d: %s\n", (unsigned int) to_send - offset, (unsigned int) offset, (unsigned int) (to_send + offset), phase, strerror (errno)); - abort_send = 1; + abort_send = 1; break; } } diff --git a/src/conversation/gnunet-helper-audio-record.c b/src/conversation/gnunet-helper-audio-record.c index 2081259..a400bf9 100644 --- a/src/conversation/gnunet-helper-audio-record.c +++ b/src/conversation/gnunet-helper-audio-record.c @@ -38,9 +38,104 @@ #include <pulse/pulseaudio.h> #include <opus/opus.h> #include <opus/opus_types.h> +#include <ogg/ogg.h> +#define DEBUG_RECORD_PURE_OGG 1 + +/ + * Sampling rate + / #define SAMPLING_RATE 48000 +/* + * How many ms of audio to buffer before encoding them. + * Possible values: + * 60, 40, 20, 10, 5, 2.5 + / +#define FRAME_SIZE_MS 40 + +/* + * How many samples to buffer before encoding them. + / +#define FRAME_SIZE (SAMPLING_RATE / 1000 FRAME_SIZE_MS) + +/** + * Pages are commited when their size goes over this value. + * Note that in practice we flush pages VERY often (every frame), + * which means that pages NEVER really get to be this big. + * With one-packet-per-page, pages are roughly 100-300 bytes each. + * + * This value is chosen to make MAX_PAYLOAD_BYTES=1024 fit + * into a single page. 
+ / +#define PAGE_WATERLINE 800 + +/* + * Maximum length of opus payload + / +#define MAX_PAYLOAD_BYTES 1024 + +/* + * Number of channels + / +#define CHANNELS 1 + +/* + * Configures the encoder's expected packet loss percentage. + * + * Higher values will trigger progressively more loss resistant behavior + * in the encoder at the expense of quality at a given bitrate + * in the lossless case, but greater quality under loss. + / +#define CONV_OPUS_PACKET_LOSS_PERCENTAGE 1 + +/* + * Configures the encoder's computational complexity. + * + * The supported range is 0-10 inclusive with 10 representing + * the highest complexity. + / +#define CONV_OPUS_ENCODING_COMPLEXITY 10 + +/* + * Configures the encoder's use of inband forward error correction (FEC). + * + * Note: This is only applicable to the LPC layer. + / +#define CONV_OPUS_INBAND_FEC 1 + +/* + * Configures the type of signal being encoded. + * + * This is a hint which helps the encoder's mode selection. + * + * Possible values: + * OPUS_AUTO - (default) Encoder detects the type automatically. + * OPUS_SIGNAL_VOICE - Bias thresholds towards choosing LPC or Hybrid modes. + * OPUS_SIGNAL_MUSIC - Bias thresholds towards choosing MDCT modes. + / +#define CONV_OPUS_SIGNAL OPUS_AUTO + +/* + * Coding mode. + * + * Possible values: + * OPUS_APPLICATION_VOIP - gives best quality at a given bitrate for voice + * signals. It enhances the input signal by high-pass filtering and + * emphasizing formants and harmonics. Optionally it includes in-band forward + * error correction to protect against packet loss. Use this mode for typical + * VoIP applications. Because of the enhancement, even at high bitrates + * the output may sound different from the input. + * OPUS_APPLICATION_AUDIO - gives best quality at a given bitrate for most + * non-voice signals like music. Use this mode for music and mixed + * (music/voice) content, broadcast, and applications requiring less than + * 15 ms of coding delay. 
+ * OPUS_APPLICATION_RESTRICTED_LOWDELAY - configures low-delay mode that + * disables the speech-optimized mode in exchange for slightly reduced delay. + * This mode can only be set on an newly initialized or freshly reset encoder + * because it changes the codec delay. + / +#define CONV_OPUS_APP_TYPE OPUS_APPLICATION_VOIP /* * Specification for recording. May change in the future to spec negotiation. @@ -48,9 +143,38 @@ static pa_sample_spec sample_spec = { .format = PA_SAMPLE_FLOAT32LE, .rate = SAMPLING_RATE, - .channels = 1 + .channels = CHANNELS }; +GNUNET_NETWORK_STRUCT_BEGIN + +/* OggOpus spec says the numbers must be in little-endian order / +struct OpusHeadPacket +{ + uint8_t magic[8]; + uint8_t version; + uint8_t channels; + uint16_t preskip GNUNET_PACKED; + uint32_t sampling_rate GNUNET_PACKED; + uint16_t gain GNUNET_PACKED; + uint8_t channel_mapping; +}; + +struct OpusCommentsPacket +{ + uint8_t magic[8]; + uint32_t vendor_length; + / followed by: + char vendor[vendor_length]; + uint32_t string_count; + followed by @a string_count pairs of: + uint32_t string_length; + char string[string_length]; + / +}; + +GNUNET_NETWORK_STRUCT_END + /* * Pulseaudio mainloop api / @@ -82,7 +206,7 @@ static pa_io_event stdio_event; static OpusEncoder enc; /* - * + * Buffer for encoded data / static unsigned char opus_data; @@ -97,16 +221,6 @@ static float pcm_buffer; static int pcm_length; /* - * Number of samples for one frame - / -static int frame_size; - -/* -* Maximum length of opus payload -/ -static int max_payload_bytes = 1500; - -/* * Audio buffer / static char transmit_buffer; @@ -126,6 +240,28 @@ static size_t transmit_buffer_index; / static struct AudioMessage audio_message; +/** + * Ogg muxer state + / +static ogg_stream_state os; + +/* + * Ogg packet id + / +static int32_t packet_id; + +/* + * Ogg granule for current packet + / +static int64_t enc_granulepos; + +#ifdef DEBUG_RECORD_PURE_OGG +/* + * 1 to not to write GNUnet message headers, + * producing pure 
playable ogg output + / +static int dump_pure_ogg; +#endif /* * Pulseaudio shutdown task @@ -138,20 +274,59 @@ quit (int ret) } +static void +write_data (const char ptr, size_t msg_size) +{ + ssize_t ret; + size_t off; + off = 0; + while (off < msg_size) + { + ret = write (1, &ptr[off], msg_size - off); + if (0 >= ret) + { + if (-1 == ret) + GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "write"); + quit (2); + } + off += ret; + } +} + +static void +write_page (ogg_page og) +{ + static unsigned long long toff; + size_t msg_size; + msg_size = sizeof (struct AudioMessage) + og->header_len + og->body_len; + audio_message->header.size = htons ((uint16_t) msg_size); + memcpy (&audio_message[1], og->header, og->header_len); + memcpy (((char ) &audio_message[1]) + og->header_len, og->body, og->body_len); + + toff += msg_size; + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, + "Sending %u bytes of audio data (total: %llu)\n", + (unsigned int) msg_size, + toff); +#ifdef DEBUG_RECORD_PURE_OGG + if (dump_pure_ogg) + write_data ((const char ) &audio_message[1], og->header_len + og->body_len); + else +#endif + write_data ((const char ) audio_message, msg_size); +} + /* * Creates OPUS packets from PCM data / static void packetizer () { - static unsigned long long toff; char nbuf; size_t new_size; - const char ptr; - size_t off; - ssize_t ret; - int len; // FIXME: int? - size_t msg_size; + int32_t len; + ogg_packet op; + ogg_page og; while (transmit_buffer_length >= transmit_buffer_index + pcm_length) { @@ -160,37 +335,42 @@ packetizer () pcm_length); transmit_buffer_index += pcm_length; len = - opus_encode_float (enc, pcm_buffer, frame_size, opus_data, - max_payload_bytes); + opus_encode_float (enc, pcm_buffer, FRAME_SIZE, opus_data, + MAX_PAYLOAD_BYTES); + if (len < 0) + { + GNUNET_log (GNUNET_ERROR_TYPE_ERROR, + _("opus_encode_float() failed: %s. 
Aborting\n"), + opus_strerror (len)); + quit (5); + } if (len > UINT16_MAX - sizeof (struct AudioMessage)) { GNUNET_break (0); continue; } + / As per OggOpus spec, granule is calculated as if the audio + had 48kHz sampling rate. / + enc_granulepos += FRAME_SIZE 48000 / SAMPLING_RATE; - msg_size = sizeof (struct AudioMessage) + len; - audio_message->header.size = htons ((uint16_t) msg_size); - memcpy (&audio_message[1], opus_data, len); + op.packet = (unsigned char ) opus_data; + op.bytes = len; + op.b_o_s = 0; + op.e_o_s = 0; + op.granulepos = enc_granulepos; + op.packetno = packet_id++; + ogg_stream_packetin (&os, &op); - toff += msg_size; - GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, - "Sending %u bytes of audio data (total: %llu)\n", - (unsigned int) msg_size, - toff); - ptr = (const char ) audio_message; - off = 0; - while (off < msg_size) + while (ogg_stream_flush_fill (&os, &og, PAGE_WATERLINE)) { - ret = write (1, &ptr[off], msg_size - off); - if (0 >= ret) + if (og.header_len + og.body_len > UINT16_MAX - sizeof (struct AudioMessage)) { - if (-1 == ret) - GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "write"); - quit (2); + GNUNET_assert (0); + continue; } - off += ret; + write_page (&og); } } @@ -460,27 +640,112 @@ pa_init () static void opus_init () { - int channels = 1; int err; - frame_size = SAMPLING_RATE / 50; - pcm_length = frame_size * channels * sizeof (float); + pcm_length = FRAME_SIZE * CHANNELS * sizeof (float); pcm_buffer = pa_xmalloc (pcm_length); - opus_data = GNUNET_malloc (max_payload_bytes); + opus_data = GNUNET_malloc (MAX_PAYLOAD_BYTES); enc = opus_encoder_create (SAMPLING_RATE, - channels, - OPUS_APPLICATION_VOIP, + CHANNELS, + CONV_OPUS_APP_TYPE, &err); opus_encoder_ctl (enc, - OPUS_SET_PACKET_LOSS_PERC(1)); + OPUS_SET_PACKET_LOSS_PERC (CONV_OPUS_PACKET_LOSS_PERCENTAGE)); opus_encoder_ctl (enc, - OPUS_SET_COMPLEXITY(10)); + OPUS_SET_COMPLEXITY (CONV_OPUS_ENCODING_COMPLEXITY)); opus_encoder_ctl (enc, - OPUS_SET_INBAND_FEC(1)); + 
OPUS_SET_INBAND_FEC (CONV_OPUS_INBAND_FEC)); opus_encoder_ctl (enc, OPUS_SET_SIGNAL (OPUS_SIGNAL_VOICE)); } +static void +ogg_init () +{ + int serialno; + struct OpusHeadPacket headpacket; + struct OpusCommentsPacket commentspacket; + size_t commentspacket_len; + + serialno = GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_STRONG, 0x7FFFFFFF); + + /Initialize Ogg stream struct/ + if (-1 == ogg_stream_init (&os, serialno)) + { + GNUNET_log (GNUNET_ERROR_TYPE_ERROR, + _("ogg_stream_init() failed.\n")); + exit (3); + } + + packet_id = 0; + + /Write header/ + { + ogg_packet op; + ogg_page og; + const char opusver; + int vendor_length; + + memcpy (headpacket.magic, "OpusHead", 8); + headpacket.version = 1; + headpacket.channels = CHANNELS; + headpacket.preskip = GNUNET_htole16 (0); + headpacket.sampling_rate = GNUNET_htole32 (SAMPLING_RATE); + headpacket.gain = GNUNET_htole16 (0); + headpacket.channel_mapping = 0; /* Mono or stereo / + + op.packet = (unsigned char ) &headpacket; + op.bytes = sizeof (headpacket); + op.b_o_s = 1; + op.e_o_s = 0; + op.granulepos = 0; + op.packetno = packet_id++; + ogg_stream_packetin (&os, &op); + + /* Head packet must be alone on its page / + while (ogg_stream_flush (&os, &og)) + { + write_page (&og); + } + + commentspacket_len = sizeof (commentspacket); + opusver = opus_get_version_string (); + vendor_length = strlen (opusver); + commentspacket_len += vendor_length; + commentspacket_len += sizeof (uint32_t); + + commentspacket = (struct OpusCommentsPacket ) malloc (commentspacket_len); + if (NULL == commentspacket) + { + GNUNET_log (GNUNET_ERROR_TYPE_ERROR, + _("Failed to allocate %d bytes for second packet\n"), + commentspacket_len); + exit (5); + } + + memcpy (commentspacket->magic, "OpusTags", 8); + commentspacket->vendor_length = GNUNET_htole32 (vendor_length); + memcpy (&commentspacket[1], opusver, vendor_length); + (uint32_t ) &((char ) &commentspacket[1])[vendor_length] = \ + GNUNET_htole32 (0); /* no tags / + + op.packet = 
(unsigned char ) commentspacket; + op.bytes = commentspacket_len; + op.b_o_s = 0; + op.e_o_s = 0; + op.granulepos = 0; + op.packetno = packet_id++; + ogg_stream_packetin (&os, &op); + + /* Comment packets must not be mixed with audio packets on their pages / + while (ogg_stream_flush (&os, &og)) + { + write_page (&og); + } + + free (commentspacket); + } +} /* * The main function for the record helper. @@ -500,6 +765,11 @@ main (int argc, char *argv[]) "Audio source starts\n"); audio_message = GNUNET_malloc (UINT16_MAX); audio_message->header.type = htons (GNUNET_MESSAGE_TYPE_CONVERSATION_AUDIO); + +#ifdef DEBUG_RECORD_PURE_OGG + dump_pure_ogg = getenv ("GNUNET_RECORD_PURE_OGG") ? 1 : 0; +#endif + ogg_init (); opus_init (); pa_init (); return 0; -- 1.8.5.2 0001-Wrap-opus-in-ogg-container.patch (47,772 bytes)

LRN 2014-01-08 17:24 developer ~0007975	At the moment I'm OK with ogg. The pulseaudio+libopus conversation backend will need to use and link to libogg. The GStreamer conversation backend will just add oggmux/oggdemux to the pipeline (and drop the manual libopus decoding that I hacked in to work around a bug in raw opus playback).

LRN 2014-01-10 13:22 developer ~0007980	News from the gst camp: the raw opus playback bug was closed as invalid. I've been told that it's not possible to play a raw opus stream without some metadata (packet size). The pulseaudio-based backend hardcodes this information on both ends, but the GStreamer-based backend does not have direct control over the opus decoder, and there's no way to give it that information (well, except for using a container...). So yeah, a container IS needed.

LRN 2014-01-12 10:34 developer ~0007982	I have discovered that using libogg to wrap libopus in ogg is not as easy as I thought: there are too few examples, and existing programs that do it (opusenc, for example) are 1k LOC monsters. Feasible options (for the pulseaudio-based backend): 1) Use libav (like libextractor does) 2) Drop the pulseaudio-based backend (only have the GStreamer-based backend) Unfeasible (but possible) options: 1) Keep carefully stripping opusenc of features until it's simple enough to grasp, then use it as an example for implementing the pulseaudio+libopus+libogg backend.

Christian Grothoff 2014-01-13 11:38 manager ~0007984	Using libav sounds fine --- if that's significantly easier than stripping opusenc. But 1k LOC doesn't sound that bad either (but I've not seen the code; 1k clean code vs. 1k messy code makes a big difference...)

LRN 2014-01-13 22:14 developer ~0007988	Uploaded 0001-Implement-ogg-wrapping-for-the-recorder-v1.patch - adds ogg wrapping for the recorder. I can't test it, since I don't have PulseAudio. It compiles - that's all I know. For testing, change this line: write_data ((const char *) audio_message, msg_size); into this: write_data ((const char *) &audio_message[1], msg_size - sizeof (struct AudioMessage)); then dump the output into a file and try playing it in VLC.

LRN 2014-01-13 22:30 developer ~0007989	and yeah, you'll need to add -logg to the LIBS in Makefile.am

Christian Grothoff 2014-01-14 11:37 manager ~0007990	Ok, that "OpusHeader" should be defined without typedef, and I'd like a few more comments on the members (as I don't fully know what they mean). Also, that 255*255 looks very strange; what does the number signify? If it is the sample size, it seems way too big as 64k samples likely mean a way too high latency. Also, the opus_header_to_packet is quite ugly (especially the write_uintXX). Why can't we do our usual thing of defining a C struct with the proper (packed) layout and ntohl/ntohs things as needed (at least for the header)?

Christian Grothoff 2014-01-14 11:40 manager ~0007991	Running the code with the patch, I get: Jan 14 11:40:06-236991 gnunet-helper-audio-playback-19628 ERROR Opus decoding failed: -4

LRN 2014-01-15 00:50 developer ~0007992	OK, I think I've managed to fix this to work (at least I'm getting a playable ogg file out of it now). The problems were: 1) I discarded the code that wrote the second packet (with comments). It turns out the OggOpus spec mandates that this packet must exist. I've added the code for it back. 2) I didn't set e_o_s appropriately (the code was lost during refactoring). Now the question is about the 255*255 thing. I've switched the API that is invoked for flushing to ogg_stream_pageout_fill() and gave it 1300 as a watermark. Now it always writes 1200-1400-byte-long pages. The thing is, on IRC you mentioned "buffer 5ms at most", and this is definitely not 5ms. In fact, the frame size is SAMPLING_RATE/50 (this is from your original code), which is 960 samples. With 48000 samples per second / 1000 ms per second = 48 samples per ms, 960 samples means 20ms. This is the amount of data that we currently feed to the opus encoder. So the latency is at least 20ms to begin with.

LRN 2014-01-16 10:37 developer ~0007996	Uploaded 0001-Wrap-opus-in-ogg-container.patch , adds ogg wrapping to both recorder and player, for both backends.

LRN 2014-01-16 10:58 developer ~0007997	To summarize some of the stuff I've said on IRC: ogg overhead for ultra-low-delay streaming is huge, and can go up to 50% for extreme cases (2.5ms frame size, one packet per page). On the other hand, opus does not allow frame sizes larger than 60ms, and at such large frame sizes the overhead is acceptable. The one-packet-per-page rule can be changed to an enough-packets-to-provide-N-ms-of-audio rule, where N is a multiple of the frame size. GStreamer supports this out of the box; for the pulseaudio backend this will have to be implemented. The difference between this and one-packet-per-page is, basically, in increased granularity (multiples of 2.5ms (for the smallest frame sizes) instead of a fixed (and small) set of supported frame sizes). However, note that opus uses less efficient encoding for the smallest frame sizes, so this strategy may yield poor results (lower quality) for small frames, and provide no advantage (the only "unusual" page size we can get is 30ms-per-page (3 packets, each made from a 10ms frame)). Another nitpick: the pulseaudio-based player does not support stereo playback. This is worked around in the GStreamer-based recorder by forcing mono mode (normally GStreamer will negotiate a stereo stream, if the microphone provides that, which it usually does). This is fixable, but requires PA reinitialization on the fly (or, at least, delayed initialization). There's also the matter of supporting streams that change their parameters on the fly (opus has that feature), but that's a bug for another day.

Christian Grothoff 2014-01-30 15:51 manager ~0008042	So is this issue not resolved now?

LRN 2014-01-30 15:55 developer ~0008043	If you consider: 1) Very high overhead on ultra-small frames (because ogg) 2) PA backend doesn't support stereo (because inflexible backend implementation) 3) No on-the-fly parameter adjustments (because no one cares) to be "resolved" (in the context of this issue) - then yes, it's resolved.

Christian Grothoff 2014-01-30 17:04 manager ~0008044	I consider those separate issues (or (1) something that we simply cannot resolve).

Date Modified	Username	Field	Change
2014-01-08 17:21	LRN	New Issue
2014-01-08 17:24	LRN	Note Added: 0007975
2014-01-10 13:22	LRN	Note Added: 0007980
2014-01-10 13:28	Christian Grothoff	Assigned To	=> LRN
2014-01-10 13:28	Christian Grothoff	Status	new => assigned
2014-01-10 13:29	Christian Grothoff	Product Version	=> 0.10.0
2014-01-12 10:34	LRN	Note Added: 0007982
2014-01-13 11:38	Christian Grothoff	Note Added: 0007984
2014-01-13 22:11	LRN	File Added: 0001-Implement-ogg-wrapping-for-the-recorder-v1.patch
2014-01-13 22:14	LRN	Note Added: 0007988
2014-01-13 22:30	LRN	Note Added: 0007989
2014-01-14 11:37	Christian Grothoff	Note Added: 0007990
2014-01-14 11:40	Christian Grothoff	Note Added: 0007991
2014-01-15 00:50	LRN	Note Added: 0007992
2014-01-16 10:36	LRN	File Added: 0001-Wrap-opus-in-ogg-container.patch
2014-01-16 10:37	LRN	Note Added: 0007996
2014-01-16 10:58	LRN	Note Added: 0007997
2014-01-30 15:51	Christian Grothoff	Note Added: 0008042
2014-01-30 15:51	Christian Grothoff	Target Version	=> 0.10.1
2014-01-30 15:55	LRN	Note Added: 0008043
2014-01-30 17:04	Christian Grothoff	Note Added: 0008044
2014-01-30 17:04	Christian Grothoff	Status	assigned => resolved
2014-01-30 17:04	Christian Grothoff	Fixed in Version	=> 0.10.1
2014-01-30 17:04	Christian Grothoff	Resolution	open => fixed
2014-04-08 16:42	Christian Grothoff	Status	resolved => closed

View Issue Details

Activities

Issue History