diff --git a/doc/muxers.texi b/doc/muxers.texi index 7101df072dec2..a07c5f6b1b3f1 100644 --- a/doc/muxers.texi +++ b/doc/muxers.texi @@ -3915,16 +3915,37 @@ Default value is 5000. @item pkt_size @var{integer} Set the maximum size, in bytes, of RTP packets that send out. -Default value is 1500. +Default value is 1200. @item authorization @var{string} -The optional Bearer token for WHIP Authorization. +Optional Bearer token for WHIP Authorization. @item cert_file @var{string} -The optional certificate file path for DTLS. +Optional certificate file path for DTLS. @item key_file @var{string} -The optional private key file path for DTLS. +Optional private key file path for DTLS. + +@item whip_flags @var{flags} +Possible values: + +@table @samp +@item ignore_ipv6 +Ignore any IPv6 ICE candidates. + +@item disable_rtx +Disable RFC 4588 RTX (Retransmission) support. +This disables the retransmission mechanism for lost RTP packets. + +@item dtls_active +Set DTLS role as active (client role) instead of passive (server role). +By default, WHIP uses passive DTLS role, but some servers may require active role. +@end table + +@item rtx_history_size @var{integer} +Set the packet history size for RTX (retransmission) support. +This determines how many recent RTP packets are kept in memory for potential +retransmission requests. Range is 64 to 2048, default is 256. @end table diff --git a/doc/protocols.texi b/doc/protocols.texi index 089f917dcceb7..fc67e869f52de 100644 --- a/doc/protocols.texi +++ b/doc/protocols.texi @@ -2028,6 +2028,84 @@ To play back a stream from the TLS/SSL server using @command{ffplay}: ffplay tls://@var{hostname}:@var{port} @end example +@section dtls + +Datagram Transport Layer Security (DTLS) + +The required syntax for a DTLS URL is: +@example +dtls://@var{hostname}:@var{port} +@end example + +DTLS shares most options with TLS, but operates over UDP instead of TCP. 
+The following parameters can be set via command line options +(or in code via @code{AVOption}s): + +@table @option + +@item ca_file, cafile=@var{filename} +A file containing certificate authority (CA) root certificates to treat +as trusted. If the linked TLS library contains a default this might not +need to be specified for verification to work, but not all libraries and +setups have defaults built in. +The file must be in OpenSSL PEM format. + +@item tls_verify=@var{1|0} +If enabled, try to verify the peer that we are communicating with. +Note, if using OpenSSL, this currently only makes sure that the +peer certificate is signed by one of the root certificates in the CA +database, but it does not validate that the certificate actually +matches the host name we are trying to connect to. + +This is disabled by default since it requires a CA database to be +provided by the caller in many cases. + +@item cert_file, cert=@var{filename} +A file containing a certificate to use in the handshake with the peer. +(When operating as server, in listen mode, this is more often required +by the peer, while client certificates only are mandated in certain +setups.) + +@item key_file, key=@var{filename} +A file containing the private key for the certificate. + +@item cert_pem=@var{string} +A PEM-encoded certificate string to use in the handshake with the peer. + +@item key_pem=@var{string} +A PEM-encoded private key string for the certificate. + +@item listen=@var{1|0} +If enabled, listen for connections on the provided port, and assume +the server role in the handshake instead of the client role. + +@item mtu=@var{size} +Set the Maximum Transmission Unit (MTU) for DTLS packets. + +@item use_srtp=@var{1|0} +Enable the use_srtp DTLS extension. +This is used in WebRTC applications to establish SRTP encryption keys +through the DTLS handshake. Default is disabled. + +@item external_sock=@var{1|0} +Use an external socket instead of creating a new one. Default is disabled. 
+ +@end table + +Example command lines: + +To create a DTLS server: + +@example +ffmpeg -listen 1 -i dtls://@var{hostname}:@var{port} @var{output} +@end example + +To create a DTLS client and send data to server: + +@example +ffmpeg -i @var{input} -f @var{format} dtls://@var{hostname}:@var{port} +@end example + @section udp User Datagram Protocol. diff --git a/libavformat/tls.h b/libavformat/tls.h index 0c02a4ab27840..a11f8d6afb042 100644 --- a/libavformat/tls.h +++ b/libavformat/tls.h @@ -33,17 +33,6 @@ */ #define MAX_CERTIFICATE_SIZE 8192 -enum DTLSState { - DTLS_STATE_NONE, - - /* Whether DTLS handshake is finished. */ - DTLS_STATE_FINISHED, - /* Whether DTLS session is closed. */ - DTLS_STATE_CLOSED, - /* Whether DTLS handshake is failed. */ - DTLS_STATE_FAILED, -}; - typedef struct TLSShared { char *ca_file; int verify; @@ -62,8 +51,7 @@ typedef struct TLSShared { URLContext *tcp; int is_dtls; - - enum DTLSState state; + int use_srtp; /* The certificate and private key content used for DTLS handshake */ char* cert_buf; @@ -90,6 +78,7 @@ typedef struct TLSShared { {"listen", "Listen for incoming connections", offsetof(pstruct, options_field . listen), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, .flags = TLS_OPTFL }, \ {"http_proxy", "Set proxy to tunnel through", offsetof(pstruct, options_field . http_proxy), AV_OPT_TYPE_STRING, .flags = TLS_OPTFL }, \ {"external_sock", "Use external socket", offsetof(pstruct, options_field . external_sock), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, .flags = TLS_OPTFL }, \ + {"use_srtp", "Enable use_srtp DTLS extension", offsetof(pstruct, options_field . use_srtp), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, .flags = TLS_OPTFL }, \ {"mtu", "Maximum Transmission Unit", offsetof(pstruct, options_field . mtu), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, .flags = TLS_OPTFL}, \ {"cert_pem", "Certificate PEM string", offsetof(pstruct, options_field . 
cert_buf), AV_OPT_TYPE_STRING, .flags = TLS_OPTFL }, \ {"key_pem", "Private key PEM string", offsetof(pstruct, options_field . key_buf), AV_OPT_TYPE_STRING, .flags = TLS_OPTFL }, \ @@ -103,8 +92,6 @@ int ff_tls_set_external_socket(URLContext *h, URLContext *sock); int ff_dtls_export_materials(URLContext *h, char *dtls_srtp_materials, size_t materials_sz); -int ff_dtls_state(URLContext *h); - int ff_ssl_read_key_cert(char *key_url, char *cert_url, char *key_buf, size_t key_sz, char *cert_buf, size_t cert_sz, char **fingerprint); int ff_ssl_gen_key_cert(char *key_buf, size_t key_sz, char *cert_buf, size_t cert_sz, char **fingerprint); diff --git a/libavformat/tls_openssl.c b/libavformat/tls_openssl.c index 9f7b46c3ca2c1..edfbbeaa8ad5b 100644 --- a/libavformat/tls_openssl.c +++ b/libavformat/tls_openssl.c @@ -314,7 +314,8 @@ static int openssl_gen_private_key(EVP_PKEY **pkey) static int openssl_gen_certificate(EVP_PKEY *pkey, X509 **cert, char **fingerprint) { - int ret = 0, serial, expire_day; + int ret = 0, expire_day; + uint64_t serial; const char *aor = "lavf"; X509_NAME* subject = NULL; @@ -329,8 +330,8 @@ static int openssl_gen_certificate(EVP_PKEY *pkey, X509 **cert, char **fingerpri goto enomem_end; } - serial = (int)av_get_random_seed(); - if (ASN1_INTEGER_set(X509_get_serialNumber(*cert), serial) != 1) { + serial = av_get_random_seed(); + if (ASN1_INTEGER_set_uint64(X509_get_serialNumber(*cert), serial) != 1) { av_log(NULL, AV_LOG_ERROR, "TLS: Failed to set serial, %s\n", ERR_error_string(ERR_get_error(), NULL)); goto einval_end; } @@ -500,16 +501,16 @@ typedef struct TLSContext { * to a human-readable string, and stores it in the TLSContext's error_message field. * The error queue is then cleared using ERR_clear_error(). 
*/ -static const char* openssl_get_error(TLSContext *ctx) +static const char* openssl_get_error(TLSContext *c) { int r2 = ERR_get_error(); if (r2) { - ERR_error_string_n(r2, ctx->error_message, sizeof(ctx->error_message)); + ERR_error_string_n(r2, c->error_message, sizeof(c->error_message)); } else - ctx->error_message[0] = '\0'; + c->error_message[0] = '\0'; ERR_clear_error(); - return ctx->error_message; + return c->error_message; } int ff_tls_set_external_socket(URLContext *h, URLContext *sock) @@ -540,12 +541,6 @@ int ff_dtls_export_materials(URLContext *h, char *dtls_srtp_materials, size_t ma return 0; } -int ff_dtls_state(URLContext *h) -{ - TLSContext *c = h->priv_data; - return c->tls_shared.state; -} - static int print_ssl_error(URLContext *h, int ret) { TLSContext *c = h->priv_data; @@ -661,24 +656,24 @@ static int url_bio_bputs(BIO *b, const char *str) static av_cold void init_bio_method(URLContext *h) { - TLSContext *p = h->priv_data; + TLSContext *c = h->priv_data; BIO *bio; - p->url_bio_method = BIO_meth_new(BIO_TYPE_SOURCE_SINK, "urlprotocol bio"); - BIO_meth_set_write(p->url_bio_method, url_bio_bwrite); - BIO_meth_set_read(p->url_bio_method, url_bio_bread); - BIO_meth_set_puts(p->url_bio_method, url_bio_bputs); - BIO_meth_set_ctrl(p->url_bio_method, url_bio_ctrl); - BIO_meth_set_create(p->url_bio_method, url_bio_create); - BIO_meth_set_destroy(p->url_bio_method, url_bio_destroy); - bio = BIO_new(p->url_bio_method); - BIO_set_data(bio, p); - - SSL_set_bio(p->ssl, bio, bio); + c->url_bio_method = BIO_meth_new(BIO_TYPE_SOURCE_SINK, "urlprotocol bio"); + BIO_meth_set_write(c->url_bio_method, url_bio_bwrite); + BIO_meth_set_read(c->url_bio_method, url_bio_bread); + BIO_meth_set_puts(c->url_bio_method, url_bio_bputs); + BIO_meth_set_ctrl(c->url_bio_method, url_bio_ctrl); + BIO_meth_set_create(c->url_bio_method, url_bio_create); + BIO_meth_set_destroy(c->url_bio_method, url_bio_destroy); + bio = BIO_new(c->url_bio_method); + BIO_set_data(bio, c); + + 
SSL_set_bio(c->ssl, bio, bio); } static void openssl_info_callback(const SSL *ssl, int where, int ret) { const char *method = "undefined"; - TLSContext *ctx = (TLSContext*)SSL_get_ex_data(ssl, 0); + TLSContext *c = (TLSContext*)SSL_get_ex_data(ssl, 0); if (where & SSL_ST_CONNECT) { method = "SSL_connect"; @@ -686,11 +681,11 @@ static void openssl_info_callback(const SSL *ssl, int where, int ret) { method = "SSL_accept"; if (where & SSL_CB_LOOP) { - av_log(ctx, AV_LOG_DEBUG, "Info method=%s state=%s(%s), where=%d, ret=%d\n", + av_log(c, AV_LOG_DEBUG, "Info method=%s state=%s(%s), where=%d, ret=%d\n", method, SSL_state_string(ssl), SSL_state_string_long(ssl), where, ret); } else if (where & SSL_CB_ALERT) { method = (where & SSL_CB_READ) ? "read":"write"; - av_log(ctx, AV_LOG_DEBUG, "Alert method=%s state=%s(%s), where=%d, ret=%d\n", + av_log(c, AV_LOG_DEBUG, "Alert method=%s state=%s(%s), where=%d, ret=%d\n", method, SSL_state_string(ssl), SSL_state_string_long(ssl), where, ret); } } @@ -698,29 +693,28 @@ static void openssl_info_callback(const SSL *ssl, int where, int ret) { static int dtls_handshake(URLContext *h) { int ret = 1, r0, r1; - TLSContext *p = h->priv_data; + TLSContext *c = h->priv_data; - p->tls_shared.udp->flags &= ~AVIO_FLAG_NONBLOCK; + c->tls_shared.udp->flags &= ~AVIO_FLAG_NONBLOCK; - r0 = SSL_do_handshake(p->ssl); + r0 = SSL_do_handshake(c->ssl); if (r0 <= 0) { - r1 = SSL_get_error(p->ssl, r0); + r1 = SSL_get_error(c->ssl, r0); if (r1 != SSL_ERROR_WANT_READ && r1 != SSL_ERROR_WANT_WRITE && r1 != SSL_ERROR_ZERO_RETURN) { - av_log(p, AV_LOG_ERROR, "Handshake failed, r0=%d, r1=%d\n", r0, r1); + av_log(c, AV_LOG_ERROR, "Handshake failed, r0=%d, r1=%d\n", r0, r1); ret = print_ssl_error(h, r0); goto end; } } else { - av_log(p, AV_LOG_TRACE, "Handshake success, r0=%d\n", r0); + av_log(c, AV_LOG_TRACE, "Handshake success, r0=%d\n", r0); } /* Check whether the handshake is completed. 
*/ - if (SSL_is_init_finished(p->ssl) != TLS_ST_OK) + if (SSL_is_init_finished(c->ssl) != TLS_ST_OK) goto end; ret = 0; - p->tls_shared.state = DTLS_STATE_FINISHED; end: return ret; } @@ -728,57 +722,57 @@ static int dtls_handshake(URLContext *h) static av_cold int openssl_init_ca_key_cert(URLContext *h) { int ret; - TLSContext *p = h->priv_data; - TLSShared *c = &p->tls_shared; + TLSContext *c = h->priv_data; + TLSShared *s = &c->tls_shared; EVP_PKEY *pkey = NULL; X509 *cert = NULL; /* setup ca, private key, certificate */ - if (c->ca_file) { - if (!SSL_CTX_load_verify_locations(p->ctx, c->ca_file, NULL)) - av_log(h, AV_LOG_ERROR, "SSL_CTX_load_verify_locations %s\n", openssl_get_error(p)); + if (s->ca_file) { + if (!SSL_CTX_load_verify_locations(c->ctx, s->ca_file, NULL)) + av_log(h, AV_LOG_ERROR, "SSL_CTX_load_verify_locations %s\n", openssl_get_error(c)); } else { - if (!SSL_CTX_set_default_verify_paths(p->ctx)) { + if (!SSL_CTX_set_default_verify_paths(c->ctx)) { // Only log the failure but do not error out, as this is not fatal av_log(h, AV_LOG_WARNING, "Failure setting default verify locations: %s\n", - openssl_get_error(p)); + openssl_get_error(c)); } } - if (c->cert_file) { - ret = SSL_CTX_use_certificate_chain_file(p->ctx, c->cert_file); + if (s->cert_file) { + ret = SSL_CTX_use_certificate_chain_file(c->ctx, s->cert_file); if (ret <= 0) { av_log(h, AV_LOG_ERROR, "Unable to load cert file %s: %s\n", - c->cert_file, openssl_get_error(p)); + s->cert_file, openssl_get_error(c)); ret = AVERROR(EIO); goto fail; } - } else if (c->cert_buf) { - cert = cert_from_pem_string(c->cert_buf); - if (SSL_CTX_use_certificate(p->ctx, cert) != 1) { - av_log(p, AV_LOG_ERROR, "SSL: Init SSL_CTX_use_certificate failed, %s\n", openssl_get_error(p)); + } else if (s->cert_buf) { + cert = cert_from_pem_string(s->cert_buf); + if (SSL_CTX_use_certificate(c->ctx, cert) != 1) { + av_log(c, AV_LOG_ERROR, "SSL: Init SSL_CTX_use_certificate failed, %s\n", openssl_get_error(c)); ret = 
AVERROR(EINVAL); goto fail; } } - if (c->key_file) { - ret = SSL_CTX_use_PrivateKey_file(p->ctx, c->key_file, SSL_FILETYPE_PEM); + if (s->key_file) { + ret = SSL_CTX_use_PrivateKey_file(c->ctx, s->key_file, SSL_FILETYPE_PEM); if (ret <= 0) { av_log(h, AV_LOG_ERROR, "Unable to load key file %s: %s\n", - c->key_file, openssl_get_error(p)); + s->key_file, openssl_get_error(c)); ret = AVERROR(EIO); goto fail; } - } else if (c->key_buf) { - pkey = pkey_from_pem_string(c->key_buf, 1); - if (SSL_CTX_use_PrivateKey(p->ctx, pkey) != 1) { - av_log(p, AV_LOG_ERROR, "Init SSL_CTX_use_PrivateKey failed, %s\n", openssl_get_error(p)); + } else if (s->key_buf) { + pkey = pkey_from_pem_string(s->key_buf, 1); + if (SSL_CTX_use_PrivateKey(c->ctx, pkey) != 1) { + av_log(c, AV_LOG_ERROR, "Init SSL_CTX_use_PrivateKey failed, %s\n", openssl_get_error(c)); ret = AVERROR(EINVAL); goto fail; } } - if (c->listen && !c->cert_file && !c->cert_buf && !c->key_file && !c->key_buf) { + if (s->listen && !s->cert_file && !s->cert_buf && !s->key_file && !s->key_buf) { av_log(h, AV_LOG_VERBOSE, "No server certificate provided, using self-signed\n"); ret = openssl_gen_private_key(&pkey); @@ -789,14 +783,14 @@ static av_cold int openssl_init_ca_key_cert(URLContext *h) if (ret < 0) goto fail; - if (SSL_CTX_use_certificate(p->ctx, cert) != 1) { - av_log(p, AV_LOG_ERROR, "SSL_CTX_use_certificate failed for self-signed cert, %s\n", openssl_get_error(p)); + if (SSL_CTX_use_certificate(c->ctx, cert) != 1) { + av_log(c, AV_LOG_ERROR, "SSL_CTX_use_certificate failed for self-signed cert, %s\n", openssl_get_error(c)); ret = AVERROR(EINVAL); goto fail; } - if (SSL_CTX_use_PrivateKey(p->ctx, pkey) != 1) { - av_log(p, AV_LOG_ERROR, "SSL_CTX_use_PrivateKey failed for self-signed cert, %s\n", openssl_get_error(p)); + if (SSL_CTX_use_PrivateKey(c->ctx, pkey) != 1) { + av_log(c, AV_LOG_ERROR, "SSL_CTX_use_PrivateKey failed for self-signed cert, %s\n", openssl_get_error(c)); ret = AVERROR(EINVAL); goto fail; } @@ 
-815,19 +809,15 @@ static av_cold int openssl_init_ca_key_cert(URLContext *h) */ static int dtls_start(URLContext *h, const char *url, int flags, AVDictionary **options) { - TLSContext *p = h->priv_data; - TLSShared *c = &p->tls_shared; + TLSContext *c = h->priv_data; + TLSShared *s = &c->tls_shared; + if (!s) + return AVERROR(EINVAL); int ret = 0; - c->is_dtls = 1; + s->is_dtls = 1; - /** - * The profile for OpenSSL's SRTP is SRTP_AES128_CM_SHA1_80, see ssl/d1_srtp.c. - * The profile for FFmpeg's SRTP is SRTP_AES128_CM_HMAC_SHA1_80, see libavformat/srtp.c. - */ - const char* profiles = "SRTP_AES128_CM_SHA1_80"; - - p->ctx = SSL_CTX_new(c->listen ? DTLS_server_method() : DTLS_client_method()); - if (!p->ctx) { + c->ctx = SSL_CTX_new(s->listen ? DTLS_server_method() : DTLS_client_method()); + if (!c->ctx) { ret = AVERROR(ENOMEM); goto fail; } @@ -836,53 +826,61 @@ static int dtls_start(URLContext *h, const char *url, int flags, AVDictionary ** if (ret < 0) goto fail; /* Note, this doesn't check that the peer certificate actually matches the requested hostname. */ - if (c->verify) - SSL_CTX_set_verify(p->ctx, SSL_VERIFY_PEER|SSL_VERIFY_FAIL_IF_NO_PEER_CERT, NULL); - - /* Setup the SRTP context */ - if (SSL_CTX_set_tlsext_use_srtp(p->ctx, profiles)) { - av_log(p, AV_LOG_ERROR, "Init SSL_CTX_set_tlsext_use_srtp failed, profiles=%s, %s\n", - profiles, openssl_get_error(p)); - ret = AVERROR(EINVAL); - return ret; + if (s->verify) + SSL_CTX_set_verify(c->ctx, SSL_VERIFY_PEER|SSL_VERIFY_FAIL_IF_NO_PEER_CERT, NULL); + + if (s->use_srtp) { + /** + * The profile for OpenSSL's SRTP is SRTP_AES128_CM_SHA1_80, see ssl/d1_srtp.c. + * The profile for FFmpeg's SRTP is SRTP_AES128_CM_HMAC_SHA1_80, see libavformat/srtp.c. 
+ */ + const char* profiles = "SRTP_AES128_CM_SHA1_80"; + if (SSL_CTX_set_tlsext_use_srtp(c->ctx, profiles)) { + av_log(c, AV_LOG_ERROR, "Init SSL_CTX_set_tlsext_use_srtp failed, profiles=%s, %s\n", + profiles, openssl_get_error(c)); + ret = AVERROR(EINVAL); + goto fail; + } } /* The ssl should not be created unless the ctx has been initialized. */ - p->ssl = SSL_new(p->ctx); - if (!p->ssl) { + c->ssl = SSL_new(c->ctx); + if (!c->ssl) { ret = AVERROR(ENOMEM); goto fail; } - if (!c->listen && !c->numerichost) - SSL_set_tlsext_host_name(p->ssl, c->host); + if (!s->listen && !s->numerichost) + SSL_set_tlsext_host_name(c->ssl, s->host); /* Setup the callback for logging. */ - SSL_set_ex_data(p->ssl, 0, p); - SSL_CTX_set_info_callback(p->ctx, openssl_info_callback); + SSL_set_ex_data(c->ssl, 0, c); + SSL_CTX_set_info_callback(c->ctx, openssl_info_callback); /** * We have set the MTU to fragment the DTLS packet. It is important to note that the * packet is split to ensure that each handshake packet is smaller than the MTU. */ - if (c->mtu <= 0) - c->mtu = 1096; - SSL_set_options(p->ssl, SSL_OP_NO_QUERY_MTU); - SSL_set_mtu(p->ssl, c->mtu); - DTLS_set_link_mtu(p->ssl, c->mtu); + if (s->mtu <= 0) + s->mtu = 1096; + SSL_set_options(c->ssl, SSL_OP_NO_QUERY_MTU); + SSL_set_mtu(c->ssl, s->mtu); + DTLS_set_link_mtu(c->ssl, s->mtu); + init_bio_method(h); - if (p->tls_shared.external_sock != 1) { - if ((ret = ff_tls_open_underlying(&p->tls_shared, h, url, options)) < 0) { - av_log(p, AV_LOG_ERROR, "Failed to connect %s\n", url); + + if (!c->tls_shared.external_sock) { + if ((ret = ff_tls_open_underlying(&c->tls_shared, h, url, options)) < 0) { + av_log(c, AV_LOG_ERROR, "Failed to connect %s\n", url); return ret; } } - /* This seems to be necessary despite explicitly setting client/server method above. */ - if (c->listen) - SSL_set_accept_state(p->ssl); + /* This seems to be necessary despite explicitly setting client/server method above. 
*/ + if (s->listen) + SSL_set_accept_state(c->ssl); else - SSL_set_connect_state(p->ssl); + SSL_set_connect_state(c->ssl); /** * During initialization, we only need to call SSL_do_handshake once because SSL_read consumes @@ -895,16 +893,16 @@ static int dtls_start(URLContext *h, const char *url, int flags, AVDictionary ** * * The SSL_do_handshake can't be called if DTLS hasn't prepare for udp. */ - if (p->tls_shared.external_sock != 1) { + if (!c->tls_shared.external_sock) { ret = dtls_handshake(h); // Fatal SSL error, for example, no available suite when peer is DTLS 1.0 while we are DTLS 1.2. if (ret < 0) { - av_log(p, AV_LOG_ERROR, "Failed to drive SSL context, ret=%d\n", ret); + av_log(c, AV_LOG_ERROR, "Failed to drive SSL context, ret=%d\n", ret); return AVERROR(EIO); } } - av_log(p, AV_LOG_VERBOSE, "Setup ok, MTU=%d\n", p->tls_shared.mtu); + av_log(c, AV_LOG_VERBOSE, "Setup ok, MTU=%d\n", c->tls_shared.mtu); ret = 0; fail: @@ -913,24 +911,26 @@ static int dtls_start(URLContext *h, const char *url, int flags, AVDictionary ** static int tls_open(URLContext *h, const char *uri, int flags, AVDictionary **options) { - TLSContext *p = h->priv_data; - TLSShared *c = &p->tls_shared; + TLSContext *c = h->priv_data; + TLSShared *s = &c->tls_shared; + if (!s) + return AVERROR(EINVAL); int ret; - if ((ret = ff_tls_open_underlying(c, h, uri, options)) < 0) + if ((ret = ff_tls_open_underlying(s, h, uri, options)) < 0) goto fail; // We want to support all versions of TLS >= 1.0, but not the deprecated // and insecure SSLv2 and SSLv3. Despite the name, TLS_*_method() // enables support for all versions of SSL and TLS, and we then disable // support for the old protocols immediately after creating the context. - p->ctx = SSL_CTX_new(c->listen ? TLS_server_method() : TLS_client_method()); - if (!p->ctx) { - av_log(h, AV_LOG_ERROR, "%s\n", openssl_get_error(p)); + c->ctx = SSL_CTX_new(s->listen ? 
TLS_server_method() : TLS_client_method()); + if (!c->ctx) { + av_log(h, AV_LOG_ERROR, "%s\n", openssl_get_error(c)); ret = AVERROR(EIO); goto fail; } - if (!SSL_CTX_set_min_proto_version(p->ctx, TLS1_VERSION)) { + if (!SSL_CTX_set_min_proto_version(c->ctx, TLS1_VERSION)) { av_log(h, AV_LOG_ERROR, "Failed to set minimum TLS version to TLSv1\n"); ret = AVERROR_EXTERNAL; goto fail; @@ -938,33 +938,33 @@ static int tls_open(URLContext *h, const char *uri, int flags, AVDictionary **op ret = openssl_init_ca_key_cert(h); if (ret < 0) goto fail; - if (c->verify) - SSL_CTX_set_verify(p->ctx, SSL_VERIFY_PEER|SSL_VERIFY_FAIL_IF_NO_PEER_CERT, NULL); - p->ssl = SSL_new(p->ctx); - if (!p->ssl) { - av_log(h, AV_LOG_ERROR, "%s\n", openssl_get_error(p)); + if (s->verify) + SSL_CTX_set_verify(c->ctx, SSL_VERIFY_PEER|SSL_VERIFY_FAIL_IF_NO_PEER_CERT, NULL); + c->ssl = SSL_new(c->ctx); + if (!c->ssl) { + av_log(h, AV_LOG_ERROR, "%s\n", openssl_get_error(c)); ret = AVERROR(EIO); goto fail; } - SSL_set_ex_data(p->ssl, 0, p); - SSL_CTX_set_info_callback(p->ctx, openssl_info_callback); + SSL_set_ex_data(c->ssl, 0, c); + SSL_CTX_set_info_callback(c->ctx, openssl_info_callback); init_bio_method(h); - if (!c->listen && !c->numerichost) { + if (!s->listen && !s->numerichost) { // By default OpenSSL does too lax wildcard matching - SSL_set_hostflags(p->ssl, X509_CHECK_FLAG_NO_PARTIAL_WILDCARDS); - if (!SSL_set1_host(p->ssl, c->host)) { + SSL_set_hostflags(c->ssl, X509_CHECK_FLAG_NO_PARTIAL_WILDCARDS); + if (!SSL_set1_host(c->ssl, s->host)) { av_log(h, AV_LOG_ERROR, "Failed to set hostname for TLS/SSL verification: %s\n", - openssl_get_error(p)); + openssl_get_error(c)); ret = AVERROR_EXTERNAL; goto fail; } - if (!SSL_set_tlsext_host_name(p->ssl, c->host)) { - av_log(h, AV_LOG_ERROR, "Failed to set hostname for SNI: %s\n", openssl_get_error(p)); + if (!SSL_set_tlsext_host_name(c->ssl, s->host)) { + av_log(h, AV_LOG_ERROR, "Failed to set hostname for SNI: %s\n", openssl_get_error(c)); ret = 
AVERROR_EXTERNAL; goto fail; } } - ret = c->listen ? SSL_accept(p->ssl) : SSL_connect(p->ssl); + ret = s->listen ? SSL_accept(c->ssl) : SSL_connect(c->ssl); if (ret == 0) { av_log(h, AV_LOG_ERROR, "Unable to negotiate TLS/SSL session\n"); ret = AVERROR(EIO); @@ -983,10 +983,10 @@ static int tls_open(URLContext *h, const char *uri, int flags, AVDictionary **op static int tls_read(URLContext *h, uint8_t *buf, int size) { TLSContext *c = h->priv_data; - URLContext *uc = c->tls_shared.is_dtls ? c->tls_shared.udp - : c->tls_shared.tcp; + TLSShared *s = &c->tls_shared; + URLContext *uc = s->is_dtls ? s->udp : s->tcp; int ret; - // Set or clear the AVIO_FLAG_NONBLOCK on c->tls_shared.tcp + // Set or clear the AVIO_FLAG_NONBLOCK on the underlying socket uc->flags &= ~AVIO_FLAG_NONBLOCK; uc->flags |= h->flags & AVIO_FLAG_NONBLOCK; ret = SSL_read(c->ssl, buf, size); @@ -1000,15 +1000,15 @@ static int tls_read(URLContext *h, uint8_t *buf, int size) static int tls_write(URLContext *h, const uint8_t *buf, int size) { TLSContext *c = h->priv_data; - URLContext *uc = c->tls_shared.is_dtls ? c->tls_shared.udp - : c->tls_shared.tcp; + TLSShared *s = &c->tls_shared; + URLContext *uc = s->is_dtls ? s->udp : s->tcp; int ret; // Set or clear the AVIO_FLAG_NONBLOCK on c->tls_shared.tcp uc->flags &= ~AVIO_FLAG_NONBLOCK; uc->flags |= h->flags & AVIO_FLAG_NONBLOCK; - if (c->tls_shared.is_dtls) + if (s->is_dtls) size = FFMIN(size, DTLS_get_data_mtu(c->ssl)); ret = SSL_write(c->ssl, buf, size); @@ -1021,16 +1021,16 @@ static int tls_write(URLContext *h, const uint8_t *buf, int size) static int tls_get_file_handle(URLContext *h) { - TLSContext *p = h->priv_data; - TLSShared *c = &p->tls_shared; - return ffurl_get_file_handle(c->is_dtls ? c->udp : c->tcp); + TLSContext *c = h->priv_data; + TLSShared *s = &c->tls_shared; + return ffurl_get_file_handle(s->is_dtls ? 
s->udp : s->tcp); } static int tls_get_short_seek(URLContext *h) { - TLSContext *p = h->priv_data; - TLSShared *c = &p->tls_shared; - return ffurl_get_short_seek(c->is_dtls ? c->udp : c->tcp); + TLSContext *c = h->priv_data; + TLSShared *s = &c->tls_shared; + return ffurl_get_short_seek(s->is_dtls ? s->udp : s->tcp); } static const AVOption options[] = { diff --git a/libavformat/tls_schannel.c b/libavformat/tls_schannel.c index b60e3100be277..b854f484fac6b 100644 --- a/libavformat/tls_schannel.c +++ b/libavformat/tls_schannel.c @@ -681,12 +681,6 @@ int ff_dtls_export_materials(URLContext *h, char *dtls_srtp_materials, size_t ma #endif } -int ff_dtls_state(URLContext *h) -{ - TLSContext *c = h->priv_data; - return c->tls_shared.state; -} - static void init_sec_buffer(SecBuffer *buffer, unsigned long type, void *data, unsigned long size) { @@ -1111,7 +1105,6 @@ static int tls_handshake(URLContext *h) #endif c->connected = 1; - s->state = DTLS_STATE_FINISHED; fail: return ret; diff --git a/libavformat/whip.c b/libavformat/whip.c index 256ea14d2ced8..0ff1c235da372 100644 --- a/libavformat/whip.c +++ b/libavformat/whip.c @@ -40,9 +40,12 @@ #include "internal.h" #include "mux.h" #include "network.h" +#include "rtp.h" #include "srtp.h" #include "tls.h" +#define INET_PTON_OK 1 + /** * Maximum size limit of a Session Description Protocol (SDP), * be it an offer or answer. @@ -114,6 +117,7 @@ /* Referring to Chrome's definition of RTP payload types. */ #define WHIP_RTP_PAYLOAD_TYPE_H264 106 #define WHIP_RTP_PAYLOAD_TYPE_OPUS 111 +#define WHIP_RTP_PAYLOAD_TYPE_RTX 105 /** * The STUN message header, which is 20 bytes long, comprises the @@ -130,6 +134,13 @@ */ #define WHIP_RTP_HEADER_SIZE 12 +/** + * Each RTCP packet begins with a fixed part similar to that of RTP data packets, + * followed by structured elements that MUST end on a 32 bit boundary. 
+ * See https://www.rfc-editor.org/rfc/rfc3550#section-6.1 + */ +#define WHIP_RTCP_HEADER_SIZE 4 + /** * For RTCP, PT is [128, 223] (or without marker [0, 95]). Literally, RTCP starts * from 64 not 0, so PT is [192, 223] (or without marker [64, 95]), see "RTCP Control @@ -150,6 +161,11 @@ #define WHIP_SDP_SESSION_ID "4489045141692799359" #define WHIP_SDP_CREATOR_IP "127.0.0.1" +/* The default size of the rtp history buffer.*/ +#define HISTORY_SIZE_DEFAULT 512 + +#define SSRC_GENERATOR_MAX_ATTEMPTS 100 + /* Calculate the elapsed time from starttime to endtime in milliseconds. */ #define ELAPSED(starttime, endtime) ((int)(endtime - starttime) / 1000) @@ -181,8 +197,6 @@ enum WHIPState { WHIP_STATE_ICE_CONNECTING, /* The muxer has received the ICE response from the peer. */ WHIP_STATE_ICE_CONNECTED, - /* The muxer starts attempting the DTLS handshake. */ - WHIP_STATE_DTLS_CONNECTING, /* The muxer has finished the DTLS handshake with the peer. */ WHIP_STATE_DTLS_FINISHED, /* The muxer has finished the SRTP setup. */ @@ -193,18 +207,29 @@ enum WHIPState { WHIP_STATE_FAILED, }; +typedef enum WHIPFlags { + WHIP_FLAG_IGNORE_IPV6 = (1 << 0), // Ignore ipv6 candidate + WHIP_FLAG_DISABLE_RTX = (1 << 1), // Disable retransmission + WHIP_FLAG_DTLS_ACTIVE = (1 << 2), // DTLS active role +} WHIPFlags; + +typedef struct RtpHistoryItem { + uint16_t seq; + int size; + uint8_t* buf; +} RtpHistoryItem; + typedef struct WHIPContext { AVClass *av_class; + uint32_t flags; // enum WHIPFlags /* The state of the RTC connection. */ enum WHIPState state; - /* The callback return value for DTLS. */ - int dtls_ret; - int dtls_closed; /* Parameters for the input audio and video codecs. */ AVCodecParameters *audio_par; AVCodecParameters *video_par; + uint8_t constraint_set_flags; /** * The h264_mp4toannexb Bitstream Filter (BSF) bypasses the AnnexB packet; @@ -219,9 +244,15 @@ typedef struct WHIPContext { /* The ICE username and pwd fragment generated by the muxer. 
*/ char ice_ufrag_local[9]; char ice_pwd_local[33]; + + uint32_t ssrc_buffer[3]; + int nb_ssrc; /* The SSRC of the audio and video stream, generated by the muxer. */ uint32_t audio_ssrc; uint32_t video_ssrc; + + uint16_t audio_first_seq; + uint16_t video_first_seq; /* The PT(Payload Type) of stream, generated by the muxer. */ uint8_t audio_payload_type; uint8_t video_payload_type; @@ -231,6 +262,7 @@ typedef struct WHIPContext { */ char *sdp_offer; + int is_peer_ice_lite; /* The ICE username and pwd from remote server. */ char *ice_ufrag_remote; char *ice_pwd_remote; @@ -279,6 +311,7 @@ typedef struct WHIPContext { /* The SRTP send context, to encrypt outgoing packets. */ SRTPContext srtp_audio_send; SRTPContext srtp_video_send; + SRTPContext srtp_video_rtx_send; SRTPContext srtp_rtcp_send; /* The SRTP receive context, to decrypt incoming packets. */ SRTPContext srtp_recv; @@ -303,6 +336,14 @@ typedef struct WHIPContext { /* The certificate and private key used for DTLS handshake. */ char* cert_file; char* key_file; + + /* RTX and NACK */ + uint8_t rtx_payload_type; + uint32_t video_rtx_ssrc; + uint16_t rtx_seq; + int history_size; + RtpHistoryItem *history; /* ring buffer */ + int hist_head; } WHIPContext; /** @@ -348,54 +389,6 @@ static av_cold int certificate_key_init(AVFormatContext *s) return ret; } -/** - * When DTLS state change. - */ -static int dtls_context_on_state(AVFormatContext *s, const char* type, const char* desc) -{ - int ret = 0; - WHIPContext *whip = s->priv_data; - int state = ff_dtls_state(whip->dtls_uc); - - if (state == DTLS_STATE_CLOSED) { - whip->dtls_closed = 1; - av_log(whip, AV_LOG_VERBOSE, "DTLS session closed, type=%s, desc=%s, elapsed=%dms\n", - type ? type : "", desc ? desc : "", ELAPSED(whip->whip_starttime, av_gettime())); - goto error; - } - - if (state == DTLS_STATE_FAILED) { - whip->state = WHIP_STATE_FAILED; - av_log(whip, AV_LOG_ERROR, "DTLS session failed, type=%s, desc=%s\n", - type ? type : "", desc ? 
desc : ""); - whip->dtls_ret = AVERROR(EIO); - goto error; - } - - if (state == DTLS_STATE_FINISHED && whip->state < WHIP_STATE_DTLS_FINISHED) { - whip->state = WHIP_STATE_DTLS_FINISHED; - whip->whip_dtls_time = av_gettime(); - av_log(whip, AV_LOG_VERBOSE, "DTLS handshake is done, elapsed=%dms\n", - ELAPSED(whip->whip_starttime, av_gettime())); - return ret; - } -error: - return -1; -} - -static av_cold int dtls_initialize(AVFormatContext *s) -{ - WHIPContext *whip = s->priv_data; - /* reuse the udp created by whip */ - ff_tls_set_external_socket(whip->dtls_uc, whip->udp); - - /* Make the socket non-blocking */ - ff_socket_nonblock(ffurl_get_file_handle(whip->dtls_uc), 1); - whip->dtls_uc->flags |= AVIO_FLAG_NONBLOCK; - - return 0; -} - /** * Initialize and check the options for the WebRTC muxer. */ @@ -405,7 +398,7 @@ static av_cold int initialize(AVFormatContext *s) WHIPContext *whip = s->priv_data; uint32_t seed; - whip->whip_starttime = av_gettime(); + whip->whip_starttime = av_gettime_relative(); ret = certificate_key_init(s); if (ret < 0) { @@ -417,15 +410,19 @@ static av_cold int initialize(AVFormatContext *s) seed = av_get_random_seed(); av_lfg_init(&whip->rnd, seed); + whip->nb_ssrc = 0; + whip->audio_first_seq = av_lfg_get(&whip->rnd) & 0x0fff; + whip->video_first_seq = av_lfg_get(&whip->rnd) & 0x0fff; + if (whip->pkt_size < ideal_pkt_size) av_log(whip, AV_LOG_WARNING, "pkt_size=%d(<%d) is too small, may cause packet loss\n", whip->pkt_size, ideal_pkt_size); if (whip->state < WHIP_STATE_INIT) whip->state = WHIP_STATE_INIT; - whip->whip_init_time = av_gettime(); + whip->whip_init_time = av_gettime_relative(); av_log(whip, AV_LOG_VERBOSE, "Init state=%d, handshake_timeout=%dms, pkt_size=%d, seed=%d, elapsed=%dms\n", - whip->state, whip->handshake_timeout, whip->pkt_size, seed, ELAPSED(whip->whip_starttime, av_gettime())); + whip->state, whip->handshake_timeout, whip->pkt_size, seed, ELAPSED(whip->whip_starttime, av_gettime_relative())); return 0; } @@ 
-445,45 +442,30 @@ static av_cold int initialize(AVFormatContext *s) static int parse_profile_level(AVFormatContext *s, AVCodecParameters *par) { int ret = 0; - const uint8_t *r = par->extradata, *r1, *end = par->extradata + par->extradata_size; - H264SPS seq, *const sps = &seq; - uint32_t state; + const uint8_t *r = par->extradata; WHIPContext *whip = s->priv_data; if (par->codec_id != AV_CODEC_ID_H264) return ret; - if (par->profile != AV_PROFILE_UNKNOWN && par->level != AV_LEVEL_UNKNOWN) - return ret; - - if (!par->extradata || par->extradata_size <= 0) { + if (!par->extradata || par->extradata_size < 8) { av_log(whip, AV_LOG_ERROR, "Unable to parse profile from empty extradata=%p, size=%d\n", par->extradata, par->extradata_size); return AVERROR(EINVAL); } - while (1) { - r = avpriv_find_start_code(r, end, &state); - if (r >= end) - break; - - r1 = ff_nal_find_startcode(r, end); - if ((state & 0x1f) == H264_NAL_SPS) { - ret = ff_avc_decode_sps(sps, r, r1 - r); - if (ret < 0) { - av_log(whip, AV_LOG_ERROR, "Failed to decode SPS, state=%x, size=%d\n", - state, (int)(r1 - r)); - return ret; - } - - av_log(whip, AV_LOG_VERBOSE, "Parse profile=%d, level=%d from SPS\n", - sps->profile_idc, sps->level_idc); - par->profile = sps->profile_idc; - par->level = sps->level_idc; - } + if (AV_RB32(r) == 0x00000001 && (r[4] & 0x1F) == 7) + r = &r[5]; + else if (AV_RB24(r) == 0x000001 && (r[3] & 0x1F) == 7) + r = &r[4]; + else if (r[0] == 0x01) // avcC + r = &r[1]; + else + return AVERROR(EINVAL); - r = r1; - } + if (par->profile == AV_PROFILE_UNKNOWN) par->profile = r[0]; + whip->constraint_set_flags = r[1]; + if (par->level == AV_LEVEL_UNKNOWN) par->level = r[2]; return ret; } @@ -584,6 +566,35 @@ static int parse_codec(AVFormatContext *s) return ret; } +static uint32_t generate_unique_ssrc(WHIPContext *whip) +{ + uint32_t candidate = 0; + int max_attempts = SSRC_GENERATOR_MAX_ATTEMPTS; + int is_unique = 1, i; + + do { + // Truncate to 31 bits to avoid overflow when passed 
into rtpenc as signed int + candidate = av_lfg_get(&whip->rnd) & 0x7fffffff; + if (candidate == 0) continue; + + is_unique = 1; + for (i = 0; i < whip->nb_ssrc; i++) { + if (whip->ssrc_buffer[i] == candidate) { + is_unique = 0; + break; + } + } + + if (is_unique) { + whip->ssrc_buffer[whip->nb_ssrc++] = candidate; + return candidate; + } + } while (--max_attempts > 0); + + av_log(whip, AV_LOG_ERROR, "Failed to generate unique SSRC after %d attempts\n", SSRC_GENERATOR_MAX_ATTEMPTS); + return candidate; +} + /** * Generate SDP offer according to the codec parameters, DTLS and ICE information. * @@ -594,10 +605,11 @@ static int parse_codec(AVFormatContext *s) */ static int generate_sdp_offer(AVFormatContext *s) { - int ret = 0, profile, level, profile_iop; + int ret = 0, profile, level; const char *acodec_name = NULL, *vcodec_name = NULL; AVBPrint bp; WHIPContext *whip = s->priv_data; + int is_dtls_active = whip->flags & WHIP_FLAG_DTLS_ACTIVE; /* To prevent a crash during cleanup, always initialize it. 
*/ av_bprint_init(&bp, 1, MAX_SDP_SIZE); @@ -614,12 +626,22 @@ static int generate_sdp_offer(AVFormatContext *s) av_lfg_get(&whip->rnd), av_lfg_get(&whip->rnd), av_lfg_get(&whip->rnd), av_lfg_get(&whip->rnd)); - whip->audio_ssrc = av_lfg_get(&whip->rnd); - whip->video_ssrc = av_lfg_get(&whip->rnd); + whip->audio_ssrc = generate_unique_ssrc(whip); + whip->video_ssrc = generate_unique_ssrc(whip); whip->audio_payload_type = WHIP_RTP_PAYLOAD_TYPE_OPUS; whip->video_payload_type = WHIP_RTP_PAYLOAD_TYPE_H264; + /* RTX and NACK init */ + whip->rtx_payload_type = WHIP_RTP_PAYLOAD_TYPE_RTX; + whip->video_rtx_ssrc = generate_unique_ssrc(whip); + whip->rtx_seq = 0; + whip->hist_head = 0; + whip->history_size = FFMAX(64, whip->history_size); + whip->history = av_calloc(whip->history_size, sizeof(*whip->history)); + if (!whip->history) + return AVERROR(ENOMEM); + av_bprintf(&bp, "" "v=0\r\n" "o=FFmpeg %s 2 IN IP4 %s\r\n" @@ -641,7 +663,7 @@ static int generate_sdp_offer(AVFormatContext *s) "a=ice-ufrag:%s\r\n" "a=ice-pwd:%s\r\n" "a=fingerprint:sha-256 %s\r\n" - "a=setup:passive\r\n" + "a=setup:%s\r\n" "a=mid:0\r\n" "a=sendonly\r\n" "a=msid:FFmpeg audio\r\n" @@ -653,6 +675,7 @@ static int generate_sdp_offer(AVFormatContext *s) whip->ice_ufrag_local, whip->ice_pwd_local, whip->dtls_fingerprint, + is_dtls_active ? 
"active" : "passive", whip->audio_payload_type, acodec_name, whip->audio_par->sample_rate, @@ -662,21 +685,20 @@ static int generate_sdp_offer(AVFormatContext *s) } if (whip->video_par) { - profile_iop = profile = whip->video_par->profile; + profile = whip->video_par->profile; level = whip->video_par->level; if (whip->video_par->codec_id == AV_CODEC_ID_H264) { vcodec_name = "H264"; - profile_iop &= AV_PROFILE_H264_CONSTRAINED; profile &= (~AV_PROFILE_H264_CONSTRAINED); } av_bprintf(&bp, "" - "m=video 9 UDP/TLS/RTP/SAVPF %u\r\n" + "m=video 9 UDP/TLS/RTP/SAVPF %u %u\r\n" "c=IN IP4 0.0.0.0\r\n" "a=ice-ufrag:%s\r\n" "a=ice-pwd:%s\r\n" "a=fingerprint:sha-256 %s\r\n" - "a=setup:passive\r\n" + "a=setup:%s\r\n" "a=mid:1\r\n" "a=sendonly\r\n" "a=msid:FFmpeg video\r\n" @@ -684,20 +706,36 @@ static int generate_sdp_offer(AVFormatContext *s) "a=rtcp-rsize\r\n" "a=rtpmap:%u %s/90000\r\n" "a=fmtp:%u level-asymmetry-allowed=1;packetization-mode=1;profile-level-id=%02x%02x%02x\r\n" + "a=rtcp-fb:%u nack\r\n" + "a=rtpmap:%u rtx/90000\r\n" + "a=fmtp:%u apt=%u\r\n" + "a=ssrc-group:FID %u %u\r\n" + "a=ssrc:%u cname:FFmpeg\r\n" + "a=ssrc:%u msid:FFmpeg video\r\n" "a=ssrc:%u cname:FFmpeg\r\n" "a=ssrc:%u msid:FFmpeg video\r\n", whip->video_payload_type, + whip->rtx_payload_type, whip->ice_ufrag_local, whip->ice_pwd_local, whip->dtls_fingerprint, + is_dtls_active ? 
"active" : "passive", whip->video_payload_type, vcodec_name, whip->video_payload_type, profile, - profile_iop, + whip->constraint_set_flags, level, + whip->video_payload_type, + whip->rtx_payload_type, + whip->rtx_payload_type, + whip->video_payload_type, + whip->video_ssrc, + whip->video_rtx_ssrc, + whip->video_ssrc, whip->video_ssrc, - whip->video_ssrc); + whip->video_rtx_ssrc, + whip->video_rtx_ssrc); } if (!av_bprint_is_complete(&bp)) { @@ -714,7 +752,7 @@ static int generate_sdp_offer(AVFormatContext *s) if (whip->state < WHIP_STATE_OFFER) whip->state = WHIP_STATE_OFFER; - whip->whip_offer_time = av_gettime(); + whip->whip_offer_time = av_gettime_relative(); av_log(whip, AV_LOG_VERBOSE, "Generated state=%d, offer: %s\n", whip->state, whip->sdp_offer); end: @@ -867,6 +905,8 @@ static int parse_answer(AVFormatContext *s) for (i = 0; !avio_feof(pb); i++) { ff_get_chomp_line(pb, line, sizeof(line)); + if (av_strstart(line, "a=ice-lite", &ptr)) + whip->is_peer_ice_lite = 1; if (av_strstart(line, "a=ice-ufrag:", &ptr) && !whip->ice_ufrag_remote) { whip->ice_ufrag_remote = av_strdup(ptr); if (!whip->ice_ufrag_remote) { @@ -884,6 +924,9 @@ static int parse_answer(AVFormatContext *s) if (ptr && av_stristr(ptr, "host")) { char protocol[17], host[129]; int priority, port; +#if HAVE_STRUCT_SOCKADDR_IN6 + struct in6_addr addr6; +#endif ret = sscanf(ptr, "%16s %d %128s %d typ host", protocol, &priority, host, &port); if (ret != 4) { av_log(whip, AV_LOG_ERROR, "Failed %d to parse line %d %s from %s\n", @@ -891,7 +934,12 @@ static int parse_answer(AVFormatContext *s) ret = AVERROR(EIO); goto end; } - +#if HAVE_STRUCT_SOCKADDR_IN6 + if (whip->flags & WHIP_FLAG_IGNORE_IPV6 && inet_pton(AF_INET6, host, &addr6) == INET_PTON_OK) { + av_log(whip, AV_LOG_VERBOSE, "Ignoring IPv6 ICE candidates %s, line %d %s \n", host, i, line); + continue; + } +#endif if (av_strcasecmp(protocol, "udp")) { av_log(whip, AV_LOG_ERROR, "Protocol %s is not supported by RTC, choose udp, line %d %s of 
%s\n", protocol, i, line, whip->sdp_answer); @@ -930,10 +978,10 @@ static int parse_answer(AVFormatContext *s) if (whip->state < WHIP_STATE_NEGOTIATED) whip->state = WHIP_STATE_NEGOTIATED; - whip->whip_answer_time = av_gettime(); + whip->whip_answer_time = av_gettime_relative(); av_log(whip, AV_LOG_VERBOSE, "SDP state=%d, offer=%zuB, answer=%zuB, ufrag=%s, pwd=%zuB, transport=%s://%s:%d, elapsed=%dms\n", whip->state, strlen(whip->sdp_offer), strlen(whip->sdp_answer), whip->ice_ufrag_remote, strlen(whip->ice_pwd_remote), - whip->ice_protocol, whip->ice_host, whip->ice_port, ELAPSED(whip->whip_starttime, av_gettime())); + whip->ice_protocol, whip->ice_host, whip->ice_port, ELAPSED(whip->whip_starttime, av_gettime_relative())); end: avio_context_free(&pb); @@ -1127,6 +1175,11 @@ static int ice_is_binding_response(uint8_t *b, int size) * see https://www.rfc-editor.org/rfc/rfc3550#section-5.1 * The RTCP packet header is similar to RTP, * see https://www.rfc-editor.org/rfc/rfc3550#section-6.4.1 + * + * The smallest RTCP packet, an empty RR packet, is 8 bytes, + * see https://www.rfc-editor.org/rfc/rfc3550#section-6.4.2 + * That case is ignored because no useful info can be parsed from an empty RR, + * so requiring size >= WHIP_RTP_HEADER_SIZE (12 bytes) covers all RTP and RTCP cases. */ static int media_is_rtp_rtcp(const uint8_t *b, int size) { @@ -1136,7 +1189,7 @@ static int media_is_rtp_rtcp(const uint8_t *b, int size) /* Whether the packet is RTCP. 
*/ static int media_is_rtcp(const uint8_t *b, int size) { - return size >= WHIP_RTP_HEADER_SIZE && b[1] >= WHIP_RTCP_PT_START && b[1] <= WHIP_RTCP_PT_END; + return size >= WHIP_RTCP_HEADER_SIZE && b[1] >= WHIP_RTCP_PT_START && b[1] <= WHIP_RTCP_PT_END; } /** @@ -1211,22 +1264,21 @@ static int udp_connect(AVFormatContext *s) if (whip->state < WHIP_STATE_UDP_CONNECTED) whip->state = WHIP_STATE_UDP_CONNECTED; - whip->whip_udp_time = av_gettime(); + whip->whip_udp_time = av_gettime_relative(); av_log(whip, AV_LOG_VERBOSE, "UDP state=%d, elapsed=%dms, connected to udp://%s:%d\n", - whip->state, ELAPSED(whip->whip_starttime, av_gettime()), whip->ice_host, whip->ice_port); + whip->state, ELAPSED(whip->whip_starttime, av_gettime_relative()), whip->ice_host, whip->ice_port); end: av_dict_free(&opts); return ret; } -static int ice_dtls_handshake(AVFormatContext *s) +static int handle_ice_handshake(AVFormatContext *s) { int ret = 0, size, i; - int64_t starttime = av_gettime(), now; + int64_t starttime = av_gettime_relative(), now; WHIPContext *whip = s->priv_data; - AVDictionary *opts = NULL; - char buf[256], *cert_buf = NULL, *key_buf = NULL; + int is_dtls_active = whip->flags & WHIP_FLAG_DTLS_ACTIVE; if (whip->state < WHIP_STATE_UDP_CONNECTED || !whip->udp) { av_log(whip, AV_LOG_ERROR, "UDP not connected, state=%d, udp=%p\n", whip->state, whip->udp); @@ -1248,25 +1300,20 @@ static int ice_dtls_handshake(AVFormatContext *s) goto end; } - if (whip->state < WHIP_STATE_ICE_CONNECTING) - whip->state = WHIP_STATE_ICE_CONNECTING; + whip->state = WHIP_STATE_ICE_CONNECTING; } next_packet: - if (whip->state >= WHIP_STATE_DTLS_FINISHED) - /* DTLS handshake is done, exit the loop. 
*/ - break; - - now = av_gettime(); + now = av_gettime_relative(); if (now - starttime >= whip->handshake_timeout * 1000) { - av_log(whip, AV_LOG_ERROR, "DTLS handshake timeout=%dms, cost=%dms, elapsed=%dms, state=%d\n", + av_log(whip, AV_LOG_ERROR, "ICE handshake timeout=%dms, cost=%dms, elapsed=%dms, state=%d\n", whip->handshake_timeout, ELAPSED(starttime, now), ELAPSED(whip->whip_starttime, now), whip->state); ret = AVERROR(ETIMEDOUT); goto end; } - /* Read the STUN or DTLS messages from peer. */ - for (i = 0; i < ICE_DTLS_READ_INTERVAL / 5 && whip->state < WHIP_STATE_DTLS_CONNECTING; i++) { + /* Read the STUN or DTLS client hello from peer. */ + for (i = 0; i < ICE_DTLS_READ_INTERVAL / 5; i++) { ret = ffurl_read(whip->udp, whip->buf, sizeof(whip->buf)); if (ret > 0) break; @@ -1274,69 +1321,87 @@ static int ice_dtls_handshake(AVFormatContext *s) av_usleep(5 * 1000); continue; } + if (is_dtls_active) + break; av_log(whip, AV_LOG_ERROR, "Failed to read message\n"); goto end; } - /* Got nothing, continue to process handshake. */ - if (ret <= 0 && whip->state < WHIP_STATE_DTLS_CONNECTING) - continue; - /* Handle the ICE binding response. */ if (ice_is_binding_response(whip->buf, ret)) { - if (whip->state < WHIP_STATE_ICE_CONNECTED) { + if (whip->is_peer_ice_lite) { whip->state = WHIP_STATE_ICE_CONNECTED; - whip->whip_ice_time = av_gettime(); - av_log(whip, AV_LOG_VERBOSE, "ICE STUN ok, state=%d, url=udp://%s:%d, location=%s, username=%s:%s, res=%dB, elapsed=%dms\n", - whip->state, whip->ice_host, whip->ice_port, whip->whip_resource_url ? 
whip->whip_resource_url : "", - whip->ice_ufrag_remote, whip->ice_ufrag_local, ret, ELAPSED(whip->whip_starttime, av_gettime())); - - ff_url_join(buf, sizeof(buf), "dtls", NULL, whip->ice_host, whip->ice_port, NULL); - av_dict_set_int(&opts, "mtu", whip->pkt_size, 0); - if (whip->cert_file) { - av_dict_set(&opts, "cert_file", whip->cert_file, 0); - } else - av_dict_set(&opts, "cert_pem", whip->cert_buf, 0); - - if (whip->key_file) { - av_dict_set(&opts, "key_file", whip->key_file, 0); - } else - av_dict_set(&opts, "key_pem", whip->key_buf, 0); - av_dict_set_int(&opts, "external_sock", 1, 0); - av_dict_set_int(&opts, "listen", 1, 0); - /* If got the first binding response, start DTLS handshake. */ - ret = ffurl_open_whitelist(&whip->dtls_uc, buf, AVIO_FLAG_READ_WRITE, &s->interrupt_callback, - &opts, s->protocol_whitelist, s->protocol_blacklist, NULL); - av_dict_free(&opts); - if (ret < 0) - goto end; - dtls_initialize(s); + whip->whip_ice_time = av_gettime_relative(); } goto next_packet; } - /* When a binding request is received, it is necessary to respond immediately. */ + /* See RFC8445, Triggered check when the peer is ice full mode */ if (ice_is_binding_request(whip->buf, ret)) { if ((ret = ice_handle_binding_request(s, whip->buf, ret)) < 0) goto end; goto next_packet; } - /* If got any DTLS messages, handle it. 
*/ - if (is_dtls_packet(whip->buf, ret) && whip->state >= WHIP_STATE_ICE_CONNECTED || whip->state == WHIP_STATE_DTLS_CONNECTING) { - whip->state = WHIP_STATE_DTLS_CONNECTING; - if ((ret = ffurl_handshake(whip->dtls_uc)) < 0) - goto end; - dtls_context_on_state(s, NULL, NULL); - goto next_packet; + if (is_dtls_packet(whip->buf, ret) || whip->flags & WHIP_FLAG_DTLS_ACTIVE) { + if (whip->state < WHIP_STATE_ICE_CONNECTED) { + whip->state = WHIP_STATE_ICE_CONNECTED; + whip->whip_ice_time = av_gettime_relative(); + } + ret = 0; + av_log(whip, AV_LOG_VERBOSE, "ICE STUN ok, state=%d, url=udp://%s:%d, location=%s, username=%s:%s, res=%dB, elapsed=%dms\n", + whip->state, whip->ice_host, whip->ice_port, whip->whip_resource_url ? whip->whip_resource_url : "", + whip->ice_ufrag_remote, whip->ice_ufrag_local, ret, ELAPSED(whip->whip_starttime, whip->whip_ice_time)); + break; } } +end: + return ret; +} + +static int handle_dtls_handshake(AVFormatContext *s) +{ + int ret = 0; + WHIPContext *whip = s->priv_data; + AVDictionary *opts = NULL; + char buf[256]; + + ff_url_join(buf, sizeof(buf), "dtls", NULL, whip->ice_host, whip->ice_port, NULL); + av_dict_set_int(&opts, "mtu", whip->pkt_size, 0); + if (whip->cert_file) { + av_dict_set(&opts, "cert_file", whip->cert_file, 0); + } else + av_dict_set(&opts, "cert_pem", whip->cert_buf, 0); + + if (whip->key_file) { + av_dict_set(&opts, "key_file", whip->key_file, 0); + } else + av_dict_set(&opts, "key_pem", whip->key_buf, 0); + av_dict_set_int(&opts, "external_sock", 1, 0); + av_dict_set_int(&opts, "use_srtp", 1, 0); + av_dict_set_int(&opts, "listen", whip->flags & WHIP_FLAG_DTLS_ACTIVE ? 
0 : 1, 0); + + ret = ffurl_open_whitelist(&whip->dtls_uc, buf, AVIO_FLAG_READ_WRITE, &s->interrupt_callback, + &opts, s->protocol_whitelist, s->protocol_blacklist, NULL); + av_dict_free(&opts); + if (ret < 0) + goto end; + + /* reuse the udp created by whip */ + ff_tls_set_external_socket(whip->dtls_uc, whip->udp); + ret = ffurl_handshake(whip->dtls_uc); + if (ret < 0) { + whip->state = WHIP_STATE_FAILED; + av_log(whip, AV_LOG_VERBOSE, "DTLS session failed\n"); + } + if (!ret) { + whip->state = WHIP_STATE_DTLS_FINISHED; + whip->whip_dtls_time = av_gettime_relative(); + av_log(whip, AV_LOG_VERBOSE, "DTLS handshake is done, elapsed=%dms\n", + ELAPSED(whip->whip_starttime, whip->whip_dtls_time)); + } end: - if (cert_buf) - av_free(cert_buf); - if (key_buf) - av_free(key_buf); return ret; } @@ -1361,6 +1426,8 @@ static int setup_srtp(AVFormatContext *s) */ const char* suite = "SRTP_AES128_CM_HMAC_SHA1_80"; WHIPContext *whip = s->priv_data; + int is_dtls_active = whip->flags & WHIP_FLAG_DTLS_ACTIVE; + ret = ff_dtls_export_materials(whip->dtls_uc, whip->dtls_srtp_materials, sizeof(whip->dtls_srtp_materials)); if (ret < 0) goto end; @@ -1375,13 +1442,11 @@ static int setup_srtp(AVFormatContext *s) char *client_salt = server_key + DTLS_SRTP_KEY_LEN; char *server_salt = client_salt + DTLS_SRTP_SALT_LEN; - /* As DTLS server, the recv key is client master key plus salt. */ - memcpy(recv_key, client_key, DTLS_SRTP_KEY_LEN); - memcpy(recv_key + DTLS_SRTP_KEY_LEN, client_salt, DTLS_SRTP_SALT_LEN); + memcpy(is_dtls_active ? send_key : recv_key, client_key, DTLS_SRTP_KEY_LEN); + memcpy(is_dtls_active ? send_key + DTLS_SRTP_KEY_LEN : recv_key + DTLS_SRTP_KEY_LEN, client_salt, DTLS_SRTP_SALT_LEN); - /* As DTLS server, the send key is server master key plus salt. */ - memcpy(send_key, server_key, DTLS_SRTP_KEY_LEN); - memcpy(send_key + DTLS_SRTP_KEY_LEN, server_salt, DTLS_SRTP_SALT_LEN); + memcpy(is_dtls_active ? 
recv_key : send_key, server_key, DTLS_SRTP_KEY_LEN); + memcpy(is_dtls_active ? recv_key + DTLS_SRTP_KEY_LEN : send_key + DTLS_SRTP_KEY_LEN, server_salt, DTLS_SRTP_SALT_LEN); /* Setup SRTP context for outgoing packets */ if (!av_base64_encode(buf, sizeof(buf), send_key, sizeof(send_key))) { @@ -1402,6 +1467,12 @@ static int setup_srtp(AVFormatContext *s) goto end; } + ret = ff_srtp_set_crypto(&whip->srtp_video_rtx_send, suite, buf); + if (ret < 0) { + av_log(whip, AV_LOG_ERROR, "Failed to set crypto for video rtx send\n"); + goto end; + } + ret = ff_srtp_set_crypto(&whip->srtp_rtcp_send, suite, buf); if (ret < 0) { av_log(whip, AV_LOG_ERROR, "Failed to set crypto for rtcp send\n"); @@ -1423,14 +1494,56 @@ static int setup_srtp(AVFormatContext *s) if (whip->state < WHIP_STATE_SRTP_FINISHED) whip->state = WHIP_STATE_SRTP_FINISHED; - whip->whip_srtp_time = av_gettime(); + whip->whip_srtp_time = av_gettime_relative(); av_log(whip, AV_LOG_VERBOSE, "SRTP setup done, state=%d, suite=%s, key=%zuB, elapsed=%dms\n", - whip->state, suite, sizeof(send_key), ELAPSED(whip->whip_starttime, av_gettime())); + whip->state, suite, sizeof(send_key), ELAPSED(whip->whip_starttime, av_gettime_relative())); end: return ret; } + +/** + * Copy an outgoing RTP packet into the history ring buffer. The slot index is + * the distance of the packet's sequence number from video_first_seq, modulo + * history_size; whatever the slot held before is freed. + * + * @return 0 on success, AVERROR(ENOMEM) if the copy cannot be allocated + */ +static int rtp_history_store(WHIPContext *whip, const uint8_t *buf, int size) +{ + uint16_t seq = AV_RB16(buf + 2); + uint32_t pos = ((uint32_t)seq - (uint32_t)whip->video_first_seq) % whip->history_size; + RtpHistoryItem *it = &whip->history[pos]; + /* free older entry */ + av_free(it->buf); + it->buf = av_memdup(buf, size); + if (!it->buf) + return AVERROR(ENOMEM); + it->size = size; + it->seq = seq; + + whip->hist_head = ++pos; + return 0; +} + +/** + * Look up a stored packet by its RTP sequence number. + * A slot matches only when it holds a buffer: slots start zeroed (seq 0, no + * buffer) and lose their buffer when a copy fails, so comparing seq alone + * could return an item whose buf is NULL. + * + * @return the matching item, or NULL if the packet is not in the history + */ +static const RtpHistoryItem *rtp_history_find(WHIPContext *whip, uint16_t seq) +{ + uint32_t pos = ((uint32_t)seq - (uint32_t)whip->video_first_seq) % whip->history_size; + const RtpHistoryItem *it = &whip->history[pos]; + if (it->buf && it->seq == seq) + return it; + return NULL; +} + /** * Callback triggered by the RTP muxer when 
it creates and sends out an RTP packet. * @@ -1440,7 +1538,7 @@ static int setup_srtp(AVFormatContext *s) */ static int on_rtp_write_packet(void *opaque, const uint8_t *buf, int buf_size) { - int ret, cipher_size, is_rtcp, is_video; + int ret, cipher_size, is_rtcp, is_video = 0, is_audio = 0; uint8_t payload_type; AVFormatContext *s = opaque; WHIPContext *whip = s->priv_data; @@ -1452,10 +1550,13 @@ static int on_rtp_write_packet(void *opaque, const uint8_t *buf, int buf_size) /* Only support audio, video and rtcp. */ is_rtcp = media_is_rtcp(buf, buf_size); - payload_type = buf[1] & 0x7f; - is_video = payload_type == whip->video_payload_type; - if (!is_rtcp && payload_type != whip->video_payload_type && payload_type != whip->audio_payload_type) - return 0; + if (!is_rtcp) { + payload_type = buf[1] & 0x7f; + is_video = payload_type == whip->video_payload_type; + is_audio = payload_type == whip->audio_payload_type; + if (!is_video && !is_audio) + return 0; + } /* Get the corresponding SRTP context. */ srtp = is_rtcp ? &whip->srtp_rtcp_send : (is_video? 
&whip->srtp_video_send : &whip->srtp_audio_send); @@ -1467,6 +1568,12 @@ static int on_rtp_write_packet(void *opaque, const uint8_t *buf, int buf_size) return 0; } + /* Store only ORIGINAL video packets (non-RTX, non-RTCP) */ + if (is_video) { + ret = rtp_history_store(whip, buf, buf_size); + if (ret < 0) return ret; + } + ret = ffurl_write(whip->udp, whip->buf, cipher_size); if (ret < 0) { av_log(whip, AV_LOG_ERROR, "Failed to write packet=%dB, ret=%d\n", cipher_size, ret); @@ -1476,6 +1583,67 @@ static int on_rtp_write_packet(void *opaque, const uint8_t *buf, int buf_size) return ret; } +/** + * See https://datatracker.ietf.org/doc/html/rfc4588 + * Send RTX packet according to a sequence number parsed from NACK packet + */ +static void handle_rtx_packet(AVFormatContext *s, int seq) +{ + int ret = -1; + WHIPContext *whip = s->priv_data; + int new_size, cipher_size; + uint8_t *orig_buf, out_buf[MAX_UDP_BUFFER_SIZE]; + int orig_size; + const RtpHistoryItem *it = rtp_history_find(whip, seq); + int latest_seq = whip->history[(whip->hist_head - 1 + whip->history_size) % whip->history_size].seq; + + if (!it) { + av_log(whip, AV_LOG_WARNING, + "RTX: Packet seq=%d not found, latest seq=%d, latest rtx seq=%d\n", + seq, latest_seq, whip->rtx_seq); + return; + } + av_log(whip, AV_LOG_VERBOSE, + "RTX: Resending packet, size=%d, seq=%d, lateset stored packet seq=%d, latest rtx seq=%d\n", + it->size, seq, latest_seq, whip->rtx_seq); + + orig_buf = it->buf; + orig_size = it->size; + + /* new buffer: header + 2 + payload */ + if (orig_size + 2 > sizeof(whip->buf)) { + av_log(whip, AV_LOG_WARNING, "RTX packet is too large, size=%d\n", orig_size); + goto end; + } + + memcpy(whip->buf, orig_buf, orig_size); + + uint8_t *hdr = whip->buf; + uint16_t orig_seq = AV_RB16(hdr + 2); + + /* rewrite header */ + hdr[1] = (hdr[1] & 0x80) | whip->rtx_payload_type; /* keep M bit */ + AV_WB16(hdr + 2, whip->rtx_seq++); + AV_WB32(hdr + 8, whip->video_rtx_ssrc); + + /* shift payload 2 bytes */ + 
memmove(hdr + 12 + 2, hdr + 12, orig_size - 12); + AV_WB16(hdr + 12, orig_seq); + + new_size = orig_size + 2; + + /* Encrypt by SRTP and send out. */ + cipher_size = ff_srtp_encrypt(&whip->srtp_video_rtx_send, whip->buf, new_size, out_buf, sizeof(out_buf)); + if (cipher_size <= 0) { + av_log(whip, AV_LOG_WARNING, "Failed to encrypt packet=%dB, cipher=%dB\n", new_size, cipher_size); + goto end; + } + ret = ffurl_write(whip->udp, out_buf, cipher_size); +end: + if (ret < 0) + av_log(whip, AV_LOG_WARNING, "Failed to send RTX packet, try to next one\n"); +} + /** * Creates dedicated RTP muxers for each stream in the AVFormatContext to build RTP * packets from the encoded frames. @@ -1493,7 +1661,6 @@ static int create_rtp_muxer(AVFormatContext *s) AVFormatContext *rtp_ctx = NULL; AVDictionary *opts = NULL; uint8_t *buffer = NULL; - char buf[64]; WHIPContext *whip = s->priv_data; whip->udp->flags |= AVIO_FLAG_NONBLOCK; @@ -1560,10 +1727,9 @@ static int create_rtp_muxer(AVFormatContext *s) rtp_ctx->pb->av_class = &ff_avio_class; is_video = s->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO; - snprintf(buf, sizeof(buf), "%d", is_video? whip->video_payload_type : whip->audio_payload_type); - av_dict_set(&opts, "payload_type", buf, 0); - snprintf(buf, sizeof(buf), "%d", is_video? whip->video_ssrc : whip->audio_ssrc); - av_dict_set(&opts, "ssrc", buf, 0); + av_dict_set_int(&opts, "payload_type", is_video ? whip->video_payload_type : whip->audio_payload_type, 0); + av_dict_set_int(&opts, "ssrc", is_video ? whip->video_ssrc : whip->audio_ssrc, 0); + av_dict_set_int(&opts, "seq", is_video ? 
whip->video_first_seq : whip->audio_first_seq, 0); ret = avformat_write_header(rtp_ctx, &opts); if (ret < 0) { @@ -1581,7 +1747,7 @@ static int create_rtp_muxer(AVFormatContext *s) whip->state = WHIP_STATE_READY; av_log(whip, AV_LOG_INFO, "Muxer state=%d, buffer_size=%d, max_packet_size=%d, " "elapsed=%dms(init:%d,offer:%d,answer:%d,udp:%d,ice:%d,dtls:%d,srtp:%d)\n", - whip->state, buffer_size, max_packet_size, ELAPSED(whip->whip_starttime, av_gettime()), + whip->state, buffer_size, max_packet_size, ELAPSED(whip->whip_starttime, av_gettime_relative()), ELAPSED(whip->whip_starttime, whip->whip_init_time), ELAPSED(whip->whip_init_time, whip->whip_offer_time), ELAPSED(whip->whip_offer_time, whip->whip_answer_time), @@ -1760,7 +1926,10 @@ static av_cold int whip_init(AVFormatContext *s) if ((ret = udp_connect(s)) < 0) goto end; - if ((ret = ice_dtls_handshake(s)) < 0) + if ((ret = handle_ice_handshake(s)) < 0) + goto end; + + if ((ret = handle_dtls_handshake(s)) < 0) goto end; if ((ret = setup_srtp(s)) < 0) @@ -1770,13 +1939,73 @@ static av_cold int whip_init(AVFormatContext *s) goto end; end: - if (ret < 0 && whip->state < WHIP_STATE_FAILED) + if (ret < 0) whip->state = WHIP_STATE_FAILED; - if (ret >= 0 && whip->state >= WHIP_STATE_FAILED && whip->dtls_ret < 0) - ret = whip->dtls_ret; return ret; } +static void process_nack_rtx(AVFormatContext *s, int size) +{ + int i = 0, ret; + WHIPContext *whip = s->priv_data; + uint8_t *buf = NULL; + int rtcp_len, srtcp_len, header_len = 12; + + if (whip->flags & WHIP_FLAG_DISABLE_RTX) + return; + /** + * Refer to RFC 3550, Section 6.4.1. + * The length of this RTCP packet in 32-bit words minus one, + * including the header and any padding. 
+ */ + rtcp_len = (AV_RB16(&whip->buf[2]) + 1) * 4; + if (rtcp_len <= header_len) { + av_log(whip, AV_LOG_WARNING, "NACK packet(SRTCP) is broken, rtcp_len: %d\n", rtcp_len); + goto error; + } + /* SRTCP index(4 bytes) + HMAC (SRTP_AES128_CM_SHA1_80 10bytes) */ + srtcp_len = rtcp_len + 4 + 10; + if (srtcp_len != size) { + av_log(whip, AV_LOG_WARNING, "NACK packet(SRTCP) size not match, srtcp_len: %d, size: %d\n", srtcp_len, size); + goto error; + } + buf = av_memdup(whip->buf, srtcp_len); + if (!buf) + goto error; + ret = ff_srtp_decrypt(&whip->srtp_recv, buf, &srtcp_len); + if (ret < 0) { + av_log(whip, AV_LOG_WARNING, "NACK packet(SRTCP) decrypt failed: %d\n", ret); + goto error; + } + while (header_len + i + 4 <= rtcp_len) { + /** + * See https://datatracker.ietf.org/doc/html/rfc4585#section-6.1 + * Handle multi NACKs in bundled packet. + */ + uint16_t pid = AV_RB16(&buf[12 + i]); + uint16_t blp = AV_RB16(&buf[14 + i]); + + handle_rtx_packet(s, pid); + /* retransmit pid + any bit set in blp */ + for (int bit = 0; bit < 16; bit++) { + uint16_t seq = pid + bit + 1; + if (!blp) break; + if (!(blp & (1 << bit))) + continue; + + handle_rtx_packet(s, seq); + } + i = i + 4; + } + goto end; +error: + av_log(whip, AV_LOG_WARNING, "Skip to handle NACK and RTX\n"); +end: + if (buf) + av_freep(&buf); + return; +} + static int whip_write_packet(AVFormatContext *s, AVPacket *pkt) { int ret; @@ -1791,18 +2020,35 @@ static int whip_write_packet(AVFormatContext *s, AVPacket *pkt) * and RTCP like PLI requests, then respond to them. 
*/ ret = ffurl_read(whip->udp, whip->buf, sizeof(whip->buf)); - if (ret > 0) { - if (is_dtls_packet(whip->buf, ret)) { - if ((ret = ffurl_write(whip->dtls_uc, whip->buf, ret)) < 0) { - av_log(whip, AV_LOG_ERROR, "Failed to handle DTLS message\n"); - goto end; - } - } - } else if (ret != AVERROR(EAGAIN)) { - av_log(whip, AV_LOG_ERROR, "Failed to read from UDP socket\n"); + if (ret < 0) { + if (ret == AVERROR(EAGAIN)) + goto write_packet; + goto end; + } + if (!ret) goto end; + if (is_dtls_packet(whip->buf, ret)) { + if ((ret = ffurl_write(whip->dtls_uc, whip->buf, ret)) < 0) { + av_log(whip, AV_LOG_ERROR, "Failed to handle DTLS message\n"); + goto end; + } } + if (media_is_rtcp(whip->buf, ret)) { + uint8_t pt = whip->buf[1]; + uint8_t fmt = (whip->buf[0] & 0x1f); + /** + * Handle RTCP NACK + * Refer to RFC 4585, Section 6.2.1 + * The Generic NACK message is identified by PT=RTPFB and FMT=1. + * TODO: disable retransmisstion when "-tune zerolatency" + */ + if (pt != RTCP_RTPFB) + goto write_packet; + if (fmt == 1) + process_nack_rtx(s, ret); + } +write_packet: if (whip->h264_annexb_insert_sps_pps && st->codecpar->codec_id == AV_CODEC_ID_H264) { if ((ret = h264_annexb_insert_sps_pps(s, pkt)) < 0) { av_log(whip, AV_LOG_ERROR, "Failed to insert SPS/PPS before IDR\n"); @@ -1821,12 +2067,8 @@ static int whip_write_packet(AVFormatContext *s, AVPacket *pkt) } end: - if (ret < 0 && whip->state < WHIP_STATE_FAILED) + if (ret < 0) whip->state = WHIP_STATE_FAILED; - if (ret >= 0 && whip->state >= WHIP_STATE_FAILED && whip->dtls_ret < 0) - ret = whip->dtls_ret; - if (ret >= 0 && whip->dtls_closed) - ret = AVERROR(EIO); return ret; } @@ -1868,10 +2110,15 @@ static av_cold void whip_deinit(AVFormatContext *s) av_freep(&whip->key_file); ff_srtp_free(&whip->srtp_audio_send); ff_srtp_free(&whip->srtp_video_send); + ff_srtp_free(&whip->srtp_video_rtx_send); ff_srtp_free(&whip->srtp_rtcp_send); ff_srtp_free(&whip->srtp_recv); ffurl_close(whip->dtls_uc); ffurl_closep(&whip->udp); + for 
(i = 0; whip->history && i < whip->history_size; i++) { /* history is NULL if its allocation failed or was never reached */ + av_freep(&whip->history[i].buf); + } + av_freep(&whip->history); } static int whip_check_bitstream(AVFormatContext *s, AVStream *st, const AVPacket *pkt) @@ -1896,11 +2143,16 @@ static int whip_check_bitstream(AVFormatContext *s, AVStream *st, const AVPacket #define OFFSET(x) offsetof(WHIPContext, x) #define ENC AV_OPT_FLAG_ENCODING_PARAM static const AVOption options[] = { - { "handshake_timeout", "Timeout in milliseconds for ICE and DTLS handshake.", OFFSET(handshake_timeout), AV_OPT_TYPE_INT, { .i64 = 5000 }, -1, INT_MAX, ENC }, - { "pkt_size", "The maximum size, in bytes, of RTP packets that send out", OFFSET(pkt_size), AV_OPT_TYPE_INT, { .i64 = 1200 }, -1, INT_MAX, ENC }, - { "authorization", "The optional Bearer token for WHIP Authorization", OFFSET(authorization), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, ENC }, - { "cert_file", "The optional certificate file path for DTLS", OFFSET(cert_file), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, ENC }, - { "key_file", "The optional private key file path for DTLS", OFFSET(key_file), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, ENC }, + { "handshake_timeout", "Timeout in milliseconds for ICE and DTLS handshake.", OFFSET(handshake_timeout), AV_OPT_TYPE_INT, { .i64 = 5000 }, -1, INT_MAX, ENC }, + { "pkt_size", "The maximum size, in bytes, of RTP packets that send out", OFFSET(pkt_size), AV_OPT_TYPE_INT, { .i64 = 1200 }, -1, INT_MAX, ENC }, + { "authorization", "Optional Bearer token for WHIP Authorization", OFFSET(authorization), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, ENC }, + { "cert_file", "Optional certificate file path for DTLS", OFFSET(cert_file), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, ENC }, + { "key_file", "Optional private key file path for DTLS", OFFSET(key_file), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, ENC }, + { "whip_flags", "Set flags affecting WHIP connection behavior", OFFSET(flags), AV_OPT_TYPE_FLAGS, { .i64 = 0 }, 0, 0, ENC, .unit = "flags" }, + { 
"ignore_ipv6", "Ignore any IPv6 ICE candidate", 0, AV_OPT_TYPE_CONST, { .i64 = WHIP_FLAG_IGNORE_IPV6 }, 0, UINT_MAX, ENC, .unit = "flags" }, + { "disable_rtx", "Disable RFC 4588 RTX", 0, AV_OPT_TYPE_CONST, { .i64 = WHIP_FLAG_DISABLE_RTX }, 0, UINT_MAX, ENC, .unit = "flags" }, + { "dtls_active", "Set dtls role as active", 0, AV_OPT_TYPE_CONST, { .i64 = WHIP_FLAG_DTLS_ACTIVE }, 0, UINT_MAX, ENC, .unit = "flags" }, + { "rtx_history_size", "Packet history size", OFFSET(history_size), AV_OPT_TYPE_INT, { .i64 = HISTORY_SIZE_DEFAULT }, 64, 2048, ENC }, { NULL }, };