| rfc9957.original.xml | rfc9957.xml | |||
|---|---|---|---|---|
| <?xml version='1.0' encoding='utf-8'?> | <?xml version='1.0' encoding='UTF-8'?> | |||
| <!DOCTYPE rfc [ | <!DOCTYPE rfc [ | |||
| <!ENTITY nbsp " "> | <!ENTITY nbsp " "> | |||
| <!ENTITY reg "®"> | ||||
| <!ENTITY zwsp "​"> | <!ENTITY zwsp "​"> | |||
| <!ENTITY nbhy "‑"> | <!ENTITY nbhy "‑"> | |||
| <!ENTITY wj "⁠"> | <!ENTITY wj "⁠"> | |||
| ]> | ]> | |||
| <!-- This template is for creating an Internet Draft using xml2rfc, | ||||
| which is available here: http://xml.resource.org. --> | ||||
| <?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?> | ||||
| <!-- used by XSLT processors --> | ||||
| <!-- For a complete list and description of processing instructions (PIs), | ||||
| please see http://xml.resource.org/authoring/README.html. --> | ||||
| <!-- Below are generally applicable Processing Instructions (PIs) that most I-Ds | ||||
| might want to use. | ||||
| (Here they are set differently than their defaults in xml2rfc v1.32) --> | ||||
| <?rfc strict="yes" ?> | ||||
| <!-- give errors regarding ID-nits and DTD validation --> | ||||
| <!-- control the table of contents (ToC) --> | ||||
| <?rfc toc="yes"?> | ||||
| <!-- generate a ToC --> | ||||
| <?rfc tocdepth="4"?> | ||||
| <!-- the number of levels of subsections in ToC. default: 3 --> | ||||
| <!-- control references --> | ||||
| <?rfc symrefs="yes"?> | ||||
| <!-- use symbolic references tags, i.e, [RFC2119] instead of [1] --> | ||||
| <?rfc sortrefs="yes" ?> | ||||
| <!-- sort the reference entries alphabetically --> | ||||
| <!-- control vertical white space | ||||
| (using these PIs as follows is recommended by the RFC Editor) --> | ||||
| <?rfc compact="yes" ?> | ||||
| <!-- do not start each main section on a new page --> | ||||
| <?rfc subcompact="no" ?> | ||||
| <!-- keep one blank line between list items --> | ||||
| <!-- end of list of popular I-D processing instructions --> | ||||
| <rfc xmlns:xi="http://www.w3.org/2001/XInclude" category="info" docName="draft-b | ||||
| riscoe-docsis-q-protection-07" ipr="trust200902" obsoletes="" updates="" submiss | ||||
| ionType="IETF" xml:lang="en" tocInclude="true" tocDepth="4" symRefs="true" sortR | ||||
| efs="true" version="3"> | ||||
| <!-- xml2rfc v2v3 conversion 3.18.2 --> | ||||
| <!-- category values: std, bcp, info, exp, and historic | ||||
| ipr values: trust200902, noModificationTrust200902, noDerivativesTrust200902 | ||||
| , | ||||
| or pre5378Trust200902 | ||||
| you can add the attributes updates="NNNN" and obsoletes="NNNN" | ||||
| they will automatically be output with "(if approved)" --> | ||||
| <!-- ***** FRONT MATTER ***** --> | <rfc xmlns:xi="http://www.w3.org/2001/XInclude" category="info" docName="draft-b riscoe-docsis-q-protection-07" number="9957" ipr="trust200902" obsoletes="" upda tes="" submissionType="independent" xml:lang="en" tocInclude="true" tocDepth="4" symRefs="true" sortRefs="true" version="3"> | |||
| <front> | <front> | |||
| <!-- The abbreviated title is used in the page header - it is only necessary | <!--[rfced] May the "®" be removed from the title? | |||
| if the | We note that previous RFCs with DOCSIS in the title do not use this. | |||
| full title is longer than 39 characters --> | Also, on this topic, the Chicago Manual of Style says that it is | |||
| not necessary in "publications that are not advertising or sales materials". | ||||
| <title abbrev="Queue Protection to Preserve Low Latency">The DOCSIS® | Original: The DOCSIS® Queue Protection Algorithm to Preserve Low Latency | |||
| Suggested: The DOCSIS Queue Protection Algorithm to Preserve Low Latency | ||||
| --> | ||||
| <title abbrev="Queue Protection to Preserve Low Latency">The DOCSIS® | ||||
| Queue Protection Algorithm to Preserve Low Latency</title> | Queue Protection Algorithm to Preserve Low Latency</title> | |||
| <seriesInfo name="Internet-Draft" value="draft-briscoe-docsis-q-protection-0 7"/> | <seriesInfo name="RFC" value="9957"/> | |||
| <author fullname="Bob Briscoe" initials="B." role="editor" surname="Briscoe" > | <author fullname="Bob Briscoe" initials="B." role="editor" surname="Briscoe" > | |||
| <organization>Independent</organization> | <organization>Independent</organization> | |||
| <address> | <address> | |||
| <postal> | <postal> | |||
| <street/> | <country>United Kingdom</country> | |||
| <country>UK</country> | ||||
| </postal> | </postal> | |||
| <email>ietf@bobbriscoe.net</email> | <email>ietf@bobbriscoe.net</email> | |||
| <uri>http://bobbriscoe.net/</uri> | <uri>https://bobbriscoe.net/</uri> | |||
| </address> | </address> | |||
| </author> | </author> | |||
| <author fullname="Greg White" initials="G." surname="White"> | <author fullname="Greg White" initials="G." surname="White"> | |||
| <organization>CableLabs</organization> | <organization>CableLabs</organization> | |||
| <address> | <address> | |||
| <postal> | <postal> | |||
| <street/> | <country>United States of America</country> | |||
| <country>US</country> | ||||
| </postal> | </postal> | |||
| <email>G.White@CableLabs.com</email> | <email>G.White@CableLabs.com</email> | |||
| </address> | </address> | |||
| </author> | </author> | |||
| <date month="" year=""/> | <date month="April" year="2026"/> | |||
| <area>WIT</area> | ||||
| <workgroup>tsvwg</workgroup> | ||||
| <keyword>Independent Submission Stream</keyword> | <keyword>Independent Submission Stream</keyword> | |||
| <keyword>ISE</keyword> | <keyword>ISE</keyword> | |||
| <keyword>Latency</keyword> | <keyword>Latency</keyword> | |||
| <keyword>Policing</keyword> | <keyword>Policing</keyword> | |||
| <abstract> | <abstract> | |||
| <t>This informational document explains the specification of the queue | <t>This Informational RFC explains the specification of the queue | |||
| protection algorithm used in DOCSIS technology since version 3.1. A | protection algorithm used in Data-Over-Cable Service Interface Specificati | |||
| shared low latency queue relies on the non-queue-building behaviour of | on (DOCSIS) technology since version 3.1. A | |||
| shared low-latency queue relies on the non-queue-building behavior of | ||||
| every traffic flow using it. However, some flows might not take such | every traffic flow using it. However, some flows might not take such | |||
| care, either accidentally or maliciously. If a queue is about to exceed | care, either accidentally or maliciously. If a queue is about to exceed | |||
| a threshold level of delay, the queue protection algorithm can rapidly | a threshold level of delay, the queue protection algorithm can rapidly | |||
| detect the flows most likely to be responsible. It can then prevent harm | detect the flows most likely to be responsible. It can then prevent harm | |||
| to other traffic in the low latency queue by ejecting selected packets | to other traffic in the low-latency queue by ejecting selected packets | |||
| (or all packets) of these flows. The document is designed for four types | (or all packets) of these flows. This document is designed for four audien | |||
| of audience: a) congestion control designers who need to understand how | ces: a) congestion control designers who need to understand how | |||
| to keep on the 'good' side of the algorithm; b) implementers of the | to keep on the "good" side of the algorithm; b) implementers of the | |||
| algorithm who want to understand it in more depth; c) designers of | algorithm who want to understand it in more depth; c) designers of | |||
| algorithms with similar goals, perhaps for non-DOCSIS scenarios; and d) | algorithms with similar goals, perhaps for non-DOCSIS scenarios; and | |||
| researchers interested in evaluating the algorithm.</t> | d) researchers interested in evaluating the algorithm.</t> | |||
| </abstract> | </abstract> | |||
| </front> | </front> | |||
| <middle> | <middle> | |||
| <section anchor="qp_intro" numbered="true" toc="default"> | <section anchor="qp_intro" numbered="true" toc="default"> | |||
| <name>Introduction</name> | <name>Introduction</name> | |||
| <t>This informational document explains the specification of the queue | ||||
| <t>This Informational RFC explains the specification of the queue | ||||
| protection (QProt) algorithm used in DOCSIS technology since version 3.1 | protection (QProt) algorithm used in DOCSIS technology since version 3.1 | |||
| <xref target="DOCSIS" format="default"/>.</t> | <xref target="DOCSIS" format="default"/>.</t> | |||
| <t>Although the algorithm is defined in annex P of <xref target="DOCSIS" f | <t>Although the algorithm is defined in Annex P of <xref target="DOCSIS" f | |||
| ormat="default"/>, it relies on cross-references to other parts of the | ormat="default"/>, it relies on cross references to other parts of the | |||
| set of specs. This document pulls all the strands together into one | set of specifications. This document pulls all the strands together into o | |||
| ne | ||||
| self-contained document. The core of the document is a similar | self-contained document. The core of the document is a similar | |||
| pseudocode walk-through to that in the DOCSIS spec, but it also includes | pseudocode walk-through to that in the DOCSIS specification, but it also i | |||
| additional material: i) a brief overview; ii) a definition of how a data | ncludes | |||
| sender needs to behave to avoid triggering queue protection; and iii) a | additional material:</t> | |||
| section giving the rationale for the design choices.</t> | <ol spacing="normal" type="i"> | |||
| <li>a brief overview,</li> | ||||
| <li>a definition of how a data | ||||
| sender needs to behave to avoid triggering queue protection, and</li> | ||||
| <li>a | ||||
| section giving the rationale for the design choices.</li></ol> | ||||
| <t>Low queuing delay depends on hosts sending their data smoothly, | <t>Low queuing delay depends on hosts sending their data smoothly, | |||
| either at low rate or responding to explicit congestion notifications | either at a low rate or responding to Explicit Congestion Notifications | |||
| (ECN <xref target="RFC8311" format="default"/>, <xref target="RFC9331" for | (ECNs) (see <xref target="RFC8311" format="default"/> and <xref target="RF | |||
| mat="default"/>). So low queuing | C9331" format="default"/>). So, low-latency queuing | |||
| latency is something hosts create themselves, not something the network | is something hosts create themselves, not something the network | |||
| gives them. This tends to ensure that self-interest alone does not drive | gives them. This tends to ensure that self-interest alone does not drive | |||
| flows to mis-mark their packets for the low latency queue. However, | flows to mis-mark their packets for the low-latency queue. However, | |||
| traffic from an application that does not behave in a non-queue-building | traffic from an application that does not behave in a non-queue-building | |||
| way might erroneously be classified into a low latency queue, whether | way might erroneously be classified into a low-latency queue, whether | |||
| accidentally or maliciously. QProt protects other traffic in the low | accidentally or maliciously. QProt protects other traffic in the low-laten | |||
| latency queue from the harm due to excess queuing that would otherwise | cy queue from the harm due to excess queuing that would otherwise | |||
| be caused by such anomalous behaviour.</t> | be caused by such anomalous behavior.</t> | |||
| <t>In normal scenarios without misclassified traffic, QProt is not | <t>In normal scenarios without misclassified traffic, QProt is not | |||
| expected to intervene at all in the classification or forwarding of | expected to intervene at all in the classification or forwarding of | |||
| packets.</t> | packets.</t> | |||
| <t>An overview of how low latency support has been added to DOCSIS | <t>An overview of how low-latency support has been added to DOCSIS | |||
| technology is given in <xref target="LLD" format="default"/>. In each dire ction of a | technology is given in <xref target="LLD" format="default"/>. In each dire ction of a | |||
| DOCSIS link (upstream and downstream), there are two queues: one for Low | DOCSIS link (upstream and downstream), there are two queues: one for Low-L | |||
| Latency (LL) and one for Classic traffic, in an arrangement similar to | atency (LL) and one for Classic traffic, in an arrangement similar to | |||
| the IETF's Coupled DualQ AQM <xref target="RFC9332" format="default"/>. Th | the IETF's Coupled DualQ Active Queue Management (AQM) <xref target="RFC93 | |||
| e two queues | 32" format="default"/>. The two queues | |||
| enable a transition from 'Classic' to 'Scalable' congestion control so | enable a transition from "Classic" to "Scalable" congestion control so | |||
| that low latency can become the norm for any application, including ones | that low latency can become the norm for any application, including ones | |||
| seeking both full throughput and low latency, not just low-rate | seeking both full throughput and low latency, not just low-rate | |||
| applications that have been more traditionally associated with a low | applications that have been more traditionally associated with a low-laten | |||
| latency service. The Classic queue is only necessary for traffic such as | cy service. | |||
| <!--[rfced] We note that the companion document RFC-to-be 9956 | ||||
| (draft-ietf-tsvwg-nab-33) cites more information for Reno | ||||
| and Cubic. Should citations be added here as well for | ||||
| the ease of the reader? | ||||
| Original: | ||||
| The Classic queue is only necessary for traffic such as traditional | ||||
| (Reno/Cubic) TCP that needs about a round trip of buffering to fully | ||||
| utilize the link, and therefore has no incentive to mismark itself as | ||||
| low latency. | ||||
| Perhaps: | ||||
| The Classic queue is only necessary for traffic such as traditional | ||||
| (Reno [RFC5681] / Cubic [RFC9438) TCP that needs about a round trip of | ||||
| buffering to fully utilize the link; therefore, this traffic has no | ||||
| incentive to mismark itself as low latency. | ||||
| --> | ||||
| The Classic queue is only necessary for traffic such as | ||||
| traditional (Reno/Cubic) TCP that needs about a round trip of buffering | traditional (Reno/Cubic) TCP that needs about a round trip of buffering | |||
| to fully utilize the link, and therefore has no incentive to mismark | to fully utilize the link, and therefore has no incentive to mismark | |||
| itself as low latency. The QProt function is located at the ingress to | itself as low latency. The QProt function is located at the ingress to | |||
| the Low Latency queue. Therefore, in the upstream QProt is located on | the Low-Latency queue. Therefore, in the upstream, QProt is located on | |||
| the cable modem (CM), and in the downstream it is located on the cable | the cable modem (CM); in the downstream, it is located on the | |||
| CMTS (CM Termination System). If an arriving packet triggers queue | CM Termination System (CMTS). If an arriving packet triggers queue | |||
| protection, the QProt algorithm ejects the packet from the Low Latency | protection, the QProt algorithm ejects the packet from the Low-Latency | |||
| queue and reclassifies it into the Classic queue.</t> | queue and reclassifies it into the Classic queue.</t> | |||
| <t>If QProt is used in settings other than DOCSIS links, it would be a | <t>If QProt is used in settings other than DOCSIS links, it would be a | |||
| simple matter to detect queue-building flows by using slightly different | simple matter to detect queue-building flows by using slightly different | |||
| conditions, and/or to trigger a different action as a consequence, as | conditions and/or to trigger a different action as a consequence, as | |||
| appropriate for the scenario, e.g., dropping instead of reclassifying | appropriate for the scenario, e.g., dropping instead of reclassifying | |||
| packets or perhaps accumulating a second per-flow score to decide | packets or perhaps accumulating a second per-flow score to decide | |||
| whether to redirect a whole flow rather than just certain packets. Such | whether to redirect a whole flow rather than just certain packets. Such | |||
| work is for future study and out of scope of the present document.</t> | work is for future study and out of scope of the present document.</t> | |||
| <t>The algorithm is based on a rigorous approach to quantifying how much | <t>The QProt algorithm is based on a rigorous approach to quantifying how much | |||
| each flow contributes to congestion, which is used in economics to | each flow contributes to congestion, which is used in economics to | |||
| allocate responsibility for the cost of one party's behaviour on others | allocate responsibility for the cost of one party's behavior on others | |||
| (the economic externality). Another important feature of the approach is | (the economic externality). Another important feature of the approach is | |||
| that the metric used for the queuing score is based on the same variable | that the metric used for the queuing score is based on the same variable | |||
| that determines the level of ECN signalling seen by the sender <xref targe t="RFC8311" format="default"/>, <xref target="RFC9331" format="default"/>. This makes the internal | that determines the level of ECN signalling seen by the sender (see <xref target="RFC8311" format="default"/> and <xref target="RFC9331" format="default"/ >). This makes the internal | |||
| queuing score visible externally as ECN markings. This transparency is | queuing score visible externally as ECN markings. This transparency is | |||
| necessary to be able to objectively state (in <xref target="qp_nec_flow_be haviour" format="default"/>) how a flow can keep on the 'good' side | necessary to be able to objectively state (in <xref target="qp_nec_flow_be havior" format="default"/>) how a flow can keep on the "good" side | |||
| of the algorithm.</t> | of the algorithm.</t> | |||
| <section numbered="true" toc="default"> | <section numbered="true" toc="default"> | |||
| <name>Document Roadmap</name> | <name>Document Roadmap</name> | |||
| <t>The core of the document is the walk-through of the DOCSIS QProt | <t>The core of the document is the walk-through of the DOCSIS QProt | |||
| algorithm's pseudocode in <xref target="qp_walk-through" format="default "/>.</t> | algorithm's pseudocode in <xref target="qp_walk-through" format="default "/>.</t> | |||
| <t>Prior to that, <xref target="qp_approach" format="default"/> summariz es the approach | <t>Prior to that, <xref target="qp_approach" format="default"/> summariz es the approach | |||
| used in the algorithm, then <xref target="qp_nec_flow_behaviour" format= | used in the algorithm. Then, <xref target="qp_nec_flow_behavior" format= | |||
| "default"/> | "default"/> | |||
| considers QProt from the perspective of the end-system, by defining | considers QProt from the perspective of the end-system by defining | |||
| the behaviour that a flow needs to comply with to avoid the QProt | the behavior that a flow needs to comply with to avoid the QProt | |||
| algorithm ejecting its packets from the low latency queue.</t> | algorithm ejecting its packets from the low-latency queue.</t> | |||
| <t><xref target="qp_rationale" format="default"/> gives deeper insight i nto the | <t><xref target="qp_rationale" format="default"/> gives deeper insight i nto the | |||
| principles and rationale behind the algorithm. Then <xref target="qp_lim | principles and rationale behind the algorithm. Then, <xref target="qp_li | |||
| itations" format="default"/> explains the limitations of the approach, | mitations" format="default"/> explains the limitations of the approach. The usu | |||
| followed by the usual closing sections.</t> | al closing sections follow.</t> | |||
| </section> | </section> | |||
| <section anchor="l4sds_Terminology" numbered="true" toc="default"> | <section anchor="l4sds_Terminology" numbered="true" toc="default"> | |||
| <name>Terminology</name> | <name>Terminology</name> | |||
| <t>The normative language for the DOCSIS QProt algorithm is in the | <t>The normative language for the DOCSIS QProt algorithm is in the | |||
| DOCSIS specs <xref target="DOCSIS" format="default"/>, <xref target="DOC | DOCSIS specifications <xref target="DOCSIS" format="default"/>, <xref ta | |||
| SIS-CM-OSS" format="default"/>, | rget="DOCSIS-CM-OSS" format="default"/>, and | |||
| <xref target="DOCSIS-CCAP-OSS" format="default"/> not in this informatio | <xref target="DOCSIS-CCAP-OSS" format="default"/>: not in this Informati | |||
| nal guide. If | onal RFC. If | |||
| there is any inconsistency, the DOCSIS specs take precedence.</t> | there is any inconsistency, the DOCSIS specifications take precedence.</ | |||
| t> | ||||
| <t>The following terms and abbreviations are used:</t> | <t>The following terms and abbreviations are used:</t> | |||
| <dl newline="false" spacing="normal"> | <dl newline="false" spacing="normal"> | |||
| <dt>CM:</dt> | <dt>CM:</dt> | |||
| <dd>Cable Modem</dd> | <dd>Cable Modem</dd> | |||
| <dt>CMTS:</dt> | <dt>CMTS:</dt> | |||
| <dd>CM Termination System</dd> | <dd>CM Termination System</dd> | |||
| <dt>Congestion-rate:</dt> | <dt>Congestion-rate:</dt> | |||
| <dd>The transmission rate of bits or | <dd>The transmission rate of bits or | |||
| bytes contained within packets of a flow that have the CE | bytes contained within packets of a flow that have the CE | |||
| codepoint set in the IP-ECN field <xref target="RFC3168" format="def ault"/> | codepoint set in the IP-ECN field <xref target="RFC3168" format="def ault"/> | |||
| (including IP headers unless specified otherwise). | (including IP headers unless specified otherwise). | |||
| Congestion-bit-rate and congestion-volume were introduced in <xref t arget="RFC7713" format="default"/> and <xref target="RFC6789" format="default"/> .</dd> | Congestion-bit-rate and congestion-volume were introduced in <xref t arget="RFC7713" format="default"/> and <xref target="RFC6789" format="default"/> .</dd> | |||
| <dt>DOCSIS:</dt> | <dt>DOCSIS:</dt> | |||
| <dd>Data Over Cable System Interface | <dd>Data-Over-Cable System Interface | |||
| Specification. "DOCSIS" is a registered trademark of Cable | Specification. "DOCSIS" is a registered trademark of Cable | |||
| Television Laboratories, Inc. ("CableLabs").</dd> | Television Laboratories, Inc. ("CableLabs").</dd> | |||
| <dt>Non-queue-building:</dt> | <dt>Non-queue-building:</dt> | |||
| <dd>A flow that tends not to build a | <dd>A flow that tends not to build a | |||
| queue</dd> | queue.</dd> | |||
| <dt>Queue-building:</dt> | <dt>Queue-building:</dt> | |||
| <dd>A flow that builds a queue. If it is | <dd>A flow that builds a queue. If it is | |||
| classified into the Low Latency queue, it is therefore a candidate | classified into the Low-Latency queue, it is therefore a candidate | |||
| for the queue protection algorithm to detect and sanction.</dd> | for the queue protection algorithm to detect and sanction.</dd> | |||
| <dt>ECN:</dt> | <dt>ECN:</dt> | |||
| <dd>Explicit Congestion Notification</dd> | <dd>Explicit Congestion Notification</dd> | |||
| <dt>QProt:</dt> | <dt>QProt:</dt> | |||
| <dd>The Queue Protection function</dd> | <dd>The Queue Protection function</dd> | |||
| </dl> | </dl> | |||
| </section> | </section> | |||
| <section numbered="true" toc="default"> | <section numbered="true" toc="default"> | |||
| <name>Copyright Material</name> | <name>Copyright Material</name> | |||
| <t>Parts of this document are reproduced from <xref target="DOCSIS" form at="default"/> | <t>Parts of this document are reproduced from <xref target="DOCSIS" form at="default"/> | |||
| with kind permission of the copyright holder, Cable Television | with kind permission of the copyright holder, Cable Television | |||
| Laboratories, Inc. ("CableLabs").</t> | Laboratories, Inc. ("CableLabs").</t> | |||
| </section> | </section> | |||
| </section> | </section> | |||
| <section anchor="qp_approach" numbered="true" toc="default"> | <section anchor="qp_approach" numbered="true" toc="default"> | |||
| <name>Approach - In Brief</name> | <name>Approach (In Brief)</name> | |||
| <t>The algorithm is divided into mechanism and policy. There is only a | <t>The algorithm is divided into mechanism and policy. There is only a | |||
| tiny amount of policy code, but policy might need to be changed in the | tiny amount of policy code, but policy might need to be changed in the | |||
| future. So, where hardware implementation is being considered, it would | future. So, where hardware implementation is being considered, it would | |||
| be advisable to implement the policy aspects in firmware or | be advisable to implement the policy aspects in firmware or | |||
| software:</t> | software:</t> | |||
| <ul spacing="normal"> | <ul spacing="normal"> | |||
| <li> | <li> | |||
| <t>The mechanism aspects identify flows, maintain flow-state and | <t>The mechanism aspects identify flows, maintain flow-state, and | |||
| accumulate per-flow queuing scores;</t> | accumulate per-flow queuing scores;</t> | |||
| </li> | </li> | |||
| <li> | <li> | |||
| <t>The policy aspects can be divided into conditions and | <t>The policy aspects can be divided into conditions and | |||
| actions:</t> | actions:</t> | |||
| <ul spacing="normal"> | <ul spacing="normal"> | |||
| <li> | <li> | |||
| <t>The conditions are the logic that determines when action | <t>The conditions are the logic that determines when action | |||
| should be taken to avert the risk of queuing delay becoming | should be taken to avert the risk of queuing delay becoming | |||
| excessive;</t> | excessive;</t> | |||
| </li> | </li> | |||
| <li> | <li> | |||
| <t>The actions determine how this risk is averted, e.g., by | <t>The actions determine how this risk is averted, e.g., by | |||
| redirecting packets from a flow into another queue, or to | redirecting packets from a flow into another queue or by | |||
| reclassify a whole flow that seems to be misclassified.</t> | reclassifying a whole flow that seems to be misclassified.</t> | |||
| </li> | </li> | |||
| </ul> | </ul> | |||
| </li> | </li> | |||
| </ul> | </ul> | |||
| <section anchor="qp_approach_mechanism" numbered="true" toc="default"> | <section anchor="qp_approach_mechanism" numbered="true" toc="default"> | |||
| <name>Mechanism</name> | <name>Mechanism</name> | |||
| <t>The algorithm maintains per-flow-state, where 'flow' usually means | <t>The algorithm maintains per-flow-state, where "flow" usually means | |||
| an end-to-end (layer-4) 5-tuple. The flow-state consists of a queuing | an end-to-end (Layer 4) 5-tuple. The flow-state consists of a queuing | |||
| score that decays over time. Indeed it is transformed into time units | score that decays over time. Indeed, it is transformed into time units | |||
| so that it represents the flow-state's own expiry time (explained in | so that it represents the flow-state's own expiry time (explained in | |||
| <xref target="qp_rationale_normalize" format="default"/>). A higher queu ing score | <xref target="qp_rationale_normalize" format="default"/>). A higher queu ing score | |||
| pushes out the expiry time further.</t> | pushes out the expiry time further.</t> | |||
| <t>Non-queue-building flows tend to release their flow-state rapidly | <t>Non-queue-building flows tend to release their flow-state rapidly: | |||
| --- it usually expires reasonably early in the gap between the packets | it usually expires reasonably early in the gap between the packets | |||
| of a normal flow. Then the memory can be recycled for packets from | of a normal flow. Then, the memory can be recycled for packets from | |||
| other flows that arrive in between. So only queue-building flows hold | other flows that arrive in-between. So, only queue-building flows hold | |||
| flow state persistently.</t> | flow state persistently.</t> | |||
| <t>The simplicity and effectiveness of the algorithm is due to the | <t>The simplicity and effectiveness of the algorithm is due to the | |||
| definition of the queuing score. The queueing score represents the | definition of the queuing score. The queueing score represents the | |||
| share of blame for queuing that each flow bears. The scoring algorithm | share of blame for queuing that each flow bears. The scoring algorithm | |||
| uses the same internal variable, probNative, that the AQM for the low | uses the same internal variable, probNative, that the AQM for the low-la | |||
| latency queue uses to ECN-mark packets (the other two forms of | tency queue uses to ECN-mark packets. (The other two forms of | |||
| marking, Classic and coupled, are driven by Classic traffic and | marking, Classic and coupled, are driven by Classic traffic; | |||
| therefore not relevant to protection of the LL queue). In this way, | therefore, they are not relevant to protection of the LL queue). In this | |||
| way, | ||||
| the queuing score accumulates the size of each arriving packet of a | the queuing score accumulates the size of each arriving packet of a | |||
| flow, but scaled by the value of probNative (in the range 0 to 1) at | flow but scaled by the value of probNative (in the range 0 to 1) at | |||
| the instant the packet arrives. So a flow's score accumulates faster, | the instant the packet arrives. So, a flow's score accumulates faster:</ | |||
| the higher the degree of queuing and the faster that the flow's | t> | |||
| packets arrive when there is queuing. <xref target="qp_rationale_not_thr | <ul> | |||
| oughput" format="default"/> explains further why this score | <li>the higher the degree of queuing and</li> | |||
| <li>the faster that the flow's | ||||
| packets arrive when there is queuing.</li> | ||||
| </ul> | ||||
| <t><xref target="qp_rationale_not_throughput" format="default"/> explains | ||||
| further why this score | ||||
| represents blame for queuing.</t> | represents blame for queuing.</t> | |||
| <t>The algorithm as described so far would accumulate a number that | <t>The algorithm, as described so far, would accumulate a number that | |||
| would rise at the so-called congestion-rate of the flow (see | would rise at the so-called congestion-rate of the flow (see | |||
| Terminology in <xref target="l4sds_Terminology" format="default"/>), i.e | <xref target="l4sds_Terminology" format="default"/>), i.e., the | |||
| ., the | rate at which the flow is contributing to congestion or the rate at | |||
| rate at which the flow is contributing to congestion, or the rate at | which the AQM is forwarding bytes of the flow that are ECN-marked. | |||
| which the AQM is forwarding bytes of the flow that are ECN marked. | ||||
| However, rather than growing continually, the queuing score is also | However, rather than growing continually, the queuing score is also | |||
| reduced (or 'aged') at a constant rate. This is because it is | reduced (or "aged") at a constant rate. This is because it is | |||
| unavoidable for capacity-seeking flows to induce a continuous low | unavoidable for capacity-seeking flows to induce a continuous low | |||
| level of congestion as they track available capacity. <xref target="qp_r ationale_aging" format="default"/> explains why this allowance can be set | level of congestion as they track available capacity. <xref target="qp_r ationale_aging" format="default"/> explains why this allowance can be set | |||
| to the same constant for any scalable flow, whatever its bit rate.</t> | to the same constant for any scalable flow, whatever its bit rate.</t> | |||
| <t>For implementation efficiency, the queuing score is transformed | <t>For implementation efficiency, the queuing score is transformed | |||
| into time units so that it represents the expiry time of the flow | into time units; this is so that it represents the expiry time of the fl | |||
| state (as already discussed above). Then it does not need to be | ow | |||
| explicitly aged, because the natural passage of time implicitly 'ages' | state (as already discussed above). Then, it does not need to be | |||
| explicitly aged because the natural passage of time implicitly "ages" | ||||
| an expiry time. The transformation into time units simply involves | an expiry time. The transformation into time units simply involves | |||
| dividing the queuing score of each packet by the constant aging rate | dividing the queuing score of each packet by the constant aging rate | |||
| (explained further in <xref target="qp_rationale_normalize" format="defa ult"/>).</t> | (this is explained further in <xref target="qp_rationale_normalize" form at="default"/>).</t> | |||
| </section> | </section> | |||
| <section anchor="qp_approach_policy" numbered="true" toc="default"> | <section anchor="qp_approach_policy" numbered="true" toc="default"> | |||
| <name>Policy</name> | <name>Policy</name> | |||
| <section numbered="true" toc="default"> | <section numbered="true" toc="default"> | |||
| <name>Policy Conditions</name> | <name>Policy Conditions</name> | |||
| <t>The algorithm uses the queuing score to determine whether to | <t>The algorithm uses the queuing score to determine whether to | |||
| eject each packet only at the time it first arrives. This limits the | eject each packet only at the time it first arrives. This limits the | |||
| policies available. For instance, when queueing delay exceeds a | policies available. For instance, when queueing delay exceeds a | |||
| threshold, it is not possible to eject a packet from the flow with | threshold, it is not possible to eject a packet from the flow with | |||
| the highest queuing scoring, because that would involve searching | the highest queuing scoring because that would involve searching | |||
| the queue for such a packet (if indeed one was still in the queue). | the queue for such a packet (if, indeed, one were still in the queue). | |||
| Nonetheless, it is still possible to develop a policy that protects | Nonetheless, it is still possible to develop a policy that protects | |||
| the low latency of the queue by making the queuing score threshold | the low latency of the queue by making the queuing score threshold | |||
| stricter the greater the excess of queuing delay relative to the | stricter the greater the excess of queuing delay relative to the | |||
| threshold (explained in <xref target="qp_rationale_conditions" format= "default"/>).</t> | threshold (this is explained in <xref target="qp_rationale_conditions" format="default"/>).</t> | |||
| </section> | </section> | |||
| <section numbered="true" toc="default"> | <section numbered="true" toc="default"> | |||
| <name>Policy Action</name> | <name>Policy Action</name> | |||
| <t>In the DOCSIS QProt spec at the time of writing, when the policy | <t>At the time of writing, the DOCSIS QProt specification states that | |||
| conditions are met the action taken to protect the low latency queue | when the policy | |||
| is to reclassify a packet into the Classic queue (justified in <xref t | conditions are met, the action taken to protect the low-latency queue | |||
| arget="qp_rationale_reclassify" format="default"/>).</t> | is to reclassify a packet into the Classic queue (this is justified in | |||
| <xref target="qp_rationale_reclassify" format="default"/>).</t> | ||||
| </section> | </section> | |||
| </section> | </section> | |||
| </section> | </section> | |||
| <section anchor="qp_nec_flow_behaviour" numbered="true" toc="default"> | <section anchor="qp_nec_flow_behavior" numbered="true" toc="default"> | |||
| <name>Necessary Flow Behaviour</name> | <name>Necessary Flow Behavior</name> | |||
| <t>The QProt algorithm described here can be used for responsive and/or | <t>The QProt algorithm described here can be used for responsive and/or | |||
| unresponsive flows.</t> | unresponsive flows.</t> | |||
| <ul spacing="normal"> | <ul spacing="normal"> | |||
| <li> | <li> | |||
| <t>It is possible to objectively describe the least responsive way | <t>It is possible to objectively describe the least responsive way | |||
| that a flow will need to respond to congestion signals in order to | that a flow will need to respond to congestion signals in order to | |||
| avoid triggering queue protection, no matter the link capacity and | avoid triggering queue protection, no matter the link capacity and | |||
| no matter how much other traffic there is.</t> | no matter how much other traffic there is.</t> | |||
| </li> | </li> | |||
| <li> | <li> | |||
| <t>It is not possible to describe how fast or smooth an unresponsive | <t>It is not possible to describe how fast or smooth an unresponsive | |||
| flow should be to avoid queue protection, because this depends on | flow should be to avoid queue protection because this depends on | |||
| how much other traffic there is and the capacity of the link, which | how much other traffic there is and the capacity of the link, which | |||
| an application is unable to know. However, the more smoothly an | an application is unable to know. However, the more smoothly an | |||
| unresponsive flow paces its packets and the lower its rate relative | unresponsive flow paces its packets and the lower its rate relative | |||
| to typical broadband link capacities, the less likelihood that it | to typical broadband link capacities, the less likelihood that it | |||
| will risk causing enough queueing to trigger queue protection.</t> | will risk causing enough queueing to trigger queue protection.</t> | |||
| </li> | </li> | |||
| </ul> | </ul> | |||
| <t>Responsive low latency flows can use an L4S ECN codepoint <xref target= "RFC9331" format="default"/> to get classified into the low latency queue.</t> | <t>Responsive low-latency flows can use a Low Latency, Low Loss, and Scala ble throughput (L4S) ECN codepoint <xref target="RFC9331" format="default"/> to get classified into the low-latency queue.</t> | |||
| <t>A sender can arrange for flows that are smooth but do not respond to | <t>A sender can arrange for flows that are smooth but do not respond to | |||
| ECN marking to be classified into the low latency queue by using the | ECN marking to be classified into the low-latency queue by using the | |||
| Non-Queue-Building (NQB) Diffserv codepoint <xref target="I-D.ietf-tsvwg-n | Non-Queue-Building (NQB) Diffserv codepoint <xref target="RFC9956" format= | |||
| qb" format="default"/>, which the DOCSIS specs support, or an | "default"/>, which the DOCSIS specifications support, or an | |||
| operator can use various other local classifiers.</t> | operator can use various other local classifiers.</t> | |||
| <t>As already explained in <xref target="qp_approach_mechanism" format="de fault"/>, the | <t>As already explained in <xref target="qp_approach_mechanism" format="de fault"/>, the | |||
| QProt algorithm is driven from the same variable that drives the ECN | QProt algorithm is driven from the same variable that drives the ECN-marki | |||
| marking probability in the low latency or 'LL' queue (the 'Native' AQM | ng probability in the low-latency or "LL" queue (the "Native" AQM | |||
| of the LL queue is defined in the Immediate Active Queue Management | of the LL queue is defined in the Immediate Active Queue Management | |||
| Annex of <xref target="DOCSIS" format="default"/>). The algorithm that cal culates this | Annex of <xref target="DOCSIS" format="default"/>). The algorithm that cal culates this | |||
| internal variable is run on the arrival of every packet, whether it is | internal variable is run on the arrival of every packet, whether or not it | |||
| ECN-capable or not, so that it can be used by the QProt algorithm. But | is | |||
| ECN-capable, so that it can be used by the QProt algorithm. But | ||||
| the variable is only used to ECN-mark packets that are ECN-capable.</t> | the variable is only used to ECN-mark packets that are ECN-capable.</t> | |||
| <t>Not only does this dual use of the variable improve processing | <t>Not only does this dual use of the variable improve processing | |||
| efficiency, but it also makes the basis of the QProt algorithm visible | efficiency, but it also makes the basis of the QProt algorithm visible | |||
| and transparent, at least for responsive ECN-capable flows. Then it is | and transparent, at least for responsive ECN-capable flows. Then, it is | |||
| possible to state objectively that a flow can avoid triggering queue | possible to state objectively that a flow can avoid triggering queue | |||
| protection by keeping the bit rate of ECN marked packets (the | protection by keeping the bit rate of ECN-marked packets (the | |||
| congestion-rate) below AGING, which is a configured constant of the | congestion-rate) below AGING, which is a configured constant of the | |||
| algorithm (default 2^19 B/s ~= 4 Mb/s). Note that it is in a congestion | algorithm (default 2^19 B/s ~= 4 Mb/s). Note that it is in a congestion | |||
| controller's own interest to keep its average congestion-rate well below | controller's own interest to keep its average congestion-rate well below | |||
| this level (e.g., ~1 Mb/s), to ensure that it does not trigger queue | this level (e.g., ~1 Mb/s) to ensure that it does not trigger queue | |||
| protection during transient dynamics.</t> | protection during transient dynamics.</t> | |||
| <t>If the QProt algorithm is used in other settings, it would still need | <t>If the QProt algorithm is used in other settings, it would still need | |||
| to be based on the visible level of congestion signalling, in a similar | to be based on the visible level of congestion signalling, in a similar | |||
| way to the DOCSIS approach. Without transparency of the basis of the | way to the DOCSIS approach. Without transparency of the basis of the | |||
| algorithm's decisions, end-systems would not be able to avoid triggering | algorithm's decisions, end-systems would not be able to avoid triggering | |||
| queue protection on an objective basis.</t> | queue protection on an objective basis.</t> | |||
| </section> | </section> | |||
| <section anchor="qp_walk-through" numbered="true" toc="default"> | <section anchor="qp_walk-through" numbered="true" toc="default"> | |||
| <name>Pseudocode Walk-Through</name> | <name>Pseudocode Walk-Through</name> | |||
| <t/> | ||||
| <section anchor="qp_header_file" numbered="true" toc="default"> | <section anchor="qp_header_file" numbered="true" toc="default"> | |||
| <name>Input Parameters, Constants and Variables</name> | <name>Input Parameters, Constants, and Variables</name> | |||
| <t>The operator input parameters that set the parameters in the first | <t>The operator input parameters that set the parameters in the first | |||
| two blocks of pseudocode below are defined for cable modems (CMs) in | two blocks of pseudocode below are defined for cable modems (CMs) in | |||
| <xref target="DOCSIS-CM-OSS" format="default"/> and for CMTSs in <xref t arget="DOCSIS-CCAP-OSS" format="default"/>. Then, further constants are either d erived | <xref target="DOCSIS-CM-OSS" format="default"/> and for CMTSs in <xref t arget="DOCSIS-CCAP-OSS" format="default"/>. Then, further constants are either d erived | |||
| from the input parameters or hard-coded.</t> | from the input parameters or hard-coded.</t> | |||
| <t>Defaults and units are shown in square brackets. Defaults (or | <t>Defaults and units are shown in square brackets. Defaults (or | |||
| indeed any aspect of the algorithm) are subject to change, so the | indeed any aspect of the algorithm) are subject to change, so the | |||
| latest DOCSIS specs are the definitive references. Also any operator | latest DOCSIS specifications are the definitive references. Also, any op erator | |||
| might set certain parameters to non-default values.</t> | might set certain parameters to non-default values.</t> | |||
| <sourcecode name="" type="" markers="true"><![CDATA[ | <!--[rfced] FYI, "us" has been updated to "µs" in three instances | |||
| where it follows numerals in comments in the pseudocode. This is | ||||
| in keeping with using µs for microseconds in RFC-to-be 9956. | ||||
| Original: | ||||
| 4000us | ||||
| 1000us | ||||
| 525 us | ||||
| Current: | ||||
| 4000 µs | ||||
| 1000 µs | ||||
| 525 µs | ||||
| --> | ||||
| <sourcecode name="" type="pseudocode" markers="true"><![CDATA[ | ||||
| // Input Parameters | // Input Parameters | |||
| MAX_RATE; // Configured maximum sustained rate [b/s] | MAX_RATE; // Configured maximum sustained rate [b/s] | |||
| QPROTECT_ON; // Queue Protection is enabled [Default: TRUE] | QPROTECT_ON; // Queue Protection is enabled [Default: TRUE] | |||
| CRITICALqL_us; // LL queue threshold delay [us] Default: MAXTH_us | CRITICALqL_us; // LL queue threshold delay [us] Default: MAXTH_us | |||
| CRITICALqLSCORE_us;// The threshold queuing score [Default: 4000us] | CRITICALqLSCORE_us;// The threshold queuing score [Default: 4000 µs] | |||
| LG_AGING; // The aging rate of the q'ing score [Default: 19] | LG_AGING; // The aging rate of the q'ing score [Default: 19] | |||
| // as log base 2 of the congestion-rate [lg(B/s)] | // as log base 2 of the congestion-rate [lg(B/s)] | |||
| // Input Parameters for the calcProbNative() algorithm: | // Input Parameters for the calcProbNative() algorithm: | |||
| MAXTH_us; // Max LL AQM marking threshold [Default: 1000us] | MAXTH_us; // Max LL AQM marking threshold [Default: 1000 µs] | |||
| LG_RANGE; // Log base 2 of the range of ramp [lg(ns)] | LG_RANGE; // Log base 2 of the range of ramp [lg(ns)] | |||
| // Default: 2^19 = 524288 ns (roughly 525 us) | // Default: 2^19 = 524288 ns (roughly 525 µs)]]></sourcecode | |||
| ]]></sourcecode> | > | |||
| <sourcecode name="" type="" markers="true"><![CDATA[ | ||||
| <sourcecode name="" type="pseudocode" markers="true"><![CDATA[ | ||||
| // Constants, either derived from input parameters or hard-coded | // Constants, either derived from input parameters or hard-coded | |||
| T_RES; // Resolution of t_exp [ns] | T_RES; // Resolution of t_exp [ns] | |||
| // Convert units (approx) | // Convert units (approx) | |||
| AGING = pow(2, (LG_AGING-30) ) * T_RES; // lg([B/s]) to [B/T_RES] | AGING = pow(2, (LG_AGING-30) ) * T_RES; // lg([B/s]) to [B/T_RES] | |||
| CRITICALqL = CRITICALqL_us * 1000; // [us] to [ns] | CRITICALqL = CRITICALqL_us * 1000; // [us] to [ns] | |||
| CRITICALqLSCORE = CRITICALqLSCORE_us * 1000/T_RES; // [us] to [T_RES] | CRITICALqLSCORE = CRITICALqLSCORE_us * 1000/T_RES; // [us] to [T_RES] | |||
| // Threshold for the q'ing score condition | // Threshold for the q'ing score condition | |||
| CRITICALqLPRODUCT = CRITICALqL * CRITICALqLSCORE; | CRITICALqLPRODUCT = CRITICALqL * CRITICALqLSCORE; | |||
| qLSCORE_MAX = 5E9 / T_RES; // Max queuing score = 5 s | qLSCORE_MAX = 5E9 / T_RES; // Max queuing score = 5 s | |||
| skipping to change at line 413 ¶ | skipping to change at line 425 ¶ | |||
| EXIT_SANCTION = 1; // Redirect the packet | EXIT_SANCTION = 1; // Redirect the packet | |||
| MAX_PROB = 1; // For integer arithmetic, would use a large int | MAX_PROB = 1; // For integer arithmetic, would use a large int | |||
| // e.g., 2^31, to allow space for overflow | // e.g., 2^31, to allow space for overflow | |||
| MAXTH = MAXTH_us * 1000; // Max marking threshold [ns] | MAXTH = MAXTH_us * 1000; // Max marking threshold [ns] | |||
| MAX_FRAME_SIZE = 2000; // DOCSIS-wide constant [B] | MAX_FRAME_SIZE = 2000; // DOCSIS-wide constant [B] | |||
| // Minimum marking threshold of 2 MTU for slow links [ns] | // Minimum marking threshold of 2 MTU for slow links [ns] | |||
| FLOOR = 2 * 8 * MAX_FRAME_SIZE * 10^9 / MAX_RATE; | FLOOR = 2 * 8 * MAX_FRAME_SIZE * 10^9 / MAX_RATE; | |||
| RANGE = (1 << LG_RANGE); // Range of ramp [ns] | RANGE = (1 << LG_RANGE); // Range of ramp [ns] | |||
| MINTH = max ( MAXTH - RANGE, FLOOR); | MINTH = max ( MAXTH - RANGE, FLOOR); | |||
| MAXTH = MINTH + RANGE; // Max marking threshold [ns] | MAXTH = MINTH + RANGE; // Max marking threshold [ns]]]></sourcecode> | |||
| ]]></sourcecode> | ||||
| <!--[rfced] Please review and rephrase the following sentence with | ||||
| regard to the clause that begins "but in the floating..." as the | ||||
| sentence does not seem to parse as is. | ||||
| Original: | ||||
| The actual DOCSIS | ||||
| QProt algorithm is defined using integer arithmetic, but in the | ||||
| floating point arithmetic used in this document, (0 <= probNative <= 1). | ||||
| Perhaps: | ||||
| The actual DOCSIS | ||||
| QProt algorithm is defined using integer arithmetic, but in the | ||||
| floating-point arithmetic used in this document, | ||||
| the native marking probability is between 0 and 1 (inclusive), | ||||
| i.e., 0 <= probNative <= 1. | ||||
| --> | ||||
| <t>Throughout the pseudocode, most variables are integers. The only | <t>Throughout the pseudocode, most variables are integers. The only | |||
| exceptions are floating point variables representing probabilities | exceptions are floating-point variables representing probabilities | |||
| (MAX_PROB and probNative) and the AGING parameter. The actual DOCSIS | (MAX_PROB and probNative) and the AGING parameter. The actual DOCSIS | |||
| QProt algorithm is defined using integer arithmetic, but in the | QProt algorithm is defined using integer arithmetic, but in the | |||
| floating point arithmetic used in this document, (0 <= probNative | floating-point arithmetic used in this document, (0 <= probNative | |||
| <= 1). Also, the pseudocode omits overflow checking and it would | <= 1). Also, the pseudocode omits overflow checking, and it would | |||
| need to be made robust to non-default input parameters.</t> | need to be made robust to non-default input parameters.</t> | |||
| <t>The resolution for expressing time, T_RES, needs to be chosen to | <t>The resolution for expressing time, T_RES, needs to be chosen to | |||
| ensure that expiry times for buckets can represent times that are a | ensure that expiry times for buckets can represent times that are a | |||
| fraction (e.g., 1/10) of the expected packet interarrival time for the | fraction (e.g., 1/10) of the expected packet interarrival time for the | |||
| system.</t> | system.</t> | |||
| <t>The following definitions explain the purpose of important | <t>The following definitions explain the purpose of important | |||
| variables and functions.</t> | variables and functions.</t> | |||
| <sourcecode name="" type="" markers="true"><![CDATA[ | <sourcecode name="" type="pseudocode" markers="true"><![CDATA[ | |||
| // Public variables: | // Public variables: | |||
| qdelay; // The current queuing delay of the LL queue [ns] | qdelay; // The current queuing delay of the LL queue [ns] | |||
| probNative; // Native marking probability of LL queue within [0,1] | probNative; // Native marking probability of LL queue within [0,1] | |||
| // External variables | // External variables | |||
| packet; // The structure holding packet header fields | packet; // The structure holding packet header fields | |||
| packet.size; // The size of the current packet [B] | packet.size; // The size of the current packet [B] | |||
| packet.uflow; // The flow identifier of the current packet | packet.uflow; // The flow identifier of the current packet | |||
| // (e.g., 5-tuple or 4-tuple if IPSec) | // (e.g., 5-tuple or 4-tuple if IPsec) | |||
| // Irrelevant details of DOCSIS function to return qdelay are removed | // Irrelevant details of DOCSIS function to return qdelay are removed | |||
| qdelayL(...) // Returns current delay of the low latency Q [ns] | qdelayL(...) // Returns current delay of the low-latency Q [ns]]]></sourcec | |||
| ]]></sourcecode> | ode> | |||
| <t>Pseudocode for how the algorithm categorizes packets by flow ID to | <t>Pseudocode for how the algorithm categorizes packets by flow ID to | |||
| populate the variable packet.uflow is not given in detail here. The | populate the variable packet.uflow is not given in detail here. The | |||
| application's flow ID is usually defined by a common 5-tuple (or | application's flow ID is usually defined by a common 5-tuple (or | |||
| 4-tuple) of:</t> | 4-tuple) of:</t> | |||
| <ul spacing="normal"> | <ul spacing="normal"> | |||
| <li> | <li> | |||
| <t>source and destination IP addresses of the innermost IP header | <t>source and destination IP addresses of the innermost IP header | |||
| found;</t> | found;</t> | |||
| </li> | </li> | |||
| <li> | <li> | |||
| <t>the protocol (IPv4) or next header (IPv6) field in this IP | <t>the protocol (IPv4) or next header (IPv6) field in this IP | |||
| header</t> | header</t> | |||
| </li> | </li> | |||
| <li> | <li> | |||
| <t>either of:</t> | <t>either of:</t> | |||
| <ul spacing="normal"> | <ul spacing="normal"> | |||
| <li> | <li> | |||
| <t>source and destination port numbers, for TCP, UDP, | <t>source and destination port numbers, for TCP, UDP, | |||
| UDP-Lite, SCTP, DCCP, etc.</t> | UDP-Lite, Stream Control Transmission Protocol (SCTP), Datagram Congestion Control Protocol (DCCP), etc.</t> | |||
| </li> | </li> | |||
| <li> | <li> | |||
| <t>Security Parameters Index (SPI) for IPSec Encapsulating | <t>Security Parameter Index (SPI) for IPsec Encapsulating | |||
| Security Payload (ESP) <xref target="RFC4303" format="default"/> .</t> | Security Payload (ESP) <xref target="RFC4303" format="default"/> .</t> | |||
| </li> | </li> | |||
| </ul> | </ul> | |||
| </li> | </li> | |||
| </ul> | </ul> | |||
| <t>The Microflow Classification section of the Queue Protection | <t>The Microflow Classification section of the Queue Protection | |||
| Annex of the DOCSIS spec <xref target="DOCSIS" format="default"/> d efines various | Annex of the DOCSIS specification <xref target="DOCSIS" format="default" /> defines various | |||
| strategies to find these headers by skipping extension headers or | strategies to find these headers by skipping extension headers or | |||
| encapsulations. If they cannot be found, the spec defines | encapsulations. If they cannot be found, the specification defines | |||
| various less-specific 3-tuples that would be used. The DOCSIS | various less-specific 3-tuples that would be used. The DOCSIS | |||
| spec should be referred to for all these strategies, which will | specification should be referred to for all these strategies, which will | |||
| not be repeated here.</t> | not be repeated here.</t> | |||
| <t>The array of bucket structures defined below is used by all the | <t>The array of bucket structures defined below is used by all the | |||
| Queue Protection functions:</t> | Queue Protection functions:</t> | |||
| <sourcecode name="" type="" markers="true"><![CDATA[ | <sourcecode name="" type="pseudocode" markers="true"><![CDATA[ | |||
| struct bucket { // The leaky bucket structure to hold per-flow state | struct bucket { // The leaky bucket structure to hold per-flow state | |||
| id; // identifier (e.g., 5-tuple) of flow using bucket | id; // identifier (e.g., 5-tuple) of flow using bucket | |||
| t_exp; // expiry time in units of T_RES | t_exp; // expiry time in units of T_RES | |||
| // (t_exp - now) = flow's transformed q'ing score | // (t_exp - now) = flow's transformed q'ing score | |||
| }; | }; | |||
| struct bucket buckets[NBUCKETS+1]; | struct bucket buckets[NBUCKETS+1];]]></sourcecode> | |||
| ]]></sourcecode> | ||||
| </section> | </section> | |||
| <section anchor="qp_data_path" numbered="true" toc="default"> | <section anchor="qp_data_path" numbered="true" toc="default"> | |||
| <name>Queue Protection Data Path</name> | <name>Queue Protection Data Path</name> | |||
| <t>All the functions of Queue Protection operate on the data path, | <t>All the functions of Queue Protection operate on the data path, | |||
| driven by packet arrivals.</t> | driven by packet arrivals.</t> | |||
| <t>The following functions that maintain per-flow queuing scores and | <t>The following functions that maintain per-flow queuing scores and | |||
| manage per-flow state are considered primarily as mechanism:</t> | manage per-flow state are considered primarily as mechanism:</t> | |||
| <ul empty="true" spacing="normal"> | <ul spacing="normal"> | |||
| <li> | <li> | |||
| <t>pick_bucket(uflow_id); // Returns bucket identifier</t> | <t>pick_bucket(uflow_id); // Returns bucket identifier</t> | |||
| </li> | </li> | |||
| <li> | <li> | |||
| <t>fill_bucket(bucket_id, pkt_size, probNative); // Returns | <t>fill_bucket(bucket_id, pkt_size, probNative); // Returns | |||
| queuing score</t> | queuing score</t> | |||
| </li> | </li> | |||
| <li> | <li> | |||
| <t>calcProbNative(qdelay) // Returns ECN-marking probability of | <t>calcProbNative(qdelay) // Returns ECN-marking probability of | |||
| the native LL AQM</t> | the native LL AQM</t> | |||
| </li> | </li> | |||
| </ul> | </ul> | |||
| <t>The following function is primarily concerned with | <t>The following function is primarily concerned with | |||
| policy:</t> | policy:</t> | |||
| <ul empty="true" spacing="normal"> | <ul spacing="normal"> | |||
| <li> | <li> | |||
| <t>qprotect(packet, ...); // Returns exit status to either forward | <t>qprotect(packet, ...); // Returns exit status to either forward | |||
| or redirect the packet</t> | or redirect the packet</t> | |||
| </li> | </li> | |||
| </ul> | </ul> | |||
| <t>('...' suppresses distracting detail.)</t> | <t>('...' suppresses distracting detail.)</t> | |||
| <t>Future modifications to policy aspects are more likely than to | <t>Future modifications to policy aspects are more likely than modificat ions to | |||
| mechanisms. Therefore, policy aspects would be less appropriate | mechanisms. Therefore, policy aspects would be less appropriate | |||
| candidates for any hardware acceleration.</t> | candidates for any hardware acceleration.</t> | |||
| <t>The entry point to these functions is qprotect(), which is called | <t>The entry point to these functions is qprotect(), which is called | |||
| from packet classification before each packet is enqueued into the | from packet classification before each packet is enqueued into the | |||
| appropriate queue, queue_id, as follows:</t> | appropriate queue, queue_id, as follows:</t> | |||
| <sourcecode name="" type="" markers="true"><![CDATA[ | <sourcecode name="" type="pseudocode" markers="true"><![CDATA[ | |||
| classifier(packet) { | classifier(packet) { | |||
| // Determine which queue using ECN, DSCP and any local-use fields | // Determine which queue using ECN, DSCP, and any local-use fields | |||
| queue_id = classify(packet); | queue_id = classify(packet); | |||
| // LQ & CQ are macros for valid queue IDs returned by classify() | // LQ & CQ are macros for valid queue IDs returned by classify() | |||
| if (queue_id == LQ) { | if (queue_id == LQ) { | |||
| // if packet classified to Low Latency Service Flow | // if packet classified to Low-Latency Service Flow | |||
| if (QPROTECT_ON) { | if (QPROTECT_ON) { | |||
| if (qprotect(packet, ...) == EXIT_SANCTION) { | if (qprotect(packet, ...) == EXIT_SANCTION) { | |||
| // redirect packet to Classic Service Flow | // redirect packet to Classic Service Flow | |||
| queue_id = CQ; | queue_id = CQ; | |||
| } | } | |||
| } | } | |||
| return queue_id; | return queue_id; | |||
| } | }]]></sourcecode> | |||
| ]]></sourcecode> | ||||
| <section anchor="qp_qprotect" numbered="true" toc="default"> | <section anchor="qp_qprotect" numbered="true" toc="default"> | |||
| <name>The qprotect() function</name> | <name>The qprotect() Function</name> | |||
| <t>On each packet arrival at the LL queue, qprotect() measures the | <t>On each packet arrival at the LL queue, qprotect() measures the | |||
| current delay of the LL queue and derives the native LL marking | current delay of the LL queue and derives the native LL marking | |||
| probability from it. Then it uses pick_bucket to find the bucket | probability from it. Then, it uses pick_bucket to find the bucket | |||
| already holding the flow's state, or to allocate a new bucket if the | already holding the flow's state or to allocate a new bucket if the | |||
| flow is new or its state has expired (the most likely case). Then | flow is new or its state has expired (the most likely case). Then, | |||
| the queuing score is updated by the fill_bucket() function. That | the queuing score is updated by the fill_bucket() function. That | |||
| completes the mechanism aspects.</t> | completes the mechanism aspects.</t> | |||
| <t>The comments against the subsequent policy conditions and actions | <t>The comments against the subsequent policy conditions and actions | |||
| should be self-explanatory at a superficial level. The deeper | should be self-explanatory at a superficial level. The deeper | |||
| rationale for these conditions is given in <xref target="qp_rationale_ conditions" format="default"/>.</t> | rationale for these conditions is given in <xref target="qp_rationale_ conditions" format="default"/>.</t> | |||
| <sourcecode name="" type="" markers="true"><![CDATA[ | <sourcecode name="" type="pseudocode" markers="true"><![CDATA[ | |||
| // Per packet queue protection | // Per-packet queue protection | |||
| qprotect(packet, ...) { | qprotect(packet, ...) { | |||
| bckt_id; // bucket index | bckt_id; // bucket index | |||
| qLscore; // queuing score of pkt's flow in units of T_RES | qLscore; // queuing score of pkt's flow in units of T_RES | |||
| qdelay = qL.qdelay(...); | qdelay = qL.qdelay(...); | |||
| probNative = calcProbNative(qdelay); | probNative = calcProbNative(qdelay); | |||
| bckt_id = pick_bucket(packet.uflow); | bckt_id = pick_bucket(packet.uflow); | |||
| qLscore = fill_bucket(buckets[bckt_id], packet.size, probNative); | qLscore = fill_bucket(buckets[bckt_id], packet.size, probNative); | |||
| skipping to change at line 574 ¶ | skipping to change at line 603 ¶ | |||
| // ...and if flow's q'ing score scaled by qdelay/CRITICALqL | // ...and if flow's q'ing score scaled by qdelay/CRITICALqL | |||
| // ...exceeds CRITICALqLSCORE | // ...exceeds CRITICALqLSCORE | |||
| && ( qdelay * qLscore > CRITICALqLPRODUCT ) ) | && ( qdelay * qLscore > CRITICALqLPRODUCT ) ) | |||
| // or qLSCORE_MAX reached | // or qLSCORE_MAX reached | |||
| || ( qLscore >= qLSCORE_MAX ) ) | || ( qLscore >= qLSCORE_MAX ) ) | |||
| return EXIT_SANCTION; | return EXIT_SANCTION; | |||
| else | else | |||
| return EXIT_SUCCESS; | return EXIT_SUCCESS; | |||
| } | }]]></sourcecode> | |||
| ]]></sourcecode> | ||||
| </section> | </section> | |||
| <section anchor="qp_pick_bucket" numbered="true" toc="default"> | <section anchor="qp_pick_bucket" numbered="true" toc="default"> | |||
| <name>The pick_bucket() function</name> | <name>The pick_bucket() Function</name> | |||
| <t>The pick_bucket() function is optimized for flow-state that will | <t>The pick_bucket() function is optimized for flow-state that will | |||
| normally have expired from packet to packet of the same flow. It is | normally have expired from packet to packet of the same flow. It is | |||
| just one way of finding the bucket associated with the flow ID of | just one way of finding the bucket associated with the flow ID of | |||
| each packet - it might be possible to develop more efficient | each packet: it might be possible to develop more efficient | |||
| alternatives.</t> | alternatives.</t> | |||
| <t>The algorithm is arranged so that the bucket holding any live | <t>The algorithm is arranged so that the bucket holding any live | |||
| (non-expired) flow-state associated with a packet will always be | (non-expired) flow-state associated with a packet will always be | |||
| found before a new bucket is allocated. The constant ATTEMPTS, | found before a new bucket is allocated. The constant ATTEMPTS, | |||
| defined earlier, determines how many hashes are used to find a | defined earlier, determines how many hashes are used to find a | |||
| bucket for each flow (actually, only one hash is generated; then, by | bucket for each flow. (Actually, only one hash is generated; then, by | |||
| default, 5 bits of it at a time are used as the hash value, because | default, 5 bits of it at a time are used as the hash value because, | |||
| by default there are 2^5 = 32 buckets).</t> | by default, there are 2^5 = 32 buckets).</t> | |||
| <t>The algorithm stores the flow's own ID in its flow-state. So, | <t>The algorithm stores the flow's own ID in its flow-state. So, | |||
| when a packet of a flow arrives, the algorithm tries up to ATTEMPTS | when a packet of a flow arrives, the algorithm tries up to ATTEMPTS | |||
| times to hash to a bucket, looking for the flow's own ID. If found, | times to hash to a bucket, looking for the flow's own ID. If found, | |||
| it uses that bucket, first resettings the expiry time to 'now' if it | it uses that bucket, first resetting the expiry time to "now" if it | |||
| has expired.</t> | has expired.</t> | |||
| <t>If it does not find the flow's ID, and the expiry time is still | <t>If it does not find the flow's ID, and the expiry time is still | |||
| current, the algorithm can tell that another flow is using that | current, the algorithm can tell that another flow is using that | |||
| bucket, and it continues to look for a bucket for the flow. Even if | bucket, and it continues to look for a bucket for the flow. Even if | |||
| it finds another flow's bucket where the expiry time has passed, it | it finds another flow's bucket where the expiry time has passed, it | |||
| doesn't immediately use it. It merely remembers it as the potential | doesn't immediately use it. It merely remembers it as the potential | |||
| bucket to use. But first it runs through all the ATTEMPTS hashes to | bucket to use. But first it runs through all the ATTEMPTS hashes to | |||
| look for a bucket assigned to the flow ID. Then, if a live bucket is | look for a bucket assigned to the flow ID. Then, if a live bucket is | |||
| not already associated with the packet's flow, the algorithm should | not already associated with the packet's flow, the algorithm should | |||
| have already set aside an existing bucket with a score that has aged | have already set aside an existing bucket with a score that has aged | |||
| skipping to change at line 617 ¶ | skipping to change at line 645 ¶ | |||
| flow.</t> | flow.</t> | |||
| <t>If all else fails, there is one additional bucket (called the | <t>If all else fails, there is one additional bucket (called the | |||
| dregs) that can be used. If the dregs is still in live use by | dregs) that can be used. If the dregs is still in live use by | |||
| another flow, subsequent flows that cannot find a bucket of their | another flow, subsequent flows that cannot find a bucket of their | |||
| own all share it, adding their score to the one in the dregs. A flow | own all share it, adding their score to the one in the dregs. A flow | |||
| might get away with using the dregs on its own, but when there are | might get away with using the dregs on its own, but when there are | |||
| many mis-marked flows, multiple flows are more likely to collide in | many mis-marked flows, multiple flows are more likely to collide in | |||
| the dregs, including innocent flows. The choice of number of buckets | the dregs, including innocent flows. The choice of number of buckets | |||
| and number of hash attempts determines how likely it will be that | and number of hash attempts determines how likely it will be that | |||
| this undesirable scenario will occur.</t> | this undesirable scenario will occur.</t> | |||
| <sourcecode name="" type="" markers="true"><![CDATA[ | <sourcecode name="" type="pseudocode" markers="true"><![CDATA[ | |||
| // Pick the bucket associated with flow uflw | // Pick the bucket associated with flow uflw | |||
| pick_bucket(uflw) { | pick_bucket(uflw) { | |||
| now; // current time | now; // current time | |||
| j; // loop counter | j; // loop counter | |||
| h32; // holds hash of the packet's flow IDs | h32; // holds hash of the packet's flow IDs | |||
| h; // bucket index being checked | h; // bucket index being checked | |||
| hsav; // interim chosen bucket index | hsav; // interim chosen bucket index | |||
| h32 = hash32(uflw); // 32-bit hash of flow ID | h32 = hash32(uflw); // 32-bit hash of flow ID | |||
| hsav = NBUCKETS; // Default bucket | hsav = NBUCKETS; // Default bucket | |||
| now = get_time_now(); // in units of T_RES | now = get_time_now(); // in units of T_RES | |||
| // The for loop checks ATTEMPTS buckets for ownership by flow-ID | // The for loop checks ATTEMPTS buckets for ownership by flow ID | |||
| // It also records the 1st bucket, if any, that could be recycled | // It also records the 1st bucket, if any, that could be recycled | |||
| // because it's expired. | // because it's expired. | |||
| // Must not recycle a bucket until all ownership checks completed | // Must not recycle a bucket until all ownership checks completed | |||
| for (j=0; j<ATTEMPTS; j++) { | for (j=0; j<ATTEMPTS; j++) { | |||
| // Use least signif. BI_SIZE bits of hash for each attempt | // Use least signif. BI_SIZE bits of hash for each attempt | |||
| h = h32 & MASK; | h = h32 & MASK; | |||
| if (buckets[h].id == uflw) { // Once uflw's bucket found... | if (buckets[h].id == uflw) { // Once uflw's bucket found... | |||
| if (buckets[h].t_exp <= now) // ...if bucket has expired... | if (buckets[h].t_exp <= now) // ...if bucket has expired... | |||
| buckets[h].t_exp = now; // ...reset it | buckets[h].t_exp = now; // ...reset it | |||
| return h; // Either way, use it | return h; // Either way, use it | |||
| } | } | |||
| else if ( (hsav == NBUCKETS) // If not seen expired bucket yet | else if ( (hsav == NBUCKETS) // If not seen expired bucket yet | |||
| // and this bucket has expired | // and this bucket has expired | |||
| && (buckets[h].t_exp <= now) ) { | && (buckets[h].t_exp <= now) ) { | |||
| hsav = h; // set it as the interim bucket | hsav = h; // set it as the interim bucket | |||
| } | } | |||
| h32 >>= BI_SIZE; // Bit-shift hash for next attempt | h32 >>= BI_SIZE; // Bit-shift hash for next attempt | |||
| } | } | |||
| // If reached here, no tested bucket was owned by the flow-ID | // If reached here, no tested bucket was owned by the flow ID | |||
| if (hsav != NBUCKETS) { | if (hsav != NBUCKETS) { | |||
| // If here, found an expired bucket within the above for loop | // If here, found an expired bucket within the above for loop | |||
| buckets[hsav].t_exp = now; // Reset expired bucket | buckets[hsav].t_exp = now; // Reset expired bucket | |||
| } else { | } else { | |||
| // If here, we're having to use the default bucket (the dregs) | // If here, we're having to use the default bucket (the dregs) | |||
| if (buckets[hsav].t_exp <= now) { // If dregs has expired... | if (buckets[hsav].t_exp <= now) { // If dregs has expired... | |||
| buckets[hsav].t_exp = now; // ...reset it | buckets[hsav].t_exp = now; // ...reset it | |||
| } | } | |||
| } | } | |||
| buckets[hsav].id = uflw; // In either case, claim for recycling | buckets[hsav].id = uflw; // In either case, claim for recycling | |||
| return hsav; | return hsav; | |||
| } | }]]></sourcecode> | |||
| ]]></sourcecode> | ||||
| </section> | </section> | |||
| <section anchor="qp_fill_bucket" numbered="true" toc="default"> | <section anchor="qp_fill_bucket" numbered="true" toc="default"> | |||
| <name>The fill_bucket() function</name> | <name>The fill_bucket() Function</name> | |||
| <t>The fill_bucket() function both accumulates and ages the queuing | <t>The fill_bucket() function both accumulates and ages the queuing | |||
| score over time, as outlined in <xref target="qp_approach_mechanism" f ormat="default"/>. To make aging the score efficient, | score over time, as outlined in <xref target="qp_approach_mechanism" f ormat="default"/>. To make aging the score efficient, | |||
| the increment of the queuing score is transformed into units of time | the increment of the queuing score is transformed into units of time | |||
| by dividing by AGING, so that the result represents the new expiry | by dividing by AGING so that the result represents the new expiry | |||
| time of the flow.</t> | time of the flow.</t> | |||
| <t>Given that probNative is already used to select which packets to | <t>Given that probNative is already used to select which packets to | |||
| ECN-mark, it might be thought that the queuing score could just be | ECN-mark, it might be thought that the queuing score could just be | |||
| incremented by the full size of each selected packet, instead of | incremented by the full size of each selected packet, instead of | |||
| incrementing it by the product of every packet's size (pkt_sz) and | incrementing it by the product of every packet's size (pkt_sz) and | |||
| probNative. However, the unpublished experience of one of the | probNative. However, the unpublished experience of one of the | |||
| authors with other congestion policers has found that the score then | authors with other congestion policers has found that the score then | |||
| increments far too jumpily, particularly when probNative is low.</t> | increments far too jumpily, particularly when probNative is low.</t> | |||
| <t>A deeper explanation of the queuing score is given in <xref target= "qp_rationale" format="default"/>.</t> | <t>A deeper explanation of the queuing score is given in <xref target= "qp_rationale" format="default"/>.</t> | |||
| <sourcecode name="" type="" markers="true"><![CDATA[ | <sourcecode name="" type="pseudocode" markers="true"><![CDATA[ | |||
| fill_bucket(bckt_id, pkt_sz, probNative) { | fill_bucket(bckt_id, pkt_sz, probNative) { | |||
| now; // current time | now; // current time | |||
| now = get_time_now(); // in units of T_RES | now = get_time_now(); // in units of T_RES | |||
| // Add packet's queuing score | // Add packet's queuing score | |||
| // For integer arithmetic, a bit-shift can replace the division | // For integer arithmetic, a bit-shift can replace the division | |||
| qLscore = min(buckets[bckt_id].t_exp - now | qLscore = min(buckets[bckt_id].t_exp - now | |||
| + probNative * pkt_sz / AGING, qLSCORE_MAX); | + probNative * pkt_sz / AGING, qLSCORE_MAX); | |||
| buckets[bckt_id].t_exp = now + qLscore; | buckets[bckt_id].t_exp = now + qLscore; | |||
| return qLscore; | return qLscore; | |||
| } | }]]></sourcecode> | |||
| ]]></sourcecode> | ||||
| </section> | </section> | |||
| <section anchor="qp_calcProbNative" numbered="true" toc="default"> | <section anchor="qp_calcProbNative" numbered="true" toc="default"> | |||
| <name>The calcProbNative() function</name> | <name>The calcProbNative() Function</name> | |||
| <t>To derive this queuing score, the QProt algorithm uses the linear | <t>To derive this queuing score, the QProt algorithm uses the linear | |||
| ramp function calcProbNative() to normalize instantaneous queuing | ramp function calcProbNative() to normalize instantaneous queuing | |||
| delay of the LL queue into a probability in the range [0,1], which | delay of the LL queue into a probability in the range [0,1], which | |||
| it assigns to probNative.</t> | it assigns to probNative.</t> | |||
| <sourcecode name="" type="" markers="true"><![CDATA[ | <sourcecode name="" type="pseudocode" markers="true"><![CDATA[ | |||
| calcProbNative(qdelay){ | calcProbNative(qdelay){ | |||
| if ( qdelay >= MAXTH ) { | if ( qdelay >= MAXTH ) { | |||
| probNative = MAX_PROB; | probNative = MAX_PROB; | |||
| } else if ( qdelay > MINTH ) { | } else if ( qdelay > MINTH ) { | |||
| probNative = MAX_PROB * (qdelay - MINTH)/RANGE; | probNative = MAX_PROB * (qdelay - MINTH)/RANGE; | |||
| // In practice, the * and the / would use a bit-shift | // In practice, the * and the / would use a bit-shift | |||
| } else { | } else { | |||
| probNative = 0; | probNative = 0; | |||
| } | } | |||
| return probNative; | return probNative; | |||
| } | }]]></sourcecode> | |||
| ]]></sourcecode> | ||||
| </section> | </section> | |||
| </section> | </section> | |||
| </section> | </section> | |||
| <!--[rfced] Section 5 is titled "Rationale". Then there is a | ||||
| difference between the formatting of the title of Section 5.1 | ||||
| (Rationale:) and the other titles. Might we update as follows? | ||||
| Original: | ||||
| 5. Rationale . . . . . . . . . . . . . . . . . . . . . . . . . . 18 | ||||
| 5.1. Rationale: Blame for Queuing, not for Rate in Itself . . 18 | ||||
| 5.2. Rationale for Constant Aging of the Queuing Score . . . . 20 | ||||
| 5.3. Rationale for Transformed Queuing Score . . . . . . . . . 21 | ||||
| 5.4. Rationale for Policy Conditions . . . . . . . . . . . . . 22 | ||||
| 5.5. Rationale for Reclassification as the Policy Action . . . 25 | ||||
| Perhaps A (all colons): | ||||
| 5. Rationale | ||||
| 5.1. Rationale: Blame for Queuing, Not for Rate in Itself | ||||
| 5.2. Rationale: Constant Aging of the Queuing Score | ||||
| 5.3. Rationale: Transformed Queuing Score | ||||
| 5.4. Rationale: Policy Conditions | ||||
| 5.5. Rationale: Reclassification as the Policy Action | ||||
| Perhaps B (all "for"): | ||||
| 5. Rationale | ||||
| 5.1. Rationale for Blame for Queuing, Not for Rate in Itself | ||||
| 5.2. Rationale for Constant Aging of the Queuing Score | ||||
| 5.3. Rationale for Transformed Queuing Score | ||||
| 5.4. Rationale for Policy Conditions | ||||
| 5.5. Rationale for Reclassification as the Policy Action | ||||
| Perhaps C (just removing as they are all subsections of "Rationale"): | ||||
| 5. Rationale | ||||
| 5.1. Blame for Queuing, Not for Rate in Itself | ||||
| 5.2. Constant Aging of the Queuing Score | ||||
| 5.3. Transformed Queuing Score | ||||
| 5.4. Policy Conditions | ||||
| 5.5. Reclassification as the Policy Action | ||||
| --> | ||||
| <section anchor="qp_rationale" numbered="true" toc="default"> | <section anchor="qp_rationale" numbered="true" toc="default"> | |||
| <name>Rationale</name> | <name>Rationale</name> | |||
| <t/> | ||||
| <section anchor="qp_rationale_not_throughput" numbered="true" toc="default "> | <section anchor="qp_rationale_not_throughput" numbered="true" toc="default "> | |||
| <name>Rationale: Blame for Queuing, not for Rate in Itself</name> | <name>Rationale: Blame for Queuing, Not for Rate in Itself</name> | |||
| <t><xref target="qp_fig_blame_cbr_v_burst" format="default"/> shows the bit rates of | <t><xref target="qp_fig_blame_cbr_v_burst" format="default"/> shows the bit rates of | |||
| two flows as stacked areas. It poses the question of which flow is | two flows as stacked areas. It poses the question of which flow is | |||
| more to blame for queuing delay; the unresponsive constant bit rate | more to blame for queuing delay: the unresponsive constant bit rate | |||
| flow (c) that is consuming about 80% of the capacity, or the flow | flow (c) that is consuming about 80% of the capacity or the flow | |||
| sending regular short unresponsive bursts (b)? The smoothness of c | sending regular short unresponsive bursts (b)? The smoothness of c | |||
| seems better for avoiding queuing, but its high rate does not. | seems better for avoiding queuing, but its high rate does not. | |||
| However, if flow c was not there, or ran slightly more slowly, b would | However, if flow c were not there, or ran slightly more slowly, b would | |||
| not cause any queuing.</t> | not cause any queuing.</t> | |||
| <figure anchor="qp_fig_blame_cbr_v_burst"> | <figure anchor="qp_fig_blame_cbr_v_burst"> | |||
| <name>Which is More to Blame for Queuing Delay?</name> | <name>Which is more to blame for queuing delay?</name> | |||
| <artwork name="" type="" align="left" alt=""><![CDATA[^ bit rate (stac | <artwork name="" type="" align="left" alt=""><![CDATA[ | |||
| ked areas) | ^ bit rate (stacked areas) | |||
| | ,-. ,-. ,-. ,-. ,-. | | ,-. ,-. ,-. ,-. ,-. | |||
| |--|b|----------|b|----------|b|----------|b|----------|b|---Capacity | |--|b|----------|b|----------|b|----------|b|----------|b|---Capacity | |||
| |__|_|__________|_|__________|_|__________|_|__________|_|_____ | |__|_|__________|_|__________|_|__________|_|__________|_|_____ | |||
| | | | | |||
| | c | | c | |||
| | | | | |||
| | | | | |||
| | | | | |||
| +----------------------------------------------------------------> | +----------------------------------------------------------------> | |||
| time | time]]></artwork> | |||
| ]]></artwork> | ||||
| </figure> | </figure> | |||
| <t>To explain queuing scores, in the following it will initially be | <t>To explain queuing scores, in the following it will initially be | |||
| assumed that the QProt algorithm is accumulating queuing scores, but | assumed that the QProt algorithm is accumulating queuing scores but | |||
| not taking any action as a result.</t> | not taking any action as a result.</t> | |||
| <t>To quantify the responsibility that each flow bears for queuing | <t>To quantify the responsibility that each flow bears for queuing | |||
| delay, the QProt algorithm accumulates the product of the rate of each | delay, the QProt algorithm accumulates the product of the rate of each | |||
| flow and the level of congestion, both measured at the instant each | flow and the level of congestion, both measured at the instant each | |||
| packet arrives. The instantaneous flow rate is represented at each | packet arrives. The instantaneous flow rate is represented at each | |||
| discrete event when a packet arrives by the packet's size, which | discrete event when a packet arrives by the packet's size, which | |||
| accumulates faster the more packets arrive within each unit of time. | accumulates faster the more packets arrive within each unit of time. | |||
| The level of congestion is normalized to a dimensionless number | The level of congestion is normalized to a dimensionless number | |||
| between 0 and 1 (probNative). This fractional congestion level is used | between 0 and 1 (probNative). This fractional congestion level is used | |||
| in preference to a direct dependence on queuing delay for two | in preference to a direct dependence on queuing delay for two | |||
| skipping to change at line 769 ¶ | skipping to change at line 831 ¶ | |||
| insignificantly to delay</t> | insignificantly to delay</t> | |||
| </li> | </li> | |||
| <li> | <li> | |||
| <t>to be able to erect a steep barrier against excessive queuing | <t>to be able to erect a steep barrier against excessive queuing | |||
| delay</t> | delay</t> | |||
| </li> | </li> | |||
| </ul> | </ul> | |||
| <t>The unit of the resulting queue score is "congested-bytes" | <t>The unit of the resulting queue score is "congested-bytes" | |||
| per second, which distinguishes it from just bytes per second.</t> | per second, which distinguishes it from just bytes per second.</t> | |||
| <t>Then, during the periods between bursts (b), neither flow | <t>Then, during the periods between bursts (b), neither flow | |||
| accumulates any queuing score - the high rate of c is benign. But, | accumulates any queuing score: the high rate of c is benign. But, | |||
| during each burst, if we say the rate of c and b are 80% and 45% of | during each burst, if we say the rate of c and b are 80% and 45% of | |||
| capacity, thus causing 25% overload, they each bear (80/125)% and | capacity, thus causing 25% overload, they each bear (80/125)% and | |||
| (45/125)% of the responsibility for the queuing delay (64% and 36%). | (45/125)% of the responsibility for the queuing delay (64% and 36%). | |||
| The algorithm does not explicitly calculate these percentages. They | The algorithm does not explicitly calculate these percentages. They | |||
| are just the outcome of the number of packets arriving from each flow | are just the outcome of the number of packets arriving from each flow | |||
| during the burst.</t> | during the burst.</t> | |||
| <t>To summarize, the queuing score never sanctions rate solely on its | <t>To summarize, the queuing score never sanctions rate solely on its | |||
| own account. It only sanctions rate inasmuch as it causes queuing.</t> | own account. It only sanctions rate inasmuch as it causes queuing.</t> | |||
| <figure anchor="qp_fig_blame_scenario"> | <figure anchor="qp_fig_blame_scenario"> | |||
| <name>Responsibility for Queuing: More Complex Scenario</name> | <name>Responsibility for Queuing: More-Complex Scenario</name> | |||
| <artwork name="" type="" align="left" alt=""><![CDATA[^ bit rate (stac | <artwork name="" type="" align="left" alt=""><![CDATA[ | |||
| ked areas) , | ^ bit rate (stacked areas) , | |||
| | ,-. |\ ,- | | ,-. |\ ,- | |||
| |------Capacity-|b|----------,-.----------|b|----------|b\----- | |------Capacity-|b|----------,-.----------|b|----------|b\----- | |||
| | __|_|_______ |b| /``\| _...-._-': | ,.-- | | __|_|_______ |b| /``\| _...-._-': | ,.-- | |||
| | ,-. __/ \__|_|_ _/ |/ \|/ | | ,-. __/ \__|_|_ _/ |/ \|/ | |||
| | |b| ___/ \___/ __ r | | |b| ___/ \___/ __ r | |||
| | |_|/ v \__/ \_______ _/\____/ | | |_|/ v \__/ \_______ _/\____/ | |||
| | _/ \__/ | | _/ \__/ | |||
| | | | | |||
| +----------------------------------------------------------------> | +----------------------------------------------------------------> | |||
| time | time]]></artwork> | |||
| ]]></artwork> | ||||
| </figure> | </figure> | |||
| <t><xref target="qp_fig_blame_scenario" format="default"/> gives a more complex | <t><xref target="qp_fig_blame_scenario" format="default"/> gives a more- complex | |||
| illustration of the way the queuing score assigns responsibility for | illustration of the way the queuing score assigns responsibility for | |||
| queuing (limited to the precision that ASCII art can illustrate). The | queuing (limited to the precision that ASCII art can illustrate). The | |||
| figure shows the bit rates of three flows represented as stacked areas | figure shows the bit rates of three flows represented as stacked areas | |||
| labelled b, v and r. The unresponsive bursts (b) are the same as in | labelled b, v, and r. The unresponsive bursts (b) are the same as in | |||
| the previous example, but a variable rate video (v) replaces flow c. | the previous example, but a variable-rate video (v) replaces flow c. | |||
| It's rate varies as the complexity of the video scene varies. Also on | Its rate varies as the complexity of the video scene varies. Also, on | |||
| a slower timescale, in response to the level of congestion, the video | a slower timescale, in response to the level of congestion, the video | |||
| adapts its quality. However, on a short time-scale it appears to be | adapts its quality. However, on a short timescale it appears to be | |||
| unresponsive to small amounts of queuing. Also, part-way through, a | unresponsive to small amounts of queuing. Also, partway through, a | |||
| low latency responsive flow (r) joins in, aiming to fill the balance | low-latency responsive flow (r) joins in, aiming to fill the balance | |||
| of capacity left by the other two.</t> | of capacity left by the other two.</t> | |||
| <t>The combination of the first burst and the low application-limited | <t>The combination of the first burst and the low application-limited | |||
| rate of the video causes neither flow to accumulate queuing score. In | rate of the video causes neither flow to accumulate queuing score. In | |||
| contrast, the second burst causes similar excessive overload (125%) to | contrast, the second burst causes similar excessive overload (125%) to | |||
| the example in <xref target="qp_fig_blame_cbr_v_burst" format="default"/ >. Then, the | the example in <xref target="qp_fig_blame_cbr_v_burst" format="default"/ >. Then, the | |||
| video happens to reduce its rate (probably due to a less complex | video happens to reduce its rate (probably due to a less-complex | |||
| scene) so the third burst causes only a little congestion. Let us | scene) so the third burst causes only a little congestion. Let us | |||
| assume the resulting queue causes probNative to rise to just 1%, then | assume the resulting queue causes probNative to rise to just 1%, then | |||
| the queuing score will only accumulate 1% of the size of each packet | the queuing score will only accumulate 1% of the size of each packet | |||
| of flows v and b during this burst.</t> | of flows v and b during this burst.</t> | |||
| <t>The fourth burst happens to arrive just as the new responsive flow | <t>The fourth burst happens to arrive just as the new responsive flow | |||
| (r) has filled the available capacity, so it leads to very rapid | (r) has filled the available capacity, so it leads to very rapid | |||
| growth of the queue. After a round trip the responsive flow rapidly | growth of the queue. After a round trip, the responsive flow rapidly | |||
| backs off, and the adaptive video also backs off more rapidly than it | backs off, and the adaptive video also backs off more rapidly than it | |||
| would normally, because of the very high congestion level. The rapid | would normally because of the very high congestion level. The rapid | |||
| response to congestion of flow r reduces the queuing score that all | response to congestion of flow r reduces the queuing score that all | |||
| three flows accumulate, but they each still bear the cost in | three flows accumulate, but they each still bear the cost in | |||
| proportion to the product of the rates at which their packets arrive | proportion to the product of the rates at which their packets arrive | |||
| at the queue and the value of probNative when they do so. Thus, during | at the queue and the value of probNative when they do so. Thus, during | |||
| the fifth burst, they all accumulate less score than the fourth, | the fifth burst, they all accumulate a lower score than the fourth | |||
| because the queuing delay is not as excessive.</t> | because the queuing delay is not as excessive.</t> | |||
| </section> | </section> | |||
| <section anchor="qp_rationale_aging" numbered="true" toc="default"> | <section anchor="qp_rationale_aging" numbered="true" toc="default"> | |||
| <name>Rationale for Constant Aging of the Queuing Score</name> | <name>Rationale for Constant Aging of the Queuing Score</name> | |||
| <t>Even well-behaved flows will not always be able to respond fast | <t>Even well-behaved flows will not always be able to respond fast | |||
| enough to dynamic events. Also well-behaved flows, e.g., DCTCP <xref tar get="RFC8257" format="default"/>, TCP Prague <xref target="I-D.briscoe-iccrg-pra gue-congestion-control" format="default"/>, BBRv3 <xref target="BBRv3" format="d efault"/> or the L4S variant of SCReAM <xref target="SCReAM" format="default"/> | enough to dynamic events. Also, well-behaved flows, e.g., Data Center TC P (DCTCP) <xref target="RFC8257" format="default"/>, TCP Prague <xref target="I- D.briscoe-iccrg-prague-congestion-control" format="default"/>, Bottleneck Bandwi dth and Round-trip propagation time version 3 (BBRv3) <xref target="BBRv3" forma t="default"/>, or the L4S variant of SCReAM <xref target="SCReAM" format="defaul t"/> | |||
| for real-time media <xref target="RFC8298" format="default"/>, can maint ain a very | for real-time media <xref target="RFC8298" format="default"/>, can maint ain a very | |||
| shallow queue by continual careful probing for more while also | shallow queue by continual careful probing for more while also | |||
| continually subtracting a little from their rate (or congestion | continually subtracting a little from their rate (or congestion | |||
| window) in response to low levels of ECN signalling. Therefore, the | window) in response to low levels of ECN signalling. Therefore, the | |||
| QProt algorithm needs to continually offer a degree of forgiveness to | QProt algorithm needs to continually offer a degree of forgiveness to | |||
| age out the queuing score as it accumulates.</t> | age out the queuing score as it accumulates.</t> | |||
| <t>Scalable congestion controllers such as those above maintain their | <t>Scalable congestion controllers, such as those above, maintain their | |||
| congestion window in inverse proportion to the congestion level, | congestion window in inverse proportion to the congestion level, | |||
| probNative. That leads to the important property that on average a | probNative. That leads to the important property that, on average, a | |||
| scalable flow holds the product of its congestion window and the | scalable flow holds the product of its congestion window and the | |||
| congestion level constant, no matter the capacity of the link or how | congestion level constant, no matter the capacity of the link or how | |||
| many other flows it competes with. For instance, if the link capacity | many other flows it competes with. For instance, if the link capacity | |||
| doubles, a scalable flow induces half the congestion probability. Or | doubles, a scalable flow induces half the congestion probability. Or, | |||
| if three scalable flows compete for the capacity, each flow will | if three scalable flows compete for the capacity, each flow will | |||
| reduce to one third of the capacity they would use on their own and | reduce to one third of the capacity they would use on their own and | |||
| increase the congestion level by 3x. Therefore, in steady state, a | increase the congestion level by 3x. Therefore, in steady state, a | |||
| scalable flow will induce the same constant amount of | scalable flow will induce the same constant amount of | |||
| "congested-bytes" per round trip, whatever the link capacity, and no | "congested-bytes" per round trip, whatever the link capacity and no | |||
| matter how many flows are sharing the capacity.</t> | matter how many flows are sharing the capacity.</t> | |||
| <t>This suggests that the QProt algorithm will not sanction a | <t>This suggests that the QProt algorithm will not sanction a | |||
| well-behaved scalable flow if it ages out the queuing score at a | well-behaved scalable flow if it ages out the queuing score at a | |||
| sufficient constant rate. The constant will need to be somewhat above | sufficient constant rate. The constant will need to be somewhat above | |||
| the average of a well-behaved scalable flow to allow for normal | the average of a well-behaved scalable flow to allow for normal | |||
| dynamics.</t> | dynamics.</t> | |||
| <t>Relating QProt's aging constant to a scalable flow does not mean | <t>Relating QProt's aging constant to a scalable flow does not mean | |||
| that a flow has to behave like a scalable flow. It can be less | that a flow has to behave like a scalable flow: it can be less | |||
| aggressive, but not more. For instance, a longer RTT flow can run at a | aggressive but not more aggressive. For instance, a longer RTT flow can | |||
| run at a | ||||
| lower congestion-rate than the aging rate, but it can also increase | lower congestion-rate than the aging rate, but it can also increase | |||
| its aggressiveness to equal the rate of short RTT scalable flows <xref t arget="ScalingCC" format="default"/>. The constant aging of QProt also means tha t a | its aggressiveness to equal the rate of short RTT scalable flows <xref t arget="ScalingCC" format="default"/>. The constant aging of QProt also means tha t a | |||
| long-running unresponsive flow will be prone to trigger QProt if it | long-running unresponsive flow will be prone to trigger QProt if it | |||
| runs faster than a competing responsive scalable flow would. And, of | runs faster than a competing responsive scalable flow would. And, of | |||
| course, if a flow causes excessive queuing in the short-term, its | course, if a flow causes excessive queuing in the short term, its | |||
| queuing score will still rise faster than the constant aging process | queuing score will still rise faster than the constant aging process | |||
| will decrease it. Then QProt will still eject the flow's packets | will decrease it. Then, QProt will still eject the flow's packets | |||
| before they harm the low latency of the shared queue.</t> | before they harm the low latency of the shared queue.</t> | |||
| </section> | </section> | |||
| <section anchor="qp_rationale_normalize" numbered="true" toc="default"> | <section anchor="qp_rationale_normalize" numbered="true" toc="default"> | |||
| <name>Rationale for Transformed Queuing Score</name> | <name>Rationale for Transformed Queuing Score</name> | |||
| <t>The QProt algorithm holds a flow's queuing score state in a | <t>The QProt algorithm holds a flow's queuing score state in a | |||
| structure called a bucket, because of its similarity to a classic | structure called a "bucket". This is because of its similarity to a cla | |||
| leaky bucket (except the contents of the bucket does not represent | ssic | |||
| leaky bucket (except the contents of the bucket do not represent | ||||
| bytes).</t> | bytes).</t> | |||
| <figure anchor="qp_fig_qscore_normalize"> | <figure anchor="qp_fig_qscore_normalize"> | |||
| <name>Transformation of Queuing Score</name> | <name>Transformation of Queuing Score</name> | |||
| <artwork name="" type="" align="left" alt=""><![CDATA[probNative * pkt | <artwork name="" type="" align="left" alt=""><![CDATA[ | |||
| _sz probNative * pkt_sz / AGING | probNative * pkt_sz probNative * pkt_sz / AGING | |||
| | | | | | | |||
| | V | | V | | | V | | V | | |||
| | : | ___ | : | | | : | ___ | : | | |||
| |_____| ___ |_____| | |_____| ___ |_____| | |||
| | | ___ | | | | | ___ | | | |||
| |__ __| |__ __| | |__ __| |__ __| | |||
| | | | | | | |||
| V V | V V | |||
| AGING * Dt Dt | AGING * Dt Dt]]></artwork> | |||
| ]]></artwork> | ||||
| </figure> | </figure> | |||
| <t>The accumulation and aging of the queuing score is shown on the | <t>The accumulation and aging of the queuing score is shown on the | |||
| left of <xref target="qp_fig_qscore_normalize" format="default"/> in tok en bucket form. | left of <xref target="qp_fig_qscore_normalize" format="default"/> in tok en bucket form. | |||
| Dt is the difference between the times when the scores of the current | Dt is the difference between the times when the scores of the current | |||
| and previous packets were processed.</t> | and previous packets were processed.</t> | |||
| <t>A transformed equivalent of this token bucket is shown on the right | <t>A transformed equivalent of this token bucket is shown on the right | |||
| of <xref target="qp_fig_qscore_normalize" format="default"/>, dividing b oth the input | of <xref target="qp_fig_qscore_normalize" format="default"/>, dividing b oth the input | |||
| and output by the constant AGING rate. The result is a bucket-depth | and output by the constant AGING rate. The result is a bucket-depth | |||
| that represents time and it drains at the rate that time passes.</t> | that represents time and it drains at the rate that time passes.</t> | |||
| <t>As a further optimization, the time the bucket was last updated is | <t>As a further optimization, the time the bucket was last updated is | |||
| not stored with the flow-state. Instead, when the bucket is | not stored with the flow-state. Instead, when the bucket is | |||
| initialized the queuing score is added to the system time 'now' and | initialized, the queuing score is added to the system time "now" and | |||
| the resulting expiry time is written into the bucket. Subsequently, if | the resulting expiry time is written into the bucket. Subsequently, if | |||
| the bucket has not expired, the incremental queuing score is added to | the bucket has not expired, the incremental queuing score is added to | |||
| the time already held in the bucket. Then the queuing score always | the time already held in the bucket. Then, the queuing score always | |||
| represents the expiry time of the flow-state itself. This means that | represents the expiry time of the flow-state itself. This means that | |||
| the queuing score does not need to be aged explicitly because it ages | the queuing score does not need to be aged explicitly because it ages | |||
| itself implicitly.</t> | itself implicitly.</t> | |||
| </section> | </section> | |||
| <section anchor="qp_rationale_conditions" numbered="true" toc="default"> | <section anchor="qp_rationale_conditions" numbered="true" toc="default"> | |||
| <name>Rationale for Policy Conditions</name> | <name>Rationale for Policy Conditions</name> | |||
| <t>Pseudocode for the QProt policy conditions is given in <xref target=" qp_header_file" format="default"/> within the second half of the qprotect() | <t>Pseudocode for the QProt policy conditions is given in <xref target=" qp_header_file" format="default"/> within the second half of the qprotect() | |||
| function. When each packet arrives, after finding its flow state and | function. When each packet arrives, after finding its flow state and | |||
| updating the queuing score of the packet's flow, the algorithm checks | updating the queuing score of the packet's flow, the algorithm checks | |||
| whether the shared queue delay exceeds a constant threshold CRITICALqL | whether the shared queue delay exceeds a constant threshold CRITICALqL | |||
| (e.g., 2 ms), as repeated below for convenience:</t> | (e.g., 2 ms), as repeated below for convenience:</t> | |||
| <sourcecode name="" type="" markers="true"><![CDATA[ | <sourcecode name="" type="pseudocode" markers="true"><![CDATA[ | |||
| if ( ( qdelay > CRITICALqL ) // Test if qdelay over threshold... | if ( ( qdelay > CRITICALqL ) // Test if qdelay over threshold... | |||
| // ...and if flow's q'ing score scaled by qdelay/CRITICALqL | // ...and if flow's q'ing score scaled by qdelay/CRITICALqL | |||
| // ...exceeds CRITICALqLSCORE | // ...exceeds CRITICALqLSCORE | |||
| && ( qdelay * qLscore > CRITICALqLPRODUCT ) ) | && ( qdelay * qLscore > CRITICALqLPRODUCT ) ) | |||
| // Recall that CRITICALqLPRODUCT = CRITICALqL * CRITICALqLSCORE | // Recall that CRITICALqLPRODUCT = CRITICALqL * CRITICALqLSCORE]]></source | |||
| ]]></sourcecode> | code> | |||
| <t>If the queue delay threshold is exceeded, the flow's queuing score | <t>If the queue delay threshold is exceeded, the flow's queuing score | |||
| is temporarily scaled up by the ratio of the current queue delay to | is temporarily scaled up by the ratio of the current queue delay to | |||
| the threshold queuing delay, CRITICALqL (the reason for the scaling is | the threshold queuing delay, CRITICALqL (the reason for the scaling is | |||
| given next). If this scaled up score exceeds another constant | given next). If this scaled up score exceeds another constant | |||
| threshold CRITICALqLSCORE, the packet is ejected. The actual last line | threshold CRITICALqLSCORE, the packet is ejected. The actual last line | |||
| of code above multiplies both sides of the second condition by | of code above multiplies both sides of the second condition by | |||
| CRITICALqL to avoid a costly division.</t> | CRITICALqL to avoid a costly division.</t> | |||
| <t>This approach allows each packet to be assessed once, as it | <t>This approach allows each packet to be assessed once, as it | |||
| arrives. Once queue delay exceeds the threshold, it has two | arrives. Once queue delay exceeds the threshold, it has two | |||
| implications:</t> | implications:</t> | |||
| <ul spacing="normal"> | <ul spacing="normal"> | |||
| <li> | <li> | |||
| <t>The current packet might be ejected even though there are | <t>The current packet might be ejected, even though there are | |||
| packets already in the queue from flows with higher queuing | packets already in the queue from flows with higher queuing | |||
| scores. However, any flow that continues to contribute to the | scores. However, any flow that continues to contribute to the | |||
| queue will have to send further packets, giving an opportunity to | queue will have to send further packets, giving an opportunity to | |||
| eject them as well, as they subsequently arrive.</t> | eject them as well, as they subsequently arrive.</t> | |||
| </li> | </li> | |||
| <li> | <li> | |||
| <t>The next packets to arrive might not be ejected, because they | <t>The next packets to arrive might not be ejected because they | |||
| might belong to flows with low queuing scores. In this case, queue | might belong to flows with low queuing scores. In this case, queue | |||
| delay could continue to rise with no opportunity to eject a | delay could continue to rise with no opportunity to eject a | |||
| packet. This is why the queuing score is scaled up by the current | packet. This is why the queuing score is scaled up by the current | |||
| queue delay. Then, the more the queue has grown without ejecting a | queue delay. Then, the more the queue has grown without ejecting a | |||
| packet, the more the algorithm 'raises the bar' to further | packet, the more the algorithm "raises the bar" to further | |||
| packets.</t> | packets.</t> | |||
| </li> | </li> | |||
| </ul> | </ul> | |||
| <t>The above approach is preferred over the extra per-packet | <t>The above approach is preferred over the extra per-packet | |||
| processing cost of searching the buckets for the flow with the highest | processing cost of searching the buckets for the flow with the highest | |||
| queuing score and searching the queue for one of its packets to eject | queuing score and searching the queue for one of its packets to eject | |||
| (if one is still in the queue).</t> | (if one is still in the queue).</t> | |||
| <t>Note that by default CRITICALqL_us is set to the maximum threshold | <t>Note that, by default, CRITICALqL_us is set to the maximum threshold | |||
| of the ramp marking algorithm, MAXTH_us. However, there is some debate | of the ramp marking algorithm MAXTH_us. However, there is some debate | |||
| as to whether setting it to the minimum threshold instead would | as to whether setting it to the minimum threshold instead would | |||
| improve QProt performance. This would roughly double the ratio of | improve QProt performance. This would roughly double the ratio of | |||
| qdelay to CRITICALqL, which is compared against the CRITICALqLSCORE | qdelay to CRITICALqL, which is compared against the CRITICALqLSCORE | |||
| threshold. So the threshold would have to be roughly doubled | threshold. So, the threshold would have to be roughly doubled | |||
| accordingly.</t> | accordingly.</t> | |||
| <t><xref target="qp_fig_policy_conditions" format="default"/> explains t his approach | <t><xref target="qp_fig_policy_conditions" format="default"/> explains t his approach | |||
| graphically. On the horizontal axis it shows actual harm, meaning the | graphically. On the horizontal axis, it shows actual harm, meaning the | |||
| queuing delay in the shared queue. On the vertical axis it shows the | queuing delay in the shared queue. On the vertical axis, it shows the | |||
| behaviour record of the flow associated with the currently arriving | behavior record of the flow associated with the currently arriving | |||
| packet, represented in the algorithm by the flow's queuing score. The | packet, represented in the algorithm by the flow's queuing score. The | |||
| shaded region represents the combination of actual harm and behaviour | shaded region represents the combination of actual harm and behavior | |||
| record that will lead to the packet being ejected.</t> | record that will lead to the packet being ejected.</t> | |||
| <figure anchor="qp_fig_policy_conditions"> | <figure anchor="qp_fig_policy_conditions"> | |||
| <name>Graphical Explanation of the Policy Conditions</name> | <name>Graphical Explanation of the Policy Conditions</name> | |||
| <artwork name="" type="" align="left" alt=""><![CDATA[Behaviour Record | <artwork name="" type="" align="left" alt=""><![CDATA[ | |||
| : | Behavior Record: | |||
| Queueing Score of | Queueing Score of | |||
| Arriving Packet's Flow | Arriving Packet's Flow | |||
| ^ | ^ | |||
| | + |/ / / / / / / / / / / / / / / / / / / | | + |/ / / / / / / / / / / / / / / / / / / | |||
| | + N | / / / / / / / / / / / / / / / / / / / | | + N | / / / / / / / / / / / / / / / / / / / | |||
| | + |/ / / / / / / / / / | | + |/ / / / / / / / / / | |||
| | + | / / / / E (Eject packet) / / / / / | | + | / / / / E (Eject packet) / / / / / | |||
| | + |/ / / / / / / / / / | | + |/ / / / / / / / / / | |||
| | + | / / / / / / / / / / / / / / / / / / / | | + | / / / / / / / / / / / / / / / / / / / | |||
| | + |/ / / / / / / / / / / / / / / / / / / | | + |/ / / / / / / / / / / / / / / / / / / | |||
| | +| / / / / / / / / / / / / / / / / / / / | | +| / / / / / / / / / / / / / / / / / / / | |||
| | |+ / / / / / / / / / / / / / / / / / / | | |+ / / / / / / / / / / / / / / / / / / | |||
| | N | + / / / / / / / / / / / / / / / / / | | N | + / / / / / / / / / / / / / / / / / | |||
| | (No actual | +/ / / / / / / / / / / / / / / | | (No actual | +/ / / / / / / / / / / / / / / | |||
| | harm) | + / / / / / / / / / / / / | | harm) | + / / / / / / / / / / / / | |||
| | | P (Pass over) + ,/ / / / / / / / | | | P (Pass over) + ,/ / / / / / / / | |||
| | | ^ + /./ /_/ | | | ^ + /./ /_/ | |||
| +--------------+------------------------------------------> | +--------------+------------------------------------------> | |||
| CRITICALqL Actual Harm: Shared Queue Delay | CRITICALqL Actual Harm: Shared Queue Delay]]></artwork> | |||
| ]]></artwork> | ||||
| </figure> | </figure> | |||
| <t>The regions labelled 'N' represent cases where the first condition | <t>The regions labelled "N" represent cases where the first condition | |||
| is not met - no actual harm - queue delay is below the critical | is not met -- no actual harm -- queue delay is below the critical | |||
| threshold, CRITICALqL.</t> | threshold, CRITICALqL.</t> | |||
| <t>The region labelled 'E' represents cases where there is actual harm | <t>The region labelled "E" represents cases where there is actual harm | |||
| (queue delay exceeds CRITICALqL) and the queuing score associated with | (queue delay exceeds CRITICALqL) and the queuing score associated with | |||
| the arriving packet is high enough to be able to eject it with | the arriving packet is high enough to be able to eject it with | |||
| certainty.</t> | certainty.</t> | |||
| <t>The region labelled 'P' represents cases where there is actual | <t>The region labelled "P" represents cases where there is actual | |||
| harm, but the queuing score of the arriving packet is insufficient to | harm, but the queuing score of the arriving packet is insufficient to | |||
| eject it, so it has to be Passed over. This adds to queuing delay, but | eject it, so it has to be passed over. This adds to queuing delay, but | |||
| the alternative would be to sanction an innocent flow. It can be seen | the alternative would be to sanction an innocent flow. It can be seen | |||
| that, as actual harm increases, the judgement of innocence becomes | that, as actual harm increases, the judgment of innocence becomes | |||
| increasingly stringent; the behaviour record of the next packet's flow | increasingly stringent; the behavior record of the next packet's flow | |||
| does not have to be as bad to eject it.</t> | does not have to be as bad to eject it.</t> | |||
| <t>Conditioning ejection on actual harm helps prevent VPN packets | <t>Conditioning ejection on actual harm helps prevent VPN packets | |||
| being ejected unnecessarily. VPNs consisting of multiple flows can | being ejected unnecessarily. VPNs consisting of multiple flows can | |||
| tend to accumulate queuing score faster than it is aged out, because | tend to accumulate queuing score faster than it is aged out because | |||
| the aging rate is intended for a single flow. However, whether or not | the aging rate is intended for a single flow. However, whether or not | |||
| some traffic is in a VPN, the queue delay threshold (CRITICALqL) will | some traffic is in a VPN, the queue delay threshold (CRITICALqL) will | |||
| be no more likely to be exceeded. So conditioning ejection on actual | be no more likely to be exceeded. So, conditioning ejection on actual | |||
| harm helps reduce the chance that VPN traffic will be ejected by the | harm helps reduce the chance that VPN traffic will be ejected by the | |||
| QProt function.</t> | QProt function.</t> | |||
| </section> | </section> | |||
| <section anchor="qp_rationale_reclassify" numbered="true" toc="default"> | <section anchor="qp_rationale_reclassify" numbered="true" toc="default"> | |||
| <name>Rationale for Reclassification as the Policy Action</name> | <name>Rationale for Reclassification as the Policy Action</name> | |||
| <t>When the DOCSIS QProt algorithm deems that it is necessary to eject | <t>When the DOCSIS QProt algorithm deems that it is necessary to eject | |||
| a packet to protect the Low Latency queue, it redirects the packet to | a packet to protect the Low-Latency queue, it redirects the packet to | |||
| the Classic queue. In the Low Latency DOCSIS architecture (as in | the Classic queue. In the Low-Latency DOCSIS architecture (as in | |||
| Coupled DualQ AQMs generally), the Classic queue is expected to | Coupled DualQ AQMs generally), the Classic queue is expected to | |||
| frequently have a larger backlog of packets, caused by classic | frequently have a larger backlog of packets, which is caused by classic | |||
| congestion controllers interacting with a classic AQM (which has a | congestion controllers interacting with a classic AQM (which has a | |||
| latency target of 10ms) as well as other bursty traffic.</t> | latency target of 10 ms) as well as other bursty traffic.</t> | |||
| <t>Therefore, typically, an ejected packet will experience higher | <t>Therefore, typically, an ejected packet will experience higher | |||
| queuing delay than it would otherwise, and it could be re-ordered | queuing delay than it would otherwise, and it could be re-ordered | |||
| within its flow (assuming QProt does not eject all packets of an | within its flow (assuming QProt does not eject all packets of an | |||
| anomalous flow). The mild harm caused to the performance of the | anomalous flow). The mild harm caused to the performance of the | |||
| ejected packet's flow is deliberate. It gives senders a slight | ejected packet's flow is deliberate. It gives senders a slight | |||
| incentive to identify their packets correctly.</t> | incentive to identify their packets correctly.</t> | |||
| <t>If there were no such harm, there would be nothing to prevent all | <t>If there were no such harm, there would be nothing to prevent all | |||
| flows from identifying themselves as suitable for classification into | flows from identifying themselves as suitable for classification into | |||
| the low latency queue, and just letting QProt sort the resulting | the low-latency queue and just letting QProt sort the resulting | |||
| aggregate into queue-building and non-queue-building flows. This might | aggregate into queue-building and non-queue-building flows. This might | |||
| seem like a useful alternative to requiring senders to correctly | seem like a useful alternative to requiring senders to correctly | |||
| identify their flows. However, handling of mis-classified flows is not | identify their flows. However, handling of mis-classified flows is not | |||
| without a cost. The more packets that have to be reclassified, the | without a cost. The more packets that have to be reclassified, the | |||
| more often the delay of the low latency queue would exceed the | more often the delay of the low-latency queue would exceed the | |||
| threshold. Also more memory would be required to hold the extra flow | threshold. Also, more memory would be required to hold the extra flow | |||
| state.</t> | state.</t> | |||
| <t>When a packet is redirected into the Classic queue, an operator | <t>When a packet is redirected into the Classic queue, an operator | |||
| might want to alter the identifier(s) that originally caused it to be | might want to alter the identifier(s) that originally caused it to be | |||
| classified into the Low Latency queue, so that the packet will not be | classified into the Low-Latency queue so that the packet will not be | |||
| classified into another low latency queue further downstream. However, | classified into another low-latency queue further downstream. However, | |||
| redirection of occasional packets can be due to unusually high | redirection of occasional packets can be due to unusually high | |||
| transient load just at the specific bottleneck, not necessarily at any | transient load just at the specific bottleneck, not necessarily at any | |||
| other bottleneck, and not necessarily due to bad flow behaviour. | other bottleneck and not necessarily due to bad flow behavior. | |||
| Therefore, Section 5.4.1.2 of <xref target="RFC9331" format="default"/> | Therefore, <xref target="RFC9331" section="5.4.1.2"/> precludes a | |||
| precludes a | ||||
| network node from altering the end-to-end ECN field to exclude traffic | network node from altering the end-to-end ECN field to exclude traffic | |||
| from L4S treatment. Instead a local-use identifier ought to be used | from L4S treatment. Instead a local-use identifier ought to be used | |||
| (e.g., Diffserv Codepoint or VLAN tag), so that each operator can | (e.g., Diffserv Codepoint or VLAN tag) so that each operator can | |||
| apply its own policy, without prejudging what other operators ought to | apply its own policy, without prejudging what other operators ought to | |||
| do.</t> | do.</t> | |||
| <t>Although not supported in the DOCSIS specs, QProt could be extended | <t>Although not supported in the DOCSIS specifications, QProt could be e xtended | |||
| to recognize that large numbers of redirected packets belong to the | to recognize that large numbers of redirected packets belong to the | |||
| same flow. This might be detected when the bucket expiry time t_exp | same flow. This might be detected when the bucket expiry time t_exp | |||
| exceeds a threshold. Depending on policy and implementation | exceeds a threshold. Depending on policy and implementation | |||
| capabilities, QProt could then install a classifier to redirect a | capabilities, QProt could then install a classifier to redirect a | |||
| whole flow into the Classic queue, with an idle timeout to remove | whole flow into the Classic queue, with an idle timeout to remove | |||
| stale classifiers. In these 'persistent offender' cases, QProt might | stale classifiers. In these "persistent offender" cases, QProt might | |||
| also overwrite each redirected packet's DSCP or clear its ECN field to | also overwrite each redirected packet's DSCP or clear its ECN field to | |||
| Not-ECT, in order to protect other potential L4S queues downstream. | Not-ECT, in order to protect other potential L4S queues downstream. | |||
| The DOCSIS specs do not discuss sanctioning whole flows, so further | The DOCSIS specifications do not discuss sanctioning whole flows; furthe r | |||
| discussion is beyond the scope of the present document.</t> | discussion is beyond the scope of the present document.</t> | |||
| </section> | </section> | |||
| </section> | </section> | |||
| <section anchor="qp_limitations" numbered="true" toc="default"> | <section anchor="qp_limitations" numbered="true" toc="default"> | |||
| <name>Limitations</name> | <name>Limitations</name> | |||
| <t>The QProt algorithm groups packets with common layer-4 flow | <t>The QProt algorithm groups packets with common Layer 4 flow | |||
| identifiers. It then uses this grouping to accumulate queuing scores and | identifiers. It then uses this grouping to accumulate queuing scores and | |||
| to sanction packets.</t> | to sanction packets.</t> | |||
| <t>This choice of identifier for grouping is pragmatic with no | <t>This choice of identifier for grouping is pragmatic with no | |||
| scientific basis. All the packets of a flow certainly pass between the | scientific basis. All the packets of a flow certainly pass between the | |||
| same two endpoints. But some applications might initiate multiple flows | same two endpoints. However, some applications might initiate multiple flo | |||
| between the same end-points, e.g., for media, control, data, etc. Others | ws | |||
| between the same endpoints, e.g., for media, control, data, etc. Others | ||||
| might use common flow identifiers for all these streams. Also, a user | might use common flow identifiers for all these streams. Also, a user | |||
| might group multiple application flows within the same encrypted VPN | might group multiple application flows within the same encrypted VPN | |||
| between the same layer-4 tunnel end-points. And even if there were a | between the same Layer 4 tunnel endpoints. And, even if there were a | |||
| one-to-one mapping between flows and applications, there is no reason to | one-to-one mapping between flows and applications, there is no reason to | |||
| believe that the rate at which congestion can be caused ought to be | believe that the rate at which congestion can be caused ought to be | |||
| allocated on a per application flow basis.</t> | allocated on a per-application-flow basis.</t> | |||
| <t>The use of a queuing score that excludes those aspects of flow rate | <t>The use of a queuing score that excludes those aspects of flow rate | |||
| that do not contribute to queuing (<xref target="qp_rationale_not_throughp ut" format="default"/>) goes some way to mitigating this | that do not contribute to queuing (<xref target="qp_rationale_not_throughp ut" format="default"/>) goes some way to mitigating this | |||
| limitation, because the algorithm does not judge responsibility for | limitation because the algorithm does not judge responsibility for | |||
| queuing delay primarily on the combined rate of a set of flows grouped | queuing delay primarily on the combined rate of a set of flows grouped | |||
| under one flow ID.</t> | under one flow ID.</t> | |||
| </section> | </section> | |||
| <section anchor="l4sds_IANA" numbered="true" toc="default"> | <section anchor="l4sds_IANA" numbered="true" toc="default"> | |||
| <name>IANA Considerations (to be removed by RFC Editor)</name> | <name>IANA Considerations</name> | |||
| <t>This specification contains no IANA considerations.</t> | <t>This document has no IANA actions.</t> | |||
| </section> | </section> | |||
| <!-- [rfced] We had the following questions related to the | ||||
| Implementation Status section: | ||||
| a) Should this section be removed per the guidance in RFC 7942 | ||||
| (relevant parts copied below for your convenience)? | ||||
| We recommend that the Implementation Status section should be removed | ||||
| from Internet-Drafts before they are published as RFCs. As a result, | ||||
| we do not envisage changes to this section after approval of the | ||||
| document for publication, while the document sits in the RFC Editor | ||||
| queue, e.g., the RFC errata process does not apply. | ||||
| This process is not mandatory. Authors of Internet-Drafts are | ||||
| encouraged to consider using the process for their documents, and | ||||
| working groups are invited to think about applying the process to all | ||||
| of their protocol specifications. | ||||
| ... | ||||
| This process was initially proposed as an experiment in [RFC6982]. | ||||
| That document is now obsoleted, and the process advanced to Best | ||||
| Current Practice. | ||||
| ... | ||||
| Each Internet-Draft may contain a section entitled "Implementation | ||||
| Status". This section, if it appears, should be located just before | ||||
| the "Security Considerations" section ... | ||||
| ... | ||||
| Since this information is necessarily time dependent, it is | ||||
| inappropriate for inclusion in a published RFC. The authors should | ||||
| include a note to the RFC Editor requesting that the section be | ||||
| removed before publication. | ||||
| ... | ||||
| Authors are requested to add a note to the RFC Editor at the top of | ||||
| this section, advising the Editor to remove the entire section before | ||||
| publication, as well as the reference to RFC 7942. | ||||
| b) If not, should Table 1 have some sort of title? | ||||
| c) FYI - In the meantime, we have updated per your guidance on the | ||||
| document intake form as follows: | ||||
| Old:" and one CMTS implementation by a third manufacturer." | ||||
| Current: " and several CMTS implementations by other manufacturers.” | ||||
| --> | ||||
| <section anchor="qp_impl_status" numbered="true" toc="default"> | <section anchor="qp_impl_status" numbered="true" toc="default"> | |||
| <name>Implementation Status</name> | <name>Implementation Status</name> | |||
| <table align="center"> | <table align="center"> | |||
| <thead> | <thead> | |||
| <tr> | <tr> | |||
| <th align="left">Implementation name:</th> | <th align="left">Implementation name:</th> | |||
| <th align="left">DOCSIS models for ns-3</th> | <th align="left">DOCSIS models for ns-3</th> | |||
| </tr> | </tr> | |||
| </thead> | </thead> | |||
| <tbody> | <tbody> | |||
| <tr> | <tr> | |||
| <td align="left">Organization</td> | <td align="left">Organization</td> | |||
| <td align="left">CableLabs</td> | <td align="left">CableLabs</td> | |||
| </tr> | </tr> | |||
| <tr> | <tr> | |||
| <td align="left">Web page</td> | <td align="left">Web page</td> | |||
| <td align="left">https://apps.nsnam.org/app/docsis-ns3/</td> | <td align="left">https://apps.nsnam.org/app/docsis-ns3/</td> | |||
| </tr> | </tr> | |||
| <tr> | <tr> | |||
| <td align="left">Description</td> | <td align="left">Description</td> | |||
| <td align="left">ns-3 simulation models developed and used in suppor | <td align="left">ns-3 simulation models developed and used in suppor | |||
| t of the Low | t of the Low-Latency DOCSIS development, including models of Dual Queue Coupled | |||
| Latency DOCSIS development, including models of Dual Queue Coupled | ||||
| AQM, Queue Protection, and the DOCSIS MAC</td> | AQM, Queue Protection, and the DOCSIS MAC</td> | |||
| </tr> | </tr> | |||
| <tr> | <tr> | |||
| <td align="left">Maturity</td> | <td align="left">Maturity</td> | |||
| <td align="left">Simulation models that can also be used in emulatio n mode in a | <td align="left">Simulation models that can also be used in emulatio n mode in a | |||
| testbed context</td> | testbed context</td> | |||
| </tr> | </tr> | |||
| <tr> | <tr> | |||
| <td align="left">Coverage</td> | <td align="left">Coverage</td> | |||
| <td align="left">Complete implementation of Annex P of DOCSIS 3.1</t d> | <td align="left">Complete implementation of Annex P of DOCSIS 3.1</t d> | |||
| skipping to change at line 1135 ¶ | skipping to change at line 1247 ¶ | |||
| </tr> | </tr> | |||
| <tr> | <tr> | |||
| <td align="left">Licence</td> | <td align="left">Licence</td> | |||
| <td align="left">GPLv2</td> | <td align="left">GPLv2</td> | |||
| </tr> | </tr> | |||
| <tr> | <tr> | |||
| <td align="left">Contact</td> | <td align="left">Contact</td> | |||
| <td align="left">via web page</td> | <td align="left">via web page</td> | |||
| </tr> | </tr> | |||
| <tr> | <tr> | |||
| <td align="left">Last Impl'n update</td> | <td align="left">Last Implementation Update</td> | |||
| <td align="left">Mar 2022</td> | <td align="left">Mar 2022</td> | |||
| </tr> | </tr> | |||
| <tr> | <tr> | |||
| <td align="left">Information valid at</td> | <td align="left">Information valid at</td> | |||
| <td align="left">7 Mar 2022</td> | <td align="left">7 Mar 2022</td> | |||
| </tr> | </tr> | |||
| </tbody> | </tbody> | |||
| </table> | </table> | |||
| <t>There are also a number of closed source implementations, including 2 | <t>There are also a number of closed source implementations, including two | |||
| cable modem implementations written by different chipset manufacturers, | cable modem implementations written by different chipset manufacturers | |||
| and one CMTS implementation by a third manufacturer. These, as well as | and several CMTS implementations by other manufacturers. These, as well as | |||
| the ns-3 implementation, have passed the full suite of compliance tests | the ns-3 implementation, have passed the full suite of compliance tests | |||
| developed by CableLabs.</t> | developed by CableLabs.</t> | |||
| </section> | </section> | |||
| <section anchor="l4sds_Security_Considerations" numbered="true" toc="default "> | <section anchor="l4sds_Security_Considerations" numbered="true" toc="default "> | |||
| <name>Security Considerations</name> | <name>Security Considerations</name> | |||
| <t>The whole of this document concerns traffic security. It considers | <t>The whole of this document concerns traffic security. It considers | |||
| the security question of how to identify and eject traffic that does not | the security question of how to identify and eject traffic that does not | |||
| comply with the non-queue-building behaviour required to use a shared | comply with the non-queue-building behavior required to use a shared | |||
| low latency queue, whether accidentally or maliciously.</t> | low-latency queue, whether accidentally or maliciously.</t> | |||
| <t>Section 8.2 of the L4S architecture <xref target="RFC9330" format="defa | <t><xref target="RFC9330" section="8.2"/> of the L4S architecture <xref ta | |||
| ult"/> | rget="RFC9330" format="default"/> | |||
| introduces the problem of maintaining low latency by either | introduces the problem of maintaining low latency by either | |||
| self-restraint or enforcement, and places DOCSIS queue protection in | self-restraint or enforcement and places DOCSIS queue protection in | |||
| context within a wider set of approaches to the problem.</t> | context within a wider set of approaches to the problem.</t> | |||
| <section anchor="qp_resource_exhaust" numbered="true" toc="default"> | <section anchor="qp_resource_exhaust" numbered="true" toc="default"> | |||
| <name>Resource Exhaustion Attacks</name> | <name>Resource Exhaustion Attacks</name> | |||
| <t>The algorithm has been designed to fail gracefully in the face of | <t>The algorithm has been designed to fail gracefully in the face of | |||
| traffic crafted to overrun the resources used for the algorithm's own | traffic crafted to overrun the resources used for the algorithm's own | |||
| processing and flow state. This means that non-queue-building flows | processing and flow state. This means that non-queue-building flows | |||
| will always be less likely to be sanctioned than queue-building flows. | will always be less likely to be sanctioned than queue-building flows. | |||
| But an attack could be contrived to deplete resources in such a way | But an attack could be contrived to deplete resources in such a way | |||
| that the proportion of innocent (non-queue-building) flows that are | that the proportion of innocent (non-queue-building) flows that are | |||
| incorrectly sanctioned could increase.</t> | incorrectly sanctioned could increase.</t> | |||
| <t>Incorrect sanctioning is intended not to be catastrophic; it | <t>Incorrect sanctioning is intended not to be catastrophic; it | |||
| results in more packets from well-behaved flows being redirected into | results in more packets from well-behaved flows being redirected into | |||
| the Classic queue; thus introducing more reordering into innocent | the Classic queue, which introduces more reordering into innocent | |||
| flows.</t> | flows.</t> | |||
| <section anchor="qp_flow-state_exhaust" numbered="true" toc="default"> | <section anchor="qp_flow-state_exhaust" numbered="true" toc="default"> | |||
| <name>Exhausting Flow-State Storage</name> | <name>Exhausting Flow-State Storage</name> | |||
| <t>To exhaust the number of buckets, the most efficient attack is to | <t>To exhaust the number of buckets, the most efficient attack is to | |||
| send enough long-running attack flows to increase the chance that an | send enough long-running attack flows to increase the chance that an | |||
| arriving flow will not find an available bucket, and therefore have | arriving flow will not find an available bucket and will, therefore, h | |||
| to share the 'dregs' bucket. For instance, if ATTEMPTS=2 and | ave | |||
| to share the "dregs" bucket. For instance, if ATTEMPTS=2 and | ||||
| NBUCKETS=32, it requires about 94 attack flows, each using different | NBUCKETS=32, it requires about 94 attack flows, each using different | |||
| port numbers, to increase the probability to 99% that an arriving | port numbers, to increase the probability to 99% that an arriving | |||
| flow will have to share the dregs, where it will share a high degree | flow will have to share the dregs, where it will share a high degree | |||
| of redirection into the C queue with the remainder of the attack | of redirection into the C queue with the remainder of the attack | |||
| flows.</t> | flows.</t> | |||
| <t>For an attacker to keep buckets busy, it is more efficient to | <t>For an attacker to keep buckets busy, it is more efficient to | |||
| hold onto them by cycling regularly through a set of port numbers | hold onto them by cycling regularly through a set of port numbers | |||
| (94 in the above example), rather than to keep occupying and | (94 in the above example) rather than to keep occupying and | |||
| releasing buckets with single packet flows across a much larger | releasing buckets with single packet flows across a much larger | |||
| number of ports.</t> | number of ports.</t> | |||
| <t>During such an attack, the coupled marking probability will have | <t>During such an attack, the coupled marking probability will have | |||
| saturated at 100%. So, to hold a bucket, the rate of an attack flow | saturated at 100%. So, to hold a bucket, the rate of an attack flow | |||
| needs to be no less than the AGING rate of each bucket; 4Mb/s by | needs to be no less than the AGING rate of each bucket: 4 Mb/s by | |||
| default. However, for an attack flow to be sure to hold on to its | default. However, for an attack flow to be sure to hold on to its | |||
| bucket, it would need to send somewhat faster. Thus an attack with | bucket, it would need to send somewhat faster. Thus, an attack with | |||
| 100 flows would need a total force of more than 100 * 4Mb/s = | 100 flows would need a total force of more than 100 * 4 Mb/s = | |||
| 400Mb/s.</t> | 400 Mb/s.</t> | |||
| <t>This attack can be mitigated (but not prevented) by increasing | <t>This attack can be mitigated (but not prevented) by increasing | |||
| the number of buckets. The required attack force scales linearly | the number of buckets. The required attack force scales linearly | |||
| with the number of buckets, NBUCKETS. So, if NBUCKETS were doubled | with the number of buckets, NBUCKETS. So, if NBUCKETS were doubled | |||
| to 64, twice as many 4Mb/s flows would be needed to maintain the | to 64, twice as many 4 Mb/s flows would be needed to maintain the | |||
| same impact on innocent flows.</t> | same impact on innocent flows.</t> | |||
| <t>Probably the most effective mitigation would be to implement | <t>Probably the most effective mitigation would be to implement | |||
| redirection of whole-flows once enough of the individual packets of | redirection of whole-flows once enough of the individual packets of | |||
| a certain offending flow had been redirected. This would free up the | a certain offending flow had been redirected. This would free up the | |||
| buckets used to maintain the per-packet queuing score of persistent | buckets used to maintain the per-packet queuing score of persistent | |||
| offenders. Whole-flow redirection is outside the scope of the | offenders. Whole-flow redirection is outside the scope of the | |||
| current version of the QProt algorithm specified here, but it is | current version of the QProt algorithm specified here, but it is | |||
| briefly discussed at the end of <xref target="qp_rationale_reclassify" format="default"/>.</t> | briefly discussed at the end of <xref target="qp_rationale_reclassify" format="default"/>.</t> | |||
| <t>It might be considered that all the packets of persistently | <t>It might be considered that all the packets of persistently | |||
| offending flows ought to be discarded rather than redirected. | offending flows ought to be discarded rather than redirected. | |||
| However, this is not recommended, because attack flows might be able | However, this is not recommended because attack flows might be able | |||
| to pervert whole-flow discard, turning it onto at least some | to pervert whole-flow discard, turning it onto at least some | |||
| innocent flows, thus amplifying an attack that causes reordering | innocent flows, thus amplifying an attack that causes reordering | |||
| into total deletion of some innocent flows.</t> | into total deletion of some innocent flows.</t> | |||
| </section> | </section> | |||
| <section anchor="qp_proc_exhaust" numbered="true" toc="default"> | <section anchor="qp_proc_exhaust" numbered="true" toc="default"> | |||
| <name>Exhausting Processing Resources</name> | <name>Exhausting Processing Resources</name> | |||
| <t>The processing time needed to apply the QProt algorithm to each | <t>The processing time needed to apply the QProt algorithm to each | |||
| LL packet is small and intended not to take all the time available | LL packet is small and intended not to take all the time available | |||
| between each of a run of fairly small packets. However, an attack | between each of a run of fairly small packets. However, an attack | |||
| could use minimum size packets launched from multiple input | could use minimum sized packets launched from multiple input | |||
| interfaces into a lower capacity output interface. Whether the QProt | interfaces into a lower capacity output interface. Whether the QProt | |||
| algorithm is vulnerable to processor exhaustion will depend on the | algorithm is vulnerable to processor exhaustion will depend on the | |||
| specific implementation.</t> | specific implementation.</t> | |||
| <t>Addition of a capability to redirect persistently offending flows | <t>Addition of a capability to redirect persistently offending flows | |||
| from LL to C would be the most effective way to reduce the | from LL to C would be the most effective way to reduce the | |||
| per-packet processing cost of the QProt algorithm, when under | per-packet processing cost of the QProt algorithm when under | |||
| attack. As already mentioned in <xref target="qp_flow-state_exhaust" f ormat="default"/>, this would also be an effective | attack. As already mentioned in <xref target="qp_flow-state_exhaust" f ormat="default"/>, this would also be an effective | |||
| way to mitigate flow-state exhaustion attacks. Further discussion of | way to mitigate flow-state exhaustion attacks. Further discussion of | |||
| whole-flow redirection is outside the scope of the present document, | whole-flow redirection is outside the scope of the present document | |||
| but briefly discussed at the end of <xref target="qp_rationale_reclass | but is briefly discussed at the end of <xref target="qp_rationale_recl | |||
| ify" format="default"/>.</t> | assify" format="default"/>.</t> | |||
| </section> | </section> | |||
| </section> | </section> | |||
| </section> | </section> | |||
| <section numbered="true" toc="default"> | <section numbered="true" toc="default"> | |||
| <name>Comments Solicited</name> | <name>Comments Solicited</name> | |||
| <t>Evaluation and assessment of the algorithm by researchers is | <t>Evaluation and assessment of the algorithm by researchers is | |||
| solicited. Comments and questions are also encouraged and welcome. They | solicited. Comments and questions are also encouraged and welcome. They | |||
| can be addressed to the authors.</t> | can be addressed to the authors.</t> | |||
| </section> | </section> | |||
| <section numbered="true" toc="default"> | ||||
| <name>Acknowledgements</name> | ||||
| <t>Thanks to Tom Henderson, Magnus Westerlund, David Black, Adrian | ||||
| Farrel and Gorry Fairhurst for their reviews of this document. The | ||||
| design of the QProt algorithm and the settings of the parameters | ||||
| benefited from discussion and critique from the participants of the | ||||
| cable industry working group on Low Latency DOCSIS. CableLabs funded Bob | ||||
| Briscoe's initial work on this document.</t> | ||||
| </section> | ||||
| </middle> | </middle> | |||
| <!-- *****BACK MATTER ***** --> | ||||
| <back> | <back> | |||
| <displayreference target="I-D.briscoe-iccrg-prague-congestion-control" to="P | ||||
| RAGUE-CC"/> | ||||
| <references> | <references> | |||
| <name>References</name> | <name>References</name> | |||
| <references> | <references> | |||
| <name>Normative References</name> | <name>Normative References</name> | |||
| <reference anchor="RFC3168" target="https://www.rfc-editor.org/info/rfc3 | <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.3 | |||
| 168" xml:base="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.3168.xml"> | 168.xml"/> | |||
| <front> | <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.8 | |||
| <title>The Addition of Explicit Congestion Notification (ECN) to IP< | 311.xml"/> | |||
| /title> | <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.9 | |||
| <author fullname="K. Ramakrishnan" initials="K." surname="Ramakrishn | 331.xml"/> | |||
| an"/> | <!-- [I-D.ietf-tsvwg-nqb] -> [RFC9956] | |||
| <author fullname="S. Floyd" initials="S." surname="Floyd"/> | companion doc RFC 9956 = draft-ietf-tsvwg-nqb-33 | |||
| <author fullname="D. Black" initials="D." surname="Black"/> | --> | |||
| <date month="September" year="2001"/> | ||||
| <abstract> | <reference anchor="RFC9956" target="https://www.rfc-editor.org/info/rfc9956"> | |||
| <t>This memo specifies the incorporation of ECN (Explicit Congesti | <front> | |||
| on Notification) to TCP and IP, including ECN's use of two bits in the IP header | <title>A Non-Queue-Building Per-Hop Behavior (NQB PHB) for Differentiated Se | |||
| . [STANDARDS-TRACK]</t> | rvices</title> | |||
| </abstract> | <author initials="G." surname="White" fullname="Greg White"> | |||
| </front> | <organization>CableLabs</organization> | |||
| <seriesInfo name="RFC" value="3168"/> | </author> | |||
| <seriesInfo name="DOI" value="10.17487/RFC3168"/> | <author initials="T." surname="Fossati" fullname="Thomas Fossati"> | |||
| </reference> | <organization>Linaro</organization> | |||
| <reference anchor="RFC8311" target="https://www.rfc-editor.org/info/rfc8 | </author> | |||
| 311" xml:base="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.8311.xml"> | <author initials="R." surname="Geib" fullname="Ruediger Geib"> | |||
| <front> | <organization>Deutsche Telekom</organization> | |||
| <title>Relaxing Restrictions on Explicit Congestion Notification (EC | </author> | |||
| N) Experimentation</title> | <date month='April' year='2026'/> | |||
| <author fullname="D. Black" initials="D." surname="Black"/> | </front> | |||
| <date month="January" year="2018"/> | <seriesInfo name="RFC" value="9956"/> | |||
| <abstract> | <seriesInfo name="DOI" value="10.17487/RFC9956"/> | |||
| <t>This memo updates RFC 3168, which specifies Explicit Congestion | </reference> | |||
| Notification (ECN) as an alternative to packet drops for indicating network con | ||||
| gestion to endpoints. It relaxes restrictions in RFC 3168 that hinder experiment | <!-- [rfced] Please review the following and let us know if any | |||
| ation towards benefits beyond just removal of loss. This memo summarizes the ant | further updates are necessary: | |||
| icipated areas of experimentation and updates RFC 3168 to enable experimentation | ||||
| in these areas. An Experimental RFC in the IETF document stream is required to | The original URLs for [DOCSIS], [DOCSIS-CCAP-OSS], and [DOCSIS-CM-OSS] | |||
| take advantage of any of these enabling updates. In addition, this memo makes re | resolved to a blank search results page. We found more-direct URLs for | |||
| lated updates to the ECN specifications for RTP in RFC 6679 and for the Datagram | these CableLabs specifications and updated the references accordingly. | |||
| Congestion Control Protocol (DCCP) in RFCs 4341, 4342, and 5622. This memo also | ||||
| records the conclusion of the ECN nonce experiment in RFC 3540 and provides the | Note that we also updated the date for [DOCSIS-CCAP-OSS] from "21 | |||
| rationale for reclassification of RFC 3540 from Experimental to Historic; this | January 2019" to "7 February 2019" to match the information provided | |||
| reclassification enables new experimental use of the ECT(1) codepoint.</t> | at that URL. | |||
| </abstract> | --> | |||
| </front> | <reference anchor="DOCSIS" target="https://www.cablelabs.com/specificati | |||
| <seriesInfo name="RFC" value="8311"/> | ons/CM-SP-MULPIv3.1"> | |||
| <seriesInfo name="DOI" value="10.17487/RFC8311"/> | ||||
| </reference> | ||||
| <reference anchor="RFC9331" target="https://www.rfc-editor.org/info/rfc9 | ||||
| 331" xml:base="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.9331.xml"> | ||||
| <front> | ||||
| <title>The Explicit Congestion Notification (ECN) Protocol for Low L | ||||
| atency, Low Loss, and Scalable Throughput (L4S)</title> | ||||
| <author fullname="K. De Schepper" initials="K." surname="De Schepper | ||||
| "/> | ||||
| <author fullname="B. Briscoe" initials="B." role="editor" surname="B | ||||
| riscoe"/> | ||||
| <date month="January" year="2023"/> | ||||
| <abstract> | ||||
| <t>This specification defines the protocol to be used for a new ne | ||||
| twork service called Low Latency, Low Loss, and Scalable throughput (L4S). L4S u | ||||
| ses an Explicit Congestion Notification (ECN) scheme at the IP layer that is sim | ||||
| ilar to the original (or 'Classic') ECN approach, except as specified within. L4 | ||||
| S uses 'Scalable' congestion control, which induces much more frequent control s | ||||
| ignals from the network, and it responds to them with much more fine-grained adj | ||||
| ustments so that very low (typically sub-millisecond on average) and consistentl | ||||
| y low queuing delay becomes possible for L4S traffic without compromising link u | ||||
| tilization. Thus, even capacity-seeking (TCP-like) traffic can have high bandwid | ||||
| th and very low delay at the same time, even during periods of high traffic load | ||||
| .</t> | ||||
| <t>The L4S identifier defined in this document distinguishes L4S f | ||||
| rom 'Classic' (e.g., TCP-Reno-friendly) traffic. Then, network bottlenecks can b | ||||
| e incrementally modified to distinguish and isolate existing traffic that still | ||||
| follows the Classic behaviour, to prevent it from degrading the low queuing dela | ||||
| y and low loss of L4S traffic. This Experimental specification defines the rules | ||||
| that L4S transports and network elements need to follow, with the intention tha | ||||
| t L4S flows neither harm each other's performance nor that of Classic traffic. I | ||||
| t also suggests open questions to be investigated during experimentation. Exampl | ||||
| es of new Active Queue Management (AQM) marking algorithms and new transports (w | ||||
| hether TCP-like or real time) are specified separately.</t> | ||||
| </abstract> | ||||
| </front> | ||||
| <seriesInfo name="RFC" value="9331"/> | ||||
| <seriesInfo name="DOI" value="10.17487/RFC9331"/> | ||||
| </reference> | ||||
| <reference anchor="I-D.ietf-tsvwg-nqb" target="https://datatracker.ietf. | ||||
| org/doc/html/draft-ietf-tsvwg-nqb-21" xml:base="https://bib.ietf.org/public/rfc/ | ||||
| bibxml-ids/reference.I-D.ietf-tsvwg-nqb.xml"> | ||||
| <front> | ||||
| <title>A Non-Queue-Building Per-Hop Behavior (NQB PHB) for Different | ||||
| iated Services</title> | ||||
| <author fullname="Greg White" initials="G." surname="White"> | ||||
| <organization>CableLabs</organization> | ||||
| </author> | ||||
| <author fullname="Thomas Fossati" initials="T." surname="Fossati"> | ||||
| <organization>Linaro</organization> | ||||
| </author> | ||||
| <author fullname="Ruediger Geib" initials="R." surname="Geib"> | ||||
| <organization>Deutsche Telekom</organization> | ||||
| </author> | ||||
| <date day="7" month="November" year="2023"/> | ||||
| <abstract> | ||||
| <t>This document specifies properties and characteristics of a Non | ||||
| - Queue-Building Per-Hop Behavior (NQB PHB). The NQB PHB provides a shallow-buff | ||||
| ered, best-effort service as a complement to a Default deep-buffered best-effort | ||||
| service for Internet services. The purpose of this NQB PHB is to provide a sepa | ||||
| rate queue that enables smooth (i.e. non-bursty), low-data-rate, application-lim | ||||
| ited traffic microflows, which would ordinarily share a queue with bursty and ca | ||||
| pacity-seeking traffic, to avoid the latency, latency variation and loss caused | ||||
| by such traffic. This PHB is implemented without prioritization and can be imple | ||||
| mented without rate policing, making it suitable for environments where the use | ||||
| of these features is restricted. The NQB PHB has been developed primarily for us | ||||
| e by access network segments, where queuing delays and queuing loss caused by Qu | ||||
| eue-Building protocols are manifested, but its use is not limited to such segmen | ||||
| ts. In particular, applications to cable broadband links, Wi-Fi links, and mobil | ||||
| e network radio and core segments are discussed. This document recommends a spec | ||||
| ific Differentiated Services Code Point (DSCP) to identify Non-Queue- Building m | ||||
| icroflows. [NOTE (to be removed by RFC-Editor): This document references an ISE | ||||
| submission draft (I-D.briscoe-docsis-q-protection) that is approved for publicat | ||||
| ion as an RFC. This draft should be held for publication until the queue protect | ||||
| ion RFC can be referenced.]</t> | ||||
| </abstract> | ||||
| </front> | ||||
| <seriesInfo name="Internet-Draft" value="draft-ietf-tsvwg-nqb-21"/> | ||||
| </reference> | ||||
| <reference anchor="DOCSIS" target="https://specification-search.cablelab | ||||
| s.com/CM-SP-MULPIv3.1"> | ||||
| <front> | <front> | |||
| <title>MAC and Upper Layer Protocols Interface (MULPI) | <title>MAC and Upper Layer Protocols Interface (MULPI) | |||
| Specification, CM-SP-MULPIv3.1</title> | Specification, CM-SP-MULPIv3.1</title> | |||
| <author fullname="" surname=""> | <author fullname="" surname=""> | |||
| <organization>CableLabs</organization> | <organization>CableLabs</organization> | |||
| </author> | </author> | |||
| <date day="21" month="January" year="2019"/> | <date day="21" month="January" year="2019"/> | |||
| </front> | </front> | |||
| <seriesInfo name="Data-Over-Cable Service Interface Specifications DOC SIS® 3.1" value="Version I17 or later"/> | <seriesInfo name="Data-Over-Cable Service Interface Specifications DOC SIS(r) 3.1" value="Version I17 or later"/> | |||
| </reference> | </reference> | |||
| <reference anchor="DOCSIS-CM-OSS" target="https://specification-search.c ablelabs.com/CM-SP-CM-OSSIv3.1"> | <reference anchor="DOCSIS-CM-OSS" target="https://www.cablelabs.com/spec ifications/CM-SP-CCAP-OSSIv3.1"> | |||
| <front> | <front> | |||
| <title>Cable Modem Operations Support System Interface Spec</title> | <title>Cable Modem Operations Support System Interface Specification </title> | |||
| <author fullname="" surname=""> | <author fullname="" surname=""> | |||
| <organization>CableLabs</organization> | <organization>CableLabs</organization> | |||
| </author> | </author> | |||
| <date day="21" month="January" year="2019"/> | <date day="21" month="January" year="2019"/> | |||
| </front> | </front> | |||
| <seriesInfo name="Data-Over-Cable Service Interface Specifications DOC SIS® 3.1" value="Version I14 or later"/> | <seriesInfo name="Data-Over-Cable Service Interface Specifications DOC SIS(r) 3.1" value="Version I14 or later"/> | |||
| </reference> | </reference> | |||
| <reference anchor="DOCSIS-CCAP-OSS" target="https://specification-search .cablelabs.com/CM-SP-CM-OSSIv3.1"> | <reference anchor="DOCSIS-CCAP-OSS" target="https://www.cablelabs.com/sp ecifications/CM-SP-CM-OSSIv3.1"> | |||
| <front> | <front> | |||
| <title>CCAP Operations Support System Interface Spec</title> | <title>CCAP Operations Support System Interface Specification</title > | |||
| <author fullname="" surname=""> | <author fullname="" surname=""> | |||
| <organization>CableLabs</organization> | <organization>CableLabs</organization> | |||
| </author> | </author> | |||
| <date day="21" month="January" year="2019"/> | <date day="7" month="February" year="2019"/> | |||
| </front> | </front> | |||
| <seriesInfo name="Data-Over-Cable Service Interface Specifications DOC SIS® 3.1" value="Version I14 or later"/> | <seriesInfo name="Data-Over-Cable Service Interface Specifications DOC SIS(r) 3.1" value="Version I14 or later"/> | |||
| <format target="https://specification-search.cablelabs.com/CM-SP-CCAP- OSSIv3.1" type="PDF"/> | <format target="https://specification-search.cablelabs.com/CM-SP-CCAP- OSSIv3.1" type="PDF"/> | |||
| </reference> | </reference> | |||
| </references> | </references> | |||
| <references> | <references> | |||
| <name>Informative References</name> | <name>Informative References</name> | |||
| <reference anchor="RFC4303" target="https://www.rfc-editor.org/info/rfc4 | <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.4 | |||
| 303" xml:base="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.4303.xml"> | 303.xml"/> | |||
| <front> | <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.6 | |||
| <title>IP Encapsulating Security Payload (ESP)</title> | 789.xml"/> | |||
| <author fullname="S. Kent" initials="S." surname="Kent"/> | <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.7 | |||
| <date month="December" year="2005"/> | 713.xml"/> | |||
| <abstract> | <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.8 | |||
| <t>This document describes an updated version of the Encapsulating | 257.xml"/> | |||
| Security Payload (ESP) protocol, which is designed to provide a mix of security | <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.8 | |||
| services in IPv4 and IPv6. ESP is used to provide confidentiality, data origin | 298.xml"/> | |||
| authentication, connectionless integrity, an anti-replay service (a form of part | <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.9 | |||
| ial sequence integrity), and limited traffic flow confidentiality. This document | 332.xml"/> | |||
| obsoletes RFC 2406 (November 1998). [STANDARDS-TRACK]</t> | <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.9 | |||
| </abstract> | 330.xml"/> | |||
| </front> | <!-- [I-D.briscoe-iccrg-prague-congestion-control] | |||
| <seriesInfo name="RFC" value="4303"/> | draft-briscoe-iccrg-prague-congestion-control-04 | |||
| <seriesInfo name="DOI" value="10.17487/RFC4303"/> | IESG State: Expired as of 1/5/25 | |||
| </reference> | --> | |||
| <reference anchor="RFC6789" target="https://www.rfc-editor.org/info/rfc6 | <xi:include href="https://bib.ietf.org/public/rfc/bibxml3/reference.I-D.briscoe- | |||
| 789" xml:base="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.6789.xml"> | iccrg-prague-congestion-control.xml"/> | |||
| <front> | ||||
| <title>Congestion Exposure (ConEx) Concepts and Use Cases</title> | ||||
| <author fullname="B. Briscoe" initials="B." role="editor" surname="B | ||||
| riscoe"/> | ||||
| <author fullname="R. Woundy" initials="R." role="editor" surname="Wo | ||||
| undy"/> | ||||
| <author fullname="A. Cooper" initials="A." role="editor" surname="Co | ||||
| oper"/> | ||||
| <date month="December" year="2012"/> | ||||
| <abstract> | ||||
| <t>This document provides the entry point to the set of documentat | ||||
| ion about the Congestion Exposure (ConEx) protocol. It explains the motivation f | ||||
| or including a ConEx marking at the IP layer: to expose information about conges | ||||
| tion to network nodes. Although such information may have a number of uses, this | ||||
| document focuses on how the information communicated by the ConEx marking can s | ||||
| erve as the basis for significantly more efficient and effective traffic managem | ||||
| ent than what exists on the Internet today. This document is not an Internet Sta | ||||
| ndards Track specification; it is published for informational purposes.</t> | ||||
| </abstract> | ||||
| </front> | ||||
| <seriesInfo name="RFC" value="6789"/> | ||||
| <seriesInfo name="DOI" value="10.17487/RFC6789"/> | ||||
| </reference> | ||||
| <reference anchor="RFC7713" target="https://www.rfc-editor.org/info/rfc7 | ||||
| 713" xml:base="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.7713.xml"> | ||||
| <front> | ||||
| <title>Congestion Exposure (ConEx) Concepts, Abstract Mechanism, and | ||||
| Requirements</title> | ||||
| <author fullname="M. Mathis" initials="M." surname="Mathis"/> | ||||
| <author fullname="B. Briscoe" initials="B." surname="Briscoe"/> | ||||
| <date month="December" year="2015"/> | ||||
| <abstract> | ||||
| <t>This document describes an abstract mechanism by which senders | ||||
| inform the network about the congestion recently encountered by packets in the s | ||||
| ame flow. Today, network elements at any layer may signal congestion to the rece | ||||
| iver by dropping packets or by Explicit Congestion Notification (ECN) markings, | ||||
| and the receiver passes this information back to the sender in transport-layer f | ||||
| eedback. The mechanism described here enables the sender to also relay this cong | ||||
| estion information back into the network in-band at the IP layer, such that the | ||||
| total amount of congestion from all elements on the path is revealed to all IP e | ||||
| lements along the path, where it could, for example, be used to provide input to | ||||
| traffic management. This mechanism is called Congestion Exposure, or ConEx. The | ||||
| companion document, "Congestion Exposure (ConEx) Concepts and Use Cases" (RFC 6 | ||||
| 789), provides the entry point to the set of ConEx documentation.</t> | ||||
| </abstract> | ||||
| </front> | ||||
| <seriesInfo name="RFC" value="7713"/> | ||||
| <seriesInfo name="DOI" value="10.17487/RFC7713"/> | ||||
| </reference> | ||||
| <reference anchor="RFC8257" target="https://www.rfc-editor.org/info/rfc8 | ||||
| 257" xml:base="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.8257.xml"> | ||||
| <front> | ||||
| <title>Data Center TCP (DCTCP): TCP Congestion Control for Data Cent | ||||
| ers</title> | ||||
| <author fullname="S. Bensley" initials="S." surname="Bensley"/> | ||||
| <author fullname="D. Thaler" initials="D." surname="Thaler"/> | ||||
| <author fullname="P. Balasubramanian" initials="P." surname="Balasub | ||||
| ramanian"/> | ||||
| <author fullname="L. Eggert" initials="L." surname="Eggert"/> | ||||
| <author fullname="G. Judd" initials="G." surname="Judd"/> | ||||
| <date month="October" year="2017"/> | ||||
| <abstract> | ||||
| <t>This Informational RFC describes Data Center TCP (DCTCP): a TCP | ||||
| congestion control scheme for data-center traffic. DCTCP extends the Explicit C | ||||
| ongestion Notification (ECN) processing to estimate the fraction of bytes that e | ||||
| ncounter congestion rather than simply detecting that some congestion has occurr | ||||
| ed. DCTCP then scales the TCP congestion window based on this estimate. This met | ||||
| hod achieves high-burst tolerance, low latency, and high throughput with shallow | ||||
| - buffered switches. This memo also discusses deployment issues related to the c | ||||
| oexistence of DCTCP and conventional TCP, discusses the lack of a negotiating me | ||||
| chanism between sender and receiver, and presents some possible mitigations. Thi | ||||
| s memo documents DCTCP as currently implemented by several major operating syste | ||||
| ms. DCTCP, as described in this specification, is applicable to deployments in c | ||||
| ontrolled environments like data centers, but it must not be deployed over the p | ||||
| ublic Internet without additional measures.</t> | ||||
| </abstract> | ||||
| </front> | ||||
| <seriesInfo name="RFC" value="8257"/> | ||||
| <seriesInfo name="DOI" value="10.17487/RFC8257"/> | ||||
| </reference> | ||||
| <reference anchor="RFC8298" target="https://www.rfc-editor.org/info/rfc8 | ||||
| 298" xml:base="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.8298.xml"> | ||||
| <front> | ||||
| <title>Self-Clocked Rate Adaptation for Multimedia</title> | ||||
| <author fullname="I. Johansson" initials="I." surname="Johansson"/> | ||||
| <author fullname="Z. Sarker" initials="Z." surname="Sarker"/> | ||||
| <date month="December" year="2017"/> | ||||
| <abstract> | ||||
| <t>This memo describes a rate adaptation algorithm for conversatio | ||||
| nal media services such as interactive video. The solution conforms to the packe | ||||
| t conservation principle and uses a hybrid loss-and-delay- based congestion cont | ||||
| rol algorithm. The algorithm is evaluated over both simulated Internet bottlenec | ||||
| k scenarios as well as in a Long Term Evolution (LTE) system simulator and is sh | ||||
| own to achieve both low latency and high video throughput in these scenarios.</t | ||||
| > | ||||
| </abstract> | ||||
| </front> | ||||
| <seriesInfo name="RFC" value="8298"/> | ||||
| <seriesInfo name="DOI" value="10.17487/RFC8298"/> | ||||
| </reference> | ||||
| <reference anchor="RFC9332" target="https://www.rfc-editor.org/info/rfc9 | ||||
| 332" xml:base="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.9332.xml"> | ||||
| <front> | ||||
| <title>Dual-Queue Coupled Active Queue Management (AQM) for Low Late | ||||
| ncy, Low Loss, and Scalable Throughput (L4S)</title> | ||||
| <author fullname="K. De Schepper" initials="K." surname="De Schepper | ||||
| "/> | ||||
| <author fullname="B. Briscoe" initials="B." role="editor" surname="B | ||||
| riscoe"/> | ||||
| <author fullname="G. White" initials="G." surname="White"/> | ||||
| <date month="January" year="2023"/> | ||||
| <abstract> | ||||
| <t>This specification defines a framework for coupling the Active | ||||
| Queue Management (AQM) algorithms in two queues intended for flows with differen | ||||
| t responses to congestion. This provides a way for the Internet to transition fr | ||||
| om the scaling problems of standard TCP-Reno-friendly ('Classic') congestion con | ||||
| trols to the family of 'Scalable' congestion controls. These are designed for co | ||||
| nsistently very low queuing latency, very low congestion loss, and scaling of pe | ||||
| r-flow throughput by using Explicit Congestion Notification (ECN) in a modified | ||||
| way. Until the Coupled Dual Queue (DualQ), these Scalable L4S congestion control | ||||
| s could only be deployed where a clean-slate environment could be arranged, such | ||||
| as in private data centres.</t> | ||||
| <t>This specification first explains how a Coupled DualQ works. It | ||||
| then gives the normative requirements that are necessary for it to work well. A | ||||
| ll this is independent of which two AQMs are used, but pseudocode examples of sp | ||||
| ecific AQMs are given in appendices.</t> | ||||
| </abstract> | ||||
| </front> | ||||
| <seriesInfo name="RFC" value="9332"/> | ||||
| <seriesInfo name="DOI" value="10.17487/RFC9332"/> | ||||
| </reference> | ||||
| <reference anchor="RFC9330" target="https://www.rfc-editor.org/info/rfc9 | ||||
| 330" xml:base="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.9330.xml"> | ||||
| <front> | ||||
| <title>Low Latency, Low Loss, and Scalable Throughput (L4S) Internet | ||||
| Service: Architecture</title> | ||||
| <author fullname="B. Briscoe" initials="B." role="editor" surname="B | ||||
| riscoe"/> | ||||
| <author fullname="K. De Schepper" initials="K." surname="De Schepper | ||||
| "/> | ||||
| <author fullname="M. Bagnulo" initials="M." surname="Bagnulo"/> | ||||
| <author fullname="G. White" initials="G." surname="White"/> | ||||
| <date month="January" year="2023"/> | ||||
| <abstract> | ||||
| <t>This document describes the L4S architecture, which enables Int | ||||
| ernet applications to achieve low queuing latency, low congestion loss, and scal | ||||
| able throughput control. L4S is based on the insight that the root cause of queu | ||||
| ing delay is in the capacity-seeking congestion controllers of senders, not in t | ||||
| he queue itself. With the L4S architecture, all Internet applications could (but | ||||
| do not have to) transition away from congestion control algorithms that cause s | ||||
| ubstantial queuing delay and instead adopt a new class of congestion controls th | ||||
| at can seek capacity with very little queuing. These are aided by a modified for | ||||
| m of Explicit Congestion Notification (ECN) from the network. With this new arch | ||||
| itecture, applications can have both low latency and high throughput.</t> | ||||
| <t>The architecture primarily concerns incremental deployment. It | ||||
| defines mechanisms that allow the new class of L4S congestion controls to coexis | ||||
| t with 'Classic' congestion controls in a shared network. The aim is for L4S lat | ||||
| ency and throughput to be usually much better (and rarely worse) while typically | ||||
| not impacting Classic performance.</t> | ||||
| </abstract> | ||||
| </front> | ||||
| <seriesInfo name="RFC" value="9330"/> | ||||
| <seriesInfo name="DOI" value="10.17487/RFC9330"/> | ||||
| </reference> | ||||
| <reference anchor="I-D.briscoe-iccrg-prague-congestion-control" target=" | ||||
| https://datatracker.ietf.org/doc/html/draft-briscoe-iccrg-prague-congestion-cont | ||||
| rol-03" xml:base="https://bib.ietf.org/public/rfc/bibxml-ids/reference.I-D.brisc | ||||
| oe-iccrg-prague-congestion-control.xml"> | ||||
| <front> | ||||
| <title>Prague Congestion Control</title> | ||||
| <author fullname="Koen De Schepper" initials="K." surname="De Schepp | ||||
| er"> | ||||
| <organization>Nokia Bell Labs</organization> | ||||
| </author> | ||||
| <author fullname="Olivier Tilmans" initials="O." surname="Tilmans"> | ||||
| <organization>Nokia Bell Labs</organization> | ||||
| </author> | ||||
| <author fullname="Bob Briscoe" initials="B." surname="Briscoe"> | ||||
| <organization>Independent</organization> | ||||
| </author> | ||||
| <author fullname="Vidhi Goel" initials="V." surname="Goel"> | ||||
| <organization>Apple Inc</organization> | ||||
| </author> | ||||
| <date day="14" month="October" year="2023"/> | ||||
| <abstract> | ||||
| <t>This specification defines the Prague congestion control scheme | ||||
| , which is derived from DCTCP and adapted for Internet traffic by implementing t | ||||
| he Prague L4S requirements. Over paths with L4S support at the bottleneck, it ad | ||||
| apts the DCTCP mechanisms to achieve consistently low latency and full throughpu | ||||
| t. It is defined independently of any particular transport protocol or operating | ||||
| system, but notes are added that highlight issues specific to certain transport | ||||
| s and OSs. It is mainly based on experience with the reference Linux implementat | ||||
| ion of TCP Prague and the Apple implementation over QUIC, but it includes experi | ||||
| ence from other implementations where available. The implementation does not sat | ||||
| isfy all the Prague requirements (yet) and the IETF might decide that certain re | ||||
| quirements need to be relaxed as an outcome of the process of trying to satisfy | ||||
| them all. Future plans that have typically only been implemented as proof-of- co | ||||
| ncept code are outlined in a separate section.</t> | ||||
| </abstract> | ||||
| </front> | ||||
| <seriesInfo name="Internet-Draft" value="draft-briscoe-iccrg-prague-co | ||||
| ngestion-control-03"/> | ||||
| </reference> | ||||
| <reference anchor="LLD" target="https://cablela.bs/low-latency-docsis-te chnology-overview-february-2019"> | <reference anchor="LLD" target="https://cablela.bs/low-latency-docsis-te chnology-overview-february-2019"> | |||
| <front> | <front> | |||
| <title>Low Latency DOCSIS: Technology Overview</title> | <title>Low Latency DOCSIS: Technology Overview</title> | |||
| <author fullname="Greg White" initials="G." surname="White"> | <author fullname="Greg White" initials="G." surname="White"> | |||
| <organization>CableLabs</organization> | <organization>CableLabs</organization> | |||
| </author> | </author> | |||
| <author fullname="Karthik Sundaresan" initials="K." surname="Sundare san"> | <author fullname="Karthik Sundaresan" initials="K." surname="Sundare san"> | |||
| <organization>CableLabs</organization> | <organization>CableLabs</organization> | |||
| </author> | </author> | |||
| <author fullname="Bob Briscoe" initials="B." surname="Briscoe"> | <author fullname="Bob Briscoe" initials="B." surname="Briscoe"> | |||
| <organization>CableLabs</organization> | <organization>CableLabs</organization> | |||
| </author> | </author> | |||
| <date day="" month="February" year="2019"/> | <date day="" month="February" year="2019"/> | |||
| </front> | </front> | |||
| <seriesInfo name="CableLabs White Paper" value=""/> | <refcontent>CableLabs White Paper</refcontent> | |||
| </reference> | </reference> | |||
| <reference anchor="ScalingCC" target="https://arxiv.org/abs/1904.07605"> | <reference anchor="ScalingCC" target="https://arxiv.org/abs/1904.07605"> | |||
| <front> | <front> | |||
| <title>Resolving Tensions between Congestion Control Scaling | <title>Resolving Tensions between Congestion Control Scaling | |||
| Requirements</title> | Requirements</title> | |||
| <author fullname="Bob Briscoe" initials="B." surname="Briscoe"> | <author fullname="Bob Briscoe" initials="B." surname="Briscoe"> | |||
| <organization>Simula Research Lab</organization> | <organization>Simula Research Lab</organization> | |||
| </author> | </author> | |||
| <author fullname="Koen De Schepper" initials="K." surname="De Schepp er"> | <author fullname="Koen De Schepper" initials="K." surname="De Schepp er"> | |||
| <organization>Nokia Bell Labs</organization> | <organization>Nokia Bell Labs</organization> | |||
| </author> | </author> | |||
| <date month="July" year="2017"/> | <date month="July" year="2017"/> | |||
| </front> | </front> | |||
| <seriesInfo name="Simula Technical Report" value="TR-CS-2016-001 arXiv | <seriesInfo name="Simula Technical Report" value="TR-CS-2016-001"/> | |||
| :1904.07605"/> | <seriesInfo name="DOI" value="10.48550/arXiv.1904.07605"/> | |||
| <refcontent>arXiv:1904.07605</refcontent> | ||||
| </reference> | </reference> | |||
| <!-- [rfced] FYI: We updated the [BBRv3] and [SCReAM] references to | ||||
| match current style guidance for references to web-based public | ||||
| code repositories: | ||||
| https://www.rfc-editor.org/styleguide/part2/#ref_repo --> | ||||
| <reference anchor="BBRv3" target="https://github.com/google/bbr/blob/v3/ README.md"> | <reference anchor="BBRv3" target="https://github.com/google/bbr/blob/v3/ README.md"> | |||
| <front> | <front> | |||
| <title>TCP BBR v3 Release</title> | <title>TCP BBR v3 Release</title> | |||
| <author fullname="Neal Cardwell" initials="N" surname="Cardwell"> | <author/> | |||
| <organization/> | <date day="18" month="March" year="2025"/> | |||
| </author> | ||||
| <date/> | ||||
| </front> | </front> | |||
| <seriesInfo name="github repository;" value="Linux congestion control module"/> | <refcontent>commit 90210de</refcontent> | |||
| </reference> | </reference> | |||
| <reference anchor="SCReAM" target="https://github.com/EricssonResearch/s cream/blob/master/README.md"> | <reference anchor="SCReAM" target="https://github.com/EricssonResearch/s cream/blob/master/README.md"> | |||
| <front> | <front> | |||
| <title>SCReAM</title> | <title>SCReAM</title> | |||
| <author fullname="Ingemar Johansson" initials="I" surname="Johansson | <author/> | |||
| "> | <date day="10" month="November" year="2025"/> | |||
| <organization/> | ||||
| </author> | ||||
| <date/> | ||||
| </front> | </front> | |||
| <seriesInfo name="github repository;" value=""/> | <refcontent>commit 0208f59</refcontent> | |||
| <format target="https://github.com/google/bbr/blob/v2alpha/README.md" | ||||
| type="Source code"/> | ||||
| </reference> | </reference> | |||
| </references> | </references> | |||
| </references> | </references> | |||
| <section numbered="false" toc="default"> | ||||
| <name>Acknowledgements</name> | ||||
| <t>Thanks to <contact fullname="Tom Henderson"/>, <contact | ||||
| fullname="Magnus Westerlund"/>, <contact fullname="David Black"/>, | ||||
| <contact fullname="Adrian Farrel"/>, and <contact fullname="Gorry | ||||
| Fairhurst"/> for their reviews of this document. The design of the QProt | ||||
| algorithm and the settings of the parameters benefited from discussion | ||||
| and critique from the participants of the cable industry working group | ||||
| on Low-Latency DOCSIS. CableLabs funded <contact fullname="Bob | ||||
| Briscoe"/>'s initial work on this document.</t> | ||||
| </section> | ||||
| <!--[rfced] We had the following questions related to terminology used | ||||
| throughout the document: | ||||
| a) Several sections use "the algorithm" in an opening statement while | ||||
| other sections say "The QProt algorithm". Would it be easier for the | ||||
| reader to call it "The QProt algorithm" in first mentions in a section | ||||
| (and use "the algorithm" thereafter in the section)? Thinking of | ||||
| readers that may not read the entire RFC, but instead jump to a | ||||
| section from a reference link. | ||||
| b) We have updated to use the form on the right throughout. Please | ||||
| let us know any objections. | ||||
| IPSec / IPsec (to match RFC 4303) | ||||
| flow-ID / flow ID | ||||
| c) How may we make the following terms consistent throughout? | ||||
| Congestion-rate vs. congestion-rate | ||||
| Coupled DualQ AQM vs. Dual Queue Coupled AQM (companion uses "IETF's | ||||
| Coupled DualQ AQM") | ||||
| Diffserv Codepoint vs. Diffserv codepoint (companion uses Diffserv | ||||
| Code Point and Differentiated Services Code Point) | ||||
| flow state vs. flow-state | ||||
| Native vs. native vs. "Native" | ||||
| per-flow-state vs. per-flow state | ||||
| queue protection vs. Queue Protection | ||||
| --> | ||||
| <!--[rfced] We had the following questions related to abbreviations | ||||
| used throughout the document: | ||||
| a) FYI - We have added expansions for abbreviations upon first use per | ||||
| Section 3.6 of RFC 7322 ("RFC Style Guide"). Please review each | ||||
| expansion in the document carefully to ensure correctness. | ||||
| b) We see that the companion document (draft-ietf-tsvwg-nqb-33) uses | ||||
| the following abbreviations: | ||||
| NQB - Non-Queue-Building | ||||
| QB - Queue-Building | ||||
| We see that this document only uses NQB when mentioning the Diffserv | ||||
| codepoint. Can NQB be introduced earlier in the document and be used | ||||
| to refer to the general concept? | ||||
| c) We see that [DOCSIS] uses "Queue Protection" rather than "queue | ||||
| protection". We see both the capped and lowercase versions used in | ||||
| this document. May we update to simply QProt (after first expansion) | ||||
| when referring to the algorithm? And/Or are there places where | ||||
| capping or lowercasing this term is necessary? If not, please let us | ||||
| know how we may make this consistent. | ||||
| Further, is it QProt algorithm or DOCSIS QProt algorithm? | ||||
| d) FYI - We have updated the expansion of DOCSIS to use hyphenation | ||||
| (i.e., Data-Over-Cable) to match the use in [DOCSIS] and the companion | ||||
| document. Please let us know any objections. | ||||
| e) How may we expand the following abbreviations? | ||||
| CE | ||||
| MAC | ||||
| f) We will update to use the abbreviated forms of the following after | ||||
| expansion on first use (per the guidance at | ||||
| https://www.rfc-editor.org/styleguide/part2/#exp_abbrev): | ||||
| LL | ||||
| CM | ||||
| g) We note that this document uses LL queue as an abbreviation for | ||||
| low-latency queue. However, we see RFC 9332 uses "low-latency (L) | ||||
| queue". Please review this discrepancy and let us know if any further | ||||
| updates are necessary. | ||||
| Further, please note that we have hyphenated low latency when it appears in attr | ||||
| ibutive position to match its use in RFC 9330-9332. | ||||
| --> | ||||
| <!-- [rfced] Please review the "Inclusive Language" portion of the online | ||||
| Style Guide <https://www.rfc-editor.org/styleguide/part2/#inclusive_language> | ||||
| and let us know if any changes are needed. Updates of this nature typically | ||||
| result in more precise language, which is helpful for readers. | ||||
| For example, please consider whether the following should be updated: | ||||
| native | ||||
| In addition, please consider whether uses of "tradition" should be updated for | ||||
| clarity. While the NIST website | ||||
| <https://web.archive.org/web/20250214092458/https://www.nist.gov/nist-research-l | ||||
| ibrary/nist-technical-series-publications-author-instructions#table1> | ||||
| indicates that this term is potentially biased, it is also ambiguous. | ||||
| "Tradition" is a subjective term, as it is not the same for everyone. | ||||
| --> | ||||
| </back> | </back> | |||
| </rfc> | </rfc> | |||
| End of changes. 211 change blocks. | ||||
| 730 lines changed or deleted | 682 lines changed or added | |||
This html diff was produced by rfcdiff 1.48. | ||||