<!doctype html>
<!-- Document shell for a pdf2htmlEX-converted PDF. The doctype selects standards mode; lang marks the English content. -->
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
<head>
<meta charset="utf-8">
<meta name="generator" content="pdf2htmlEX">
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
<!-- Shared base/fancy stylesheets first, then raw.css from this document's own preview directory. -->
<link rel="stylesheet" href="https://static.pudn.com/base/css/base.min.css">
<link rel="stylesheet" href="https://static.pudn.com/base/css/fancy.min.css">
<link rel="stylesheet" href="https://static.pudn.com/prod/directory_preview_static/6275eafd16f2c0769caad825/raw.css">
<!-- Loaded synchronously and in order: the inline script below uses the pdf2htmlEX global immediately. -->
<script src="https://static.pudn.com/base/js/compatibility.min.js"></script>
<script src="https://static.pudn.com/base/js/pdf2htmlEX.min.js"></script>
<script>
// Create the default viewer with an empty options object.
// Best-effort on purpose: any error thrown here (for example, the pdf2htmlEX
// global being unavailable) is swallowed so the static page markup still renders.
try{
pdf2htmlEX.defaultViewer = new pdf2htmlEX.Viewer({});
}catch(e){}
</script>
<!-- Title taken from the paper heading on page 1; it was previously empty. -->
<title>YOLOv4: Optimal Speed and Accuracy of Object Detection</title>
</head>
<body>
<!-- Sidebar wrapper, kept hidden by its inline style. Its #outline child holds no entries, only a line break.
     NOTE(review): presumably the pdf2htmlEX outline panel; whitespace inside #outline is a DOM text node, so do not reformat without checking the viewer script. -->
<div id="sidebar" style="display: none">
<div id="outline">
</div>
</div>
<div id="pf1" class="pf w0 h0" data-page-no="1"><div class="pc pc1 w0 h0"><img class="bi x0 y0 w1 h1" alt="" src="https://static.pudn.com/prod/directory_preview_static/6275eafd16f2c0769caad825/bg1.jpg"><div class="t m0 x1 h2 y1 ff1 fs0 fc0 sc0 ls0 ws0">Y<span class="_ _0"></span>OLOv4:<span class="_ _1"> </span>Optimal<span class="_"> </span>Speed<span class="_"> </span>and<span class="_"> </span>Accuracy<span class="_"> </span>of<span class="_"> </span>Object<span class="_"> </span>Detection</div><div class="t m0 x2 h3 y2 ff2 fs1 fc0 sc0 ls0 ws0">Alex<span class="_ _0"></span>ey<span class="_"> </span>Bochko<span class="_ _0"></span>vskiy</div><div class="t m0 x3 h4 y3 ff3 fs2 fc0 sc0 ls0 ws0">∗</div><div class="t m0 x4 h5 y4 ff4 fs3 fc0 sc0 ls0 ws0">alexeyab84@gmail.com</div><div class="t m0 x5 h3 y2 ff2 fs1 fc0 sc0 ls0 ws0">Chien-Y<span class="_ _2"></span>ao<span class="_"> </span>W<span class="_ _2"></span>ang</div><div class="t m0 x6 h4 y3 ff3 fs2 fc0 sc0 ls0 ws0">∗</div><div class="t m0 x7 h3 y4 ff2 fs1 fc0 sc0 ls0 ws0">Institute<span class="_"> </span>of<span class="_"> </span>Information<span class="_"> </span>Science</div><div class="t m0 x8 h3 y5 ff2 fs1 fc0 sc0 ls0 ws0">Academia<span class="_"> </span>Sinica,<span class="_"> </span>T<span class="_ _2"></span>aiwan</div><div class="t m0 x9 h5 y6 ff4 fs3 fc0 sc0 ls0 ws0">kinyiu@iis.sinica.edu.tw</div><div class="t m0 xa h3 y2 ff2 fs1 fc0 sc0 ls0 ws0">Hong-Y<span class="_ _2"></span>uan<span class="_"> </span>Mark<span class="_"> </span>Liao</div><div class="t m0 xb h3 y7 ff2 fs1 fc0 sc0 ls0 ws0">Institute<span class="_"> </span>of<span class="_"> </span>Information<span class="_"> </span>Science</div><div class="t m0 xc h3 y5 ff2 fs1 fc0 sc0 ls0 ws0">Academia<span class="_"> </span>Sinica,<span class="_"> </span>T<span class="_ _2"></span>aiwan</div><div class="t m0 xd h5 y6 ff4 fs3 fc0 sc0 ls0 ws0">liao@iis.sinica.edu.tw</div><div class="t m0 xe h6 y8 ff1 fs1 fc0 sc0 ls0 ws0">Abstract</div><div class="t 
m0 xf h7 y9 ff5 fs4 fc0 sc0 ls0 ws0">Ther<span class="_ _0"></span>e<span class="_ _3"> </span>ar<span class="_ _0"></span>e<span class="_ _3"> </span>a<span class="_ _3"> </span>huge<span class="_ _4"> </span>number<span class="_ _4"> </span>of<span class="_ _3"> </span>featur<span class="_ _0"></span>es<span class="_ _3"> </span>which<span class="_ _4"> </span>are<span class="_ _4"> </span>said<span class="_ _4"> </span>to</div><div class="t m0 x10 h7 ya ff5 fs4 fc0 sc0 ls0 ws0">impr<span class="_ _0"></span>ove<span class="_ _1"> </span>Con<span class="_ _0"></span>volutional<span class="_ _5"> </span>Neural<span class="_ _5"> </span>Network<span class="_ _5"> </span>(CNN)<span class="_ _5"> </span>accuracy<span class="_ _0"></span>.</div><div class="t m0 x10 h7 yb ff5 fs4 fc0 sc0 ls0 ws0">Practical<span class="_ _6"> </span>testing<span class="_ _6"> </span>of<span class="_ _4"> </span>combinations<span class="_ _6"> </span>of<span class="_ _6"> </span>such<span class="_ _6"> </span>features<span class="_ _6"> </span>on<span class="_ _6"> </span>larg<span class="_ _0"></span>e</div><div class="t m0 x10 h7 yc ff5 fs4 fc0 sc0 ls0 ws0">datasets,<span class="_ _1"> </span>and<span class="_ _5"> </span>theor<span class="_ _0"></span>etical<span class="_ _5"> </span>justification<span class="_ _5"> </span>of<span class="_ _5"> </span>the<span class="_ _5"> </span>r<span class="_ _0"></span>esult,<span class="_ _1"> </span>is<span class="_ _5"> </span>r<span class="_ _0"></span>e-</div><div class="t m0 x10 h7 yd ff5 fs4 fc0 sc0 ls0 ws0">quir<span class="_ _0"></span>ed.<span class="_ _4"> </span>Some<span class="_ _7"> </span>featur<span class="_ _0"></span>es<span class="_ _7"> </span>operate<span class="_ _7"> </span>on<span class="_ _7"> </span>certain<span class="_ _8"> </span>models<span class="_ _7"> </span>exclusively</div><div class="t m0 x10 h7 ye ff5 fs4 fc0 sc0 ls0 ws0">and<span class="_ _7"> </span>for<span class="_ _8"> </span>certain<span class="_ _7"> 
</span>pr<span class="_ _0"></span>oblems<span class="_"> </span>e<span class="_ _0"></span>xclusively<span class="_ _0"></span>,<span class="_ _8"> </span>or<span class="_ _7"> </span>only<span class="_ _8"> </span>for<span class="_ _7"> </span>small-scale</div><div class="t m0 x10 h7 yf ff5 fs4 fc0 sc0 ls0 ws0">datasets;<span class="_"> </span>while<span class="_ _6"> </span>some<span class="_"> </span>features,<span class="_"> </span>such<span class="_"> </span>as<span class="_"> </span>batch-normalization</div><div class="t m0 x10 h7 y10 ff5 fs4 fc0 sc0 ls0 ws0">and<span class="_"> </span>residual-connections,<span class="_"> </span>are<span class="_"> </span>applicable<span class="_ _6"> </span>to<span class="_ _6"> </span>the<span class="_ _6"> </span>majority<span class="_ _6"> </span>of</div><div class="t m0 x10 h7 y11 ff5 fs4 fc0 sc0 ls0 ws0">models,<span class="_"> </span>tasks,<span class="_"> </span>and<span class="_ _6"> </span>datasets.<span class="_ _4"> </span>W<span class="_ _0"></span>e<span class="_"> </span>assume<span class="_"> </span>that<span class="_"> </span>such<span class="_"> </span>universal</div><div class="t m0 x10 h7 y12 ff5 fs4 fc0 sc0 ls0 ws0">featur<span class="_ _0"></span>es<span class="_ _9"> </span>include<span class="_ _9"> </span>W<span class="_ _2"></span>eighted-Residual-Connections<span class="_ _9"> </span>(WRC),</div><div class="t m0 x10 h7 y13 ff5 fs4 fc0 sc0 ls0 ws0">Cr<span class="_ _0"></span>oss-Stage-P<span class="_ _a"></span>artial-connections<span class="_ _5"> </span>(CSP),<span class="_ _5"> </span>Cr<span class="_ _0"></span>oss<span class="_ _1"> </span>mini-Batch</div><div class="t m0 x10 h7 y14 ff5 fs4 fc0 sc0 ls0 ws0">Normalization<span class="_ _b"> </span>(CmBN),<span class="_ _b"> </span>Self-adversarial-tr<span class="_ _0"></span>aining<span class="_ _b"> </span>(SAT)</div><div class="t m0 x10 h7 y15 ff5 fs4 fc0 sc0 ls0 ws0">and<span class="_ _5"> </span>Mish-activation.<span class="_ _c"> 
</span>W<span class="_ _a"></span>e<span class="_ _5"> </span>use<span class="_ _1"> </span>new<span class="_ _5"> </span>features:<span class="_ _d"> </span>WRC,<span class="_ _5"> </span>CSP<span class="_ _2"></span>,</div><div class="t m0 x10 h7 y16 ff5 fs4 fc0 sc0 ls0 ws0">CmBN,<span class="_ _3"> </span>SAT<span class="_ _a"></span>,<span class="_ _e"> </span>Mish<span class="_ _3"> </span>activation,<span class="_ _5"> </span>Mosaic<span class="_ _e"> </span>data<span class="_ _e"> </span>augmentation,</div><div class="t m0 x10 h7 y17 ff5 fs4 fc0 sc0 ls0 ws0">CmBN,<span class="_ _8"> </span>Dr<span class="_ _0"></span>opBlock<span class="_ _7"> </span>re<span class="_ _0"></span>gularization,<span class="_ _8"> </span>and<span class="_ _8"> </span>CIoU<span class="_"> </span>loss,<span class="_ _7"> </span>and<span class="_"> </span>com-</div><div class="t m0 x10 h7 y18 ff5 fs4 fc0 sc0 ls0 ws0">bine<span class="_ _8"> </span>some<span class="_ _8"> </span>of<span class="_ _8"> </span>them<span class="_ _8"> </span>to<span class="_ _8"> </span>achie<span class="_ _0"></span>ve<span class="_"> </span>state-of-the-art<span class="_ _7"> </span>results:<span class="_"> </span>43.5%</div><div class="t m0 x10 h7 y19 ff5 fs4 fc0 sc0 ls0 ws0">AP<span class="_ _1"> </span>(65.7%<span class="_ _f"> </span>AP</div><div class="t m0 x11 h8 y1a ff6 fs5 fc0 sc0 ls0 ws0">50</div><div class="t m0 x12 h7 y19 ff5 fs4 fc0 sc0 ls0 ws0">)<span class="_ _1"> </span>for<span class="_ _f"> </span>the<span class="_ _1"> </span>MS<span class="_ _f"> </span>COCO<span class="_ _1"> </span>dataset<span class="_ _f"> </span>at<span class="_ _1"> </span>a<span class="_ _f"> </span>r<span class="_ _0"></span>eal-</div><div class="t m0 x10 h9 y1b ff5 fs4 fc0 sc0 ls0 ws0">time<span class="_ _4"> </span>speed<span class="_ _3"> </span>of<span class="_ _3"> </span><span class="ff7">∼</span>65<span class="_ _4"> </span>FPS<span class="_ _3"> </span>on<span class="_ _3"> </span>T<span class="_ 
_2"></span>esla<span class="_ _3"> </span>V100.<span class="_ _9"> </span>Sour<span class="_ _0"></span>ce<span class="_ _3"> </span>code<span class="_ _4"> </span>is<span class="_ _3"> </span>at</div><div class="t m0 x10 h7 y1c ff8 fs4 fc1 sc0 ls0 ws0">https://github.com/AlexeyAB/darknet<span class="ff5 fc0">.</span></div><div class="t m0 x10 h6 y1d ff1 fs1 fc0 sc0 ls0 ws0">1.<span class="_"> </span>Introduction</div><div class="t m0 xf ha y1e ff2 fs4 fc0 sc0 ls0 ws0">The<span class="_ _6"> </span>majority<span class="_ _6"> </span>of<span class="_ _6"> </span>CNN-based<span class="_ _4"> </span>object<span class="_"> </span>detectors<span class="_ _4"> </span>are<span class="_ _6"> </span>largely</div><div class="t m0 x10 ha y1f ff2 fs4 fc0 sc0 ls0 ws0">applicable<span class="_"> </span>only<span class="_"> </span>for<span class="_"> </span>recommendation<span class="_ _8"> </span>systems.<span class="_ _4"> </span>For<span class="_"> </span>e<span class="_ _0"></span>xample,</div><div class="t m0 x10 ha y20 ff2 fs4 fc0 sc0 ls0 ws0">searching<span class="_ _4"> </span>for<span class="_ _4"> </span>free<span class="_ _4"> </span>parking<span class="_ _4"> </span>spaces<span class="_ _4"> </span>via<span class="_ _4"> </span>urban<span class="_ _4"> </span>video<span class="_ _4"> </span>cameras</div><div class="t m0 x10 ha y21 ff2 fs4 fc0 sc0 ls0 ws0">is<span class="_ _6"> </span>ex<span class="_ _0"></span>ecuted<span class="_ _4"> </span>by<span class="_"> </span>slow<span class="_ _6"> </span>accurate<span class="_ _6"> </span>models,<span class="_ _6"> </span>whereas<span class="_ _6"> </span>car<span class="_ _6"> </span>collision</div><div class="t m0 x10 ha y22 ff2 fs4 fc0 sc0 ls0 ws0">warning<span class="_ _5"> </span>is<span class="_ _5"> </span>related<span class="_ _5"> </span>to<span class="_ _5"> </span>fast<span class="_ _5"> </span>inaccurate<span class="_ _1"> </span>models.<span class="_ _10"> </span>Improving</div><div class="t m0 x10 ha y23 ff2 
fs4 fc0 sc0 ls0 ws0">the<span class="_ _3"> </span>real-time<span class="_ _3"> </span>object<span class="_ _e"> </span>detector<span class="_ _3"> </span>accuracy<span class="_ _3"> </span>enables<span class="_ _3"> </span>using<span class="_ _e"> </span>them</div><div class="t m0 x10 ha y24 ff2 fs4 fc0 sc0 ls0 ws0">not<span class="_ _4"> </span>only<span class="_ _3"> </span>for<span class="_ _4"> </span>hint<span class="_ _4"> </span>generating<span class="_ _3"> </span>recommendation<span class="_ _4"> </span>systems,<span class="_ _3"> </span>but</div><div class="t m0 x10 ha y25 ff2 fs4 fc0 sc0 ls0 ws0">also<span class="_ _6"> </span>for<span class="_ _6"> </span>stand-alone<span class="_ _4"> </span>process<span class="_"> </span>management<span class="_ _4"> </span>and<span class="_ _6"> </span>human<span class="_ _6"> </span>input</div><div class="t m0 x10 ha y26 ff2 fs4 fc0 sc0 ls0 ws0">reduction.<span class="_ _11"> </span>Real-time<span class="_ _4"> </span>object<span class="_ _4"> </span>detector<span class="_ _4"> </span>operation<span class="_ _3"> </span>on<span class="_ _4"> </span>con<span class="_ _0"></span>ven-</div><div class="t m0 x10 ha y27 ff2 fs4 fc0 sc0 ls0 ws0">tional<span class="_ _4"> </span>Graphics<span class="_ _4"> </span>Processing<span class="_ _4"> </span>Units<span class="_ _3"> </span>(GPU)<span class="_ _4"> </span>allo<span class="_ _0"></span>ws<span class="_ _3"> </span>their<span class="_ _4"> </span>mass</div><div class="t m0 x10 ha y28 ff2 fs4 fc0 sc0 ls0 ws0">usage<span class="_ _e"> </span>at<span class="_ _5"> </span>an<span class="_ _e"> </span>affordable<span class="_ _e"> </span>price.<span class="_ _12"> </span>The<span class="_ _e"> </span>most<span class="_ _5"> </span>accurate<span class="_ _e"> </span>modern</div><div class="t m0 x10 ha y29 ff2 fs4 fc0 sc0 ls0 ws0">neural<span class="_ _7"> </span>networks<span class="_ _7"> </span>do<span class="_ _8"> </span>not<span class="_ _7"> </span>operate<span 
class="_ _8"> </span>in<span class="_ _7"> </span>real<span class="_ _8"> </span>time<span class="_ _7"> </span>and<span class="_ _8"> </span>require<span class="_ _7"> </span>large</div><div class="t m0 x10 ha y2a ff2 fs4 fc0 sc0 ls0 ws0">number<span class="_ _4"> </span>of<span class="_ _4"> </span>GPUs<span class="_ _4"> </span>for<span class="_ _4"> </span>training<span class="_ _6"> </span>with<span class="_ _4"> </span>a<span class="_ _4"> </span>large<span class="_ _4"> </span>mini-batch-size.</div><div class="t m0 x10 ha y2b ff2 fs4 fc0 sc0 ls0 ws0">W<span class="_ _a"></span>e<span class="_"> </span>address<span class="_"> </span>such<span class="_"> </span>problems<span class="_ _7"> </span>through<span class="_"> </span>creating<span class="_"> </span>a<span class="_"> </span>CNN<span class="_ _8"> </span>that<span class="_"> </span>op-</div><div class="t m0 x10 ha y2c ff2 fs4 fc0 sc0 ls0 ws0">erates<span class="_ _3"> </span>in<span class="_ _3"> </span>real-time<span class="_ _3"> </span>on<span class="_ _3"> </span>a<span class="_ _3"> </span>con<span class="_ _0"></span>ventional<span class="_ _3"> </span>GPU,<span class="_ _3"> </span>and<span class="_ _3"> </span>for<span class="_ _3"> </span>which</div><div class="t m0 x10 ha y2d ff2 fs4 fc0 sc0 ls0 ws0">training<span class="_"> </span>requires<span class="_"> </span>only<span class="_"> </span>one<span class="_"> </span>con<span class="_ _0"></span>ventional<span class="_"> </span>GPU.</div><div class="t m0 x13 ha y2e ff2 fs4 fc0 sc0 ls0 ws0">Figure<span class="_ _6"> </span>1:<span class="_ _5"> </span>Comparison<span class="_ _6"> </span>of<span class="_ _4"> </span>the<span class="_ _6"> </span>proposed<span class="_ _4"> </span>YOLOv4<span class="_"> </span>and<span class="_ _4"> </span>other</div><div class="t m0 x13 ha y2f ff2 fs4 fc0 sc0 ls0 ws0">state-of-the-art<span class="_"> </span>object<span class="_"> </span>detectors.<span class="_ _3"> </span>Y<span class="_ _0"></span>OLOv4<span 
class="_ _6"> </span>runs<span class="_"> </span>twice<span class="_"> </span>faster</div><div class="t m0 x13 ha y30 ff2 fs4 fc0 sc0 ls0 ws0">than<span class="_ _6"> </span>EfficientDet<span class="_ _6"> </span>with<span class="_ _4"> </span>comparable<span class="_ _6"> </span>performance.<span class="_ _1"> </span>Improves</div><div class="t m0 x13 ha y31 ff2 fs4 fc0 sc0 ls0 ws0">Y<span class="_ _0"></span>OLOv3’<span class="_ _0"></span>s<span class="_"> </span>AP<span class="_"> </span>and<span class="_"> </span>FPS<span class="_"> </span>by<span class="_"> </span>10%<span class="_"> </span>and<span class="_"> </span>12%,<span class="_"> </span>respectively<span class="_ _a"></span>.</div><div class="t m0 x14 ha y32 ff2 fs4 fc0 sc0 ls0 ws0">The<span class="_"> </span>main<span class="_ _6"> </span>goal<span class="_ _6"> </span>of<span class="_ _6"> </span>this<span class="_"> </span>work<span class="_ _6"> </span>is<span class="_ _6"> </span>designing<span class="_"> </span>a<span class="_ _6"> </span>fast<span class="_ _6"> </span>operating</div><div class="t m0 x13 ha y33 ff2 fs4 fc0 sc0 ls0 ws0">speed<span class="_"> </span>of<span class="_ _6"> </span>an<span class="_"> </span>object<span class="_ _6"> </span>detector<span class="_"> </span>in<span class="_ _6"> </span>production<span class="_ _6"> </span>systems<span class="_"> </span>and<span class="_ _6"> </span>opti-</div><div class="t m0 x13 ha y34 ff2 fs4 fc0 sc0 ls0 ws0">mization<span class="_ _7"> </span>for<span class="_ _7"> </span>parallel<span class="_ _8"> </span>computations,<span class="_ _8"> </span>rather<span class="_ _7"> </span>than<span class="_ _7"> </span>the<span class="_ _8"> </span>low<span class="_ _7"> </span>com-</div><div class="t m0 x13 ha y35 ff2 fs4 fc0 sc0 ls0 ws0">putation<span class="_ _e"> </span>volume<span class="_ _e"> </span>theoretical<span class="_ _5"> </span>indicator<span class="_ _e"> </span>(BFLOP).<span class="_ _5"> </span>W<span class="_ 
_a"></span>e<span class="_ _5"> </span>hope</div><div class="t m0 x13 ha y36 ff2 fs4 fc0 sc0 ls0 ws0">that<span class="_"> </span>the<span class="_"> </span>designed<span class="_"> </span>object<span class="_ _6"> </span>can<span class="_"> </span>be<span class="_"> </span>easily<span class="_ _6"> </span>trained<span class="_"> </span>and<span class="_"> </span>used.<span class="_ _3"> </span>For</div><div class="t m0 x13 ha y37 ff2 fs4 fc0 sc0 ls0 ws0">example,<span class="_"> </span>an<span class="_ _0"></span>yone<span class="_"> </span>who<span class="_"> </span>uses<span class="_"> </span>a<span class="_ _8"> </span>con<span class="_ _0"></span>ventional<span class="_"> </span>GPU<span class="_"> </span>to<span class="_ _8"> </span>train<span class="_"> </span>and</div><div class="t m0 x13 ha y38 ff2 fs4 fc0 sc0 ls0 ws0">test<span class="_"> </span>can<span class="_ _6"> </span>achiev<span class="_ _0"></span>e<span class="_ _6"> </span>real-time,<span class="_ _6"> </span>high<span class="_"> </span>quality<span class="_ _0"></span>,<span class="_ _6"> </span>and<span class="_"> </span>convincing<span class="_"> </span>ob-</div><div class="t m0 x13 ha y39 ff2 fs4 fc0 sc0 ls0 ws0">ject<span class="_"> </span>detection<span class="_"> </span>results,<span class="_"> </span>as<span class="_"> </span>the<span class="_ _6"> </span>Y<span class="_ _0"></span>OLOv4<span class="_ _6"> </span>results<span class="_"> </span>shown<span class="_"> </span>in<span class="_"> </span>Fig-</div><div class="t m0 x13 ha y3a ff2 fs4 fc0 sc0 ls0 ws0">ure<span class="_"> </span><span class="fc2">1</span>.<span class="_ _4"> </span>Our<span class="_"> </span>contributions<span class="_"> </span>are<span class="_"> </span>summarized<span class="_"> </span>as<span class="_"> </span>follo<span class="_ _0"></span>ws:</div><div class="t m0 x15 ha y3b ff2 fs4 fc0 sc0 ls0 ws0">1.<span class="_ _13"> </span>W<span class="_ _a"></span>e<span class="_"> </span>de<span class="_ 
_0"></span>velop<span class="_ _7"> </span>an<span class="_"> </span>ef<span class="_ _0"></span>ficient<span class="_ _8"> </span>and<span class="_ _8"> </span>powerful<span class="_ _7"> </span>object<span class="_ _8"> </span>detection</div><div class="t m0 x16 ha y3c ff2 fs4 fc0 sc0 ls0 ws0">model.<span class="_ _6"> </span>It<span class="_"> </span>lets<span class="_ _7"> </span>ev<span class="_ _0"></span>eryone<span class="_"> </span>use<span class="_"> </span>a<span class="_ _7"> </span>1080<span class="_"> </span>T<span class="_ _0"></span>i<span class="_ _8"> </span>or<span class="_ _8"> </span>2080<span class="_ _8"> </span>T<span class="_ _0"></span>i</div><div class="t m0 x16 ha y3d ff2 fs4 fc0 sc0 ls0 ws0">GPU<span class="_"> </span>to<span class="_"> </span>train<span class="_"> </span>a<span class="_"> </span>super<span class="_"> </span>fast<span class="_"> </span>and<span class="_"> </span>accurate<span class="_"> </span>object<span class="_"> </span>detector<span class="_ _0"></span>.</div><div class="t m0 x15 ha y3e ff2 fs4 fc0 sc0 ls0 ws0">2.<span class="_ _13"> </span>W<span class="_ _a"></span>e<span class="_ _9"> </span>v<span class="_ _0"></span>erify<span class="_ _14"> </span>the<span class="_ _11"> </span>influence<span class="_ _14"> </span>of<span class="_ _11"> </span>state-of-the-art<span class="_ _14"> </span>Bag-of-</div><div class="t m0 x16 ha y3f ff2 fs4 fc0 sc0 ls0 ws0">Freebies<span class="_ _8"> </span>and<span class="_"> </span>Bag-of-Specials<span class="_ _7"> </span>methods<span class="_"> </span>of<span class="_ _7"> </span>object<span class="_"> </span>detec-</div><div class="t m0 x16 ha y40 ff2 fs4 fc0 sc0 ls0 ws0">tion<span class="_"> </span>during<span class="_"> </span>the<span class="_"> </span>detector<span class="_"> </span>training.</div><div class="t m0 x15 ha y41 ff2 fs4 fc0 sc0 ls0 ws0">3.<span class="_ _13"> </span>W<span class="_ _a"></span>e<span class="_ _1"> 
</span>modify<span class="_ _5"> </span>state-of-the-art<span class="_ _1"> </span>methods<span class="_ _5"> </span>and<span class="_ _5"> </span>make<span class="_ _1"> </span>them</div><div class="t m0 x16 ha y42 ff2 fs4 fc0 sc0 ls0 ws0">more<span class="_ _e"> </span>efficient<span class="_ _3"> </span>and<span class="_ _5"> </span>suitable<span class="_ _e"> </span>for<span class="_ _e"> </span>single<span class="_ _e"> </span>GPU<span class="_ _e"> </span>training,</div><div class="t m0 x16 ha y43 ff2 fs4 fc0 sc0 ls0 ws0">including<span class="_"> </span>CBN<span class="_"> </span>[<span class="fc3">89</span>],<span class="_"> </span>P<span class="_ _a"></span>AN<span class="_"> </span>[<span class="fc3">49</span>],<span class="_"> </span>SAM<span class="_"> </span>[<span class="fc3">85</span>],<span class="_"> </span>etc.</div><div class="t m0 x17 ha y44 ff2 fs4 fc0 sc0 ls0 ws0">1</div><div class="t m1 x18 hb y45 ff9 fs6 fc4 sc0 ls0 ws0">arXiv:2004.10934v1 [cs.CV] 23 Apr 2020</div><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a></div><div class="pi" data-data='{"ctm":[1.568627,0.000000,0.000000,1.568627,0.000000,0.000000]}'></div></div>
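<!-- The closing body/html tags that stood here were removed: page 2 markup follows and must stay inside the document body. -->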
<div id="pf2" class="pf w0 h0" data-page-no="2"><div class="pc pc2 w0 h0"><img class="bi x0 y0 w1 h1" alt="" src="https://static.pudn.com/prod/directory_preview_static/6275eafd16f2c0769caad825/bg2.jpg"><div class="t m0 x19 ha y46 ff2 fs4 fc0 sc0 ls0 ws0">Figure<span class="_"> </span>2:<span class="_ _4"> </span>Object<span class="_"> </span>detector<span class="_ _0"></span>.</div><div class="t m0 x10 h6 y47 ff1 fs1 fc0 sc0 ls0 ws0">2.<span class="_"> </span>Related<span class="_"> </span>work</div><div class="t m0 x10 hc y48 ff1 fs7 fc0 sc0 ls0 ws0">2.1.<span class="_"> </span>Object<span class="_"> </span>detection<span class="_"> </span>models</div><div class="t m0 xf ha y49 ff2 fs4 fc0 sc0 ls0 ws0">A<span class="_ _5"> </span>modern<span class="_ _5"> </span>detector<span class="_ _5"> </span>is<span class="_ _5"> </span>usually<span class="_ _1"> </span>composed<span class="_ _5"> </span>of<span class="_ _5"> </span>two<span class="_ _5"> </span>parts,</div><div class="t m0 x10 ha y4a ff2 fs4 fc0 sc0 ls0 ws0">a<span class="_ _3"> </span>backbone<span class="_ _3"> </span>which<span class="_ _e"> </span>is<span class="_ _3"> </span>pre-trained<span class="_ _3"> </span>on<span class="_ _3"> </span>ImageNet<span class="_ _e"> </span>and<span class="_ _3"> </span>a<span class="_ _3"> </span>head</div><div class="t m0 x10 ha y4b ff2 fs4 fc0 sc0 ls0 ws0">which<span class="_"> </span>is<span class="_ _6"> </span>used<span class="_ _6"> </span>to<span class="_ _6"> </span>predict<span class="_ _6"> </span>classes<span class="_ _6"> </span>and<span class="_ _6"> </span>bounding<span class="_ _6"> </span>boxes<span class="_"> </span>of<span class="_ _6"> </span>ob-</div><div class="t m0 x10 ha y4c ff2 fs4 fc0 sc0 ls0 ws0">jects.<span class="_ _14"> </span>For<span class="_ _4"> </span>those<span class="_ _4"> </span>detectors<span class="_ _3"> </span>running<span class="_ _4"> </span>on<span class="_ _3"> </span>GPU<span class="_ _3"> </span>platform,<span class="_ 
_3"> </span>their</div><div class="t m0 x10 ha y4d ff2 fs4 fc0 sc0 ls0 ws0">backbone<span class="_"> </span>could<span class="_ _6"> </span>be<span class="_ _6"> </span>VGG<span class="_ _6"> </span>[<span class="fc3">68</span>],<span class="_ _6"> </span>ResNet<span class="_ _6"> </span>[<span class="fc3">26</span>],<span class="_ _6"> </span>ResNeXt<span class="_ _6"> </span>[<span class="fc3">86</span>],</div><div class="t m0 x10 ha y4e ff2 fs4 fc0 sc0 ls0 ws0">or<span class="_ _8"> </span>DenseNet<span class="_ _8"> </span>[<span class="fc3">30</span>].<span class="_ _6"> </span>For<span class="_"> </span>those<span class="_ _7"> </span>detectors<span class="_ _8"> </span>running<span class="_ _8"> </span>on<span class="_ _8"> </span>CPU<span class="_ _8"> </span>plat-</div><div class="t m0 x10 ha y4f ff2 fs4 fc0 sc0 ls0 ws0">form,<span class="_"> </span>their<span class="_"> </span>backbone<span class="_ _8"> </span>could<span class="_"> </span>be<span class="_ _8"> </span>SqueezeNet<span class="_"> </span>[<span class="fc3">31</span>],<span class="_"> </span>MobileNet</div><div class="t m0 x10 ha y50 ff2 fs4 fc0 sc0 ls0 ws0">[<span class="fc3">28</span>,<span class="_"> </span><span class="fc3">66</span>,<span class="_"> </span><span class="fc3">27</span>,<span class="_ _8"> </span><span class="fc3">74</span>],<span class="_"> </span>or<span class="_"> </span>Shuf<span class="_ _0"></span>fleNet<span class="_"> </span>[<span class="fc3">97</span>,<span class="_"> </span><span class="fc3">53</span>].<span class="_ _6"> </span>As<span class="_"> </span>to<span class="_"> </span>the<span class="_"> </span>head<span class="_ _8"> </span>part,</div><div class="t m0 x10 ha y51 ff2 fs4 fc0 sc0 ls0 ws0">it<span class="_ _7"> </span>is<span class="_ _7"> </span>usually<span class="_ _8"> </span>categorized<span class="_ _7"> </span>into<span class="_ _7"> </span>two<span class="_ _7"> </span>kinds,<span class="_ _7"> </span>i.e.,<span class="_"> </span>one-stage<span 
class="_ _7"> </span>object</div><div class="t m0 x10 ha y52 ff2 fs4 fc0 sc0 ls0 ws0">detector<span class="_ _6"> </span>and<span class="_ _6"> </span>two-stage<span class="_ _6"> </span>object<span class="_ _6"> </span>detector<span class="_ _0"></span>.<span class="_ _5"> </span>The<span class="_"> </span>most<span class="_ _4"> </span>represen-</div><div class="t m0 x10 ha y53 ff2 fs4 fc0 sc0 ls0 ws0">tativ<span class="_ _0"></span>e<span class="_ _3"> </span>two-stage<span class="_ _3"> </span>object<span class="_ _3"> </span>detector<span class="_ _3"> </span>is<span class="_ _3"> </span>the<span class="_ _3"> </span>R-CNN<span class="_ _3"> </span>[<span class="fc3">19</span>]<span class="_ _3"> </span>series,</div><div class="t m0 x10 ha y1e ff2 fs4 fc0 sc0 ls0 ws0">including<span class="_"> </span>f<span class="_ _0"></span>ast<span class="_"> </span>R-CNN<span class="_"> </span>[<span class="fc3">18</span>],<span class="_"> </span>f<span class="_ _0"></span>aster<span class="_"> </span>R-CNN<span class="_"> </span>[<span class="fc3">64</span>],<span class="_ _8"> </span>R-FCN<span class="_"> </span>[<span class="fc3">9</span>],</div><div class="t m0 x10 ha y1f ff2 fs4 fc0 sc0 ls0 ws0">and<span class="_ _4"> </span>Libra<span class="_ _4"> </span>R-CNN<span class="_ _4"> </span>[<span class="fc3">58</span>].<span class="_ _11"> </span>It<span class="_ _4"> </span>is<span class="_ _4"> </span>also<span class="_ _4"> </span>possible<span class="_ _3"> </span>to<span class="_ _4"> </span>make<span class="_ _4"> </span>a<span class="_ _4"> </span>two-</div><div class="t m0 x10 ha y20 ff2 fs4 fc0 sc0 ls0 ws0">stage<span class="_"> </span>object<span class="_"> </span>detector<span class="_ _8"> </span>an<span class="_"> </span>anchor<span class="_ _0"></span>-free<span class="_"> </span>object<span class="_"> </span>detector<span class="_ _0"></span>,<span class="_"> </span>such<span class="_"> </span>as</div><div class="t m0 x10 ha y21 ff2 fs4 fc0 sc0 ls0 
ws0">RepPoints<span class="_ _6"> </span>[<span class="fc3">87</span>].<span class="_ _1"> </span>As<span class="_ _4"> </span>for<span class="_ _4"> </span>one-stage<span class="_ _6"> </span>object<span class="_ _4"> </span>detector<span class="_ _0"></span>,<span class="_ _4"> </span>the<span class="_ _6"> </span>most</div><div class="t m0 x10 ha y22 ff2 fs4 fc0 sc0 ls0 ws0">representativ<span class="_ _0"></span>e<span class="_ _e"> </span>models<span class="_ _e"> </span>are<span class="_ _e"> </span>YOLO<span class="_ _e"> </span>[<span class="fc3">61</span>,<span class="_ _e"> </span><span class="fc3">62</span>,<span class="_ _e"> </span><span class="fc3">63</span>],<span class="_ _5"> </span>SSD<span class="_ _e"> </span>[<span class="fc3">50</span>],</div><div class="t m0 x10 ha y23 ff2 fs4 fc0 sc0 ls0 ws0">and<span class="_ _6"> </span>RetinaNet<span class="_ _4"> </span>[<span class="fc3">45</span>].<span class="_ _1"> </span>In<span class="_ _4"> </span>recent<span class="_ _6"> </span>years,<span class="_ _4"> </span>anchor-free<span class="_ _6"> </span>one-stage</div><div class="t m0 x10 ha y24 ff2 fs4 fc0 sc0 ls0 ws0">object<span class="_ _7"> </span>detectors<span class="_"> </span>are<span class="_ _7"> </span>de<span class="_ _0"></span>veloped.<span class="_ _4"> </span>The<span class="_ _7"> </span>detectors<span class="_ _8"> </span>of<span class="_ _7"> </span>this<span class="_"> </span>sort<span class="_ _7"> </span>are</div><div class="t m0 x10 ha y25 ff2 fs4 fc0 sc0 ls0 ws0">CenterNet<span class="_ _7"> </span>[<span class="fc3">13</span>],<span class="_"> </span>CornerNet<span class="_ _7"> </span>[<span class="fc3">37</span>,<span class="_"> </span><span class="fc3">38</span>],<span class="_ _7"> </span>FCOS<span class="_ _8"> </span>[<span class="fc3">78</span>],<span class="_ _8"> </span>etc.<span class="_ _4"> </span>Object</div><div class="t m0 x10 ha y26 ff2 fs4 fc0 sc0 ls0 ws0">detectors<span class="_ _3"> </span>de<span class="_ 
_0"></span>veloped<span class="_ _3"> </span>in<span class="_ _3"> </span>recent<span class="_ _3"> </span>years<span class="_ _3"> </span>often<span class="_ _4"> </span>insert<span class="_ _3"> </span>some<span class="_ _3"> </span>lay-</div><div class="t m0 x10 ha y27 ff2 fs4 fc0 sc0 ls0 ws0">ers<span class="_ _4"> </span>between<span class="_ _3"> </span>backbone<span class="_ _4"> </span>and<span class="_ _4"> </span>head,<span class="_ _3"> </span>and<span class="_ _3"> </span>these<span class="_ _4"> </span>layers<span class="_ _3"> </span>are<span class="_ _4"> </span>usu-</div><div class="t m0 x10 ha y28 ff2 fs4 fc0 sc0 ls0 ws0">ally<span class="_ _6"> </span>used<span class="_ _6"> </span>to<span class="_ _6"> </span>collect<span class="_ _4"> </span>feature<span class="_ _6"> </span>maps<span class="_ _6"> </span>from<span class="_ _6"> </span>different<span class="_ _6"> </span>stages.<span class="_ _5"> </span>W<span class="_ _a"></span>e</div><div class="t m0 x10 ha y54 ff2 fs4 fc0 sc0 ls0 ws0">can<span class="_ _3"> </span>call<span class="_ _3"> </span>it<span class="_ _3"> </span>the<span class="_ _e"> </span>neck<span class="_ _3"> </span>of<span class="_ _3"> </span>an<span class="_ _e"> </span>object<span class="_ _3"> </span>detector<span class="_ _0"></span>.<span class="_ _3"> </span>Usually<span class="_ _0"></span>,<span class="_ _e"> </span>a<span class="_ _3"> </span>neck</div><div class="t m0 x10 ha y2a ff2 fs4 fc0 sc0 ls0 ws0">is<span class="_ _e"> </span>composed<span class="_ _5"> </span>of<span class="_ _5"> </span>sev<span class="_ _0"></span>eral<span class="_ _5"> </span>bottom-up<span class="_ _e"> </span>paths<span class="_ _5"> </span>and<span class="_ _5"> </span>se<span class="_ _0"></span>veral<span class="_ _5"> </span>top-</div><div class="t m0 x10 ha y2b ff2 fs4 fc0 sc0 ls0 ws0">down<span class="_ _4"> </span>paths.<span class="_ _15"> </span>Networks<span class="_ _3"> </span>equipped<span class="_ _3"> </span>with<span 
class="_ _3"> </span>this<span class="_ _3"> </span>mechanism<span class="_ _3"> </span>in-</div><div class="t m0 x10 ha y2c ff2 fs4 fc0 sc0 ls0 ws0">clude<span class="_"> </span>Feature<span class="_"> </span>Pyramid<span class="_"> </span>Network<span class="_"> </span>(FPN)<span class="_"> </span>[<span class="fc3">44</span>],<span class="_"> </span>P<span class="_ _0"></span>ath<span class="_"> </span>Aggrega-</div><div class="t m0 x10 ha y2d ff2 fs4 fc0 sc0 ls0 ws0">tion<span class="_"> </span>Network<span class="_"> </span>(P<span class="_ _2"></span>AN)<span class="_"> </span>[<span class="fc3">49</span>],<span class="_"> </span>BiFPN<span class="_"> </span>[<span class="fc3">77</span>],<span class="_"> </span>and<span class="_"> </span>NAS-FPN<span class="_"> </span>[<span class="fc3">17</span>].</div><div class="t m0 x13 ha y55 ff2 fs4 fc0 sc0 ls0 ws0">In<span class="_"> </span>addition<span class="_ _6"> </span>to<span class="_"> </span>the<span class="_ _6"> </span>above<span class="_"> </span>models,<span class="_"> </span>some<span class="_"> </span>researchers<span class="_ _6"> </span>put<span class="_ _6"> </span>their</div><div class="t m0 x13 ha y56 ff2 fs4 fc0 sc0 ls0 ws0">emphasis<span class="_ _7"> </span>on<span class="_ _7"> </span>directly<span class="_ _7"> </span>building<span class="_ _7"> </span>a<span class="_ _7"> </span>new<span class="_ _7"> </span>backbone<span class="_ _7"> </span>(DetNet<span class="_ _7"> </span>[<span class="fc3">43</span>],</div><div class="t m0 x13 ha y57 ff2 fs4 fc0 sc0 ls0 ws0">DetN<span class="_ _0"></span>AS<span class="_"> </span>[<span class="fc3">7</span>])<span class="_"> </span>or<span class="_"> </span>a<span class="_"> </span>ne<span class="_ _0"></span>w<span class="_"> </span>whole<span class="_"> </span>model<span class="_"> </span>(SpineNet<span class="_"> </span>[<span class="fc3">12</span>],<span class="_"> </span>HitDe-</div><div class="t m0 x13 ha y58 ff2 fs4 fc0 sc0 ls0 ws0">tector<span 
class="_"> </span>[<span class="fc3">20</span>])<span class="_"> </span>for<span class="_"> </span>object<span class="_"> </span>detection.</div><div class="t m0 x14 ha y59 ff2 fs4 fc0 sc0 ls0 ws0">T<span class="_ _a"></span>o<span class="_ _3"> </span>sum<span class="_ _4"> </span>up,<span class="_ _3"> </span>an<span class="_ _3"> </span>ordinary<span class="_ _3"> </span>object<span class="_ _4"> </span>detector<span class="_ _3"> </span>is<span class="_ _4"> </span>composed<span class="_ _3"> </span>of</div><div class="t m0 x13 ha y5a ff2 fs4 fc0 sc0 ls0 ws0">sev<span class="_ _0"></span>eral<span class="_"> </span>parts:</div><div class="t m0 x1a h9 y5b ff7 fs4 fc0 sc0 ls0 ws0">•<span class="_ _13"> </span><span class="ff1">Input<span class="ff2">:<span class="_ _4"> </span>Image,<span class="_"> </span>Patches,<span class="_"> </span>Image<span class="_"> </span>Pyramid</span></span></div><div class="t m0 x1a h9 y5c ff7 fs4 fc0 sc0 ls0 ws0">•<span class="_ _13"> </span><span class="ff1">Backbones<span class="ff2">:<span class="_ _f"> </span>VGG16<span class="_ _4"> </span>[<span class="fc3">68</span>],<span class="_ _3"> </span>ResNet-50<span class="_ _3"> </span>[<span class="fc3">26</span>],<span class="_ _3"> </span>SpineNet</span></span></div><div class="t m0 x16 ha y5d ff2 fs4 fc0 sc0 ls0 ws0">[<span class="fc3">12</span>],<span class="_ _1"> </span>EfficientNet-B0/B7<span class="_ _e"> </span>[<span class="fc3">75</span>],<span class="_ _1"> </span>CSPResNeXt50<span class="_ _5"> </span>[<span class="fc3">81</span>],</div><div class="t m0 x16 ha y5e ff2 fs4 fc0 sc0 ls0 ws0">CSPDarknet53<span class="_"> </span>[<span class="fc3">81</span>]</div><div class="t m0 x1a h9 y5f ff7 fs4 fc0 sc0 ls0 ws0">•<span class="_ _13"> </span><span class="ff1">Neck<span class="ff2">:</span></span></div><div class="t m0 x1b h9 y60 ff7 fs4 fc0 sc0 ls0 ws0">•<span class="_ _13"> </span><span class="ff1">Additional<span class="_ _e"> </span>blocks<span class="ff2">:<span 
class="_ _15"> </span>SPP<span class="_ _e"> </span>[<span class="fc3">25</span>],<span class="_ _1"> </span>ASPP<span class="_ _e"> </span>[<span class="fc3">5</span>],<span class="_ _1"> </span>RFB</span></span></div><div class="t m0 x1c ha y61 ff2 fs4 fc0 sc0 ls0 ws0">[<span class="fc3">47</span>],<span class="_"> </span>SAM<span class="_"> </span>[<span class="fc3">85</span>]</div><div class="t m0 x1b h9 y62 ff7 fs4 fc0 sc0 ls0 ws0">•<span class="_ _13"> </span><span class="ff1">Path-aggr<span class="_ _0"></span>egation<span class="_ _4"> </span>blocks<span class="ff2">:<span class="_ _e"> </span>FPN<span class="_ _4"> </span>[<span class="fc3">44</span>],<span class="_ _6"> </span>P<span class="_ _0"></span>AN<span class="_ _6"> </span>[<span class="fc3">49</span>],</span></span></div><div class="t m0 x1c ha y63 ff2 fs4 fc0 sc0 ls0 ws0">N<span class="_ _0"></span>AS-FPN<span class="_ _13"> </span>[<span class="fc3">17</span>],<span class="_ _9"> </span>Fully-connected<span class="_ _11"> </span>FPN,<span class="_ _13"> </span>BiFPN</div><div class="t m0 x1c ha y64 ff2 fs4 fc0 sc0 ls0 ws0">[<span class="fc3">77</span>],<span class="_"> </span>ASFF<span class="_"> </span>[<span class="fc3">48</span>],<span class="_"> </span>SF<span class="_ _a"></span>AM<span class="_"> </span>[<span class="fc3">98</span>]</div><div class="t m0 x1a h9 y65 ff7 fs4 fc0 sc0 ls0 ws0">•<span class="_ _13"> </span><span class="ff1">Heads<span class="ff2">:</span></span></div><div class="t m0 x1b h9 y66 ff7 fs4 fc0 sc0 ls0 ws0">•<span class="_ _13"> </span><span class="ff1">Dense<span class="_"> </span>Prediction<span class="_"> </span>(one-stage)<span class="ff2">:</span></span></div><div class="t m0 x1d h9 y67 ff7 fs4 fc0 sc0 ls0 ws0">◦<span class="_ _13"> </span><span class="ff2">RPN<span class="_ _8"> </span>[<span class="fc3">64</span>],<span class="_ _8"> </span>SSD<span class="_ _8"> </span>[<span class="fc3">50</span>],<span class="_"> </span>Y<span class="_ 
_0"></span>OLO<span class="_ _7"> </span>[<span class="fc3">61</span>],<span class="_"> </span>RetinaNet</span></div><div class="t m0 x1e ha y68 ff2 fs4 fc0 sc0 ls0 ws0">[<span class="fc3">45</span>]<span class="_"> </span>(anchor<span class="_"> </span>based)</div><div class="t m0 x1d h9 y69 ff7 fs4 fc0 sc0 ls0 ws0">◦<span class="_ _13"> </span><span class="ff2">CornerNet<span class="_ _4"> </span>[<span class="fc3">37</span>],<span class="_ _3"> </span>CenterNet<span class="_ _4"> </span>[<span class="fc3">13</span>],<span class="_ _3"> </span>MatrixNet</span></div><div class="t m0 x1e ha y6a ff2 fs4 fc0 sc0 ls0 ws0">[<span class="fc3">60</span>],<span class="_"> </span>FCOS<span class="_"> </span>[<span class="fc3">78</span>]<span class="_"> </span>(anchor<span class="_"> </span>free)</div><div class="t m0 x1b h9 y6b ff7 fs4 fc0 sc0 ls0 ws0">•<span class="_ _13"> </span><span class="ff1">Sparse<span class="_"> </span>Prediction<span class="_"> </span>(tw<span class="_ _0"></span>o-stage)<span class="ff2">:</span></span></div><div class="t m0 x1d h9 y6c ff7 fs4 fc0 sc0 ls0 ws0">◦<span class="_ _13"> </span><span class="ff2">Faster<span class="_ _e"> </span>R-CNN<span class="_ _1"> </span>[<span class="fc3">64</span>],<span class="_ _1"> </span>R-FCN<span class="_ _5"> </span>[<span class="fc3">9</span>],<span class="_ _f"> </span>Mask<span class="_ _5"> </span>R-</span></div><div class="t m0 x1e ha y6d ff2 fs4 fc0 sc0 ls0 ws0">CNN<span class="_"> </span>[<span class="fc3">23</span>]<span class="_"> </span>(anchor<span class="_"> </span>based)</div><div class="t m0 x1d h9 y6e ff7 fs4 fc0 sc0 ls0 ws0">◦<span class="_ _13"> </span><span class="ff2">RepPoints<span class="_"> </span>[<span class="fc3">87</span>]<span class="_"> </span>(anchor<span class="_"> </span>free)</span></div><div class="t m0 x17 ha y44 ff2 fs4 fc0 sc0 ls0 ws0">2</div><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return 
false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return 
false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return 
false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a></div><div class="pi" data-data='{"ctm":[1.568627,0.000000,0.000000,1.568627,0.000000,0.000000]}'></div></div>
<div id="pf3" class="pf w0 h0" data-page-no="3"><div class="pc pc3 w0 h0"><img class="bi x0 y0 w1 h1" alt="" src="https://static.pudn.com/prod/directory_preview_static/6275eafd16f2c0769caad825/bg3.jpg"><div class="t m0 x10 hc y6f ff1 fs7 fc0 sc0 ls0 ws0">2.2.<span class="_"> </span>Bag<span class="_"> </span>of<span class="_"> </span>freebies</div><div class="t m0 xf ha y70 ff2 fs4 fc0 sc0 ls0 ws0">Usually<span class="_ _0"></span>,<span class="_ _1"> </span>a<span class="_ _1"> </span>con<span class="_ _0"></span>ventional<span class="_ _5"> </span>object<span class="_ _1"> </span>detector<span class="_ _5"> </span>is<span class="_ _1"> </span>trained<span class="_ _5"> </span>off-</div><div class="t m0 x10 ha y71 ff2 fs4 fc0 sc0 ls0 ws0">line.<span class="_ _5"> </span>Therefore,<span class="_ _4"> </span>researchers<span class="_ _6"> </span>always<span class="_"> </span>like<span class="_ _6"> </span>to<span class="_ _4"> </span>take<span class="_ _6"> </span>this<span class="_ _6"> </span>advan-</div><div class="t m0 x10 ha y72 ff2 fs4 fc0 sc0 ls0 ws0">tage<span class="_ _3"> </span>and<span class="_ _4"> </span>develop<span class="_ _4"> </span>better<span class="_ _3"> </span>training<span class="_ _4"> </span>methods<span class="_ _3"> </span>which<span class="_ _3"> </span>can<span class="_ _4"> </span>make</div><div class="t m0 x10 ha y73 ff2 fs4 fc0 sc0 ls0 ws0">the<span class="_ _6"> </span>object<span class="_ _6"> </span>detector<span class="_ _6"> </span>receiv<span class="_ _0"></span>e<span class="_ _6"> </span>better<span class="_ _6"> </span>accuracy<span class="_ _6"> </span>without<span class="_ _6"> </span>increas-</div><div class="t m0 x10 ha y74 ff2 fs4 fc0 sc0 ls0 ws0">ing<span class="_ _5"> </span>the<span class="_ _5"> </span>inference<span class="_ _5"> </span>cost.<span class="_ _10"> </span>W<span class="_ _2"></span>e<span class="_ _5"> </span>call<span class="_ _5"> </span>these<span class="_ _5"> </span>methods<span class="_ _5"> 
</span>that<span class="_ _5"> </span>only</div><div class="t m0 x10 ha y75 ff2 fs4 fc0 sc0 ls0 ws0">change<span class="_ _e"> </span>the<span class="_ _5"> </span>training<span class="_ _e"> </span>strategy<span class="_ _e"> </span>or<span class="_ _5"> </span>only<span class="_ _e"> </span>increase<span class="_ _e"> </span>the<span class="_ _5"> </span>training</div><div class="t m0 x10 ha y76 ff2 fs4 fc0 sc0 ls0 ws0">cost<span class="_ _4"> </span>as<span class="_ _4"> </span>“bag<span class="_ _6"> </span>of<span class="_ _4"> </span>freebies.<span class="_ _0"></span>”<span class="_ _f"> </span>What<span class="_ _4"> </span>is<span class="_ _4"> </span>often<span class="_ _4"> </span>adopted<span class="_ _4"> </span>by<span class="_ _6"> </span>object</div><div class="t m0 x10 ha y77 ff2 fs4 fc0 sc0 ls0 ws0">detection<span class="_ _4"> </span>methods<span class="_ _4"> </span>and<span class="_ _4"> </span>meets<span class="_ _4"> </span>the<span class="_ _4"> </span>definition<span class="_ _4"> </span>of<span class="_ _4"> </span>bag<span class="_ _4"> </span>of<span class="_ _4"> </span>free-</div><div class="t m0 x10 ha y78 ff2 fs4 fc0 sc0 ls0 ws0">bies<span class="_ _4"> </span>is<span class="_ _4"> </span>data<span class="_ _6"> </span>augmentation.<span class="_ _13"> </span>The<span class="_ _4"> </span>purpose<span class="_ _4"> </span>of<span class="_ _6"> </span>data<span class="_ _4"> </span>augmenta-</div><div class="t m0 x10 ha y79 ff2 fs4 fc0 sc0 ls0 ws0">tion<span class="_"> </span>is<span class="_ _7"> </span>to<span class="_"> </span>increase<span class="_"> </span>the<span class="_ _8"> </span>variability<span class="_ _7"> </span>of<span class="_"> </span>the<span class="_"> </span>input<span class="_ _7"> </span>images,<span class="_"> </span>so<span class="_"> </span>that</div><div class="t m0 x10 ha y7a ff2 fs4 fc0 sc0 ls0 ws0">the<span class="_ _4"> </span>designed<span class="_ _4"> </span>object<span class="_ _4"> 
</span>detection<span class="_ _4"> </span>model<span class="_ _3"> </span>has<span class="_ _4"> </span>higher<span class="_ _4"> </span>robustness</div><div class="t m0 x10 ha y7b ff2 fs4 fc0 sc0 ls0 ws0">to<span class="_ _e"> </span>the<span class="_ _e"> </span>images<span class="_ _e"> </span>obtained<span class="_ _5"> </span>from<span class="_ _e"> </span>dif<span class="_ _0"></span>ferent<span class="_ _5"> </span>en<span class="_ _0"></span>vironments.<span class="_ _16"> </span>For</div><div class="t m0 x10 ha y7c ff2 fs4 fc0 sc0 ls0 ws0">example,<span class="_ _7"> </span>photometric<span class="_ _7"> </span>distortions<span class="_ _7"> </span>and<span class="_ _7"> </span>geometric<span class="_ _8"> </span>distortions</div><div class="t m0 x10 ha y7d ff2 fs4 fc0 sc0 ls0 ws0">are<span class="_ _7"> </span>two<span class="_ _7"> </span>commonly<span class="_"> </span>used<span class="_ _7"> </span>data<span class="_ _7"> </span>augmentation<span class="_ _7"> </span>methods<span class="_"> </span>and<span class="_ _7"> </span>they</div><div class="t m0 x10 ha y7e ff2 fs4 fc0 sc0 ls0 ws0">definitely<span class="_ _6"> </span>benefit<span class="_ _4"> </span>the<span class="_ _6"> </span>object<span class="_ _6"> </span>detection<span class="_ _4"> </span>task.<span class="_ _5"> </span>In<span class="_ _4"> </span>dealing<span class="_ _6"> </span>with</div><div class="t m0 x10 ha y7f ff2 fs4 fc0 sc0 ls0 ws0">photometric<span class="_ _3"> </span>distortion,<span class="_ _5"> </span>we<span class="_ _3"> </span>adjust<span class="_ _3"> </span>the<span class="_ _e"> </span>brightness,<span class="_ _e"> </span>contrast,</div><div class="t m0 x10 ha y80 ff2 fs4 fc0 sc0 ls0 ws0">hue,<span class="_ _4"> </span>saturation,<span class="_ _4"> </span>and<span class="_ _4"> </span>noise<span class="_ _6"> </span>of<span class="_ _4"> </span>an<span class="_ _4"> </span>image.<span class="_ _f"> </span>For<span class="_ _6"> </span>geometric<span class="_ 
_4"> </span>dis-</div><div class="t m0 x10 ha y81 ff2 fs4 fc0 sc0 ls0 ws0">tortion,<span class="_ _6"> </span>we<span class="_ _6"> </span>add<span class="_ _6"> </span>random<span class="_ _6"> </span>scaling,<span class="_ _4"> </span>cropping,<span class="_ _6"> </span>flipping,<span class="_ _6"> </span>and<span class="_ _4"> </span>ro-</div><div class="t m0 x10 ha y82 ff2 fs4 fc0 sc0 ls0 ws0">tating.</div><div class="t m0 xf ha y83 ff2 fs4 fc0 sc0 ls0 ws0">The<span class="_"> </span>data<span class="_ _8"> </span>augmentation<span class="_"> </span>methods<span class="_"> </span>mentioned<span class="_ _8"> </span>abov<span class="_ _0"></span>e<span class="_"> </span>are<span class="_"> </span>all</div><div class="t m0 x10 ha y84 ff2 fs4 fc0 sc0 ls0 ws0">pixel-wise<span class="_ _7"> </span>adjustments,<span class="_"> </span>and<span class="_ _7"> </span>all<span class="_ _8"> </span>original<span class="_ _7"> </span>pixel<span class="_ _8"> </span>information<span class="_ _8"> </span>in</div><div class="t m0 x10 ha y85 ff2 fs4 fc0 sc0 ls0 ws0">the<span class="_ _6"> </span>adjusted<span class="_ _6"> </span>area<span class="_ _6"> </span>is<span class="_ _6"> </span>retained.<span class="_ _5"> </span>In<span class="_ _6"> </span>addition,<span class="_ _4"> </span>some<span class="_ _6"> </span>researchers</div><div class="t m0 x10 ha y86 ff2 fs4 fc0 sc0 ls0 ws0">engaged<span class="_ _3"> </span>in<span class="_ _3"> </span>data<span class="_ _e"> </span>augmentation<span class="_ _3"> </span>put<span class="_ _e"> </span>their<span class="_ _3"> </span>emphasis<span class="_ _3"> </span>on<span class="_ _e"> </span>sim-</div><div class="t m0 x10 ha y87 ff2 fs4 fc0 sc0 ls0 ws0">ulating<span class="_ _3"> </span>object<span class="_ _4"> </span>occlusion<span class="_ _3"> </span>issues.<span class="_ _9"> </span>They<span class="_ _4"> </span>have<span class="_ _4"> </span>achiev<span class="_ _0"></span>ed<span class="_ _3"> </span>good</div><div 
class="t m0 x10 ha y88 ff2 fs4 fc0 sc0 ls0 ws0">results<span class="_"> </span>in<span class="_ _6"> </span>image<span class="_"> </span>classification<span class="_ _6"> </span>and<span class="_ _6"> </span>object<span class="_"> </span>detection.<span class="_ _e"> </span>For<span class="_"> </span>ex-</div><div class="t m0 x10 ha y89 ff2 fs4 fc0 sc0 ls0 ws0">ample,<span class="_ _4"> </span>random<span class="_ _4"> </span>erase<span class="_ _4"> </span>[<span class="fc3">100</span>]<span class="_ _6"> </span>and<span class="_ _4"> </span>CutOut<span class="_ _4"> </span>[<span class="fc3">11</span>]<span class="_ _4"> </span>can<span class="_ _4"> </span>randomly</div><div class="t m0 x10 ha y8a ff2 fs4 fc0 sc0 ls0 ws0">select<span class="_"> </span>the<span class="_ _6"> </span>rectangle<span class="_ _6"> </span>region<span class="_ _6"> </span>in<span class="_"> </span>an<span class="_ _6"> </span>image<span class="_ _6"> </span>and<span class="_ _6"> </span>fill<span class="_ _6"> </span>in<span class="_ _6"> </span>a<span class="_ _6"> </span>random</div><div class="t m0 x10 ha y8b ff2 fs4 fc0 sc0 ls0 ws0">or<span class="_"> </span>complementary<span class="_"> </span>value<span class="_"> </span>of<span class="_"> </span>zero.<span class="_ _3"> </span>As<span class="_"> </span>for<span class="_ _6"> </span>hide-and-seek<span class="_"> </span>[<span class="fc3">69</span>]</div><div class="t m0 x10 ha y8c ff2 fs4 fc0 sc0 ls0 ws0">and<span class="_ _6"> </span>grid<span class="_ _6"> </span>mask<span class="_ _6"> </span>[<span class="fc3">6</span>],<span class="_ _6"> </span>they<span class="_ _6"> </span>randomly<span class="_ _6"> </span>or<span class="_ _6"> </span>evenly<span class="_"> </span>select<span class="_ _6"> </span>multiple</div><div class="t m0 x10 ha y8d ff2 fs4 fc0 sc0 ls0 ws0">rectangle<span class="_ _3"> </span>regions<span class="_ _3"> </span>in<span class="_ _e"> </span>an<span class="_ _3"> </span>image<span class="_ _3"> 
</span>and<span class="_ _e"> </span>replace<span class="_ _3"> </span>them<span class="_ _e"> </span>with<span class="_ _3"> </span>all<span class="_ _e"> </span>ze-</div><div class="t m0 x10 ha y8e ff2 fs4 fc0 sc0 ls0 ws0">ros.<span class="_ _14"> </span>If<span class="_ _4"> </span>similar<span class="_ _3"> </span>concepts<span class="_ _4"> </span>are<span class="_ _3"> </span>applied<span class="_ _4"> </span>to<span class="_ _3"> </span>feature<span class="_ _4"> </span>maps,<span class="_ _3"> </span>there</div><div class="t m0 x10 ha y8f ff2 fs4 fc0 sc0 ls0 ws0">are<span class="_ _4"> </span>DropOut<span class="_ _6"> </span>[<span class="fc3">71</span>],<span class="_ _3"> </span>DropConnect<span class="_ _6"> </span>[<span class="fc3">80</span>],<span class="_ _3"> </span>and<span class="_ _6"> </span>DropBlock<span class="_ _4"> </span>[<span class="fc3">16</span>]</div><div class="t m0 x10 ha y90 ff2 fs4 fc0 sc0 ls0 ws0">methods.<span class="_ _f"> </span>In<span class="_ _4"> </span>addition,<span class="_ _4"> </span>some<span class="_ _4"> </span>researchers<span class="_ _4"> </span>have<span class="_ _6"> </span>proposed<span class="_ _4"> </span>the</div><div class="t m0 x10 ha y91 ff2 fs4 fc0 sc0 ls0 ws0">methods<span class="_ _6"> </span>of<span class="_ _6"> </span>using<span class="_ _6"> </span>multiple<span class="_ _6"> </span>images<span class="_ _6"> </span>together<span class="_ _6"> </span>to<span class="_ _6"> </span>perform<span class="_ _6"> </span>data</div><div class="t m0 x10 ha y92 ff2 fs4 fc0 sc0 ls0 ws0">augmentation.<span class="_ _13"> </span>For<span class="_ _4"> </span>example,<span class="_ _4"> </span>MixUp<span class="_ _4"> </span>[<span class="fc3">92</span>]<span class="_ _4"> </span>uses<span class="_ _4"> </span>two<span class="_ _4"> </span>images</div><div class="t m0 x10 ha y93 ff2 fs4 fc0 sc0 ls0 ws0">to<span class="_ _3"> </span>multiply<span class="_ _e"> </span>and<span class="_ _3"> </span>superimpose<span 
class="_ _e"> </span>with<span class="_ _3"> </span>different<span class="_ _3"> </span>coefficient<span class="_ _3"> </span>ra-</div><div class="t m0 x10 ha y94 ff2 fs4 fc0 sc0 ls0 ws0">tios,<span class="_"> </span>and<span class="_ _6"> </span>then<span class="_ _6"> </span>adjusts<span class="_"> </span>the<span class="_ _6"> </span>label<span class="_ _6"> </span>with<span class="_"> </span>these<span class="_ _6"> </span>superimposed<span class="_ _6"> </span>ra-</div><div class="t m0 x10 ha y95 ff2 fs4 fc0 sc0 ls0 ws0">tios.<span class="_ _f"> </span>As<span class="_ _4"> </span>for<span class="_ _4"> </span>CutMix<span class="_ _4"> </span>[<span class="fc3">91</span>],<span class="_ _3"> </span>it<span class="_ _4"> </span>is<span class="_ _4"> </span>to<span class="_ _4"> </span>cov<span class="_ _0"></span>er<span class="_ _4"> </span>the<span class="_ _4"> </span>cropped<span class="_ _4"> </span>image</div><div class="t m0 x10 ha y96 ff2 fs4 fc0 sc0 ls0 ws0">to<span class="_ _3"> </span>rectangle<span class="_ _e"> </span>region<span class="_ _3"> </span>of<span class="_ _e"> </span>other<span class="_ _e"> </span>images,<span class="_ _e"> </span>and<span class="_ _e"> </span>adjusts<span class="_ _e"> </span>the<span class="_ _3"> </span>label</div><div class="t m0 x10 ha y97 ff2 fs4 fc0 sc0 ls0 ws0">according<span class="_ _e"> </span>to<span class="_ _e"> </span>the<span class="_ _e"> </span>size<span class="_ _e"> </span>of<span class="_ _e"> </span>the<span class="_ _5"> </span>mix<span class="_ _e"> </span>area.<span class="_ _b"> </span>In<span class="_ _5"> </span>addition<span class="_ _e"> </span>to<span class="_ _e"> </span>the</div><div class="t m0 x10 ha y98 ff2 fs4 fc0 sc0 ls0 ws0">abov<span class="_ _0"></span>e<span class="_ _4"> </span>mentioned<span class="_ _4"> </span>methods,<span class="_ _4"> </span>style<span class="_ _4"> </span>transfer<span class="_ _6"> </span>GAN<span class="_ _4"> </span>[<span class="fc3">15</span>]<span 
class="_ _4"> </span>is<span class="_ _4"> </span>also</div><div class="t m0 x10 ha y99 ff2 fs4 fc0 sc0 ls0 ws0">used<span class="_"> </span>for<span class="_ _6"> </span>data<span class="_ _6"> </span>augmentation,<span class="_ _4"> </span>and<span class="_"> </span>such<span class="_ _6"> </span>usage<span class="_ _6"> </span>can<span class="_ _6"> </span>effecti<span class="_ _0"></span>vely</div><div class="t m0 x10 ha y9a ff2 fs4 fc0 sc0 ls0 ws0">reduce<span class="_"> </span>the<span class="_"> </span>texture<span class="_"> </span>bias<span class="_"> </span>learned<span class="_"> </span>by<span class="_"> </span>CNN.</div><div class="t m0 xf ha y25 ff2 fs4 fc0 sc0 ls0 ws0">Different<span class="_ _4"> </span>from<span class="_ _4"> </span>the<span class="_ _3"> </span>various<span class="_ _4"> </span>approaches<span class="_ _3"> </span>proposed<span class="_ _4"> </span>above,</div><div class="t m0 x10 ha y26 ff2 fs4 fc0 sc0 ls0 ws0">some<span class="_ _8"> </span>other<span class="_ _8"> </span>bag<span class="_ _8"> </span>of<span class="_ _7"> </span>freebies<span class="_"> </span>methods<span class="_ _7"> </span>are<span class="_"> </span>dedicated<span class="_ _7"> </span>to<span class="_"> </span>solving</div><div class="t m0 x10 ha y27 ff2 fs4 fc0 sc0 ls0 ws0">the<span class="_ _7"> </span>problem<span class="_ _7"> </span>that<span class="_ _8"> </span>the<span class="_ _7"> </span>semantic<span class="_ _8"> </span>distribution<span class="_ _7"> </span>in<span class="_ _7"> </span>the<span class="_ _7"> </span>dataset<span class="_ _7"> </span>may</div><div class="t m0 x10 ha y28 ff2 fs4 fc0 sc0 ls0 ws0">hav<span class="_ _0"></span>e<span class="_ _4"> </span>bias.<span class="_ _14"> </span>In<span class="_ _4"> </span>dealing<span class="_ _4"> </span>with<span class="_ _3"> </span>the<span class="_ _4"> </span>problem<span class="_ _4"> </span>of<span class="_ _4"> </span>semantic<span class="_ _3"> </span>distri-</div><div class="t m0 
x10 ha y29 ff2 fs4 fc0 sc0 ls0 ws0">bution<span class="_ _8"> </span>bias,<span class="_"> </span>a<span class="_"> </span>v<span class="_ _0"></span>ery<span class="_"> </span>important<span class="_"> </span>issue<span class="_ _8"> </span>is<span class="_"> </span>that<span class="_"> </span>there<span class="_ _8"> </span>is<span class="_"> </span>a<span class="_"> </span>problem</div><div class="t m0 x10 ha y2a ff2 fs4 fc0 sc0 ls0 ws0">of<span class="_ _6"> </span>data<span class="_ _6"> </span>imbalance<span class="_ _6"> </span>between<span class="_ _4"> </span>dif<span class="_ _0"></span>ferent<span class="_ _6"> </span>classes,<span class="_ _4"> </span>and<span class="_ _6"> </span>this<span class="_ _6"> </span>prob-</div><div class="t m0 x10 ha y2b ff2 fs4 fc0 sc0 ls0 ws0">lem<span class="_ _4"> </span>is<span class="_ _4"> </span>often<span class="_ _3"> </span>solved<span class="_ _4"> </span>by<span class="_ _4"> </span>hard<span class="_ _4"> </span>negati<span class="_ _0"></span>ve<span class="_ _4"> </span>example<span class="_ _4"> </span>mining<span class="_ _4"> </span>[<span class="fc3">72</span>]</div><div class="t m0 x10 ha y2c ff2 fs4 fc0 sc0 ls0 ws0">or<span class="_"> </span>online<span class="_"> </span>hard<span class="_ _6"> </span>example<span class="_"> </span>mining<span class="_"> </span>[<span class="fc3">67</span>]<span class="_ _6"> </span>in<span class="_"> </span>two-stage<span class="_"> </span>object<span class="_ _6"> </span>de-</div><div class="t m0 x10 ha y2d ff2 fs4 fc0 sc0 ls0 ws0">tector<span class="_ _0"></span>.<span class="_ _d"> </span>But<span class="_ _e"> </span>the<span class="_ _e"> </span>example<span class="_ _3"> </span>mining<span class="_ _e"> </span>method<span class="_ _e"> </span>is<span class="_ _e"> </span>not<span class="_ _3"> </span>applicable</div><div class="t m0 x13 ha y6f ff2 fs4 fc0 sc0 ls0 ws0">to<span class="_ _3"> </span>one-stage<span class="_ _3"> </span>object<span class="_ _3"> 
</span>detector<span class="_ _0"></span>,<span class="_ _3"> </span>because<span class="_ _3"> </span>this<span class="_ _3"> </span>kind<span class="_ _3"> </span>of<span class="_ _3"> </span>detector</div><div class="t m0 x13 ha y9b ff2 fs4 fc0 sc0 ls0 ws0">belongs<span class="_"> </span>to<span class="_ _6"> </span>the<span class="_ _6"> </span>dense<span class="_"> </span>prediction<span class="_ _6"> </span>architecture.<span class="_ _e"> </span>Therefore<span class="_"> </span>Lin</div><div class="t m0 x13 ha y9c ff5 fs4 fc0 sc0 ls0 ws0">et<span class="_ _e"> </span>al<span class="ff2">.<span class="_ _12"> </span>[<span class="fc3">45</span>]<span class="_ _e"> </span>proposed<span class="_ _e"> </span>focal<span class="_ _5"> </span>loss<span class="_ _e"> </span>to<span class="_ _e"> </span>deal<span class="_ _5"> </span>with<span class="_ _e"> </span>the<span class="_ _5"> </span>problem</span></div><div class="t m0 x13 ha y9d ff2 fs4 fc0 sc0 ls0 ws0">of<span class="_ _e"> </span>data<span class="_ _5"> </span>imbalance<span class="_ _e"> </span>existing<span class="_ _e"> </span>between<span class="_ _e"> </span>various<span class="_ _e"> </span>classes.<span class="_ _12"> </span>An-</div><div class="t m0 x13 ha y9e ff2 fs4 fc0 sc0 ls0 ws0">other<span class="_"> </span>v<span class="_ _0"></span>ery<span class="_"> </span>important<span class="_ _8"> </span>issue<span class="_ _8"> </span>is<span class="_"> </span>that<span class="_ _7"> </span>it<span class="_"> </span>is<span class="_"> </span>dif<span class="_ _0"></span>ficult<span class="_"> </span>to<span class="_ _7"> </span>express<span class="_"> </span>the</div><div class="t m0 x13 ha y9f ff2 fs4 fc0 sc0 ls0 ws0">relationship<span class="_ _4"> </span>of<span class="_ _4"> </span>the<span class="_ _3"> </span>degree<span class="_ _6"> </span>of<span class="_ _3"> </span>association<span class="_ _4"> </span>between<span class="_ _4"> </span>different</div><div class="t m0 x13 ha ya0 ff2 fs4 
fc0 sc0 ls0 ws0">categories<span class="_ _4"> </span>with<span class="_ _4"> </span>the<span class="_ _3"> </span>one-hot<span class="_ _4"> </span>hard<span class="_ _3"> </span>representation.<span class="_ _14"> </span>This<span class="_ _4"> </span>rep-</div><div class="t m0 x13 ha ya1 ff2 fs4 fc0 sc0 ls0 ws0">resentation<span class="_ _4"> </span>scheme<span class="_ _4"> </span>is<span class="_ _3"> </span>often<span class="_ _4"> </span>used<span class="_ _4"> </span>when<span class="_ _3"> </span>ex<span class="_ _0"></span>ecuting<span class="_ _3"> </span>labeling.</div><div class="t m0 x13 ha ya2 ff2 fs4 fc0 sc0 ls0 ws0">The<span class="_"> </span>label<span class="_ _6"> </span>smoothing<span class="_"> </span>proposed<span class="_ _6"> </span>in<span class="_ _6"> </span>[<span class="fc3">73</span>]<span class="_"> </span>is<span class="_ _6"> </span>to<span class="_"> </span>conv<span class="_ _0"></span>ert<span class="_ _6"> </span>hard<span class="_ _6"> </span>la-</div><div class="t m0 x13 ha ya3 ff2 fs4 fc0 sc0 ls0 ws0">bel<span class="_"> </span>into<span class="_"> </span>soft<span class="_"> </span>label<span class="_ _8"> </span>for<span class="_"> </span>training,<span class="_"> </span>which<span class="_"> </span>can<span class="_ _8"> </span>make<span class="_"> </span>model<span class="_"> </span>more</div><div class="t m0 x13 ha ya4 ff2 fs4 fc0 sc0 ls0 ws0">robust.<span class="_ _6"> </span>In<span class="_ _7"> </span>order<span class="_"> </span>to<span class="_ _7"> </span>obtain<span class="_ _8"> </span>a<span class="_ _7"> </span>better<span class="_"> </span>soft<span class="_ _7"> </span>label,<span class="_"> </span>Islam<span class="_ _7"> </span><span class="ff5">et<span class="_ _7"> </span>al</span>.<span class="_ _4"> </span>[<span class="fc3">33</span>]</div><div class="t m0 x13 ha ya5 ff2 fs4 fc0 sc0 ls0 ws0">introduced<span class="_ _4"> </span>the<span class="_ _4"> </span>concept<span class="_ _4"> </span>of<span 
class="_ _3"> </span>kno<span class="_ _0"></span>wledge<span class="_ _4"> </span>distillation<span class="_ _3"> </span>to<span class="_ _4"> </span>design</div><div class="t m0 x13 ha ya6 ff2 fs4 fc0 sc0 ls0 ws0">the<span class="_"> </span>label<span class="_"> </span>refinement<span class="_"> </span>network.</div><div class="t m0 x14 ha ya7 ff2 fs4 fc0 sc0 ls0 ws0">The<span class="_ _f"> </span>last<span class="_ _13"> </span>bag<span class="_ _13"> </span>of<span class="_ _13"> </span>freebies<span class="_ _f"> </span>is<span class="_ _13"> </span>the<span class="_ _13"> </span>objectiv<span class="_ _0"></span>e<span class="_ _13"> </span>function<span class="_ _13"> </span>of</div><div class="t m0 x13 ha ya8 ff2 fs4 fc0 sc0 ls0 ws0">Bounding<span class="_ _e"> </span>Box<span class="_ _e"> </span>(BBox)<span class="_ _e"> </span>regression.<span class="_ _b"> </span>The<span class="_ _5"> </span>traditional<span class="_ _e"> </span>object</div><div class="t m0 x13 ha ya9 ff2 fs4 fc0 sc0 ls0 ws0">detector<span class="_ _13"> </span>usually<span class="_ _11"> </span>uses<span class="_ _13"> </span>Mean<span class="_ _11"> </span>Square<span class="_ _13"> </span>Error<span class="_ _11"> </span>(MSE)<span class="_ _13"> </span>to<span class="_ _11"> </span>di-</div><div class="t m0 x13 ha yaa ff2 fs4 fc0 sc0 ls0 ws0">rectly<span class="_ _e"> </span>perform<span class="_ _5"> </span>regression<span class="_ _e"> </span>on<span class="_ _e"> </span>the<span class="_ _5"> </span>center<span class="_ _e"> </span>point<span class="_ _5"> </span>coordinates</div><div class="t m0 x13 h9 yab ff2 fs4 fc0 sc0 ls0 ws0">and<span class="_ _3"> </span>height<span class="_ _4"> </span>and<span class="_ _3"> </span>width<span class="_ _3"> </span>of<span class="_ _3"> </span>the<span class="_ _3"> </span>BBox,<span class="_ _e"> </span>i.e.,<span class="_ _3"> </span><span class="ff7">{<span class="ffa">x</span></span></div><div class="t m0 x1f hd yac ffb fs5 fc0 sc0 ls0 
ws0">center</div><div class="t m0 x20 he yab ff2 fs4 fc0 sc0 ls0 ws0">,<span class="_ _3"> </span><span class="ffa">y</span></div><div class="t m0 x21 hd yac ffb fs5 fc0 sc0 ls0 ws0">center</div><div class="t m0 x22 ha yab ff2 fs4 fc0 sc0 ls0 ws0">,</div><div class="t m0 x13 h9 yad ffa fs4 fc0 sc0 ls0 ws0">w<span class="_ _17"></span><span class="ff2">,<span class="_ _1"> </span></span>h<span class="ff7">}<span class="ff2">,<span class="_ _1"> </span>or<span class="_ _5"> </span>the<span class="_ _e"> </span>upper<span class="_ _5"> </span>left<span class="_ _5"> </span>point<span class="_ _e"> </span>and<span class="_ _5"> </span>the<span class="_ _5"> </span>lower<span class="_ _e"> </span>right<span class="_ _5"> </span>point,</span></span></div><div class="t m0 x13 h9 yae ff2 fs4 fc0 sc0 ls0 ws0">i.e.,<span class="_ _e"> </span><span class="ff7">{<span class="ffa">x</span></span></div><div class="t m0 x23 hd yaf ffb fs5 fc0 sc0 ls0 ws0">top</div><div class="t m0 x24 hd yb0 ffb fs5 fc0 sc0 ls0 ws0">lef<span class="_ _18"> </span>t</div><div class="t m0 x25 he yae ff2 fs4 fc0 sc0 ls0 ws0">,<span class="_ _e"> </span><span class="ffa">y</span></div><div class="t m0 xb hd yb0 ffb fs5 fc0 sc0 ls0 ws0">top<span class="_ _6"> </span>lef<span class="_ _18"></span>t</div><div class="t m0 x26 he yae ff2 fs4 fc0 sc0 ls0 ws0">,<span class="_ _e"> </span><span class="ffa">x</span></div><div class="t m0 x27 hd yb0 ffb fs5 fc0 sc0 ls0 ws0">bottom<span class="_ _6"> </span>right</div><div class="t m0 x28 he yae ff2 fs4 fc0 sc0 ls0 ws0">,<span class="_ _e"> </span><span class="ffa">y</span></div><div class="t m0 x29 hd yb0 ffb fs5 fc0 sc0 ls0 ws0">bottom<span class="_ _6"> </span>right</div><div class="t m0 x2a h9 yae ff7 fs4 fc0 sc0 ls0 ws0">}<span class="ff2">.<span class="_ _15"> </span>As</span></div><div class="t m0 x13 ha yb1 ff2 fs4 fc0 sc0 ls0 ws0">for<span class="_ _4"> </span>anchor-based<span class="_ _4"> </span>method,<span class="_ _3"> </span>it<span class="_ 
_3"> </span>is<span class="_ _4"> </span>to<span class="_ _4"> </span>estimate<span class="_ _3"> </span>the<span class="_ _4"> </span>correspond-</div><div class="t m0 x13 h9 yb2 ff2 fs4 fc0 sc0 ls0 ws0">ing<span class="_ _15"> </span>offset,<span class="_ _b"> </span>for<span class="_ _15"> </span>example<span class="_ _15"> </span><span class="ff7">{<span class="ffa">x</span></span></div><div class="t m0 x2b hd yb3 ffb fs5 fc0 sc0 ls0 ws0">center<span class="_ _6"> </span>of<span class="_ _18"></span>f<span class="_ _18"></span>set</div><div class="t m0 x2c he yb2 ff2 fs4 fc0 sc0 ls0 ws0">,<span class="_ _16"> </span><span class="ffa">y</span></div><div class="t m0 x2d hd yb3 ffb fs5 fc0 sc0 ls0 ws0">center<span class="_ _6"> </span>of<span class="_ _18"></span>f<span class="_ _18"></span>set</div><div class="t m0 x22 ha yb2 ff2 fs4 fc0 sc0 ls0 ws0">,</div><div class="t m0 x13 he yb4 ffa fs4 fc0 sc0 ls0 ws0">w</div><div class="t m0 x15 hd yb5 ffb fs5 fc0 sc0 ls0 ws0">of<span class="_ _18"></span>f<span class="_ _18"></span>set</div><div class="t m0 x1b he yb4 ff2 fs4 fc0 sc0 ls0 ws0">,<span class="_ _15"> </span><span class="ffa">h</span></div><div class="t m0 x2e hd yb5 ffb fs5 fc0 sc0 ls0 ws0">of<span class="_ _18"></span>f<span class="_ _18"></span>set</div><div class="t m0 x2f h9 yb4 ff7 fs4 fc0 sc0 ls0 ws0">}<span class="_ _14"> </span><span class="ff2">and<span class="_ _14"> </span></span>{<span class="ffa">x</span></div><div class="t m0 x30 hd yb5 ffb fs5 fc0 sc0 ls0 ws0">top<span class="_ _6"> </span>lef<span class="_ _18"></span>t<span class="_ _8"> </span>of<span class="_ _18"> </span>f<span class="_ _18"> </span>set</div><div class="t m0 x29 he yb4 ff2 fs4 fc0 sc0 ls0 ws0">,<span class="_ _15"> </span><span class="ffa">y</span></div><div class="t m0 x31 hd yb5 ffb fs5 fc0 sc0 ls0 ws0">top<span class="_ _6"> </span>lef<span class="_ _18"></span>t<span class="_ _8"> </span>of<span class="_ _18"> </span>f<span class="_ _18"> </span>set</div><div 
class="t m0 x22 ha yb4 ff2 fs4 fc0 sc0 ls0 ws0">,</div><div class="t m0 x13 he yb6 ffa fs4 fc0 sc0 ls0 ws0">x</div><div class="t m0 x32 hd yb7 ffb fs5 fc0 sc0 ls0 ws0">bottom<span class="_ _6"> </span>right<span class="_ _6"> </span>of<span class="_ _18"></span>f<span class="_ _18"></span>set</div><div class="t m0 x33 he yb6 ff2 fs4 fc0 sc0 ls0 ws0">,<span class="_ _4"> </span><span class="ffa">y</span></div><div class="t m0 x34 hd yb7 ffb fs5 fc0 sc0 ls0 ws0">bottom<span class="_ _6"> </span>right<span class="_ _6"> </span>of<span class="_ _18"></span>f<span class="_ _18"></span>set</div><div class="t m0 x35 h9 yb6 ff7 fs4 fc0 sc0 ls0 ws0">}<span class="ff2">.<span class="_ _f"> </span>Howe<span class="_ _0"></span>ver<span class="_ _0"></span>,<span class="_ _4"> </span>to<span class="_ _4"> </span>di-</span></div><div class="t m0 x13 ha yb8 ff2 fs4 fc0 sc0 ls0 ws0">rectly<span class="_ _3"> </span>estimate<span class="_ _e"> </span>the<span class="_ _e"> </span>coordinate<span class="_ _e"> </span>v<span class="_ _0"></span>alues<span class="_ _e"> </span>of<span class="_ _e"> </span>each<span class="_ _e"> </span>point<span class="_ _3"> </span>of<span class="_ _e"> </span>the</div><div class="t m0 x13 ha yb9 ff2 fs4 fc0 sc0 ls0 ws0">BBox<span class="_ _4"> </span>is<span class="_ _4"> </span>to<span class="_ _4"> </span>treat<span class="_ _4"> </span>these<span class="_ _4"> </span>points<span class="_ _4"> </span>as<span class="_ _4"> </span>independent<span class="_ _4"> </span>variables,<span class="_ _4"> </span>but</div><div class="t m0 x13 ha yba ff2 fs4 fc0 sc0 ls0 ws0">in<span class="_"> </span>fact<span class="_"> </span>does<span class="_"> </span>not<span class="_ _8"> </span>consider<span class="_"> </span>the<span class="_"> </span>integrity<span class="_"> </span>of<span class="_"> </span>the<span class="_ _8"> </span>object<span class="_"> </span>itself.<span class="_ _4"> </span>In</div><div class="t m0 x13 ha ybb ff2 fs4 fc0 sc0 ls0 
ws0">order<span class="_"> </span>to<span class="_ _6"> </span>make<span class="_ _6"> </span>this<span class="_ _6"> </span>issue<span class="_"> </span>processed<span class="_ _6"> </span>better,<span class="_"> </span>some<span class="_ _6"> </span>researchers</div><div class="t m0 x13 ha ybc ff2 fs4 fc0 sc0 ls0 ws0">recently<span class="_"> </span>proposed<span class="_ _7"> </span>IoU<span class="_"> </span>loss<span class="_ _8"> </span>[<span class="fc3">90</span>],<span class="_"> </span>which<span class="_ _8"> </span>puts<span class="_"> </span>the<span class="_ _7"> </span>coverage<span class="_ _8"> </span>of</div><div class="t m0 x13 ha ybd ff2 fs4 fc0 sc0 ls0 ws0">predicted<span class="_ _6"> </span>BBox<span class="_ _6"> </span>area<span class="_ _6"> </span>and<span class="_ _6"> </span>ground<span class="_ _6"> </span>truth<span class="_ _6"> </span>BBox<span class="_ _6"> </span>area<span class="_ _6"> </span>into<span class="_ _6"> </span>con-</div><div class="t m0 x13 ha ybe ff2 fs4 fc0 sc0 ls0 ws0">sideration.<span class="_ _4"> </span>The<span class="_ _6"> </span>IoU<span class="_"> </span>loss<span class="_"> </span>computing<span class="_"> </span>process<span class="_ _6"> </span>will<span class="_"> </span>trigger<span class="_"> </span>the</div><div class="t m0 x13 ha ybf ff2 fs4 fc0 sc0 ls0 ws0">calculation<span class="_"> </span>of<span class="_ _7"> </span>the<span class="_"> </span>four<span class="_"> </span>coordinate<span class="_ _7"> </span>points<span class="_"> </span>of<span class="_"> </span>the<span class="_ _7"> </span>BBox<span class="_"> </span>by<span class="_ _8"> </span>ex-</div><div class="t m0 x13 ha yc0 ff2 fs4 fc0 sc0 ls0 ws0">ecuting<span class="_"> </span>IoU<span class="_ _6"> </span>with<span class="_ _6"> </span>the<span class="_ _6"> </span>ground<span class="_ _6"> </span>truth,<span class="_ _6"> </span>and<span class="_ _6"> </span>then<span class="_ _6"> </span>connecting<span class="_ _6"> 
</span>the</div><div class="t m0 x13 ha yc1 ff2 fs4 fc0 sc0 ls0 ws0">generated<span class="_"> </span>results<span class="_"> </span>into<span class="_"> </span>a<span class="_ _6"> </span>whole<span class="_"> </span>code.<span class="_ _4"> </span>Because<span class="_ _6"> </span>IoU<span class="_"> </span>is<span class="_"> </span>a<span class="_"> </span>scale</div><div class="t m0 x13 ha yc2 ff2 fs4 fc0 sc0 ls0 ws0">in<span class="_ _0"></span>variant<span class="_"> </span>representation,<span class="_"> </span>it<span class="_"> </span>can<span class="_"> </span>solve<span class="_"> </span>the<span class="_"> </span>problem<span class="_"> </span>that<span class="_"> </span>when</div><div class="t m0 x13 he yc3 ff2 fs4 fc0 sc0 ls0 ws0">traditional<span class="_ _4"> </span>methods<span class="_ _3"> </span>calculate<span class="_ _4"> </span>the<span class="_ _4"> </span><span class="ffa">l</span></div><div class="t m0 x36 h8 yc4 ff6 fs5 fc0 sc0 ls0 ws0">1</div><div class="t m0 x37 he yc3 ff2 fs4 fc0 sc0 ls0 ws0">or<span class="_ _4"> </span><span class="ffa">l</span></div><div class="t m0 x35 h8 yc4 ff6 fs5 fc0 sc0 ls0 ws0">2</div><div class="t m0 x38 h9 yc3 ff2 fs4 fc0 sc0 ls0 ws0">loss<span class="_ _4"> </span>of<span class="_ _3"> </span><span class="ff7">{<span class="ffa">x</span></span>,<span class="_ _4"> </span><span class="ffa">y<span class="_ _18"></span></span>,<span class="_ _4"> </span><span class="ffa">w<span class="_ _17"></span></span>,</div><div class="t m0 x13 h9 yc5 ffa fs4 fc0 sc0 ls0 ws0">h<span class="ff7">}<span class="ff2">,<span class="_ _e"> </span>the<span class="_ _e"> </span>loss<span class="_ _e"> </span>will<span class="_ _3"> </span>increase<span class="_ _e"> </span>with<span class="_ _3"> </span>the<span class="_ _e"> </span>scale.<span class="_ _d"> </span>Recently<span class="_ _0"></span>,<span class="_ _e"> </span>some</span></span></div><div class="t m0 x13 ha yc6 ff2 fs4 fc0 sc0 ls0 ws0">researchers<span class="_"> 
</span>hav<span class="_ _0"></span>e<span class="_ _6"> </span>continued<span class="_"> </span>to<span class="_"> </span>improve<span class="_"> </span>IoU<span class="_"> </span>loss.<span class="_ _4"> </span>For<span class="_"> </span>exam-</div><div class="t m0 x13 ha yc7 ff2 fs4 fc0 sc0 ls0 ws0">ple,<span class="_ _4"> </span>GIoU<span class="_ _4"> </span>loss<span class="_ _4"> </span>[<span class="fc3">65</span>]<span class="_ _6"> </span>is<span class="_ _4"> </span>to<span class="_ _4"> </span>include<span class="_ _4"> </span>the<span class="_ _4"> </span>shape<span class="_ _6"> </span>and<span class="_ _4"> </span>orientation</div><div class="t m0 x13 ha yc8 ff2 fs4 fc0 sc0 ls0 ws0">of<span class="_"> </span>object<span class="_ _8"> </span>in<span class="_"> </span>addition<span class="_ _8"> </span>to<span class="_"> </span>the<span class="_ _8"> </span>cov<span class="_ _0"></span>erage<span class="_"> </span>area.<span class="_ _4"> </span>They<span class="_ _7"> </span>proposed<span class="_"> </span>to</div><div class="t m0 x13 ha yc9 ff2 fs4 fc0 sc0 ls0 ws0">find<span class="_ _4"> </span>the<span class="_ _4"> </span>smallest<span class="_ _4"> </span>area<span class="_ _4"> </span>BBox<span class="_ _4"> </span>that<span class="_ _4"> </span>can<span class="_ _4"> </span>simultaneously<span class="_ _4"> </span>cover</div><div class="t m0 x13 ha yca ff2 fs4 fc0 sc0 ls0 ws0">the<span class="_ _e"> </span>predicted<span class="_ _e"> </span>BBox<span class="_ _e"> </span>and<span class="_ _e"> </span>ground<span class="_ _e"> </span>truth<span class="_ _e"> </span>BBox,<span class="_ _5"> </span>and<span class="_ _e"> </span>use<span class="_ _e"> </span>this</div><div class="t m0 x13 ha ycb ff2 fs4 fc0 sc0 ls0 ws0">BBox<span class="_"> </span>as<span class="_ _6"> </span>the<span class="_"> </span>denominator<span class="_ _6"> </span>to<span class="_"> </span>replace<span class="_ _6"> </span>the<span class="_"> </span>denominator<span 
class="_ _6"> </span>origi-</div><div class="t m0 x13 ha ycc ff2 fs4 fc0 sc0 ls0 ws0">nally<span class="_ _7"> </span>used<span class="_"> </span>in<span class="_ _7"> </span>IoU<span class="_ _7"> </span>loss.<span class="_ _4"> </span>As<span class="_ _7"> </span>for<span class="_"> </span>DIoU<span class="_ _7"> </span>loss<span class="_ _7"> </span>[<span class="fc3">99</span>],<span class="_"> </span>it<span class="_ _7"> </span>additionally</div><div class="t m0 x13 ha ycd ff2 fs4 fc0 sc0 ls0 ws0">considers<span class="_ _6"> </span>the<span class="_ _6"> </span>distance<span class="_ _6"> </span>of<span class="_ _6"> </span>the<span class="_ _6"> </span>center<span class="_ _6"> </span>of<span class="_ _6"> </span>an<span class="_ _4"> </span>object,<span class="_ _6"> </span>and<span class="_ _6"> </span>CIoU</div><div class="t m0 x13 ha yce ff2 fs4 fc0 sc0 ls0 ws0">loss<span class="_ _3"> </span>[<span class="fc3">99</span>],<span class="_ _5"> </span>on<span class="_ _3"> </span>the<span class="_ _e"> </span>other<span class="_ _3"> </span>hand<span class="_ _e"> </span>simultaneously<span class="_ _e"> </span>considers<span class="_ _3"> </span>the</div><div class="t m0 x13 ha ycf ff2 fs4 fc0 sc0 ls0 ws0">ov<span class="_ _0"></span>erlapping<span class="_ _e"> </span>area,<span class="_ _e"> </span>the<span class="_ _e"> </span>distance<span class="_ _e"> </span>between<span class="_ _3"> </span>center<span class="_ _e"> </span>points,<span class="_ _e"> </span>and</div><div class="t m0 x13 ha yd0 ff2 fs4 fc0 sc0 ls0 ws0">the<span class="_ _7"> </span>aspect<span class="_"> </span>ratio.<span class="_"> </span>CIoU<span class="_"> </span>can<span class="_ _7"> </span>achiev<span class="_ _0"></span>e<span class="_ _8"> </span>better<span class="_ _7"> </span>conv<span class="_ _0"></span>ergence<span class="_ _7"> </span>speed</div><div class="t m0 x13 ha yd1 ff2 fs4 fc0 sc0 ls0 ws0">and<span class="_"> </span>accuracy<span class="_"> </span>on<span 
class="_"> </span>the<span class="_"> </span>BBox<span class="_"> </span>re<span class="_ _0"></span>gression<span class="_"> </span>problem.</div><div class="t m0 x17 ha y44 ff2 fs4 fc0 sc0 ls0 ws0">3</div><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a></div><div class="pi" data-data='{"ctm":[1.568627,0.000000,0.000000,1.568627,0.000000,0.000000]}'></div></div>
<div id="pf4" class="pf w0 h0" data-page-no="4"><div class="pc pc4 w0 h0"><img class="bi x0 y0 w1 h1" alt="" src="https://static.pudn.com/prod/directory_preview_static/6275eafd16f2c0769caad825/bg4.jpg"><div class="t m0 x10 hc y6f ff1 fs7 fc0 sc0 ls0 ws0">2.3.<span class="_"> </span>Bag<span class="_"> </span>of<span class="_"> </span>specials</div><div class="t m0 xf ha y70 ff2 fs4 fc0 sc0 ls0 ws0">For<span class="_ _4"> </span>those<span class="_ _4"> </span>plugin<span class="_ _3"> </span>modules<span class="_ _4"> </span>and<span class="_ _3"> </span>post-processing<span class="_ _4"> </span>methods</div><div class="t m0 x10 ha y71 ff2 fs4 fc0 sc0 ls0 ws0">that<span class="_ _1"> </span>only<span class="_ _5"> </span>increase<span class="_ _1"> </span>the<span class="_ _1"> </span>inference<span class="_ _1"> </span>cost<span class="_ _1"> </span>by<span class="_ _1"> </span>a<span class="_ _1"> </span>small<span class="_ _1"> </span>amount</div><div class="t m0 x10 ha y72 ff2 fs4 fc0 sc0 ls0 ws0">but<span class="_"> </span>can<span class="_ _4"> </span>significantly<span class="_ _6"> </span>improve<span class="_ _6"> </span>the<span class="_ _6"> </span>accuracy<span class="_ _6"> </span>of<span class="_ _4"> </span>object<span class="_ _6"> </span>detec-</div><div class="t m0 x10 ha y73 ff2 fs4 fc0 sc0 ls0 ws0">tion,<span class="_ _3"> </span>we<span class="_ _3"> </span>call<span class="_ _3"> </span>them<span class="_ _4"> </span>“bag<span class="_ _3"> </span>of<span class="_ _3"> </span>specials”.<span class="_ _14"> </span>Generally<span class="_ _3"> </span>speaking,</div><div class="t m0 x10 ha y74 ff2 fs4 fc0 sc0 ls0 ws0">these<span class="_"> </span>plugin<span class="_"> </span>modules<span class="_"> </span>are<span class="_"> </span>for<span class="_"> </span>enhancing<span class="_"> </span>certain<span class="_"> </span>attributes<span class="_"> </span>in</div><div class="t m0 x10 ha y75 ff2 fs4 fc0 sc0 ls0 ws0">a<span class="_ _3"> 
</span>model,<span class="_ _3"> </span>such<span class="_ _3"> </span>as<span class="_ _3"> </span>enlarging<span class="_ _4"> </span>receptiv<span class="_ _0"></span>e<span class="_ _3"> </span>field,<span class="_ _3"> </span>introducing<span class="_ _3"> </span>at-</div><div class="t m0 x10 ha y76 ff2 fs4 fc0 sc0 ls0 ws0">tention<span class="_"> </span>mechanism,<span class="_"> </span>or<span class="_"> </span>strengthening<span class="_ _6"> </span>feature<span class="_"> </span>integration<span class="_"> </span>ca-</div><div class="t m0 x10 ha y77 ff2 fs4 fc0 sc0 ls0 ws0">pability<span class="_ _0"></span>,<span class="_"> </span>etc.,<span class="_"> </span>and<span class="_"> </span>post-processing<span class="_ _8"> </span>is<span class="_"> </span>a<span class="_"> </span>method<span class="_"> </span>for<span class="_"> </span>screening</div><div class="t m0 x10 ha y78 ff2 fs4 fc0 sc0 ls0 ws0">model<span class="_"> </span>prediction<span class="_"> </span>results.</div><div class="t m0 xf ha yd2 ff2 fs4 fc0 sc0 ls0 ws0">Common<span class="_ _e"> </span>modules<span class="_ _5"> </span>that<span class="_ _e"> </span>can<span class="_ _5"> </span>be<span class="_ _e"> </span>used<span class="_ _5"> </span>to<span class="_ _e"> </span>enhance<span class="_ _5"> </span>recep-</div><div class="t m0 x10 ha yd3 ff2 fs4 fc0 sc0 ls0 ws0">tiv<span class="_ _0"></span>e<span class="_ _1"> </span>field<span class="_ _1"> </span>are<span class="_ _1"> </span>SPP<span class="_ _1"> </span>[<span class="fc3">25</span>],<span class="_ _13"> </span>ASPP<span class="_ _f"> </span>[<span class="fc3">5</span>],<span class="_ _13"> </span>and<span class="_ _1"> </span>RFB<span class="_ _1"> </span>[<span class="fc3">47</span>].<span class="_ _19"> </span>The</div><div class="t m0 x10 ha yd4 ff2 fs4 fc0 sc0 ls0 ws0">SPP<span class="_ _3"> </span>module<span class="_ _3"> </span>was<span class="_ _3"> </span>originated<span class="_ _3"> </span>from<span class="_ _3"> 
</span>Spatial<span class="_ _3"> </span>Pyramid<span class="_ _3"> </span>Match-</div><div class="t m0 x10 ha yd5 ff2 fs4 fc0 sc0 ls0 ws0">ing<span class="_"> </span>(SPM)<span class="_ _7"> </span>[<span class="fc3">39</span>],<span class="_"> </span>and<span class="_"> </span>SPM's<span class="_ _8"> </span>original<span class="_"> </span>method<span class="_ _7"> </span>was<span class="_"> </span>to<span class="_ _8"> </span>split<span class="_"> </span>fea-</div><div class="t m0 x10 h9 yd6 ff2 fs4 fc0 sc0 ls0 ws0">ture<span class="_ _4"> </span>map<span class="_ _3"> </span>into<span class="_ _3"> </span>se<span class="_ _0"></span>veral<span class="_ _3"> </span><span class="ffa">d<span class="_ _6"> </span><span class="ff7">×<span class="_ _6"> </span></span>d<span class="_ _3"> </span></span>equal<span class="_ _4"> </span>blocks,<span class="_ _e"> </span>where<span class="_ _4"> </span><span class="ffa">d<span class="_ _3"> </span></span>can<span class="_ _3"> </span>be</div><div class="t m0 x10 h9 yd7 ff7 fs4 fc0 sc0 ls0 ws0">{<span class="ffc">1<span class="ffa">,<span class="_ _7"> </span></span>2<span class="ffa">,<span class="_ _1a"> </span></span>3<span class="ffa">,<span class="_ _1a"> </span>...</span></span>}<span class="ff2">,<span class="_"> </span>thus<span class="_ _7"> </span>forming<span class="_"> </span>spatial<span class="_ _7"> </span>pyramid,<span class="_ _8"> </span>and<span class="_ _8"> </span>then<span class="_ _8"> </span>extract-</span></div><div class="t m0 x10 ha yd8 ff2 fs4 fc0 sc0 ls0 ws0">ing<span class="_ _3"> </span>bag-of-word<span class="_ _3"> </span>features.<span class="_ _d"> </span>SPP<span class="_ _3"> </span>integrates<span class="_ _3"> </span>SPM<span class="_ _e"> </span>into<span class="_ _3"> </span>CNN</div><div class="t m0 x10 ha yd9 ff2 fs4 fc0 sc0 ls0 ws0">and<span class="_ _6"> </span>uses<span class="_ _4"> </span>max-pooling<span class="_ _6"> </span>operation<span class="_ _4"> </span>instead<span
class="_ _6"> </span>of<span class="_ _4"> </span>bag-of-word<span class="_ _6"> </span>op-</div><div class="t m0 x10 ha yda ff2 fs4 fc0 sc0 ls0 ws0">eration.<span class="_ _1"> </span>Since<span class="_ _4"> </span>the<span class="_ _4"> </span>SPP<span class="_ _6"> </span>module<span class="_ _4"> </span>proposed<span class="_ _6"> </span>by<span class="_ _4"> </span>He<span class="_ _4"> </span><span class="ff5">et<span class="_ _6"> </span>al</span>.<span class="_ _f"> </span>[<span class="fc3">25</span>]</div><div class="t m0 x10 ha ydb ff2 fs4 fc0 sc0 ls0 ws0">will<span class="_ _7"> </span>output<span class="_"> </span>one<span class="_ _7"> </span>dimensional<span class="_ _8"> </span>feature<span class="_ _8"> </span>vector<span class="_ _0"></span>,<span class="_ _7"> </span>it<span class="_"> </span>is<span class="_ _7"> </span>infeasible<span class="_ _8"> </span>to</div><div class="t m0 x10 ha y83 ff2 fs4 fc0 sc0 ls0 ws0">be<span class="_"> </span>applied<span class="_ _6"> </span>in<span class="_ _6"> </span>Fully<span class="_"> </span>Conv<span class="_ _0"></span>olutional<span class="_ _6"> </span>Network<span class="_"> </span>(FCN).<span class="_ _6"> </span>Thus<span class="_"> </span>in</div><div class="t m0 x10 ha y84 ff2 fs4 fc0 sc0 ls0 ws0">the<span class="_"> </span>design<span class="_ _6"> </span>of<span class="_ _6"> </span>YOLOv3<span class="_"> </span>[<span class="fc3">63</span>],<span class="_ _6"> </span>Redmon<span class="_ _6"> </span>and<span class="_ _6"> </span>Farhadi<span class="_ _6"> </span>improve</div><div class="t m0 x10 ha y85 ff2 fs4 fc0 sc0 ls0 ws0">SPP<span class="_ _3"> </span>module<span class="_ _e"> </span>to<span class="_ _e"> </span>the<span class="_ _e"> </span>concatenation<span class="_ _3"> </span>of<span class="_ _e"> </span>max-pooling<span class="_ _e"> </span>outputs</div><div class="t m0 x10 h9 y86 ff2 fs4 fc0 sc0 ls0 ws0">with<span class="_ _6"> </span>kernel<span class="_ _6"> </span>size<span 
class="_ _4"> </span><span class="ffa">k<span class="_ _6"> </span><span class="ff7">×<span class="_ _1b"> </span></span>k<span class="_ _17"></span></span>,<span class="_ _4"> </span>where<span class="_ _6"> </span><span class="ffa">k<span class="_ _e"> </span><span class="ffc">=<span class="_ _3"> </span><span class="ff7">{</span>1</span>,<span class="_ _1a"> </span><span class="ffc">5</span>,<span class="_ _7"> </span><span class="ffc">9</span>,<span class="_ _1a"> </span><span class="ffc">13<span class="ff7">}</span></span></span>,<span class="_ _4"> </span>and<span class="_ _6"> </span>stride</div><div class="t m0 x10 h9 y87 ff2 fs4 fc0 sc0 ls0 ws0">equals<span class="_"> </span>to<span class="_ _8"> </span>1.<span class="_ _4"> </span>Under<span class="_ _8"> </span>this<span class="_"> </span>design,<span class="_"> </span>a<span class="_ _7"> </span>relatively<span class="_ _8"> </span>large<span class="_ _8"> </span><span class="ffa">k<span class="_ _7"> </span><span class="ff7">×<span class="_ _1a"> </span></span>k<span class="_ _6"> </span></span>max-</div><div class="t m0 x10 ha y88 ff2 fs4 fc0 sc0 ls0 ws0">pooling<span class="_"> </span>effecti<span class="_ _0"></span>vely<span class="_"> </span>increases<span class="_ _6"> </span>the<span class="_ _6"> </span>receptiv<span class="_ _0"></span>e<span class="_ _6"> </span>field<span class="_ _6"> </span>of<span class="_"> </span>backbone</div><div class="t m0 x10 ha y89 ff2 fs4 fc0 sc0 ls0 ws0">feature.<span class="_ _4"> </span>After<span class="_ _6"> </span>adding<span class="_"> </span>the<span class="_"> </span>improved<span class="_"> </span>version<span class="_"> </span>of<span class="_"> </span>SPP<span class="_"> </span>module,</div><div class="t m0 x10 ha y8a ff2 fs4 fc0 sc0 ls0 ws0">Y<span class="_ _0"></span>OLOv3-608<span class="_ _e"> </span>upgrades<span class="_ _3"> </span>AP</div><div class="t m0 x39 h8 ydc ff6 fs5 fc0 sc0 ls0 ws0">50</div><div class="t m0 x3a ha y8a ff2 fs4 fc0 sc0
ls0 ws0">by<span class="_ _3"> </span>2.7%<span class="_ _3"> </span>on<span class="_ _e"> </span>the<span class="_ _3"> </span>MS<span class="_ _3"> </span>COCO</div><div class="t m0 x10 ha y8b ff2 fs4 fc0 sc0 ls0 ws0">object<span class="_"> </span>detection<span class="_"> </span>task<span class="_"> </span>at<span class="_"> </span>the<span class="_"> </span>cost<span class="_"> </span>of<span class="_ _6"> </span>0.5%<span class="_"> </span>extra<span class="_"> </span>computation.</div><div class="t m0 x10 ha y8c ff2 fs4 fc0 sc0 ls0 ws0">The<span class="_ _6"> </span>difference<span class="_"> </span>in<span class="_ _6"> </span>operation<span class="_ _6"> </span>between<span class="_ _6"> </span>ASPP<span class="_ _4"> </span>[<span class="fc3">5</span>]<span class="_"> </span>module<span class="_ _4"> </span>and</div><div class="t m0 x10 h9 y8d ff2 fs4 fc0 sc0 ls0 ws0">improv<span class="_ _0"></span>ed<span class="_"> </span>SPP<span class="_ _7"> </span>module<span class="_ _8"> </span>is<span class="_ _7"> </span>mainly<span class="_"> </span>from<span class="_ _7"> </span>the<span class="_ _8"> </span>original<span class="_ _8"> </span><span class="ffa">k<span class="_ _18"></span><span class="ff7">×<span class="_ _18"></span></span>k<span class="_ _1b"> </span></span>ker-</div><div class="t m0 x10 h9 y8e ff2 fs4 fc0 sc0 ls0 ws0">nel<span class="_ _6"> </span>size,<span class="_ _6"> </span>max-pooling<span class="_ _4"> </span>of<span class="_ _6"> </span>stride<span class="_ _6"> </span>equals<span class="_ _6"> </span>to<span class="_ _6"> </span>1<span class="_ _4"> </span>to<span class="_ _6"> </span>sev<span class="_ _0"></span>eral<span class="_ _6"> </span><span class="ffc">3<span class="_ _1b"> </span><span class="ff7">×<span class="_ _1b"> </span></span>3</span></div><div class="t m0 x10 he y8f ff2 fs4 fc0 sc0 ls0 ws0">kernel<span class="_ _6"> </span>size,<span class="_ _4"> </span>dilated<span class="_ _6"> </span>ratio<span class="_ _4"> 
</span>equals<span class="_ _6"> </span>to<span class="_ _6"> </span><span class="ffa">k<span class="_ _17"></span></span>,<span class="_ _4"> </span>and<span class="_ _4"> </span>stride<span class="_ _6"> </span>equals<span class="_ _6"> </span>to<span class="_ _4"> </span>1</div><div class="t m0 x10 ha y90 ff2 fs4 fc0 sc0 ls0 ws0">in<span class="_"> </span>dilated<span class="_ _8"> </span>con<span class="_ _0"></span>volution<span class="_ _8"> </span>operation.<span class="_ _4"> </span>RFB<span class="_ _8"> </span>module<span class="_"> </span>is<span class="_ _8"> </span>to<span class="_"> </span>use<span class="_ _8"> </span>sev-</div><div class="t m0 x10 h9 y91 ff2 fs4 fc0 sc0 ls0 ws0">eral<span class="_ _7"> </span>dilated<span class="_"> </span>con<span class="_ _0"></span>v<span class="_ _0"></span>olutions<span class="_"> </span>of<span class="_ _7"> </span><span class="ffa">k<span class="_ _18"></span><span class="ff7">×<span class="_ _18"></span></span>k<span class="_ _1b"> </span></span>kernel,<span class="_ _7"> </span>dilated<span class="_"> </span>ratio<span class="_ _7"> </span>equals</div><div class="t m0 x10 he y92 ff2 fs4 fc0 sc0 ls0 ws0">to<span class="_"> </span><span class="ffa">k<span class="_ _17"></span></span>,<span class="_"> </span>and<span class="_ _6"> </span>stride<span class="_"> </span>equals<span class="_"> </span>to<span class="_ _6"> </span>1<span class="_"> </span>to<span class="_"> </span>obtain<span class="_ _6"> </span>a<span class="_"> </span>more<span class="_"> </span>comprehensive</div><div class="t m0 x10 ha y93 ff2 fs4 fc0 sc0 ls0 ws0">spatial<span class="_ _6"> </span>coverage<span class="_ _6"> </span>than<span class="_ _6"> </span>ASPP<span class="_ _2"></span>.<span class="_ _4"> </span>RFB<span class="_ _6"> </span>[<span class="fc3">47</span>]<span class="_ _4"> </span>only<span class="_ _6"> </span>costs<span class="_ _4"> </span>7%<span class="_ _6"> </span>extra</div><div class="t m0 x10 ha y94 ff2 fs4 fc0 
sc0 ls0 ws0">inference<span class="_ _6"> </span>time<span class="_ _4"> </span>to<span class="_ _6"> </span>increase<span class="_ _4"> </span>the<span class="_ _6"> </span>AP</div><div class="t m0 x3b h8 ydd ff6 fs5 fc0 sc0 ls0 ws0">50</div><div class="t m0 x3c ha y94 ff2 fs4 fc0 sc0 ls0 ws0">of<span class="_ _6"> </span>SSD<span class="_ _4"> </span>on<span class="_ _6"> </span>MS<span class="_ _4"> </span>COCO</div><div class="t m0 x10 ha y95 ff2 fs4 fc0 sc0 ls0 ws0">by<span class="_"> </span>5.7%.</div><div class="t m0 xf ha y20 ff2 fs4 fc0 sc0 ls0 ws0">The<span class="_ _4"> </span>attention<span class="_ _6"> </span>module<span class="_ _4"> </span>that<span class="_ _4"> </span>is<span class="_ _4"> </span>often<span class="_ _6"> </span>used<span class="_ _4"> </span>in<span class="_ _4"> </span>object<span class="_ _4"> </span>detec-</div><div class="t m0 x10 ha y21 ff2 fs4 fc0 sc0 ls0 ws0">tion<span class="_ _7"> </span>is<span class="_ _7"> </span>mainly<span class="_ _8"> </span>di<span class="_ _0"></span>vided<span class="_ _8"> </span>into<span class="_ _7"> </span>channel-wise<span class="_ _7"> </span>attention<span class="_ _8"> </span>and<span class="_ _7"> </span>point-</div><div class="t m0 x10 ha y22 ff2 fs4 fc0 sc0 ls0 ws0">wise<span class="_ _3"> </span>attention,<span class="_ _e"> </span>and<span class="_ _3"> </span>the<span class="_ _3"> </span>representativ<span class="_ _0"></span>es<span class="_ _e"> </span>of<span class="_ _3"> </span>these<span class="_ _3"> </span>two<span class="_ _3"> </span>atten-</div><div class="t m0 x10 ha yde ff2 fs4 fc0 sc0 ls0 ws0">tion<span class="_"> </span>models<span class="_"> </span>are<span class="_"> </span>Squeeze-and-Excitation<span class="_"> </span>(SE)<span class="_"> </span>[<span class="fc3">29</span>]<span class="_ _8"> </span>and<span class="_"> </span>Spa-</div><div class="t m0 x10 ha y24 ff2 fs4 fc0 sc0 ls0 ws0">tial<span class="_ _4"> </span>Attention<span class="_ _4"> 
</span>Module<span class="_ _6"> </span>(SAM)<span class="_ _4"> </span>[<span class="fc3">85</span>],<span class="_ _3"> </span>respecti<span class="_ _0"></span>vely<span class="_ _0"></span>.<span class="_ _f"> </span>Although</div><div class="t m0 x10 ha y25 ff2 fs4 fc0 sc0 ls0 ws0">SE<span class="_ _6"> </span>module<span class="_ _4"> </span>can<span class="_ _6"> </span>improve<span class="_ _6"> </span>the<span class="_ _6"> </span>power<span class="_ _6"> </span>of<span class="_ _4"> </span>ResNet50<span class="_ _6"> </span>in<span class="_ _4"> </span>the<span class="_ _6"> </span>Im-</div><div class="t m0 x10 ha y26 ff2 fs4 fc0 sc0 ls0 ws0">ageNet<span class="_ _4"> </span>image<span class="_ _3"> </span>classification<span class="_ _3"> </span>task<span class="_ _4"> </span>1%<span class="_ _3"> </span>top-1<span class="_ _4"> </span>accuracy<span class="_ _3"> </span>at<span class="_ _4"> </span>the</div><div class="t m0 x10 ha y27 ff2 fs4 fc0 sc0 ls0 ws0">cost<span class="_"> </span>of<span class="_ _6"> </span>only<span class="_"> </span>increasing<span class="_ _6"> </span>the<span class="_ _6"> </span>computational<span class="_"> </span>effort<span class="_"> </span>by<span class="_ _6"> </span>2%,<span class="_"> </span>but</div><div class="t m0 x10 ha y28 ff2 fs4 fc0 sc0 ls0 ws0">on<span class="_ _5"> </span>a<span class="_ _5"> </span>GPU<span class="_ _1"> </span>usually<span class="_ _5"> </span>it<span class="_ _5"> </span>will<span class="_ _1"> </span>increase<span class="_ _5"> </span>the<span class="_ _5"> </span>inference<span class="_ _1"> </span>time<span class="_ _5"> </span>by</div><div class="t m0 x10 ha y29 ff2 fs4 fc0 sc0 ls0 ws0">about<span class="_ _4"> </span>10%,<span class="_ _3"> </span>so<span class="_ _4"> </span>it<span class="_ _4"> </span>is<span class="_ _3"> </span>more<span class="_ _4"> </span>appropriate<span class="_ _4"> </span>to<span class="_ _3"> </span>be<span class="_ _4"> </span>used<span class="_ _4"> 
</span>in<span class="_ _4"> </span>mobile</div><div class="t m0 x10 ha y2a ff2 fs4 fc0 sc0 ls0 ws0">devices.<span class="_ _3"> </span>But<span class="_"> </span>for<span class="_ _6"> </span>SAM,<span class="_ _6"> </span>it<span class="_"> </span>only<span class="_ _6"> </span>needs<span class="_ _6"> </span>to<span class="_ _6"> </span>pay<span class="_"> </span>0.1%<span class="_ _6"> </span>extra<span class="_"> </span>cal-</div><div class="t m0 x10 ha y2b ff2 fs4 fc0 sc0 ls0 ws0">culation<span class="_"> </span>and<span class="_"> </span>it<span class="_"> </span>can<span class="_"> </span>improv<span class="_ _0"></span>e<span class="_"> </span>ResNet50-SE<span class="_"> </span>0.5%<span class="_"> </span>top-1<span class="_"> </span>accu-</div><div class="t m0 x10 ha y2c ff2 fs4 fc0 sc0 ls0 ws0">racy<span class="_"> </span>on<span class="_"> </span>the<span class="_"> </span>ImageNet<span class="_ _6"> </span>image<span class="_"> </span>classification<span class="_ _6"> </span>task.<span class="_ _3"> </span>Best<span class="_ _6"> </span>of<span class="_"> </span>all,</div><div class="t m0 x10 ha y2d ff2 fs4 fc0 sc0 ls0 ws0">it<span class="_"> </span>does<span class="_"> </span>not<span class="_"> </span>affect<span class="_"> </span>the<span class="_"> </span>speed<span class="_"> </span>of<span class="_"> </span>inference<span class="_"> </span>on<span class="_"> </span>the<span class="_"> </span>GPU<span class="_"> </span>at<span class="_"> </span>all.</div><div class="t m0 x14 ha y6f ff2 fs4 fc0 sc0 ls0 ws0">In<span class="_ _7"> </span>terms<span class="_"> </span>of<span class="_ _7"> </span>feature<span class="_ _8"> </span>integration,<span class="_ _7"> </span>the<span class="_"> </span>early<span class="_ _7"> </span>practice<span class="_ _7"> </span>is<span class="_"> </span>to<span class="_ _7"> </span>use</div><div class="t m0 x13 ha y9b ff2 fs4 fc0 sc0 ls0 ws0">skip<span class="_"> </span>connection<span class="_ _7"> </span>[<span 
class="fc3">51</span>]<span class="_"> </span>or<span class="_"> </span>hyper<span class="_ _0"></span>-column<span class="_"> </span>[<span class="fc3">22</span>]<span class="_ _8"> </span>to<span class="_"> </span>inte<span class="_ _0"></span>grate<span class="_"> </span>lo<span class="_ _0"></span>w-</div><div class="t m0 x13 ha y9c ff2 fs4 fc0 sc0 ls0 ws0">lev<span class="_ _0"></span>el<span class="_ _6"> </span>physical<span class="_ _6"> </span>feature<span class="_ _4"> </span>to<span class="_"> </span>high-level<span class="_"> </span>semantic<span class="_ _6"> </span>feature.<span class="_ _5"> </span>Since</div><div class="t m0 x13 ha y9d ff2 fs4 fc0 sc0 ls0 ws0">multi-scale<span class="_ _4"> </span>prediction<span class="_ _3"> </span>methods<span class="_ _3"> </span>such<span class="_ _3"> </span>as<span class="_ _4"> </span>FPN<span class="_ _3"> </span>hav<span class="_ _0"></span>e<span class="_ _3"> </span>become</div><div class="t m0 x13 ha y9e ff2 fs4 fc0 sc0 ls0 ws0">popular<span class="_ _0"></span>,<span class="_ _3"> </span>many<span class="_ _6"> </span>lightweight<span class="_ _4"> </span>modules<span class="_ _4"> </span>that<span class="_ _4"> </span>integrate<span class="_ _4"> </span>dif<span class="_ _0"></span>ferent</div><div class="t m0 x13 ha y9f ff2 fs4 fc0 sc0 ls0 ws0">feature<span class="_ _4"> </span>pyramid<span class="_ _4"> </span>hav<span class="_ _0"></span>e<span class="_ _4"> </span>been<span class="_ _4"> </span>proposed.<span class="_ _11"> </span>The<span class="_ _4"> </span>modules<span class="_ _4"> </span>of<span class="_ _4"> </span>this</div><div class="t m0 x13 ha ya0 ff2 fs4 fc0 sc0 ls0 ws0">sort<span class="_ _6"> </span>include<span class="_ _6"> </span>SF<span class="_ _0"></span>AM<span class="_ _6"> </span>[<span class="fc3">98</span>],<span class="_ _6"> </span>ASFF<span class="_ _4"> </span>[<span class="fc3">48</span>],<span class="_ _6"> </span>and<span class="_ _6"> </span>BiFPN<span class="_ 
_6"> </span>[<span class="fc3">77</span>].<span class="_ _5"> </span>The</div><div class="t m0 x13 ha ya1 ff2 fs4 fc0 sc0 ls0 ws0">main<span class="_ _8"> </span>idea<span class="_ _8"> </span>of<span class="_ _8"> </span>SF<span class="_ _a"></span>AM<span class="_"> </span>is<span class="_ _7"> </span>to<span class="_"> </span>use<span class="_ _7"> </span>SE<span class="_ _8"> </span>module<span class="_ _8"> </span>to<span class="_ _8"> </span>ex<span class="_ _0"></span>ecute<span class="_"> </span>channel-</div><div class="t m0 x13 ha ya2 ff2 fs4 fc0 sc0 ls0 ws0">wise<span class="_"> </span>le<span class="_ _0"></span>vel<span class="_"> </span>re-weighting<span class="_ _8"> </span>on<span class="_"> </span>multi-scale<span class="_"> </span>concatenated<span class="_ _8"> </span>feature</div><div class="t m0 x13 ha ya3 ff2 fs4 fc0 sc0 ls0 ws0">maps.<span class="_ _5"> </span>As<span class="_"> </span>for<span class="_ _4"> </span>ASFF<span class="_ _a"></span>,<span class="_ _6"> </span>it<span class="_ _6"> </span>uses<span class="_ _6"> </span>softmax<span class="_ _4"> </span>as<span class="_"> </span>point-wise<span class="_ _4"> </span>le<span class="_ _0"></span>vel<span class="_ _6"> </span>re-</div><div class="t m0 x13 ha ya4 ff2 fs4 fc0 sc0 ls0 ws0">weighting<span class="_ _3"> </span>and<span class="_ _e"> </span>then<span class="_ _3"> </span>adds<span class="_ _e"> </span>feature<span class="_ _e"> </span>maps<span class="_ _3"> </span>of<span class="_ _e"> </span>different<span class="_ _3"> </span>scales.</div><div class="t m0 x13 ha ya5 ff2 fs4 fc0 sc0 ls0 ws0">In<span class="_"> </span>BiFPN,<span class="_ _6"> </span>the<span class="_"> </span>multi-input<span class="_ _6"> </span>weighted<span class="_"> </span>residual<span class="_ _6"> </span>connections<span class="_"> </span>are</div><div class="t m0 x13 ha ya6 ff2 fs4 fc0 sc0 ls0 ws0">proposed<span class="_"> </span>to<span class="_ _8"> </span>ex<span class="_ _0"></span>ecute<span 
class="_"> </span>scale-wise<span class="_"> </span>le<span class="_ _0"></span>vel<span class="_"> </span>re-weighting,<span class="_"> </span>and<span class="_ _8"> </span>then</div><div class="t m0 x13 ha ya7 ff2 fs4 fc0 sc0 ls0 ws0">add<span class="_"> </span>feature<span class="_"> </span>maps<span class="_"> </span>of<span class="_"> </span>different<span class="_"> </span>scales.</div><div class="t m0 x14 ha ydf ff2 fs4 fc0 sc0 ls0 ws0">In<span class="_ _4"> </span>the<span class="_ _3"> </span>research<span class="_ _4"> </span>of<span class="_ _3"> </span>deep<span class="_ _4"> </span>learning,<span class="_ _3"> </span>some<span class="_ _3"> </span>people<span class="_ _4"> </span>put<span class="_ _3"> </span>their</div><div class="t m0 x13 ha ye0 ff2 fs4 fc0 sc0 ls0 ws0">focus<span class="_ _e"> </span>on<span class="_ _e"> </span>searching<span class="_ _e"> </span>for<span class="_ _3"> </span>good<span class="_ _e"> </span>activ<span class="_ _0"></span>ation<span class="_ _e"> </span>function.<span class="_ _b"> </span>A<span class="_ _e"> </span>good</div><div class="t m0 x13 ha ye1 ff2 fs4 fc0 sc0 ls0 ws0">activ<span class="_ _0"></span>ation<span class="_ _4"> </span>function<span class="_ _3"> </span>can<span class="_ _3"> </span>make<span class="_ _4"> </span>the<span class="_ _3"> </span>gradient<span class="_ _3"> </span>more<span class="_ _4"> </span>efficiently</div><div class="t m0 x13 ha ye2 ff2 fs4 fc0 sc0 ls0 ws0">propagated,<span class="_ _11"> </span>and<span class="_ _13"> </span>at<span class="_ _f"> </span>the<span class="_ _f"> </span>same<span class="_ _13"> </span>time<span class="_ _f"> </span>it<span class="_ _13"> </span>will<span class="_ _f"> </span>not<span class="_ _13"> </span>cause<span class="_ _f"> </span>too</div><div class="t m0 x13 ha ye3 ff2 fs4 fc0 sc0 ls0 ws0">much<span class="_ _5"> </span>extra<span class="_ _e"> </span>computational<span class="_ _5"> </span>cost.<span class="_ _10"> </span>In<span class="_ 
_5"> </span>2010,<span class="_ _1"> </span>Nair<span class="_ _5"> </span>and<span class="_ _5"> </span>Hin-</div><div class="t m0 x13 ha ye4 ff2 fs4 fc0 sc0 ls0 ws0">ton<span class="_ _3"> </span>[<span class="fc3">56</span>]<span class="_ _4"> </span>propose<span class="_ _3"> </span>ReLU<span class="_ _3"> </span>to<span class="_ _3"> </span>substantially<span class="_ _3"> </span>solve<span class="_ _4"> </span>the<span class="_ _3"> </span>gradient</div><div class="t m0 x13 ha ye5 ff2 fs4 fc0 sc0 ls0 ws0">vanish<span class="_ _3"> </span>problem<span class="_ _5"> </span>which<span class="_ _e"> </span>is<span class="_ _e"> </span>frequently<span class="_ _e"> </span>encountered<span class="_ _5"> </span>in<span class="_ _e"> </span>tradi-</div><div class="t m0 x13 ha ye6 ff2 fs4 fc0 sc0 ls0 ws0">tional<span class="_ _4"> </span>tanh<span class="_ _6"> </span>and<span class="_ _4"> </span>sigmoid<span class="_ _4"> </span>activ<span class="_ _0"></span>ation<span class="_ _4"> </span>function.<span class="_ _1"> </span>Subsequently<span class="_ _0"></span>,</div><div class="t m0 x13 ha ye7 ff2 fs4 fc0 sc0 ls0 ws0">LReLU<span class="_ _7"> </span>[<span class="fc3">54</span>],<span class="_"> </span>PReLU<span class="_ _7"> </span>[<span class="fc3">24</span>],<span class="_ _8"> </span>ReLU6<span class="_ _7"> </span>[<span class="fc3">28</span>],<span class="_"> </span>Scaled<span class="_ _7"> </span>Exponential</div><div class="t m0 x13 ha ye8 ff2 fs4 fc0 sc0 ls0 ws0">Linear<span class="_"> </span>Unit<span class="_"> </span>(SELU)<span class="_"> </span>[<span class="fc3">35</span>],<span class="_"> </span>Swish<span class="_"> </span>[<span class="fc3">59</span>],<span class="_"> </span>hard-Swish<span class="_ _8"> </span>[<span class="fc3">27</span>],<span class="_"> </span>and</div><div class="t m0 x13 ha ye9 ff2 fs4 fc0 sc0 ls0 ws0">Mish<span class="_ _3"> </span>[<span class="fc3">55</span>],<span class="_ _e"> </span>etc.,<span class="_ _e"> 
</span>which<span class="_ _3"> </span>are<span class="_ _3"> </span>also<span class="_ _3"> </span>used<span class="_ _e"> </span>to<span class="_ _3"> </span>solve<span class="_ _3"> </span>the<span class="_ _3"> </span>gradient</div><div class="t m0 x13 ha yea ff2 fs4 fc0 sc0 ls0 ws0">vanish<span class="_"> </span>problem,<span class="_ _6"> </span>hav<span class="_ _0"></span>e<span class="_ _4"> </span>been<span class="_"> </span>proposed.<span class="_ _e"> </span>The<span class="_ _6"> </span>main<span class="_ _6"> </span>purpose<span class="_ _4"> </span>of</div><div class="t m0 x13 ha yeb ff2 fs4 fc0 sc0 ls0 ws0">LReLU<span class="_ _6"> </span>and<span class="_ _4"> </span>PReLU<span class="_ _4"> </span>is<span class="_ _6"> </span>to<span class="_ _4"> </span>solve<span class="_ _6"> </span>the<span class="_ _4"> </span>problem<span class="_ _4"> </span>that<span class="_ _6"> </span>the<span class="_ _4"> </span>gradi-</div><div class="t m0 x13 ha yec ff2 fs4 fc0 sc0 ls0 ws0">ent<span class="_ _4"> </span>of<span class="_ _4"> </span>ReLU<span class="_ _4"> </span>is<span class="_ _4"> </span>zero<span class="_ _6"> </span>when<span class="_ _4"> </span>the<span class="_ _4"> </span>output<span class="_ _4"> </span>is<span class="_ _4"> </span>less<span class="_ _4"> </span>than<span class="_ _4"> </span>zero.<span class="_ _13"> </span>As</div><div class="t m0 x13 ha yed ff2 fs4 fc0 sc0 ls0 ws0">for<span class="_"> </span>ReLU6<span class="_ _6"> </span>and<span class="_ _6"> </span>hard-Swish,<span class="_ _6"> </span>they<span class="_"> </span>are<span class="_"> </span>specially<span class="_ _6"> </span>designed<span class="_ _6"> </span>for</div><div class="t m0 x13 ha yee ff2 fs4 fc0 sc0 ls0 ws0">quantization<span class="_ _3"> </span>networks.<span class="_ _15"> </span>F<span class="_ _0"></span>or<span class="_ _e"> </span>self-normalizing<span class="_ _3"> </span>a<span class="_ _3"> </span>neural<span class="_ _3"> </span>net-</div><div 
class="t m0 x13 ha yef ff2 fs4 fc0 sc0 ls0 ws0">work,<span class="_ _3"> </span>the<span class="_ _3"> </span>SELU<span class="_ _4"> </span>activ<span class="_ _0"></span>ation<span class="_ _3"> </span>function<span class="_ _3"> </span>is<span class="_ _3"> </span>proposed<span class="_ _3"> </span>to<span class="_ _4"> </span>satisfy</div><div class="t m0 x13 ha yf0 ff2 fs4 fc0 sc0 ls0 ws0">the<span class="_"> </span>goal.<span class="_ _6"> </span>One<span class="_"> </span>thing<span class="_"> </span>to<span class="_"> </span>be<span class="_"> </span>noted<span class="_"> </span>is<span class="_ _8"> </span>that<span class="_"> </span>both<span class="_"> </span>Swish<span class="_"> </span>and<span class="_ _8"> </span>Mish</div><div class="t m0 x13 ha yf1 ff2 fs4 fc0 sc0 ls0 ws0">are<span class="_"> </span>continuously<span class="_"> </span>differentiable<span class="_"> </span>acti<span class="_ _0"></span>vation<span class="_"> </span>function.</div><div class="t m0 x14 ha y50 ff2 fs4 fc0 sc0 ls0 ws0">The<span class="_ _5"> </span>post-processing<span class="_ _5"> </span>method<span class="_ _1"> </span>commonly<span class="_ _5"> </span>used<span class="_ _1"> </span>in<span class="_ _5"> </span>deep-</div><div class="t m0 x13 ha y51 ff2 fs4 fc0 sc0 ls0 ws0">learning-based<span class="_"> </span>object<span class="_"> </span>detection<span class="_"> </span>is<span class="_"> </span>NMS,<span class="_"> </span>which<span class="_"> </span>can<span class="_"> </span>be<span class="_"> </span>used</div><div class="t m0 x13 ha yf2 ff2 fs4 fc0 sc0 ls0 ws0">to<span class="_ _13"> </span>filter<span class="_ _11"> </span>those<span class="_ _11"> </span>BBoxes<span class="_ _13"> </span>that<span class="_ _11"> </span>badly<span class="_ _13"> </span>predict<span class="_ _11"> </span>the<span class="_ _11"> </span>same<span class="_ _11"> </span>ob-</div><div class="t m0 x13 ha yf3 ff2 fs4 fc0 sc0 ls0 ws0">ject,<span class="_ _4"> </span>and<span class="_ 
_4"> </span>only<span class="_ _3"> </span>retain<span class="_ _4"> </span>the<span class="_ _4"> </span>candidate<span class="_ _4"> </span>BBoxes<span class="_ _4"> </span>with<span class="_ _4"> </span>higher<span class="_ _4"> </span>re-</div><div class="t m0 x13 ha y1e ff2 fs4 fc0 sc0 ls0 ws0">sponse.<span class="_ _11"> </span>The<span class="_ _4"> </span>way<span class="_ _4"> </span>NMS<span class="_ _4"> </span>tries<span class="_ _3"> </span>to<span class="_ _4"> </span>improve<span class="_ _6"> </span>is<span class="_ _3"> </span>consistent<span class="_ _4"> </span>with</div><div class="t m0 x13 ha y1f ff2 fs4 fc0 sc0 ls0 ws0">the<span class="_ _4"> </span>method<span class="_ _4"> </span>of<span class="_ _4"> </span>optimizing<span class="_ _4"> </span>an<span class="_ _4"> </span>objecti<span class="_ _0"></span>ve<span class="_ _4"> </span>function.<span class="_ _f"> </span>The<span class="_ _4"> </span>orig-</div><div class="t m0 x13 ha y20 ff2 fs4 fc0 sc0 ls0 ws0">inal<span class="_ _4"> </span>method<span class="_ _4"> </span>proposed<span class="_ _4"> </span>by<span class="_ _3"> </span>NMS<span class="_ _4"> </span>does<span class="_ _4"> </span>not<span class="_ _4"> </span>consider<span class="_ _4"> </span>the<span class="_ _3"> </span>con-</div><div class="t m0 x13 ha y21 ff2 fs4 fc0 sc0 ls0 ws0">text<span class="_ _7"> </span>information,<span class="_"> </span>so<span class="_"> </span>Girshick<span class="_ _7"> </span><span class="ff5">et<span class="_"> </span>al</span>.<span class="_ _4"> </span>[<span class="fc3">19</span>]<span class="_ _8"> </span>added<span class="_"> </span>classification</div><div class="t m0 x13 ha yf4 ff2 fs4 fc0 sc0 ls0 ws0">confidence<span class="_ _8"> </span>score<span class="_ _8"> </span>in<span class="_ _8"> </span>R-CNN<span class="_ _8"> </span>as<span class="_ _8"> </span>a<span class="_"> </span>reference,<span class="_ _7"> </span>and<span class="_"> </span>according<span class="_ _7"> 
</span>to</div><div class="t m0 x13 ha yde ff2 fs4 fc0 sc0 ls0 ws0">the<span class="_ _6"> </span>order<span class="_ _6"> </span>of<span class="_ _6"> </span>confidence<span class="_ _6"> </span>score,<span class="_ _4"> </span>greedy<span class="_ _6"> </span>NMS<span class="_ _6"> </span>was<span class="_ _6"> </span>performed</div><div class="t m0 x13 ha y24 ff2 fs4 fc0 sc0 ls0 ws0">in<span class="_ _8"> </span>the<span class="_ _8"> </span>order<span class="_ _8"> </span>of<span class="_ _8"> </span>high<span class="_ _8"> </span>score<span class="_ _8"> </span>to<span class="_ _8"> </span>low<span class="_ _7"> </span>score.<span class="_ _4"> </span>As<span class="_ _7"> </span>for<span class="_"> </span>soft<span class="_ _7"> </span>NMS<span class="_"> </span>[<span class="fc3">1</span>],</div><div class="t m0 x13 ha y25 ff2 fs4 fc0 sc0 ls0 ws0">it<span class="_"> </span>considers<span class="_ _8"> </span>the<span class="_"> </span>problem<span class="_ _8"> </span>that<span class="_"> </span>the<span class="_ _8"> </span>occlusion<span class="_"> </span>of<span class="_ _7"> </span>an<span class="_"> </span>object<span class="_"> </span>may</div><div class="t m0 x13 ha y26 ff2 fs4 fc0 sc0 ls0 ws0">cause<span class="_ _4"> </span>the<span class="_ _3"> </span>degradation<span class="_ _4"> </span>of<span class="_ _4"> </span>confidence<span class="_ _4"> </span>score<span class="_ _3"> </span>in<span class="_ _4"> </span>greedy<span class="_ _3"> </span>NMS</div><div class="t m0 x13 ha yf5 ff2 fs4 fc0 sc0 ls0 ws0">with<span class="_ _3"> </span>IoU<span class="_ _e"> </span>score.<span class="_ _d"> </span>The<span class="_ _3"> </span>DIoU<span class="_ _3"> </span>NMS<span class="_ _e"> </span>[<span class="fc3">99</span>]<span class="_ _3"> </span>developers'<span class="_ _3"> </span>way<span class="_ _3"> </span>of</div><div class="t m0 x13 ha yf6 ff2 fs4 fc0 sc0 ls0 ws0">thinking<span class="_ _3"> </span>is<span class="_ _3"> </span>to<span class="_ 
_3"> </span>add<span class="_ _3"> </span>the<span class="_ _3"> </span>information<span class="_ _3"> </span>of<span class="_ _3"> </span>the<span class="_ _3"> </span>center<span class="_ _3"> </span>point<span class="_ _3"> </span>dis-</div><div class="t m0 x13 ha y29 ff2 fs4 fc0 sc0 ls0 ws0">tance<span class="_ _e"> </span>to<span class="_ _e"> </span>the<span class="_ _e"> </span>BBox<span class="_ _5"> </span>screening<span class="_ _e"> </span>process<span class="_ _e"> </span>on<span class="_ _e"> </span>the<span class="_ _e"> </span>basis<span class="_ _5"> </span>of<span class="_ _e"> </span>soft</div><div class="t m0 x13 ha y2a ff2 fs4 fc0 sc0 ls0 ws0">NMS.<span class="_ _8"> </span>It<span class="_ _8"> </span>is<span class="_ _8"> </span>worth<span class="_ _8"> </span>mentioning<span class="_ _8"> </span>that,<span class="_"> </span>since<span class="_ _7"> </span>none<span class="_"> </span>of<span class="_ _7"> </span>above<span class="_ _7"> </span>post-</div><div class="t m0 x13 ha y2b ff2 fs4 fc0 sc0 ls0 ws0">processing<span class="_ _7"> </span>methods<span class="_"> </span>directly<span class="_ _7"> </span>refer<span class="_ _7"> </span>to<span class="_"> </span>the<span class="_ _7"> </span>captured<span class="_ _7"> </span>image<span class="_"> </span>fea-</div><div class="t m0 x13 ha y2c ff2 fs4 fc0 sc0 ls0 ws0">tures,<span class="_ _5"> </span>post-processing<span class="_ _e"> </span>is<span class="_ _e"> </span>no<span class="_ _e"> </span>longer<span class="_ _5"> </span>required<span class="_ _e"> </span>in<span class="_ _e"> </span>the<span class="_ _e"> </span>subse-</div><div class="t m0 x13 ha yf7 ff2 fs4 fc0 sc0 ls0 ws0">quent<span class="_"> </span>dev<span class="_ _0"></span>elopment<span class="_"> </span>of<span class="_"> </span>an<span class="_"> </span>anchor-free<span class="_"> </span>method.</div><div class="t m0 x17 ha y44 ff2 fs4 fc0 sc0 ls0 ws0">4</div><a class="l" rel='nofollow' onclick='return false;'><div 
class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div class="d m2"></div></a><a class="l" rel='nofollow' onclick='return false;'><div 
class="d m2"></div></a></div><div class="pi" data-data='{"ctm":[1.568627,0.000000,0.000000,1.568627,0.000000,0.000000]}'></div></div>