<!doctype html>
<html lang="en" xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta charset="utf-8">
<!-- Document head for a slide deck exported to HTML by pdf2htmlEX (see the generator meta below).
     The page markup relies on short positional class names (pf, pc, t, xN, yN, ...); these are
     presumably defined by the three stylesheets linked here (confirm against raw.css). -->
<meta name="generator" content="pdf2htmlEX">
<!-- "chrome=1" targeted the retired Chrome Frame plug-in and is rejected by validators; IE=edge is the only meaningful value. -->
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<link rel="stylesheet" href="https://static.pudn.com/base/css/base.min.css">
<link rel="stylesheet" href="https://static.pudn.com/base/css/fancy.min.css">
<link rel="stylesheet" href="https://static.pudn.com/prod/directory_preview_static/6390354a8449b3069d9bdd1c/raw.css">
<!-- These two scripts must stay synchronous: the inline script below uses the pdf2htmlEX global right away. -->
<script src="https://static.pudn.com/base/js/compatibility.min.js"></script>
<script src="https://static.pudn.com/base/js/pdf2htmlEX.min.js"></script>
<script>
// Create the default viewer instance. This stays best-effort (the pages are
// static markup), but a failure is now reported instead of silently discarded.
try {
  pdf2htmlEX.defaultViewer = new pdf2htmlEX.Viewer({});
} catch (e) {
  if (window.console && console.warn) {
    console.warn("pdf2htmlEX viewer could not be initialised:", e);
  }
}
</script>
<!-- Title taken from the deck's own title slide (page 1) and chapter heading (page 2). -->
<title>Data Warehousing and Data Mining — Chapter 8: Web Data Warehousing and Web Quality Mining</title>
</head>
<body>
<!-- Outline sidebar: hidden by its inline style, and the outline container is empty in this export. -->
<div id="sidebar" style="display: none">
<div id="outline">
</div>
</div>
<!-- Page 1 (data-page-no="1"): title slide. Background image bg1.jpg plus a text layer carrying the
     course title, author, e-mail address, affiliation and date. -->
<div id="pf1" class="pf w0 h0" data-page-no="1"><div class="pc pc1 w0 h0"><img class="bi x0 y0 w1 h1" alt="" src="https://static.pudn.com/prod/directory_preview_static/6390354a8449b3069d9bdd1c/bg1.jpg"><div class="c x0 y1 w2 h2"><div class="t m0 x1 h3 y2 ff1 fs0 fc0 sc0 ls0 ws0">Data W<span class="_ _0"></span>arehousing</div><div class="t m0 x2 h3 y3 ff1 fs0 fc0 sc0 ls0 ws0">and Da<span class="_ _1"></span>ta Mining</div><div class="t m0 x3 h4 y4 ff1 fs1 fc1 sc0 ls0 ws0">Dr. ZHU Yan</div><div class="t m0 x4 h5 y5 ff2 fs2 fc2 sc0 ls0 ws0"> </div><div class="t m0 x5 h6 y6 ff2 fs3 fc3 sc0 ls0 ws0">yan.zhu@n<span class="_ _1"></span>ec.sw<span class="_ _1"></span>jtu.edu.cn</div><div class="t m0 x6 h6 y7 ff2 fs3 fc3 sc0 ls0 ws0">SIST, SWJTU</div><div class="t m0 x7 h7 y8 ff2 fs4 fc3 sc0 ls0 ws0">June 2008</div></div></div><div class="pi" data-data='{"ctm":[1.333333,0.000000,0.000000,1.333333,0.000000,0.000000]}'></div></div>
<!-- The body and html end tags that used to follow page 1 were removed on purpose: the remaining
     page frames come after this point and must stay inside the body. Both end tags are optional
     in HTML, so the parser closes the document at end of file. -->
<!-- Page 2 (data-page-no="2"): chapter outline slide. Background image bg2.jpg, running header
     "Data Warehousing Y. Zhu", page number "2", the Chapter 8 title, and the topic list.
     Each topic is preceded by an empty ff4 text div; presumably these held bullet glyphs that
     did not survive the export (TODO confirm against the source PDF).
     The trailing .pi div carries a ctm array in its data-data attribute. -->
<div id="pf2" class="pf w0 h0" data-page-no="2"><div class="pc pc2 w0 h0"><img class="bi x0 y0 w1 h1" alt="" src="https://static.pudn.com/prod/directory_preview_static/6390354a8449b3069d9bdd1c/bg2.jpg"><div class="t m0 x8 h5 y9 ff2 fs2 fc4 sc0 ls0 ws0">Data W<span class="_ _2"></span>arehousing Y<span class="_ _3"></span>. Zhu</div><div class="t m0 x9 h8 ya ff3 fs4 fc3 sc0 ls0 ws0">2</div><div class="c x0 y1 w2 h2"><div class="t m0 xa h9 yb ff1 fs5 fc5 sc0 ls0 ws0">Chapter 8 </div><div class="t m0 xa h9 yc ff1 fs5 fc5 sc0 ls0 ws0">Web D<span class="_ _4"></span>ata Warehous<span class="_ _4"></span>ing </div><div class="t m0 xa h9 yd ff1 fs5 fc5 sc0 ls0 ws0">and Web<span class="_ _4"></span> Quality Mining</div><div class="t m0 xb ha ye ff4 fs6 fc6 sc0 ls0 ws0"></div><div class="t m0 xc hb yf ff2 fs7 fc7 sc0 ls0 ws0">Web sources/data<span class="_ _1"></span> used for decision-</div><div class="t m0 xc hb y10 ff2 fs7 fc7 sc0 ls0 ws0">making </div><div class="t m0 xb ha y11 ff4 fs6 fc6 sc0 ls0 ws0"></div><div class="t m0 xc hb y12 ff2 fs7 fc7 sc0 ls0 ws0">Web data <span class="_ _1"></span>warehousing</div><div class="t m0 xb ha y13 ff4 fs6 fc6 sc0 ls0 ws0"></div><div class="t m0 xc hb y14 ff2 fs7 fc7 sc0 ls0 ws0">Challenges of Web data<span class="_ _1"></span> quality </div><div class="t m0 xb ha y15 ff4 fs6 fc6 sc0 ls0 ws0"></div><div class="t m0 xc hb y16 ff2 fs7 fc7 sc0 ls0 ws0">Web data <span class="_ _1"></span>quality mining</div><div class="t m0 xd hc y17 ff4 fs8 fc8 sc0 ls0 ws0"></div><div class="t m0 xe hd y18 ff2 fs9 fc7 sc0 ls0 ws0">Quality Evaluation</div><div class="t m0 xd hc y19 ff4 fs8 fc8 sc0 ls0 ws0"></div><div class="t m0 xe hd y1a ff2 fs9 fc7 sc0 ls0 ws0">Fuzzy Qu<span class="_ _4"></span>ality Evaluation<span class="_ _1"></span><span class="ff5"> </span></div></div></div><div class="pi" data-data='{"ctm":[1.333333,0.000000,0.000000,1.333333,0.000000,0.000000]}'></div></div>
<!-- Page 3 (data-page-no="3"): slide headed "Multiple Usages of Web Sources/Data".
     The running header and page number are emitted as many short "fca sc0" spans of one or two
     characters each; keep them as they are, since spacing inside the text divs may be significant
     under the external stylesheet.
     NOTE(review): the only body text besides the heading is a single "?"; this may be a
     placeholder for a glyph that could not be mapped, or a literal question mark. Confirm
     against the source PDF. -->
<div id="pf3" class="pf w0 h0" data-page-no="3"><div class="pc pc3 w0 h0"><img class="bi x0 y0 w1 h1" alt="" src="https://static.pudn.com/prod/directory_preview_static/6390354a8449b3069d9bdd1c/bg3.jpg"><div class="t m0 x8 h5 y9 ff2 fs2 fc4 sc0 ls0 ws0"><span class="fca sc0">Da</span><span class="fca sc0">ta</span><span class="fca sc0"> </span><span class="fca sc0">W</span><span class="_ _2"></span><span class="fca sc0">a</span><span class="fca sc0">r</span><span class="fca sc0">e</span><span class="fca sc0">h</span><span class="fca sc0">o</span><span class="fca sc0">u</span><span class="fca sc0">si</span><span class="fca sc0">n</span><span class="fca sc0">g</span><span class="fca sc0"> </span><span class="fca sc0"> </span><span class="fca sc0"> </span><span class="fca sc0"> </span><span class="fca sc0"> </span><span class="fca sc0"> </span><span class="fca sc0"> </span><span class="fca sc0"> </span><span class="fca sc0">Y</span><span class="_ _3"></span><span class="fca sc0">.</span><span class="fca sc0"> </span><span class="fca sc0">Zh</span><span class="fca sc0">u</span></div><div class="t m0 x9 h8 ya ff3 fs4 fc3 sc0 ls0 ws0"><span class="fca sc0">3</span></div><div class="c x0 y1 w2 h2"><div class="t m0 xf hb y1b ff1 fs7 fc5 sc0 ls0 ws0">Mult<span class="_ _1"></span>iple Usages<span class="_ _1"></span> of Web </div><div class="t m0 xf hb y1c ff1 fs7 fc5 sc0 ls0 ws0">Sources/Data</div><div class="t m0 x10 he y1d ff2 fsa fc9 sc0 ls0 ws0"><span class="fca sc0">?</span></div></div></div><div class="pi" data-data='{"ctm":[1.333333,0.000000,0.000000,1.333333,0.000000,0.000000]}'></div></div>
<!-- Page 4 (data-page-no="4"): slide headed "Structure of a Web Data Warehousing System".
     The text layer holds four bulleted statements about MIX (each bullet character sits in its
     own text div ahead of the statement's lines) and a separate two-line "MIBIA System" label.
     The markup below is one element: the class attribute of the y23 text div wraps across the
     line break, so do not join or re-split these two lines without checking that attribute. -->
<div id="pf4" class="pf w0 h0" data-page-no="4"><div class="pc pc4 w0 h0"><img class="bi x0 y0 w1 h1" alt="" src="https://static.pudn.com/prod/directory_preview_static/6390354a8449b3069d9bdd1c/bg4.jpg"><div class="t m0 x8 h5 y9 ff2 fs2 fc4 sc0 ls0 ws0"><span class="fca sc0">Da</span><span class="fca sc0">ta</span><span class="fca sc0"> </span><span class="fca sc0">W</span><span class="_ _2"></span><span class="fca sc0">a</span><span class="fca sc0">r</span><span class="fca sc0">e</span><span class="fca sc0">h</span><span class="fca sc0">o</span><span class="fca sc0">u</span><span class="fca sc0">si</span><span class="fca sc0">n</span><span class="fca sc0">g</span><span class="fca sc0"> </span><span class="fca sc0"> </span><span class="fca sc0"> </span><span class="fca sc0"> </span><span class="fca sc0"> </span><span class="fca sc0"> </span><span class="fca sc0"> </span><span class="fca sc0"> </span><span class="fca sc0">Y</span><span class="_ _3"></span><span class="fca sc0">.</span><span class="fca sc0"> </span><span class="fca sc0">Zh</span><span class="fca sc0">u</span></div><div class="t m0 x9 h8 ya ff3 fs4 fc3 sc0 ls0 ws0"><span class="fca sc0">4</span></div><div class="c x0 y1 w2 h2"><div class="t m0 xf hb y1e ff1 fs7 fc5 sc0 ls0 ws0">Structure of a Web <span class="_ _1"></span>Data<span class="fca sc0"> </span></div><div class="t m0 xf hb y1f ff1 fs7 fc5 sc0 ls0 ws0">Warehousing <span class="_ _1"></span>S<span class="fca sc0">y</span><span class="fca sc0">s</span><span class="fca sc0">t</span><span class="fca sc0">em</span></div><div class="t m0 x11 hf y20 ff2 fsb fc9 sc0 ls0 ws0">•</div><div class="t m0 x12 hf y21 ff1 fsb fc9 sc0 ls0 ws0"> MIX<span class="ff2">:<span class="fcb"> </span></span>M<span class="ff2">etadata based </span></div><div class="t m0 x11 hf y22 ff1 fsb fc9 sc0 ls0 ws0">I<span class="ff2">ntegration model for <span class="_ _1"></span>data <span class="_ _1"></span></span>X<span class="ff2">-</span></div><div class="t m0 x11 hf y23 
ff2 fsb fc9 sc0 ls0 ws0">change (<span class="ff6">Bornhövd, PhD <span class="_ _1"></span>thesis, </span></div><div class="t m0 x11 hf y24 ff6 fsb fc9 sc0 ls0 ws0">2001<span class="ff2">)</span></div><div class="t m0 x11 hf y25 ff2 fsb fc9 sc0 ls0 ws0">•</div><div class="t m0 x12 hf y26 ff2 fsb fc9 sc0 ls0 ws0"> represent Web data with </div><div class="t m0 x11 hf y27 ff2 fsb fc9 sc0 ls0 ws0">semantic context together as a </div><div class="t m0 x11 hf y28 ff2 fsb fc9 sc0 ls0 ws0">semantic object</div><div class="t m0 x11 hf y29 ff2 fsb fc9 sc0 ls0 ws0">•</div><div class="t m0 x12 hf y2a ff2 fsb fc9 sc0 ls0 ws0"> Integration based on </div><div class="t m0 x11 hf y2b ff2 fsb fc9 sc0 ls0 ws0">ontology</div><div class="t m0 x11 hf y2c ff2 fsb fc9 sc0 ls0 ws0">•</div><div class="t m0 x12 hf y2d ff2 fsb fc9 sc0 ls0 ws0"> The MIX-Object has a tree-</div><div class="t m0 x11 hf y2e ff2 fsb fc9 sc0 ls0 ws0">structure, which can be </div><div class="t m0 x11 hf y2f ff2 fsb fc9 sc0 ls0 ws0">represented by <span class="ff1">Labeled Tree</span> </div><div class="t m0 x11 hf y30 ff2 fsb fc9 sc0 ls0 ws0">(<span class="ff6">Buneman et al, VLDB<span class="_ _1"></span> Journal, </span></div><div class="t m0 x11 hf y31 ff6 fsb fc9 sc0 ls0 ws0">2000<span class="ff2">).</span></div><div class="t m0 x13 hf y32 ff1 fsb fc9 sc0 ls0 ws0">MIBIA </div><div class="t m0 x14 hf y33 ff1 fsb fc9 sc0 ls0 ws0">System</div></div></div><div class="pi" data-data='{"ctm":[1.333333,0.000000,0.000000,1.333333,0.000000,0.000000]}'></div></div>
<!-- Page 5 (data-page-no="5"): slide headed "LT-Structure of MIX-Object".
     Apart from the running header and page number, the heading is the only text in the text
     layer; any diagram on this slide is presumably part of the background image bg5.jpg
     (TODO confirm, and if so consider a descriptive alt text instead of alt=""). -->
<div id="pf5" class="pf w0 h0" data-page-no="5"><div class="pc pc5 w0 h0"><img class="bi x0 y0 w1 h1" alt="" src="https://static.pudn.com/prod/directory_preview_static/6390354a8449b3069d9bdd1c/bg5.jpg"><div class="t m0 x8 h5 y9 ff2 fs2 fc4 sc0 ls0 ws0"><span class="fca sc0">Da</span><span class="fca sc0">ta</span><span class="fca sc0"> </span><span class="fca sc0">W</span><span class="_ _2"></span><span class="fca sc0">a</span><span class="fca sc0">r</span><span class="fca sc0">e</span><span class="fca sc0">h</span><span class="fca sc0">o</span><span class="fca sc0">u</span><span class="fca sc0">si</span><span class="fca sc0">n</span><span class="fca sc0">g</span><span class="fca sc0"> </span><span class="fca sc0"> </span><span class="fca sc0"> </span><span class="fca sc0"> </span><span class="fca sc0"> </span><span class="fca sc0"> </span><span class="fca sc0"> </span><span class="fca sc0"> </span><span class="fca sc0">Y</span><span class="_ _3"></span><span class="fca sc0">.</span><span class="fca sc0"> </span><span class="fca sc0">Zh</span><span class="fca sc0">u</span></div><div class="t m0 x9 h8 ya ff3 fs4 fc3 sc0 ls0 ws0"><span class="fca sc0">5</span></div><div class="c x0 y1 w2 h2"><div class="t m0 xa h9 y34 ff1 fs5 fc5 sc0 ls0 ws0">LT-Str<span class="_ _4"></span>ucture of M<span class="_ _4"></span>IX-Object</div></div></div><div class="pi" data-data='{"ctm":[1.333333,0.000000,0.000000,1.333333,0.000000,0.000000]}'></div></div>