Changeset 33856

Show
Ignore:
Timestamp:
22.01.2020 16:33:31 (4 weeks ago)
Author:
ak19
Message:

Forgot to commit. Last week, Dr Bainbridge properly cropped the SVG flow chart of the Common Crawl to MongoDB process in Inkscape. He also exported a PDF of it cropped to the bounds, which I'm adding to SVN now as well.

Location:
other-projects/maori-lang-detection/journal-paper
Files:
1 added
1 modified

Legend:

Unmodified
Added
Removed
  • other-projects/maori-lang-detection/journal-paper/CommonCrawl_flow.svg

    r33841 r33856  
    1414   viewBox="0 0 210 297" 
    1515   version="1.1" 
    16    id="svg8" 
     16   id="svg3410" 
    1717   inkscape:version="0.92.4 (5da689c313, 2019-01-14)" 
    1818   sodipodi:docname="CommonCrawl_flow2.svg"> 
    1919  <defs 
    20      id="defs2"> 
    21     <marker 
    22        inkscape:stockid="Arrow2Lstart" 
    23        orient="auto" 
    24        refY="0.0" 
    25        refX="0.0" 
    26        id="Arrow2Lstart" 
    27        style="overflow:visible" 
    28        inkscape:isstock="true"> 
    29       <path 
    30          id="path6664" 
    31          style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round;stroke:#000000;stroke-opacity:1;fill:#000000;fill-opacity:1" 
    32          d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z " 
    33          transform="scale(1.1) translate(1,0)" /> 
    34     </marker> 
    35     <filter 
    36        inkscape:collect="always" 
    37        style="color-interpolation-filters:sRGB" 
    38        id="filter6144" 
    39        x="-0.014400269" 
    40        width="1.0288005" 
    41        y="-0.010285577" 
    42        height="1.0205712"> 
    43       <feGaussianBlur 
    44          inkscape:collect="always" 
    45          stdDeviation="0.59036602" 
    46          id="feGaussianBlur6146" /> 
    47     </filter> 
     20     id="defs3404"> 
    4821    <marker 
    4922       inkscape:stockid="Arrow2Lstart" 
     
    5124       refY="0" 
    5225       refX="0" 
    53        id="Arrow2Lstart-2" 
     26       id="Arrow2Lstart-2-8-8-6" 
    5427       style="overflow:visible" 
    5528       inkscape:isstock="true"> 
    5629      <path 
    5730         inkscape:connector-curvature="0" 
    58          id="path6664-1" 
     31         id="path6664-1-8-8-0" 
     32         style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.625;stroke-linejoin:round;stroke-opacity:1" 
     33         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" 
     34         transform="matrix(1.1,0,0,1.1,1.1,0)" /> 
     35    </marker> 
     36    <marker 
     37       inkscape:stockid="Arrow2Lstart" 
     38       orient="auto" 
     39       refY="0" 
     40       refX="0" 
     41       id="Arrow2Lstart-2-9-2-8" 
     42       style="overflow:visible" 
     43       inkscape:isstock="true"> 
     44      <path 
     45         inkscape:connector-curvature="0" 
     46         id="path6664-1-1-3-1" 
     47         style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.625;stroke-linejoin:round;stroke-opacity:1" 
     48         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" 
     49         transform="matrix(1.1,0,0,1.1,1.1,0)" /> 
     50    </marker> 
     51    <marker 
     52       inkscape:stockid="Arrow2Lstart" 
     53       orient="auto" 
     54       refY="0" 
     55       refX="0" 
     56       id="Arrow2Lstart-2-8-8" 
     57       style="overflow:visible" 
     58       inkscape:isstock="true"> 
     59      <path 
     60         inkscape:connector-curvature="0" 
     61         id="path6664-1-8-8" 
     62         style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.625;stroke-linejoin:round;stroke-opacity:1" 
     63         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" 
     64         transform="matrix(1.1,0,0,1.1,1.1,0)" /> 
     65    </marker> 
     66    <marker 
     67       inkscape:stockid="Arrow2Lstart" 
     68       orient="auto" 
     69       refY="0" 
     70       refX="0" 
     71       id="Arrow2Lstart-2-9-2" 
     72       style="overflow:visible" 
     73       inkscape:isstock="true"> 
     74      <path 
     75         inkscape:connector-curvature="0" 
     76         id="path6664-1-1-3" 
     77         style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.625;stroke-linejoin:round;stroke-opacity:1" 
     78         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" 
     79         transform="matrix(1.1,0,0,1.1,1.1,0)" /> 
     80    </marker> 
     81    <marker 
     82       inkscape:stockid="Arrow2Lstart" 
     83       orient="auto" 
     84       refY="0" 
     85       refX="0" 
     86       id="Arrow2Lstart-2-9" 
     87       style="overflow:visible" 
     88       inkscape:isstock="true"> 
     89      <path 
     90         inkscape:connector-curvature="0" 
     91         id="path6664-1-1" 
    5992         style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.625;stroke-linejoin:round;stroke-opacity:1" 
    6093         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" 
     
    81114       refY="0" 
    82115       refX="0" 
    83        id="Arrow2Lstart-2-9" 
     116       id="Arrow2Lstart-2" 
    84117       style="overflow:visible" 
    85118       inkscape:isstock="true"> 
    86119      <path 
    87120         inkscape:connector-curvature="0" 
    88          id="path6664-1-1" 
     121         id="path6664-1" 
    89122         style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.625;stroke-linejoin:round;stroke-opacity:1" 
    90123         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" 
     
    96129       refY="0" 
    97130       refX="0" 
    98        id="Arrow2Lstart-2-9-2" 
     131       id="Arrow2Lstart" 
    99132       style="overflow:visible" 
    100133       inkscape:isstock="true"> 
    101134      <path 
    102135         inkscape:connector-curvature="0" 
    103          id="path6664-1-1-3" 
    104          style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.625;stroke-linejoin:round;stroke-opacity:1" 
    105          d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" 
    106          transform="matrix(1.1,0,0,1.1,1.1,0)" /> 
    107     </marker> 
    108     <marker 
    109        inkscape:stockid="Arrow2Lstart" 
    110        orient="auto" 
    111        refY="0" 
    112        refX="0" 
    113        id="Arrow2Lstart-2-8-8" 
    114        style="overflow:visible" 
    115        inkscape:isstock="true"> 
    116       <path 
    117          inkscape:connector-curvature="0" 
    118          id="path6664-1-8-8" 
    119          style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.625;stroke-linejoin:round;stroke-opacity:1" 
    120          d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" 
    121          transform="matrix(1.1,0,0,1.1,1.1,0)" /> 
    122     </marker> 
    123     <marker 
    124        inkscape:stockid="Arrow2Lstart" 
    125        orient="auto" 
    126        refY="0" 
    127        refX="0" 
    128        id="Arrow2Lstart-2-9-2-8" 
    129        style="overflow:visible" 
    130        inkscape:isstock="true"> 
    131       <path 
    132          inkscape:connector-curvature="0" 
    133          id="path6664-1-1-3-1" 
    134          style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.625;stroke-linejoin:round;stroke-opacity:1" 
    135          d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" 
    136          transform="matrix(1.1,0,0,1.1,1.1,0)" /> 
    137     </marker> 
    138     <marker 
    139        inkscape:stockid="Arrow2Lstart" 
    140        orient="auto" 
    141        refY="0" 
    142        refX="0" 
    143        id="Arrow2Lstart-2-8-8-6" 
    144        style="overflow:visible" 
    145        inkscape:isstock="true"> 
    146       <path 
    147          inkscape:connector-curvature="0" 
    148          id="path6664-1-8-8-0" 
     136         id="path6664" 
    149137         style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.625;stroke-linejoin:round;stroke-opacity:1" 
    150138         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z" 
     
    160148     inkscape:pageshadow="2" 
    161149     inkscape:zoom="0.7" 
    162      inkscape:cx="281.24246" 
    163      inkscape:cy="703.19328" 
     150     inkscape:cx="229.97759" 
     151     inkscape:cy="567.58817" 
    164152     inkscape:document-units="mm" 
    165      inkscape:current-layer="g6386" 
     153     inkscape:current-layer="layer1" 
    166154     showgrid="false" 
    167      inkscape:snap-text-baseline="true" 
    168155     inkscape:window-width="1680" 
    169156     inkscape:window-height="988" 
    170157     inkscape:window-x="-8" 
    171158     inkscape:window-y="-8" 
    172      inkscape:window-maximized="1" 
    173      inkscape:snap-smooth-nodes="true" /> 
     159     inkscape:window-maximized="1" /> 
    174160  <metadata 
    175      id="metadata5"> 
     161     id="metadata3407"> 
    176162    <rdf:RDF> 
    177163      <cc:Work 
     
    180166        <dc:type 
    181167           rdf:resource="http://purl.org/dc/dcmitype/StillImage" /> 
    182         <dc:title /> 
     168        <dc:title></dc:title> 
    183169      </cc:Work> 
    184170    </rdf:RDF> 
     
    188174     inkscape:groupmode="layer" 
    189175     id="layer1"> 
    190     <rect 
    191        id="rect3713" 
    192        width="55.5625" 
    193        height="65.767853" 
    194        x="7.5595236" 
    195        y="8.9821434" 
    196        style="stroke-width:0.26458332;fill:#000000;fill-opacity:1;opacity:0;stroke:#000000;stroke-opacity:1" /> 
    197176    <g 
    198        id="g5994"> 
    199       <text 
    200          xml:space="preserve" 
    201          style="font-style:normal;font-weight:normal;font-size:10.58333302px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332" 
    202          x="36.663689" 
    203          y="24.440805" 
    204          id="text6012"><tspan 
    205            sodipodi:role="line" 
    206            id="tspan6010" 
    207            x="36.663689" 
    208            y="33.804573" 
    209            style="stroke-width:0.26458332" /><tspan 
    210            sodipodi:role="line" 
    211            x="36.663689" 
    212            y="47.033741" 
    213            style="stroke-width:0.26458332" 
    214            id="tspan6014" /></text> 
    215       <text 
    216          xml:space="preserve" 
    217          style="font-style:normal;font-weight:normal;font-size:10.58333302px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332" 
    218          x="39.6875" 
    219          y="47.913689" 
    220          id="text6018"><tspan 
    221            sodipodi:role="line" 
    222            id="tspan6016" 
    223            x="39.6875" 
    224            y="57.277458" 
    225            style="stroke-width:0.26458332" /></text> 
     177       id="g13905" 
     178       transform="translate(-2.6849314,-0.31280524)"> 
     179      <rect 
     180         y="8.9821434" 
     181         x="36.174416" 
     182         height="32.630665" 
     183         width="26.947613" 
     184         id="rect5954" 
     185         style="opacity:1;fill:#64e000;fill-opacity:0;stroke:#000000;stroke-width:0.12860805;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" /> 
    226186      <flowRoot 
    227187         xml:space="preserve" 
    228          id="flowRoot6092" 
    229          style="fill:black;fill-opacity:1;stroke:none;font-family:sans-serif;font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;letter-spacing:0px;word-spacing:0px"><flowRegion 
    230            id="flowRegion6094"><rect 
    231              id="rect6096" 
    232              width="11.428572" 
    233              height="140" 
    234              x="410" 
    235              y="195.37683" /></flowRegion><flowPara 
    236            id="flowPara6098" /></flowRoot>      <flowRoot 
    237          xml:space="preserve" 
    238          id="flowRoot6100" 
    239          style="fill:black;fill-opacity:1;stroke:none;font-family:sans-serif;font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;letter-spacing:0px;word-spacing:0px"><flowRegion 
    240            id="flowRegion6102"><rect 
    241              id="rect6104" 
    242              width="15.714286" 
    243              height="54.285713" 
    244              x="405.71429" 
    245              y="189.66254" /></flowRegion><flowPara 
    246            id="flowPara6106" /></flowRoot>      <flowRoot 
    247          xml:space="preserve" 
    248          id="flowRoot6108" 
    249          style="fill:black;fill-opacity:1;stroke:none;font-family:sans-serif;font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;letter-spacing:0px;word-spacing:0px"><flowRegion 
    250            id="flowRegion6110"><rect 
    251              id="rect6112" 
    252              width="38.57143" 
    253              height="47.142857" 
    254              x="407.14285" 
    255              y="193.94826" /></flowRegion><flowPara 
    256            id="flowPara6114" /></flowRoot>      <g 
    257          id="g6386" 
    258          transform="translate(-3.8146973e-6)"> 
    259         <flowRoot 
    260            transform="matrix(0.26458333,0,0,0.26458333,-33.04705,-17.589917)" 
    261            style="font-style:normal;font-weight:normal;font-size:40px;line-height:1;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;filter:url(#filter6144)" 
    262            id="flowRoot6080" 
    263            xml:space="preserve"><flowRegion 
    264              style="line-height:1" 
    265              id="flowRegion6082"><rect 
    266                style="line-height:1" 
    267                y="106.8054" 
    268                x="262.85715" 
    269                height="208.57144" 
    270                width="107.14282" 
    271                id="rect6084" /></flowRegion><flowPara 
    272              style="font-size:24px;line-height:1" 
    273              id="flowPara6088" /><flowPara 
    274              id="flowPara6090" /></flowRoot>        <g 
    275            id="g13905"> 
    276           <rect 
    277              y="8.9821434" 
    278              x="36.174416" 
    279              height="32.630665" 
    280              width="26.947613" 
    281              id="rect5954" 
    282              style="opacity:1;fill:#64e000;fill-opacity:0;stroke:#000000;stroke-width:0.12860805;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" /> 
    283           <flowRoot 
    284              xml:space="preserve" 
    285              id="flowRoot6219" 
    286              style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:24px;line-height:1.25;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none" 
    287              transform="matrix(0.26458333,0,0,0.26458333,20.323189,-24.166702)"><flowRegion 
    288                id="flowRegion6221"><rect 
    289                  id="rect6223" 
    290                  width="54.285713" 
    291                  height="37.142857" 
    292                  x="310" 
    293                  y="185.37683" /></flowRegion><flowPara 
    294                id="flowPara6225">...</flowPara></flowRoot>          <flowRoot 
    295              transform="matrix(0.26458333,0,0,0.26458333,1.8898848,4.5357143)" 
    296              style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:18.66666603px;line-height:1.25;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none" 
    297              id="flowRoot13798" 
    298              xml:space="preserve"><flowRegion 
    299                id="flowRegion13800"><rect 
    300                  y="33.948257" 
    301                  x="136.72218" 
    302                  height="106.18565" 
    303                  width="94.70639" 
    304                  id="rect13802" /></flowRegion><flowPara 
    305                id="flowPara13804">CC Sep 2018</flowPara><flowPara 
    306                id="flowPara13806">Columnar Index</flowPara><flowPara 
    307                id="flowPara13808" /></flowRoot>          <rect 
    308              y="8.8441896" 
    309              x="69.302986" 
    310              height="32.630665" 
    311              width="26.947613" 
    312              id="rect5954-4" 
    313              style="opacity:1;fill:#64e000;fill-opacity:0;stroke:#000000;stroke-width:0.12860805;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" /> 
    314           <flowRoot 
    315              transform="matrix(0.26458333,0,0,0.26458333,35.018452,4.3977563)" 
    316              style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:18.66666603px;line-height:1.25;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none" 
    317              id="flowRoot13798-0" 
    318              xml:space="preserve"><flowRegion 
    319                id="flowRegion13800-2"><rect 
    320                  y="33.948257" 
    321                  x="136.72218" 
    322                  height="106.18565" 
    323                  width="94.70639" 
    324                  id="rect13802-0" /></flowRegion><flowPara 
    325                id="flowPara13804-8">CC Oct 2018</flowPara><flowPara 
    326                id="flowPara13806-1">Columnar Index</flowPara><flowPara 
    327                id="flowPara13808-6" /></flowRoot>          <rect 
    328              y="8.9197874" 
    329              x="113.52618" 
    330              height="32.630665" 
    331              width="26.947613" 
    332              id="rect5954-1" 
    333              style="opacity:1;fill:#64e000;fill-opacity:0;stroke:#000000;stroke-width:0.12860805;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" /> 
    334           <flowRoot 
    335              transform="matrix(0.26458333,0,0,0.26458333,79.241667,4.4733578)" 
    336              style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:18.66666603px;line-height:1.25;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none" 
    337              id="flowRoot13798-09" 
    338              xml:space="preserve"><flowRegion 
    339                id="flowRegion13800-0"><rect 
    340                  y="33.948257" 
    341                  x="136.72218" 
    342                  height="106.18565" 
    343                  width="94.70639" 
    344                  id="rect13802-07" /></flowRegion><flowPara 
    345                id="flowPara13876">CC Aug 2019</flowPara><flowPara 
    346                id="flowPara13806-6">Columnar Index</flowPara><flowPara 
    347                id="flowPara13808-1" /></flowRoot>        </g> 
    348       </g> 
    349     </g> 
    350     <flowRoot 
    351        xml:space="preserve" 
    352        id="flowRoot5970" 
    353        style="fill:black;fill-opacity:1;stroke:none;font-family:sans-serif;font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;letter-spacing:0px;word-spacing:0px"><flowRegion 
    354          id="flowRegion5972"><rect 
    355            id="rect5974" 
    356            width="312.85715" 
    357            height="85.714287" 
    358            x="154.28572" 
    359            y="568.23395" /></flowRegion><flowPara 
    360          id="flowPara5976" /></flowRoot>    <flowRoot 
    361        xml:space="preserve" 
    362        id="flowRoot5980" 
    363        style="fill:black;fill-opacity:1;stroke:none;font-family:sans-serif;font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;letter-spacing:0px;word-spacing:0px"><flowRegion 
    364          id="flowRegion5982"><rect 
    365            id="rect5984" 
    366            width="128.57143" 
    367            height="78.571426" 
    368            x="1.4285715" 
    369            y="65.376831" /></flowRegion><flowPara 
    370          id="flowPara5986" /></flowRoot>    <flowRoot 
    371        xml:space="preserve" 
    372        id="flowRoot6469" 
    373        style="fill:black;fill-opacity:1;stroke:none;font-family:Arial;font-style:normal;font-weight:normal;font-size:24px;line-height:1.25;letter-spacing:0px;word-spacing:0px;-inkscape-font-specification:'Arial, Normal';font-stretch:normal;font-variant:normal;text-anchor:start;text-align:start;writing-mode:lr;font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal"><flowRegion 
    374          id="flowRegion6471"><rect 
    375            id="rect6473" 
    376            width="192.85715" 
    377            height="91.428566" 
    378            x="90.714287" 
    379            y="612.51971" /></flowRegion><flowPara 
    380          id="flowPara6475" /></flowRoot>    <g 
     188         id="flowRoot6219" 
     189         style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:24px;line-height:1.25;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none" 
     190         transform="matrix(0.26458333,0,0,0.26458333,20.323189,-24.166702)"><flowRegion 
     191           id="flowRegion6221"><rect 
     192             id="rect6223" 
     193             width="54.285713" 
     194             height="37.142857" 
     195             x="310" 
     196             y="185.37683" /></flowRegion><flowPara 
     197           id="flowPara6225">...</flowPara></flowRoot>      <flowRoot 
     198         transform="matrix(0.26458333,0,0,0.26458333,1.8898848,4.5357143)" 
     199         style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:18.66666603px;line-height:1.25;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none" 
     200         id="flowRoot13798" 
     201         xml:space="preserve"><flowRegion 
     202           id="flowRegion13800"><rect 
     203             y="33.948257" 
     204             x="136.72218" 
     205             height="106.18565" 
     206             width="94.70639" 
     207             id="rect13802" /></flowRegion><flowPara 
     208           id="flowPara13804">CC Sep 2018</flowPara><flowPara 
     209           id="flowPara13806">Columnar Index</flowPara><flowPara 
     210           id="flowPara13808" /></flowRoot>      <rect 
     211         y="8.8441896" 
     212         x="69.302986" 
     213         height="32.630665" 
     214         width="26.947613" 
     215         id="rect5954-4" 
     216         style="opacity:1;fill:#64e000;fill-opacity:0;stroke:#000000;stroke-width:0.12860805;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" /> 
     217      <flowRoot 
     218         transform="matrix(0.26458333,0,0,0.26458333,35.018452,4.3977563)" 
     219         style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:18.66666603px;line-height:1.25;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none" 
     220         id="flowRoot13798-0" 
     221         xml:space="preserve"><flowRegion 
     222           id="flowRegion13800-2"><rect 
     223             y="33.948257" 
     224             x="136.72218" 
     225             height="106.18565" 
     226             width="94.70639" 
     227             id="rect13802-0" /></flowRegion><flowPara 
     228           id="flowPara13804-8">CC Oct 2018</flowPara><flowPara 
     229           id="flowPara13806-1">Columnar Index</flowPara><flowPara 
     230           id="flowPara13808-6" /></flowRoot>      <rect 
     231         y="8.9197874" 
     232         x="113.52618" 
     233         height="32.630665" 
     234         width="26.947613" 
     235         id="rect5954-1" 
     236         style="opacity:1;fill:#64e000;fill-opacity:0;stroke:#000000;stroke-width:0.12860805;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" /> 
     237      <flowRoot 
     238         transform="matrix(0.26458333,0,0,0.26458333,79.241667,4.4733578)" 
     239         style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:18.66666603px;line-height:1.25;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none" 
     240         id="flowRoot13798-09" 
     241         xml:space="preserve"><flowRegion 
     242           id="flowRegion13800-0"><rect 
     243             y="33.948257" 
     244             x="136.72218" 
     245             height="106.18565" 
     246             width="94.70639" 
     247             id="rect13802-07" /></flowRegion><flowPara 
     248           id="flowPara13876">CC Aug 2019</flowPara><flowPara 
     249           id="flowPara13806-6">Columnar Index</flowPara><flowPara 
     250           id="flowPara13808-1" /></flowRoot>    </g> 
     251    <g 
     252       transform="translate(-2.6849314,-0.31280524)" 
    381253       id="g8700"> 
    382254      <flowRoot 
     
    502374    <path 
    503375       style="fill:none;stroke:#000000;stroke-width:0.64353597px;stroke-linecap:butt;stroke-linejoin:round;stroke-opacity:1;marker-start:url(#Arrow2Lstart)" 
    504        d="M 91.563371,63.532871 C 91.031116,43.490147 92.909569,43.490147 92.909569,43.490147" 
     376       d="M 88.878446,63.220066 C 88.346192,43.177342 90.224644,43.177342 90.224644,43.177342" 
    505377       id="path6644" 
    506378       inkscape:connector-curvature="0" /> 
    507379    <text 
    508380       xml:space="preserve" 
    509        style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:6.3499999px;line-height:1.25;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332" 
    510        x="169.53333" 
    511        y="61.083336" 
    512        id="text8520"><tspan 
    513          sodipodi:role="line" 
    514          id="tspan8518" 
    515          x="169.53333" 
    516          y="66.803123" 
    517          style="stroke-width:0.26458332" /><tspan 
    518          sodipodi:role="line" 
    519          x="169.53333" 
    520          y="74.740623" 
    521          style="stroke-width:0.26458332" 
    522          id="tspan8526" /><tspan 
    523          sodipodi:role="line" 
    524          x="169.53333" 
    525          y="82.678123" 
    526          style="stroke-width:0.26458332" 
    527          id="tspan8522" /><tspan 
    528          sodipodi:role="line" 
    529          x="169.53333" 
    530          y="90.615623" 
    531          style="stroke-width:0.26458332" 
    532          id="tspan8524" /></text> 
    533     <flowRoot 
    534        xml:space="preserve" 
    535        id="flowRoot8528" 
    536        style="fill:black;fill-opacity:1;stroke:none;font-family:Arial;font-style:normal;font-weight:normal;font-size:24px;line-height:1.25;letter-spacing:0px;word-spacing:0px;-inkscape-font-specification:'Arial, Normal';font-stretch:normal;font-variant:normal;text-anchor:start;text-align:start;writing-mode:lr;font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal"><flowRegion 
    537          id="flowRegion8530"><rect 
    538            id="rect8532" 
    539            width="210.83565" 
    540            height="59.692326" 
    541            x="372.02237" 
    542            y="209.24846" /></flowRegion><flowPara 
    543          id="flowPara8534" /></flowRoot>    <flowRoot 
    544        xml:space="preserve" 
    545        id="flowRoot8538" 
    546        style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:24px;line-height:1.25;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none" 
    547        transform="scale(0.26458333)"><flowRegion 
    548          id="flowRegion8540"><rect 
    549            id="rect8542" 
    550            width="152.53304" 
    551            height="46.467018" 
    552            x="390.92905" 
    553            y="231.56514" /></flowRegion><flowPara 
    554          id="flowPara8544" /></flowRoot>    <text 
    555        xml:space="preserve" 
    556381       style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.93888855px;line-height:1;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:center;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332" 
    557        x="126.03919" 
    558        y="49.118912" 
     382       x="123.35426" 
     383       y="48.80611" 
    559384       id="text8548"><tspan 
    560385         sodipodi:role="line" 
    561          x="126.03919" 
    562          y="49.118912" 
     386         x="123.35426" 
     387         y="48.80611" 
    563388         style="line-height:1;text-align:center;text-anchor:middle;stroke-width:0.26458332" 
    564389         id="tspan8550">content_languages = 'mri'</tspan><tspan 
    565390         sodipodi:role="line" 
    566          x="126.03919" 
    567          y="54.0578" 
     391         x="123.35426" 
     392         y="53.744999" 
    568393         style="line-height:1;text-align:center;text-anchor:middle;stroke-width:0.26458332" 
    569394         id="tspan8558">+</tspan><tspan 
    570395         sodipodi:role="line" 
    571          x="126.03919" 
    572          y="58.996689" 
     396         x="123.35426" 
     397         y="58.683887" 
    573398         style="line-height:1;text-align:center;text-anchor:middle;stroke-width:0.26458332" 
    574399         id="tspan8560">warc to wet</tspan></text> 
    575400    <flowRoot 
    576        transform="matrix(0.26458333,0,0,0.26458333,43.156253,110.17777)" 
     401       transform="matrix(0.26458333,0,0,0.26458333,40.471328,109.86496)" 
    577402       style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:24px;line-height:1.25;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none" 
    578403       id="flowRoot6219-8" 
     
    586411         id="flowPara6225-5">...</flowPara></flowRoot>    <g 
    587412       id="g8667" 
    588        transform="translate(-4.1058568,21.78947)"> 
     413       transform="translate(-6.7907814,21.476664)"> 
    589414      <rect 
    590415         y="124.0766" 
     
    616441    <g 
    617442       id="g8709" 
    618        transform="translate(8.9474833,31.976784)"> 
     443       transform="translate(6.2625586,31.663979)"> 
    619444      <g 
    620445         id="g8744"> 
     
    649474    <g 
    650475       id="g8658" 
    651        transform="translate(-9.8520303,-1.5010822)"> 
     476       transform="translate(-12.536961,-1.8138882)"> 
    652477      <rect 
    653478         y="153.60989" 
     
    679504    <path 
    680505       style="fill:none;stroke:#000000;stroke-width:0.62406325px;stroke-linecap:butt;stroke-linejoin:round;stroke-opacity:1;marker-start:url(#Arrow2Lstart-2)" 
    681        d="M 81.998023,141.73889 C 81.457172,123.19052 83.36598,123.19052 83.36598,123.19052" 
     506       d="m 79.313097,141.42609 c -0.54085,-18.54837 1.367957,-18.54837 1.367957,-18.54837" 
    682507       id="path6644-5" 
    683508       inkscape:connector-curvature="0" /> 
    684509    <path 
    685510       style="fill:none;stroke:#000000;stroke-width:0.64353597px;stroke-linecap:butt;stroke-linejoin:round;stroke-opacity:1;marker-start:url(#Arrow2Lstart-2-8)" 
    686        d="M 94.763655,112.71836 C 111.855,102.23609 112.79422,103.86287 112.79422,103.86287" 
     511       d="m 92.07873,112.40555 c 17.09134,-10.48227 18.03057,-8.85549 18.03057,-8.85549" 
    687512       id="path6644-5-4" 
    688513       inkscape:connector-curvature="0" /> 
    689514    <path 
    690515       style="fill:none;stroke:#000000;stroke-width:0.59350747px;stroke-linecap:butt;stroke-linejoin:round;stroke-opacity:1;marker-start:url(#Arrow2Lstart-2-9)" 
    691        d="m 81.814159,111.48634 c -0.53225,-17.047567 1.3462,-17.047567 1.3462,-17.047567" 
     516       d="m 79.129233,111.17354 c -0.532249,-17.047571 1.3462,-17.047571 1.3462,-17.047571" 
    692517       id="path6644-5-5" 
    693518       inkscape:connector-curvature="0" /> 
     
    696521       id="flowRoot10549" 
    697522       style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.93888855px;line-height:1.25;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332" 
    698        transform="translate(-4.5959653,-47.786576)"><flowRegion 
     523       transform="translate(-7.2808914,-48.099385)"><flowRegion 
    699524         id="flowRegion10551" 
    700525         style="stroke-width:0.26458332"><rect 
     
    710535         style="stroke-width:0.26458332">sites needing custom handling</flowPara></flowRoot>    <g 
    711536       id="g14180" 
    712        transform="translate(-1.6508314,16.447997)"> 
     537       transform="translate(-4.3357614,16.135192)"> 
    713538      <g 
    714539         transform="translate(8.0502882,-22.849676)" 
     
    824649       id="flowRoot10757" 
    825650       style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:18.66666603px;line-height:1.25;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:center;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none" 
    826        transform="matrix(0.26458333,0,0,0.26458333,5.1165915,-20.486313)"><flowRegion 
     651       transform="matrix(0.26458333,0,0,0.26458333,2.4316686,-20.799115)"><flowRegion 
    827652         id="flowRegion10759" 
    828653         style="text-align:center;text-anchor:middle"><rect 
     
    833658           y="509.66254" 
    834659           style="text-align:center;text-anchor:middle" /></flowRegion><flowPara 
    835          id="flowPara10763">CCWETProcessor.java</flowPara></flowRoot>    <flowRoot 
    836        xml:space="preserve" 
    837        id="flowRoot11618" 
    838        style="fill:black;fill-opacity:1;stroke:none;font-family:Arial;font-style:normal;font-weight:normal;font-size:18.66666667px;line-height:1.25;letter-spacing:0px;word-spacing:0px;-inkscape-font-specification:'Arial, Normal';font-stretch:normal;font-variant:normal;text-anchor:start;text-align:start;writing-mode:lr;font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal"><flowRegion 
    839          id="flowRegion11620"><rect 
    840            id="rect11622" 
    841            width="294.28571" 
    842            height="95.714287" 
    843            x="255.71428" 
    844            y="806.80542" /></flowRegion><flowPara 
    845          id="flowPara11624" /></flowRoot>    <flowRoot 
    846        xml:space="preserve" 
    847        id="flowRoot11626" 
    848        style="fill:black;fill-opacity:1;stroke:none;font-family:Arial;font-style:normal;font-weight:normal;font-size:24px;line-height:1.25;letter-spacing:0px;word-spacing:0px;-inkscape-font-specification:'Arial, Normal';font-stretch:normal;font-variant:normal;text-anchor:start;text-align:start;writing-mode:lr;font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal"><flowRegion 
    849          id="flowRegion11628"><rect 
    850            id="rect11630" 
    851            width="178.57143" 
    852            height="70" 
    853            x="364.28571" 
    854            y="835.37683" /></flowRegion><flowPara 
    855          id="flowPara11632" /></flowRoot>    <flowRoot 
    856        xml:space="preserve" 
    857        id="flowRoot11636" 
    858        style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:24px;line-height:1.25;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none" 
    859        transform="matrix(0.26458333,0,0,0.26458333,41.665157,-34.992356)"><flowRegion 
    860          id="flowRegion11638"><rect 
    861            id="rect11640" 
    862            width="174.28572" 
    863            height="61.42857" 
    864            x="340" 
    865            y="826.80542" /></flowRegion><flowPara 
    866          id="flowPara11642" /><flowPara 
    867          id="flowPara11644" /></flowRoot>    <rect 
    868        style="opacity:1;fill:#ffffff;fill-opacity:0.98412697;stroke:#000000;stroke-width:1.00157475;stroke-linejoin:bevel;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1" 
    869        id="rect12432" 
    870        width="0" 
    871        height="0" 
    872        x="0" 
    873        y="0" 
    874        transform="scale(0.26458333)" /> 
    875     <flowRoot 
    876        xml:space="preserve" 
    877        id="flowRoot12548" 
    878        style="fill:#000000;fill-opacity:0;stroke:none;font-family:Arial;font-style:normal;font-weight:normal;font-size:24px;line-height:1.25;letter-spacing:0px;word-spacing:0px;-inkscape-font-specification:'Arial, Normal';font-stretch:normal;font-variant:normal;text-anchor:start;text-align:start;writing-mode:lr;font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;"><flowRegion 
    879          id="flowRegion12550" 
    880          style="fill:#000000;fill-opacity:0;"><rect 
    881            id="rect12552" 
    882            width="96.564629" 
    883            height="62.566341" 
    884            x="432.34528" 
    885            y="875.03229" 
    886            style="fill:#000000;fill-opacity:0;" /></flowRegion><flowPara 
    887          id="flowPara12554" /></flowRoot>    <flowRoot 
    888        xml:space="preserve" 
    889        id="flowRoot12556" 
    890        style="fill:black;fill-opacity:1;stroke:none;font-family:Arial;font-style:normal;font-weight:normal;font-size:18.66666667px;line-height:1.25;letter-spacing:0px;word-spacing:0px;-inkscape-font-specification:'Arial, Normal';font-stretch:normal;font-variant:normal;text-anchor:start;text-align:start;writing-mode:lr;font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal"><flowRegion 
    891          id="flowRegion12558"><rect 
    892            id="rect12560" 
    893            width="97.069702" 
    894            height="66.101875" 
    895            x="431.84021" 
    896            y="871.49677" /></flowRegion><flowPara 
    897          id="flowPara12562" /></flowRoot>    <g 
     660         id="flowPara10763">CCWETProcessor.java</flowPara></flowRoot>    <g 
    898661       id="g12584" 
    899        transform="translate(35.409039,-23.306338)"> 
     662       transform="translate(32.724113,-23.619145)"> 
    900663      <g 
    901664         transform="translate(59.46747,-6.5481034)" 
     
    940703    <path 
    941704       style="fill:none;stroke:#000000;stroke-width:0.61500657px;stroke-linecap:butt;stroke-linejoin:round;stroke-opacity:1;marker-start:url(#Arrow2Lstart-2-9-2)" 
    942        d="m 161.40032,200.57914 c -0.49158,-19.81955 1.24333,-19.81955 1.24333,-19.81955" 
     705       d="m 158.71539,200.26634 c -0.49158,-19.81955 1.24333,-19.81955 1.24333,-19.81955" 
    943706       id="path6644-5-5-7" 
    944707       inkscape:connector-curvature="0" /> 
    945708    <path 
    946709       style="fill:none;stroke:#000000;stroke-width:0.78573805px;stroke-linecap:butt;stroke-linejoin:round;stroke-opacity:1;marker-start:url(#Arrow2Lstart-2-8-8)" 
    947        d="m 110.10792,211.95692 c 30.18095,-0.52693 30.18093,1.33273 30.18093,1.33273" 
     710       d="m 107.42299,211.64412 c 30.18095,-0.52693 30.18093,1.33273 30.18093,1.33273" 
    948711       id="path6644-5-4-0" 
    949712       inkscape:connector-curvature="0" /> 
    950     <flowRoot 
    951        xml:space="preserve" 
    952        id="flowRoot13119" 
    953        style="fill:black;fill-opacity:1;stroke:none;font-family:Arial;font-style:normal;font-weight:normal;font-size:18.66666667px;line-height:1.25;letter-spacing:0px;word-spacing:0px;-inkscape-font-specification:'Arial, Normal';font-stretch:normal;font-variant:normal;text-anchor:start;text-align:start;writing-mode:lr;font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal"><flowRegion 
    954          id="flowRegion13121"><rect 
    955            id="rect13123" 
    956            width="72.730934" 
    957            height="19.19297" 
    958            x="79.814285" 
    959            y="893.01202" /></flowRegion><flowPara 
    960          id="flowPara13125" /></flowRoot>    <g 
     713    <g 
    961714       id="g13297" 
    962        transform="translate(9.4494046,-25.713581)"> 
     715       transform="translate(6.7644786,-26.026385)"> 
    963716      <g 
    964717         transform="translate(-80.712161,97.488904)" 
     
    1089842    <g 
    1090843       id="g12584-4" 
    1091        transform="translate(-83.111041,35.25929)"> 
     844       transform="translate(-85.795971,34.946485)"> 
    1092845      <g 
    1093846         transform="translate(59.46747,-6.5481034)" 
     
    1133886       xml:space="preserve" 
    1134887       style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.93888855px;line-height:1.25;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332" 
    1135        x="128.51186" 
    1136        y="186.63091" 
     888       x="125.82693" 
     889       y="186.3181" 
    1137890       id="text14721"><tspan 
    1138891         sodipodi:role="line" 
    1139892         id="tspan14719" 
    1140          x="128.51186" 
    1141          y="186.63091" 
     893         x="125.82693" 
     894         y="186.3181" 
    1142895         style="stroke-width:0.26458332">Crawl with </tspan><tspan 
    1143896         sodipodi:role="line" 
    1144          x="128.51186" 
    1145          y="192.80452" 
     897         x="125.82693" 
     898         y="192.4917" 
    1146899         style="stroke-width:0.26458332" 
    1147900         id="tspan15630">Apache Nutch</tspan></text> 
    1148901    <path 
    1149902       style="fill:none;stroke:#000000;stroke-width:0.62741137px;stroke-linecap:butt;stroke-linejoin:round;stroke-opacity:1;marker-start:url(#Arrow2Lstart-2-9-2-8)" 
    1150        d="m 41.504639,258.88526 c -0.418819,-24.21071 1.059298,-24.21071 1.059298,-24.21071" 
     903       d="m 38.819713,258.57246 c -0.418819,-24.21071 1.059299,-24.21071 1.059299,-24.21071" 
    1151904       id="path6644-5-5-7-9" 
    1152905       inkscape:connector-curvature="0" /> 
     
    1155908       id="flowRoot15304" 
    1156909       style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:18.66666603px;line-height:1.25;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none" 
    1157        transform="matrix(0.26458333,0,0,0.26458333,-9.0714287,-1.889881)"><flowRegion 
     910       transform="matrix(0.26458333,0,0,0.26458333,-11.756351,-2.2026872)"><flowRegion 
    1158911         id="flowRegion15306"><rect 
    1159912           id="rect15308" 
     
    1167920       id="flowRoot10757-0" 
    1168921       style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:18.66666603px;line-height:1.25;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none" 
    1169        transform="matrix(0.26458333,0,0,0.26458333,9.8528943,103.04083)"><flowRegion 
     922       transform="matrix(0.26458333,0,0,0.26458333,7.1679686,102.72802)"><flowRegion 
    1170923         id="flowRegion10759-1" 
    1171924         style="text-align:start;text-anchor:start"><rect 
     
    1178931         id="flowPara10763-6">NutchTextDumpToMongoDB.java</flowPara><flowPara 
    1179932         id="flowPara15752">- compute + store site and page level meta</flowPara><flowPara 
    1180          id="flowPara15748">- store full text per web page</flowPara></flowRoot>    <flowRoot 
    1181        xml:space="preserve" 
    1182        id="flowRoot15734" 
    1183        style="fill:black;fill-opacity:1;stroke:none;font-family:Arial;font-style:normal;font-weight:normal;font-size:18.66666667px;line-height:1.25;letter-spacing:0px;word-spacing:0px;-inkscape-font-specification:'Arial, Normal';font-stretch:normal;font-variant:normal;text-anchor:start;text-align:start;writing-mode:lr;font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal"><flowRegion 
    1184          id="flowRegion15736"><rect 
    1185            id="rect15738" 
    1186            width="184.28572" 
    1187            height="52.312145" 
    1188            x="345.71429" 
    1189            y="944.49323" /></flowRegion><flowPara 
    1190          id="flowPara15740" /></flowRoot>    <ellipse 
     933         id="flowPara15748">- store full text per web page</flowPara></flowRoot>    <ellipse 
    1191934       style="opacity:1;fill:#000000;fill-opacity:0;stroke:#000000;stroke-width:0.26499999;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1" 
    1192935       id="path15809" 
    1193        cx="144.57588" 
    1194        cy="271.4866" 
     936       cx="141.89096" 
     937       cy="271.1738" 
    1195938       rx="41.766369" 
    1196939       ry="14.930058" /> 
    1197940    <path 
    1198941       style="fill:none;stroke:#000000;stroke-width:0.78573805px;stroke-linecap:butt;stroke-linejoin:round;stroke-opacity:1;marker-start:url(#Arrow2Lstart-2-8-8-6)" 
    1199        d="m 91.922716,272.44387 c -30.180951,-0.52693 -30.18093,1.33273 -30.18093,1.33273" 
     942       d="M 89.23779,272.13107 C 59.056841,271.60414 59.056862,273.4638 59.056862,273.4638" 
    1200943       id="path6644-5-4-0-2" 
    1201944       inkscape:connector-curvature="0" /> 
    1202945    <flowRoot 
     946       transform="matrix(0.26458333,0,0,0.26458333,-2.6849314,-0.31280524)" 
    1203947       xml:space="preserve" 
    1204948       id="flowRoot16118" 
    1205        style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:18.66666603px;line-height:1.25;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:center;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none" 
    1206        transform="scale(0.26458333)"><flowRegion 
     949       style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:18.66666603px;line-height:1.25;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:center;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none"><flowRegion 
    1207950         id="flowRegion16120" 
    1208951         style="text-align:center;text-anchor:middle"><rect