Changeset 33856 for other-projects


Ignore:
Timestamp:
2020-01-22T16:33:31+13:00 (4 years ago)
Author:
ak19
Message:

Forgot to commit. Last week, Dr Bainbridge had properly cropped the SVG flow chart of the Common Crawl to MongoDB process in Inkscape. He'd also exported a PDF of it cropped to the bounds, which I'm also adding to SVN now.

Location:
other-projects/maori-lang-detection/journal-paper
Files:
1 added
1 edited

Legend:

Unmodified
Added
Removed
  • other-projects/maori-lang-detection/journal-paper/CommonCrawl_flow.svg

    r33841 r33856  
    1414   viewBox="0 0 210 297"
    1515   version="1.1"
    16    id="svg8"
     16   id="svg3410"
    1717   inkscape:version="0.92.4 (5da689c313, 2019-01-14)"
    1818   sodipodi:docname="CommonCrawl_flow2.svg">
    1919  <defs
    20      id="defs2">
    21     <marker
    22        inkscape:stockid="Arrow2Lstart"
    23        orient="auto"
    24        refY="0.0"
    25        refX="0.0"
    26        id="Arrow2Lstart"
    27        style="overflow:visible"
    28        inkscape:isstock="true">
    29       <path
    30          id="path6664"
    31          style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round;stroke:#000000;stroke-opacity:1;fill:#000000;fill-opacity:1"
    32          d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
    33          transform="scale(1.1) translate(1,0)" />
    34     </marker>
    35     <filter
    36        inkscape:collect="always"
    37        style="color-interpolation-filters:sRGB"
    38        id="filter6144"
    39        x="-0.014400269"
    40        width="1.0288005"
    41        y="-0.010285577"
    42        height="1.0205712">
    43       <feGaussianBlur
    44          inkscape:collect="always"
    45          stdDeviation="0.59036602"
    46          id="feGaussianBlur6146" />
    47     </filter>
     20     id="defs3404">
    4821    <marker
    4922       inkscape:stockid="Arrow2Lstart"
     
    5124       refY="0"
    5225       refX="0"
    53        id="Arrow2Lstart-2"
     26       id="Arrow2Lstart-2-8-8-6"
    5427       style="overflow:visible"
    5528       inkscape:isstock="true">
    5629      <path
    5730         inkscape:connector-curvature="0"
    58          id="path6664-1"
     31         id="path6664-1-8-8-0"
     32         style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.625;stroke-linejoin:round;stroke-opacity:1"
     33         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
     34         transform="matrix(1.1,0,0,1.1,1.1,0)" />
     35    </marker>
     36    <marker
     37       inkscape:stockid="Arrow2Lstart"
     38       orient="auto"
     39       refY="0"
     40       refX="0"
     41       id="Arrow2Lstart-2-9-2-8"
     42       style="overflow:visible"
     43       inkscape:isstock="true">
     44      <path
     45         inkscape:connector-curvature="0"
     46         id="path6664-1-1-3-1"
     47         style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.625;stroke-linejoin:round;stroke-opacity:1"
     48         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
     49         transform="matrix(1.1,0,0,1.1,1.1,0)" />
     50    </marker>
     51    <marker
     52       inkscape:stockid="Arrow2Lstart"
     53       orient="auto"
     54       refY="0"
     55       refX="0"
     56       id="Arrow2Lstart-2-8-8"
     57       style="overflow:visible"
     58       inkscape:isstock="true">
     59      <path
     60         inkscape:connector-curvature="0"
     61         id="path6664-1-8-8"
     62         style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.625;stroke-linejoin:round;stroke-opacity:1"
     63         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
     64         transform="matrix(1.1,0,0,1.1,1.1,0)" />
     65    </marker>
     66    <marker
     67       inkscape:stockid="Arrow2Lstart"
     68       orient="auto"
     69       refY="0"
     70       refX="0"
     71       id="Arrow2Lstart-2-9-2"
     72       style="overflow:visible"
     73       inkscape:isstock="true">
     74      <path
     75         inkscape:connector-curvature="0"
     76         id="path6664-1-1-3"
     77         style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.625;stroke-linejoin:round;stroke-opacity:1"
     78         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
     79         transform="matrix(1.1,0,0,1.1,1.1,0)" />
     80    </marker>
     81    <marker
     82       inkscape:stockid="Arrow2Lstart"
     83       orient="auto"
     84       refY="0"
     85       refX="0"
     86       id="Arrow2Lstart-2-9"
     87       style="overflow:visible"
     88       inkscape:isstock="true">
     89      <path
     90         inkscape:connector-curvature="0"
     91         id="path6664-1-1"
    5992         style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.625;stroke-linejoin:round;stroke-opacity:1"
    6093         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
     
    81114       refY="0"
    82115       refX="0"
    83        id="Arrow2Lstart-2-9"
     116       id="Arrow2Lstart-2"
    84117       style="overflow:visible"
    85118       inkscape:isstock="true">
    86119      <path
    87120         inkscape:connector-curvature="0"
    88          id="path6664-1-1"
     121         id="path6664-1"
    89122         style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.625;stroke-linejoin:round;stroke-opacity:1"
    90123         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
     
    96129       refY="0"
    97130       refX="0"
    98        id="Arrow2Lstart-2-9-2"
     131       id="Arrow2Lstart"
    99132       style="overflow:visible"
    100133       inkscape:isstock="true">
    101134      <path
    102135         inkscape:connector-curvature="0"
    103          id="path6664-1-1-3"
    104          style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.625;stroke-linejoin:round;stroke-opacity:1"
    105          d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
    106          transform="matrix(1.1,0,0,1.1,1.1,0)" />
    107     </marker>
    108     <marker
    109        inkscape:stockid="Arrow2Lstart"
    110        orient="auto"
    111        refY="0"
    112        refX="0"
    113        id="Arrow2Lstart-2-8-8"
    114        style="overflow:visible"
    115        inkscape:isstock="true">
    116       <path
    117          inkscape:connector-curvature="0"
    118          id="path6664-1-8-8"
    119          style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.625;stroke-linejoin:round;stroke-opacity:1"
    120          d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
    121          transform="matrix(1.1,0,0,1.1,1.1,0)" />
    122     </marker>
    123     <marker
    124        inkscape:stockid="Arrow2Lstart"
    125        orient="auto"
    126        refY="0"
    127        refX="0"
    128        id="Arrow2Lstart-2-9-2-8"
    129        style="overflow:visible"
    130        inkscape:isstock="true">
    131       <path
    132          inkscape:connector-curvature="0"
    133          id="path6664-1-1-3-1"
    134          style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.625;stroke-linejoin:round;stroke-opacity:1"
    135          d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
    136          transform="matrix(1.1,0,0,1.1,1.1,0)" />
    137     </marker>
    138     <marker
    139        inkscape:stockid="Arrow2Lstart"
    140        orient="auto"
    141        refY="0"
    142        refX="0"
    143        id="Arrow2Lstart-2-8-8-6"
    144        style="overflow:visible"
    145        inkscape:isstock="true">
    146       <path
    147          inkscape:connector-curvature="0"
    148          id="path6664-1-8-8-0"
     136         id="path6664"
    149137         style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.625;stroke-linejoin:round;stroke-opacity:1"
    150138         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
     
    160148     inkscape:pageshadow="2"
    161149     inkscape:zoom="0.7"
    162      inkscape:cx="281.24246"
    163      inkscape:cy="703.19328"
     150     inkscape:cx="229.97759"
     151     inkscape:cy="567.58817"
    164152     inkscape:document-units="mm"
    165      inkscape:current-layer="g6386"
     153     inkscape:current-layer="layer1"
    166154     showgrid="false"
    167      inkscape:snap-text-baseline="true"
    168155     inkscape:window-width="1680"
    169156     inkscape:window-height="988"
    170157     inkscape:window-x="-8"
    171158     inkscape:window-y="-8"
    172      inkscape:window-maximized="1"
    173      inkscape:snap-smooth-nodes="true" />
     159     inkscape:window-maximized="1" />
    174160  <metadata
    175      id="metadata5">
     161     id="metadata3407">
    176162    <rdf:RDF>
    177163      <cc:Work
     
    180166        <dc:type
    181167           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
    182         <dc:title />
     168        <dc:title></dc:title>
    183169      </cc:Work>
    184170    </rdf:RDF>
     
    188174     inkscape:groupmode="layer"
    189175     id="layer1">
    190     <rect
    191        id="rect3713"
    192        width="55.5625"
    193        height="65.767853"
    194        x="7.5595236"
    195        y="8.9821434"
    196        style="stroke-width:0.26458332;fill:#000000;fill-opacity:1;opacity:0;stroke:#000000;stroke-opacity:1" />
    197176    <g
    198        id="g5994">
    199       <text
    200          xml:space="preserve"
    201          style="font-style:normal;font-weight:normal;font-size:10.58333302px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332"
    202          x="36.663689"
    203          y="24.440805"
    204          id="text6012"><tspan
    205            sodipodi:role="line"
    206            id="tspan6010"
    207            x="36.663689"
    208            y="33.804573"
    209            style="stroke-width:0.26458332" /><tspan
    210            sodipodi:role="line"
    211            x="36.663689"
    212            y="47.033741"
    213            style="stroke-width:0.26458332"
    214            id="tspan6014" /></text>
    215       <text
    216          xml:space="preserve"
    217          style="font-style:normal;font-weight:normal;font-size:10.58333302px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332"
    218          x="39.6875"
    219          y="47.913689"
    220          id="text6018"><tspan
    221            sodipodi:role="line"
    222            id="tspan6016"
    223            x="39.6875"
    224            y="57.277458"
    225            style="stroke-width:0.26458332" /></text>
     177       id="g13905"
     178       transform="translate(-2.6849314,-0.31280524)">
     179      <rect
     180         y="8.9821434"
     181         x="36.174416"
     182         height="32.630665"
     183         width="26.947613"
     184         id="rect5954"
     185         style="opacity:1;fill:#64e000;fill-opacity:0;stroke:#000000;stroke-width:0.12860805;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
    226186      <flowRoot
    227187         xml:space="preserve"
    228          id="flowRoot6092"
    229          style="fill:black;fill-opacity:1;stroke:none;font-family:sans-serif;font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;letter-spacing:0px;word-spacing:0px"><flowRegion
    230            id="flowRegion6094"><rect
    231              id="rect6096"
    232              width="11.428572"
    233              height="140"
    234              x="410"
    235              y="195.37683" /></flowRegion><flowPara
    236            id="flowPara6098" /></flowRoot>      <flowRoot
    237          xml:space="preserve"
    238          id="flowRoot6100"
    239          style="fill:black;fill-opacity:1;stroke:none;font-family:sans-serif;font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;letter-spacing:0px;word-spacing:0px"><flowRegion
    240            id="flowRegion6102"><rect
    241              id="rect6104"
    242              width="15.714286"
    243              height="54.285713"
    244              x="405.71429"
    245              y="189.66254" /></flowRegion><flowPara
    246            id="flowPara6106" /></flowRoot>      <flowRoot
    247          xml:space="preserve"
    248          id="flowRoot6108"
    249          style="fill:black;fill-opacity:1;stroke:none;font-family:sans-serif;font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;letter-spacing:0px;word-spacing:0px"><flowRegion
    250            id="flowRegion6110"><rect
    251              id="rect6112"
    252              width="38.57143"
    253              height="47.142857"
    254              x="407.14285"
    255              y="193.94826" /></flowRegion><flowPara
    256            id="flowPara6114" /></flowRoot>      <g
    257          id="g6386"
    258          transform="translate(-3.8146973e-6)">
    259         <flowRoot
    260            transform="matrix(0.26458333,0,0,0.26458333,-33.04705,-17.589917)"
    261            style="font-style:normal;font-weight:normal;font-size:40px;line-height:1;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;filter:url(#filter6144)"
    262            id="flowRoot6080"
    263            xml:space="preserve"><flowRegion
    264              style="line-height:1"
    265              id="flowRegion6082"><rect
    266                style="line-height:1"
    267                y="106.8054"
    268                x="262.85715"
    269                height="208.57144"
    270                width="107.14282"
    271                id="rect6084" /></flowRegion><flowPara
    272              style="font-size:24px;line-height:1"
    273              id="flowPara6088" /><flowPara
    274              id="flowPara6090" /></flowRoot>        <g
    275            id="g13905">
    276           <rect
    277              y="8.9821434"
    278              x="36.174416"
    279              height="32.630665"
    280              width="26.947613"
    281              id="rect5954"
    282              style="opacity:1;fill:#64e000;fill-opacity:0;stroke:#000000;stroke-width:0.12860805;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
    283           <flowRoot
    284              xml:space="preserve"
    285              id="flowRoot6219"
    286              style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:24px;line-height:1.25;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none"
    287              transform="matrix(0.26458333,0,0,0.26458333,20.323189,-24.166702)"><flowRegion
    288                id="flowRegion6221"><rect
    289                  id="rect6223"
    290                  width="54.285713"
    291                  height="37.142857"
    292                  x="310"
    293                  y="185.37683" /></flowRegion><flowPara
    294                id="flowPara6225">...</flowPara></flowRoot>          <flowRoot
    295              transform="matrix(0.26458333,0,0,0.26458333,1.8898848,4.5357143)"
    296              style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:18.66666603px;line-height:1.25;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none"
    297              id="flowRoot13798"
    298              xml:space="preserve"><flowRegion
    299                id="flowRegion13800"><rect
    300                  y="33.948257"
    301                  x="136.72218"
    302                  height="106.18565"
    303                  width="94.70639"
    304                  id="rect13802" /></flowRegion><flowPara
    305                id="flowPara13804">CC Sep 2018</flowPara><flowPara
    306                id="flowPara13806">Columnar Index</flowPara><flowPara
    307                id="flowPara13808" /></flowRoot>          <rect
    308              y="8.8441896"
    309              x="69.302986"
    310              height="32.630665"
    311              width="26.947613"
    312              id="rect5954-4"
    313              style="opacity:1;fill:#64e000;fill-opacity:0;stroke:#000000;stroke-width:0.12860805;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
    314           <flowRoot
    315              transform="matrix(0.26458333,0,0,0.26458333,35.018452,4.3977563)"
    316              style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:18.66666603px;line-height:1.25;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none"
    317              id="flowRoot13798-0"
    318              xml:space="preserve"><flowRegion
    319                id="flowRegion13800-2"><rect
    320                  y="33.948257"
    321                  x="136.72218"
    322                  height="106.18565"
    323                  width="94.70639"
    324                  id="rect13802-0" /></flowRegion><flowPara
    325                id="flowPara13804-8">CC Oct 2018</flowPara><flowPara
    326                id="flowPara13806-1">Columnar Index</flowPara><flowPara
    327                id="flowPara13808-6" /></flowRoot>          <rect
    328              y="8.9197874"
    329              x="113.52618"
    330              height="32.630665"
    331              width="26.947613"
    332              id="rect5954-1"
    333              style="opacity:1;fill:#64e000;fill-opacity:0;stroke:#000000;stroke-width:0.12860805;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
    334           <flowRoot
    335              transform="matrix(0.26458333,0,0,0.26458333,79.241667,4.4733578)"
    336              style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:18.66666603px;line-height:1.25;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none"
    337              id="flowRoot13798-09"
    338              xml:space="preserve"><flowRegion
    339                id="flowRegion13800-0"><rect
    340                  y="33.948257"
    341                  x="136.72218"
    342                  height="106.18565"
    343                  width="94.70639"
    344                  id="rect13802-07" /></flowRegion><flowPara
    345                id="flowPara13876">CC Aug 2019</flowPara><flowPara
    346                id="flowPara13806-6">Columnar Index</flowPara><flowPara
    347                id="flowPara13808-1" /></flowRoot>        </g>
    348       </g>
    349     </g>
    350     <flowRoot
    351        xml:space="preserve"
    352        id="flowRoot5970"
    353        style="fill:black;fill-opacity:1;stroke:none;font-family:sans-serif;font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;letter-spacing:0px;word-spacing:0px"><flowRegion
    354          id="flowRegion5972"><rect
    355            id="rect5974"
    356            width="312.85715"
    357            height="85.714287"
    358            x="154.28572"
    359            y="568.23395" /></flowRegion><flowPara
    360          id="flowPara5976" /></flowRoot>    <flowRoot
    361        xml:space="preserve"
    362        id="flowRoot5980"
    363        style="fill:black;fill-opacity:1;stroke:none;font-family:sans-serif;font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;letter-spacing:0px;word-spacing:0px"><flowRegion
    364          id="flowRegion5982"><rect
    365            id="rect5984"
    366            width="128.57143"
    367            height="78.571426"
    368            x="1.4285715"
    369            y="65.376831" /></flowRegion><flowPara
    370          id="flowPara5986" /></flowRoot>    <flowRoot
    371        xml:space="preserve"
    372        id="flowRoot6469"
    373        style="fill:black;fill-opacity:1;stroke:none;font-family:Arial;font-style:normal;font-weight:normal;font-size:24px;line-height:1.25;letter-spacing:0px;word-spacing:0px;-inkscape-font-specification:'Arial, Normal';font-stretch:normal;font-variant:normal;text-anchor:start;text-align:start;writing-mode:lr;font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal"><flowRegion
    374          id="flowRegion6471"><rect
    375            id="rect6473"
    376            width="192.85715"
    377            height="91.428566"
    378            x="90.714287"
    379            y="612.51971" /></flowRegion><flowPara
    380          id="flowPara6475" /></flowRoot>    <g
     188         id="flowRoot6219"
     189         style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:24px;line-height:1.25;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none"
     190         transform="matrix(0.26458333,0,0,0.26458333,20.323189,-24.166702)"><flowRegion
     191           id="flowRegion6221"><rect
     192             id="rect6223"
     193             width="54.285713"
     194             height="37.142857"
     195             x="310"
     196             y="185.37683" /></flowRegion><flowPara
     197           id="flowPara6225">...</flowPara></flowRoot>      <flowRoot
     198         transform="matrix(0.26458333,0,0,0.26458333,1.8898848,4.5357143)"
     199         style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:18.66666603px;line-height:1.25;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none"
     200         id="flowRoot13798"
     201         xml:space="preserve"><flowRegion
     202           id="flowRegion13800"><rect
     203             y="33.948257"
     204             x="136.72218"
     205             height="106.18565"
     206             width="94.70639"
     207             id="rect13802" /></flowRegion><flowPara
     208           id="flowPara13804">CC Sep 2018</flowPara><flowPara
     209           id="flowPara13806">Columnar Index</flowPara><flowPara
     210           id="flowPara13808" /></flowRoot>      <rect
     211         y="8.8441896"
     212         x="69.302986"
     213         height="32.630665"
     214         width="26.947613"
     215         id="rect5954-4"
     216         style="opacity:1;fill:#64e000;fill-opacity:0;stroke:#000000;stroke-width:0.12860805;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
     217      <flowRoot
     218         transform="matrix(0.26458333,0,0,0.26458333,35.018452,4.3977563)"
     219         style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:18.66666603px;line-height:1.25;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none"
     220         id="flowRoot13798-0"
     221         xml:space="preserve"><flowRegion
     222           id="flowRegion13800-2"><rect
     223             y="33.948257"
     224             x="136.72218"
     225             height="106.18565"
     226             width="94.70639"
     227             id="rect13802-0" /></flowRegion><flowPara
     228           id="flowPara13804-8">CC Oct 2018</flowPara><flowPara
     229           id="flowPara13806-1">Columnar Index</flowPara><flowPara
     230           id="flowPara13808-6" /></flowRoot>      <rect
     231         y="8.9197874"
     232         x="113.52618"
     233         height="32.630665"
     234         width="26.947613"
     235         id="rect5954-1"
     236         style="opacity:1;fill:#64e000;fill-opacity:0;stroke:#000000;stroke-width:0.12860805;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
     237      <flowRoot
     238         transform="matrix(0.26458333,0,0,0.26458333,79.241667,4.4733578)"
     239         style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:18.66666603px;line-height:1.25;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none"
     240         id="flowRoot13798-09"
     241         xml:space="preserve"><flowRegion
     242           id="flowRegion13800-0"><rect
     243             y="33.948257"
     244             x="136.72218"
     245             height="106.18565"
     246             width="94.70639"
     247             id="rect13802-07" /></flowRegion><flowPara
     248           id="flowPara13876">CC Aug 2019</flowPara><flowPara
     249           id="flowPara13806-6">Columnar Index</flowPara><flowPara
     250           id="flowPara13808-1" /></flowRoot>    </g>
     251    <g
     252       transform="translate(-2.6849314,-0.31280524)"
    381253       id="g8700">
    382254      <flowRoot
     
    502374    <path
    503375       style="fill:none;stroke:#000000;stroke-width:0.64353597px;stroke-linecap:butt;stroke-linejoin:round;stroke-opacity:1;marker-start:url(#Arrow2Lstart)"
    504        d="M 91.563371,63.532871 C 91.031116,43.490147 92.909569,43.490147 92.909569,43.490147"
     376       d="M 88.878446,63.220066 C 88.346192,43.177342 90.224644,43.177342 90.224644,43.177342"
    505377       id="path6644"
    506378       inkscape:connector-curvature="0" />
    507379    <text
    508380       xml:space="preserve"
    509        style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:6.3499999px;line-height:1.25;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332"
    510        x="169.53333"
    511        y="61.083336"
    512        id="text8520"><tspan
    513          sodipodi:role="line"
    514          id="tspan8518"
    515          x="169.53333"
    516          y="66.803123"
    517          style="stroke-width:0.26458332" /><tspan
    518          sodipodi:role="line"
    519          x="169.53333"
    520          y="74.740623"
    521          style="stroke-width:0.26458332"
    522          id="tspan8526" /><tspan
    523          sodipodi:role="line"
    524          x="169.53333"
    525          y="82.678123"
    526          style="stroke-width:0.26458332"
    527          id="tspan8522" /><tspan
    528          sodipodi:role="line"
    529          x="169.53333"
    530          y="90.615623"
    531          style="stroke-width:0.26458332"
    532          id="tspan8524" /></text>
    533     <flowRoot
    534        xml:space="preserve"
    535        id="flowRoot8528"
    536        style="fill:black;fill-opacity:1;stroke:none;font-family:Arial;font-style:normal;font-weight:normal;font-size:24px;line-height:1.25;letter-spacing:0px;word-spacing:0px;-inkscape-font-specification:'Arial, Normal';font-stretch:normal;font-variant:normal;text-anchor:start;text-align:start;writing-mode:lr;font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal"><flowRegion
    537          id="flowRegion8530"><rect
    538            id="rect8532"
    539            width="210.83565"
    540            height="59.692326"
    541            x="372.02237"
    542            y="209.24846" /></flowRegion><flowPara
    543          id="flowPara8534" /></flowRoot>    <flowRoot
    544        xml:space="preserve"
    545        id="flowRoot8538"
    546        style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:24px;line-height:1.25;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none"
    547        transform="scale(0.26458333)"><flowRegion
    548          id="flowRegion8540"><rect
    549            id="rect8542"
    550            width="152.53304"
    551            height="46.467018"
    552            x="390.92905"
    553            y="231.56514" /></flowRegion><flowPara
    554          id="flowPara8544" /></flowRoot>    <text
    555        xml:space="preserve"
    556381       style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.93888855px;line-height:1;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:center;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332"
    557        x="126.03919"
    558        y="49.118912"
     382       x="123.35426"
     383       y="48.80611"
    559384       id="text8548"><tspan
    560385         sodipodi:role="line"
    561          x="126.03919"
    562          y="49.118912"
     386         x="123.35426"
     387         y="48.80611"
    563388         style="line-height:1;text-align:center;text-anchor:middle;stroke-width:0.26458332"
    564389         id="tspan8550">content_languages = 'mri'</tspan><tspan
    565390         sodipodi:role="line"
    566          x="126.03919"
    567          y="54.0578"
     391         x="123.35426"
     392         y="53.744999"
    568393         style="line-height:1;text-align:center;text-anchor:middle;stroke-width:0.26458332"
    569394         id="tspan8558">+</tspan><tspan
    570395         sodipodi:role="line"
    571          x="126.03919"
    572          y="58.996689"
     396         x="123.35426"
     397         y="58.683887"
    573398         style="line-height:1;text-align:center;text-anchor:middle;stroke-width:0.26458332"
    574399         id="tspan8560">warc to wet</tspan></text>
    575400    <flowRoot
    576        transform="matrix(0.26458333,0,0,0.26458333,43.156253,110.17777)"
     401       transform="matrix(0.26458333,0,0,0.26458333,40.471328,109.86496)"
    577402       style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:24px;line-height:1.25;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none"
    578403       id="flowRoot6219-8"
     
    586411         id="flowPara6225-5">...</flowPara></flowRoot>    <g
    587412       id="g8667"
    588        transform="translate(-4.1058568,21.78947)">
     413       transform="translate(-6.7907814,21.476664)">
    589414      <rect
    590415         y="124.0766"
     
    616441    <g
    617442       id="g8709"
    618        transform="translate(8.9474833,31.976784)">
     443       transform="translate(6.2625586,31.663979)">
    619444      <g
    620445         id="g8744">
     
    649474    <g
    650475       id="g8658"
    651        transform="translate(-9.8520303,-1.5010822)">
     476       transform="translate(-12.536961,-1.8138882)">
    652477      <rect
    653478         y="153.60989"
     
    679504    <path
    680505       style="fill:none;stroke:#000000;stroke-width:0.62406325px;stroke-linecap:butt;stroke-linejoin:round;stroke-opacity:1;marker-start:url(#Arrow2Lstart-2)"
    681        d="M 81.998023,141.73889 C 81.457172,123.19052 83.36598,123.19052 83.36598,123.19052"
     506       d="m 79.313097,141.42609 c -0.54085,-18.54837 1.367957,-18.54837 1.367957,-18.54837"
    682507       id="path6644-5"
    683508       inkscape:connector-curvature="0" />
    684509    <path
    685510       style="fill:none;stroke:#000000;stroke-width:0.64353597px;stroke-linecap:butt;stroke-linejoin:round;stroke-opacity:1;marker-start:url(#Arrow2Lstart-2-8)"
    686        d="M 94.763655,112.71836 C 111.855,102.23609 112.79422,103.86287 112.79422,103.86287"
     511       d="m 92.07873,112.40555 c 17.09134,-10.48227 18.03057,-8.85549 18.03057,-8.85549"
    687512       id="path6644-5-4"
    688513       inkscape:connector-curvature="0" />
    689514    <path
    690515       style="fill:none;stroke:#000000;stroke-width:0.59350747px;stroke-linecap:butt;stroke-linejoin:round;stroke-opacity:1;marker-start:url(#Arrow2Lstart-2-9)"
    691        d="m 81.814159,111.48634 c -0.53225,-17.047567 1.3462,-17.047567 1.3462,-17.047567"
     516       d="m 79.129233,111.17354 c -0.532249,-17.047571 1.3462,-17.047571 1.3462,-17.047571"
    692517       id="path6644-5-5"
    693518       inkscape:connector-curvature="0" />
     
    696521       id="flowRoot10549"
    697522       style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.93888855px;line-height:1.25;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332"
    698        transform="translate(-4.5959653,-47.786576)"><flowRegion
     523       transform="translate(-7.2808914,-48.099385)"><flowRegion
    699524         id="flowRegion10551"
    700525         style="stroke-width:0.26458332"><rect
     
    710535         style="stroke-width:0.26458332">sites needing custom handling</flowPara></flowRoot>    <g
    711536       id="g14180"
    712        transform="translate(-1.6508314,16.447997)">
     537       transform="translate(-4.3357614,16.135192)">
    713538      <g
    714539         transform="translate(8.0502882,-22.849676)"
     
    824649       id="flowRoot10757"
    825650       style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:18.66666603px;line-height:1.25;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:center;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none"
    826        transform="matrix(0.26458333,0,0,0.26458333,5.1165915,-20.486313)"><flowRegion
     651       transform="matrix(0.26458333,0,0,0.26458333,2.4316686,-20.799115)"><flowRegion
    827652         id="flowRegion10759"
    828653         style="text-align:center;text-anchor:middle"><rect
     
    833658           y="509.66254"
    834659           style="text-align:center;text-anchor:middle" /></flowRegion><flowPara
    835          id="flowPara10763">CCWETProcessor.java</flowPara></flowRoot>    <flowRoot
    836        xml:space="preserve"
    837        id="flowRoot11618"
    838        style="fill:black;fill-opacity:1;stroke:none;font-family:Arial;font-style:normal;font-weight:normal;font-size:18.66666667px;line-height:1.25;letter-spacing:0px;word-spacing:0px;-inkscape-font-specification:'Arial, Normal';font-stretch:normal;font-variant:normal;text-anchor:start;text-align:start;writing-mode:lr;font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal"><flowRegion
    839          id="flowRegion11620"><rect
    840            id="rect11622"
    841            width="294.28571"
    842            height="95.714287"
    843            x="255.71428"
    844            y="806.80542" /></flowRegion><flowPara
    845          id="flowPara11624" /></flowRoot>    <flowRoot
    846        xml:space="preserve"
    847        id="flowRoot11626"
    848        style="fill:black;fill-opacity:1;stroke:none;font-family:Arial;font-style:normal;font-weight:normal;font-size:24px;line-height:1.25;letter-spacing:0px;word-spacing:0px;-inkscape-font-specification:'Arial, Normal';font-stretch:normal;font-variant:normal;text-anchor:start;text-align:start;writing-mode:lr;font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal"><flowRegion
    849          id="flowRegion11628"><rect
    850            id="rect11630"
    851            width="178.57143"
    852            height="70"
    853            x="364.28571"
    854            y="835.37683" /></flowRegion><flowPara
    855          id="flowPara11632" /></flowRoot>    <flowRoot
    856        xml:space="preserve"
    857        id="flowRoot11636"
    858        style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:24px;line-height:1.25;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none"
    859        transform="matrix(0.26458333,0,0,0.26458333,41.665157,-34.992356)"><flowRegion
    860          id="flowRegion11638"><rect
    861            id="rect11640"
    862            width="174.28572"
    863            height="61.42857"
    864            x="340"
    865            y="826.80542" /></flowRegion><flowPara
    866          id="flowPara11642" /><flowPara
    867          id="flowPara11644" /></flowRoot>    <rect
    868        style="opacity:1;fill:#ffffff;fill-opacity:0.98412697;stroke:#000000;stroke-width:1.00157475;stroke-linejoin:bevel;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
    869        id="rect12432"
    870        width="0"
    871        height="0"
    872        x="0"
    873        y="0"
    874        transform="scale(0.26458333)" />
    875     <flowRoot
    876        xml:space="preserve"
    877        id="flowRoot12548"
    878        style="fill:#000000;fill-opacity:0;stroke:none;font-family:Arial;font-style:normal;font-weight:normal;font-size:24px;line-height:1.25;letter-spacing:0px;word-spacing:0px;-inkscape-font-specification:'Arial, Normal';font-stretch:normal;font-variant:normal;text-anchor:start;text-align:start;writing-mode:lr;font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;"><flowRegion
    879          id="flowRegion12550"
    880          style="fill:#000000;fill-opacity:0;"><rect
    881            id="rect12552"
    882            width="96.564629"
    883            height="62.566341"
    884            x="432.34528"
    885            y="875.03229"
    886            style="fill:#000000;fill-opacity:0;" /></flowRegion><flowPara
    887          id="flowPara12554" /></flowRoot>    <flowRoot
    888        xml:space="preserve"
    889        id="flowRoot12556"
    890        style="fill:black;fill-opacity:1;stroke:none;font-family:Arial;font-style:normal;font-weight:normal;font-size:18.66666667px;line-height:1.25;letter-spacing:0px;word-spacing:0px;-inkscape-font-specification:'Arial, Normal';font-stretch:normal;font-variant:normal;text-anchor:start;text-align:start;writing-mode:lr;font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal"><flowRegion
    891          id="flowRegion12558"><rect
    892            id="rect12560"
    893            width="97.069702"
    894            height="66.101875"
    895            x="431.84021"
    896            y="871.49677" /></flowRegion><flowPara
    897          id="flowPara12562" /></flowRoot>    <g
     660         id="flowPara10763">CCWETProcessor.java</flowPara></flowRoot>    <g
    898661       id="g12584"
    899        transform="translate(35.409039,-23.306338)">
     662       transform="translate(32.724113,-23.619145)">
    900663      <g
    901664         transform="translate(59.46747,-6.5481034)"
     
    940703    <path
    941704       style="fill:none;stroke:#000000;stroke-width:0.61500657px;stroke-linecap:butt;stroke-linejoin:round;stroke-opacity:1;marker-start:url(#Arrow2Lstart-2-9-2)"
    942        d="m 161.40032,200.57914 c -0.49158,-19.81955 1.24333,-19.81955 1.24333,-19.81955"
     705       d="m 158.71539,200.26634 c -0.49158,-19.81955 1.24333,-19.81955 1.24333,-19.81955"
    943706       id="path6644-5-5-7"
    944707       inkscape:connector-curvature="0" />
    945708    <path
    946709       style="fill:none;stroke:#000000;stroke-width:0.78573805px;stroke-linecap:butt;stroke-linejoin:round;stroke-opacity:1;marker-start:url(#Arrow2Lstart-2-8-8)"
    947        d="m 110.10792,211.95692 c 30.18095,-0.52693 30.18093,1.33273 30.18093,1.33273"
     710       d="m 107.42299,211.64412 c 30.18095,-0.52693 30.18093,1.33273 30.18093,1.33273"
    948711       id="path6644-5-4-0"
    949712       inkscape:connector-curvature="0" />
    950     <flowRoot
    951        xml:space="preserve"
    952        id="flowRoot13119"
    953        style="fill:black;fill-opacity:1;stroke:none;font-family:Arial;font-style:normal;font-weight:normal;font-size:18.66666667px;line-height:1.25;letter-spacing:0px;word-spacing:0px;-inkscape-font-specification:'Arial, Normal';font-stretch:normal;font-variant:normal;text-anchor:start;text-align:start;writing-mode:lr;font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal"><flowRegion
    954          id="flowRegion13121"><rect
    955            id="rect13123"
    956            width="72.730934"
    957            height="19.19297"
    958            x="79.814285"
    959            y="893.01202" /></flowRegion><flowPara
    960          id="flowPara13125" /></flowRoot>    <g
     713    <g
    961714       id="g13297"
    962        transform="translate(9.4494046,-25.713581)">
     715       transform="translate(6.7644786,-26.026385)">
    963716      <g
    964717         transform="translate(-80.712161,97.488904)"
     
    1089842    <g
    1090843       id="g12584-4"
    1091        transform="translate(-83.111041,35.25929)">
     844       transform="translate(-85.795971,34.946485)">
    1092845      <g
    1093846         transform="translate(59.46747,-6.5481034)"
     
    1133886       xml:space="preserve"
    1134887       style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.93888855px;line-height:1.25;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332"
    1135        x="128.51186"
    1136        y="186.63091"
     888       x="125.82693"
     889       y="186.3181"
    1137890       id="text14721"><tspan
    1138891         sodipodi:role="line"
    1139892         id="tspan14719"
    1140          x="128.51186"
    1141          y="186.63091"
     893         x="125.82693"
     894         y="186.3181"
    1142895         style="stroke-width:0.26458332">Crawl with </tspan><tspan
    1143896         sodipodi:role="line"
    1144          x="128.51186"
    1145          y="192.80452"
     897         x="125.82693"
     898         y="192.4917"
    1146899         style="stroke-width:0.26458332"
    1147900         id="tspan15630">Apache Nutch</tspan></text>
    1148901    <path
    1149902       style="fill:none;stroke:#000000;stroke-width:0.62741137px;stroke-linecap:butt;stroke-linejoin:round;stroke-opacity:1;marker-start:url(#Arrow2Lstart-2-9-2-8)"
    1150        d="m 41.504639,258.88526 c -0.418819,-24.21071 1.059298,-24.21071 1.059298,-24.21071"
     903       d="m 38.819713,258.57246 c -0.418819,-24.21071 1.059299,-24.21071 1.059299,-24.21071"
    1151904       id="path6644-5-5-7-9"
    1152905       inkscape:connector-curvature="0" />
     
    1155908       id="flowRoot15304"
    1156909       style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:18.66666603px;line-height:1.25;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none"
    1157        transform="matrix(0.26458333,0,0,0.26458333,-9.0714287,-1.889881)"><flowRegion
     910       transform="matrix(0.26458333,0,0,0.26458333,-11.756351,-2.2026872)"><flowRegion
    1158911         id="flowRegion15306"><rect
    1159912           id="rect15308"
     
    1167920       id="flowRoot10757-0"
    1168921       style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:18.66666603px;line-height:1.25;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none"
    1169        transform="matrix(0.26458333,0,0,0.26458333,9.8528943,103.04083)"><flowRegion
     922       transform="matrix(0.26458333,0,0,0.26458333,7.1679686,102.72802)"><flowRegion
    1170923         id="flowRegion10759-1"
    1171924         style="text-align:start;text-anchor:start"><rect
     
    1178931         id="flowPara10763-6">NutchTextDumpToMongoDB.java</flowPara><flowPara
    1179932         id="flowPara15752">- compute + store site and page level meta</flowPara><flowPara
    1180          id="flowPara15748">- store full text per web page</flowPara></flowRoot>    <flowRoot
    1181        xml:space="preserve"
    1182        id="flowRoot15734"
    1183        style="fill:black;fill-opacity:1;stroke:none;font-family:Arial;font-style:normal;font-weight:normal;font-size:18.66666667px;line-height:1.25;letter-spacing:0px;word-spacing:0px;-inkscape-font-specification:'Arial, Normal';font-stretch:normal;font-variant:normal;text-anchor:start;text-align:start;writing-mode:lr;font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal"><flowRegion
    1184          id="flowRegion15736"><rect
    1185            id="rect15738"
    1186            width="184.28572"
    1187            height="52.312145"
    1188            x="345.71429"
    1189            y="944.49323" /></flowRegion><flowPara
    1190          id="flowPara15740" /></flowRoot>    <ellipse
     933         id="flowPara15748">- store full text per web page</flowPara></flowRoot>    <ellipse
    1191934       style="opacity:1;fill:#000000;fill-opacity:0;stroke:#000000;stroke-width:0.26499999;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
    1192935       id="path15809"
    1193        cx="144.57588"
    1194        cy="271.4866"
     936       cx="141.89096"
     937       cy="271.1738"
    1195938       rx="41.766369"
    1196939       ry="14.930058" />
    1197940    <path
    1198941       style="fill:none;stroke:#000000;stroke-width:0.78573805px;stroke-linecap:butt;stroke-linejoin:round;stroke-opacity:1;marker-start:url(#Arrow2Lstart-2-8-8-6)"
    1199        d="m 91.922716,272.44387 c -30.180951,-0.52693 -30.18093,1.33273 -30.18093,1.33273"
     942       d="M 89.23779,272.13107 C 59.056841,271.60414 59.056862,273.4638 59.056862,273.4638"
    1200943       id="path6644-5-4-0-2"
    1201944       inkscape:connector-curvature="0" />
    1202945    <flowRoot
     946       transform="matrix(0.26458333,0,0,0.26458333,-2.6849314,-0.31280524)"
    1203947       xml:space="preserve"
    1204948       id="flowRoot16118"
    1205        style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:18.66666603px;line-height:1.25;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:center;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none"
    1206        transform="scale(0.26458333)"><flowRegion
     949       style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:18.66666603px;line-height:1.25;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:center;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none"><flowRegion
    1207950         id="flowRegion16120"
    1208951         style="text-align:center;text-anchor:middle"><rect
Note: See TracChangeset for help on using the changeset viewer.