Changeset 33965 for other-projects
- Timestamp: 2020-02-21T20:59:07+13:00 (4 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/maori-lang-detection/src/org/greenstone/atea/ManualURLInspection.java
r33963 r33965 641 641 CSVPrinter csvWriter = new CSVPrinter(new FileWriter(tmpOutFile), CSVFormat.DEFAULT.withQuoteMode(QuoteMode.MINIMAL)); 642 642 ) { 643 643 644 644 645 int recordCount = 0; 646 647 csvWriter.printRecord("origSequence", "basicDomain","pageURL", 648 "countryCode","mostlyMRI","qualityLevel", 649 "numPagesInMRIForDomainSuffix","totalPagesInMRIForDomainSuffix"); 650 651 645 652 for (CSVRecord csvRecord : parser) { 646 653 … … 649 656 continue; 650 657 } 651 652 658 recordCount++; 653 659 654 660 String basicURL = Utility.stripProtocolAndWWWFromURL(Utility.getDomainForURL(url, false)); 655 661 656 662 String countryCode = csvRecord.get(COUNTRY_CODE_COLUMN); 657 663 String isReallyInMRI = ""; … … 677 683 678 684 // Save the CSV record into the tmp file with the 2 counts columns 679 csvWriter.printRecord(url, countryCode, isReallyInMRI, qualityLevel, 685 csvWriter.printRecord(recordCount, basicURL, 686 url, countryCode, isReallyInMRI, qualityLevel, 680 687 countNumPagesInMRI, countTotalPages); 681 688 } … … 694 701 System.err.println("Usage: ManualURLInspection webPageURLs.txt"); 695 702 } 696 703 704 //^https?://(www.)? 697 705 /** 698 706 * If no args are passed in, generates complete containsMRI file listings for NZ and overseas web SITES (domains),
Note: See TracChangeset for help on using the changeset viewer.