Changeset 33710 for other-projects
- Timestamp:
- 2019-11-20T23:23:29+13:00 (4 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/maori-lang-detection/MoreReading/mongodb.txt
r33698 r33710 417 417 418 418 https://docs.mongodb.com/manual/aggregation/ 419 EXAMPLE: 419 420 db.orders.aggregate([ 420 421 { $match: { status: "A" } }, 421 422 { $group: { _id: "$cust_id", total: { $sum: "$amount" } } } 422 423 ]) 423 db.Websites.aggregate({ $match:{urlContainsLangCodeInpath:true}}, $group: {geoLocationCountryCode:1, total: $count}) 424 425 X db.Websites.aggregate([{ $match:{urlContainsLangCodeInPath:true}}, $group: {geoLocationCountryCode:1, total: $count}]) 426 427 428 X db.Websites.aggregate([ 429 { $match:{urlContainsLangCodeInPath:true}}, 430 {$group: {geoLocationCountryCode:1}} 431 ]) 432 433 WORKS (but an "unwind" will get rid of "null"): 434 db.Websites.aggregate([ 435 { $match:{urlContainsLangCodeInPath:true}}, 436 {$group: {_id: "$geoLocationCountryCode", count: {$sum: 1}}}, 437 { $sort : { count : -1} } 438 ]) 439 440 441 # COUNT OF ALL GEOLOCATION COUNTRIES 442 #https://stackoverflow.com/questions/14924495/mongodb-count-num-of-distinct-values-per-field-key 443 # LIST 444 db.Websites.distinct('geoLocationCountryCode'); 445 446 # COUNT 447 db.Websites.distinct('geoLocationCountryCode').length; 448 449 # A COUNT WITH QUERY - https://docs.mongodb.com/manual/reference/command/distinct/#dbcmd.distinct 450 451 db.runCommand ( { distinct: "Websites", key: "geoLocationCountryCode", query: { "urlContainsLangCodeInPath": true} } ); 452 453 # DISTINCT WITH QUERY WITHOUT COUNT - https://docs.mongodb.com/manual/reference/method/db.collection.distinct/ 454 db.Websites.distinct('geoLocationCountryCode', {"urlContainsLangCodeInPath": true}); 455 456 #SORTED - https://stackoverflow.com/questions/4759437/get-distinct-values-with-sorted-data 457 db.Websites.distinct('geoLocationCountryCode', {"urlContainsLangCodeInPath": true}).sort(); 458 459 460 # AGGREGATION QUERIES THAT WORK: 461 #https://stackoverflow.com/questions/14924495/mongodb-count-num-of-distinct-values-per-field-key 462 463 db.Websites.aggregate([ 464 { 465 $match: { 466 urlContainsLangCodeInPath: true 467 } 468 }, 469 { $unwind: "$geoLocationCountryCode" }, 470 { 471 $group: { 472 _id: {$toLower: '$geoLocationCountryCode'}, 473 count: { $sum: 1 } 474 } 475 }, 476 { $sort : { count : -1} }, 477 { $limit : 100 } 478 ]); 479 480 481 WORKS: 482 db.Websites.aggregate([ 483 { 484 $match: { 485 geoLocationCountryCode: {$ne : "UNKNOWN"} 486 } 487 }, 488 { $unwind: "$geoLocationCountryCode" }, 489 { 490 $group: { 491 _id: "$geoLocationCountryCode", 492 count: { $sum: 1 } 493 } 494 }, 495 { $sort : { count : -1} }, 496 { $limit : 100 } 497 ]); 498 499 WORKS: 500 db.Websites.aggregate([ 501 { 502 $match: { 503 "urlContainsLangCodeInPath": true 504 } 505 }, 506 { $unwind: "$geoLocationCountryCode" }, 507 { 508 $group: { 509 _id: "$geoLocationCountryCode", 510 count: { $sum: 1 } 511 } 512 }, 513 { $sort : { count : -1} }, 514 { $limit : 100 } 515 ]); 516 517 518 KEEP ADDITIONAL FIELDS - https://stackoverflow.com/questions/16662405/mongo-group-query-how-to-keep-fields: 519 a. KEEPS ONLY FIRST DOMAIN URL FOR EACH COUNTED COUNTRY CODE: 520 521 db.Websites.aggregate([ 522 { 523 $match: { 524 "urlContainsLangCodeInPath": true 525 } 526 }, 527 { $unwind: "$geoLocationCountryCode" }, 528 { 529 $group: { 530 _id: "$geoLocationCountryCode", count: { $sum: 1 }, 531 domain: {$first: '$domain'} 532 } 533 }, 534 { $sort : { count : -1} } 535 ]); 536 537 b. KEEP ALL DOMAIN URLS: 538 db.Websites.aggregate([ 539 { 540 $match: { 541 "urlContainsLangCodeInPath": true 542 } 543 }, 544 { $unwind: "$geoLocationCountryCode" }, 545 { 546 $group: { 547 _id: "$geoLocationCountryCode", 548 count: { $sum: 1 }, 549 domain: { $addToSet: '$domain' } 550 } 551 }, 552 { $sort : { count : -1} } 553 ]); 554 555 556 # WANT TO GET THE ABOVE INTO WORLD MAP, use geojson.tools found by Dr Bainbridge 557 geojson.tools 558 USAGE: https://www.here.xyz/viewer-tool/ 424 559 425 560 … … 435 570 docs 436 571 572 573 574 575 /* 1 */ 576 { 577 "_id" : "US", 578 "count" : 93.0, 579 -95.8,40.33 580 } 581 582 /* 2 */ 583 { 584 "_id" : "AU", 585 "count" : 7.0, 586 135.8,-25.33 587 } 588 589 /* 3 */ 590 { 591 "_id" : "CN", 592 "count" : 7.0, 593 100.8, 594 32.33 595 } 596 597 /* 4 */ 598 { 599 "_id" : "NZ", 600 "count" : 5.0, 601 175.8, 602 -40.33 603 } 604 605 /* 5 */ 606 { 607 "_id" : "DE", 608 "count" : 5.0, 609 10.8, 610 50.33 611 } 612 613 /* 6 */ 614 { 615 "_id" : "HK", 616 "count" : 5.0, 617 114, 618 22.33 619 } 620 621 /* 7 */ 622 { 623 "_id" : "RU", 624 "count" : 4.0, 625 38.4, 626 55.5 627 } 628 629 /* 8 */ 630 { 631 "_id" : "JP", 632 "count" : 3.0, 633 137.8, 634 36 635 } 636 637 /* 9 */ 638 { 639 "_id" : "GB", 640 "count" : 3.0, 641 -2, 642 53.33 643 } 644 645 /* 10 */ 646 { 647 "_id" : "CA", 648 "count" : 2.0, 649 -105.8, 650 55.33 651 } 652 653 /* 11 */ 654 { 655 "_id" : "FR", 656 "count" : 2.0, 657 3, 658 47.33 659 } 660 661 /* 12 */ 662 { 663 "_id" : "DK", 664 "count" : 2.0, 665 9.5, 666 55.33 667 } 668 669 /* 13 British Virgin Islands */ 670 { 671 "_id" : "VG", 672 "count" : 2.0, 673 -64.8, 674 18.35 675 } 676 677 /* 14 Ukraine */ 678 { 679 "_id" : "UA", 680 "count" : 1.0, 681 31.5, 682 48.5 683 } 684 685 /* 15 */ 686 { 687 "_id" : "CZ", 688 "count" : 1.0, 689 16.2, 690 49.7 691 } 692 693 /* 16 Switzerland */ 694 { 695 "_id" : "CH", 696 "count" : 1.0, 697 8.5, 698 47 699 } 700 701 /* 17 Zuid-Afrika */ 702 { 703 "_id" : "ZA", 704 "count" : 1.0, 705 24.2, 706 -30.7 707 } 708 709 /* 18 */ 710 { 711 "_id" : "NL", 712 "count" : 1.0, 713 5.8, 714 52.33 715 } 716 717 /* 19 */ 718 { 719 "_id" : "KR", 720 "count" : 1.0, 721 127.8, 722 36.8 723 } 724 725 726 /** http://geojson.tools/ 727 728 729 { 730 "type": "MultiPoint", 731 "coordinates": [ 732 [ 733 -95.8, 734 40.33 735 ], 736 [ 737 135.8, 738 -25.33 739 ], 740 [ 741 100.8, 742 32.33 743 ], 744 [ 745 175.8, 746 -40.33 747 ], 748 [ 749 10.8, 750 50.33 751 ], 752 [ 753 10.8, 754 50.33 755 ], 756 [ 757 114, 758 22.33 759 ], 760 [ 761 38.4, 762 55.5 763 ], 764 [ 765 -2, 766 53.33 767 ], 768 [ 769 137.8, 770 36 771 ], 772 [ 773 -105.8, 774 55.33 775 ], 776 [ 777 3, 778 47.33 779 ], 780 [ 781 9.5, 782 55.33 783 ], 784 [ 785 -64.8, 786 18.35 787 ], 788 [ 789 31.5, 790 48.5 791 ], 792 [ 793 16.2, 794 49.7 795 ], 796 [ 797 8.5, 798 47 799 ], 800 [ 801 24.2, 802 -30.7 803 ], 804 [ 805 5.8, 806 52.33 807 ], 808 [ 809 127.8, 810 36.8 811 ] 812 ] 813 } 814 815 */ 816 817 /* 1 */ 818 { 819 "_id" : "US", 820 "count" : 93.0, 821 -95.8,40.33 822 } 823 824 /* 2 */ 825 { 826 "_id" : "AU", 827 "count" : 7.0, 828 135.8,-25.33 829 } 830 831 /* 3 */ 832 { 833 "_id" : "CN", 834 "count" : 7.0, 835 100.8, 836 32.33 837 } 838 839 /* 4 */ 840 { 841 "_id" : "NZ", 842 "count" : 5.0, 843 175.8, 844 -40.33 845 } 846 847 /* 5 */ 848 { 849 "_id" : "DE", 850 "count" : 5.0, 851 10.8, 852 50.33 853 } 854 855 /* 6 */ 856 { 857 "_id" : "HK", 858 "count" : 5.0, 859 114, 860 22.33 861 } 862 863 /* 7 */ 864 { 865 "_id" : "RU", 866 "count" : 4.0, 867 38.4, 868 55.5 869 } 870 871 /* 8 */ 872 { 873 "_id" : "JP", 874 "count" : 3.0, 875 137.8, 876 36 877 } 878 879 /* 9 */ 880 { 881 "_id" : "GB", 882 "count" : 3.0, 883 -2, 884 53.33 885 } 886 887 /* 10 */ 888 { 889 "_id" : "CA", 890 "count" : 2.0, 891 -105.8, 892 55.33 893 } 894 895 /* 11 */ 896 { 897 "_id" : "FR", 898 "count" : 2.0, 899 3, 900 47.33 901 } 902 903 /* 12 */ 904 { 905 "_id" : "DK", 906 "count" : 2.0, 907 9.5, 908 55.33 909 } 910 911 /* 13 British Virgin Islands */ 912 { 913 "_id" : "VG", 914 "count" : 2.0, 915 -64.8, 916 18.35 917 } 918 919 /* 14 Ukraine */ 920 { 921 "_id" : "UA", 922 "count" : 1.0, 923 31.5, 924 48.5 925 } 926 927 /* 15 */ 928 { 929 "_id" : "CZ", 930 "count" : 1.0, 931 16.2, 932 49.7 933 } 934 935 /* 16 Switzerland */ 936 { 937 "_id" : "CH", 938 "count" : 1.0, 939 8.5, 940 47 941 } 942 943 /* 17 Zuid-Afrika */ 944 { 945 "_id" : "ZA", 946 "count" : 1.0, 947 24.2, 948 -30.7 949 } 950 951 /* 18 */ 952 { 953 "_id" : "NL", 954 "count" : 1.0, 955 5.8, 956 52.33 957 } 958 959 /* 19 */ 960 { 961 "_id" : "KR", 962 "count" : 1.0, 963 127.8, 964 36.8 965 } 966 967 968 /** http://geojson.tools/ 969 970 971 { 972 "type": "MultiPoint", 973 "coordinates": [ 974 [ 975 -95.8, 976 40.33 977 ], 978 [ 979 135.8, 980 -25.33 981 ], 982 [ 983 100.8, 984 32.33 985 ], 986 [ 987 175.8, 988 -40.33 989 ], 990 [ 991 10.8, 992 50.33 993 ], 994 [ 995 10.8, 996 50.33 997 ], 998 [ 999 114, 1000 22.33 1001 ], 1002 [ 1003 38.4, 1004 55.5 1005 ], 1006 [ 1007 -2, 1008 53.33 1009 ], 1010 [ 1011 137.8, 1012 36 1013 ], 1014 [ 1015 -105.8, 1016 55.33 1017 ], 1018 [ 1019 3, 1020 47.33 1021 ], 1022 [ 1023 9.5, 1024 55.33 1025 ], 1026 [ 1027 -64.8, 1028 18.35 1029 ], 1030 [ 1031 31.5, 1032 48.5 1033 ], 1034 [ 1035 16.2, 1036 49.7 1037 ], 1038 [ 1039 8.5, 1040 47 1041 ], 1042 [ 1043 24.2, 1044 -30.7 1045 ], 1046 [ 1047 5.8, 1048 52.33 1049 ], 1050 [ 1051 127.8, 1052 36.8 1053 ] 1054 ] 1055 } 1056 1057 */
Note:
See TracChangeset
for help on using the changeset viewer.