source: other-projects/metadata-encoding/py/using-mongodb/mongoAnalysis.py@ 38773

Last change on this file since 38773 was 38773, checked in by jc550, 4 months ago

last batch of python to move

File size: 1.7 KB
Line 
1# function to display cool(?) statistics about the subtitle
2import datetime
3from dateutil.relativedelta import relativedelta
4from alive_progress import alive_bar
5from pymongo import MongoClient
6
7# 1. get minimum date
8# 2. get the number of documents that fulfill the criteria for each month, starting from the min month
9# 3. display on graph or put into csv for easy R implementation
10
# Format that "dateCreated" strings are parsed with.
# NOTE(review): assumes every document stores "dateCreated" in this format --
# confirm against the import pipeline.
_DATE_FORMAT = "%Y-%m-%dT%H:%M:%SZ"


def _count_subtitles_per_month(items, first_date, on_item=None):
    """Count documents that have a "subtitle" key, bucketed by month.

    Month windows are anchored at ``first_date``: window *k* (1-based) ends at
    ``first_date + k months`` inclusive.

    Args:
        items: Iterable of mappings sorted ascending by "dateCreated"; each
            must contain a "dateCreated" string in ``_DATE_FORMAT``.
        first_date: ``datetime.datetime`` of the earliest document.
        on_item: Optional zero-argument callable invoked once per processed
            item (used to tick a progress bar).

    Returns:
        A list with one entry per month window, in chronological order. Each
        entry is a one-element list ``[subtitle_count]``. Windows with no
        matching documents are included with a count of 0. An empty input
        yields an empty list.

    Raises:
        KeyError: If an item has no "dateCreated" key.
        ValueError: If a "dateCreated" string does not match ``_DATE_FORMAT``.
    """
    statistics = []
    subtitle_count = 0
    month_index = 1
    # Always offset from first_date instead of repeatedly adding one month:
    # relativedelta clamps to the last valid day, so Jan 31 + 1 month + 1 month
    # would drift to the 28th for every later window.
    window_end = first_date + relativedelta(months=month_index)
    saw_item = False

    for item in items:
        saw_item = True
        item_date = datetime.datetime.strptime(item["dateCreated"], _DATE_FORMAT)

        # Close every window that ends before this item, including empty
        # months when there is a gap in the data.
        while item_date > window_end:
            statistics.append([subtitle_count])
            subtitle_count = 0
            month_index += 1
            window_end = first_date + relativedelta(months=month_index)

        # The item is counted in the window it falls into, including the item
        # that triggered the rollover above.
        if "subtitle" in item:
            subtitle_count += 1

        if on_item is not None:
            on_item()

    # Flush the window that was still open when the input ran out.
    if saw_item:
        statistics.append([subtitle_count])

    return statistics


def main():
    """Compute per-month counts of crossref documents that have a subtitle.

    Connects to the local MongoDB instance, reads the "crossref" collection of
    the "test" database sorted by "dateCreated", prints the earliest creation
    date, and counts documents with a "subtitle" key for each month window
    starting at that date. A progress bar is shown while iterating.

    Returns:
        The list produced by ``_count_subtitles_per_month`` (one
        ``[subtitle_count]`` entry per month), or an empty list when the
        collection has no documents.
    """
    # The context manager closes the client connection on exit.
    with MongoClient("mongodb://localhost:27017/") as client:
        collection = client["test"]["crossref"]

        # Earliest document; None when the collection is empty.
        first_document = next(
            collection.find().sort("dateCreated").limit(1), None
        )
        if first_document is None:
            return []

        first_date = datetime.datetime.strptime(
            first_document["dateCreated"], _DATE_FORMAT
        )
        print(first_date)

        # The estimated count is only used as the progress-bar total.
        total_documents = collection.estimated_document_count()
        sorted_documents = collection.find().sort("dateCreated")

        with alive_bar(total_documents) as bar:
            return _count_subtitles_per_month(
                sorted_documents, first_date, on_item=bar
            )


# Kept as an unconditional call so the script behaves exactly as before when
# executed or imported.
main()
Note: See TracBrowser for help on using the repository browser.