source: other-projects/metadata-encoding/py/using-mongodb/mongoAnalysis.py@ 38791

Last change on this file since 38791 was 38791, checked in by jc550, 4 months ago

add comments adding context to functions that require it

File size: 1.9 KB
Line 
# Script to compute statistics about the "subtitle" field of the documents in the crossref collection
2
3import datetime
4from dateutil.relativedelta import relativedelta
5from alive_progress import alive_bar
6from pymongo import MongoClient
7
# 1. Get the minimum date.
# 2. Get the number of documents that fulfil the criteria for each month, starting from the minimum month.
# 3. Display the result on a graph, or write it to a CSV file for easy use from R.
11
def _countSubtitlesPerMonth(items, firstDate, dateFormat, onItem=None):
    """Count the items that have a "subtitle" key in each one-month window.

    Windows start at firstDate. Window k (1-based) ends at, and includes,
    firstDate + k months. Months in which no item falls still get an entry.

    items      -- iterable of dicts in ascending "dateCreated" order; each
                  "dateCreated" is a string in dateFormat
    firstDate  -- datetime at which the first window starts
    dateFormat -- strptime format of the "dateCreated" strings
    onItem     -- optional callable invoked once per item (progress hook)

    Returns a list with one [count] entry per window, oldest first.
    """
    statistics = []
    subtitleCount = 0
    # Each window end is computed from firstDate instead of by repeatedly
    # adding one month, so a start on the 29th-31st is not permanently
    # clamped to a shorter month's last day.
    monthsElapsed = 1
    windowEnd = firstDate + relativedelta(months=monthsElapsed)

    for item in items:
        # dateCreated is stored as text; parse it so it can be compared
        # with the datetime window boundary.
        itemDate = datetime.datetime.strptime(item["dateCreated"], dateFormat)

        # Close every window that ends before this item. A loop rather than
        # a single step, because there may be months without any document.
        while itemDate > windowEnd:
            statistics.append([subtitleCount])
            subtitleCount = 0
            monthsElapsed += 1
            windowEnd = firstDate + relativedelta(months=monthsElapsed)

        # The item belongs to the (possibly just opened) current window.
        if "subtitle" in item:
            subtitleCount += 1

        if onItem is not None:
            onItem()

    # The last window is never closed by a later item, so flush it here.
    statistics.append([subtitleCount])
    return statistics


def main():
    """Count, month by month, the crossref documents that have a subtitle.

    Connects to the local MongoDB instance, reads the "crossref" collection
    of the "test" database in ascending "dateCreated" order, prints the
    earliest creation date and builds the per-month subtitle counts while
    showing a progress bar. Returns None; does nothing if the collection is
    empty.
    """
    dateFormat = "%Y-%m-%dT%H:%M:%SZ"

    # Connect to database; the with-block closes the client when we are done
    with MongoClient("mongodb://localhost:27017/") as mClient:
        mDatabase = mClient["test"]
        mCollection = mDatabase["crossref"]

        # Get minimum date
        minimumDateQuery = mCollection.find().sort("dateCreated").limit(1)
        oldestDocument = next(minimumDateQuery, None)
        if oldestDocument is None:
            # Empty collection: there is nothing to count
            return

        dTMinimumDate = datetime.datetime.strptime(oldestDocument["dateCreated"], dateFormat)
        print(dTMinimumDate)

        # Get documents sorted by date
        sortedCollectionQuery = mCollection.find().sort("dateCreated")
        # And the count (an estimate, only used to size the progress bar)
        countQuery = mCollection.estimated_document_count()

        with alive_bar(countQuery) as bar:
            statistics = _countSubtitlesPerMonth(sortedCollectionQuery, dTMinimumDate, dateFormat, bar)

    # TODO: currently unfinished. `statistics` now holds one [count] entry per
    # month, but nothing is done with it yet (display on a graph or write to csv).
59
# Run the analysis only when this file is executed as a script, so that
# importing the module does not open a database connection.
if __name__ == "__main__":
    main()
Note: See TracBrowser for help on using the repository browser.