1 | # function to display cool(?) statistics about the subtitle
|
---|
2 |
|
---|
3 | import datetime
|
---|
4 | from dateutil.relativedelta import relativedelta
|
---|
5 | from alive_progress import alive_bar
|
---|
6 | from pymongo import MongoClient
|
---|
7 |
|
---|
8 | # 1. get minimum date
|
---|
9 | # 2. get the number of documents that fulfill the criteria for each month, starting from the minimum month
|
---|
10 | # 3. display on graph or put into csv for easy R implementation
|
---|
11 |
|
---|
# Timestamp layout of the "dateCreated" strings stored in the collection.
_DATE_FORMAT = "%Y-%m-%dT%H:%M:%SZ"


def _count_subtitles_by_month(documents, start, on_item=None):
    """Count documents that have a "subtitle" key, one bucket per month.

    Buckets are consecutive one-month windows measured from ``start``. A
    document falls into the first window whose end is at or after its
    "dateCreated" value (the window end is inclusive).

    Args:
        documents: Iterable of mappings sorted ascending by "dateCreated";
            each is assumed to hold that key as a string in ``_DATE_FORMAT``.
        start: ``datetime.datetime`` of the earliest document.
        on_item: Optional zero-argument callable invoked once per document
            (used to tick the progress bar).

    Returns:
        A list with one ``[subtitle_count]`` entry per month window, in
        chronological order. Months without any document contribute ``[0]``.
        The list is empty when ``documents`` yields nothing.
    """
    statistics = []
    bucket = [0]
    months_elapsed = 1
    # Always offset from ``start`` instead of adding one month repeatedly, so
    # an end-of-month start (e.g. Jan 31) does not drift to the 28th forever.
    window_end = start + relativedelta(months=months_elapsed)
    seen_any = False

    for item in documents:
        seen_any = True
        # Stored dates are strings; parse before comparing with a datetime.
        item_date = datetime.datetime.strptime(item["dateCreated"], _DATE_FORMAT)

        # Close every window that ends before this document, including
        # empty months when the data has gaps longer than one month.
        while item_date > window_end:
            statistics.append(bucket)
            bucket = [0]
            months_elapsed += 1
            window_end = start + relativedelta(months=months_elapsed)

        if "subtitle" in item:
            bucket[0] += 1

        if on_item is not None:
            on_item()

    # The last window is never closed by a later document, so flush it here.
    if seen_any:
        statistics.append(bucket)
    return statistics


def main():
    """Tally, month by month, how many crossref documents have a subtitle.

    Connects to the MongoDB instance at localhost:27017, reads the
    "crossref" collection of the "test" database, prints the earliest
    "dateCreated" timestamp and counts, per one-month window starting at
    that timestamp, the documents that contain a "subtitle" key.

    Returns:
        A list of ``[subtitle_count]`` entries, one per month window in
        chronological order, or an empty list when the collection is empty.
    """
    # The context manager closes the client connection on exit.
    with MongoClient("mongodb://localhost:27017/") as client:
        collection = client["test"]["crossref"]

        # Earliest document by date; None when the collection is empty.
        earliest = collection.find_one(sort=[("dateCreated", 1)])
        if earliest is None:
            return []

        start = datetime.datetime.strptime(earliest["dateCreated"], _DATE_FORMAT)
        print(start)

        # The estimated count is only used to size the progress bar.
        total = collection.estimated_document_count()
        documents = collection.find().sort("dateCreated")

        with alive_bar(total) as bar:
            statistics = _count_subtitles_by_month(documents, start, on_item=bar)

    # TODO: display the monthly counts on a graph or export them to CSV.
    return statistics
|
---|
59 |
|
---|
# Run the statistics job only when executed as a script, so importing this
# module does not open a database connection as a side effect.
if __name__ == "__main__":
    main()