diff --git a/.rubocop.yml b/.rubocop.yml new file mode 100644 index 00000000..e69de29b diff --git a/case-study.md b/case-study.md new file mode 100644 index 00000000..73e83bc0 --- /dev/null +++ b/case-study.md @@ -0,0 +1,46 @@ +# Case-study оптимизации + +## Актуальная проблема +В нашем проекте возникла серьёзная проблема. +Необходимо было обработать файл с данными, чуть больше ста мегабайт. +У нас уже была программа на `ruby`, которая умела делать нужную обработку. +Она успешно работала на файлах размером пару мегабайт, но для большого файла она работала слишком долго, и не было понятно, закончит ли она вообще работу за какое-то разумное время. +Я решил исправить эту проблему, оптимизировав эту программу. + +## Формирование метрики +Для того, чтобы понимать, дают ли мои изменения положительный эффект на быстродействие программы, я придумал использовать такую метрику: Wall Time. + +## Гарантия корректности работы оптимизированной программы +Программа поставлялась с тестом. Выполнение этого теста в фидбек-лупе позволяет не допустить изменения логики программы при оптимизации. + +## Feedback-Loop +Для того, чтобы иметь возможность быстро проверять гипотезы, я выстроил эффективный `feedback-loop`, который позволил мне получать обратную связь по эффективности сделанных изменений за 1s-2s. + +Вот как я построил `feedback-loop`: +- выключил GC +- добавил ruby-prof для отслеживания времени +- создал тестовый файл на 4 МБ +- нашёл главную точку роста +- исправил её +- проверил тест + +## Вникаем в детали системы, чтобы найти главные точки роста +Для того, чтобы найти "точки роста" для оптимизации, я воспользовался ruby-prof reports: flat, graph, callstack. + +Вот какие проблемы удалось найти и решить: + +- Многочисленное использование бесполезных map +- Использование бесполезного Date.parse +- Создание юзера после парсинга файла +- Выгрузка файла сразу в память +- Создание JSON + +## Результаты +В результате проделанной оптимизации наконец удалось обработать файл с данными. 
+Удалось улучшить метрику системы с "не дождался" до 30 с и уложиться в заданный бюджет. + +Из наблюдений: +Сложность алгоритма может влиять в сотни раз больше, чем неправильные конструкции. + +## Защита от регрессии производительности +Для защиты от потери достигнутого прогресса при дальнейших изменениях программы был написан performance test. diff --git a/data_test.txt b/data_test.txt new file mode 100644 index 00000000..393b0b8b --- /dev/null +++ b/data_test.txt @@ -0,0 +1,18 @@ +user,0,Leida,Cira,0 +session,0,0,Safari 29,87,2016-10-23 +session,0,1,Firefox 12,118,2017-02-27 +session,0,2,Internet Explorer 28,31,2017-03-28 +session,0,3,Internet Explorer 28,109,2016-09-15 +session,0,4,Safari 39,104,2017-09-27 +session,0,5,Internet Explorer 35,6,2016-09-01 +user,1,Palmer,Katrina,65 +session,1,0,Safari 17,12,2016-10-21 +session,1,1,Firefox 32,3,2016-12-20 +session,1,2,Chrome 6,59,2016-11-11 +session,1,3,Internet Explorer 10,28,2017-04-29 +session,1,4,Chrome 13,116,2016-12-28 +user,2,Gregory,Santos,86 +session,2,0,Chrome 35,6,2018-09-21 +session,2,1,Safari 49,85,2017-05-22 +session,2,2,Firefox 47,17,2018-02-02 +session,2,3,Chrome 20,84,2016-11-25 diff --git a/performance_tests/versus.rb b/performance_tests/versus.rb new file mode 100644 index 00000000..2a106d6b --- /dev/null +++ b/performance_tests/versus.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +require 'benchmark/ips' +require 'date' + +STRING = '2018-03-21' + +def strftime + Date.strptime(STRING, '%Y-%m-%d') +end + +def iso + Date.iso8601(STRING) +end + +Benchmark.ips do |x| + x.report('Date#iso8601') { iso } + x.report('Date#strftime') { strftime } + + x.compare! 
+end diff --git a/profiling_results/profile.flat.txt b/profiling_results/profile.flat.txt new file mode 100644 index 00000000..d65a1bd6 --- /dev/null +++ b/profiling_results/profile.flat.txt @@ -0,0 +1,55 @@ +Measure Mode: wall_time +Thread ID: 70342647472220 +Fiber ID: 70342668989760 +Total: 67.361716 +Sort by: self_time + + %self total self wait child calls name + 16.48 27.888 11.102 0.000 16.786 1 String#each_line + 11.97 8.605 8.066 0.000 0.539 2750940 #strptime + 11.72 7.897 7.897 0.000 0.000 3250940 String#split + 8.16 15.075 5.494 0.000 9.581 2000000 Array#map + 5.24 3.533 3.533 0.000 0.000 8126279 String#encode + 5.10 25.506 3.434 0.000 22.072 1 Array#each + 4.81 4.496 3.242 0.000 1.255 2750940 Object#parse_session + 4.72 13.679 3.177 0.000 10.502 1 JSON::Ext::Generator::GeneratorMethods::Hash#to_json + 4.70 6.302 3.166 0.000 3.136 2709158 JSON::Ext::Generator::GeneratorMethods::Object#to_json + 3.38 3.247 2.277 0.000 0.970 1000000 Array#sort + 2.93 1.977 1.977 0.000 0.000 2709158 Date#to_s + 2.23 1.501 1.501 0.000 0.000 2750940 Set#add + 2.02 1.359 1.359 0.000 0.000 500000 Array#any? + 1.92 1.296 1.296 0.000 0.000 3447260 Symbol#to_s + 1.86 1.255 1.255 0.000 0.000 2750940 String#upcase + 1.57 1.057 1.057 0.000 0.000 2750940 User#add_session + 1.45 0.976 0.976 0.000 0.000 5501880 String#to_i + 1.44 0.970 0.970 0.000 0.000 5568523 Date#<=> + 1.29 1.114 0.867 0.000 0.246 500002 *Class#new + 1.07 0.722 0.722 0.000 0.000 500000 Object#parse_user + 0.97 0.657 0.657 0.000 0.000 1000000 Integer#to_s + 0.96 0.646 0.646 0.000 0.000 500001 Array#join + 0.80 0.539 0.539 0.000 0.000 2750940 Integer#div + 0.69 0.464 0.464 0.000 0.000 500000 Array#all? 
+ 0.63 0.424 0.424 0.000 0.000 492467 Hash#keys + 0.44 0.296 0.296 0.000 0.000 500000 Array#reverse + 0.37 0.246 0.246 0.000 0.000 500000 User#initialize + 0.24 67.362 0.160 0.000 67.202 1 Object#work + 0.18 0.118 0.118 0.000 0.000 500000 Array#sum + 0.17 0.114 0.114 0.000 0.000 500000 Array#max + 0.16 0.106 0.106 0.000 0.000 492465 String#to_s + 0.14 0.096 0.096 0.000 0.000 500001 Array#count + 0.13 0.086 0.086 0.000 0.000 1 #read + 0.06 0.043 0.043 0.000 0.000 1 #write + 0.00 0.000 0.000 0.000 0.000 1 Enumerable#sort + 0.00 67.362 0.000 0.000 67.362 1 [global]#[no method] + 0.00 0.000 0.000 0.000 0.000 2 Hash#each_key + 0.00 0.000 0.000 0.000 0.000 2 Set#each + 0.00 0.000 0.000 0.000 0.000 1 Enumerable#count + 0.00 0.000 0.000 0.000 0.000 1 JSON::Ext::Generator::State#initialize_copy + 0.00 0.000 0.000 0.000 0.000 1 Set#initialize + 0.00 0.000 0.000 0.000 0.000 1 Kernel#dup + 0.00 0.000 0.000 0.000 0.000 1 Hash#initialize + 0.00 25.506 0.000 0.000 25.506 1 Object#collect_stats_from_users + 0.00 0.000 0.000 0.000 0.000 1 Kernel#initialize_dup + 0.00 0.000 0.000 0.000 0.000 1 NilClass#nil? + +* indicates recursively called methods diff --git a/profiling_results/profile.graph.html b/profiling_results/profile.graph.html new file mode 100644 index 00000000..c256a8a5 --- /dev/null +++ b/profiling_results/profile.graph.html @@ -0,0 +1,2338 @@ + + + + + + + +

Profile Report: wall_time

+ + + + + + + + + + + + + + +
Thread IDFiber IDTotal Time
703426474722207034266898976067.3617160320282
+ + +

Thread 70342647472220, Fiber: 70342668989760

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
%Total%SelfTotalSelfWaitChildCallsNameLine
100.00%0.00%67.360.000.0067.361 + + [global]#[no method] + + 111
  67.360.160.0067.201/1Object#work111
  67.360.160.0067.201/1[global]#[no method]111
100.00%0.24%67.360.160.0067.201 + + Object#work + + 53
  27.8911.100.0016.791/1String#each_line57
  25.510.000.0025.511/1Object#collect_stats_from_users95
  13.683.180.0010.501/1JSON::Ext::Generator::GeneratorMethods::Hash#to_json107
  0.090.090.000.001/1<Class::IO>#read57
  0.040.040.000.001/1<Class::IO>#write107
  0.000.000.000.001/1Enumerable#sort89
  0.000.000.000.001/500001Array#join89
  0.000.000.000.001/1Enumerable#count87
  0.000.000.000.001/500002Class#new54
  0.000.000.000.001/500001Array#count86
  27.8911.100.0016.791/1Object#work57
41.40%16.48%27.8911.100.0016.791 + + String#each_line + +
  7.907.900.000.003250940/3250940String#split
  4.503.240.001.252750940/2750940Object#parse_session
  1.501.500.000.002750940/2750940Set#add
  1.110.870.000.25500000/500002Class#new
  1.061.060.000.002750940/2750940User#add_session
  0.720.720.000.00500000/500000Object#parse_user
  25.510.000.0025.511/1Object#work95
37.86%0.00%25.510.000.0025.511 + + Object#collect_stats_from_users + + 46
  25.513.430.0022.071/1Array#each46
  25.513.430.0022.071/1Object#collect_stats_from_users46
37.86%5.10%25.513.430.0022.071 + + Array#each + +
  15.075.490.009.582000000/2000000Array#map
  3.252.280.000.971000000/1000000Array#sort
  1.361.360.000.00500000/500000Array#any?
  0.660.660.000.001000000/1000000Integer#to_s
  0.650.650.000.00500000/500001Array#join
  0.460.460.000.00500000/500000Array#all?
  0.300.300.000.00500000/500000Array#reverse
  0.120.120.000.00500000/500000Array#sum
  0.110.110.000.00500000/500000Array#max
  0.100.100.000.00500000/500001Array#count
  15.075.490.009.582000000/2000000Array#each
22.38%8.16%15.075.490.009.582000000 + + Array#map + +
  8.618.070.000.542750940/2750940<Class::Date>#strptime
  0.980.980.000.005501880/5501880String#to_i
  13.683.180.0010.501/1Object#work107
20.31%4.72%13.683.180.0010.501 + + JSON::Ext::Generator::GeneratorMethods::Hash#to_json + +
  6.303.170.003.142709158/2709158JSON::Ext::Generator::GeneratorMethods::Object#to_json
  2.372.370.000.005417121/8126279String#encode
  1.301.300.000.003447260/3447260Symbol#to_s
  0.420.420.000.00492467/492467Hash#keys
  0.110.110.000.00492465/492465String#to_s
  0.000.000.000.001/1Kernel#dup
  8.618.070.000.542750940/2750940Array#map
12.77%11.97%8.618.070.000.542750940 + + <Class::Date>#strptime + +
  0.540.540.000.002750940/2750940Integer#div
  7.907.900.000.003250940/3250940String#each_line
11.72%11.72%7.907.900.000.003250940 + + String#split + +
  6.303.170.003.142709158/2709158JSON::Ext::Generator::GeneratorMethods::Hash#to_json
9.36%4.70%6.303.170.003.142709158 + + JSON::Ext::Generator::GeneratorMethods::Object#to_json + +
  1.981.980.000.002709158/2709158Date#to_s
  1.161.160.000.002709158/8126279String#encode
  4.503.240.001.252750940/2750940String#each_line
6.68%4.81%4.503.240.001.252750940 + + Object#parse_session + + 37
  1.251.250.000.002750940/2750940String#upcase37
  1.161.160.000.002709158/8126279JSON::Ext::Generator::GeneratorMethods::Object#to_json
  2.372.370.000.005417121/8126279JSON::Ext::Generator::GeneratorMethods::Hash#to_json
5.24%5.24%3.533.530.000.008126279 + + String#encode + +
  3.252.280.000.971000000/1000000Array#each
4.82%3.38%3.252.280.000.971000000 + + Array#sort + +
  0.970.970.000.005568523/5568523Date#<=>
  1.981.980.000.002709158/2709158JSON::Ext::Generator::GeneratorMethods::Object#to_json
2.93%2.93%1.981.980.000.002709158 + + Date#to_s + +
  1.501.500.000.002750940/2750940String#each_line
2.23%2.23%1.501.500.000.002750940 + + Set#add + + 349
  1.361.360.000.00500000/500000Array#each
2.02%2.02%1.361.360.000.00500000 + + Array#any? + +
  1.301.300.000.003447260/3447260JSON::Ext::Generator::GeneratorMethods::Hash#to_json
1.92%1.92%1.301.300.000.003447260 + + Symbol#to_s + +
  1.251.250.000.002750940/2750940Object#parse_session37
1.86%1.86%1.251.250.000.002750940 + + String#upcase + +
  0.000.000.000.001/500002Set#initialize94
  0.000.000.000.001/500002Object#work54
  1.110.870.000.25500000/500002String#each_line
1.65%1.29%1.110.870.000.25500002 + + *Class#new + +
  0.250.250.000.00500000/500000User#initialize
  0.000.000.000.001/1Set#initialize
  0.000.000.000.001/1Hash#initialize
  1.061.060.000.002750940/2750940String#each_line
1.57%1.57%1.061.060.000.002750940 + + User#add_session + + 21
  0.980.980.000.005501880/5501880Array#map
1.45%1.45%0.980.980.000.005501880 + + String#to_i + +
  0.970.970.000.005568523/5568523Array#sort
1.44%1.44%0.970.970.000.005568523 + + Date#<=> + +
  0.720.720.000.00500000/500000String#each_line
1.07%1.07%0.720.720.000.00500000 + + Object#parse_user + + 27
  0.660.660.000.001000000/1000000Array#each
0.97%0.97%0.660.660.000.001000000 + + Integer#to_s + +
  0.000.000.000.001/500001Object#work89
  0.650.650.000.00500000/500001Array#each
0.96%0.96%0.650.650.000.00500001 + + Array#join + +
  0.540.540.000.002750940/2750940<Class::Date>#strptime
0.80%0.80%0.540.540.000.002750940 + + Integer#div + +
  0.460.460.000.00500000/500000Array#each
0.69%0.69%0.460.460.000.00500000 + + Array#all? + +
  0.420.420.000.00492467/492467JSON::Ext::Generator::GeneratorMethods::Hash#to_json
0.63%0.63%0.420.420.000.00492467 + + Hash#keys + +
  0.300.300.000.00500000/500000Array#each
0.44%0.44%0.300.300.000.00500000 + + Array#reverse + +
  0.250.250.000.00500000/500000Class#new
0.37%0.37%0.250.250.000.00500000 + + User#initialize + + 16
  0.120.120.000.00500000/500000Array#each
0.18%0.18%0.120.120.000.00500000 + + Array#sum + +
  0.110.110.000.00500000/500000Array#each
0.17%0.17%0.110.110.000.00500000 + + Array#max + +
  0.110.110.000.00492465/492465JSON::Ext::Generator::GeneratorMethods::Hash#to_json
0.16%0.16%0.110.110.000.00492465 + + String#to_s + +
  0.000.000.000.001/500001Object#work86
  0.100.100.000.00500000/500001Array#each
0.14%0.14%0.100.100.000.00500001 + + Array#count + +
  0.090.090.000.001/1Object#work57
0.13%0.13%0.090.090.000.001 + + <Class::IO>#read + +
  0.040.040.000.001/1Object#work107
0.06%0.06%0.040.040.000.001 + + <Class::IO>#write + +
  0.000.000.000.001/1Object#work89
0.00%0.00%0.000.000.000.001 + + Enumerable#sort + +
  0.000.000.000.001/2Set#each
  0.000.000.000.001/2Enumerable#count
  0.000.000.000.001/2Enumerable#sort
0.00%0.00%0.000.000.000.002 + + Set#each + + 337
  0.000.000.000.002/2Hash#each_key338
  0.000.000.000.002/2Set#each338
0.00%0.00%0.000.000.000.002 + + Hash#each_key + +
  0.000.000.000.001/1Object#work87
0.00%0.00%0.000.000.000.001 + + Enumerable#count + +
  0.000.000.000.001/2Set#each
  0.000.000.000.001/1JSON::Ext::Generator::GeneratorMethods::Hash#to_json
0.00%0.00%0.000.000.000.001 + + Kernel#dup + +
  0.000.000.000.001/1Kernel#initialize_dup
  0.000.000.000.001/1Kernel#dup
0.00%0.00%0.000.000.000.001 + + Kernel#initialize_dup + +
  0.000.000.000.001/1JSON::Ext::Generator::State#initialize_copy
  0.000.000.000.001/1Class#new
0.00%0.00%0.000.000.000.001 + + Set#initialize + + 94
  0.000.000.000.001/500002Class#new94
  0.000.000.000.001/1NilClass#nil?96
  0.000.000.000.001/1Kernel#initialize_dup
0.00%0.00%0.000.000.000.001 + + JSON::Ext::Generator::State#initialize_copy + +
  0.000.000.000.001/1Class#new
0.00%0.00%0.000.000.000.001 + + Hash#initialize + +
  0.000.000.000.001/1Set#initialize96
0.00%0.00%0.000.000.000.001 + + NilClass#nil? + +
* indicates recursively called methods
+ + + diff --git a/profiling_results/profile.stack.html b/profiling_results/profile.stack.html new file mode 100644 index 00000000..d6470b55 --- /dev/null +++ b/profiling_results/profile.stack.html @@ -0,0 +1,585 @@ + + +ruby-prof call tree + + +
+
+Call tree for application task-1.rb
+Generated on 2019-07-30 00:10:44 +0300 with options {}
+
+
+Threshold: + + + + + +
+ +
Thread: 70342647472220, Fiber: 70342668989760 (100.00% ~ 67.3617160320282)
  • 100.00% (100.00%) [global]#[no method] [1 calls, 1 total] +
    • 100.00% (100.00%) Object#work [1 calls, 1 total] +
      • 41.40% (41.40%) String#each_line [1 calls, 1 total] +
      • 37.86% (37.86%) Object#collect_stats_from_users [1 calls, 1 total] +
        • 37.86% (100.00%) Array#each [1 calls, 1 total] +
          • 22.38% (59.10%) Array#map [2000000 calls, 2000000 total] +
            • 12.77% (57.08%) <Class::Date>#strptime [2750940 calls, 2750940 total] +
            • 1.45% (6.47%) String#to_i [5501880 calls, 5501880 total] +
          • 4.82% (12.73%) Array#sort [1000000 calls, 1000000 total] +
            • 1.44% (29.88%) Date#<=> [5568523 calls, 5568523 total] +
          • 2.02% (5.33%) Array#any? [500000 calls, 500000 total] +
      • 20.31% (20.31%) JSON::Ext::Generator::GeneratorMethods::Hash#to_json [1 calls, 1 total] +
        • 9.36% (46.07%) JSON::Ext::Generator::GeneratorMethods::Object#to_json [2709158 calls, 2709158 total] +
          • 2.93% (31.37%) Date#to_s [2709158 calls, 2709158 total] +
          • 1.72% (18.39%) String#encode [2709158 calls, 8126279 total] +
        • 3.52% (17.35%) String#encode [5417121 calls, 8126279 total] +
        • 1.92% (9.48%) Symbol#to_s [3447260 calls, 3447260 total] +
diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb new file mode 100644 index 00000000..ea7f8d6b --- /dev/null +++ b/spec/spec_helper.rb @@ -0,0 +1,107 @@ + +# This file was generated by the `rspec --init` command. Conventionally, all +# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`. +# The generated `.rspec` file contains `--require spec_helper` which will cause +# this file to always be loaded, without a need to explicitly require it in any +# files. +# +# Given that it is always loaded, you are encouraged to keep this file as +# light-weight as possible. Requiring heavyweight dependencies from this file +# will add to the boot time of your test suite on EVERY test run, even for an +# individual file that may not need all of that loaded. Instead, consider making +# a separate helper file that requires the additional dependencies and performs +# the additional setup, and require it from the spec files that actually need +# it. +# +# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration + +require 'rspec-benchmark' + +RSpec.configure do |config| + # rspec-expectations config goes here. You can use an alternate + # assertion/expectation library such as wrong or the stdlib/minitest + # assertions if you prefer. + + config.include RSpec::Benchmark::Matchers + + config.expect_with :rspec do |expectations| + # This option will default to `true` in RSpec 4. It makes the `description` + # and `failure_message` of custom matchers include text for helper methods + # defined using `chain`, e.g.: + # be_bigger_than(2).and_smaller_than(4).description + # # => "be bigger than 2 and smaller than 4" + # ...rather than: + # # => "be bigger than 2" + expectations.include_chain_clauses_in_custom_matcher_descriptions = true + end + + # rspec-mocks config goes here. You can use an alternate test double + # library (such as bogus or mocha) by changing the `mock_with` option here. 
+ config.mock_with :rspec do |mocks| + # Prevents you from mocking or stubbing a method that does not exist on + # a real object. This is generally recommended, and will default to + # `true` in RSpec 4. + mocks.verify_partial_doubles = true + end + + # This option will default to `:apply_to_host_groups` in RSpec 4 (and will + # have no way to turn it off -- the option exists only for backwards + # compatibility in RSpec 3). It causes shared context metadata to be + # inherited by the metadata hash of host groups and examples, rather than + # triggering implicit auto-inclusion in groups with matching metadata. + config.shared_context_metadata_behavior = :apply_to_host_groups + +# The settings below are suggested to provide a good initial experience +# with RSpec, but feel free to customize to your heart's content. +=begin + # This allows you to limit a spec run to individual examples or groups + # you care about by tagging them with `:focus` metadata. When nothing + # is tagged with `:focus`, all examples get run. RSpec also provides + # aliases for `it`, `describe`, and `context` that include `:focus` + # metadata: `fit`, `fdescribe` and `fcontext`, respectively. + config.filter_run_when_matching :focus + + # Allows RSpec to persist some state between runs in order to support + # the `--only-failures` and `--next-failure` CLI options. We recommend + # you configure your source control system to ignore this file. + config.example_status_persistence_file_path = "spec/examples.txt" + + # Limits the available syntax to the non-monkey patched syntax that is + # recommended. For more details, see: + # - http://rspec.info/blog/2012/06/rspecs-new-expectation-syntax/ + # - http://www.teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/ + # - http://rspec.info/blog/2014/05/notable-changes-in-rspec-3/#zero-monkey-patching-mode + config.disable_monkey_patching! + + # This setting enables warnings. 
It's recommended, but in some cases may + # be too noisy due to issues in dependencies. + config.warnings = true + + # Many RSpec users commonly either run the entire suite or an individual + # file, and it's useful to allow more verbose output when running an + # individual spec file. + if config.files_to_run.one? + # Use the documentation formatter for detailed output, + # unless a formatter has already been configured + # (e.g. via a command-line flag). + config.default_formatter = "doc" + end + + # Print the 10 slowest examples and example groups at the + # end of the spec run, to help surface which specs are running + # particularly slow. + config.profile_examples = 10 + + # Run specs in random order to surface order dependencies. If you find an + # order dependency and want to debug it, you can fix the order by providing + # the seed, which is printed after each run. + # --seed 1234 + config.order = :random + + # Seed global randomization in this process using the `--seed` CLI option. + # Setting this allows you to use `--seed` to deterministically reproduce + # test failures related to randomization by passing the same `--seed` value + # as the one that triggered the failure. 
+ Kernel.srand config.seed +=end +end diff --git a/spec/task_spec.rb b/spec/task_spec.rb new file mode 100644 index 00000000..7f6960e9 --- /dev/null +++ b/spec/task_spec.rb @@ -0,0 +1,10 @@ +require "spec_helper" +load "task-1.rb" + +RSpec.describe "File Parsing" do + context "check performance" do + it "should take us less than 30 seconds" do + expect { work("data_large.txt", disable_gc: true) }.to perform_under(30).sec + end + end +end \ No newline at end of file diff --git a/task-1.rb b/task-1.rb index 778672df..abd798dd 100644 --- a/task-1.rb +++ b/task-1.rb @@ -1,9 +1,14 @@ +# frozen_string_literal: true + # Deoptimized version of homework task require 'json' require 'pry' require 'date' require 'minitest/autorun' +require 'minitest/benchmark' +require 'ruby-prof' +require 'multi_json' class User attr_reader :attributes, :sessions @@ -12,47 +17,49 @@ def initialize(attributes:, sessions:) @attributes = attributes @sessions = sessions end -end -def parse_user(user) - fields = user.split(',') - parsed_result = { - 'id' => fields[1], - 'first_name' => fields[2], - 'last_name' => fields[3], - 'age' => fields[4], - } + def add_session(session) + sessions << session + end end -def parse_session(session) - fields = session.split(',') - parsed_result = { - 'user_id' => fields[1], - 'session_id' => fields[2], - 'browser' => fields[3], - 'time' => fields[4], - 'date' => fields[5], +def parse_user(fields) + { + id: fields[1], + first_name: fields[2], + last_name: fields[3], + age: fields[4], + full_name: "#{fields[2]} #{fields[3]}" } end -def collect_stats_from_users(report, users_objects, &block) - users_objects.each do |user| - user_key = "#{user.attributes['first_name']}" + ' ' + "#{user.attributes['last_name']}" - report['usersStats'][user_key] ||= {} - report['usersStats'][user_key] = report['usersStats'][user_key].merge(block.call(user)) - end +def parse_session(fields) + { + user_id: fields[1], + session_id: fields[2], + browser: fields[3].upcase, + time: 
fields[4], + date: fields[5], + } end -def work - file_lines = File.read('data.txt').split("\n") - +def work(file = "data_large.txt", disable_gc = false) + GC.disable if disable_gc users = [] - sessions = [] + uniqueBrowsers = Set.new + sessions_count = 0 - file_lines.each do |line| + File.open(file, 'r').each do |line| + line.chomp! cols = line.split(',') - users = users + [parse_user(line)] if cols[0] == 'user' - sessions = sessions + [parse_session(line)] if cols[0] == 'session' + if cols[0] == 'user' + users << User.new(attributes: parse_user(cols), sessions: []) + next + end + session = parse_session(cols) + users[-1].add_session(session) + uniqueBrowsers.add(session[:browser]) + sessions_count += 1 end # Отчёт в json @@ -71,106 +78,64 @@ def work # - даты сессий в порядке убывания через запятую + report = {} - report[:totalUsers] = users.count - - # Подсчёт количества уникальных браузеров - uniqueBrowsers = [] - sessions.each do |session| - browser = session['browser'] - uniqueBrowsers += [browser] if uniqueBrowsers.all? 
{ |b| b != browser } - end - - report['uniqueBrowsersCount'] = uniqueBrowsers.count - - report['totalSessions'] = sessions.count - - report['allBrowsers'] = - sessions - .map { |s| s['browser'] } - .map { |b| b.upcase } - .sort - .uniq - .join(',') + report[:uniqueBrowsersCount] = uniqueBrowsers.count + report[:totalSessions] = sessions_count + report[:allBrowsers] = uniqueBrowsers.sort.join(',') # Статистика по пользователям - users_objects = [] + report[:usersStats] = {} users.each do |user| - attributes = user - user_sessions = sessions.select { |session| session['user_id'] == user['id'] } - user_object = User.new(attributes: attributes, sessions: user_sessions) - users_objects = users_objects + [user_object] - end - - report['usersStats'] = {} - - # Собираем количество сессий по пользователям - collect_stats_from_users(report, users_objects) do |user| - { 'sessionsCount' => user.sessions.count } + user_key = user.attributes[:full_name] + report[:usersStats][user_key] = { + sessionsCount: user.sessions.count, + totalTime: user.sessions.sum { |s| s[:time].to_i }.to_s.concat(" min."), + longestSession: user.sessions.max_by { |s| s[:time].to_i }[:time] + ' min.', + browsers: user.sessions.map { |s| s[:browser] }.sort.join(', '), + usedIE: user.sessions.any? { |s| s[:browser][0] == "I" }, + alwaysUsedChrome: user.sessions.all? { |s| s[:browser][0] == "C" }, + dates: user.sessions.map{|s| s[:date]}.sort.reverse + } end - # Собираем количество времени по пользователям - collect_stats_from_users(report, users_objects) do |user| - { 'totalTime' => user.sessions.map {|s| s['time']}.map {|t| t.to_i}.sum.to_s + ' min.' } - end - - # Выбираем самую длинную сессию пользователя - collect_stats_from_users(report, users_objects) do |user| - { 'longestSession' => user.sessions.map {|s| s['time']}.map {|t| t.to_i}.max.to_s + ' min.' 
} - end - - # Браузеры пользователя через запятую - collect_stats_from_users(report, users_objects) do |user| - { 'browsers' => user.sessions.map {|s| s['browser']}.map {|b| b.upcase}.sort.join(', ') } - end - - # Хоть раз использовал IE? - collect_stats_from_users(report, users_objects) do |user| - { 'usedIE' => user.sessions.map{|s| s['browser']}.any? { |b| b.upcase =~ /INTERNET EXPLORER/ } } - end - - # Всегда использовал только Chrome? - collect_stats_from_users(report, users_objects) do |user| - { 'alwaysUsedChrome' => user.sessions.map{|s| s['browser']}.all? { |b| b.upcase =~ /CHROME/ } } - end - - # Даты сессий через запятую в обратном порядке в формате iso8601 - collect_stats_from_users(report, users_objects) do |user| - { 'dates' => user.sessions.map{|s| s['date']}.map {|d| Date.parse(d)}.sort.reverse.map { |d| d.iso8601 } } - end - - File.write('result.json', "#{report.to_json}\n") + File.write('result.json', "#{MultiJson.dump(report)}\n") end -class TestMe < Minitest::Test - def setup - File.write('result.json', '') - File.write('data.txt', -'user,0,Leida,Cira,0 -session,0,0,Safari 29,87,2016-10-23 -session,0,1,Firefox 12,118,2017-02-27 -session,0,2,Internet Explorer 28,31,2017-03-28 -session,0,3,Internet Explorer 28,109,2016-09-15 -session,0,4,Safari 39,104,2017-09-27 -session,0,5,Internet Explorer 35,6,2016-09-01 -user,1,Palmer,Katrina,65 -session,1,0,Safari 17,12,2016-10-21 -session,1,1,Firefox 32,3,2016-12-20 -session,1,2,Chrome 6,59,2016-11-11 -session,1,3,Internet Explorer 10,28,2017-04-29 -session,1,4,Chrome 13,116,2016-12-28 -user,2,Gregory,Santos,86 -session,2,0,Chrome 35,6,2018-09-21 -session,2,1,Safari 49,85,2017-05-22 -session,2,2,Firefox 47,17,2018-02-02 -session,2,3,Chrome 20,84,2016-11-25 -') - end - - def test_result - work - expected_result = '{"totalUsers":3,"uniqueBrowsersCount":14,"totalSessions":15,"allBrowsers":"CHROME 13,CHROME 20,CHROME 35,CHROME 6,FIREFOX 12,FIREFOX 32,FIREFOX 47,INTERNET EXPLORER 10,INTERNET EXPLORER 28,INTERNET 
EXPLORER 35,SAFARI 17,SAFARI 29,SAFARI 39,SAFARI 49","usersStats":{"Leida Cira":{"sessionsCount":6,"totalTime":"455 min.","longestSession":"118 min.","browsers":"FIREFOX 12, INTERNET EXPLORER 28, INTERNET EXPLORER 28, INTERNET EXPLORER 35, SAFARI 29, SAFARI 39","usedIE":true,"alwaysUsedChrome":false,"dates":["2017-09-27","2017-03-28","2017-02-27","2016-10-23","2016-09-15","2016-09-01"]},"Palmer Katrina":{"sessionsCount":5,"totalTime":"218 min.","longestSession":"116 min.","browsers":"CHROME 13, CHROME 6, FIREFOX 32, INTERNET EXPLORER 10, SAFARI 17","usedIE":true,"alwaysUsedChrome":false,"dates":["2017-04-29","2016-12-28","2016-12-20","2016-11-11","2016-10-21"]},"Gregory Santos":{"sessionsCount":4,"totalTime":"192 min.","longestSession":"85 min.","browsers":"CHROME 20, CHROME 35, FIREFOX 47, SAFARI 49","usedIE":false,"alwaysUsedChrome":false,"dates":["2018-09-21","2018-02-02","2017-05-22","2016-11-25"]}}}' + "\n" - assert_equal expected_result, File.read('result.json') - end +result = RubyProf.profile do end + +printer = RubyProf::MultiPrinter.new(result) +printer.print(path: 'profiling_results', profile: 'profile') + +# class TestMe < Minitest::Test +# def setup +# File.write('result.json', '') +# File.write('data.txt', +# 'user,0,Leida,Cira,0 +# session,0,0,Safari 29,87,2016-10-23 +# session,0,1,Firefox 12,118,2017-02-27 +# session,0,2,Internet Explorer 28,31,2017-03-28 +# session,0,3,Internet Explorer 28,109,2016-09-15 +# session,0,4,Safari 39,104,2017-09-27 +# session,0,5,Internet Explorer 35,6,2016-09-01 +# user,1,Palmer,Katrina,65 +# session,1,0,Safari 17,12,2016-10-21 +# session,1,1,Firefox 32,3,2016-12-20 +# session,1,2,Chrome 6,59,2016-11-11 +# session,1,3,Internet Explorer 10,28,2017-04-29 +# session,1,4,Chrome 13,116,2016-12-28 +# user,2,Gregory,Santos,86 +# session,2,0,Chrome 35,6,2018-09-21 +# session,2,1,Safari 49,85,2017-05-22 +# session,2,2,Firefox 47,17,2018-02-02 +# session,2,3,Chrome 20,84,2016-11-25 +# ') +# end +# +# def test_result +# work +# 
expected_result = '{"totalUsers":3,"uniqueBrowsersCount":14,"totalSessions":15,"allBrowsers":"CHROME 13,CHROME 20,CHROME 35,CHROME 6,FIREFOX 12,FIREFOX 32,FIREFOX 47,INTERNET EXPLORER 10,INTERNET EXPLORER 28,INTERNET EXPLORER 35,SAFARI 17,SAFARI 29,SAFARI 39,SAFARI 49","usersStats":{"Leida Cira":{"sessionsCount":6,"totalTime":"455 min.","longestSession":"118 min.","browsers":"FIREFOX 12, INTERNET EXPLORER 28, INTERNET EXPLORER 28, INTERNET EXPLORER 35, SAFARI 29, SAFARI 39","usedIE":true,"alwaysUsedChrome":false,"dates":["2017-09-27","2017-03-28","2017-02-27","2016-10-23","2016-09-15","2016-09-01"]},"Palmer Katrina":{"sessionsCount":5,"totalTime":"218 min.","longestSession":"116 min.","browsers":"CHROME 13, CHROME 6, FIREFOX 32, INTERNET EXPLORER 10, SAFARI 17","usedIE":true,"alwaysUsedChrome":false,"dates":["2017-04-29","2016-12-28","2016-12-20","2016-11-11","2016-10-21"]},"Gregory Santos":{"sessionsCount":4,"totalTime":"192 min.","longestSession":"85 min.","browsers":"CHROME 20, CHROME 35, FIREFOX 47, SAFARI 49","usedIE":false,"alwaysUsedChrome":false,"dates":["2018-09-21","2018-02-02","2017-05-22","2016-11-25"]}}}' + "\n" +# assert_equal expected_result, File.read('result.json') +# end +# end