Project

General

Profile

Patch #10470 » 0002-Process-new-git-revisions-all-at-once-rather-than-pe.patch

Batch processes all new revisions for all branches in a single pass - Jeremy Bopp, 2012-03-16 21:16

View differences:

app/models/repository/git.rb
79 79
    scm.tags
80 80
  end
81 81

  
82
  def heads
83
    h = {}
84
    scm.heads.each do |revision, branch|
85
      h[branch] = { 'last_scmid' => revision }
86
    end
87
    h
88
  end
89

  
82 90
  def default_branch
83 91
    scm.default_branch
84 92
  rescue Exception => e
......
99 107
                options = {:report_last_commit => extra_report_last_commit})
100 108
  end
101 109

  
102
  # With SCMs that have a sequential commit numbering,
103
  # such as Subversion and Mercurial,
104
  # Redmine is able to be clever and only fetch changesets
105
  # going forward from the most recent one it knows about.
110
  # With SCMs that have a sequential commit numbering, such as Subversion and
111
  # Mercurial, Redmine is able to be clever and only fetch changesets going
112
  # forward from the most recent one it knows about.
106 113
  #
107 114
  # However, Git does not have a sequential commit numbering.
108 115
  #
109
  # In order to fetch only new adding revisions,
110
  # Redmine needs to parse revisions per branch.
111
  # Branch "last_scmid" is for this requirement.
116
  # In order to fetch only new revisions, Redmine needs to parse revisions
117
  # introduced by the new heads of new and existing branches since the last time
118
  # the branch heads were processed.  Branch "last_scmid" is for this
119
  # requirement.
112 120
  #
113 121
  # In Git and Mercurial, revisions are not in date order.
114 122
  # Redmine Mercurial fixed issues.
......
118 126
  #      http://www.redmine.org/issues/3567
119 127
  #
120 128
  # Database revision column is text, so Redmine can not sort by revision.
121
  # Mercurial has revision number, and revision number guarantees revision order.
122
  # Redmine Mercurial model stored revisions ordered by database id to database.
123
  # So, Redmine Mercurial model can use correct ordering revisions.
129
  # Mercurial has revision number, and revision number guarantees revision
130
  # order.  Redmine Mercurial model stored revisions ordered by database id to
131
  # database.  So, Redmine Mercurial model can use correct ordering revisions.
124 132
  #
125
  # Redmine Mercurial adapter uses "hg log -r 0:tip --limit 10"
126
  # to get limited revisions from old to new.
127
  # But, Git 1.7.3.4 does not support --reverse with -n or --skip.
133
  # Redmine Mercurial adapter uses "hg log -r 0:tip --limit 10" to get limited
134
  # revisions from old to new.  But, Git 1.7.3.4 does not support --reverse with
135
  # -n or --skip.
128 136
  #
129 137
  # The repository can still be fully reloaded by calling #clear_changesets
130 138
  # before fetching changesets (e.g. for offline resync)
131 139
  def fetch_changesets
132
    scm_brs = branches
133
    return if scm_brs.nil? || scm_brs.empty?
134
    h1 = extra_info || {}
135
    h  = h1.dup
136
    h["branches"]       ||= {}
137
    h["db_consistent"]  ||= {}
138
    if changesets.count == 0
139
      h["db_consistent"]["ordering"] = 1
140
      merge_extra_info(h)
141
      self.save
142
    elsif ! h["db_consistent"].has_key?("ordering")
143
      h["db_consistent"]["ordering"] = 0
140
    transaction do
141
      scm_heads = heads
142
      return if scm_heads.nil? || scm_heads.empty?
143
      h1 = extra_info || {}
144
      h  = h1.dup
145
      h["branches"]       ||= {}
146
      h["db_consistent"]  ||= {}
147
      if changesets.count == 0
148
        h["db_consistent"]["ordering"] = 1
149
      elsif ! h["db_consistent"].has_key?("ordering")
150
        h["db_consistent"]["ordering"] = 0
151
      end
152

  
153
      begin
154
        save_revisions(h["branches"], scm_heads)
155
      rescue Redmine::Scm::Adapters::CommandFailed
156
        raise ActiveRecord::Rollback, $!.message
157
      end
158

  
159
      h["branches"] = scm_heads
144 160
      merge_extra_info(h)
145 161
      self.save
146 162
    end
147
    save_revisions(h, scm_brs)
148 163
  end
149 164

  
150
  def save_revisions(h, scm_brs)
151
    # Remember what revisions we already processed (in any branches)
152
    all_revisions = []
153
    scm_brs.each do |br1|
154
      br = br1.to_s
155
      last_revision = nil
156
      from_scmid = nil
157
      from_scmid = h["branches"][br]["last_scmid"] if h["branches"][br]
158
      h["branches"][br] ||= {}
159

  
160
      revisions = scm.revisions('', from_scmid, br, {:reverse => true})
161
      next if revisions.blank?
162

  
163
      # Remember the last commit id here, before we start removing revisions from the array.
164
      # We'll do that for optimization, but it also means, that we may lose even all revisions.
165
      last_revision  = revisions.last
166

  
167
      # remove revisions that we have already processed (possibly in other branches)
168
      revisions.reject!{|r| all_revisions.include?(r.scmid)}
169
      # add revisions that we are to parse now to 'all processed revisions'
170
      # (this equals to a union, because we executed diff above)
171
      all_revisions += revisions.map{|r| r.scmid}
172

  
173
      # Make the search for existing revisions in the database in a more efficient manner
174
      # This is replacing the one-after-one queries.
175
      # Find all revisions, that are in the database, and then remove them from the revision array.
176
      # Then later we won't need any conditions for db existence.
177
      # Query for several revisions at once, and remove them from the revisions array, if they are there.
178
      # Do this in chunks, to avoid eventual memory problems (in case of tens of thousands of commits).
179
      # If there are no revisions (because the original code's algorithm filtered them),
180
      # then this part will be stepped over.
181
      # We make queries, just if there is any revision.
182
      limit = 100
183
      offset = 0
184
      revisions_copy = revisions.clone # revisions will change
185
      while offset < revisions_copy.size
186
        recent_changesets_slice = changesets.find(
187
                                     :all,
188
                                     :conditions => [
189
                                        'scmid IN (?)',
190
                                        revisions_copy.slice(offset, limit).map{|x| x.scmid}
191
                                      ]
192
                                    )
193
        # Subtract revisions that redmine already knows about
194
        recent_revisions = recent_changesets_slice.map{|c| c.scmid}
195
        revisions.reject!{|r| recent_revisions.include?(r.scmid)}
196
        offset += limit
197
      end
198

  
199
      revisions.each do |rev|
200
        transaction do
201
          # There is no search in the db for this revision, because above we ensured,
202
          # that it's not in the db.
203
          db_saved_rev = save_revision(rev)
204
          parents = {}
205
          parents[db_saved_rev] = rev.parents unless rev.parents.nil?
206
          parents.each do |ch, chparents|
207
            ch.parents = chparents.collect{|rp| find_changeset_by_name(rp)}.compact
208
          end
209
          # saving the last scmid was moved from here, because we won't come in here,
210
          # if the revision was already added for another branch
211
        end
212
      end
213

  
214
      # save the data about the last revision for this branch
215
      unless last_revision.nil?
216
        h["branches"][br]["last_scmid"] = last_revision.scmid
217
        merge_extra_info(h)
218
        self.save
219
      end
165
  def save_revisions(old_scm_heads, new_scm_heads)
166
    scm.revisions(
167
      '', nil, nil,
168
      :reverse => true,
169
      :includes => heads_from_branches_hash(new_scm_heads),
170
      :excludes => heads_from_branches_hash(old_scm_heads)
171
    ) do |revision|
172
      save_revision(revision)
220 173
    end
221 174
  end
222 175
  private :save_revisions
223 176

  
224 177
  def save_revision(rev)
178
    parents = (rev.parents || []).collect{|rp| find_changeset_by_name(rp)}.compact
225 179
    changeset = Changeset.new(
226 180
              :repository   => self,
227 181
              :revision     => rev.identifier,
228 182
              :scmid        => rev.scmid,
229 183
              :committer    => rev.author,
230 184
              :committed_on => rev.time,
231
              :comments     => rev.message
185
              :comments     => rev.message,
186
              :parents      => parents
232 187
              )
233 188
    if changeset.save
234 189
      rev.paths.each do |file|
......
242 197
  end
243 198
  private :save_revision
244 199

  
245
  def heads_from_branches_hash
246
    h1 = extra_info || {}
247
    h  = h1.dup
248
    h["branches"] ||= {}
249
    h['branches'].map{|br, hs| hs['last_scmid']}
200
  def heads_from_branches_hash(branches)
201
    branches.map{|br, hs| hs['last_scmid']}
250 202
  end
251 203

  
252 204
  def latest_changesets(path,rev,limit=10)
lib/redmine/scm/adapters/git_adapter.rb
107 107
          nil
108 108
        end
109 109

  
110
        def heads
111
          return @heads if @heads
112
          cmd_args = %w|show-ref --heads|
113
          @heads = scm_cmd(cmd_args) do |io|
114
            io.map do |line|
115
              line.chomp.match('(.*?)\s+refs/heads/(.*)')[1, 2]
116
            end.sort
117
          end
118
        rescue ScmCommandAborted
119
          []
120
        end
121

  
110 122
        def default_branch
111 123
          bras = self.branches
112 124
          return nil if bras.nil?
test/unit/lib/redmine/scm/adapters/git_adapter_test.rb
105 105
            ], @adapter.tags
106 106
      end
107 107

  
108
      def test_heads
109
        assert_equal  [
110
              ["1ca7f5ed374f3cb31a93ae5215c2e25cc6ec5127", "latin-1-path-encoding"],
111
              ["2a682156a3b6e77a8bf9cd4590e8db757f3c6c78", "issue-8857"],
112
              ["67e7792ce20ccae2e4bb73eed09bb397819c8834", "test-latin-1"],
113
              ["83ca5fd546063a3c7dc2e568ba3355661a9e2b2c", "master"],
114
              ["83ca5fd546063a3c7dc2e568ba3355661a9e2b2c", "master-20120212"],
115
              ["fba357b886984ee71185ad2065e65fc0417d9b92", "test_branch"]
116
            ], @adapter.heads
117
      end
118

  
119

  
108 120
      def test_revisions_master_all
109 121
        revs1 = []
110 122
        @adapter.revisions('', nil, "master",{}) do |rev|
test/unit/repository_git_test.rb
146 146
      h = @repository.extra_info.dup
147 147
      h["branches"]["master"]["last_scmid"] =
148 148
            "4a07fe31bffcf2888791f3e6cbc9c4545cefe3e8"
149
      h["branches"]["master-20120212"]["last_scmid"] =
150
            "4a07fe31bffcf2888791f3e6cbc9c4545cefe3e8"
149 151
      @repository.merge_extra_info(h)
150 152
      @repository.save
151 153
      @project.reload
152 154
      extra_info_db_1 = @repository.extra_info["branches"]
153 155
      assert_equal "4a07fe31bffcf2888791f3e6cbc9c4545cefe3e8",
154 156
                    extra_info_db_1["master"]["last_scmid"]
157
      assert_equal "4a07fe31bffcf2888791f3e6cbc9c4545cefe3e8",
158
                    extra_info_db_1["master-20120212"]["last_scmid"]
155 159

  
156 160
      @repository.fetch_changesets
157 161
      @project.reload
......
264 268
      h = @repository.extra_info.dup
265 269
      h["branches"]["master"]["last_scmid"] =
266 270
            "4a07fe31bffcf2888791f3e6cbc9c4545cefe3e8"
271
      h["branches"]["master-20120212"]["last_scmid"] =
272
            "4a07fe31bffcf2888791f3e6cbc9c4545cefe3e8"
267 273
      @repository.merge_extra_info(h)
268 274
      @repository.save
269 275
      @project.reload
270 276
      extra_info_db_1 = @repository.extra_info["branches"]
271 277
      assert_equal "4a07fe31bffcf2888791f3e6cbc9c4545cefe3e8",
272 278
                    extra_info_db_1["master"]["last_scmid"]
279
      assert_equal "4a07fe31bffcf2888791f3e6cbc9c4545cefe3e8",
280
                    extra_info_db_1["master-20120212"]["last_scmid"]
273 281

  
274 282
      @repository.fetch_changesets
275 283
      assert_equal NUM_REV, @repository.changesets.count
......
277 285
    end
278 286

  
279 287
    def test_heads_from_branches_hash
280
      assert_nil @repository.extra_info
281
      assert_equal 0, @repository.changesets.count
282
      assert_equal [], @repository.heads_from_branches_hash
283 288
      h = {}
284
      h["branches"] = {}
285
      h["branches"]["test1"] = {}
286
      h["branches"]["test1"]["last_scmid"] = "1234abcd"
287
      h["branches"]["test2"] = {}
288
      h["branches"]["test2"]["last_scmid"] = "abcd1234"
289
      @repository.merge_extra_info(h)
290
      @repository.save
291
      @project.reload
292
      assert_equal ["1234abcd", "abcd1234"], @repository.heads_from_branches_hash.sort
289
      assert_equal [], @repository.heads_from_branches_hash(h)
290
      h["test1"] = {}
291
      h["test1"]["last_scmid"] = "1234abcd"
292
      h["test2"] = {}
293
      h["test2"]["last_scmid"] = "abcd1234"
294
      assert_equal ["1234abcd", "abcd1234"], @repository.heads_from_branches_hash(h).sort
293 295
    end
294 296

  
295 297
    def test_latest_changesets
......
511 513
      @repository.fetch_changesets
512 514
      @project.reload
513 515
      assert_equal NUM_REV, @repository.changesets.count
514
      %w|95488a44bc25f7d1f97d775a31359539ff333a63 95488a44b|.each do |r1|
516
      %w|7234cb2750b63f47bff735edc50a1c0a433c2518 7234cb27|.each do |r1|
515 517
        changeset = @repository.find_changeset_by_name(r1)
516 518
        assert_nil changeset.previous
517 519
      end
......
535 537
      @repository.fetch_changesets
536 538
      @project.reload
537 539
      assert_equal NUM_REV, @repository.changesets.count
538
      %w|67e7792ce20ccae2e4bb73eed09bb397819c8834 67e7792ce20cca|.each do |r1|
540
      %w|2a682156a3b6e77a8bf9cd4590e8db757f3c6c78 2a682156|.each do |r1|
539 541
        changeset = @repository.find_changeset_by_name(r1)
540 542
        assert_nil changeset.next
541 543
      end
(2-2/2)