From 50b186311efa903b47b8cad5053b050bd2704a59 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?F=C3=A1bi=C3=A1n=20Gergely?= <gergely.fabian@moveoneinc.com>
Date: Thu, 8 Mar 2012 21:20:29 +0100
Subject: [PATCH] Git repo parsing optimization without db query grouping

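This is a test version: it removes the chunked `scmid IN (?)` lookup of
already-known changesets and instead checks each revision individually
with find_changeset_by_name before saving it.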
---
 app/models/repository/git.rb |   49 ++++++++++-------------------------------
 1 files changed, 12 insertions(+), 37 deletions(-)

diff --git a/app/models/repository/git.rb b/app/models/repository/git.rb
index 933b5e3..f875378 100644
--- a/app/models/repository/git.rb
+++ b/app/models/repository/git.rb
@@ -156,10 +156,10 @@ class Repository::Git < Repository
       from_scmid = nil
       from_scmid = h["branches"][br]["last_scmid"] if h["branches"][br]
       h["branches"][br] ||= {}
-
+      
       revisions = scm.revisions('', from_scmid, br, {:reverse => true})
       next if revisions.blank?
-
+      
       # Remember the last commit id here, before we start removing revisions from the array.
       # We'll do that for optimization, but it also means, that we may lose even all revisions.
       last_revision  = revisions.last
@@ -170,47 +170,22 @@ class Repository::Git < Repository
       # (this equals to a union, because we executed diff above)
       all_revisions += revisions.map{|r| r.scmid}
 
-      # Make the search for existing revisions in the database in a more sufficient manner
-      # This is replacing the one-after-one queries.
-      # Find all revisions, that are in the database, and then remove them from the revision array.
-      # Then later we won't need any conditions for db existence.
-      # Query for several revisions at once, and remove them from the revisions array, if they are there.
-      # Do this in chunks, to avoid eventual memory problems (in case of tens of thousands of commits).
-      # If there are no revisions (because the original code's algoritm filtered them),
-      # then this part will be stepped over.
-      # We make queries, just if there is any revision.
-      limit = 100
-      offset = 0
-      revisions_copy = revisions.clone # revisions will change
-      while offset < revisions_copy.size
-        recent_changesets_slice = changesets.find(
-                                     :all,
-                                     :conditions => [
-                                        'scmid IN (?)',
-                                        revisions_copy.slice(offset, limit).map{|x| x.scmid}
-                                      ]
-                                    )
-        # Subtract revisions that redmine already knows about
-        recent_revisions = recent_changesets_slice.map{|c| c.scmid}
-        revisions.reject!{|r| recent_revisions.include?(r.scmid)}
-        offset += limit
-      end
-
       revisions.each do |rev|
-        transaction do
-          # There is no search in the db for this revision, because above we ensured,
-          # that it's not in the db.
-          db_saved_rev = save_revision(rev)
-          parents = {}
-          parents[db_saved_rev] = rev.parents unless rev.parents.nil?
-          parents.each do |ch, chparents|
-            ch.parents = chparents.collect{|rp| find_changeset_by_name(rp)}.compact
+        db_rev = find_changeset_by_name(rev.revision)
+        if db_rev.nil?
+          transaction do
+            db_saved_rev = save_revision(rev)
+            parents = {}
+            parents[db_saved_rev] = rev.parents unless rev.parents.nil?
+            parents.each do |ch, chparents|
+              ch.parents = chparents.collect{|rp| find_changeset_by_name(rp)}.compact
+            end
           end
           # saving the last scmid was moved from here, because we won't come in here,
           # if the revision was already added for another branch
         end
       end
-
+      
       # save the data about the last revision for this branch
       unless last_revision.nil?
         h["branches"][br]["last_scmid"] = last_revision.scmid
-- 
1.7.4.1

