CoCalc -- bb_pipeline_all_together(3).ipynb

Project: Test

Path: bb_pipeline_all_together(3).ipynb

Views: ¹⁰⁵

Kernel: Python 3 (Ubuntu Linux)

In [0]:

# Pipeline for analyzing 2018 Konstanz data, including stitching the images together before processing them.

In [16]:


# going to copy this out, so that each process is on one GPU.  Will need a total of 8 copies (2 per GPU) 

# all of these folders are on kn-crec18

# when the first one finishes (.162, GPU CUDA 1), I'll begin working on the outdoor videos, on kn-crec21
# outdoor videos, start00 = 1.1TB, start05 = 1.7TB

In [70]:

# import sys
# !{sys.executable} -m pip install -U imgstore # Update imgstore...

Collecting imgstore
  Downloading https://files.pythonhosted.org/packages/c5/e1/8055c143a027a445033a48e4dc2865155c8cdadfb3294a02b9a16ec91f67/imgstore-0.2.0-py2.py3-none-any.whl (897kB)
    100% |████████████████████████████████| 901kB 1.6MB/s eta 0:00:01
Collecting pandas (from imgstore)
  Using cached https://files.pythonhosted.org/packages/e1/d8/feeb346d41f181e83fba45224ab14a8d8af019b48af742e047f3845d8cff/pandas-0.23.4-cp36-cp36m-manylinux1_x86_64.whl
Collecting pyyaml (from imgstore)
Collecting tzlocal (from imgstore)
Collecting python-dateutil (from imgstore)
  Downloading https://files.pythonhosted.org/packages/74/68/d87d9b36af36f44254a8d512cbfc48369103a3b9e474be9bdfe536abfc45/python_dateutil-2.7.5-py2.py3-none-any.whl (225kB)
    100% |████████████████████████████████| 235kB 3.4MB/s eta 0:00:01
Collecting numpy (from imgstore)
  Using cached https://files.pythonhosted.org/packages/ff/7f/9d804d2348471c67a7d8b5f84f9bc59fd1cefa148986f2b74552f8573555/numpy-1.15.4-cp36-cp36m-manylinux1_x86_64.whl
Collecting pytz (from imgstore)
  Downloading https://files.pythonhosted.org/packages/f8/0e/2365ddc010afb3d79147f1dd544e5ee24bf4ece58ab99b16fbb465ce6dc0/pytz-2018.7-py2.py3-none-any.whl (506kB)
    100% |████████████████████████████████| 512kB 2.4MB/s eta 0:00:01
Collecting six>=1.5 (from python-dateutil->imgstore)
  Downloading https://files.pythonhosted.org/packages/73/fb/00a976f728d0d1fecfe898238ce23f502a721c0ac0ecfedb80e0d88c64e9/six-1.12.0-py2.py3-none-any.whl
Installing collected packages: numpy, six, python-dateutil, pytz, pandas, pyyaml, tzlocal, imgstore
Successfully installed imgstore-0.2.1 numpy-1.15.4 pandas-0.23.4 python-dateutil-2.7.5 pytz-2018.7 pyyaml-3.13 six-1.12.0 tzlocal-1.5.1

In [0]:


# Have two workstations (.162 and .163)
# Each workstation has two GPUs (0 and 1)
# Each GPU can handle two videos at a time
# Each video is now actually going to be two videos stitched together

import os
# Restrict this kernel to one GPU. Both variables are set in a single call.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",  # Change here whether you're on GPU 0 or GPU 1
})

In [13]:


# Import everything you'll need.  
# May need to add extra bits to this, because there's the "untagged bee" part that's been added to the pipeline... 

from pipeline import Pipeline
from pipeline.objects import Filename, Positions, Saliencies, IDs, Image, CrownOverlay, FinalResultOverlay, PipelineResult
from pipeline.pipeline import get_auto_config

from pipeline.io import BBBinaryRepoSink
from bb_binary import DataSource, Repository

import glob
import imgstore
import numpy as np
import cv2
import datetime
import matplotlib.pyplot as plt
from imgstore import new_for_filename
import os
from tqdm import trange
from cv2 import resize

In [37]:


# Get the stitching matrices onto the workstation (should be on workstation .162, need to add to .163)

# Load the warp matrices used for stitching. The path is relative to the directory
# the notebook is run from.
# TODO: replace '../' with the actual location of the file on workstation .162.
im_warp_matrices = np.load('../im_warp_matrices.npz')

# Load the crop-area metrics that go with the warp matrices.
crop_area_metrics = np.load('../crop_area_metrics.npz')

In [38]:

# Sanity check: display one of the loaded warp matrices to make sure it looks correct.
im_warp_matrices['im_warped_220w219_1']

In [0]:

# Sanity check: display one of the loaded crop-area metrics as well.
crop_area_metrics['crop_area_220w219_1']

In [0]:

# Ben's stuff starts here

In [0]:

# Instead of having a which_to_stitch class and a frame_index_offset class I think
# it's easier to have one class containing all the information we need for one pair
# of cameras in one time interval

# Define a class that will let you know which stitching matrix to use for which sets of cameras in which recordings

class Job:
  """Everything needed to stitch and process one camera pair in one recording.

  Attributes:
    cam_top: which is the top camera (220 or 680).
    cam_bottom: which is the bottom camera (219 or 674).
    startXX: which recording the footage comes from ("start00" - "start09").
    im_warp_matrix: which stitching matrix to use.
    crop_area_metric: which crop area metric to use (metric, not matrix).
    offset_top: frame offset for the top camera
      (1 = skip the first image frame, 0, and start at frame 1).
    offset_bottom: frame offset for the bottom camera, same convention.
  """

  def __init__(self, cam_top, cam_bottom, startXX,
               im_warp_matrix, crop_area_metric, offset_top, offset_bottom):
    # Camera pair and the recording it belongs to.
    self.cam_top, self.cam_bottom = cam_top, cam_bottom
    self.startXX = startXX
    # Stitching calibration for this pair.
    self.im_warp_matrix, self.crop_area_metric = im_warp_matrix, crop_area_metric
    # Per-camera frame offsets.
    self.offset_top, self.offset_bottom = offset_top, offset_bottom

In [0]:


# Look every calibration up exactly once. Indexing the object that np.load returns for an
# .npz archive reads the array from the file on each access, so repeating the lookup inside
# all twenty Job(...) calls re-read the same few arrays over and over.
# The jobs below therefore share these array objects; treat them as read-only.
warp_220w219_1 = im_warp_matrices['im_warped_220w219_1']
warp_220w219_2 = im_warp_matrices['im_warped_220w219_2']
warp_220w219_3 = im_warp_matrices['im_warped_220w219_3']
warp_680w674 = im_warp_matrices['im_warped_680w674']

crop_220w219_1 = crop_area_metrics['crop_area_220w219_1']
crop_220w219_2 = crop_area_metrics['crop_area_220w219_2']
crop_220w219_3 = crop_area_metrics['crop_area_220w219_3']
crop_680w674 = crop_area_metrics['crop_area_680w674']

# Job arguments: top camera, bottom camera, recording, warp matrix, crop metric,
#                offset_top, offset_bottom.

# Camera pair 220 (top) / 219 (bottom):
# calibration 1 for start00-start01, calibration 2 for start02, calibration 3 from start03 on.
job220_219_00 = Job(220, 219, "start00", warp_220w219_1, crop_220w219_1, 1, 0)
job220_219_01 = Job(220, 219, "start01", warp_220w219_1, crop_220w219_1, 0, 0)
job220_219_02 = Job(220, 219, "start02", warp_220w219_2, crop_220w219_2, 0, 0)
job220_219_03 = Job(220, 219, "start03", warp_220w219_3, crop_220w219_3, 0, 1)
job220_219_04 = Job(220, 219, "start04", warp_220w219_3, crop_220w219_3, 0, 0)
job220_219_05 = Job(220, 219, "start05", warp_220w219_3, crop_220w219_3, 0, 0)
job220_219_06 = Job(220, 219, "start06", warp_220w219_3, crop_220w219_3, 0, 0)
job220_219_07 = Job(220, 219, "start07", warp_220w219_3, crop_220w219_3, 0, 0)
job220_219_08 = Job(220, 219, "start08", warp_220w219_3, crop_220w219_3, 0, 1)
job220_219_09 = Job(220, 219, "start09", warp_220w219_3, crop_220w219_3, 0, 0)

# Camera pair 680 (top) / 674 (bottom): a single calibration for every recording.
job680_674_00 = Job(680, 674, "start00", warp_680w674, crop_680w674, 1, 0)
job680_674_01 = Job(680, 674, "start01", warp_680w674, crop_680w674, 0, 0)
job680_674_02 = Job(680, 674, "start02", warp_680w674, crop_680w674, 0, 0)
job680_674_03 = Job(680, 674, "start03", warp_680w674, crop_680w674, 0, 0)
job680_674_04 = Job(680, 674, "start04", warp_680w674, crop_680w674, 0, 0)
job680_674_05 = Job(680, 674, "start05", warp_680w674, crop_680w674, 0, 0)
job680_674_06 = Job(680, 674, "start06", warp_680w674, crop_680w674, 0, 0)
job680_674_07 = Job(680, 674, "start07", warp_680w674, crop_680w674, 1, 0)
job680_674_08 = Job(680, 674, "start08", warp_680w674, crop_680w674, 0, 0)
job680_674_09 = Job(680, 674, "start09", warp_680w674, crop_680w674, 0, 0)

In [0]:

# Maps process_id -> the list of Jobs that process works through, in list order.
# (A)/(B) are the two pipelines that run side by side on one GPU.
jobs = {
    0: [job220_219_00, job680_674_00],  # workstation .162, GPU CUDA 0 (A)
    1: [job220_219_01, job680_674_01],  # workstation .162, GPU CUDA 0 (B)
    2: [job220_219_02, job680_674_02],  # workstation .162, GPU CUDA 1 (A)
    3: [job220_219_03, job680_674_03],  # workstation .162, GPU CUDA 1 (B)
    4: [job220_219_04, job680_674_04, job680_674_08],  # workstation .163, GPU CUDA 0 (A)
    5: [job680_674_05, job680_674_09],  # workstation .163, GPU CUDA 0 (B)
    6: [job220_219_05, job220_219_06, job680_674_06, job220_219_08],  # workstation .163, GPU CUDA 1 (A)
    7: [job220_219_07, job680_674_07, job220_219_09]  # workstation .163, GPU CUDA 1 (B)
}

# workstation .162, GPU CUDA 0 (A), = start00 (220_219) and (680_674)
# workstation .162, GPU CUDA 0 (B), = start01 (220_219) and (680_674)
# workstation .162, GPU CUDA 1 (A), = start02 (220_219) and (680_674)
# workstation .162, GPU CUDA 1 (B), = start03 (220_219) and (680_674)
# workstation .163, GPU CUDA 0 (A), = start04 (220_219) and (680_674), plus start08 (680_674)
# workstation .163, GPU CUDA 0 (B), = start05 (680_674) only, plus start09 (680_674)
# workstation .163, GPU CUDA 1 (A), = start06 (220_219) and (680_674), plus start05 (220_219), and plus start08 (220_219)
# workstation .163, GPU CUDA 1 (B), = start07 (220_219) and (680_674), plus start09 (220_219)

# We have an A and a B for each workstation/GPU combination, because each GPU can run two pipelines simultaneously.  
# Note: the 220_219 half of start05 is assigned to process id 6 instead of 5, because process id 5 already has to deal with start09 (680_674), which is the biggest one.

In [0]:

# Selects which entry of `jobs` this copy of the notebook runs.
# There is one process id per workstation + GPU (cuda) + A/B combination; change it for each copy.
process_id = 0

# i.e. 
# workstation162 + cuda0 + A = process_id 0
# workstation162 + cuda0 + B = process_id 1
# workstation162 + cuda1 + A = process_id 2
# workstation162 + cuda1 + B = process_id 3

# workstation163 + cuda0 + A = process_id 4
# workstation163 + cuda0 + B = process_id 5
# workstation163 + cuda1 + A = process_id 6
# workstation163 + cuda1 + B = process_id 7

# There's no real reason it's A and B, just that each GPU can handle two pipelines at a time.

In [0]:

# Repository for this process's results. Any path can be passed here, even the same one
# for each script. It lives on the workstation running the notebook, so part of the
# repo ends up on workstation .162 and part on .163.
# NOTE: the processing loop further down rebinds `repo` to
# '/home/msmith/repo_process_<startXX>' for every job before using it, so the repository
# opened here is not the one the results are written to.
# Open question: would it be better to name this after the job instead of the process id?
# E.g. 'job220_219_00' gives more information than 'process_id = 1'.
# ('repo' technically already exists on the workstation from an early test.)
repo = Repository('/home/msmith/repo_process_{}'.format(process_id))

In [0]:

# Ben TODO: install new pipeline on michaels workstations

In [0]:

# Build the processing pipeline from the automatically generated configuration:
# it takes an Image as input and produces a PipelineResult as output.
auto_config = get_auto_config()
pipeline = Pipeline([Image], [PipelineResult], **auto_config)

In [0]:

# Maps each physical camera id to the cam id under which its results are stored.
# The two cameras of a stitched pair must map to the same value.
camera_keys = {
    674: 0,
    680: 0,
    219: 1,
    220: 1,
}

In [0]:

# Ben: this can be added to the top part where all the other imports are, just putting it here so that you see it

from scipy.spatial import cKDTree

In [0]:

# Stitch and process every frame pair of every job assigned to this process.
FRAMES_PER_SINK = 1000  # a fresh BBBinaryRepoSink is started every this many frames

for job in jobs[process_id]:
    # Both cameras of a stitched pair must map to the same output cam id.
    cam_id = camera_keys[job.cam_top]
    assert camera_keys[job.cam_top] == camera_keys[job.cam_bottom]

    # One repository per recording (startXX); can be any path.
    repo = Repository('/home/msmith/repo_process_{}'.format(job.startXX))

    # Find all imgstore directories belonging to job.startXX.
    path = os.path.join('/mount/zfs-kn/recordings/kn-crec18', job.startXX, job.startXX+"*")
    stores = sorted(glob.glob(path))

    # Pick the imgstores of job.cam_top and job.cam_bottom (the path ends with the camera id).
    # Fail with a readable message instead of a bare IndexError when one is missing.
    top_matches = [s for s in stores if s.endswith(str(job.cam_top))]
    bottom_matches = [s for s in stores if s.endswith(str(job.cam_bottom))]
    if not top_matches or not bottom_matches:
        raise FileNotFoundError(
            'no imgstore found for camera {} and/or {} matching {}'.format(
                job.cam_top, job.cam_bottom, path))
    top_store_path, bottom_store_path = top_matches[0], bottom_matches[0]

    # Open both imgstores.
    top_store = new_for_filename(top_store_path+'/metadata.yaml')
    bottom_store = new_for_filename(bottom_store_path+'/metadata.yaml')

    # Index the bottom camera's frame timestamps so that every top frame can be paired
    # with the bottom frame that is nearest in time.
    # NOTE: job.offset_top / job.offset_bottom are not used anywhere in this loop;
    # frames are paired by timestamp only.
    bottom_metadata = bottom_store.get_frame_metadata()
    bottom_store_timestamps = np.array(bottom_metadata['frame_time'])
    bottom_store_timestamps_kd = cKDTree(bottom_store_timestamps[:, None], compact_nodes=True, balanced_tree=False)

    # Record the two stores that are actually stitched together as the data source.
    # (This used to be stores[3], an arbitrary entry of the sorted store list.)
    # NOTE(review): confirm that this is the filename format wanted in the repo.
    data_source = DataSource.new_message(filename='{}+{}'.format(top_store_path, bottom_store_path))

    # The warp matrix and crop area are fixed for the whole job, so read them once.
    h = job.im_warp_matrix
    crop_area = job.crop_area_metric
    stitched_size = (crop_area[2], crop_area[3])

    # Iterate over pairs of images from the two imgstores.
    sink = None
    for idx in trange(top_store.frame_count):
        # Close the current sink and start a new one every FRAMES_PER_SINK frames.
        if idx % FRAMES_PER_SINK == 0:
            if sink is not None:
                sink.finish()
            sink = BBBinaryRepoSink(repo, cam_id)

        i0, (frame_number_0, frame_timestamp_0) = top_store.get_next_image()

        # Fetch the bottom frame whose timestamp is closest to the top frame's.
        _, frame_index_nearest = bottom_store_timestamps_kd.query([frame_timestamp_0])
        i1, (frame_number_1, frame_timestamp_1) = bottom_store.get_image(frame_number=None, frame_index=frame_index_nearest)

        # Stitch: warp the bottom image onto the output canvas, then paste the top image
        # unchanged into the upper-left corner.
        im_warped = cv2.warpAffine(i1, h, stitched_size)
        im_warped[0:i0.shape[0], 0:i0.shape[1]] = i0

        # Process the stitched image.
        results = pipeline([im_warped])

        # Store the results; the top frame's timestamp stands for the stitched pair.
        sink.add_frame(data_source, results, frame_timestamp_0)

    # Finish the last (usually partial) batch as well. Previously finish() was only called
    # when the next batch started, so the final batch of every job was never finished.
    if sink is not None:
        sink.finish()

In [0]:

In [0]:

In [0]:

# Send notebook from laptop to workstation:
# scp -r /Users/michaelsmith/Desktop/bb_pipeline_all_together_3.ipynb msmith@<workstation-ip>:/home/msmith/pipeline
# The "(3)" in the original file name has to be removed first because the parentheses cause problems; rename the file so that it ends in "_3" instead.