CoCalc Shared Filesbb_pipeline_all_together(3).ipynbOpen in CoCalc with one click!
Authors: David D, Michael Smith, William A. Stein, Benjamin Wild
Views : 26
In [ ]:
# Pipeline for analyzing 2018 Konstanz data, including stitching the images together before processing them.
In [16]:
# This notebook is going to be copied so that each process runs on one GPU.
# A total of 8 copies is needed (2 per GPU).
# All of these folders are on kn-crec18.
# When the first one finishes (.162, GPU CUDA 1), I'll begin working on the outdoor videos, which are on kn-crec21.
# Outdoor videos: start00 = 1.1TB, start05 = 1.7TB
In [70]:
# import sys # !{sys.executable} -m pip install -U imgstore # Update imgstore...
Collecting imgstore Downloading https://files.pythonhosted.org/packages/c5/e1/8055c143a027a445033a48e4dc2865155c8cdadfb3294a02b9a16ec91f67/imgstore-0.2.0-py2.py3-none-any.whl (897kB) 100% |████████████████████████████████| 901kB 1.6MB/s eta 0:00:01 Collecting pandas (from imgstore) Using cached https://files.pythonhosted.org/packages/e1/d8/feeb346d41f181e83fba45224ab14a8d8af019b48af742e047f3845d8cff/pandas-0.23.4-cp36-cp36m-manylinux1_x86_64.whl Collecting pyyaml (from imgstore) Collecting tzlocal (from imgstore) Collecting python-dateutil (from imgstore) Downloading https://files.pythonhosted.org/packages/74/68/d87d9b36af36f44254a8d512cbfc48369103a3b9e474be9bdfe536abfc45/python_dateutil-2.7.5-py2.py3-none-any.whl (225kB) 100% |████████████████████████████████| 235kB 3.4MB/s eta 0:00:01 Collecting numpy (from imgstore) Using cached https://files.pythonhosted.org/packages/ff/7f/9d804d2348471c67a7d8b5f84f9bc59fd1cefa148986f2b74552f8573555/numpy-1.15.4-cp36-cp36m-manylinux1_x86_64.whl Collecting pytz (from imgstore) Downloading https://files.pythonhosted.org/packages/f8/0e/2365ddc010afb3d79147f1dd544e5ee24bf4ece58ab99b16fbb465ce6dc0/pytz-2018.7-py2.py3-none-any.whl (506kB) 100% |████████████████████████████████| 512kB 2.4MB/s eta 0:00:01 Collecting six>=1.5 (from python-dateutil->imgstore) Downloading https://files.pythonhosted.org/packages/73/fb/00a976f728d0d1fecfe898238ce23f502a721c0ac0ecfedb80e0d88c64e9/six-1.12.0-py2.py3-none-any.whl Installing collected packages: numpy, six, python-dateutil, pytz, pandas, pyyaml, tzlocal, imgstore Successfully installed imgstore-0.2.1 numpy-1.15.4 pandas-0.23.4 python-dateutil-2.7.5 pytz-2018.7 pyyaml-3.13 six-1.12.0 tzlocal-1.5.1
In [ ]:
# Hardware layout:
#   * two workstations (.162 and .163)
#   * each workstation has two GPUs (0 and 1)
#   * each GPU can handle two videos at a time
#   * each "video" is now actually two videos stitched together
import os

# Select the GPU this notebook copy runs on.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",  # Change here whether you're on GPU 0 or GPU 1
})
In [13]:
# Import everything you'll need. # May need to add extra bits to this, because there's the "untagged bee" part that's been added to the pipeline... from pipeline import Pipeline from pipeline.objects import Filename, Positions, Saliencies, IDs, Image, CrownOverlay, FinalResultOverlay, PipelineResult from pipeline.pipeline import get_auto_config from pipeline.io import BBBinaryRepoSink from bb_binary import DataSource, Repository import glob import imgstore import numpy as np import cv2 import datetime import matplotlib.pyplot as plt from imgstore import new_for_filename import os from tqdm import trange from cv2 import resize
In [37]:
# The stitching matrices have to be present on the workstation
# (they should already be on .162 and still need to be added to .163).
# TODO: what is the proper path on workstation .162, instead of using ../ ?
warp_matrices_file = '../im_warp_matrices.npz'
crop_metrics_file = '../crop_area_metrics.npz'

# Warp matrices, looked up by key when the jobs are defined.
im_warp_matrices = np.load(warp_matrices_file)

# Crop-area metrics, looked up by key when the jobs are defined.
crop_area_metrics = np.load(crop_metrics_file)
In [38]:
# Can view the matrices if you want to make sure they're correct: im_warp_matrices['im_warped_220w219_1']
In [ ]:
# And the crop metrics: crop_area_metrics['crop_area_220w219_1']
In [ ]:
# Ben's stuff starts here
In [ ]:
# Instead of having a which_to_stitch class and a frame_index_offset class,
# one class holds all the information needed for one pair of cameras in one
# time interval.
class Job:
    """Everything needed to stitch and process one camera pair in one recording.

    Args:
        cam_top: which is the top camera (220 or 680).
        cam_bottom: which is the bottom camera (219 or 674).
        startXX: which recording it is coming from (start00 - start09).
        im_warp_matrix: which stitching matrix should be used.
        crop_area_metric: which crop area metrics should be used
            (metric, not matrix).
        offset_top: how much offset there should be for the top camera
            (1 = it should skip the first image frame, 0, and start at frame 1).
        offset_bottom: same as ``offset_top``, for the bottom camera.

    All arguments are stored unchanged as attributes of the same name.
    """

    def __init__(self, cam_top, cam_bottom, startXX, im_warp_matrix,
                 crop_area_metric, offset_top, offset_bottom):
        # Which cameras, from which recording.
        self.cam_top, self.cam_bottom = cam_top, cam_bottom
        self.startXX = startXX
        # Stitching calibration for this pair.
        self.im_warp_matrix, self.crop_area_metric = im_warp_matrix, crop_area_metric
        # Per-camera frame offsets.
        self.offset_top, self.offset_bottom = offset_top, offset_bottom
In [ ]:
# One Job per camera pair and recording (start00 - start09).

# --- Camera pair 220 (top) / 219 (bottom) ---
job220_219_00 = Job(cam_top=220, cam_bottom=219, startXX="start00",
                    im_warp_matrix=im_warp_matrices['im_warped_220w219_1'],
                    crop_area_metric=crop_area_metrics['crop_area_220w219_1'],
                    offset_top=1, offset_bottom=0)
job220_219_01 = Job(cam_top=220, cam_bottom=219, startXX="start01",
                    im_warp_matrix=im_warp_matrices['im_warped_220w219_1'],
                    crop_area_metric=crop_area_metrics['crop_area_220w219_1'],
                    offset_top=0, offset_bottom=0)
job220_219_02 = Job(cam_top=220, cam_bottom=219, startXX="start02",
                    im_warp_matrix=im_warp_matrices['im_warped_220w219_2'],
                    crop_area_metric=crop_area_metrics['crop_area_220w219_2'],
                    offset_top=0, offset_bottom=0)
job220_219_03 = Job(cam_top=220, cam_bottom=219, startXX="start03",
                    im_warp_matrix=im_warp_matrices['im_warped_220w219_3'],
                    crop_area_metric=crop_area_metrics['crop_area_220w219_3'],
                    offset_top=0, offset_bottom=1)
job220_219_04 = Job(cam_top=220, cam_bottom=219, startXX="start04",
                    im_warp_matrix=im_warp_matrices['im_warped_220w219_3'],
                    crop_area_metric=crop_area_metrics['crop_area_220w219_3'],
                    offset_top=0, offset_bottom=0)
job220_219_05 = Job(cam_top=220, cam_bottom=219, startXX="start05",
                    im_warp_matrix=im_warp_matrices['im_warped_220w219_3'],
                    crop_area_metric=crop_area_metrics['crop_area_220w219_3'],
                    offset_top=0, offset_bottom=0)
job220_219_06 = Job(cam_top=220, cam_bottom=219, startXX="start06",
                    im_warp_matrix=im_warp_matrices['im_warped_220w219_3'],
                    crop_area_metric=crop_area_metrics['crop_area_220w219_3'],
                    offset_top=0, offset_bottom=0)
job220_219_07 = Job(cam_top=220, cam_bottom=219, startXX="start07",
                    im_warp_matrix=im_warp_matrices['im_warped_220w219_3'],
                    crop_area_metric=crop_area_metrics['crop_area_220w219_3'],
                    offset_top=0, offset_bottom=0)
job220_219_08 = Job(cam_top=220, cam_bottom=219, startXX="start08",
                    im_warp_matrix=im_warp_matrices['im_warped_220w219_3'],
                    crop_area_metric=crop_area_metrics['crop_area_220w219_3'],
                    offset_top=0, offset_bottom=1)
job220_219_09 = Job(cam_top=220, cam_bottom=219, startXX="start09",
                    im_warp_matrix=im_warp_matrices['im_warped_220w219_3'],
                    crop_area_metric=crop_area_metrics['crop_area_220w219_3'],
                    offset_top=0, offset_bottom=0)

# --- Camera pair 680 (top) / 674 (bottom) ---
job680_674_00 = Job(cam_top=680, cam_bottom=674, startXX="start00",
                    im_warp_matrix=im_warp_matrices['im_warped_680w674'],
                    crop_area_metric=crop_area_metrics['crop_area_680w674'],
                    offset_top=1, offset_bottom=0)
job680_674_01 = Job(cam_top=680, cam_bottom=674, startXX="start01",
                    im_warp_matrix=im_warp_matrices['im_warped_680w674'],
                    crop_area_metric=crop_area_metrics['crop_area_680w674'],
                    offset_top=0, offset_bottom=0)
job680_674_02 = Job(cam_top=680, cam_bottom=674, startXX="start02",
                    im_warp_matrix=im_warp_matrices['im_warped_680w674'],
                    crop_area_metric=crop_area_metrics['crop_area_680w674'],
                    offset_top=0, offset_bottom=0)
job680_674_03 = Job(cam_top=680, cam_bottom=674, startXX="start03",
                    im_warp_matrix=im_warp_matrices['im_warped_680w674'],
                    crop_area_metric=crop_area_metrics['crop_area_680w674'],
                    offset_top=0, offset_bottom=0)
job680_674_04 = Job(cam_top=680, cam_bottom=674, startXX="start04",
                    im_warp_matrix=im_warp_matrices['im_warped_680w674'],
                    crop_area_metric=crop_area_metrics['crop_area_680w674'],
                    offset_top=0, offset_bottom=0)
job680_674_05 = Job(cam_top=680, cam_bottom=674, startXX="start05",
                    im_warp_matrix=im_warp_matrices['im_warped_680w674'],
                    crop_area_metric=crop_area_metrics['crop_area_680w674'],
                    offset_top=0, offset_bottom=0)
job680_674_06 = Job(cam_top=680, cam_bottom=674, startXX="start06",
                    im_warp_matrix=im_warp_matrices['im_warped_680w674'],
                    crop_area_metric=crop_area_metrics['crop_area_680w674'],
                    offset_top=0, offset_bottom=0)
job680_674_07 = Job(cam_top=680, cam_bottom=674, startXX="start07",
                    im_warp_matrix=im_warp_matrices['im_warped_680w674'],
                    crop_area_metric=crop_area_metrics['crop_area_680w674'],
                    offset_top=1, offset_bottom=0)
job680_674_08 = Job(cam_top=680, cam_bottom=674, startXX="start08",
                    im_warp_matrix=im_warp_matrices['im_warped_680w674'],
                    crop_area_metric=crop_area_metrics['crop_area_680w674'],
                    offset_top=0, offset_bottom=0)
job680_674_09 = Job(cam_top=680, cam_bottom=674, startXX="start09",
                    im_warp_matrix=im_warp_matrices['im_warped_680w674'],
                    crop_area_metric=crop_area_metrics['crop_area_680w674'],
                    offset_top=0, offset_bottom=0)
In [ ]:
# Work assignment: process id -> list of jobs handled by that process.
# There is an A and a B slot for each workstation/GPU combination, because
# each GPU can run two pipelines simultaneously.
jobs = {
    # workstation .162, GPU CUDA 0 (A): start00 (220_219) and (680_674)
    0: [job220_219_00, job680_674_00],
    # workstation .162, GPU CUDA 0 (B): start01 (220_219) and (680_674)
    1: [job220_219_01, job680_674_01],
    # workstation .162, GPU CUDA 1 (A): start02 (220_219) and (680_674)
    2: [job220_219_02, job680_674_02],
    # workstation .162, GPU CUDA 1 (B): start03 (220_219) and (680_674)
    3: [job220_219_03, job680_674_03],
    # workstation .163, GPU CUDA 0 (A): start04 (220_219) and (680_674),
    # plus start08 (680_674)
    4: [job220_219_04, job680_674_04, job680_674_08],
    # workstation .163, GPU CUDA 0 (B): start05 (680_674) only,
    # plus start09 (680_674)
    5: [job680_674_05, job680_674_09],
    # workstation .163, GPU CUDA 1 (A): start06 (220_219) and (680_674),
    # plus start05 (220_219) and start08 (220_219)
    6: [job220_219_05, job220_219_06, job680_674_06, job220_219_08],
    # workstation .163, GPU CUDA 1 (B): start07 (220_219) and (680_674),
    # plus start09 (220_219)
    7: [job220_219_07, job680_674_07, job220_219_09],
}
# Note: the 220_219 half of start05 was moved to process id 6, because
# process id 5 has to deal with 680_674_09, which is the biggest one.
In [ ]:
# One process id for each combination of workstation, GPU and slot.
# Change process_id based on which workstation, cuda device and A/B slot this
# notebook copy is running on:
#   workstation162 + cuda0 + A = process_id 0
#   workstation162 + cuda0 + B = process_id 1
#   workstation162 + cuda1 + A = process_id 2
#   workstation162 + cuda1 + B = process_id 3
#   workstation163 + cuda0 + A = process_id 4
#   workstation163 + cuda0 + B = process_id 5
#   workstation163 + cuda1 + A = process_id 6
#   workstation163 + cuda1 + B = process_id 7
# There's no real reason it's A and B, just that each GPU can handle two
# pipelines at a time.
process_id = 0
In [ ]:
# Repository for this process. Any path can be passed here, even the same one
# for each script. The repository lives on the workstation, so part of the
# data ends up on workstation .162 and part on .163.
#
# Open questions from the original notes:
#   * Would it be better to name this after the job instead of the process id?
#     E.g. 'job220_219_00' gives more info than just 'process_id = 1'.
#   * Should a separate folder be made to distinguish this? A plain 'repo'
#     technically already exists on the workstation from an early test.
#
# NOTE: the main processing loop rebinds `repo` to a path built from
# job.startXX, so the repository created here is not the one it writes to.
repo_path = '/home/msmith/repo_process_{}'.format(process_id)
repo = Repository(repo_path)
In [ ]:
# Ben TODO: install the new pipeline on Michael's workstations
In [ ]:
# Build the pipeline: it takes an image as input and produces a
# PipelineResult; all remaining settings come from get_auto_config().
pipeline_config = get_auto_config()
pipeline = Pipeline(
    [Image],           # inputs
    [PipelineResult],  # outputs
    **pipeline_config
)
In [ ]:
# Camera number -> cam_id handed to BBBinaryRepoSink in the main loop.
# The main loop asserts that the top and bottom camera of a job share an id.
camera_keys = {
    674: 0,
    680: 0,
    219: 1,
    220: 1,
}
In [ ]:
# Ben: this can be added to the top part where all the other imports are, just putting it here so that you see it from scipy.spatial import cKDTree
In [ ]:
# Main processing loop.
#
# For every job assigned to this process: open the top and bottom imgstores,
# pair each top frame with the bottom frame whose timestamp is nearest, stitch
# the pair, run the pipeline on the stitched image and store the results in a
# bb_binary repository, one sink per 1000 frames.
#
# NOTE(review): job.offset_top / job.offset_bottom are not used here; frames
# are paired purely by nearest timestamp. Confirm the offsets are not needed.
for job in jobs[process_id]:
    cam_id = camera_keys[job.cam_top]
    # Both cameras of a stitched pair must map to the same cam_id.
    assert camera_keys[job.cam_top] == camera_keys[job.cam_bottom]

    # One repository per recording; can pass any path here, even the same one
    # for each script. This rebinds the module-level `repo`.
    repo = Repository('/home/msmith/repo_process_{}'.format(job.startXX))

    # load all imgstore paths for job.startXX
    path = os.path.join('/mount/zfs-kn/recordings/kn-crec18', job.startXX, job.startXX + "*")
    stores = sorted(glob.glob(path))

    # determine which imgstores correspond to job.cam_top and job.cam_bottom
    top_candidates = [s for s in stores if s.endswith(str(job.cam_top))]
    bottom_candidates = [s for s in stores if s.endswith(str(job.cam_bottom))]
    if not top_candidates or not bottom_candidates:
        # Fail with a message naming the cameras and glob pattern, not a
        # bare IndexError on an empty list.
        raise FileNotFoundError(
            'No imgstore found for camera {} and/or {} matching {!r}'.format(
                job.cam_top, job.cam_bottom, path))
    top_store_path = top_candidates[0]
    bottom_store_path = bottom_candidates[0]

    # load imgstores
    top_store = new_for_filename(top_store_path + '/metadata.yaml')
    bottom_store = new_for_filename(bottom_store_path + '/metadata.yaml')

    # The top timestamps are loaded but not used below; each top frame's
    # timestamp comes from get_next_image() instead.
    top_metadata = top_store.get_frame_metadata()
    top_store_timestamps = top_metadata['frame_time']

    # KD-tree over the bottom timestamps for the nearest-timestamp lookup
    # (replaces an argmin over all timestamps for every frame).
    bottom_metadata = bottom_store.get_frame_metadata()
    bottom_store_timestamps = np.array(bottom_metadata['frame_time'])
    bottom_store_timestamps_kd = cKDTree(bottom_store_timestamps[:, None],
                                         compact_nodes=True, balanced_tree=False)

    # prepare the data source for storage.
    # The original used stores[3], i.e. the fourth imgstore in sorted order,
    # regardless of which camera pair this job covers. Record the two stores
    # that were actually stitched instead.
    data_source = DataSource.new_message(
        filename='{}+{}'.format(top_store_path, bottom_store_path))

    # the correct warping matrix (h) and cropping area (crop_area) are fixed
    # per job, so look them up once instead of once per frame
    h = job.im_warp_matrix
    crop_area = job.crop_area_metric

    # iterate over pairs of images from the two imgstores
    sink = None
    for idx in trange(top_store.frame_count):
        # Start a new sink every 1000 frames, finishing the previous one first.
        if idx % 1000 == 0:
            if sink is not None:
                sink.finish()
            sink = BBBinaryRepoSink(repo, cam_id)

        i0, (frame_number_0, frame_timestamp_0) = top_store.get_next_image()

        # index of the bottom frame whose timestamp is closest to the top frame
        _, frame_index_nearest = bottom_store_timestamps_kd.query([frame_timestamp_0])
        i1, (frame_number_1, frame_timestamp_1) = bottom_store.get_image(
            frame_number=None, frame_index=frame_index_nearest)

        # stitch images: warp the bottom image onto the cropped canvas, then
        # paste the top image into the upper-left corner
        im_warped = cv2.warpAffine(i1, h, (crop_area[2], crop_area[3]))
        im_warped[0:i0.shape[0], 0:i0.shape[1]] = i0

        # process stitched image
        results = pipeline([im_warped])

        # prepare results for storage in bb_binary repo; the timestamp of the
        # top frame stands in for the stitched pair
        sink.add_frame(data_source, results, frame_timestamp_0)

    # Finish the last (possibly partial) sink. Without this the final chunk of
    # every job is never finished, because finish() was only called when the
    # next 1000-frame chunk started.
    if sink is not None:
        sink.finish()
In [ ]:
In [ ]:
In [ ]:
# Send notebook from laptop to workstation: # scp -r /Users/michaelsmith/Desktop/bb_pipeline_all_together_3.ipynb [email protected]:/home/msmith/pipeline # have to remove the (3) because it doesn't like that, change to: _3 at the end of the file name...