refactor scifact 1 (#133)

* refactor scifact 1 * Update experiments/verT5erini/README.md Co-authored-by: Rodrigo Frassetto Nogueira <[email protected]> Co-authored-by: Rodrigo Frassetto Nogueira <[email protected]>
castorini · Dec 26, 2020 · b0d8901 · b0d8901
1 parent a70269a
commit b0d8901
Show file tree

Hide file tree

Showing 3 changed files with 53 additions and 49 deletions.
diff --git a/experiments/verT5erini/README.md b/experiments/verT5erini/README.md
@@ -46,6 +46,7 @@ pip install --editable mesh
 ```
 
 ## Download Dataset
+
 Please download the dataset by following the guidance in the original [SciFact](https://github.com/allenai/scifact) repo.
 
 ## Abstract Retrieval
@@ -59,15 +60,15 @@ python bm25_retrieval.py --index scifact_index \
  --results bm25_retrieval_top20_dev.jsonl
 ```
 
-## Abstract Rerank (AR)
+## Abstract Reranking (AR)
 
-1. create input file for T5 inference
+1. Create input file for T5 inference
 ```
-python prepare_AR_input.py --corpus corpus.jsonl \
+python prepare_ar_input.py --corpus corpus.jsonl \
  --claims claims_dev.jsonl \
  --retrieval bm25_retrieval_top20_dev.jsonl \
- --t5_input_ids AR_inference_dev_ids.txt \
- --t5_input AR_inference_dev.txt
+ --t5_input_ids ar_inference_dev_ids.txt \
+ --t5_input ar_inference_dev.txt
 ```
 
 2. Run T5 inference for abstract reranking
@@ -76,16 +77,16 @@ t5_mesh_transformer \
  --tpu="${TPU_NAME}" \
  --gcp_project="${PROJECT_NAME}" \
  --tpu_zone="europe-west4-a" \
- --model_dir="gs://neuralresearcher_data/doc2query/experiments/363" \
+ --model_dir="gs://castorini/med-monot5/experiments/3B" \
  --gin_file="gs://t5-data/pretrained_models/3B/operative_config.gin" \
  --gin_file="infer.gin" \
  --gin_file="beam_search.gin" \
  --gin_param="utils.tpu_mesh_shape.tpu_topology = '2x2'" \
  --gin_param="infer_checkpoint_step = 1010000" \
  --gin_param="utils.run.sequence_length = {'inputs': 512, 'targets': 2}" \
  --gin_param="Bitransformer.decode.max_decode_length = 2" \
- --gin_param="input_filename = 'AR_inference_dev.txt'" \
- --gin_param="output_filename = 'AR_inference_dev_monot5-3b_output.txt'" \
+ --gin_param="input_filename = 'ar_inference_dev.txt'" \
+ --gin_param="output_filename = 'ar_inference_dev_monot5-3b_output.txt'" \
  --gin_param="tokens_per_batch = 65536" \
  --gin_param="Bitransformer.decode.beam_size = 1" \
  --gin_param="Bitransformer.decode.temperature = 0.0" \
@@ -94,21 +95,21 @@ t5_mesh_transformer \
 
 3. Convert T5 output to abstract retrieval/rerank result file (in `jsonl` format)
 ```
-python create_AR_result.py --t5_output_ids AR_inference_dev_ids.txt \
- --t5_output AR_inference_dev_monot5-3b_output.txt-1010000 \
+python create_ar_result.py --t5_output_ids ar_inference_dev_ids.txt \
+ --t5_output ar_inference_dev_monot5-3b_output.txt-1010000 \
  --topk 3 \
- --results AR_dev.jsonl
+ --results ar_dev.jsonl
 ```
 
 ## Sentence Selection (SS)
 
-1. prepare T5 input for sentence selection from abstract retrieval result
+1. Prepare T5 input for sentence selection from the abstract retrieval result
 ```
-python prepare_SS_input.py --corpus corpus.jsonl \
+python prepare_ss_input.py --corpus corpus.jsonl \
  --claims claims_dev.jsonl \
- --retrieval AR_dev.jsonl \
- --t5_input_ids SS_inference_dev_ids.txt \
- --t5_input SS_inference_dev.txt
+ --retrieval ar_dev.jsonl \
+ --t5_input_ids ss_inference_dev_ids.txt \
+ --t5_input ss_inference_dev.txt
 ```
 
 2. Run T5 inference for sentence selection
@@ -125,8 +126,8 @@ t5_mesh_transformer \
  --gin_param="infer_checkpoint_step = 1012500" \
  --gin_param="utils.run.sequence_length = {'inputs': 512, 'targets': 2}" \
  --gin_param="Bitransformer.decode.max_decode_length = 2" \
- --gin_param="input_filename = 'SS_inference_dev.txt'" \
- --gin_param="output_filename = 'SS_inference_dev_monot5-3b_output.txt'" \
+ --gin_param="input_filename = 'ss_inference_dev.txt'" \
+ --gin_param="output_filename = 'ss_inference_dev_monot5-3b_output.txt'" \
  --gin_param="tokens_per_batch = 65536" \
  --gin_param="Bitransformer.decode.beam_size = 1" \
  --gin_param="Bitransformer.decode.temperature = 0.0" \
@@ -135,20 +136,21 @@ t5_mesh_transformer \
 
 3. Convert T5 output to sentence selection result file (in `jsonl` format)
 ```
-python create_SS_result.py --claims claims_dev.jsonl \
- --t5_output_ids SS_inference_dev_ids.txt \
- --t5_output SS_inference_dev_monot5-3b_output.txt-1012500 \
- --results SS_dev.jsonl
+python create_ss_result.py --claims claims_dev.jsonl \
+ --t5_output_ids ss_inference_dev_ids.txt \
+ --t5_output ss_inference_dev_monot5-3b_output.txt-1012500 \
+ --results ss_dev.jsonl
 ```
 
 ## Label Prediction (LP)
-1. prepare T5 input for label prediction from sentence selection result
+
+1. Prepare T5 input for label prediction from sentence selection result
 ```
-python prepare_LP_input.py --corpus corpus.jsonl \
+python prepare_lp_input.py --corpus corpus.jsonl \
  --claims claims_dev.jsonl \
- --sentence_selection SS_dev.jsonl \
- --t5_input_ids LP_inference_dev_ids.txt \
- --t5_input LP_inference_dev.txt
+ --sentence_selection ss_dev.jsonl \
+ --t5_input_ids lp_inference_dev_ids.txt \
+ --t5_input lp_inference_dev.txt
 ```
 
 2. Run T5 inference for label prediction
@@ -165,8 +167,8 @@ t5_mesh_transformer \
  --gin_param="infer_checkpoint_step = 1000600" \
  --gin_param="utils.run.sequence_length = {'inputs': 512, 'targets': 2}" \
  --gin_param="Bitransformer.decode.max_decode_length = 2" \
- --gin_param="input_filename = 'LP_inference_dev.txt'" \
- --gin_param="output_filename = 'LP_inference_dev_monot5-3b_output.txt'" \
+ --gin_param="input_filename = 'lp_inference_dev.txt'" \
+ --gin_param="output_filename = 'lp_inference_dev_monot5-3b_output.txt'" \
  --gin_param="tokens_per_batch = 65536" \
  --gin_param="Bitransformer.decode.beam_size = 1" \
  --gin_param="Bitransformer.decode.temperature = 0.0" \
@@ -175,23 +177,23 @@ t5_mesh_transformer \
 
 3. Convert T5 output to label prediction result file (in `jsonl` format)
 ```
-python create_LP_result.py --t5_output_ids LP_inference_dev_ids.txt \
- --t5_output LP_inference_dev_monot5-3b_output.txt-1000600 \
+python create_lp_result.py --t5_output_ids lp_inference_dev_ids.txt \
+ --t5_output lp_inference_dev_monot5-3b_output.txt-1000600 \
  --claims claims_dev.jsonl \
- --results LP_dev.jsonl
+ --results lp_dev.jsonl
 ```
 
 ## Evaluate
 
-1. create full pipeline evaluate file
+1. Create full pipeline evaluation file
 ```
-python create_full_pipe_eval.py --AR_result AR_dev.jsonl \
- --SS_result SS_dev.jsonl \
- --LP_result LP_dev.jsonl \
- --LP_eval full_pipeline_eval.jsonl
+python create_full_pipe_eval.py --ar_result ar_dev.jsonl \
+ --ss_result ss_dev.jsonl \
+ --lp_result lp_dev.jsonl \
+ --lp_eval full_pipeline_eval.jsonl
 ```
 
-2. evaluate full pipeline
+2. Evaluate full pipeline
 Please download the evaluate folder from the original [SciFact](https://github.com/allenai/scifact) repo. 
 
 ```
@@ -202,14 +204,15 @@ python evaluate/pipeline.py --gold claims_dev.jsonl \
 
 ## Result
 
-We are expect to see following result for the full pipeline evaluation
+We can expect to see the following results for the full pipeline evaluation of the development set:
 
 | | sentence_selection | sentence_label | abstract_label_only | abstract_rationalized |
 |---|---|---|---|---|
 |precision | 0.644172 | 0.604294 | 0.650718 | 0.617225 |
 |recall | 0.573770 | 0.538251 | 0.650718 | 0.617225 |
 |f1 | 0.606936 | 0.569364 | 0.650718 | 0.617225 |
 
+## Replication Log
 
 
 

diff --git a/experiments/verT5erini/create_full_pipe_eval.py b/experiments/verT5erini/create_full_pipe_eval.py
@@ -4,19 +4,19 @@
 from tqdm import tqdm
 
 parser = argparse.ArgumentParser()
-parser.add_argument("--AR_result", type=str, required=True)
-parser.add_argument("--SS_result", type=str, required=True)
-parser.add_argument("--LP_result", type=str, required=True)
-# parser.add_argument("--SS_eval", type=str, required=True)
-parser.add_argument("--LP_eval", type=str, required=True)
+parser.add_argument("--ar_result", type=str, required=True)
+parser.add_argument("--ss_result", type=str, required=True)
+parser.add_argument("--lp_result", type=str, required=True)
+# parser.add_argument("--ss_eval", type=str, required=True)
+parser.add_argument("--lp_eval", type=str, required=True)
 args = parser.parse_args()
 
 
-claim_labels = jsonlines.open(args.LP_result)
-evaluate_file = jsonlines.open(args.LP_eval, mode="w")
-abstract_retrieval = jsonlines.open(args.AR_result)
-rationale_selection = jsonlines.open(args.SS_result)
-# rationale_selection_eval = jsonlines.open(args.SS_eval, mode="w")
+claim_labels = jsonlines.open(args.lp_result)
+evaluate_file = jsonlines.open(args.lp_eval, mode="w")
+abstract_retrieval = jsonlines.open(args.ar_result)
+rationale_selection = jsonlines.open(args.ss_result)
+# rationale_selection_eval = jsonlines.open(args.ss_eval, mode="w")
 
 selection_dict = {}
 for line in rationale_selection:

diff --git a/experiments/vert5erini/requirements.txt b/experiments/vert5erini/requirements.txt
@@ -0,0 +1 @@
+jsonlines